gcc/expmed.c

   1 /* Medium-level subroutines: convert bit-field store and extract
   2    and shifts, multiplies and divides to rtl instructions.
   3    Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
   4    1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
   5    2011
   6    Free Software Foundation, Inc.
   7
   8 This file is part of GCC.
   9
  10 GCC is free software; you can redistribute it and/or modify it under
  11 the terms of the GNU General Public License as published by the Free
  12 Software Foundation; either version 3, or (at your option) any later
  13 version.
  14
  15 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  16 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  17 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  18 for more details.
  19
  20 You should have received a copy of the GNU General Public License
  21 along with GCC; see the file COPYING3.  If not see
  22 <http://www.gnu.org/licenses/>.  */
  23
  24
  25 #include "config.h"
  26 #include "system.h"
  27 #include "coretypes.h"
  28 #include "tm.h"
  29 #include "diagnostic-core.h"
  30 #include "rtl.h"
  31 #include "tree.h"
  32 #include "tm_p.h"
  33 #include "flags.h"
  34 #include "insn-config.h"
  35 #include "expr.h"
  36 #include "optabs.h"
  37 #include "recog.h"
  38 #include "langhooks.h"
  39 #include "df.h"
  40 #include "target.h"
  41 #include "expmed.h"
  42
  43 struct target_expmed default_target_expmed;
  44 #if SWITCHABLE_TARGET
  45 struct target_expmed *this_target_expmed = &default_target_expmed;
  46 #endif
  47
  48 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
  49                                    unsigned HOST_WIDE_INT,
  50                                    unsigned HOST_WIDE_INT, rtx);
  51 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  52                                    unsigned HOST_WIDE_INT, rtx);
  53 static rtx extract_fixed_bit_field (enum machine_mode, rtx,
  54                                     unsigned HOST_WIDE_INT,
  55                                     unsigned HOST_WIDE_INT,
  56                                     unsigned HOST_WIDE_INT, rtx, int, bool);
  57 static rtx mask_rtx (enum machine_mode, int, int, int);
  58 static rtx lshift_value (enum machine_mode, rtx, int, int);
  59 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  60                                     unsigned HOST_WIDE_INT, int);
  61 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
  62 static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
  63 static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
  64
  65 /* Test whether a value is zero of a power of two.  */
  66 #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
  67
  68 #ifndef SLOW_UNALIGNED_ACCESS
  69 #define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
  70 #endif
  71
  72
  73 /* Reduce conditional compilation elsewhere.  */
  74 #ifndef HAVE_insv
  75 #define HAVE_insv       0
  76 #define CODE_FOR_insv   CODE_FOR_nothing
  77 #define gen_insv(a,b,c,d) NULL_RTX
  78 #endif
  79 #ifndef HAVE_extv
  80 #define HAVE_extv       0
  81 #define CODE_FOR_extv   CODE_FOR_nothing
  82 #define gen_extv(a,b,c,d) NULL_RTX
  83 #endif
  84 #ifndef HAVE_extzv
  85 #define HAVE_extzv      0
  86 #define CODE_FOR_extzv  CODE_FOR_nothing
  87 #define gen_extzv(a,b,c,d) NULL_RTX
  88 #endif
  89
  90 void
  91 init_expmed (void)
  92 {
  93   struct
  94   {
  95     struct rtx_def reg;         rtunion reg_fld[2];
  96     struct rtx_def plus;        rtunion plus_fld1;
  97     struct rtx_def neg;
  98     struct rtx_def mult;        rtunion mult_fld1;
  99     struct rtx_def sdiv;        rtunion sdiv_fld1;
 100     struct rtx_def udiv;        rtunion udiv_fld1;
 101     struct rtx_def zext;
 102     struct rtx_def sdiv_32;     rtunion sdiv_32_fld1;
 103     struct rtx_def smod_32;     rtunion smod_32_fld1;
 104     struct rtx_def wide_mult;   rtunion wide_mult_fld1;
 105     struct rtx_def wide_lshr;   rtunion wide_lshr_fld1;
 106     struct rtx_def wide_trunc;
 107     struct rtx_def shift;       rtunion shift_fld1;
 108     struct rtx_def shift_mult;  rtunion shift_mult_fld1;
 109     struct rtx_def shift_add;   rtunion shift_add_fld1;
 110     struct rtx_def shift_sub0;  rtunion shift_sub0_fld1;
 111     struct rtx_def shift_sub1;  rtunion shift_sub1_fld1;
 112   } all;
 113
 114   rtx pow2[MAX_BITS_PER_WORD];
 115   rtx cint[MAX_BITS_PER_WORD];
 116   int m, n;
 117   enum machine_mode mode, wider_mode;
 118   int speed;
 119
 120
 121   for (m = 1; m < MAX_BITS_PER_WORD; m++)
 122     {
 123       pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
 124       cint[m] = GEN_INT (m);
 125     }
 126   memset (&all, 0, sizeof all);
 127
 128   PUT_CODE (&all.reg, REG);
 129   /* Avoid using hard regs in ways which may be unsupported.  */
 130   SET_REGNO (&all.reg, LAST_VIRTUAL_REGISTER + 1);
 131
 132   PUT_CODE (&all.plus, PLUS);
 133   XEXP (&all.plus, 0) = &all.reg;
 134   XEXP (&all.plus, 1) = &all.reg;
 135
 136   PUT_CODE (&all.neg, NEG);
 137   XEXP (&all.neg, 0) = &all.reg;
 138
 139   PUT_CODE (&all.mult, MULT);
 140   XEXP (&all.mult, 0) = &all.reg;
 141   XEXP (&all.mult, 1) = &all.reg;
 142
 143   PUT_CODE (&all.sdiv, DIV);
 144   XEXP (&all.sdiv, 0) = &all.reg;
 145   XEXP (&all.sdiv, 1) = &all.reg;
 146
 147   PUT_CODE (&all.udiv, UDIV);
 148   XEXP (&all.udiv, 0) = &all.reg;
 149   XEXP (&all.udiv, 1) = &all.reg;
 150
 151   PUT_CODE (&all.sdiv_32, DIV);
 152   XEXP (&all.sdiv_32, 0) = &all.reg;
 153   XEXP (&all.sdiv_32, 1) = 32 < MAX_BITS_PER_WORD ? cint[32] : GEN_INT (32);
 154
 155   PUT_CODE (&all.smod_32, MOD);
 156   XEXP (&all.smod_32, 0) = &all.reg;
 157   XEXP (&all.smod_32, 1) = XEXP (&all.sdiv_32, 1);
 158
 159   PUT_CODE (&all.zext, ZERO_EXTEND);
 160   XEXP (&all.zext, 0) = &all.reg;
 161
 162   PUT_CODE (&all.wide_mult, MULT);
 163   XEXP (&all.wide_mult, 0) = &all.zext;
 164   XEXP (&all.wide_mult, 1) = &all.zext;
 165
 166   PUT_CODE (&all.wide_lshr, LSHIFTRT);
 167   XEXP (&all.wide_lshr, 0) = &all.wide_mult;
 168
 169   PUT_CODE (&all.wide_trunc, TRUNCATE);
 170   XEXP (&all.wide_trunc, 0) = &all.wide_lshr;
 171
 172   PUT_CODE (&all.shift, ASHIFT);
 173   XEXP (&all.shift, 0) = &all.reg;
 174
 175   PUT_CODE (&all.shift_mult, MULT);
 176   XEXP (&all.shift_mult, 0) = &all.reg;
 177
 178   PUT_CODE (&all.shift_add, PLUS);
 179   XEXP (&all.shift_add, 0) = &all.shift_mult;
 180   XEXP (&all.shift_add, 1) = &all.reg;
 181
 182   PUT_CODE (&all.shift_sub0, MINUS);
 183   XEXP (&all.shift_sub0, 0) = &all.shift_mult;
 184   XEXP (&all.shift_sub0, 1) = &all.reg;
 185
 186   PUT_CODE (&all.shift_sub1, MINUS);
 187   XEXP (&all.shift_sub1, 0) = &all.reg;
 188   XEXP (&all.shift_sub1, 1) = &all.shift_mult;
 189
 190   for (speed = 0; speed < 2; speed++)
 191     {
 192       crtl->maybe_hot_insn_p = speed;
 193       zero_cost[speed] = rtx_cost (const0_rtx, SET, speed);
 194
 195       for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
 196            mode != VOIDmode;
 197            mode = GET_MODE_WIDER_MODE (mode))
 198         {
 199           PUT_MODE (&all.reg, mode);
 200           PUT_MODE (&all.plus, mode);
 201           PUT_MODE (&all.neg, mode);
 202           PUT_MODE (&all.mult, mode);
 203           PUT_MODE (&all.sdiv, mode);
 204           PUT_MODE (&all.udiv, mode);
 205           PUT_MODE (&all.sdiv_32, mode);
 206           PUT_MODE (&all.smod_32, mode);
 207           PUT_MODE (&all.wide_trunc, mode);
 208           PUT_MODE (&all.shift, mode);
 209           PUT_MODE (&all.shift_mult, mode);
 210           PUT_MODE (&all.shift_add, mode);
 211           PUT_MODE (&all.shift_sub0, mode);
 212           PUT_MODE (&all.shift_sub1, mode);
 213
 214           add_cost[speed][mode] = rtx_cost (&all.plus, SET, speed);
 215           neg_cost[speed][mode] = rtx_cost (&all.neg, SET, speed);
 216           mul_cost[speed][mode] = rtx_cost (&all.mult, SET, speed);
 217           sdiv_cost[speed][mode] = rtx_cost (&all.sdiv, SET, speed);
 218           udiv_cost[speed][mode] = rtx_cost (&all.udiv, SET, speed);
 219
 220           sdiv_pow2_cheap[speed][mode] = (rtx_cost (&all.sdiv_32, SET, speed)
 221                                           <= 2 * add_cost[speed][mode]);
 222           smod_pow2_cheap[speed][mode] = (rtx_cost (&all.smod_32, SET, speed)
 223                                           <= 4 * add_cost[speed][mode]);
 224
 225           wider_mode = GET_MODE_WIDER_MODE (mode);
 226           if (wider_mode != VOIDmode)
 227             {
 228               PUT_MODE (&all.zext, wider_mode);
 229               PUT_MODE (&all.wide_mult, wider_mode);
 230               PUT_MODE (&all.wide_lshr, wider_mode);
 231               XEXP (&all.wide_lshr, 1) = GEN_INT (GET_MODE_BITSIZE (mode));
 232
 233               mul_widen_cost[speed][wider_mode]
 234                 = rtx_cost (&all.wide_mult, SET, speed);
 235               mul_highpart_cost[speed][mode]
 236                 = rtx_cost (&all.wide_trunc, SET, speed);
 237             }
 238
 239           shift_cost[speed][mode][0] = 0;
 240           shiftadd_cost[speed][mode][0] = shiftsub0_cost[speed][mode][0]
 241             = shiftsub1_cost[speed][mode][0] = add_cost[speed][mode];
 242
 243           n = MIN (MAX_BITS_PER_WORD, GET_MODE_BITSIZE (mode));
 244           for (m = 1; m < n; m++)
 245             {
 246               XEXP (&all.shift, 1) = cint[m];
 247               XEXP (&all.shift_mult, 1) = pow2[m];
 248
 249               shift_cost[speed][mode][m] = rtx_cost (&all.shift, SET, speed);
 250               shiftadd_cost[speed][mode][m] = rtx_cost (&all.shift_add, SET, speed);
 251               shiftsub0_cost[speed][mode][m] = rtx_cost (&all.shift_sub0, SET, speed);
 252               shiftsub1_cost[speed][mode][m] = rtx_cost (&all.shift_sub1, SET, speed);
 253             }
 254         }
 255     }
 256   if (alg_hash_used_p)
 257     memset (alg_hash, 0, sizeof (alg_hash));
 258   else
 259     alg_hash_used_p = true;
 260   default_rtl_profile ();
 261 }
 262
 263 /* Return an rtx representing minus the value of X.
 264    MODE is the intended mode of the result,
 265    useful if X is a CONST_INT.  */
 266
 267 rtx
 268 negate_rtx (enum machine_mode mode, rtx x)
 269 {
 270   rtx result = simplify_unary_operation (NEG, mode, x, mode);
 271
 272   if (result == 0)
 273     result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
 274
 275   return result;
 276 }
 277
 278 /* Report on the availability of insv/extv/extzv and the desired mode
 279    of each of their operands.  Returns MAX_MACHINE_MODE if HAVE_foo
 280    is false; else the mode of the specified operand.  If OPNO is -1,
 281    all the caller cares about is whether the insn is available.  */
 282 enum machine_mode
 283 mode_for_extraction (enum extraction_pattern pattern, int opno)
 284 {
 285   const struct insn_data_d *data;
 286
 287   switch (pattern)
 288     {
 289     case EP_insv:
 290       if (HAVE_insv)
 291         {
 292           data = &insn_data[CODE_FOR_insv];
 293           break;
 294         }
 295       return MAX_MACHINE_MODE;
 296
 297     case EP_extv:
 298       if (HAVE_extv)
 299         {
 300           data = &insn_data[CODE_FOR_extv];
 301           break;
 302         }
 303       return MAX_MACHINE_MODE;
 304
 305     case EP_extzv:
 306       if (HAVE_extzv)
 307         {
 308           data = &insn_data[CODE_FOR_extzv];
 309           break;
 310         }
 311       return MAX_MACHINE_MODE;
 312
 313     default:
 314       gcc_unreachable ();
 315     }
 316
 317   if (opno == -1)
 318     return VOIDmode;
 319
 320   /* Everyone who uses this function used to follow it with
 321      if (result == VOIDmode) result = word_mode; */
 322   if (data->operand[opno].mode == VOIDmode)
 323     return word_mode;
 324   return data->operand[opno].mode;
 325 }
 326 \f
 327 /* A subroutine of store_bit_field, with the same arguments.  Return true
 328    if the operation could be implemented.
 329
 330    If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
 331    no other way of implementing the operation.  If FALLBACK_P is false,
 332    return false instead.  */
 333
 334 static bool
 335 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 336                    unsigned HOST_WIDE_INT bitnum, enum machine_mode fieldmode,
 337                    rtx value, bool fallback_p)
 338 {
 339   unsigned int unit
 340     = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
 341   unsigned HOST_WIDE_INT offset, bitpos;
 342   rtx op0 = str_rtx;
 343   int byte_offset;
 344   rtx orig_value;
 345
 346   enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);
 347
 348   while (GET_CODE (op0) == SUBREG)
 349     {
 350       /* The following line once was done only if WORDS_BIG_ENDIAN,
 351          but I think that is a mistake.  WORDS_BIG_ENDIAN is
 352          meaningful at a much higher level; when structures are copied
 353          between memory and regs, the higher-numbered regs
 354          always get higher addresses.  */
 355       int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
 356       int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
 357
 358       byte_offset = 0;
 359
 360       /* Paradoxical subregs need special handling on big endian machines.  */
 361       if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
 362         {
 363           int difference = inner_mode_size - outer_mode_size;
 364
 365           if (WORDS_BIG_ENDIAN)
 366             byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
 367           if (BYTES_BIG_ENDIAN)
 368             byte_offset += difference % UNITS_PER_WORD;
 369         }
 370       else
 371         byte_offset = SUBREG_BYTE (op0);
 372
 373       bitnum += byte_offset * BITS_PER_UNIT;
 374       op0 = SUBREG_REG (op0);
 375     }
 376
 377   /* No action is needed if the target is a register and if the field
 378      lies completely outside that register.  This can occur if the source
 379      code contains an out-of-bounds access to a small array.  */
 380   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
 381     return true;
 382
 383   /* Use vec_set patterns for inserting parts of vectors whenever
 384      available.  */
 385   if (VECTOR_MODE_P (GET_MODE (op0))
 386       && !MEM_P (op0)
 387       && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
 388       && fieldmode == GET_MODE_INNER (GET_MODE (op0))
 389       && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
 390       && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
 391     {
 392       struct expand_operand ops[3];
 393       enum machine_mode outermode = GET_MODE (op0);
 394       enum machine_mode innermode = GET_MODE_INNER (outermode);
 395       enum insn_code icode = optab_handler (vec_set_optab, outermode);
 396       int pos = bitnum / GET_MODE_BITSIZE (innermode);
 397
 398       create_fixed_operand (&ops[0], op0);
 399       create_input_operand (&ops[1], value, innermode);
 400       create_integer_operand (&ops[2], pos);
 401       if (maybe_expand_insn (icode, 3, ops))
 402         return true;
 403     }
 404
 405   /* If the target is a register, overwriting the entire object, or storing
 406      a full-word or multi-word field can be done with just a SUBREG.
 407
 408      If the target is memory, storing any naturally aligned field can be
 409      done with a simple store.  For targets that support fast unaligned
 410      memory, any naturally sized, unit aligned field can be done directly.  */
 411
 412   offset = bitnum / unit;
 413   bitpos = bitnum % unit;
 414   byte_offset = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
 415                 + (offset * UNITS_PER_WORD);
 416
 417   if (bitpos == 0
 418       && bitsize == GET_MODE_BITSIZE (fieldmode)
 419       && (!MEM_P (op0)
 420           ? ((GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD
 421               || GET_MODE_SIZE (GET_MODE (op0)) == GET_MODE_SIZE (fieldmode))
 422              && ((GET_MODE (op0) == fieldmode && byte_offset == 0)
 423                  || validate_subreg (fieldmode, GET_MODE (op0), op0,
 424                                      byte_offset)))
 425           : (! SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0))
 426              || (offset * BITS_PER_UNIT % bitsize == 0
 427                  && MEM_ALIGN (op0) % GET_MODE_BITSIZE (fieldmode) == 0))))
 428     {
 429       if (MEM_P (op0))
 430         op0 = adjust_address (op0, fieldmode, offset);
 431       else if (GET_MODE (op0) != fieldmode)
 432         op0 = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
 433                                    byte_offset);
 434       emit_move_insn (op0, value);
 435       return true;
 436     }
 437
 438   /* Make sure we are playing with integral modes.  Pun with subregs
 439      if we aren't.  This must come after the entire register case above,
 440      since that case is valid for any mode.  The following cases are only
 441      valid for integral modes.  */
 442   {
 443     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
 444     if (imode != GET_MODE (op0))
 445       {
 446         if (MEM_P (op0))
 447           op0 = adjust_address (op0, imode, 0);
 448         else
 449           {
 450             gcc_assert (imode != BLKmode);
 451             op0 = gen_lowpart (imode, op0);
 452           }
 453       }
 454   }
 455
 456   /* We may be accessing data outside the field, which means
 457      we can alias adjacent data.  */
 458   if (MEM_P (op0))
 459     {
 460       op0 = shallow_copy_rtx (op0);
 461       set_mem_alias_set (op0, 0);
 462       set_mem_expr (op0, 0);
 463     }
 464
 465   /* If OP0 is a register, BITPOS must count within a word.
 466      But as we have it, it counts within whatever size OP0 now has.
 467      On a bigendian machine, these are not the same, so convert.  */
 468   if (BYTES_BIG_ENDIAN
 469       && !MEM_P (op0)
 470       && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
 471     bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
 472
 473   /* Storing an lsb-aligned field in a register
 474      can be done with a movestrict instruction.  */
 475
 476   if (!MEM_P (op0)
 477       && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
 478       && bitsize == GET_MODE_BITSIZE (fieldmode)
 479       && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
 480     {
 481       struct expand_operand ops[2];
 482       enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
 483       rtx arg0 = op0;
 484       unsigned HOST_WIDE_INT subreg_off;
 485
 486       if (GET_CODE (arg0) == SUBREG)
 487         {
 488           /* Else we've got some float mode source being extracted into
 489              a different float mode destination -- this combination of
 490              subregs results in Severe Tire Damage.  */
 491           gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
 492                       || GET_MODE_CLASS (fieldmode) == MODE_INT
 493                       || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
 494           arg0 = SUBREG_REG (arg0);
 495         }
 496
 497       subreg_off = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
 498                    + (offset * UNITS_PER_WORD);
 499       if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
 500         {
 501           arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
 502
 503           create_fixed_operand (&ops[0], arg0);
 504           /* Shrink the source operand to FIELDMODE.  */
 505           create_convert_operand_to (&ops[1], value, fieldmode, false);
 506           if (maybe_expand_insn (icode, 2, ops))
 507             return true;
 508         }
 509     }
 510
 511   /* Handle fields bigger than a word.  */
 512
 513   if (bitsize > BITS_PER_WORD)
 514     {
 515       /* Here we transfer the words of the field
 516          in the order least significant first.
 517          This is because the most significant word is the one which may
 518          be less than full.
 519          However, only do that if the value is not BLKmode.  */
 520
 521       unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
 522       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
 523       unsigned int i;
 524       rtx last;
 525
 526       /* This is the mode we must force value to, so that there will be enough
 527          subwords to extract.  Note that fieldmode will often (always?) be
 528          VOIDmode, because that is what store_field uses to indicate that this
 529          is a bit field, but passing VOIDmode to operand_subword_force
 530          is not allowed.  */
 531       fieldmode = GET_MODE (value);
 532       if (fieldmode == VOIDmode)
 533         fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
 534
 535       last = get_last_insn ();
 536       for (i = 0; i < nwords; i++)
 537         {
 538           /* If I is 0, use the low-order word in both field and target;
 539              if I is 1, use the next to lowest word; and so on.  */
 540           unsigned int wordnum = (backwards ? nwords - i - 1 : i);
 541           unsigned int bit_offset = (backwards
 542                                      ? MAX ((int) bitsize - ((int) i + 1)
 543                                             * BITS_PER_WORD,
 544                                             0)
 545                                      : (int) i * BITS_PER_WORD);
 546           rtx value_word = operand_subword_force (value, wordnum, fieldmode);
 547
 548           if (!store_bit_field_1 (op0, MIN (BITS_PER_WORD,
 549                                             bitsize - i * BITS_PER_WORD),
 550                                   bitnum + bit_offset, word_mode,
 551                                   value_word, fallback_p))
 552             {
 553               delete_insns_since (last);
 554               return false;
 555             }
 556         }
 557       return true;
 558     }
 559
 560   /* From here on we can assume that the field to be stored in is
 561      a full-word (whatever type that is), since it is shorter than a word.  */
 562
 563   /* OFFSET is the number of words or bytes (UNIT says which)
 564      from STR_RTX to the first word or byte containing part of the field.  */
 565
 566   if (!MEM_P (op0))
 567     {
 568       if (offset != 0
 569           || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
 570         {
 571           if (!REG_P (op0))
 572             {
 573               /* Since this is a destination (lvalue), we can't copy
 574                  it to a pseudo.  We can remove a SUBREG that does not
 575                  change the size of the operand.  Such a SUBREG may
 576                  have been added above.  */
 577               gcc_assert (GET_CODE (op0) == SUBREG
 578                           && (GET_MODE_SIZE (GET_MODE (op0))
 579                               == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)))));
 580               op0 = SUBREG_REG (op0);
 581             }
 582           op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
 583                                 op0, (offset * UNITS_PER_WORD));
 584         }
 585       offset = 0;
 586     }
 587
 588   /* If VALUE has a floating-point or complex mode, access it as an
 589      integer of the corresponding size.  This can occur on a machine
 590      with 64 bit registers that uses SFmode for float.  It can also
 591      occur for unaligned float or complex fields.  */
 592   orig_value = value;
 593   if (GET_MODE (value) != VOIDmode
 594       && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
 595       && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
 596     {
 597       value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
 598       emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
 599     }
 600
 601   /* Now OFFSET is nonzero only if OP0 is memory
 602      and is therefore always measured in bytes.  */
 603
 604   if (HAVE_insv
 605       && GET_MODE (value) != BLKmode
 606       && bitsize > 0
 607       && GET_MODE_BITSIZE (op_mode) >= bitsize
 608       && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
 609             && (bitsize + bitpos > GET_MODE_BITSIZE (op_mode))))
 610     {
 611       struct expand_operand ops[4];
 612       int xbitpos = bitpos;
 613       rtx value1;
 614       rtx xop0 = op0;
 615       rtx last = get_last_insn ();
 616       bool copy_back = false;
 617
 618       /* Add OFFSET into OP0's address.  */
 619       if (MEM_P (xop0))
 620         xop0 = adjust_address (xop0, byte_mode, offset);
 621
 622       /* If xop0 is a register, we need it in OP_MODE
 623          to make it acceptable to the format of insv.  */
 624       if (GET_CODE (xop0) == SUBREG)
 625         /* We can't just change the mode, because this might clobber op0,
 626            and we will need the original value of op0 if insv fails.  */
 627         xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
 628       if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
 629         xop0 = gen_lowpart_SUBREG (op_mode, xop0);
 630
 631       /* If the destination is a paradoxical subreg such that we need a
 632          truncate to the inner mode, perform the insertion on a temporary and
 633          truncate the result to the original destination.  Note that we can't
 634          just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
 635          X) 0)) is (reg:N X).  */
 636       if (GET_CODE (xop0) == SUBREG
 637           && REG_P (SUBREG_REG (xop0))
 638           && (!TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
 639                                               op_mode)))
 640         {
 641           rtx tem = gen_reg_rtx (op_mode);
 642           emit_move_insn (tem, xop0);
 643           xop0 = tem;
 644           copy_back = true;
 645         }
 646
 647       /* On big-endian machines, we count bits from the most significant.
 648          If the bit field insn does not, we must invert.  */
 649
 650       if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
 651         xbitpos = unit - bitsize - xbitpos;
 652
 653       /* We have been counting XBITPOS within UNIT.
 654          Count instead within the size of the register.  */
 655       if (BITS_BIG_ENDIAN && !MEM_P (xop0))
 656         xbitpos += GET_MODE_BITSIZE (op_mode) - unit;
 657
 658       unit = GET_MODE_BITSIZE (op_mode);
 659
 660       /* Convert VALUE to op_mode (which insv insn wants) in VALUE1.  */
 661       value1 = value;
 662       if (GET_MODE (value) != op_mode)
 663         {
 664           if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
 665             {
 666               /* Optimization: Don't bother really extending VALUE
 667                  if it has all the bits we will actually use.  However,
 668                  if we must narrow it, be sure we do it correctly.  */
 669
 670               if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
 671                 {
 672                   rtx tmp;
 673
 674                   tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
 675                   if (! tmp)
 676                     tmp = simplify_gen_subreg (op_mode,
 677                                                force_reg (GET_MODE (value),
 678                                                           value1),
 679                                                GET_MODE (value), 0);
 680                   value1 = tmp;
 681                 }
 682               else
 683                 value1 = gen_lowpart (op_mode, value1);
 684             }
 685           else if (CONST_INT_P (value))
 686             value1 = gen_int_mode (INTVAL (value), op_mode);
 687           else
 688             /* Parse phase is supposed to make VALUE's data type
 689                match that of the component reference, which is a type
 690                at least as wide as the field; so VALUE should have
 691                a mode that corresponds to that type.  */
 692             gcc_assert (CONSTANT_P (value));
 693         }
 694
 695       create_fixed_operand (&ops[0], xop0);
 696       create_integer_operand (&ops[1], bitsize);
 697       create_integer_operand (&ops[2], xbitpos);
 698       create_input_operand (&ops[3], value1, op_mode);
 699       if (maybe_expand_insn (CODE_FOR_insv, 4, ops))
 700         {
 701           if (copy_back)
 702             convert_move (op0, xop0, true);
 703           return true;
 704         }
 705       delete_insns_since (last);
 706     }
 707
 708   /* If OP0 is a memory, try copying it to a register and seeing if a
 709      cheap register alternative is available.  */
 710   if (HAVE_insv && MEM_P (op0))
 711     {
 712       enum machine_mode bestmode;
 713
 714       /* Get the mode to use for inserting into this field.  If OP0 is
 715          BLKmode, get the smallest mode consistent with the alignment. If
 716          OP0 is a non-BLKmode object that is no wider than OP_MODE, use its
 717          mode. Otherwise, use the smallest mode containing the field.  */
 718
 719       if (GET_MODE (op0) == BLKmode
 720           || (op_mode != MAX_MACHINE_MODE
 721               && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (op_mode)))
 722         bestmode = get_best_mode (bitsize, bitnum, MEM_ALIGN (op0),
 723                                   (op_mode == MAX_MACHINE_MODE
 724                                    ? VOIDmode : op_mode),
 725                                   MEM_VOLATILE_P (op0));
 726       else
 727         bestmode = GET_MODE (op0);
 728
 729       if (bestmode != VOIDmode
 730           && GET_MODE_SIZE (bestmode) >= GET_MODE_SIZE (fieldmode)
 731           && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
 732                && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
 733         {
 734           rtx last, tempreg, xop0;
 735           unsigned HOST_WIDE_INT xoffset, xbitpos;
 736
 737           last = get_last_insn ();
 738
 739           /* Adjust address to point to the containing unit of
 740              that mode.  Compute the offset as a multiple of this unit,
 741              counting in bytes.  */
 742           unit = GET_MODE_BITSIZE (bestmode);
 743           xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
 744           xbitpos = bitnum % unit;
 745           xop0 = adjust_address (op0, bestmode, xoffset);
 746
 747           /* Fetch that unit, store the bitfield in it, then store
 748              the unit.  */
 749           tempreg = copy_to_reg (xop0);
 750           if (store_bit_field_1 (tempreg, bitsize, xbitpos,
 751                                  fieldmode, orig_value, false))
 752             {
 753               emit_move_insn (xop0, tempreg);
 754               return true;
 755             }
 756           delete_insns_since (last);
 757         }
 758     }
 759
 760   if (!fallback_p)
 761     return false;
 762
 763   store_fixed_bit_field (op0, offset, bitsize, bitpos, value);
 764   return true;
 765 }
 766
 767 /* Generate code to store value from rtx VALUE
 768    into a bit-field within structure STR_RTX
 769    containing BITSIZE bits starting at bit BITNUM.
 770    FIELDMODE is the machine-mode of the FIELD_DECL node for this field.  */
 771
 772 void
 773 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 774                  unsigned HOST_WIDE_INT bitnum, enum machine_mode fieldmode,
 775                  rtx value)
 776 {
 777   if (!store_bit_field_1 (str_rtx, bitsize, bitnum, fieldmode, value, true))
 778     gcc_unreachable ();
 779 }
 780 \f
 781 /* Use shifts and boolean operations to store VALUE
 782    into a bit field of width BITSIZE
 783    in a memory location specified by OP0 except offset by OFFSET bytes.
 784      (OFFSET must be 0 if OP0 is a register.)
 785    The field starts at position BITPOS within the byte.
 786     (If OP0 is a register, it may be a full word or a narrower mode,
 787      but BITPOS still counts within a full word,
 788      which is significant on bigendian machines.)  */
 789
 790 static void
 791 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT offset,
 792                        unsigned HOST_WIDE_INT bitsize,
 793                        unsigned HOST_WIDE_INT bitpos, rtx value)
 794 {
 795   enum machine_mode mode;
 796   unsigned int total_bits = BITS_PER_WORD;
 797   rtx temp;
 798   int all_zero = 0;
 799   int all_one = 0;
 800
 801   /* There is a case not handled here:
 802      a structure with a known alignment of just a halfword
 803      and a field split across two aligned halfwords within the structure.
 804      Or likewise a structure with a known alignment of just a byte
 805      and a field split across two bytes.
 806      Such cases are not supposed to be able to occur.  */
 807
 808   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
 809     {
 810       gcc_assert (!offset);
 811       /* Special treatment for a bit field split across two registers.  */
 812       if (bitsize + bitpos > BITS_PER_WORD)
 813         {
 814           store_split_bit_field (op0, bitsize, bitpos, value);
 815           return;
 816         }
 817     }
 818   else
 819     {
 820       /* Get the proper mode to use for this field.  We want a mode that
 821          includes the entire field.  If such a mode would be larger than
 822          a word, we won't be doing the extraction the normal way.
 823          We don't want a mode bigger than the destination.  */
 824
 825       mode = GET_MODE (op0);
 826       if (GET_MODE_BITSIZE (mode) == 0
 827           || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
 828         mode = word_mode;
 829
 830       if (MEM_VOLATILE_P (op0)
 831           && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
 832           && flag_strict_volatile_bitfields > 0)
 833         mode = GET_MODE (op0);
 834       else
 835         mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
 836                               MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
 837
 838       if (mode == VOIDmode)
 839         {
 840           /* The only way this should occur is if the field spans word
 841              boundaries.  */
 842           store_split_bit_field (op0, bitsize, bitpos + offset * BITS_PER_UNIT,
 843                                  value);
 844           return;
 845         }
 846
 847       total_bits = GET_MODE_BITSIZE (mode);
 848
 849       /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
 850          be in the range 0 to total_bits-1, and put any excess bytes in
 851          OFFSET.  */
 852       if (bitpos >= total_bits)
 853         {
 854           offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
 855           bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
 856                      * BITS_PER_UNIT);
 857         }
 858
 859       /* Get ref to an aligned byte, halfword, or word containing the field.
 860          Adjust BITPOS to be position within a word,
 861          and OFFSET to be the offset of that word.
 862          Then alter OP0 to refer to that word.  */
 863       bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
 864       offset -= (offset % (total_bits / BITS_PER_UNIT));
 865       op0 = adjust_address (op0, mode, offset);
 866     }
 867
 868   mode = GET_MODE (op0);
 869
 870   /* Now MODE is either some integral mode for a MEM as OP0,
 871      or is a full-word for a REG as OP0.  TOTAL_BITS corresponds.
 872      The bit field is contained entirely within OP0.
 873      BITPOS is the starting bit number within OP0.
 874      (OP0's mode may actually be narrower than MODE.)  */
 875
 876   if (BYTES_BIG_ENDIAN)
 877       /* BITPOS is the distance between our msb
 878          and that of the containing datum.
 879          Convert it to the distance from the lsb.  */
 880       bitpos = total_bits - bitsize - bitpos;
 881
 882   /* Now BITPOS is always the distance between our lsb
 883      and that of OP0.  */
 884
 885   /* Shift VALUE left by BITPOS bits.  If VALUE is not constant,
 886      we must first convert its mode to MODE.  */
 887
 888   if (CONST_INT_P (value))
 889     {
 890       HOST_WIDE_INT v = INTVAL (value);
 891
 892       if (bitsize < HOST_BITS_PER_WIDE_INT)
 893         v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
 894
 895       if (v == 0)
 896         all_zero = 1;
 897       else if ((bitsize < HOST_BITS_PER_WIDE_INT
 898                 && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
 899                || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
 900         all_one = 1;
 901
 902       value = lshift_value (mode, value, bitpos, bitsize);
 903     }
 904   else
 905     {
 906       int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
 907                       && bitpos + bitsize != GET_MODE_BITSIZE (mode));
 908
 909       if (GET_MODE (value) != mode)
 910         value = convert_to_mode (mode, value, 1);
 911
 912       if (must_and)
 913         value = expand_binop (mode, and_optab, value,
 914                               mask_rtx (mode, 0, bitsize, 0),
 915                               NULL_RTX, 1, OPTAB_LIB_WIDEN);
 916       if (bitpos > 0)
 917         value = expand_shift (LSHIFT_EXPR, mode, value,
 918                               bitpos, NULL_RTX, 1);
 919     }
 920
 921   /* Now clear the chosen bits in OP0,
 922      except that if VALUE is -1 we need not bother.  */
 923   /* We keep the intermediates in registers to allow CSE to combine
 924      consecutive bitfield assignments.  */
 925
 926   temp = force_reg (mode, op0);
 927
 928   if (! all_one)
 929     {
 930       temp = expand_binop (mode, and_optab, temp,
 931                            mask_rtx (mode, bitpos, bitsize, 1),
 932                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
 933       temp = force_reg (mode, temp);
 934     }
 935
 936   /* Now logical-or VALUE into OP0, unless it is zero.  */
 937
 938   if (! all_zero)
 939     {
 940       temp = expand_binop (mode, ior_optab, temp, value,
 941                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
 942       temp = force_reg (mode, temp);
 943     }
 944
 945   if (op0 != temp)
 946     {
 947       op0 = copy_rtx (op0);
 948       emit_move_insn (op0, temp);
 949     }
 950 }
 951 \f
 952 /* Store a bit field that is split across multiple accessible memory objects.
 953
 954    OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
 955    BITSIZE is the field width; BITPOS the position of its first bit
 956    (within the word).
 957    VALUE is the value to store.
 958
 959    This does not yet handle fields wider than BITS_PER_WORD.  */
 960
 961 static void
 962 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
 963                        unsigned HOST_WIDE_INT bitpos, rtx value)
 964 {
 965   unsigned int unit;
 966   unsigned int bitsdone = 0;
 967
 968   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
 969      much at a time.  */
 970   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
 971     unit = BITS_PER_WORD;
 972   else
 973     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
 974
 975   /* If VALUE is a constant other than a CONST_INT, get it into a register in
 976      WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
 977      that VALUE might be a floating-point constant.  */
 978   if (CONSTANT_P (value) && !CONST_INT_P (value))
 979     {
 980       rtx word = gen_lowpart_common (word_mode, value);
 981
 982       if (word && (value != word))
 983         value = word;
 984       else
 985         value = gen_lowpart_common (word_mode,
 986                                     force_reg (GET_MODE (value) != VOIDmode
 987                                                ? GET_MODE (value)
 988                                                : word_mode, value));
 989     }
 990
 991   while (bitsdone < bitsize)
 992     {
 993       unsigned HOST_WIDE_INT thissize;
 994       rtx part, word;
 995       unsigned HOST_WIDE_INT thispos;
 996       unsigned HOST_WIDE_INT offset;
 997
 998       offset = (bitpos + bitsdone) / unit;
 999       thispos = (bitpos + bitsdone) % unit;
1000
1001       /* THISSIZE must not overrun a word boundary.  Otherwise,
1002          store_fixed_bit_field will call us again, and we will mutually
1003          recurse forever.  */
1004       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1005       thissize = MIN (thissize, unit - thispos);
1006
1007       if (BYTES_BIG_ENDIAN)
1008         {
1009           int total_bits;
1010
1011           /* We must do an endian conversion exactly the same way as it is
1012              done in extract_bit_field, so that the two calls to
1013              extract_fixed_bit_field will have comparable arguments.  */
1014           if (!MEM_P (value) || GET_MODE (value) == BLKmode)
1015             total_bits = BITS_PER_WORD;
1016           else
1017             total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1018
1019           /* Fetch successively less significant portions.  */
1020           if (CONST_INT_P (value))
1021             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1022                              >> (bitsize - bitsdone - thissize))
1023                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
1024           else
1025             /* The args are chosen so that the last part includes the
1026                lsb.  Give extract_bit_field the value it needs (with
1027                endianness compensation) to fetch the piece we want.  */
1028             part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1029                                             total_bits - bitsize + bitsdone,
1030                                             NULL_RTX, 1, false);
1031         }
1032       else
1033         {
1034           /* Fetch successively more significant portions.  */
1035           if (CONST_INT_P (value))
1036             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1037                              >> bitsdone)
1038                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
1039           else
1040             part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1041                                             bitsdone, NULL_RTX, 1, false);
1042         }
1043
1044       /* If OP0 is a register, then handle OFFSET here.
1045
1046          When handling multiword bitfields, extract_bit_field may pass
1047          down a word_mode SUBREG of a larger REG for a bitfield that actually
1048          crosses a word boundary.  Thus, for a SUBREG, we must find
1049          the current word starting from the base register.  */
1050       if (GET_CODE (op0) == SUBREG)
1051         {
1052           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1053           enum machine_mode sub_mode = GET_MODE (SUBREG_REG (op0));
1054           if (sub_mode != BLKmode && GET_MODE_SIZE (sub_mode) < UNITS_PER_WORD)
1055             word = word_offset ? const0_rtx : op0;
1056           else
1057             word = operand_subword_force (SUBREG_REG (op0), word_offset,
1058                                           GET_MODE (SUBREG_REG (op0)));
1059           offset = 0;
1060         }
1061       else if (REG_P (op0))
1062         {
1063           enum machine_mode op0_mode = GET_MODE (op0);
1064           if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
1065             word = offset ? const0_rtx : op0;
1066           else
1067             word = operand_subword_force (op0, offset, GET_MODE (op0));
1068           offset = 0;
1069         }
1070       else
1071         word = op0;
1072
1073       /* OFFSET is in UNITs, and UNIT is in bits.
1074          store_fixed_bit_field wants offset in bytes.  If WORD is const0_rtx,
1075          it is just an out-of-bounds access.  Ignore it.  */
1076       if (word != const0_rtx)
1077         store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT, thissize,
1078                                thispos, part);
1079       bitsdone += thissize;
1080     }
1081 }
1082 \f
1083 /* A subroutine of extract_bit_field_1 that converts return value X
1084    to either MODE or TMODE.  MODE, TMODE and UNSIGNEDP are arguments
1085    to extract_bit_field.  */
1086
1087 static rtx
1088 convert_extracted_bit_field (rtx x, enum machine_mode mode,
1089                              enum machine_mode tmode, bool unsignedp)
1090 {
1091   if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1092     return x;
1093
1094   /* If the x mode is not a scalar integral, first convert to the
1095      integer mode of that size and then access it as a floating-point
1096      value via a SUBREG.  */
1097   if (!SCALAR_INT_MODE_P (tmode))
1098     {
1099       enum machine_mode smode;
1100
1101       smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1102       x = convert_to_mode (smode, x, unsignedp);
1103       x = force_reg (smode, x);
1104       return gen_lowpart (tmode, x);
1105     }
1106
1107   return convert_to_mode (tmode, x, unsignedp);
1108 }
1109
1110 /* A subroutine of extract_bit_field, with the same arguments.
1111    If FALLBACK_P is true, fall back to extract_fixed_bit_field
1112    if we can find no other means of implementing the operation.
1113    if FALLBACK_P is false, return NULL instead.  */
1114
1115 static rtx
1116 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1117                      unsigned HOST_WIDE_INT bitnum,
1118                      int unsignedp, bool packedp, rtx target,
1119                      enum machine_mode mode, enum machine_mode tmode,
1120                      bool fallback_p)
1121 {
1122   unsigned int unit
1123     = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
1124   unsigned HOST_WIDE_INT offset, bitpos;
1125   rtx op0 = str_rtx;
1126   enum machine_mode int_mode;
1127   enum machine_mode ext_mode;
1128   enum machine_mode mode1;
1129   int byte_offset;
1130
1131   if (tmode == VOIDmode)
1132     tmode = mode;
1133
1134   while (GET_CODE (op0) == SUBREG)
1135     {
1136       bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1137       op0 = SUBREG_REG (op0);
1138     }
1139
1140   /* If we have an out-of-bounds access to a register, just return an
1141      uninitialized register of the required mode.  This can occur if the
1142      source code contains an out-of-bounds access to a small array.  */
1143   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1144     return gen_reg_rtx (tmode);
1145
1146   if (REG_P (op0)
1147       && mode == GET_MODE (op0)
1148       && bitnum == 0
1149       && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1150     {
1151       /* We're trying to extract a full register from itself.  */
1152       return op0;
1153     }
1154
1155   /* See if we can get a better vector mode before extracting.  */
1156   if (VECTOR_MODE_P (GET_MODE (op0))
1157       && !MEM_P (op0)
1158       && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1159     {
1160       enum machine_mode new_mode;
1161
1162       if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1163         new_mode = MIN_MODE_VECTOR_FLOAT;
1164       else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1165         new_mode = MIN_MODE_VECTOR_FRACT;
1166       else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1167         new_mode = MIN_MODE_VECTOR_UFRACT;
1168       else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1169         new_mode = MIN_MODE_VECTOR_ACCUM;
1170       else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1171         new_mode = MIN_MODE_VECTOR_UACCUM;
1172       else
1173         new_mode = MIN_MODE_VECTOR_INT;
1174
1175       for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1176         if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1177             && targetm.vector_mode_supported_p (new_mode))
1178           break;
1179       if (new_mode != VOIDmode)
1180         op0 = gen_lowpart (new_mode, op0);
1181     }
1182
1183   /* Use vec_extract patterns for extracting parts of vectors whenever
1184      available.  */
1185   if (VECTOR_MODE_P (GET_MODE (op0))
1186       && !MEM_P (op0)
1187       && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing
1188       && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1189           == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1190     {
1191       struct expand_operand ops[3];
1192       enum machine_mode outermode = GET_MODE (op0);
1193       enum machine_mode innermode = GET_MODE_INNER (outermode);
1194       enum insn_code icode = optab_handler (vec_extract_optab, outermode);
1195       unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1196
1197       create_output_operand (&ops[0], target, innermode);
1198       create_input_operand (&ops[1], op0, outermode);
1199       create_integer_operand (&ops[2], pos);
1200       if (maybe_expand_insn (icode, 3, ops))
1201         {
1202           target = ops[0].value;
1203           if (GET_MODE (target) != mode)
1204             return gen_lowpart (tmode, target);
1205           return target;
1206         }
1207     }
1208
1209   /* Make sure we are playing with integral modes.  Pun with subregs
1210      if we aren't.  */
1211   {
1212     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1213     if (imode != GET_MODE (op0))
1214       {
1215         if (MEM_P (op0))
1216           op0 = adjust_address (op0, imode, 0);
1217         else if (imode != BLKmode)
1218           {
1219             op0 = gen_lowpart (imode, op0);
1220
1221             /* If we got a SUBREG, force it into a register since we
1222                aren't going to be able to do another SUBREG on it.  */
1223             if (GET_CODE (op0) == SUBREG)
1224               op0 = force_reg (imode, op0);
1225           }
1226         else if (REG_P (op0))
1227           {
1228             rtx reg, subreg;
1229             imode = smallest_mode_for_size (GET_MODE_BITSIZE (GET_MODE (op0)),
1230                                             MODE_INT);
1231             reg = gen_reg_rtx (imode);
1232             subreg = gen_lowpart_SUBREG (GET_MODE (op0), reg);
1233             emit_move_insn (subreg, op0);
1234             op0 = reg;
1235             bitnum += SUBREG_BYTE (subreg) * BITS_PER_UNIT;
1236           }
1237         else
1238           {
1239             rtx mem = assign_stack_temp (GET_MODE (op0),
1240                                          GET_MODE_SIZE (GET_MODE (op0)), 0);
1241             emit_move_insn (mem, op0);
1242             op0 = adjust_address (mem, BLKmode, 0);
1243           }
1244       }
1245   }
1246
1247   /* We may be accessing data outside the field, which means
1248      we can alias adjacent data.  */
1249   if (MEM_P (op0))
1250     {
1251       op0 = shallow_copy_rtx (op0);
1252       set_mem_alias_set (op0, 0);
1253       set_mem_expr (op0, 0);
1254     }
1255
1256   /* Extraction of a full-word or multi-word value from a structure
1257      in a register or aligned memory can be done with just a SUBREG.
1258      A subword value in the least significant part of a register
1259      can also be extracted with a SUBREG.  For this, we need the
1260      byte offset of the value in op0.  */
1261
1262   bitpos = bitnum % unit;
1263   offset = bitnum / unit;
1264   byte_offset = bitpos / BITS_PER_UNIT + offset * UNITS_PER_WORD;
1265
1266   /* If OP0 is a register, BITPOS must count within a word.
1267      But as we have it, it counts within whatever size OP0 now has.
1268      On a bigendian machine, these are not the same, so convert.  */
1269   if (BYTES_BIG_ENDIAN
1270       && !MEM_P (op0)
1271       && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
1272     bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1273
1274   /* ??? We currently assume TARGET is at least as big as BITSIZE.
1275      If that's wrong, the solution is to test for it and set TARGET to 0
1276      if needed.  */
1277
1278   /* Only scalar integer modes can be converted via subregs.  There is an
1279      additional problem for FP modes here in that they can have a precision
1280      which is different from the size.  mode_for_size uses precision, but
1281      we want a mode based on the size, so we must avoid calling it for FP
1282      modes.  */
1283   mode1  = (SCALAR_INT_MODE_P (tmode)
1284             ? mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0)
1285             : mode);
1286
1287   /* If the bitfield is volatile, we need to make sure the access
1288      remains on a type-aligned boundary.  */
1289   if (GET_CODE (op0) == MEM
1290       && MEM_VOLATILE_P (op0)
1291       && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
1292       && flag_strict_volatile_bitfields > 0)
1293     goto no_subreg_mode_swap;
1294
1295   if (((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode)
1296         && bitpos % BITS_PER_WORD == 0)
1297        || (mode1 != BLKmode
1298            /* ??? The big endian test here is wrong.  This is correct
1299               if the value is in a register, and if mode_for_size is not
1300               the same mode as op0.  This causes us to get unnecessarily
1301               inefficient code from the Thumb port when -mbig-endian.  */
1302            && (BYTES_BIG_ENDIAN
1303                ? bitpos + bitsize == BITS_PER_WORD
1304                : bitpos == 0)))
1305       && ((!MEM_P (op0)
1306            && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0))
1307            && GET_MODE_SIZE (mode1) != 0
1308            && byte_offset % GET_MODE_SIZE (mode1) == 0)
1309           || (MEM_P (op0)
1310               && (! SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
1311                   || (offset * BITS_PER_UNIT % bitsize == 0
1312                       && MEM_ALIGN (op0) % bitsize == 0)))))
1313     {
1314       if (MEM_P (op0))
1315         op0 = adjust_address (op0, mode1, offset);
1316       else if (mode1 != GET_MODE (op0))
1317         {
1318           rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1319                                          byte_offset);
1320           if (sub == NULL)
1321             goto no_subreg_mode_swap;
1322           op0 = sub;
1323         }
1324       if (mode1 != mode)
1325         return convert_to_mode (tmode, op0, unsignedp);
1326       return op0;
1327     }
1328  no_subreg_mode_swap:
1329
1330   /* Handle fields bigger than a word.  */
1331
1332   if (bitsize > BITS_PER_WORD)
1333     {
1334       /* Here we transfer the words of the field
1335          in the order least significant first.
1336          This is because the most significant word is the one which may
1337          be less than full.  */
1338
1339       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1340       unsigned int i;
1341
1342       if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1343         target = gen_reg_rtx (mode);
1344
1345       /* Indicate for flow that the entire target reg is being set.  */
1346       emit_clobber (target);
1347
1348       for (i = 0; i < nwords; i++)
1349         {
1350           /* If I is 0, use the low-order word in both field and target;
1351              if I is 1, use the next to lowest word; and so on.  */
1352           /* Word number in TARGET to use.  */
1353           unsigned int wordnum
1354             = (WORDS_BIG_ENDIAN
1355                ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1356                : i);
1357           /* Offset from start of field in OP0.  */
1358           unsigned int bit_offset = (WORDS_BIG_ENDIAN
1359                                      ? MAX (0, ((int) bitsize - ((int) i + 1)
1360                                                 * (int) BITS_PER_WORD))
1361                                      : (int) i * BITS_PER_WORD);
1362           rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1363           rtx result_part
1364             = extract_bit_field (op0, MIN (BITS_PER_WORD,
1365                                            bitsize - i * BITS_PER_WORD),
1366                                  bitnum + bit_offset, 1, false, target_part, mode,
1367                                  word_mode);
1368
1369           gcc_assert (target_part);
1370
1371           if (result_part != target_part)
1372             emit_move_insn (target_part, result_part);
1373         }
1374
1375       if (unsignedp)
1376         {
1377           /* Unless we've filled TARGET, the upper regs in a multi-reg value
1378              need to be zero'd out.  */
1379           if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1380             {
1381               unsigned int i, total_words;
1382
1383               total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1384               for (i = nwords; i < total_words; i++)
1385                 emit_move_insn
1386                   (operand_subword (target,
1387                                     WORDS_BIG_ENDIAN ? total_words - i - 1 : i,
1388                                     1, VOIDmode),
1389                    const0_rtx);
1390             }
1391           return target;
1392         }
1393
1394       /* Signed bit field: sign-extend with two arithmetic shifts.  */
1395       target = expand_shift (LSHIFT_EXPR, mode, target,
1396                              GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1397       return expand_shift (RSHIFT_EXPR, mode, target,
1398                            GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1399     }
1400
1401   /* From here on we know the desired field is smaller than a word.  */
1402
1403   /* Check if there is a correspondingly-sized integer field, so we can
1404      safely extract it as one size of integer, if necessary; then
1405      truncate or extend to the size that is wanted; then use SUBREGs or
1406      convert_to_mode to get one of the modes we really wanted.  */
1407
1408   int_mode = int_mode_for_mode (tmode);
1409   if (int_mode == BLKmode)
1410     int_mode = int_mode_for_mode (mode);
1411   /* Should probably push op0 out to memory and then do a load.  */
1412   gcc_assert (int_mode != BLKmode);
1413
1414   /* OFFSET is the number of words or bytes (UNIT says which)
1415      from STR_RTX to the first word or byte containing part of the field.  */
1416   if (!MEM_P (op0))
1417     {
1418       if (offset != 0
1419           || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1420         {
1421           if (!REG_P (op0))
1422             op0 = copy_to_reg (op0);
1423           op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
1424                                 op0, (offset * UNITS_PER_WORD));
1425         }
1426       offset = 0;
1427     }
1428
1429   /* Now OFFSET is nonzero only for memory operands.  */
1430   ext_mode = mode_for_extraction (unsignedp ? EP_extzv : EP_extv, 0);
1431   if (ext_mode != MAX_MACHINE_MODE
1432       && bitsize > 0
1433       && GET_MODE_BITSIZE (ext_mode) >= bitsize
1434       /* If op0 is a register, we need it in EXT_MODE to make it
1435          acceptable to the format of ext(z)v.  */
1436       && !(GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1437       && !((REG_P (op0) || GET_CODE (op0) == SUBREG)
1438            && (bitsize + bitpos > GET_MODE_BITSIZE (ext_mode))))
1439     {
1440       struct expand_operand ops[4];
1441       unsigned HOST_WIDE_INT xbitpos = bitpos, xoffset = offset;
1442       rtx xop0 = op0;
1443       rtx xtarget = target;
1444       rtx xspec_target = target;
1445       rtx xspec_target_subreg = 0;
1446
1447       /* If op0 is a register, we need it in EXT_MODE to make it
1448          acceptable to the format of ext(z)v.  */
1449       if (REG_P (xop0) && GET_MODE (xop0) != ext_mode)
1450         xop0 = gen_lowpart_SUBREG (ext_mode, xop0);
1451       if (MEM_P (xop0))
1452         /* Get ref to first byte containing part of the field.  */
1453         xop0 = adjust_address (xop0, byte_mode, xoffset);
1454
1455       /* On big-endian machines, we count bits from the most significant.
1456          If the bit field insn does not, we must invert.  */
1457       if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1458         xbitpos = unit - bitsize - xbitpos;
1459
1460       /* Now convert from counting within UNIT to counting in EXT_MODE.  */
1461       if (BITS_BIG_ENDIAN && !MEM_P (xop0))
1462         xbitpos += GET_MODE_BITSIZE (ext_mode) - unit;
1463
1464       unit = GET_MODE_BITSIZE (ext_mode);
1465
1466       if (xtarget == 0)
1467         xtarget = xspec_target = gen_reg_rtx (tmode);
1468
1469       if (GET_MODE (xtarget) != ext_mode)
1470         {
1471           /* Don't use LHS paradoxical subreg if explicit truncation is needed
1472              between the mode of the extraction (word_mode) and the target
1473              mode.  Instead, create a temporary and use convert_move to set
1474              the target.  */
1475           if (REG_P (xtarget)
1476               && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (xtarget), ext_mode))
1477             {
1478               xtarget = gen_lowpart (ext_mode, xtarget);
1479               if (GET_MODE_PRECISION (ext_mode)
1480                   > GET_MODE_PRECISION (GET_MODE (xspec_target)))
1481                 xspec_target_subreg = xtarget;
1482             }
1483           else
1484             xtarget = gen_reg_rtx (ext_mode);
1485         }
1486
1487       create_output_operand (&ops[0], xtarget, ext_mode);
1488       create_fixed_operand (&ops[1], xop0);
1489       create_integer_operand (&ops[2], bitsize);
1490       create_integer_operand (&ops[3], xbitpos);
1491       if (maybe_expand_insn (unsignedp ? CODE_FOR_extzv : CODE_FOR_extv,
1492                              4, ops))
1493         {
1494           xtarget = ops[0].value;
1495           if (xtarget == xspec_target)
1496             return xtarget;
1497           if (xtarget == xspec_target_subreg)
1498             return xspec_target;
1499           return convert_extracted_bit_field (xtarget, mode, tmode, unsignedp);
1500         }
1501     }
1502
1503   /* If OP0 is a memory, try copying it to a register and seeing if a
1504      cheap register alternative is available.  */
1505   if (ext_mode != MAX_MACHINE_MODE && MEM_P (op0))
1506     {
1507       enum machine_mode bestmode;
1508
1509       /* Get the mode to use for inserting into this field.  If
1510          OP0 is BLKmode, get the smallest mode consistent with the
1511          alignment. If OP0 is a non-BLKmode object that is no
1512          wider than EXT_MODE, use its mode. Otherwise, use the
1513          smallest mode containing the field.  */
1514
1515       if (GET_MODE (op0) == BLKmode
1516           || (ext_mode != MAX_MACHINE_MODE
1517               && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (ext_mode)))
1518         bestmode = get_best_mode (bitsize, bitnum, MEM_ALIGN (op0),
1519                                   (ext_mode == MAX_MACHINE_MODE
1520                                    ? VOIDmode : ext_mode),
1521                                   MEM_VOLATILE_P (op0));
1522       else
1523         bestmode = GET_MODE (op0);
1524
1525       if (bestmode != VOIDmode
1526           && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
1527                && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
1528         {
1529           unsigned HOST_WIDE_INT xoffset, xbitpos;
1530
1531           /* Compute the offset as a multiple of this unit,
1532              counting in bytes.  */
1533           unit = GET_MODE_BITSIZE (bestmode);
1534           xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1535           xbitpos = bitnum % unit;
1536
1537           /* Make sure the register is big enough for the whole field.  */
1538           if (xoffset * BITS_PER_UNIT + unit
1539               >= offset * BITS_PER_UNIT + bitsize)
1540             {
1541               rtx last, result, xop0;
1542
1543               last = get_last_insn ();
1544
1545               /* Fetch it to a register in that size.  */
1546               xop0 = adjust_address (op0, bestmode, xoffset);
1547               xop0 = force_reg (bestmode, xop0);
1548               result = extract_bit_field_1 (xop0, bitsize, xbitpos,
1549                                             unsignedp, packedp, target,
1550                                             mode, tmode, false);
1551               if (result)
1552                 return result;
1553
1554               delete_insns_since (last);
1555             }
1556         }
1557     }
1558
1559   if (!fallback_p)
1560     return NULL;
1561
1562   target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1563                                     bitpos, target, unsignedp, packedp);
1564   return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1565 }
1566
1567 /* Generate code to extract a byte-field from STR_RTX
1568    containing BITSIZE bits, starting at BITNUM,
1569    and put it in TARGET if possible (if TARGET is nonzero).
1570    Regardless of TARGET, we return the rtx for where the value is placed.
1571
1572    STR_RTX is the structure containing the byte (a REG or MEM).
1573    UNSIGNEDP is nonzero if this is an unsigned bit field.
1574    PACKEDP is nonzero if the field has the packed attribute.
1575    MODE is the natural mode of the field value once extracted.
1576    TMODE is the mode the caller would like the value to have;
1577    but the value may be returned with type MODE instead.
1578
1579    If a TARGET is specified and we can store in it at no extra cost,
1580    we do so, and return TARGET.
1581    Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1582    if they are equally easy.  */
1583
1584 rtx
1585 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1586                    unsigned HOST_WIDE_INT bitnum, int unsignedp, bool packedp,
1587                    rtx target, enum machine_mode mode, enum machine_mode tmode)
1588 {
1589   return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp, packedp,
1590                               target, mode, tmode, true);
1591 }
1592 \f
1593 /* Extract a bit field using shifts and boolean operations
1594    Returns an rtx to represent the value.
1595    OP0 addresses a register (word) or memory (byte).
1596    BITPOS says which bit within the word or byte the bit field starts in.
1597    OFFSET says how many bytes farther the bit field starts;
1598     it is 0 if OP0 is a register.
1599    BITSIZE says how many bits long the bit field is.
1600     (If OP0 is a register, it may be narrower than a full word,
1601      but BITPOS still counts within a full word,
1602      which is significant on bigendian machines.)
1603
1604    UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1605    PACKEDP is true if the field has the packed attribute.
1606
1607    If TARGET is nonzero, attempts to store the value there
1608    and return TARGET, but this is not guaranteed.
1609    If TARGET is not used, create a pseudo-reg of mode TMODE for the value.  */
1610
1611 static rtx
1612 extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
1613                          unsigned HOST_WIDE_INT offset,
1614                          unsigned HOST_WIDE_INT bitsize,
1615                          unsigned HOST_WIDE_INT bitpos, rtx target,
1616                          int unsignedp, bool packedp)
1617 {
1618   unsigned int total_bits = BITS_PER_WORD;
1619   enum machine_mode mode;
1620
1621   if (GET_CODE (op0) == SUBREG || REG_P (op0))
1622     {
1623       /* Special treatment for a bit field split across two registers.  */
1624       if (bitsize + bitpos > BITS_PER_WORD)
1625         return extract_split_bit_field (op0, bitsize, bitpos, unsignedp);
1626     }
1627   else
1628     {
1629       /* Get the proper mode to use for this field.  We want a mode that
1630          includes the entire field.  If such a mode would be larger than
1631          a word, we won't be doing the extraction the normal way.  */
1632
1633       if (MEM_VOLATILE_P (op0)
1634           && flag_strict_volatile_bitfields > 0)
1635         {
1636           if (GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
1637             mode = GET_MODE (op0);
1638           else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1639             mode = GET_MODE (target);
1640           else
1641             mode = tmode;
1642         }
1643       else
1644         mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
1645                               MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));
1646
1647       if (mode == VOIDmode)
1648         /* The only way this should occur is if the field spans word
1649            boundaries.  */
1650         return extract_split_bit_field (op0, bitsize,
1651                                         bitpos + offset * BITS_PER_UNIT,
1652                                         unsignedp);
1653
1654       total_bits = GET_MODE_BITSIZE (mode);
1655
1656       /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
1657          be in the range 0 to total_bits-1, and put any excess bytes in
1658          OFFSET.  */
1659       if (bitpos >= total_bits)
1660         {
1661           offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
1662           bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
1663                      * BITS_PER_UNIT);
1664         }
1665
1666       /* If we're accessing a volatile MEM, we can't do the next
1667          alignment step if it results in a multi-word access where we
1668          otherwise wouldn't have one.  So, check for that case
1669          here.  */
1670       if (MEM_P (op0)
1671           && MEM_VOLATILE_P (op0)
1672           && flag_strict_volatile_bitfields > 0
1673           && bitpos + bitsize <= total_bits
1674           && bitpos + bitsize + (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT > total_bits)
1675         {
1676           if (STRICT_ALIGNMENT)
1677             {
1678               static bool informed_about_misalignment = false;
1679               bool warned;
1680
1681               if (packedp)
1682                 {
1683                   if (bitsize == total_bits)
1684                     warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1685                                          "multiple accesses to volatile structure member"
1686                                          " because of packed attribute");
1687                   else
1688                     warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1689                                          "multiple accesses to volatile structure bitfield"
1690                                          " because of packed attribute");
1691
1692                   return extract_split_bit_field (op0, bitsize,
1693                                                   bitpos + offset * BITS_PER_UNIT,
1694                                                   unsignedp);
1695                 }
1696
1697               if (bitsize == total_bits)
1698                 warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1699                                      "mis-aligned access used for structure member");
1700               else
1701                 warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1702                                      "mis-aligned access used for structure bitfield");
1703
1704               if (! informed_about_misalignment && warned)
1705                 {
1706                   informed_about_misalignment = true;
1707                   inform (input_location,
1708                           "when a volatile object spans multiple type-sized locations,"
1709                           " the compiler must choose between using a single mis-aligned access to"
1710                           " preserve the volatility, or using multiple aligned accesses to avoid"
1711                           " runtime faults; this code may fail at runtime if the hardware does"
1712                           " not allow this access");
1713                 }
1714             }
1715         }
1716       else
1717         {
1718
1719           /* Get ref to an aligned byte, halfword, or word containing the field.
1720              Adjust BITPOS to be position within a word,
1721              and OFFSET to be the offset of that word.
1722              Then alter OP0 to refer to that word.  */
1723           bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
1724           offset -= (offset % (total_bits / BITS_PER_UNIT));
1725         }
1726
1727       op0 = adjust_address (op0, mode, offset);
1728     }
1729
1730   mode = GET_MODE (op0);
1731
1732   if (BYTES_BIG_ENDIAN)
1733     /* BITPOS is the distance between our msb and that of OP0.
1734        Convert it to the distance from the lsb.  */
1735     bitpos = total_bits - bitsize - bitpos;
1736
1737   /* Now BITPOS is always the distance between the field's lsb and that of OP0.
1738      We have reduced the big-endian case to the little-endian case.  */
1739
1740   if (unsignedp)
1741     {
1742       if (bitpos)
1743         {
1744           /* If the field does not already start at the lsb,
1745              shift it so it does.  */
1746           /* Maybe propagate the target for the shift.  */
1747           /* But not if we will return it--could confuse integrate.c.  */
1748           rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1749           if (tmode != mode) subtarget = 0;
1750           op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitpos, subtarget, 1);
1751         }
1752       /* Convert the value to the desired mode.  */
1753       if (mode != tmode)
1754         op0 = convert_to_mode (tmode, op0, 1);
1755
1756       /* Unless the msb of the field used to be the msb when we shifted,
1757          mask out the upper bits.  */
1758
1759       if (GET_MODE_BITSIZE (mode) != bitpos + bitsize)
1760         return expand_binop (GET_MODE (op0), and_optab, op0,
1761                              mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1762                              target, 1, OPTAB_LIB_WIDEN);
1763       return op0;
1764     }
1765
1766   /* To extract a signed bit-field, first shift its msb to the msb of the word,
1767      then arithmetic-shift its lsb to the lsb of the word.  */
1768   op0 = force_reg (mode, op0);
1769
1770   /* Find the narrowest integer mode that contains the field.  */
1771
1772   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1773        mode = GET_MODE_WIDER_MODE (mode))
1774     if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos)
1775       {
1776         op0 = convert_to_mode (mode, op0, 0);
1777         break;
1778       }
1779
1780   if (mode != tmode)
1781     target = 0;
1782
1783   if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos))
1784     {
1785       int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitpos);
1786       /* Maybe propagate the target for the shift.  */
1787       rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1788       op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1789     }
1790
1791   return expand_shift (RSHIFT_EXPR, mode, op0,
1792                        GET_MODE_BITSIZE (mode) - bitsize, target, 0);
1793 }
1794 \f
1795 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1796    of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1797    complement of that if COMPLEMENT.  The mask is truncated if
1798    necessary to the width of mode MODE.  The mask is zero-extended if
1799    BITSIZE+BITPOS is too small for MODE.  */
1800
1801 static rtx
1802 mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
1803 {
1804   double_int mask;
1805
1806   mask = double_int_mask (bitsize);
1807   mask = double_int_lshift (mask, bitpos, HOST_BITS_PER_DOUBLE_INT, false);
1808
1809   if (complement)
1810     mask = double_int_not (mask);
1811
1812   return immed_double_int_const (mask, mode);
1813 }
1814
1815 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1816    VALUE truncated to BITSIZE bits and then shifted left BITPOS bits.  */
1817
1818 static rtx
1819 lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize)
1820 {
1821   double_int val;
1822
1823   val = double_int_zext (uhwi_to_double_int (INTVAL (value)), bitsize);
1824   val = double_int_lshift (val, bitpos, HOST_BITS_PER_DOUBLE_INT, false);
1825
1826   return immed_double_int_const (val, mode);
1827 }
1828 \f
1829 /* Extract a bit field that is split across two words
1830    and return an RTX for the result.
1831
1832    OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1833    BITSIZE is the field width; BITPOS, position of its first bit, in the word.
1834    UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.  */
1835
1836 static rtx
1837 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1838                          unsigned HOST_WIDE_INT bitpos, int unsignedp)
1839 {
1840   unsigned int unit;
1841   unsigned int bitsdone = 0;
1842   rtx result = NULL_RTX;
1843   int first = 1;
1844
1845   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1846      much at a time.  */
1847   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1848     unit = BITS_PER_WORD;
1849   else
1850     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1851
1852   while (bitsdone < bitsize)
1853     {
1854       unsigned HOST_WIDE_INT thissize;
1855       rtx part, word;
1856       unsigned HOST_WIDE_INT thispos;
1857       unsigned HOST_WIDE_INT offset;
1858
1859       offset = (bitpos + bitsdone) / unit;
1860       thispos = (bitpos + bitsdone) % unit;
1861
1862       /* THISSIZE must not overrun a word boundary.  Otherwise,
1863          extract_fixed_bit_field will call us again, and we will mutually
1864          recurse forever.  */
1865       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1866       thissize = MIN (thissize, unit - thispos);
1867
1868       /* If OP0 is a register, then handle OFFSET here.
1869
1870          When handling multiword bitfields, extract_bit_field may pass
1871          down a word_mode SUBREG of a larger REG for a bitfield that actually
1872          crosses a word boundary.  Thus, for a SUBREG, we must find
1873          the current word starting from the base register.  */
1874       if (GET_CODE (op0) == SUBREG)
1875         {
1876           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1877           word = operand_subword_force (SUBREG_REG (op0), word_offset,
1878                                         GET_MODE (SUBREG_REG (op0)));
1879           offset = 0;
1880         }
1881       else if (REG_P (op0))
1882         {
1883           word = operand_subword_force (op0, offset, GET_MODE (op0));
1884           offset = 0;
1885         }
1886       else
1887         word = op0;
1888
1889       /* Extract the parts in bit-counting order,
1890          whose meaning is determined by BYTES_PER_UNIT.
1891          OFFSET is in UNITs, and UNIT is in bits.
1892          extract_fixed_bit_field wants offset in bytes.  */
1893       part = extract_fixed_bit_field (word_mode, word,
1894                                       offset * unit / BITS_PER_UNIT,
1895                                       thissize, thispos, 0, 1, false);
1896       bitsdone += thissize;
1897
1898       /* Shift this part into place for the result.  */
1899       if (BYTES_BIG_ENDIAN)
1900         {
1901           if (bitsize != bitsdone)
1902             part = expand_shift (LSHIFT_EXPR, word_mode, part,
1903                                  bitsize - bitsdone, 0, 1);
1904         }
1905       else
1906         {
1907           if (bitsdone != thissize)
1908             part = expand_shift (LSHIFT_EXPR, word_mode, part,
1909                                  bitsdone - thissize, 0, 1);
1910         }
1911
1912       if (first)
1913         result = part;
1914       else
1915         /* Combine the parts with bitwise or.  This works
1916            because we extracted each part as an unsigned bit field.  */
1917         result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
1918                                OPTAB_LIB_WIDEN);
1919
1920       first = 0;
1921     }
1922
1923   /* Unsigned bit field: we are done.  */
1924   if (unsignedp)
1925     return result;
1926   /* Signed bit field: sign-extend with two arithmetic shifts.  */
1927   result = expand_shift (LSHIFT_EXPR, word_mode, result,
1928                          BITS_PER_WORD - bitsize, NULL_RTX, 0);
1929   return expand_shift (RSHIFT_EXPR, word_mode, result,
1930                        BITS_PER_WORD - bitsize, NULL_RTX, 0);
1931 }
1932 \f
1933 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
1934    the bit pattern.  SRC_MODE is the mode of SRC; if this is smaller than
1935    MODE, fill the upper bits with zeros.  Fail if the layout of either
1936    mode is unknown (as for CC modes) or if the extraction would involve
1937    unprofitable mode punning.  Return the value on success, otherwise
1938    return null.
1939
1940    This is different from gen_lowpart* in these respects:
1941
1942      - the returned value must always be considered an rvalue
1943
1944      - when MODE is wider than SRC_MODE, the extraction involves
1945        a zero extension
1946
1947      - when MODE is smaller than SRC_MODE, the extraction involves
1948        a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
1949
1950    In other words, this routine performs a computation, whereas the
1951    gen_lowpart* routines are conceptually lvalue or rvalue subreg
1952    operations.  */
1953
1954 rtx
1955 extract_low_bits (enum machine_mode mode, enum machine_mode src_mode, rtx src)
1956 {
1957   enum machine_mode int_mode, src_int_mode;
1958
1959   if (mode == src_mode)
1960     return src;
1961
1962   if (CONSTANT_P (src))
1963     {
1964       /* simplify_gen_subreg can't be used here, as if simplify_subreg
1965          fails, it will happily create (subreg (symbol_ref)) or similar
1966          invalid SUBREGs.  */
1967       unsigned int byte = subreg_lowpart_offset (mode, src_mode);
1968       rtx ret = simplify_subreg (mode, src, src_mode, byte);
1969       if (ret)
1970         return ret;
1971
1972       if (GET_MODE (src) == VOIDmode
1973           || !validate_subreg (mode, src_mode, src, byte))
1974         return NULL_RTX;
1975
1976       src = force_reg (GET_MODE (src), src);
1977       return gen_rtx_SUBREG (mode, src, byte);
1978     }
1979
1980   if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
1981     return NULL_RTX;
1982
1983   if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
1984       && MODES_TIEABLE_P (mode, src_mode))
1985     {
1986       rtx x = gen_lowpart_common (mode, src);
1987       if (x)
1988         return x;
1989     }
1990
1991   src_int_mode = int_mode_for_mode (src_mode);
1992   int_mode = int_mode_for_mode (mode);
1993   if (src_int_mode == BLKmode || int_mode == BLKmode)
1994     return NULL_RTX;
1995
1996   if (!MODES_TIEABLE_P (src_int_mode, src_mode))
1997     return NULL_RTX;
1998   if (!MODES_TIEABLE_P (int_mode, mode))
1999     return NULL_RTX;
2000
2001   src = gen_lowpart (src_int_mode, src);
2002   src = convert_modes (int_mode, src_int_mode, src, true);
2003   src = gen_lowpart (mode, src);
2004   return src;
2005 }
2006 \f
2007 /* Add INC into TARGET.  */
2008
2009 void
2010 expand_inc (rtx target, rtx inc)
2011 {
2012   rtx value = expand_binop (GET_MODE (target), add_optab,
2013                             target, inc,
2014                             target, 0, OPTAB_LIB_WIDEN);
2015   if (value != target)
2016     emit_move_insn (target, value);
2017 }
2018
2019 /* Subtract DEC from TARGET.  */
2020
2021 void
2022 expand_dec (rtx target, rtx dec)
2023 {
2024   rtx value = expand_binop (GET_MODE (target), sub_optab,
2025                             target, dec,
2026                             target, 0, OPTAB_LIB_WIDEN);
2027   if (value != target)
2028     emit_move_insn (target, value);
2029 }
2030 \f
2031 /* Output a shift instruction for expression code CODE,
2032    with SHIFTED being the rtx for the value to shift,
2033    and AMOUNT the rtx for the amount to shift by.
2034    Store the result in the rtx TARGET, if that is convenient.
2035    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2036    Return the rtx for where the value is.  */
2037
2038 static rtx
2039 expand_shift_1 (enum tree_code code, enum machine_mode mode, rtx shifted,
2040                 rtx amount, rtx target, int unsignedp)
2041 {
2042   rtx op1, temp = 0;
2043   int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2044   int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2045   optab lshift_optab = ashl_optab;
2046   optab rshift_arith_optab = ashr_optab;
2047   optab rshift_uns_optab = lshr_optab;
2048   optab lrotate_optab = rotl_optab;
2049   optab rrotate_optab = rotr_optab;
2050   enum machine_mode op1_mode;
2051   int attempt;
2052   bool speed = optimize_insn_for_speed_p ();
2053
2054   op1 = amount;
2055   op1_mode = GET_MODE (op1);
2056
2057   /* Determine whether the shift/rotate amount is a vector, or scalar.  If the
2058      shift amount is a vector, use the vector/vector shift patterns.  */
2059   if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2060     {
2061       lshift_optab = vashl_optab;
2062       rshift_arith_optab = vashr_optab;
2063       rshift_uns_optab = vlshr_optab;
2064       lrotate_optab = vrotl_optab;
2065       rrotate_optab = vrotr_optab;
2066     }
2067
2068   /* Previously detected shift-counts computed by NEGATE_EXPR
2069      and shifted in the other direction; but that does not work
2070      on all machines.  */
2071
2072   if (SHIFT_COUNT_TRUNCATED)
2073     {
2074       if (CONST_INT_P (op1)
2075           && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2076               (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
2077         op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2078                        % GET_MODE_BITSIZE (mode));
2079       else if (GET_CODE (op1) == SUBREG
2080                && subreg_lowpart_p (op1)
2081                && INTEGRAL_MODE_P (GET_MODE (SUBREG_REG (op1))))
2082         op1 = SUBREG_REG (op1);
2083     }
2084
2085   if (op1 == const0_rtx)
2086     return shifted;
2087
2088   /* Check whether its cheaper to implement a left shift by a constant
2089      bit count by a sequence of additions.  */
2090   if (code == LSHIFT_EXPR
2091       && CONST_INT_P (op1)
2092       && INTVAL (op1) > 0
2093       && INTVAL (op1) < GET_MODE_PRECISION (mode)
2094       && INTVAL (op1) < MAX_BITS_PER_WORD
2095       && shift_cost[speed][mode][INTVAL (op1)] > INTVAL (op1) * add_cost[speed][mode]
2096       && shift_cost[speed][mode][INTVAL (op1)] != MAX_COST)
2097     {
2098       int i;
2099       for (i = 0; i < INTVAL (op1); i++)
2100         {
2101           temp = force_reg (mode, shifted);
2102           shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2103                                   unsignedp, OPTAB_LIB_WIDEN);
2104         }
2105       return shifted;
2106     }
2107
2108   for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2109     {
2110       enum optab_methods methods;
2111
2112       if (attempt == 0)
2113         methods = OPTAB_DIRECT;
2114       else if (attempt == 1)
2115         methods = OPTAB_WIDEN;
2116       else
2117         methods = OPTAB_LIB_WIDEN;
2118
2119       if (rotate)
2120         {
2121           /* Widening does not work for rotation.  */
2122           if (methods == OPTAB_WIDEN)
2123             continue;
2124           else if (methods == OPTAB_LIB_WIDEN)
2125             {
2126               /* If we have been unable to open-code this by a rotation,
2127                  do it as the IOR of two shifts.  I.e., to rotate A
2128                  by N bits, compute (A << N) | ((unsigned) A >> (C - N))
2129                  where C is the bitsize of A.
2130
2131                  It is theoretically possible that the target machine might
2132                  not be able to perform either shift and hence we would
2133                  be making two libcalls rather than just the one for the
2134                  shift (similarly if IOR could not be done).  We will allow
2135                  this extremely unlikely lossage to avoid complicating the
2136                  code below.  */
2137
2138               rtx subtarget = target == shifted ? 0 : target;
2139               rtx new_amount, other_amount;
2140               rtx temp1;
2141
2142               new_amount = op1;
2143               if (CONST_INT_P (op1))
2144                 other_amount = GEN_INT (GET_MODE_BITSIZE (mode)
2145                                         - INTVAL (op1));
2146               else
2147                 other_amount
2148                   = simplify_gen_binary (MINUS, GET_MODE (op1),
2149                                          GEN_INT (GET_MODE_PRECISION (mode)),
2150                                          op1);
2151
2152               shifted = force_reg (mode, shifted);
2153
2154               temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2155                                      mode, shifted, new_amount, 0, 1);
2156               temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2157                                       mode, shifted, other_amount,
2158                                       subtarget, 1);
2159               return expand_binop (mode, ior_optab, temp, temp1, target,
2160                                    unsignedp, methods);
2161             }
2162
2163           temp = expand_binop (mode,
2164                                left ? lrotate_optab : rrotate_optab,
2165                                shifted, op1, target, unsignedp, methods);
2166         }
2167       else if (unsignedp)
2168         temp = expand_binop (mode,
2169                              left ? lshift_optab : rshift_uns_optab,
2170                              shifted, op1, target, unsignedp, methods);
2171
2172       /* Do arithmetic shifts.
2173          Also, if we are going to widen the operand, we can just as well
2174          use an arithmetic right-shift instead of a logical one.  */
2175       if (temp == 0 && ! rotate
2176           && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2177         {
2178           enum optab_methods methods1 = methods;
2179
2180           /* If trying to widen a log shift to an arithmetic shift,
2181              don't accept an arithmetic shift of the same size.  */
2182           if (unsignedp)
2183             methods1 = OPTAB_MUST_WIDEN;
2184
2185           /* Arithmetic shift */
2186
2187           temp = expand_binop (mode,
2188                                left ? lshift_optab : rshift_arith_optab,
2189                                shifted, op1, target, unsignedp, methods1);
2190         }
2191
2192       /* We used to try extzv here for logical right shifts, but that was
2193          only useful for one machine, the VAX, and caused poor code
2194          generation there for lshrdi3, so the code was deleted and a
2195          define_expand for lshrsi3 was added to vax.md.  */
2196     }
2197
2198   gcc_assert (temp);
2199   return temp;
2200 }
2201
2202 /* Output a shift instruction for expression code CODE,
2203    with SHIFTED being the rtx for the value to shift,
2204    and AMOUNT the amount to shift by.
2205    Store the result in the rtx TARGET, if that is convenient.
2206    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2207    Return the rtx for where the value is.  */
2208
2209 rtx
2210 expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2211               int amount, rtx target, int unsignedp)
2212 {
2213   return expand_shift_1 (code, mode,
2214                          shifted, GEN_INT (amount), target, unsignedp);
2215 }
2216
2217 /* Output a shift instruction for expression code CODE,
2218    with SHIFTED being the rtx for the value to shift,
2219    and AMOUNT the tree for the amount to shift by.
2220    Store the result in the rtx TARGET, if that is convenient.
2221    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2222    Return the rtx for where the value is.  */
2223
2224 rtx
2225 expand_variable_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2226                        tree amount, rtx target, int unsignedp)
2227 {
2228   return expand_shift_1 (code, mode,
2229                          shifted, expand_normal (amount), target, unsignedp);
2230 }
2231
2232 \f
2233 /* Indicates the type of fixup needed after a constant multiplication.
2234    BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2235    the result should be negated, and ADD_VARIANT means that the
2236    multiplicand should be added to the result.  */
2237 enum mult_variant {basic_variant, negate_variant, add_variant};
2238
2239 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2240                         const struct mult_cost *, enum machine_mode mode);
2241 static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
2242                                  struct algorithm *, enum mult_variant *, int);
2243 static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
2244                               const struct algorithm *, enum mult_variant);
2245 static unsigned HOST_WIDE_INT choose_multiplier (unsigned HOST_WIDE_INT, int,
2246                                                  int, rtx *, int *, int *);
2247 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2248 static rtx extract_high_half (enum machine_mode, rtx);
2249 static rtx expand_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
2250 static rtx expand_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
2251                                        int, int);
2252 /* Compute and return the best algorithm for multiplying by T.
2253    The algorithm must cost less than cost_limit
2254    If retval.cost >= COST_LIMIT, no algorithm was found and all
2255    other field of the returned struct are undefined.
2256    MODE is the machine mode of the multiplication.  */
2257
2258 static void
2259 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2260             const struct mult_cost *cost_limit, enum machine_mode mode)
2261 {
2262   int m;
2263   struct algorithm *alg_in, *best_alg;
2264   struct mult_cost best_cost;
2265   struct mult_cost new_limit;
2266   int op_cost, op_latency;
2267   unsigned HOST_WIDE_INT orig_t = t;
2268   unsigned HOST_WIDE_INT q;
2269   int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
2270   int hash_index;
2271   bool cache_hit = false;
2272   enum alg_code cache_alg = alg_zero;
2273   bool speed = optimize_insn_for_speed_p ();
2274
2275   /* Indicate that no algorithm is yet found.  If no algorithm
2276      is found, this value will be returned and indicate failure.  */
2277   alg_out->cost.cost = cost_limit->cost + 1;
2278   alg_out->cost.latency = cost_limit->latency + 1;
2279
2280   if (cost_limit->cost < 0
2281       || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2282     return;
2283
2284   /* Restrict the bits of "t" to the multiplication's mode.  */
2285   t &= GET_MODE_MASK (mode);
2286
2287   /* t == 1 can be done in zero cost.  */
2288   if (t == 1)
2289     {
2290       alg_out->ops = 1;
2291       alg_out->cost.cost = 0;
2292       alg_out->cost.latency = 0;
2293       alg_out->op[0] = alg_m;
2294       return;
2295     }
2296
2297   /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
2298      fail now.  */
2299   if (t == 0)
2300     {
2301       if (MULT_COST_LESS (cost_limit, zero_cost[speed]))
2302         return;
2303       else
2304         {
2305           alg_out->ops = 1;
2306           alg_out->cost.cost = zero_cost[speed];
2307           alg_out->cost.latency = zero_cost[speed];
2308           alg_out->op[0] = alg_zero;
2309           return;
2310         }
2311     }
2312
2313   /* We'll be needing a couple extra algorithm structures now.  */
2314
2315   alg_in = XALLOCA (struct algorithm);
2316   best_alg = XALLOCA (struct algorithm);
2317   best_cost = *cost_limit;
2318
2319   /* Compute the hash index.  */
2320   hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2321
2322   /* See if we already know what to do for T.  */
2323   if (alg_hash[hash_index].t == t
2324       && alg_hash[hash_index].mode == mode
2325       && alg_hash[hash_index].mode == mode
2326       && alg_hash[hash_index].speed == speed
2327       && alg_hash[hash_index].alg != alg_unknown)
2328     {
2329       cache_alg = alg_hash[hash_index].alg;
2330
2331       if (cache_alg == alg_impossible)
2332         {
2333           /* The cache tells us that it's impossible to synthesize
2334              multiplication by T within alg_hash[hash_index].cost.  */
2335           if (!CHEAPER_MULT_COST (&alg_hash[hash_index].cost, cost_limit))
2336             /* COST_LIMIT is at least as restrictive as the one
2337                recorded in the hash table, in which case we have no
2338                hope of synthesizing a multiplication.  Just
2339                return.  */
2340             return;
2341
2342           /* If we get here, COST_LIMIT is less restrictive than the
2343              one recorded in the hash table, so we may be able to
2344              synthesize a multiplication.  Proceed as if we didn't
2345              have the cache entry.  */
2346         }
2347       else
2348         {
2349           if (CHEAPER_MULT_COST (cost_limit, &alg_hash[hash_index].cost))
2350             /* The cached algorithm shows that this multiplication
2351                requires more cost than COST_LIMIT.  Just return.  This
2352                way, we don't clobber this cache entry with
2353                alg_impossible but retain useful information.  */
2354             return;
2355
2356           cache_hit = true;
2357
2358           switch (cache_alg)
2359             {
2360             case alg_shift:
2361               goto do_alg_shift;
2362
2363             case alg_add_t_m2:
2364             case alg_sub_t_m2:
2365               goto do_alg_addsub_t_m2;
2366
2367             case alg_add_factor:
2368             case alg_sub_factor:
2369               goto do_alg_addsub_factor;
2370
2371             case alg_add_t2_m:
2372               goto do_alg_add_t2_m;
2373
2374             case alg_sub_t2_m:
2375               goto do_alg_sub_t2_m;
2376
2377             default:
2378               gcc_unreachable ();
2379             }
2380         }
2381     }
2382
2383   /* If we have a group of zero bits at the low-order part of T, try
2384      multiplying by the remaining bits and then doing a shift.  */
2385
2386   if ((t & 1) == 0)
2387     {
2388     do_alg_shift:
2389       m = floor_log2 (t & -t);  /* m = number of low zero bits */
2390       if (m < maxm)
2391         {
2392           q = t >> m;
2393           /* The function expand_shift will choose between a shift and
2394              a sequence of additions, so the observed cost is given as
2395              MIN (m * add_cost[speed][mode], shift_cost[speed][mode][m]).  */
2396           op_cost = m * add_cost[speed][mode];
2397           if (shift_cost[speed][mode][m] < op_cost)
2398             op_cost = shift_cost[speed][mode][m];
2399           new_limit.cost = best_cost.cost - op_cost;
2400           new_limit.latency = best_cost.latency - op_cost;
2401           synth_mult (alg_in, q, &new_limit, mode);
2402
2403           alg_in->cost.cost += op_cost;
2404           alg_in->cost.latency += op_cost;
2405           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2406             {
2407               struct algorithm *x;
2408               best_cost = alg_in->cost;
2409               x = alg_in, alg_in = best_alg, best_alg = x;
2410               best_alg->log[best_alg->ops] = m;
2411               best_alg->op[best_alg->ops] = alg_shift;
2412             }
2413
2414           /* See if treating ORIG_T as a signed number yields a better
2415              sequence.  Try this sequence only for a negative ORIG_T
2416              as it would be useless for a non-negative ORIG_T.  */
2417           if ((HOST_WIDE_INT) orig_t < 0)
2418             {
2419               /* Shift ORIG_T as follows because a right shift of a
2420                  negative-valued signed type is implementation
2421                  defined.  */
2422               q = ~(~orig_t >> m);
2423               /* The function expand_shift will choose between a shift
2424                  and a sequence of additions, so the observed cost is
2425                  given as MIN (m * add_cost[speed][mode],
2426                  shift_cost[speed][mode][m]).  */
2427               op_cost = m * add_cost[speed][mode];
2428               if (shift_cost[speed][mode][m] < op_cost)
2429                 op_cost = shift_cost[speed][mode][m];
2430               new_limit.cost = best_cost.cost - op_cost;
2431               new_limit.latency = best_cost.latency - op_cost;
2432               synth_mult (alg_in, q, &new_limit, mode);
2433
2434               alg_in->cost.cost += op_cost;
2435               alg_in->cost.latency += op_cost;
2436               if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2437                 {
2438                   struct algorithm *x;
2439                   best_cost = alg_in->cost;
2440                   x = alg_in, alg_in = best_alg, best_alg = x;
2441                   best_alg->log[best_alg->ops] = m;
2442                   best_alg->op[best_alg->ops] = alg_shift;
2443                 }
2444             }
2445         }
2446       if (cache_hit)
2447         goto done;
2448     }
2449
2450   /* If we have an odd number, add or subtract one.  */
2451   if ((t & 1) != 0)
2452     {
2453       unsigned HOST_WIDE_INT w;
2454
2455     do_alg_addsub_t_m2:
2456       for (w = 1; (w & t) != 0; w <<= 1)
2457         ;
2458       /* If T was -1, then W will be zero after the loop.  This is another
2459          case where T ends with ...111.  Handling this with (T + 1) and
2460          subtract 1 produces slightly better code and results in algorithm
2461          selection much faster than treating it like the ...0111 case
2462          below.  */
2463       if (w == 0
2464           || (w > 2
2465               /* Reject the case where t is 3.
2466                  Thus we prefer addition in that case.  */
2467               && t != 3))
2468         {
2469           /* T ends with ...111.  Multiply by (T + 1) and subtract 1.  */
2470
2471           op_cost = add_cost[speed][mode];
2472           new_limit.cost = best_cost.cost - op_cost;
2473           new_limit.latency = best_cost.latency - op_cost;
2474           synth_mult (alg_in, t + 1, &new_limit, mode);
2475
2476           alg_in->cost.cost += op_cost;
2477           alg_in->cost.latency += op_cost;
2478           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2479             {
2480               struct algorithm *x;
2481               best_cost = alg_in->cost;
2482               x = alg_in, alg_in = best_alg, best_alg = x;
2483               best_alg->log[best_alg->ops] = 0;
2484               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2485             }
2486         }
2487       else
2488         {
2489           /* T ends with ...01 or ...011.  Multiply by (T - 1) and add 1.  */
2490
2491           op_cost = add_cost[speed][mode];
2492           new_limit.cost = best_cost.cost - op_cost;
2493           new_limit.latency = best_cost.latency - op_cost;
2494           synth_mult (alg_in, t - 1, &new_limit, mode);
2495
2496           alg_in->cost.cost += op_cost;
2497           alg_in->cost.latency += op_cost;
2498           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2499             {
2500               struct algorithm *x;
2501               best_cost = alg_in->cost;
2502               x = alg_in, alg_in = best_alg, best_alg = x;
2503               best_alg->log[best_alg->ops] = 0;
2504               best_alg->op[best_alg->ops] = alg_add_t_m2;
2505             }
2506         }
2507
2508       /* We may be able to calculate a * -7, a * -15, a * -31, etc
2509          quickly with a - a * n for some appropriate constant n.  */
2510       m = exact_log2 (-orig_t + 1);
2511       if (m >= 0 && m < maxm)
2512         {
2513           op_cost = shiftsub1_cost[speed][mode][m];
2514           new_limit.cost = best_cost.cost - op_cost;
2515           new_limit.latency = best_cost.latency - op_cost;
2516           synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m, &new_limit, mode);
2517
2518           alg_in->cost.cost += op_cost;
2519           alg_in->cost.latency += op_cost;
2520           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2521             {
2522               struct algorithm *x;
2523               best_cost = alg_in->cost;
2524               x = alg_in, alg_in = best_alg, best_alg = x;
2525               best_alg->log[best_alg->ops] = m;
2526               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2527             }
2528         }
2529
2530       if (cache_hit)
2531         goto done;
2532     }
2533
2534   /* Look for factors of t of the form
2535      t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2536      If we find such a factor, we can multiply by t using an algorithm that
2537      multiplies by q, shift the result by m and add/subtract it to itself.
2538
2539      We search for large factors first and loop down, even if large factors
2540      are less probable than small; if we find a large factor we will find a
2541      good sequence quickly, and therefore be able to prune (by decreasing
2542      COST_LIMIT) the search.  */
2543
2544  do_alg_addsub_factor:
2545   for (m = floor_log2 (t - 1); m >= 2; m--)
2546     {
2547       unsigned HOST_WIDE_INT d;
2548
2549       d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2550       if (t % d == 0 && t > d && m < maxm
2551           && (!cache_hit || cache_alg == alg_add_factor))
2552         {
2553           /* If the target has a cheap shift-and-add instruction use
2554              that in preference to a shift insn followed by an add insn.
2555              Assume that the shift-and-add is "atomic" with a latency
2556              equal to its cost, otherwise assume that on superscalar
2557              hardware the shift may be executed concurrently with the
2558              earlier steps in the algorithm.  */
2559           op_cost = add_cost[speed][mode] + shift_cost[speed][mode][m];
2560           if (shiftadd_cost[speed][mode][m] < op_cost)
2561             {
2562               op_cost = shiftadd_cost[speed][mode][m];
2563               op_latency = op_cost;
2564             }
2565           else
2566             op_latency = add_cost[speed][mode];
2567
2568           new_limit.cost = best_cost.cost - op_cost;
2569           new_limit.latency = best_cost.latency - op_latency;
2570           synth_mult (alg_in, t / d, &new_limit, mode);
2571
2572           alg_in->cost.cost += op_cost;
2573           alg_in->cost.latency += op_latency;
2574           if (alg_in->cost.latency < op_cost)
2575             alg_in->cost.latency = op_cost;
2576           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2577             {
2578               struct algorithm *x;
2579               best_cost = alg_in->cost;
2580               x = alg_in, alg_in = best_alg, best_alg = x;
2581               best_alg->log[best_alg->ops] = m;
2582               best_alg->op[best_alg->ops] = alg_add_factor;
2583             }
2584           /* Other factors will have been taken care of in the recursion.  */
2585           break;
2586         }
2587
2588       d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2589       if (t % d == 0 && t > d && m < maxm
2590           && (!cache_hit || cache_alg == alg_sub_factor))
2591         {
2592           /* If the target has a cheap shift-and-subtract insn use
2593              that in preference to a shift insn followed by a sub insn.
2594              Assume that the shift-and-sub is "atomic" with a latency
2595              equal to it's cost, otherwise assume that on superscalar
2596              hardware the shift may be executed concurrently with the
2597              earlier steps in the algorithm.  */
2598           op_cost = add_cost[speed][mode] + shift_cost[speed][mode][m];
2599           if (shiftsub0_cost[speed][mode][m] < op_cost)
2600             {
2601               op_cost = shiftsub0_cost[speed][mode][m];
2602               op_latency = op_cost;
2603             }
2604           else
2605             op_latency = add_cost[speed][mode];
2606
2607           new_limit.cost = best_cost.cost - op_cost;
2608           new_limit.latency = best_cost.latency - op_latency;
2609           synth_mult (alg_in, t / d, &new_limit, mode);
2610
2611           alg_in->cost.cost += op_cost;
2612           alg_in->cost.latency += op_latency;
2613           if (alg_in->cost.latency < op_cost)
2614             alg_in->cost.latency = op_cost;
2615           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2616             {
2617               struct algorithm *x;
2618               best_cost = alg_in->cost;
2619               x = alg_in, alg_in = best_alg, best_alg = x;
2620               best_alg->log[best_alg->ops] = m;
2621               best_alg->op[best_alg->ops] = alg_sub_factor;
2622             }
2623           break;
2624         }
2625     }
2626   if (cache_hit)
2627     goto done;
2628
2629   /* Try shift-and-add (load effective address) instructions,
2630      i.e. do a*3, a*5, a*9.  */
2631   if ((t & 1) != 0)
2632     {
2633     do_alg_add_t2_m:
2634       q = t - 1;
2635       q = q & -q;
2636       m = exact_log2 (q);
2637       if (m >= 0 && m < maxm)
2638         {
2639           op_cost = shiftadd_cost[speed][mode][m];
2640           new_limit.cost = best_cost.cost - op_cost;
2641           new_limit.latency = best_cost.latency - op_cost;
2642           synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2643
2644           alg_in->cost.cost += op_cost;
2645           alg_in->cost.latency += op_cost;
2646           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2647             {
2648               struct algorithm *x;
2649               best_cost = alg_in->cost;
2650               x = alg_in, alg_in = best_alg, best_alg = x;
2651               best_alg->log[best_alg->ops] = m;
2652               best_alg->op[best_alg->ops] = alg_add_t2_m;
2653             }
2654         }
2655       if (cache_hit)
2656         goto done;
2657
2658     do_alg_sub_t2_m:
2659       q = t + 1;
2660       q = q & -q;
2661       m = exact_log2 (q);
2662       if (m >= 0 && m < maxm)
2663         {
2664           op_cost = shiftsub0_cost[speed][mode][m];
2665           new_limit.cost = best_cost.cost - op_cost;
2666           new_limit.latency = best_cost.latency - op_cost;
2667           synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2668
2669           alg_in->cost.cost += op_cost;
2670           alg_in->cost.latency += op_cost;
2671           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2672             {
2673               struct algorithm *x;
2674               best_cost = alg_in->cost;
2675               x = alg_in, alg_in = best_alg, best_alg = x;
2676               best_alg->log[best_alg->ops] = m;
2677               best_alg->op[best_alg->ops] = alg_sub_t2_m;
2678             }
2679         }
2680       if (cache_hit)
2681         goto done;
2682     }
2683
2684  done:
2685   /* If best_cost has not decreased, we have not found any algorithm.  */
2686   if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2687     {
2688       /* We failed to find an algorithm.  Record alg_impossible for
2689          this case (that is, <T, MODE, COST_LIMIT>) so that next time
2690          we are asked to find an algorithm for T within the same or
2691          lower COST_LIMIT, we can immediately return to the
2692          caller.  */
2693       alg_hash[hash_index].t = t;
2694       alg_hash[hash_index].mode = mode;
2695       alg_hash[hash_index].speed = speed;
2696       alg_hash[hash_index].alg = alg_impossible;
2697       alg_hash[hash_index].cost = *cost_limit;
2698       return;
2699     }
2700
2701   /* Cache the result.  */
2702   if (!cache_hit)
2703     {
2704       alg_hash[hash_index].t = t;
2705       alg_hash[hash_index].mode = mode;
2706       alg_hash[hash_index].speed = speed;
2707       alg_hash[hash_index].alg = best_alg->op[best_alg->ops];
2708       alg_hash[hash_index].cost.cost = best_cost.cost;
2709       alg_hash[hash_index].cost.latency = best_cost.latency;
2710     }
2711
2712   /* If we are getting a too long sequence for `struct algorithm'
2713      to record, make this search fail.  */
2714   if (best_alg->ops == MAX_BITS_PER_WORD)
2715     return;
2716
2717   /* Copy the algorithm from temporary space to the space at alg_out.
2718      We avoid using structure assignment because the majority of
2719      best_alg is normally undefined, and this is a critical function.  */
2720   alg_out->ops = best_alg->ops + 1;
2721   alg_out->cost = best_cost;
2722   memcpy (alg_out->op, best_alg->op,
2723           alg_out->ops * sizeof *alg_out->op);
2724   memcpy (alg_out->log, best_alg->log,
2725           alg_out->ops * sizeof *alg_out->log);
2726 }
2727 \f
2728 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2729    Try three variations:
2730
2731        - a shift/add sequence based on VAL itself
2732        - a shift/add sequence based on -VAL, followed by a negation
2733        - a shift/add sequence based on VAL - 1, followed by an addition.
2734
2735    Return true if the cheapest of these cost less than MULT_COST,
2736    describing the algorithm in *ALG and final fixup in *VARIANT.  */
2737
2738 static bool
2739 choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
2740                      struct algorithm *alg, enum mult_variant *variant,
2741                      int mult_cost)
2742 {
2743   struct algorithm alg2;
2744   struct mult_cost limit;
2745   int op_cost;
2746   bool speed = optimize_insn_for_speed_p ();
2747
2748   /* Fail quickly for impossible bounds.  */
2749   if (mult_cost < 0)
2750     return false;
2751
2752   /* Ensure that mult_cost provides a reasonable upper bound.
2753      Any constant multiplication can be performed with less
2754      than 2 * bits additions.  */
2755   op_cost = 2 * GET_MODE_BITSIZE (mode) * add_cost[speed][mode];
2756   if (mult_cost > op_cost)
2757     mult_cost = op_cost;
2758
2759   *variant = basic_variant;
2760   limit.cost = mult_cost;
2761   limit.latency = mult_cost;
2762   synth_mult (alg, val, &limit, mode);
2763
2764   /* This works only if the inverted value actually fits in an
2765      `unsigned int' */
2766   if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
2767     {
2768       op_cost = neg_cost[speed][mode];
2769       if (MULT_COST_LESS (&alg->cost, mult_cost))
2770         {
2771           limit.cost = alg->cost.cost - op_cost;
2772           limit.latency = alg->cost.latency - op_cost;
2773         }
2774       else
2775         {
2776           limit.cost = mult_cost - op_cost;
2777           limit.latency = mult_cost - op_cost;
2778         }
2779
2780       synth_mult (&alg2, -val, &limit, mode);
2781       alg2.cost.cost += op_cost;
2782       alg2.cost.latency += op_cost;
2783       if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2784         *alg = alg2, *variant = negate_variant;
2785     }
2786
2787   /* This proves very useful for division-by-constant.  */
2788   op_cost = add_cost[speed][mode];
2789   if (MULT_COST_LESS (&alg->cost, mult_cost))
2790     {
2791       limit.cost = alg->cost.cost - op_cost;
2792       limit.latency = alg->cost.latency - op_cost;
2793     }
2794   else
2795     {
2796       limit.cost = mult_cost - op_cost;
2797       limit.latency = mult_cost - op_cost;
2798     }
2799
2800   synth_mult (&alg2, val - 1, &limit, mode);
2801   alg2.cost.cost += op_cost;
2802   alg2.cost.latency += op_cost;
2803   if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2804     *alg = alg2, *variant = add_variant;
2805
2806   return MULT_COST_LESS (&alg->cost, mult_cost);
2807 }
2808
2809 /* A subroutine of expand_mult, used for constant multiplications.
2810    Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2811    convenient.  Use the shift/add sequence described by ALG and apply
2812    the final fixup specified by VARIANT.  */
2813
2814 static rtx
2815 expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
2816                    rtx target, const struct algorithm *alg,
2817                    enum mult_variant variant)
2818 {
2819   HOST_WIDE_INT val_so_far;
2820   rtx insn, accum, tem;
2821   int opno;
2822   enum machine_mode nmode;
2823
2824   /* Avoid referencing memory over and over and invalid sharing
2825      on SUBREGs.  */
2826   op0 = force_reg (mode, op0);
2827
2828   /* ACCUM starts out either as OP0 or as a zero, depending on
2829      the first operation.  */
2830
2831   if (alg->op[0] == alg_zero)
2832     {
2833       accum = copy_to_mode_reg (mode, const0_rtx);
2834       val_so_far = 0;
2835     }
2836   else if (alg->op[0] == alg_m)
2837     {
2838       accum = copy_to_mode_reg (mode, op0);
2839       val_so_far = 1;
2840     }
2841   else
2842     gcc_unreachable ();
2843
2844   for (opno = 1; opno < alg->ops; opno++)
2845     {
2846       int log = alg->log[opno];
2847       rtx shift_subtarget = optimize ? 0 : accum;
2848       rtx add_target
2849         = (opno == alg->ops - 1 && target != 0 && variant != add_variant
2850            && !optimize)
2851           ? target : 0;
2852       rtx accum_target = optimize ? 0 : accum;
2853
2854       switch (alg->op[opno])
2855         {
2856         case alg_shift:
2857           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2858           /* REG_EQUAL note will be attached to the following insn.  */
2859           emit_move_insn (accum, tem);
2860           val_so_far <<= log;
2861           break;
2862
2863         case alg_add_t_m2:
2864           tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
2865           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2866                                  add_target ? add_target : accum_target);
2867           val_so_far += (HOST_WIDE_INT) 1 << log;
2868           break;
2869
2870         case alg_sub_t_m2:
2871           tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
2872           accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
2873                                  add_target ? add_target : accum_target);
2874           val_so_far -= (HOST_WIDE_INT) 1 << log;
2875           break;
2876
2877         case alg_add_t2_m:
2878           accum = expand_shift (LSHIFT_EXPR, mode, accum,
2879                                 log, shift_subtarget, 0);
2880           accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
2881                                  add_target ? add_target : accum_target);
2882           val_so_far = (val_so_far << log) + 1;
2883           break;
2884
2885         case alg_sub_t2_m:
2886           accum = expand_shift (LSHIFT_EXPR, mode, accum,
2887                                 log, shift_subtarget, 0);
2888           accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
2889                                  add_target ? add_target : accum_target);
2890           val_so_far = (val_so_far << log) - 1;
2891           break;
2892
2893         case alg_add_factor:
2894           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2895           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2896                                  add_target ? add_target : accum_target);
2897           val_so_far += val_so_far << log;
2898           break;
2899
2900         case alg_sub_factor:
2901           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2902           accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
2903                                  (add_target
2904                                   ? add_target : (optimize ? 0 : tem)));
2905           val_so_far = (val_so_far << log) - val_so_far;
2906           break;
2907
2908         default:
2909           gcc_unreachable ();
2910         }
2911
2912       /* Write a REG_EQUAL note on the last insn so that we can cse
2913          multiplication sequences.  Note that if ACCUM is a SUBREG,
2914          we've set the inner register and must properly indicate
2915          that.  */
2916
2917       tem = op0, nmode = mode;
2918       if (GET_CODE (accum) == SUBREG)
2919         {
2920           nmode = GET_MODE (SUBREG_REG (accum));
2921           tem = gen_lowpart (nmode, op0);
2922         }
2923
2924       insn = get_last_insn ();
2925       set_unique_reg_note (insn, REG_EQUAL,
2926                            gen_rtx_MULT (nmode, tem,
2927                                          GEN_INT (val_so_far)));
2928     }
2929
2930   if (variant == negate_variant)
2931     {
2932       val_so_far = -val_so_far;
2933       accum = expand_unop (mode, neg_optab, accum, target, 0);
2934     }
2935   else if (variant == add_variant)
2936     {
2937       val_so_far = val_so_far + 1;
2938       accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
2939     }
2940
2941   /* Compare only the bits of val and val_so_far that are significant
2942      in the result mode, to avoid sign-/zero-extension confusion.  */
2943   val &= GET_MODE_MASK (mode);
2944   val_so_far &= GET_MODE_MASK (mode);
2945   gcc_assert (val == val_so_far);
2946
2947   return accum;
2948 }
2949
2950 /* Perform a multiplication and return an rtx for the result.
2951    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
2952    TARGET is a suggestion for where to store the result (an rtx).
2953
2954    We check specially for a constant integer as OP1.
2955    If you want this check for OP0 as well, then before calling
2956    you should swap the two operands if OP0 would be constant.  */
2957
2958 rtx
2959 expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
2960              int unsignedp)
2961 {
2962   enum mult_variant variant;
2963   struct algorithm algorithm;
2964   int max_cost;
2965   bool speed = optimize_insn_for_speed_p ();
2966
2967   /* Handling const0_rtx here allows us to use zero as a rogue value for
2968      coeff below.  */
2969   if (op1 == const0_rtx)
2970     return const0_rtx;
2971   if (op1 == const1_rtx)
2972     return op0;
2973   if (op1 == constm1_rtx)
2974     return expand_unop (mode,
2975                         GET_MODE_CLASS (mode) == MODE_INT
2976                         && !unsignedp && flag_trapv
2977                         ? negv_optab : neg_optab,
2978                         op0, target, 0);
2979
2980   /* These are the operations that are potentially turned into a sequence
2981      of shifts and additions.  */
2982   if (SCALAR_INT_MODE_P (mode)
2983       && (unsignedp || !flag_trapv))
2984     {
2985       HOST_WIDE_INT coeff = 0;
2986       rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
2987
2988       /* synth_mult does an `unsigned int' multiply.  As long as the mode is
2989          less than or equal in size to `unsigned int' this doesn't matter.
2990          If the mode is larger than `unsigned int', then synth_mult works
2991          only if the constant value exactly fits in an `unsigned int' without
2992          any truncation.  This means that multiplying by negative values does
2993          not work; results are off by 2^32 on a 32 bit machine.  */
2994
2995       if (CONST_INT_P (op1))
2996         {
2997           /* Attempt to handle multiplication of DImode values by negative
2998              coefficients, by performing the multiplication by a positive
2999              multiplier and then inverting the result.  */
3000           if (INTVAL (op1) < 0
3001               && GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT)
3002             {
3003               /* Its safe to use -INTVAL (op1) even for INT_MIN, as the
3004                  result is interpreted as an unsigned coefficient.
3005                  Exclude cost of op0 from max_cost to match the cost
3006                  calculation of the synth_mult.  */
3007               max_cost = rtx_cost (gen_rtx_MULT (mode, fake_reg, op1), SET, speed)
3008                          - neg_cost[speed][mode];
3009               if (max_cost > 0
3010                   && choose_mult_variant (mode, -INTVAL (op1), &algorithm,
3011                                           &variant, max_cost))
3012                 {
3013                   rtx temp = expand_mult_const (mode, op0, -INTVAL (op1),
3014                                                 NULL_RTX, &algorithm,
3015                                                 variant);
3016                   return expand_unop (mode, neg_optab, temp, target, 0);
3017                 }
3018             }
3019           else coeff = INTVAL (op1);
3020         }
3021       else if (GET_CODE (op1) == CONST_DOUBLE)
3022         {
3023           /* If we are multiplying in DImode, it may still be a win
3024              to try to work with shifts and adds.  */
3025           if (CONST_DOUBLE_HIGH (op1) == 0
3026               && CONST_DOUBLE_LOW (op1) > 0)
3027             coeff = CONST_DOUBLE_LOW (op1);
3028           else if (CONST_DOUBLE_LOW (op1) == 0
3029                    && EXACT_POWER_OF_2_OR_ZERO_P (CONST_DOUBLE_HIGH (op1)))
3030             {
3031               int shift = floor_log2 (CONST_DOUBLE_HIGH (op1))
3032                           + HOST_BITS_PER_WIDE_INT;
3033               return expand_shift (LSHIFT_EXPR, mode, op0,
3034                                    shift, target, unsignedp);
3035             }
3036         }
3037
3038       /* We used to test optimize here, on the grounds that it's better to
3039          produce a smaller program when -O is not used.  But this causes
3040          such a terrible slowdown sometimes that it seems better to always
3041          use synth_mult.  */
3042       if (coeff != 0)
3043         {
3044           /* Special case powers of two.  */
3045           if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3046             return expand_shift (LSHIFT_EXPR, mode, op0,
3047                                  floor_log2 (coeff), target, unsignedp);
3048
3049           /* Exclude cost of op0 from max_cost to match the cost
3050              calculation of the synth_mult.  */
3051           max_cost = rtx_cost (gen_rtx_MULT (mode, fake_reg, op1), SET, speed);
3052           if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3053                                    max_cost))
3054             return expand_mult_const (mode, op0, coeff, target,
3055                                       &algorithm, variant);
3056         }
3057     }
3058
3059   if (GET_CODE (op0) == CONST_DOUBLE)
3060     {
3061       rtx temp = op0;
3062       op0 = op1;
3063       op1 = temp;
3064     }
3065
3066   /* Expand x*2.0 as x+x.  */
3067   if (GET_CODE (op1) == CONST_DOUBLE
3068       && SCALAR_FLOAT_MODE_P (mode))
3069     {
3070       REAL_VALUE_TYPE d;
3071       REAL_VALUE_FROM_CONST_DOUBLE (d, op1);
3072
3073       if (REAL_VALUES_EQUAL (d, dconst2))
3074         {
3075           op0 = force_reg (GET_MODE (op0), op0);
3076           return expand_binop (mode, add_optab, op0, op0,
3077                                target, unsignedp, OPTAB_LIB_WIDEN);
3078         }
3079     }
3080
3081   /* This used to use umul_optab if unsigned, but for non-widening multiply
3082      there is no difference between signed and unsigned.  */
3083   op0 = expand_binop (mode,
3084                       ! unsignedp
3085                       && flag_trapv && (GET_MODE_CLASS(mode) == MODE_INT)
3086                       ? smulv_optab : smul_optab,
3087                       op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3088   gcc_assert (op0);
3089   return op0;
3090 }
3091
3092 /* Perform a widening multiplication and return an rtx for the result.
3093    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3094    TARGET is a suggestion for where to store the result (an rtx).
3095    THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3096    or smul_widen_optab.
3097
3098    We check specially for a constant integer as OP1, comparing the
3099    cost of a widening multiply against the cost of a sequence of shifts
3100    and adds.  */
3101
3102 rtx
3103 expand_widening_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3104                       int unsignedp, optab this_optab)
3105 {
3106   bool speed = optimize_insn_for_speed_p ();
3107   rtx cop1;
3108
3109   if (CONST_INT_P (op1)
3110       && GET_MODE (op0) != VOIDmode
3111       && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3112                                 this_optab == umul_widen_optab))
3113       && CONST_INT_P (cop1)
3114       && (INTVAL (cop1) >= 0
3115           || HWI_COMPUTABLE_MODE_P (mode)))
3116     {
3117       HOST_WIDE_INT coeff = INTVAL (cop1);
3118       int max_cost;
3119       enum mult_variant variant;
3120       struct algorithm algorithm;
3121
3122       /* Special case powers of two.  */
3123       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3124         {
3125           op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3126           return expand_shift (LSHIFT_EXPR, mode, op0,
3127                                floor_log2 (coeff), target, unsignedp);
3128         }
3129
3130       /* Exclude cost of op0 from max_cost to match the cost
3131          calculation of the synth_mult.  */
3132       max_cost = mul_widen_cost[speed][mode];
3133       if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3134                                max_cost))
3135         {
3136           op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3137           return expand_mult_const (mode, op0, coeff, target,
3138                                     &algorithm, variant);
3139         }
3140     }
3141   return expand_binop (mode, this_optab, op0, op1, target,
3142                        unsignedp, OPTAB_LIB_WIDEN);
3143 }
3144 \f
3145 /* Return the smallest n such that 2**n >= X.  */
3146
3147 int
3148 ceil_log2 (unsigned HOST_WIDE_INT x)
3149 {
3150   return floor_log2 (x - 1) + 1;
3151 }
3152
3153 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3154    replace division by D, and put the least significant N bits of the result
3155    in *MULTIPLIER_PTR and return the most significant bit.
3156
3157    The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3158    needed precision is in PRECISION (should be <= N).
3159
3160    PRECISION should be as small as possible so this function can choose
3161    multiplier more freely.
3162
3163    The rounded-up logarithm of D is placed in *lgup_ptr.  A shift count that
3164    is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3165
3166    Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3167    where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.  */
3168
3169 static
3170 unsigned HOST_WIDE_INT
3171 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3172                    rtx *multiplier_ptr, int *post_shift_ptr, int *lgup_ptr)
3173 {
3174   HOST_WIDE_INT mhigh_hi, mlow_hi;
3175   unsigned HOST_WIDE_INT mhigh_lo, mlow_lo;
3176   int lgup, post_shift;
3177   int pow, pow2;
3178   unsigned HOST_WIDE_INT nl, dummy1;
3179   HOST_WIDE_INT nh, dummy2;
3180
3181   /* lgup = ceil(log2(divisor)); */
3182   lgup = ceil_log2 (d);
3183
3184   gcc_assert (lgup <= n);
3185
3186   pow = n + lgup;
3187   pow2 = n + lgup - precision;
3188
3189   /* We could handle this with some effort, but this case is much
3190      better handled directly with a scc insn, so rely on caller using
3191      that.  */
3192   gcc_assert (pow != 2 * HOST_BITS_PER_WIDE_INT);
3193
3194   /* mlow = 2^(N + lgup)/d */
3195  if (pow >= HOST_BITS_PER_WIDE_INT)
3196     {
3197       nh = (HOST_WIDE_INT) 1 << (pow - HOST_BITS_PER_WIDE_INT);
3198       nl = 0;
3199     }
3200   else
3201     {
3202       nh = 0;
3203       nl = (unsigned HOST_WIDE_INT) 1 << pow;
3204     }
3205   div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3206                         &mlow_lo, &mlow_hi, &dummy1, &dummy2);
3207
3208   /* mhigh = (2^(N + lgup) + 2^N + lgup - precision)/d */
3209   if (pow2 >= HOST_BITS_PER_WIDE_INT)
3210     nh |= (HOST_WIDE_INT) 1 << (pow2 - HOST_BITS_PER_WIDE_INT);
3211   else
3212     nl |= (unsigned HOST_WIDE_INT) 1 << pow2;
3213   div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3214                         &mhigh_lo, &mhigh_hi, &dummy1, &dummy2);
3215
3216   gcc_assert (!mhigh_hi || nh - d < d);
3217   gcc_assert (mhigh_hi <= 1 && mlow_hi <= 1);
3218   /* Assert that mlow < mhigh.  */
3219   gcc_assert (mlow_hi < mhigh_hi
3220               || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo));
3221
3222   /* If precision == N, then mlow, mhigh exceed 2^N
3223      (but they do not exceed 2^(N+1)).  */
3224
3225   /* Reduce to lowest terms.  */
3226   for (post_shift = lgup; post_shift > 0; post_shift--)
3227     {
3228       unsigned HOST_WIDE_INT ml_lo = (mlow_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mlow_lo >> 1);
3229       unsigned HOST_WIDE_INT mh_lo = (mhigh_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mhigh_lo >> 1);
3230       if (ml_lo >= mh_lo)
3231         break;
3232
3233       mlow_hi = 0;
3234       mlow_lo = ml_lo;
3235       mhigh_hi = 0;
3236       mhigh_lo = mh_lo;
3237     }
3238
3239   *post_shift_ptr = post_shift;
3240   *lgup_ptr = lgup;
3241   if (n < HOST_BITS_PER_WIDE_INT)
3242     {
3243       unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3244       *multiplier_ptr = GEN_INT (mhigh_lo & mask);
3245       return mhigh_lo >= mask;
3246     }
3247   else
3248     {
3249       *multiplier_ptr = GEN_INT (mhigh_lo);
3250       return mhigh_hi;
3251     }
3252 }
3253
3254 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3255    congruent to 1 (mod 2**N).  */
3256
3257 static unsigned HOST_WIDE_INT
3258 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3259 {
3260   /* Solve x*y == 1 (mod 2^n), where x is odd.  Return y.  */
3261
3262   /* The algorithm notes that the choice y = x satisfies
3263      x*y == 1 mod 2^3, since x is assumed odd.
3264      Each iteration doubles the number of bits of significance in y.  */
3265
3266   unsigned HOST_WIDE_INT mask;
3267   unsigned HOST_WIDE_INT y = x;
3268   int nbit = 3;
3269
3270   mask = (n == HOST_BITS_PER_WIDE_INT
3271           ? ~(unsigned HOST_WIDE_INT) 0
3272           : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3273
3274   while (nbit < n)
3275     {
3276       y = y * (2 - x*y) & mask;         /* Modulo 2^N */
3277       nbit *= 2;
3278     }
3279   return y;
3280 }
3281
3282 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3283    flavor of OP0 and OP1.  ADJ_OPERAND is already the high half of the
3284    product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed product
3285    to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3286    become signed.
3287
3288    The result is put in TARGET if that is convenient.
3289
3290    MODE is the mode of operation.  */
3291
3292 rtx
3293 expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
3294                              rtx op1, rtx target, int unsignedp)
3295 {
3296   rtx tem;
3297   enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3298
3299   tem = expand_shift (RSHIFT_EXPR, mode, op0,
3300                       GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3301   tem = expand_and (mode, tem, op1, NULL_RTX);
3302   adj_operand
3303     = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3304                      adj_operand);
3305
3306   tem = expand_shift (RSHIFT_EXPR, mode, op1,
3307                       GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3308   tem = expand_and (mode, tem, op0, NULL_RTX);
3309   target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3310                           target);
3311
3312   return target;
3313 }
3314
3315 /* Subroutine of expand_mult_highpart.  Return the MODE high part of OP.  */
3316
3317 static rtx
3318 extract_high_half (enum machine_mode mode, rtx op)
3319 {
3320   enum machine_mode wider_mode;
3321
3322   if (mode == word_mode)
3323     return gen_highpart (mode, op);
3324
3325   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3326
3327   wider_mode = GET_MODE_WIDER_MODE (mode);
3328   op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3329                      GET_MODE_BITSIZE (mode), 0, 1);
3330   return convert_modes (mode, wider_mode, op, 0);
3331 }
3332
3333 /* Like expand_mult_highpart, but only consider using a multiplication
3334    optab.  OP1 is an rtx for the constant operand.  */
3335
3336 static rtx
3337 expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
3338                             rtx target, int unsignedp, int max_cost)
3339 {
3340   rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3341   enum machine_mode wider_mode;
3342   optab moptab;
3343   rtx tem;
3344   int size;
3345   bool speed = optimize_insn_for_speed_p ();
3346
3347   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3348
3349   wider_mode = GET_MODE_WIDER_MODE (mode);
3350   size = GET_MODE_BITSIZE (mode);
3351
3352   /* Firstly, try using a multiplication insn that only generates the needed
3353      high part of the product, and in the sign flavor of unsignedp.  */
3354   if (mul_highpart_cost[speed][mode] < max_cost)
3355     {
3356       moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3357       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3358                           unsignedp, OPTAB_DIRECT);
3359       if (tem)
3360         return tem;
3361     }
3362
3363   /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3364      Need to adjust the result after the multiplication.  */
3365   if (size - 1 < BITS_PER_WORD
3366       && (mul_highpart_cost[speed][mode] + 2 * shift_cost[speed][mode][size-1]
3367           + 4 * add_cost[speed][mode] < max_cost))
3368     {
3369       moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3370       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3371                           unsignedp, OPTAB_DIRECT);
3372       if (tem)
3373         /* We used the wrong signedness.  Adjust the result.  */
3374         return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3375                                             tem, unsignedp);
3376     }
3377
3378   /* Try widening multiplication.  */
3379   moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3380   if (optab_handler (moptab, wider_mode) != CODE_FOR_nothing
3381       && mul_widen_cost[speed][wider_mode] < max_cost)
3382     {
3383       tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3384                           unsignedp, OPTAB_WIDEN);
3385       if (tem)
3386         return extract_high_half (mode, tem);
3387     }
3388
3389   /* Try widening the mode and perform a non-widening multiplication.  */
3390   if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3391       && size - 1 < BITS_PER_WORD
3392       && mul_cost[speed][wider_mode] + shift_cost[speed][mode][size-1] < max_cost)
3393     {
3394       rtx insns, wop0, wop1;
3395
3396       /* We need to widen the operands, for example to ensure the
3397          constant multiplier is correctly sign or zero extended.
3398          Use a sequence to clean-up any instructions emitted by
3399          the conversions if things don't work out.  */
3400       start_sequence ();
3401       wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3402       wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3403       tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3404                           unsignedp, OPTAB_WIDEN);
3405       insns = get_insns ();
3406       end_sequence ();
3407
3408       if (tem)
3409         {
3410           emit_insn (insns);
3411           return extract_high_half (mode, tem);
3412         }
3413     }
3414
3415   /* Try widening multiplication of opposite signedness, and adjust.  */
3416   moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3417   if (optab_handler (moptab, wider_mode) != CODE_FOR_nothing
3418       && size - 1 < BITS_PER_WORD
3419       && (mul_widen_cost[speed][wider_mode] + 2 * shift_cost[speed][mode][size-1]
3420           + 4 * add_cost[speed][mode] < max_cost))
3421     {
3422       tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3423                           NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3424       if (tem != 0)
3425         {
3426           tem = extract_high_half (mode, tem);
3427           /* We used the wrong signedness.  Adjust the result.  */
3428           return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3429                                               target, unsignedp);
3430         }
3431     }
3432
3433   return 0;
3434 }
3435
3436 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3437    putting the high half of the result in TARGET if that is convenient,
3438    and return where the result is.  If the operation can not be performed,
3439    0 is returned.
3440
3441    MODE is the mode of operation and result.
3442
3443    UNSIGNEDP nonzero means unsigned multiply.
3444
3445    MAX_COST is the total allowed cost for the expanded RTL.  */
3446
3447 static rtx
3448 expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
3449                       rtx target, int unsignedp, int max_cost)
3450 {
3451   enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3452   unsigned HOST_WIDE_INT cnst1;
3453   int extra_cost;
3454   bool sign_adjust = false;
3455   enum mult_variant variant;
3456   struct algorithm alg;
3457   rtx tem;
3458   bool speed = optimize_insn_for_speed_p ();
3459
3460   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3461   /* We can't support modes wider than HOST_BITS_PER_INT.  */
3462   gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3463
3464   cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3465
3466   /* We can't optimize modes wider than BITS_PER_WORD.
3467      ??? We might be able to perform double-word arithmetic if
3468      mode == word_mode, however all the cost calculations in
3469      synth_mult etc. assume single-word operations.  */
3470   if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3471     return expand_mult_highpart_optab (mode, op0, op1, target,
3472                                        unsignedp, max_cost);
3473
3474   extra_cost = shift_cost[speed][mode][GET_MODE_BITSIZE (mode) - 1];
3475
3476   /* Check whether we try to multiply by a negative constant.  */
3477   if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3478     {
3479       sign_adjust = true;
3480       extra_cost += add_cost[speed][mode];
3481     }
3482
3483   /* See whether shift/add multiplication is cheap enough.  */
3484   if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3485                            max_cost - extra_cost))
3486     {
3487       /* See whether the specialized multiplication optabs are
3488          cheaper than the shift/add version.  */
3489       tem = expand_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3490                                         alg.cost.cost + extra_cost);
3491       if (tem)
3492         return tem;
3493
3494       tem = convert_to_mode (wider_mode, op0, unsignedp);
3495       tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3496       tem = extract_high_half (mode, tem);
3497
3498       /* Adjust result for signedness.  */
3499       if (sign_adjust)
3500         tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3501
3502       return tem;
3503     }
3504   return expand_mult_highpart_optab (mode, op0, op1, target,
3505                                      unsignedp, max_cost);
3506 }
3507
3508
3509 /* Expand signed modulus of OP0 by a power of two D in mode MODE.  */
3510
3511 static rtx
3512 expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3513 {
3514   unsigned HOST_WIDE_INT masklow, maskhigh;
3515   rtx result, temp, shift, label;
3516   int logd;
3517
3518   logd = floor_log2 (d);
3519   result = gen_reg_rtx (mode);
3520
3521   /* Avoid conditional branches when they're expensive.  */
3522   if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3523       && optimize_insn_for_speed_p ())
3524     {
3525       rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3526                                       mode, 0, -1);
3527       if (signmask)
3528         {
3529           signmask = force_reg (mode, signmask);
3530           masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3531           shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3532
3533           /* Use the rtx_cost of a LSHIFTRT instruction to determine
3534              which instruction sequence to use.  If logical right shifts
3535              are expensive the use 2 XORs, 2 SUBs and an AND, otherwise
3536              use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */
3537
3538           temp = gen_rtx_LSHIFTRT (mode, result, shift);
3539           if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3540               || rtx_cost (temp, SET, optimize_insn_for_speed_p ()) > COSTS_N_INSNS (2))
3541             {
3542               temp = expand_binop (mode, xor_optab, op0, signmask,
3543                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3544               temp = expand_binop (mode, sub_optab, temp, signmask,
3545                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3546               temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3547                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3548               temp = expand_binop (mode, xor_optab, temp, signmask,
3549                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3550               temp = expand_binop (mode, sub_optab, temp, signmask,
3551                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3552             }
3553           else
3554             {
3555               signmask = expand_binop (mode, lshr_optab, signmask, shift,
3556                                        NULL_RTX, 1, OPTAB_LIB_WIDEN);
3557               signmask = force_reg (mode, signmask);
3558
3559               temp = expand_binop (mode, add_optab, op0, signmask,
3560                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3561               temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3562                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3563               temp = expand_binop (mode, sub_optab, temp, signmask,
3564                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3565             }
3566           return temp;
3567         }
3568     }
3569
3570   /* Mask contains the mode's signbit and the significant bits of the
3571      modulus.  By including the signbit in the operation, many targets
3572      can avoid an explicit compare operation in the following comparison
3573      against zero.  */
3574
3575   masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3576   if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
3577     {
3578       masklow |= (HOST_WIDE_INT) -1 << (GET_MODE_BITSIZE (mode) - 1);
3579       maskhigh = -1;
3580     }
3581   else
3582     maskhigh = (HOST_WIDE_INT) -1
3583                  << (GET_MODE_BITSIZE (mode) - HOST_BITS_PER_WIDE_INT - 1);
3584
3585   temp = expand_binop (mode, and_optab, op0,
3586                        immed_double_const (masklow, maskhigh, mode),
3587                        result, 1, OPTAB_LIB_WIDEN);
3588   if (temp != result)
3589     emit_move_insn (result, temp);
3590
3591   label = gen_label_rtx ();
3592   do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3593
3594   temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3595                        0, OPTAB_LIB_WIDEN);
3596   masklow = (HOST_WIDE_INT) -1 << logd;
3597   maskhigh = -1;
3598   temp = expand_binop (mode, ior_optab, temp,
3599                        immed_double_const (masklow, maskhigh, mode),
3600                        result, 1, OPTAB_LIB_WIDEN);
3601   temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3602                        0, OPTAB_LIB_WIDEN);
3603   if (temp != result)
3604     emit_move_insn (result, temp);
3605   emit_label (label);
3606   return result;
3607 }
3608
3609 /* Expand signed division of OP0 by a power of two D in mode MODE.
3610    This routine is only called for positive values of D.  */
3611
3612 static rtx
3613 expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3614 {
3615   rtx temp, label;
3616   int logd;
3617
3618   logd = floor_log2 (d);
3619
3620   if (d == 2
3621       && BRANCH_COST (optimize_insn_for_speed_p (),
3622                       false) >= 1)
3623     {
3624       temp = gen_reg_rtx (mode);
3625       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3626       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3627                            0, OPTAB_LIB_WIDEN);
3628       return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3629     }
3630
3631 #ifdef HAVE_conditional_move
3632   if (BRANCH_COST (optimize_insn_for_speed_p (), false)
3633       >= 2)
3634     {
3635       rtx temp2;
3636
3637       /* ??? emit_conditional_move forces a stack adjustment via
3638          compare_from_rtx so, if the sequence is discarded, it will
3639          be lost.  Do it now instead.  */
3640       do_pending_stack_adjust ();
3641
3642       start_sequence ();
3643       temp2 = copy_to_mode_reg (mode, op0);
3644       temp = expand_binop (mode, add_optab, temp2, GEN_INT (d-1),
3645                            NULL_RTX, 0, OPTAB_LIB_WIDEN);
3646       temp = force_reg (mode, temp);
3647
3648       /* Construct "temp2 = (temp2 < 0) ? temp : temp2".  */
3649       temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3650                                      mode, temp, temp2, mode, 0);
3651       if (temp2)
3652         {
3653           rtx seq = get_insns ();
3654           end_sequence ();
3655           emit_insn (seq);
3656           return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
3657         }
3658       end_sequence ();
3659     }
3660 #endif
3661
3662   if (BRANCH_COST (optimize_insn_for_speed_p (),
3663                    false) >= 2)
3664     {
3665       int ushift = GET_MODE_BITSIZE (mode) - logd;
3666
3667       temp = gen_reg_rtx (mode);
3668       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3669       if (shift_cost[optimize_insn_for_speed_p ()][mode][ushift] > COSTS_N_INSNS (1))
3670         temp = expand_binop (mode, and_optab, temp, GEN_INT (d - 1),
3671                              NULL_RTX, 0, OPTAB_LIB_WIDEN);
3672       else
3673         temp = expand_shift (RSHIFT_EXPR, mode, temp,
3674                              ushift, NULL_RTX, 1);
3675       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3676                            0, OPTAB_LIB_WIDEN);
3677       return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3678     }
3679
3680   label = gen_label_rtx ();
3681   temp = copy_to_mode_reg (mode, op0);
3682   do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3683   expand_inc (temp, GEN_INT (d - 1));
3684   emit_label (label);
3685   return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3686 }
3687 \f
3688 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3689    if that is convenient, and returning where the result is.
3690    You may request either the quotient or the remainder as the result;
3691    specify REM_FLAG nonzero to get the remainder.
3692
3693    CODE is the expression code for which kind of division this is;
3694    it controls how rounding is done.  MODE is the machine mode to use.
3695    UNSIGNEDP nonzero means do unsigned division.  */
3696
3697 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3698    and then correct it by or'ing in missing high bits
3699    if result of ANDI is nonzero.
3700    For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3701    This could optimize to a bfexts instruction.
3702    But C doesn't use these operations, so their optimizations are
3703    left for later.  */
3704 /* ??? For modulo, we don't actually need the highpart of the first product,
3705    the low part will do nicely.  And for small divisors, the second multiply
3706    can also be a low-part only multiply or even be completely left out.
3707    E.g. to calculate the remainder of a division by 3 with a 32 bit
3708    multiply, multiply with 0x55555556 and extract the upper two bits;
3709    the result is exact for inputs up to 0x1fffffff.
3710    The input range can be reduced by using cross-sum rules.
3711    For odd divisors >= 3, the following table gives right shift counts
3712    so that if a number is shifted by an integer multiple of the given
3713    amount, the remainder stays the same:
3714    2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3715    14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3716    0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3717    20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3718    0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3719
3720    Cross-sum rules for even numbers can be derived by leaving as many bits
3721    to the right alone as the divisor has zeros to the right.
3722    E.g. if x is an unsigned 32 bit number:
3723    (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3724    */
3725
3726 rtx
3727 expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3728                rtx op0, rtx op1, rtx target, int unsignedp)
3729 {
3730   enum machine_mode compute_mode;
3731   rtx tquotient;
3732   rtx quotient = 0, remainder = 0;
3733   rtx last;
3734   int size;
3735   rtx insn, set;
3736   optab optab1, optab2;
3737   int op1_is_constant, op1_is_pow2 = 0;
3738   int max_cost, extra_cost;
3739   static HOST_WIDE_INT last_div_const = 0;
3740   static HOST_WIDE_INT ext_op1;
3741   bool speed = optimize_insn_for_speed_p ();
3742
3743   op1_is_constant = CONST_INT_P (op1);
3744   if (op1_is_constant)
3745     {
3746       ext_op1 = INTVAL (op1);
3747       if (unsignedp)
3748         ext_op1 &= GET_MODE_MASK (mode);
3749       op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3750                      || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3751     }
3752
3753   /*
3754      This is the structure of expand_divmod:
3755
3756      First comes code to fix up the operands so we can perform the operations
3757      correctly and efficiently.
3758
3759      Second comes a switch statement with code specific for each rounding mode.
3760      For some special operands this code emits all RTL for the desired
3761      operation, for other cases, it generates only a quotient and stores it in
3762      QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
3763      to indicate that it has not done anything.
3764
3765      Last comes code that finishes the operation.  If QUOTIENT is set and
3766      REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
3767      QUOTIENT is not set, it is computed using trunc rounding.
3768
3769      We try to generate special code for division and remainder when OP1 is a
3770      constant.  If |OP1| = 2**n we can use shifts and some other fast
3771      operations.  For other values of OP1, we compute a carefully selected
3772      fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3773      by m.
3774
3775      In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3776      half of the product.  Different strategies for generating the product are
3777      implemented in expand_mult_highpart.
3778
3779      If what we actually want is the remainder, we generate that by another
3780      by-constant multiplication and a subtraction.  */
3781
3782   /* We shouldn't be called with OP1 == const1_rtx, but some of the
3783      code below will malfunction if we are, so check here and handle
3784      the special case if so.  */
3785   if (op1 == const1_rtx)
3786     return rem_flag ? const0_rtx : op0;
3787
3788     /* When dividing by -1, we could get an overflow.
3789      negv_optab can handle overflows.  */
3790   if (! unsignedp && op1 == constm1_rtx)
3791     {
3792       if (rem_flag)
3793         return const0_rtx;
3794       return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT
3795                           ? negv_optab : neg_optab, op0, target, 0);
3796     }
3797
3798   if (target
3799       /* Don't use the function value register as a target
3800          since we have to read it as well as write it,
3801          and function-inlining gets confused by this.  */
3802       && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3803           /* Don't clobber an operand while doing a multi-step calculation.  */
3804           || ((rem_flag || op1_is_constant)
3805               && (reg_mentioned_p (target, op0)
3806                   || (MEM_P (op0) && MEM_P (target))))
3807           || reg_mentioned_p (target, op1)
3808           || (MEM_P (op1) && MEM_P (target))))
3809     target = 0;
3810
3811   /* Get the mode in which to perform this computation.  Normally it will
3812      be MODE, but sometimes we can't do the desired operation in MODE.
3813      If so, pick a wider mode in which we can do the operation.  Convert
3814      to that mode at the start to avoid repeated conversions.
3815
3816      First see what operations we need.  These depend on the expression
3817      we are evaluating.  (We assume that divxx3 insns exist under the
3818      same conditions that modxx3 insns and that these insns don't normally
3819      fail.  If these assumptions are not correct, we may generate less
3820      efficient code in some cases.)
3821
3822      Then see if we find a mode in which we can open-code that operation
3823      (either a division, modulus, or shift).  Finally, check for the smallest
3824      mode for which we can do the operation with a library call.  */
3825
3826   /* We might want to refine this now that we have division-by-constant
3827      optimization.  Since expand_mult_highpart tries so many variants, it is
3828      not straightforward to generalize this.  Maybe we should make an array
3829      of possible modes in init_expmed?  Save this for GCC 2.7.  */
3830
3831   optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3832             ? (unsignedp ? lshr_optab : ashr_optab)
3833             : (unsignedp ? udiv_optab : sdiv_optab));
3834   optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3835             ? optab1
3836             : (unsignedp ? udivmod_optab : sdivmod_optab));
3837
3838   for (compute_mode = mode; compute_mode != VOIDmode;
3839        compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3840     if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
3841         || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
3842       break;
3843
3844   if (compute_mode == VOIDmode)
3845     for (compute_mode = mode; compute_mode != VOIDmode;
3846          compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3847       if (optab_libfunc (optab1, compute_mode)
3848           || optab_libfunc (optab2, compute_mode))
3849         break;
3850
3851   /* If we still couldn't find a mode, use MODE, but expand_binop will
3852      probably die.  */
3853   if (compute_mode == VOIDmode)
3854     compute_mode = mode;
3855
3856   if (target && GET_MODE (target) == compute_mode)
3857     tquotient = target;
3858   else
3859     tquotient = gen_reg_rtx (compute_mode);
3860
3861   size = GET_MODE_BITSIZE (compute_mode);
3862 #if 0
3863   /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3864      (mode), and thereby get better code when OP1 is a constant.  Do that
3865      later.  It will require going over all usages of SIZE below.  */
3866   size = GET_MODE_BITSIZE (mode);
3867 #endif
3868
3869   /* Only deduct something for a REM if the last divide done was
3870      for a different constant.   Then set the constant of the last
3871      divide.  */
3872   max_cost = unsignedp ? udiv_cost[speed][compute_mode] : sdiv_cost[speed][compute_mode];
3873   if (rem_flag && ! (last_div_const != 0 && op1_is_constant
3874                      && INTVAL (op1) == last_div_const))
3875     max_cost -= mul_cost[speed][compute_mode] + add_cost[speed][compute_mode];
3876
3877   last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
3878
3879   /* Now convert to the best mode to use.  */
3880   if (compute_mode != mode)
3881     {
3882       op0 = convert_modes (compute_mode, mode, op0, unsignedp);
3883       op1 = convert_modes (compute_mode, mode, op1, unsignedp);
3884
3885       /* convert_modes may have placed op1 into a register, so we
3886          must recompute the following.  */
3887       op1_is_constant = CONST_INT_P (op1);
3888       op1_is_pow2 = (op1_is_constant
3889                      && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
3890                           || (! unsignedp
3891                               && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1)))))) ;
3892     }
3893
3894   /* If one of the operands is a volatile MEM, copy it into a register.  */
3895
3896   if (MEM_P (op0) && MEM_VOLATILE_P (op0))
3897     op0 = force_reg (compute_mode, op0);
3898   if (MEM_P (op1) && MEM_VOLATILE_P (op1))
3899     op1 = force_reg (compute_mode, op1);
3900
3901   /* If we need the remainder or if OP1 is constant, we need to
3902      put OP0 in a register in case it has any queued subexpressions.  */
3903   if (rem_flag || op1_is_constant)
3904     op0 = force_reg (compute_mode, op0);
3905
3906   last = get_last_insn ();
3907
3908   /* Promote floor rounding to trunc rounding for unsigned operations.  */
3909   if (unsignedp)
3910     {
3911       if (code == FLOOR_DIV_EXPR)
3912         code = TRUNC_DIV_EXPR;
3913       if (code == FLOOR_MOD_EXPR)
3914         code = TRUNC_MOD_EXPR;
3915       if (code == EXACT_DIV_EXPR && op1_is_pow2)
3916         code = TRUNC_DIV_EXPR;
3917     }
3918
3919   if (op1 != const0_rtx)
3920     switch (code)
3921       {
3922       case TRUNC_MOD_EXPR:
3923       case TRUNC_DIV_EXPR:
3924         if (op1_is_constant)
3925           {
3926             if (unsignedp)
3927               {
3928                 unsigned HOST_WIDE_INT mh;
3929                 int pre_shift, post_shift;
3930                 int dummy;
3931                 rtx ml;
3932                 unsigned HOST_WIDE_INT d = (INTVAL (op1)
3933                                             & GET_MODE_MASK (compute_mode));
3934
3935                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
3936                   {
3937                     pre_shift = floor_log2 (d);
3938                     if (rem_flag)
3939                       {
3940                         remainder
3941                           = expand_binop (compute_mode, and_optab, op0,
3942                                           GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
3943                                           remainder, 1,
3944                                           OPTAB_LIB_WIDEN);
3945                         if (remainder)
3946                           return gen_lowpart (mode, remainder);
3947                       }
3948                     quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3949                                              pre_shift, tquotient, 1);
3950                   }
3951                 else if (size <= HOST_BITS_PER_WIDE_INT)
3952                   {
3953                     if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
3954                       {
3955                         /* Most significant bit of divisor is set; emit an scc
3956                            insn.  */
3957                         quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
3958                                                           compute_mode, 1, 1);
3959                       }
3960                     else
3961                       {
3962                         /* Find a suitable multiplier and right shift count
3963                            instead of multiplying with D.  */
3964
3965                         mh = choose_multiplier (d, size, size,
3966                                                 &ml, &post_shift, &dummy);
3967
3968                         /* If the suggested multiplier is more than SIZE bits,
3969                            we can do better for even divisors, using an
3970                            initial right shift.  */
3971                         if (mh != 0 && (d & 1) == 0)
3972                           {
3973                             pre_shift = floor_log2 (d & -d);
3974                             mh = choose_multiplier (d >> pre_shift, size,
3975                                                     size - pre_shift,
3976                                                     &ml, &post_shift, &dummy);
3977                             gcc_assert (!mh);
3978                           }
3979                         else
3980                           pre_shift = 0;
3981
3982                         if (mh != 0)
3983                           {
3984                             rtx t1, t2, t3, t4;
3985
3986                             if (post_shift - 1 >= BITS_PER_WORD)
3987                               goto fail1;
3988
3989                             extra_cost
3990                               = (shift_cost[speed][compute_mode][post_shift - 1]
3991                                  + shift_cost[speed][compute_mode][1]
3992                                  + 2 * add_cost[speed][compute_mode]);
3993                             t1 = expand_mult_highpart (compute_mode, op0, ml,
3994                                                        NULL_RTX, 1,
3995                                                        max_cost - extra_cost);
3996                             if (t1 == 0)
3997                               goto fail1;
3998                             t2 = force_operand (gen_rtx_MINUS (compute_mode,
3999                                                                op0, t1),
4000                                                 NULL_RTX);
4001                             t3 = expand_shift (RSHIFT_EXPR, compute_mode,
4002                                                t2, 1, NULL_RTX, 1);
4003                             t4 = force_operand (gen_rtx_PLUS (compute_mode,
4004                                                               t1, t3),
4005                                                 NULL_RTX);
4006                             quotient = expand_shift
4007                               (RSHIFT_EXPR, compute_mode, t4,
4008                                post_shift - 1, tquotient, 1);
4009                           }
4010                         else
4011                           {
4012                             rtx t1, t2;
4013
4014                             if (pre_shift >= BITS_PER_WORD
4015                                 || post_shift >= BITS_PER_WORD)
4016                               goto fail1;
4017
4018                             t1 = expand_shift
4019                               (RSHIFT_EXPR, compute_mode, op0,
4020                                pre_shift, NULL_RTX, 1);
4021                             extra_cost
4022                               = (shift_cost[speed][compute_mode][pre_shift]
4023                                  + shift_cost[speed][compute_mode][post_shift]);
4024                             t2 = expand_mult_highpart (compute_mode, t1, ml,
4025                                                        NULL_RTX, 1,
4026                                                        max_cost - extra_cost);
4027                             if (t2 == 0)
4028                               goto fail1;
4029                             quotient = expand_shift
4030                               (RSHIFT_EXPR, compute_mode, t2,
4031                                post_shift, tquotient, 1);
4032                           }
4033                       }
4034                   }
4035                 else            /* Too wide mode to use tricky code */
4036                   break;
4037
4038                 insn = get_last_insn ();
4039                 if (insn != last
4040                     && (set = single_set (insn)) != 0
4041                     && SET_DEST (set) == quotient)
4042                   set_unique_reg_note (insn,
4043                                        REG_EQUAL,
4044                                        gen_rtx_UDIV (compute_mode, op0, op1));
4045               }
4046             else                /* TRUNC_DIV, signed */
4047               {
4048                 unsigned HOST_WIDE_INT ml;
4049                 int lgup, post_shift;
4050                 rtx mlr;
4051                 HOST_WIDE_INT d = INTVAL (op1);
4052                 unsigned HOST_WIDE_INT abs_d;
4053
4054                 /* Since d might be INT_MIN, we have to cast to
4055                    unsigned HOST_WIDE_INT before negating to avoid
4056                    undefined signed overflow.  */
4057                 abs_d = (d >= 0
4058                          ? (unsigned HOST_WIDE_INT) d
4059                          : - (unsigned HOST_WIDE_INT) d);
4060
4061                 /* n rem d = n rem -d */
4062                 if (rem_flag && d < 0)
4063                   {
4064                     d = abs_d;
4065                     op1 = gen_int_mode (abs_d, compute_mode);
4066                   }
4067
4068                 if (d == 1)
4069                   quotient = op0;
4070                 else if (d == -1)
4071                   quotient = expand_unop (compute_mode, neg_optab, op0,
4072                                           tquotient, 0);
4073                 else if (HOST_BITS_PER_WIDE_INT >= size
4074                          && abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4075                   {
4076                     /* This case is not handled correctly below.  */
4077                     quotient = emit_store_flag (tquotient, EQ, op0, op1,
4078                                                 compute_mode, 1, 1);
4079                     if (quotient == 0)
4080                       goto fail1;
4081                   }
4082                 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4083                          && (rem_flag ? smod_pow2_cheap[speed][compute_mode]
4084                                       : sdiv_pow2_cheap[speed][compute_mode])
4085                          /* We assume that cheap metric is true if the
4086                             optab has an expander for this mode.  */
4087                          && ((optab_handler ((rem_flag ? smod_optab
4088                                               : sdiv_optab),
4089                                              compute_mode)
4090                               != CODE_FOR_nothing)
4091                              || (optab_handler (sdivmod_optab,
4092                                                 compute_mode)
4093                                  != CODE_FOR_nothing)))
4094                   ;
4095                 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4096                   {
4097                     if (rem_flag)
4098                       {
4099                         remainder = expand_smod_pow2 (compute_mode, op0, d);
4100                         if (remainder)
4101                           return gen_lowpart (mode, remainder);
4102                       }
4103
4104                     if (sdiv_pow2_cheap[speed][compute_mode]
4105                         && ((optab_handler (sdiv_optab, compute_mode)
4106                              != CODE_FOR_nothing)
4107                             || (optab_handler (sdivmod_optab, compute_mode)
4108                                 != CODE_FOR_nothing)))
4109                       quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4110                                                 compute_mode, op0,
4111                                                 gen_int_mode (abs_d,
4112                                                               compute_mode),
4113                                                 NULL_RTX, 0);
4114                     else
4115                       quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4116
4117                     /* We have computed OP0 / abs(OP1).  If OP1 is negative,
4118                        negate the quotient.  */
4119                     if (d < 0)
4120                       {
4121                         insn = get_last_insn ();
4122                         if (insn != last
4123                             && (set = single_set (insn)) != 0
4124                             && SET_DEST (set) == quotient
4125                             && abs_d < ((unsigned HOST_WIDE_INT) 1
4126                                         << (HOST_BITS_PER_WIDE_INT - 1)))
4127                           set_unique_reg_note (insn,
4128                                                REG_EQUAL,
4129                                                gen_rtx_DIV (compute_mode,
4130                                                             op0,
4131                                                             GEN_INT
4132                                                             (trunc_int_for_mode
4133                                                              (abs_d,
4134                                                               compute_mode))));
4135
4136                         quotient = expand_unop (compute_mode, neg_optab,
4137                                                 quotient, quotient, 0);
4138                       }
4139                   }
4140                 else if (size <= HOST_BITS_PER_WIDE_INT)
4141                   {
4142                     choose_multiplier (abs_d, size, size - 1,
4143                                        &mlr, &post_shift, &lgup);
4144                     ml = (unsigned HOST_WIDE_INT) INTVAL (mlr);
4145                     if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4146                       {
4147                         rtx t1, t2, t3;
4148
4149                         if (post_shift >= BITS_PER_WORD
4150                             || size - 1 >= BITS_PER_WORD)
4151                           goto fail1;
4152
4153                         extra_cost = (shift_cost[speed][compute_mode][post_shift]
4154                                       + shift_cost[speed][compute_mode][size - 1]
4155                                       + add_cost[speed][compute_mode]);
4156                         t1 = expand_mult_highpart (compute_mode, op0, mlr,
4157                                                    NULL_RTX, 0,
4158                                                    max_cost - extra_cost);
4159                         if (t1 == 0)
4160                           goto fail1;
4161                         t2 = expand_shift
4162                           (RSHIFT_EXPR, compute_mode, t1,
4163                            post_shift, NULL_RTX, 0);
4164                         t3 = expand_shift
4165                           (RSHIFT_EXPR, compute_mode, op0,
4166                            size - 1, NULL_RTX, 0);
4167                         if (d < 0)
4168                           quotient
4169                             = force_operand (gen_rtx_MINUS (compute_mode,
4170                                                             t3, t2),
4171                                              tquotient);
4172                         else
4173                           quotient
4174                             = force_operand (gen_rtx_MINUS (compute_mode,
4175                                                             t2, t3),
4176                                              tquotient);
4177                       }
4178                     else
4179                       {
4180                         rtx t1, t2, t3, t4;
4181
4182                         if (post_shift >= BITS_PER_WORD
4183                             || size - 1 >= BITS_PER_WORD)
4184                           goto fail1;
4185
4186                         ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4187                         mlr = gen_int_mode (ml, compute_mode);
4188                         extra_cost = (shift_cost[speed][compute_mode][post_shift]
4189                                       + shift_cost[speed][compute_mode][size - 1]
4190                                       + 2 * add_cost[speed][compute_mode]);
4191                         t1 = expand_mult_highpart (compute_mode, op0, mlr,
4192                                                    NULL_RTX, 0,
4193                                                    max_cost - extra_cost);
4194                         if (t1 == 0)
4195                           goto fail1;
4196                         t2 = force_operand (gen_rtx_PLUS (compute_mode,
4197                                                           t1, op0),
4198                                             NULL_RTX);
4199                         t3 = expand_shift
4200                           (RSHIFT_EXPR, compute_mode, t2,
4201                            post_shift, NULL_RTX, 0);
4202                         t4 = expand_shift
4203                           (RSHIFT_EXPR, compute_mode, op0,
4204                            size - 1, NULL_RTX, 0);
4205                         if (d < 0)
4206                           quotient
4207                             = force_operand (gen_rtx_MINUS (compute_mode,
4208                                                             t4, t3),
4209                                              tquotient);
4210                         else
4211                           quotient
4212                             = force_operand (gen_rtx_MINUS (compute_mode,
4213                                                             t3, t4),
4214                                              tquotient);
4215                       }
4216                   }
4217                 else            /* Too wide mode to use tricky code */
4218                   break;
4219
4220                 insn = get_last_insn ();
4221                 if (insn != last
4222                     && (set = single_set (insn)) != 0
4223                     && SET_DEST (set) == quotient)
4224                   set_unique_reg_note (insn,
4225                                        REG_EQUAL,
4226                                        gen_rtx_DIV (compute_mode, op0, op1));
4227               }
4228             break;
4229           }
4230       fail1:
4231         delete_insns_since (last);
4232         break;
4233
4234       case FLOOR_DIV_EXPR:
4235       case FLOOR_MOD_EXPR:
4236       /* We will come here only for signed operations.  */
4237         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4238           {
4239             unsigned HOST_WIDE_INT mh;
4240             int pre_shift, lgup, post_shift;
4241             HOST_WIDE_INT d = INTVAL (op1);
4242             rtx ml;
4243
4244             if (d > 0)
4245               {
4246                 /* We could just as easily deal with negative constants here,
4247                    but it does not seem worth the trouble for GCC 2.6.  */
4248                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4249                   {
4250                     pre_shift = floor_log2 (d);
4251                     if (rem_flag)
4252                       {
4253                         remainder = expand_binop (compute_mode, and_optab, op0,
4254                                                   GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4255                                                   remainder, 0, OPTAB_LIB_WIDEN);
4256                         if (remainder)
4257                           return gen_lowpart (mode, remainder);
4258                       }
4259                     quotient = expand_shift
4260                       (RSHIFT_EXPR, compute_mode, op0,
4261                        pre_shift, tquotient, 0);
4262                   }
4263                 else
4264                   {
4265                     rtx t1, t2, t3, t4;
4266
4267                     mh = choose_multiplier (d, size, size - 1,
4268                                             &ml, &post_shift, &lgup);
4269                     gcc_assert (!mh);
4270
4271                     if (post_shift < BITS_PER_WORD
4272                         && size - 1 < BITS_PER_WORD)
4273                       {
4274                         t1 = expand_shift
4275                           (RSHIFT_EXPR, compute_mode, op0,
4276                            size - 1, NULL_RTX, 0);
4277                         t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4278                                            NULL_RTX, 0, OPTAB_WIDEN);
4279                         extra_cost = (shift_cost[speed][compute_mode][post_shift]
4280                                       + shift_cost[speed][compute_mode][size - 1]
4281                                       + 2 * add_cost[speed][compute_mode]);
4282                         t3 = expand_mult_highpart (compute_mode, t2, ml,
4283                                                    NULL_RTX, 1,
4284                                                    max_cost - extra_cost);
4285                         if (t3 != 0)
4286                           {
4287                             t4 = expand_shift
4288                               (RSHIFT_EXPR, compute_mode, t3,
4289                                post_shift, NULL_RTX, 1);
4290                             quotient = expand_binop (compute_mode, xor_optab,
4291                                                      t4, t1, tquotient, 0,
4292                                                      OPTAB_WIDEN);
4293                           }
4294                       }
4295                   }
4296               }
4297             else
4298               {
4299                 rtx nsign, t1, t2, t3, t4;
4300                 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4301                                                   op0, constm1_rtx), NULL_RTX);
4302                 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4303                                    0, OPTAB_WIDEN);
4304                 nsign = expand_shift
4305                   (RSHIFT_EXPR, compute_mode, t2,
4306                    size - 1, NULL_RTX, 0);
4307                 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4308                                     NULL_RTX);
4309                 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4310                                     NULL_RTX, 0);
4311                 if (t4)
4312                   {
4313                     rtx t5;
4314                     t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4315                                       NULL_RTX, 0);
4316                     quotient = force_operand (gen_rtx_PLUS (compute_mode,
4317                                                             t4, t5),
4318                                               tquotient);
4319                   }
4320               }
4321           }
4322
4323         if (quotient != 0)
4324           break;
4325         delete_insns_since (last);
4326
4327         /* Try using an instruction that produces both the quotient and
4328            remainder, using truncation.  We can easily compensate the quotient
4329            or remainder to get floor rounding, once we have the remainder.
4330            Notice that we compute also the final remainder value here,
4331            and return the result right away.  */
4332         if (target == 0 || GET_MODE (target) != compute_mode)
4333           target = gen_reg_rtx (compute_mode);
4334
4335         if (rem_flag)
4336           {
4337             remainder
4338               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4339             quotient = gen_reg_rtx (compute_mode);
4340           }
4341         else
4342           {
4343             quotient
4344               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4345             remainder = gen_reg_rtx (compute_mode);
4346           }
4347
4348         if (expand_twoval_binop (sdivmod_optab, op0, op1,
4349                                  quotient, remainder, 0))
4350           {
4351             /* This could be computed with a branch-less sequence.
4352                Save that for later.  */
4353             rtx tem;
4354             rtx label = gen_label_rtx ();
4355             do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4356             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4357                                 NULL_RTX, 0, OPTAB_WIDEN);
4358             do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4359             expand_dec (quotient, const1_rtx);
4360             expand_inc (remainder, op1);
4361             emit_label (label);
4362             return gen_lowpart (mode, rem_flag ? remainder : quotient);
4363           }
4364
4365         /* No luck with division elimination or divmod.  Have to do it
4366            by conditionally adjusting op0 *and* the result.  */
4367         {
4368           rtx label1, label2, label3, label4, label5;
4369           rtx adjusted_op0;
4370           rtx tem;
4371
4372           quotient = gen_reg_rtx (compute_mode);
4373           adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4374           label1 = gen_label_rtx ();
4375           label2 = gen_label_rtx ();
4376           label3 = gen_label_rtx ();
4377           label4 = gen_label_rtx ();
4378           label5 = gen_label_rtx ();
4379           do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4380           do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4381           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4382                               quotient, 0, OPTAB_LIB_WIDEN);
4383           if (tem != quotient)
4384             emit_move_insn (quotient, tem);
4385           emit_jump_insn (gen_jump (label5));
4386           emit_barrier ();
4387           emit_label (label1);
4388           expand_inc (adjusted_op0, const1_rtx);
4389           emit_jump_insn (gen_jump (label4));
4390           emit_barrier ();
4391           emit_label (label2);
4392           do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4393           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4394                               quotient, 0, OPTAB_LIB_WIDEN);
4395           if (tem != quotient)
4396             emit_move_insn (quotient, tem);
4397           emit_jump_insn (gen_jump (label5));
4398           emit_barrier ();
4399           emit_label (label3);
4400           expand_dec (adjusted_op0, const1_rtx);
4401           emit_label (label4);
4402           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4403                               quotient, 0, OPTAB_LIB_WIDEN);
4404           if (tem != quotient)
4405             emit_move_insn (quotient, tem);
4406           expand_dec (quotient, const1_rtx);
4407           emit_label (label5);
4408         }
4409         break;
4410
4411       case CEIL_DIV_EXPR:
4412       case CEIL_MOD_EXPR:
4413         if (unsignedp)
4414           {
4415             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4416               {
4417                 rtx t1, t2, t3;
4418                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4419                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4420                                    floor_log2 (d), tquotient, 1);
4421                 t2 = expand_binop (compute_mode, and_optab, op0,
4422                                    GEN_INT (d - 1),
4423                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4424                 t3 = gen_reg_rtx (compute_mode);
4425                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4426                                       compute_mode, 1, 1);
4427                 if (t3 == 0)
4428                   {
4429                     rtx lab;
4430                     lab = gen_label_rtx ();
4431                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4432                     expand_inc (t1, const1_rtx);
4433                     emit_label (lab);
4434                     quotient = t1;
4435                   }
4436                 else
4437                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4438                                                           t1, t3),
4439                                             tquotient);
4440                 break;
4441               }
4442
4443             /* Try using an instruction that produces both the quotient and
4444                remainder, using truncation.  We can easily compensate the
4445                quotient or remainder to get ceiling rounding, once we have the
4446                remainder.  Notice that we compute also the final remainder
4447                value here, and return the result right away.  */
4448             if (target == 0 || GET_MODE (target) != compute_mode)
4449               target = gen_reg_rtx (compute_mode);
4450
4451             if (rem_flag)
4452               {
4453                 remainder = (REG_P (target)
4454                              ? target : gen_reg_rtx (compute_mode));
4455                 quotient = gen_reg_rtx (compute_mode);
4456               }
4457             else
4458               {
4459                 quotient = (REG_P (target)
4460                             ? target : gen_reg_rtx (compute_mode));
4461                 remainder = gen_reg_rtx (compute_mode);
4462               }
4463
4464             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4465                                      remainder, 1))
4466               {
4467                 /* This could be computed with a branch-less sequence.
4468                    Save that for later.  */
4469                 rtx label = gen_label_rtx ();
4470                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4471                                  compute_mode, label);
4472                 expand_inc (quotient, const1_rtx);
4473                 expand_dec (remainder, op1);
4474                 emit_label (label);
4475                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4476               }
4477
4478             /* No luck with division elimination or divmod.  Have to do it
4479                by conditionally adjusting op0 *and* the result.  */
4480             {
4481               rtx label1, label2;
4482               rtx adjusted_op0, tem;
4483
4484               quotient = gen_reg_rtx (compute_mode);
4485               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4486               label1 = gen_label_rtx ();
4487               label2 = gen_label_rtx ();
4488               do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4489                                compute_mode, label1);
4490               emit_move_insn  (quotient, const0_rtx);
4491               emit_jump_insn (gen_jump (label2));
4492               emit_barrier ();
4493               emit_label (label1);
4494               expand_dec (adjusted_op0, const1_rtx);
4495               tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4496                                   quotient, 1, OPTAB_LIB_WIDEN);
4497               if (tem != quotient)
4498                 emit_move_insn (quotient, tem);
4499               expand_inc (quotient, const1_rtx);
4500               emit_label (label2);
4501             }
4502           }
4503         else /* signed */
4504           {
4505             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4506                 && INTVAL (op1) >= 0)
4507               {
4508                 /* This is extremely similar to the code for the unsigned case
4509                    above.  For 2.7 we should merge these variants, but for
4510                    2.6.1 I don't want to touch the code for unsigned since that
4511                    get used in C.  The signed case will only be used by other
4512                    languages (Ada).  */
4513
4514                 rtx t1, t2, t3;
4515                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4516                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4517                                    floor_log2 (d), tquotient, 0);
4518                 t2 = expand_binop (compute_mode, and_optab, op0,
4519                                    GEN_INT (d - 1),
4520                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4521                 t3 = gen_reg_rtx (compute_mode);
4522                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4523                                       compute_mode, 1, 1);
4524                 if (t3 == 0)
4525                   {
4526                     rtx lab;
4527                     lab = gen_label_rtx ();
4528                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4529                     expand_inc (t1, const1_rtx);
4530                     emit_label (lab);
4531                     quotient = t1;
4532                   }
4533                 else
4534                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4535                                                           t1, t3),
4536                                             tquotient);
4537                 break;
4538               }
4539
4540             /* Try using an instruction that produces both the quotient and
4541                remainder, using truncation.  We can easily compensate the
4542                quotient or remainder to get ceiling rounding, once we have the
4543                remainder.  Notice that we compute also the final remainder
4544                value here, and return the result right away.  */
4545             if (target == 0 || GET_MODE (target) != compute_mode)
4546               target = gen_reg_rtx (compute_mode);
4547             if (rem_flag)
4548               {
4549                 remainder= (REG_P (target)
4550                             ? target : gen_reg_rtx (compute_mode));
4551                 quotient = gen_reg_rtx (compute_mode);
4552               }
4553             else
4554               {
4555                 quotient = (REG_P (target)
4556                             ? target : gen_reg_rtx (compute_mode));
4557                 remainder = gen_reg_rtx (compute_mode);
4558               }
4559
4560             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4561                                      remainder, 0))
4562               {
4563                 /* This could be computed with a branch-less sequence.
4564                    Save that for later.  */
4565                 rtx tem;
4566                 rtx label = gen_label_rtx ();
4567                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4568                                  compute_mode, label);
4569                 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4570                                     NULL_RTX, 0, OPTAB_WIDEN);
4571                 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4572                 expand_inc (quotient, const1_rtx);
4573                 expand_dec (remainder, op1);
4574                 emit_label (label);
4575                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4576               }
4577
4578             /* No luck with division elimination or divmod.  Have to do it
4579                by conditionally adjusting op0 *and* the result.  */
4580             {
4581               rtx label1, label2, label3, label4, label5;
4582               rtx adjusted_op0;
4583               rtx tem;
4584
4585               quotient = gen_reg_rtx (compute_mode);
4586               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4587               label1 = gen_label_rtx ();
4588               label2 = gen_label_rtx ();
4589               label3 = gen_label_rtx ();
4590               label4 = gen_label_rtx ();
4591               label5 = gen_label_rtx ();
4592               do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4593               do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4594                                compute_mode, label1);
4595               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4596                                   quotient, 0, OPTAB_LIB_WIDEN);
4597               if (tem != quotient)
4598                 emit_move_insn (quotient, tem);
4599               emit_jump_insn (gen_jump (label5));
4600               emit_barrier ();
4601               emit_label (label1);
4602               expand_dec (adjusted_op0, const1_rtx);
4603               emit_jump_insn (gen_jump (label4));
4604               emit_barrier ();
4605               emit_label (label2);
4606               do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4607                                compute_mode, label3);
4608               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4609                                   quotient, 0, OPTAB_LIB_WIDEN);
4610               if (tem != quotient)
4611                 emit_move_insn (quotient, tem);
4612               emit_jump_insn (gen_jump (label5));
4613               emit_barrier ();
4614               emit_label (label3);
4615               expand_inc (adjusted_op0, const1_rtx);
4616               emit_label (label4);
4617               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4618                                   quotient, 0, OPTAB_LIB_WIDEN);
4619               if (tem != quotient)
4620                 emit_move_insn (quotient, tem);
4621               expand_inc (quotient, const1_rtx);
4622               emit_label (label5);
4623             }
4624           }
4625         break;
4626
4627       case EXACT_DIV_EXPR:
4628         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4629           {
4630             HOST_WIDE_INT d = INTVAL (op1);
4631             unsigned HOST_WIDE_INT ml;
4632             int pre_shift;
4633             rtx t1;
4634
4635             pre_shift = floor_log2 (d & -d);
4636             ml = invert_mod2n (d >> pre_shift, size);
4637             t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4638                                pre_shift, NULL_RTX, unsignedp);
4639             quotient = expand_mult (compute_mode, t1,
4640                                     gen_int_mode (ml, compute_mode),
4641                                     NULL_RTX, 1);
4642
4643             insn = get_last_insn ();
4644             set_unique_reg_note (insn,
4645                                  REG_EQUAL,
4646                                  gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4647                                                  compute_mode,
4648                                                  op0, op1));
4649           }
4650         break;
4651
4652       case ROUND_DIV_EXPR:
4653       case ROUND_MOD_EXPR:
4654         if (unsignedp)
4655           {
4656             rtx tem;
4657             rtx label;
4658             label = gen_label_rtx ();
4659             quotient = gen_reg_rtx (compute_mode);
4660             remainder = gen_reg_rtx (compute_mode);
4661             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4662               {
4663                 rtx tem;
4664                 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4665                                          quotient, 1, OPTAB_LIB_WIDEN);
4666                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4667                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4668                                           remainder, 1, OPTAB_LIB_WIDEN);
4669               }
4670             tem = plus_constant (op1, -1);
4671             tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1);
4672             do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4673             expand_inc (quotient, const1_rtx);
4674             expand_dec (remainder, op1);
4675             emit_label (label);
4676           }
4677         else
4678           {
4679             rtx abs_rem, abs_op1, tem, mask;
4680             rtx label;
4681             label = gen_label_rtx ();
4682             quotient = gen_reg_rtx (compute_mode);
4683             remainder = gen_reg_rtx (compute_mode);
4684             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4685               {
4686                 rtx tem;
4687                 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4688                                          quotient, 0, OPTAB_LIB_WIDEN);
4689                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4690                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4691                                           remainder, 0, OPTAB_LIB_WIDEN);
4692               }
4693             abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4694             abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4695             tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4696                                 1, NULL_RTX, 1);
4697             do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4698             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4699                                 NULL_RTX, 0, OPTAB_WIDEN);
4700             mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4701                                  size - 1, NULL_RTX, 0);
4702             tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4703                                 NULL_RTX, 0, OPTAB_WIDEN);
4704             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4705                                 NULL_RTX, 0, OPTAB_WIDEN);
4706             expand_inc (quotient, tem);
4707             tem = expand_binop (compute_mode, xor_optab, mask, op1,
4708                                 NULL_RTX, 0, OPTAB_WIDEN);
4709             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4710                                 NULL_RTX, 0, OPTAB_WIDEN);
4711             expand_dec (remainder, tem);
4712             emit_label (label);
4713           }
4714         return gen_lowpart (mode, rem_flag ? remainder : quotient);
4715
4716       default:
4717         gcc_unreachable ();
4718       }
4719
4720   if (quotient == 0)
4721     {
4722       if (target && GET_MODE (target) != compute_mode)
4723         target = 0;
4724
4725       if (rem_flag)
4726         {
4727           /* Try to produce the remainder without producing the quotient.
4728              If we seem to have a divmod pattern that does not require widening,
4729              don't try widening here.  We should really have a WIDEN argument
4730              to expand_twoval_binop, since what we'd really like to do here is
4731              1) try a mod insn in compute_mode
4732              2) try a divmod insn in compute_mode
4733              3) try a div insn in compute_mode and multiply-subtract to get
4734                 remainder
4735              4) try the same things with widening allowed.  */
4736           remainder
4737             = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4738                                  op0, op1, target,
4739                                  unsignedp,
4740                                  ((optab_handler (optab2, compute_mode)
4741                                    != CODE_FOR_nothing)
4742                                   ? OPTAB_DIRECT : OPTAB_WIDEN));
4743           if (remainder == 0)
4744             {
4745               /* No luck there.  Can we do remainder and divide at once
4746                  without a library call?  */
4747               remainder = gen_reg_rtx (compute_mode);
4748               if (! expand_twoval_binop ((unsignedp
4749                                           ? udivmod_optab
4750                                           : sdivmod_optab),
4751                                          op0, op1,
4752                                          NULL_RTX, remainder, unsignedp))
4753                 remainder = 0;
4754             }
4755
4756           if (remainder)
4757             return gen_lowpart (mode, remainder);
4758         }
4759
4760       /* Produce the quotient.  Try a quotient insn, but not a library call.
4761          If we have a divmod in this mode, use it in preference to widening
4762          the div (for this test we assume it will not fail). Note that optab2
4763          is set to the one of the two optabs that the call below will use.  */
4764       quotient
4765         = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4766                              op0, op1, rem_flag ? NULL_RTX : target,
4767                              unsignedp,
4768                              ((optab_handler (optab2, compute_mode)
4769                                != CODE_FOR_nothing)
4770                               ? OPTAB_DIRECT : OPTAB_WIDEN));
4771
4772       if (quotient == 0)
4773         {
4774           /* No luck there.  Try a quotient-and-remainder insn,
4775              keeping the quotient alone.  */
4776           quotient = gen_reg_rtx (compute_mode);
4777           if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4778                                      op0, op1,
4779                                      quotient, NULL_RTX, unsignedp))
4780             {
4781               quotient = 0;
4782               if (! rem_flag)
4783                 /* Still no luck.  If we are not computing the remainder,
4784                    use a library call for the quotient.  */
4785                 quotient = sign_expand_binop (compute_mode,
4786                                               udiv_optab, sdiv_optab,
4787                                               op0, op1, target,
4788                                               unsignedp, OPTAB_LIB_WIDEN);
4789             }
4790         }
4791     }
4792
4793   if (rem_flag)
4794     {
4795       if (target && GET_MODE (target) != compute_mode)
4796         target = 0;
4797
4798       if (quotient == 0)
4799         {
4800           /* No divide instruction either.  Use library for remainder.  */
4801           remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4802                                          op0, op1, target,
4803                                          unsignedp, OPTAB_LIB_WIDEN);
4804           /* No remainder function.  Try a quotient-and-remainder
4805              function, keeping the remainder.  */
4806           if (!remainder)
4807             {
4808               remainder = gen_reg_rtx (compute_mode);
4809               if (!expand_twoval_binop_libfunc
4810                   (unsignedp ? udivmod_optab : sdivmod_optab,
4811                    op0, op1,
4812                    NULL_RTX, remainder,
4813                    unsignedp ? UMOD : MOD))
4814                 remainder = NULL_RTX;
4815             }
4816         }
4817       else
4818         {
4819           /* We divided.  Now finish doing X - Y * (X / Y).  */
4820           remainder = expand_mult (compute_mode, quotient, op1,
4821                                    NULL_RTX, unsignedp);
4822           remainder = expand_binop (compute_mode, sub_optab, op0,
4823                                     remainder, target, unsignedp,
4824                                     OPTAB_LIB_WIDEN);
4825         }
4826     }
4827
4828   return gen_lowpart (mode, rem_flag ? remainder : quotient);
4829 }
4830 \f
4831 /* Return a tree node with data type TYPE, describing the value of X.
4832    Usually this is an VAR_DECL, if there is no obvious better choice.
4833    X may be an expression, however we only support those expressions
4834    generated by loop.c.  */
4835
4836 tree
4837 make_tree (tree type, rtx x)
4838 {
4839   tree t;
4840
4841   switch (GET_CODE (x))
4842     {
4843     case CONST_INT:
4844       {
4845         HOST_WIDE_INT hi = 0;
4846
4847         if (INTVAL (x) < 0
4848             && !(TYPE_UNSIGNED (type)
4849                  && (GET_MODE_BITSIZE (TYPE_MODE (type))
4850                      < HOST_BITS_PER_WIDE_INT)))
4851           hi = -1;
4852
4853         t = build_int_cst_wide (type, INTVAL (x), hi);
4854
4855         return t;
4856       }
4857
4858     case CONST_DOUBLE:
4859       if (GET_MODE (x) == VOIDmode)
4860         t = build_int_cst_wide (type,
4861                                 CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
4862       else
4863         {
4864           REAL_VALUE_TYPE d;
4865
4866           REAL_VALUE_FROM_CONST_DOUBLE (d, x);
4867           t = build_real (type, d);
4868         }
4869
4870       return t;
4871
4872     case CONST_VECTOR:
4873       {
4874         int units = CONST_VECTOR_NUNITS (x);
4875         tree itype = TREE_TYPE (type);
4876         tree t = NULL_TREE;
4877         int i;
4878
4879
4880         /* Build a tree with vector elements.  */
4881         for (i = units - 1; i >= 0; --i)
4882           {
4883             rtx elt = CONST_VECTOR_ELT (x, i);
4884             t = tree_cons (NULL_TREE, make_tree (itype, elt), t);
4885           }
4886
4887         return build_vector (type, t);
4888       }
4889
4890     case PLUS:
4891       return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4892                           make_tree (type, XEXP (x, 1)));
4893
4894     case MINUS:
4895       return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4896                           make_tree (type, XEXP (x, 1)));
4897
4898     case NEG:
4899       return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
4900
4901     case MULT:
4902       return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
4903                           make_tree (type, XEXP (x, 1)));
4904
4905     case ASHIFT:
4906       return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
4907                           make_tree (type, XEXP (x, 1)));
4908
4909     case LSHIFTRT:
4910       t = unsigned_type_for (type);
4911       return fold_convert (type, build2 (RSHIFT_EXPR, t,
4912                                          make_tree (t, XEXP (x, 0)),
4913                                          make_tree (type, XEXP (x, 1))));
4914
4915     case ASHIFTRT:
4916       t = signed_type_for (type);
4917       return fold_convert (type, build2 (RSHIFT_EXPR, t,
4918                                          make_tree (t, XEXP (x, 0)),
4919                                          make_tree (type, XEXP (x, 1))));
4920
4921     case DIV:
4922       if (TREE_CODE (type) != REAL_TYPE)
4923         t = signed_type_for (type);
4924       else
4925         t = type;
4926
4927       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
4928                                          make_tree (t, XEXP (x, 0)),
4929                                          make_tree (t, XEXP (x, 1))));
4930     case UDIV:
4931       t = unsigned_type_for (type);
4932       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
4933                                          make_tree (t, XEXP (x, 0)),
4934                                          make_tree (t, XEXP (x, 1))));
4935
4936     case SIGN_EXTEND:
4937     case ZERO_EXTEND:
4938       t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
4939                                           GET_CODE (x) == ZERO_EXTEND);
4940       return fold_convert (type, make_tree (t, XEXP (x, 0)));
4941
4942     case CONST:
4943       return make_tree (type, XEXP (x, 0));
4944
4945     case SYMBOL_REF:
4946       t = SYMBOL_REF_DECL (x);
4947       if (t)
4948         return fold_convert (type, build_fold_addr_expr (t));
4949       /* else fall through.  */
4950
4951     default:
4952       t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
4953
4954       /* If TYPE is a POINTER_TYPE, we might need to convert X from
4955          address mode to pointer mode.  */
4956       if (POINTER_TYPE_P (type))
4957         x = convert_memory_address_addr_space
4958               (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
4959
4960       /* Note that we do *not* use SET_DECL_RTL here, because we do not
4961          want set_decl_rtl to go adjusting REG_ATTRS for this temporary.  */
4962       t->decl_with_rtl.rtl = x;
4963
4964       return t;
4965     }
4966 }
4967 \f
4968 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
4969    and returning TARGET.
4970
4971    If TARGET is 0, a pseudo-register or constant is returned.  */
4972
4973 rtx
4974 expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
4975 {
4976   rtx tem = 0;
4977
4978   if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
4979     tem = simplify_binary_operation (AND, mode, op0, op1);
4980   if (tem == 0)
4981     tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
4982
4983   if (target == 0)
4984     target = tem;
4985   else if (tem != target)
4986     emit_move_insn (target, tem);
4987   return target;
4988 }
4989
4990 /* Helper function for emit_store_flag.  */
4991 static rtx
4992 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
4993              enum machine_mode mode, enum machine_mode compare_mode,
4994              int unsignedp, rtx x, rtx y, int normalizep,
4995              enum machine_mode target_mode)
4996 {
4997   struct expand_operand ops[4];
4998   rtx op0, last, comparison, subtarget;
4999   enum machine_mode result_mode = insn_data[(int) icode].operand[0].mode;
5000
5001   last = get_last_insn ();
5002   x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5003   y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5004   if (!x || !y)
5005     {
5006       delete_insns_since (last);
5007       return NULL_RTX;
5008     }
5009
5010   if (target_mode == VOIDmode)
5011     target_mode = result_mode;
5012   if (!target)
5013     target = gen_reg_rtx (target_mode);
5014
5015   comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5016
5017   create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5018   create_fixed_operand (&ops[1], comparison);
5019   create_fixed_operand (&ops[2], x);
5020   create_fixed_operand (&ops[3], y);
5021   if (!maybe_expand_insn (icode, 4, ops))
5022     {
5023       delete_insns_since (last);
5024       return NULL_RTX;
5025     }
5026   subtarget = ops[0].value;
5027
5028   /* If we are converting to a wider mode, first convert to
5029      TARGET_MODE, then normalize.  This produces better combining
5030      opportunities on machines that have a SIGN_EXTRACT when we are
5031      testing a single bit.  This mostly benefits the 68k.
5032
5033      If STORE_FLAG_VALUE does not have the sign bit set when
5034      interpreted in MODE, we can do this conversion as unsigned, which
5035      is usually more efficient.  */
5036   if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
5037     {
5038       convert_move (target, subtarget,
5039                     val_signbit_known_clear_p (result_mode,
5040                                                STORE_FLAG_VALUE));
5041       op0 = target;
5042       result_mode = target_mode;
5043     }
5044   else
5045     op0 = subtarget;
5046
5047   /* If we want to keep subexpressions around, don't reuse our last
5048      target.  */
5049   if (optimize)
5050     subtarget = 0;
5051
5052   /* Now normalize to the proper value in MODE.  Sometimes we don't
5053      have to do anything.  */
5054   if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5055     ;
5056   /* STORE_FLAG_VALUE might be the most negative number, so write
5057      the comparison this way to avoid a compiler-time warning.  */
5058   else if (- normalizep == STORE_FLAG_VALUE)
5059     op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5060
5061   /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5062      it hard to use a value of just the sign bit due to ANSI integer
5063      constant typing rules.  */
5064   else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5065     op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5066                         GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5067                         normalizep == 1);
5068   else
5069     {
5070       gcc_assert (STORE_FLAG_VALUE & 1);
5071
5072       op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5073       if (normalizep == -1)
5074         op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5075     }
5076
5077   /* If we were converting to a smaller mode, do the conversion now.  */
5078   if (target_mode != result_mode)
5079     {
5080       convert_move (target, op0, 0);
5081       return target;
5082     }
5083   else
5084     return op0;
5085 }
5086
5087
5088 /* A subroutine of emit_store_flag only including "tricks" that do not
5089    need a recursive call.  These are kept separate to avoid infinite
5090    loops.  */
5091
5092 static rtx
5093 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5094                    enum machine_mode mode, int unsignedp, int normalizep,
5095                    enum machine_mode target_mode)
5096 {
5097   rtx subtarget;
5098   enum insn_code icode;
5099   enum machine_mode compare_mode;
5100   enum mode_class mclass;
5101   enum rtx_code scode;
5102   rtx tem;
5103
5104   if (unsignedp)
5105     code = unsigned_condition (code);
5106   scode = swap_condition (code);
5107
5108   /* If one operand is constant, make it the second one.  Only do this
5109      if the other operand is not constant as well.  */
5110
5111   if (swap_commutative_operands_p (op0, op1))
5112     {
5113       tem = op0;
5114       op0 = op1;
5115       op1 = tem;
5116       code = swap_condition (code);
5117     }
5118
5119   if (mode == VOIDmode)
5120     mode = GET_MODE (op0);
5121
5122   /* For some comparisons with 1 and -1, we can convert this to
5123      comparisons with zero.  This will often produce more opportunities for
5124      store-flag insns.  */
5125
5126   switch (code)
5127     {
5128     case LT:
5129       if (op1 == const1_rtx)
5130         op1 = const0_rtx, code = LE;
5131       break;
5132     case LE:
5133       if (op1 == constm1_rtx)
5134         op1 = const0_rtx, code = LT;
5135       break;
5136     case GE:
5137       if (op1 == const1_rtx)
5138         op1 = const0_rtx, code = GT;
5139       break;
5140     case GT:
5141       if (op1 == constm1_rtx)
5142         op1 = const0_rtx, code = GE;
5143       break;
5144     case GEU:
5145       if (op1 == const1_rtx)
5146         op1 = const0_rtx, code = NE;
5147       break;
5148     case LTU:
5149       if (op1 == const1_rtx)
5150         op1 = const0_rtx, code = EQ;
5151       break;
5152     default:
5153       break;
5154     }
5155
5156   /* If we are comparing a double-word integer with zero or -1, we can
5157      convert the comparison into one involving a single word.  */
5158   if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5159       && GET_MODE_CLASS (mode) == MODE_INT
5160       && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5161     {
5162       if ((code == EQ || code == NE)
5163           && (op1 == const0_rtx || op1 == constm1_rtx))
5164         {
5165           rtx op00, op01;
5166
5167           /* Do a logical OR or AND of the two words and compare the
5168              result.  */
5169           op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5170           op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5171           tem = expand_binop (word_mode,
5172                               op1 == const0_rtx ? ior_optab : and_optab,
5173                               op00, op01, NULL_RTX, unsignedp,
5174                               OPTAB_DIRECT);
5175
5176           if (tem != 0)
5177             tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5178                                    unsignedp, normalizep);
5179         }
5180       else if ((code == LT || code == GE) && op1 == const0_rtx)
5181         {
5182           rtx op0h;
5183
5184           /* If testing the sign bit, can just test on high word.  */
5185           op0h = simplify_gen_subreg (word_mode, op0, mode,
5186                                       subreg_highpart_offset (word_mode,
5187                                                               mode));
5188           tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5189                                  unsignedp, normalizep);
5190         }
5191       else
5192         tem = NULL_RTX;
5193
5194       if (tem)
5195         {
5196           if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5197             return tem;
5198           if (!target)
5199             target = gen_reg_rtx (target_mode);
5200
5201           convert_move (target, tem,
5202                         !val_signbit_known_set_p (word_mode,
5203                                                   (normalizep ? normalizep
5204                                                    : STORE_FLAG_VALUE)));
5205           return target;
5206         }
5207     }
5208
5209   /* If this is A < 0 or A >= 0, we can do this by taking the ones
5210      complement of A (for GE) and shifting the sign bit to the low bit.  */
5211   if (op1 == const0_rtx && (code == LT || code == GE)
5212       && GET_MODE_CLASS (mode) == MODE_INT
5213       && (normalizep || STORE_FLAG_VALUE == 1
5214           || val_signbit_p (mode, STORE_FLAG_VALUE)))
5215     {
5216       subtarget = target;
5217
5218       if (!target)
5219         target_mode = mode;
5220
5221       /* If the result is to be wider than OP0, it is best to convert it
5222          first.  If it is to be narrower, it is *incorrect* to convert it
5223          first.  */
5224       else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5225         {
5226           op0 = convert_modes (target_mode, mode, op0, 0);
5227           mode = target_mode;
5228         }
5229
5230       if (target_mode != mode)
5231         subtarget = 0;
5232
5233       if (code == GE)
5234         op0 = expand_unop (mode, one_cmpl_optab, op0,
5235                            ((STORE_FLAG_VALUE == 1 || normalizep)
5236                             ? 0 : subtarget), 0);
5237
5238       if (STORE_FLAG_VALUE == 1 || normalizep)
5239         /* If we are supposed to produce a 0/1 value, we want to do
5240            a logical shift from the sign bit to the low-order bit; for
5241            a -1/0 value, we do an arithmetic shift.  */
5242         op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5243                             GET_MODE_BITSIZE (mode) - 1,
5244                             subtarget, normalizep != -1);
5245
5246       if (mode != target_mode)
5247         op0 = convert_modes (target_mode, mode, op0, 0);
5248
5249       return op0;
5250     }
5251
5252   mclass = GET_MODE_CLASS (mode);
5253   for (compare_mode = mode; compare_mode != VOIDmode;
5254        compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5255     {
5256      enum machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5257      icode = optab_handler (cstore_optab, optab_mode);
5258      if (icode != CODE_FOR_nothing)
5259         {
5260           do_pending_stack_adjust ();
5261           tem = emit_cstore (target, icode, code, mode, compare_mode,
5262                              unsignedp, op0, op1, normalizep, target_mode);
5263           if (tem)
5264             return tem;
5265
5266           if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5267             {
5268               tem = emit_cstore (target, icode, scode, mode, compare_mode,
5269                                  unsignedp, op1, op0, normalizep, target_mode);
5270               if (tem)
5271                 return tem;
5272             }
5273           break;
5274         }
5275     }
5276
5277   return 0;
5278 }
5279
5280 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5281    and storing in TARGET.  Normally return TARGET.
5282    Return 0 if that cannot be done.
5283
5284    MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
5285    it is VOIDmode, they cannot both be CONST_INT.
5286
5287    UNSIGNEDP is for the case where we have to widen the operands
5288    to perform the operation.  It says to use zero-extension.
5289
5290    NORMALIZEP is 1 if we should convert the result to be either zero
5291    or one.  Normalize is -1 if we should convert the result to be
5292    either zero or -1.  If NORMALIZEP is zero, the result will be left
5293    "raw" out of the scc insn.  */
5294
5295 rtx
5296 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5297                  enum machine_mode mode, int unsignedp, int normalizep)
5298 {
5299   enum machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5300   enum rtx_code rcode;
5301   rtx subtarget;
5302   rtx tem, last, trueval;
5303
5304   tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5305                            target_mode);
5306   if (tem)
5307     return tem;
5308
5309   /* If we reached here, we can't do this with a scc insn, however there
5310      are some comparisons that can be done in other ways.  Don't do any
5311      of these cases if branches are very cheap.  */
5312   if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5313     return 0;
5314
5315   /* See what we need to return.  We can only return a 1, -1, or the
5316      sign bit.  */
5317
5318   if (normalizep == 0)
5319     {
5320       if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5321         normalizep = STORE_FLAG_VALUE;
5322
5323       else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5324         ;
5325       else
5326         return 0;
5327     }
5328
5329   last = get_last_insn ();
5330
5331   /* If optimizing, use different pseudo registers for each insn, instead
5332      of reusing the same pseudo.  This leads to better CSE, but slows
5333      down the compiler, since there are more pseudos */
5334   subtarget = (!optimize
5335                && (target_mode == mode)) ? target : NULL_RTX;
5336   trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5337
5338   /* For floating-point comparisons, try the reverse comparison or try
5339      changing the "orderedness" of the comparison.  */
5340   if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5341     {
5342       enum rtx_code first_code;
5343       bool and_them;
5344
5345       rcode = reverse_condition_maybe_unordered (code);
5346       if (can_compare_p (rcode, mode, ccp_store_flag)
5347           && (code == ORDERED || code == UNORDERED
5348               || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5349               || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5350         {
5351           int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5352                           || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5353
5354           /* For the reverse comparison, use either an addition or a XOR.  */
5355           if (want_add
5356               && rtx_cost (GEN_INT (normalizep), PLUS,
5357                            optimize_insn_for_speed_p ()) == 0)
5358             {
5359               tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5360                                        STORE_FLAG_VALUE, target_mode);
5361               if (tem)
5362                 return expand_binop (target_mode, add_optab, tem,
5363                                      GEN_INT (normalizep),
5364                                      target, 0, OPTAB_WIDEN);
5365             }
5366           else if (!want_add
5367                    && rtx_cost (trueval, XOR,
5368                                 optimize_insn_for_speed_p ()) == 0)
5369             {
5370               tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5371                                        normalizep, target_mode);
5372               if (tem)
5373                 return expand_binop (target_mode, xor_optab, tem, trueval,
5374                                      target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
5375             }
5376         }
5377
5378       delete_insns_since (last);
5379
5380       /* Cannot split ORDERED and UNORDERED, only try the above trick.   */
5381       if (code == ORDERED || code == UNORDERED)
5382         return 0;
5383
5384       and_them = split_comparison (code, mode, &first_code, &code);
5385
5386       /* If there are no NaNs, the first comparison should always fall through.
5387          Effectively change the comparison to the other one.  */
5388       if (!HONOR_NANS (mode))
5389         {
5390           gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5391           return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5392                                     target_mode);
5393         }
5394
5395 #ifdef HAVE_conditional_move
5396       /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5397          conditional move.  */
5398       tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5399                                normalizep, target_mode);
5400       if (tem == 0)
5401         return 0;
5402
5403       if (and_them)
5404         tem = emit_conditional_move (target, code, op0, op1, mode,
5405                                      tem, const0_rtx, GET_MODE (tem), 0);
5406       else
5407         tem = emit_conditional_move (target, code, op0, op1, mode,
5408                                      trueval, tem, GET_MODE (tem), 0);
5409
5410       if (tem == 0)
5411         delete_insns_since (last);
5412       return tem;
5413 #else
5414       return 0;
5415 #endif
5416     }
5417
5418   /* The remaining tricks only apply to integer comparisons.  */
5419
5420   if (GET_MODE_CLASS (mode) != MODE_INT)
5421     return 0;
5422
5423   /* If this is an equality comparison of integers, we can try to exclusive-or
5424      (or subtract) the two operands and use a recursive call to try the
5425      comparison with zero.  Don't do any of these cases if branches are
5426      very cheap.  */
5427
5428   if ((code == EQ || code == NE) && op1 != const0_rtx)
5429     {
5430       tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5431                           OPTAB_WIDEN);
5432
5433       if (tem == 0)
5434         tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5435                             OPTAB_WIDEN);
5436       if (tem != 0)
5437         tem = emit_store_flag (target, code, tem, const0_rtx,
5438                                mode, unsignedp, normalizep);
5439       if (tem != 0)
5440         return tem;
5441
5442       delete_insns_since (last);
5443     }
5444
5445   /* For integer comparisons, try the reverse comparison.  However, for
5446      small X and if we'd have anyway to extend, implementing "X != 0"
5447      as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0".  */
5448   rcode = reverse_condition (code);
5449   if (can_compare_p (rcode, mode, ccp_store_flag)
5450       && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5451             && code == NE
5452             && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5453             && op1 == const0_rtx))
5454     {
5455       int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5456                       || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5457
5458       /* Again, for the reverse comparison, use either an addition or a XOR.  */
5459       if (want_add
5460           && rtx_cost (GEN_INT (normalizep), PLUS,
5461                        optimize_insn_for_speed_p ()) == 0)
5462         {
5463           tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5464                                    STORE_FLAG_VALUE, target_mode);
5465           if (tem != 0)
5466             tem = expand_binop (target_mode, add_optab, tem,
5467                                 GEN_INT (normalizep), target, 0, OPTAB_WIDEN);
5468         }
5469       else if (!want_add
5470                && rtx_cost (trueval, XOR,
5471                             optimize_insn_for_speed_p ()) == 0)
5472         {
5473           tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5474                                    normalizep, target_mode);
5475           if (tem != 0)
5476             tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5477                                 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5478         }
5479
5480       if (tem != 0)
5481         return tem;
5482       delete_insns_since (last);
5483     }
5484
5485   /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5486      the constant zero.  Reject all other comparisons at this point.  Only
5487      do LE and GT if branches are expensive since they are expensive on
5488      2-operand machines.  */
5489
5490   if (op1 != const0_rtx
5491       || (code != EQ && code != NE
5492           && (BRANCH_COST (optimize_insn_for_speed_p (),
5493                            false) <= 1 || (code != LE && code != GT))))
5494     return 0;
5495
5496   /* Try to put the result of the comparison in the sign bit.  Assume we can't
5497      do the necessary operation below.  */
5498
5499   tem = 0;
5500
5501   /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
5502      the sign bit set.  */
5503
5504   if (code == LE)
5505     {
5506       /* This is destructive, so SUBTARGET can't be OP0.  */
5507       if (rtx_equal_p (subtarget, op0))
5508         subtarget = 0;
5509
5510       tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5511                           OPTAB_WIDEN);
5512       if (tem)
5513         tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5514                             OPTAB_WIDEN);
5515     }
5516
5517   /* To see if A > 0, compute (((signed) A) << BITS) - A, where BITS is the
5518      number of bits in the mode of OP0, minus one.  */
5519
5520   if (code == GT)
5521     {
5522       if (rtx_equal_p (subtarget, op0))
5523         subtarget = 0;
5524
5525       tem = expand_shift (RSHIFT_EXPR, mode, op0,
5526                           GET_MODE_BITSIZE (mode) - 1,
5527                           subtarget, 0);
5528       tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5529                           OPTAB_WIDEN);
5530     }
5531
5532   if (code == EQ || code == NE)
5533     {
5534       /* For EQ or NE, one way to do the comparison is to apply an operation
5535          that converts the operand into a positive number if it is nonzero
5536          or zero if it was originally zero.  Then, for EQ, we subtract 1 and
5537          for NE we negate.  This puts the result in the sign bit.  Then we
5538          normalize with a shift, if needed.
5539
5540          Two operations that can do the above actions are ABS and FFS, so try
5541          them.  If that doesn't work, and MODE is smaller than a full word,
5542          we can use zero-extension to the wider mode (an unsigned conversion)
5543          as the operation.  */
5544
5545       /* Note that ABS doesn't yield a positive number for INT_MIN, but
5546          that is compensated by the subsequent overflow when subtracting
5547          one / negating.  */
5548
5549       if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5550         tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5551       else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5552         tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5553       else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5554         {
5555           tem = convert_modes (word_mode, mode, op0, 1);
5556           mode = word_mode;
5557         }
5558
5559       if (tem != 0)
5560         {
5561           if (code == EQ)
5562             tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5563                                 0, OPTAB_WIDEN);
5564           else
5565             tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5566         }
5567
5568       /* If we couldn't do it that way, for NE we can "or" the two's complement
5569          of the value with itself.  For EQ, we take the one's complement of
5570          that "or", which is an extra insn, so we only handle EQ if branches
5571          are expensive.  */
5572
5573       if (tem == 0
5574           && (code == NE
5575               || BRANCH_COST (optimize_insn_for_speed_p (),
5576                               false) > 1))
5577         {
5578           if (rtx_equal_p (subtarget, op0))
5579             subtarget = 0;
5580
5581           tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5582           tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5583                               OPTAB_WIDEN);
5584
5585           if (tem && code == EQ)
5586             tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5587         }
5588     }
5589
5590   if (tem && normalizep)
5591     tem = expand_shift (RSHIFT_EXPR, mode, tem,
5592                         GET_MODE_BITSIZE (mode) - 1,
5593                         subtarget, normalizep == 1);
5594
5595   if (tem)
5596     {
5597       if (!target)
5598         ;
5599       else if (GET_MODE (tem) != target_mode)
5600         {
5601           convert_move (target, tem, 0);
5602           tem = target;
5603         }
5604       else if (!subtarget)
5605         {
5606           emit_move_insn (target, tem);
5607           tem = target;
5608         }
5609     }
5610   else
5611     delete_insns_since (last);
5612
5613   return tem;
5614 }
5615
5616 /* Like emit_store_flag, but always succeeds.  */
5617
5618 rtx
5619 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5620                        enum machine_mode mode, int unsignedp, int normalizep)
5621 {
5622   rtx tem, label;
5623   rtx trueval, falseval;
5624
5625   /* First see if emit_store_flag can do the job.  */
5626   tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5627   if (tem != 0)
5628     return tem;
5629
5630   if (!target)
5631     target = gen_reg_rtx (word_mode);
5632
5633   /* If this failed, we have to do this with set/compare/jump/set code.
5634      For foo != 0, if foo is in OP0, just replace it with 1 if nonzero.  */
5635   trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5636   if (code == NE
5637       && GET_MODE_CLASS (mode) == MODE_INT
5638       && REG_P (target)
5639       && op0 == target
5640       && op1 == const0_rtx)
5641     {
5642       label = gen_label_rtx ();
5643       do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp,
5644                                mode, NULL_RTX, NULL_RTX, label, -1);
5645       emit_move_insn (target, trueval);
5646       emit_label (label);
5647       return target;
5648     }
5649
5650   if (!REG_P (target)
5651       || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5652     target = gen_reg_rtx (GET_MODE (target));
5653
5654   /* Jump in the right direction if the target cannot implement CODE
5655      but can jump on its reverse condition.  */
5656   falseval = const0_rtx;
5657   if (! can_compare_p (code, mode, ccp_jump)
5658       && (! FLOAT_MODE_P (mode)
5659           || code == ORDERED || code == UNORDERED
5660           || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5661           || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5662     {
5663       enum rtx_code rcode;
5664       if (FLOAT_MODE_P (mode))
5665         rcode = reverse_condition_maybe_unordered (code);
5666       else
5667         rcode = reverse_condition (code);
5668
5669       /* Canonicalize to UNORDERED for the libcall.  */
5670       if (can_compare_p (rcode, mode, ccp_jump)
5671           || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5672         {
5673           falseval = trueval;
5674           trueval = const0_rtx;
5675           code = rcode;
5676         }
5677     }
5678
5679   emit_move_insn (target, trueval);
5680   label = gen_label_rtx ();
5681   do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5682                            NULL_RTX, label, -1);
5683
5684   emit_move_insn (target, falseval);
5685   emit_label (label);
5686
5687   return target;
5688 }
5689 \f
5690 /* Perform possibly multi-word comparison and conditional jump to LABEL
5691    if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE.  This is
5692    now a thin wrapper around do_compare_rtx_and_jump.  */
5693
5694 static void
5695 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
5696                  rtx label)
5697 {
5698   int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5699   do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode,
5700                            NULL_RTX, NULL_RTX, label, -1);
5701 }