1 /* Medium-level subroutines: convert bit-field store and extract
2 and shifts, multiplies and divides to rtl instructions.
3 Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
4 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
5 2011, 2012
6 Free Software Foundation, Inc.
8 This file is part of GCC.
10 GCC is free software; you can redistribute it and/or modify it under
11 the terms of the GNU General Public License as published by the Free
12 Software Foundation; either version 3, or (at your option) any later
13 version.
15 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
16 WARRANTY; without even the implied warranty of MERCHANTABILITY or
17 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 for more details.
20 You should have received a copy of the GNU General Public License
21 along with GCC; see the file COPYING3. If not see
22 <http://www.gnu.org/licenses/>. */
25 #include "config.h"
26 #include "system.h"
27 #include "coretypes.h"
28 #include "tm.h"
29 #include "diagnostic-core.h"
30 #include "rtl.h"
31 #include "tree.h"
32 #include "tm_p.h"
33 #include "flags.h"
34 #include "insn-config.h"
35 #include "expr.h"
36 #include "optabs.h"
37 #include "recog.h"
38 #include "langhooks.h"
39 #include "df.h"
40 #include "target.h"
41 #include "expmed.h"
43 struct target_expmed default_target_expmed;
44 #if SWITCHABLE_TARGET
45 struct target_expmed *this_target_expmed = &default_target_expmed;
46 #endif
48 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
49 unsigned HOST_WIDE_INT,
50 unsigned HOST_WIDE_INT,
51 unsigned HOST_WIDE_INT,
52 unsigned HOST_WIDE_INT,
53 rtx);
54 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
55 unsigned HOST_WIDE_INT,
56 unsigned HOST_WIDE_INT,
57 unsigned HOST_WIDE_INT,
58 rtx);
59 static rtx extract_fixed_bit_field (enum machine_mode, rtx,
60 unsigned HOST_WIDE_INT,
61 unsigned HOST_WIDE_INT,
62 unsigned HOST_WIDE_INT, rtx, int, bool);
63 static rtx mask_rtx (enum machine_mode, int, int, int);
64 static rtx lshift_value (enum machine_mode, rtx, int, int);
65 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
66 unsigned HOST_WIDE_INT, int);
67 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
68 static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
69 static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
71 /* Test whether a value is zero or a power of two. */
72 #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
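/* For illustration (a worked example, values chosen arbitrarily): clearing
   the lowest set bit of a power of two leaves zero, so

     EXACT_POWER_OF_2_OR_ZERO_P (0)   is true   (0 & -1 == 0)
     EXACT_POWER_OF_2_OR_ZERO_P (8)   is true   (binary 1000 & 0111 == 0)
     EXACT_POWER_OF_2_OR_ZERO_P (12)  is false  (binary 1100 & 1011 == 1000).  */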
74 #ifndef SLOW_UNALIGNED_ACCESS
75 #define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
76 #endif
79 /* Reduce conditional compilation elsewhere. */
80 #ifndef HAVE_insv
81 #define HAVE_insv 0
82 #define CODE_FOR_insv CODE_FOR_nothing
83 #define gen_insv(a,b,c,d) NULL_RTX
84 #endif
85 #ifndef HAVE_extv
86 #define HAVE_extv 0
87 #define CODE_FOR_extv CODE_FOR_nothing
88 #define gen_extv(a,b,c,d) NULL_RTX
89 #endif
90 #ifndef HAVE_extzv
91 #define HAVE_extzv 0
92 #define CODE_FOR_extzv CODE_FOR_nothing
93 #define gen_extzv(a,b,c,d) NULL_RTX
94 #endif
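/* The stubs above let the expanders below test HAVE_insv and friends at run
   time instead of hiding whole blocks behind #ifdef HAVE_insv.  A sketch of
   the resulting idiom (illustration only; the real uses appear later in this
   file):

     if (HAVE_insv && maybe_expand_insn (CODE_FOR_insv, 4, ops))
       return true;

   On a target without the pattern, HAVE_insv is the constant 0, so the branch
   is dead code and the dummy gen_insv is never actually called.  */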
96 struct init_expmed_rtl
98 struct rtx_def reg; rtunion reg_fld[2];
99 struct rtx_def plus; rtunion plus_fld1;
100 struct rtx_def neg;
101 struct rtx_def mult; rtunion mult_fld1;
102 struct rtx_def sdiv; rtunion sdiv_fld1;
103 struct rtx_def udiv; rtunion udiv_fld1;
104 struct rtx_def sdiv_32; rtunion sdiv_32_fld1;
105 struct rtx_def smod_32; rtunion smod_32_fld1;
106 struct rtx_def wide_mult; rtunion wide_mult_fld1;
107 struct rtx_def wide_lshr; rtunion wide_lshr_fld1;
108 struct rtx_def wide_trunc;
109 struct rtx_def shift; rtunion shift_fld1;
110 struct rtx_def shift_mult; rtunion shift_mult_fld1;
111 struct rtx_def shift_add; rtunion shift_add_fld1;
112 struct rtx_def shift_sub0; rtunion shift_sub0_fld1;
113 struct rtx_def shift_sub1; rtunion shift_sub1_fld1;
114 struct rtx_def zext;
115 struct rtx_def trunc;
117 rtx pow2[MAX_BITS_PER_WORD];
118 rtx cint[MAX_BITS_PER_WORD];
121 static void
122 init_expmed_one_conv (struct init_expmed_rtl *all, enum machine_mode to_mode,
123 enum machine_mode from_mode, bool speed)
125 int to_size, from_size;
126 rtx which;
128 /* We're given no information about the true size of a partial integer,
129 only the size of the "full" integer it requires for storage. For
130 comparison purposes here, reduce the bit size by one in that case. */
131 to_size = (GET_MODE_BITSIZE (to_mode)
132 - (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT));
133 from_size = (GET_MODE_BITSIZE (from_mode)
134 - (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT));
136 /* Assume cost of zero-extend and sign-extend is the same. */
137 which = (to_size < from_size ? &all->trunc : &all->zext);
139 PUT_MODE (&all->reg, from_mode);
140 set_convert_cost (to_mode, from_mode, speed, set_src_cost (which, speed));
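/* Example of the partial-integer adjustment above (illustration only; the
   exact sizes are target dependent): PSImode and SImode may both report a
   32-bit storage size, so subtracting one bit for the partial mode makes
   SImode -> PSImode look narrowing (costed from the TRUNCATE template) and
   PSImode -> SImode look widening (costed from the ZERO_EXTEND template).  */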
143 static void
144 init_expmed_one_mode (struct init_expmed_rtl *all,
145 enum machine_mode mode, int speed)
147 int m, n, mode_bitsize;
148 enum machine_mode mode_from;
150 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
152 PUT_MODE (&all->reg, mode);
153 PUT_MODE (&all->plus, mode);
154 PUT_MODE (&all->neg, mode);
155 PUT_MODE (&all->mult, mode);
156 PUT_MODE (&all->sdiv, mode);
157 PUT_MODE (&all->udiv, mode);
158 PUT_MODE (&all->sdiv_32, mode);
159 PUT_MODE (&all->smod_32, mode);
160 PUT_MODE (&all->wide_trunc, mode);
161 PUT_MODE (&all->shift, mode);
162 PUT_MODE (&all->shift_mult, mode);
163 PUT_MODE (&all->shift_add, mode);
164 PUT_MODE (&all->shift_sub0, mode);
165 PUT_MODE (&all->shift_sub1, mode);
166 PUT_MODE (&all->zext, mode);
167 PUT_MODE (&all->trunc, mode);
169 set_add_cost (speed, mode, set_src_cost (&all->plus, speed));
170 set_neg_cost (speed, mode, set_src_cost (&all->neg, speed));
171 set_mul_cost (speed, mode, set_src_cost (&all->mult, speed));
172 set_sdiv_cost (speed, mode, set_src_cost (&all->sdiv, speed));
173 set_udiv_cost (speed, mode, set_src_cost (&all->udiv, speed));
175 set_sdiv_pow2_cheap (speed, mode, (set_src_cost (&all->sdiv_32, speed)
176 <= 2 * add_cost (speed, mode)));
177 set_smod_pow2_cheap (speed, mode, (set_src_cost (&all->smod_32, speed)
178 <= 4 * add_cost (speed, mode)));
180 set_shift_cost (speed, mode, 0, 0);
182 int cost = add_cost (speed, mode);
183 set_shiftadd_cost (speed, mode, 0, cost);
184 set_shiftsub0_cost (speed, mode, 0, cost);
185 set_shiftsub1_cost (speed, mode, 0, cost);
188 n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
189 for (m = 1; m < n; m++)
191 XEXP (&all->shift, 1) = all->cint[m];
192 XEXP (&all->shift_mult, 1) = all->pow2[m];
194 set_shift_cost (speed, mode, m, set_src_cost (&all->shift, speed));
195 set_shiftadd_cost (speed, mode, m, set_src_cost (&all->shift_add, speed));
196 set_shiftsub0_cost (speed, mode, m, set_src_cost (&all->shift_sub0, speed));
197 set_shiftsub1_cost (speed, mode, m, set_src_cost (&all->shift_sub1, speed));
200 if (SCALAR_INT_MODE_P (mode))
202 for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
203 mode_from = (enum machine_mode)(mode_from + 1))
204 init_expmed_one_conv (all, mode, mode_from, speed);
206 if (GET_MODE_CLASS (mode) == MODE_INT)
208 enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
209 if (wider_mode != VOIDmode)
211 PUT_MODE (&all->zext, wider_mode);
212 PUT_MODE (&all->wide_mult, wider_mode);
213 PUT_MODE (&all->wide_lshr, wider_mode);
214 XEXP (&all->wide_lshr, 1) = GEN_INT (mode_bitsize);
216 set_mul_widen_cost (speed, wider_mode,
217 set_src_cost (&all->wide_mult, speed));
218 set_mul_highpart_cost (speed, mode,
219 set_src_cost (&all->wide_trunc, speed));
224 void
225 init_expmed (void)
227 struct init_expmed_rtl all;
228 enum machine_mode mode;
229 int m, speed;
231 memset (&all, 0, sizeof all);
232 for (m = 1; m < MAX_BITS_PER_WORD; m++)
234 all.pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
235 all.cint[m] = GEN_INT (m);
238 PUT_CODE (&all.reg, REG);
239 /* Avoid using hard regs in ways which may be unsupported. */
240 SET_REGNO (&all.reg, LAST_VIRTUAL_REGISTER + 1);
242 PUT_CODE (&all.plus, PLUS);
243 XEXP (&all.plus, 0) = &all.reg;
244 XEXP (&all.plus, 1) = &all.reg;
246 PUT_CODE (&all.neg, NEG);
247 XEXP (&all.neg, 0) = &all.reg;
249 PUT_CODE (&all.mult, MULT);
250 XEXP (&all.mult, 0) = &all.reg;
251 XEXP (&all.mult, 1) = &all.reg;
253 PUT_CODE (&all.sdiv, DIV);
254 XEXP (&all.sdiv, 0) = &all.reg;
255 XEXP (&all.sdiv, 1) = &all.reg;
257 PUT_CODE (&all.udiv, UDIV);
258 XEXP (&all.udiv, 0) = &all.reg;
259 XEXP (&all.udiv, 1) = &all.reg;
261 PUT_CODE (&all.sdiv_32, DIV);
262 XEXP (&all.sdiv_32, 0) = &all.reg;
263 XEXP (&all.sdiv_32, 1) = 32 < MAX_BITS_PER_WORD ? all.cint[32] : GEN_INT (32);
265 PUT_CODE (&all.smod_32, MOD);
266 XEXP (&all.smod_32, 0) = &all.reg;
267 XEXP (&all.smod_32, 1) = XEXP (&all.sdiv_32, 1);
269 PUT_CODE (&all.zext, ZERO_EXTEND);
270 XEXP (&all.zext, 0) = &all.reg;
272 PUT_CODE (&all.wide_mult, MULT);
273 XEXP (&all.wide_mult, 0) = &all.zext;
274 XEXP (&all.wide_mult, 1) = &all.zext;
276 PUT_CODE (&all.wide_lshr, LSHIFTRT);
277 XEXP (&all.wide_lshr, 0) = &all.wide_mult;
279 PUT_CODE (&all.wide_trunc, TRUNCATE);
280 XEXP (&all.wide_trunc, 0) = &all.wide_lshr;
282 PUT_CODE (&all.shift, ASHIFT);
283 XEXP (&all.shift, 0) = &all.reg;
285 PUT_CODE (&all.shift_mult, MULT);
286 XEXP (&all.shift_mult, 0) = &all.reg;
288 PUT_CODE (&all.shift_add, PLUS);
289 XEXP (&all.shift_add, 0) = &all.shift_mult;
290 XEXP (&all.shift_add, 1) = &all.reg;
292 PUT_CODE (&all.shift_sub0, MINUS);
293 XEXP (&all.shift_sub0, 0) = &all.shift_mult;
294 XEXP (&all.shift_sub0, 1) = &all.reg;
296 PUT_CODE (&all.shift_sub1, MINUS);
297 XEXP (&all.shift_sub1, 0) = &all.reg;
298 XEXP (&all.shift_sub1, 1) = &all.shift_mult;
300 PUT_CODE (&all.trunc, TRUNCATE);
301 XEXP (&all.trunc, 0) = &all.reg;
303 for (speed = 0; speed < 2; speed++)
305 crtl->maybe_hot_insn_p = speed;
306 set_zero_cost (speed, set_src_cost (const0_rtx, speed));
308 for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
309 mode = (enum machine_mode)(mode + 1))
310 init_expmed_one_mode (&all, mode, speed);
312 if (MIN_MODE_PARTIAL_INT != VOIDmode)
313 for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
314 mode = (enum machine_mode)(mode + 1))
315 init_expmed_one_mode (&all, mode, speed);
317 if (MIN_MODE_VECTOR_INT != VOIDmode)
318 for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
319 mode = (enum machine_mode)(mode + 1))
320 init_expmed_one_mode (&all, mode, speed);
323 if (alg_hash_used_p ())
325 struct alg_hash_entry *p = alg_hash_entry_ptr (0);
326 memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
328 else
329 set_alg_hash_used_p (true);
330 default_rtl_profile ();
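/* The tables filled in above are read back later through the accessors that
   pair with the setters used here (add_cost, shift_cost, sdiv_pow2_cheap and
   so on).  A rough sketch of a typical query (illustration only):

     if (shift_cost (speed, mode, bits) <= add_cost (speed, mode))
       ... prefer the shift over an addition chain ...  */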
333 /* Return an rtx representing minus the value of X.
334 MODE is the intended mode of the result,
335 useful if X is a CONST_INT. */
338 negate_rtx (enum machine_mode mode, rtx x)
340 rtx result = simplify_unary_operation (NEG, mode, x, mode);
342 if (result == 0)
343 result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
345 return result;
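/* Example (illustration only): negate_rtx (SImode, const1_rtx) is folded by
   simplify_unary_operation straight to constm1_rtx, whereas a REG operand
   falls through to expand_unop and emits a real negation insn.  */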
348 /* Report on the availability of insv/extv/extzv and the desired mode
349 of each of their operands. Returns MAX_MACHINE_MODE if HAVE_foo
350 is false; else the mode of the specified operand. If OPNO is -1,
351 all the caller cares about is whether the insn is available. */
352 enum machine_mode
353 mode_for_extraction (enum extraction_pattern pattern, int opno)
355 const struct insn_data_d *data;
357 switch (pattern)
359 case EP_insv:
360 if (HAVE_insv)
362 data = &insn_data[CODE_FOR_insv];
363 break;
365 return MAX_MACHINE_MODE;
367 case EP_extv:
368 if (HAVE_extv)
370 data = &insn_data[CODE_FOR_extv];
371 break;
373 return MAX_MACHINE_MODE;
375 case EP_extzv:
376 if (HAVE_extzv)
378 data = &insn_data[CODE_FOR_extzv];
379 break;
381 return MAX_MACHINE_MODE;
383 default:
384 gcc_unreachable ();
387 if (opno == -1)
388 return VOIDmode;
390 /* Everyone who uses this function used to follow it with
391 if (result == VOIDmode) result = word_mode; */
392 if (data->operand[opno].mode == VOIDmode)
393 return word_mode;
394 return data->operand[opno].mode;
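/* Typical use (an illustrative sketch; the real callers appear further down
   in this file):

     enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);

   asks which mode the insv pattern wants for its operand 3 (the value being
   inserted), or MAX_MACHINE_MODE if the target has no insv pattern at all.  */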
397 /* A subroutine of store_bit_field, with the same arguments. Return true
398 if the operation could be implemented.
400 If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
401 no other way of implementing the operation. If FALLBACK_P is false,
402 return false instead. */
404 static bool
405 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
406 unsigned HOST_WIDE_INT bitnum,
407 unsigned HOST_WIDE_INT bitregion_start,
408 unsigned HOST_WIDE_INT bitregion_end,
409 enum machine_mode fieldmode,
410 rtx value, bool fallback_p)
412 unsigned int unit
413 = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
414 unsigned HOST_WIDE_INT offset, bitpos;
415 rtx op0 = str_rtx;
416 int byte_offset;
417 rtx orig_value;
419 enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);
421 while (GET_CODE (op0) == SUBREG)
423 /* The following line once was done only if WORDS_BIG_ENDIAN,
424 but I think that is a mistake. WORDS_BIG_ENDIAN is
425 meaningful at a much higher level; when structures are copied
426 between memory and regs, the higher-numbered regs
427 always get higher addresses. */
428 int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
429 int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
431 byte_offset = 0;
433 /* Paradoxical subregs need special handling on big endian machines. */
434 if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
436 int difference = inner_mode_size - outer_mode_size;
438 if (WORDS_BIG_ENDIAN)
439 byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
440 if (BYTES_BIG_ENDIAN)
441 byte_offset += difference % UNITS_PER_WORD;
443 else
444 byte_offset = SUBREG_BYTE (op0);
446 bitnum += byte_offset * BITS_PER_UNIT;
447 op0 = SUBREG_REG (op0);
450 /* No action is needed if the target is a register and if the field
451 lies completely outside that register. This can occur if the source
452 code contains an out-of-bounds access to a small array. */
453 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
454 return true;
456 /* Use vec_set patterns for inserting parts of vectors whenever
457 available. */
458 if (VECTOR_MODE_P (GET_MODE (op0))
459 && !MEM_P (op0)
460 && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
461 && fieldmode == GET_MODE_INNER (GET_MODE (op0))
462 && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
463 && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
465 struct expand_operand ops[3];
466 enum machine_mode outermode = GET_MODE (op0);
467 enum machine_mode innermode = GET_MODE_INNER (outermode);
468 enum insn_code icode = optab_handler (vec_set_optab, outermode);
469 int pos = bitnum / GET_MODE_BITSIZE (innermode);
471 create_fixed_operand (&ops[0], op0);
472 create_input_operand (&ops[1], value, innermode);
473 create_integer_operand (&ops[2], pos);
474 if (maybe_expand_insn (icode, 3, ops))
475 return true;
478 /* If the target is a register, overwriting the entire object, or storing
479 a full-word or multi-word field can be done with just a SUBREG.
481 If the target is memory, storing any naturally aligned field can be
482 done with a simple store. For targets that support fast unaligned
483 memory, any naturally sized, unit aligned field can be done directly. */
485 offset = bitnum / unit;
486 bitpos = bitnum % unit;
487 byte_offset = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
488 + (offset * UNITS_PER_WORD);
490 if (bitpos == 0
491 && bitsize == GET_MODE_BITSIZE (fieldmode)
492 && (!MEM_P (op0)
493 ? ((GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD
494 || GET_MODE_SIZE (GET_MODE (op0)) == GET_MODE_SIZE (fieldmode))
495 && ((GET_MODE (op0) == fieldmode && byte_offset == 0)
496 || validate_subreg (fieldmode, GET_MODE (op0), op0,
497 byte_offset)))
498 : (! SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0))
499 || (offset * BITS_PER_UNIT % bitsize == 0
500 && MEM_ALIGN (op0) % GET_MODE_BITSIZE (fieldmode) == 0))))
502 if (MEM_P (op0))
503 op0 = adjust_address (op0, fieldmode, offset);
504 else if (GET_MODE (op0) != fieldmode)
505 op0 = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
506 byte_offset);
507 emit_move_insn (op0, value);
508 return true;
511 /* Make sure we are playing with integral modes. Pun with subregs
512 if we aren't. This must come after the entire register case above,
513 since that case is valid for any mode. The following cases are only
514 valid for integral modes. */
516 enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
517 if (imode != GET_MODE (op0))
519 if (MEM_P (op0))
520 op0 = adjust_address (op0, imode, 0);
521 else
523 gcc_assert (imode != BLKmode);
524 op0 = gen_lowpart (imode, op0);
529 /* We may be accessing data outside the field, which means
530 we can alias adjacent data. */
531 /* ?? not always for C++0x memory model ?? */
532 if (MEM_P (op0))
534 op0 = shallow_copy_rtx (op0);
535 set_mem_alias_set (op0, 0);
536 set_mem_expr (op0, 0);
539 /* If OP0 is a register, BITPOS must count within a word.
540 But as we have it, it counts within whatever size OP0 now has.
541 On a bigendian machine, these are not the same, so convert. */
542 if (BYTES_BIG_ENDIAN
543 && !MEM_P (op0)
544 && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
545 bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
547 /* Storing an lsb-aligned field in a register
548 can be done with a movestrict instruction. */
550 if (!MEM_P (op0)
551 && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
552 && bitsize == GET_MODE_BITSIZE (fieldmode)
553 && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
555 struct expand_operand ops[2];
556 enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
557 rtx arg0 = op0;
558 unsigned HOST_WIDE_INT subreg_off;
560 if (GET_CODE (arg0) == SUBREG)
562 /* Else we've got some float mode source being extracted into
563 a different float mode destination -- this combination of
564 subregs results in Severe Tire Damage. */
565 gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
566 || GET_MODE_CLASS (fieldmode) == MODE_INT
567 || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
568 arg0 = SUBREG_REG (arg0);
571 subreg_off = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
572 + (offset * UNITS_PER_WORD);
573 if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
575 arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
577 create_fixed_operand (&ops[0], arg0);
578 /* Shrink the source operand to FIELDMODE. */
579 create_convert_operand_to (&ops[1], value, fieldmode, false);
580 if (maybe_expand_insn (icode, 2, ops))
581 return true;
585 /* Handle fields bigger than a word. */
587 if (bitsize > BITS_PER_WORD)
589 /* Here we transfer the words of the field
590 in the order least significant first.
591 This is because the most significant word is the one which may
592 be less than full.
593 However, only do that if the value is not BLKmode. */
595 unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
596 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
597 unsigned int i;
598 rtx last;
600 /* This is the mode we must force value to, so that there will be enough
601 subwords to extract. Note that fieldmode will often (always?) be
602 VOIDmode, because that is what store_field uses to indicate that this
603 is a bit field, but passing VOIDmode to operand_subword_force
604 is not allowed. */
605 fieldmode = GET_MODE (value);
606 if (fieldmode == VOIDmode)
607 fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
609 last = get_last_insn ();
610 for (i = 0; i < nwords; i++)
612 /* If I is 0, use the low-order word in both field and target;
613 if I is 1, use the next to lowest word; and so on. */
614 unsigned int wordnum = (backwards
615 ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD
616 - i - 1
617 : i);
618 unsigned int bit_offset = (backwards
619 ? MAX ((int) bitsize - ((int) i + 1)
620 * BITS_PER_WORD,
622 : (int) i * BITS_PER_WORD);
623 rtx value_word = operand_subword_force (value, wordnum, fieldmode);
624 unsigned HOST_WIDE_INT new_bitsize =
625 MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
627 /* If the remaining chunk is narrower than a full word, we have
628 to make sure that on big endian machines the higher order
629 bits are the ones used. */
630 if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
631 value_word = simplify_expand_binop (word_mode, lshr_optab,
632 value_word,
633 GEN_INT (BITS_PER_WORD
634 - new_bitsize),
635 NULL_RTX, true,
636 OPTAB_LIB_WIDEN);
638 if (!store_bit_field_1 (op0, new_bitsize,
639 bitnum + bit_offset,
640 bitregion_start, bitregion_end,
641 word_mode,
642 value_word, fallback_p))
644 delete_insns_since (last);
645 return false;
648 return true;
651 /* From here on we can assume that the field to be stored fits within
652 a single word, since fields wider than a word were handled above. */
654 /* OFFSET is the number of words or bytes (UNIT says which)
655 from STR_RTX to the first word or byte containing part of the field. */
657 if (!MEM_P (op0))
659 if (offset != 0
660 || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
662 if (!REG_P (op0))
664 /* Since this is a destination (lvalue), we can't copy
665 it to a pseudo. We can remove a SUBREG that does not
666 change the size of the operand. Such a SUBREG may
667 have been added above. */
668 gcc_assert (GET_CODE (op0) == SUBREG
669 && (GET_MODE_SIZE (GET_MODE (op0))
670 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)))));
671 op0 = SUBREG_REG (op0);
673 op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
674 op0, (offset * UNITS_PER_WORD));
676 offset = 0;
679 /* If VALUE has a floating-point or complex mode, access it as an
680 integer of the corresponding size. This can occur on a machine
681 with 64 bit registers that uses SFmode for float. It can also
682 occur for unaligned float or complex fields. */
683 orig_value = value;
684 if (GET_MODE (value) != VOIDmode
685 && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
686 && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
688 value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
689 emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
692 /* Now OFFSET is nonzero only if OP0 is memory
693 and is therefore always measured in bytes. */
695 if (HAVE_insv
696 && GET_MODE (value) != BLKmode
697 && bitsize > 0
698 && GET_MODE_BITSIZE (op_mode) >= bitsize
699 /* Do not use insv for volatile bitfields when
700 -fstrict-volatile-bitfields is in effect. */
701 && !(MEM_P (op0) && MEM_VOLATILE_P (op0)
702 && flag_strict_volatile_bitfields > 0)
703 && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
704 && (bitsize + bitpos > GET_MODE_BITSIZE (op_mode)))
705 /* Do not use insv if the bit region is restricted and an
706 op_mode integer at this offset would not fit within the
707 restricted region. */
708 && !(MEM_P (op0) && bitregion_end
709 && bitnum - bitpos + GET_MODE_BITSIZE (op_mode)
710 > bitregion_end + 1))
712 struct expand_operand ops[4];
713 int xbitpos = bitpos;
714 rtx value1;
715 rtx xop0 = op0;
716 rtx last = get_last_insn ();
717 bool copy_back = false;
719 /* Add OFFSET into OP0's address. */
720 if (MEM_P (xop0))
721 xop0 = adjust_address (xop0, byte_mode, offset);
723 /* If xop0 is a register, we need it in OP_MODE
724 to make it acceptable to the format of insv. */
725 if (GET_CODE (xop0) == SUBREG)
726 /* We can't just change the mode, because this might clobber op0,
727 and we will need the original value of op0 if insv fails. */
728 xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
729 if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
730 xop0 = gen_lowpart_SUBREG (op_mode, xop0);
732 /* If the destination is a paradoxical subreg such that we need a
733 truncate to the inner mode, perform the insertion on a temporary and
734 truncate the result to the original destination. Note that we can't
735 just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
736 X) 0)) is (reg:N X). */
737 if (GET_CODE (xop0) == SUBREG
738 && REG_P (SUBREG_REG (xop0))
739 && (!TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
740 op_mode)))
742 rtx tem = gen_reg_rtx (op_mode);
743 emit_move_insn (tem, xop0);
744 xop0 = tem;
745 copy_back = true;
748 /* We have been counting XBITPOS within UNIT.
749 Count instead within the size of the register. */
750 if (BYTES_BIG_ENDIAN && !MEM_P (xop0))
751 xbitpos += GET_MODE_BITSIZE (op_mode) - unit;
753 unit = GET_MODE_BITSIZE (op_mode);
755 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
756 "backwards" from the size of the unit we are inserting into.
757 Otherwise, we count bits from the most significant on a
758 BYTES/BITS_BIG_ENDIAN machine. */
760 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
761 xbitpos = unit - bitsize - xbitpos;
763 /* Convert VALUE to op_mode (which insv insn wants) in VALUE1. */
764 value1 = value;
765 if (GET_MODE (value) != op_mode)
767 if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
769 /* Optimization: Don't bother really extending VALUE
770 if it has all the bits we will actually use. However,
771 if we must narrow it, be sure we do it correctly. */
773 if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
775 rtx tmp;
777 tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
778 if (! tmp)
779 tmp = simplify_gen_subreg (op_mode,
780 force_reg (GET_MODE (value),
781 value1),
782 GET_MODE (value), 0);
783 value1 = tmp;
785 else
786 value1 = gen_lowpart (op_mode, value1);
788 else if (CONST_INT_P (value))
789 value1 = gen_int_mode (INTVAL (value), op_mode);
790 else
791 /* Parse phase is supposed to make VALUE's data type
792 match that of the component reference, which is a type
793 at least as wide as the field; so VALUE should have
794 a mode that corresponds to that type. */
795 gcc_assert (CONSTANT_P (value));
798 create_fixed_operand (&ops[0], xop0);
799 create_integer_operand (&ops[1], bitsize);
800 create_integer_operand (&ops[2], xbitpos);
801 create_input_operand (&ops[3], value1, op_mode);
802 if (maybe_expand_insn (CODE_FOR_insv, 4, ops))
804 if (copy_back)
805 convert_move (op0, xop0, true);
806 return true;
808 delete_insns_since (last);
811 /* If OP0 is a memory, try copying it to a register and seeing if a
812 cheap register alternative is available. */
813 if (HAVE_insv && MEM_P (op0))
815 enum machine_mode bestmode;
816 unsigned HOST_WIDE_INT maxbits = MAX_FIXED_MODE_SIZE;
818 if (bitregion_end)
819 maxbits = bitregion_end - bitregion_start + 1;
821 /* Get the mode to use for inserting into this field. If OP0 is
822 BLKmode, get the smallest mode consistent with the alignment. If
823 OP0 is a non-BLKmode object that is no wider than OP_MODE, use its
824 mode. Otherwise, use the smallest mode containing the field. */
826 if (GET_MODE (op0) == BLKmode
827 || GET_MODE_BITSIZE (GET_MODE (op0)) > maxbits
828 || (op_mode != MAX_MACHINE_MODE
829 && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (op_mode)))
830 bestmode = get_best_mode (bitsize, bitnum,
831 bitregion_start, bitregion_end,
832 MEM_ALIGN (op0),
833 (op_mode == MAX_MACHINE_MODE
834 ? VOIDmode : op_mode),
835 MEM_VOLATILE_P (op0));
836 else
837 bestmode = GET_MODE (op0);
839 if (bestmode != VOIDmode
840 && GET_MODE_SIZE (bestmode) >= GET_MODE_SIZE (fieldmode)
841 && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
842 && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
844 rtx last, tempreg, xop0;
845 unsigned HOST_WIDE_INT xoffset, xbitpos;
847 last = get_last_insn ();
849 /* Adjust address to point to the containing unit of
850 that mode. Compute the offset as a multiple of this unit,
851 counting in bytes. */
852 unit = GET_MODE_BITSIZE (bestmode);
853 xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
854 xbitpos = bitnum % unit;
855 xop0 = adjust_address (op0, bestmode, xoffset);
857 /* Fetch that unit, store the bitfield in it, then store
858 the unit. */
859 tempreg = copy_to_reg (xop0);
860 if (store_bit_field_1 (tempreg, bitsize, xbitpos,
861 bitregion_start, bitregion_end,
862 fieldmode, orig_value, false))
864 emit_move_insn (xop0, tempreg);
865 return true;
867 delete_insns_since (last);
871 if (!fallback_p)
872 return false;
874 store_fixed_bit_field (op0, offset, bitsize, bitpos,
875 bitregion_start, bitregion_end, value);
876 return true;
879 /* Generate code to store value from rtx VALUE
880 into a bit-field within structure STR_RTX
881 containing BITSIZE bits starting at bit BITNUM.
883 BITREGION_START is the bit position of the first bit field in this region.
884 BITREGION_END is the bit position of the last bit field in this region.
885 These two fields are 0 if the C++ memory model does not apply,
886 or if we are not interested in keeping track of bitfield regions.
888 FIELDMODE is the machine-mode of the FIELD_DECL node for this field. */
890 void
891 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
892 unsigned HOST_WIDE_INT bitnum,
893 unsigned HOST_WIDE_INT bitregion_start,
894 unsigned HOST_WIDE_INT bitregion_end,
895 enum machine_mode fieldmode,
896 rtx value)
898 /* Under the C++0x memory model, we must not touch bits outside the
899 bit region. Adjust the address to start at the beginning of the
900 bit region. */
901 if (MEM_P (str_rtx) && bitregion_start > 0)
903 enum machine_mode bestmode;
904 enum machine_mode op_mode;
905 unsigned HOST_WIDE_INT offset;
907 op_mode = mode_for_extraction (EP_insv, 3);
908 if (op_mode == MAX_MACHINE_MODE)
909 op_mode = VOIDmode;
911 gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);
913 offset = bitregion_start / BITS_PER_UNIT;
914 bitnum -= bitregion_start;
915 bitregion_end -= bitregion_start;
916 bitregion_start = 0;
917 bestmode = get_best_mode (bitsize, bitnum,
918 bitregion_start, bitregion_end,
919 MEM_ALIGN (str_rtx),
920 op_mode,
921 MEM_VOLATILE_P (str_rtx));
922 str_rtx = adjust_address (str_rtx, bestmode, offset);
925 if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
926 bitregion_start, bitregion_end,
927 fieldmode, value, true))
928 gcc_unreachable ();
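/* A hypothetical call (illustration only; the names are invented): storing a
   3-bit field that starts at bit 9 of a structure in memory, whose enclosing
   byte-aligned region covers bits 8..15:

     store_bit_field (mem, 3, 9, 8, 15, QImode, val);

   Callers that do not care about the C++0x bit-region restrictions simply
   pass 0 for both BITREGION_START and BITREGION_END.  */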
931 /* Use shifts and boolean operations to store VALUE
932 into a bit field of width BITSIZE
933 in the memory location specified by OP0, offset by OFFSET bytes.
934 (OFFSET must be 0 if OP0 is a register.)
935 The field starts at position BITPOS within the byte.
936 (If OP0 is a register, it may be a full word or a narrower mode,
937 but BITPOS still counts within a full word,
938 which is significant on bigendian machines.) */
940 static void
941 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT offset,
942 unsigned HOST_WIDE_INT bitsize,
943 unsigned HOST_WIDE_INT bitpos,
944 unsigned HOST_WIDE_INT bitregion_start,
945 unsigned HOST_WIDE_INT bitregion_end,
946 rtx value)
948 enum machine_mode mode;
949 unsigned int total_bits = BITS_PER_WORD;
950 rtx temp;
951 int all_zero = 0;
952 int all_one = 0;
954 /* There is a case not handled here:
955 a structure with a known alignment of just a halfword
956 and a field split across two aligned halfwords within the structure.
957 Or likewise a structure with a known alignment of just a byte
958 and a field split across two bytes.
959 Such cases are not supposed to be able to occur. */
961 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
963 gcc_assert (!offset);
964 /* Special treatment for a bit field split across two registers. */
965 if (bitsize + bitpos > BITS_PER_WORD)
967 store_split_bit_field (op0, bitsize, bitpos,
968 bitregion_start, bitregion_end,
969 value);
970 return;
973 else
975 unsigned HOST_WIDE_INT maxbits = MAX_FIXED_MODE_SIZE;
977 if (bitregion_end)
978 maxbits = bitregion_end - bitregion_start + 1;
980 /* Get the proper mode to use for this field. We want a mode that
981 includes the entire field. If such a mode would be larger than
982 a word, we won't be doing the extraction the normal way.
983 We don't want a mode bigger than the destination. */
985 mode = GET_MODE (op0);
986 if (GET_MODE_BITSIZE (mode) == 0
987 || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
988 mode = word_mode;
990 if (MEM_VOLATILE_P (op0)
991 && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
992 && GET_MODE_BITSIZE (GET_MODE (op0)) <= maxbits
993 && flag_strict_volatile_bitfields > 0)
994 mode = GET_MODE (op0);
995 else
996 mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
997 bitregion_start, bitregion_end,
998 MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
1000 if (mode == VOIDmode)
1002 /* The only way this should occur is if the field spans word
1003 boundaries. */
1004 store_split_bit_field (op0, bitsize, bitpos + offset * BITS_PER_UNIT,
1005 bitregion_start, bitregion_end, value);
1006 return;
1009 total_bits = GET_MODE_BITSIZE (mode);
1011 /* Make sure bitpos is valid for the chosen mode. Adjust BITPOS to
1012 be in the range 0 to total_bits-1, and put any excess bytes in
1013 OFFSET. */
1014 if (bitpos >= total_bits)
1016 offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
1017 bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
1018 * BITS_PER_UNIT);
1021 /* Get ref to an aligned byte, halfword, or word containing the field.
1022 Adjust BITPOS to be the position within a word,
1023 and OFFSET to be the offset of that word.
1024 Then alter OP0 to refer to that word. */
1025 bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
1026 offset -= (offset % (total_bits / BITS_PER_UNIT));
1027 op0 = adjust_address (op0, mode, offset);
1030 mode = GET_MODE (op0);
1032 /* Now MODE is either some integral mode for a MEM as OP0,
1033 or is a full-word for a REG as OP0. TOTAL_BITS corresponds.
1034 The bit field is contained entirely within OP0.
1035 BITPOS is the starting bit number within OP0.
1036 (OP0's mode may actually be narrower than MODE.) */
1038 if (BYTES_BIG_ENDIAN)
1039 /* BITPOS is the distance between our msb
1040 and that of the containing datum.
1041 Convert it to the distance from the lsb. */
1042 bitpos = total_bits - bitsize - bitpos;
1044 /* Now BITPOS is always the distance between our lsb
1045 and that of OP0. */
1047 /* Shift VALUE left by BITPOS bits. If VALUE is not constant,
1048 we must first convert its mode to MODE. */
1050 if (CONST_INT_P (value))
1052 HOST_WIDE_INT v = INTVAL (value);
1054 if (bitsize < HOST_BITS_PER_WIDE_INT)
1055 v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
1057 if (v == 0)
1058 all_zero = 1;
1059 else if ((bitsize < HOST_BITS_PER_WIDE_INT
1060 && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
1061 || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
1062 all_one = 1;
1064 value = lshift_value (mode, value, bitpos, bitsize);
1066 else
1068 int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
1069 && bitpos + bitsize != GET_MODE_BITSIZE (mode));
1071 if (GET_MODE (value) != mode)
1072 value = convert_to_mode (mode, value, 1);
1074 if (must_and)
1075 value = expand_binop (mode, and_optab, value,
1076 mask_rtx (mode, 0, bitsize, 0),
1077 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1078 if (bitpos > 0)
1079 value = expand_shift (LSHIFT_EXPR, mode, value,
1080 bitpos, NULL_RTX, 1);
1083 /* Now clear the chosen bits in OP0,
1084 except that if VALUE is -1 we need not bother. */
1085 /* We keep the intermediates in registers to allow CSE to combine
1086 consecutive bitfield assignments. */
1088 temp = force_reg (mode, op0);
1090 if (! all_one)
1092 temp = expand_binop (mode, and_optab, temp,
1093 mask_rtx (mode, bitpos, bitsize, 1),
1094 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1095 temp = force_reg (mode, temp);
1098 /* Now logical-or VALUE into OP0, unless it is zero. */
1100 if (! all_zero)
1102 temp = expand_binop (mode, ior_optab, temp, value,
1103 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1104 temp = force_reg (mode, temp);
1107 if (op0 != temp)
1109 op0 = copy_rtx (op0);
1110 emit_move_insn (op0, temp);
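/* Worked example of the sequence above (illustration only; bit positions are
   counted from the least significant bit): storing a 4-bit VALUE at BITPOS 8
   of a 32-bit word amounts to

     op0 = (op0 & ~(0xf << 8)) | ((value & 0xf) << 8);

   that is, an AND with the complemented mask from mask_rtx followed by an
   IOR of the masked, shifted value; the all_zero/all_one flags let either
   step be skipped when VALUE is constant 0 or constant all-ones.  */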
1114 /* Store a bit field that is split across multiple accessible memory objects.
1116 OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1117 BITSIZE is the field width; BITPOS the position of its first bit
1118 (within the word).
1119 VALUE is the value to store.
1121 This does not yet handle fields wider than BITS_PER_WORD. */
1123 static void
1124 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1125 unsigned HOST_WIDE_INT bitpos,
1126 unsigned HOST_WIDE_INT bitregion_start,
1127 unsigned HOST_WIDE_INT bitregion_end,
1128 rtx value)
1130 unsigned int unit;
1131 unsigned int bitsdone = 0;
1133 /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
1134 much at a time. */
1135 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1136 unit = BITS_PER_WORD;
1137 else
1138 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1140 /* If VALUE is a constant other than a CONST_INT, get it into a register in
1141 WORD_MODE. If we can do this using gen_lowpart_common, do so. Note
1142 that VALUE might be a floating-point constant. */
1143 if (CONSTANT_P (value) && !CONST_INT_P (value))
1145 rtx word = gen_lowpart_common (word_mode, value);
1147 if (word && (value != word))
1148 value = word;
1149 else
1150 value = gen_lowpart_common (word_mode,
1151 force_reg (GET_MODE (value) != VOIDmode
1152 ? GET_MODE (value)
1153 : word_mode, value));
1156 while (bitsdone < bitsize)
1158 unsigned HOST_WIDE_INT thissize;
1159 rtx part, word;
1160 unsigned HOST_WIDE_INT thispos;
1161 unsigned HOST_WIDE_INT offset;
1163 offset = (bitpos + bitsdone) / unit;
1164 thispos = (bitpos + bitsdone) % unit;
1166 /* When the region of bytes we may touch is restricted, decrease
1167 UNIT as needed as we near the end of the region. */
1168 if (bitregion_end
1169 && unit > BITS_PER_UNIT
1170 && bitpos + bitsdone - thispos + unit > bitregion_end + 1)
1172 unit = unit / 2;
1173 continue;
1176 /* THISSIZE must not overrun a word boundary. Otherwise,
1177 store_fixed_bit_field will call us again, and we will mutually
1178 recurse forever. */
1179 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1180 thissize = MIN (thissize, unit - thispos);
1182 if (BYTES_BIG_ENDIAN)
1184 int total_bits;
1186 /* We must do an endian conversion exactly the same way as it is
1187 done in extract_bit_field, so that the two calls to
1188 extract_fixed_bit_field will have comparable arguments. */
1189 if (!MEM_P (value) || GET_MODE (value) == BLKmode)
1190 total_bits = BITS_PER_WORD;
1191 else
1192 total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1194 /* Fetch successively less significant portions. */
1195 if (CONST_INT_P (value))
1196 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1197 >> (bitsize - bitsdone - thissize))
1198 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1199 else
1200 /* The args are chosen so that the last part includes the
1201 lsb. Give extract_bit_field the value it needs (with
1202 endianness compensation) to fetch the piece we want. */
1203 part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1204 total_bits - bitsize + bitsdone,
1205 NULL_RTX, 1, false);
1207 else
1209 /* Fetch successively more significant portions. */
1210 if (CONST_INT_P (value))
1211 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1212 >> bitsdone)
1213 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1214 else
1215 part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1216 bitsdone, NULL_RTX, 1, false);
1219 /* If OP0 is a register, then handle OFFSET here.
1221 When handling multiword bitfields, extract_bit_field may pass
1222 down a word_mode SUBREG of a larger REG for a bitfield that actually
1223 crosses a word boundary. Thus, for a SUBREG, we must find
1224 the current word starting from the base register. */
1225 if (GET_CODE (op0) == SUBREG)
1227 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1228 enum machine_mode sub_mode = GET_MODE (SUBREG_REG (op0));
1229 if (sub_mode != BLKmode && GET_MODE_SIZE (sub_mode) < UNITS_PER_WORD)
1230 word = word_offset ? const0_rtx : op0;
1231 else
1232 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1233 GET_MODE (SUBREG_REG (op0)));
1234 offset = 0;
1236 else if (REG_P (op0))
1238 enum machine_mode op0_mode = GET_MODE (op0);
1239 if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
1240 word = offset ? const0_rtx : op0;
1241 else
1242 word = operand_subword_force (op0, offset, GET_MODE (op0));
1243 offset = 0;
1245 else
1246 word = op0;
1248 /* OFFSET is in UNITs, and UNIT is in bits.
1249 store_fixed_bit_field wants offset in bytes. If WORD is const0_rtx,
1250 it is just an out-of-bounds access. Ignore it. */
1251 if (word != const0_rtx)
1252 store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT, thissize,
1253 thispos, bitregion_start, bitregion_end, part);
1254 bitsdone += thissize;
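/* Worked example of the loop above (illustration only): with 32-bit words,
   a 20-bit field starting at BITPOS 26 is stored in two pieces:

     pass 1: offset 0, thispos 26, thissize 6   (bits 26..31 of word 0)
     pass 2: offset 1, thispos 0,  thissize 14  (bits 0..13 of word 1)

   each piece being handed to store_fixed_bit_field within a single word.  */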
1258 /* A subroutine of extract_bit_field_1 that converts return value X
1259 to either MODE or TMODE. MODE, TMODE and UNSIGNEDP are arguments
1260 to extract_bit_field. */
1262 static rtx
1263 convert_extracted_bit_field (rtx x, enum machine_mode mode,
1264 enum machine_mode tmode, bool unsignedp)
1266 if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1267 return x;
1269 /* If the desired mode is not a scalar integral mode, first convert X
1270 to the integer mode of that size and then access the result as
1271 TMODE via a SUBREG. */
1272 if (!SCALAR_INT_MODE_P (tmode))
1274 enum machine_mode smode;
1276 smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1277 x = convert_to_mode (smode, x, unsignedp);
1278 x = force_reg (smode, x);
1279 return gen_lowpart (tmode, x);
1282 return convert_to_mode (tmode, x, unsignedp);
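/* Example (illustration only): extracting into SFmode on a 32-bit target
   goes through the same-sized integer mode, so X is converted to SImode,
   forced into a register, and then reinterpreted by gen_lowpart (SFmode, x).  */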
1285 /* A subroutine of extract_bit_field, with the same arguments.
1286 If FALLBACK_P is true, fall back to extract_fixed_bit_field
1287 if we can find no other means of implementing the operation.
1288 If FALLBACK_P is false, return NULL instead. */
1290 static rtx
1291 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1292 unsigned HOST_WIDE_INT bitnum,
1293 int unsignedp, bool packedp, rtx target,
1294 enum machine_mode mode, enum machine_mode tmode,
1295 bool fallback_p)
1297 unsigned int unit
1298 = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
1299 unsigned HOST_WIDE_INT offset, bitpos;
1300 rtx op0 = str_rtx;
1301 enum machine_mode int_mode;
1302 enum machine_mode ext_mode;
1303 enum machine_mode mode1;
1304 int byte_offset;
1306 if (tmode == VOIDmode)
1307 tmode = mode;
1309 while (GET_CODE (op0) == SUBREG)
1311 bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1312 op0 = SUBREG_REG (op0);
1315 /* If we have an out-of-bounds access to a register, just return an
1316 uninitialized register of the required mode. This can occur if the
1317 source code contains an out-of-bounds access to a small array. */
1318 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1319 return gen_reg_rtx (tmode);
1321 if (REG_P (op0)
1322 && mode == GET_MODE (op0)
1323 && bitnum == 0
1324 && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1326 /* We're trying to extract a full register from itself. */
1327 return op0;
1330 /* See if we can get a better vector mode before extracting. */
1331 if (VECTOR_MODE_P (GET_MODE (op0))
1332 && !MEM_P (op0)
1333 && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1335 enum machine_mode new_mode;
1337 if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1338 new_mode = MIN_MODE_VECTOR_FLOAT;
1339 else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1340 new_mode = MIN_MODE_VECTOR_FRACT;
1341 else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1342 new_mode = MIN_MODE_VECTOR_UFRACT;
1343 else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1344 new_mode = MIN_MODE_VECTOR_ACCUM;
1345 else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1346 new_mode = MIN_MODE_VECTOR_UACCUM;
1347 else
1348 new_mode = MIN_MODE_VECTOR_INT;
1350 for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1351 if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1352 && targetm.vector_mode_supported_p (new_mode))
1353 break;
1354 if (new_mode != VOIDmode)
1355 op0 = gen_lowpart (new_mode, op0);
1358 /* Use vec_extract patterns for extracting parts of vectors whenever
1359 available. */
1360 if (VECTOR_MODE_P (GET_MODE (op0))
1361 && !MEM_P (op0)
1362 && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing
1363 && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1364 == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1366 struct expand_operand ops[3];
1367 enum machine_mode outermode = GET_MODE (op0);
1368 enum machine_mode innermode = GET_MODE_INNER (outermode);
1369 enum insn_code icode = optab_handler (vec_extract_optab, outermode);
1370 unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1372 create_output_operand (&ops[0], target, innermode);
1373 create_input_operand (&ops[1], op0, outermode);
1374 create_integer_operand (&ops[2], pos);
1375 if (maybe_expand_insn (icode, 3, ops))
1377 target = ops[0].value;
1378 if (GET_MODE (target) != mode)
1379 return gen_lowpart (tmode, target);
1380 return target;
1384 /* Make sure we are playing with integral modes. Pun with subregs
1385 if we aren't. */
1387 enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1388 if (imode != GET_MODE (op0))
1390 if (MEM_P (op0))
1391 op0 = adjust_address (op0, imode, 0);
1392 else if (imode != BLKmode)
1394 op0 = gen_lowpart (imode, op0);
1396 /* If we got a SUBREG, force it into a register since we
1397 aren't going to be able to do another SUBREG on it. */
1398 if (GET_CODE (op0) == SUBREG)
1399 op0 = force_reg (imode, op0);
1401 else if (REG_P (op0))
1403 rtx reg, subreg;
1404 imode = smallest_mode_for_size (GET_MODE_BITSIZE (GET_MODE (op0)),
1405 MODE_INT);
1406 reg = gen_reg_rtx (imode);
1407 subreg = gen_lowpart_SUBREG (GET_MODE (op0), reg);
1408 emit_move_insn (subreg, op0);
1409 op0 = reg;
1410 bitnum += SUBREG_BYTE (subreg) * BITS_PER_UNIT;
1412 else
1414 rtx mem = assign_stack_temp (GET_MODE (op0),
1415 GET_MODE_SIZE (GET_MODE (op0)));
1416 emit_move_insn (mem, op0);
1417 op0 = adjust_address (mem, BLKmode, 0);
1422 /* We may be accessing data outside the field, which means
1423 we can alias adjacent data. */
1424 if (MEM_P (op0))
1426 op0 = shallow_copy_rtx (op0);
1427 set_mem_alias_set (op0, 0);
1428 set_mem_expr (op0, 0);
1431 /* Extraction of a full-word or multi-word value from a structure
1432 in a register or aligned memory can be done with just a SUBREG.
1433 A subword value in the least significant part of a register
1434 can also be extracted with a SUBREG. For this, we need the
1435 byte offset of the value in op0. */
1437 bitpos = bitnum % unit;
1438 offset = bitnum / unit;
1439 byte_offset = bitpos / BITS_PER_UNIT + offset * UNITS_PER_WORD;
1441 /* If OP0 is a register, BITPOS must count within a word.
1442 But as we have it, it counts within whatever size OP0 now has.
1443 On a bigendian machine, these are not the same, so convert. */
1444 if (BYTES_BIG_ENDIAN
1445 && !MEM_P (op0)
1446 && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
1447 bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1449 /* ??? We currently assume TARGET is at least as big as BITSIZE.
1450 If that's wrong, the solution is to test for it and set TARGET to 0
1451 if needed. */
1453 /* Only scalar integer modes can be converted via subregs. There is an
1454 additional problem for FP modes here in that they can have a precision
1455 which is different from the size. mode_for_size uses precision, but
1456 we want a mode based on the size, so we must avoid calling it for FP
1457 modes. */
1458 mode1 = (SCALAR_INT_MODE_P (tmode)
1459 ? mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0)
1460 : mode);
1462 /* If the bitfield is volatile, we need to make sure the access
1463 remains on a type-aligned boundary. */
1464 if (GET_CODE (op0) == MEM
1465 && MEM_VOLATILE_P (op0)
1466 && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
1467 && flag_strict_volatile_bitfields > 0)
1468 goto no_subreg_mode_swap;
1470 if (((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode)
1471 && bitpos % BITS_PER_WORD == 0)
1472 || (mode1 != BLKmode
1473 /* ??? The big endian test here is wrong. This is correct
1474 if the value is in a register, and if mode_for_size is not
1475 the same mode as op0. This causes us to get unnecessarily
1476 inefficient code from the Thumb port when -mbig-endian. */
1477 && (BYTES_BIG_ENDIAN
1478 ? bitpos + bitsize == BITS_PER_WORD
1479 : bitpos == 0)))
1480 && ((!MEM_P (op0)
1481 && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0))
1482 && GET_MODE_SIZE (mode1) != 0
1483 && byte_offset % GET_MODE_SIZE (mode1) == 0)
1484 || (MEM_P (op0)
1485 && (! SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
1486 || (offset * BITS_PER_UNIT % bitsize == 0
1487 && MEM_ALIGN (op0) % bitsize == 0)))))
1489 if (MEM_P (op0))
1490 op0 = adjust_address (op0, mode1, offset);
1491 else if (mode1 != GET_MODE (op0))
1493 rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1494 byte_offset);
1495 if (sub == NULL)
1496 goto no_subreg_mode_swap;
1497 op0 = sub;
1499 if (mode1 != mode)
1500 return convert_to_mode (tmode, op0, unsignedp);
1501 return op0;
1503 no_subreg_mode_swap:
1505 /* Handle fields bigger than a word. */
1507 if (bitsize > BITS_PER_WORD)
1509 /* Here we transfer the words of the field
1510 in the order least significant first.
1511 This is because the most significant word is the one which may
1512 be less than full. */
1514 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1515 unsigned int i;
1517 if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1518 target = gen_reg_rtx (mode);
1520 /* Indicate for flow that the entire target reg is being set. */
1521 emit_clobber (target);
1523 for (i = 0; i < nwords; i++)
1525 /* If I is 0, use the low-order word in both field and target;
1526 if I is 1, use the next to lowest word; and so on. */
1527 /* Word number in TARGET to use. */
1528 unsigned int wordnum
1529 = (WORDS_BIG_ENDIAN
1530 ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1531 : i);
1532 /* Offset from start of field in OP0. */
1533 unsigned int bit_offset = (WORDS_BIG_ENDIAN
1534 ? MAX (0, ((int) bitsize - ((int) i + 1)
1535 * (int) BITS_PER_WORD))
1536 : (int) i * BITS_PER_WORD);
1537 rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1538 rtx result_part
1539 = extract_bit_field (op0, MIN (BITS_PER_WORD,
1540 bitsize - i * BITS_PER_WORD),
1541 bitnum + bit_offset, 1, false, target_part, mode,
1542 word_mode);
1544 gcc_assert (target_part);
1546 if (result_part != target_part)
1547 emit_move_insn (target_part, result_part);
1550 if (unsignedp)
1552 /* Unless we've filled TARGET, the upper regs in a multi-reg value
1553 need to be zero'd out. */
1554 if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1556 unsigned int i, total_words;
1558 total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1559 for (i = nwords; i < total_words; i++)
1560 emit_move_insn
1561 (operand_subword (target,
1562 WORDS_BIG_ENDIAN ? total_words - i - 1 : i,
1563 1, VOIDmode),
1564 const0_rtx);
1566 return target;
1569 /* Signed bit field: sign-extend with two arithmetic shifts. */
1570 target = expand_shift (LSHIFT_EXPR, mode, target,
1571 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1572 return expand_shift (RSHIFT_EXPR, mode, target,
1573 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1576 /* From here on we know the desired field is smaller than a word. */
1578 /* Check if there is a correspondingly-sized integer field, so we can
1579 safely extract it as one size of integer, if necessary; then
1580 truncate or extend to the size that is wanted; then use SUBREGs or
1581 convert_to_mode to get one of the modes we really wanted. */
1583 int_mode = int_mode_for_mode (tmode);
1584 if (int_mode == BLKmode)
1585 int_mode = int_mode_for_mode (mode);
1586 /* Should probably push op0 out to memory and then do a load. */
1587 gcc_assert (int_mode != BLKmode);
1589 /* OFFSET is the number of words or bytes (UNIT says which)
1590 from STR_RTX to the first word or byte containing part of the field. */
1591 if (!MEM_P (op0))
1593 if (offset != 0
1594 || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1596 if (!REG_P (op0))
1597 op0 = copy_to_reg (op0);
1598 op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
1599 op0, (offset * UNITS_PER_WORD));
1601 offset = 0;
1604 /* Now OFFSET is nonzero only for memory operands. */
1605 ext_mode = mode_for_extraction (unsignedp ? EP_extzv : EP_extv, 0);
1606 if (ext_mode != MAX_MACHINE_MODE
1607 && bitsize > 0
1608 && GET_MODE_BITSIZE (ext_mode) >= bitsize
1609 /* Do not use extv/extzv for volatile bitfields when
1610 -fstrict-volatile-bitfields is in effect. */
1611 && !(MEM_P (op0) && MEM_VOLATILE_P (op0)
1612 && flag_strict_volatile_bitfields > 0)
1613 /* If op0 is a register, we need it in EXT_MODE to make it
1614 acceptable to the format of ext(z)v. */
1615 && !(GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1616 && !((REG_P (op0) || GET_CODE (op0) == SUBREG)
1617 && (bitsize + bitpos > GET_MODE_BITSIZE (ext_mode))))
1619 struct expand_operand ops[4];
1620 unsigned HOST_WIDE_INT xbitpos = bitpos, xoffset = offset;
1621 rtx xop0 = op0;
1622 rtx xtarget = target;
1623 rtx xspec_target = target;
1624 rtx xspec_target_subreg = 0;
1626 /* If op0 is a register, we need it in EXT_MODE to make it
1627 acceptable to the format of ext(z)v. */
1628 if (REG_P (xop0) && GET_MODE (xop0) != ext_mode)
1629 xop0 = gen_lowpart_SUBREG (ext_mode, xop0);
1630 if (MEM_P (xop0))
1631 /* Get ref to first byte containing part of the field. */
1632 xop0 = adjust_address (xop0, byte_mode, xoffset);
1634 /* Now convert from counting within UNIT to counting in EXT_MODE. */
1635 if (BYTES_BIG_ENDIAN && !MEM_P (xop0))
1636 xbitpos += GET_MODE_BITSIZE (ext_mode) - unit;
1638 unit = GET_MODE_BITSIZE (ext_mode);
1640 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1641 "backwards" from the size of the unit we are extracting from.
1642 Otherwise, we count bits from the most significant on a
1643 BYTES/BITS_BIG_ENDIAN machine. */
1645 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1646 xbitpos = unit - bitsize - xbitpos;
1648 if (xtarget == 0)
1649 xtarget = xspec_target = gen_reg_rtx (tmode);
1651 if (GET_MODE (xtarget) != ext_mode)
1653 /* Don't use LHS paradoxical subreg if explicit truncation is needed
1654 between the mode of the extraction (word_mode) and the target
1655 mode. Instead, create a temporary and use convert_move to set
1656 the target. */
1657 if (REG_P (xtarget)
1658 && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (xtarget), ext_mode))
1660 xtarget = gen_lowpart (ext_mode, xtarget);
1661 if (GET_MODE_PRECISION (ext_mode)
1662 > GET_MODE_PRECISION (GET_MODE (xspec_target)))
1663 xspec_target_subreg = xtarget;
1665 else
1666 xtarget = gen_reg_rtx (ext_mode);
1669 create_output_operand (&ops[0], xtarget, ext_mode);
1670 create_fixed_operand (&ops[1], xop0);
1671 create_integer_operand (&ops[2], bitsize);
1672 create_integer_operand (&ops[3], xbitpos);
1673 if (maybe_expand_insn (unsignedp ? CODE_FOR_extzv : CODE_FOR_extv,
1674 4, ops))
1676 xtarget = ops[0].value;
1677 if (xtarget == xspec_target)
1678 return xtarget;
1679 if (xtarget == xspec_target_subreg)
1680 return xspec_target;
1681 return convert_extracted_bit_field (xtarget, mode, tmode, unsignedp);
1685 /* If OP0 is a memory, try copying it to a register and seeing if a
1686 cheap register alternative is available. */
1687 if (ext_mode != MAX_MACHINE_MODE && MEM_P (op0))
1689 enum machine_mode bestmode;
1691 /* Get the mode to use for extracting this field. If
1692 OP0 is BLKmode, get the smallest mode consistent with the
1693 alignment. If OP0 is a non-BLKmode object that is no
1694 wider than EXT_MODE, use its mode. Otherwise, use the
1695 smallest mode containing the field. */
1697 if (GET_MODE (op0) == BLKmode
1698 || (ext_mode != MAX_MACHINE_MODE
1699 && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (ext_mode)))
1700 bestmode = get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0),
1701 (ext_mode == MAX_MACHINE_MODE
1702 ? VOIDmode : ext_mode),
1703 MEM_VOLATILE_P (op0));
1704 else
1705 bestmode = GET_MODE (op0);
1707 if (bestmode != VOIDmode
1708 && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
1709 && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
1711 unsigned HOST_WIDE_INT xoffset, xbitpos;
1713 /* Compute the offset as a multiple of this unit,
1714 counting in bytes. */
1715 unit = GET_MODE_BITSIZE (bestmode);
1716 xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1717 xbitpos = bitnum % unit;
1719 /* Make sure the register is big enough for the whole field. */
1720 if (xoffset * BITS_PER_UNIT + unit
1721 >= offset * BITS_PER_UNIT + bitsize)
1723 rtx last, result, xop0;
1725 last = get_last_insn ();
1727 /* Fetch it to a register in that size. */
1728 xop0 = adjust_address (op0, bestmode, xoffset);
1729 xop0 = force_reg (bestmode, xop0);
1730 result = extract_bit_field_1 (xop0, bitsize, xbitpos,
1731 unsignedp, packedp, target,
1732 mode, tmode, false);
1733 if (result)
1734 return result;
1736 delete_insns_since (last);
1741 if (!fallback_p)
1742 return NULL;
1744 target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1745 bitpos, target, unsignedp, packedp);
1746 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1749 /* Generate code to extract a bit-field from STR_RTX
1750 containing BITSIZE bits, starting at BITNUM,
1751 and put it in TARGET if possible (if TARGET is nonzero).
1752 Regardless of TARGET, we return the rtx for where the value is placed.
1754 STR_RTX is the structure containing the byte (a REG or MEM).
1755 UNSIGNEDP is nonzero if this is an unsigned bit field.
1756 PACKEDP is nonzero if the field has the packed attribute.
1757 MODE is the natural mode of the field value once extracted.
1758 TMODE is the mode the caller would like the value to have;
1759 but the value may be returned with type MODE instead.
1761 If a TARGET is specified and we can store in it at no extra cost,
1762 we do so, and return TARGET.
1763 Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1764 if they are equally easy. */
1767 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1768 unsigned HOST_WIDE_INT bitnum, int unsignedp, bool packedp,
1769 rtx target, enum machine_mode mode, enum machine_mode tmode)
1771 return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp, packedp,
1772 target, mode, tmode, true);
1775 /* Extract a bit field using shifts and boolean operations.
1776 Returns an rtx to represent the value.
1777 OP0 addresses a register (word) or memory (byte).
1778 BITPOS says which bit within the word or byte the bit field starts in.
1779 OFFSET says how many bytes farther the bit field starts;
1780 it is 0 if OP0 is a register.
1781 BITSIZE says how many bits long the bit field is.
1782 (If OP0 is a register, it may be narrower than a full word,
1783 but BITPOS still counts within a full word,
1784 which is significant on big-endian machines.)
1786 UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1787 PACKEDP is true if the field has the packed attribute.
1789 If TARGET is nonzero, attempts to store the value there
1790 and return TARGET, but this is not guaranteed.
1791 If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */
1793 static rtx
1794 extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
1795 unsigned HOST_WIDE_INT offset,
1796 unsigned HOST_WIDE_INT bitsize,
1797 unsigned HOST_WIDE_INT bitpos, rtx target,
1798 int unsignedp, bool packedp)
1800 unsigned int total_bits = BITS_PER_WORD;
1801 enum machine_mode mode;
1803 if (GET_CODE (op0) == SUBREG || REG_P (op0))
1805 /* Special treatment for a bit field split across two registers. */
1806 if (bitsize + bitpos > BITS_PER_WORD)
1807 return extract_split_bit_field (op0, bitsize, bitpos, unsignedp);
1809 else
1811 /* Get the proper mode to use for this field. We want a mode that
1812 includes the entire field. If such a mode would be larger than
1813 a word, we won't be doing the extraction the normal way. */
1815 if (MEM_VOLATILE_P (op0)
1816 && flag_strict_volatile_bitfields > 0)
1818 if (GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
1819 mode = GET_MODE (op0);
1820 else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1821 mode = GET_MODE (target);
1822 else
1823 mode = tmode;
1825 else
1826 mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT, 0, 0,
1827 MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));
1829 if (mode == VOIDmode)
1830 /* The only way this should occur is if the field spans word
1831 boundaries. */
1832 return extract_split_bit_field (op0, bitsize,
1833 bitpos + offset * BITS_PER_UNIT,
1834 unsignedp);
1836 total_bits = GET_MODE_BITSIZE (mode);
1838 /* Make sure bitpos is valid for the chosen mode. Adjust BITPOS to
1839 be in the range 0 to total_bits-1, and put any excess bytes in
1840 OFFSET. */
1841 if (bitpos >= total_bits)
1843 offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
1844 bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
1845 * BITS_PER_UNIT);
1848 /* If we're accessing a volatile MEM, we can't do the next
1849 alignment step if it results in a multi-word access where we
1850 otherwise wouldn't have one. So, check for that case
1851 here. */
1852 if (MEM_P (op0)
1853 && MEM_VOLATILE_P (op0)
1854 && flag_strict_volatile_bitfields > 0
1855 && bitpos + bitsize <= total_bits
1856 && bitpos + bitsize + (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT > total_bits)
1858 if (STRICT_ALIGNMENT)
1860 static bool informed_about_misalignment = false;
1861 bool warned;
1863 if (packedp)
1865 if (bitsize == total_bits)
1866 warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1867 "multiple accesses to volatile structure member"
1868 " because of packed attribute");
1869 else
1870 warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1871 "multiple accesses to volatile structure bitfield"
1872 " because of packed attribute");
1874 return extract_split_bit_field (op0, bitsize,
1875 bitpos + offset * BITS_PER_UNIT,
1876 unsignedp);
1879 if (bitsize == total_bits)
1880 warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1881 "mis-aligned access used for structure member");
1882 else
1883 warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1884 "mis-aligned access used for structure bitfield");
1886 if (! informed_about_misalignment && warned)
1888 informed_about_misalignment = true;
1889 inform (input_location,
1890 "when a volatile object spans multiple type-sized locations,"
1891 " the compiler must choose between using a single mis-aligned access to"
1892 " preserve the volatility, or using multiple aligned accesses to avoid"
1893 " runtime faults; this code may fail at runtime if the hardware does"
1894 " not allow this access");
1898 else
1901 /* Get ref to an aligned byte, halfword, or word containing the field.
1902 Adjust BITPOS to be position within a word,
1903 and OFFSET to be the offset of that word.
1904 Then alter OP0 to refer to that word. */
1905 bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
1906 offset -= (offset % (total_bits / BITS_PER_UNIT));
1909 op0 = adjust_address (op0, mode, offset);
1912 mode = GET_MODE (op0);
1914 if (BYTES_BIG_ENDIAN)
1915 /* BITPOS is the distance between our msb and that of OP0.
1916 Convert it to the distance from the lsb. */
1917 bitpos = total_bits - bitsize - bitpos;
1919 /* Now BITPOS is always the distance between the field's lsb and that of OP0.
1920 We have reduced the big-endian case to the little-endian case. */
1922 if (unsignedp)
1924 if (bitpos)
1926 /* If the field does not already start at the lsb,
1927 shift it so it does. */
1928 /* Maybe propagate the target for the shift. */
1929 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1930 if (tmode != mode)
1931 subtarget = 0;
1932 op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitpos, subtarget, 1);
1934 /* Convert the value to the desired mode. */
1935 if (mode != tmode)
1936 op0 = convert_to_mode (tmode, op0, 1);
1938 /* Unless the msb of the field used to be the msb when we shifted,
1939 mask out the upper bits. */
1941 if (GET_MODE_BITSIZE (mode) != bitpos + bitsize)
1942 return expand_binop (GET_MODE (op0), and_optab, op0,
1943 mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1944 target, 1, OPTAB_LIB_WIDEN);
1945 return op0;
1948 /* To extract a signed bit-field, first shift its msb to the msb of the word,
1949 then arithmetic-shift its lsb to the lsb of the word. */
1950 op0 = force_reg (mode, op0);
1952 /* Find the narrowest integer mode that contains the field. */
1954 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1955 mode = GET_MODE_WIDER_MODE (mode))
1956 if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos)
1958 op0 = convert_to_mode (mode, op0, 0);
1959 break;
1962 if (mode != tmode)
1963 target = 0;
1965 if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos))
1967 int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitpos);
1968 /* Maybe propagate the target for the shift. */
1969 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1970 op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1973 return expand_shift (RSHIFT_EXPR, mode, op0,
1974 GET_MODE_BITSIZE (mode) - bitsize, target, 0);
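/* As a concrete example: in a 32-bit MODE, an unsigned 6-bit field whose lsb
   is at bit 10 comes out roughly as (op0 >> 10) & 0x3f.  The signed case
   first narrows to the smallest integer mode covering BITPOS + BITSIZE and
   then uses the two shifts above; e.g. a 6-bit field at bit 7 is handled in
   a 16-bit mode as (op0 << 3) >> 10 with an arithmetic right shift.  */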
1977 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1978 of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1979 complement of that if COMPLEMENT. The mask is truncated if
1980 necessary to the width of mode MODE. The mask is zero-extended if
1981 BITSIZE+BITPOS is too small for MODE. */
1983 static rtx
1984 mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
1986 double_int mask;
1988 mask = double_int_mask (bitsize);
1989 mask = double_int_lshift (mask, bitpos, HOST_BITS_PER_DOUBLE_INT, false);
1991 if (complement)
1992 mask = double_int_not (mask);
1994 return immed_double_int_const (mask, mode);
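/* For illustration: mask_rtx (SImode, 3, 4, 0) yields 0x78 (four ones
   starting at bit 3), and the complemented form yields 0xffffff87.  */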
1997 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1998 VALUE truncated to BITSIZE bits and then shifted left BITPOS bits. */
2000 static rtx
2001 lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize)
2003 double_int val;
2005 val = double_int_zext (uhwi_to_double_int (INTVAL (value)), bitsize);
2006 val = double_int_lshift (val, bitpos, HOST_BITS_PER_DOUBLE_INT, false);
2008 return immed_double_int_const (val, mode);
2011 /* Extract a bit field that is split across two words
2012 and return an RTX for the result.
2014 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
2015 BITSIZE is the field width; BITPOS is the position of its first bit within the word.
2016 UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend. */
2018 static rtx
2019 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
2020 unsigned HOST_WIDE_INT bitpos, int unsignedp)
2022 unsigned int unit;
2023 unsigned int bitsdone = 0;
2024 rtx result = NULL_RTX;
2025 int first = 1;
2027 /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
2028 much at a time. */
2029 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
2030 unit = BITS_PER_WORD;
2031 else
2032 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
2034 while (bitsdone < bitsize)
2036 unsigned HOST_WIDE_INT thissize;
2037 rtx part, word;
2038 unsigned HOST_WIDE_INT thispos;
2039 unsigned HOST_WIDE_INT offset;
2041 offset = (bitpos + bitsdone) / unit;
2042 thispos = (bitpos + bitsdone) % unit;
2044 /* THISSIZE must not overrun a word boundary. Otherwise,
2045 extract_fixed_bit_field will call us again, and we will mutually
2046 recurse forever. */
2047 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
2048 thissize = MIN (thissize, unit - thispos);
2050 /* If OP0 is a register, then handle OFFSET here.
2052 When handling multiword bitfields, extract_bit_field may pass
2053 down a word_mode SUBREG of a larger REG for a bitfield that actually
2054 crosses a word boundary. Thus, for a SUBREG, we must find
2055 the current word starting from the base register. */
2056 if (GET_CODE (op0) == SUBREG)
2058 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
2059 word = operand_subword_force (SUBREG_REG (op0), word_offset,
2060 GET_MODE (SUBREG_REG (op0)));
2061 offset = 0;
2063 else if (REG_P (op0))
2065 word = operand_subword_force (op0, offset, GET_MODE (op0));
2066 offset = 0;
2068 else
2069 word = op0;
2071 /* Extract the parts in bit-counting order,
2072 whose meaning is determined by BYTES_BIG_ENDIAN.
2073 OFFSET is in UNITs, and UNIT is in bits.
2074 extract_fixed_bit_field wants offset in bytes. */
2075 part = extract_fixed_bit_field (word_mode, word,
2076 offset * unit / BITS_PER_UNIT,
2077 thissize, thispos, 0, 1, false);
2078 bitsdone += thissize;
2080 /* Shift this part into place for the result. */
2081 if (BYTES_BIG_ENDIAN)
2083 if (bitsize != bitsdone)
2084 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2085 bitsize - bitsdone, 0, 1);
2087 else
2089 if (bitsdone != thissize)
2090 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2091 bitsdone - thissize, 0, 1);
2094 if (first)
2095 result = part;
2096 else
2097 /* Combine the parts with bitwise or. This works
2098 because we extracted each part as an unsigned bit field. */
2099 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2100 OPTAB_LIB_WIDEN);
2102 first = 0;
2105 /* Unsigned bit field: we are done. */
2106 if (unsignedp)
2107 return result;
2108 /* Signed bit field: sign-extend with two arithmetic shifts. */
2109 result = expand_shift (LSHIFT_EXPR, word_mode, result,
2110 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2111 return expand_shift (RSHIFT_EXPR, word_mode, result,
2112 BITS_PER_WORD - bitsize, NULL_RTX, 0);
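/* For illustration: with a 32-bit word, a signed 12-bit field assembled in
   the low bits of RESULT is sign-extended by shifting left 20 and then
   arithmetic-shifting right 20.  */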
2115 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2116 the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than
2117 MODE, fill the upper bits with zeros. Fail if the layout of either
2118 mode is unknown (as for CC modes) or if the extraction would involve
2119 unprofitable mode punning. Return the value on success, otherwise
2120 return null.
2122 This is different from gen_lowpart* in these respects:
2124 - the returned value must always be considered an rvalue
2126 - when MODE is wider than SRC_MODE, the extraction involves
2127 a zero extension
2129 - when MODE is smaller than SRC_MODE, the extraction involves
2130 a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
2132 In other words, this routine performs a computation, whereas the
2133 gen_lowpart* routines are conceptually lvalue or rvalue subreg
2134 operations. */
2137 extract_low_bits (enum machine_mode mode, enum machine_mode src_mode, rtx src)
2139 enum machine_mode int_mode, src_int_mode;
2141 if (mode == src_mode)
2142 return src;
2144 if (CONSTANT_P (src))
2146 /* simplify_gen_subreg can't be used here, as if simplify_subreg
2147 fails, it will happily create (subreg (symbol_ref)) or similar
2148 invalid SUBREGs. */
2149 unsigned int byte = subreg_lowpart_offset (mode, src_mode);
2150 rtx ret = simplify_subreg (mode, src, src_mode, byte);
2151 if (ret)
2152 return ret;
2154 if (GET_MODE (src) == VOIDmode
2155 || !validate_subreg (mode, src_mode, src, byte))
2156 return NULL_RTX;
2158 src = force_reg (GET_MODE (src), src);
2159 return gen_rtx_SUBREG (mode, src, byte);
2162 if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2163 return NULL_RTX;
2165 if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2166 && MODES_TIEABLE_P (mode, src_mode))
2168 rtx x = gen_lowpart_common (mode, src);
2169 if (x)
2170 return x;
2173 src_int_mode = int_mode_for_mode (src_mode);
2174 int_mode = int_mode_for_mode (mode);
2175 if (src_int_mode == BLKmode || int_mode == BLKmode)
2176 return NULL_RTX;
2178 if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2179 return NULL_RTX;
2180 if (!MODES_TIEABLE_P (int_mode, mode))
2181 return NULL_RTX;
2183 src = gen_lowpart (src_int_mode, src);
2184 src = convert_modes (int_mode, src_int_mode, src, true);
2185 src = gen_lowpart (mode, src);
2186 return src;
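/* A rough example: reading the bits of a 32-bit SFmode value as SImode can
   normally use gen_lowpart_common above, since the sizes match and the modes
   are usually tieable; reading the low bits of a wider DFmode value as SImode
   instead goes through the integer modes, viewing the source as DImode and
   truncating to SImode with convert_modes, subject to the tieability checks.  */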
2189 /* Add INC into TARGET. */
2191 void
2192 expand_inc (rtx target, rtx inc)
2194 rtx value = expand_binop (GET_MODE (target), add_optab,
2195 target, inc,
2196 target, 0, OPTAB_LIB_WIDEN);
2197 if (value != target)
2198 emit_move_insn (target, value);
2201 /* Subtract DEC from TARGET. */
2203 void
2204 expand_dec (rtx target, rtx dec)
2206 rtx value = expand_binop (GET_MODE (target), sub_optab,
2207 target, dec,
2208 target, 0, OPTAB_LIB_WIDEN);
2209 if (value != target)
2210 emit_move_insn (target, value);
2213 /* Output a shift instruction for expression code CODE,
2214 with SHIFTED being the rtx for the value to shift,
2215 and AMOUNT the rtx for the amount to shift by.
2216 Store the result in the rtx TARGET, if that is convenient.
2217 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2218 Return the rtx for where the value is. */
2220 static rtx
2221 expand_shift_1 (enum tree_code code, enum machine_mode mode, rtx shifted,
2222 rtx amount, rtx target, int unsignedp)
2224 rtx op1, temp = 0;
2225 int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2226 int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2227 optab lshift_optab = ashl_optab;
2228 optab rshift_arith_optab = ashr_optab;
2229 optab rshift_uns_optab = lshr_optab;
2230 optab lrotate_optab = rotl_optab;
2231 optab rrotate_optab = rotr_optab;
2232 enum machine_mode op1_mode;
2233 int attempt;
2234 bool speed = optimize_insn_for_speed_p ();
2236 op1 = amount;
2237 op1_mode = GET_MODE (op1);
2239 /* Determine whether the shift/rotate amount is a vector, or scalar. If the
2240 shift amount is a vector, use the vector/vector shift patterns. */
2241 if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2243 lshift_optab = vashl_optab;
2244 rshift_arith_optab = vashr_optab;
2245 rshift_uns_optab = vlshr_optab;
2246 lrotate_optab = vrotl_optab;
2247 rrotate_optab = vrotr_optab;
2250 /* We previously detected shift counts computed by NEGATE_EXPR
2251 and shifted in the other direction, but that does not work
2252 on all machines. */
2254 if (SHIFT_COUNT_TRUNCATED)
2256 if (CONST_INT_P (op1)
2257 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2258 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
2259 op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2260 % GET_MODE_BITSIZE (mode));
2261 else if (GET_CODE (op1) == SUBREG
2262 && subreg_lowpart_p (op1)
2263 && INTEGRAL_MODE_P (GET_MODE (SUBREG_REG (op1))))
2264 op1 = SUBREG_REG (op1);
2267 if (op1 == const0_rtx)
2268 return shifted;
2270 /* Check whether it's cheaper to implement a left shift by a constant
2271 bit count by a sequence of additions. */
2272 if (code == LSHIFT_EXPR
2273 && CONST_INT_P (op1)
2274 && INTVAL (op1) > 0
2275 && INTVAL (op1) < GET_MODE_PRECISION (mode)
2276 && INTVAL (op1) < MAX_BITS_PER_WORD
2277 && (shift_cost (speed, mode, INTVAL (op1))
2278 > INTVAL (op1) * add_cost (speed, mode))
2279 && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2281 int i;
2282 for (i = 0; i < INTVAL (op1); i++)
2284 temp = force_reg (mode, shifted);
2285 shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2286 unsignedp, OPTAB_LIB_WIDEN);
2288 return shifted;
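/* For instance, when the cost of a shift by 2 exceeds twice the add cost,
   x << 2 is emitted above as two self-additions: t = x + x; t = t + t.  */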
2291 for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2293 enum optab_methods methods;
2295 if (attempt == 0)
2296 methods = OPTAB_DIRECT;
2297 else if (attempt == 1)
2298 methods = OPTAB_WIDEN;
2299 else
2300 methods = OPTAB_LIB_WIDEN;
2302 if (rotate)
2304 /* Widening does not work for rotation. */
2305 if (methods == OPTAB_WIDEN)
2306 continue;
2307 else if (methods == OPTAB_LIB_WIDEN)
2309 /* If we have been unable to open-code this by a rotation,
2310 do it as the IOR of two shifts. I.e., to rotate A
2311 by N bits, compute (A << N) | ((unsigned) A >> (C - N))
2312 where C is the bitsize of A.
2314 It is theoretically possible that the target machine might
2315 not be able to perform either shift and hence we would
2316 be making two libcalls rather than just the one for the
2317 shift (similarly if IOR could not be done). We will allow
2318 this extremely unlikely lossage to avoid complicating the
2319 code below. */
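/* E.g., rotating a 32-bit value A left by 5 is emitted here as
   (A << 5) | ((unsigned) A >> 27) when no rotate pattern is available.  */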
2321 rtx subtarget = target == shifted ? 0 : target;
2322 rtx new_amount, other_amount;
2323 rtx temp1;
2325 new_amount = op1;
2326 if (CONST_INT_P (op1))
2327 other_amount = GEN_INT (GET_MODE_BITSIZE (mode)
2328 - INTVAL (op1));
2329 else
2330 other_amount
2331 = simplify_gen_binary (MINUS, GET_MODE (op1),
2332 GEN_INT (GET_MODE_PRECISION (mode)),
2333 op1);
2335 shifted = force_reg (mode, shifted);
2337 temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2338 mode, shifted, new_amount, 0, 1);
2339 temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2340 mode, shifted, other_amount,
2341 subtarget, 1);
2342 return expand_binop (mode, ior_optab, temp, temp1, target,
2343 unsignedp, methods);
2346 temp = expand_binop (mode,
2347 left ? lrotate_optab : rrotate_optab,
2348 shifted, op1, target, unsignedp, methods);
2350 else if (unsignedp)
2351 temp = expand_binop (mode,
2352 left ? lshift_optab : rshift_uns_optab,
2353 shifted, op1, target, unsignedp, methods);
2355 /* Do arithmetic shifts.
2356 Also, if we are going to widen the operand, we can just as well
2357 use an arithmetic right-shift instead of a logical one. */
2358 if (temp == 0 && ! rotate
2359 && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2361 enum optab_methods methods1 = methods;
2363 /* If trying to widen a log shift to an arithmetic shift,
2364 don't accept an arithmetic shift of the same size. */
2365 if (unsignedp)
2366 methods1 = OPTAB_MUST_WIDEN;
2368 /* Arithmetic shift */
2370 temp = expand_binop (mode,
2371 left ? lshift_optab : rshift_arith_optab,
2372 shifted, op1, target, unsignedp, methods1);
2375 /* We used to try extzv here for logical right shifts, but that was
2376 only useful for one machine, the VAX, and caused poor code
2377 generation there for lshrdi3, so the code was deleted and a
2378 define_expand for lshrsi3 was added to vax.md. */
2381 gcc_assert (temp);
2382 return temp;
2385 /* Output a shift instruction for expression code CODE,
2386 with SHIFTED being the rtx for the value to shift,
2387 and AMOUNT the amount to shift by.
2388 Store the result in the rtx TARGET, if that is convenient.
2389 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2390 Return the rtx for where the value is. */
2393 expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2394 int amount, rtx target, int unsignedp)
2396 return expand_shift_1 (code, mode,
2397 shifted, GEN_INT (amount), target, unsignedp);
2400 /* Output a shift instruction for expression code CODE,
2401 with SHIFTED being the rtx for the value to shift,
2402 and AMOUNT the tree for the amount to shift by.
2403 Store the result in the rtx TARGET, if that is convenient.
2404 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2405 Return the rtx for where the value is. */
2408 expand_variable_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2409 tree amount, rtx target, int unsignedp)
2411 return expand_shift_1 (code, mode,
2412 shifted, expand_normal (amount), target, unsignedp);
2416 /* Indicates the type of fixup needed after a constant multiplication.
2417 BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2418 the result should be negated, and ADD_VARIANT means that the
2419 multiplicand should be added to the result. */
2420 enum mult_variant {basic_variant, negate_variant, add_variant};
2422 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2423 const struct mult_cost *, enum machine_mode mode);
2424 static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
2425 struct algorithm *, enum mult_variant *, int);
2426 static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
2427 const struct algorithm *, enum mult_variant);
2428 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2429 static rtx extract_high_half (enum machine_mode, rtx);
2430 static rtx expmed_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
2431 static rtx expmed_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
2432 int, int);
2433 /* Compute and return the best algorithm for multiplying by T.
2434 The algorithm must cost less than COST_LIMIT.
2435 If retval.cost >= COST_LIMIT, no algorithm was found and all
2436 other fields of the returned struct are undefined.
2437 MODE is the machine mode of the multiplication. */
2439 static void
2440 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2441 const struct mult_cost *cost_limit, enum machine_mode mode)
2443 int m;
2444 struct algorithm *alg_in, *best_alg;
2445 struct mult_cost best_cost;
2446 struct mult_cost new_limit;
2447 int op_cost, op_latency;
2448 unsigned HOST_WIDE_INT orig_t = t;
2449 unsigned HOST_WIDE_INT q;
2450 int maxm, hash_index;
2451 bool cache_hit = false;
2452 enum alg_code cache_alg = alg_zero;
2453 bool speed = optimize_insn_for_speed_p ();
2454 enum machine_mode imode;
2455 struct alg_hash_entry *entry_ptr;
2457 /* Indicate that no algorithm is yet found. If no algorithm
2458 is found, this value will be returned and indicate failure. */
2459 alg_out->cost.cost = cost_limit->cost + 1;
2460 alg_out->cost.latency = cost_limit->latency + 1;
2462 if (cost_limit->cost < 0
2463 || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2464 return;
2466 /* Be prepared for vector modes. */
2467 imode = GET_MODE_INNER (mode);
2468 if (imode == VOIDmode)
2469 imode = mode;
2471 maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2473 /* Restrict the bits of "t" to the multiplication's mode. */
2474 t &= GET_MODE_MASK (imode);
2476 /* t == 1 can be done in zero cost. */
2477 if (t == 1)
2479 alg_out->ops = 1;
2480 alg_out->cost.cost = 0;
2481 alg_out->cost.latency = 0;
2482 alg_out->op[0] = alg_m;
2483 return;
2486 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2487 fail now. */
2488 if (t == 0)
2490 if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2491 return;
2492 else
2494 alg_out->ops = 1;
2495 alg_out->cost.cost = zero_cost (speed);
2496 alg_out->cost.latency = zero_cost (speed);
2497 alg_out->op[0] = alg_zero;
2498 return;
2502 /* We'll be needing a couple extra algorithm structures now. */
2504 alg_in = XALLOCA (struct algorithm);
2505 best_alg = XALLOCA (struct algorithm);
2506 best_cost = *cost_limit;
2508 /* Compute the hash index. */
2509 hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2511 /* See if we already know what to do for T. */
2512 entry_ptr = alg_hash_entry_ptr (hash_index);
2513 if (entry_ptr->t == t
2514 && entry_ptr->mode == mode
2516 && entry_ptr->speed == speed
2517 && entry_ptr->alg != alg_unknown)
2519 cache_alg = entry_ptr->alg;
2521 if (cache_alg == alg_impossible)
2523 /* The cache tells us that it's impossible to synthesize
2524 multiplication by T within entry_ptr->cost. */
2525 if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2526 /* COST_LIMIT is at least as restrictive as the one
2527 recorded in the hash table, in which case we have no
2528 hope of synthesizing a multiplication. Just
2529 return. */
2530 return;
2532 /* If we get here, COST_LIMIT is less restrictive than the
2533 one recorded in the hash table, so we may be able to
2534 synthesize a multiplication. Proceed as if we didn't
2535 have the cache entry. */
2537 else
2539 if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2540 /* The cached algorithm shows that this multiplication
2541 requires more cost than COST_LIMIT. Just return. This
2542 way, we don't clobber this cache entry with
2543 alg_impossible but retain useful information. */
2544 return;
2546 cache_hit = true;
2548 switch (cache_alg)
2550 case alg_shift:
2551 goto do_alg_shift;
2553 case alg_add_t_m2:
2554 case alg_sub_t_m2:
2555 goto do_alg_addsub_t_m2;
2557 case alg_add_factor:
2558 case alg_sub_factor:
2559 goto do_alg_addsub_factor;
2561 case alg_add_t2_m:
2562 goto do_alg_add_t2_m;
2564 case alg_sub_t2_m:
2565 goto do_alg_sub_t2_m;
2567 default:
2568 gcc_unreachable ();
2573 /* If we have a group of zero bits at the low-order part of T, try
2574 multiplying by the remaining bits and then doing a shift. */
2576 if ((t & 1) == 0)
2578 do_alg_shift:
2579 m = floor_log2 (t & -t); /* m = number of low zero bits */
2580 if (m < maxm)
2582 q = t >> m;
2583 /* The function expand_shift will choose between a shift and
2584 a sequence of additions, so the observed cost is given as
2585 MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)). */
2586 op_cost = m * add_cost (speed, mode);
2587 if (shift_cost (speed, mode, m) < op_cost)
2588 op_cost = shift_cost (speed, mode, m);
2589 new_limit.cost = best_cost.cost - op_cost;
2590 new_limit.latency = best_cost.latency - op_cost;
2591 synth_mult (alg_in, q, &new_limit, mode);
2593 alg_in->cost.cost += op_cost;
2594 alg_in->cost.latency += op_cost;
2595 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2597 struct algorithm *x;
2598 best_cost = alg_in->cost;
2599 x = alg_in, alg_in = best_alg, best_alg = x;
2600 best_alg->log[best_alg->ops] = m;
2601 best_alg->op[best_alg->ops] = alg_shift;
2604 /* See if treating ORIG_T as a signed number yields a better
2605 sequence. Try this sequence only for a negative ORIG_T
2606 as it would be useless for a non-negative ORIG_T. */
2607 if ((HOST_WIDE_INT) orig_t < 0)
2609 /* Shift ORIG_T as follows because a right shift of a
2610 negative-valued signed type is implementation
2611 defined. */
2612 q = ~(~orig_t >> m);
2613 /* The function expand_shift will choose between a shift
2614 and a sequence of additions, so the observed cost is
2615 given as MIN (m * add_cost(speed, mode),
2616 shift_cost(speed, mode, m)). */
2617 op_cost = m * add_cost (speed, mode);
2618 if (shift_cost (speed, mode, m) < op_cost)
2619 op_cost = shift_cost (speed, mode, m);
2620 new_limit.cost = best_cost.cost - op_cost;
2621 new_limit.latency = best_cost.latency - op_cost;
2622 synth_mult (alg_in, q, &new_limit, mode);
2624 alg_in->cost.cost += op_cost;
2625 alg_in->cost.latency += op_cost;
2626 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2628 struct algorithm *x;
2629 best_cost = alg_in->cost;
2630 x = alg_in, alg_in = best_alg, best_alg = x;
2631 best_alg->log[best_alg->ops] = m;
2632 best_alg->op[best_alg->ops] = alg_shift;
2636 if (cache_hit)
2637 goto done;
2640 /* If we have an odd number, add or subtract one. */
2641 if ((t & 1) != 0)
2643 unsigned HOST_WIDE_INT w;
2645 do_alg_addsub_t_m2:
2646 for (w = 1; (w & t) != 0; w <<= 1)
2648 /* If T was -1, then W will be zero after the loop. This is another
2649 case where T ends with ...111. Handling this as (T + 1) and
2650 subtracting 1 produces slightly better code and results in much
2651 faster algorithm selection than treating it like the ...0111 case
2652 below. */
2653 if (w == 0
2654 || (w > 2
2655 /* Reject the case where t is 3.
2656 Thus we prefer addition in that case. */
2657 && t != 3))
2659 /* T ends with ...111. Multiply by (T + 1) and subtract 1. */
2661 op_cost = add_cost (speed, mode);
2662 new_limit.cost = best_cost.cost - op_cost;
2663 new_limit.latency = best_cost.latency - op_cost;
2664 synth_mult (alg_in, t + 1, &new_limit, mode);
2666 alg_in->cost.cost += op_cost;
2667 alg_in->cost.latency += op_cost;
2668 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2670 struct algorithm *x;
2671 best_cost = alg_in->cost;
2672 x = alg_in, alg_in = best_alg, best_alg = x;
2673 best_alg->log[best_alg->ops] = 0;
2674 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2677 else
2679 /* T ends with ...01 or ...011. Multiply by (T - 1) and add 1. */
2681 op_cost = add_cost (speed, mode);
2682 new_limit.cost = best_cost.cost - op_cost;
2683 new_limit.latency = best_cost.latency - op_cost;
2684 synth_mult (alg_in, t - 1, &new_limit, mode);
2686 alg_in->cost.cost += op_cost;
2687 alg_in->cost.latency += op_cost;
2688 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2690 struct algorithm *x;
2691 best_cost = alg_in->cost;
2692 x = alg_in, alg_in = best_alg, best_alg = x;
2693 best_alg->log[best_alg->ops] = 0;
2694 best_alg->op[best_alg->ops] = alg_add_t_m2;
2698 /* We may be able to calculate a * -7, a * -15, a * -31, etc.
2699 quickly with a - a * n for some appropriate constant n. */
2700 m = exact_log2 (-orig_t + 1);
2701 if (m >= 0 && m < maxm)
2703 op_cost = shiftsub1_cost (speed, mode, m);
2704 new_limit.cost = best_cost.cost - op_cost;
2705 new_limit.latency = best_cost.latency - op_cost;
2706 synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2707 &new_limit, mode);
2709 alg_in->cost.cost += op_cost;
2710 alg_in->cost.latency += op_cost;
2711 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2713 struct algorithm *x;
2714 best_cost = alg_in->cost;
2715 x = alg_in, alg_in = best_alg, best_alg = x;
2716 best_alg->log[best_alg->ops] = m;
2717 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2721 if (cache_hit)
2722 goto done;
2725 /* Look for factors of t of the form
2726 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2727 If we find such a factor, we can multiply by t using an algorithm that
2728 multiplies by q, shift the result by m and add/subtract it to itself.
2730 We search for large factors first and loop down, even if large factors
2731 are less probable than small; if we find a large factor we will find a
2732 good sequence quickly, and therefore be able to prune (by decreasing
2733 COST_LIMIT) the search. */
2735 do_alg_addsub_factor:
2736 for (m = floor_log2 (t - 1); m >= 2; m--)
2738 unsigned HOST_WIDE_INT d;
2740 d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2741 if (t % d == 0 && t > d && m < maxm
2742 && (!cache_hit || cache_alg == alg_add_factor))
2744 /* If the target has a cheap shift-and-add instruction use
2745 that in preference to a shift insn followed by an add insn.
2746 Assume that the shift-and-add is "atomic" with a latency
2747 equal to its cost, otherwise assume that on superscalar
2748 hardware the shift may be executed concurrently with the
2749 earlier steps in the algorithm. */
2750 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2751 if (shiftadd_cost (speed, mode, m) < op_cost)
2753 op_cost = shiftadd_cost (speed, mode, m);
2754 op_latency = op_cost;
2756 else
2757 op_latency = add_cost (speed, mode);
2759 new_limit.cost = best_cost.cost - op_cost;
2760 new_limit.latency = best_cost.latency - op_latency;
2761 synth_mult (alg_in, t / d, &new_limit, mode);
2763 alg_in->cost.cost += op_cost;
2764 alg_in->cost.latency += op_latency;
2765 if (alg_in->cost.latency < op_cost)
2766 alg_in->cost.latency = op_cost;
2767 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2769 struct algorithm *x;
2770 best_cost = alg_in->cost;
2771 x = alg_in, alg_in = best_alg, best_alg = x;
2772 best_alg->log[best_alg->ops] = m;
2773 best_alg->op[best_alg->ops] = alg_add_factor;
2775 /* Other factors will have been taken care of in the recursion. */
2776 break;
2779 d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2780 if (t % d == 0 && t > d && m < maxm
2781 && (!cache_hit || cache_alg == alg_sub_factor))
2783 /* If the target has a cheap shift-and-subtract insn use
2784 that in preference to a shift insn followed by a sub insn.
2785 Assume that the shift-and-sub is "atomic" with a latency
2786 equal to its cost, otherwise assume that on superscalar
2787 hardware the shift may be executed concurrently with the
2788 earlier steps in the algorithm. */
2789 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2790 if (shiftsub0_cost (speed, mode, m) < op_cost)
2792 op_cost = shiftsub0_cost (speed, mode, m);
2793 op_latency = op_cost;
2795 else
2796 op_latency = add_cost (speed, mode);
2798 new_limit.cost = best_cost.cost - op_cost;
2799 new_limit.latency = best_cost.latency - op_latency;
2800 synth_mult (alg_in, t / d, &new_limit, mode);
2802 alg_in->cost.cost += op_cost;
2803 alg_in->cost.latency += op_latency;
2804 if (alg_in->cost.latency < op_cost)
2805 alg_in->cost.latency = op_cost;
2806 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2808 struct algorithm *x;
2809 best_cost = alg_in->cost;
2810 x = alg_in, alg_in = best_alg, best_alg = x;
2811 best_alg->log[best_alg->ops] = m;
2812 best_alg->op[best_alg->ops] = alg_sub_factor;
2814 break;
2817 if (cache_hit)
2818 goto done;
2820 /* Try shift-and-add (load effective address) instructions,
2821 i.e. do a*3, a*5, a*9. */
2822 if ((t & 1) != 0)
2824 do_alg_add_t2_m:
2825 q = t - 1;
2826 q = q & -q;
2827 m = exact_log2 (q);
2828 if (m >= 0 && m < maxm)
2830 op_cost = shiftadd_cost (speed, mode, m);
2831 new_limit.cost = best_cost.cost - op_cost;
2832 new_limit.latency = best_cost.latency - op_cost;
2833 synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2835 alg_in->cost.cost += op_cost;
2836 alg_in->cost.latency += op_cost;
2837 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2839 struct algorithm *x;
2840 best_cost = alg_in->cost;
2841 x = alg_in, alg_in = best_alg, best_alg = x;
2842 best_alg->log[best_alg->ops] = m;
2843 best_alg->op[best_alg->ops] = alg_add_t2_m;
2846 if (cache_hit)
2847 goto done;
2849 do_alg_sub_t2_m:
2850 q = t + 1;
2851 q = q & -q;
2852 m = exact_log2 (q);
2853 if (m >= 0 && m < maxm)
2855 op_cost = shiftsub0_cost (speed, mode, m);
2856 new_limit.cost = best_cost.cost - op_cost;
2857 new_limit.latency = best_cost.latency - op_cost;
2858 synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2860 alg_in->cost.cost += op_cost;
2861 alg_in->cost.latency += op_cost;
2862 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2864 struct algorithm *x;
2865 best_cost = alg_in->cost;
2866 x = alg_in, alg_in = best_alg, best_alg = x;
2867 best_alg->log[best_alg->ops] = m;
2868 best_alg->op[best_alg->ops] = alg_sub_t2_m;
2871 if (cache_hit)
2872 goto done;
2875 done:
2876 /* If best_cost has not decreased, we have not found any algorithm. */
2877 if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2879 /* We failed to find an algorithm. Record alg_impossible for
2880 this case (that is, <T, MODE, COST_LIMIT>) so that next time
2881 we are asked to find an algorithm for T within the same or
2882 lower COST_LIMIT, we can immediately return to the
2883 caller. */
2884 entry_ptr->t = t;
2885 entry_ptr->mode = mode;
2886 entry_ptr->speed = speed;
2887 entry_ptr->alg = alg_impossible;
2888 entry_ptr->cost = *cost_limit;
2889 return;
2892 /* Cache the result. */
2893 if (!cache_hit)
2895 entry_ptr->t = t;
2896 entry_ptr->mode = mode;
2897 entry_ptr->speed = speed;
2898 entry_ptr->alg = best_alg->op[best_alg->ops];
2899 entry_ptr->cost.cost = best_cost.cost;
2900 entry_ptr->cost.latency = best_cost.latency;
2903 /* If we are getting too long a sequence for `struct algorithm'
2904 to record, make this search fail. */
2905 if (best_alg->ops == MAX_BITS_PER_WORD)
2906 return;
2908 /* Copy the algorithm from temporary space to the space at alg_out.
2909 We avoid using structure assignment because the majority of
2910 best_alg is normally undefined, and this is a critical function. */
2911 alg_out->ops = best_alg->ops + 1;
2912 alg_out->cost = best_cost;
2913 memcpy (alg_out->op, best_alg->op,
2914 alg_out->ops * sizeof *alg_out->op);
2915 memcpy (alg_out->log, best_alg->log,
2916 alg_out->ops * sizeof *alg_out->log);
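/* For illustration: for t = 45 the factor search above can find
   45 = 3 * (2^4 - 1), giving the two-step sequence
   accum = (x << 1) + x;  accum = (accum << 4) - accum;
   i.e. 3*x followed by 45*x, provided the shift-add/shift-sub costs make
   it the cheapest candidate within COST_LIMIT.  */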
2919 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2920 Try three variations:
2922 - a shift/add sequence based on VAL itself
2923 - a shift/add sequence based on -VAL, followed by a negation
2924 - a shift/add sequence based on VAL - 1, followed by an addition.
2926 Return true if the cheapest of these cost less than MULT_COST,
2927 describing the algorithm in *ALG and final fixup in *VARIANT. */
2929 static bool
2930 choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
2931 struct algorithm *alg, enum mult_variant *variant,
2932 int mult_cost)
2934 struct algorithm alg2;
2935 struct mult_cost limit;
2936 int op_cost;
2937 bool speed = optimize_insn_for_speed_p ();
2939 /* Fail quickly for impossible bounds. */
2940 if (mult_cost < 0)
2941 return false;
2943 /* Ensure that mult_cost provides a reasonable upper bound.
2944 Any constant multiplication can be performed with fewer
2945 than 2 * bits additions. */
2946 op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
2947 if (mult_cost > op_cost)
2948 mult_cost = op_cost;
2950 *variant = basic_variant;
2951 limit.cost = mult_cost;
2952 limit.latency = mult_cost;
2953 synth_mult (alg, val, &limit, mode);
2955 /* This works only if the inverted value actually fits in an
2956 `unsigned int' */
2957 if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
2959 op_cost = neg_cost(speed, mode);
2960 if (MULT_COST_LESS (&alg->cost, mult_cost))
2962 limit.cost = alg->cost.cost - op_cost;
2963 limit.latency = alg->cost.latency - op_cost;
2965 else
2967 limit.cost = mult_cost - op_cost;
2968 limit.latency = mult_cost - op_cost;
2971 synth_mult (&alg2, -val, &limit, mode);
2972 alg2.cost.cost += op_cost;
2973 alg2.cost.latency += op_cost;
2974 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2975 *alg = alg2, *variant = negate_variant;
2978 /* This proves very useful for division-by-constant. */
2979 op_cost = add_cost (speed, mode);
2980 if (MULT_COST_LESS (&alg->cost, mult_cost))
2982 limit.cost = alg->cost.cost - op_cost;
2983 limit.latency = alg->cost.latency - op_cost;
2985 else
2987 limit.cost = mult_cost - op_cost;
2988 limit.latency = mult_cost - op_cost;
2991 synth_mult (&alg2, val - 1, &limit, mode);
2992 alg2.cost.cost += op_cost;
2993 alg2.cost.latency += op_cost;
2994 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2995 *alg = alg2, *variant = add_variant;
2997 return MULT_COST_LESS (&alg->cost, mult_cost);
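/* For illustration: for val = -5 the negate variant may win, synthesizing
   5*x as (x << 2) + x and then negating the result, instead of synthesizing
   the bit pattern of -5 directly; the add variant similarly tries a sequence
   for VAL - 1 followed by one extra addition of the multiplicand.  */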
3000 /* A subroutine of expand_mult, used for constant multiplications.
3001 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
3002 convenient. Use the shift/add sequence described by ALG and apply
3003 the final fixup specified by VARIANT. */
3005 static rtx
3006 expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
3007 rtx target, const struct algorithm *alg,
3008 enum mult_variant variant)
3010 HOST_WIDE_INT val_so_far;
3011 rtx insn, accum, tem;
3012 int opno;
3013 enum machine_mode nmode;
3015 /* Avoid referencing memory over and over and invalid sharing
3016 on SUBREGs. */
3017 op0 = force_reg (mode, op0);
3019 /* ACCUM starts out either as OP0 or as a zero, depending on
3020 the first operation. */
3022 if (alg->op[0] == alg_zero)
3024 accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
3025 val_so_far = 0;
3027 else if (alg->op[0] == alg_m)
3029 accum = copy_to_mode_reg (mode, op0);
3030 val_so_far = 1;
3032 else
3033 gcc_unreachable ();
3035 for (opno = 1; opno < alg->ops; opno++)
3037 int log = alg->log[opno];
3038 rtx shift_subtarget = optimize ? 0 : accum;
3039 rtx add_target
3040 = (opno == alg->ops - 1 && target != 0 && variant != add_variant
3041 && !optimize)
3042 ? target : 0;
3043 rtx accum_target = optimize ? 0 : accum;
3044 rtx accum_inner;
3046 switch (alg->op[opno])
3048 case alg_shift:
3049 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3050 /* REG_EQUAL note will be attached to the following insn. */
3051 emit_move_insn (accum, tem);
3052 val_so_far <<= log;
3053 break;
3055 case alg_add_t_m2:
3056 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3057 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3058 add_target ? add_target : accum_target);
3059 val_so_far += (HOST_WIDE_INT) 1 << log;
3060 break;
3062 case alg_sub_t_m2:
3063 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3064 accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
3065 add_target ? add_target : accum_target);
3066 val_so_far -= (HOST_WIDE_INT) 1 << log;
3067 break;
3069 case alg_add_t2_m:
3070 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3071 log, shift_subtarget, 0);
3072 accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3073 add_target ? add_target : accum_target);
3074 val_so_far = (val_so_far << log) + 1;
3075 break;
3077 case alg_sub_t2_m:
3078 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3079 log, shift_subtarget, 0);
3080 accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3081 add_target ? add_target : accum_target);
3082 val_so_far = (val_so_far << log) - 1;
3083 break;
3085 case alg_add_factor:
3086 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3087 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3088 add_target ? add_target : accum_target);
3089 val_so_far += val_so_far << log;
3090 break;
3092 case alg_sub_factor:
3093 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3094 accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3095 (add_target
3096 ? add_target : (optimize ? 0 : tem)));
3097 val_so_far = (val_so_far << log) - val_so_far;
3098 break;
3100 default:
3101 gcc_unreachable ();
3104 if (SCALAR_INT_MODE_P (mode))
3106 /* Write a REG_EQUAL note on the last insn so that we can cse
3107 multiplication sequences. Note that if ACCUM is a SUBREG,
3108 we've set the inner register and must properly indicate that. */
3109 tem = op0, nmode = mode;
3110 accum_inner = accum;
3111 if (GET_CODE (accum) == SUBREG)
3113 accum_inner = SUBREG_REG (accum);
3114 nmode = GET_MODE (accum_inner);
3115 tem = gen_lowpart (nmode, op0);
3118 insn = get_last_insn ();
3119 set_dst_reg_note (insn, REG_EQUAL,
3120 gen_rtx_MULT (nmode, tem, GEN_INT (val_so_far)),
3121 accum_inner);
3125 if (variant == negate_variant)
3127 val_so_far = -val_so_far;
3128 accum = expand_unop (mode, neg_optab, accum, target, 0);
3130 else if (variant == add_variant)
3132 val_so_far = val_so_far + 1;
3133 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3136 /* Compare only the bits of val and val_so_far that are significant
3137 in the result mode, to avoid sign-/zero-extension confusion. */
3138 nmode = GET_MODE_INNER (mode);
3139 if (nmode == VOIDmode)
3140 nmode = mode;
3141 val &= GET_MODE_MASK (nmode);
3142 val_so_far &= GET_MODE_MASK (nmode);
3143 gcc_assert (val == val_so_far);
3145 return accum;
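/* For illustration: multiplying by 10 with an algorithm such as
   { alg_m, alg_add_t2_m (log 2), alg_shift (log 1) } expands to
   accum = op0;                    (val_so_far = 1)
   accum = (accum << 2) + op0;     (val_so_far = 5)
   accum = accum << 1;             (val_so_far = 10)
   and the final assertion checks that val_so_far matches VAL.  */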
3148 /* Perform a multiplication and return an rtx for the result.
3149 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3150 TARGET is a suggestion for where to store the result (an rtx).
3152 We check specially for a constant integer as OP1.
3153 If you want this check for OP0 as well, then before calling
3154 you should swap the two operands if OP0 would be constant. */
3157 expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3158 int unsignedp)
3160 enum mult_variant variant;
3161 struct algorithm algorithm;
3162 rtx scalar_op1;
3163 int max_cost;
3164 bool speed = optimize_insn_for_speed_p ();
3165 bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3167 if (CONSTANT_P (op0))
3169 rtx temp = op0;
3170 op0 = op1;
3171 op1 = temp;
3174 /* For vectors, there are several simplifications that can be made if
3175 all elements of the vector constant are identical. */
3176 scalar_op1 = op1;
3177 if (GET_CODE (op1) == CONST_VECTOR)
3179 int i, n = CONST_VECTOR_NUNITS (op1);
3180 scalar_op1 = CONST_VECTOR_ELT (op1, 0);
3181 for (i = 1; i < n; ++i)
3182 if (!rtx_equal_p (scalar_op1, CONST_VECTOR_ELT (op1, i)))
3183 goto skip_scalar;
3186 if (INTEGRAL_MODE_P (mode))
3188 rtx fake_reg;
3189 HOST_WIDE_INT coeff;
3190 bool is_neg;
3191 int mode_bitsize;
3193 if (op1 == CONST0_RTX (mode))
3194 return op1;
3195 if (op1 == CONST1_RTX (mode))
3196 return op0;
3197 if (op1 == CONSTM1_RTX (mode))
3198 return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3199 op0, target, 0);
3201 if (do_trapv)
3202 goto skip_synth;
3204 /* These are the operations that are potentially turned into
3205 a sequence of shifts and additions. */
3206 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3208 /* synth_mult does an `unsigned int' multiply. As long as the mode is
3209 less than or equal in size to `unsigned int' this doesn't matter.
3210 If the mode is larger than `unsigned int', then synth_mult works
3211 only if the constant value exactly fits in an `unsigned int' without
3212 any truncation. This means that multiplying by negative values does
3213 not work; results are off by 2^32 on a 32-bit machine. */
3215 if (CONST_INT_P (scalar_op1))
3217 coeff = INTVAL (scalar_op1);
3218 is_neg = coeff < 0;
3220 else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3222 /* If we are multiplying in DImode, it may still be a win
3223 to try to work with shifts and adds. */
3224 if (CONST_DOUBLE_HIGH (scalar_op1) == 0
3225 && CONST_DOUBLE_LOW (scalar_op1) > 0)
3227 coeff = CONST_DOUBLE_LOW (scalar_op1);
3228 is_neg = false;
3230 else if (CONST_DOUBLE_LOW (scalar_op1) == 0)
3232 coeff = CONST_DOUBLE_HIGH (scalar_op1);
3233 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3235 int shift = floor_log2 (coeff) + HOST_BITS_PER_WIDE_INT;
3236 if (shift < HOST_BITS_PER_DOUBLE_INT - 1
3237 || mode_bitsize <= HOST_BITS_PER_DOUBLE_INT)
3238 return expand_shift (LSHIFT_EXPR, mode, op0,
3239 shift, target, unsignedp);
3241 goto skip_synth;
3243 else
3244 goto skip_synth;
3246 else
3247 goto skip_synth;
3249 /* We used to test optimize here, on the grounds that it's better to
3250 produce a smaller program when -O is not used. But this causes
3251 such a terrible slowdown sometimes that it seems better to always
3252 use synth_mult. */
3254 /* Special case powers of two. */
3255 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3256 return expand_shift (LSHIFT_EXPR, mode, op0,
3257 floor_log2 (coeff), target, unsignedp);
3259 fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3261 /* Attempt to handle multiplication of DImode values by negative
3262 coefficients, by performing the multiplication by a positive
3263 multiplier and then inverting the result. */
3264 /* ??? How is this not slightly redundant with the neg variant? */
3265 if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3267 /* It's safe to use -coeff even for INT_MIN, as the
3268 result is interpreted as an unsigned coefficient.
3269 Exclude cost of op0 from max_cost to match the cost
3270 calculation of the synth_mult. */
3271 max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed)
3272 - neg_cost(speed, mode));
3273 if (max_cost > 0
3274 && choose_mult_variant (mode, -coeff, &algorithm,
3275 &variant, max_cost))
3277 rtx temp = expand_mult_const (mode, op0, -coeff, NULL_RTX,
3278 &algorithm, variant);
3279 return expand_unop (mode, neg_optab, temp, target, 0);
3283 /* Exclude cost of op0 from max_cost to match the cost
3284 calculation of the synth_mult. */
3285 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed);
3286 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3287 return expand_mult_const (mode, op0, coeff, target,
3288 &algorithm, variant);
3290 skip_synth:
3292 /* Expand x*2.0 as x+x. */
3293 if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1))
3295 REAL_VALUE_TYPE d;
3296 REAL_VALUE_FROM_CONST_DOUBLE (d, scalar_op1);
3298 if (REAL_VALUES_EQUAL (d, dconst2))
3300 op0 = force_reg (GET_MODE (op0), op0);
3301 return expand_binop (mode, add_optab, op0, op0,
3302 target, unsignedp, OPTAB_LIB_WIDEN);
3305 skip_scalar:
3307 /* This used to use umul_optab if unsigned, but for non-widening multiply
3308 there is no difference between signed and unsigned. */
3309 op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3310 op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3311 gcc_assert (op0);
3312 return op0;
3315 /* Return a cost estimate for multiplying a register by the given
3316 COEFFicient in the given MODE and SPEED. */
3319 mult_by_coeff_cost (HOST_WIDE_INT coeff, enum machine_mode mode, bool speed)
3321 int max_cost;
3322 struct algorithm algorithm;
3323 enum mult_variant variant;
3325 rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3326 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg), speed);
3327 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3328 return algorithm.cost.cost;
3329 else
3330 return max_cost;
3333 /* Perform a widening multiplication and return an rtx for the result.
3334 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3335 TARGET is a suggestion for where to store the result (an rtx).
3336 THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3337 or smul_widen_optab.
3339 We check specially for a constant integer as OP1, comparing the
3340 cost of a widening multiply against the cost of a sequence of shifts
3341 and adds. */
3344 expand_widening_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3345 int unsignedp, optab this_optab)
3347 bool speed = optimize_insn_for_speed_p ();
3348 rtx cop1;
3350 if (CONST_INT_P (op1)
3351 && GET_MODE (op0) != VOIDmode
3352 && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3353 this_optab == umul_widen_optab))
3354 && CONST_INT_P (cop1)
3355 && (INTVAL (cop1) >= 0
3356 || HWI_COMPUTABLE_MODE_P (mode)))
3358 HOST_WIDE_INT coeff = INTVAL (cop1);
3359 int max_cost;
3360 enum mult_variant variant;
3361 struct algorithm algorithm;
3363 /* Special case powers of two. */
3364 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3366 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3367 return expand_shift (LSHIFT_EXPR, mode, op0,
3368 floor_log2 (coeff), target, unsignedp);
3371 /* Exclude cost of op0 from max_cost to match the cost
3372 calculation of the synth_mult. */
3373 max_cost = mul_widen_cost (speed, mode);
3374 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3375 max_cost))
3377 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3378 return expand_mult_const (mode, op0, coeff, target,
3379 &algorithm, variant);
3382 return expand_binop (mode, this_optab, op0, op1, target,
3383 unsignedp, OPTAB_LIB_WIDEN);
3386 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3387 replace division by D, and put the least significant N bits of the result
3388 in *MULTIPLIER_PTR and return the most significant bit.
3390 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3391 needed precision is in PRECISION (should be <= N).
3393 PRECISION should be as small as possible so this function can choose
3394 multiplier more freely.
3396 The rounded-up logarithm of D is placed in *lgup_ptr. A shift count that
3397 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3399 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3400 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
3402 unsigned HOST_WIDE_INT
3403 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3404 unsigned HOST_WIDE_INT *multiplier_ptr,
3405 int *post_shift_ptr, int *lgup_ptr)
3407 HOST_WIDE_INT mhigh_hi, mlow_hi;
3408 unsigned HOST_WIDE_INT mhigh_lo, mlow_lo;
3409 int lgup, post_shift;
3410 int pow, pow2;
3411 unsigned HOST_WIDE_INT nl, dummy1;
3412 HOST_WIDE_INT nh, dummy2;
3414 /* lgup = ceil(log2(divisor)); */
3415 lgup = ceil_log2 (d);
3417 gcc_assert (lgup <= n);
3419 pow = n + lgup;
3420 pow2 = n + lgup - precision;
3422 /* We could handle this with some effort, but this case is much
3423 better handled directly with a scc insn, so rely on caller using
3424 that. */
3425 gcc_assert (pow != HOST_BITS_PER_DOUBLE_INT);
3427 /* mlow = 2^(N + lgup)/d */
3428 if (pow >= HOST_BITS_PER_WIDE_INT)
3430 nh = (HOST_WIDE_INT) 1 << (pow - HOST_BITS_PER_WIDE_INT);
3431 nl = 0;
3433 else
3435 nh = 0;
3436 nl = (unsigned HOST_WIDE_INT) 1 << pow;
3438 div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3439 &mlow_lo, &mlow_hi, &dummy1, &dummy2);
3441 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3442 if (pow2 >= HOST_BITS_PER_WIDE_INT)
3443 nh |= (HOST_WIDE_INT) 1 << (pow2 - HOST_BITS_PER_WIDE_INT);
3444 else
3445 nl |= (unsigned HOST_WIDE_INT) 1 << pow2;
3446 div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3447 &mhigh_lo, &mhigh_hi, &dummy1, &dummy2);
3449 gcc_assert (!mhigh_hi || nh - d < d);
3450 gcc_assert (mhigh_hi <= 1 && mlow_hi <= 1);
3451 /* Assert that mlow < mhigh. */
3452 gcc_assert (mlow_hi < mhigh_hi
3453 || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo));
3455 /* If precision == N, then mlow, mhigh exceed 2^N
3456 (but they do not exceed 2^(N+1)). */
3458 /* Reduce to lowest terms. */
3459 for (post_shift = lgup; post_shift > 0; post_shift--)
3461 unsigned HOST_WIDE_INT ml_lo = (mlow_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mlow_lo >> 1);
3462 unsigned HOST_WIDE_INT mh_lo = (mhigh_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mhigh_lo >> 1);
3463 if (ml_lo >= mh_lo)
3464 break;
3466 mlow_hi = 0;
3467 mlow_lo = ml_lo;
3468 mhigh_hi = 0;
3469 mhigh_lo = mh_lo;
3472 *post_shift_ptr = post_shift;
3473 *lgup_ptr = lgup;
3474 if (n < HOST_BITS_PER_WIDE_INT)
3476 unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3477 *multiplier_ptr = mhigh_lo & mask;
3478 return mhigh_lo >= mask;
3480 else
3482 *multiplier_ptr = mhigh_lo;
3483 return mhigh_hi;
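/* For illustration: for d = 5 with n = precision = 32, lgup is 3,
   mlow = floor (2^35 / 5) = 0x199999999 and
   mhigh = floor ((2^35 + 2^3) / 5) = 0x19999999b; after the reduction loop
   this yields multiplier 0xcccccccd with post_shift 2 and a zero return
   value, so x / 5 == (x * 0xcccccccd) >> 32 >> 2 for 32-bit unsigned x.  */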
3487 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3488 congruent to 1 (mod 2**N). */
3490 static unsigned HOST_WIDE_INT
3491 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3493 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
3495 /* The algorithm notes that the choice y = x satisfies
3496 x*y == 1 mod 2^3, since x is assumed odd.
3497 Each iteration doubles the number of bits of significance in y. */
3499 unsigned HOST_WIDE_INT mask;
3500 unsigned HOST_WIDE_INT y = x;
3501 int nbit = 3;
3503 mask = (n == HOST_BITS_PER_WIDE_INT
3504 ? ~(unsigned HOST_WIDE_INT) 0
3505 : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3507 while (nbit < n)
3509 y = y * (2 - x*y) & mask; /* Modulo 2^N */
3510 nbit *= 2;
3512 return y;
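/* A minimal standalone sketch (illustration only) of the same iteration for a
   fixed 32-bit width, together with the way EXACT_DIV_EXPR below puts it to
   use: when X is known to be a multiple of D, the division can be done by
   shifting out D's power of two and multiplying by the inverse of its odd
   part.  The names are hypothetical and exist only for this example.  */
#if 0
#include <assert.h>
#include <stdint.h>

static uint32_t
example_invert_mod2_32 (uint32_t x)     /* x must be odd */
{
  uint32_t y = x;                       /* correct to 3 bits */
  int nbit = 3;
  while (nbit < 32)
    {
      y = y * (2 - x * y);              /* doubles the number of correct bits */
      nbit *= 2;
    }
  return y;
}

static void
example_exact_division (void)
{
  uint32_t d = 24, x = 25 * 24;         /* x is an exact multiple of d */
  uint32_t inv = example_invert_mod2_32 (d >> 3);
  assert ((d >> 3) * inv == 1);         /* 3 * 0xaaaaaaab == 1 (mod 2^32) */
  assert ((x >> 3) * inv == 25);        /* shift out 2^3, multiply by inverse */
}
#endif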
3515 /* Emit code to adjust ADJ_OPERAND after a multiplication of OP0 and OP1
3516 with the wrong signedness. ADJ_OPERAND is already the high half of the
3517 product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed product
3518 to become unsigned; if UNSIGNEDP is zero, adjust the unsigned product to
3519 become signed.
3521 The result is put in TARGET if that is convenient.
3523 MODE is the mode of operation. */
3526 expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
3527 rtx op1, rtx target, int unsignedp)
3529 rtx tem;
3530 enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3532 tem = expand_shift (RSHIFT_EXPR, mode, op0,
3533 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3534 tem = expand_and (mode, tem, op1, NULL_RTX);
3535 adj_operand
3536 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3537 adj_operand);
3539 tem = expand_shift (RSHIFT_EXPR, mode, op1,
3540 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3541 tem = expand_and (mode, tem, op0, NULL_RTX);
3542 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3543 target);
3545 return target;
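/* A minimal standalone sketch (illustration only) of the identity behind the
   adjustment, for a hypothetical 16-bit mode: the unsigned high part equals
   the signed high part plus OP1 whenever OP0 is negative and plus OP0
   whenever OP1 is negative, all modulo 2^16; the MINUS direction undoes it.
   Arithmetic right shift of a negative value is assumed.  */
#if 0
#include <assert.h>
#include <stdint.h>

static void
example_highpart_adjust_16bit (int16_t x, int16_t y)
{
  uint16_t ux = (uint16_t) x, uy = (uint16_t) y;
  uint16_t uhigh = (uint16_t) (((uint32_t) ux * uy) >> 16);
  uint16_t shigh = (uint16_t) (((int32_t) x * y) >> 16);
  uint16_t adjust = (uint16_t) ((x < 0 ? uy : 0) + (y < 0 ? ux : 0));
  assert (uhigh == (uint16_t) (shigh + adjust));
}
#endif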
3548 /* Subroutine of expmed_mult_highpart. Return the MODE high part of OP. */
3550 static rtx
3551 extract_high_half (enum machine_mode mode, rtx op)
3553 enum machine_mode wider_mode;
3555 if (mode == word_mode)
3556 return gen_highpart (mode, op);
3558 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3560 wider_mode = GET_MODE_WIDER_MODE (mode);
3561 op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3562 GET_MODE_BITSIZE (mode), 0, 1);
3563 return convert_modes (mode, wider_mode, op, 0);
3566 /* Like expmed_mult_highpart, but only consider using a multiplication
3567 optab. OP1 is an rtx for the constant operand. */
3569 static rtx
3570 expmed_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
3571 rtx target, int unsignedp, int max_cost)
3573 rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3574 enum machine_mode wider_mode;
3575 optab moptab;
3576 rtx tem;
3577 int size;
3578 bool speed = optimize_insn_for_speed_p ();
3580 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3582 wider_mode = GET_MODE_WIDER_MODE (mode);
3583 size = GET_MODE_BITSIZE (mode);
3585 /* Firstly, try using a multiplication insn that only generates the needed
3586 high part of the product, and in the sign flavor of unsignedp. */
3587 if (mul_highpart_cost (speed, mode) < max_cost)
3589 moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3590 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3591 unsignedp, OPTAB_DIRECT);
3592 if (tem)
3593 return tem;
3596 /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3597 Need to adjust the result after the multiplication. */
3598 if (size - 1 < BITS_PER_WORD
3599 && (mul_highpart_cost (speed, mode)
3600 + 2 * shift_cost (speed, mode, size-1)
3601 + 4 * add_cost (speed, mode) < max_cost))
3603 moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3604 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3605 unsignedp, OPTAB_DIRECT);
3606 if (tem)
3607 /* We used the wrong signedness. Adjust the result. */
3608 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3609 tem, unsignedp);
3612 /* Try widening multiplication. */
3613 moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3614 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3615 && mul_widen_cost (speed, wider_mode) < max_cost)
3617 tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3618 unsignedp, OPTAB_WIDEN);
3619 if (tem)
3620 return extract_high_half (mode, tem);
3623 /* Try widening the mode and performing a non-widening multiplication. */
3624 if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3625 && size - 1 < BITS_PER_WORD
3626 && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3627 < max_cost))
3629 rtx insns, wop0, wop1;
3631 /* We need to widen the operands, for example to ensure the
3632 constant multiplier is correctly sign or zero extended.
3633 Use a sequence to clean up any instructions emitted by
3634 the conversions if things don't work out. */
3635 start_sequence ();
3636 wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3637 wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3638 tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3639 unsignedp, OPTAB_WIDEN);
3640 insns = get_insns ();
3641 end_sequence ();
3643 if (tem)
3645 emit_insn (insns);
3646 return extract_high_half (mode, tem);
3650 /* Try widening multiplication of opposite signedness, and adjust. */
3651 moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3652 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3653 && size - 1 < BITS_PER_WORD
3654 && (mul_widen_cost (speed, wider_mode)
3655 + 2 * shift_cost (speed, mode, size-1)
3656 + 4 * add_cost (speed, mode) < max_cost))
3658 tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3659 NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3660 if (tem != 0)
3662 tem = extract_high_half (mode, tem);
3663 /* We used the wrong signedness. Adjust the result. */
3664 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3665 target, unsignedp);
3669 return 0;
3672 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3673 putting the high half of the result in TARGET if that is convenient,
3674 and returning where the result is. If the operation cannot be performed,
3675 0 is returned.
3677 MODE is the mode of operation and result.
3679 UNSIGNEDP nonzero means unsigned multiply.
3681 MAX_COST is the total allowed cost for the expanded RTL. */
3683 static rtx
3684 expmed_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
3685 rtx target, int unsignedp, int max_cost)
3687 enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3688 unsigned HOST_WIDE_INT cnst1;
3689 int extra_cost;
3690 bool sign_adjust = false;
3691 enum mult_variant variant;
3692 struct algorithm alg;
3693 rtx tem;
3694 bool speed = optimize_insn_for_speed_p ();
3696 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3697 /* We can't support modes wider than HOST_BITS_PER_WIDE_INT. */
3698 gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3700 cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3702 /* We can't optimize modes wider than BITS_PER_WORD.
3703 ??? We might be able to perform double-word arithmetic if
3704 mode == word_mode; however, all the cost calculations in
3705 synth_mult etc. assume single-word operations. */
3706 if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3707 return expmed_mult_highpart_optab (mode, op0, op1, target,
3708 unsignedp, max_cost);
3710 extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3712 /* Check whether we try to multiply by a negative constant. */
3713 if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3715 sign_adjust = true;
3716 extra_cost += add_cost (speed, mode);
3719 /* See whether shift/add multiplication is cheap enough. */
3720 if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3721 max_cost - extra_cost))
3723 /* See whether the specialized multiplication optabs are
3724 cheaper than the shift/add version. */
3725 tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3726 alg.cost.cost + extra_cost);
3727 if (tem)
3728 return tem;
3730 tem = convert_to_mode (wider_mode, op0, unsignedp);
3731 tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3732 tem = extract_high_half (mode, tem);
3734 /* Adjust result for signedness. */
3735 if (sign_adjust)
3736 tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3738 return tem;
3740 return expmed_mult_highpart_optab (mode, op0, op1, target,
3741 unsignedp, max_cost);
3745 /* Expand signed modulus of OP0 by a power of two D in mode MODE. */
3747 static rtx
3748 expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3750 unsigned HOST_WIDE_INT masklow, maskhigh;
3751 rtx result, temp, shift, label;
3752 int logd;
3754 logd = floor_log2 (d);
3755 result = gen_reg_rtx (mode);
3757 /* Avoid conditional branches when they're expensive. */
3758 if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3759 && optimize_insn_for_speed_p ())
3761 rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3762 mode, 0, -1);
3763 if (signmask)
3765 signmask = force_reg (mode, signmask);
3766 masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3767 shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3769 /* Use the rtx_cost of a LSHIFTRT instruction to determine
3770 which instruction sequence to use. If logical right shifts
3771 are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
3772 use a LSHIFTRT, 1 ADD, 1 SUB and an AND. */
3774 temp = gen_rtx_LSHIFTRT (mode, result, shift);
3775 if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3776 || (set_src_cost (temp, optimize_insn_for_speed_p ())
3777 > COSTS_N_INSNS (2)))
3779 temp = expand_binop (mode, xor_optab, op0, signmask,
3780 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3781 temp = expand_binop (mode, sub_optab, temp, signmask,
3782 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3783 temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3784 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3785 temp = expand_binop (mode, xor_optab, temp, signmask,
3786 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3787 temp = expand_binop (mode, sub_optab, temp, signmask,
3788 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3790 else
3792 signmask = expand_binop (mode, lshr_optab, signmask, shift,
3793 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3794 signmask = force_reg (mode, signmask);
3796 temp = expand_binop (mode, add_optab, op0, signmask,
3797 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3798 temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3799 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3800 temp = expand_binop (mode, sub_optab, temp, signmask,
3801 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3803 return temp;
3807 /* Mask contains the mode's signbit and the significant bits of the
3808 modulus. By including the signbit in the operation, many targets
3809 can avoid an explicit compare operation in the following comparison
3810 against zero. */
3812 masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3813 if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
3815 masklow |= (HOST_WIDE_INT) -1 << (GET_MODE_BITSIZE (mode) - 1);
3816 maskhigh = -1;
3818 else
3819 maskhigh = (HOST_WIDE_INT) -1
3820 << (GET_MODE_BITSIZE (mode) - HOST_BITS_PER_WIDE_INT - 1);
3822 temp = expand_binop (mode, and_optab, op0,
3823 immed_double_const (masklow, maskhigh, mode),
3824 result, 1, OPTAB_LIB_WIDEN);
3825 if (temp != result)
3826 emit_move_insn (result, temp);
3828 label = gen_label_rtx ();
3829 do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3831 temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3832 0, OPTAB_LIB_WIDEN);
3833 masklow = (HOST_WIDE_INT) -1 << logd;
3834 maskhigh = -1;
3835 temp = expand_binop (mode, ior_optab, temp,
3836 immed_double_const (masklow, maskhigh, mode),
3837 result, 1, OPTAB_LIB_WIDEN);
3838 temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3839 0, OPTAB_LIB_WIDEN);
3840 if (temp != result)
3841 emit_move_insn (result, temp);
3842 emit_label (label);
3843 return result;
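/* A minimal standalone sketch (illustration only) of the cheaper branch-free
   sequence above, for a 32-bit value and 1 <= logd <= 31: add d - 1 when the
   operand is negative, mask, then remove the bias again.  The sign mask here
   comes from an arithmetic right shift rather than a store-flag insn.  */
#if 0
#include <stdint.h>

static int32_t
example_smod_pow2 (int32_t x, int logd)
{
  uint32_t mask = ((uint32_t) 1 << logd) - 1;
  uint32_t bias = (uint32_t) (x >> 31) >> (32 - logd);  /* d - 1 if x < 0 */
  return (int32_t) (((uint32_t) x + bias) & mask) - (int32_t) bias;
}

/* e.g. example_smod_pow2 (-5, 2) == -1 and example_smod_pow2 (5, 2) == 1,
   matching the truncating % of C.  */
#endif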
3846 /* Expand signed division of OP0 by a power of two D in mode MODE.
3847 This routine is only called for positive values of D. */
3849 static rtx
3850 expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3852 rtx temp, label;
3853 int logd;
3855 logd = floor_log2 (d);
3857 if (d == 2
3858 && BRANCH_COST (optimize_insn_for_speed_p (),
3859 false) >= 1)
3861 temp = gen_reg_rtx (mode);
3862 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3863 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3864 0, OPTAB_LIB_WIDEN);
3865 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3868 #ifdef HAVE_conditional_move
3869 if (BRANCH_COST (optimize_insn_for_speed_p (), false)
3870 >= 2)
3872 rtx temp2;
3874 /* ??? emit_conditional_move forces a stack adjustment via
3875 compare_from_rtx so, if the sequence is discarded, it will
3876 be lost. Do it now instead. */
3877 do_pending_stack_adjust ();
3879 start_sequence ();
3880 temp2 = copy_to_mode_reg (mode, op0);
3881 temp = expand_binop (mode, add_optab, temp2, GEN_INT (d-1),
3882 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3883 temp = force_reg (mode, temp);
3885 /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */
3886 temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3887 mode, temp, temp2, mode, 0);
3888 if (temp2)
3890 rtx seq = get_insns ();
3891 end_sequence ();
3892 emit_insn (seq);
3893 return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
3895 end_sequence ();
3897 #endif
3899 if (BRANCH_COST (optimize_insn_for_speed_p (),
3900 false) >= 2)
3902 int ushift = GET_MODE_BITSIZE (mode) - logd;
3904 temp = gen_reg_rtx (mode);
3905 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3906 if (shift_cost (optimize_insn_for_speed_p (), mode, ushift)
3907 > COSTS_N_INSNS (1))
3908 temp = expand_binop (mode, and_optab, temp, GEN_INT (d - 1),
3909 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3910 else
3911 temp = expand_shift (RSHIFT_EXPR, mode, temp,
3912 ushift, NULL_RTX, 1);
3913 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3914 0, OPTAB_LIB_WIDEN);
3915 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3918 label = gen_label_rtx ();
3919 temp = copy_to_mode_reg (mode, op0);
3920 do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3921 expand_inc (temp, GEN_INT (d - 1));
3922 emit_label (label);
3923 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
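/* A minimal standalone sketch (illustration only) of the branch-free form
   used above when store-flag and shift are cheap, for a 32-bit value and
   1 <= logd <= 31: add d - 1 only when the operand is negative, then shift
   right arithmetically.  Arithmetic right shift of a negative value is
   assumed.  */
#if 0
#include <stdint.h>

static int32_t
example_sdiv_pow2 (int32_t x, int logd)
{
  uint32_t bias = (uint32_t) (x >> 31) >> (32 - logd);  /* d - 1 if x < 0 */
  return (int32_t) ((uint32_t) x + bias) >> logd;
}

/* e.g. example_sdiv_pow2 (-5, 2) == -1 and example_sdiv_pow2 (-8, 2) == -2,
   matching truncating division.  */
#endif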
3926 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3927 if that is convenient, and returning where the result is.
3928 You may request either the quotient or the remainder as the result;
3929 specify REM_FLAG nonzero to get the remainder.
3931 CODE is the expression code for which kind of division this is;
3932 it controls how rounding is done. MODE is the machine mode to use.
3933 UNSIGNEDP nonzero means do unsigned division. */
3935 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3936 and then correct it by or'ing in missing high bits
3937 if result of ANDI is nonzero.
3938 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3939 This could optimize to a bfexts instruction.
3940 But C doesn't use these operations, so their optimizations are
3941 left for later. */
3942 /* ??? For modulo, we don't actually need the highpart of the first product,
3943 the low part will do nicely. And for small divisors, the second multiply
3944 can also be a low-part only multiply or even be completely left out.
3945 E.g. to calculate the remainder of a division by 3 with a 32 bit
3946 multiply, multiply with 0x55555556 and extract the upper two bits;
3947 the result is exact for inputs up to 0x1fffffff.
3948 The input range can be reduced by using cross-sum rules.
3949 For odd divisors >= 3, the following table gives right shift counts
3950 so that if a number is shifted by an integer multiple of the given
3951 amount, the remainder stays the same:
3952 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3953 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3954 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3955 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3956 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3958 Cross-sum rules for even numbers can be derived by leaving as many bits
3959 to the right alone as the divisor has zeros to the right.
3960 E.g. if x is an unsigned 32 bit number:
3961 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
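/* A minimal standalone sketch (illustration only) of the divide-by-3
   remainder trick quoted above, in 32-bit arithmetic.  */
#if 0
#include <assert.h>
#include <stdint.h>

static void
example_mod3_by_multiply (uint32_t x)
{
  assert (x <= 0x1fffffff);
  /* The low half of x * 0x55555556 approximates frac(x/3) scaled by 2^32,
     so its top two bits give the remainder.  */
  assert (x % 3 == (uint32_t) (x * 0x55555556u) >> 30);
}
#endif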
3965 expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3966 rtx op0, rtx op1, rtx target, int unsignedp)
3968 enum machine_mode compute_mode;
3969 rtx tquotient;
3970 rtx quotient = 0, remainder = 0;
3971 rtx last;
3972 int size;
3973 rtx insn;
3974 optab optab1, optab2;
3975 int op1_is_constant, op1_is_pow2 = 0;
3976 int max_cost, extra_cost;
3977 static HOST_WIDE_INT last_div_const = 0;
3978 static HOST_WIDE_INT ext_op1;
3979 bool speed = optimize_insn_for_speed_p ();
3981 op1_is_constant = CONST_INT_P (op1);
3982 if (op1_is_constant)
3984 ext_op1 = INTVAL (op1);
3985 if (unsignedp)
3986 ext_op1 &= GET_MODE_MASK (mode);
3987 op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3988 || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3992 This is the structure of expand_divmod:
3994 First comes code to fix up the operands so we can perform the operations
3995 correctly and efficiently.
3997 Second comes a switch statement with code specific for each rounding mode.
3998 For some special operands this code emits all RTL for the desired
3999 operation, for other cases, it generates only a quotient and stores it in
4000 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
4001 to indicate that it has not done anything.
4003 Last comes code that finishes the operation. If QUOTIENT is set and
4004 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
4005 QUOTIENT is not set, it is computed using trunc rounding.
4007 We try to generate special code for division and remainder when OP1 is a
4008 constant. If |OP1| = 2**n we can use shifts and some other fast
4009 operations. For other values of OP1, we compute a carefully selected
4010 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
4011 by m.
4013 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
4014 half of the product. Different strategies for generating the product are
4015 implemented in expmed_mult_highpart.
4017 If what we actually want is the remainder, we generate that by another
4018 by-constant multiplication and a subtraction. */
4020 /* We shouldn't be called with OP1 == const1_rtx, but some of the
4021 code below will malfunction if we are, so check here and handle
4022 the special case if so. */
4023 if (op1 == const1_rtx)
4024 return rem_flag ? const0_rtx : op0;
4026 /* When dividing by -1, we could get an overflow.
4027 negv_optab can handle overflows. */
4028 if (! unsignedp && op1 == constm1_rtx)
4030 if (rem_flag)
4031 return const0_rtx;
4032 return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT
4033 ? negv_optab : neg_optab, op0, target, 0);
4036 if (target
4037 /* Don't use the function value register as a target
4038 since we have to read it as well as write it,
4039 and function-inlining gets confused by this. */
4040 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
4041 /* Don't clobber an operand while doing a multi-step calculation. */
4042 || ((rem_flag || op1_is_constant)
4043 && (reg_mentioned_p (target, op0)
4044 || (MEM_P (op0) && MEM_P (target))))
4045 || reg_mentioned_p (target, op1)
4046 || (MEM_P (op1) && MEM_P (target))))
4047 target = 0;
4049 /* Get the mode in which to perform this computation. Normally it will
4050 be MODE, but sometimes we can't do the desired operation in MODE.
4051 If so, pick a wider mode in which we can do the operation. Convert
4052 to that mode at the start to avoid repeated conversions.
4054 First see what operations we need. These depend on the expression
4055 we are evaluating. (We assume that divxx3 insns exist under the
4056 same conditions as modxx3 insns, and that these insns don't normally
4057 fail. If these assumptions are not correct, we may generate less
4058 efficient code in some cases.)
4060 Then see if we find a mode in which we can open-code that operation
4061 (either a division, modulus, or shift). Finally, check for the smallest
4062 mode for which we can do the operation with a library call. */
4064 /* We might want to refine this now that we have division-by-constant
4065 optimization. Since expmed_mult_highpart tries so many variants, it is
4066 not straightforward to generalize this. Maybe we should make an array
4067 of possible modes in init_expmed? Save this for GCC 2.7. */
4069 optab1 = ((op1_is_pow2 && op1 != const0_rtx)
4070 ? (unsignedp ? lshr_optab : ashr_optab)
4071 : (unsignedp ? udiv_optab : sdiv_optab));
4072 optab2 = ((op1_is_pow2 && op1 != const0_rtx)
4073 ? optab1
4074 : (unsignedp ? udivmod_optab : sdivmod_optab));
4076 for (compute_mode = mode; compute_mode != VOIDmode;
4077 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
4078 if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
4079 || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
4080 break;
4082 if (compute_mode == VOIDmode)
4083 for (compute_mode = mode; compute_mode != VOIDmode;
4084 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
4085 if (optab_libfunc (optab1, compute_mode)
4086 || optab_libfunc (optab2, compute_mode))
4087 break;
4089 /* If we still couldn't find a mode, use MODE, but expand_binop will
4090 probably die. */
4091 if (compute_mode == VOIDmode)
4092 compute_mode = mode;
4094 if (target && GET_MODE (target) == compute_mode)
4095 tquotient = target;
4096 else
4097 tquotient = gen_reg_rtx (compute_mode);
4099 size = GET_MODE_BITSIZE (compute_mode);
4100 #if 0
4101 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
4102 (mode), and thereby get better code when OP1 is a constant. Do that
4103 later. It will require going over all usages of SIZE below. */
4104 size = GET_MODE_BITSIZE (mode);
4105 #endif
4107 /* Only deduct something for a REM if the last divide done was
4108 for a different constant. Then set the constant of the last
4109 divide. */
4110 max_cost = (unsignedp
4111 ? udiv_cost (speed, compute_mode)
4112 : sdiv_cost (speed, compute_mode));
4113 if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4114 && INTVAL (op1) == last_div_const))
4115 max_cost -= (mul_cost (speed, compute_mode)
4116 + add_cost (speed, compute_mode));
4118 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4120 /* Now convert to the best mode to use. */
4121 if (compute_mode != mode)
4123 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4124 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4126 /* convert_modes may have placed op1 into a register, so we
4127 must recompute the following. */
4128 op1_is_constant = CONST_INT_P (op1);
4129 op1_is_pow2 = (op1_is_constant
4130 && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4131 || (! unsignedp
4132 && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1)))))) ;
4135 /* If one of the operands is a volatile MEM, copy it into a register. */
4137 if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4138 op0 = force_reg (compute_mode, op0);
4139 if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4140 op1 = force_reg (compute_mode, op1);
4142 /* If we need the remainder or if OP1 is constant, we need to
4143 put OP0 in a register in case it has any queued subexpressions. */
4144 if (rem_flag || op1_is_constant)
4145 op0 = force_reg (compute_mode, op0);
4147 last = get_last_insn ();
4149 /* Promote floor rounding to trunc rounding for unsigned operations. */
4150 if (unsignedp)
4152 if (code == FLOOR_DIV_EXPR)
4153 code = TRUNC_DIV_EXPR;
4154 if (code == FLOOR_MOD_EXPR)
4155 code = TRUNC_MOD_EXPR;
4156 if (code == EXACT_DIV_EXPR && op1_is_pow2)
4157 code = TRUNC_DIV_EXPR;
4160 if (op1 != const0_rtx)
4161 switch (code)
4163 case TRUNC_MOD_EXPR:
4164 case TRUNC_DIV_EXPR:
4165 if (op1_is_constant)
4167 if (unsignedp)
4169 unsigned HOST_WIDE_INT mh, ml;
4170 int pre_shift, post_shift;
4171 int dummy;
4172 unsigned HOST_WIDE_INT d = (INTVAL (op1)
4173 & GET_MODE_MASK (compute_mode));
4175 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4177 pre_shift = floor_log2 (d);
4178 if (rem_flag)
4180 remainder
4181 = expand_binop (compute_mode, and_optab, op0,
4182 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4183 remainder, 1,
4184 OPTAB_LIB_WIDEN);
4185 if (remainder)
4186 return gen_lowpart (mode, remainder);
4188 quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4189 pre_shift, tquotient, 1);
4191 else if (size <= HOST_BITS_PER_WIDE_INT)
4193 if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
4195 /* Most significant bit of divisor is set; emit an scc
4196 insn. */
4197 quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4198 compute_mode, 1, 1);
4200 else
4202 /* Find a suitable multiplier and right shift count
4203 instead of multiplying with D. */
4205 mh = choose_multiplier (d, size, size,
4206 &ml, &post_shift, &dummy);
4208 /* If the suggested multiplier is more than SIZE bits,
4209 we can do better for even divisors, using an
4210 initial right shift. */
4211 if (mh != 0 && (d & 1) == 0)
4213 pre_shift = floor_log2 (d & -d);
4214 mh = choose_multiplier (d >> pre_shift, size,
4215 size - pre_shift,
4216 &ml, &post_shift, &dummy);
4217 gcc_assert (!mh);
4219 else
4220 pre_shift = 0;
4222 if (mh != 0)
4224 rtx t1, t2, t3, t4;
4226 if (post_shift - 1 >= BITS_PER_WORD)
4227 goto fail1;
4229 extra_cost
4230 = (shift_cost (speed, compute_mode, post_shift - 1)
4231 + shift_cost (speed, compute_mode, 1)
4232 + 2 * add_cost (speed, compute_mode));
4233 t1 = expmed_mult_highpart (compute_mode, op0,
4234 GEN_INT (ml),
4235 NULL_RTX, 1,
4236 max_cost - extra_cost);
4237 if (t1 == 0)
4238 goto fail1;
4239 t2 = force_operand (gen_rtx_MINUS (compute_mode,
4240 op0, t1),
4241 NULL_RTX);
4242 t3 = expand_shift (RSHIFT_EXPR, compute_mode,
4243 t2, 1, NULL_RTX, 1);
4244 t4 = force_operand (gen_rtx_PLUS (compute_mode,
4245 t1, t3),
4246 NULL_RTX);
4247 quotient = expand_shift
4248 (RSHIFT_EXPR, compute_mode, t4,
4249 post_shift - 1, tquotient, 1);
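/* For instance (illustrative numbers): a 32-bit unsigned divide by 7
   reaches this path with ml = 0x24924925 and post_shift = 3; for
   op0 = 100: t1 = 14, t2 = 86, t3 = 43, t4 = 57, and
   57 >> 2 = 14 = 100 / 7.  */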
4251 else
4253 rtx t1, t2;
4255 if (pre_shift >= BITS_PER_WORD
4256 || post_shift >= BITS_PER_WORD)
4257 goto fail1;
4259 t1 = expand_shift
4260 (RSHIFT_EXPR, compute_mode, op0,
4261 pre_shift, NULL_RTX, 1);
4262 extra_cost
4263 = (shift_cost (speed, compute_mode, pre_shift)
4264 + shift_cost (speed, compute_mode, post_shift));
4265 t2 = expmed_mult_highpart (compute_mode, t1,
4266 GEN_INT (ml),
4267 NULL_RTX, 1,
4268 max_cost - extra_cost);
4269 if (t2 == 0)
4270 goto fail1;
4271 quotient = expand_shift
4272 (RSHIFT_EXPR, compute_mode, t2,
4273 post_shift, tquotient, 1);
4277 else /* Too wide mode to use tricky code */
4278 break;
4280 insn = get_last_insn ();
4281 if (insn != last)
4282 set_dst_reg_note (insn, REG_EQUAL,
4283 gen_rtx_UDIV (compute_mode, op0, op1),
4284 quotient);
4286 else /* TRUNC_DIV, signed */
4288 unsigned HOST_WIDE_INT ml;
4289 int lgup, post_shift;
4290 rtx mlr;
4291 HOST_WIDE_INT d = INTVAL (op1);
4292 unsigned HOST_WIDE_INT abs_d;
4294 /* Since d might be INT_MIN, we have to cast to
4295 unsigned HOST_WIDE_INT before negating to avoid
4296 undefined signed overflow. */
4297 abs_d = (d >= 0
4298 ? (unsigned HOST_WIDE_INT) d
4299 : - (unsigned HOST_WIDE_INT) d);
4301 /* n rem d = n rem -d */
4302 if (rem_flag && d < 0)
4304 d = abs_d;
4305 op1 = gen_int_mode (abs_d, compute_mode);
4308 if (d == 1)
4309 quotient = op0;
4310 else if (d == -1)
4311 quotient = expand_unop (compute_mode, neg_optab, op0,
4312 tquotient, 0);
4313 else if (HOST_BITS_PER_WIDE_INT >= size
4314 && abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4316 /* This case is not handled correctly below. */
4317 quotient = emit_store_flag (tquotient, EQ, op0, op1,
4318 compute_mode, 1, 1);
4319 if (quotient == 0)
4320 goto fail1;
4322 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4323 && (rem_flag
4324 ? smod_pow2_cheap (speed, compute_mode)
4325 : sdiv_pow2_cheap (speed, compute_mode))
4326 /* We assume that the cheap metric is true if the
4327 optab has an expander for this mode. */
4328 && ((optab_handler ((rem_flag ? smod_optab
4329 : sdiv_optab),
4330 compute_mode)
4331 != CODE_FOR_nothing)
4332 || (optab_handler (sdivmod_optab,
4333 compute_mode)
4334 != CODE_FOR_nothing)))
4336 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4338 if (rem_flag)
4340 remainder = expand_smod_pow2 (compute_mode, op0, d);
4341 if (remainder)
4342 return gen_lowpart (mode, remainder);
4345 if (sdiv_pow2_cheap (speed, compute_mode)
4346 && ((optab_handler (sdiv_optab, compute_mode)
4347 != CODE_FOR_nothing)
4348 || (optab_handler (sdivmod_optab, compute_mode)
4349 != CODE_FOR_nothing)))
4350 quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4351 compute_mode, op0,
4352 gen_int_mode (abs_d,
4353 compute_mode),
4354 NULL_RTX, 0);
4355 else
4356 quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4358 /* We have computed OP0 / abs(OP1). If OP1 is negative,
4359 negate the quotient. */
4360 if (d < 0)
4362 insn = get_last_insn ();
4363 if (insn != last
4364 && abs_d < ((unsigned HOST_WIDE_INT) 1
4365 << (HOST_BITS_PER_WIDE_INT - 1)))
4366 set_dst_reg_note (insn, REG_EQUAL,
4367 gen_rtx_DIV (compute_mode, op0,
4368 gen_int_mode
4369 (abs_d,
4370 compute_mode)),
4371 quotient);
4373 quotient = expand_unop (compute_mode, neg_optab,
4374 quotient, quotient, 0);
4377 else if (size <= HOST_BITS_PER_WIDE_INT)
4379 choose_multiplier (abs_d, size, size - 1,
4380 &ml, &post_shift, &lgup);
4381 if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4383 rtx t1, t2, t3;
4385 if (post_shift >= BITS_PER_WORD
4386 || size - 1 >= BITS_PER_WORD)
4387 goto fail1;
4389 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4390 + shift_cost (speed, compute_mode, size - 1)
4391 + add_cost (speed, compute_mode));
4392 t1 = expmed_mult_highpart (compute_mode, op0,
4393 GEN_INT (ml), NULL_RTX, 0,
4394 max_cost - extra_cost);
4395 if (t1 == 0)
4396 goto fail1;
4397 t2 = expand_shift
4398 (RSHIFT_EXPR, compute_mode, t1,
4399 post_shift, NULL_RTX, 0);
4400 t3 = expand_shift
4401 (RSHIFT_EXPR, compute_mode, op0,
4402 size - 1, NULL_RTX, 0);
4403 if (d < 0)
4404 quotient
4405 = force_operand (gen_rtx_MINUS (compute_mode,
4406 t3, t2),
4407 tquotient);
4408 else
4409 quotient
4410 = force_operand (gen_rtx_MINUS (compute_mode,
4411 t2, t3),
4412 tquotient);
4414 else
4416 rtx t1, t2, t3, t4;
4418 if (post_shift >= BITS_PER_WORD
4419 || size - 1 >= BITS_PER_WORD)
4420 goto fail1;
4422 ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4423 mlr = gen_int_mode (ml, compute_mode);
4424 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4425 + shift_cost (speed, compute_mode, size - 1)
4426 + 2 * add_cost (speed, compute_mode));
4427 t1 = expmed_mult_highpart (compute_mode, op0, mlr,
4428 NULL_RTX, 0,
4429 max_cost - extra_cost);
4430 if (t1 == 0)
4431 goto fail1;
4432 t2 = force_operand (gen_rtx_PLUS (compute_mode,
4433 t1, op0),
4434 NULL_RTX);
4435 t3 = expand_shift
4436 (RSHIFT_EXPR, compute_mode, t2,
4437 post_shift, NULL_RTX, 0);
4438 t4 = expand_shift
4439 (RSHIFT_EXPR, compute_mode, op0,
4440 size - 1, NULL_RTX, 0);
4441 if (d < 0)
4442 quotient
4443 = force_operand (gen_rtx_MINUS (compute_mode,
4444 t4, t3),
4445 tquotient);
4446 else
4447 quotient
4448 = force_operand (gen_rtx_MINUS (compute_mode,
4449 t3, t4),
4450 tquotient);
4453 else /* Too wide mode to use tricky code */
4454 break;
4456 insn = get_last_insn ();
4457 if (insn != last)
4458 set_dst_reg_note (insn, REG_EQUAL,
4459 gen_rtx_DIV (compute_mode, op0, op1),
4460 quotient);
4462 break;
4464 fail1:
4465 delete_insns_since (last);
4466 break;
4468 case FLOOR_DIV_EXPR:
4469 case FLOOR_MOD_EXPR:
4470 /* We will come here only for signed operations. */
4471 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4473 unsigned HOST_WIDE_INT mh, ml;
4474 int pre_shift, lgup, post_shift;
4475 HOST_WIDE_INT d = INTVAL (op1);
4477 if (d > 0)
4479 /* We could just as easily deal with negative constants here,
4480 but it does not seem worth the trouble for GCC 2.6. */
4481 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4483 pre_shift = floor_log2 (d);
4484 if (rem_flag)
4486 remainder = expand_binop (compute_mode, and_optab, op0,
4487 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4488 remainder, 0, OPTAB_LIB_WIDEN);
4489 if (remainder)
4490 return gen_lowpart (mode, remainder);
4492 quotient = expand_shift
4493 (RSHIFT_EXPR, compute_mode, op0,
4494 pre_shift, tquotient, 0);
4496 else
4498 rtx t1, t2, t3, t4;
4500 mh = choose_multiplier (d, size, size - 1,
4501 &ml, &post_shift, &lgup);
4502 gcc_assert (!mh);
4504 if (post_shift < BITS_PER_WORD
4505 && size - 1 < BITS_PER_WORD)
4507 t1 = expand_shift
4508 (RSHIFT_EXPR, compute_mode, op0,
4509 size - 1, NULL_RTX, 0);
4510 t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4511 NULL_RTX, 0, OPTAB_WIDEN);
4512 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4513 + shift_cost (speed, compute_mode, size - 1)
4514 + 2 * add_cost (speed, compute_mode));
4515 t3 = expmed_mult_highpart (compute_mode, t2,
4516 GEN_INT (ml), NULL_RTX, 1,
4517 max_cost - extra_cost);
4518 if (t3 != 0)
4520 t4 = expand_shift
4521 (RSHIFT_EXPR, compute_mode, t3,
4522 post_shift, NULL_RTX, 1);
4523 quotient = expand_binop (compute_mode, xor_optab,
4524 t4, t1, tquotient, 0,
4525 OPTAB_WIDEN);
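/* The XOR with the sign word maps a negative op0 to ~op0 = -op0 - 1,
   divides that as an unsigned value, and maps the result back, which
   gives floor rounding; e.g. (illustrative) for d = 3 and op0 = -5:
   t1 = -1, op0 ^ t1 = 4, 4 / 3 = 1, and 1 ^ t1 = -2 = floor (-5 / 3).  */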
4530 else
4532 rtx nsign, t1, t2, t3, t4;
4533 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4534 op0, constm1_rtx), NULL_RTX);
4535 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4536 0, OPTAB_WIDEN);
4537 nsign = expand_shift
4538 (RSHIFT_EXPR, compute_mode, t2,
4539 size - 1, NULL_RTX, 0);
4540 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4541 NULL_RTX);
4542 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4543 NULL_RTX, 0);
4544 if (t4)
4546 rtx t5;
4547 t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4548 NULL_RTX, 0);
4549 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4550 t4, t5),
4551 tquotient);
4556 if (quotient != 0)
4557 break;
4558 delete_insns_since (last);
4560 /* Try using an instruction that produces both the quotient and
4561 remainder, using truncation. We can easily compensate the quotient
4562 or remainder to get floor rounding, once we have the remainder.
4563 Notice that we also compute the final remainder value here,
4564 and return the result right away. */
4565 if (target == 0 || GET_MODE (target) != compute_mode)
4566 target = gen_reg_rtx (compute_mode);
4568 if (rem_flag)
4570 remainder
4571 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4572 quotient = gen_reg_rtx (compute_mode);
4574 else
4576 quotient
4577 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4578 remainder = gen_reg_rtx (compute_mode);
4581 if (expand_twoval_binop (sdivmod_optab, op0, op1,
4582 quotient, remainder, 0))
4584 /* This could be computed with a branch-less sequence.
4585 Save that for later. */
4586 rtx tem;
4587 rtx label = gen_label_rtx ();
4588 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4589 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4590 NULL_RTX, 0, OPTAB_WIDEN);
4591 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4592 expand_dec (quotient, const1_rtx);
4593 expand_inc (remainder, op1);
4594 emit_label (label);
4595 return gen_lowpart (mode, rem_flag ? remainder : quotient);
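/* Worked example (illustrative): for op0 = -7 and op1 = 3 the truncating
   divmod gives quotient -2, remainder -1; the remainder is nonzero and the
   operands' signs differ, so the quotient becomes -3 and the remainder
   -1 + 3 = 2, which is the floor result.  */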
4598 /* No luck with division elimination or divmod. Have to do it
4599 by conditionally adjusting op0 *and* the result. */
4601 rtx label1, label2, label3, label4, label5;
4602 rtx adjusted_op0;
4603 rtx tem;
4605 quotient = gen_reg_rtx (compute_mode);
4606 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4607 label1 = gen_label_rtx ();
4608 label2 = gen_label_rtx ();
4609 label3 = gen_label_rtx ();
4610 label4 = gen_label_rtx ();
4611 label5 = gen_label_rtx ();
4612 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4613 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4614 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4615 quotient, 0, OPTAB_LIB_WIDEN);
4616 if (tem != quotient)
4617 emit_move_insn (quotient, tem);
4618 emit_jump_insn (gen_jump (label5));
4619 emit_barrier ();
4620 emit_label (label1);
4621 expand_inc (adjusted_op0, const1_rtx);
4622 emit_jump_insn (gen_jump (label4));
4623 emit_barrier ();
4624 emit_label (label2);
4625 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4626 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4627 quotient, 0, OPTAB_LIB_WIDEN);
4628 if (tem != quotient)
4629 emit_move_insn (quotient, tem);
4630 emit_jump_insn (gen_jump (label5));
4631 emit_barrier ();
4632 emit_label (label3);
4633 expand_dec (adjusted_op0, const1_rtx);
4634 emit_label (label4);
4635 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4636 quotient, 0, OPTAB_LIB_WIDEN);
4637 if (tem != quotient)
4638 emit_move_insn (quotient, tem);
4639 expand_dec (quotient, const1_rtx);
4640 emit_label (label5);
4642 break;
4644 case CEIL_DIV_EXPR:
4645 case CEIL_MOD_EXPR:
4646 if (unsignedp)
4648 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4650 rtx t1, t2, t3;
4651 unsigned HOST_WIDE_INT d = INTVAL (op1);
4652 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4653 floor_log2 (d), tquotient, 1);
4654 t2 = expand_binop (compute_mode, and_optab, op0,
4655 GEN_INT (d - 1),
4656 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4657 t3 = gen_reg_rtx (compute_mode);
4658 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4659 compute_mode, 1, 1);
4660 if (t3 == 0)
4662 rtx lab;
4663 lab = gen_label_rtx ();
4664 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4665 expand_inc (t1, const1_rtx);
4666 emit_label (lab);
4667 quotient = t1;
4669 else
4670 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4671 t1, t3),
4672 tquotient);
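/* E.g. (illustrative) for op0 = 13 and d = 4: t1 = 13 >> 2 = 3,
   t2 = 13 & 3 = 1, t3 = (t2 != 0) = 1, so the quotient is 3 + 1 = 4,
   the ceiling of 13 / 4.  */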
4673 break;
4676 /* Try using an instruction that produces both the quotient and
4677 remainder, using truncation. We can easily compensate the
4678 quotient or remainder to get ceiling rounding, once we have the
4679 remainder. Notice that we also compute the final remainder
4680 value here, and return the result right away. */
4681 if (target == 0 || GET_MODE (target) != compute_mode)
4682 target = gen_reg_rtx (compute_mode);
4684 if (rem_flag)
4686 remainder = (REG_P (target)
4687 ? target : gen_reg_rtx (compute_mode));
4688 quotient = gen_reg_rtx (compute_mode);
4690 else
4692 quotient = (REG_P (target)
4693 ? target : gen_reg_rtx (compute_mode));
4694 remainder = gen_reg_rtx (compute_mode);
4697 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4698 remainder, 1))
4700 /* This could be computed with a branch-less sequence.
4701 Save that for later. */
4702 rtx label = gen_label_rtx ();
4703 do_cmp_and_jump (remainder, const0_rtx, EQ,
4704 compute_mode, label);
4705 expand_inc (quotient, const1_rtx);
4706 expand_dec (remainder, op1);
4707 emit_label (label);
4708 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4711 /* No luck with division elimination or divmod. Have to do it
4712 by conditionally adjusting op0 *and* the result. */
4714 rtx label1, label2;
4715 rtx adjusted_op0, tem;
4717 quotient = gen_reg_rtx (compute_mode);
4718 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4719 label1 = gen_label_rtx ();
4720 label2 = gen_label_rtx ();
4721 do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4722 compute_mode, label1);
4723 emit_move_insn (quotient, const0_rtx);
4724 emit_jump_insn (gen_jump (label2));
4725 emit_barrier ();
4726 emit_label (label1);
4727 expand_dec (adjusted_op0, const1_rtx);
4728 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4729 quotient, 1, OPTAB_LIB_WIDEN);
4730 if (tem != quotient)
4731 emit_move_insn (quotient, tem);
4732 expand_inc (quotient, const1_rtx);
4733 emit_label (label2);
4736 else /* signed */
4738 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4739 && INTVAL (op1) >= 0)
4741 /* This is extremely similar to the code for the unsigned case
4742 above. For 2.7 we should merge these variants, but for
4743 2.6.1 I don't want to touch the code for unsigned since that
4744 gets used in C. The signed case will only be used by other
4745 languages (Ada). */
4747 rtx t1, t2, t3;
4748 unsigned HOST_WIDE_INT d = INTVAL (op1);
4749 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4750 floor_log2 (d), tquotient, 0);
4751 t2 = expand_binop (compute_mode, and_optab, op0,
4752 GEN_INT (d - 1),
4753 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4754 t3 = gen_reg_rtx (compute_mode);
4755 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4756 compute_mode, 1, 1);
4757 if (t3 == 0)
4759 rtx lab;
4760 lab = gen_label_rtx ();
4761 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4762 expand_inc (t1, const1_rtx);
4763 emit_label (lab);
4764 quotient = t1;
4766 else
4767 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4768 t1, t3),
4769 tquotient);
4770 break;
4773 /* Try using an instruction that produces both the quotient and
4774 remainder, using truncation. We can easily compensate the
4775 quotient or remainder to get ceiling rounding, once we have the
4776 remainder. Notice that we also compute the final remainder
4777 value here, and return the result right away. */
4778 if (target == 0 || GET_MODE (target) != compute_mode)
4779 target = gen_reg_rtx (compute_mode);
4780 if (rem_flag)
4782 remainder= (REG_P (target)
4783 ? target : gen_reg_rtx (compute_mode));
4784 quotient = gen_reg_rtx (compute_mode);
4786 else
4788 quotient = (REG_P (target)
4789 ? target : gen_reg_rtx (compute_mode));
4790 remainder = gen_reg_rtx (compute_mode);
4793 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4794 remainder, 0))
4796 /* This could be computed with a branch-less sequence.
4797 Save that for later. */
4798 rtx tem;
4799 rtx label = gen_label_rtx ();
4800 do_cmp_and_jump (remainder, const0_rtx, EQ,
4801 compute_mode, label);
4802 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4803 NULL_RTX, 0, OPTAB_WIDEN);
4804 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4805 expand_inc (quotient, const1_rtx);
4806 expand_dec (remainder, op1);
4807 emit_label (label);
4808 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4811 /* No luck with division elimination or divmod. Have to do it
4812 by conditionally adjusting op0 *and* the result. */
4814 rtx label1, label2, label3, label4, label5;
4815 rtx adjusted_op0;
4816 rtx tem;
4818 quotient = gen_reg_rtx (compute_mode);
4819 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4820 label1 = gen_label_rtx ();
4821 label2 = gen_label_rtx ();
4822 label3 = gen_label_rtx ();
4823 label4 = gen_label_rtx ();
4824 label5 = gen_label_rtx ();
4825 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4826 do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4827 compute_mode, label1);
4828 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4829 quotient, 0, OPTAB_LIB_WIDEN);
4830 if (tem != quotient)
4831 emit_move_insn (quotient, tem);
4832 emit_jump_insn (gen_jump (label5));
4833 emit_barrier ();
4834 emit_label (label1);
4835 expand_dec (adjusted_op0, const1_rtx);
4836 emit_jump_insn (gen_jump (label4));
4837 emit_barrier ();
4838 emit_label (label2);
4839 do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4840 compute_mode, label3);
4841 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4842 quotient, 0, OPTAB_LIB_WIDEN);
4843 if (tem != quotient)
4844 emit_move_insn (quotient, tem);
4845 emit_jump_insn (gen_jump (label5));
4846 emit_barrier ();
4847 emit_label (label3);
4848 expand_inc (adjusted_op0, const1_rtx);
4849 emit_label (label4);
4850 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4851 quotient, 0, OPTAB_LIB_WIDEN);
4852 if (tem != quotient)
4853 emit_move_insn (quotient, tem);
4854 expand_inc (quotient, const1_rtx);
4855 emit_label (label5);
4858 break;
4860 case EXACT_DIV_EXPR:
4861 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4863 HOST_WIDE_INT d = INTVAL (op1);
4864 unsigned HOST_WIDE_INT ml;
4865 int pre_shift;
4866 rtx t1;
4868 pre_shift = floor_log2 (d & -d);
4869 ml = invert_mod2n (d >> pre_shift, size);
4870 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4871 pre_shift, NULL_RTX, unsignedp);
4872 quotient = expand_mult (compute_mode, t1,
4873 gen_int_mode (ml, compute_mode),
4874 NULL_RTX, 1);
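/* E.g. (illustrative) dividing a known multiple of 24 in a 32-bit mode:
   pre_shift = 3 and ml = invert_mod2n (3, 32) = 0xaaaaaaab, so
   600 / 24 = (600 >> 3) * 0xaaaaaaab = 75 * 0xaaaaaaab = 25 (mod 2^32).  */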
4876 insn = get_last_insn ();
4877 set_dst_reg_note (insn, REG_EQUAL,
4878 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4879 compute_mode, op0, op1),
4880 quotient);
4882 break;
4884 case ROUND_DIV_EXPR:
4885 case ROUND_MOD_EXPR:
4886 if (unsignedp)
4888 rtx tem;
4889 rtx label;
4890 label = gen_label_rtx ();
4891 quotient = gen_reg_rtx (compute_mode);
4892 remainder = gen_reg_rtx (compute_mode);
4893 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4895 rtx tem;
4896 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4897 quotient, 1, OPTAB_LIB_WIDEN);
4898 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4899 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4900 remainder, 1, OPTAB_LIB_WIDEN);
4902 tem = plus_constant (compute_mode, op1, -1);
4903 tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1);
4904 do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4905 expand_inc (quotient, const1_rtx);
4906 expand_dec (remainder, op1);
4907 emit_label (label);
4909 else
4911 rtx abs_rem, abs_op1, tem, mask;
4912 rtx label;
4913 label = gen_label_rtx ();
4914 quotient = gen_reg_rtx (compute_mode);
4915 remainder = gen_reg_rtx (compute_mode);
4916 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4918 rtx tem;
4919 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4920 quotient, 0, OPTAB_LIB_WIDEN);
4921 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4922 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4923 remainder, 0, OPTAB_LIB_WIDEN);
4925 abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4926 abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4927 tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4928 1, NULL_RTX, 1);
4929 do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4930 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4931 NULL_RTX, 0, OPTAB_WIDEN);
4932 mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4933 size - 1, NULL_RTX, 0);
4934 tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4935 NULL_RTX, 0, OPTAB_WIDEN);
4936 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4937 NULL_RTX, 0, OPTAB_WIDEN);
4938 expand_inc (quotient, tem);
4939 tem = expand_binop (compute_mode, xor_optab, mask, op1,
4940 NULL_RTX, 0, OPTAB_WIDEN);
4941 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4942 NULL_RTX, 0, OPTAB_WIDEN);
4943 expand_dec (remainder, tem);
4944 emit_label (label);
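/* Worked example (illustrative): for op0 = 8 and op1 = -3 the truncating
   divmod gives quotient -2, remainder 2; since 2 * |2| >= |-3|, the sign
   mask is -1, so the quotient becomes -3 and the remainder 2 - 3 = -1,
   the round-to-nearest result.  */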
4946 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4948 default:
4949 gcc_unreachable ();
4952 if (quotient == 0)
4954 if (target && GET_MODE (target) != compute_mode)
4955 target = 0;
4957 if (rem_flag)
4959 /* Try to produce the remainder without producing the quotient.
4960 If we seem to have a divmod pattern that does not require widening,
4961 don't try widening here. We should really have a WIDEN argument
4962 to expand_twoval_binop, since what we'd really like to do here is
4963 1) try a mod insn in compute_mode
4964 2) try a divmod insn in compute_mode
4965 3) try a div insn in compute_mode and multiply-subtract to get
4966 remainder
4967 4) try the same things with widening allowed. */
4968 remainder
4969 = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4970 op0, op1, target,
4971 unsignedp,
4972 ((optab_handler (optab2, compute_mode)
4973 != CODE_FOR_nothing)
4974 ? OPTAB_DIRECT : OPTAB_WIDEN));
4975 if (remainder == 0)
4977 /* No luck there. Can we do remainder and divide at once
4978 without a library call? */
4979 remainder = gen_reg_rtx (compute_mode);
4980 if (! expand_twoval_binop ((unsignedp
4981 ? udivmod_optab
4982 : sdivmod_optab),
4983 op0, op1,
4984 NULL_RTX, remainder, unsignedp))
4985 remainder = 0;
4988 if (remainder)
4989 return gen_lowpart (mode, remainder);
4992 /* Produce the quotient. Try a quotient insn, but not a library call.
4993 If we have a divmod in this mode, use it in preference to widening
4994 the div (for this test we assume it will not fail). Note that optab2
4995 is set to the one of the two optabs that the call below will use. */
4996 quotient
4997 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4998 op0, op1, rem_flag ? NULL_RTX : target,
4999 unsignedp,
5000 ((optab_handler (optab2, compute_mode)
5001 != CODE_FOR_nothing)
5002 ? OPTAB_DIRECT : OPTAB_WIDEN));
5004 if (quotient == 0)
5006 /* No luck there. Try a quotient-and-remainder insn,
5007 keeping the quotient alone. */
5008 quotient = gen_reg_rtx (compute_mode);
5009 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
5010 op0, op1,
5011 quotient, NULL_RTX, unsignedp))
5013 quotient = 0;
5014 if (! rem_flag)
5015 /* Still no luck. If we are not computing the remainder,
5016 use a library call for the quotient. */
5017 quotient = sign_expand_binop (compute_mode,
5018 udiv_optab, sdiv_optab,
5019 op0, op1, target,
5020 unsignedp, OPTAB_LIB_WIDEN);
5025 if (rem_flag)
5027 if (target && GET_MODE (target) != compute_mode)
5028 target = 0;
5030 if (quotient == 0)
5032 /* No divide instruction either. Use library for remainder. */
5033 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5034 op0, op1, target,
5035 unsignedp, OPTAB_LIB_WIDEN);
5036 /* No remainder function. Try a quotient-and-remainder
5037 function, keeping the remainder. */
5038 if (!remainder)
5040 remainder = gen_reg_rtx (compute_mode);
5041 if (!expand_twoval_binop_libfunc
5042 (unsignedp ? udivmod_optab : sdivmod_optab,
5043 op0, op1,
5044 NULL_RTX, remainder,
5045 unsignedp ? UMOD : MOD))
5046 remainder = NULL_RTX;
5049 else
5051 /* We divided. Now finish doing X - Y * (X / Y). */
5052 remainder = expand_mult (compute_mode, quotient, op1,
5053 NULL_RTX, unsignedp);
5054 remainder = expand_binop (compute_mode, sub_optab, op0,
5055 remainder, target, unsignedp,
5056 OPTAB_LIB_WIDEN);
5060 return gen_lowpart (mode, rem_flag ? remainder : quotient);
5063 /* Return a tree node with data type TYPE, describing the value of X.
5064 Usually this is a VAR_DECL, if there is no obvious better choice.
5065 X may be an expression; however, we only support those expressions
5066 generated by loop.c. */
5068 tree
5069 make_tree (tree type, rtx x)
5071 tree t;
5073 switch (GET_CODE (x))
5075 case CONST_INT:
5077 HOST_WIDE_INT hi = 0;
5079 if (INTVAL (x) < 0
5080 && !(TYPE_UNSIGNED (type)
5081 && (GET_MODE_BITSIZE (TYPE_MODE (type))
5082 < HOST_BITS_PER_WIDE_INT)))
5083 hi = -1;
5085 t = build_int_cst_wide (type, INTVAL (x), hi);
5087 return t;
5090 case CONST_DOUBLE:
5091 if (GET_MODE (x) == VOIDmode)
5092 t = build_int_cst_wide (type,
5093 CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
5094 else
5096 REAL_VALUE_TYPE d;
5098 REAL_VALUE_FROM_CONST_DOUBLE (d, x);
5099 t = build_real (type, d);
5102 return t;
5104 case CONST_VECTOR:
5106 int units = CONST_VECTOR_NUNITS (x);
5107 tree itype = TREE_TYPE (type);
5108 tree *elts;
5109 int i;
5111 /* Build a tree with vector elements. */
5112 elts = XALLOCAVEC (tree, units);
5113 for (i = units - 1; i >= 0; --i)
5115 rtx elt = CONST_VECTOR_ELT (x, i);
5116 elts[i] = make_tree (itype, elt);
5119 return build_vector (type, elts);
5122 case PLUS:
5123 return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5124 make_tree (type, XEXP (x, 1)));
5126 case MINUS:
5127 return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5128 make_tree (type, XEXP (x, 1)));
5130 case NEG:
5131 return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5133 case MULT:
5134 return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5135 make_tree (type, XEXP (x, 1)));
5137 case ASHIFT:
5138 return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5139 make_tree (type, XEXP (x, 1)));
5141 case LSHIFTRT:
5142 t = unsigned_type_for (type);
5143 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5144 make_tree (t, XEXP (x, 0)),
5145 make_tree (type, XEXP (x, 1))));
5147 case ASHIFTRT:
5148 t = signed_type_for (type);
5149 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5150 make_tree (t, XEXP (x, 0)),
5151 make_tree (type, XEXP (x, 1))));
5153 case DIV:
5154 if (TREE_CODE (type) != REAL_TYPE)
5155 t = signed_type_for (type);
5156 else
5157 t = type;
5159 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5160 make_tree (t, XEXP (x, 0)),
5161 make_tree (t, XEXP (x, 1))));
5162 case UDIV:
5163 t = unsigned_type_for (type);
5164 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5165 make_tree (t, XEXP (x, 0)),
5166 make_tree (t, XEXP (x, 1))));
5168 case SIGN_EXTEND:
5169 case ZERO_EXTEND:
5170 t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5171 GET_CODE (x) == ZERO_EXTEND);
5172 return fold_convert (type, make_tree (t, XEXP (x, 0)));
5174 case CONST:
5175 return make_tree (type, XEXP (x, 0));
5177 case SYMBOL_REF:
5178 t = SYMBOL_REF_DECL (x);
5179 if (t)
5180 return fold_convert (type, build_fold_addr_expr (t));
5181 /* else fall through. */
5183 default:
5184 t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5186 /* If TYPE is a POINTER_TYPE, we might need to convert X from
5187 address mode to pointer mode. */
5188 if (POINTER_TYPE_P (type))
5189 x = convert_memory_address_addr_space
5190 (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5192 /* Note that we do *not* use SET_DECL_RTL here, because we do not
5193 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
5194 t->decl_with_rtl.rtl = x;
5196 return t;
5200 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5201 and returning TARGET.
5203 If TARGET is 0, a pseudo-register or constant is returned. */
5206 expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
5208 rtx tem = 0;
5210 if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5211 tem = simplify_binary_operation (AND, mode, op0, op1);
5212 if (tem == 0)
5213 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5215 if (target == 0)
5216 target = tem;
5217 else if (tem != target)
5218 emit_move_insn (target, tem);
5219 return target;
5222 /* Helper function for emit_store_flag. */
5223 static rtx
5224 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5225 enum machine_mode mode, enum machine_mode compare_mode,
5226 int unsignedp, rtx x, rtx y, int normalizep,
5227 enum machine_mode target_mode)
5229 struct expand_operand ops[4];
5230 rtx op0, last, comparison, subtarget;
5231 enum machine_mode result_mode = insn_data[(int) icode].operand[0].mode;
5233 last = get_last_insn ();
5234 x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5235 y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5236 if (!x || !y)
5238 delete_insns_since (last);
5239 return NULL_RTX;
5242 if (target_mode == VOIDmode)
5243 target_mode = result_mode;
5244 if (!target)
5245 target = gen_reg_rtx (target_mode);
5247 comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5249 create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5250 create_fixed_operand (&ops[1], comparison);
5251 create_fixed_operand (&ops[2], x);
5252 create_fixed_operand (&ops[3], y);
5253 if (!maybe_expand_insn (icode, 4, ops))
5255 delete_insns_since (last);
5256 return NULL_RTX;
5258 subtarget = ops[0].value;
5260 /* If we are converting to a wider mode, first convert to
5261 TARGET_MODE, then normalize. This produces better combining
5262 opportunities on machines that have a SIGN_EXTRACT when we are
5263 testing a single bit. This mostly benefits the 68k.
5265 If STORE_FLAG_VALUE does not have the sign bit set when
5266 interpreted in MODE, we can do this conversion as unsigned, which
5267 is usually more efficient. */
5268 if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
5270 convert_move (target, subtarget,
5271 val_signbit_known_clear_p (result_mode,
5272 STORE_FLAG_VALUE));
5273 op0 = target;
5274 result_mode = target_mode;
5276 else
5277 op0 = subtarget;
5279 /* If we want to keep subexpressions around, don't reuse our last
5280 target. */
5281 if (optimize)
5282 subtarget = 0;
5284 /* Now normalize to the proper value in MODE. Sometimes we don't
5285 have to do anything. */
5286 if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5288 /* STORE_FLAG_VALUE might be the most negative number, so write
5289 the comparison this way to avoid a compile-time warning. */
5290 else if (- normalizep == STORE_FLAG_VALUE)
5291 op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5293 /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5294 it hard to use a value of just the sign bit due to ANSI integer
5295 constant typing rules. */
5296 else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5297 op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5298 GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5299 normalizep == 1);
5300 else
5302 gcc_assert (STORE_FLAG_VALUE & 1);
5304 op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5305 if (normalizep == -1)
5306 op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5309 /* If we were converting to a smaller mode, do the conversion now. */
5310 if (target_mode != result_mode)
5312 convert_move (target, op0, 0);
5313 return target;
5315 else
5316 return op0;
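/* Illustrative sketch, not part of expmed.c: the normalization that
   emit_cstore applies to the raw output of a cstore pattern, restated on
   plain 32-bit integers.  RAW is assumed to be either 0 or
   STORE_FLAG_VALUE; the result is 0/1 for normalizep == 1 and 0/-1 for
   normalizep == -1, mirroring the branches above (and assuming the usual
   arithmetic right shift of negative values).  */
#include <stdint.h>

static int32_t
normalize_scc_sketch (int32_t raw, int32_t store_flag_value, int normalizep)
{
  if (normalizep == 0 || normalizep == store_flag_value)
    return raw;                 /* Already in the desired form.  */
  if (-normalizep == store_flag_value)
    return -raw;                /* 0/SFV becomes 0/-SFV.  */
  if (store_flag_value < 0)     /* Sign bit of SFV is known to be set.  */
    return normalizep == 1
           ? (int32_t) ((uint32_t) raw >> 31)  /* Logical shift: 0/1.  */
           : raw >> 31;                        /* Arithmetic shift: 0/-1.  */
  /* Otherwise STORE_FLAG_VALUE must be odd: keep the low bit, then
     negate if a -1/0 value is wanted.  */
  return normalizep == -1 ? -(raw & 1) : (raw & 1);
}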
5320 /* A subroutine of emit_store_flag only including "tricks" that do not
5321 need a recursive call. These are kept separate to avoid infinite
5322 loops. */
5324 static rtx
5325 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5326 enum machine_mode mode, int unsignedp, int normalizep,
5327 enum machine_mode target_mode)
5329 rtx subtarget;
5330 enum insn_code icode;
5331 enum machine_mode compare_mode;
5332 enum mode_class mclass;
5333 enum rtx_code scode;
5334 rtx tem;
5336 if (unsignedp)
5337 code = unsigned_condition (code);
5338 scode = swap_condition (code);
5340 /* If one operand is constant, make it the second one. Only do this
5341 if the other operand is not constant as well. */
5343 if (swap_commutative_operands_p (op0, op1))
5345 tem = op0;
5346 op0 = op1;
5347 op1 = tem;
5348 code = swap_condition (code);
5351 if (mode == VOIDmode)
5352 mode = GET_MODE (op0);
5354 /* For some comparisons with 1 and -1, we can convert this to
5355 comparisons with zero. This will often produce more opportunities for
5356 store-flag insns. */
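  /* For example, the signed test "x < 1" is the same as "x <= 0", and the
     unsigned test "x >= 1" (GEU) is the same as "x != 0"; the forms that
     compare against zero are more likely to match a store-flag pattern.  */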
5358 switch (code)
5360 case LT:
5361 if (op1 == const1_rtx)
5362 op1 = const0_rtx, code = LE;
5363 break;
5364 case LE:
5365 if (op1 == constm1_rtx)
5366 op1 = const0_rtx, code = LT;
5367 break;
5368 case GE:
5369 if (op1 == const1_rtx)
5370 op1 = const0_rtx, code = GT;
5371 break;
5372 case GT:
5373 if (op1 == constm1_rtx)
5374 op1 = const0_rtx, code = GE;
5375 break;
5376 case GEU:
5377 if (op1 == const1_rtx)
5378 op1 = const0_rtx, code = NE;
5379 break;
5380 case LTU:
5381 if (op1 == const1_rtx)
5382 op1 = const0_rtx, code = EQ;
5383 break;
5384 default:
5385 break;
5388 /* If we are comparing a double-word integer with zero or -1, we can
5389 convert the comparison into one involving a single word. */
5390 if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5391 && GET_MODE_CLASS (mode) == MODE_INT
5392 && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5394 if ((code == EQ || code == NE)
5395 && (op1 == const0_rtx || op1 == constm1_rtx))
5397 rtx op00, op01;
5399 /* Do a logical OR or AND of the two words and compare the
5400 result. */
5401 op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5402 op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5403 tem = expand_binop (word_mode,
5404 op1 == const0_rtx ? ior_optab : and_optab,
5405 op00, op01, NULL_RTX, unsignedp,
5406 OPTAB_DIRECT);
5408 if (tem != 0)
5409 tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5410 unsignedp, normalizep);
5412 else if ((code == LT || code == GE) && op1 == const0_rtx)
5414 rtx op0h;
5416 /* If testing the sign bit, can just test on high word. */
5417 op0h = simplify_gen_subreg (word_mode, op0, mode,
5418 subreg_highpart_offset (word_mode,
5419 mode));
5420 tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5421 unsignedp, normalizep);
5423 else
5424 tem = NULL_RTX;
5426 if (tem)
5428 if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5429 return tem;
5430 if (!target)
5431 target = gen_reg_rtx (target_mode);
5433 convert_move (target, tem,
5434 !val_signbit_known_set_p (word_mode,
5435 (normalizep ? normalizep
5436 : STORE_FLAG_VALUE)));
5437 return target;
5441 /* If this is A < 0 or A >= 0, we can do this by taking the ones
5442 complement of A (for GE) and shifting the sign bit to the low bit. */
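  /* For example, in SImode "x < 0" becomes a 31-bit right shift of x
     (logical for a 0/1 result, arithmetic for 0/-1), and "x >= 0" applies
     the same shift to the one's complement of x.  */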
5443 if (op1 == const0_rtx && (code == LT || code == GE)
5444 && GET_MODE_CLASS (mode) == MODE_INT
5445 && (normalizep || STORE_FLAG_VALUE == 1
5446 || val_signbit_p (mode, STORE_FLAG_VALUE)))
5448 subtarget = target;
5450 if (!target)
5451 target_mode = mode;
5453 /* If the result is to be wider than OP0, it is best to convert it
5454 first. If it is to be narrower, it is *incorrect* to convert it
5455 first. */
5456 else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5458 op0 = convert_modes (target_mode, mode, op0, 0);
5459 mode = target_mode;
5462 if (target_mode != mode)
5463 subtarget = 0;
5465 if (code == GE)
5466 op0 = expand_unop (mode, one_cmpl_optab, op0,
5467 ((STORE_FLAG_VALUE == 1 || normalizep)
5468 ? 0 : subtarget), 0);
5470 if (STORE_FLAG_VALUE == 1 || normalizep)
5471 /* If we are supposed to produce a 0/1 value, we want to do
5472 a logical shift from the sign bit to the low-order bit; for
5473 a -1/0 value, we do an arithmetic shift. */
5474 op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5475 GET_MODE_BITSIZE (mode) - 1,
5476 subtarget, normalizep != -1);
5478 if (mode != target_mode)
5479 op0 = convert_modes (target_mode, mode, op0, 0);
5481 return op0;
5484 mclass = GET_MODE_CLASS (mode);
5485 for (compare_mode = mode; compare_mode != VOIDmode;
5486 compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5488 enum machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5489 icode = optab_handler (cstore_optab, optab_mode);
5490 if (icode != CODE_FOR_nothing)
5492 do_pending_stack_adjust ();
5493 tem = emit_cstore (target, icode, code, mode, compare_mode,
5494 unsignedp, op0, op1, normalizep, target_mode);
5495 if (tem)
5496 return tem;
5498 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5500 tem = emit_cstore (target, icode, scode, mode, compare_mode,
5501 unsignedp, op1, op0, normalizep, target_mode);
5502 if (tem)
5503 return tem;
5505 break;
5509 return 0;
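/* Illustrative sketch, not part of expmed.c: the word-wise identities
   behind the double-word reduction in emit_store_flag_1, shown on a
   64-bit value split into two 32-bit words.  */
#include <stdint.h>

static int
dword_eq_zero_sketch (uint32_t low, uint32_t high)
{
  /* The value is zero exactly when the inclusive OR of its words is.  */
  return (low | high) == 0;
}

static int
dword_eq_minus_one_sketch (uint32_t low, uint32_t high)
{
  /* The value is -1 exactly when the AND of its words is all ones.  */
  return (low & high) == 0xffffffffu;
}

static int
dword_is_negative_sketch (uint32_t low, uint32_t high)
{
  /* LT/GE against zero only needs the sign of the high word.  */
  (void) low;
  return (int32_t) high < 0;
}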
5512 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5513 and storing in TARGET. Normally return TARGET.
5514 Return 0 if that cannot be done.
5516 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
5517 it is VOIDmode, they cannot both be CONST_INT.
5519 UNSIGNEDP is for the case where we have to widen the operands
5520 to perform the operation. It says to use zero-extension.
5522 NORMALIZEP is 1 if we should convert the result to be either zero
5523 or one. NORMALIZEP is -1 if we should convert the result to be
5524 either zero or -1. If NORMALIZEP is zero, the result will be left
5525 "raw" out of the scc insn. */
5527 rtx
5528 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5529 enum machine_mode mode, int unsignedp, int normalizep)
5531 enum machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5532 enum rtx_code rcode;
5533 rtx subtarget;
5534 rtx tem, last, trueval;
5536 tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5537 target_mode);
5538 if (tem)
5539 return tem;
5541 /* If we reached here, we can't do this with a scc insn, however there
5542 are some comparisons that can be done in other ways. Don't do any
5543 of these cases if branches are very cheap. */
5544 if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5545 return 0;
5547 /* See what we need to return. We can only return a 1, -1, or the
5548 sign bit. */
5550 if (normalizep == 0)
5552 if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5553 normalizep = STORE_FLAG_VALUE;
5555 else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5557 else
5558 return 0;
5561 last = get_last_insn ();
5563 /* If optimizing, use different pseudo registers for each insn, instead
5564 of reusing the same pseudo. This leads to better CSE, but slows
5565 down the compiler, since there are more pseudos. */
5566 subtarget = (!optimize
5567 && (target_mode == mode)) ? target : NULL_RTX;
5568 trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5570 /* For floating-point comparisons, try the reverse comparison or try
5571 changing the "orderedness" of the comparison. */
5572 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5574 enum rtx_code first_code;
5575 bool and_them;
5577 rcode = reverse_condition_maybe_unordered (code);
5578 if (can_compare_p (rcode, mode, ccp_store_flag)
5579 && (code == ORDERED || code == UNORDERED
5580 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5581 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5583 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5584 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5586 /* For the reverse comparison, use either an addition or an XOR. */
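  /* For example, with STORE_FLAG_VALUE == 1 and NORMALIZEP == -1 the
     reversed comparison yields 0/1, and adding NORMALIZEP turns that into
     the wanted -1/0; otherwise, XORing with TRUEVAL performs the same
     flip.  */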
5587 if (want_add
5588 && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5589 optimize_insn_for_speed_p ()) == 0)
5591 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5592 STORE_FLAG_VALUE, target_mode);
5593 if (tem)
5594 return expand_binop (target_mode, add_optab, tem,
5595 GEN_INT (normalizep),
5596 target, 0, OPTAB_WIDEN);
5598 else if (!want_add
5599 && rtx_cost (trueval, XOR, 1,
5600 optimize_insn_for_speed_p ()) == 0)
5602 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5603 normalizep, target_mode);
5604 if (tem)
5605 return expand_binop (target_mode, xor_optab, tem, trueval,
5606 target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
5610 delete_insns_since (last);
5612 /* Cannot split ORDERED and UNORDERED, only try the above trick. */
5613 if (code == ORDERED || code == UNORDERED)
5614 return 0;
5616 and_them = split_comparison (code, mode, &first_code, &code);
5618 /* If there are no NaNs, the first comparison should always fall through.
5619 Effectively change the comparison to the other one. */
5620 if (!HONOR_NANS (mode))
5622 gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5623 return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5624 target_mode);
5627 #ifdef HAVE_conditional_move
5628 /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5629 conditional move. */
5630 tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5631 normalizep, target_mode);
5632 if (tem == 0)
5633 return 0;
5635 if (and_them)
5636 tem = emit_conditional_move (target, code, op0, op1, mode,
5637 tem, const0_rtx, GET_MODE (tem), 0);
5638 else
5639 tem = emit_conditional_move (target, code, op0, op1, mode,
5640 trueval, tem, GET_MODE (tem), 0);
5642 if (tem == 0)
5643 delete_insns_since (last);
5644 return tem;
5645 #else
5646 return 0;
5647 #endif
5650 /* The remaining tricks only apply to integer comparisons. */
5652 if (GET_MODE_CLASS (mode) != MODE_INT)
5653 return 0;
5655 /* If this is an equality comparison of integers, we can try to exclusive-or
5656 (or subtract) the two operands and use a recursive call to try the
5657 comparison with zero. Don't do any of these cases if branches are
5658 very cheap. */
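  /* For instance, "a == b" holds exactly when (a ^ b) == 0, and likewise
     when a - b == 0, so the equality can be retried as a store-flag
     against zero.  */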
5660 if ((code == EQ || code == NE) && op1 != const0_rtx)
5662 tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5663 OPTAB_WIDEN);
5665 if (tem == 0)
5666 tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5667 OPTAB_WIDEN);
5668 if (tem != 0)
5669 tem = emit_store_flag (target, code, tem, const0_rtx,
5670 mode, unsignedp, normalizep);
5671 if (tem != 0)
5672 return tem;
5674 delete_insns_since (last);
5677 /* For integer comparisons, try the reverse comparison. However, for
5678 small X, and if we would have to extend it anyway, implementing "X != 0"
5679 as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0". */
5680 rcode = reverse_condition (code);
5681 if (can_compare_p (rcode, mode, ccp_store_flag)
5682 && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5683 && code == NE
5684 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5685 && op1 == const0_rtx))
5687 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5688 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5690 /* Again, for the reverse comparison, use either an addition or an XOR. */
5691 if (want_add
5692 && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5693 optimize_insn_for_speed_p ()) == 0)
5695 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5696 STORE_FLAG_VALUE, target_mode);
5697 if (tem != 0)
5698 tem = expand_binop (target_mode, add_optab, tem,
5699 GEN_INT (normalizep), target, 0, OPTAB_WIDEN);
5701 else if (!want_add
5702 && rtx_cost (trueval, XOR, 1,
5703 optimize_insn_for_speed_p ()) == 0)
5705 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5706 normalizep, target_mode);
5707 if (tem != 0)
5708 tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5709 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5712 if (tem != 0)
5713 return tem;
5714 delete_insns_since (last);
5717 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5718 the constant zero. Reject all other comparisons at this point. Only
5719 do LE and GT if branches are expensive since they are expensive on
5720 2-operand machines. */
5722 if (op1 != const0_rtx
5723 || (code != EQ && code != NE
5724 && (BRANCH_COST (optimize_insn_for_speed_p (),
5725 false) <= 1 || (code != LE && code != GT))))
5726 return 0;
5728 /* Try to put the result of the comparison in the sign bit. Assume we can't
5729 do the necessary operation below. */
5731 tem = 0;
5733 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
5734 the sign bit set. */
5736 if (code == LE)
5738 /* This is destructive, so SUBTARGET can't be OP0. */
5739 if (rtx_equal_p (subtarget, op0))
5740 subtarget = 0;
5742 tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5743 OPTAB_WIDEN);
5744 if (tem)
5745 tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5746 OPTAB_WIDEN);
5749 /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5750 number of bits in the mode of OP0, minus one. */
5752 if (code == GT)
5754 if (rtx_equal_p (subtarget, op0))
5755 subtarget = 0;
5757 tem = expand_shift (RSHIFT_EXPR, mode, op0,
5758 GET_MODE_BITSIZE (mode) - 1,
5759 subtarget, 0);
5760 tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5761 OPTAB_WIDEN);
5764 if (code == EQ || code == NE)
5766 /* For EQ or NE, one way to do the comparison is to apply an operation
5767 that converts the operand into a positive number if it is nonzero
5768 or zero if it was originally zero. Then, for EQ, we subtract 1 and
5769 for NE we negate. This puts the result in the sign bit. Then we
5770 normalize with a shift, if needed.
5772 Two operations that can do the above actions are ABS and FFS, so try
5773 them. If that doesn't work, and MODE is smaller than a full word,
5774 we can use zero-extension to the wider mode (an unsigned conversion)
5775 as the operation. */
5777 /* Note that ABS doesn't yield a positive number for INT_MIN, but
5778 that is compensated by the subsequent overflow when subtracting
5779 one / negating. */
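  /* For example, with the ABS variant: abs (x) - 1 is negative exactly
     when x == 0, so the EQ result ends up in the sign bit, while -abs (x)
     is negative exactly when x != 0, giving the NE result.  */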
5781 if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5782 tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5783 else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5784 tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5785 else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5787 tem = convert_modes (word_mode, mode, op0, 1);
5788 mode = word_mode;
5791 if (tem != 0)
5793 if (code == EQ)
5794 tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5795 0, OPTAB_WIDEN);
5796 else
5797 tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5800 /* If we couldn't do it that way, for NE we can "or" the two's complement
5801 of the value with itself. For EQ, we take the one's complement of
5802 that "or", which is an extra insn, so we only handle EQ if branches
5803 are expensive. */
5805 if (tem == 0
5806 && (code == NE
5807 || BRANCH_COST (optimize_insn_for_speed_p (),
5808 false) > 1))
5810 if (rtx_equal_p (subtarget, op0))
5811 subtarget = 0;
5813 tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5814 tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5815 OPTAB_WIDEN);
5817 if (tem && code == EQ)
5818 tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5822 if (tem && normalizep)
5823 tem = expand_shift (RSHIFT_EXPR, mode, tem,
5824 GET_MODE_BITSIZE (mode) - 1,
5825 subtarget, normalizep == 1);
5827 if (tem)
5829 if (!target)
5831 else if (GET_MODE (tem) != target_mode)
5833 convert_move (target, tem, 0);
5834 tem = target;
5836 else if (!subtarget)
5838 emit_move_insn (target, tem);
5839 tem = target;
5842 else
5843 delete_insns_since (last);
5845 return tem;
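/* Illustrative sketch, not part of expmed.c: the branch-free sign-bit
   tricks emit_store_flag falls back on when no scc instruction is
   available, written out on 32-bit values.  Each helper returns the 0/1
   answer directly; the real code leaves the answer in the sign bit and
   normalizes it with a final shift.  Unsigned arithmetic is used to
   avoid signed overflow, and "a >> 31" assumes the usual arithmetic
   shift of negative values.  */
#include <stdint.h>

static int
le_zero_sketch (int32_t a)
{
  /* a <= 0 iff (a | (a - 1)) has its sign bit set: for a == 0 the
     subtraction wraps to all ones, and for a < 0 the sign bit of a
     survives the OR.  */
  uint32_t u = (uint32_t) a;
  return ((u | (u - 1)) >> 31) != 0;
}

static int
gt_zero_sketch (int32_t a)
{
  /* a > 0 iff ((a >> 31) - a) has its sign bit set: the arithmetic shift
     yields 0 or -1, so the difference is negative exactly when a is
     strictly positive.  */
  uint32_t sign = (uint32_t) (a >> 31);
  return ((sign - (uint32_t) a) >> 31) != 0;
}

static int
ne_zero_sketch (int32_t a)
{
  /* a != 0 iff (-a | a) has its sign bit set; complementing that OR
     gives the a == 0 test instead.  */
  uint32_t u = (uint32_t) a;
  return (((0u - u) | u) >> 31) != 0;
}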
5848 /* Like emit_store_flag, but always succeeds. */
5850 rtx
5851 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5852 enum machine_mode mode, int unsignedp, int normalizep)
5854 rtx tem, label;
5855 rtx trueval, falseval;
5857 /* First see if emit_store_flag can do the job. */
5858 tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5859 if (tem != 0)
5860 return tem;
5862 if (!target)
5863 target = gen_reg_rtx (word_mode);
5865 /* If this failed, we have to do this with set/compare/jump/set code.
5866 For foo != 0, if OP0 is already TARGET, just replace it with 1 when it is nonzero. */
5867 trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5868 if (code == NE
5869 && GET_MODE_CLASS (mode) == MODE_INT
5870 && REG_P (target)
5871 && op0 == target
5872 && op1 == const0_rtx)
5874 label = gen_label_rtx ();
5875 do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp,
5876 mode, NULL_RTX, NULL_RTX, label, -1);
5877 emit_move_insn (target, trueval);
5878 emit_label (label);
5879 return target;
5882 if (!REG_P (target)
5883 || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5884 target = gen_reg_rtx (GET_MODE (target));
5886 /* Jump in the right direction if the target cannot implement CODE
5887 but can jump on its reverse condition. */
5888 falseval = const0_rtx;
5889 if (! can_compare_p (code, mode, ccp_jump)
5890 && (! FLOAT_MODE_P (mode)
5891 || code == ORDERED || code == UNORDERED
5892 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5893 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5895 enum rtx_code rcode;
5896 if (FLOAT_MODE_P (mode))
5897 rcode = reverse_condition_maybe_unordered (code);
5898 else
5899 rcode = reverse_condition (code);
5901 /* Canonicalize to UNORDERED for the libcall. */
5902 if (can_compare_p (rcode, mode, ccp_jump)
5903 || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5905 falseval = trueval;
5906 trueval = const0_rtx;
5907 code = rcode;
5911 emit_move_insn (target, trueval);
5912 label = gen_label_rtx ();
5913 do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5914 NULL_RTX, label, -1);
5916 emit_move_insn (target, falseval);
5917 emit_label (label);
5919 return target;
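/* Illustrative sketch, not part of expmed.c: the shape of the
   set/compare/jump/set sequence emit_store_flag_force falls back on,
   written as ordinary C for "target = (a < b)" with a 0/1 result.  The
   conditional jump emitted by do_compare_rtx_and_jump skips the store of
   FALSEVAL when the comparison holds.  */
static int
store_flag_force_fallback_sketch (int a, int b)
{
  int target = 1;       /* emit_move_insn (target, trueval)       */
  if (!(a < b))         /* do_compare_rtx_and_jump ... to LABEL   */
    target = 0;         /* emit_move_insn (target, falseval)      */
  return target;        /* LABEL:                                 */
}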
5922 /* Perform possibly multi-word comparison and conditional jump to LABEL
5923 if ARG1 OP ARG2 is true, where ARG1 and ARG2 are of mode MODE. This is
5924 now a thin wrapper around do_compare_rtx_and_jump. */
5926 static void
5927 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
5928 rtx label)
5930 int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5931 do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode,
5932 NULL_RTX, NULL_RTX, label, -1);