gcc/expmed.c

   1 /* Medium-level subroutines: convert bit-field store and extract
   2    and shifts, multiplies and divides to rtl instructions.
   3    Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
   4    1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
   5    2011, 2012
   6    Free Software Foundation, Inc.
   7
   8 This file is part of GCC.
   9
  10 GCC is free software; you can redistribute it and/or modify it under
  11 the terms of the GNU General Public License as published by the Free
  12 Software Foundation; either version 3, or (at your option) any later
  13 version.
  14
  15 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  16 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  17 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  18 for more details.
  19
  20 You should have received a copy of the GNU General Public License
  21 along with GCC; see the file COPYING3.  If not see
  22 <http://www.gnu.org/licenses/>.  */
  23
  24
  25 #include "config.h"
  26 #include "system.h"
  27 #include "coretypes.h"
  28 #include "tm.h"
  29 #include "diagnostic-core.h"
  30 #include "rtl.h"
  31 #include "tree.h"
  32 #include "tm_p.h"
  33 #include "flags.h"
  34 #include "insn-config.h"
  35 #include "expr.h"
  36 #include "optabs.h"
  37 #include "recog.h"
  38 #include "langhooks.h"
  39 #include "df.h"
  40 #include "target.h"
  41 #include "expmed.h"
  42
  43 struct target_expmed default_target_expmed;  /* Statically allocated default instance.  */
  44 #if SWITCHABLE_TARGET
  45 struct target_expmed *this_target_expmed = &default_target_expmed;  /* Only defined for SWITCHABLE_TARGET; starts out pointing at the default instance.  */
  46 #endif
  47
  48 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,  /* Forward declarations of file-local (static) helpers start here.  */
  49                                    unsigned HOST_WIDE_INT,
  50                                    unsigned HOST_WIDE_INT,
  51                                    unsigned HOST_WIDE_INT,
  52                                    rtx);
  53 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,  /* store_bit_field_1 calls this as (op0, bitsize, bitnum, bitregion_start, bitregion_end, value).  */
  54                                    unsigned HOST_WIDE_INT,
  55                                    unsigned HOST_WIDE_INT,
  56                                    unsigned HOST_WIDE_INT,
  57                                    rtx);
  58 static rtx extract_fixed_bit_field (enum machine_mode, rtx,
  59                                     unsigned HOST_WIDE_INT,
  60                                     unsigned HOST_WIDE_INT, rtx, int, bool);
  61 static rtx mask_rtx (enum machine_mode, int, int, int);
  62 static rtx lshift_value (enum machine_mode, rtx, int, int);
  63 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  64                                     unsigned HOST_WIDE_INT, int);
  65 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
  66 static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
  67 static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
  68
  69 /* Test whether a value is zero or a power of two.  Note that X is evaluated twice.  */
  70 #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
  71
  72 #ifndef SLOW_UNALIGNED_ACCESS  /* Fallback used when no definition is supplied elsewhere: ignore MODE and ALIGN and use STRICT_ALIGNMENT.  */
  73 #define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
  74 #endif
  75
  76
  77 /* Reduce conditional compilation elsewhere: when HAVE_insv, HAVE_extv or HAVE_extzv is not defined, supply a zero HAVE_*, a CODE_FOR_nothing code and a gen_* stub yielding NULL_RTX so the names can be used in ordinary C conditions.  */
  78 #ifndef HAVE_insv
  79 #define HAVE_insv       0
  80 #define CODE_FOR_insv   CODE_FOR_nothing
  81 #define gen_insv(a,b,c,d) NULL_RTX
  82 #endif
  83 #ifndef HAVE_extv
  84 #define HAVE_extv       0
  85 #define CODE_FOR_extv   CODE_FOR_nothing
  86 #define gen_extv(a,b,c,d) NULL_RTX
  87 #endif
  88 #ifndef HAVE_extzv
  89 #define HAVE_extzv      0
  90 #define CODE_FOR_extzv  CODE_FOR_nothing
  91 #define gen_extzv(a,b,c,d) NULL_RTX
  92 #endif
  93
  94 struct init_expmed_rtl
  95 {
  96   struct rtx_def reg;           rtunion reg_fld[2];
  97   struct rtx_def plus;  rtunion plus_fld1;
  98   struct rtx_def neg;
  99   struct rtx_def mult;  rtunion mult_fld1;
 100   struct rtx_def sdiv;  rtunion sdiv_fld1;
 101   struct rtx_def udiv;  rtunion udiv_fld1;
 102   struct rtx_def sdiv_32;       rtunion sdiv_32_fld1;
 103   struct rtx_def smod_32;       rtunion smod_32_fld1;
 104   struct rtx_def wide_mult;     rtunion wide_mult_fld1;
 105   struct rtx_def wide_lshr;     rtunion wide_lshr_fld1;
 106   struct rtx_def wide_trunc;
 107   struct rtx_def shift; rtunion shift_fld1;
 108   struct rtx_def shift_mult;    rtunion shift_mult_fld1;
 109   struct rtx_def shift_add;     rtunion shift_add_fld1;
 110   struct rtx_def shift_sub0;    rtunion shift_sub0_fld1;
 111   struct rtx_def shift_sub1;    rtunion shift_sub1_fld1;
 112   struct rtx_def zext;
 113   struct rtx_def trunc;
 114
 115   rtx pow2[MAX_BITS_PER_WORD];
 116   rtx cint[MAX_BITS_PER_WORD];
 117 };
 118
 119 static void
 120 init_expmed_one_conv (struct init_expmed_rtl *all, enum machine_mode to_mode,
 121                       enum machine_mode from_mode, bool speed)
 122 {
 123   int to_size, from_size;
 124   rtx which;
 125
 126   /* We're given no information about the true size of a partial integer,
 127      only the size of the "full" integer it requires for storage.  For
 128      comparison purposes here, reduce the bit size by one in that case.  */
 129   to_size = (GET_MODE_BITSIZE (to_mode)
 130              - (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT));
 131   from_size = (GET_MODE_BITSIZE (from_mode)
 132                - (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT));
 133
 134   /* Assume cost of zero-extend and sign-extend is the same.  */
 135   which = (to_size < from_size ? &all->trunc : &all->zext);
 136
 137   PUT_MODE (&all->reg, from_mode);
 138   set_convert_cost (to_mode, from_mode, speed, set_src_cost (which, speed));
 139 }
 140
 141 static void
 142 init_expmed_one_mode (struct init_expmed_rtl *all,
 143                       enum machine_mode mode, int speed)
 144 {
 145   int m, n, mode_bitsize;
 146   enum machine_mode mode_from;
 147
 148   mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
 149
 150   PUT_MODE (&all->reg, mode);
 151   PUT_MODE (&all->plus, mode);
 152   PUT_MODE (&all->neg, mode);
 153   PUT_MODE (&all->mult, mode);
 154   PUT_MODE (&all->sdiv, mode);
 155   PUT_MODE (&all->udiv, mode);
 156   PUT_MODE (&all->sdiv_32, mode);
 157   PUT_MODE (&all->smod_32, mode);
 158   PUT_MODE (&all->wide_trunc, mode);
 159   PUT_MODE (&all->shift, mode);
 160   PUT_MODE (&all->shift_mult, mode);
 161   PUT_MODE (&all->shift_add, mode);
 162   PUT_MODE (&all->shift_sub0, mode);
 163   PUT_MODE (&all->shift_sub1, mode);
 164   PUT_MODE (&all->zext, mode);
 165   PUT_MODE (&all->trunc, mode);
 166
 167   set_add_cost (speed, mode, set_src_cost (&all->plus, speed));
 168   set_neg_cost (speed, mode, set_src_cost (&all->neg, speed));
 169   set_mul_cost (speed, mode, set_src_cost (&all->mult, speed));
 170   set_sdiv_cost (speed, mode, set_src_cost (&all->sdiv, speed));
 171   set_udiv_cost (speed, mode, set_src_cost (&all->udiv, speed));
 172
 173   set_sdiv_pow2_cheap (speed, mode, (set_src_cost (&all->sdiv_32, speed)
 174                                      <= 2 * add_cost (speed, mode)));
 175   set_smod_pow2_cheap (speed, mode, (set_src_cost (&all->smod_32, speed)
 176                                      <= 4 * add_cost (speed, mode)));
 177
 178   set_shift_cost (speed, mode, 0, 0);
 179   {
 180     int cost = add_cost (speed, mode);
 181     set_shiftadd_cost (speed, mode, 0, cost);
 182     set_shiftsub0_cost (speed, mode, 0, cost);
 183     set_shiftsub1_cost (speed, mode, 0, cost);
 184   }
 185
 186   n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
 187   for (m = 1; m < n; m++)
 188     {
 189       XEXP (&all->shift, 1) = all->cint[m];
 190       XEXP (&all->shift_mult, 1) = all->pow2[m];
 191
 192       set_shift_cost (speed, mode, m, set_src_cost (&all->shift, speed));
 193       set_shiftadd_cost (speed, mode, m, set_src_cost (&all->shift_add, speed));
 194       set_shiftsub0_cost (speed, mode, m, set_src_cost (&all->shift_sub0, speed));
 195       set_shiftsub1_cost (speed, mode, m, set_src_cost (&all->shift_sub1, speed));
 196     }
 197
 198   if (SCALAR_INT_MODE_P (mode))
 199     {
 200       for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
 201            mode_from = (enum machine_mode)(mode_from + 1))
 202         init_expmed_one_conv (all, mode, mode_from, speed);
 203     }
 204   if (GET_MODE_CLASS (mode) == MODE_INT)
 205     {
 206       enum machine_mode  wider_mode = GET_MODE_WIDER_MODE (mode);
 207       if (wider_mode != VOIDmode)
 208         {
 209           PUT_MODE (&all->zext, wider_mode);
 210           PUT_MODE (&all->wide_mult, wider_mode);
 211           PUT_MODE (&all->wide_lshr, wider_mode);
 212           XEXP (&all->wide_lshr, 1) = GEN_INT (mode_bitsize);
 213
 214           set_mul_widen_cost (speed, wider_mode,
 215                               set_src_cost (&all->wide_mult, speed));
 216           set_mul_highpart_cost (speed, mode,
 217                                  set_src_cost (&all->wide_trunc, speed));
 218         }
 219     }
 220 }
 221
 222 void
 223 init_expmed (void)
 224 {
 225   struct init_expmed_rtl all;
 226   enum machine_mode mode;
 227   int m, speed;
 228
 229   memset (&all, 0, sizeof all);
 230   for (m = 1; m < MAX_BITS_PER_WORD; m++)
 231     {
 232       all.pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
 233       all.cint[m] = GEN_INT (m);
 234     }
 235
 236   PUT_CODE (&all.reg, REG);
 237   /* Avoid using hard regs in ways which may be unsupported.  */
 238   SET_REGNO (&all.reg, LAST_VIRTUAL_REGISTER + 1);
 239
 240   PUT_CODE (&all.plus, PLUS);
 241   XEXP (&all.plus, 0) = &all.reg;
 242   XEXP (&all.plus, 1) = &all.reg;
 243
 244   PUT_CODE (&all.neg, NEG);
 245   XEXP (&all.neg, 0) = &all.reg;
 246
 247   PUT_CODE (&all.mult, MULT);
 248   XEXP (&all.mult, 0) = &all.reg;
 249   XEXP (&all.mult, 1) = &all.reg;
 250
 251   PUT_CODE (&all.sdiv, DIV);
 252   XEXP (&all.sdiv, 0) = &all.reg;
 253   XEXP (&all.sdiv, 1) = &all.reg;
 254
 255   PUT_CODE (&all.udiv, UDIV);
 256   XEXP (&all.udiv, 0) = &all.reg;
 257   XEXP (&all.udiv, 1) = &all.reg;
 258
 259   PUT_CODE (&all.sdiv_32, DIV);
 260   XEXP (&all.sdiv_32, 0) = &all.reg;
 261   XEXP (&all.sdiv_32, 1) = 32 < MAX_BITS_PER_WORD ? all.cint[32] : GEN_INT (32);
 262
 263   PUT_CODE (&all.smod_32, MOD);
 264   XEXP (&all.smod_32, 0) = &all.reg;
 265   XEXP (&all.smod_32, 1) = XEXP (&all.sdiv_32, 1);
 266
 267   PUT_CODE (&all.zext, ZERO_EXTEND);
 268   XEXP (&all.zext, 0) = &all.reg;
 269
 270   PUT_CODE (&all.wide_mult, MULT);
 271   XEXP (&all.wide_mult, 0) = &all.zext;
 272   XEXP (&all.wide_mult, 1) = &all.zext;
 273
 274   PUT_CODE (&all.wide_lshr, LSHIFTRT);
 275   XEXP (&all.wide_lshr, 0) = &all.wide_mult;
 276
 277   PUT_CODE (&all.wide_trunc, TRUNCATE);
 278   XEXP (&all.wide_trunc, 0) = &all.wide_lshr;
 279
 280   PUT_CODE (&all.shift, ASHIFT);
 281   XEXP (&all.shift, 0) = &all.reg;
 282
 283   PUT_CODE (&all.shift_mult, MULT);
 284   XEXP (&all.shift_mult, 0) = &all.reg;
 285
 286   PUT_CODE (&all.shift_add, PLUS);
 287   XEXP (&all.shift_add, 0) = &all.shift_mult;
 288   XEXP (&all.shift_add, 1) = &all.reg;
 289
 290   PUT_CODE (&all.shift_sub0, MINUS);
 291   XEXP (&all.shift_sub0, 0) = &all.shift_mult;
 292   XEXP (&all.shift_sub0, 1) = &all.reg;
 293
 294   PUT_CODE (&all.shift_sub1, MINUS);
 295   XEXP (&all.shift_sub1, 0) = &all.reg;
 296   XEXP (&all.shift_sub1, 1) = &all.shift_mult;
 297
 298   PUT_CODE (&all.trunc, TRUNCATE);
 299   XEXP (&all.trunc, 0) = &all.reg;
 300
 301   for (speed = 0; speed < 2; speed++)
 302     {
 303       crtl->maybe_hot_insn_p = speed;
 304       set_zero_cost (speed, set_src_cost (const0_rtx, speed));
 305
 306       for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
 307            mode = (enum machine_mode)(mode + 1))
 308         init_expmed_one_mode (&all, mode, speed);
 309
 310       if (MIN_MODE_PARTIAL_INT != VOIDmode)
 311         for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
 312              mode = (enum machine_mode)(mode + 1))
 313           init_expmed_one_mode (&all, mode, speed);
 314
 315       if (MIN_MODE_VECTOR_INT != VOIDmode)
 316         for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
 317              mode = (enum machine_mode)(mode + 1))
 318           init_expmed_one_mode (&all, mode, speed);
 319     }
 320
 321   if (alg_hash_used_p ())
 322     {
 323       struct alg_hash_entry *p = alg_hash_entry_ptr (0);
 324       memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
 325     }
 326   else
 327     set_alg_hash_used_p (true);
 328   default_rtl_profile ();
 329 }
 330
 331 /* Return an rtx representing minus the value of X.
 332    MODE is the intended mode of the result,
 333    useful if X is a CONST_INT.  */
 334
 335 rtx
 336 negate_rtx (enum machine_mode mode, rtx x)
 337 {
 338   rtx result = simplify_unary_operation (NEG, mode, x, mode);
 339
 340   if (result == 0)
 341     result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
 342
 343   return result;
 344 }
 345
 346 /* Report on the availability of insv/extv/extzv and the desired mode
 347    of each of their operands.  Returns MAX_MACHINE_MODE if HAVE_foo
 348    is false; else the mode of the specified operand.  If OPNO is -1,
 349    all the caller cares about is whether the insn is available.  */
 350 enum machine_mode
 351 mode_for_extraction (enum extraction_pattern pattern, int opno)
 352 {
 353   const struct insn_data_d *data;
 354
 355   switch (pattern)
 356     {
 357     case EP_insv:
 358       if (HAVE_insv)
 359         {
 360           data = &insn_data[CODE_FOR_insv];
 361           break;
 362         }
 363       return MAX_MACHINE_MODE;
 364
 365     case EP_extv:
 366       if (HAVE_extv)
 367         {
 368           data = &insn_data[CODE_FOR_extv];
 369           break;
 370         }
 371       return MAX_MACHINE_MODE;
 372
 373     case EP_extzv:
 374       if (HAVE_extzv)
 375         {
 376           data = &insn_data[CODE_FOR_extzv];
 377           break;
 378         }
 379       return MAX_MACHINE_MODE;
 380
 381     default:
 382       gcc_unreachable ();
 383     }
 384
 385   if (opno == -1)
 386     return VOIDmode;
 387
 388   /* Everyone who uses this function used to follow it with
 389      if (result == VOIDmode) result = word_mode; */
 390   if (data->operand[opno].mode == VOIDmode)
 391     return word_mode;
 392   return data->operand[opno].mode;
 393 }
 394
 395 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
 396    a structure of mode STRUCT_MODE represents a lowpart subreg.   The subreg
 397    offset is then BITNUM / BITS_PER_UNIT.  */
 398
 399 static bool
 400 lowpart_bit_field_p (unsigned HOST_WIDE_INT bitnum,
 401                      unsigned HOST_WIDE_INT bitsize,
 402                      enum machine_mode struct_mode)
 403 {
 404   if (BYTES_BIG_ENDIAN)
 405     return (bitnum % BITS_PER_UNIT == 0
 406             && (bitnum + bitsize == GET_MODE_BITSIZE (struct_mode)
 407                 || (bitnum + bitsize) % BITS_PER_WORD == 0));
 408   else
 409     return bitnum % BITS_PER_WORD == 0;
 410 }
 411 \f
 412 /* A subroutine of store_bit_field, with the same arguments.  Return true
 413    if the operation could be implemented.
 414
 415    If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
 416    no other way of implementing the operation.  If FALLBACK_P is false,
 417    return false instead.  */
 418
 419 static bool
 420 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 421                    unsigned HOST_WIDE_INT bitnum,
 422                    unsigned HOST_WIDE_INT bitregion_start,
 423                    unsigned HOST_WIDE_INT bitregion_end,
 424                    enum machine_mode fieldmode,
 425                    rtx value, bool fallback_p)
 426 {
 427   rtx op0 = str_rtx;
 428   rtx orig_value;
 429
 430   while (GET_CODE (op0) == SUBREG)
 431     {
 432       /* The following line once was done only if WORDS_BIG_ENDIAN,
 433          but I think that is a mistake.  WORDS_BIG_ENDIAN is
 434          meaningful at a much higher level; when structures are copied
 435          between memory and regs, the higher-numbered regs
 436          always get higher addresses.  */
 437       int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
 438       int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
 439       int byte_offset = 0;
 440
 441       /* Paradoxical subregs need special handling on big endian machines.  */
 442       if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
 443         {
 444           int difference = inner_mode_size - outer_mode_size;
 445
 446           if (WORDS_BIG_ENDIAN)
 447             byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
 448           if (BYTES_BIG_ENDIAN)
 449             byte_offset += difference % UNITS_PER_WORD;
 450         }
 451       else
 452         byte_offset = SUBREG_BYTE (op0);
 453
 454       bitnum += byte_offset * BITS_PER_UNIT;
 455       op0 = SUBREG_REG (op0);
 456     }
 457
 458   /* No action is needed if the target is a register and if the field
 459      lies completely outside that register.  This can occur if the source
 460      code contains an out-of-bounds access to a small array.  */
 461   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
 462     return true;
 463
 464   /* Use vec_set patterns for inserting parts of vectors whenever
 465      available.  */
 466   if (VECTOR_MODE_P (GET_MODE (op0))
 467       && !MEM_P (op0)
 468       && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
 469       && fieldmode == GET_MODE_INNER (GET_MODE (op0))
 470       && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
 471       && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
 472     {
 473       struct expand_operand ops[3];
 474       enum machine_mode outermode = GET_MODE (op0);
 475       enum machine_mode innermode = GET_MODE_INNER (outermode);
 476       enum insn_code icode = optab_handler (vec_set_optab, outermode);
 477       int pos = bitnum / GET_MODE_BITSIZE (innermode);
 478
 479       create_fixed_operand (&ops[0], op0);
 480       create_input_operand (&ops[1], value, innermode);
 481       create_integer_operand (&ops[2], pos);
 482       if (maybe_expand_insn (icode, 3, ops))
 483         return true;
 484     }
 485
 486   /* If the target is a register, overwriting the entire object, or storing
 487      a full-word or multi-word field can be done with just a SUBREG.  */
 488   if (!MEM_P (op0)
 489       && bitsize == GET_MODE_BITSIZE (fieldmode)
 490       && ((bitsize == GET_MODE_BITSIZE (GET_MODE (op0)) && bitnum == 0)
 491           || (bitsize % BITS_PER_WORD == 0 && bitnum % BITS_PER_WORD == 0)))
 492     {
 493       /* Use the subreg machinery either to narrow OP0 to the required
 494          words or to cope with mode punning between equal-sized modes.  */
 495       rtx sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
 496                                      bitnum / BITS_PER_UNIT);
 497       if (sub)
 498         {
 499           emit_move_insn (sub, value);
 500           return true;
 501         }
 502     }
 503
 504   /* If the target is memory, storing any naturally aligned field can be
 505      done with a simple store.  For targets that support fast unaligned
 506      memory, any naturally sized, unit aligned field can be done directly.  */
 507   if (MEM_P (op0)
 508       && bitnum % BITS_PER_UNIT == 0
 509       && bitsize == GET_MODE_BITSIZE (fieldmode)
 510       && (!SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0))
 511           || (bitnum % bitsize == 0
 512               && MEM_ALIGN (op0) % bitsize == 0)))
 513     {
 514       op0 = adjust_bitfield_address (op0, fieldmode, bitnum / BITS_PER_UNIT);
 515       emit_move_insn (op0, value);
 516       return true;
 517     }
 518
 519   /* Make sure we are playing with integral modes.  Pun with subregs
 520      if we aren't.  This must come after the entire register case above,
 521      since that case is valid for any mode.  The following cases are only
 522      valid for integral modes.  */
 523   {
 524     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
 525     if (imode != GET_MODE (op0))
 526       {
 527         if (MEM_P (op0))
 528           op0 = adjust_bitfield_address (op0, imode, 0);
 529         else
 530           {
 531             gcc_assert (imode != BLKmode);
 532             op0 = gen_lowpart (imode, op0);
 533           }
 534       }
 535   }
 536
 537   /* Storing an lsb-aligned field in a register
 538      can be done with a movstrict instruction.  */
 539
 540   if (!MEM_P (op0)
 541       && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
 542       && bitsize == GET_MODE_BITSIZE (fieldmode)
 543       && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
 544     {
 545       struct expand_operand ops[2];
 546       enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
 547       rtx arg0 = op0;
 548       unsigned HOST_WIDE_INT subreg_off;
 549
 550       if (GET_CODE (arg0) == SUBREG)
 551         {
 552           /* Else we've got some float mode source being extracted into
 553              a different float mode destination -- this combination of
 554              subregs results in Severe Tire Damage.  */
 555           gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
 556                       || GET_MODE_CLASS (fieldmode) == MODE_INT
 557                       || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
 558           arg0 = SUBREG_REG (arg0);
 559         }
 560
 561       subreg_off = bitnum / BITS_PER_UNIT;
 562       if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
 563         {
 564           arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
 565
 566           create_fixed_operand (&ops[0], arg0);
 567           /* Shrink the source operand to FIELDMODE.  */
 568           create_convert_operand_to (&ops[1], value, fieldmode, false);
 569           if (maybe_expand_insn (icode, 2, ops))
 570             return true;
 571         }
 572     }
 573
 574   /* Handle fields bigger than a word.  */
 575
 576   if (bitsize > BITS_PER_WORD)
 577     {
 578       /* Here we transfer the words of the field
 579          in the order least significant first.
 580          This is because the most significant word is the one which may
 581          be less than full.
 582          However, only do that if the value is not BLKmode.  */
 583
 584       unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
 585       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
 586       unsigned int i;
 587       rtx last;
 588
 589       /* This is the mode we must force value to, so that there will be enough
 590          subwords to extract.  Note that fieldmode will often (always?) be
 591          VOIDmode, because that is what store_field uses to indicate that this
 592          is a bit field, but passing VOIDmode to operand_subword_force
 593          is not allowed.  */
 594       fieldmode = GET_MODE (value);
 595       if (fieldmode == VOIDmode)
 596         fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
 597
 598       last = get_last_insn ();
 599       for (i = 0; i < nwords; i++)
 600         {
 601           /* If I is 0, use the low-order word in both field and target;
 602              if I is 1, use the next to lowest word; and so on.  */
 603           unsigned int wordnum = (backwards
 604                                   ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD
 605                                   - i - 1
 606                                   : i);
 607           unsigned int bit_offset = (backwards
 608                                      ? MAX ((int) bitsize - ((int) i + 1)
 609                                             * BITS_PER_WORD,
 610                                             0)
 611                                      : (int) i * BITS_PER_WORD);
 612           rtx value_word = operand_subword_force (value, wordnum, fieldmode);
 613           unsigned HOST_WIDE_INT new_bitsize =
 614             MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
 615
 616           /* If the remaining chunk doesn't have full wordsize we have
 617              to make sure that for big endian machines the higher order
 618              bits are used.  */
 619           if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
 620             value_word = simplify_expand_binop (word_mode, lshr_optab,
 621                                                 value_word,
 622                                                 GEN_INT (BITS_PER_WORD
 623                                                          - new_bitsize),
 624                                                 NULL_RTX, true,
 625                                                 OPTAB_LIB_WIDEN);
 626
 627           if (!store_bit_field_1 (op0, new_bitsize,
 628                                   bitnum + bit_offset,
 629                                   bitregion_start, bitregion_end,
 630                                   word_mode,
 631                                   value_word, fallback_p))
 632             {
 633               delete_insns_since (last);
 634               return false;
 635             }
 636         }
 637       return true;
 638     }
 639
 640   /* If VALUE has a floating-point or complex mode, access it as an
 641      integer of the corresponding size.  This can occur on a machine
 642      with 64 bit registers that uses SFmode for float.  It can also
 643      occur for unaligned float or complex fields.  */
 644   orig_value = value;
 645   if (GET_MODE (value) != VOIDmode
 646       && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
 647       && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
 648     {
 649       value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
 650       emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
 651     }
 652
 653   /* If OP0 is a multi-word register, narrow it to the affected word.
 654      If the region spans two words, defer to store_split_bit_field.  */
 655   if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
 656     {
 657       op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
 658                                  bitnum / BITS_PER_WORD * UNITS_PER_WORD);
 659       gcc_assert (op0);
 660       bitnum %= BITS_PER_WORD;
 661       if (bitnum + bitsize > BITS_PER_WORD)
 662         {
 663           if (!fallback_p)
 664             return false;
 665
 666           store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
 667                                  bitregion_end, value);
 668           return true;
 669         }
 670     }
 671
 672   /* From here on we can assume that the field to be stored in fits
 673      within a word.  If the destination is a register, it too fits
 674      in a word.  */
 675
 676   enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);
 677   if (HAVE_insv
 678       && GET_MODE (value) != BLKmode
 679       && bitsize > 0
 680       && GET_MODE_BITSIZE (op_mode) >= bitsize
 681       /* Do not use insv for volatile bitfields when
 682          -fstrict-volatile-bitfields is in effect.  */
 683       && !(MEM_P (op0) && MEM_VOLATILE_P (op0)
 684            && flag_strict_volatile_bitfields > 0)
 685       /* Do not use insv if the bit region is restricted and
 686          op_mode integer at offset doesn't fit into the
 687          restricted region.  */
 688       && !(MEM_P (op0) && bitregion_end
 689            && bitnum - (bitnum % BITS_PER_UNIT) + GET_MODE_BITSIZE (op_mode)
 690               > bitregion_end + 1))
 691     {
 692       struct expand_operand ops[4];
 693       unsigned HOST_WIDE_INT bitpos = bitnum;
 694       rtx value1;
 695       rtx xop0 = op0;
 696       rtx last = get_last_insn ();
 697       bool copy_back = false;
 698
 699       unsigned int unit = GET_MODE_BITSIZE (op_mode);
 700       if (MEM_P (xop0))
 701         {
 702           /* Get a reference to the first byte of the field.  */
 703           xop0 = adjust_bitfield_address (xop0, byte_mode,
 704                                           bitpos / BITS_PER_UNIT);
 705           bitpos %= BITS_PER_UNIT;
 706         }
 707       else
 708         {
 709           /* Convert from counting within OP0 to counting in OP_MODE.  */
 710           if (BYTES_BIG_ENDIAN)
 711             bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
 712         }
 713
 714       /* If xop0 is a register, we need it in OP_MODE
 715          to make it acceptable to the format of insv.  */
 716       if (GET_CODE (xop0) == SUBREG)
 717         /* We can't just change the mode, because this might clobber op0,
 718            and we will need the original value of op0 if insv fails.  */
 719         xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
 720       if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
 721         xop0 = gen_lowpart_SUBREG (op_mode, xop0);
 722
 723       /* If the destination is a paradoxical subreg such that we need a
 724          truncate to the inner mode, perform the insertion on a temporary and
 725          truncate the result to the original destination.  Note that we can't
 726          just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
 727          X) 0)) is (reg:N X).  */
 728       if (GET_CODE (xop0) == SUBREG
 729           && REG_P (SUBREG_REG (xop0))
 730           && (!TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
 731                                               op_mode)))
 732         {
 733           rtx tem = gen_reg_rtx (op_mode);
 734           emit_move_insn (tem, xop0);
 735           xop0 = tem;
 736           copy_back = true;
 737         }
 738
 739       /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
 740          "backwards" from the size of the unit we are inserting into.
 741          Otherwise, we count bits from the most significant on a
 742          BYTES/BITS_BIG_ENDIAN machine.  */
 743
 744       if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
 745         bitpos = unit - bitsize - bitpos;
 746
 747       /* Convert VALUE to op_mode (which insv insn wants) in VALUE1.  */
 748       value1 = value;
 749       if (GET_MODE (value) != op_mode)
 750         {
 751           if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
 752             {
 753               /* Optimization: Don't bother really extending VALUE
 754                  if it has all the bits we will actually use.  However,
 755                  if we must narrow it, be sure we do it correctly.  */
 756
 757               if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
 758                 {
 759                   rtx tmp;
 760
 761                   tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
 762                   if (! tmp)
 763                     tmp = simplify_gen_subreg (op_mode,
 764                                                force_reg (GET_MODE (value),
 765                                                           value1),
 766                                                GET_MODE (value), 0);
 767                   value1 = tmp;
 768                 }
 769               else
 770                 value1 = gen_lowpart (op_mode, value1);
 771             }
 772           else if (CONST_INT_P (value))
 773             value1 = gen_int_mode (INTVAL (value), op_mode);
 774           else
 775             /* Parse phase is supposed to make VALUE's data type
 776                match that of the component reference, which is a type
 777                at least as wide as the field; so VALUE should have
 778                a mode that corresponds to that type.  */
 779             gcc_assert (CONSTANT_P (value));
 780         }
 781
 782       create_fixed_operand (&ops[0], xop0);
 783       create_integer_operand (&ops[1], bitsize);
 784       create_integer_operand (&ops[2], bitpos);
 785       create_input_operand (&ops[3], value1, op_mode);
 786       if (maybe_expand_insn (CODE_FOR_insv, 4, ops))
 787         {
 788           if (copy_back)
 789             convert_move (op0, xop0, true);
 790           return true;
 791         }
 792       delete_insns_since (last);
 793     }
 794
 795   /* If OP0 is a memory, try copying it to a register and seeing if a
 796      cheap register alternative is available.  */
 797   if (HAVE_insv && MEM_P (op0))
 798     {
 799       enum machine_mode bestmode;
 800       unsigned HOST_WIDE_INT maxbits = MAX_FIXED_MODE_SIZE;
 801
 802       if (bitregion_end)
 803         maxbits = bitregion_end - bitregion_start + 1;
 804
 805       /* Get the mode to use for inserting into this field.  If OP0 is
 806          BLKmode, get the smallest mode consistent with the alignment. If
 807          OP0 is a non-BLKmode object that is no wider than OP_MODE, use its
 808          mode. Otherwise, use the smallest mode containing the field.  */
 809
 810       if (GET_MODE (op0) == BLKmode
 811           || GET_MODE_BITSIZE (GET_MODE (op0)) > maxbits
 812           || (op_mode != MAX_MACHINE_MODE
 813               && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (op_mode)))
 814         bestmode = get_best_mode (bitsize, bitnum,
 815                                   bitregion_start, bitregion_end,
 816                                   MEM_ALIGN (op0),
 817                                   (op_mode == MAX_MACHINE_MODE
 818                                    ? VOIDmode : op_mode),
 819                                   MEM_VOLATILE_P (op0));
 820       else
 821         bestmode = GET_MODE (op0);
 822
 823       if (bestmode != VOIDmode
 824           && GET_MODE_SIZE (bestmode) >= GET_MODE_SIZE (fieldmode)
 825           && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
 826                && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
 827         {
 828           rtx last, tempreg, xop0;
 829           unsigned int unit;
 830           unsigned HOST_WIDE_INT offset, bitpos;
 831
 832           last = get_last_insn ();
 833
 834           /* Adjust address to point to the containing unit of
 835              that mode.  Compute the offset as a multiple of this unit,
 836              counting in bytes.  */
 837           unit = GET_MODE_BITSIZE (bestmode);
 838           offset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
 839           bitpos = bitnum % unit;
 840           xop0 = adjust_bitfield_address (op0, bestmode, offset);
 841
 842           /* Fetch that unit, store the bitfield in it, then store
 843              the unit.  */
 844           tempreg = copy_to_reg (xop0);
 845           if (store_bit_field_1 (tempreg, bitsize, bitpos,
 846                                  bitregion_start, bitregion_end,
 847                                  fieldmode, orig_value, false))
 848             {
 849               emit_move_insn (xop0, tempreg);
 850               return true;
 851             }
 852           delete_insns_since (last);
 853         }
 854     }
 855
 856   if (!fallback_p)
 857     return false;
 858
 859   store_fixed_bit_field (op0, bitsize, bitnum, bitregion_start,
 860                          bitregion_end, value);
 861   return true;
 862 }
 863
 864 /* Emit code that stores the rtx VALUE into the bit-field of the
 865    structure STR_RTX that is BITSIZE bits wide and begins at bit
 866    BITNUM.
 867
 868    BITREGION_START is the bit position of the first bit-field in the
 869    enclosing region and BITREGION_END that of the last one.  Both are
 870    0 when the C++ memory model does not apply or bit-field regions
 871    are not being tracked.
 872
 873    FIELDMODE is the machine mode of the FIELD_DECL node for this field.  */
 874
 875 void
 876 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 877                  unsigned HOST_WIDE_INT bitnum,
 878                  unsigned HOST_WIDE_INT bitregion_start,
 879                  unsigned HOST_WIDE_INT bitregion_end,
 880                  enum machine_mode fieldmode,
 881                  rtx value)
 882 {
 883   bool stored;
 884
 885   /* Under the C++0x memory model nothing outside the bit region may
 886      be written, so rebase a memory reference onto the region start.  */
 887   if (MEM_P (str_rtx) && bitregion_start != 0)
 888     {
 889       unsigned HOST_WIDE_INT region_bytes;
 890       enum machine_mode insv_mode = mode_for_extraction (EP_insv, 3);
 891       enum machine_mode access_mode;
 892
 893       if (insv_mode == MAX_MACHINE_MODE)
 894         insv_mode = VOIDmode;
 895
 896       /* The region has to begin on a byte boundary.  */
 897       gcc_assert (bitregion_start % BITS_PER_UNIT == 0);
 898       region_bytes = bitregion_start / BITS_PER_UNIT;
 899       /* Make every bit position relative to the region start.  */
 900       bitnum -= bitregion_start;
 901       bitregion_end -= bitregion_start;
 902       bitregion_start = 0;
 903
 904       access_mode = get_best_mode (bitsize, bitnum, bitregion_start,
 905                                    bitregion_end, MEM_ALIGN (str_rtx),
 906                                    insv_mode, MEM_VOLATILE_P (str_rtx));
 907       str_rtx = adjust_address (str_rtx, access_mode, region_bytes);
 908     }
 909
 910   stored = store_bit_field_1 (str_rtx, bitsize, bitnum, bitregion_start,
 911                               bitregion_end, fieldmode, value, true);
 912   if (!stored)
 913     gcc_unreachable ();
 914 }
 915 \f
 916 /* Use shifts and boolean operations to store VALUE into a bit field of
 917    width BITSIZE in OP0, starting at bit BITNUM.  */
 918 /* BITREGION_START and BITREGION_END are only consulted when OP0 is a MEM.  */
 919 static void
 920 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
 921                        unsigned HOST_WIDE_INT bitnum,
 922                        unsigned HOST_WIDE_INT bitregion_start,
 923                        unsigned HOST_WIDE_INT bitregion_end,
 924                        rtx value)
 925 {
 926   enum machine_mode mode;
 927   rtx temp;
 928   int all_zero = 0;  /* VALUE is a CONST_INT whose low BITSIZE bits are all 0.  */
 929   int all_one = 0;  /* VALUE is a CONST_INT whose low BITSIZE bits are all 1.  */
 930
 931   /* There is a case not handled here:
 932      a structure with a known alignment of just a halfword
 933      and a field split across two aligned halfwords within the structure.
 934      Or likewise a structure with a known alignment of just a byte
 935      and a field split across two bytes.
 936      Such cases are not supposed to be able to occur.  */
 937
 938   if (MEM_P (op0))
 939     {
 940       unsigned HOST_WIDE_INT maxbits = MAX_FIXED_MODE_SIZE;
 941       /* A nonzero BITREGION_END limits MAXBITS to the width of the bit region.  */
 942       if (bitregion_end)
 943         maxbits = bitregion_end - bitregion_start + 1;
 944
 945       /* Get the proper mode to use for this field.  We want a mode that
 946          includes the entire field.  If such a mode would be larger than
 947          a word, we won't be doing the extraction the normal way.
 948          We don't want a mode bigger than the destination.  */
 949
 950       mode = GET_MODE (op0);
 951       if (GET_MODE_BITSIZE (mode) == 0
 952           || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
 953         mode = word_mode;
 954       /* With strict volatile bit-fields, a volatile MEM keeps its own mode when that mode has nonzero size and is no wider than MAXBITS.  */
 955       if (MEM_VOLATILE_P (op0)
 956           && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
 957           && GET_MODE_BITSIZE (GET_MODE (op0)) <= maxbits
 958           && flag_strict_volatile_bitfields > 0)
 959         mode = GET_MODE (op0);
 960       else
 961         mode = get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
 962                               MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
 963
 964       if (mode == VOIDmode)
 965         {
 966           /* The only way this should occur is if the field spans word
 967              boundaries.  */
 968           store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
 969                                  bitregion_end, value);
 970           return;
 971         }
 972       /* Re-address OP0 as the MODE-sized unit containing BITNUM and make BITNUM relative to that unit.  */
 973       HOST_WIDE_INT bit_offset = bitnum - bitnum % GET_MODE_BITSIZE (mode);
 974       op0 = adjust_bitfield_address (op0, mode, bit_offset / BITS_PER_UNIT);
 975       bitnum -= bit_offset;
 976     }
 977   /* From here on MODE is the mode of the (possibly re-addressed) OP0.  */
 978   mode = GET_MODE (op0);
 979   gcc_assert (SCALAR_INT_MODE_P (mode));
 980
 981   /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
 982      for invalid input, such as f5 from gcc.dg/pr48335-2.c.  */
 983
 984   if (BYTES_BIG_ENDIAN)
 985     /* BITNUM is the distance between our msb
 986        and that of the containing datum.
 987        Convert it to the distance from the lsb.  */
 988     bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
 989
 990   /* Now BITNUM is always the distance between our lsb
 991      and that of OP0.  */
 992
 993   /* Shift VALUE left by BITNUM bits.  If VALUE is not constant,
 994      we must first convert its mode to MODE.  */
 995
 996   if (CONST_INT_P (value))
 997     {
 998       HOST_WIDE_INT v = INTVAL (value);
 999       /* Keep only the low BITSIZE bits of the constant when it is narrower than a HOST_WIDE_INT.  */
1000       if (bitsize < HOST_BITS_PER_WIDE_INT)
1001         v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
1002       /* Record the all-zeros and all-ones cases so the IOR or the AND below can be skipped.  */
1003       if (v == 0)
1004         all_zero = 1;
1005       else if ((bitsize < HOST_BITS_PER_WIDE_INT
1006                 && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
1007                || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
1008         all_one = 1;
1009
1010       value = lshift_value (mode, value, bitnum, bitsize);
1011     }
1012   else
1013     {
1014       int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
1015                       && bitnum + bitsize != GET_MODE_BITSIZE (mode));
1016       /* MUST_AND is clear when VALUE's mode is exactly BITSIZE bits wide or the field ends at the top bit of MODE.  */
1017       if (GET_MODE (value) != mode)
1018         value = convert_to_mode (mode, value, 1);
1019
1020       if (must_and)
1021         value = expand_binop (mode, and_optab, value,
1022                               mask_rtx (mode, 0, bitsize, 0),
1023                               NULL_RTX, 1, OPTAB_LIB_WIDEN);
1024       if (bitnum > 0)
1025         value = expand_shift (LSHIFT_EXPR, mode, value,
1026                               bitnum, NULL_RTX, 1);
1027     }
1028
1029   /* Now clear the chosen bits in OP0,
1030      except that if VALUE is -1 we need not bother.  */
1031   /* We keep the intermediates in registers to allow CSE to combine
1032      consecutive bitfield assignments.  */
1033
1034   temp = force_reg (mode, op0);
1035
1036   if (! all_one)
1037     {
1038       temp = expand_binop (mode, and_optab, temp,
1039                            mask_rtx (mode, bitnum, bitsize, 1),
1040                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
1041       temp = force_reg (mode, temp);
1042     }
1043
1044   /* Now logical-or VALUE into OP0, unless it is zero.  */
1045
1046   if (! all_zero)
1047     {
1048       temp = expand_binop (mode, ior_optab, temp, value,
1049                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
1050       temp = force_reg (mode, temp);
1051     }
1052   /* Move the combined result back into OP0 unless TEMP already is OP0.  */
1053   if (op0 != temp)
1054     {
1055       op0 = copy_rtx (op0);
1056       emit_move_insn (op0, temp);
1057     }
1058 }
1059 \f
1060 /* Store a bit field that is split across multiple accessible memory objects.
1061
1062    OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1063    BITSIZE is the field width; BITPOS the position of its first bit
1064    (within the word).
1065    VALUE is the value to store.
1066    BITREGION_START and BITREGION_END are passed on with every piece stored.
1067    This does not yet handle fields wider than BITS_PER_WORD.  */
1068
1069 static void
1070 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1071                        unsigned HOST_WIDE_INT bitpos,
1072                        unsigned HOST_WIDE_INT bitregion_start,
1073                        unsigned HOST_WIDE_INT bitregion_end,
1074                        rtx value)
1075 {
1076   unsigned int unit;  /* Size in bits of the chunks the destination is handled in.  */
1077   unsigned int bitsdone = 0;  /* Number of bits of the field stored so far.  */
1078
1079   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1080      much at a time.  */
1081   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1082     unit = BITS_PER_WORD;
1083   else
1084     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1085
1086   /* If VALUE is a constant other than a CONST_INT, get it into a register in
1087      WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
1088      that VALUE might be a floating-point constant.  */
1089   if (CONSTANT_P (value) && !CONST_INT_P (value))
1090     {
1091       rtx word = gen_lowpart_common (word_mode, value);
1092
1093       if (word && (value != word))
1094         value = word;
1095       else
1096         value = gen_lowpart_common (word_mode,
1097                                     force_reg (GET_MODE (value) != VOIDmode
1098                                                ? GET_MODE (value)
1099                                                : word_mode, value));
1100     }
1101   /* Store the field one piece at a time until all BITSIZE bits are done.  */
1102   while (bitsdone < bitsize)
1103     {
1104       unsigned HOST_WIDE_INT thissize;  /* Width in bits of the piece stored by this iteration.  */
1105       rtx part, word;  /* PART is the piece of VALUE to store, WORD the rtx it is stored into.  */
1106       unsigned HOST_WIDE_INT thispos;  /* Bit position of the piece within its unit.  */
1107       unsigned HOST_WIDE_INT offset;  /* Index, counted in UNITs, of the unit holding the piece.  */
1108       /* Find which unit the next unstored bit falls in and where inside it.  */
1109       offset = (bitpos + bitsdone) / unit;
1110       thispos = (bitpos + bitsdone) % unit;
1111
1112       /* When region of bytes we can touch is restricted, decrease
1113          UNIT close to the end of the region as needed.  */
1114       if (bitregion_end
1115           && unit > BITS_PER_UNIT
1116           && bitpos + bitsdone - thispos + unit > bitregion_end + 1)
1117         {
1118           unit = unit / 2;
1119           continue;  /* Recompute OFFSET and THISPOS for the smaller UNIT.  */
1120         }
1121
1122       /* THISSIZE must not overrun a word boundary.  Otherwise,
1123          store_fixed_bit_field will call us again, and we will mutually
1124          recurse forever.  */
1125       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1126       thissize = MIN (thissize, unit - thispos);
1127       /* Isolate in PART the THISSIZE bits of VALUE that belong to this piece.  */
1128       if (BYTES_BIG_ENDIAN)
1129         {
1130           /* Fetch successively less significant portions.  */
1131           if (CONST_INT_P (value))
1132             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1133                              >> (bitsize - bitsdone - thissize))
1134                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
1135           else
1136             {
1137               int total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1138               /* The args are chosen so that the last part includes the
1139                  lsb.  Give extract_bit_field the value it needs (with
1140                  endianness compensation) to fetch the piece we want.  */
1141               part = extract_fixed_bit_field (word_mode, value, thissize,
1142                                               total_bits - bitsize + bitsdone,
1143                                               NULL_RTX, 1, false);
1144             }
1145         }
1146       else
1147         {
1148           /* Fetch successively more significant portions.  */
1149           if (CONST_INT_P (value))
1150             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1151                              >> bitsdone)
1152                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
1153           else
1154             part = extract_fixed_bit_field (word_mode, value, thissize,
1155                                             bitsdone, NULL_RTX, 1, false);
1156         }
1157
1158       /* If OP0 is a register, then handle OFFSET here.
1159
1160          When handling multiword bitfields, extract_bit_field may pass
1161          down a word_mode SUBREG of a larger REG for a bitfield that actually
1162          crosses a word boundary.  Thus, for a SUBREG, we must find
1163          the current word starting from the base register.  */
1164       if (GET_CODE (op0) == SUBREG)
1165         {
1166           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1167           enum machine_mode sub_mode = GET_MODE (SUBREG_REG (op0));
1168           if (sub_mode != BLKmode && GET_MODE_SIZE (sub_mode) < UNITS_PER_WORD)
1169             word = word_offset ? const0_rtx : op0;  /* Inner register is narrower than a word: only word 0 is usable.  */
1170           else
1171             word = operand_subword_force (SUBREG_REG (op0), word_offset,
1172                                           GET_MODE (SUBREG_REG (op0)));
1173           offset = 0;  /* OFFSET is already accounted for in the choice of WORD.  */
1174         }
1175       else if (REG_P (op0))
1176         {
1177           enum machine_mode op0_mode = GET_MODE (op0);
1178           if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
1179             word = offset ? const0_rtx : op0;  /* Register is narrower than a word: only word 0 is usable.  */
1180           else
1181             word = operand_subword_force (op0, offset, GET_MODE (op0));
1182           offset = 0;  /* OFFSET is already accounted for in the choice of WORD.  */
1183         }
1184       else
1185         word = op0;
1186
1187       /* OFFSET is in UNITs, and UNIT is in bits.  If WORD is const0_rtx,
1188          it is just an out-of-bounds access.  Ignore it.  */
1189       if (word != const0_rtx)
1190         store_fixed_bit_field (word, thissize, offset * unit + thispos,
1191                                bitregion_start, bitregion_end, part);
1192       bitsdone += thissize;  /* Counted even when the out-of-bounds piece was skipped.  */
1193     }
1194 }
1195 \f
1196 /* Helper for extract_bit_field_1: hand back the extracted value X in
1197    one of the modes the caller accepts.  MODE, TMODE and UNSIGNEDP are
1198    the arguments that were given to extract_bit_field.  */
1199
1200 static rtx
1201 convert_extracted_bit_field (rtx x, enum machine_mode mode,
1202                              enum machine_mode tmode, bool unsignedp)
1203 {
1204   enum machine_mode xmode = GET_MODE (x);
1205   enum machine_mode int_mode;
1206   rtx as_int;
1207
1208   /* X is acceptable as it stands if it is already in MODE or TMODE.  */
1209   if (xmode == tmode || xmode == mode)
1210     return x;
1211
1212   /* A scalar integer TMODE is reachable by an ordinary conversion.  */
1213   if (SCALAR_INT_MODE_P (tmode))
1214     return convert_to_mode (tmode, x, unsignedp);
1215
1216   /* Otherwise convert X to the integer mode of TMODE's size, force it
1217      into a register and take the low part of that register in TMODE.  */
1218   int_mode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1219   as_int = force_reg (int_mode, convert_to_mode (int_mode, x, unsignedp));
1220   return gen_lowpart (tmode, as_int);
1221 }
1222
1223 /* A subroutine of extract_bit_field, with the same arguments.
1224    If FALLBACK_P is true, fall back to extract_fixed_bit_field
1225    if we can find no other means of implementing the operation.
1226    If FALLBACK_P is false, return NULL instead.  */
1227
1228 static rtx
1229 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1230                      unsigned HOST_WIDE_INT bitnum,
1231                      int unsignedp, bool packedp, rtx target,
1232                      enum machine_mode mode, enum machine_mode tmode,
1233                      bool fallback_p)
1234 {
1235   rtx op0 = str_rtx;
1236   enum machine_mode int_mode;
1237   enum machine_mode ext_mode;
1238   enum machine_mode mode1;
1239
1240   if (tmode == VOIDmode)
1241     tmode = mode;
1242
1243   while (GET_CODE (op0) == SUBREG)
1244     {
1245       bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1246       op0 = SUBREG_REG (op0);
1247     }
1248
1249   /* If we have an out-of-bounds access to a register, just return an
1250      uninitialized register of the required mode.  This can occur if the
1251      source code contains an out-of-bounds access to a small array.  */
1252   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1253     return gen_reg_rtx (tmode);
1254
1255   if (REG_P (op0)
1256       && mode == GET_MODE (op0)
1257       && bitnum == 0
1258       && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1259     {
1260       /* We're trying to extract a full register from itself.  */
1261       return op0;
1262     }
1263
1264   /* See if we can get a better vector mode before extracting.  */
1265   if (VECTOR_MODE_P (GET_MODE (op0))
1266       && !MEM_P (op0)
1267       && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1268     {
1269       enum machine_mode new_mode;
1270
1271       if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1272         new_mode = MIN_MODE_VECTOR_FLOAT;
1273       else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1274         new_mode = MIN_MODE_VECTOR_FRACT;
1275       else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1276         new_mode = MIN_MODE_VECTOR_UFRACT;
1277       else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1278         new_mode = MIN_MODE_VECTOR_ACCUM;
1279       else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1280         new_mode = MIN_MODE_VECTOR_UACCUM;
1281       else
1282         new_mode = MIN_MODE_VECTOR_INT;
1283
1284       for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1285         if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1286             && targetm.vector_mode_supported_p (new_mode))
1287           break;
1288       if (new_mode != VOIDmode)
1289         op0 = gen_lowpart (new_mode, op0);
1290     }
1291
1292   /* Use vec_extract patterns for extracting parts of vectors whenever
1293      available.  */
1294   if (VECTOR_MODE_P (GET_MODE (op0))
1295       && !MEM_P (op0)
1296       && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing
1297       && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1298           == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1299     {
1300       struct expand_operand ops[3];
1301       enum machine_mode outermode = GET_MODE (op0);
1302       enum machine_mode innermode = GET_MODE_INNER (outermode);
1303       enum insn_code icode = optab_handler (vec_extract_optab, outermode);
1304       unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1305
1306       create_output_operand (&ops[0], target, innermode);
1307       create_input_operand (&ops[1], op0, outermode);
1308       create_integer_operand (&ops[2], pos);
1309       if (maybe_expand_insn (icode, 3, ops))
1310         {
1311           target = ops[0].value;
1312           if (GET_MODE (target) != mode)
1313             return gen_lowpart (tmode, target);
1314           return target;
1315         }
1316     }
1317
1318   /* Make sure we are playing with integral modes.  Pun with subregs
1319      if we aren't.  */
1320   {
1321     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1322     if (imode != GET_MODE (op0))
1323       {
1324         if (MEM_P (op0))
1325           op0 = adjust_bitfield_address (op0, imode, 0);
1326         else if (imode != BLKmode)
1327           {
1328             op0 = gen_lowpart (imode, op0);
1329
1330             /* If we got a SUBREG, force it into a register since we
1331                aren't going to be able to do another SUBREG on it.  */
1332             if (GET_CODE (op0) == SUBREG)
1333               op0 = force_reg (imode, op0);
1334           }
1335         else if (REG_P (op0))
1336           {
1337             rtx reg, subreg;
1338             imode = smallest_mode_for_size (GET_MODE_BITSIZE (GET_MODE (op0)),
1339                                             MODE_INT);
1340             reg = gen_reg_rtx (imode);
1341             subreg = gen_lowpart_SUBREG (GET_MODE (op0), reg);
1342             emit_move_insn (subreg, op0);
1343             op0 = reg;
1344             bitnum += SUBREG_BYTE (subreg) * BITS_PER_UNIT;
1345           }
1346         else
1347           {
1348             rtx mem = assign_stack_temp (GET_MODE (op0),
1349                                          GET_MODE_SIZE (GET_MODE (op0)));
1350             emit_move_insn (mem, op0);
1351             op0 = adjust_bitfield_address (mem, BLKmode, 0);
1352           }
1353       }
1354   }
1355
1356   /* ??? We currently assume TARGET is at least as big as BITSIZE.
1357      If that's wrong, the solution is to test for it and set TARGET to 0
1358      if needed.  */
1359
1360   /* If the bitfield is volatile, we need to make sure the access
1361      remains on a type-aligned boundary.  */
1362   if (GET_CODE (op0) == MEM
1363       && MEM_VOLATILE_P (op0)
1364       && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
1365       && flag_strict_volatile_bitfields > 0)
1366     goto no_subreg_mode_swap;
1367
1368   /* Only scalar integer modes can be converted via subregs.  There is an
1369      additional problem for FP modes here in that they can have a precision
1370      which is different from the size.  mode_for_size uses precision, but
1371      we want a mode based on the size, so we must avoid calling it for FP
1372      modes.  */
1373   mode1 = mode;
1374   if (SCALAR_INT_MODE_P (tmode))
1375     {
1376       enum machine_mode try_mode = mode_for_size (bitsize,
1377                                                   GET_MODE_CLASS (tmode), 0);
1378       if (try_mode != BLKmode)
1379         mode1 = try_mode;
1380     }
1381   gcc_assert (mode1 != BLKmode);
1382
1383   /* Extraction of a full MODE1 value can be done with a subreg as long
1384      as the least significant bit of the value is the least significant
1385      bit of either OP0 or a word of OP0.  */
1386   if (!MEM_P (op0)
1387       && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
1388       && bitsize == GET_MODE_BITSIZE (mode1)
1389       && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0)))
1390     {
1391       rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1392                                      bitnum / BITS_PER_UNIT);
1393       if (sub)
1394         return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1395     }
1396
1397   /* Extraction of a full MODE1 value can be done with a load as long as
1398      the field is on a byte boundary and is sufficiently aligned.  */
1399   if (MEM_P (op0)
1400       && bitnum % BITS_PER_UNIT == 0
1401       && bitsize == GET_MODE_BITSIZE (mode1)
1402       && (!SLOW_UNALIGNED_ACCESS (mode1, MEM_ALIGN (op0))
1403           || (bitnum % bitsize == 0
1404               && MEM_ALIGN (op0) % bitsize == 0)))
1405     {
1406       op0 = adjust_bitfield_address (op0, mode1, bitnum / BITS_PER_UNIT);
1407       return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1408     }
1409
1410  no_subreg_mode_swap:
1411
1412   /* Handle fields bigger than a word.  */
1413
1414   if (bitsize > BITS_PER_WORD)
1415     {
1416       /* Here we transfer the words of the field
1417          in the order least significant first.
1418          This is because the most significant word is the one which may
1419          be less than full.  */
1420
1421       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1422       unsigned int i;
1423       rtx last;
1424
1425       if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1426         target = gen_reg_rtx (mode);
1427
1428       /* Indicate for flow that the entire target reg is being set.  */
1429       emit_clobber (target);
1430
1431       last = get_last_insn ();
1432       for (i = 0; i < nwords; i++)
1433         {
1434           /* If I is 0, use the low-order word in both field and target;
1435              if I is 1, use the next to lowest word; and so on.  */
1436           /* Word number in TARGET to use.  */
1437           unsigned int wordnum
1438             = (WORDS_BIG_ENDIAN
1439                ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1440                : i);
1441           /* Offset from start of field in OP0.  */
1442           unsigned int bit_offset = (WORDS_BIG_ENDIAN
1443                                      ? MAX (0, ((int) bitsize - ((int) i + 1)
1444                                                 * (int) BITS_PER_WORD))
1445                                      : (int) i * BITS_PER_WORD);
1446           rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1447           rtx result_part
1448             = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1449                                              bitsize - i * BITS_PER_WORD),
1450                                    bitnum + bit_offset, 1, false, target_part,
1451                                    mode, word_mode, fallback_p);
1452
1453           gcc_assert (target_part);
1454           if (!result_part)
1455             {
1456               delete_insns_since (last);
1457               return NULL;
1458             }
1459
1460           if (result_part != target_part)
1461             emit_move_insn (target_part, result_part);
1462         }
1463
1464       if (unsignedp)
1465         {
1466           /* Unless we've filled TARGET, the upper regs in a multi-reg value
1467              need to be zero'd out.  */
1468           if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1469             {
1470               unsigned int i, total_words;
1471
1472               total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1473               for (i = nwords; i < total_words; i++)
1474                 emit_move_insn
1475                   (operand_subword (target,
1476                                     WORDS_BIG_ENDIAN ? total_words - i - 1 : i,
1477                                     1, VOIDmode),
1478                    const0_rtx);
1479             }
1480           return target;
1481         }
1482
1483       /* Signed bit field: sign-extend with two arithmetic shifts.  */
1484       target = expand_shift (LSHIFT_EXPR, mode, target,
1485                              GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1486       return expand_shift (RSHIFT_EXPR, mode, target,
1487                            GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1488     }
1489
1490   /* If OP0 is a multi-word register, narrow it to the affected word.
1491      If the region spans two words, defer to extract_split_bit_field.  */
1492   if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1493     {
1494       op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
1495                                  bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1496       bitnum %= BITS_PER_WORD;
1497       if (bitnum + bitsize > BITS_PER_WORD)
1498         {
1499           if (!fallback_p)
1500             return NULL_RTX;
1501           target = extract_split_bit_field (op0, bitsize, bitnum, unsignedp);
1502           return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1503         }
1504     }
1505
1506   /* From here on we know the desired field is smaller than a word.
1507      If OP0 is a register, it too fits within a word.  */
1508
1509   ext_mode = mode_for_extraction (unsignedp ? EP_extzv : EP_extv, 0);
1510   if (ext_mode != MAX_MACHINE_MODE
1511       && bitsize > 0
1512       && GET_MODE_BITSIZE (ext_mode) >= bitsize
1513       /* Do not use extv/extzv for volatile bitfields when
1514          -fstrict-volatile-bitfields is in effect.  */
1515       && !(MEM_P (op0) && MEM_VOLATILE_P (op0)
1516            && flag_strict_volatile_bitfields > 0)
1517       /* If op0 is a register, we need it in EXT_MODE to make it
1518          acceptable to the format of ext(z)v.  */
1519       && !(GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode))
1520     {
1521       struct expand_operand ops[4];
1522       unsigned HOST_WIDE_INT bitpos = bitnum;
1523       rtx xop0 = op0;
1524       rtx xtarget = target;
1525       rtx xspec_target = target;
1526       rtx xspec_target_subreg = 0;
1527       unsigned unit = GET_MODE_BITSIZE (ext_mode);
1528
1529       /* If op0 is a register, we need it in EXT_MODE to make it
1530          acceptable to the format of ext(z)v.  */
1531       if (REG_P (xop0) && GET_MODE (xop0) != ext_mode)
1532         xop0 = gen_lowpart_SUBREG (ext_mode, xop0);
1533
1534       if (MEM_P (xop0))
1535         {
1536           /* Get a reference to the first byte of the field.  */
1537           xop0 = adjust_bitfield_address (xop0, byte_mode,
1538                                           bitpos / BITS_PER_UNIT);
1539           bitpos %= BITS_PER_UNIT;
1540         }
1541       else
1542         {
1543           /* Convert from counting within OP0 to counting in EXT_MODE.  */
1544           if (BYTES_BIG_ENDIAN)
1545             bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1546         }
1547
1548       /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1549          "backwards" from the size of the unit we are extracting from.
1550          Otherwise, we count bits from the most significant on a
1551          BYTES/BITS_BIG_ENDIAN machine.  */
1552
1553       if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1554         bitpos = unit - bitsize - bitpos;
1555
1556       if (xtarget == 0)
1557         xtarget = xspec_target = gen_reg_rtx (tmode);
1558
1559       if (GET_MODE (xtarget) != ext_mode)
1560         {
1561           /* Don't use LHS paradoxical subreg if explicit truncation is needed
1562              between the mode of the extraction (word_mode) and the target
1563              mode.  Instead, create a temporary and use convert_move to set
1564              the target.  */
1565           if (REG_P (xtarget)
1566               && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (xtarget), ext_mode))
1567             {
1568               xtarget = gen_lowpart (ext_mode, xtarget);
1569               if (GET_MODE_PRECISION (ext_mode)
1570                   > GET_MODE_PRECISION (GET_MODE (xspec_target)))
1571                 xspec_target_subreg = xtarget;
1572             }
1573           else
1574             xtarget = gen_reg_rtx (ext_mode);
1575         }
1576
1577       create_output_operand (&ops[0], xtarget, ext_mode);
1578       create_fixed_operand (&ops[1], xop0);
1579       create_integer_operand (&ops[2], bitsize);
1580       create_integer_operand (&ops[3], bitpos);
1581       if (maybe_expand_insn (unsignedp ? CODE_FOR_extzv : CODE_FOR_extv,
1582                              4, ops))
1583         {
1584           xtarget = ops[0].value;
1585           if (xtarget == xspec_target)
1586             return xtarget;
1587           if (xtarget == xspec_target_subreg)
1588             return xspec_target;
1589           return convert_extracted_bit_field (xtarget, mode, tmode, unsignedp);
1590         }
1591     }
1592
1593   /* If OP0 is a memory, try copying it to a register and seeing if a
1594      cheap register alternative is available.  */
1595   if (ext_mode != MAX_MACHINE_MODE && MEM_P (op0))
1596     {
1597       enum machine_mode bestmode;
1598
1599       /* Get the mode to use for inserting into this field.  If
1600          OP0 is BLKmode, get the smallest mode consistent with the
1601          alignment. If OP0 is a non-BLKmode object that is no
1602          wider than EXT_MODE, use its mode. Otherwise, use the
1603          smallest mode containing the field.  */
1604
1605       if (GET_MODE (op0) == BLKmode
1606           || (ext_mode != MAX_MACHINE_MODE
1607               && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (ext_mode)))
1608         bestmode = get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0),
1609                                   (ext_mode == MAX_MACHINE_MODE
1610                                    ? VOIDmode : ext_mode),
1611                                   MEM_VOLATILE_P (op0));
1612       else
1613         bestmode = GET_MODE (op0);
1614
1615       if (bestmode != VOIDmode
1616           && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
1617                && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
1618         {
1619           unsigned HOST_WIDE_INT offset, bitpos;
1620
1621           /* Compute the offset as a multiple of this unit,
1622              counting in bytes.  */
1623           unsigned int unit = GET_MODE_BITSIZE (bestmode);
1624           offset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1625           bitpos = bitnum % unit;
1626
1627           /* Make sure the register is big enough for the whole field.  */
1628           if (bitpos + bitsize <= unit)
1629             {
1630               rtx last, result, xop0;
1631
1632               last = get_last_insn ();
1633
1634               /* Fetch it to a register in that size.  */
1635               xop0 = adjust_bitfield_address (op0, bestmode, offset);
1636               xop0 = force_reg (bestmode, xop0);
1637               result = extract_bit_field_1 (xop0, bitsize, bitpos,
1638                                             unsignedp, packedp, target,
1639                                             mode, tmode, false);
1640               if (result)
1641                 return result;
1642
1643               delete_insns_since (last);
1644             }
1645         }
1646     }
1647
1648   if (!fallback_p)
1649     return NULL;
1650
1651   /* Find a correspondingly-sized integer field, so we can apply
1652      shifts and masks to it.  */
1653   int_mode = int_mode_for_mode (tmode);
1654   if (int_mode == BLKmode)
1655     int_mode = int_mode_for_mode (mode);
1656   /* Should probably push op0 out to memory and then do a load.  */
1657   gcc_assert (int_mode != BLKmode);
1658
1659   target = extract_fixed_bit_field (int_mode, op0, bitsize, bitnum,
1660                                     target, unsignedp, packedp);
1661   return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1662 }
1663
1664 /* Generate code to extract a byte-field from STR_RTX
1665    containing BITSIZE bits, starting at BITNUM,
1666    and put it in TARGET if possible (if TARGET is nonzero).
1667    Regardless of TARGET, we return the rtx for where the value is placed.
1668
1669    STR_RTX is the structure containing the byte (a REG or MEM).
1670    UNSIGNEDP is nonzero if this is an unsigned bit field.
1671    PACKEDP is nonzero if the field has the packed attribute.
1672    MODE is the natural mode of the field value once extracted.
1673    TMODE is the mode the caller would like the value to have;
1674    but the value may be returned with type MODE instead.
1675
1676    If a TARGET is specified and we can store in it at no extra cost,
1677    we do so, and return TARGET.
1678    Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1679    if they are equally easy.  */
1680
1681 rtx
1682 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1683                    unsigned HOST_WIDE_INT bitnum, int unsignedp, bool packedp,
1684                    rtx target, enum machine_mode mode, enum machine_mode tmode)
1685 {
1686   return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp, packedp,
1687                               target, mode, tmode, true);
1688 }
1689 \f
1690 /* Use shifts and boolean operations to extract a field of BITSIZE bits
1691    from bit BITNUM of OP0.
1692
1693    UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1694    PACKEDP is true if the field has the packed attribute.
        TMODE is the mode an unsigned result is converted to.  A signed
        result is instead computed in the narrowest integer mode that holds
        BITSIZE + BITNUM bits, and TARGET is only offered to the shifts when
        that mode equals TMODE.
1695
1696    If TARGET is nonzero, attempts to store the value there
1697    and return TARGET, but this is not guaranteed.
1698    If TARGET is not used, create a pseudo-reg of mode TMODE for the value.

        When OP0 is a MEM, an access mode is chosen first and OP0/BITNUM are
        rebased onto the unit of that mode containing the field.  If no
        suitable mode exists (get_best_mode returns VOIDmode), or a packed
        volatile field would need a misaligned access on a STRICT_ALIGNMENT
        target, the work is delegated to extract_split_bit_field.  */
1699
1700 static rtx
1701 extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
1702                          unsigned HOST_WIDE_INT bitsize,
1703                          unsigned HOST_WIDE_INT bitnum, rtx target,
1704                          int unsignedp, bool packedp)
1705 {
1706   enum machine_mode mode;
1707
1708   if (MEM_P (op0))
1709     {
1710       /* Get the proper mode to use for this field.  We want a mode that
1711          includes the entire field.  If such a mode would be larger than
1712          a word, we won't be doing the extraction the normal way.  */
1713
1714       if (MEM_VOLATILE_P (op0)
1715           && flag_strict_volatile_bitfields > 0)
1716         {
               /* With -fstrict-volatile-bitfields the access mode is taken,
                  in order of preference, from OP0, from TARGET, or from
                  TMODE; a mode of zero bitsize is skipped.  */
1717           if (GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
1718             mode = GET_MODE (op0);
1719           else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1720             mode = GET_MODE (target);
1721           else
1722             mode = tmode;
1723         }
1724       else
1725         mode = get_best_mode (bitsize, bitnum, 0, 0,
1726                               MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));
1727
1728       if (mode == VOIDmode)
1729         /* The only way this should occur is if the field spans word
1730            boundaries.  */
1731         return extract_split_bit_field (op0, bitsize, bitnum, unsignedp);
1732
           /* TOTAL_BITS is the width of the access mode.  BIT_OFFSET is BITNUM
              rounded down to a multiple of TOTAL_BITS, i.e. the bit position
              of the MODE-sized unit in which the field starts.  */
1733       unsigned int total_bits = GET_MODE_BITSIZE (mode);
1734       HOST_WIDE_INT bit_offset = bitnum - bitnum % total_bits;
1735
1736       /* If we're accessing a volatile MEM, we can't apply BIT_OFFSET
1737          if it results in a multi-word access where we otherwise wouldn't
1738          have one.  So, check for that case here.  */
           /* The test below holds when the field fits in one MODE-sized
              access that starts on a byte boundary, but would overrun the
              MODE-aligned unit selected by BIT_OFFSET.  */
1739       if (MEM_P (op0)
1740           && MEM_VOLATILE_P (op0)
1741           && flag_strict_volatile_bitfields > 0
1742           && bitnum % BITS_PER_UNIT + bitsize <= total_bits
1743           && bitnum % GET_MODE_BITSIZE (mode) + bitsize > total_bits)
1744         {
1745           if (STRICT_ALIGNMENT)
1746             {
                   /* Ensures the explanatory note below is emitted at most
                      once.  */
1747               static bool informed_about_misalignment = false;
1748
1749               if (packedp)
1750                 {
                       /* For packed fields, warn and split the extraction
                          instead of using a single misaligned access.  */
1751                   if (bitsize == total_bits)
1752                     warning_at (input_location, OPT_fstrict_volatile_bitfields,
1753                                 "multiple accesses to volatile structure"
1754                                 " member because of packed attribute");
1755                   else
1756                     warning_at (input_location, OPT_fstrict_volatile_bitfields,
1757                                 "multiple accesses to volatile structure"
1758                                 " bitfield because of packed attribute");
1759
1760                   return extract_split_bit_field (op0, bitsize, bitnum,
1761                                                   unsignedp);
1762                 }
1763
                   /* Not packed: warn, then fall through and use the single
                      byte-aligned access set up below.  */
1764               if (bitsize == total_bits)
1765                 warning_at (input_location, OPT_fstrict_volatile_bitfields,
1766                             "mis-aligned access used for structure member");
1767               else
1768                 warning_at (input_location, OPT_fstrict_volatile_bitfields,
1769                             "mis-aligned access used for structure bitfield");
1770
1771               if (! informed_about_misalignment)
1772                 {
1773                   informed_about_misalignment = true;
1774                   inform (input_location,
1775                           "when a volatile object spans multiple type-sized"
1776                           " locations, the compiler must choose between using"
1777                           " a single mis-aligned access to preserve the"
1778                           " volatility, or using multiple aligned accesses"
1779                           " to avoid runtime faults; this code may fail at"
1780                           " runtime if the hardware does not allow this"
1781                           " access");
1782                 }
1783             }
               /* Start the access at the byte containing the field's first
                  bit rather than at a MODE-aligned boundary.  */
1784           bit_offset = bitnum - bitnum % BITS_PER_UNIT;
1785         }
           /* Re-address OP0 in MODE at the chosen unit and make BITNUM
              relative to the start of that unit.  */
1786       op0 = adjust_bitfield_address (op0, mode, bit_offset / BITS_PER_UNIT);
1787       bitnum -= bit_offset;
1788     }
1789
1790   mode = GET_MODE (op0);
1791   gcc_assert (SCALAR_INT_MODE_P (mode));
1792
1793   /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1794      for invalid input, such as extract equivalent of f5 from
1795      gcc.dg/pr48335-2.c.  */
1796
1797   if (BYTES_BIG_ENDIAN)
1798     /* BITNUM is the distance between our msb and that of OP0.
1799        Convert it to the distance from the lsb.  */
1800     bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1801
1802   /* Now BITNUM is always the distance between the field's lsb and that of OP0.
1803      We have reduced the big-endian case to the little-endian case.  */
1804
1805   if (unsignedp)
1806     {
1807       if (bitnum)
1808         {
1809           /* If the field does not already start at the lsb,
1810              shift it so it does.  */
1811           /* Maybe propagate the target for the shift.  */
1812           rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
               /* The shift is done in MODE, so TARGET is only offered to it
                  when MODE is also the requested result mode.  */
1813           if (tmode != mode)
1814             subtarget = 0;
1815           op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
1816         }
1817       /* Convert the value to the desired mode.  */
1818       if (mode != tmode)
1819         op0 = convert_to_mode (tmode, op0, 1);
1820
1821       /* Unless the msb of the field used to be the msb when we shifted,
1822          mask out the upper bits.  */
1823
1824       if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
1825         return expand_binop (GET_MODE (op0), and_optab, op0,
1826                              mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1827                              target, 1, OPTAB_LIB_WIDEN);
1828       return op0;
1829     }
1830
1831   /* To extract a signed bit-field, first shift its msb to the msb of the word,
1832      then arithmetic-shift its lsb to the lsb of the word.  */
1833   op0 = force_reg (mode, op0);
1834
1835   /* Find the narrowest integer mode that contains the field.  */
1836
1837   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1838        mode = GET_MODE_WIDER_MODE (mode))
1839     if (GET_MODE_BITSIZE (mode) >= bitsize + bitnum)
1840       {
1841         op0 = convert_to_mode (mode, op0, 0);
1842         break;
1843       }
1844
       /* Only offer TARGET to the shifts when they are done in TMODE.  */
1845   if (mode != tmode)
1846     target = 0;
1847
1848   if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
1849     {
           /* AMOUNT is the number of bits above the field's msb in MODE;
              shifting left by it puts the field's msb at the msb of MODE.  */
1850       int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
1851       /* Maybe propagate the target for the shift.  */
1852       rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1853       op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1854     }
1855
       /* The final right shift is arithmetic (UNSIGNEDP argument 0); it moves
          the field's lsb down to bit 0 of MODE.  */
1856   return expand_shift (RSHIFT_EXPR, mode, op0,
1857                        GET_MODE_BITSIZE (mode) - bitsize, target, 0);
1858 }
1859 \f
1860 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1861    of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1862    complement of that if COMPLEMENT.  The mask is truncated if
1863    necessary to the width of mode MODE.  The mask is zero-extended if
1864    BITSIZE+BITPOS is too small for MODE.  */
1865
1866 static rtx
1867 mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
1868 {
1869   double_int mask;
1870
1871   mask = double_int::mask (bitsize);
1872   mask = mask.llshift (bitpos, HOST_BITS_PER_DOUBLE_INT);
1873
1874   if (complement)
1875     mask = ~mask;
1876
1877   return immed_double_int_const (mask, mode);
1878 }
1879
1880 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1881    VALUE truncated to BITSIZE bits and then shifted left BITPOS bits.  */
1882
1883 static rtx
1884 lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize)
1885 {
1886   double_int val;
1887
1888   val = double_int::from_uhwi (INTVAL (value)).zext (bitsize);
1889   val = val.llshift (bitpos, HOST_BITS_PER_DOUBLE_INT);
1890
1891   return immed_double_int_const (val, mode);
1892 }
1893 \f
1894 /* Extract a bit field that is split across two words
1895    and return an RTX for the result.
1896
1897    OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1898    BITSIZE is the field width; BITPOS, position of its first bit, in the word.
1899    UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.

        The field is read piecewise: each piece is extracted as an unsigned
        word_mode value with extract_fixed_bit_field, shifted into its place
        and IORed into the result, which is therefore built in word_mode.  */
1900
1901 static rtx
1902 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1903                          unsigned HOST_WIDE_INT bitpos, int unsignedp)
1904 {
       /* UNIT is the size in bits of the chunks OP0 is read in; BITSDONE
          counts the bits of the field extracted so far.  */
1905   unsigned int unit;
1906   unsigned int bitsdone = 0;
1907   rtx result = NULL_RTX;
1908   int first = 1;
1909
1910   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1911      much at a time.  */
1912   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1913     unit = BITS_PER_WORD;
1914   else
1915     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1916
1917   while (bitsdone < bitsize)
1918     {
1919       unsigned HOST_WIDE_INT thissize;
1920       rtx part, word;
1921       unsigned HOST_WIDE_INT thispos;
1922       unsigned HOST_WIDE_INT offset;
1923
           /* OFFSET is the index of the UNIT-sized chunk holding the next
              bit to extract; THISPOS is that bit's position inside it.  */
1924       offset = (bitpos + bitsdone) / unit;
1925       thispos = (bitpos + bitsdone) % unit;
1926
1927       /* THISSIZE must not overrun a word boundary.  Otherwise,
1928          extract_fixed_bit_field will call us again, and we will mutually
1929          recurse forever.  */
1930       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1931       thissize = MIN (thissize, unit - thispos);
1932
1933       /* If OP0 is a register, then handle OFFSET here.
1934
1935          When handling multiword bitfields, extract_bit_field may pass
1936          down a word_mode SUBREG of a larger REG for a bitfield that actually
1937          crosses a word boundary.  Thus, for a SUBREG, we must find
1938          the current word starting from the base register.  */
1939       if (GET_CODE (op0) == SUBREG)
1940         {
1941           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1942           word = operand_subword_force (SUBREG_REG (op0), word_offset,
1943                                         GET_MODE (SUBREG_REG (op0)));
1944           offset = 0;
1945         }
1946       else if (REG_P (op0))
1947         {
1948           word = operand_subword_force (op0, offset, GET_MODE (op0));
1949           offset = 0;
1950         }
1951       else
             /* For a MEM, OFFSET is left nonzero and folded into the bit
                position passed to extract_fixed_bit_field below.  */
1952         word = op0;
1953
1954       /* Extract the parts in bit-counting order,
1955          whose meaning is determined by BYTES_BIG_ENDIAN.
1956          OFFSET is in UNITs, and UNIT is in bits.  */
1957       part = extract_fixed_bit_field (word_mode, word, thissize,
1958                                       offset * unit + thispos, 0, 1, false);
1959       bitsdone += thissize;
1960
1961       /* Shift this part into place for the result.  */
           /* On big-endian targets earlier parts are more significant, so a
              part is shifted left by the number of bits still to come; on
              little-endian targets it is shifted left by the number of bits
              already extracted before it.  */
1962       if (BYTES_BIG_ENDIAN)
1963         {
1964           if (bitsize != bitsdone)
1965             part = expand_shift (LSHIFT_EXPR, word_mode, part,
1966                                  bitsize - bitsdone, 0, 1);
1967         }
1968       else
1969         {
1970           if (bitsdone != thissize)
1971             part = expand_shift (LSHIFT_EXPR, word_mode, part,
1972                                  bitsdone - thissize, 0, 1);
1973         }
1974
1975       if (first)
1976         result = part;
1977       else
1978         /* Combine the parts with bitwise or.  This works
1979            because we extracted each part as an unsigned bit field.  */
1980         result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
1981                                OPTAB_LIB_WIDEN);
1982
1983       first = 0;
1984     }
1985
1986   /* Unsigned bit field: we are done.  */
1987   if (unsignedp)
1988     return result;
1989   /* Signed bit field: sign-extend with two arithmetic shifts.  */
       /* The left shift moves the field's msb to the msb of the word; the
          right shift by the same count brings it back down.  */
1990   result = expand_shift (LSHIFT_EXPR, word_mode, result,
1991                          BITS_PER_WORD - bitsize, NULL_RTX, 0);
1992   return expand_shift (RSHIFT_EXPR, word_mode, result,
1993                        BITS_PER_WORD - bitsize, NULL_RTX, 0);
1994 }
1995 \f
1996 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
1997    the bit pattern.  SRC_MODE is the mode of SRC; if this is smaller than
1998    MODE, fill the upper bits with zeros.  Fail if the layout of either
1999    mode is unknown (as for CC modes) or if the extraction would involve
2000    unprofitable mode punning.  Return the value on success, otherwise
2001    return null.
2002
2003    This is different from gen_lowpart* in these respects:
2004
2005      - the returned value must always be considered an rvalue
2006
2007      - when MODE is wider than SRC_MODE, the extraction involves
2008        a zero extension
2009
2010      - when MODE is smaller than SRC_MODE, the extraction involves
2011        a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
2012
2013    In other words, this routine performs a computation, whereas the
2014    gen_lowpart* routines are conceptually lvalue or rvalue subreg
2015    operations.  */
2016
2017 rtx
2018 extract_low_bits (enum machine_mode mode, enum machine_mode src_mode, rtx src)
2019 {
2020   enum machine_mode int_mode, src_int_mode;
2021
2022   if (mode == src_mode)
2023     return src;
2024
2025   if (CONSTANT_P (src))
2026     {
2027       /* simplify_gen_subreg can't be used here, as if simplify_subreg
2028          fails, it will happily create (subreg (symbol_ref)) or similar
2029          invalid SUBREGs.  */
2030       unsigned int byte = subreg_lowpart_offset (mode, src_mode);
2031       rtx ret = simplify_subreg (mode, src, src_mode, byte);
2032       if (ret)
2033         return ret;
2034
2035       if (GET_MODE (src) == VOIDmode
2036           || !validate_subreg (mode, src_mode, src, byte))
2037         return NULL_RTX;
2038
2039       src = force_reg (GET_MODE (src), src);
2040       return gen_rtx_SUBREG (mode, src, byte);
2041     }
2042
2043   if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2044     return NULL_RTX;
2045
2046   if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2047       && MODES_TIEABLE_P (mode, src_mode))
2048     {
2049       rtx x = gen_lowpart_common (mode, src);
2050       if (x)
2051         return x;
2052     }
2053
2054   src_int_mode = int_mode_for_mode (src_mode);
2055   int_mode = int_mode_for_mode (mode);
2056   if (src_int_mode == BLKmode || int_mode == BLKmode)
2057     return NULL_RTX;
2058
2059   if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2060     return NULL_RTX;
2061   if (!MODES_TIEABLE_P (int_mode, mode))
2062     return NULL_RTX;
2063
2064   src = gen_lowpart (src_int_mode, src);
2065   src = convert_modes (int_mode, src_int_mode, src, true);
2066   src = gen_lowpart (mode, src);
2067   return src;
2068 }
2069 \f
2070 /* Add INC into TARGET.  */
2071
2072 void
2073 expand_inc (rtx target, rtx inc)
2074 {
2075   rtx value = expand_binop (GET_MODE (target), add_optab,
2076                             target, inc,
2077                             target, 0, OPTAB_LIB_WIDEN);
2078   if (value != target)
2079     emit_move_insn (target, value);
2080 }
2081
2082 /* Subtract DEC from TARGET.  */
2083
2084 void
2085 expand_dec (rtx target, rtx dec)
2086 {
2087   rtx value = expand_binop (GET_MODE (target), sub_optab,
2088                             target, dec,
2089                             target, 0, OPTAB_LIB_WIDEN);
2090   if (value != target)
2091     emit_move_insn (target, value);
2092 }
2093 \f
2094 /* Output a shift instruction for expression code CODE,
2095    with SHIFTED being the rtx for the value to shift,
2096    and AMOUNT the rtx for the amount to shift by.
2097    Store the result in the rtx TARGET, if that is convenient.
2098    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2099    Return the rtx for where the value is.  */
2100
2101 static rtx
2102 expand_shift_1 (enum tree_code code, enum machine_mode mode, rtx shifted,
2103                 rtx amount, rtx target, int unsignedp)
2104 {
2105   rtx op1, temp = 0;
2106   int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2107   int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2108   optab lshift_optab = ashl_optab;
2109   optab rshift_arith_optab = ashr_optab;
2110   optab rshift_uns_optab = lshr_optab;
2111   optab lrotate_optab = rotl_optab;
2112   optab rrotate_optab = rotr_optab;
2113   enum machine_mode op1_mode;
2114   int attempt;
2115   bool speed = optimize_insn_for_speed_p ();
2116
2117   op1 = amount;
2118   op1_mode = GET_MODE (op1);
2119
2120   /* Determine whether the shift/rotate amount is a vector, or scalar.  If the
2121      shift amount is a vector, use the vector/vector shift patterns.  */
2122   if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2123     {
2124       lshift_optab = vashl_optab;
2125       rshift_arith_optab = vashr_optab;
2126       rshift_uns_optab = vlshr_optab;
2127       lrotate_optab = vrotl_optab;
2128       rrotate_optab = vrotr_optab;
2129     }
2130
2131   /* Previously detected shift-counts computed by NEGATE_EXPR
2132      and shifted in the other direction; but that does not work
2133      on all machines.  */
2134
2135   if (SHIFT_COUNT_TRUNCATED)
2136     {
2137       if (CONST_INT_P (op1)
2138           && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2139               (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
2140         op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2141                        % GET_MODE_BITSIZE (mode));
2142       else if (GET_CODE (op1) == SUBREG
2143                && subreg_lowpart_p (op1)
2144                && INTEGRAL_MODE_P (GET_MODE (SUBREG_REG (op1))))
2145         op1 = SUBREG_REG (op1);
2146     }
2147
2148   if (op1 == const0_rtx)
2149     return shifted;
2150
2151   /* Check whether its cheaper to implement a left shift by a constant
2152      bit count by a sequence of additions.  */
2153   if (code == LSHIFT_EXPR
2154       && CONST_INT_P (op1)
2155       && INTVAL (op1) > 0
2156       && INTVAL (op1) < GET_MODE_PRECISION (mode)
2157       && INTVAL (op1) < MAX_BITS_PER_WORD
2158       && (shift_cost (speed, mode, INTVAL (op1))
2159           > INTVAL (op1) * add_cost (speed, mode))
2160       && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2161     {
2162       int i;
2163       for (i = 0; i < INTVAL (op1); i++)
2164         {
2165           temp = force_reg (mode, shifted);
2166           shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2167                                   unsignedp, OPTAB_LIB_WIDEN);
2168         }
2169       return shifted;
2170     }
2171
2172   for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2173     {
2174       enum optab_methods methods;
2175
2176       if (attempt == 0)
2177         methods = OPTAB_DIRECT;
2178       else if (attempt == 1)
2179         methods = OPTAB_WIDEN;
2180       else
2181         methods = OPTAB_LIB_WIDEN;
2182
2183       if (rotate)
2184         {
2185           /* Widening does not work for rotation.  */
2186           if (methods == OPTAB_WIDEN)
2187             continue;
2188           else if (methods == OPTAB_LIB_WIDEN)
2189             {
2190               /* If we have been unable to open-code this by a rotation,
2191                  do it as the IOR of two shifts.  I.e., to rotate A
2192                  by N bits, compute (A << N) | ((unsigned) A >> (C - N))
2193                  where C is the bitsize of A.
2194
2195                  It is theoretically possible that the target machine might
2196                  not be able to perform either shift and hence we would
2197                  be making two libcalls rather than just the one for the
2198                  shift (similarly if IOR could not be done).  We will allow
2199                  this extremely unlikely lossage to avoid complicating the
2200                  code below.  */
2201
2202               rtx subtarget = target == shifted ? 0 : target;
2203               rtx new_amount, other_amount;
2204               rtx temp1;
2205
2206               new_amount = op1;
2207               if (CONST_INT_P (op1))
2208                 other_amount = GEN_INT (GET_MODE_BITSIZE (mode)
2209                                         - INTVAL (op1));
2210               else
2211                 other_amount
2212                   = simplify_gen_binary (MINUS, GET_MODE (op1),
2213                                          GEN_INT (GET_MODE_PRECISION (mode)),
2214                                          op1);
2215
2216               shifted = force_reg (mode, shifted);
2217
2218               temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2219                                      mode, shifted, new_amount, 0, 1);
2220               temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2221                                       mode, shifted, other_amount,
2222                                       subtarget, 1);
2223               return expand_binop (mode, ior_optab, temp, temp1, target,
2224                                    unsignedp, methods);
2225             }
2226
2227           temp = expand_binop (mode,
2228                                left ? lrotate_optab : rrotate_optab,
2229                                shifted, op1, target, unsignedp, methods);
2230         }
2231       else if (unsignedp)
2232         temp = expand_binop (mode,
2233                              left ? lshift_optab : rshift_uns_optab,
2234                              shifted, op1, target, unsignedp, methods);
2235
2236       /* Do arithmetic shifts.
2237          Also, if we are going to widen the operand, we can just as well
2238          use an arithmetic right-shift instead of a logical one.  */
2239       if (temp == 0 && ! rotate
2240           && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2241         {
2242           enum optab_methods methods1 = methods;
2243
2244           /* If trying to widen a log shift to an arithmetic shift,
2245              don't accept an arithmetic shift of the same size.  */
2246           if (unsignedp)
2247             methods1 = OPTAB_MUST_WIDEN;
2248
2249           /* Arithmetic shift */
2250
2251           temp = expand_binop (mode,
2252                                left ? lshift_optab : rshift_arith_optab,
2253                                shifted, op1, target, unsignedp, methods1);
2254         }
2255
2256       /* We used to try extzv here for logical right shifts, but that was
2257          only useful for one machine, the VAX, and caused poor code
2258          generation there for lshrdi3, so the code was deleted and a
2259          define_expand for lshrsi3 was added to vax.md.  */
2260     }
2261
2262   gcc_assert (temp);
2263   return temp;
2264 }
2265
2266 /* Output a shift instruction for expression code CODE,
2267    with SHIFTED being the rtx for the value to shift,
2268    and AMOUNT the amount to shift by.
2269    Store the result in the rtx TARGET, if that is convenient.
2270    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2271    Return the rtx for where the value is.  */
2272
2273 rtx
2274 expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2275               int amount, rtx target, int unsignedp)
2276 {
2277   return expand_shift_1 (code, mode,
2278                          shifted, GEN_INT (amount), target, unsignedp);
2279 }
2280
2281 /* Output a shift instruction for expression code CODE,
2282    with SHIFTED being the rtx for the value to shift,
2283    and AMOUNT the tree for the amount to shift by.
2284    Store the result in the rtx TARGET, if that is convenient.
2285    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2286    Return the rtx for where the value is.  */
2287
2288 rtx
2289 expand_variable_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2290                        tree amount, rtx target, int unsignedp)
2291 {
2292   return expand_shift_1 (code, mode,
2293                          shifted, expand_normal (amount), target, unsignedp);
2294 }
2295
2296 \f
2297 /* Indicates the type of fixup needed after a constant multiplication.
2298    BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2299    the result should be negated, and ADD_VARIANT means that the
2300    multiplicand should be added to the result.  */
2301 enum mult_variant {basic_variant, negate_variant, add_variant};
2302
     /* Forward declarations of static helpers, so that they can be
        referenced ahead of their definitions.  */
2303 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2304                         const struct mult_cost *, enum machine_mode mode);
2305 static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
2306                                  struct algorithm *, enum mult_variant *, int);
2307 static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
2308                               const struct algorithm *, enum mult_variant);
2309 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2310 static rtx extract_high_half (enum machine_mode, rtx);
2311 static rtx expmed_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
2312 static rtx expmed_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
2313                                        int, int);
2314 /* Compute and return the best algorithm for multiplying by T.
2315    The algorithm must cost less than cost_limit
2316    If retval.cost >= COST_LIMIT, no algorithm was found and all
2317    other field of the returned struct are undefined.
2318    MODE is the machine mode of the multiplication.  */
2319
2320 static void
2321 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2322             const struct mult_cost *cost_limit, enum machine_mode mode)
2323 {
2324   int m;
2325   struct algorithm *alg_in, *best_alg;
2326   struct mult_cost best_cost;
2327   struct mult_cost new_limit;
2328   int op_cost, op_latency;
2329   unsigned HOST_WIDE_INT orig_t = t;
2330   unsigned HOST_WIDE_INT q;
2331   int maxm, hash_index;
2332   bool cache_hit = false;
2333   enum alg_code cache_alg = alg_zero;
2334   bool speed = optimize_insn_for_speed_p ();
2335   enum machine_mode imode;
2336   struct alg_hash_entry *entry_ptr;
2337
2338   /* Indicate that no algorithm is yet found.  If no algorithm
2339      is found, this value will be returned and indicate failure.  */
2340   alg_out->cost.cost = cost_limit->cost + 1;
2341   alg_out->cost.latency = cost_limit->latency + 1;
2342
2343   if (cost_limit->cost < 0
2344       || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2345     return;
2346
2347   /* Be prepared for vector modes.  */
2348   imode = GET_MODE_INNER (mode);
2349   if (imode == VOIDmode)
2350     imode = mode;
2351
2352   maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2353
2354   /* Restrict the bits of "t" to the multiplication's mode.  */
2355   t &= GET_MODE_MASK (imode);
2356
2357   /* t == 1 can be done in zero cost.  */
2358   if (t == 1)
2359     {
2360       alg_out->ops = 1;
2361       alg_out->cost.cost = 0;
2362       alg_out->cost.latency = 0;
2363       alg_out->op[0] = alg_m;
2364       return;
2365     }
2366
2367   /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
2368      fail now.  */
2369   if (t == 0)
2370     {
2371       if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2372         return;
2373       else
2374         {
2375           alg_out->ops = 1;
2376           alg_out->cost.cost = zero_cost (speed);
2377           alg_out->cost.latency = zero_cost (speed);
2378           alg_out->op[0] = alg_zero;
2379           return;
2380         }
2381     }
2382
2383   /* We'll be needing a couple extra algorithm structures now.  */
2384
2385   alg_in = XALLOCA (struct algorithm);
2386   best_alg = XALLOCA (struct algorithm);
2387   best_cost = *cost_limit;
2388
2389   /* Compute the hash index.  */
2390   hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2391
2392   /* See if we already know what to do for T.  */
2393   entry_ptr = alg_hash_entry_ptr (hash_index);
2394   if (entry_ptr->t == t
2395       && entry_ptr->mode == mode
2396       && entry_ptr->mode == mode
2397       && entry_ptr->speed == speed
2398       && entry_ptr->alg != alg_unknown)
2399     {
2400       cache_alg = entry_ptr->alg;
2401
2402       if (cache_alg == alg_impossible)
2403         {
2404           /* The cache tells us that it's impossible to synthesize
2405              multiplication by T within entry_ptr->cost.  */
2406           if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2407             /* COST_LIMIT is at least as restrictive as the one
2408                recorded in the hash table, in which case we have no
2409                hope of synthesizing a multiplication.  Just
2410                return.  */
2411             return;
2412
2413           /* If we get here, COST_LIMIT is less restrictive than the
2414              one recorded in the hash table, so we may be able to
2415              synthesize a multiplication.  Proceed as if we didn't
2416              have the cache entry.  */
2417         }
2418       else
2419         {
2420           if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2421             /* The cached algorithm shows that this multiplication
2422                requires more cost than COST_LIMIT.  Just return.  This
2423                way, we don't clobber this cache entry with
2424                alg_impossible but retain useful information.  */
2425             return;
2426
2427           cache_hit = true;
2428
2429           switch (cache_alg)
2430             {
2431             case alg_shift:
2432               goto do_alg_shift;
2433
2434             case alg_add_t_m2:
2435             case alg_sub_t_m2:
2436               goto do_alg_addsub_t_m2;
2437
2438             case alg_add_factor:
2439             case alg_sub_factor:
2440               goto do_alg_addsub_factor;
2441
2442             case alg_add_t2_m:
2443               goto do_alg_add_t2_m;
2444
2445             case alg_sub_t2_m:
2446               goto do_alg_sub_t2_m;
2447
2448             default:
2449               gcc_unreachable ();
2450             }
2451         }
2452     }
2453
2454   /* If we have a group of zero bits at the low-order part of T, try
2455      multiplying by the remaining bits and then doing a shift.  */
2456
2457   if ((t & 1) == 0)
2458     {
2459     do_alg_shift:
2460       m = floor_log2 (t & -t);  /* m = number of low zero bits */
2461       if (m < maxm)
2462         {
2463           q = t >> m;
2464           /* The function expand_shift will choose between a shift and
2465              a sequence of additions, so the observed cost is given as
2466              MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)).  */
2467           op_cost = m * add_cost (speed, mode);
2468           if (shift_cost (speed, mode, m) < op_cost)
2469             op_cost = shift_cost (speed, mode, m);
2470           new_limit.cost = best_cost.cost - op_cost;
2471           new_limit.latency = best_cost.latency - op_cost;
2472           synth_mult (alg_in, q, &new_limit, mode);
2473
2474           alg_in->cost.cost += op_cost;
2475           alg_in->cost.latency += op_cost;
2476           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2477             {
2478               struct algorithm *x;
2479               best_cost = alg_in->cost;
2480               x = alg_in, alg_in = best_alg, best_alg = x;
2481               best_alg->log[best_alg->ops] = m;
2482               best_alg->op[best_alg->ops] = alg_shift;
2483             }
2484
2485           /* See if treating ORIG_T as a signed number yields a better
2486              sequence.  Try this sequence only for a negative ORIG_T
2487              as it would be useless for a non-negative ORIG_T.  */
2488           if ((HOST_WIDE_INT) orig_t < 0)
2489             {
2490               /* Shift ORIG_T as follows because a right shift of a
2491                  negative-valued signed type is implementation
2492                  defined.  */
2493               q = ~(~orig_t >> m);
2494               /* The function expand_shift will choose between a shift
2495                  and a sequence of additions, so the observed cost is
2496                  given as MIN (m * add_cost(speed, mode),
2497                  shift_cost(speed, mode, m)).  */
2498               op_cost = m * add_cost (speed, mode);
2499               if (shift_cost (speed, mode, m) < op_cost)
2500                 op_cost = shift_cost (speed, mode, m);
2501               new_limit.cost = best_cost.cost - op_cost;
2502               new_limit.latency = best_cost.latency - op_cost;
2503               synth_mult (alg_in, q, &new_limit, mode);
2504
2505               alg_in->cost.cost += op_cost;
2506               alg_in->cost.latency += op_cost;
2507               if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2508                 {
2509                   struct algorithm *x;
2510                   best_cost = alg_in->cost;
2511                   x = alg_in, alg_in = best_alg, best_alg = x;
2512                   best_alg->log[best_alg->ops] = m;
2513                   best_alg->op[best_alg->ops] = alg_shift;
2514                 }
2515             }
2516         }
2517       if (cache_hit)
2518         goto done;
2519     }
2520
2521   /* If we have an odd number, add or subtract one.  */
2522   if ((t & 1) != 0)
2523     {
2524       unsigned HOST_WIDE_INT w;
2525
2526     do_alg_addsub_t_m2:
2527       for (w = 1; (w & t) != 0; w <<= 1)
2528         ;
2529       /* If T was -1, then W will be zero after the loop.  This is another
2530          case where T ends with ...111.  Handling this with (T + 1) and
2531          subtract 1 produces slightly better code and results in algorithm
2532          selection much faster than treating it like the ...0111 case
2533          below.  */
2534       if (w == 0
2535           || (w > 2
2536               /* Reject the case where t is 3.
2537                  Thus we prefer addition in that case.  */
2538               && t != 3))
2539         {
2540           /* T ends with ...111.  Multiply by (T + 1) and subtract 1.  */
2541
2542           op_cost = add_cost (speed, mode);
2543           new_limit.cost = best_cost.cost - op_cost;
2544           new_limit.latency = best_cost.latency - op_cost;
2545           synth_mult (alg_in, t + 1, &new_limit, mode);
2546
2547           alg_in->cost.cost += op_cost;
2548           alg_in->cost.latency += op_cost;
2549           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2550             {
2551               struct algorithm *x;
2552               best_cost = alg_in->cost;
2553               x = alg_in, alg_in = best_alg, best_alg = x;
2554               best_alg->log[best_alg->ops] = 0;
2555               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2556             }
2557         }
2558       else
2559         {
2560           /* T ends with ...01 or ...011.  Multiply by (T - 1) and add 1.  */
2561
2562           op_cost = add_cost (speed, mode);
2563           new_limit.cost = best_cost.cost - op_cost;
2564           new_limit.latency = best_cost.latency - op_cost;
2565           synth_mult (alg_in, t - 1, &new_limit, mode);
2566
2567           alg_in->cost.cost += op_cost;
2568           alg_in->cost.latency += op_cost;
2569           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2570             {
2571               struct algorithm *x;
2572               best_cost = alg_in->cost;
2573               x = alg_in, alg_in = best_alg, best_alg = x;
2574               best_alg->log[best_alg->ops] = 0;
2575               best_alg->op[best_alg->ops] = alg_add_t_m2;
2576             }
2577         }
2578
2579       /* We may be able to calculate a * -7, a * -15, a * -31, etc
2580          quickly with a - a * n for some appropriate constant n.  */
2581       m = exact_log2 (-orig_t + 1);
2582       if (m >= 0 && m < maxm)
2583         {
2584           op_cost = shiftsub1_cost (speed, mode, m);
2585           new_limit.cost = best_cost.cost - op_cost;
2586           new_limit.latency = best_cost.latency - op_cost;
2587           synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2588                       &new_limit, mode);
2589
2590           alg_in->cost.cost += op_cost;
2591           alg_in->cost.latency += op_cost;
2592           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2593             {
2594               struct algorithm *x;
2595               best_cost = alg_in->cost;
2596               x = alg_in, alg_in = best_alg, best_alg = x;
2597               best_alg->log[best_alg->ops] = m;
2598               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2599             }
2600         }
2601
2602       if (cache_hit)
2603         goto done;
2604     }
2605
2606   /* Look for factors of t of the form
2607      t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2608      If we find such a factor, we can multiply by t using an algorithm that
2609      multiplies by q, shift the result by m and add/subtract it to itself.
2610
2611      We search for large factors first and loop down, even if large factors
2612      are less probable than small; if we find a large factor we will find a
2613      good sequence quickly, and therefore be able to prune (by decreasing
2614      COST_LIMIT) the search.  */
2615
2616  do_alg_addsub_factor:
2617   for (m = floor_log2 (t - 1); m >= 2; m--)
2618     {
2619       unsigned HOST_WIDE_INT d;
2620
2621       d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2622       if (t % d == 0 && t > d && m < maxm
2623           && (!cache_hit || cache_alg == alg_add_factor))
2624         {
2625           /* If the target has a cheap shift-and-add instruction use
2626              that in preference to a shift insn followed by an add insn.
2627              Assume that the shift-and-add is "atomic" with a latency
2628              equal to its cost, otherwise assume that on superscalar
2629              hardware the shift may be executed concurrently with the
2630              earlier steps in the algorithm.  */
2631           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2632           if (shiftadd_cost (speed, mode, m) < op_cost)
2633             {
2634               op_cost = shiftadd_cost (speed, mode, m);
2635               op_latency = op_cost;
2636             }
2637           else
2638             op_latency = add_cost (speed, mode);
2639
2640           new_limit.cost = best_cost.cost - op_cost;
2641           new_limit.latency = best_cost.latency - op_latency;
2642           synth_mult (alg_in, t / d, &new_limit, mode);
2643
2644           alg_in->cost.cost += op_cost;
2645           alg_in->cost.latency += op_latency;
2646           if (alg_in->cost.latency < op_cost)
2647             alg_in->cost.latency = op_cost;
2648           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2649             {
2650               struct algorithm *x;
2651               best_cost = alg_in->cost;
2652               x = alg_in, alg_in = best_alg, best_alg = x;
2653               best_alg->log[best_alg->ops] = m;
2654               best_alg->op[best_alg->ops] = alg_add_factor;
2655             }
2656           /* Other factors will have been taken care of in the recursion.  */
2657           break;
2658         }
2659
2660       d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2661       if (t % d == 0 && t > d && m < maxm
2662           && (!cache_hit || cache_alg == alg_sub_factor))
2663         {
2664           /* If the target has a cheap shift-and-subtract insn use
2665              that in preference to a shift insn followed by a sub insn.
2666              Assume that the shift-and-sub is "atomic" with a latency
2667              equal to it's cost, otherwise assume that on superscalar
2668              hardware the shift may be executed concurrently with the
2669              earlier steps in the algorithm.  */
2670           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2671           if (shiftsub0_cost (speed, mode, m) < op_cost)
2672             {
2673               op_cost = shiftsub0_cost (speed, mode, m);
2674               op_latency = op_cost;
2675             }
2676           else
2677             op_latency = add_cost (speed, mode);
2678
2679           new_limit.cost = best_cost.cost - op_cost;
2680           new_limit.latency = best_cost.latency - op_latency;
2681           synth_mult (alg_in, t / d, &new_limit, mode);
2682
2683           alg_in->cost.cost += op_cost;
2684           alg_in->cost.latency += op_latency;
2685           if (alg_in->cost.latency < op_cost)
2686             alg_in->cost.latency = op_cost;
2687           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2688             {
2689               struct algorithm *x;
2690               best_cost = alg_in->cost;
2691               x = alg_in, alg_in = best_alg, best_alg = x;
2692               best_alg->log[best_alg->ops] = m;
2693               best_alg->op[best_alg->ops] = alg_sub_factor;
2694             }
2695           break;
2696         }
2697     }
2698   if (cache_hit)
2699     goto done;
2700
2701   /* Try shift-and-add (load effective address) instructions,
2702      i.e. do a*3, a*5, a*9.  */
2703   if ((t & 1) != 0)
2704     {
2705     do_alg_add_t2_m:
2706       q = t - 1;
2707       q = q & -q;
2708       m = exact_log2 (q);
2709       if (m >= 0 && m < maxm)
2710         {
2711           op_cost = shiftadd_cost (speed, mode, m);
2712           new_limit.cost = best_cost.cost - op_cost;
2713           new_limit.latency = best_cost.latency - op_cost;
2714           synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2715
2716           alg_in->cost.cost += op_cost;
2717           alg_in->cost.latency += op_cost;
2718           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2719             {
2720               struct algorithm *x;
2721               best_cost = alg_in->cost;
2722               x = alg_in, alg_in = best_alg, best_alg = x;
2723               best_alg->log[best_alg->ops] = m;
2724               best_alg->op[best_alg->ops] = alg_add_t2_m;
2725             }
2726         }
2727       if (cache_hit)
2728         goto done;
2729
2730     do_alg_sub_t2_m:
2731       q = t + 1;
2732       q = q & -q;
2733       m = exact_log2 (q);
2734       if (m >= 0 && m < maxm)
2735         {
2736           op_cost = shiftsub0_cost (speed, mode, m);
2737           new_limit.cost = best_cost.cost - op_cost;
2738           new_limit.latency = best_cost.latency - op_cost;
2739           synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2740
2741           alg_in->cost.cost += op_cost;
2742           alg_in->cost.latency += op_cost;
2743           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2744             {
2745               struct algorithm *x;
2746               best_cost = alg_in->cost;
2747               x = alg_in, alg_in = best_alg, best_alg = x;
2748               best_alg->log[best_alg->ops] = m;
2749               best_alg->op[best_alg->ops] = alg_sub_t2_m;
2750             }
2751         }
2752       if (cache_hit)
2753         goto done;
2754     }
2755
2756  done:
2757   /* If best_cost has not decreased, we have not found any algorithm.  */
2758   if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2759     {
2760       /* We failed to find an algorithm.  Record alg_impossible for
2761          this case (that is, <T, MODE, COST_LIMIT>) so that next time
2762          we are asked to find an algorithm for T within the same or
2763          lower COST_LIMIT, we can immediately return to the
2764          caller.  */
2765       entry_ptr->t = t;
2766       entry_ptr->mode = mode;
2767       entry_ptr->speed = speed;
2768       entry_ptr->alg = alg_impossible;
2769       entry_ptr->cost = *cost_limit;
2770       return;
2771     }
2772
2773   /* Cache the result.  */
2774   if (!cache_hit)
2775     {
2776       entry_ptr->t = t;
2777       entry_ptr->mode = mode;
2778       entry_ptr->speed = speed;
2779       entry_ptr->alg = best_alg->op[best_alg->ops];
2780       entry_ptr->cost.cost = best_cost.cost;
2781       entry_ptr->cost.latency = best_cost.latency;
2782     }
2783
2784   /* If we are getting a too long sequence for `struct algorithm'
2785      to record, make this search fail.  */
2786   if (best_alg->ops == MAX_BITS_PER_WORD)
2787     return;
2788
2789   /* Copy the algorithm from temporary space to the space at alg_out.
2790      We avoid using structure assignment because the majority of
2791      best_alg is normally undefined, and this is a critical function.  */
2792   alg_out->ops = best_alg->ops + 1;
2793   alg_out->cost = best_cost;
2794   memcpy (alg_out->op, best_alg->op,
2795           alg_out->ops * sizeof *alg_out->op);
2796   memcpy (alg_out->log, best_alg->log,
2797           alg_out->ops * sizeof *alg_out->log);
2798 }
2799 \f
2800 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2801    Try three variations:
2802
2803        - a shift/add sequence based on VAL itself
2804        - a shift/add sequence based on -VAL, followed by a negation
2805        - a shift/add sequence based on VAL - 1, followed by an addition.
2806
2807    Return true if the cheapest of these cost less than MULT_COST,
2808    describing the algorithm in *ALG and final fixup in *VARIANT.  */
2809
2810 static bool
2811 choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
2812                      struct algorithm *alg, enum mult_variant *variant,
2813                      int mult_cost)
2814 {
2815   struct algorithm alg2;
2816   struct mult_cost limit;
2817   int op_cost;
2818   bool speed = optimize_insn_for_speed_p ();
2819
2820   /* Fail quickly for impossible bounds.  */
2821   if (mult_cost < 0)
2822     return false;
2823
2824   /* Ensure that mult_cost provides a reasonable upper bound.
2825      Any constant multiplication can be performed with less
2826      than 2 * bits additions.  */
2827   op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
2828   if (mult_cost > op_cost)
2829     mult_cost = op_cost;
2830
2831   *variant = basic_variant;
2832   limit.cost = mult_cost;
2833   limit.latency = mult_cost;
2834   synth_mult (alg, val, &limit, mode);
2835
2836   /* This works only if the inverted value actually fits in an
2837      `unsigned int' */
2838   if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
2839     {
2840       op_cost = neg_cost(speed, mode);
2841       if (MULT_COST_LESS (&alg->cost, mult_cost))
2842         {
2843           limit.cost = alg->cost.cost - op_cost;
2844           limit.latency = alg->cost.latency - op_cost;
2845         }
2846       else
2847         {
2848           limit.cost = mult_cost - op_cost;
2849           limit.latency = mult_cost - op_cost;
2850         }
2851
2852       synth_mult (&alg2, -val, &limit, mode);
2853       alg2.cost.cost += op_cost;
2854       alg2.cost.latency += op_cost;
2855       if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2856         *alg = alg2, *variant = negate_variant;
2857     }
2858
2859   /* This proves very useful for division-by-constant.  */
2860   op_cost = add_cost (speed, mode);
2861   if (MULT_COST_LESS (&alg->cost, mult_cost))
2862     {
2863       limit.cost = alg->cost.cost - op_cost;
2864       limit.latency = alg->cost.latency - op_cost;
2865     }
2866   else
2867     {
2868       limit.cost = mult_cost - op_cost;
2869       limit.latency = mult_cost - op_cost;
2870     }
2871
2872   synth_mult (&alg2, val - 1, &limit, mode);
2873   alg2.cost.cost += op_cost;
2874   alg2.cost.latency += op_cost;
2875   if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2876     *alg = alg2, *variant = add_variant;
2877
2878   return MULT_COST_LESS (&alg->cost, mult_cost);
2879 }
2880
2881 /* A subroutine of expand_mult, used for constant multiplications.
2882    Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2883    convenient.  Use the shift/add sequence described by ALG and apply
2884    the final fixup specified by VARIANT.  */
2885
2886 static rtx
2887 expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
2888                    rtx target, const struct algorithm *alg,
2889                    enum mult_variant variant)
2890 {
2891   HOST_WIDE_INT val_so_far;
2892   rtx insn, accum, tem;
2893   int opno;
2894   enum machine_mode nmode;
2895
2896   /* Avoid referencing memory over and over and invalid sharing
2897      on SUBREGs.  */
2898   op0 = force_reg (mode, op0);
2899
2900   /* ACCUM starts out either as OP0 or as a zero, depending on
2901      the first operation.  */
2902
2903   if (alg->op[0] == alg_zero)
2904     {
2905       accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
2906       val_so_far = 0;
2907     }
2908   else if (alg->op[0] == alg_m)
2909     {
2910       accum = copy_to_mode_reg (mode, op0);
2911       val_so_far = 1;
2912     }
2913   else
2914     gcc_unreachable ();
2915
2916   for (opno = 1; opno < alg->ops; opno++)
2917     {
2918       int log = alg->log[opno];
2919       rtx shift_subtarget = optimize ? 0 : accum;
2920       rtx add_target
2921         = (opno == alg->ops - 1 && target != 0 && variant != add_variant
2922            && !optimize)
2923           ? target : 0;
2924       rtx accum_target = optimize ? 0 : accum;
2925       rtx accum_inner;
2926
2927       switch (alg->op[opno])
2928         {
2929         case alg_shift:
2930           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2931           /* REG_EQUAL note will be attached to the following insn.  */
2932           emit_move_insn (accum, tem);
2933           val_so_far <<= log;
2934           break;
2935
2936         case alg_add_t_m2:
2937           tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
2938           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2939                                  add_target ? add_target : accum_target);
2940           val_so_far += (HOST_WIDE_INT) 1 << log;
2941           break;
2942
2943         case alg_sub_t_m2:
2944           tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
2945           accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
2946                                  add_target ? add_target : accum_target);
2947           val_so_far -= (HOST_WIDE_INT) 1 << log;
2948           break;
2949
2950         case alg_add_t2_m:
2951           accum = expand_shift (LSHIFT_EXPR, mode, accum,
2952                                 log, shift_subtarget, 0);
2953           accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
2954                                  add_target ? add_target : accum_target);
2955           val_so_far = (val_so_far << log) + 1;
2956           break;
2957
2958         case alg_sub_t2_m:
2959           accum = expand_shift (LSHIFT_EXPR, mode, accum,
2960                                 log, shift_subtarget, 0);
2961           accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
2962                                  add_target ? add_target : accum_target);
2963           val_so_far = (val_so_far << log) - 1;
2964           break;
2965
2966         case alg_add_factor:
2967           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2968           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2969                                  add_target ? add_target : accum_target);
2970           val_so_far += val_so_far << log;
2971           break;
2972
2973         case alg_sub_factor:
2974           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2975           accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
2976                                  (add_target
2977                                   ? add_target : (optimize ? 0 : tem)));
2978           val_so_far = (val_so_far << log) - val_so_far;
2979           break;
2980
2981         default:
2982           gcc_unreachable ();
2983         }
2984
2985       if (SCALAR_INT_MODE_P (mode))
2986         {
2987           /* Write a REG_EQUAL note on the last insn so that we can cse
2988              multiplication sequences.  Note that if ACCUM is a SUBREG,
2989              we've set the inner register and must properly indicate that.  */
2990           tem = op0, nmode = mode;
2991           accum_inner = accum;
2992           if (GET_CODE (accum) == SUBREG)
2993             {
2994               accum_inner = SUBREG_REG (accum);
2995               nmode = GET_MODE (accum_inner);
2996               tem = gen_lowpart (nmode, op0);
2997             }
2998
2999           insn = get_last_insn ();
3000           set_dst_reg_note (insn, REG_EQUAL,
3001                             gen_rtx_MULT (nmode, tem, GEN_INT (val_so_far)),
3002                             accum_inner);
3003         }
3004     }
3005
3006   if (variant == negate_variant)
3007     {
3008       val_so_far = -val_so_far;
3009       accum = expand_unop (mode, neg_optab, accum, target, 0);
3010     }
3011   else if (variant == add_variant)
3012     {
3013       val_so_far = val_so_far + 1;
3014       accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3015     }
3016
3017   /* Compare only the bits of val and val_so_far that are significant
3018      in the result mode, to avoid sign-/zero-extension confusion.  */
3019   nmode = GET_MODE_INNER (mode);
3020   if (nmode == VOIDmode)
3021     nmode = mode;
3022   val &= GET_MODE_MASK (nmode);
3023   val_so_far &= GET_MODE_MASK (nmode);
3024   gcc_assert (val == val_so_far);
3025
3026   return accum;
3027 }
3028
3029 /* Perform a multiplication and return an rtx for the result.
3030    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3031    TARGET is a suggestion for where to store the result (an rtx).
3032
3033    We check specially for a constant integer as OP1.
3034    If you want this check for OP0 as well, then before calling
3035    you should swap the two operands if OP0 would be constant.  */
3036
3037 rtx
3038 expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3039              int unsignedp)
3040 {
3041   enum mult_variant variant;
3042   struct algorithm algorithm;
3043   rtx scalar_op1;
3044   int max_cost;
3045   bool speed = optimize_insn_for_speed_p ();
3046   bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3047
3048   if (CONSTANT_P (op0))
3049     {
3050       rtx temp = op0;
3051       op0 = op1;
3052       op1 = temp;
3053     }
3054
3055   /* For vectors, there are several simplifications that can be made if
3056      all elements of the vector constant are identical.  */
3057   scalar_op1 = op1;
3058   if (GET_CODE (op1) == CONST_VECTOR)
3059     {
3060       int i, n = CONST_VECTOR_NUNITS (op1);
3061       scalar_op1 = CONST_VECTOR_ELT (op1, 0);
3062       for (i = 1; i < n; ++i)
3063         if (!rtx_equal_p (scalar_op1, CONST_VECTOR_ELT (op1, i)))
3064           goto skip_scalar;
3065     }
3066
3067   if (INTEGRAL_MODE_P (mode))
3068     {
3069       rtx fake_reg;
3070       HOST_WIDE_INT coeff;
3071       bool is_neg;
3072       int mode_bitsize;
3073
3074       if (op1 == CONST0_RTX (mode))
3075         return op1;
3076       if (op1 == CONST1_RTX (mode))
3077         return op0;
3078       if (op1 == CONSTM1_RTX (mode))
3079         return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3080                             op0, target, 0);
3081
3082       if (do_trapv)
3083         goto skip_synth;
3084
3085       /* These are the operations that are potentially turned into
3086          a sequence of shifts and additions.  */
3087       mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3088
3089       /* synth_mult does an `unsigned int' multiply.  As long as the mode is
3090          less than or equal in size to `unsigned int' this doesn't matter.
3091          If the mode is larger than `unsigned int', then synth_mult works
3092          only if the constant value exactly fits in an `unsigned int' without
3093          any truncation.  This means that multiplying by negative values does
3094          not work; results are off by 2^32 on a 32 bit machine.  */
3095
3096       if (CONST_INT_P (scalar_op1))
3097         {
3098           coeff = INTVAL (scalar_op1);
3099           is_neg = coeff < 0;
3100         }
3101       else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3102         {
3103           /* If we are multiplying in DImode, it may still be a win
3104              to try to work with shifts and adds.  */
3105           if (CONST_DOUBLE_HIGH (scalar_op1) == 0
3106               && CONST_DOUBLE_LOW (scalar_op1) > 0)
3107             {
3108               coeff = CONST_DOUBLE_LOW (scalar_op1);
3109               is_neg = false;
3110             }
3111           else if (CONST_DOUBLE_LOW (scalar_op1) == 0)
3112             {
3113               coeff = CONST_DOUBLE_HIGH (scalar_op1);
3114               if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3115                 {
3116                   int shift = floor_log2 (coeff) + HOST_BITS_PER_WIDE_INT;
3117                   if (shift < HOST_BITS_PER_DOUBLE_INT - 1
3118                       || mode_bitsize <= HOST_BITS_PER_DOUBLE_INT)
3119                     return expand_shift (LSHIFT_EXPR, mode, op0,
3120                                          shift, target, unsignedp);
3121                 }
3122               goto skip_synth;
3123             }
3124           else
3125             goto skip_synth;
3126         }
3127       else
3128         goto skip_synth;
3129
3130       /* We used to test optimize here, on the grounds that it's better to
3131          produce a smaller program when -O is not used.  But this causes
3132          such a terrible slowdown sometimes that it seems better to always
3133          use synth_mult.  */
3134
3135       /* Special case powers of two.  */
3136       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3137         return expand_shift (LSHIFT_EXPR, mode, op0,
3138                              floor_log2 (coeff), target, unsignedp);
3139
3140       fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3141
3142       /* Attempt to handle multiplication of DImode values by negative
3143          coefficients, by performing the multiplication by a positive
3144          multiplier and then inverting the result.  */
3145       if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3146         {
3147           /* It's safe to use -coeff even for INT_MIN, as the
3148              result is interpreted as an unsigned coefficient.
3149              Exclude cost of op0 from max_cost to match the cost
3150              calculation of the synth_mult.  */
3151           max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed)
3152                       - neg_cost(speed, mode));
3153           if (max_cost > 0
3154               && choose_mult_variant (mode, -coeff, &algorithm,
3155                                       &variant, max_cost))
3156             {
3157               rtx temp = expand_mult_const (mode, op0, -coeff, NULL_RTX,
3158                                             &algorithm, variant);
3159               return expand_unop (mode, neg_optab, temp, target, 0);
3160             }
3161           goto skip_synth;
3162         }
3163
3164       /* Exclude cost of op0 from max_cost to match the cost
3165          calculation of the synth_mult.  */
3166       max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed);
3167       if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3168         return expand_mult_const (mode, op0, coeff, target,
3169                                   &algorithm, variant);
3170     }
3171  skip_synth:
3172
3173   /* Expand x*2.0 as x+x.  */
3174   if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1))
3175     {
3176       REAL_VALUE_TYPE d;
3177       REAL_VALUE_FROM_CONST_DOUBLE (d, scalar_op1);
3178
3179       if (REAL_VALUES_EQUAL (d, dconst2))
3180         {
3181           op0 = force_reg (GET_MODE (op0), op0);
3182           return expand_binop (mode, add_optab, op0, op0,
3183                                target, unsignedp, OPTAB_LIB_WIDEN);
3184         }
3185     }
3186  skip_scalar:
3187
3188   /* This used to use umul_optab if unsigned, but for non-widening multiply
3189      there is no difference between signed and unsigned.  */
3190   op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3191                       op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3192   gcc_assert (op0);
3193   return op0;
3194 }
3195
3196 /* Return a cost estimate for multiplying a register by the given
3197    COEFFicient in the given MODE and SPEED.  */
3198
3199 int
3200 mult_by_coeff_cost (HOST_WIDE_INT coeff, enum machine_mode mode, bool speed)
3201 {
3202   int max_cost;
3203   struct algorithm algorithm;
3204   enum mult_variant variant;
3205
3206   rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3207   max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg), speed);
3208   if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3209     return algorithm.cost.cost;
3210   else
3211     return max_cost;
3212 }
3213
3214 /* Perform a widening multiplication and return an rtx for the result.
3215    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3216    TARGET is a suggestion for where to store the result (an rtx).
3217    THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3218    or smul_widen_optab.
3219
3220    We check specially for a constant integer as OP1, comparing the
3221    cost of a widening multiply against the cost of a sequence of shifts
3222    and adds.  */
3223
3224 rtx
3225 expand_widening_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3226                       int unsignedp, optab this_optab)
3227 {
3228   bool speed = optimize_insn_for_speed_p ();
3229   rtx cop1;
3230
3231   if (CONST_INT_P (op1)
3232       && GET_MODE (op0) != VOIDmode
3233       && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3234                                 this_optab == umul_widen_optab))
3235       && CONST_INT_P (cop1)
3236       && (INTVAL (cop1) >= 0
3237           || HWI_COMPUTABLE_MODE_P (mode)))
3238     {
3239       HOST_WIDE_INT coeff = INTVAL (cop1);
3240       int max_cost;
3241       enum mult_variant variant;
3242       struct algorithm algorithm;
3243
3244       /* Special case powers of two.  */
3245       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3246         {
3247           op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3248           return expand_shift (LSHIFT_EXPR, mode, op0,
3249                                floor_log2 (coeff), target, unsignedp);
3250         }
3251
3252       /* Exclude cost of op0 from max_cost to match the cost
3253          calculation of the synth_mult.  */
3254       max_cost = mul_widen_cost (speed, mode);
3255       if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3256                                max_cost))
3257         {
3258           op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3259           return expand_mult_const (mode, op0, coeff, target,
3260                                     &algorithm, variant);
3261         }
3262     }
3263   return expand_binop (mode, this_optab, op0, op1, target,
3264                        unsignedp, OPTAB_LIB_WIDEN);
3265 }
3266 \f
3267 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3268    replace division by D, and put the least significant N bits of the result
3269    in *MULTIPLIER_PTR and return the most significant bit.
3270
3271    The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3272    needed precision is in PRECISION (should be <= N).
3273
3274    PRECISION should be as small as possible so this function can choose
3275    multiplier more freely.
3276
3277    The rounded-up logarithm of D is placed in *lgup_ptr.  A shift count that
3278    is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3279
3280    Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3281    where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.  */
3282
3283 unsigned HOST_WIDE_INT
3284 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3285                    unsigned HOST_WIDE_INT *multiplier_ptr,
3286                    int *post_shift_ptr, int *lgup_ptr)
3287 {
3288   double_int mhigh, mlow;
3289   int lgup, post_shift;
3290   int pow, pow2;
3291
3292   /* lgup = ceil(log2(divisor)); */
3293   lgup = ceil_log2 (d);
3294
3295   gcc_assert (lgup <= n);
3296
3297   pow = n + lgup;
3298   pow2 = n + lgup - precision;
3299
3300   /* We could handle this with some effort, but this case is much
3301      better handled directly with a scc insn, so rely on caller using
3302      that.  */
3303   gcc_assert (pow != HOST_BITS_PER_DOUBLE_INT);
3304
3305   /* mlow = 2^(N + lgup)/d */
3306   double_int val = double_int_zero.set_bit (pow);
3307   mlow = val.div (double_int::from_uhwi (d), true, TRUNC_DIV_EXPR);
3308
3309   /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3310   val |= double_int_zero.set_bit (pow2);
3311   mhigh = val.div (double_int::from_uhwi (d), true, TRUNC_DIV_EXPR);
3312
3313   gcc_assert (!mhigh.high || val.high - d < d);
3314   gcc_assert (mhigh.high <= 1 && mlow.high <= 1);
3315   /* Assert that mlow < mhigh.  */
3316   gcc_assert (mlow.ult (mhigh));
3317
3318   /* If precision == N, then mlow, mhigh exceed 2^N
3319      (but they do not exceed 2^(N+1)).  */
3320
3321   /* Reduce to lowest terms.  */
3322   for (post_shift = lgup; post_shift > 0; post_shift--)
3323     {
3324       int shft = HOST_BITS_PER_WIDE_INT - 1;
3325       unsigned HOST_WIDE_INT ml_lo = (mlow.high << shft) | (mlow.low >> 1);
3326       unsigned HOST_WIDE_INT mh_lo = (mhigh.high << shft) | (mhigh.low >> 1);
3327       if (ml_lo >= mh_lo)
3328         break;
3329
3330       mlow = double_int::from_uhwi (ml_lo);
3331       mhigh = double_int::from_uhwi (mh_lo);
3332     }
3333
3334   *post_shift_ptr = post_shift;
3335   *lgup_ptr = lgup;
3336   if (n < HOST_BITS_PER_WIDE_INT)
3337     {
3338       unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3339       *multiplier_ptr = mhigh.low & mask;
3340       return mhigh.low >= mask;
3341     }
3342   else
3343     {
3344       *multiplier_ptr = mhigh.low;
3345       return mhigh.high;
3346     }
3347 }
3348
3349 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3350    congruent to 1 (mod 2**N).  */
3351
3352 static unsigned HOST_WIDE_INT
3353 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3354 {
3355   /* Solve x*y == 1 (mod 2^n), where x is odd.  Return y.  */
3356
3357   /* The algorithm notes that the choice y = x satisfies
3358      x*y == 1 mod 2^3, since x is assumed odd.
3359      Each iteration doubles the number of bits of significance in y.  */
3360
3361   unsigned HOST_WIDE_INT mask;
3362   unsigned HOST_WIDE_INT y = x;
3363   int nbit = 3;
3364
3365   mask = (n == HOST_BITS_PER_WIDE_INT
3366           ? ~(unsigned HOST_WIDE_INT) 0
3367           : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3368
3369   while (nbit < n)
3370     {
3371       y = y * (2 - x*y) & mask;         /* Modulo 2^N */
3372       nbit *= 2;
3373     }
3374   return y;
3375 }
3376
3377 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3378    flavor of OP0 and OP1.  ADJ_OPERAND is already the high half of the
3379    product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed product
3380    to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3381    become signed.
3382
3383    The result is put in TARGET if that is convenient.
3384
3385    MODE is the mode of operation.  */
3386
3387 rtx
3388 expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
3389                              rtx op1, rtx target, int unsignedp)
3390 {
3391   rtx tem;
3392   enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3393
3394   tem = expand_shift (RSHIFT_EXPR, mode, op0,
3395                       GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3396   tem = expand_and (mode, tem, op1, NULL_RTX);
3397   adj_operand
3398     = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3399                      adj_operand);
3400
3401   tem = expand_shift (RSHIFT_EXPR, mode, op1,
3402                       GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3403   tem = expand_and (mode, tem, op0, NULL_RTX);
3404   target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3405                           target);
3406
3407   return target;
3408 }
3409
3410 /* Subroutine of expmed_mult_highpart.  Return the MODE high part of OP.  */
3411
3412 static rtx
3413 extract_high_half (enum machine_mode mode, rtx op)
3414 {
3415   enum machine_mode wider_mode;
3416
3417   if (mode == word_mode)
3418     return gen_highpart (mode, op);
3419
3420   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3421
3422   wider_mode = GET_MODE_WIDER_MODE (mode);
3423   op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3424                      GET_MODE_BITSIZE (mode), 0, 1);
3425   return convert_modes (mode, wider_mode, op, 0);
3426 }
3427
3428 /* Like expmed_mult_highpart, but only consider using a multiplication
3429    optab.  OP1 is an rtx for the constant operand.  */
3430
3431 static rtx
3432 expmed_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
3433                             rtx target, int unsignedp, int max_cost)
3434 {
3435   rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3436   enum machine_mode wider_mode;
3437   optab moptab;
3438   rtx tem;
3439   int size;
3440   bool speed = optimize_insn_for_speed_p ();
3441
3442   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3443
3444   wider_mode = GET_MODE_WIDER_MODE (mode);
3445   size = GET_MODE_BITSIZE (mode);
3446
3447   /* Firstly, try using a multiplication insn that only generates the needed
3448      high part of the product, and in the sign flavor of unsignedp.  */
3449   if (mul_highpart_cost (speed, mode) < max_cost)
3450     {
3451       moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3452       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3453                           unsignedp, OPTAB_DIRECT);
3454       if (tem)
3455         return tem;
3456     }
3457
3458   /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3459      Need to adjust the result after the multiplication.  */
3460   if (size - 1 < BITS_PER_WORD
3461       && (mul_highpart_cost (speed, mode)
3462           + 2 * shift_cost (speed, mode, size-1)
3463           + 4 * add_cost (speed, mode) < max_cost))
3464     {
3465       moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3466       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3467                           unsignedp, OPTAB_DIRECT);
3468       if (tem)
3469         /* We used the wrong signedness.  Adjust the result.  */
3470         return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3471                                             tem, unsignedp);
3472     }
3473
3474   /* Try widening multiplication.  */
3475   moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3476   if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3477       && mul_widen_cost (speed, wider_mode) < max_cost)
3478     {
3479       tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3480                           unsignedp, OPTAB_WIDEN);
3481       if (tem)
3482         return extract_high_half (mode, tem);
3483     }
3484
3485   /* Try widening the mode and perform a non-widening multiplication.  */
3486   if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3487       && size - 1 < BITS_PER_WORD
3488       && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3489           < max_cost))
3490     {
3491       rtx insns, wop0, wop1;
3492
3493       /* We need to widen the operands, for example to ensure the
3494          constant multiplier is correctly sign or zero extended.
3495          Use a sequence to clean-up any instructions emitted by
3496          the conversions if things don't work out.  */
3497       start_sequence ();
3498       wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3499       wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3500       tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3501                           unsignedp, OPTAB_WIDEN);
3502       insns = get_insns ();
3503       end_sequence ();
3504
3505       if (tem)
3506         {
3507           emit_insn (insns);
3508           return extract_high_half (mode, tem);
3509         }
3510     }
3511
3512   /* Try widening multiplication of opposite signedness, and adjust.  */
3513   moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3514   if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3515       && size - 1 < BITS_PER_WORD
3516       && (mul_widen_cost (speed, wider_mode)
3517           + 2 * shift_cost (speed, mode, size-1)
3518           + 4 * add_cost (speed, mode) < max_cost))
3519     {
3520       tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3521                           NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3522       if (tem != 0)
3523         {
3524           tem = extract_high_half (mode, tem);
3525           /* We used the wrong signedness.  Adjust the result.  */
3526           return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3527                                               target, unsignedp);
3528         }
3529     }
3530
3531   return 0;
3532 }
3533
3534 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3535    putting the high half of the result in TARGET if that is convenient,
3536    and return where the result is.  If the operation can not be performed,
3537    0 is returned.
3538
3539    MODE is the mode of operation and result.
3540
3541    UNSIGNEDP nonzero means unsigned multiply.
3542
3543    MAX_COST is the total allowed cost for the expanded RTL.  */
3544
3545 static rtx
3546 expmed_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
3547                       rtx target, int unsignedp, int max_cost)
3548 {
3549   enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3550   unsigned HOST_WIDE_INT cnst1;
3551   int extra_cost;
3552   bool sign_adjust = false;
3553   enum mult_variant variant;
3554   struct algorithm alg;
3555   rtx tem;
3556   bool speed = optimize_insn_for_speed_p ();
3557
3558   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3559   /* We can't support modes wider than HOST_BITS_PER_INT.  */
3560   gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3561
3562   cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3563
3564   /* We can't optimize modes wider than BITS_PER_WORD.
3565      ??? We might be able to perform double-word arithmetic if
3566      mode == word_mode, however all the cost calculations in
3567      synth_mult etc. assume single-word operations.  */
3568   if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3569     return expmed_mult_highpart_optab (mode, op0, op1, target,
3570                                        unsignedp, max_cost);
3571
3572   extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3573
3574   /* Check whether we try to multiply by a negative constant.  */
3575   if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3576     {
3577       sign_adjust = true;
3578       extra_cost += add_cost (speed, mode);
3579     }
3580
3581   /* See whether shift/add multiplication is cheap enough.  */
3582   if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3583                            max_cost - extra_cost))
3584     {
3585       /* See whether the specialized multiplication optabs are
3586          cheaper than the shift/add version.  */
3587       tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3588                                         alg.cost.cost + extra_cost);
3589       if (tem)
3590         return tem;
3591
3592       tem = convert_to_mode (wider_mode, op0, unsignedp);
3593       tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3594       tem = extract_high_half (mode, tem);
3595
3596       /* Adjust result for signedness.  */
3597       if (sign_adjust)
3598         tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3599
3600       return tem;
3601     }
3602   return expmed_mult_highpart_optab (mode, op0, op1, target,
3603                                      unsignedp, max_cost);
3604 }
3605
3606
3607 /* Expand signed modulus of OP0 by a power of two D in mode MODE.  */
3608
3609 static rtx
3610 expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3611 {
3612   unsigned HOST_WIDE_INT masklow, maskhigh;
3613   rtx result, temp, shift, label;
3614   int logd;
3615
3616   logd = floor_log2 (d);
3617   result = gen_reg_rtx (mode);
3618
3619   /* Avoid conditional branches when they're expensive.  */
3620   if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3621       && optimize_insn_for_speed_p ())
3622     {
3623       rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3624                                       mode, 0, -1);
3625       if (signmask)
3626         {
3627           signmask = force_reg (mode, signmask);
3628           masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3629           shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3630
3631           /* Use the rtx_cost of a LSHIFTRT instruction to determine
3632              which instruction sequence to use.  If logical right shifts
3633              are expensive the use 2 XORs, 2 SUBs and an AND, otherwise
3634              use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */
3635
3636           temp = gen_rtx_LSHIFTRT (mode, result, shift);
3637           if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3638               || (set_src_cost (temp, optimize_insn_for_speed_p ())
3639                   > COSTS_N_INSNS (2)))
3640             {
3641               temp = expand_binop (mode, xor_optab, op0, signmask,
3642                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3643               temp = expand_binop (mode, sub_optab, temp, signmask,
3644                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3645               temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3646                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3647               temp = expand_binop (mode, xor_optab, temp, signmask,
3648                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3649               temp = expand_binop (mode, sub_optab, temp, signmask,
3650                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3651             }
3652           else
3653             {
3654               signmask = expand_binop (mode, lshr_optab, signmask, shift,
3655                                        NULL_RTX, 1, OPTAB_LIB_WIDEN);
3656               signmask = force_reg (mode, signmask);
3657
3658               temp = expand_binop (mode, add_optab, op0, signmask,
3659                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3660               temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3661                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3662               temp = expand_binop (mode, sub_optab, temp, signmask,
3663                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3664             }
3665           return temp;
3666         }
3667     }
3668
3669   /* Mask contains the mode's signbit and the significant bits of the
3670      modulus.  By including the signbit in the operation, many targets
3671      can avoid an explicit compare operation in the following comparison
3672      against zero.  */
3673
3674   masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3675   if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
3676     {
3677       masklow |= (HOST_WIDE_INT) -1 << (GET_MODE_BITSIZE (mode) - 1);
3678       maskhigh = -1;
3679     }
3680   else
3681     maskhigh = (HOST_WIDE_INT) -1
3682                  << (GET_MODE_BITSIZE (mode) - HOST_BITS_PER_WIDE_INT - 1);
3683
3684   temp = expand_binop (mode, and_optab, op0,
3685                        immed_double_const (masklow, maskhigh, mode),
3686                        result, 1, OPTAB_LIB_WIDEN);
3687   if (temp != result)
3688     emit_move_insn (result, temp);
3689
3690   label = gen_label_rtx ();
3691   do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3692
3693   temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3694                        0, OPTAB_LIB_WIDEN);
3695   masklow = (HOST_WIDE_INT) -1 << logd;
3696   maskhigh = -1;
3697   temp = expand_binop (mode, ior_optab, temp,
3698                        immed_double_const (masklow, maskhigh, mode),
3699                        result, 1, OPTAB_LIB_WIDEN);
3700   temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3701                        0, OPTAB_LIB_WIDEN);
3702   if (temp != result)
3703     emit_move_insn (result, temp);
3704   emit_label (label);
3705   return result;
3706 }
3707
3708 /* Expand signed division of OP0 by a power of two D in mode MODE.
3709    This routine is only called for positive values of D.  */
3710
3711 static rtx
3712 expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3713 {
3714   rtx temp, label;
3715   int logd;
3716
3717   logd = floor_log2 (d);
3718
3719   if (d == 2
3720       && BRANCH_COST (optimize_insn_for_speed_p (),
3721                       false) >= 1)
3722     {
3723       temp = gen_reg_rtx (mode);
3724       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3725       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3726                            0, OPTAB_LIB_WIDEN);
3727       return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3728     }
3729
3730 #ifdef HAVE_conditional_move
3731   if (BRANCH_COST (optimize_insn_for_speed_p (), false)
3732       >= 2)
3733     {
3734       rtx temp2;
3735
3736       /* ??? emit_conditional_move forces a stack adjustment via
3737          compare_from_rtx so, if the sequence is discarded, it will
3738          be lost.  Do it now instead.  */
3739       do_pending_stack_adjust ();
3740
3741       start_sequence ();
3742       temp2 = copy_to_mode_reg (mode, op0);
3743       temp = expand_binop (mode, add_optab, temp2, GEN_INT (d-1),
3744                            NULL_RTX, 0, OPTAB_LIB_WIDEN);
3745       temp = force_reg (mode, temp);
3746
3747       /* Construct "temp2 = (temp2 < 0) ? temp : temp2".  */
3748       temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3749                                      mode, temp, temp2, mode, 0);
3750       if (temp2)
3751         {
3752           rtx seq = get_insns ();
3753           end_sequence ();
3754           emit_insn (seq);
3755           return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
3756         }
3757       end_sequence ();
3758     }
3759 #endif
3760
3761   if (BRANCH_COST (optimize_insn_for_speed_p (),
3762                    false) >= 2)
3763     {
3764       int ushift = GET_MODE_BITSIZE (mode) - logd;
3765
3766       temp = gen_reg_rtx (mode);
3767       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3768       if (shift_cost (optimize_insn_for_speed_p (), mode, ushift)
3769           > COSTS_N_INSNS (1))
3770         temp = expand_binop (mode, and_optab, temp, GEN_INT (d - 1),
3771                              NULL_RTX, 0, OPTAB_LIB_WIDEN);
3772       else
3773         temp = expand_shift (RSHIFT_EXPR, mode, temp,
3774                              ushift, NULL_RTX, 1);
3775       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3776                            0, OPTAB_LIB_WIDEN);
3777       return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3778     }
3779
3780   label = gen_label_rtx ();
3781   temp = copy_to_mode_reg (mode, op0);
3782   do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3783   expand_inc (temp, GEN_INT (d - 1));
3784   emit_label (label);
3785   return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3786 }
3787 \f
3788 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3789    if that is convenient, and returning where the result is.
3790    You may request either the quotient or the remainder as the result;
3791    specify REM_FLAG nonzero to get the remainder.
3792
3793    CODE is the expression code for which kind of division this is;
3794    it controls how rounding is done.  MODE is the machine mode to use.
3795    UNSIGNEDP nonzero means do unsigned division.  */
3796
3797 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3798    and then correct it by or'ing in missing high bits
3799    if result of ANDI is nonzero.
3800    For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3801    This could optimize to a bfexts instruction.
3802    But C doesn't use these operations, so their optimizations are
3803    left for later.  */
3804 /* ??? For modulo, we don't actually need the highpart of the first product,
3805    the low part will do nicely.  And for small divisors, the second multiply
3806    can also be a low-part only multiply or even be completely left out.
3807    E.g. to calculate the remainder of a division by 3 with a 32 bit
3808    multiply, multiply with 0x55555556 and extract the upper two bits;
3809    the result is exact for inputs up to 0x1fffffff.
3810    The input range can be reduced by using cross-sum rules.
3811    For odd divisors >= 3, the following table gives right shift counts
3812    so that if a number is shifted by an integer multiple of the given
3813    amount, the remainder stays the same:
3814    2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3815    14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3816    0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3817    20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3818    0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3819
3820    Cross-sum rules for even numbers can be derived by leaving as many bits
3821    to the right alone as the divisor has zeros to the right.
3822    E.g. if x is an unsigned 32 bit number:
3823    (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3824    */
3825
3826 rtx
3827 expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3828                rtx op0, rtx op1, rtx target, int unsignedp)
3829 {
3830   enum machine_mode compute_mode;
3831   rtx tquotient;
3832   rtx quotient = 0, remainder = 0;
3833   rtx last;
3834   int size;
3835   rtx insn;
3836   optab optab1, optab2;
3837   int op1_is_constant, op1_is_pow2 = 0;
3838   int max_cost, extra_cost;
3839   static HOST_WIDE_INT last_div_const = 0;
3840   static HOST_WIDE_INT ext_op1;
3841   bool speed = optimize_insn_for_speed_p ();
3842
3843   op1_is_constant = CONST_INT_P (op1);
3844   if (op1_is_constant)
3845     {
3846       ext_op1 = INTVAL (op1);
3847       if (unsignedp)
3848         ext_op1 &= GET_MODE_MASK (mode);
3849       op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3850                      || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3851     }
3852
3853   /*
3854      This is the structure of expand_divmod:
3855
3856      First comes code to fix up the operands so we can perform the operations
3857      correctly and efficiently.
3858
3859      Second comes a switch statement with code specific for each rounding mode.
3860      For some special operands this code emits all RTL for the desired
3861      operation, for other cases, it generates only a quotient and stores it in
3862      QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
3863      to indicate that it has not done anything.
3864
3865      Last comes code that finishes the operation.  If QUOTIENT is set and
3866      REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
3867      QUOTIENT is not set, it is computed using trunc rounding.
3868
3869      We try to generate special code for division and remainder when OP1 is a
3870      constant.  If |OP1| = 2**n we can use shifts and some other fast
3871      operations.  For other values of OP1, we compute a carefully selected
3872      fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3873      by m.
3874
3875      In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3876      half of the product.  Different strategies for generating the product are
3877      implemented in expmed_mult_highpart.
3878
3879      If what we actually want is the remainder, we generate that by another
3880      by-constant multiplication and a subtraction.  */
3881
3882   /* We shouldn't be called with OP1 == const1_rtx, but some of the
3883      code below will malfunction if we are, so check here and handle
3884      the special case if so.  */
3885   if (op1 == const1_rtx)
3886     return rem_flag ? const0_rtx : op0;
3887
3888     /* When dividing by -1, we could get an overflow.
3889      negv_optab can handle overflows.  */
3890   if (! unsignedp && op1 == constm1_rtx)
3891     {
3892       if (rem_flag)
3893         return const0_rtx;
3894       return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT
3895                           ? negv_optab : neg_optab, op0, target, 0);
3896     }
3897
3898   if (target
3899       /* Don't use the function value register as a target
3900          since we have to read it as well as write it,
3901          and function-inlining gets confused by this.  */
3902       && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3903           /* Don't clobber an operand while doing a multi-step calculation.  */
3904           || ((rem_flag || op1_is_constant)
3905               && (reg_mentioned_p (target, op0)
3906                   || (MEM_P (op0) && MEM_P (target))))
3907           || reg_mentioned_p (target, op1)
3908           || (MEM_P (op1) && MEM_P (target))))
3909     target = 0;
3910
3911   /* Get the mode in which to perform this computation.  Normally it will
3912      be MODE, but sometimes we can't do the desired operation in MODE.
3913      If so, pick a wider mode in which we can do the operation.  Convert
3914      to that mode at the start to avoid repeated conversions.
3915
3916      First see what operations we need.  These depend on the expression
3917      we are evaluating.  (We assume that divxx3 insns exist under the
3918      same conditions as modxx3 insns and that these insns don't normally
3919      fail.  If these assumptions are not correct, we may generate less
3920      efficient code in some cases.)
3921
3922      Then see if we find a mode in which we can open-code that operation
3923      (either a division, modulus, or shift).  Finally, check for the smallest
3924      mode for which we can do the operation with a library call.  */
3925
3926   /* We might want to refine this now that we have division-by-constant
3927      optimization.  Since expmed_mult_highpart tries so many variants, it is
3928      not straightforward to generalize this.  Maybe we should make an array
3929      of possible modes in init_expmed?  Save this for GCC 2.7.  */
3930
3931   optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3932             ? (unsignedp ? lshr_optab : ashr_optab)
3933             : (unsignedp ? udiv_optab : sdiv_optab));
3934   optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3935             ? optab1
3936             : (unsignedp ? udivmod_optab : sdivmod_optab));
3937
3938   for (compute_mode = mode; compute_mode != VOIDmode;
3939        compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3940     if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
3941         || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
3942       break;
3943
3944   if (compute_mode == VOIDmode)
3945     for (compute_mode = mode; compute_mode != VOIDmode;
3946          compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3947       if (optab_libfunc (optab1, compute_mode)
3948           || optab_libfunc (optab2, compute_mode))
3949         break;
3950
3951   /* If we still couldn't find a mode, use MODE, but expand_binop will
3952      probably die.  */
3953   if (compute_mode == VOIDmode)
3954     compute_mode = mode;
3955
3956   if (target && GET_MODE (target) == compute_mode)
3957     tquotient = target;
3958   else
3959     tquotient = gen_reg_rtx (compute_mode);
3960
3961   size = GET_MODE_BITSIZE (compute_mode);
3962 #if 0
3963   /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3964      (mode), and thereby get better code when OP1 is a constant.  Do that
3965      later.  It will require going over all usages of SIZE below.  */
3966   size = GET_MODE_BITSIZE (mode);
3967 #endif
3968
3969   /* Only deduct something for a REM if the last divide done was
3970      for a different constant.   Then set the constant of the last
3971      divide.  */
3972   max_cost = (unsignedp
3973               ? udiv_cost (speed, compute_mode)
3974               : sdiv_cost (speed, compute_mode));
3975   if (rem_flag && ! (last_div_const != 0 && op1_is_constant
3976                      && INTVAL (op1) == last_div_const))
3977     max_cost -= (mul_cost (speed, compute_mode)
3978                  + add_cost (speed, compute_mode));
3979
3980   last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
3981
3982   /* Now convert to the best mode to use.  */
3983   if (compute_mode != mode)
3984     {
3985       op0 = convert_modes (compute_mode, mode, op0, unsignedp);
3986       op1 = convert_modes (compute_mode, mode, op1, unsignedp);
3987
3988       /* convert_modes may have placed op1 into a register, so we
3989          must recompute the following.  */
3990       op1_is_constant = CONST_INT_P (op1);
3991       op1_is_pow2 = (op1_is_constant
3992                      && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
3993                           || (! unsignedp
3994                               && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1)))))) ;
3995     }
3996
3997   /* If one of the operands is a volatile MEM, copy it into a register.  */
3998
3999   if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4000     op0 = force_reg (compute_mode, op0);
4001   if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4002     op1 = force_reg (compute_mode, op1);
4003
4004   /* If we need the remainder or if OP1 is constant, we need to
4005      put OP0 in a register in case it has any queued subexpressions.  */
4006   if (rem_flag || op1_is_constant)
4007     op0 = force_reg (compute_mode, op0);
4008
4009   last = get_last_insn ();
4010
4011   /* Promote floor rounding to trunc rounding for unsigned operations.  */
4012   if (unsignedp)
4013     {
4014       if (code == FLOOR_DIV_EXPR)
4015         code = TRUNC_DIV_EXPR;
4016       if (code == FLOOR_MOD_EXPR)
4017         code = TRUNC_MOD_EXPR;
4018       if (code == EXACT_DIV_EXPR && op1_is_pow2)
4019         code = TRUNC_DIV_EXPR;
4020     }
4021
4022   if (op1 != const0_rtx)
4023     switch (code)
4024       {
4025       case TRUNC_MOD_EXPR:
4026       case TRUNC_DIV_EXPR:
4027         if (op1_is_constant)
4028           {
4029             if (unsignedp)
4030               {
4031                 unsigned HOST_WIDE_INT mh, ml;
4032                 int pre_shift, post_shift;
4033                 int dummy;
4034                 unsigned HOST_WIDE_INT d = (INTVAL (op1)
4035                                             & GET_MODE_MASK (compute_mode));
4036
4037                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4038                   {
4039                     pre_shift = floor_log2 (d);
4040                     if (rem_flag)
4041                       {
4042                         remainder
4043                           = expand_binop (compute_mode, and_optab, op0,
4044                                           GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4045                                           remainder, 1,
4046                                           OPTAB_LIB_WIDEN);
4047                         if (remainder)
4048                           return gen_lowpart (mode, remainder);
4049                       }
4050                     quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4051                                              pre_shift, tquotient, 1);
4052                   }
4053                 else if (size <= HOST_BITS_PER_WIDE_INT)
4054                   {
4055                     if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
4056                       {
4057                         /* Most significant bit of divisor is set; emit an scc
4058                            insn.  */
4059                         quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4060                                                           compute_mode, 1, 1);
4061                       }
4062                     else
4063                       {
4064                         /* Find a suitable multiplier and right shift count
4065                            instead of multiplying with D.  */
4066
4067                         mh = choose_multiplier (d, size, size,
4068                                                 &ml, &post_shift, &dummy);
4069
4070                         /* If the suggested multiplier is more than SIZE bits,
4071                            we can do better for even divisors, using an
4072                            initial right shift.  */
4073                         if (mh != 0 && (d & 1) == 0)
4074                           {
4075                             pre_shift = floor_log2 (d & -d);
4076                             mh = choose_multiplier (d >> pre_shift, size,
4077                                                     size - pre_shift,
4078                                                     &ml, &post_shift, &dummy);
4079                             gcc_assert (!mh);
4080                           }
4081                         else
4082                           pre_shift = 0;
4083
4084                         if (mh != 0)
4085                           {
4086                             rtx t1, t2, t3, t4;
4087
4088                             if (post_shift - 1 >= BITS_PER_WORD)
4089                               goto fail1;
4090
4091                             extra_cost
4092                               = (shift_cost (speed, compute_mode, post_shift - 1)
4093                                  + shift_cost (speed, compute_mode, 1)
4094                                  + 2 * add_cost (speed, compute_mode));
4095                             t1 = expmed_mult_highpart (compute_mode, op0,
4096                                                        GEN_INT (ml),
4097                                                        NULL_RTX, 1,
4098                                                        max_cost - extra_cost);
4099                             if (t1 == 0)
4100                               goto fail1;
4101                             t2 = force_operand (gen_rtx_MINUS (compute_mode,
4102                                                                op0, t1),
4103                                                 NULL_RTX);
4104                             t3 = expand_shift (RSHIFT_EXPR, compute_mode,
4105                                                t2, 1, NULL_RTX, 1);
4106                             t4 = force_operand (gen_rtx_PLUS (compute_mode,
4107                                                               t1, t3),
4108                                                 NULL_RTX);
4109                             quotient = expand_shift
4110                               (RSHIFT_EXPR, compute_mode, t4,
4111                                post_shift - 1, tquotient, 1);
4112                           }
4113                         else
4114                           {
4115                             rtx t1, t2;
4116
4117                             if (pre_shift >= BITS_PER_WORD
4118                                 || post_shift >= BITS_PER_WORD)
4119                               goto fail1;
4120
4121                             t1 = expand_shift
4122                               (RSHIFT_EXPR, compute_mode, op0,
4123                                pre_shift, NULL_RTX, 1);
4124                             extra_cost
4125                               = (shift_cost (speed, compute_mode, pre_shift)
4126                                  + shift_cost (speed, compute_mode, post_shift));
4127                             t2 = expmed_mult_highpart (compute_mode, t1,
4128                                                        GEN_INT (ml),
4129                                                        NULL_RTX, 1,
4130                                                        max_cost - extra_cost);
4131                             if (t2 == 0)
4132                               goto fail1;
4133                             quotient = expand_shift
4134                               (RSHIFT_EXPR, compute_mode, t2,
4135                                post_shift, tquotient, 1);
4136                           }
4137                       }
4138                   }
4139                 else            /* Too wide mode to use tricky code */
4140                   break;
4141
4142                 insn = get_last_insn ();
4143                 if (insn != last)
4144                   set_dst_reg_note (insn, REG_EQUAL,
4145                                     gen_rtx_UDIV (compute_mode, op0, op1),
4146                                     quotient);
4147               }
4148             else                /* TRUNC_DIV, signed */
4149               {
4150                 unsigned HOST_WIDE_INT ml;
4151                 int lgup, post_shift;
4152                 rtx mlr;
4153                 HOST_WIDE_INT d = INTVAL (op1);
4154                 unsigned HOST_WIDE_INT abs_d;
4155
4156                 /* Since d might be INT_MIN, we have to cast to
4157                    unsigned HOST_WIDE_INT before negating to avoid
4158                    undefined signed overflow.  */
4159                 abs_d = (d >= 0
4160                          ? (unsigned HOST_WIDE_INT) d
4161                          : - (unsigned HOST_WIDE_INT) d);
4162
4163                 /* n rem d = n rem -d */
4164                 if (rem_flag && d < 0)
4165                   {
4166                     d = abs_d;
4167                     op1 = gen_int_mode (abs_d, compute_mode);
4168                   }
4169
4170                 if (d == 1)
4171                   quotient = op0;
4172                 else if (d == -1)
4173                   quotient = expand_unop (compute_mode, neg_optab, op0,
4174                                           tquotient, 0);
4175                 else if (HOST_BITS_PER_WIDE_INT >= size
4176                          && abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4177                   {
4178                     /* This case is not handled correctly below.  */
4179                     quotient = emit_store_flag (tquotient, EQ, op0, op1,
4180                                                 compute_mode, 1, 1);
4181                     if (quotient == 0)
4182                       goto fail1;
4183                   }
4184                 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4185                          && (rem_flag
4186                              ? smod_pow2_cheap (speed, compute_mode)
4187                              : sdiv_pow2_cheap (speed, compute_mode))
4188                          /* We assume that cheap metric is true if the
4189                             optab has an expander for this mode.  */
4190                          && ((optab_handler ((rem_flag ? smod_optab
4191                                               : sdiv_optab),
4192                                              compute_mode)
4193                               != CODE_FOR_nothing)
4194                              || (optab_handler (sdivmod_optab,
4195                                                 compute_mode)
4196                                  != CODE_FOR_nothing)))
4197                   ;
4198                 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4199                   {
4200                     if (rem_flag)
4201                       {
4202                         remainder = expand_smod_pow2 (compute_mode, op0, d);
4203                         if (remainder)
4204                           return gen_lowpart (mode, remainder);
4205                       }
4206
4207                     if (sdiv_pow2_cheap (speed, compute_mode)
4208                         && ((optab_handler (sdiv_optab, compute_mode)
4209                              != CODE_FOR_nothing)
4210                             || (optab_handler (sdivmod_optab, compute_mode)
4211                                 != CODE_FOR_nothing)))
4212                       quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4213                                                 compute_mode, op0,
4214                                                 gen_int_mode (abs_d,
4215                                                               compute_mode),
4216                                                 NULL_RTX, 0);
4217                     else
4218                       quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4219
4220                     /* We have computed OP0 / abs(OP1).  If OP1 is negative,
4221                        negate the quotient.  */
4222                     if (d < 0)
4223                       {
4224                         insn = get_last_insn ();
4225                         if (insn != last
4226                             && abs_d < ((unsigned HOST_WIDE_INT) 1
4227                                         << (HOST_BITS_PER_WIDE_INT - 1)))
4228                           set_dst_reg_note (insn, REG_EQUAL,
4229                                             gen_rtx_DIV (compute_mode, op0,
4230                                                          gen_int_mode
4231                                                            (abs_d,
4232                                                             compute_mode)),
4233                                             quotient);
4234
4235                         quotient = expand_unop (compute_mode, neg_optab,
4236                                                 quotient, quotient, 0);
4237                       }
4238                   }
4239                 else if (size <= HOST_BITS_PER_WIDE_INT)
4240                   {
4241                     choose_multiplier (abs_d, size, size - 1,
4242                                        &ml, &post_shift, &lgup);
4243                     if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4244                       {
4245                         rtx t1, t2, t3;
4246
4247                         if (post_shift >= BITS_PER_WORD
4248                             || size - 1 >= BITS_PER_WORD)
4249                           goto fail1;
4250
4251                         extra_cost = (shift_cost (speed, compute_mode, post_shift)
4252                                       + shift_cost (speed, compute_mode, size - 1)
4253                                       + add_cost (speed, compute_mode));
4254                         t1 = expmed_mult_highpart (compute_mode, op0,
4255                                                    GEN_INT (ml), NULL_RTX, 0,
4256                                                    max_cost - extra_cost);
4257                         if (t1 == 0)
4258                           goto fail1;
4259                         t2 = expand_shift
4260                           (RSHIFT_EXPR, compute_mode, t1,
4261                            post_shift, NULL_RTX, 0);
4262                         t3 = expand_shift
4263                           (RSHIFT_EXPR, compute_mode, op0,
4264                            size - 1, NULL_RTX, 0);
4265                         if (d < 0)
4266                           quotient
4267                             = force_operand (gen_rtx_MINUS (compute_mode,
4268                                                             t3, t2),
4269                                              tquotient);
4270                         else
4271                           quotient
4272                             = force_operand (gen_rtx_MINUS (compute_mode,
4273                                                             t2, t3),
4274                                              tquotient);
4275                       }
4276                     else
4277                       {
4278                         rtx t1, t2, t3, t4;
4279
4280                         if (post_shift >= BITS_PER_WORD
4281                             || size - 1 >= BITS_PER_WORD)
4282                           goto fail1;
4283
4284                         ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4285                         mlr = gen_int_mode (ml, compute_mode);
4286                         extra_cost = (shift_cost (speed, compute_mode, post_shift)
4287                                       + shift_cost (speed, compute_mode, size - 1)
4288                                       + 2 * add_cost (speed, compute_mode));
4289                         t1 = expmed_mult_highpart (compute_mode, op0, mlr,
4290                                                    NULL_RTX, 0,
4291                                                    max_cost - extra_cost);
4292                         if (t1 == 0)
4293                           goto fail1;
4294                         t2 = force_operand (gen_rtx_PLUS (compute_mode,
4295                                                           t1, op0),
4296                                             NULL_RTX);
4297                         t3 = expand_shift
4298                           (RSHIFT_EXPR, compute_mode, t2,
4299                            post_shift, NULL_RTX, 0);
4300                         t4 = expand_shift
4301                           (RSHIFT_EXPR, compute_mode, op0,
4302                            size - 1, NULL_RTX, 0);
4303                         if (d < 0)
4304                           quotient
4305                             = force_operand (gen_rtx_MINUS (compute_mode,
4306                                                             t4, t3),
4307                                              tquotient);
4308                         else
4309                           quotient
4310                             = force_operand (gen_rtx_MINUS (compute_mode,
4311                                                             t3, t4),
4312                                              tquotient);
4313                       }
4314                   }
4315                 else            /* Too wide mode to use tricky code */
4316                   break;
4317
4318                 insn = get_last_insn ();
4319                 if (insn != last)
4320                   set_dst_reg_note (insn, REG_EQUAL,
4321                                     gen_rtx_DIV (compute_mode, op0, op1),
4322                                     quotient);
4323               }
4324             break;
4325           }
4326       fail1:
4327         delete_insns_since (last);
4328         break;
4329
4330       case FLOOR_DIV_EXPR:
4331       case FLOOR_MOD_EXPR:
4332       /* We will come here only for signed operations.  */
4333         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4334           {
4335             unsigned HOST_WIDE_INT mh, ml;
4336             int pre_shift, lgup, post_shift;
4337             HOST_WIDE_INT d = INTVAL (op1);
4338
4339             if (d > 0)
4340               {
4341                 /* We could just as easily deal with negative constants here,
4342                    but it does not seem worth the trouble for GCC 2.6.  */
4343                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4344                   {
4345                     pre_shift = floor_log2 (d);
4346                     if (rem_flag)
4347                       {
4348                         remainder = expand_binop (compute_mode, and_optab, op0,
4349                                                   GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4350                                                   remainder, 0, OPTAB_LIB_WIDEN);
4351                         if (remainder)
4352                           return gen_lowpart (mode, remainder);
4353                       }
4354                     quotient = expand_shift
4355                       (RSHIFT_EXPR, compute_mode, op0,
4356                        pre_shift, tquotient, 0);
4357                   }
4358                 else
4359                   {
4360                     rtx t1, t2, t3, t4;
4361
4362                     mh = choose_multiplier (d, size, size - 1,
4363                                             &ml, &post_shift, &lgup);
4364                     gcc_assert (!mh);
4365
4366                     if (post_shift < BITS_PER_WORD
4367                         && size - 1 < BITS_PER_WORD)
4368                       {
4369                         t1 = expand_shift
4370                           (RSHIFT_EXPR, compute_mode, op0,
4371                            size - 1, NULL_RTX, 0);
4372                         t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4373                                            NULL_RTX, 0, OPTAB_WIDEN);
4374                         extra_cost = (shift_cost (speed, compute_mode, post_shift)
4375                                       + shift_cost (speed, compute_mode, size - 1)
4376                                       + 2 * add_cost (speed, compute_mode));
4377                         t3 = expmed_mult_highpart (compute_mode, t2,
4378                                                    GEN_INT (ml), NULL_RTX, 1,
4379                                                    max_cost - extra_cost);
4380                         if (t3 != 0)
4381                           {
4382                             t4 = expand_shift
4383                               (RSHIFT_EXPR, compute_mode, t3,
4384                                post_shift, NULL_RTX, 1);
4385                             quotient = expand_binop (compute_mode, xor_optab,
4386                                                      t4, t1, tquotient, 0,
4387                                                      OPTAB_WIDEN);
4388                           }
4389                       }
4390                   }
4391               }
4392             else
4393               {
4394                 rtx nsign, t1, t2, t3, t4;
4395                 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4396                                                   op0, constm1_rtx), NULL_RTX);
4397                 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4398                                    0, OPTAB_WIDEN);
4399                 nsign = expand_shift
4400                   (RSHIFT_EXPR, compute_mode, t2,
4401                    size - 1, NULL_RTX, 0);
4402                 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4403                                     NULL_RTX);
4404                 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4405                                     NULL_RTX, 0);
4406                 if (t4)
4407                   {
4408                     rtx t5;
4409                     t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4410                                       NULL_RTX, 0);
4411                     quotient = force_operand (gen_rtx_PLUS (compute_mode,
4412                                                             t4, t5),
4413                                               tquotient);
4414                   }
4415               }
4416           }
4417
4418         if (quotient != 0)
4419           break;
4420         delete_insns_since (last);
4421
4422         /* Try using an instruction that produces both the quotient and
4423            remainder, using truncation.  We can easily compensate the quotient
4424            or remainder to get floor rounding, once we have the remainder.
4425            Notice that we compute also the final remainder value here,
4426            and return the result right away.  */
4427         if (target == 0 || GET_MODE (target) != compute_mode)
4428           target = gen_reg_rtx (compute_mode);
4429
4430         if (rem_flag)
4431           {
4432             remainder
4433               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4434             quotient = gen_reg_rtx (compute_mode);
4435           }
4436         else
4437           {
4438             quotient
4439               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4440             remainder = gen_reg_rtx (compute_mode);
4441           }
4442
4443         if (expand_twoval_binop (sdivmod_optab, op0, op1,
4444                                  quotient, remainder, 0))
4445           {
4446             /* This could be computed with a branch-less sequence.
4447                Save that for later.  */
4448             rtx tem;
4449             rtx label = gen_label_rtx ();
4450             do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4451             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4452                                 NULL_RTX, 0, OPTAB_WIDEN);
4453             do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4454             expand_dec (quotient, const1_rtx);
4455             expand_inc (remainder, op1);
4456             emit_label (label);
4457             return gen_lowpart (mode, rem_flag ? remainder : quotient);
4458           }
4459
4460         /* No luck with division elimination or divmod.  Have to do it
4461            by conditionally adjusting op0 *and* the result.  */
4462         {
4463           rtx label1, label2, label3, label4, label5;
4464           rtx adjusted_op0;
4465           rtx tem;
4466
4467           quotient = gen_reg_rtx (compute_mode);
4468           adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4469           label1 = gen_label_rtx ();
4470           label2 = gen_label_rtx ();
4471           label3 = gen_label_rtx ();
4472           label4 = gen_label_rtx ();
4473           label5 = gen_label_rtx ();
4474           do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4475           do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4476           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4477                               quotient, 0, OPTAB_LIB_WIDEN);
4478           if (tem != quotient)
4479             emit_move_insn (quotient, tem);
4480           emit_jump_insn (gen_jump (label5));
4481           emit_barrier ();
4482           emit_label (label1);
4483           expand_inc (adjusted_op0, const1_rtx);
4484           emit_jump_insn (gen_jump (label4));
4485           emit_barrier ();
4486           emit_label (label2);
4487           do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4488           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4489                               quotient, 0, OPTAB_LIB_WIDEN);
4490           if (tem != quotient)
4491             emit_move_insn (quotient, tem);
4492           emit_jump_insn (gen_jump (label5));
4493           emit_barrier ();
4494           emit_label (label3);
4495           expand_dec (adjusted_op0, const1_rtx);
4496           emit_label (label4);
4497           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4498                               quotient, 0, OPTAB_LIB_WIDEN);
4499           if (tem != quotient)
4500             emit_move_insn (quotient, tem);
4501           expand_dec (quotient, const1_rtx);
4502           emit_label (label5);
4503         }
4504         break;
4505
4506       case CEIL_DIV_EXPR:
4507       case CEIL_MOD_EXPR:
4508         if (unsignedp)
4509           {
4510             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4511               {
4512                 rtx t1, t2, t3;
4513                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4514                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4515                                    floor_log2 (d), tquotient, 1);
4516                 t2 = expand_binop (compute_mode, and_optab, op0,
4517                                    GEN_INT (d - 1),
4518                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4519                 t3 = gen_reg_rtx (compute_mode);
4520                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4521                                       compute_mode, 1, 1);
4522                 if (t3 == 0)
4523                   {
4524                     rtx lab;
4525                     lab = gen_label_rtx ();
4526                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4527                     expand_inc (t1, const1_rtx);
4528                     emit_label (lab);
4529                     quotient = t1;
4530                   }
4531                 else
4532                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4533                                                           t1, t3),
4534                                             tquotient);
4535                 break;
4536               }
4537
4538             /* Try using an instruction that produces both the quotient and
4539                remainder, using truncation.  We can easily compensate the
4540                quotient or remainder to get ceiling rounding, once we have the
4541                remainder.  Notice that we compute also the final remainder
4542                value here, and return the result right away.  */
4543             if (target == 0 || GET_MODE (target) != compute_mode)
4544               target = gen_reg_rtx (compute_mode);
4545
4546             if (rem_flag)
4547               {
4548                 remainder = (REG_P (target)
4549                              ? target : gen_reg_rtx (compute_mode));
4550                 quotient = gen_reg_rtx (compute_mode);
4551               }
4552             else
4553               {
4554                 quotient = (REG_P (target)
4555                             ? target : gen_reg_rtx (compute_mode));
4556                 remainder = gen_reg_rtx (compute_mode);
4557               }
4558
4559             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4560                                      remainder, 1))
4561               {
4562                 /* This could be computed with a branch-less sequence.
4563                    Save that for later.  */
4564                 rtx label = gen_label_rtx ();
4565                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4566                                  compute_mode, label);
4567                 expand_inc (quotient, const1_rtx);
4568                 expand_dec (remainder, op1);
4569                 emit_label (label);
4570                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4571               }
4572
4573             /* No luck with division elimination or divmod.  Have to do it
4574                by conditionally adjusting op0 *and* the result.  */
4575             {
4576               rtx label1, label2;
4577               rtx adjusted_op0, tem;
4578
4579               quotient = gen_reg_rtx (compute_mode);
4580               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4581               label1 = gen_label_rtx ();
4582               label2 = gen_label_rtx ();
4583               do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4584                                compute_mode, label1);
4585               emit_move_insn  (quotient, const0_rtx);
4586               emit_jump_insn (gen_jump (label2));
4587               emit_barrier ();
4588               emit_label (label1);
4589               expand_dec (adjusted_op0, const1_rtx);
4590               tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4591                                   quotient, 1, OPTAB_LIB_WIDEN);
4592               if (tem != quotient)
4593                 emit_move_insn (quotient, tem);
4594               expand_inc (quotient, const1_rtx);
4595               emit_label (label2);
4596             }
4597           }
4598         else /* signed */
4599           {
4600             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4601                 && INTVAL (op1) >= 0)
4602               {
4603                 /* This is extremely similar to the code for the unsigned case
4604                    above.  For 2.7 we should merge these variants, but for
4605                    2.6.1 I don't want to touch the code for unsigned since that
4606                    get used in C.  The signed case will only be used by other
4607                    languages (Ada).  */
4608
4609                 rtx t1, t2, t3;
4610                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4611                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4612                                    floor_log2 (d), tquotient, 0);
4613                 t2 = expand_binop (compute_mode, and_optab, op0,
4614                                    GEN_INT (d - 1),
4615                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4616                 t3 = gen_reg_rtx (compute_mode);
4617                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4618                                       compute_mode, 1, 1);
4619                 if (t3 == 0)
4620                   {
4621                     rtx lab;
4622                     lab = gen_label_rtx ();
4623                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4624                     expand_inc (t1, const1_rtx);
4625                     emit_label (lab);
4626                     quotient = t1;
4627                   }
4628                 else
4629                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4630                                                           t1, t3),
4631                                             tquotient);
4632                 break;
4633               }
4634
4635             /* Try using an instruction that produces both the quotient and
4636                remainder, using truncation.  We can easily compensate the
4637                quotient or remainder to get ceiling rounding, once we have the
4638                remainder.  Notice that we compute also the final remainder
4639                value here, and return the result right away.  */
4640             if (target == 0 || GET_MODE (target) != compute_mode)
4641               target = gen_reg_rtx (compute_mode);
4642             if (rem_flag)
4643               {
4644                 remainder= (REG_P (target)
4645                             ? target : gen_reg_rtx (compute_mode));
4646                 quotient = gen_reg_rtx (compute_mode);
4647               }
4648             else
4649               {
4650                 quotient = (REG_P (target)
4651                             ? target : gen_reg_rtx (compute_mode));
4652                 remainder = gen_reg_rtx (compute_mode);
4653               }
4654
4655             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4656                                      remainder, 0))
4657               {
4658                 /* This could be computed with a branch-less sequence.
4659                    Save that for later.  */
4660                 rtx tem;
4661                 rtx label = gen_label_rtx ();
4662                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4663                                  compute_mode, label);
4664                 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4665                                     NULL_RTX, 0, OPTAB_WIDEN);
4666                 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4667                 expand_inc (quotient, const1_rtx);
4668                 expand_dec (remainder, op1);
4669                 emit_label (label);
4670                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4671               }
4672
4673             /* No luck with division elimination or divmod.  Have to do it
4674                by conditionally adjusting op0 *and* the result.  */
4675             {
4676               rtx label1, label2, label3, label4, label5;
4677               rtx adjusted_op0;
4678               rtx tem;
4679
4680               quotient = gen_reg_rtx (compute_mode);
4681               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4682               label1 = gen_label_rtx ();
4683               label2 = gen_label_rtx ();
4684               label3 = gen_label_rtx ();
4685               label4 = gen_label_rtx ();
4686               label5 = gen_label_rtx ();
4687               do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4688               do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4689                                compute_mode, label1);
4690               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4691                                   quotient, 0, OPTAB_LIB_WIDEN);
4692               if (tem != quotient)
4693                 emit_move_insn (quotient, tem);
4694               emit_jump_insn (gen_jump (label5));
4695               emit_barrier ();
4696               emit_label (label1);
4697               expand_dec (adjusted_op0, const1_rtx);
4698               emit_jump_insn (gen_jump (label4));
4699               emit_barrier ();
4700               emit_label (label2);
4701               do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4702                                compute_mode, label3);
4703               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4704                                   quotient, 0, OPTAB_LIB_WIDEN);
4705               if (tem != quotient)
4706                 emit_move_insn (quotient, tem);
4707               emit_jump_insn (gen_jump (label5));
4708               emit_barrier ();
4709               emit_label (label3);
4710               expand_inc (adjusted_op0, const1_rtx);
4711               emit_label (label4);
4712               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4713                                   quotient, 0, OPTAB_LIB_WIDEN);
4714               if (tem != quotient)
4715                 emit_move_insn (quotient, tem);
4716               expand_inc (quotient, const1_rtx);
4717               emit_label (label5);
4718             }
4719           }
4720         break;
4721
4722       case EXACT_DIV_EXPR:
4723         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4724           {
4725             HOST_WIDE_INT d = INTVAL (op1);
4726             unsigned HOST_WIDE_INT ml;
4727             int pre_shift;
4728             rtx t1;
4729
4730             pre_shift = floor_log2 (d & -d);
4731             ml = invert_mod2n (d >> pre_shift, size);
4732             t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4733                                pre_shift, NULL_RTX, unsignedp);
4734             quotient = expand_mult (compute_mode, t1,
4735                                     gen_int_mode (ml, compute_mode),
4736                                     NULL_RTX, 1);
4737
4738             insn = get_last_insn ();
4739             set_dst_reg_note (insn, REG_EQUAL,
4740                               gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4741                                               compute_mode, op0, op1),
4742                               quotient);
4743           }
4744         break;
4745
4746       case ROUND_DIV_EXPR:
4747       case ROUND_MOD_EXPR:
4748         if (unsignedp)
4749           {
4750             rtx tem;
4751             rtx label;
4752             label = gen_label_rtx ();
4753             quotient = gen_reg_rtx (compute_mode);
4754             remainder = gen_reg_rtx (compute_mode);
4755             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4756               {
4757                 rtx tem;
4758                 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4759                                          quotient, 1, OPTAB_LIB_WIDEN);
4760                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4761                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4762                                           remainder, 1, OPTAB_LIB_WIDEN);
4763               }
4764             tem = plus_constant (compute_mode, op1, -1);
4765             tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1);
4766             do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4767             expand_inc (quotient, const1_rtx);
4768             expand_dec (remainder, op1);
4769             emit_label (label);
4770           }
4771         else
4772           {
4773             rtx abs_rem, abs_op1, tem, mask;
4774             rtx label;
4775             label = gen_label_rtx ();
4776             quotient = gen_reg_rtx (compute_mode);
4777             remainder = gen_reg_rtx (compute_mode);
4778             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4779               {
4780                 rtx tem;
4781                 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4782                                          quotient, 0, OPTAB_LIB_WIDEN);
4783                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4784                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4785                                           remainder, 0, OPTAB_LIB_WIDEN);
4786               }
4787             abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4788             abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4789             tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4790                                 1, NULL_RTX, 1);
4791             do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4792             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4793                                 NULL_RTX, 0, OPTAB_WIDEN);
4794             mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4795                                  size - 1, NULL_RTX, 0);
4796             tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4797                                 NULL_RTX, 0, OPTAB_WIDEN);
4798             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4799                                 NULL_RTX, 0, OPTAB_WIDEN);
4800             expand_inc (quotient, tem);
4801             tem = expand_binop (compute_mode, xor_optab, mask, op1,
4802                                 NULL_RTX, 0, OPTAB_WIDEN);
4803             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4804                                 NULL_RTX, 0, OPTAB_WIDEN);
4805             expand_dec (remainder, tem);
4806             emit_label (label);
4807           }
4808         return gen_lowpart (mode, rem_flag ? remainder : quotient);
4809
4810       default:
4811         gcc_unreachable ();
4812       }
4813
4814   if (quotient == 0)
4815     {
4816       if (target && GET_MODE (target) != compute_mode)
4817         target = 0;
4818
4819       if (rem_flag)
4820         {
4821           /* Try to produce the remainder without producing the quotient.
4822              If we seem to have a divmod pattern that does not require widening,
4823              don't try widening here.  We should really have a WIDEN argument
4824              to expand_twoval_binop, since what we'd really like to do here is
4825              1) try a mod insn in compute_mode
4826              2) try a divmod insn in compute_mode
4827              3) try a div insn in compute_mode and multiply-subtract to get
4828                 remainder
4829              4) try the same things with widening allowed.  */
4830           remainder
4831             = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4832                                  op0, op1, target,
4833                                  unsignedp,
4834                                  ((optab_handler (optab2, compute_mode)
4835                                    != CODE_FOR_nothing)
4836                                   ? OPTAB_DIRECT : OPTAB_WIDEN));
4837           if (remainder == 0)
4838             {
4839               /* No luck there.  Can we do remainder and divide at once
4840                  without a library call?  */
4841               remainder = gen_reg_rtx (compute_mode);
4842               if (! expand_twoval_binop ((unsignedp
4843                                           ? udivmod_optab
4844                                           : sdivmod_optab),
4845                                          op0, op1,
4846                                          NULL_RTX, remainder, unsignedp))
4847                 remainder = 0;
4848             }
4849
4850           if (remainder)
4851             return gen_lowpart (mode, remainder);
4852         }
4853
4854       /* Produce the quotient.  Try a quotient insn, but not a library call.
4855          If we have a divmod in this mode, use it in preference to widening
4856          the div (for this test we assume it will not fail). Note that optab2
4857          is set to the one of the two optabs that the call below will use.  */
4858       quotient
4859         = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4860                              op0, op1, rem_flag ? NULL_RTX : target,
4861                              unsignedp,
4862                              ((optab_handler (optab2, compute_mode)
4863                                != CODE_FOR_nothing)
4864                               ? OPTAB_DIRECT : OPTAB_WIDEN));
4865
4866       if (quotient == 0)
4867         {
4868           /* No luck there.  Try a quotient-and-remainder insn,
4869              keeping the quotient alone.  */
4870           quotient = gen_reg_rtx (compute_mode);
4871           if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4872                                      op0, op1,
4873                                      quotient, NULL_RTX, unsignedp))
4874             {
4875               quotient = 0;
4876               if (! rem_flag)
4877                 /* Still no luck.  If we are not computing the remainder,
4878                    use a library call for the quotient.  */
4879                 quotient = sign_expand_binop (compute_mode,
4880                                               udiv_optab, sdiv_optab,
4881                                               op0, op1, target,
4882                                               unsignedp, OPTAB_LIB_WIDEN);
4883             }
4884         }
4885     }
4886
4887   if (rem_flag)
4888     {
4889       if (target && GET_MODE (target) != compute_mode)
4890         target = 0;
4891
4892       if (quotient == 0)
4893         {
4894           /* No divide instruction either.  Use library for remainder.  */
4895           remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4896                                          op0, op1, target,
4897                                          unsignedp, OPTAB_LIB_WIDEN);
4898           /* No remainder function.  Try a quotient-and-remainder
4899              function, keeping the remainder.  */
4900           if (!remainder)
4901             {
4902               remainder = gen_reg_rtx (compute_mode);
4903               if (!expand_twoval_binop_libfunc
4904                   (unsignedp ? udivmod_optab : sdivmod_optab,
4905                    op0, op1,
4906                    NULL_RTX, remainder,
4907                    unsignedp ? UMOD : MOD))
4908                 remainder = NULL_RTX;
4909             }
4910         }
4911       else
4912         {
4913           /* We divided.  Now finish doing X - Y * (X / Y).  */
4914           remainder = expand_mult (compute_mode, quotient, op1,
4915                                    NULL_RTX, unsignedp);
4916           remainder = expand_binop (compute_mode, sub_optab, op0,
4917                                     remainder, target, unsignedp,
4918                                     OPTAB_LIB_WIDEN);
4919         }
4920     }
4921
4922   return gen_lowpart (mode, rem_flag ? remainder : quotient);
4923 }
4924 \f
4925 /* Return a tree node with data type TYPE, describing the value of X.
4926    Usually this is a VAR_DECL, if there is no obvious better choice.
4927    X may be an expression, however we only support those expressions
4928    generated by loop.c.

        Handling by rtx code of X:
        - CONST_INT, CONST_DOUBLE: an integer constant built with
          build_int_cst_wide, or a real constant built with build_real
          when the CONST_DOUBLE has a non-VOID mode.
        - CONST_VECTOR: a vector built with build_vector from the
          recursively converted elements.
        - PLUS, MINUS, NEG, MULT, ASHIFT: the corresponding folded tree
          expression over the recursively converted operands.
        - LSHIFTRT, ASHIFTRT, DIV, UDIV: the operation is built in the
          unsigned or signed variant of TYPE (DIV on a REAL_TYPE uses TYPE
          itself) and the result is converted back to TYPE.
        - SIGN_EXTEND, ZERO_EXTEND: the operand, converted to TYPE.
        - CONST: whatever the wrapped expression yields.
        - SYMBOL_REF that has a decl: the address of that decl, converted
          to TYPE.
        - anything else: a new VAR_DECL of TYPE whose rtl is X.  */
4929
4930 tree
4931 make_tree (tree type, rtx x)
4932 {
4933   tree t;
4934
4935   switch (GET_CODE (x))
4936     {
4937     case CONST_INT:
4938       {
             /* HI is the high part handed to build_int_cst_wide; INTVAL (x)
                is the low part.  HI stays 0 unless the value is negative.  */
4939         HOST_WIDE_INT hi = 0;
4940
             /* A negative value gets an all-ones high part, except when TYPE
                is unsigned and its mode has fewer bits than
                HOST_BITS_PER_WIDE_INT.  */
4941         if (INTVAL (x) < 0
4942             && !(TYPE_UNSIGNED (type)
4943                  && (GET_MODE_BITSIZE (TYPE_MODE (type))
4944                      < HOST_BITS_PER_WIDE_INT)))
4945           hi = -1;
4946
4947         t = build_int_cst_wide (type, INTVAL (x), hi);
4948
4949         return t;
4950       }
4951
4952     case CONST_DOUBLE:
           /* A VOIDmode CONST_DOUBLE is turned into an integer constant from
              its low and high parts; any other mode is turned into a real
              constant.  */
4953       if (GET_MODE (x) == VOIDmode)
4954         t = build_int_cst_wide (type,
4955                                 CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
4956       else
4957         {
4958           REAL_VALUE_TYPE d;
4959
4960           REAL_VALUE_FROM_CONST_DOUBLE (d, x);
4961           t = build_real (type, d);
4962         }
4963
4964       return t;
4965
4966     case CONST_VECTOR:
4967       {
4968         int units = CONST_VECTOR_NUNITS (x);
             /* Each element is converted using TREE_TYPE (type).  */
4969         tree itype = TREE_TYPE (type);
4970         tree *elts;
4971         int i;
4972
4973         /* Build a tree with vector elements.  */
4974         elts = XALLOCAVEC (tree, units);
             /* Elements are converted from the last one down to the first.  */
4975         for (i = units - 1; i >= 0; --i)
4976           {
4977             rtx elt = CONST_VECTOR_ELT (x, i);
4978             elts[i] = make_tree (itype, elt);
4979           }
4980
4981         return build_vector (type, elts);
4982       }
4983
         /* Arithmetic codes that map directly onto a tree code: both
            operands are converted with TYPE and the result is folded.  */
4984     case PLUS:
4985       return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4986                           make_tree (type, XEXP (x, 1)));
4987
4988     case MINUS:
4989       return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4990                           make_tree (type, XEXP (x, 1)));
4991
4992     case NEG:
4993       return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
4994
4995     case MULT:
4996       return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
4997                           make_tree (type, XEXP (x, 1)));
4998
4999     case ASHIFT:
5000       return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5001                           make_tree (type, XEXP (x, 1)));
5002
         /* Both right shifts become RSHIFT_EXPR.  The shifted operand and
            the expression use the unsigned (LSHIFTRT) or signed (ASHIFTRT)
            variant of TYPE; the shift count is still converted with TYPE.
            The result is converted back to TYPE.  */
5003     case LSHIFTRT:
5004       t = unsigned_type_for (type);
5005       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5006                                          make_tree (t, XEXP (x, 0)),
5007                                          make_tree (type, XEXP (x, 1))));
5008
5009     case ASHIFTRT:
5010       t = signed_type_for (type);
5011       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5012                                          make_tree (t, XEXP (x, 0)),
5013                                          make_tree (type, XEXP (x, 1))));
5014
         /* DIV is built as TRUNC_DIV_EXPR in the signed variant of TYPE,
            unless TYPE is a REAL_TYPE, in which case TYPE itself is used.  */
5015     case DIV:
5016       if (TREE_CODE (type) != REAL_TYPE)
5017         t = signed_type_for (type);
5018       else
5019         t = type;
5020
5021       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5022                                          make_tree (t, XEXP (x, 0)),
5023                                          make_tree (t, XEXP (x, 1))));
         /* UDIV is built as TRUNC_DIV_EXPR in the unsigned variant of TYPE.  */
5024     case UDIV:
5025       t = unsigned_type_for (type);
5026       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5027                                          make_tree (t, XEXP (x, 0)),
5028                                          make_tree (t, XEXP (x, 1))));
5029
         /* The operand is converted using the language's type for the
            operand's own mode (unsigned exactly for ZERO_EXTEND), and that
            tree is then converted to TYPE.  */
5030     case SIGN_EXTEND:
5031     case ZERO_EXTEND:
5032       t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5033                                           GET_CODE (x) == ZERO_EXTEND);
5034       return fold_convert (type, make_tree (t, XEXP (x, 0)));
5035
5036     case CONST:
5037       return make_tree (type, XEXP (x, 0));
5038
5039     case SYMBOL_REF:
           /* If the symbol has an associated decl, describe X as the address
              of that decl.  */
5040       t = SYMBOL_REF_DECL (x);
5041       if (t)
5042         return fold_convert (type, build_fold_addr_expr (t));
5043       /* else fall through.  */
5044
5045     default:
           /* No better description: make a VAR_DECL of TYPE (built with a
              NULL_TREE name) and attach X to it as its rtl.  */
5046       t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5047
5048       /* If TYPE is a POINTER_TYPE, we might need to convert X from
5049          address mode to pointer mode.  */
5050       if (POINTER_TYPE_P (type))
5051         x = convert_memory_address_addr_space
5052               (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5053
5054       /* Note that we do *not* use SET_DECL_RTL here, because we do not
5055          want set_decl_rtl to go adjusting REG_ATTRS for this temporary.  */
5056       t->decl_with_rtl.rtl = x;
5057
5058       return t;
5059     }
5060 }
5061 \f
5062 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5063    and returning TARGET.
5064
5065    If TARGET is 0, a pseudo-register or constant is returned.

        MODE is the mode passed to both the simplification attempt and the
        and_optab expansion.  */
5066
5067 rtx
5068 expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
5069 {
       /* TEM holds the computed value; 0 means "not computed yet".  */
5070   rtx tem = 0;
5071
       /* When neither operand carries a mode, first ask
          simplify_binary_operation for the result.  TEM stays 0 if it has
          none to offer.  */
5072   if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5073     tem = simplify_binary_operation (AND, mode, op0, op1);
       /* Otherwise expand the AND through and_optab, suggesting TARGET as
          the destination.  */
5074   if (tem == 0)
5075     tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5076
       /* With no TARGET requested, hand back whatever holds the result.
          Otherwise make sure the result ends up in TARGET, copying it there
          if it was produced somewhere else.  */
5077   if (target == 0)
5078     target = tem;
5079   else if (tem != target)
5080     emit_move_insn (target, tem);
5081   return target;
5082 }
5083
5084 /* Helper function for emit_store_flag.

        Emit the insn ICODE for the comparison CODE of X and Y and return an
        rtx holding the result.  Return NULL_RTX if X or Y cannot be
        prepared as operands or if the insn cannot be expanded; in that case
        every insn emitted since entry is deleted again.

        X and Y are passed through prepare_operand as operands 2 and 3 of
        ICODE, using MODE, COMPARE_MODE and UNSIGNEDP.  The insn produces
        its value in the mode of ICODE's operand 0.

        TARGET_MODE is the mode wanted for the result; VOIDmode means the
        mode of ICODE's operand 0.  TARGET, if nonzero, is a suggested
        destination; if zero, a new pseudo of TARGET_MODE is created.  The
        result is not necessarily in TARGET, so callers must use the
        returned rtx.

        NORMALIZEP == 0 leaves the value produced by the insn untouched.
        The code below otherwise distinguishes 1 and -1 and adjusts the
        result depending on STORE_FLAG_VALUE.
        NOTE(review): presumably 1 / -1 is the value wanted for a true
        comparison -- confirm against emit_store_flag.  */
5085 static rtx
5086 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5087              enum machine_mode mode, enum machine_mode compare_mode,
5088              int unsignedp, rtx x, rtx y, int normalizep,
5089              enum machine_mode target_mode)
5090 {
5091   struct expand_operand ops[4];
5092   rtx op0, last, comparison, subtarget;
       /* The mode in which ICODE produces its result (its operand 0).  */
5093   enum machine_mode result_mode = insn_data[(int) icode].operand[0].mode;
5094
       /* Remember the current last insn so that a failed attempt can be
          undone with delete_insns_since.  */
5095   last = get_last_insn ();
5096   x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5097   y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5098   if (!x || !y)
5099     {
5100       delete_insns_since (last);
5101       return NULL_RTX;
5102     }
5103
5104   if (target_mode == VOIDmode)
5105     target_mode = result_mode;
5106   if (!target)
5107     target = gen_reg_rtx (target_mode);
5108
       /* Operand 1 of the insn is the comparison rtx itself, built in the
          insn's result mode.  */
5109   comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5110
       /* When optimizing, do not suggest TARGET as the output location.  */
5111   create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5112   create_fixed_operand (&ops[1], comparison);
5113   create_fixed_operand (&ops[2], x);
5114   create_fixed_operand (&ops[3], y);
5115   if (!maybe_expand_insn (icode, 4, ops))
5116     {
5117       delete_insns_since (last);
5118       return NULL_RTX;
5119     }
       /* The rtx in which the expanded insn left its result.  */
5120   subtarget = ops[0].value;
5121
5122   /* If we are converting to a wider mode, first convert to
5123      TARGET_MODE, then normalize.  This produces better combining
5124      opportunities on machines that have a SIGN_EXTRACT when we are
5125      testing a single bit.  This mostly benefits the 68k.
5126
5127      If STORE_FLAG_VALUE does not have the sign bit set when
5128      interpreted in MODE, we can do this conversion as unsigned, which
5129      is usually more efficient.  */
5130   if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
5131     {
5132       convert_move (target, subtarget,
5133                     val_signbit_known_clear_p (result_mode,
5134                                                STORE_FLAG_VALUE));
5135       op0 = target;
           /* From here on the normalization works in TARGET_MODE.  */
5136       result_mode = target_mode;
5137     }
5138   else
5139     op0 = subtarget;
5140
5141   /* If we want to keep subexpressions around, don't reuse our last
5142      target.  */
5143   if (optimize)
5144     subtarget = 0;
5145
5146   /* Now normalize to the proper value in MODE.  Sometimes we don't
5147      have to do anything.  */
5148   if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5149     ;
5150   /* STORE_FLAG_VALUE might be the most negative number, so write
5151      the comparison this way to avoid a compiler-time warning.  */
5152   else if (- normalizep == STORE_FLAG_VALUE)
5153     op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5154
5155   /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5156      it hard to use a value of just the sign bit due to ANSI integer
5157      constant typing rules.  */
5158   else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
         /* Shift the sign bit down to bit 0.  The shift is done as unsigned
            when NORMALIZEP is 1 and as signed otherwise.  */
5159     op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5160                         GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5161                         normalizep == 1);
5162   else
5163     {
           /* The remaining case relies on bit 0 of STORE_FLAG_VALUE being
              set: mask the result down to that bit, and negate it when
              NORMALIZEP is -1.  */
5164       gcc_assert (STORE_FLAG_VALUE & 1);
5165
5166       op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5167       if (normalizep == -1)
5168         op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5169     }
5170
5171   /* If we were converting to a smaller mode, do the conversion now.  */
5172   if (target_mode != result_mode)
5173     {
5174       convert_move (target, op0, 0);
5175       return target;
5176     }
5177   else
5178     return op0;
5179 }
5180
5181
5182 /* A subroutine of emit_store_flag only including "tricks" that do not
5183    need a recursive call.  These are kept separate to avoid infinite
5184    loops.  Return the rtx holding the result, or 0 if nothing worked.  */
5185
5186 static rtx
5187 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5188                    enum machine_mode mode, int unsignedp, int normalizep,
5189                    enum machine_mode target_mode)
5190 {
5191   rtx subtarget;
5192   enum insn_code icode;
5193   enum machine_mode compare_mode;
5194   enum mode_class mclass;
5195   enum rtx_code scode;
5196   rtx tem;
5197
5198   if (unsignedp)
5199     code = unsigned_condition (code);
5200
5201   /* If one operand is constant, make it the second one.  Only do this
5202      if the other operand is not constant as well.  */
5203   if (swap_commutative_operands_p (op0, op1))
5204     {
5205       tem = op0;
5206       op0 = op1;
5207       op1 = tem;
5208       code = swap_condition (code);
5209     }
5210
5211   /* SCODE is for the float retry with (OP1, OP0); it must track the swap.  */
5212   scode = swap_condition (code);
5213
5214   if (mode == VOIDmode)
5215     mode = GET_MODE (op0);
5216
5217   /* For some comparisons with 1 and -1, we can convert this to
5218      comparisons with zero.  This will often produce more opportunities for
5219      store-flag insns.  */
5220   switch (code)
5221     {
5222     case LT:
5223       if (op1 == const1_rtx)
5224         op1 = const0_rtx, code = LE;
5225       break;
5226     case LE:
5227       if (op1 == constm1_rtx)
5228         op1 = const0_rtx, code = LT;
5229       break;
5230     case GE:
5231       if (op1 == const1_rtx)
5232         op1 = const0_rtx, code = GT;
5233       break;
5234     case GT:
5235       if (op1 == constm1_rtx)
5236         op1 = const0_rtx, code = GE;
5237       break;
5238     case GEU:
5239       if (op1 == const1_rtx)
5240         op1 = const0_rtx, code = NE;
5241       break;
5242     case LTU:
5243       if (op1 == const1_rtx)
5244         op1 = const0_rtx, code = EQ;
5245       break;
5246     default:
5247       break;
5248     }
5249
5250   /* If we are comparing a double-word integer with zero or -1, we can
5251      convert the comparison into one involving a single word.  */
5252   if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5253       && GET_MODE_CLASS (mode) == MODE_INT
5254       && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5255     {
5256       if ((code == EQ || code == NE)
5257           && (op1 == const0_rtx || op1 == constm1_rtx))
5258         {
5259           rtx op00, op01;
5260
5261           /* Do a logical OR or AND of the two words and compare the
5262              result.  */
5263           op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5264           op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5265           tem = expand_binop (word_mode,
5266                               op1 == const0_rtx ? ior_optab : and_optab,
5267                               op00, op01, NULL_RTX, unsignedp,
5268                               OPTAB_DIRECT);
5269
5270           if (tem != 0)
5271             tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5272                                    unsignedp, normalizep);
5273         }
5274       else if ((code == LT || code == GE) && op1 == const0_rtx)
5275         {
5276           rtx op0h;
5277
5278           /* If testing the sign bit, can just test on high word.  */
5279           op0h = simplify_gen_subreg (word_mode, op0, mode,
5280                                       subreg_highpart_offset (word_mode,
5281                                                               mode));
5282           tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5283                                  unsignedp, normalizep);
5284         }
5285       else
5286         tem = NULL_RTX;
5287
5288       if (tem)
5289         {
5290           if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5291             return tem;
5292           if (!target)
5293             target = gen_reg_rtx (target_mode);
5294
5295           convert_move (target, tem,
5296                         !val_signbit_known_set_p (word_mode,
5297                                                   (normalizep ? normalizep
5298                                                    : STORE_FLAG_VALUE)));
5299           return target;
5300         }
5301     }
5302
5303   /* If this is A < 0 or A >= 0, we can do this by taking the ones
5304      complement of A (for GE) and shifting the sign bit to the low bit.  */
5305   if (op1 == const0_rtx && (code == LT || code == GE)
5306       && GET_MODE_CLASS (mode) == MODE_INT
5307       && (normalizep || STORE_FLAG_VALUE == 1
5308           || val_signbit_p (mode, STORE_FLAG_VALUE)))
5309     {
5310       subtarget = target;
5311
5312       if (!target)
5313         target_mode = mode;
5314
5315       /* If the result is to be wider than OP0, it is best to convert it
5316          first.  If it is to be narrower, it is *incorrect* to convert it
5317          first.  */
5318       else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5319         {
5320           op0 = convert_modes (target_mode, mode, op0, 0);
5321           mode = target_mode;
5322         }
5323
5324       if (target_mode != mode)
5325         subtarget = 0;
5326
5327       if (code == GE)
5328         op0 = expand_unop (mode, one_cmpl_optab, op0,
5329                            ((STORE_FLAG_VALUE == 1 || normalizep)
5330                             ? 0 : subtarget), 0);
5331
5332       if (STORE_FLAG_VALUE == 1 || normalizep)
5333         /* If we are supposed to produce a 0/1 value, we want to do
5334            a logical shift from the sign bit to the low-order bit; for
5335            a -1/0 value, we do an arithmetic shift.  */
5336         op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5337                             GET_MODE_BITSIZE (mode) - 1,
5338                             subtarget, normalizep != -1);
5339
5340       if (mode != target_mode)
5341         op0 = convert_modes (target_mode, mode, op0, 0);
5342
5343       return op0;
5344     }
5345
5346   mclass = GET_MODE_CLASS (mode);
5347   for (compare_mode = mode; compare_mode != VOIDmode;
5348        compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5349     {
5350       enum machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5351       icode = optab_handler (cstore_optab, optab_mode);
5352       if (icode != CODE_FOR_nothing)
5353         {
5354           do_pending_stack_adjust ();
5355           tem = emit_cstore (target, icode, code, mode, compare_mode,
5356                              unsignedp, op0, op1, normalizep, target_mode);
5357           if (tem)
5358             return tem;
5359
5360           if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5361             {
5362               tem = emit_cstore (target, icode, scode, mode, compare_mode,
5363                                  unsignedp, op1, op0, normalizep, target_mode);
5364               if (tem)
5365                 return tem;
5366             }
5367           break;
5368         }
5369     }
5370
5371   return 0;
5372 }
5373
5374 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5375    and storing in TARGET, which may be null.  Normally return TARGET.
5376    Return 0 if that cannot be done.
5377
5378    MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
5379    it is VOIDmode, they cannot both be CONST_INT.
5380
5381    UNSIGNEDP is for the case where we have to widen the operands
5382    to perform the operation.  It says to use zero-extension.
5383
5384    NORMALIZEP is 1 if we should convert the result to be either zero
5385    or one.  NORMALIZEP is -1 if we should convert the result to be
5386    either zero or -1.  If NORMALIZEP is zero, the result will be left
5387    "raw" out of the scc insn.  */
5388
5389 rtx
5390 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5391                  enum machine_mode mode, int unsignedp, int normalizep)
5392 {
5393   enum machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5394   enum rtx_code rcode;
5395   rtx subtarget;
5396   rtx tem, last, trueval;
5397
5398   tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5399                            target_mode);
5400   if (tem)
5401     return tem;
5402
5403   /* If we reached here, we can't do this with a scc insn, however there
5404      are some comparisons that can be done in other ways.  Don't do any
5405      of these cases if branches are very cheap.  */
5406   if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5407     return 0;
5408
5409   /* See what we need to return.  We can only return a 1, -1, or the
5410      sign bit.  */
5411
5412   if (normalizep == 0)
5413     {
5414       if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5415         normalizep = STORE_FLAG_VALUE;
5416
5417       else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5418         ;
5419       else
5420         return 0;
5421     }
5422
5423   last = get_last_insn ();  /* Rollback point for delete_insns_since.  */
5424
5425   /* If optimizing, use different pseudo registers for each insn, instead
5426      of reusing the same pseudo.  This leads to better CSE, but slows
5427      down the compiler, since there are more pseudos.  */
5428   subtarget = (!optimize
5429                && (target_mode == mode)) ? target : NULL_RTX;
5430   trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5431
5432   /* For floating-point comparisons, try the reverse comparison or try
5433      changing the "orderedness" of the comparison.  */
5434   if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5435     {
5436       enum rtx_code first_code;
5437       bool and_them;
5438
5439       rcode = reverse_condition_maybe_unordered (code);
5440       if (can_compare_p (rcode, mode, ccp_store_flag)
5441           && (code == ORDERED || code == UNORDERED
5442               || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5443               || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5444         {
5445           int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5446                           || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5447
5448           /* For the reverse comparison, use either an addition or a XOR.  */
5449           if (want_add
5450               && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5451                            optimize_insn_for_speed_p ()) == 0)
5452             {
5453               tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5454                                        STORE_FLAG_VALUE, target_mode);
5455               if (tem)
5456                 return expand_binop (target_mode, add_optab, tem,
5457                                      GEN_INT (normalizep),
5458                                      target, 0, OPTAB_WIDEN);
5459             }
5460           else if (!want_add
5461                    && rtx_cost (trueval, XOR, 1,
5462                                 optimize_insn_for_speed_p ()) == 0)
5463             {
5464               tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5465                                        normalizep, target_mode);
5466               if (tem)
5467                 return expand_binop (target_mode, xor_optab, tem, trueval,
5468                                      target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
5469             }
5470         }
5471
5472       delete_insns_since (last);
5473
5474       /* Cannot split ORDERED and UNORDERED, only try the above trick.  */
5475       if (code == ORDERED || code == UNORDERED)
5476         return 0;
5477
5478       and_them = split_comparison (code, mode, &first_code, &code);
5479
5480       /* If there are no NaNs, the first comparison should always fall through.
5481          Effectively change the comparison to the other one.  */
5482       if (!HONOR_NANS (mode))
5483         {
5484           gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5485           return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5486                                     target_mode);
5487         }
5488
5489 #ifdef HAVE_conditional_move
5490       /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5491          conditional move.  */
5492       tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5493                                normalizep, target_mode);
5494       if (tem == 0)
5495         return 0;
5496
5497       if (and_them)
5498         tem = emit_conditional_move (target, code, op0, op1, mode,
5499                                      tem, const0_rtx, GET_MODE (tem), 0);
5500       else
5501         tem = emit_conditional_move (target, code, op0, op1, mode,
5502                                      trueval, tem, GET_MODE (tem), 0);
5503
5504       if (tem == 0)
5505         delete_insns_since (last);
5506       return tem;
5507 #else
5508       return 0;
5509 #endif
5510     }
5511
5512   /* The remaining tricks only apply to integer comparisons.  */
5513
5514   if (GET_MODE_CLASS (mode) != MODE_INT)
5515     return 0;
5516
5517   /* If this is an equality comparison of integers, we can try to exclusive-or
5518      (or subtract) the two operands and use a recursive call to try the
5519      comparison with zero.  Don't do any of these cases if branches are
5520      very cheap.  */
5521
5522   if ((code == EQ || code == NE) && op1 != const0_rtx)
5523     {
5524       tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5525                           OPTAB_WIDEN);
5526
5527       if (tem == 0)
5528         tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5529                             OPTAB_WIDEN);
5530       if (tem != 0)
5531         tem = emit_store_flag (target, code, tem, const0_rtx,
5532                                mode, unsignedp, normalizep);
5533       if (tem != 0)
5534         return tem;
5535
5536       delete_insns_since (last);
5537     }
5538
5539   /* For integer comparisons, try the reverse comparison.  However, for
5540      small X and if we'd have anyway to extend, implementing "X != 0"
5541      as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0".  */
5542   rcode = reverse_condition (code);
5543   if (can_compare_p (rcode, mode, ccp_store_flag)
5544       && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5545             && code == NE
5546             && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5547             && op1 == const0_rtx))
5548     {
5549       int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5550                       || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5551
5552       /* Again, for the reverse comparison, use either an addition or a XOR.  */
5553       if (want_add
5554           && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5555                        optimize_insn_for_speed_p ()) == 0)
5556         {
5557           tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5558                                    STORE_FLAG_VALUE, target_mode);
5559           if (tem != 0)
5560             tem = expand_binop (target_mode, add_optab, tem,
5561                                 GEN_INT (normalizep), target, 0, OPTAB_WIDEN);
5562         }
5563       else if (!want_add
5564                && rtx_cost (trueval, XOR, 1,
5565                             optimize_insn_for_speed_p ()) == 0)
5566         {
5567           tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5568                                    normalizep, target_mode);
5569           if (tem != 0)
5570             tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5571                                 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5572         }
5573
5574       if (tem != 0)
5575         return tem;
5576       delete_insns_since (last);
5577     }
5578
5579   /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5580      the constant zero.  Reject all other comparisons at this point.  Only
5581      do LE and GT if branches are expensive since they are expensive on
5582      2-operand machines.  */
5583
5584   if (op1 != const0_rtx
5585       || (code != EQ && code != NE
5586           && (BRANCH_COST (optimize_insn_for_speed_p (),
5587                            false) <= 1 || (code != LE && code != GT))))
5588     return 0;
5589
5590   /* Try to put the result of the comparison in the sign bit.  Assume we can't
5591      do the necessary operation below.  */
5592
5593   tem = 0;
5594
5595   /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
5596      the sign bit set.  */
5597
5598   if (code == LE)
5599     {
5600       /* This is destructive, so SUBTARGET can't be OP0.  */
5601       if (rtx_equal_p (subtarget, op0))
5602         subtarget = 0;
5603
5604       tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5605                           OPTAB_WIDEN);
5606       if (tem)
5607         tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5608                             OPTAB_WIDEN);
5609     }
5610
5611   /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5612      number of bits in the mode of OP0, minus one.  */
5613
5614   if (code == GT)
5615     {
5616       if (rtx_equal_p (subtarget, op0))
5617         subtarget = 0;
5618
5619       tem = expand_shift (RSHIFT_EXPR, mode, op0,
5620                           GET_MODE_BITSIZE (mode) - 1,
5621                           subtarget, 0);
5622       tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5623                           OPTAB_WIDEN);
5624     }
5625
5626   if (code == EQ || code == NE)
5627     {
5628       /* For EQ or NE, one way to do the comparison is to apply an operation
5629          that converts the operand into a positive number if it is nonzero
5630          or zero if it was originally zero.  Then, for EQ, we subtract 1 and
5631          for NE we negate.  This puts the result in the sign bit.  Then we
5632          normalize with a shift, if needed.
5633
5634          Two operations that can do the above actions are ABS and FFS, so try
5635          them.  If that doesn't work, and MODE is smaller than a full word,
5636          we can use zero-extension to the wider mode (an unsigned conversion)
5637          as the operation.  */
5638
5639       /* Note that ABS doesn't yield a positive number for INT_MIN, but
5640          that is compensated by the subsequent overflow when subtracting
5641          one / negating.  */
5642
5643       if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5644         tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5645       else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5646         tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5647       else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5648         {
5649           tem = convert_modes (word_mode, mode, op0, 1);
5650           mode = word_mode;  /* Work in word_mode from now on.  */
5651         }
5652
5653       if (tem != 0)
5654         {
5655           if (code == EQ)
5656             tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5657                                 0, OPTAB_WIDEN);
5658           else
5659             tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5660         }
5661
5662       /* If we couldn't do it that way, for NE we can "or" the two's complement
5663          of the value with itself.  For EQ, we take the one's complement of
5664          that "or", which is an extra insn, so we only handle EQ if branches
5665          are expensive.  */
5666
5667       if (tem == 0
5668           && (code == NE
5669               || BRANCH_COST (optimize_insn_for_speed_p (),
5670                               false) > 1))
5671         {
5672           if (rtx_equal_p (subtarget, op0))
5673             subtarget = 0;
5674
5675           tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5676           tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5677                               OPTAB_WIDEN);
5678
5679           if (tem && code == EQ)
5680             tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5681         }
5682     }
5683
5684   if (tem && normalizep)
5685     tem = expand_shift (RSHIFT_EXPR, mode, tem,
5686                         GET_MODE_BITSIZE (mode) - 1,
5687                         subtarget, normalizep == 1);
5688
5689   if (tem)
5690     {
5691       if (!target)
5692         ;  /* No TARGET was given: return TEM itself.  */
5693       else if (GET_MODE (tem) != target_mode)
5694         {
5695           convert_move (target, tem, 0);
5696           tem = target;
5697         }
5698       else if (!subtarget)
5699         {
5700           emit_move_insn (target, tem);
5701           tem = target;
5702         }
5703     }
5704   else
5705     delete_insns_since (last);
5706
5707   return tem;
5708 }
5709
5710 /* Variant of emit_store_flag that cannot fail: if no store-flag sequence
5711    can be generated, fall back to a set/compare/jump/set sequence.  */
5712
5713 rtx
5714 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5715                        enum machine_mode mode, int unsignedp, int normalizep)
5716 {
5717   rtx flag, done_label;
5718   rtx trueval, falseval;
5719
5720   /* Let emit_store_flag have the first attempt.  */
5721   flag = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5722   if (flag)
5723     return flag;
5724
5725   if (target == NULL_RTX)
5726     target = gen_reg_rtx (word_mode);
5727
5728   /* Build the flag by hand.  When computing "foo != 0" with foo already
5729      in TARGET, it is enough to overwrite a nonzero TARGET with TRUEVAL.  */
5730   trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5731   if (code == NE
5732       && GET_MODE_CLASS (mode) == MODE_INT
5733       && REG_P (target)
5734       && op0 == target
5735       && op1 == const0_rtx)
5736     {
5737       done_label = gen_label_rtx ();
5738       do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp, mode,
5739                                NULL_RTX, NULL_RTX, done_label, -1);
5740       emit_move_insn (target, trueval);
5741       emit_label (done_label);
5742       return target;
5743     }
5744
5745   /* TARGET is set before OP0 and OP1 are compared, so switch to a new
5746      register if TARGET is not a register or occurs in either operand.  */
5747   if (!REG_P (target)
5748       || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5749     target = gen_reg_rtx (GET_MODE (target));
5750
5751   /* If CODE cannot be used for a jump but its reverse can, jump on the
5752      reverse condition and exchange the two values stored.  */
5753   falseval = const0_rtx;
5754   if (! can_compare_p (code, mode, ccp_jump)
5755       && (! FLOAT_MODE_P (mode)
5756           || code == ORDERED || code == UNORDERED
5757           || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5758           || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5759     {
5760       enum rtx_code rcode
5761         = (FLOAT_MODE_P (mode) ? reverse_condition_maybe_unordered (code)
5762            : reverse_condition (code));
5763
5764       /* Canonicalize to UNORDERED for the libcall.  */
5765       if (can_compare_p (rcode, mode, ccp_jump)
5766           || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5767         {
5768           falseval = trueval;
5769           trueval = const0_rtx;
5770           code = rcode;
5771         }
5772     }
5773
5774   /* Store TRUEVAL, then skip over the FALSEVAL store when CODE holds.  */
5775   emit_move_insn (target, trueval);
5776   done_label = gen_label_rtx ();
5777   do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode,
5778                            NULL_RTX, NULL_RTX, done_label, -1);
5779   emit_move_insn (target, falseval);
5780   emit_label (done_label);
5781   return target;
5782 }
5783 \f
5784 /* Jump to LABEL if the comparison ARG1 OP ARG2, done in mode MODE and
5785    possibly spanning several words, is true.  All the work is delegated
5786    to do_compare_rtx_and_jump; OP itself says whether it is unsigned.  */
5787
5788 static void
5789 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
5790                  rtx label)
5791 {
5792   do_compare_rtx_and_jump (arg1, arg2, op,
5793                            op == LTU || op == LEU || op == GTU || op == GEU,
5794                            mode, NULL_RTX, NULL_RTX, label, -1);
5795 }