gcc/expmed.c

   1 /* Medium-level subroutines: convert bit-field store and extract
   2    and shifts, multiplies and divides to rtl instructions.
   3    Copyright (C) 1987-2013 Free Software Foundation, Inc.
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 3, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21
  22 #include "config.h"
  23 #include "system.h"
  24 #include "coretypes.h"
  25 #include "tm.h"
  26 #include "diagnostic-core.h"
  27 #include "rtl.h"
  28 #include "tree.h"
  29 #include "stor-layout.h"
  30 #include "tm_p.h"
  31 #include "flags.h"
  32 #include "insn-config.h"
  33 #include "expr.h"
  34 #include "optabs.h"
  35 #include "recog.h"
  36 #include "langhooks.h"
  37 #include "df.h"
  38 #include "target.h"
  39 #include "expmed.h"
  40
     /* The default instance of struct target_expmed.  When
        SWITCHABLE_TARGET is nonzero, this_target_expmed is a pointer
        that initially refers to it.  */
  41 struct target_expmed default_target_expmed;
  42 #if SWITCHABLE_TARGET
  43 struct target_expmed *this_target_expmed = &default_target_expmed;
  44 #endif
  45
     /* Prototypes for file-local (static) helper functions, declared up
        front so that they can be called before their definitions.  */
  46 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
  47                                    unsigned HOST_WIDE_INT,
  48                                    unsigned HOST_WIDE_INT,
  49                                    unsigned HOST_WIDE_INT,
  50                                    rtx);
  51 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  52                                    unsigned HOST_WIDE_INT,
  53                                    unsigned HOST_WIDE_INT,
  54                                    unsigned HOST_WIDE_INT,
  55                                    rtx);
  56 static rtx extract_fixed_bit_field (enum machine_mode, rtx,
  57                                     unsigned HOST_WIDE_INT,
  58                                     unsigned HOST_WIDE_INT, rtx, int);
  59 static rtx mask_rtx (enum machine_mode, int, int, int);
  60 static rtx lshift_value (enum machine_mode, unsigned HOST_WIDE_INT, int);
  61 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  62                                     unsigned HOST_WIDE_INT, int);
  63 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
  64 static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
  65 static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
  66
  67 /* Test whether a value is zero or a power of two.  Note that X is
        evaluated twice, so it should not have side effects.  */
  68 #define EXACT_POWER_OF_2_OR_ZERO_P(x) \
  69   (((x) & ((x) - (unsigned HOST_WIDE_INT) 1)) == 0)
  70
     /* Scratch RTL used by init_expmed and its helpers when asking
        set_src_cost for the cost of various operations.  The rtx_def
        members are embedded directly in the structure; init_expmed fills
        in their codes and operands once, and init_expmed_one_mode and
        init_expmed_one_conv change their modes for each query.

        For M from 1 to MAX_BITS_PER_WORD - 1, POW2[M] is set by
        init_expmed to the constant 1 << M and CINT[M] to the constant M.
        Index 0 of both arrays is only zeroed by the memset there.  */
  71 struct init_expmed_rtl
  72 {
  73   struct rtx_def reg;
  74   struct rtx_def plus;
  75   struct rtx_def neg;
  76   struct rtx_def mult;
  77   struct rtx_def sdiv;
  78   struct rtx_def udiv;
  79   struct rtx_def sdiv_32;
  80   struct rtx_def smod_32;
  81   struct rtx_def wide_mult;
  82   struct rtx_def wide_lshr;
  83   struct rtx_def wide_trunc;
  84   struct rtx_def shift;
  85   struct rtx_def shift_mult;
  86   struct rtx_def shift_add;
  87   struct rtx_def shift_sub0;
  88   struct rtx_def shift_sub1;
  89   struct rtx_def zext;
  90   struct rtx_def trunc;
  91
  92   rtx pow2[MAX_BITS_PER_WORD];
  93   rtx cint[MAX_BITS_PER_WORD];
  94 };
  95
     /* Record the cost of converting a value from FROM_MODE to TO_MODE.
        The cost is measured on a TRUNCATE of ALL->reg when TO_MODE is
        narrower than FROM_MODE and on a ZERO_EXTEND of ALL->reg otherwise
        (including when the two sizes compare equal).  SPEED is passed
        through to set_src_cost and set_convert_cost.

        Only the mode of ALL->reg is set here; ALL->trunc and ALL->zext are
        used with whatever mode they already have (init_expmed_one_mode
        sets both to its MODE argument before calling this function).  */
  96 static void
  97 init_expmed_one_conv (struct init_expmed_rtl *all, enum machine_mode to_mode,
  98                       enum machine_mode from_mode, bool speed)
  99 {
 100   int to_size, from_size;
 101   rtx which;
 102
 103   /* We're given no information about the true size of a partial integer,
 104      only the size of the "full" integer it requires for storage.  For
 105      comparison purposes here, reduce the bit size by one in that case.  */
 106   to_size = (GET_MODE_BITSIZE (to_mode)
 107              - (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT));
 108   from_size = (GET_MODE_BITSIZE (from_mode)
 109                - (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT));
 110
 111   /* Assume cost of zero-extend and sign-extend is the same.  */
 112   which = (to_size < from_size ? &all->trunc : &all->zext);
 113
       /* The operand of both candidate expressions is ALL->reg, so giving
          it FROM_MODE makes WHICH a conversion from FROM_MODE.  */
 114   PUT_MODE (&all->reg, from_mode);
 115   set_convert_cost (to_mode, from_mode, speed, set_src_cost (which, speed));
 116 }
 117
     /* Compute and record the cost entries for MODE, using the scratch
        expressions in ALL.  SPEED is passed through to set_src_cost and to
        the set_*_cost routines.  The entries recorded are:

        - add, negate, multiply, signed divide and unsigned divide;
        - whether signed division and signed modulus by 32 are cheap
          relative to 2 and 4 additions respectively;
        - shift, shift-and-add and the two shift-and-subtract forms for
          each count below MIN (MAX_BITS_PER_WORD, unit bit size of MODE);
        - if MODE is a scalar integer mode, the conversion cost from every
          mode in the range MIN_MODE_INT .. MAX_MODE_INT;
        - if MODE is of class MODE_INT and has a wider mode, the widening
          multiply cost (recorded for the wider mode) and the highpart
          multiply cost (recorded for MODE).  */
 118 static void
 119 init_expmed_one_mode (struct init_expmed_rtl *all,
 120                       enum machine_mode mode, int speed)
 121 {
 122   int m, n, mode_bitsize;
 123   enum machine_mode mode_from;
 124
 125   mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
 126
       /* Give the scratch expressions the mode being costed.  wide_mult
          and wide_lshr are not touched here; they are given the wider
          mode further down.  */
 127   PUT_MODE (&all->reg, mode);
 128   PUT_MODE (&all->plus, mode);
 129   PUT_MODE (&all->neg, mode);
 130   PUT_MODE (&all->mult, mode);
 131   PUT_MODE (&all->sdiv, mode);
 132   PUT_MODE (&all->udiv, mode);
 133   PUT_MODE (&all->sdiv_32, mode);
 134   PUT_MODE (&all->smod_32, mode);
 135   PUT_MODE (&all->wide_trunc, mode);
 136   PUT_MODE (&all->shift, mode);
 137   PUT_MODE (&all->shift_mult, mode);
 138   PUT_MODE (&all->shift_add, mode);
 139   PUT_MODE (&all->shift_sub0, mode);
 140   PUT_MODE (&all->shift_sub1, mode);
 141   PUT_MODE (&all->zext, mode);
 142   PUT_MODE (&all->trunc, mode);
 143
 144   set_add_cost (speed, mode, set_src_cost (&all->plus, speed));
 145   set_neg_cost (speed, mode, set_src_cost (&all->neg, speed));
 146   set_mul_cost (speed, mode, set_src_cost (&all->mult, speed));
 147   set_sdiv_cost (speed, mode, set_src_cost (&all->sdiv, speed));
 148   set_udiv_cost (speed, mode, set_src_cost (&all->udiv, speed));
 149
       /* These read back the add cost recorded just above, so they must
          come after set_add_cost.  */
 150   set_sdiv_pow2_cheap (speed, mode, (set_src_cost (&all->sdiv_32, speed)
 151                                      <= 2 * add_cost (speed, mode)));
 152   set_smod_pow2_cheap (speed, mode, (set_src_cost (&all->smod_32, speed)
 153                                      <= 4 * add_cost (speed, mode)));
 154
       /* Count 0 is handled without consulting the target: the shift is
          recorded as free and the combined forms as a plain addition.  */
 155   set_shift_cost (speed, mode, 0, 0);
 156   {
 157     int cost = add_cost (speed, mode);
 158     set_shiftadd_cost (speed, mode, 0, cost);
 159     set_shiftsub0_cost (speed, mode, 0, cost);
 160     set_shiftsub1_cost (speed, mode, 0, cost);
 161   }
 162
 163   n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
 164   for (m = 1; m < n; m++)
 165     {
           /* shift_add, shift_sub0 and shift_sub1 all refer to shift_mult,
              so updating its multiplier updates all three.  */
 166       XEXP (&all->shift, 1) = all->cint[m];
 167       XEXP (&all->shift_mult, 1) = all->pow2[m];
 168
 169       set_shift_cost (speed, mode, m, set_src_cost (&all->shift, speed));
 170       set_shiftadd_cost (speed, mode, m, set_src_cost (&all->shift_add, speed));
 171       set_shiftsub0_cost (speed, mode, m, set_src_cost (&all->shift_sub0, speed));
 172       set_shiftsub1_cost (speed, mode, m, set_src_cost (&all->shift_sub1, speed));
 173     }
 174
 175   if (SCALAR_INT_MODE_P (mode))
 176     {
 177       for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
 178            mode_from = (enum machine_mode)(mode_from + 1))
 179         init_expmed_one_conv (all, mode, mode_from, speed);
 180     }
 181   if (GET_MODE_CLASS (mode) == MODE_INT)
 182     {
 183       enum machine_mode  wider_mode = GET_MODE_WIDER_MODE (mode);
 184       if (wider_mode != VOIDmode)
 185         {
               /* Switch the inner expressions of wide_trunc to WIDER_MODE;
                  wide_trunc itself keeps MODE, and the shift count is the
                  unit bit size of MODE.  */
 186           PUT_MODE (&all->zext, wider_mode);
 187           PUT_MODE (&all->wide_mult, wider_mode);
 188           PUT_MODE (&all->wide_lshr, wider_mode);
 189           XEXP (&all->wide_lshr, 1) = GEN_INT (mode_bitsize);
 190
 191           set_mul_widen_cost (speed, wider_mode,
 192                               set_src_cost (&all->wide_mult, speed));
 193           set_mul_highpart_cost (speed, mode,
 194                                  set_src_cost (&all->wide_trunc, speed));
 195         }
 196     }
 197 }
 198
     /* Initialize the cost information recorded through the set_*_cost
        routines.  Build the scratch expressions in a local
        struct init_expmed_rtl, then, for SPEED equal to 0 and then 1,
        record the cost of constant zero and call init_expmed_one_mode for
        every mode in the MODE_INT range and, when such modes exist, the
        MODE_PARTIAL_INT and MODE_VECTOR_INT ranges.  Afterwards clear the
        alg hash entries if the hash was already marked as used, otherwise
        mark it as used, and finally call default_rtl_profile.  */
 199 void
 200 init_expmed (void)
 201 {
 202   struct init_expmed_rtl all;
 203   enum machine_mode mode;
 204   int m, speed;
 205
 206   memset (&all, 0, sizeof all);
       /* Index 0 of both arrays keeps the null value from the memset;
          init_expmed_one_mode only reads indices from 1 upwards.  */
 207   for (m = 1; m < MAX_BITS_PER_WORD; m++)
 208     {
 209       all.pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
 210       all.cint[m] = GEN_INT (m);
 211     }
 212
 213   PUT_CODE (&all.reg, REG);
 214   /* Avoid using hard regs in ways which may be unsupported.  */
 215   SET_REGNO (&all.reg, LAST_VIRTUAL_REGISTER + 1);
 216
 217   PUT_CODE (&all.plus, PLUS);
 218   XEXP (&all.plus, 0) = &all.reg;
 219   XEXP (&all.plus, 1) = &all.reg;
 220
 221   PUT_CODE (&all.neg, NEG);
 222   XEXP (&all.neg, 0) = &all.reg;
 223
 224   PUT_CODE (&all.mult, MULT);
 225   XEXP (&all.mult, 0) = &all.reg;
 226   XEXP (&all.mult, 1) = &all.reg;
 227
 228   PUT_CODE (&all.sdiv, DIV);
 229   XEXP (&all.sdiv, 0) = &all.reg;
 230   XEXP (&all.sdiv, 1) = &all.reg;
 231
 232   PUT_CODE (&all.udiv, UDIV);
 233   XEXP (&all.udiv, 0) = &all.reg;
 234   XEXP (&all.udiv, 1) = &all.reg;
 235
       /* Division and modulus by the constant 32.  Reuse the cached
          constant when the cint array is large enough to hold index 32.  */
 236   PUT_CODE (&all.sdiv_32, DIV);
 237   XEXP (&all.sdiv_32, 0) = &all.reg;
 238   XEXP (&all.sdiv_32, 1) = 32 < MAX_BITS_PER_WORD ? all.cint[32] : GEN_INT (32);
 239
 240   PUT_CODE (&all.smod_32, MOD);
 241   XEXP (&all.smod_32, 0) = &all.reg;
 242   XEXP (&all.smod_32, 1) = XEXP (&all.sdiv_32, 1);
 243
 244   PUT_CODE (&all.zext, ZERO_EXTEND);
 245   XEXP (&all.zext, 0) = &all.reg;
 246
       /* wide_trunc is
            (truncate (lshiftrt (mult (zero_extend reg) (zero_extend reg)) N))
          where the shift count N is filled in by init_expmed_one_mode.  */
 247   PUT_CODE (&all.wide_mult, MULT);
 248   XEXP (&all.wide_mult, 0) = &all.zext;
 249   XEXP (&all.wide_mult, 1) = &all.zext;
 250
 251   PUT_CODE (&all.wide_lshr, LSHIFTRT);
 252   XEXP (&all.wide_lshr, 0) = &all.wide_mult;
 253
 254   PUT_CODE (&all.wide_trunc, TRUNCATE);
 255   XEXP (&all.wide_trunc, 0) = &all.wide_lshr;
 256
       /* The second operands of shift and shift_mult are filled in by
          init_expmed_one_mode for each shift count.  */
 257   PUT_CODE (&all.shift, ASHIFT);
 258   XEXP (&all.shift, 0) = &all.reg;
 259
 260   PUT_CODE (&all.shift_mult, MULT);
 261   XEXP (&all.shift_mult, 0) = &all.reg;
 262
       /* shift_add is (plus shift_mult reg), shift_sub0 is
          (minus shift_mult reg) and shift_sub1 is (minus reg shift_mult).  */
 263   PUT_CODE (&all.shift_add, PLUS);
 264   XEXP (&all.shift_add, 0) = &all.shift_mult;
 265   XEXP (&all.shift_add, 1) = &all.reg;
 266
 267   PUT_CODE (&all.shift_sub0, MINUS);
 268   XEXP (&all.shift_sub0, 0) = &all.shift_mult;
 269   XEXP (&all.shift_sub0, 1) = &all.reg;
 270
 271   PUT_CODE (&all.shift_sub1, MINUS);
 272   XEXP (&all.shift_sub1, 0) = &all.reg;
 273   XEXP (&all.shift_sub1, 1) = &all.shift_mult;
 274
 275   PUT_CODE (&all.trunc, TRUNCATE);
 276   XEXP (&all.trunc, 0) = &all.reg;
 277
 278   for (speed = 0; speed < 2; speed++)
 279     {
           /* crtl->maybe_hot_insn_p is overwritten here; the
              default_rtl_profile call at the end of the function
              presumably restores the normal setting.  */
 280       crtl->maybe_hot_insn_p = speed;
 281       set_zero_cost (speed, set_src_cost (const0_rtx, speed));
 282
 283       for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
 284            mode = (enum machine_mode)(mode + 1))
 285         init_expmed_one_mode (&all, mode, speed);
 286
           /* The partial-integer and vector-integer ranges are skipped
              when their MIN_MODE_* value is VOIDmode.  */
 287       if (MIN_MODE_PARTIAL_INT != VOIDmode)
 288         for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
 289              mode = (enum machine_mode)(mode + 1))
 290           init_expmed_one_mode (&all, mode, speed);
 291
 292       if (MIN_MODE_VECTOR_INT != VOIDmode)
 293         for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
 294              mode = (enum machine_mode)(mode + 1))
 295           init_expmed_one_mode (&all, mode, speed);
 296     }
 297
       /* If the alg hash has been used before, zero all of its entries;
          otherwise just mark it as used from now on.  */
 298   if (alg_hash_used_p ())
 299     {
 300       struct alg_hash_entry *p = alg_hash_entry_ptr (0);
 301       memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
 302     }
 303   else
 304     set_alg_hash_used_p (true);
 305   default_rtl_profile ();
 306 }
 307
 308 /* Return an rtx representing minus the value of X.
 309    MODE is the intended mode of the result,
 310    useful if X is a CONST_INT.  */
 311
 312 rtx
 313 negate_rtx (enum machine_mode mode, rtx x)
 314 {
       /* First see whether the negation can be simplified directly.  */
 315   rtx result = simplify_unary_operation (NEG, mode, x, mode);
 316
       /* If simplification produced nothing, expand the negation through
          neg_optab instead.  */
 317   if (result == 0)
 318     result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
 319
 320   return result;
 321 }
 322
 323 /* Adjust bitfield memory MEM so that it points to the first unit of mode
 324    MODE that contains a bitfield of size BITSIZE at bit position BITNUM.
 325    If MODE is BLKmode, return a reference to every byte in the bitfield.
 326    Set *NEW_BITNUM to the bit position of the field within the new memory.  */
 327
 328 static rtx
 329 narrow_bit_field_mem (rtx mem, enum machine_mode mode,
 330                       unsigned HOST_WIDE_INT bitsize,
 331                       unsigned HOST_WIDE_INT bitnum,
 332                       unsigned HOST_WIDE_INT *new_bitnum)
 333 {
 334   if (mode == BLKmode)
 335     {
           /* Work in bytes: the new position is the bit offset within the
              first byte, and SIZE is the number of bytes needed to cover
              the field from there, rounded up.  */
 336       *new_bitnum = bitnum % BITS_PER_UNIT;
 337       HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
 338       HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
 339                             / BITS_PER_UNIT);
 340       return adjust_bitfield_address_size (mem, mode, offset, size);
 341     }
 342   else
 343     {
           /* Round BITNUM down to a multiple of MODE's bit size to get the
              byte offset of the containing unit; the remainder is the
              position within that unit.  BITSIZE is not used here.  */
 344       unsigned int unit = GET_MODE_BITSIZE (mode);
 345       *new_bitnum = bitnum % unit;
 346       HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
 347       return adjust_bitfield_address (mem, mode, offset);
 348     }
 349 }
 350
 351 /* The caller wants to perform insertion or extraction PATTERN on a
 352    bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
 353    BITREGION_START and BITREGION_END are as for store_bit_field
 354    and FIELDMODE is the natural mode of the field.
 355
 356    Search for a mode that is compatible with the memory access
 357    restrictions and (where applicable) with a register insertion or
 358    extraction.  Return the new memory on success, storing the adjusted
 359    bit position in *NEW_BITNUM.  Return null otherwise.  */
 360
 361 static rtx
 362 adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
 363                               rtx op0, HOST_WIDE_INT bitsize,
 364                               HOST_WIDE_INT bitnum,
 365                               unsigned HOST_WIDE_INT bitregion_start,
 366                               unsigned HOST_WIDE_INT bitregion_end,
 367                               enum machine_mode fieldmode,
 368                               unsigned HOST_WIDE_INT *new_bitnum)
 369 {
       /* The iterator yields candidate modes one per successful call to
          next_mode; the first one it yields becomes the initial choice.  */
 370   bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
 371                                 bitregion_end, MEM_ALIGN (op0),
 372                                 MEM_VOLATILE_P (op0));
 373   enum machine_mode best_mode;
 374   if (iter.next_mode (&best_mode))
 375     {
 376       /* We can use a memory in BEST_MODE.  See whether this is true for
 377          any wider modes.  All other things being equal, we prefer to
 378          use the widest mode possible because it tends to expose more
 379          CSE opportunities.  */
 380       if (!iter.prefer_smaller_modes ())
 381         {
 382           /* Limit the search to the mode required by the corresponding
 383              register insertion or extraction instruction, if any.  */
 384           enum machine_mode limit_mode = word_mode;
 385           extraction_insn insn;
 386           if (get_best_reg_extraction_insn (&insn, pattern,
 387                                             GET_MODE_BITSIZE (best_mode),
 388                                             fieldmode))
 389             limit_mode = insn.field_mode;
 390
               /* Keep taking candidates for as long as the iterator
                  supplies them and they are no larger than LIMIT_MODE;
                  the last one accepted wins.  */
 391           enum machine_mode wider_mode;
 392           while (iter.next_mode (&wider_mode)
 393                  && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
 394             best_mode = wider_mode;
 395         }
 396       return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
 397                                    new_bitnum);
 398     }
       /* The iterator produced no usable mode at all.  */
 399   return NULL_RTX;
 400 }
 401
 402 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
 403    a structure of mode STRUCT_MODE represents a lowpart subreg.  The subreg
 404    offset is then BITNUM / BITS_PER_UNIT.
        Note the argument order: BITNUM comes before BITSIZE.  */
 405
 406 static bool
 407 lowpart_bit_field_p (unsigned HOST_WIDE_INT bitnum,
 408                      unsigned HOST_WIDE_INT bitsize,
 409                      enum machine_mode struct_mode)
 410 {
       /* With big-endian bytes the field must start on a byte boundary and
          end either at the end of STRUCT_MODE or on a word boundary.  */
 411   if (BYTES_BIG_ENDIAN)
 412     return (bitnum % BITS_PER_UNIT == 0
 413             && (bitnum + bitsize == GET_MODE_BITSIZE (struct_mode)
 414                 || (bitnum + bitsize) % BITS_PER_WORD == 0));
 415   else
         /* Otherwise the field only has to start on a word boundary;
            BITSIZE and STRUCT_MODE are not examined.  */
 416     return bitnum % BITS_PER_WORD == 0;
 417 }
 418
 419 /* Return true if OP0 is a memory and if a bitfield of size BITSIZE at
 420    bit number BITNUM can be treated as a simple value of mode MODE.
        This requires the field to start on a byte boundary and to be
        exactly as wide as MODE.  In addition, either
        SLOW_UNALIGNED_ACCESS must be false for MODE at OP0's alignment,
        or both BITNUM and OP0's alignment must satisfy MODE's
        alignment.  */
 421
 422 static bool
 423 simple_mem_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
 424                        unsigned HOST_WIDE_INT bitnum, enum machine_mode mode)
 425 {
 426   return (MEM_P (op0)
 427           && bitnum % BITS_PER_UNIT == 0
 428           && bitsize == GET_MODE_BITSIZE (mode)
 429           && (!SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
 430               || (bitnum % GET_MODE_ALIGNMENT (mode) == 0
 431                   && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
 432 }
 433 \f
 434 /* Try to use instruction INSV to store VALUE into a field of OP0.
 435    BITSIZE and BITNUM are as for store_bit_field.
        Return true if the instruction was emitted.  Return false without
        emitting anything if BITSIZE is zero or exceeds the bit size of
        INSV->field_mode; return false after deleting the insns emitted
        since entry if expanding the instruction fails.  */
 436
 437 static bool
 438 store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
 439                             unsigned HOST_WIDE_INT bitsize,
 440                             unsigned HOST_WIDE_INT bitnum, rtx value)
 441 {
 442   struct expand_operand ops[4];
 443   rtx value1;
 444   rtx xop0 = op0;
       /* Remember the current end of the insn stream so that anything
          emitted below can be deleted if the expansion fails.  */
 445   rtx last = get_last_insn ();
 446   bool copy_back = false;
 447
 448   enum machine_mode op_mode = insv->field_mode;
 449   unsigned int unit = GET_MODE_BITSIZE (op_mode);
 450   if (bitsize == 0 || bitsize > unit)
 451     return false;
 452
 453   if (MEM_P (xop0))
 454     /* Get a reference to the first byte of the field.  */
 455     xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
 456                                  &bitnum);
 457   else
 458     {
 459       /* Convert from counting within OP0 to counting in OP_MODE.  */
 460       if (BYTES_BIG_ENDIAN)
 461         bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
 462
 463       /* If xop0 is a register, we need it in OP_MODE
 464          to make it acceptable to the format of insv.  */
 465       if (GET_CODE (xop0) == SUBREG)
 466         /* We can't just change the mode, because this might clobber op0,
 467            and we will need the original value of op0 if insv fails.  */
 468         xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
 469       if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
 470         xop0 = gen_lowpart_SUBREG (op_mode, xop0);
 471     }
 472
 473   /* If the destination is a paradoxical subreg such that we need a
 474      truncate to the inner mode, perform the insertion on a temporary and
 475      truncate the result to the original destination.  Note that we can't
 476      just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
 477      X) 0)) is (reg:N X).  */
 478   if (GET_CODE (xop0) == SUBREG
 479       && REG_P (SUBREG_REG (xop0))
 480       && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
 481                                          op_mode))
 482     {
 483       rtx tem = gen_reg_rtx (op_mode);
 484       emit_move_insn (tem, xop0);
 485       xop0 = tem;
 486       copy_back = true;
 487     }
 488
 489   /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
 490      "backwards" from the size of the unit we are inserting into.
 491      Otherwise, we count bits from the most significant on a
 492      BYTES/BITS_BIG_ENDIAN machine.  */
 493
 494   if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
 495     bitnum = unit - bitsize - bitnum;
 496
 497   /* Convert VALUE to op_mode (which insv insn wants) in VALUE1.  */
 498   value1 = value;
 499   if (GET_MODE (value) != op_mode)
 500     {
 501       if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
 502         {
 503           /* Optimization: Don't bother really extending VALUE
 504              if it has all the bits we will actually use.  However,
 505              if we must narrow it, be sure we do it correctly.  */
 506
 507           if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
 508             {
 509               rtx tmp;
 510
                   /* Try a direct subreg of VALUE1 first; if that cannot
                      be formed, force the value into a register and take
                      the subreg of that instead.  */
 511               tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
 512               if (! tmp)
 513                 tmp = simplify_gen_subreg (op_mode,
 514                                            force_reg (GET_MODE (value),
 515                                                       value1),
 516                                            GET_MODE (value), 0);
 517               value1 = tmp;
 518             }
 519           else
 520             value1 = gen_lowpart (op_mode, value1);
 521         }
 522       else if (CONST_INT_P (value))
 523         value1 = gen_int_mode (INTVAL (value), op_mode);
 524       else
 525         /* Parse phase is supposed to make VALUE's data type
 526            match that of the component reference, which is a type
 527            at least as wide as the field; so VALUE should have
 528            a mode that corresponds to that type.  */
 529         gcc_assert (CONSTANT_P (value));
 530     }
 531
       /* Operands passed to the instruction: the destination, the field
          size, the adjusted bit position and the value in OP_MODE.  */
 532   create_fixed_operand (&ops[0], xop0);
 533   create_integer_operand (&ops[1], bitsize);
 534   create_integer_operand (&ops[2], bitnum);
 535   create_input_operand (&ops[3], value1, op_mode);
 536   if (maybe_expand_insn (insv->icode, 4, ops))
 537     {
           /* The insertion was done on a temporary; copy the result back
              into the original destination.  */
 538       if (copy_back)
 539         convert_move (op0, xop0, true);
 540       return true;
 541     }
 542   delete_insns_since (last);
 543   return false;
 544 }
 545
 546 /* A subroutine of store_bit_field, with the same arguments.  Return true
 547    if the operation could be implemented.
 548
 549    If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
 550    no other way of implementing the operation.  If FALLBACK_P is false,
 551    return false instead.  */
 552
 553 static bool
 554 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 555                    unsigned HOST_WIDE_INT bitnum,
 556                    unsigned HOST_WIDE_INT bitregion_start,
 557                    unsigned HOST_WIDE_INT bitregion_end,
 558                    enum machine_mode fieldmode,
 559                    rtx value, bool fallback_p)
 560 {
 561   rtx op0 = str_rtx;
 562   rtx orig_value;
 563
 564   while (GET_CODE (op0) == SUBREG)
 565     {
 566       /* The following line once was done only if WORDS_BIG_ENDIAN,
 567          but I think that is a mistake.  WORDS_BIG_ENDIAN is
 568          meaningful at a much higher level; when structures are copied
 569          between memory and regs, the higher-numbered regs
 570          always get higher addresses.  */
 571       int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
 572       int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
 573       int byte_offset = 0;
 574
 575       /* Paradoxical subregs need special handling on big endian machines.  */
 576       if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
 577         {
 578           int difference = inner_mode_size - outer_mode_size;
 579
 580           if (WORDS_BIG_ENDIAN)
 581             byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
 582           if (BYTES_BIG_ENDIAN)
 583             byte_offset += difference % UNITS_PER_WORD;
 584         }
 585       else
 586         byte_offset = SUBREG_BYTE (op0);
 587
 588       bitnum += byte_offset * BITS_PER_UNIT;
 589       op0 = SUBREG_REG (op0);
 590     }
 591
 592   /* No action is needed if the target is a register and if the field
 593      lies completely outside that register.  This can occur if the source
 594      code contains an out-of-bounds access to a small array.  */
 595   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
 596     return true;
 597
 598   /* Use vec_set patterns for inserting parts of vectors whenever
 599      available.  */
 600   if (VECTOR_MODE_P (GET_MODE (op0))
 601       && !MEM_P (op0)
 602       && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
 603       && fieldmode == GET_MODE_INNER (GET_MODE (op0))
 604       && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
 605       && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
 606     {
 607       struct expand_operand ops[3];
 608       enum machine_mode outermode = GET_MODE (op0);
 609       enum machine_mode innermode = GET_MODE_INNER (outermode);
 610       enum insn_code icode = optab_handler (vec_set_optab, outermode);
 611       int pos = bitnum / GET_MODE_BITSIZE (innermode);
 612
 613       create_fixed_operand (&ops[0], op0);
 614       create_input_operand (&ops[1], value, innermode);
 615       create_integer_operand (&ops[2], pos);
 616       if (maybe_expand_insn (icode, 3, ops))
 617         return true;
 618     }
 619
 620   /* If the target is a register, overwriting the entire object, or storing
 621      a full-word or multi-word field can be done with just a SUBREG.  */
 622   if (!MEM_P (op0)
 623       && bitsize == GET_MODE_BITSIZE (fieldmode)
 624       && ((bitsize == GET_MODE_BITSIZE (GET_MODE (op0)) && bitnum == 0)
 625           || (bitsize % BITS_PER_WORD == 0 && bitnum % BITS_PER_WORD == 0)))
 626     {
 627       /* Use the subreg machinery either to narrow OP0 to the required
 628          words or to cope with mode punning between equal-sized modes.
 629          In the latter case, use subreg on the rhs side, not lhs.  */
 630       rtx sub;
 631
 632       if (bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
 633         {
 634           sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
 635           if (sub)
 636             {
 637               emit_move_insn (op0, sub);
 638               return true;
 639             }
 640         }
 641       else
 642         {
 643           sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
 644                                      bitnum / BITS_PER_UNIT);
 645           if (sub)
 646             {
 647               emit_move_insn (sub, value);
 648               return true;
 649             }
 650         }
 651     }
 652
 653   /* If the target is memory, storing any naturally aligned field can be
 654      done with a simple store.  For targets that support fast unaligned
 655      memory, any naturally sized, unit aligned field can be done directly.  */
 656   if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode))
 657     {
 658       op0 = adjust_bitfield_address (op0, fieldmode, bitnum / BITS_PER_UNIT);
 659       emit_move_insn (op0, value);
 660       return true;
 661     }
 662
 663   /* Make sure we are playing with integral modes.  Pun with subregs
 664      if we aren't.  This must come after the entire register case above,
 665      since that case is valid for any mode.  The following cases are only
 666      valid for integral modes.  */
 667   {
 668     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
 669     if (imode != GET_MODE (op0))
 670       {
 671         if (MEM_P (op0))
 672           op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
 673         else
 674           {
 675             gcc_assert (imode != BLKmode);
 676             op0 = gen_lowpart (imode, op0);
 677           }
 678       }
 679   }
 680
 681   /* Storing an lsb-aligned field in a register
 682      can be done with a movstrict instruction.  */
 683
 684   if (!MEM_P (op0)
 685       && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
 686       && bitsize == GET_MODE_BITSIZE (fieldmode)
 687       && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
 688     {
 689       struct expand_operand ops[2];
 690       enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
 691       rtx arg0 = op0;
 692       unsigned HOST_WIDE_INT subreg_off;
 693
 694       if (GET_CODE (arg0) == SUBREG)
 695         {
 696           /* Else we've got some float mode source being extracted into
 697              a different float mode destination -- this combination of
 698              subregs results in Severe Tire Damage.  */
 699           gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
 700                       || GET_MODE_CLASS (fieldmode) == MODE_INT
 701                       || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
 702           arg0 = SUBREG_REG (arg0);
 703         }
 704
 705       subreg_off = bitnum / BITS_PER_UNIT;
 706       if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
 707         {
 708           arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
 709
 710           create_fixed_operand (&ops[0], arg0);
 711           /* Shrink the source operand to FIELDMODE.  */
 712           create_convert_operand_to (&ops[1], value, fieldmode, false);
 713           if (maybe_expand_insn (icode, 2, ops))
 714             return true;
 715         }
 716     }
 717
 718   /* Handle fields bigger than a word.  */
 719
 720   if (bitsize > BITS_PER_WORD)
 721     {
 722       /* Here we transfer the words of the field
 723          in the order least significant first.
 724          This is because the most significant word is the one which may
 725          be less than full.
 726          However, only do that if the value is not BLKmode.  */
 727
 728       unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
 729       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
 730       unsigned int i;
 731       rtx last;
 732
 733       /* This is the mode we must force value to, so that there will be enough
 734          subwords to extract.  Note that fieldmode will often (always?) be
 735          VOIDmode, because that is what store_field uses to indicate that this
 736          is a bit field, but passing VOIDmode to operand_subword_force
 737          is not allowed.  */
 738       fieldmode = GET_MODE (value);
 739       if (fieldmode == VOIDmode)
 740         fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
 741
 742       last = get_last_insn ();
 743       for (i = 0; i < nwords; i++)
 744         {
 745           /* If I is 0, use the low-order word in both field and target;
 746              if I is 1, use the next to lowest word; and so on.  */
 747           unsigned int wordnum = (backwards
 748                                   ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD
 749                                   - i - 1
 750                                   : i);
 751           unsigned int bit_offset = (backwards
 752                                      ? MAX ((int) bitsize - ((int) i + 1)
 753                                             * BITS_PER_WORD,
 754                                             0)
 755                                      : (int) i * BITS_PER_WORD);
 756           rtx value_word = operand_subword_force (value, wordnum, fieldmode);
 757           unsigned HOST_WIDE_INT new_bitsize =
 758             MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
 759
 760           /* If the remaining chunk doesn't have full wordsize we have
 761              to make sure that for big endian machines the higher order
 762              bits are used.  */
 763           if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
 764             value_word = simplify_expand_binop (word_mode, lshr_optab,
 765                                                 value_word,
 766                                                 GEN_INT (BITS_PER_WORD
 767                                                          - new_bitsize),
 768                                                 NULL_RTX, true,
 769                                                 OPTAB_LIB_WIDEN);
 770
 771           if (!store_bit_field_1 (op0, new_bitsize,
 772                                   bitnum + bit_offset,
 773                                   bitregion_start, bitregion_end,
 774                                   word_mode,
 775                                   value_word, fallback_p))
 776             {
 777               delete_insns_since (last);
 778               return false;
 779             }
 780         }
 781       return true;
 782     }
 783
 784   /* If VALUE has a floating-point or complex mode, access it as an
 785      integer of the corresponding size.  This can occur on a machine
 786      with 64 bit registers that uses SFmode for float.  It can also
 787      occur for unaligned float or complex fields.  */
 788   orig_value = value;
 789   if (GET_MODE (value) != VOIDmode
 790       && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
 791       && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
 792     {
 793       value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
 794       emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
 795     }
 796
 797   /* If OP0 is a multi-word register, narrow it to the affected word.
 798      If the region spans two words, defer to store_split_bit_field.  */
 799   if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
 800     {
 801       op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
 802                                  bitnum / BITS_PER_WORD * UNITS_PER_WORD);
 803       gcc_assert (op0);
 804       bitnum %= BITS_PER_WORD;
 805       if (bitnum + bitsize > BITS_PER_WORD)
 806         {
 807           if (!fallback_p)
 808             return false;
 809
 810           store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
 811                                  bitregion_end, value);
 812           return true;
 813         }
 814     }
 815
 816   /* From here on we can assume that the field to be stored in fits
 817      within a word.  If the destination is a register, it too fits
 818      in a word.  */
 819
 820   extraction_insn insv;
 821   if (!MEM_P (op0)
 822       && get_best_reg_extraction_insn (&insv, EP_insv,
 823                                        GET_MODE_BITSIZE (GET_MODE (op0)),
 824                                        fieldmode)
 825       && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
 826     return true;
 827
 828   /* If OP0 is a memory, try copying it to a register and seeing if a
 829      cheap register alternative is available.  */
 830   if (MEM_P (op0))
 831     {
 832       /* Do not use unaligned memory insvs for volatile bitfields when
 833          -fstrict-volatile-bitfields is in effect.  */
 834       if (!(MEM_VOLATILE_P (op0)
 835             && flag_strict_volatile_bitfields > 0)
 836           && get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
 837                                            fieldmode)
 838           && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
 839         return true;
 840
 841       rtx last = get_last_insn ();
 842
 843       /* Try loading part of OP0 into a register, inserting the bitfield
 844          into that, and then copying the result back to OP0.  */
 845       unsigned HOST_WIDE_INT bitpos;
 846       rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
 847                                                bitregion_start, bitregion_end,
 848                                                fieldmode, &bitpos);
 849       if (xop0)
 850         {
 851           rtx tempreg = copy_to_reg (xop0);
 852           if (store_bit_field_1 (tempreg, bitsize, bitpos,
 853                                  bitregion_start, bitregion_end,
 854                                  fieldmode, orig_value, false))
 855             {
 856               emit_move_insn (xop0, tempreg);
 857               return true;
 858             }
 859           delete_insns_since (last);
 860         }
 861     }
 862
 863   if (!fallback_p)
 864     return false;
 865
 866   store_fixed_bit_field (op0, bitsize, bitnum, bitregion_start,
 867                          bitregion_end, value);
 868   return true;
 869 }
 870
 871 /* Generate code to store value from rtx VALUE
 872    into a bit-field within structure STR_RTX
 873    containing BITSIZE bits starting at bit BITNUM.
 874
 875    BITREGION_START is bitpos of the first bitfield in this region.
 876    BITREGION_END is the bitpos of the ending bitfield in this region.
 877    These two fields are 0, if the C++ memory model does not apply,
 878    or we are not interested in keeping track of bitfield regions.
 879
 880    FIELDMODE is the machine-mode of the FIELD_DECL node for this field.

        This is a wrapper around store_bit_field_1.  When STR_RTX is a MEM
        and BITREGION_START is nonzero, the reference and the bit positions
        are first re-based so that the bit region starts at bit 0.
        store_bit_field_1 is then called with FALLBACK_P true, and a false
        return from it is treated as unreachable.  */
 881
 882 void
 883 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 884                  unsigned HOST_WIDE_INT bitnum,
 885                  unsigned HOST_WIDE_INT bitregion_start,
 886                  unsigned HOST_WIDE_INT bitregion_end,
 887                  enum machine_mode fieldmode,
 888                  rtx value)
 889 {
 890   /* Under the C++0x memory model, we must not touch bits outside the
 891      bit region.  Adjust the address to start at the beginning of the
 892      bit region.  */
 893   if (MEM_P (str_rtx) && bitregion_start > 0)
 894     {
 895       enum machine_mode bestmode;
 896       HOST_WIDE_INT offset, size;
 897
           /* The region must begin on a BITS_PER_UNIT boundary so that the
              division that computes OFFSET below is exact.  */
 898       gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);
 899
           /* OFFSET is the region start counted in units of BITS_PER_UNIT
              bits.  BITNUM and BITREGION_END become relative to the region
              start, which from here on is bit 0.  SIZE is the number of
              units from the region start through the last unit that holds
              part of the field (rounded up).  */
 900       offset = bitregion_start / BITS_PER_UNIT;
 901       bitnum -= bitregion_start;
 902       size = (bitnum + bitsize + BITS_PER_UNIT - 1) / BITS_PER_UNIT;
 903       bitregion_end -= bitregion_start;
 904       bitregion_start = 0;
           /* Pick a mode for the re-based field and replace STR_RTX by the
              reference that adjust_bitfield_address_size builds from that
              mode, OFFSET and SIZE.
              NOTE(review): the exact meaning of each argument is defined
              by those helpers, not visible here -- confirm there.  */
 905       bestmode = get_best_mode (bitsize, bitnum,
 906                                 bitregion_start, bitregion_end,
 907                                 MEM_ALIGN (str_rtx), VOIDmode,
 908                                 MEM_VOLATILE_P (str_rtx));
 909       str_rtx = adjust_bitfield_address_size (str_rtx, bestmode, offset, size);
 910     }
 911
       /* FALLBACK_P is true, so store_bit_field_1 is expected to succeed;
          a false return is treated as a bug.  */
 912   if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
 913                           bitregion_start, bitregion_end,
 914                           fieldmode, value, true))
 915     gcc_unreachable ();
 916 }
 917 \f
 918 /* Use shifts and boolean operations to store VALUE into a bit field of
 919    width BITSIZE in OP0, starting at bit BITNUM.

        BITREGION_START and BITREGION_END are only consulted when OP0 is a
        MEM: they bound the mode chosen for the access and are handed on to
        store_split_bit_field when no single mode can be found.

        The sequence is: shift VALUE into position, clear the field in a
        register copy of OP0, OR the shifted VALUE in, and move the result
        back to OP0.  The clearing step is skipped when VALUE is a constant
        with all BITSIZE bits set, and the OR step when VALUE is a constant
        whose low BITSIZE bits are all zero.  */
 920
 921 static void
 922 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
 923                        unsigned HOST_WIDE_INT bitnum,
 924                        unsigned HOST_WIDE_INT bitregion_start,
 925                        unsigned HOST_WIDE_INT bitregion_end,
 926                        rtx value)
 927 {
 928   enum machine_mode mode;
 929   rtx temp;
       /* Set only on the CONST_INT path below: the field bits of VALUE are
          all zero / all one.  */
 930   int all_zero = 0;
 931   int all_one = 0;
 932
 933   /* There is a case not handled here:
 934      a structure with a known alignment of just a halfword
 935      and a field split across two aligned halfwords within the structure.
 936      Or likewise a structure with a known alignment of just a byte
 937      and a field split across two bytes.
 938      Such cases are not supposed to be able to occur.  */
 939
 940   if (MEM_P (op0))
 941     {
           /* Upper bound, in bits, used by the strict-volatile test below:
              the width of the bit region when one is given, otherwise
              MAX_FIXED_MODE_SIZE.  */
 942       unsigned HOST_WIDE_INT maxbits = MAX_FIXED_MODE_SIZE;
 943
 944       if (bitregion_end)
 945         maxbits = bitregion_end - bitregion_start + 1;
 946
 947       /* Get the proper mode to use for this field.  We want a mode that
 948          includes the entire field.  If such a mode would be larger than
 949          a word, we won't be doing the extraction the normal way.
 950          We don't want a mode bigger than the destination.  */
 951
           /* Start from OP0's own mode; use word_mode instead if that mode
              has zero bit size or is wider than word_mode.  */
 952       mode = GET_MODE (op0);
 953       if (GET_MODE_BITSIZE (mode) == 0
 954           || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
 955         mode = word_mode;
 956
           /* For a volatile MEM with flag_strict_volatile_bitfields > 0,
              keep OP0's own mode provided it has nonzero width and fits in
              MAXBITS.  Otherwise let get_best_mode choose, passing the MODE
              computed above as its sixth argument.  */
 957       if (MEM_VOLATILE_P (op0)
 958           && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
 959           && GET_MODE_BITSIZE (GET_MODE (op0)) <= maxbits
 960           && flag_strict_volatile_bitfields > 0)
 961         mode = GET_MODE (op0);
 962       else
 963         mode = get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
 964                               MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
 965
 966       if (mode == VOIDmode)
 967         {
 968           /* The only way this should occur is if the field spans word
 969              boundaries.  */
 970           store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
 971                                  bitregion_end, value);
 972           return;
 973         }
 974
           /* Replace OP0 by the reference narrow_bit_field_mem builds for
              MODE; the helper also rewrites BITNUM through its last
              argument (presumably to be relative to the new reference).  */
 975       op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
 976     }
 977
       /* From here on MODE is the mode of OP0 itself, for both the MEM and
          the non-MEM case.  */
 978   mode = GET_MODE (op0);
 979   gcc_assert (SCALAR_INT_MODE_P (mode));
 980
 981   /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
 982      for invalid input, such as f5 from gcc.dg/pr48335-2.c.  */
 983
 984   if (BYTES_BIG_ENDIAN)
 985     /* BITNUM is the distance between our msb
 986        and that of the containing datum.
 987        Convert it to the distance from the lsb.  */
 988     bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
 989
 990   /* Now BITNUM is always the distance between our lsb
 991      and that of OP0.  */
 992
 993   /* Shift VALUE left by BITNUM bits.  If VALUE is not constant,
 994      we must first convert its mode to MODE.  */
 995
 996   if (CONST_INT_P (value))
 997     {
 998       HOST_WIDE_INT v = INTVAL (value);
 999
           /* Keep only the low BITSIZE bits of the constant.  The shift is
              guarded so it is never done by the full width of
              HOST_WIDE_INT.  */
1000       if (bitsize < HOST_BITS_PER_WIDE_INT)
1001         v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
1002
           /* Record the two special cases that let the AND or the IOR
              below be skipped.  */
1003       if (v == 0)
1004         all_zero = 1;
1005       else if ((bitsize < HOST_BITS_PER_WIDE_INT
1006                 && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
1007                || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
1008         all_one = 1;
1009
1010       value = lshift_value (mode, v, bitnum);
1011     }
1012   else
1013     {
           /* VALUE has to be masked down to BITSIZE bits unless its mode is
              exactly BITSIZE bits wide or the field ends at the top bit of
              MODE (in which case the left shift below presumably discards
              the excess high bits).  This is computed before VALUE is
              converted, so it looks at VALUE's original mode.  */
1014       int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
1015                       && bitnum + bitsize != GET_MODE_BITSIZE (mode));
1016
1017       if (GET_MODE (value) != mode)
1018         value = convert_to_mode (mode, value, 1);
1019
1020       if (must_and)
1021         value = expand_binop (mode, and_optab, value,
1022                               mask_rtx (mode, 0, bitsize, 0),
1023                               NULL_RTX, 1, OPTAB_LIB_WIDEN);
1024       if (bitnum > 0)
1025         value = expand_shift (LSHIFT_EXPR, mode, value,
1026                               bitnum, NULL_RTX, 1);
1027     }
1028
1029   /* Now clear the chosen bits in OP0,
1030      except that if VALUE is -1 we need not bother.  */
1031   /* We keep the intermediates in registers to allow CSE to combine
1032      consecutive bitfield assignments.  */
1033
1034   temp = force_reg (mode, op0);
1035
1036   if (! all_one)
1037     {
1038       temp = expand_binop (mode, and_optab, temp,
1039                            mask_rtx (mode, bitnum, bitsize, 1),
1040                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
1041       temp = force_reg (mode, temp);
1042     }
1043
1044   /* Now logical-or VALUE into OP0, unless it is zero.  */
1045
1046   if (! all_zero)
1047     {
1048       temp = expand_binop (mode, ior_optab, temp, value,
1049                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
1050       temp = force_reg (mode, temp);
1051     }
1052
       /* Store the combined value back into OP0, through a copy of the OP0
          rtx, unless TEMP already is OP0.  */
1053   if (op0 != temp)
1054     {
1055       op0 = copy_rtx (op0);
1056       emit_move_insn (op0, temp);
1057     }
1058 }
1059 \f
1060 /* Store a bit field that is split across multiple accessible memory objects.
1061
1062    OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1063    BITSIZE is the field width; BITPOS the position of its first bit
1064    (within the word).
1065    VALUE is the value to store.

        BITREGION_START and BITREGION_END are passed on unchanged to
        store_fixed_bit_field.  In addition, a nonzero BITREGION_END makes
        the chunk size shrink near the end of the region when OP0 is
        neither a REG nor a SUBREG of a REG.

        The field is stored in pieces: each loop iteration picks the next
        THISSIZE bits of VALUE, stores them with store_fixed_bit_field and
        advances BITSDONE, until BITSIZE bits have been stored.
1066
1067    This does not yet handle fields wider than BITS_PER_WORD.  */
1068
1069 static void
1070 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1071                        unsigned HOST_WIDE_INT bitpos,
1072                        unsigned HOST_WIDE_INT bitregion_start,
1073                        unsigned HOST_WIDE_INT bitregion_end,
1074                        rtx value)
1075 {
       /* UNIT is the size in bits of the chunks OP0 is accessed in;
          BITSDONE counts the bits of the field stored so far.  */
1076   unsigned int unit;
1077   unsigned int bitsdone = 0;
1078
1079   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1080      much at a time.  */
1081   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1082     unit = BITS_PER_WORD;
1083   else
1084     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1085
1086   /* If VALUE is a constant other than a CONST_INT, get it into a register in
1087      WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
1088      that VALUE might be a floating-point constant.  */
1089   if (CONSTANT_P (value) && !CONST_INT_P (value))
1090     {
1091       rtx word = gen_lowpart_common (word_mode, value);
1092
           /* Use the lowpart if one was produced and it differs from
              VALUE; otherwise force VALUE into a register first (in
              word_mode if VALUE has VOIDmode) and take the lowpart of
              that.  */
1093       if (word && (value != word))
1094         value = word;
1095       else
1096         value = gen_lowpart_common (word_mode,
1097                                     force_reg (GET_MODE (value) != VOIDmode
1098                                                ? GET_MODE (value)
1099                                                : word_mode, value));
1100     }
1101
1102   while (bitsdone < bitsize)
1103     {
1104       unsigned HOST_WIDE_INT thissize;
1105       rtx part, word;
1106       unsigned HOST_WIDE_INT thispos;
1107       unsigned HOST_WIDE_INT offset;
1108
           /* OFFSET is the index of the UNIT-sized chunk that holds the
              next bit to store; THISPOS is that bit's position within the
              chunk.  */
1109       offset = (bitpos + bitsdone) / unit;
1110       thispos = (bitpos + bitsdone) % unit;
1111
1112       /* When region of bytes we can touch is restricted, decrease
1113          UNIT close to the end of the region as needed.  If op0 is a REG
1114          or SUBREG of REG, don't do this, as there can't be data races
1115          on a register and we can expand shorter code in some cases.  */
1116       if (bitregion_end
1117           && unit > BITS_PER_UNIT
1118           && bitpos + bitsdone - thispos + unit > bitregion_end + 1
1119           && !REG_P (op0)
1120           && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
1121         {
               /* Halve UNIT and restart the iteration so that OFFSET and
                  THISPOS are recomputed for the smaller chunk; nothing has
                  been stored yet in this iteration.  */
1122           unit = unit / 2;
1123           continue;
1124         }
1125
1126       /* THISSIZE must not overrun a word boundary.  Otherwise,
1127          store_fixed_bit_field will call us again, and we will mutually
1128          recurse forever.  */
1129       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1130       thissize = MIN (thissize, unit - thispos);
1131
           /* PART becomes the THISSIZE bits of VALUE to store in this
              iteration: computed directly for a CONST_INT, otherwise
              extracted with extract_fixed_bit_field.  */
1132       if (BYTES_BIG_ENDIAN)
1133         {
1134           /* Fetch successively less significant portions.  */
1135           if (CONST_INT_P (value))
1136             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1137                              >> (bitsize - bitsdone - thissize))
1138                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
1139           else
1140             {
1141               int total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1142               /* The args are chosen so that the last part includes the
1143                  lsb.  Give extract_bit_field the value it needs (with
1144                  endianness compensation) to fetch the piece we want.  */
1145               part = extract_fixed_bit_field (word_mode, value, thissize,
1146                                               total_bits - bitsize + bitsdone,
1147                                               NULL_RTX, 1);
1148             }
1149         }
1150       else
1151         {
1152           /* Fetch successively more significant portions.  */
1153           if (CONST_INT_P (value))
1154             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1155                              >> bitsdone)
1156                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
1157           else
1158             part = extract_fixed_bit_field (word_mode, value, thissize,
1159                                             bitsdone, NULL_RTX, 1);
1160         }
1161
1162       /* If OP0 is a register, then handle OFFSET here.
1163
1164          When handling multiword bitfields, extract_bit_field may pass
1165          down a word_mode SUBREG of a larger REG for a bitfield that actually
1166          crosses a word boundary.  Thus, for a SUBREG, we must find
1167          the current word starting from the base register.  */
1168       if (GET_CODE (op0) == SUBREG)
1169         {
               /* Word index counted from the start of SUBREG_REG (op0):
                  the word the SUBREG itself selects plus the word that
                  chunk OFFSET falls in.  */
1170           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD)
1171                             + (offset * unit / BITS_PER_WORD);
1172           enum machine_mode sub_mode = GET_MODE (SUBREG_REG (op0));
               /* An inner register narrower than a word has only word 0;
                  any other index is marked with const0_rtx and skipped
                  below.  */
1173           if (sub_mode != BLKmode && GET_MODE_SIZE (sub_mode) < UNITS_PER_WORD)
1174             word = word_offset ? const0_rtx : op0;
1175           else
1176             word = operand_subword_force (SUBREG_REG (op0), word_offset,
1177                                           GET_MODE (SUBREG_REG (op0)));
               /* The word part of OFFSET has been used to select WORD;
                  keep only the chunk index within that word.  The mask
                  form assumes BITS_PER_WORD / UNIT is a power of two.  */
1178           offset &= BITS_PER_WORD / unit - 1;
1179         }
1180       else if (REG_P (op0))
1181         {
               /* Same as the SUBREG case, but the word index is counted
                  from OP0 itself.  */
1182           enum machine_mode op0_mode = GET_MODE (op0);
1183           if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
1184             word = offset ? const0_rtx : op0;
1185           else
1186             word = operand_subword_force (op0, offset * unit / BITS_PER_WORD,
1187                                           GET_MODE (op0));
1188           offset &= BITS_PER_WORD / unit - 1;
1189         }
1190       else
1191         word = op0;
1192
1193       /* OFFSET is in UNITs, and UNIT is in bits.  If WORD is const0_rtx,
1194          it is just an out-of-bounds access.  Ignore it.  */
1195       if (word != const0_rtx)
1196         store_fixed_bit_field (word, thissize, offset * unit + thispos,
1197                                bitregion_start, bitregion_end, part);
           /* Count the bits as done even when the store was skipped, so
              the loop always makes progress.  */
1198       bitsdone += thissize;
1199     }
1200 }
1201 \f
1202 /* A subroutine of extract_bit_field_1 that converts return value X
1203    to either MODE or TMODE.  MODE, TMODE and UNSIGNEDP are arguments
1204    to extract_bit_field.

        X is returned unchanged if it already has mode TMODE or MODE.
        Otherwise it is converted for TMODE, and UNSIGNEDP is passed to
        convert_to_mode for each conversion done here.  */
1205
1206 static rtx
1207 convert_extracted_bit_field (rtx x, enum machine_mode mode,
1208                              enum machine_mode tmode, bool unsignedp)
1209 {
1210   if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1211     return x;
1212
1213   /* If TMODE is not a scalar integer mode, first convert X to the
1214      integer mode of TMODE's bit size, force that into a register, and
1215      then access it in TMODE via gen_lowpart (e.g. as a floating-point
         value).  */
1216   if (!SCALAR_INT_MODE_P (tmode))
1217     {
1218       enum machine_mode smode;
1219
1220       smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1221       x = convert_to_mode (smode, x, unsignedp);
1222       x = force_reg (smode, x);
1223       return gen_lowpart (tmode, x);
1224     }
1225
       /* TMODE is a scalar integer mode: an ordinary mode conversion is
          enough.  */
1226   return convert_to_mode (tmode, x, unsignedp);
1227 }
1228
1229 /* Try to use an ext(z)v pattern to extract a field from OP0.
1230    Return the extracted value on success, otherwise return null.
1231    EXT_MODE is the mode of the extraction and the other arguments
1232    are as for extract_bit_field.

        EXTV describes the pattern to use: its field_mode is EXT_MODE,
        its struct_mode is used to narrow a MEM operand, and its icode is
        the insn to expand.

        Null is returned when BITSIZE is zero or wider than EXT_MODE, when
        OP0 is a SUBREG whose mode is not EXT_MODE, or when
        maybe_expand_insn fails.  */
1233
1234 static rtx
1235 extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
1236                               unsigned HOST_WIDE_INT bitsize,
1237                               unsigned HOST_WIDE_INT bitnum,
1238                               int unsignedp, rtx target,
1239                               enum machine_mode mode, enum machine_mode tmode)
1240 {
1241   struct expand_operand ops[4];
       /* SPEC_TARGET is the target the result is ultimately meant for:
          the caller's TARGET, or a fresh TMODE register if none was
          given.  SPEC_TARGET_SUBREG is set below when TARGET is replaced
          by an EXT_MODE lowpart of a narrower SPEC_TARGET, so that a
          result written there can be returned as SPEC_TARGET.  */
1242   rtx spec_target = target;
1243   rtx spec_target_subreg = 0;
1244   enum machine_mode ext_mode = extv->field_mode;
       /* Width of EXT_MODE in bits.  */
1245   unsigned unit = GET_MODE_BITSIZE (ext_mode);
1246
1247   if (bitsize == 0 || unit < bitsize)
1248     return NULL_RTX;
1249
1250   if (MEM_P (op0))
1251     /* Get a reference to the first byte of the field.  */
1252     op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
1253                                 &bitnum);
1254   else
1255     {
1256       /* Convert from counting within OP0 to counting in EXT_MODE.  */
1257       if (BYTES_BIG_ENDIAN)
1258         bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1259
1260       /* If op0 is a register, we need it in EXT_MODE to make it
1261          acceptable to the format of ext(z)v.  */
1262       if (GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1263         return NULL_RTX;
1264       if (REG_P (op0) && GET_MODE (op0) != ext_mode)
1265         op0 = gen_lowpart_SUBREG (ext_mode, op0);
1266     }
1267
1268   /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1269      "backwards" from the size of the unit we are extracting from.
1270      Otherwise, we count bits from the most significant on a
1271      BYTES/BITS_BIG_ENDIAN machine.  */
1272
1273   if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1274     bitnum = unit - bitsize - bitnum;
1275
1276   if (target == 0)
1277     target = spec_target = gen_reg_rtx (tmode);
1278
1279   if (GET_MODE (target) != ext_mode)
1280     {
1281       /* Don't use LHS paradoxical subreg if explicit truncation is needed
1282          between the mode of the extraction (EXT_MODE) and the target
1283          mode.  Instead, create a temporary in EXT_MODE; the result is
1284          then converted by convert_extracted_bit_field below.  */
1285       if (REG_P (target)
1286           && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode))
1287         {
1288           target = gen_lowpart (ext_mode, target);
               /* Remember the lowpart only when EXT_MODE has more
                  precision than SPEC_TARGET's mode; see the return logic
                  below.  */
1289           if (GET_MODE_PRECISION (ext_mode)
1290               > GET_MODE_PRECISION (GET_MODE (spec_target)))
1291             spec_target_subreg = target;
1292         }
1293       else
1294         target = gen_reg_rtx (ext_mode);
1295     }
1296
       /* Operands of the pattern: 0 is the destination in EXT_MODE, 1 is
          OP0, 2 is the field width and 3 is the bit position.  */
1297   create_output_operand (&ops[0], target, ext_mode);
1298   create_fixed_operand (&ops[1], op0);
1299   create_integer_operand (&ops[2], bitsize);
1300   create_integer_operand (&ops[3], bitnum);
1301   if (maybe_expand_insn (extv->icode, 4, ops))
1302     {
           /* Take the result from the operand rather than assuming it is
              still TARGET.  If it is SPEC_TARGET, or the EXT_MODE lowpart
              recorded for it, return SPEC_TARGET directly; otherwise
              convert it for MODE/TMODE.  */
1303       target = ops[0].value;
1304       if (target == spec_target)
1305         return target;
1306       if (target == spec_target_subreg)
1307         return spec_target;
1308       return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1309     }
1310   return NULL_RTX;
1311 }
1312
1313 /* A subroutine of extract_bit_field, with the same arguments.
1314    If FALLBACK_P is true, fall back to extract_fixed_bit_field
1315    if we can find no other means of implementing the operation.
1316    If FALLBACK_P is false, return NULL instead.  */
1317
1318 static rtx
1319 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1320                      unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1321                      enum machine_mode mode, enum machine_mode tmode,
1322                      bool fallback_p)
1323 {
1324   rtx op0 = str_rtx;
1325   enum machine_mode int_mode;
1326   enum machine_mode mode1;
1327
1328   if (tmode == VOIDmode)
1329     tmode = mode;
1330
1331   while (GET_CODE (op0) == SUBREG)
1332     {
1333       bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1334       op0 = SUBREG_REG (op0);
1335     }
1336
1337   /* If we have an out-of-bounds access to a register, just return an
1338      uninitialized register of the required mode.  This can occur if the
1339      source code contains an out-of-bounds access to a small array.  */
1340   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1341     return gen_reg_rtx (tmode);
1342
1343   if (REG_P (op0)
1344       && mode == GET_MODE (op0)
1345       && bitnum == 0
1346       && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1347     {
1348       /* We're trying to extract a full register from itself.  */
1349       return op0;
1350     }
1351
1352   /* See if we can get a better vector mode before extracting.  */
1353   if (VECTOR_MODE_P (GET_MODE (op0))
1354       && !MEM_P (op0)
1355       && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1356     {
1357       enum machine_mode new_mode;
1358
1359       if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1360         new_mode = MIN_MODE_VECTOR_FLOAT;
1361       else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1362         new_mode = MIN_MODE_VECTOR_FRACT;
1363       else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1364         new_mode = MIN_MODE_VECTOR_UFRACT;
1365       else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1366         new_mode = MIN_MODE_VECTOR_ACCUM;
1367       else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1368         new_mode = MIN_MODE_VECTOR_UACCUM;
1369       else
1370         new_mode = MIN_MODE_VECTOR_INT;
1371
1372       for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1373         if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1374             && targetm.vector_mode_supported_p (new_mode))
1375           break;
1376       if (new_mode != VOIDmode)
1377         op0 = gen_lowpart (new_mode, op0);
1378     }
1379
1380   /* Use vec_extract patterns for extracting parts of vectors whenever
1381      available.  */
1382   if (VECTOR_MODE_P (GET_MODE (op0))
1383       && !MEM_P (op0)
1384       && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing
1385       && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1386           == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1387     {
1388       struct expand_operand ops[3];
1389       enum machine_mode outermode = GET_MODE (op0);
1390       enum machine_mode innermode = GET_MODE_INNER (outermode);
1391       enum insn_code icode = optab_handler (vec_extract_optab, outermode);
1392       unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1393
1394       create_output_operand (&ops[0], target, innermode);
1395       create_input_operand (&ops[1], op0, outermode);
1396       create_integer_operand (&ops[2], pos);
1397       if (maybe_expand_insn (icode, 3, ops))
1398         {
1399           target = ops[0].value;
1400           if (GET_MODE (target) != mode)
1401             return gen_lowpart (tmode, target);
1402           return target;
1403         }
1404     }
1405
1406   /* Make sure we are playing with integral modes.  Pun with subregs
1407      if we aren't.  */
1408   {
1409     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1410     if (imode != GET_MODE (op0))
1411       {
1412         if (MEM_P (op0))
1413           op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
1414         else if (imode != BLKmode)
1415           {
1416             op0 = gen_lowpart (imode, op0);
1417
1418             /* If we got a SUBREG, force it into a register since we
1419                aren't going to be able to do another SUBREG on it.  */
1420             if (GET_CODE (op0) == SUBREG)
1421               op0 = force_reg (imode, op0);
1422           }
1423         else if (REG_P (op0))
1424           {
1425             rtx reg, subreg;
1426             imode = smallest_mode_for_size (GET_MODE_BITSIZE (GET_MODE (op0)),
1427                                             MODE_INT);
1428             reg = gen_reg_rtx (imode);
1429             subreg = gen_lowpart_SUBREG (GET_MODE (op0), reg);
1430             emit_move_insn (subreg, op0);
1431             op0 = reg;
1432             bitnum += SUBREG_BYTE (subreg) * BITS_PER_UNIT;
1433           }
1434         else
1435           {
1436             HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (op0));
1437             rtx mem = assign_stack_temp (GET_MODE (op0), size);
1438             emit_move_insn (mem, op0);
1439             op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
1440           }
1441       }
1442   }
1443
1444   /* ??? We currently assume TARGET is at least as big as BITSIZE.
1445      If that's wrong, the solution is to test for it and set TARGET to 0
1446      if needed.  */
1447
1448   /* If the bitfield is volatile, we need to make sure the access
1449      remains on a type-aligned boundary.  */
1450   if (GET_CODE (op0) == MEM
1451       && MEM_VOLATILE_P (op0)
1452       && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
1453       && flag_strict_volatile_bitfields > 0)
1454     goto no_subreg_mode_swap;
1455
1456   /* Only scalar integer modes can be converted via subregs.  There is an
1457      additional problem for FP modes here in that they can have a precision
1458      which is different from the size.  mode_for_size uses precision, but
1459      we want a mode based on the size, so we must avoid calling it for FP
1460      modes.  */
1461   mode1 = mode;
1462   if (SCALAR_INT_MODE_P (tmode))
1463     {
1464       enum machine_mode try_mode = mode_for_size (bitsize,
1465                                                   GET_MODE_CLASS (tmode), 0);
1466       if (try_mode != BLKmode)
1467         mode1 = try_mode;
1468     }
1469   gcc_assert (mode1 != BLKmode);
1470
1471   /* Extraction of a full MODE1 value can be done with a subreg as long
1472      as the least significant bit of the value is the least significant
1473      bit of either OP0 or a word of OP0.  */
1474   if (!MEM_P (op0)
1475       && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
1476       && bitsize == GET_MODE_BITSIZE (mode1)
1477       && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0)))
1478     {
1479       rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1480                                      bitnum / BITS_PER_UNIT);
1481       if (sub)
1482         return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1483     }
1484
1485   /* Extraction of a full MODE1 value can be done with a load as long as
1486      the field is on a byte boundary and is sufficiently aligned.  */
1487   if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1))
1488     {
1489       op0 = adjust_bitfield_address (op0, mode1, bitnum / BITS_PER_UNIT);
1490       return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1491     }
1492
1493  no_subreg_mode_swap:
1494
1495   /* Handle fields bigger than a word.  */
1496
1497   if (bitsize > BITS_PER_WORD)
1498     {
1499       /* Here we transfer the words of the field
1500          in the order least significant first.
1501          This is because the most significant word is the one which may
1502          be less than full.  */
1503
1504       unsigned int backwards = WORDS_BIG_ENDIAN;
1505       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1506       unsigned int i;
1507       rtx last;
1508
1509       if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1510         target = gen_reg_rtx (mode);
1511
1512       /* Indicate for flow that the entire target reg is being set.  */
1513       emit_clobber (target);
1514
1515       last = get_last_insn ();
1516       for (i = 0; i < nwords; i++)
1517         {
1518           /* If I is 0, use the low-order word in both field and target;
1519              if I is 1, use the next to lowest word; and so on.  */
1520           /* Word number in TARGET to use.  */
1521           unsigned int wordnum
1522             = (backwards
1523                ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1524                : i);
1525           /* Offset from start of field in OP0.  */
1526           unsigned int bit_offset = (backwards
1527                                      ? MAX ((int) bitsize - ((int) i + 1)
1528                                             * BITS_PER_WORD,
1529                                             0)
1530                                      : (int) i * BITS_PER_WORD);
1531           rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1532           rtx result_part
1533             = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1534                                              bitsize - i * BITS_PER_WORD),
1535                                    bitnum + bit_offset, 1, target_part,
1536                                    mode, word_mode, fallback_p);
1537
1538           gcc_assert (target_part);
1539           if (!result_part)
1540             {
1541               delete_insns_since (last);
1542               return NULL;
1543             }
1544
1545           if (result_part != target_part)
1546             emit_move_insn (target_part, result_part);
1547         }
1548
1549       if (unsignedp)
1550         {
1551           /* Unless we've filled TARGET, the upper regs in a multi-reg value
1552              need to be zero'd out.  */
1553           if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1554             {
1555               unsigned int i, total_words;
1556
1557               total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1558               for (i = nwords; i < total_words; i++)
1559                 emit_move_insn
1560                   (operand_subword (target,
1561                                     backwards ? total_words - i - 1 : i,
1562                                     1, VOIDmode),
1563                    const0_rtx);
1564             }
1565           return target;
1566         }
1567
1568       /* Signed bit field: sign-extend with two arithmetic shifts.  */
1569       target = expand_shift (LSHIFT_EXPR, mode, target,
1570                              GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1571       return expand_shift (RSHIFT_EXPR, mode, target,
1572                            GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1573     }
1574
1575   /* If OP0 is a multi-word register, narrow it to the affected word.
1576      If the region spans two words, defer to extract_split_bit_field.  */
1577   if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1578     {
1579       op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
1580                                  bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1581       bitnum %= BITS_PER_WORD;
1582       if (bitnum + bitsize > BITS_PER_WORD)
1583         {
1584           if (!fallback_p)
1585             return NULL_RTX;
1586           target = extract_split_bit_field (op0, bitsize, bitnum, unsignedp);
1587           return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1588         }
1589     }
1590
1591   /* From here on we know the desired field is smaller than a word.
1592      If OP0 is a register, it too fits within a word.  */
1593   enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
1594   extraction_insn extv;
1595   if (!MEM_P (op0)
1596       /* ??? We could limit the structure size to the part of OP0 that
1597          contains the field, with appropriate checks for endianness
1598          and TRULY_NOOP_TRUNCATION.  */
1599       && get_best_reg_extraction_insn (&extv, pattern,
1600                                        GET_MODE_BITSIZE (GET_MODE (op0)),
1601                                        tmode))
1602     {
1603       rtx result = extract_bit_field_using_extv (&extv, op0, bitsize, bitnum,
1604                                                  unsignedp, target, mode,
1605                                                  tmode);
1606       if (result)
1607         return result;
1608     }
1609
1610   /* If OP0 is a memory, try copying it to a register and seeing if a
1611      cheap register alternative is available.  */
1612   if (MEM_P (op0))
1613     {
1614       /* Do not use extv/extzv for volatile bitfields when
1615          -fstrict-volatile-bitfields is in effect.  */
1616       if (!(MEM_VOLATILE_P (op0) && flag_strict_volatile_bitfields > 0)
1617           && get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
1618                                            tmode))
1619         {
1620           rtx result = extract_bit_field_using_extv (&extv, op0, bitsize,
1621                                                      bitnum, unsignedp,
1622                                                      target, mode,
1623                                                      tmode);
1624           if (result)
1625             return result;
1626         }
1627
1628       rtx last = get_last_insn ();
1629
1630       /* Try loading part of OP0 into a register and extracting the
1631          bitfield from that.  */
1632       unsigned HOST_WIDE_INT bitpos;
1633       rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
1634                                                0, 0, tmode, &bitpos);
1635       if (xop0)
1636         {
1637           xop0 = copy_to_reg (xop0);
1638           rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
1639                                             unsignedp, target,
1640                                             mode, tmode, false);
1641           if (result)
1642             return result;
1643           delete_insns_since (last);
1644         }
1645     }
1646
1647   if (!fallback_p)
1648     return NULL;
1649
1650   /* Find a correspondingly-sized integer field, so we can apply
1651      shifts and masks to it.  */
1652   int_mode = int_mode_for_mode (tmode);
1653   if (int_mode == BLKmode)
1654     int_mode = int_mode_for_mode (mode);
1655   /* Should probably push op0 out to memory and then do a load.  */
1656   gcc_assert (int_mode != BLKmode);
1657
1658   target = extract_fixed_bit_field (int_mode, op0, bitsize, bitnum,
1659                                     target, unsignedp);
1660   return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1661 }
1662
1663 /* Generate code to extract a bit-field from STR_RTX
1664    containing BITSIZE bits, starting at BITNUM,
1665    and put it in TARGET if possible (if TARGET is nonzero).
1666    Regardless of TARGET, we return the rtx for where the value is placed.
1667
1668    STR_RTX is the structure containing the field (a REG or MEM).
1669    UNSIGNEDP is nonzero if this is an unsigned bit field.
1670    MODE is the natural mode of the field value once extracted.
1671    TMODE is the mode the caller would like the value to have;
1672    but the value may be returned with type MODE instead.
1673
1674    If a TARGET is specified and we can store in it at no extra cost,
1675    we do so, and return TARGET.  Otherwise, we return a REG of mode TMODE
1676    or MODE, with TMODE preferred if they are equally easy.  This entry
1677    point always allows extract_bit_field_1 its shift-and-mask fallback.  */
1678
1679 rtx
1680 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1681                    unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1682                    enum machine_mode mode, enum machine_mode tmode)
1683 {
1684   return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
1685                               target, mode, tmode, /*fallback_p=*/true);
1686 }
1687 \f
1688 /* Use shifts and boolean operations to extract a field of BITSIZE bits
1689    from bit BITNUM of OP0.
1690
1691    UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1692    If TARGET is nonzero, attempts to store the value there
1693    and return TARGET, but this is not guaranteed.
1694    If TARGET is not used, create a pseudo-reg of mode TMODE for the value.  */
1695
1696 static rtx
1697 extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
1698                          unsigned HOST_WIDE_INT bitsize,
1699                          unsigned HOST_WIDE_INT bitnum, rtx target,
1700                          int unsignedp)
1701 {
1702   enum machine_mode mode;
1703
1704   if (MEM_P (op0))
1705     {
1706       /* Get the proper mode to use for this field.  We want a mode that
1707          includes the entire field.  If such a mode would be larger than
1708          a word, we won't be doing the extraction the normal way.  */
1709
1710       if (MEM_VOLATILE_P (op0)
1711           && flag_strict_volatile_bitfields > 0)
1712         {
1713           if (GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
1714             mode = GET_MODE (op0);
1715           else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1716             mode = GET_MODE (target);
1717           else
1718             mode = tmode;
1719         }
1720       else
1721         mode = get_best_mode (bitsize, bitnum, 0, 0,
1722                               MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));
1723
1724       if (mode == VOIDmode)
1725         /* The only way this should occur is if the field spans word
1726            boundaries.  */
1727         return extract_split_bit_field (op0, bitsize, bitnum, unsignedp);
1728       /* BIT_OFFSET is BITNUM rounded down to a multiple of TOTAL_BITS.  */
1729       unsigned int total_bits = GET_MODE_BITSIZE (mode);
1730       HOST_WIDE_INT bit_offset = bitnum - bitnum % total_bits;
1731
1732       /* If we're accessing a volatile MEM, we can't apply BIT_OFFSET
1733          if it results in a multi-word access where we otherwise wouldn't
1734          have one.  So, check for that case here.  */
1735       if (MEM_P (op0)
1736           && MEM_VOLATILE_P (op0)
1737           && flag_strict_volatile_bitfields > 0
1738           && bitnum % BITS_PER_UNIT + bitsize <= total_bits
1739           && bitnum % GET_MODE_BITSIZE (mode) + bitsize > total_bits)
1740         {
1741           /* If the target doesn't support unaligned access, give up and
1742              split the access into two.  */
1743           if (STRICT_ALIGNMENT)
1744             return extract_split_bit_field (op0, bitsize, bitnum, unsignedp);
1745           bit_offset = bitnum - bitnum % BITS_PER_UNIT;  /* Byte-align only.  */
1746         }
1747       op0 = adjust_bitfield_address (op0, mode, bit_offset / BITS_PER_UNIT);
1748       bitnum -= bit_offset;  /* Now relative to the adjusted OP0.  */
1749     }
1750
1751   mode = GET_MODE (op0);
1752   gcc_assert (SCALAR_INT_MODE_P (mode));
1753
1754   /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1755      for invalid input, such as extract equivalent of f5 from
1756      gcc.dg/pr48335-2.c.  */
1757
1758   if (BYTES_BIG_ENDIAN)
1759     /* BITNUM is the distance between our msb and that of OP0.
1760        Convert it to the distance from the lsb.  */
1761     bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1762
1763   /* Now BITNUM is always the distance between the field's lsb and that of OP0.
1764      We have reduced the big-endian case to the little-endian case.  */
1765
1766   if (unsignedp)
1767     {
1768       if (bitnum)
1769         {
1770           /* If the field does not already start at the lsb,
1771              shift it so it does.  */
1772           /* Maybe propagate the target for the shift.  */
1773           rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1774           if (tmode != mode)
1775             subtarget = 0;
1776           op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
1777         }
1778       /* Convert the value to the desired mode.  */
1779       if (mode != tmode)
1780         op0 = convert_to_mode (tmode, op0, 1);
1781
1782       /* Unless the msb of the field used to be the msb when we shifted,
1783          mask out the upper bits.  */
1784
1785       if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
1786         return expand_binop (GET_MODE (op0), and_optab, op0,
1787                              mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1788                              target, 1, OPTAB_LIB_WIDEN);
1789       return op0;
1790     }
1791
1792   /* To extract a signed bit-field, first shift its msb to the msb of the word,
1793      then arithmetic-shift its lsb to the lsb of the word.  */
1794   op0 = force_reg (mode, op0);
1795
1796   /* Find the narrowest integer mode that contains the field.  */
1797
1798   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1799        mode = GET_MODE_WIDER_MODE (mode))
1800     if (GET_MODE_BITSIZE (mode) >= bitsize + bitnum)
1801       {
1802         op0 = convert_to_mode (mode, op0, 0);
1803         break;
1804       }
1805   /* The result is left in MODE; TARGET is unusable unless MODE is TMODE.  */
1806   if (mode != tmode)
1807     target = 0;
1808
1809   if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
1810     {
1811       int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
1812       /* Maybe propagate the target for the shift.  */
1813       rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1814       op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1815     }
1816
1817   return expand_shift (RSHIFT_EXPR, mode, op0,
1818                        GET_MODE_BITSIZE (mode) - bitsize, target, 0);
1819 }
1820 \f
1821 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1822    of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1823    complement of that if COMPLEMENT.  The mask is truncated if
1824    necessary to the width of mode MODE.  The mask is zero-extended if
1825    BITSIZE+BITPOS is too small for MODE.  */
1826
1827 static rtx
1828 mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
1829 {
1830   double_int mask;
1831   /* Build the mask as a double_int: the ones first, then shift them up.  */
1832   mask = double_int::mask (bitsize);
1833   mask = mask.llshift (bitpos, HOST_BITS_PER_DOUBLE_INT);
1834
1835   if (complement)
1836     mask = ~mask;
1837   /* Materialize the value as an rtx constant of mode MODE.  */
1838   return immed_double_int_const (mask, mode);
1839 }
1840
1841 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx of mode MODE
1842    with the value VALUE << BITPOS; VALUE is taken as unsigned.  */
1843
1844 static rtx
1845 lshift_value (enum machine_mode mode, unsigned HOST_WIDE_INT value,
1846               int bitpos)
1847 {
1848   double_int val;
1849
1850   val = double_int::from_uhwi (value);
1851   val = val.llshift (bitpos, HOST_BITS_PER_DOUBLE_INT);
1852
1853   return immed_double_int_const (val, mode);
1854 }
1855 \f
1856 /* Extract a bit field that is split across two words
1857    and return an RTX for the result, which is built up in word_mode.
1858
1859    OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1860    BITSIZE is the field width; BITPOS, position of its first bit, in the word.
1861    UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.  */
1862
1863 static rtx
1864 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1865                          unsigned HOST_WIDE_INT bitpos, int unsignedp)
1866 {
1867   unsigned int unit;
1868   unsigned int bitsdone = 0;
1869   rtx result = NULL_RTX;
1870   int first = 1;
1871
1872   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1873      much at a time.  */
1874   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1875     unit = BITS_PER_WORD;
1876   else
1877     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1878
1879   while (bitsdone < bitsize)
1880     {
1881       unsigned HOST_WIDE_INT thissize;
1882       rtx part, word;
1883       unsigned HOST_WIDE_INT thispos;
1884       unsigned HOST_WIDE_INT offset;
1885       /* Split the running bit position into a UNIT index and a bit in it.  */
1886       offset = (bitpos + bitsdone) / unit;
1887       thispos = (bitpos + bitsdone) % unit;
1888
1889       /* THISSIZE must not overrun a word boundary.  Otherwise,
1890          extract_fixed_bit_field will call us again, and we will mutually
1891          recurse forever.  */
1892       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1893       thissize = MIN (thissize, unit - thispos);
1894
1895       /* If OP0 is a register, then handle OFFSET here.
1896
1897          When handling multiword bitfields, extract_bit_field may pass
1898          down a word_mode SUBREG of a larger REG for a bitfield that actually
1899          crosses a word boundary.  Thus, for a SUBREG, we must find
1900          the current word starting from the base register.  */
1901       if (GET_CODE (op0) == SUBREG)
1902         {
1903           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1904           word = operand_subword_force (SUBREG_REG (op0), word_offset,
1905                                         GET_MODE (SUBREG_REG (op0)));
1906           offset = 0;
1907         }
1908       else if (REG_P (op0))
1909         {
1910           word = operand_subword_force (op0, offset, GET_MODE (op0));
1911           offset = 0;
1912         }
1913       else
1914         word = op0;
1915
1916       /* Extract the parts in bit-counting order,
1917          whose meaning is determined by BYTES_BIG_ENDIAN.
1918          OFFSET is in UNITs, and UNIT is in bits.  */
1919       part = extract_fixed_bit_field (word_mode, word, thissize,
1920                                       offset * unit + thispos, 0, 1);
1921       bitsdone += thissize;
1922
1923       /* Shift this part into place for the result.  */
1924       if (BYTES_BIG_ENDIAN)
1925         {
1926           if (bitsize != bitsdone)
1927             part = expand_shift (LSHIFT_EXPR, word_mode, part,
1928                                  bitsize - bitsdone, 0, 1);
1929         }
1930       else
1931         {
1932           if (bitsdone != thissize)
1933             part = expand_shift (LSHIFT_EXPR, word_mode, part,
1934                                  bitsdone - thissize, 0, 1);
1935         }
1936
1937       if (first)
1938         result = part;
1939       else
1940         /* Combine the parts with bitwise or.  This works
1941            because we extracted each part as an unsigned bit field.  */
1942         result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
1943                                OPTAB_LIB_WIDEN);
1944
1945       first = 0;
1946     }
1947
1948   /* Unsigned bit field: we are done.  */
1949   if (unsignedp)
1950     return result;
1951   /* Signed bit field: sign-extend with two arithmetic shifts.  */
1952   result = expand_shift (LSHIFT_EXPR, word_mode, result,
1953                          BITS_PER_WORD - bitsize, NULL_RTX, 0);
1954   return expand_shift (RSHIFT_EXPR, word_mode, result,
1955                        BITS_PER_WORD - bitsize, NULL_RTX, 0);
1956 }
1957 \f
1958 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
1959    the bit pattern.  SRC_MODE is the mode of SRC; if this is smaller than
1960    MODE, fill the upper bits with zeros.  Fail if the layout of either
1961    mode is unknown (as for CC modes) or if the extraction would involve
1962    unprofitable mode punning.  Return the value on success, otherwise
1963    return null.
1964
1965    This is different from gen_lowpart* in these respects:
1966
1967      - the returned value must always be considered an rvalue
1968
1969      - when MODE is wider than SRC_MODE, the extraction involves
1970        a zero extension
1971
1972      - when MODE is smaller than SRC_MODE, the extraction involves
1973        a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
1974
1975    In other words, this routine performs a computation, whereas the
1976    gen_lowpart* routines are conceptually lvalue or rvalue subreg
1977    operations.  */
1978
1979 rtx
1980 extract_low_bits (enum machine_mode mode, enum machine_mode src_mode, rtx src)
1981 {
1982   enum machine_mode int_mode, src_int_mode;
1983
1984   if (mode == src_mode)
1985     return src;
1986
1987   if (CONSTANT_P (src))
1988     {
1989       /* simplify_gen_subreg can't be used here, as if simplify_subreg
1990          fails, it will happily create (subreg (symbol_ref)) or similar
1991          invalid SUBREGs.  */
1992       unsigned int byte = subreg_lowpart_offset (mode, src_mode);
1993       rtx ret = simplify_subreg (mode, src, src_mode, byte);
1994       if (ret)
1995         return ret;
1996
1997       if (GET_MODE (src) == VOIDmode
1998           || !validate_subreg (mode, src_mode, src, byte))
1999         return NULL_RTX;
2000
2001       src = force_reg (GET_MODE (src), src);
2002       return gen_rtx_SUBREG (mode, src, byte);
2003     }
2004
2005   if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2006     return NULL_RTX;
2007
2008   if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2009       && MODES_TIEABLE_P (mode, src_mode))
2010     {
2011       rtx x = gen_lowpart_common (mode, src);
2012       if (x)
2013         return x;
2014     }
2015   /* Otherwise go through the integer modes for SRC_MODE and MODE.  */
2016   src_int_mode = int_mode_for_mode (src_mode);
2017   int_mode = int_mode_for_mode (mode);
2018   if (src_int_mode == BLKmode || int_mode == BLKmode)
2019     return NULL_RTX;
2020
2021   if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2022     return NULL_RTX;
2023   if (!MODES_TIEABLE_P (int_mode, mode))
2024     return NULL_RTX;
2025   /* Pun SRC to an integer, zero-extend or truncate it, then pun to MODE.  */
2026   src = gen_lowpart (src_int_mode, src);
2027   src = convert_modes (int_mode, src_int_mode, src, true);
2028   src = gen_lowpart (mode, src);
2029   return src;
2030 }
2031 \f
2032 /* Add INC into TARGET, in TARGET's mode, copying the sum back if needed.  */
2033
2034 void
2035 expand_inc (rtx target, rtx inc)
2036 {
2037   rtx value = expand_binop (GET_MODE (target), add_optab,
2038                             target, inc,
2039                             target, /*unsignedp=*/0, OPTAB_LIB_WIDEN);
2040   if (value != target)
2041     emit_move_insn (target, value);
2042 }
2043
2044 /* Subtract DEC from TARGET, in TARGET's mode, copying back if needed.  */
2045
2046 void
2047 expand_dec (rtx target, rtx dec)
2048 {
2049   rtx value = expand_binop (GET_MODE (target), sub_optab,
2050                             target, dec,
2051                             target, /*unsignedp=*/0, OPTAB_LIB_WIDEN);
2052   if (value != target)
2053     emit_move_insn (target, value);
2054 }
2055 \f
2056 /* Output a shift or rotate instruction for expression code CODE, in MODE,
2057    with SHIFTED being the rtx for the value to shift,
2058    and AMOUNT the rtx for the amount to shift by.
2059    Store the result in the rtx TARGET, if that is convenient.
2060    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2061    Return the rtx for where the value is.  */
2062
2063 static rtx
2064 expand_shift_1 (enum tree_code code, enum machine_mode mode, rtx shifted,
2065                 rtx amount, rtx target, int unsignedp)
2066 {
2067   rtx op1, temp = 0;
2068   int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2069   int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2070   optab lshift_optab = ashl_optab;
2071   optab rshift_arith_optab = ashr_optab;
2072   optab rshift_uns_optab = lshr_optab;
2073   optab lrotate_optab = rotl_optab;
2074   optab rrotate_optab = rotr_optab;
2075   enum machine_mode op1_mode;
2076   int attempt;
2077   bool speed = optimize_insn_for_speed_p ();
2078
2079   op1 = amount;
2080   op1_mode = GET_MODE (op1);
2081
2082   /* Determine whether the shift/rotate amount is a vector, or scalar.  If the
2083      shift amount is a vector, use the vector/vector shift patterns.  */
2084   if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2085     {
2086       lshift_optab = vashl_optab;
2087       rshift_arith_optab = vashr_optab;
2088       rshift_uns_optab = vlshr_optab;
2089       lrotate_optab = vrotl_optab;
2090       rrotate_optab = vrotr_optab;
2091     }
2092
2093   /* Previously detected shift-counts computed by NEGATE_EXPR
2094      and shifted in the other direction; but that does not work
2095      on all machines.  */
2096   /* If counts are truncated, reduce a constant one; drop a lowpart SUBREG.  */
2097   if (SHIFT_COUNT_TRUNCATED)
2098     {
2099       if (CONST_INT_P (op1)
2100           && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2101               (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
2102         op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2103                        % GET_MODE_BITSIZE (mode));
2104       else if (GET_CODE (op1) == SUBREG
2105                && subreg_lowpart_p (op1)
2106                && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
2107                && SCALAR_INT_MODE_P (GET_MODE (op1)))
2108         op1 = SUBREG_REG (op1);
2109     }
2110
2111   /* Canonicalize rotates by constant amount.  If op1 is bitsize / 2,
2112      prefer left rotation, if op1 is from bitsize / 2 + 1 to
2113      bitsize - 1, use other direction of rotate with 1 .. bitsize / 2 - 1
2114      amount instead.  */
2115   if (rotate
2116       && CONST_INT_P (op1)
2117       && IN_RANGE (INTVAL (op1), GET_MODE_BITSIZE (mode) / 2 + left,
2118                    GET_MODE_BITSIZE (mode) - 1))
2119     {
2120       op1 = GEN_INT (GET_MODE_BITSIZE (mode) - INTVAL (op1));
2121       left = !left;
2122       code = left ? LROTATE_EXPR : RROTATE_EXPR;
2123     }
2124
2125   if (op1 == const0_rtx)
2126     return shifted;
2127
2128   /* Check whether it's cheaper to implement a left shift by a constant
2129      bit count by a sequence of additions.  */
2130   if (code == LSHIFT_EXPR
2131       && CONST_INT_P (op1)
2132       && INTVAL (op1) > 0
2133       && INTVAL (op1) < GET_MODE_PRECISION (mode)
2134       && INTVAL (op1) < MAX_BITS_PER_WORD
2135       && (shift_cost (speed, mode, INTVAL (op1))
2136           > INTVAL (op1) * add_cost (speed, mode))
2137       && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2138     {
2139       int i;
2140       for (i = 0; i < INTVAL (op1); i++)
2141         {
2142           temp = force_reg (mode, shifted);
2143           shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2144                                   unsignedp, OPTAB_LIB_WIDEN);
2145         }
2146       return shifted;
2147     }
2148   /* Try increasingly general expansion methods until one succeeds.  */
2149   for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2150     {
2151       enum optab_methods methods;
2152
2153       if (attempt == 0)
2154         methods = OPTAB_DIRECT;
2155       else if (attempt == 1)
2156         methods = OPTAB_WIDEN;
2157       else
2158         methods = OPTAB_LIB_WIDEN;
2159
2160       if (rotate)
2161         {
2162           /* Widening does not work for rotation.  */
2163           if (methods == OPTAB_WIDEN)
2164             continue;
2165           else if (methods == OPTAB_LIB_WIDEN)
2166             {
2167               /* If we have been unable to open-code this by a rotation,
2168                  do it as the IOR of two shifts.  I.e., to rotate A
2169                  by N bits, compute
2170                  (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
2171                  where C is the bitsize of A.
2172
2173                  It is theoretically possible that the target machine might
2174                  not be able to perform either shift and hence we would
2175                  be making two libcalls rather than just the one for the
2176                  shift (similarly if IOR could not be done).  We will allow
2177                  this extremely unlikely lossage to avoid complicating the
2178                  code below.  */
2179
2180               rtx subtarget = target == shifted ? 0 : target;
2181               rtx new_amount, other_amount;
2182               rtx temp1;
2183
2184               new_amount = op1;
2185               if (op1 == const0_rtx)
2186                 return shifted;
2187               else if (CONST_INT_P (op1))
2188                 other_amount = GEN_INT (GET_MODE_BITSIZE (mode)
2189                                         - INTVAL (op1));
2190               else
2191                 {
2192                   other_amount
2193                     = simplify_gen_unary (NEG, GET_MODE (op1),
2194                                           op1, GET_MODE (op1));
2195                   HOST_WIDE_INT mask = GET_MODE_PRECISION (mode) - 1;
2196                   other_amount
2197                     = simplify_gen_binary (AND, GET_MODE (op1), other_amount,
2198                                            gen_int_mode (mask, GET_MODE (op1)));
2199                 }
2200
2201               shifted = force_reg (mode, shifted);
2202
2203               temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2204                                      mode, shifted, new_amount, 0, 1);
2205               temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2206                                       mode, shifted, other_amount,
2207                                       subtarget, 1);
2208               return expand_binop (mode, ior_optab, temp, temp1, target,
2209                                    unsignedp, methods);
2210             }
2211
2212           temp = expand_binop (mode,
2213                                left ? lrotate_optab : rrotate_optab,
2214                                shifted, op1, target, unsignedp, methods);
2215         }
2216       else if (unsignedp)
2217         temp = expand_binop (mode,
2218                              left ? lshift_optab : rshift_uns_optab,
2219                              shifted, op1, target, unsignedp, methods);
2220
2221       /* Do arithmetic shifts.
2222          Also, if we are going to widen the operand, we can just as well
2223          use an arithmetic right-shift instead of a logical one.  */
2224       if (temp == 0 && ! rotate
2225           && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2226         {
2227           enum optab_methods methods1 = methods;
2228
2229           /* If trying to widen a log shift to an arithmetic shift,
2230              don't accept an arithmetic shift of the same size.  */
2231           if (unsignedp)
2232             methods1 = OPTAB_MUST_WIDEN;
2233
2234           /* Arithmetic shift */
2235
2236           temp = expand_binop (mode,
2237                                left ? lshift_optab : rshift_arith_optab,
2238                                shifted, op1, target, unsignedp, methods1);
2239         }
2240
2241       /* We used to try extzv here for logical right shifts, but that was
2242          only useful for one machine, the VAX, and caused poor code
2243          generation there for lshrdi3, so the code was deleted and a
2244          define_expand for lshrsi3 was added to vax.md.  */
2245     }
2246
2247   gcc_assert (temp);
2248   return temp;
2249 }
2250
2251 /* Output a shift instruction for expression code CODE,
2252    with SHIFTED being the rtx for the value to shift,
2253    and AMOUNT the constant amount to shift by (wrapped with GEN_INT).
2254    Store the result in the rtx TARGET, if that is convenient.
2255    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2256    Return the rtx for where the value is.  */
2257
2258 rtx
2259 expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2260               int amount, rtx target, int unsignedp)
2261 {
2262   return expand_shift_1 (code, mode,
2263                          shifted, GEN_INT (amount), target, unsignedp);
2264 }
2265
2266 /* Output a shift instruction for expression code CODE,
2267    with SHIFTED being the rtx for the value to shift,
2268    and AMOUNT the tree for the amount to shift by (expanded here to rtl).
2269    Store the result in the rtx TARGET, if that is convenient.
2270    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2271    Return the rtx for where the value is.  */
2272
2273 rtx
2274 expand_variable_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2275                        tree amount, rtx target, int unsignedp)
2276 {
2277   return expand_shift_1 (code, mode,
2278                          shifted, expand_normal (amount), target, unsignedp);
2279 }
2280
2281 \f
2282 /* Indicates the type of fixup needed after a constant multiplication.
2283    BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2284    the result should be negated, and ADD_VARIANT means that the
2285    multiplicand should be added to the result.  */
2286 enum mult_variant {basic_variant, negate_variant, add_variant};
2287 /* Forward declarations of file-local (static) helpers.  */
2288 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2289                         const struct mult_cost *, enum machine_mode mode);
2290 static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
2291                                  struct algorithm *, enum mult_variant *, int);
2292 static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
2293                               const struct algorithm *, enum mult_variant);
2294 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2295 static rtx extract_high_half (enum machine_mode, rtx);
2296 static rtx expmed_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
2297 static rtx expmed_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
2298                                        int, int);
2299 /* Compute the best algorithm for multiplying by T and store it in
2300    *ALG_OUT.  The algorithm must cost less than COST_LIMIT.
2301    If ALG_OUT->cost >= COST_LIMIT, no algorithm was found and all
2302    other fields of *ALG_OUT are undefined.
2303    MODE is the machine mode of the multiplication.  */
2304
2305 static void
2306 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2307             const struct mult_cost *cost_limit, enum machine_mode mode)
2308 {
2309   int m;
2310   struct algorithm *alg_in, *best_alg;
2311   struct mult_cost best_cost;
2312   struct mult_cost new_limit;
2313   int op_cost, op_latency;
2314   unsigned HOST_WIDE_INT orig_t = t;
2315   unsigned HOST_WIDE_INT q;
2316   int maxm, hash_index;
2317   bool cache_hit = false;
2318   enum alg_code cache_alg = alg_zero;
2319   bool speed = optimize_insn_for_speed_p ();
2320   enum machine_mode imode;
2321   struct alg_hash_entry *entry_ptr;
2322
2323   /* Indicate that no algorithm is yet found.  If no algorithm
2324      is found, this value will be returned and indicate failure.  */
2325   alg_out->cost.cost = cost_limit->cost + 1;
2326   alg_out->cost.latency = cost_limit->latency + 1;
2327
2328   if (cost_limit->cost < 0
2329       || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2330     return;
2331
2332   /* Be prepared for vector modes.  */
2333   imode = GET_MODE_INNER (mode);
2334   if (imode == VOIDmode)
2335     imode = mode;
2336
2337   maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2338
2339   /* Restrict the bits of "t" to the multiplication's mode.  */
2340   t &= GET_MODE_MASK (imode);
2341
2342   /* t == 1 can be done in zero cost.  */
2343   if (t == 1)
2344     {
2345       alg_out->ops = 1;
2346       alg_out->cost.cost = 0;
2347       alg_out->cost.latency = 0;
2348       alg_out->op[0] = alg_m;
2349       return;
2350     }
2351
2352   /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
2353      fail now.  */
2354   if (t == 0)
2355     {
2356       if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2357         return;
2358       else
2359         {
2360           alg_out->ops = 1;
2361           alg_out->cost.cost = zero_cost (speed);
2362           alg_out->cost.latency = zero_cost (speed);
2363           alg_out->op[0] = alg_zero;
2364           return;
2365         }
2366     }
2367
2368   /* We'll be needing a couple extra algorithm structures now.  */
2369
2370   alg_in = XALLOCA (struct algorithm);
2371   best_alg = XALLOCA (struct algorithm);
2372   best_cost = *cost_limit;
2373
2374   /* Compute the hash index.  */
2375   hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2376
2377   /* See if we already know what to do for T.  */
2378   entry_ptr = alg_hash_entry_ptr (hash_index);
2379   if (entry_ptr->t == t
2380       && entry_ptr->mode == mode
2381       && entry_ptr->speed == speed
2382       && entry_ptr->alg != alg_unknown)
2383     {
2384       /* The slot describes this exact <T, MODE, SPEED> combination.  */
2385       cache_alg = entry_ptr->alg;
2386
2387       if (cache_alg == alg_impossible)
2388         {
2389           /* The cache tells us that it's impossible to synthesize
2390              multiplication by T within entry_ptr->cost.  */
2391           if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2392             /* COST_LIMIT is at least as restrictive as the one
2393                recorded in the hash table, in which case we have no
2394                hope of synthesizing a multiplication.  Just
2395                return.  */
2396             return;
2397
2398           /* If we get here, COST_LIMIT is less restrictive than the
2399              one recorded in the hash table, so we may be able to
2400              synthesize a multiplication.  Proceed as if we didn't
2401              have the cache entry.  */
2402         }
2403       else
2404         {
2405           if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2406             /* The cached algorithm shows that this multiplication
2407                requires more cost than COST_LIMIT.  Just return.  This
2408                way, we don't clobber this cache entry with
2409                alg_impossible but retain useful information.  */
2410             return;
2411
2412           cache_hit = true;
2413           /* Jump straight to the code for the cached kind of step.  */
2414           switch (cache_alg)
2415             {
2416             case alg_shift:
2417               goto do_alg_shift;
2418
2419             case alg_add_t_m2:
2420             case alg_sub_t_m2:
2421               goto do_alg_addsub_t_m2;
2422
2423             case alg_add_factor:
2424             case alg_sub_factor:
2425               goto do_alg_addsub_factor;
2426
2427             case alg_add_t2_m:
2428               goto do_alg_add_t2_m;
2429
2430             case alg_sub_t2_m:
2431               goto do_alg_sub_t2_m;
2432
2433             default:
2434               gcc_unreachable ();
2435             }
2436         }
2437     }
2438
2439   /* If we have a group of zero bits at the low-order part of T, try
2440      multiplying by the remaining bits and then doing a shift.  */
2441
2442   if ((t & 1) == 0)
2443     {
2444     do_alg_shift:
2445       m = floor_log2 (t & -t);  /* m = number of low zero bits */
2446       if (m < maxm)
2447         {
2448           q = t >> m;
2449           /* The function expand_shift will choose between a shift and
2450              a sequence of additions, so the observed cost is given as
2451              MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)).  */
2452           op_cost = m * add_cost (speed, mode);
2453           if (shift_cost (speed, mode, m) < op_cost)
2454             op_cost = shift_cost (speed, mode, m);
2455           new_limit.cost = best_cost.cost - op_cost;
2456           new_limit.latency = best_cost.latency - op_cost;
2457           synth_mult (alg_in, q, &new_limit, mode);
2458
2459           alg_in->cost.cost += op_cost;
2460           alg_in->cost.latency += op_cost;
2461           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2462             {
2463               struct algorithm *x;
2464               best_cost = alg_in->cost;
2465               x = alg_in, alg_in = best_alg, best_alg = x;
2466               best_alg->log[best_alg->ops] = m;
2467               best_alg->op[best_alg->ops] = alg_shift;
2468             }
2469
2470           /* See if treating ORIG_T as a signed number yields a better
2471              sequence.  Try this sequence only for a negative ORIG_T
2472              as it would be useless for a non-negative ORIG_T.  */
2473           if ((HOST_WIDE_INT) orig_t < 0)
2474             {
2475               /* Shift ORIG_T as follows because a right shift of a
2476                  negative-valued signed type is implementation
2477                  defined.  */
2478               q = ~(~orig_t >> m);
2479               /* The function expand_shift will choose between a shift
2480                  and a sequence of additions, so the observed cost is
2481                  given as MIN (m * add_cost(speed, mode),
2482                  shift_cost(speed, mode, m)).  */
2483               op_cost = m * add_cost (speed, mode);
2484               if (shift_cost (speed, mode, m) < op_cost)
2485                 op_cost = shift_cost (speed, mode, m);
2486               new_limit.cost = best_cost.cost - op_cost;
2487               new_limit.latency = best_cost.latency - op_cost;
2488               synth_mult (alg_in, q, &new_limit, mode);
2489
2490               alg_in->cost.cost += op_cost;
2491               alg_in->cost.latency += op_cost;
2492               if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2493                 {
2494                   struct algorithm *x;
2495                   best_cost = alg_in->cost;
2496                   x = alg_in, alg_in = best_alg, best_alg = x;
2497                   best_alg->log[best_alg->ops] = m;
2498                   best_alg->op[best_alg->ops] = alg_shift;
2499                 }
2500             }
2501         }
2502       if (cache_hit)
2503         goto done;
2504     }
2505
2506   /* If we have an odd number, add or subtract one.  */
2507   if ((t & 1) != 0)
2508     {
2509       unsigned HOST_WIDE_INT w;
2510
2511     do_alg_addsub_t_m2:
2512       for (w = 1; (w & t) != 0; w <<= 1)
2513         ;
2514       /* If T was -1, then W will be zero after the loop.  This is another
2515          case where T ends with ...111.  Handling this with (T + 1) and
2516          subtract 1 produces slightly better code and results in algorithm
2517          selection much faster than treating it like the ...0111 case
2518          below.  */
2519       if (w == 0
2520           || (w > 2
2521               /* Reject the case where t is 3.
2522                  Thus we prefer addition in that case.  */
2523               && t != 3))
2524         {
2525           /* T ends with ...111.  Multiply by (T + 1) and subtract 1.  */
2526
2527           op_cost = add_cost (speed, mode);
2528           new_limit.cost = best_cost.cost - op_cost;
2529           new_limit.latency = best_cost.latency - op_cost;
2530           synth_mult (alg_in, t + 1, &new_limit, mode);
2531
2532           alg_in->cost.cost += op_cost;
2533           alg_in->cost.latency += op_cost;
2534           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2535             {
2536               struct algorithm *x;
2537               best_cost = alg_in->cost;
2538               x = alg_in, alg_in = best_alg, best_alg = x;
2539               best_alg->log[best_alg->ops] = 0;
2540               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2541             }
2542         }
2543       else
2544         {
2545           /* T ends with ...01 or ...011.  Multiply by (T - 1) and add 1.  */
2546
2547           op_cost = add_cost (speed, mode);
2548           new_limit.cost = best_cost.cost - op_cost;
2549           new_limit.latency = best_cost.latency - op_cost;
2550           synth_mult (alg_in, t - 1, &new_limit, mode);
2551
2552           alg_in->cost.cost += op_cost;
2553           alg_in->cost.latency += op_cost;
2554           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2555             {
2556               struct algorithm *x;
2557               best_cost = alg_in->cost;
2558               x = alg_in, alg_in = best_alg, best_alg = x;
2559               best_alg->log[best_alg->ops] = 0;
2560               best_alg->op[best_alg->ops] = alg_add_t_m2;
2561             }
2562         }
2563
2564       /* We may be able to calculate a * -7, a * -15, a * -31, etc
2565          quickly with a - a * n for some appropriate constant n.  */
2566       m = exact_log2 (-orig_t + 1);
2567       if (m >= 0 && m < maxm)
2568         {
2569           op_cost = shiftsub1_cost (speed, mode, m);
2570           new_limit.cost = best_cost.cost - op_cost;
2571           new_limit.latency = best_cost.latency - op_cost;
2572           synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2573                       &new_limit, mode);
2574
2575           alg_in->cost.cost += op_cost;
2576           alg_in->cost.latency += op_cost;
2577           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2578             {
2579               struct algorithm *x;
2580               best_cost = alg_in->cost;
2581               x = alg_in, alg_in = best_alg, best_alg = x;
2582               best_alg->log[best_alg->ops] = m;
2583               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2584             }
2585         }
2586
2587       if (cache_hit)
2588         goto done;
2589     }
2590
2591   /* Look for factors of t of the form
2592      t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2593      If we find such a factor, we can multiply by t using an algorithm that
2594      multiplies by q, shift the result by m and add/subtract it to itself.
2595
2596      We search for large factors first and loop down, even if large factors
2597      are less probable than small; if we find a large factor we will find a
2598      good sequence quickly, and therefore be able to prune (by decreasing
2599      COST_LIMIT) the search.  */
2600
2601  do_alg_addsub_factor:
2602   for (m = floor_log2 (t - 1); m >= 2; m--)
2603     {
2604       unsigned HOST_WIDE_INT d;
2605
2606       d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2607       if (t % d == 0 && t > d && m < maxm
2608           && (!cache_hit || cache_alg == alg_add_factor))
2609         {
2610           /* If the target has a cheap shift-and-add instruction use
2611              that in preference to a shift insn followed by an add insn.
2612              Assume that the shift-and-add is "atomic" with a latency
2613              equal to its cost, otherwise assume that on superscalar
2614              hardware the shift may be executed concurrently with the
2615              earlier steps in the algorithm.  */
2616           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2617           if (shiftadd_cost (speed, mode, m) < op_cost)
2618             {
2619               op_cost = shiftadd_cost (speed, mode, m);
2620               op_latency = op_cost;
2621             }
2622           else
2623             op_latency = add_cost (speed, mode);
2624
2625           new_limit.cost = best_cost.cost - op_cost;
2626           new_limit.latency = best_cost.latency - op_latency;
2627           synth_mult (alg_in, t / d, &new_limit, mode);
2628
2629           alg_in->cost.cost += op_cost;
2630           alg_in->cost.latency += op_latency;
2631           if (alg_in->cost.latency < op_cost)
2632             alg_in->cost.latency = op_cost;
2633           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2634             {
2635               struct algorithm *x;
2636               best_cost = alg_in->cost;
2637               x = alg_in, alg_in = best_alg, best_alg = x;
2638               best_alg->log[best_alg->ops] = m;
2639               best_alg->op[best_alg->ops] = alg_add_factor;
2640             }
2641           /* Other factors will have been taken care of in the recursion.  */
2642           break;
2643         }
2644
2645       d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2646       if (t % d == 0 && t > d && m < maxm
2647           && (!cache_hit || cache_alg == alg_sub_factor))
2648         {
2649           /* If the target has a cheap shift-and-subtract insn use
2650              that in preference to a shift insn followed by a sub insn.
2651              Assume that the shift-and-sub is "atomic" with a latency
2652              equal to its cost, otherwise assume that on superscalar
2653              hardware the shift may be executed concurrently with the
2654              earlier steps in the algorithm.  */
2655           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2656           if (shiftsub0_cost (speed, mode, m) < op_cost)
2657             {
2658               op_cost = shiftsub0_cost (speed, mode, m);
2659               op_latency = op_cost;
2660             }
2661           else
2662             op_latency = add_cost (speed, mode);
2663
2664           new_limit.cost = best_cost.cost - op_cost;
2665           new_limit.latency = best_cost.latency - op_latency;
2666           synth_mult (alg_in, t / d, &new_limit, mode);
2667
2668           alg_in->cost.cost += op_cost;
2669           alg_in->cost.latency += op_latency;
2670           if (alg_in->cost.latency < op_cost)
2671             alg_in->cost.latency = op_cost;
2672           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2673             {
2674               struct algorithm *x;
2675               best_cost = alg_in->cost;
2676               x = alg_in, alg_in = best_alg, best_alg = x;
2677               best_alg->log[best_alg->ops] = m;
2678               best_alg->op[best_alg->ops] = alg_sub_factor;
2679             }
2680           break;
2681         }
2682     }
2683   if (cache_hit)
2684     goto done;
2685
2686   /* Try shift-and-add (load effective address) instructions,
2687      i.e. do a*3, a*5, a*9.  */
2688   if ((t & 1) != 0)
2689     {
2690     do_alg_add_t2_m:
2691       q = t - 1;
2692       q = q & -q;
2693       m = exact_log2 (q);
2694       if (m >= 0 && m < maxm)
2695         {
2696           op_cost = shiftadd_cost (speed, mode, m);
2697           new_limit.cost = best_cost.cost - op_cost;
2698           new_limit.latency = best_cost.latency - op_cost;
2699           synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2700
2701           alg_in->cost.cost += op_cost;
2702           alg_in->cost.latency += op_cost;
2703           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2704             {
2705               struct algorithm *x;
2706               best_cost = alg_in->cost;
2707               x = alg_in, alg_in = best_alg, best_alg = x;
2708               best_alg->log[best_alg->ops] = m;
2709               best_alg->op[best_alg->ops] = alg_add_t2_m;
2710             }
2711         }
2712       if (cache_hit)
2713         goto done;
2714
2715     do_alg_sub_t2_m:
2716       q = t + 1;
2717       q = q & -q;
2718       m = exact_log2 (q);
2719       if (m >= 0 && m < maxm)
2720         {
2721           op_cost = shiftsub0_cost (speed, mode, m);
2722           new_limit.cost = best_cost.cost - op_cost;
2723           new_limit.latency = best_cost.latency - op_cost;
2724           synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2725
2726           alg_in->cost.cost += op_cost;
2727           alg_in->cost.latency += op_cost;
2728           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2729             {
2730               struct algorithm *x;
2731               best_cost = alg_in->cost;
2732               x = alg_in, alg_in = best_alg, best_alg = x;
2733               best_alg->log[best_alg->ops] = m;
2734               best_alg->op[best_alg->ops] = alg_sub_t2_m;
2735             }
2736         }
2737       if (cache_hit)
2738         goto done;
2739     }
2740
2741  done:
2742   /* If best_cost has not decreased, we have not found any algorithm.  */
2743   if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2744     {
2745       /* We failed to find an algorithm.  Record alg_impossible for
2746          this case (that is, <T, MODE, COST_LIMIT>) so that next time
2747          we are asked to find an algorithm for T within the same or
2748          lower COST_LIMIT, we can immediately return to the
2749          caller.  */
2750       entry_ptr->t = t;
2751       entry_ptr->mode = mode;
2752       entry_ptr->speed = speed;
2753       entry_ptr->alg = alg_impossible;
2754       entry_ptr->cost = *cost_limit;
2755       return;
2756     }
2757
2758   /* Cache the result.  */
2759   if (!cache_hit)
2760     {
2761       entry_ptr->t = t;
2762       entry_ptr->mode = mode;
2763       entry_ptr->speed = speed;
2764       entry_ptr->alg = best_alg->op[best_alg->ops];
2765       entry_ptr->cost.cost = best_cost.cost;
2766       entry_ptr->cost.latency = best_cost.latency;
2767     }
2768
2769   /* If we are getting a too long sequence for `struct algorithm'
2770      to record, make this search fail.  */
2771   if (best_alg->ops == MAX_BITS_PER_WORD)
2772     return;
2773
2774   /* Copy the algorithm from temporary space to the space at alg_out.
2775      We avoid using structure assignment because the majority of
2776      best_alg is normally undefined, and this is a critical function.  */
2777   alg_out->ops = best_alg->ops + 1;
2778   alg_out->cost = best_cost;
2779   memcpy (alg_out->op, best_alg->op,
2780           alg_out->ops * sizeof *alg_out->op);
2781   memcpy (alg_out->log, best_alg->log,
2782           alg_out->ops * sizeof *alg_out->log);
2783 }
2784 \f
2785 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2786    Try three variations:
2787
2788        - a shift/add sequence based on VAL itself
2789        - a shift/add sequence based on -VAL, followed by a negation
2790        - a shift/add sequence based on VAL - 1, followed by an addition.
2791
2792    Return true if the cheapest of these costs less than MULT_COST,
2793    describing the algorithm in *ALG and final fixup in *VARIANT.  */
2794
2795 static bool
2796 choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
2797                      struct algorithm *alg, enum mult_variant *variant,
2798                      int mult_cost)
2799 {
2800   struct algorithm alg2;
2801   struct mult_cost limit;
2802   int op_cost;
2803   bool speed = optimize_insn_for_speed_p ();
2804
2805   /* Fail quickly for impossible bounds.  */
2806   if (mult_cost < 0)
2807     return false;
2808
2809   /* Ensure that mult_cost provides a reasonable upper bound.
2810      Any constant multiplication can be performed with less
2811      than 2 * bits additions.  */
2812   op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
2813   if (mult_cost > op_cost)
2814     mult_cost = op_cost;
2815
2816   *variant = basic_variant;
2817   limit.cost = mult_cost;
2818   limit.latency = mult_cost;
2819   synth_mult (alg, val, &limit, mode);
2820
2821   /* This works only if the inverted value actually fits in an
2822      `unsigned int'.  */
2823   if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
2824     {
2825       op_cost = neg_cost (speed, mode);
2826       if (MULT_COST_LESS (&alg->cost, mult_cost))
2827         {
2828           limit.cost = alg->cost.cost - op_cost;
2829           limit.latency = alg->cost.latency - op_cost;
2830         }
2831       else
2832         {
2833           limit.cost = mult_cost - op_cost;
2834           limit.latency = mult_cost - op_cost;
2835         }
2836       /* Negate in the unsigned type so that -VAL cannot overflow.  */
2837       synth_mult (&alg2, -(unsigned HOST_WIDE_INT) val, &limit, mode);
2838       alg2.cost.cost += op_cost;
2839       alg2.cost.latency += op_cost;
2840       if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2841         *alg = alg2, *variant = negate_variant;
2842     }
2843
2844   /* This proves very useful for division-by-constant.  */
2845   op_cost = add_cost (speed, mode);
2846   if (MULT_COST_LESS (&alg->cost, mult_cost))
2847     {
2848       limit.cost = alg->cost.cost - op_cost;
2849       limit.latency = alg->cost.latency - op_cost;
2850     }
2851   else
2852     {
2853       limit.cost = mult_cost - op_cost;
2854       limit.latency = mult_cost - op_cost;
2855     }
2856   /* Subtract in the unsigned type so that VAL - 1 cannot overflow.  */
2857   synth_mult (&alg2, (unsigned HOST_WIDE_INT) val - 1, &limit, mode);
2858   alg2.cost.cost += op_cost;
2859   alg2.cost.latency += op_cost;
2860   if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2861     *alg = alg2, *variant = add_variant;
2862
2863   return MULT_COST_LESS (&alg->cost, mult_cost);
2864 }
2865
2866 /* A subroutine of expand_mult, used for constant multiplications.
2867    Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2868    convenient.  Use the shift/add sequence described by ALG and apply
2869    the final fixup specified by VARIANT.  */
2870
2871 static rtx
2872 expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
2873                    rtx target, const struct algorithm *alg,
2874                    enum mult_variant variant)
2875 {
2876   unsigned HOST_WIDE_INT val_so_far;  /* Unsigned so shifts/adds wrap.  */
2877   rtx insn, accum, tem;
2878   int opno;
2879   enum machine_mode nmode;
2880
2881   /* Avoid referencing memory over and over and invalid sharing
2882      on SUBREGs.  */
2883   op0 = force_reg (mode, op0);
2884
2885   /* ACCUM starts out either as OP0 or as a zero, depending on
2886      the first operation.  */
2887
2888   if (alg->op[0] == alg_zero)
2889     {
2890       accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
2891       val_so_far = 0;
2892     }
2893   else if (alg->op[0] == alg_m)
2894     {
2895       accum = copy_to_mode_reg (mode, op0);
2896       val_so_far = 1;
2897     }
2898   else
2899     gcc_unreachable ();
2900
2901   for (opno = 1; opno < alg->ops; opno++)
2902     {
2903       int log = alg->log[opno];
2904       rtx shift_subtarget = optimize ? 0 : accum;
2905       rtx add_target
2906         = (opno == alg->ops - 1 && target != 0 && variant != add_variant
2907            && !optimize)
2908           ? target : 0;
2909       rtx accum_target = optimize ? 0 : accum;
2910       rtx accum_inner;
2911
2912       switch (alg->op[opno])
2913         {
2914         case alg_shift:
2915           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2916           /* REG_EQUAL note will be attached to the following insn.  */
2917           emit_move_insn (accum, tem);
2918           val_so_far <<= log;
2919           break;
2920
2921         case alg_add_t_m2:
2922           tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
2923           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2924                                  add_target ? add_target : accum_target);
2925           val_so_far += (unsigned HOST_WIDE_INT) 1 << log;
2926           break;
2927
2928         case alg_sub_t_m2:
2929           tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
2930           accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
2931                                  add_target ? add_target : accum_target);
2932           val_so_far -= (unsigned HOST_WIDE_INT) 1 << log;
2933           break;
2934
2935         case alg_add_t2_m:
2936           accum = expand_shift (LSHIFT_EXPR, mode, accum,
2937                                 log, shift_subtarget, 0);
2938           accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
2939                                  add_target ? add_target : accum_target);
2940           val_so_far = (val_so_far << log) + 1;
2941           break;
2942
2943         case alg_sub_t2_m:
2944           accum = expand_shift (LSHIFT_EXPR, mode, accum,
2945                                 log, shift_subtarget, 0);
2946           accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
2947                                  add_target ? add_target : accum_target);
2948           val_so_far = (val_so_far << log) - 1;
2949           break;
2950
2951         case alg_add_factor:
2952           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2953           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2954                                  add_target ? add_target : accum_target);
2955           val_so_far += val_so_far << log;
2956           break;
2957
2958         case alg_sub_factor:
2959           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2960           accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
2961                                  (add_target
2962                                   ? add_target : (optimize ? 0 : tem)));
2963           val_so_far = (val_so_far << log) - val_so_far;
2964           break;
2965
2966         default:
2967           gcc_unreachable ();
2968         }
2969
2970       if (SCALAR_INT_MODE_P (mode))
2971         {
2972           /* Write a REG_EQUAL note on the last insn so that we can cse
2973              multiplication sequences.  Note that if ACCUM is a SUBREG,
2974              we've set the inner register and must properly indicate that.  */
2975           tem = op0, nmode = mode;
2976           accum_inner = accum;
2977           if (GET_CODE (accum) == SUBREG)
2978             {
2979               accum_inner = SUBREG_REG (accum);
2980               nmode = GET_MODE (accum_inner);
2981               tem = gen_lowpart (nmode, op0);
2982             }
2983
2984           insn = get_last_insn ();
2985           set_dst_reg_note (insn, REG_EQUAL,
2986                             gen_rtx_MULT (nmode, tem,
2987                                           gen_int_mode (val_so_far, nmode)),
2988                             accum_inner);
2989         }
2990     }
2991
2992   if (variant == negate_variant)
2993     {
2994       val_so_far = -val_so_far;
2995       accum = expand_unop (mode, neg_optab, accum, target, 0);
2996     }
2997   else if (variant == add_variant)
2998     {
2999       val_so_far = val_so_far + 1;
3000       accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3001     }
3002
3003   /* Compare only the bits of val and val_so_far that are significant
3004      in the result mode, to avoid sign-/zero-extension confusion.  */
3005   nmode = GET_MODE_INNER (mode);
3006   if (nmode == VOIDmode)
3007     nmode = mode;
3008   val &= GET_MODE_MASK (nmode);
3009   val_so_far &= GET_MODE_MASK (nmode);
3010   gcc_assert (val == (HOST_WIDE_INT) val_so_far);
3011
3012   return accum;
3013 }
3014
3015 /* Perform a multiplication and return an rtx for the result.
3016    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3017    TARGET is a suggestion for where to store the result (an rtx).
3018
3019    We check specially for a constant integer as OP1.
3020    If you want this check for OP0 as well, then before calling
3021    you should swap the two operands if OP0 would be constant.  */
3022
3023 rtx
3024 expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3025              int unsignedp)
3026 {
3027   enum mult_variant variant;
3028   struct algorithm algorithm;
3029   rtx scalar_op1;
3030   int max_cost;
3031   bool speed = optimize_insn_for_speed_p ();
3032   bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3033
3034   if (CONSTANT_P (op0))
3035     {
3036       rtx temp = op0;
3037       op0 = op1;
3038       op1 = temp;
3039     }
3040
3041   /* For vectors, there are several simplifications that can be made if
3042      all elements of the vector constant are identical.  */
3043   scalar_op1 = op1;
3044   if (GET_CODE (op1) == CONST_VECTOR)
3045     {
3046       int i, n = CONST_VECTOR_NUNITS (op1);
3047       scalar_op1 = CONST_VECTOR_ELT (op1, 0);
3048       for (i = 1; i < n; ++i)
3049         if (!rtx_equal_p (scalar_op1, CONST_VECTOR_ELT (op1, i)))
3050           goto skip_scalar;
3051     }
3052
3053   if (INTEGRAL_MODE_P (mode))
3054     {
3055       rtx fake_reg;
3056       HOST_WIDE_INT coeff;
3057       bool is_neg;
3058       int mode_bitsize;
3059
3060       if (op1 == CONST0_RTX (mode))
3061         return op1;
3062       if (op1 == CONST1_RTX (mode))
3063         return op0;
3064       if (op1 == CONSTM1_RTX (mode))
3065         return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3066                             op0, target, 0);
3067
3068       if (do_trapv)
3069         goto skip_synth;
3070
3071       /* These are the operations that are potentially turned into
3072          a sequence of shifts and additions.  */
3073       mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3074
3075       /* synth_mult does an `unsigned int' multiply.  As long as the mode is
3076          less than or equal in size to `unsigned int' this doesn't matter.
3077          If the mode is larger than `unsigned int', then synth_mult works
3078          only if the constant value exactly fits in an `unsigned int' without
3079          any truncation.  This means that multiplying by negative values does
3080          not work; results are off by 2^32 on a 32 bit machine.  */
3081
3082       if (CONST_INT_P (scalar_op1))
3083         {
3084           coeff = INTVAL (scalar_op1);
3085           is_neg = coeff < 0;
3086         }
3087       else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3088         {
3089           /* If we are multiplying in DImode, it may still be a win
3090              to try to work with shifts and adds.  */
3091           if (CONST_DOUBLE_HIGH (scalar_op1) == 0
3092               && (CONST_DOUBLE_LOW (scalar_op1) > 0
3093                   || (CONST_DOUBLE_LOW (scalar_op1) < 0
3094                       && EXACT_POWER_OF_2_OR_ZERO_P
3095                            (CONST_DOUBLE_LOW (scalar_op1)))))
3096             {
3097               coeff = CONST_DOUBLE_LOW (scalar_op1);
3098               is_neg = false;
3099             }
3100           else if (CONST_DOUBLE_LOW (scalar_op1) == 0)
3101             {
3102               coeff = CONST_DOUBLE_HIGH (scalar_op1);
3103               if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3104                 {
3105                   int shift = floor_log2 (coeff) + HOST_BITS_PER_WIDE_INT;
3106                   if (shift < HOST_BITS_PER_DOUBLE_INT - 1
3107                       || mode_bitsize <= HOST_BITS_PER_DOUBLE_INT)
3108                     return expand_shift (LSHIFT_EXPR, mode, op0,
3109                                          shift, target, unsignedp);
3110                 }
3111               goto skip_synth;
3112             }
3113           else
3114             goto skip_synth;
3115         }
3116       else
3117         goto skip_synth;
3118
3119       /* We used to test optimize here, on the grounds that it's better to
3120          produce a smaller program when -O is not used.  But this causes
3121          such a terrible slowdown sometimes that it seems better to always
3122          use synth_mult.  */
3123
3124       /* Special case powers of two.  */
3125       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
3126           && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
3127         return expand_shift (LSHIFT_EXPR, mode, op0,
3128                              floor_log2 (coeff), target, unsignedp);
3129
3130       fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3131
3132       /* Attempt to handle multiplication of DImode values by negative
3133          coefficients, by performing the multiplication by a positive
3134          multiplier and then inverting the result.  */
3135       if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3136         {
3137           /* It's safe to use -coeff even for INT_MIN, as the
3138              result is interpreted as an unsigned coefficient.
3139              Exclude cost of op0 from max_cost to match the cost
3140              calculation of the synth_mult.  */
3141           coeff = -(unsigned HOST_WIDE_INT) coeff;
3142           max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed)
3143                       - neg_cost (speed, mode));
3144           if (max_cost <= 0)
3145             goto skip_synth;
3146
3147           /* Special case powers of two.  */
3148           if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3149             {
3150               rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
3151                                        floor_log2 (coeff), target, unsignedp);
3152               return expand_unop (mode, neg_optab, temp, target, 0);
3153             }
3154
3155           if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3156                                    max_cost))
3157             {
3158               rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
3159                                             &algorithm, variant);
3160               return expand_unop (mode, neg_optab, temp, target, 0);
3161             }
3162           goto skip_synth;
3163         }
3164
3165       /* Exclude cost of op0 from max_cost to match the cost
3166          calculation of the synth_mult.  */
3167       max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed);
3168       if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3169         return expand_mult_const (mode, op0, coeff, target,
3170                                   &algorithm, variant);
3171     }
3172  skip_synth:
3173
3174   /* Expand x*2.0 as x+x.  */
3175   if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1))
3176     {
3177       REAL_VALUE_TYPE d;
3178       REAL_VALUE_FROM_CONST_DOUBLE (d, scalar_op1);
3179
3180       if (REAL_VALUES_EQUAL (d, dconst2))
3181         {
3182           op0 = force_reg (GET_MODE (op0), op0);
3183           return expand_binop (mode, add_optab, op0, op0,
3184                                target, unsignedp, OPTAB_LIB_WIDEN);
3185         }
3186     }
3187  skip_scalar:
3188
3189   /* This used to use umul_optab if unsigned, but for non-widening multiply
3190      there is no difference between signed and unsigned.  */
3191   op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3192                       op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3193   gcc_assert (op0);
3194   return op0;
3195 }
3196
3197 /* Return a cost estimate for multiplying a register by the given
3198    COEFFicient in the given MODE and SPEED.  */
3199
3200 int
3201 mult_by_coeff_cost (HOST_WIDE_INT coeff, enum machine_mode mode, bool speed)
3202 {
3203   int max_cost;
3204   struct algorithm algorithm;
3205   enum mult_variant variant;
3206
3207   rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3208   max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg), speed);
3209   if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3210     return algorithm.cost.cost;
3211   else
3212     return max_cost;
3213 }
3214
3215 /* Perform a widening multiplication and return an rtx for the result.
3216    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3217    TARGET is a suggestion for where to store the result (an rtx).
3218    THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3219    or smul_widen_optab.
3220
3221    We check specially for a constant integer as OP1, comparing the
3222    cost of a widening multiply against the cost of a sequence of shifts
3223    and adds.  */
3224
3225 rtx
3226 expand_widening_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3227                       int unsignedp, optab this_optab)
3228 {
3229   bool speed = optimize_insn_for_speed_p ();
3230   rtx cop1;
3231
3232   if (CONST_INT_P (op1)
3233       && GET_MODE (op0) != VOIDmode
3234       && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3235                                 this_optab == umul_widen_optab))
3236       && CONST_INT_P (cop1)
3237       && (INTVAL (cop1) >= 0
3238           || HWI_COMPUTABLE_MODE_P (mode)))
3239     {
3240       HOST_WIDE_INT coeff = INTVAL (cop1);
3241       int max_cost;
3242       enum mult_variant variant;
3243       struct algorithm algorithm;
3244
3245       /* Special case powers of two.  */
3246       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3247         {
3248           op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3249           return expand_shift (LSHIFT_EXPR, mode, op0,
3250                                floor_log2 (coeff), target, unsignedp);
3251         }
3252
3253       /* Exclude cost of op0 from max_cost to match the cost
3254          calculation of the synth_mult.  */
3255       max_cost = mul_widen_cost (speed, mode);
3256       if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3257                                max_cost))
3258         {
3259           op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3260           return expand_mult_const (mode, op0, coeff, target,
3261                                     &algorithm, variant);
3262         }
3263     }
3264   return expand_binop (mode, this_optab, op0, op1, target,
3265                        unsignedp, OPTAB_LIB_WIDEN);
3266 }
3267 \f
3268 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3269    replace division by D, and put the least significant N bits of the result
3270    in *MULTIPLIER_PTR and return the most significant bit.
3271
3272    The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3273    needed precision is in PRECISION (should be <= N).
3274
3275    PRECISION should be as small as possible so this function can choose
3276    multiplier more freely.
3277
3278    The rounded-up logarithm of D is placed in *lgup_ptr.  A shift count that
3279    is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3280
3281    Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3282    where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.  */
3283
3284 unsigned HOST_WIDE_INT
3285 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3286                    unsigned HOST_WIDE_INT *multiplier_ptr,
3287                    int *post_shift_ptr, int *lgup_ptr)
3288 {
3289   double_int mhigh, mlow;
3290   int lgup, post_shift;
3291   int pow, pow2;
3292
3293   /* lgup = ceil(log2(divisor)); */
3294   lgup = ceil_log2 (d);
3295
3296   gcc_assert (lgup <= n);
3297
3298   pow = n + lgup;
3299   pow2 = n + lgup - precision;
3300
3301   /* We could handle this with some effort, but this case is much
3302      better handled directly with a scc insn, so rely on caller using
3303      that.  */
3304   gcc_assert (pow != HOST_BITS_PER_DOUBLE_INT);
3305
3306   /* mlow = 2^(N + lgup)/d */
3307   double_int val = double_int_zero.set_bit (pow);
3308   mlow = val.div (double_int::from_uhwi (d), true, TRUNC_DIV_EXPR);
3309
3310   /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3311   val |= double_int_zero.set_bit (pow2);
3312   mhigh = val.div (double_int::from_uhwi (d), true, TRUNC_DIV_EXPR);
3313
3314   gcc_assert (!mhigh.high || val.high - d < d);
3315   gcc_assert (mhigh.high <= 1 && mlow.high <= 1);
3316   /* Assert that mlow < mhigh.  */
3317   gcc_assert (mlow.ult (mhigh));
3318
3319   /* If precision == N, then mlow, mhigh exceed 2^N
3320      (but they do not exceed 2^(N+1)).  */
3321
3322   /* Reduce to lowest terms.  */
3323   for (post_shift = lgup; post_shift > 0; post_shift--)
3324     {
3325       int shft = HOST_BITS_PER_WIDE_INT - 1;
3326       unsigned HOST_WIDE_INT ml_lo = (mlow.high << shft) | (mlow.low >> 1);
3327       unsigned HOST_WIDE_INT mh_lo = (mhigh.high << shft) | (mhigh.low >> 1);
3328       if (ml_lo >= mh_lo)
3329         break;
3330
3331       mlow = double_int::from_uhwi (ml_lo);
3332       mhigh = double_int::from_uhwi (mh_lo);
3333     }
3334
3335   *post_shift_ptr = post_shift;
3336   *lgup_ptr = lgup;
3337   if (n < HOST_BITS_PER_WIDE_INT)
3338     {
3339       unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3340       *multiplier_ptr = mhigh.low & mask;
3341       return mhigh.low >= mask;
3342     }
3343   else
3344     {
3345       *multiplier_ptr = mhigh.low;
3346       return mhigh.high;
3347     }
3348 }
3349
3350 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3351    congruent to 1 (mod 2**N).  */
3352
3353 static unsigned HOST_WIDE_INT
3354 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3355 {
3356   /* Solve x*y == 1 (mod 2^n), where x is odd.  Return y.  */
3357
3358   /* The algorithm notes that the choice y = x satisfies
3359      x*y == 1 mod 2^3, since x is assumed odd.
3360      Each iteration doubles the number of bits of significance in y.  */
3361
3362   unsigned HOST_WIDE_INT mask;
3363   unsigned HOST_WIDE_INT y = x;
3364   int nbit = 3;
3365
3366   mask = (n == HOST_BITS_PER_WIDE_INT
3367           ? ~(unsigned HOST_WIDE_INT) 0
3368           : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3369
3370   while (nbit < n)
3371     {
3372       y = y * (2 - x*y) & mask;         /* Modulo 2^N */
3373       nbit *= 2;
3374     }
3375   return y;
3376 }
3377
3378 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3379    flavor of OP0 and OP1.  ADJ_OPERAND is already the high half of the
3380    product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed product
3381    to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3382    become signed.
3383
3384    The result is put in TARGET if that is convenient.
3385
3386    MODE is the mode of operation.  */
3387
3388 rtx
3389 expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
3390                              rtx op1, rtx target, int unsignedp)
3391 {
3392   rtx tem;
3393   enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3394
3395   tem = expand_shift (RSHIFT_EXPR, mode, op0,
3396                       GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3397   tem = expand_and (mode, tem, op1, NULL_RTX);
3398   adj_operand
3399     = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3400                      adj_operand);
3401
3402   tem = expand_shift (RSHIFT_EXPR, mode, op1,
3403                       GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3404   tem = expand_and (mode, tem, op0, NULL_RTX);
3405   target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3406                           target);
3407
3408   return target;
3409 }
3410
3411 /* Subroutine of expmed_mult_highpart.  Return the MODE high part of OP.  */
3412
3413 static rtx
3414 extract_high_half (enum machine_mode mode, rtx op)
3415 {
3416   enum machine_mode wider_mode;
3417
3418   if (mode == word_mode)
3419     return gen_highpart (mode, op);
3420
3421   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3422
3423   wider_mode = GET_MODE_WIDER_MODE (mode);
3424   op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3425                      GET_MODE_BITSIZE (mode), 0, 1);
3426   return convert_modes (mode, wider_mode, op, 0);
3427 }
3428
3429 /* Like expmed_mult_highpart, but only consider using a multiplication
3430    optab.  OP1 is an rtx for the constant operand.  */
3431
3432 static rtx
3433 expmed_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
3434                             rtx target, int unsignedp, int max_cost)
3435 {
3436   rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3437   enum machine_mode wider_mode;
3438   optab moptab;
3439   rtx tem;
3440   int size;
3441   bool speed = optimize_insn_for_speed_p ();
3442
3443   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3444
3445   wider_mode = GET_MODE_WIDER_MODE (mode);
3446   size = GET_MODE_BITSIZE (mode);
3447
3448   /* Firstly, try using a multiplication insn that only generates the needed
3449      high part of the product, and in the sign flavor of unsignedp.  */
3450   if (mul_highpart_cost (speed, mode) < max_cost)
3451     {
3452       moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3453       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3454                           unsignedp, OPTAB_DIRECT);
3455       if (tem)
3456         return tem;
3457     }
3458
3459   /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3460      Need to adjust the result after the multiplication.  */
3461   if (size - 1 < BITS_PER_WORD
3462       && (mul_highpart_cost (speed, mode)
3463           + 2 * shift_cost (speed, mode, size-1)
3464           + 4 * add_cost (speed, mode) < max_cost))
3465     {
3466       moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3467       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3468                           unsignedp, OPTAB_DIRECT);
3469       if (tem)
3470         /* We used the wrong signedness.  Adjust the result.  */
3471         return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3472                                             tem, unsignedp);
3473     }
3474
3475   /* Try widening multiplication.  */
3476   moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3477   if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3478       && mul_widen_cost (speed, wider_mode) < max_cost)
3479     {
3480       tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3481                           unsignedp, OPTAB_WIDEN);
3482       if (tem)
3483         return extract_high_half (mode, tem);
3484     }
3485
3486   /* Try widening the mode and perform a non-widening multiplication.  */
3487   if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3488       && size - 1 < BITS_PER_WORD
3489       && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3490           < max_cost))
3491     {
3492       rtx insns, wop0, wop1;
3493
3494       /* We need to widen the operands, for example to ensure the
3495          constant multiplier is correctly sign or zero extended.
3496          Use a sequence to clean-up any instructions emitted by
3497          the conversions if things don't work out.  */
3498       start_sequence ();
3499       wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3500       wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3501       tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3502                           unsignedp, OPTAB_WIDEN);
3503       insns = get_insns ();
3504       end_sequence ();
3505
3506       if (tem)
3507         {
3508           emit_insn (insns);
3509           return extract_high_half (mode, tem);
3510         }
3511     }
3512
3513   /* Try widening multiplication of opposite signedness, and adjust.  */
3514   moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3515   if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3516       && size - 1 < BITS_PER_WORD
3517       && (mul_widen_cost (speed, wider_mode)
3518           + 2 * shift_cost (speed, mode, size-1)
3519           + 4 * add_cost (speed, mode) < max_cost))
3520     {
3521       tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3522                           NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3523       if (tem != 0)
3524         {
3525           tem = extract_high_half (mode, tem);
3526           /* We used the wrong signedness.  Adjust the result.  */
3527           return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3528                                               target, unsignedp);
3529         }
3530     }
3531
3532   return 0;
3533 }
3534
3535 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3536    putting the high half of the result in TARGET if that is convenient,
3537    and return where the result is.  If the operation can not be performed,
3538    0 is returned.
3539
3540    MODE is the mode of operation and result.
3541
3542    UNSIGNEDP nonzero means unsigned multiply.
3543
3544    MAX_COST is the total allowed cost for the expanded RTL.  */
3545
3546 static rtx
3547 expmed_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
3548                       rtx target, int unsignedp, int max_cost)
3549 {
3550   enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3551   unsigned HOST_WIDE_INT cnst1;
3552   int extra_cost;
3553   bool sign_adjust = false;
3554   enum mult_variant variant;
3555   struct algorithm alg;
3556   rtx tem;
3557   bool speed = optimize_insn_for_speed_p ();
3558
3559   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3560   /* We can't support modes wider than HOST_BITS_PER_INT.  */
3561   gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3562
3563   cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3564
3565   /* We can't optimize modes wider than BITS_PER_WORD.
3566      ??? We might be able to perform double-word arithmetic if
3567      mode == word_mode, however all the cost calculations in
3568      synth_mult etc. assume single-word operations.  */
3569   if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3570     return expmed_mult_highpart_optab (mode, op0, op1, target,
3571                                        unsignedp, max_cost);
3572
3573   extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3574
3575   /* Check whether we try to multiply by a negative constant.  */
3576   if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3577     {
3578       sign_adjust = true;
3579       extra_cost += add_cost (speed, mode);
3580     }
3581
3582   /* See whether shift/add multiplication is cheap enough.  */
3583   if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3584                            max_cost - extra_cost))
3585     {
3586       /* See whether the specialized multiplication optabs are
3587          cheaper than the shift/add version.  */
3588       tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3589                                         alg.cost.cost + extra_cost);
3590       if (tem)
3591         return tem;
3592
3593       tem = convert_to_mode (wider_mode, op0, unsignedp);
3594       tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3595       tem = extract_high_half (mode, tem);
3596
3597       /* Adjust result for signedness.  */
3598       if (sign_adjust)
3599         tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3600
3601       return tem;
3602     }
3603   return expmed_mult_highpart_optab (mode, op0, op1, target,
3604                                      unsignedp, max_cost);
3605 }
3606
3607
3608 /* Expand signed modulus of OP0 by a power of two D in mode MODE.  */
3609
3610 static rtx
3611 expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3612 {
3613   unsigned HOST_WIDE_INT masklow, maskhigh;
3614   rtx result, temp, shift, label;
3615   int logd;
3616
3617   logd = floor_log2 (d);
3618   result = gen_reg_rtx (mode);
3619
3620   /* Avoid conditional branches when they're expensive.  */
3621   if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3622       && optimize_insn_for_speed_p ())
3623     {
3624       rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3625                                       mode, 0, -1);
3626       if (signmask)
3627         {
3628           signmask = force_reg (mode, signmask);
3629           masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3630           shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3631
3632           /* Use the rtx_cost of a LSHIFTRT instruction to determine
3633              which instruction sequence to use.  If logical right shifts
3634              are expensive the use 2 XORs, 2 SUBs and an AND, otherwise
3635              use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */
3636
3637           temp = gen_rtx_LSHIFTRT (mode, result, shift);
3638           if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3639               || (set_src_cost (temp, optimize_insn_for_speed_p ())
3640                   > COSTS_N_INSNS (2)))
3641             {
3642               temp = expand_binop (mode, xor_optab, op0, signmask,
3643                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3644               temp = expand_binop (mode, sub_optab, temp, signmask,
3645                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3646               temp = expand_binop (mode, and_optab, temp,
3647                                    gen_int_mode (masklow, mode),
3648                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3649               temp = expand_binop (mode, xor_optab, temp, signmask,
3650                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3651               temp = expand_binop (mode, sub_optab, temp, signmask,
3652                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3653             }
3654           else
3655             {
3656               signmask = expand_binop (mode, lshr_optab, signmask, shift,
3657                                        NULL_RTX, 1, OPTAB_LIB_WIDEN);
3658               signmask = force_reg (mode, signmask);
3659
3660               temp = expand_binop (mode, add_optab, op0, signmask,
3661                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3662               temp = expand_binop (mode, and_optab, temp,
3663                                    gen_int_mode (masklow, mode),
3664                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3665               temp = expand_binop (mode, sub_optab, temp, signmask,
3666                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3667             }
3668           return temp;
3669         }
3670     }
3671
3672   /* Mask contains the mode's signbit and the significant bits of the
3673      modulus.  By including the signbit in the operation, many targets
3674      can avoid an explicit compare operation in the following comparison
3675      against zero.  */
3676
3677   masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3678   if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
3679     {
3680       masklow |= HOST_WIDE_INT_M1U << (GET_MODE_BITSIZE (mode) - 1);
3681       maskhigh = -1;
3682     }
3683   else
3684     maskhigh = HOST_WIDE_INT_M1U
3685                  << (GET_MODE_BITSIZE (mode) - HOST_BITS_PER_WIDE_INT - 1);
3686
3687   temp = expand_binop (mode, and_optab, op0,
3688                        immed_double_const (masklow, maskhigh, mode),
3689                        result, 1, OPTAB_LIB_WIDEN);
3690   if (temp != result)
3691     emit_move_insn (result, temp);
3692
3693   label = gen_label_rtx ();
3694   do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3695
3696   temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3697                        0, OPTAB_LIB_WIDEN);
3698   masklow = HOST_WIDE_INT_M1U << logd;
3699   maskhigh = -1;
3700   temp = expand_binop (mode, ior_optab, temp,
3701                        immed_double_const (masklow, maskhigh, mode),
3702                        result, 1, OPTAB_LIB_WIDEN);
3703   temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3704                        0, OPTAB_LIB_WIDEN);
3705   if (temp != result)
3706     emit_move_insn (result, temp);
3707   emit_label (label);
3708   return result;
3709 }
3710
3711 /* Expand signed division of OP0 by a power of two D in mode MODE.
3712    This routine is only called for positive values of D.  */
3713
3714 static rtx
3715 expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3716 {
3717   rtx temp, label;
3718   int logd;
3719
3720   logd = floor_log2 (d);
3721
3722   if (d == 2
3723       && BRANCH_COST (optimize_insn_for_speed_p (),
3724                       false) >= 1)
3725     {
3726       temp = gen_reg_rtx (mode);
3727       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3728       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3729                            0, OPTAB_LIB_WIDEN);
3730       return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3731     }
3732
3733 #ifdef HAVE_conditional_move
3734   if (BRANCH_COST (optimize_insn_for_speed_p (), false)
3735       >= 2)
3736     {
3737       rtx temp2;
3738
3739       start_sequence ();
3740       temp2 = copy_to_mode_reg (mode, op0);
3741       temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
3742                            NULL_RTX, 0, OPTAB_LIB_WIDEN);
3743       temp = force_reg (mode, temp);
3744
3745       /* Construct "temp2 = (temp2 < 0) ? temp : temp2".  */
3746       temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3747                                      mode, temp, temp2, mode, 0);
3748       if (temp2)
3749         {
3750           rtx seq = get_insns ();
3751           end_sequence ();
3752           emit_insn (seq);
3753           return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
3754         }
3755       end_sequence ();
3756     }
3757 #endif
3758
3759   if (BRANCH_COST (optimize_insn_for_speed_p (),
3760                    false) >= 2)
3761     {
3762       int ushift = GET_MODE_BITSIZE (mode) - logd;
3763
3764       temp = gen_reg_rtx (mode);
3765       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3766       if (shift_cost (optimize_insn_for_speed_p (), mode, ushift)
3767           > COSTS_N_INSNS (1))
3768         temp = expand_binop (mode, and_optab, temp, gen_int_mode (d - 1, mode),
3769                              NULL_RTX, 0, OPTAB_LIB_WIDEN);
3770       else
3771         temp = expand_shift (RSHIFT_EXPR, mode, temp,
3772                              ushift, NULL_RTX, 1);
3773       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3774                            0, OPTAB_LIB_WIDEN);
3775       return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3776     }
3777
3778   label = gen_label_rtx ();
3779   temp = copy_to_mode_reg (mode, op0);
3780   do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3781   expand_inc (temp, gen_int_mode (d - 1, mode));
3782   emit_label (label);
3783   return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3784 }
3785 \f
3786 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3787    if that is convenient, and returning where the result is.
3788    You may request either the quotient or the remainder as the result;
3789    specify REM_FLAG nonzero to get the remainder.
3790
3791    CODE is the expression code for which kind of division this is;
3792    it controls how rounding is done.  MODE is the machine mode to use.
3793    UNSIGNEDP nonzero means do unsigned division.  */
3794
3795 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3796    and then correct it by or'ing in missing high bits
3797    if result of ANDI is nonzero.
3798    For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3799    This could optimize to a bfexts instruction.
3800    But C doesn't use these operations, so their optimizations are
3801    left for later.  */
3802 /* ??? For modulo, we don't actually need the highpart of the first product,
3803    the low part will do nicely.  And for small divisors, the second multiply
3804    can also be a low-part only multiply or even be completely left out.
3805    E.g. to calculate the remainder of a division by 3 with a 32 bit
3806    multiply, multiply with 0x55555556 and extract the upper two bits;
3807    the result is exact for inputs up to 0x1fffffff.
3808    The input range can be reduced by using cross-sum rules.
3809    For odd divisors >= 3, the following table gives right shift counts
3810    so that if a number is shifted by an integer multiple of the given
3811    amount, the remainder stays the same:
3812    2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3813    14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3814    0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3815    20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3816    0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3817
3818    Cross-sum rules for even numbers can be derived by leaving as many bits
3819    to the right alone as the divisor has zeros to the right.
3820    E.g. if x is an unsigned 32 bit number:
3821    (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3822    */
3823
3824 rtx
3825 expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3826                rtx op0, rtx op1, rtx target, int unsignedp)
3827 {
3828   enum machine_mode compute_mode;
3829   rtx tquotient;
3830   rtx quotient = 0, remainder = 0;
3831   rtx last;
3832   int size;
3833   rtx insn;
3834   optab optab1, optab2;
3835   int op1_is_constant, op1_is_pow2 = 0;
3836   int max_cost, extra_cost;
3837   static HOST_WIDE_INT last_div_const = 0;
3838   bool speed = optimize_insn_for_speed_p ();
3839
3840   op1_is_constant = CONST_INT_P (op1);
3841   if (op1_is_constant)
3842     {
3843       unsigned HOST_WIDE_INT ext_op1 = UINTVAL (op1);
3844       if (unsignedp)
3845         ext_op1 &= GET_MODE_MASK (mode);
3846       op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3847                      || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3848     }
3849
3850   /*
3851      This is the structure of expand_divmod:
3852
3853      First comes code to fix up the operands so we can perform the operations
3854      correctly and efficiently.
3855
3856      Second comes a switch statement with code specific for each rounding mode.
3857      For some special operands this code emits all RTL for the desired
3858      operation, for other cases, it generates only a quotient and stores it in
3859      QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
3860      to indicate that it has not done anything.
3861
3862      Last comes code that finishes the operation.  If QUOTIENT is set and
3863      REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
3864      QUOTIENT is not set, it is computed using trunc rounding.
3865
3866      We try to generate special code for division and remainder when OP1 is a
3867      constant.  If |OP1| = 2**n we can use shifts and some other fast
3868      operations.  For other values of OP1, we compute a carefully selected
3869      fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3870      by m.
3871
3872      In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3873      half of the product.  Different strategies for generating the product are
3874      implemented in expmed_mult_highpart.
3875
3876      If what we actually want is the remainder, we generate that by another
3877      by-constant multiplication and a subtraction.  */
3878
3879   /* We shouldn't be called with OP1 == const1_rtx, but some of the
3880      code below will malfunction if we are, so check here and handle
3881      the special case if so.  */
3882   if (op1 == const1_rtx)
3883     return rem_flag ? const0_rtx : op0;
3884
3885     /* When dividing by -1, we could get an overflow.
3886      negv_optab can handle overflows.  */
3887   if (! unsignedp && op1 == constm1_rtx)
3888     {
3889       if (rem_flag)
3890         return const0_rtx;
3891       return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT
3892                           ? negv_optab : neg_optab, op0, target, 0);
3893     }
3894
3895   if (target
3896       /* Don't use the function value register as a target
3897          since we have to read it as well as write it,
3898          and function-inlining gets confused by this.  */
3899       && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3900           /* Don't clobber an operand while doing a multi-step calculation.  */
3901           || ((rem_flag || op1_is_constant)
3902               && (reg_mentioned_p (target, op0)
3903                   || (MEM_P (op0) && MEM_P (target))))
3904           || reg_mentioned_p (target, op1)
3905           || (MEM_P (op1) && MEM_P (target))))
3906     target = 0;
3907
3908   /* Get the mode in which to perform this computation.  Normally it will
3909      be MODE, but sometimes we can't do the desired operation in MODE.
3910      If so, pick a wider mode in which we can do the operation.  Convert
3911      to that mode at the start to avoid repeated conversions.
3912
3913      First see what operations we need.  These depend on the expression
3914      we are evaluating.  (We assume that divxx3 insns exist under the
3915      same conditions as modxx3 insns and that these insns don't normally
3916      fail.  If these assumptions are not correct, we may generate less
3917      efficient code in some cases.)
3918
3919      Then see if we find a mode in which we can open-code that operation
3920      (either a division, modulus, or shift).  Finally, check for the smallest
3921      mode for which we can do the operation with a library call.  */
3922
3923   /* We might want to refine this now that we have division-by-constant
3924      optimization.  Since expmed_mult_highpart tries so many variants, it is
3925      not straightforward to generalize this.  Maybe we should make an array
3926      of possible modes in init_expmed?  Save this for GCC 2.7.  */
3927
3928   optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3929             ? (unsignedp ? lshr_optab : ashr_optab)
3930             : (unsignedp ? udiv_optab : sdiv_optab));
3931   optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3932             ? optab1
3933             : (unsignedp ? udivmod_optab : sdivmod_optab));
3934
3935   for (compute_mode = mode; compute_mode != VOIDmode;
3936        compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3937     if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
3938         || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
3939       break;
3940
3941   if (compute_mode == VOIDmode)
3942     for (compute_mode = mode; compute_mode != VOIDmode;
3943          compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3944       if (optab_libfunc (optab1, compute_mode)
3945           || optab_libfunc (optab2, compute_mode))
3946         break;
3947
3948   /* If we still couldn't find a mode, use MODE, but expand_binop will
3949      probably die.  */
3950   if (compute_mode == VOIDmode)
3951     compute_mode = mode;
3952
3953   if (target && GET_MODE (target) == compute_mode)
3954     tquotient = target;
3955   else
3956     tquotient = gen_reg_rtx (compute_mode);
3957
3958   size = GET_MODE_BITSIZE (compute_mode);
3959 #if 0
3960   /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3961      (mode), and thereby get better code when OP1 is a constant.  Do that
3962      later.  It will require going over all usages of SIZE below.  */
3963   size = GET_MODE_BITSIZE (mode);
3964 #endif
3965
3966   /* Only deduct something for a REM if the last divide done was
3967      for a different constant.   Then set the constant of the last
3968      divide.  */
3969   max_cost = (unsignedp
3970               ? udiv_cost (speed, compute_mode)
3971               : sdiv_cost (speed, compute_mode));
3972   if (rem_flag && ! (last_div_const != 0 && op1_is_constant
3973                      && INTVAL (op1) == last_div_const))
3974     max_cost -= (mul_cost (speed, compute_mode)
3975                  + add_cost (speed, compute_mode));
3976
3977   last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
3978
3979   /* Now convert to the best mode to use.  */
3980   if (compute_mode != mode)
3981     {
3982       op0 = convert_modes (compute_mode, mode, op0, unsignedp);
3983       op1 = convert_modes (compute_mode, mode, op1, unsignedp);
3984
3985       /* convert_modes may have placed op1 into a register, so we
3986          must recompute the following.  */
3987       op1_is_constant = CONST_INT_P (op1);
3988       op1_is_pow2 = (op1_is_constant
3989                      && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
3990                           || (! unsignedp
3991                               && EXACT_POWER_OF_2_OR_ZERO_P (-UINTVAL (op1))))));
3992     }
3993
3994   /* If one of the operands is a volatile MEM, copy it into a register.  */
3995
3996   if (MEM_P (op0) && MEM_VOLATILE_P (op0))
3997     op0 = force_reg (compute_mode, op0);
3998   if (MEM_P (op1) && MEM_VOLATILE_P (op1))
3999     op1 = force_reg (compute_mode, op1);
4000
4001   /* If we need the remainder or if OP1 is constant, we need to
4002      put OP0 in a register in case it has any queued subexpressions.  */
4003   if (rem_flag || op1_is_constant)
4004     op0 = force_reg (compute_mode, op0);
4005
4006   last = get_last_insn ();
4007
4008   /* Promote floor rounding to trunc rounding for unsigned operations.  */
4009   if (unsignedp)
4010     {
4011       if (code == FLOOR_DIV_EXPR)
4012         code = TRUNC_DIV_EXPR;
4013       if (code == FLOOR_MOD_EXPR)
4014         code = TRUNC_MOD_EXPR;
4015       if (code == EXACT_DIV_EXPR && op1_is_pow2)
4016         code = TRUNC_DIV_EXPR;
4017     }
4018
4019   if (op1 != const0_rtx)
4020     switch (code)
4021       {
4022       case TRUNC_MOD_EXPR:
4023       case TRUNC_DIV_EXPR:
4024         if (op1_is_constant)
4025           {
4026             if (unsignedp)
4027               {
4028                 unsigned HOST_WIDE_INT mh, ml;
4029                 int pre_shift, post_shift;
4030                 int dummy;
4031                 unsigned HOST_WIDE_INT d = (INTVAL (op1)
4032                                             & GET_MODE_MASK (compute_mode));
4033
4034                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4035                   {
4036                     pre_shift = floor_log2 (d);
4037                     if (rem_flag)
4038                       {
4039                         unsigned HOST_WIDE_INT mask
4040                           = ((unsigned HOST_WIDE_INT) 1 << pre_shift) - 1;
4041                         remainder
4042                           = expand_binop (compute_mode, and_optab, op0,
4043                                           gen_int_mode (mask, compute_mode),
4044                                           remainder, 1,
4045                                           OPTAB_LIB_WIDEN);
4046                         if (remainder)
4047                           return gen_lowpart (mode, remainder);
4048                       }
4049                     quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4050                                              pre_shift, tquotient, 1);
4051                   }
4052                 else if (size <= HOST_BITS_PER_WIDE_INT)
4053                   {
4054                     if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
4055                       {
4056                         /* Most significant bit of divisor is set; emit an scc
4057                            insn.  */
4058                         quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4059                                                           compute_mode, 1, 1);
4060                       }
4061                     else
4062                       {
4063                         /* Find a suitable multiplier and right shift count
4064                            instead of multiplying with D.  */
4065
4066                         mh = choose_multiplier (d, size, size,
4067                                                 &ml, &post_shift, &dummy);
4068
4069                         /* If the suggested multiplier is more than SIZE bits,
4070                            we can do better for even divisors, using an
4071                            initial right shift.  */
4072                         if (mh != 0 && (d & 1) == 0)
4073                           {
4074                             pre_shift = floor_log2 (d & -d);
4075                             mh = choose_multiplier (d >> pre_shift, size,
4076                                                     size - pre_shift,
4077                                                     &ml, &post_shift, &dummy);
4078                             gcc_assert (!mh);
4079                           }
4080                         else
4081                           pre_shift = 0;
4082
4083                         if (mh != 0)
4084                           {
4085                             rtx t1, t2, t3, t4;
4086
4087                             if (post_shift - 1 >= BITS_PER_WORD)
4088                               goto fail1;
4089
4090                             extra_cost
4091                               = (shift_cost (speed, compute_mode, post_shift - 1)
4092                                  + shift_cost (speed, compute_mode, 1)
4093                                  + 2 * add_cost (speed, compute_mode));
4094                             t1 = expmed_mult_highpart
4095                               (compute_mode, op0,
4096                                gen_int_mode (ml, compute_mode),
4097                                NULL_RTX, 1, max_cost - extra_cost);
4098                             if (t1 == 0)
4099                               goto fail1;
4100                             t2 = force_operand (gen_rtx_MINUS (compute_mode,
4101                                                                op0, t1),
4102                                                 NULL_RTX);
4103                             t3 = expand_shift (RSHIFT_EXPR, compute_mode,
4104                                                t2, 1, NULL_RTX, 1);
4105                             t4 = force_operand (gen_rtx_PLUS (compute_mode,
4106                                                               t1, t3),
4107                                                 NULL_RTX);
4108                             quotient = expand_shift
4109                               (RSHIFT_EXPR, compute_mode, t4,
4110                                post_shift - 1, tquotient, 1);
4111                           }
4112                         else
4113                           {
4114                             rtx t1, t2;
4115
4116                             if (pre_shift >= BITS_PER_WORD
4117                                 || post_shift >= BITS_PER_WORD)
4118                               goto fail1;
4119
4120                             t1 = expand_shift
4121                               (RSHIFT_EXPR, compute_mode, op0,
4122                                pre_shift, NULL_RTX, 1);
4123                             extra_cost
4124                               = (shift_cost (speed, compute_mode, pre_shift)
4125                                  + shift_cost (speed, compute_mode, post_shift));
4126                             t2 = expmed_mult_highpart
4127                               (compute_mode, t1,
4128                                gen_int_mode (ml, compute_mode),
4129                                NULL_RTX, 1, max_cost - extra_cost);
4130                             if (t2 == 0)
4131                               goto fail1;
4132                             quotient = expand_shift
4133                               (RSHIFT_EXPR, compute_mode, t2,
4134                                post_shift, tquotient, 1);
4135                           }
4136                       }
4137                   }
4138                 else            /* Too wide mode to use tricky code */
4139                   break;
4140
4141                 insn = get_last_insn ();
4142                 if (insn != last)
4143                   set_dst_reg_note (insn, REG_EQUAL,
4144                                     gen_rtx_UDIV (compute_mode, op0, op1),
4145                                     quotient);
4146               }
4147             else                /* TRUNC_DIV, signed */
4148               {
4149                 unsigned HOST_WIDE_INT ml;
4150                 int lgup, post_shift;
4151                 rtx mlr;
4152                 HOST_WIDE_INT d = INTVAL (op1);
4153                 unsigned HOST_WIDE_INT abs_d;
4154
4155                 /* Since d might be INT_MIN, we have to cast to
4156                    unsigned HOST_WIDE_INT before negating to avoid
4157                    undefined signed overflow.  */
4158                 abs_d = (d >= 0
4159                          ? (unsigned HOST_WIDE_INT) d
4160                          : - (unsigned HOST_WIDE_INT) d);
4161
4162                 /* n rem d = n rem -d */
4163                 if (rem_flag && d < 0)
4164                   {
4165                     d = abs_d;
4166                     op1 = gen_int_mode (abs_d, compute_mode);
4167                   }
4168
4169                 if (d == 1)
4170                   quotient = op0;
4171                 else if (d == -1)
4172                   quotient = expand_unop (compute_mode, neg_optab, op0,
4173                                           tquotient, 0);
4174                 else if (HOST_BITS_PER_WIDE_INT >= size
4175                          && abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4176                   {
4177                     /* This case is not handled correctly below.  */
4178                     quotient = emit_store_flag (tquotient, EQ, op0, op1,
4179                                                 compute_mode, 1, 1);
4180                     if (quotient == 0)
4181                       goto fail1;
4182                   }
4183                 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4184                          && (rem_flag
4185                              ? smod_pow2_cheap (speed, compute_mode)
4186                              : sdiv_pow2_cheap (speed, compute_mode))
4187                          /* We assume that cheap metric is true if the
4188                             optab has an expander for this mode.  */
4189                          && ((optab_handler ((rem_flag ? smod_optab
4190                                               : sdiv_optab),
4191                                              compute_mode)
4192                               != CODE_FOR_nothing)
4193                              || (optab_handler (sdivmod_optab,
4194                                                 compute_mode)
4195                                  != CODE_FOR_nothing)))
4196                   ;
4197                 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4198                   {
4199                     if (rem_flag)
4200                       {
4201                         remainder = expand_smod_pow2 (compute_mode, op0, d);
4202                         if (remainder)
4203                           return gen_lowpart (mode, remainder);
4204                       }
4205
4206                     if (sdiv_pow2_cheap (speed, compute_mode)
4207                         && ((optab_handler (sdiv_optab, compute_mode)
4208                              != CODE_FOR_nothing)
4209                             || (optab_handler (sdivmod_optab, compute_mode)
4210                                 != CODE_FOR_nothing)))
4211                       quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4212                                                 compute_mode, op0,
4213                                                 gen_int_mode (abs_d,
4214                                                               compute_mode),
4215                                                 NULL_RTX, 0);
4216                     else
4217                       quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4218
4219                     /* We have computed OP0 / abs(OP1).  If OP1 is negative,
4220                        negate the quotient.  */
4221                     if (d < 0)
4222                       {
4223                         insn = get_last_insn ();
4224                         if (insn != last
4225                             && abs_d < ((unsigned HOST_WIDE_INT) 1
4226                                         << (HOST_BITS_PER_WIDE_INT - 1)))
4227                           set_dst_reg_note (insn, REG_EQUAL,
4228                                             gen_rtx_DIV (compute_mode, op0,
4229                                                          gen_int_mode
4230                                                            (abs_d,
4231                                                             compute_mode)),
4232                                             quotient);
4233
4234                         quotient = expand_unop (compute_mode, neg_optab,
4235                                                 quotient, quotient, 0);
4236                       }
4237                   }
4238                 else if (size <= HOST_BITS_PER_WIDE_INT)
4239                   {
4240                     choose_multiplier (abs_d, size, size - 1,
4241                                        &ml, &post_shift, &lgup);
4242                     if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4243                       {
4244                         rtx t1, t2, t3;
4245
4246                         if (post_shift >= BITS_PER_WORD
4247                             || size - 1 >= BITS_PER_WORD)
4248                           goto fail1;
4249
4250                         extra_cost = (shift_cost (speed, compute_mode, post_shift)
4251                                       + shift_cost (speed, compute_mode, size - 1)
4252                                       + add_cost (speed, compute_mode));
4253                         t1 = expmed_mult_highpart
4254                           (compute_mode, op0, gen_int_mode (ml, compute_mode),
4255                            NULL_RTX, 0, max_cost - extra_cost);
4256                         if (t1 == 0)
4257                           goto fail1;
4258                         t2 = expand_shift
4259                           (RSHIFT_EXPR, compute_mode, t1,
4260                            post_shift, NULL_RTX, 0);
4261                         t3 = expand_shift
4262                           (RSHIFT_EXPR, compute_mode, op0,
4263                            size - 1, NULL_RTX, 0);
4264                         if (d < 0)
4265                           quotient
4266                             = force_operand (gen_rtx_MINUS (compute_mode,
4267                                                             t3, t2),
4268                                              tquotient);
4269                         else
4270                           quotient
4271                             = force_operand (gen_rtx_MINUS (compute_mode,
4272                                                             t2, t3),
4273                                              tquotient);
4274                       }
4275                     else
4276                       {
4277                         rtx t1, t2, t3, t4;
4278
4279                         if (post_shift >= BITS_PER_WORD
4280                             || size - 1 >= BITS_PER_WORD)
4281                           goto fail1;
4282
4283                         ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4284                         mlr = gen_int_mode (ml, compute_mode);
4285                         extra_cost = (shift_cost (speed, compute_mode, post_shift)
4286                                       + shift_cost (speed, compute_mode, size - 1)
4287                                       + 2 * add_cost (speed, compute_mode));
4288                         t1 = expmed_mult_highpart (compute_mode, op0, mlr,
4289                                                    NULL_RTX, 0,
4290                                                    max_cost - extra_cost);
4291                         if (t1 == 0)
4292                           goto fail1;
4293                         t2 = force_operand (gen_rtx_PLUS (compute_mode,
4294                                                           t1, op0),
4295                                             NULL_RTX);
4296                         t3 = expand_shift
4297                           (RSHIFT_EXPR, compute_mode, t2,
4298                            post_shift, NULL_RTX, 0);
4299                         t4 = expand_shift
4300                           (RSHIFT_EXPR, compute_mode, op0,
4301                            size - 1, NULL_RTX, 0);
4302                         if (d < 0)
4303                           quotient
4304                             = force_operand (gen_rtx_MINUS (compute_mode,
4305                                                             t4, t3),
4306                                              tquotient);
4307                         else
4308                           quotient
4309                             = force_operand (gen_rtx_MINUS (compute_mode,
4310                                                             t3, t4),
4311                                              tquotient);
4312                       }
4313                   }
4314                 else            /* Too wide mode to use tricky code */
4315                   break;
4316
4317                 insn = get_last_insn ();
4318                 if (insn != last)
4319                   set_dst_reg_note (insn, REG_EQUAL,
4320                                     gen_rtx_DIV (compute_mode, op0, op1),
4321                                     quotient);
4322               }
4323             break;
4324           }
4325       fail1:
4326         delete_insns_since (last);
4327         break;
4328
4329       case FLOOR_DIV_EXPR:
4330       case FLOOR_MOD_EXPR:
4331       /* We will come here only for signed operations.  */
4332         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4333           {
4334             unsigned HOST_WIDE_INT mh, ml;
4335             int pre_shift, lgup, post_shift;
4336             HOST_WIDE_INT d = INTVAL (op1);
4337
4338             if (d > 0)
4339               {
4340                 /* We could just as easily deal with negative constants here,
4341                    but it does not seem worth the trouble for GCC 2.6.  */
4342                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4343                   {
4344                     pre_shift = floor_log2 (d);
4345                     if (rem_flag)
4346                       {
4347                         unsigned HOST_WIDE_INT mask
4348                           = ((unsigned HOST_WIDE_INT) 1 << pre_shift) - 1;
4349                         remainder = expand_binop
4350                           (compute_mode, and_optab, op0,
4351                            gen_int_mode (mask, compute_mode),
4352                            remainder, 0, OPTAB_LIB_WIDEN);
4353                         if (remainder)
4354                           return gen_lowpart (mode, remainder);
4355                       }
4356                     quotient = expand_shift
4357                       (RSHIFT_EXPR, compute_mode, op0,
4358                        pre_shift, tquotient, 0);
4359                   }
4360                 else
4361                   {
4362                     rtx t1, t2, t3, t4;
4363
4364                     mh = choose_multiplier (d, size, size - 1,
4365                                             &ml, &post_shift, &lgup);
4366                     gcc_assert (!mh);
4367
4368                     if (post_shift < BITS_PER_WORD
4369                         && size - 1 < BITS_PER_WORD)
4370                       {
4371                         t1 = expand_shift
4372                           (RSHIFT_EXPR, compute_mode, op0,
4373                            size - 1, NULL_RTX, 0);
4374                         t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4375                                            NULL_RTX, 0, OPTAB_WIDEN);
4376                         extra_cost = (shift_cost (speed, compute_mode, post_shift)
4377                                       + shift_cost (speed, compute_mode, size - 1)
4378                                       + 2 * add_cost (speed, compute_mode));
4379                         t3 = expmed_mult_highpart
4380                           (compute_mode, t2, gen_int_mode (ml, compute_mode),
4381                            NULL_RTX, 1, max_cost - extra_cost);
4382                         if (t3 != 0)
4383                           {
4384                             t4 = expand_shift
4385                               (RSHIFT_EXPR, compute_mode, t3,
4386                                post_shift, NULL_RTX, 1);
4387                             quotient = expand_binop (compute_mode, xor_optab,
4388                                                      t4, t1, tquotient, 0,
4389                                                      OPTAB_WIDEN);
4390                           }
4391                       }
4392                   }
4393               }
4394             else
4395               {
4396                 rtx nsign, t1, t2, t3, t4;
4397                 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4398                                                   op0, constm1_rtx), NULL_RTX);
4399                 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4400                                    0, OPTAB_WIDEN);
4401                 nsign = expand_shift
4402                   (RSHIFT_EXPR, compute_mode, t2,
4403                    size - 1, NULL_RTX, 0);
4404                 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4405                                     NULL_RTX);
4406                 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4407                                     NULL_RTX, 0);
4408                 if (t4)
4409                   {
4410                     rtx t5;
4411                     t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4412                                       NULL_RTX, 0);
4413                     quotient = force_operand (gen_rtx_PLUS (compute_mode,
4414                                                             t4, t5),
4415                                               tquotient);
4416                   }
4417               }
4418           }
4419
4420         if (quotient != 0)
4421           break;
4422         delete_insns_since (last);
4423
4424         /* Try using an instruction that produces both the quotient and
4425            remainder, using truncation.  We can easily compensate the quotient
4426            or remainder to get floor rounding, once we have the remainder.
4427            Note that we also compute the final remainder value here,
4428            and return the result right away.  */
4429         if (target == 0 || GET_MODE (target) != compute_mode)
4430           target = gen_reg_rtx (compute_mode);
4431
4432         if (rem_flag)
4433           {
4434             remainder
4435               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4436             quotient = gen_reg_rtx (compute_mode);
4437           }
4438         else
4439           {
4440             quotient
4441               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4442             remainder = gen_reg_rtx (compute_mode);
4443           }
4444
4445         if (expand_twoval_binop (sdivmod_optab, op0, op1,
4446                                  quotient, remainder, 0))
4447           {
4448             /* This could be computed with a branch-less sequence.
4449                Save that for later.  */
4450             rtx tem;
4451             rtx label = gen_label_rtx ();
4452             do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4453             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4454                                 NULL_RTX, 0, OPTAB_WIDEN);
4455             do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4456             expand_dec (quotient, const1_rtx);
4457             expand_inc (remainder, op1);
4458             emit_label (label);
4459             return gen_lowpart (mode, rem_flag ? remainder : quotient);
4460           }
4461
4462         /* No luck with division elimination or divmod.  Have to do it
4463            by conditionally adjusting op0 *and* the result.  */
4464         {
4465           rtx label1, label2, label3, label4, label5;
4466           rtx adjusted_op0;
4467           rtx tem;
4468
4469           quotient = gen_reg_rtx (compute_mode);
4470           adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4471           label1 = gen_label_rtx ();
4472           label2 = gen_label_rtx ();
4473           label3 = gen_label_rtx ();
4474           label4 = gen_label_rtx ();
4475           label5 = gen_label_rtx ();
4476           do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4477           do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4478           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4479                               quotient, 0, OPTAB_LIB_WIDEN);
4480           if (tem != quotient)
4481             emit_move_insn (quotient, tem);
4482           emit_jump_insn (gen_jump (label5));
4483           emit_barrier ();
4484           emit_label (label1);
4485           expand_inc (adjusted_op0, const1_rtx);
4486           emit_jump_insn (gen_jump (label4));
4487           emit_barrier ();
4488           emit_label (label2);
4489           do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4490           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4491                               quotient, 0, OPTAB_LIB_WIDEN);
4492           if (tem != quotient)
4493             emit_move_insn (quotient, tem);
4494           emit_jump_insn (gen_jump (label5));
4495           emit_barrier ();
4496           emit_label (label3);
4497           expand_dec (adjusted_op0, const1_rtx);
4498           emit_label (label4);
4499           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4500                               quotient, 0, OPTAB_LIB_WIDEN);
4501           if (tem != quotient)
4502             emit_move_insn (quotient, tem);
4503           expand_dec (quotient, const1_rtx);
4504           emit_label (label5);
4505         }
4506         break;
4507
4508       case CEIL_DIV_EXPR:
4509       case CEIL_MOD_EXPR:
4510         if (unsignedp)
4511           {
4512             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4513               {
4514                 rtx t1, t2, t3;
4515                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4516                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4517                                    floor_log2 (d), tquotient, 1);
4518                 t2 = expand_binop (compute_mode, and_optab, op0,
4519                                    gen_int_mode (d - 1, compute_mode),
4520                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4521                 t3 = gen_reg_rtx (compute_mode);
4522                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4523                                       compute_mode, 1, 1);
4524                 if (t3 == 0)
4525                   {
4526                     rtx lab;
4527                     lab = gen_label_rtx ();
4528                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4529                     expand_inc (t1, const1_rtx);
4530                     emit_label (lab);
4531                     quotient = t1;
4532                   }
4533                 else
4534                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4535                                                           t1, t3),
4536                                             tquotient);
4537                 break;
4538               }
4539
4540             /* Try using an instruction that produces both the quotient and
4541                remainder, using truncation.  We can easily compensate the
4542                quotient or remainder to get ceiling rounding, once we have the
4543                remainder.  Notice that we compute also the final remainder
4544                value here, and return the result right away.  */
4545             if (target == 0 || GET_MODE (target) != compute_mode)
4546               target = gen_reg_rtx (compute_mode);
4547
4548             if (rem_flag)
4549               {
4550                 remainder = (REG_P (target)
4551                              ? target : gen_reg_rtx (compute_mode));
4552                 quotient = gen_reg_rtx (compute_mode);
4553               }
4554             else
4555               {
4556                 quotient = (REG_P (target)
4557                             ? target : gen_reg_rtx (compute_mode));
4558                 remainder = gen_reg_rtx (compute_mode);
4559               }
4560
4561             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4562                                      remainder, 1))
4563               {
4564                 /* This could be computed with a branch-less sequence.
4565                    Save that for later.  */
4566                 rtx label = gen_label_rtx ();
4567                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4568                                  compute_mode, label);
4569                 expand_inc (quotient, const1_rtx);
4570                 expand_dec (remainder, op1);
4571                 emit_label (label);
4572                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4573               }
4574
4575             /* No luck with division elimination or divmod.  Have to do it
4576                by conditionally adjusting op0 *and* the result.  */
4577             {
4578               rtx label1, label2;
4579               rtx adjusted_op0, tem;
4580
4581               quotient = gen_reg_rtx (compute_mode);
4582               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4583               label1 = gen_label_rtx ();
4584               label2 = gen_label_rtx ();
4585               do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4586                                compute_mode, label1);
4587               emit_move_insn  (quotient, const0_rtx);
4588               emit_jump_insn (gen_jump (label2));
4589               emit_barrier ();
4590               emit_label (label1);
4591               expand_dec (adjusted_op0, const1_rtx);
4592               tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4593                                   quotient, 1, OPTAB_LIB_WIDEN);
4594               if (tem != quotient)
4595                 emit_move_insn (quotient, tem);
4596               expand_inc (quotient, const1_rtx);
4597               emit_label (label2);
4598             }
4599           }
4600         else /* signed */
4601           {
4602             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4603                 && INTVAL (op1) >= 0)
4604               {
4605                 /* This is extremely similar to the code for the unsigned case
4606                    above.  For 2.7 we should merge these variants, but for
4607                    2.6.1 I don't want to touch the code for unsigned since that
4608                    get used in C.  The signed case will only be used by other
4609                    languages (Ada).  */
4610
4611                 rtx t1, t2, t3;
4612                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4613                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4614                                    floor_log2 (d), tquotient, 0);
4615                 t2 = expand_binop (compute_mode, and_optab, op0,
4616                                    gen_int_mode (d - 1, compute_mode),
4617                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4618                 t3 = gen_reg_rtx (compute_mode);
4619                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4620                                       compute_mode, 1, 1);
4621                 if (t3 == 0)
4622                   {
4623                     rtx lab;
4624                     lab = gen_label_rtx ();
4625                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4626                     expand_inc (t1, const1_rtx);
4627                     emit_label (lab);
4628                     quotient = t1;
4629                   }
4630                 else
4631                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4632                                                           t1, t3),
4633                                             tquotient);
4634                 break;
4635               }
4636
4637             /* Try using an instruction that produces both the quotient and
4638                remainder, using truncation.  We can easily compensate the
4639                quotient or remainder to get ceiling rounding, once we have the
4640                remainder.  Notice that we compute also the final remainder
4641                value here, and return the result right away.  */
4642             if (target == 0 || GET_MODE (target) != compute_mode)
4643               target = gen_reg_rtx (compute_mode);
4644             if (rem_flag)
4645               {
4646                 remainder= (REG_P (target)
4647                             ? target : gen_reg_rtx (compute_mode));
4648                 quotient = gen_reg_rtx (compute_mode);
4649               }
4650             else
4651               {
4652                 quotient = (REG_P (target)
4653                             ? target : gen_reg_rtx (compute_mode));
4654                 remainder = gen_reg_rtx (compute_mode);
4655               }
4656
4657             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4658                                      remainder, 0))
4659               {
4660                 /* This could be computed with a branch-less sequence.
4661                    Save that for later.  */
4662                 rtx tem;
4663                 rtx label = gen_label_rtx ();
4664                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4665                                  compute_mode, label);
4666                 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4667                                     NULL_RTX, 0, OPTAB_WIDEN);
4668                 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4669                 expand_inc (quotient, const1_rtx);
4670                 expand_dec (remainder, op1);
4671                 emit_label (label);
4672                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4673               }
4674
4675             /* No luck with division elimination or divmod.  Have to do it
4676                by conditionally adjusting op0 *and* the result.  */
4677             {
4678               rtx label1, label2, label3, label4, label5;
4679               rtx adjusted_op0;
4680               rtx tem;
4681
4682               quotient = gen_reg_rtx (compute_mode);
4683               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4684               label1 = gen_label_rtx ();
4685               label2 = gen_label_rtx ();
4686               label3 = gen_label_rtx ();
4687               label4 = gen_label_rtx ();
4688               label5 = gen_label_rtx ();
4689               do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4690               do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4691                                compute_mode, label1);
4692               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4693                                   quotient, 0, OPTAB_LIB_WIDEN);
4694               if (tem != quotient)
4695                 emit_move_insn (quotient, tem);
4696               emit_jump_insn (gen_jump (label5));
4697               emit_barrier ();
4698               emit_label (label1);
4699               expand_dec (adjusted_op0, const1_rtx);
4700               emit_jump_insn (gen_jump (label4));
4701               emit_barrier ();
4702               emit_label (label2);
4703               do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4704                                compute_mode, label3);
4705               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4706                                   quotient, 0, OPTAB_LIB_WIDEN);
4707               if (tem != quotient)
4708                 emit_move_insn (quotient, tem);
4709               emit_jump_insn (gen_jump (label5));
4710               emit_barrier ();
4711               emit_label (label3);
4712               expand_inc (adjusted_op0, const1_rtx);
4713               emit_label (label4);
4714               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4715                                   quotient, 0, OPTAB_LIB_WIDEN);
4716               if (tem != quotient)
4717                 emit_move_insn (quotient, tem);
4718               expand_inc (quotient, const1_rtx);
4719               emit_label (label5);
4720             }
4721           }
4722         break;
4723
4724       case EXACT_DIV_EXPR:
4725         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4726           {
4727             HOST_WIDE_INT d = INTVAL (op1);
4728             unsigned HOST_WIDE_INT ml;
4729             int pre_shift;
4730             rtx t1;
4731
4732             pre_shift = floor_log2 (d & -d);
4733             ml = invert_mod2n (d >> pre_shift, size);
4734             t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4735                                pre_shift, NULL_RTX, unsignedp);
4736             quotient = expand_mult (compute_mode, t1,
4737                                     gen_int_mode (ml, compute_mode),
4738                                     NULL_RTX, 1);
4739
4740             insn = get_last_insn ();
4741             set_dst_reg_note (insn, REG_EQUAL,
4742                               gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4743                                               compute_mode, op0, op1),
4744                               quotient);
4745           }
4746         break;
4747
4748       case ROUND_DIV_EXPR:
4749       case ROUND_MOD_EXPR:
4750         if (unsignedp)
4751           {
4752             rtx tem;
4753             rtx label;
4754             label = gen_label_rtx ();
4755             quotient = gen_reg_rtx (compute_mode);
4756             remainder = gen_reg_rtx (compute_mode);
4757             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4758               {
4759                 rtx tem;
4760                 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4761                                          quotient, 1, OPTAB_LIB_WIDEN);
4762                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4763                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4764                                           remainder, 1, OPTAB_LIB_WIDEN);
4765               }
4766             tem = plus_constant (compute_mode, op1, -1);
4767             tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1);
4768             do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4769             expand_inc (quotient, const1_rtx);
4770             expand_dec (remainder, op1);
4771             emit_label (label);
4772           }
4773         else
4774           {
4775             rtx abs_rem, abs_op1, tem, mask;
4776             rtx label;
4777             label = gen_label_rtx ();
4778             quotient = gen_reg_rtx (compute_mode);
4779             remainder = gen_reg_rtx (compute_mode);
4780             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4781               {
4782                 rtx tem;
4783                 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4784                                          quotient, 0, OPTAB_LIB_WIDEN);
4785                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4786                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4787                                           remainder, 0, OPTAB_LIB_WIDEN);
4788               }
4789             abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4790             abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4791             tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4792                                 1, NULL_RTX, 1);
4793             do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4794             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4795                                 NULL_RTX, 0, OPTAB_WIDEN);
4796             mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4797                                  size - 1, NULL_RTX, 0);
4798             tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4799                                 NULL_RTX, 0, OPTAB_WIDEN);
4800             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4801                                 NULL_RTX, 0, OPTAB_WIDEN);
4802             expand_inc (quotient, tem);
4803             tem = expand_binop (compute_mode, xor_optab, mask, op1,
4804                                 NULL_RTX, 0, OPTAB_WIDEN);
4805             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4806                                 NULL_RTX, 0, OPTAB_WIDEN);
4807             expand_dec (remainder, tem);
4808             emit_label (label);
4809           }
4810         return gen_lowpart (mode, rem_flag ? remainder : quotient);
4811
4812       default:
4813         gcc_unreachable ();
4814       }
4815
4816   if (quotient == 0)
4817     {
4818       if (target && GET_MODE (target) != compute_mode)
4819         target = 0;
4820
4821       if (rem_flag)
4822         {
4823           /* Try to produce the remainder without producing the quotient.
4824              If we seem to have a divmod pattern that does not require widening,
4825              don't try widening here.  We should really have a WIDEN argument
4826              to expand_twoval_binop, since what we'd really like to do here is
4827              1) try a mod insn in compute_mode
4828              2) try a divmod insn in compute_mode
4829              3) try a div insn in compute_mode and multiply-subtract to get
4830                 remainder
4831              4) try the same things with widening allowed.  */
4832           remainder
4833             = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4834                                  op0, op1, target,
4835                                  unsignedp,
4836                                  ((optab_handler (optab2, compute_mode)
4837                                    != CODE_FOR_nothing)
4838                                   ? OPTAB_DIRECT : OPTAB_WIDEN));
4839           if (remainder == 0)
4840             {
4841               /* No luck there.  Can we do remainder and divide at once
4842                  without a library call?  */
4843               remainder = gen_reg_rtx (compute_mode);
4844               if (! expand_twoval_binop ((unsignedp
4845                                           ? udivmod_optab
4846                                           : sdivmod_optab),
4847                                          op0, op1,
4848                                          NULL_RTX, remainder, unsignedp))
4849                 remainder = 0;
4850             }
4851
4852           if (remainder)
4853             return gen_lowpart (mode, remainder);
4854         }
4855
4856       /* Produce the quotient.  Try a quotient insn, but not a library call.
4857          If we have a divmod in this mode, use it in preference to widening
4858          the div (for this test we assume it will not fail). Note that optab2
4859          is set to the one of the two optabs that the call below will use.  */
4860       quotient
4861         = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4862                              op0, op1, rem_flag ? NULL_RTX : target,
4863                              unsignedp,
4864                              ((optab_handler (optab2, compute_mode)
4865                                != CODE_FOR_nothing)
4866                               ? OPTAB_DIRECT : OPTAB_WIDEN));
4867
4868       if (quotient == 0)
4869         {
4870           /* No luck there.  Try a quotient-and-remainder insn,
4871              keeping the quotient alone.  */
4872           quotient = gen_reg_rtx (compute_mode);
4873           if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4874                                      op0, op1,
4875                                      quotient, NULL_RTX, unsignedp))
4876             {
4877               quotient = 0;
4878               if (! rem_flag)
4879                 /* Still no luck.  If we are not computing the remainder,
4880                    use a library call for the quotient.  */
4881                 quotient = sign_expand_binop (compute_mode,
4882                                               udiv_optab, sdiv_optab,
4883                                               op0, op1, target,
4884                                               unsignedp, OPTAB_LIB_WIDEN);
4885             }
4886         }
4887     }
4888
4889   if (rem_flag)
4890     {
4891       if (target && GET_MODE (target) != compute_mode)
4892         target = 0;
4893
4894       if (quotient == 0)
4895         {
4896           /* No divide instruction either.  Use library for remainder.  */
4897           remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4898                                          op0, op1, target,
4899                                          unsignedp, OPTAB_LIB_WIDEN);
4900           /* No remainder function.  Try a quotient-and-remainder
4901              function, keeping the remainder.  */
4902           if (!remainder)
4903             {
4904               remainder = gen_reg_rtx (compute_mode);
4905               if (!expand_twoval_binop_libfunc
4906                   (unsignedp ? udivmod_optab : sdivmod_optab,
4907                    op0, op1,
4908                    NULL_RTX, remainder,
4909                    unsignedp ? UMOD : MOD))
4910                 remainder = NULL_RTX;
4911             }
4912         }
4913       else
4914         {
4915           /* We divided.  Now finish doing X - Y * (X / Y).  */
4916           remainder = expand_mult (compute_mode, quotient, op1,
4917                                    NULL_RTX, unsignedp);
4918           remainder = expand_binop (compute_mode, sub_optab, op0,
4919                                     remainder, target, unsignedp,
4920                                     OPTAB_LIB_WIDEN);
4921         }
4922     }
4923
4924   return gen_lowpart (mode, rem_flag ? remainder : quotient);
4925 }
4926 \f
4927 /* Return a tree node with data type TYPE, describing the value of X.
4928    Usually this is an VAR_DECL, if there is no obvious better choice.
4929    X may be an expression, however we only support those expressions
4930    generated by loop.c.  */
4931
4932 tree
4933 make_tree (tree type, rtx x)
4934 {
4935   tree t;
4936
4937   switch (GET_CODE (x))
4938     {
4939     case CONST_INT:
4940       {
4941         HOST_WIDE_INT hi = 0;
4942
4943         if (INTVAL (x) < 0
4944             && !(TYPE_UNSIGNED (type)
4945                  && (GET_MODE_BITSIZE (TYPE_MODE (type))
4946                      < HOST_BITS_PER_WIDE_INT)))
4947           hi = -1;
4948
4949         t = build_int_cst_wide (type, INTVAL (x), hi);
4950
4951         return t;
4952       }
4953
4954     case CONST_DOUBLE:
4955       if (GET_MODE (x) == VOIDmode)
4956         t = build_int_cst_wide (type,
4957                                 CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
4958       else
4959         {
4960           REAL_VALUE_TYPE d;
4961
4962           REAL_VALUE_FROM_CONST_DOUBLE (d, x);
4963           t = build_real (type, d);
4964         }
4965
4966       return t;
4967
4968     case CONST_VECTOR:
4969       {
4970         int units = CONST_VECTOR_NUNITS (x);
4971         tree itype = TREE_TYPE (type);
4972         tree *elts;
4973         int i;
4974
4975         /* Build a tree with vector elements.  */
4976         elts = XALLOCAVEC (tree, units);
4977         for (i = units - 1; i >= 0; --i)
4978           {
4979             rtx elt = CONST_VECTOR_ELT (x, i);
4980             elts[i] = make_tree (itype, elt);
4981           }
4982
4983         return build_vector (type, elts);
4984       }
4985
4986     case PLUS:
4987       return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4988                           make_tree (type, XEXP (x, 1)));
4989
4990     case MINUS:
4991       return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4992                           make_tree (type, XEXP (x, 1)));
4993
4994     case NEG:
4995       return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
4996
4997     case MULT:
4998       return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
4999                           make_tree (type, XEXP (x, 1)));
5000
5001     case ASHIFT:
5002       return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5003                           make_tree (type, XEXP (x, 1)));
5004
5005     case LSHIFTRT:
5006       t = unsigned_type_for (type);
5007       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5008                                          make_tree (t, XEXP (x, 0)),
5009                                          make_tree (type, XEXP (x, 1))));
5010
5011     case ASHIFTRT:
5012       t = signed_type_for (type);
5013       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5014                                          make_tree (t, XEXP (x, 0)),
5015                                          make_tree (type, XEXP (x, 1))));
5016
5017     case DIV:
5018       if (TREE_CODE (type) != REAL_TYPE)
5019         t = signed_type_for (type);
5020       else
5021         t = type;
5022
5023       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5024                                          make_tree (t, XEXP (x, 0)),
5025                                          make_tree (t, XEXP (x, 1))));
5026     case UDIV:
5027       t = unsigned_type_for (type);
5028       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5029                                          make_tree (t, XEXP (x, 0)),
5030                                          make_tree (t, XEXP (x, 1))));
5031
5032     case SIGN_EXTEND:
5033     case ZERO_EXTEND:
5034       t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5035                                           GET_CODE (x) == ZERO_EXTEND);
5036       return fold_convert (type, make_tree (t, XEXP (x, 0)));
5037
5038     case CONST:
5039       return make_tree (type, XEXP (x, 0));
5040
5041     case SYMBOL_REF:
5042       t = SYMBOL_REF_DECL (x);
5043       if (t)
5044         return fold_convert (type, build_fold_addr_expr (t));
5045       /* else fall through.  */
5046
5047     default:
5048       t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5049
5050       /* If TYPE is a POINTER_TYPE, we might need to convert X from
5051          address mode to pointer mode.  */
5052       if (POINTER_TYPE_P (type))
5053         x = convert_memory_address_addr_space
5054               (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5055
5056       /* Note that we do *not* use SET_DECL_RTL here, because we do not
5057          want set_decl_rtl to go adjusting REG_ATTRS for this temporary.  */
5058       t->decl_with_rtl.rtl = x;
5059
5060       return t;
5061     }
5062 }
5063 \f
5064 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5065    and returning TARGET.
5066
5067    If TARGET is 0, a pseudo-register or constant is returned.  */
5068
5069 rtx
5070 expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
5071 {
5072   rtx tem = 0;
5073
5074   if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5075     tem = simplify_binary_operation (AND, mode, op0, op1);
5076   if (tem == 0)
5077     tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5078
5079   if (target == 0)
5080     target = tem;
5081   else if (tem != target)
5082     emit_move_insn (target, tem);
5083   return target;
5084 }
5085
5086 /* Helper function for emit_store_flag.  */
5087 static rtx
5088 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5089              enum machine_mode mode, enum machine_mode compare_mode,
5090              int unsignedp, rtx x, rtx y, int normalizep,
5091              enum machine_mode target_mode)
5092 {
5093   struct expand_operand ops[4];
5094   rtx op0, last, comparison, subtarget;
5095   enum machine_mode result_mode = targetm.cstore_mode (icode);
5096
5097   last = get_last_insn ();
5098   x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5099   y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5100   if (!x || !y)
5101     {
5102       delete_insns_since (last);
5103       return NULL_RTX;
5104     }
5105
5106   if (target_mode == VOIDmode)
5107     target_mode = result_mode;
5108   if (!target)
5109     target = gen_reg_rtx (target_mode);
5110
5111   comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5112
5113   create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5114   create_fixed_operand (&ops[1], comparison);
5115   create_fixed_operand (&ops[2], x);
5116   create_fixed_operand (&ops[3], y);
5117   if (!maybe_expand_insn (icode, 4, ops))
5118     {
5119       delete_insns_since (last);
5120       return NULL_RTX;
5121     }
5122   subtarget = ops[0].value;
5123
5124   /* If we are converting to a wider mode, first convert to
5125      TARGET_MODE, then normalize.  This produces better combining
5126      opportunities on machines that have a SIGN_EXTRACT when we are
5127      testing a single bit.  This mostly benefits the 68k.
5128
5129      If STORE_FLAG_VALUE does not have the sign bit set when
5130      interpreted in MODE, we can do this conversion as unsigned, which
5131      is usually more efficient.  */
5132   if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
5133     {
5134       convert_move (target, subtarget,
5135                     val_signbit_known_clear_p (result_mode,
5136                                                STORE_FLAG_VALUE));
5137       op0 = target;
5138       result_mode = target_mode;
5139     }
5140   else
5141     op0 = subtarget;
5142
5143   /* If we want to keep subexpressions around, don't reuse our last
5144      target.  */
5145   if (optimize)
5146     subtarget = 0;
5147
5148   /* Now normalize to the proper value in MODE.  Sometimes we don't
5149      have to do anything.  */
5150   if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5151     ;
5152   /* STORE_FLAG_VALUE might be the most negative number, so write
5153      the comparison this way to avoid a compiler-time warning.  */
5154   else if (- normalizep == STORE_FLAG_VALUE)
5155     op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5156
5157   /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5158      it hard to use a value of just the sign bit due to ANSI integer
5159      constant typing rules.  */
5160   else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5161     op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5162                         GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5163                         normalizep == 1);
5164   else
5165     {
5166       gcc_assert (STORE_FLAG_VALUE & 1);
5167
5168       op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5169       if (normalizep == -1)
5170         op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5171     }
5172
5173   /* If we were converting to a smaller mode, do the conversion now.  */
5174   if (target_mode != result_mode)
5175     {
5176       convert_move (target, op0, 0);
5177       return target;
5178     }
5179   else
5180     return op0;
5181 }
5182
5183
5184 /* A subroutine of emit_store_flag only including "tricks" that do not
5185    need a recursive call.  These are kept separate to avoid infinite
5186    loops.  (The double-word case below does call emit_store_flag, but
        only with a word_mode operand.)

        TARGET, CODE, OP0, OP1, MODE, UNSIGNEDP and NORMALIZEP have the same
        meaning as for emit_store_flag.  TARGET_MODE is the mode wanted for
        the result; emit_store_flag passes the mode of TARGET, or VOIDmode
        when TARGET is null.

        Return an rtx holding the flag value, or 0 if none of the strategies
        tried here produced one.  */
5187
5188 static rtx
5189 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5190                    enum machine_mode mode, int unsignedp, int normalizep,
5191                    enum machine_mode target_mode)
5192 {
5193   rtx subtarget;
5194   enum insn_code icode;
5195   enum machine_mode compare_mode;
5196   enum mode_class mclass;
5197   enum rtx_code scode;
5198   rtx tem;
5199
5200   if (unsignedp)
5201     code = unsigned_condition (code);
5203
5204   /* If one operand is constant, make it the second one.  Only do this
5205      if the other operand is not constant as well.  */
5206
5207   if (swap_commutative_operands_p (op0, op1))
5208     {
5209       tem = op0;
5210       op0 = op1;
5211       op1 = tem;
5212       code = swap_condition (code);
5213     }
5214
5215   if (mode == VOIDmode)
5216     mode = GET_MODE (op0);
5217
5218   /* For some comparisons with 1 and -1, we can convert this to
5219      comparisons with zero.  This will often produce more opportunities for
5220      store-flag insns.  */
5221
5222   switch (code)
5223     {
5224     case LT:
5225       if (op1 == const1_rtx)
5226         op1 = const0_rtx, code = LE;
5227       break;
5228     case LE:
5229       if (op1 == constm1_rtx)
5230         op1 = const0_rtx, code = LT;
5231       break;
5232     case GE:
5233       if (op1 == const1_rtx)
5234         op1 = const0_rtx, code = GT;
5235       break;
5236     case GT:
5237       if (op1 == constm1_rtx)
5238         op1 = const0_rtx, code = GE;
5239       break;
5240     case GEU:
5241       if (op1 == const1_rtx)
5242         op1 = const0_rtx, code = NE;
5243       break;
5244     case LTU:
5245       if (op1 == const1_rtx)
5246         op1 = const0_rtx, code = EQ;
5247       break;
5248     default:
5249       break;
5250     }
5251
5252   /* If we are comparing a double-word integer with zero or -1, we can
5253      convert the comparison into one involving a single word.  */
5254   if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5255       && GET_MODE_CLASS (mode) == MODE_INT
5256       && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5257     {
5258       if ((code == EQ || code == NE)
5259           && (op1 == const0_rtx || op1 == constm1_rtx))
5260         {
5261           rtx op00, op01;
5262
5263           /* Do a logical OR or AND of the two words and compare the
5264              result.  */
5265           op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5266           op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5267           tem = expand_binop (word_mode,
5268                               op1 == const0_rtx ? ior_optab : and_optab,
5269                               op00, op01, NULL_RTX, unsignedp,
5270                               OPTAB_DIRECT);
5271
5272           if (tem != 0)
5273             tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5274                                    unsignedp, normalizep);
5275         }
5276       else if ((code == LT || code == GE) && op1 == const0_rtx)
5277         {
5278           rtx op0h;
5279
5280           /* If testing the sign bit, can just test on high word.  */
5281           op0h = simplify_gen_subreg (word_mode, op0, mode,
5282                                       subreg_highpart_offset (word_mode,
5283                                                               mode));
5284           tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5285                                  unsignedp, normalizep);
5286         }
5287       else
5288         tem = NULL_RTX;
5289
5290       if (tem)
5291         {
               /* The word-sized result is usable as is unless the caller
                  asked for a different mode; otherwise convert it, extending
                  according to the sign bit of the value that means true.  */
5292           if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5293             return tem;
5294           if (!target)
5295             target = gen_reg_rtx (target_mode);
5296
5297           convert_move (target, tem,
5298                         !val_signbit_known_set_p (word_mode,
5299                                                   (normalizep ? normalizep
5300                                                    : STORE_FLAG_VALUE)));
5301           return target;
5302         }
5303     }
5304
5305   /* If this is A < 0 or A >= 0, we can do this by taking the ones
5306      complement of A (for GE) and shifting the sign bit to the low bit.  */
5307   if (op1 == const0_rtx && (code == LT || code == GE)
5308       && GET_MODE_CLASS (mode) == MODE_INT
5309       && (normalizep || STORE_FLAG_VALUE == 1
5310           || val_signbit_p (mode, STORE_FLAG_VALUE)))
5311     {
5312       subtarget = target;
5313
5314       if (!target)
5315         target_mode = mode;
5316
5317       /* If the result is to be wider than OP0, it is best to convert it
5318          first.  If it is to be narrower, it is *incorrect* to convert it
5319          first.  */
5320       else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5321         {
5322           op0 = convert_modes (target_mode, mode, op0, 0);
5323           mode = target_mode;
5324         }
5325
5326       if (target_mode != mode)
5327         subtarget = 0;
5328
5329       if (code == GE)
5330         op0 = expand_unop (mode, one_cmpl_optab, op0,
5331                            ((STORE_FLAG_VALUE == 1 || normalizep)
5332                             ? 0 : subtarget), 0);
5333
5334       if (STORE_FLAG_VALUE == 1 || normalizep)
5335         /* If we are supposed to produce a 0/1 value, we want to do
5336            a logical shift from the sign bit to the low-order bit; for
5337            a -1/0 value, we do an arithmetic shift.  */
5338         op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5339                             GET_MODE_BITSIZE (mode) - 1,
5340                             subtarget, normalizep != -1);
5341
5342       if (mode != target_mode)
5343         op0 = convert_modes (target_mode, mode, op0, 0);
5344
5345       return op0;
5346     }
5347
       /* The floating-point retry below passes OP1 and OP0 in exchanged
          order, so SCODE has to be the swap of CODE as it stands now.
          Computing it before the operand canonicalization above would leave
          it stale whenever that step had already swapped the operands and
          CODE, and the retry would then test the opposite condition.  */
       scode = swap_condition (code);

       /* Look for a cstore pattern, starting with MODE and moving on to wider
          modes.  Only the first mode that provides one is tried.  */
5348   mclass = GET_MODE_CLASS (mode);
5349   for (compare_mode = mode; compare_mode != VOIDmode;
5350        compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5351     {
5352      enum machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5353      icode = optab_handler (cstore_optab, optab_mode);
5354      if (icode != CODE_FOR_nothing)
5355         {
5356           do_pending_stack_adjust ();
5357           tem = emit_cstore (target, icode, code, mode, compare_mode,
5358                              unsignedp, op0, op1, normalizep, target_mode);
5359           if (tem)
5360             return tem;
5361
5362           if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5363             {
                   /* Same test with the operands exchanged and the condition
                      swapped to match.  */
5364               tem = emit_cstore (target, icode, scode, mode, compare_mode,
5365                                  unsignedp, op1, op0, normalizep, target_mode);
5366               if (tem)
5367                 return tem;
5368             }
5369           break;
5370         }
5371     }
5372
5373   return 0;
5374 }
5375
5376 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5377    and storing in TARGET.  Normally return TARGET.
5378    Return 0 if that cannot be done.
5379
5380    MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
5381    it is VOIDmode, they cannot both be CONST_INT.
5382
5383    UNSIGNEDP is for the case where we have to widen the operands
5384    to perform the operation.  It says to use zero-extension.
5385
5386    NORMALIZEP is 1 if we should convert the result to be either zero
5387    or one.  Normalize is -1 if we should convert the result to be
5388    either zero or -1.  If NORMALIZEP is zero, the result will be left
5389    "raw" out of the scc insn.

        TARGET may be null; VOIDmode is then passed to emit_store_flag_1 as
        the wanted result mode.

        emit_store_flag_1 is tried first.  If it fails and BRANCH_COST is
        nonzero, the fallbacks are, in order: for floating-point modes, the
        reversed comparison followed by an add or xor, then a split
        comparison (combined with a conditional move when NaNs are honored
        and HAVE_conditional_move is defined); for integer modes, an
        xor or subtract followed by a comparison with zero, the reversed
        comparison followed by an add or xor, and finally arithmetic
        sequences that leave the answer in the sign bit.  */
5390
5391 rtx
5392 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5393                  enum machine_mode mode, int unsignedp, int normalizep)
5394 {
5395   enum machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5396   enum rtx_code rcode;
5397   rtx subtarget;
5398   rtx tem, last, trueval;
5399
5400   /* If we compare constants, we shouldn't use a store-flag operation,
5401      but a constant load.  We can get there via the vanilla route that
5402      usually generates a compare-branch sequence, but will in this case
5403      fold the comparison to a constant, and thus elide the branch.  */
5404   if (CONSTANT_P (op0) && CONSTANT_P (op1))
5405     return NULL_RTX;
5406
5407   tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5408                            target_mode);
5409   if (tem)
5410     return tem;
5411
5412   /* If we reached here, we can't do this with a scc insn, however there
5413      are some comparisons that can be done in other ways.  Don't do any
5414      of these cases if branches are very cheap.  */
5415   if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5416     return 0;
5417
5418   /* See what we need to return.  We can only return a 1, -1, or the
5419      sign bit.  */
5420
5421   if (normalizep == 0)
5422     {
5423       if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5424         normalizep = STORE_FLAG_VALUE;
5425
5426       else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5427         ;
5428       else
5429         return 0;
5430     }
5431
       /* Remember where the insn stream ends now, so that the insns of a
          failed attempt can be removed with delete_insns_since.  */
5432   last = get_last_insn ();
5433
5434   /* If optimizing, use different pseudo registers for each insn, instead
5435      of reusing the same pseudo.  This leads to better CSE, but slows
5436      down the compiler, since there are more pseudos.  */
5437   subtarget = (!optimize
5438                && (target_mode == mode)) ? target : NULL_RTX;
       /* TRUEVAL is the constant the result takes when the comparison
          holds.  */
5439   trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5440
5441   /* For floating-point comparisons, try the reverse comparison or try
5442      changing the "orderedness" of the comparison.  */
5443   if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5444     {
5445       enum rtx_code first_code;
5446       bool and_them;
5447
5448       rcode = reverse_condition_maybe_unordered (code);
5449       if (can_compare_p (rcode, mode, ccp_store_flag)
5450           && (code == ORDERED || code == UNORDERED
5451               || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5452               || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5453         {
               /* The reversed comparison is zero exactly when CODE holds.
                  If STORE_FLAG_VALUE and NORMALIZEP are 1 and -1 in either
                  order, computing it as a raw STORE_FLAG_VALUE flag and
                  adding NORMALIZEP gives NORMALIZEP when CODE holds and
                  zero otherwise.  In the other cases, XORing with TRUEVAL
                  exchanges zero and TRUEVAL.  Either fixup is only used
                  when rtx_cost reports its constant operand as free.  */
5454           int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5455                           || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5456
5457           /* For the reverse comparison, use either an addition or a XOR.  */
5458           if (want_add
5459               && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5460                            optimize_insn_for_speed_p ()) == 0)
5461             {
5462               tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5463                                        STORE_FLAG_VALUE, target_mode);
5464               if (tem)
5465                 return expand_binop (target_mode, add_optab, tem,
5466                                      gen_int_mode (normalizep, target_mode),
5467                                      target, 0, OPTAB_WIDEN);
5468             }
5469           else if (!want_add
5470                    && rtx_cost (trueval, XOR, 1,
5471                                 optimize_insn_for_speed_p ()) == 0)
5472             {
5473               tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5474                                        normalizep, target_mode);
5475               if (tem)
5476                 return expand_binop (target_mode, xor_optab, tem, trueval,
5477                                      target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
5478             }
5479         }
5480
5481       delete_insns_since (last);
5482
5483       /* Cannot split ORDERED and UNORDERED, only try the above trick.   */
5484       if (code == ORDERED || code == UNORDERED)
5485         return 0;
5486
           /* split_comparison stores the first comparison in FIRST_CODE and
              overwrites CODE with the second one.  */
5487       and_them = split_comparison (code, mode, &first_code, &code);
5488
5489       /* If there are no NaNs, the first comparison should always fall through.
5490          Effectively change the comparison to the other one.  */
5491       if (!HONOR_NANS (mode))
5492         {
5493           gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5494           return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5495                                     target_mode);
5496         }
5497
5498 #ifdef HAVE_conditional_move
5499       /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5500          conditional move.  */
5501       tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5502                                normalizep, target_mode);
5503       if (tem == 0)
5504         return 0;
5505
           /* NOTE(review): assuming emit_conditional_move selects its first
              value operand when CODE holds and the second otherwise, the
              AND_THEM form yields TEM only if CODE also holds, and the other
              form yields TRUEVAL if CODE holds and TEM if not -- confirm
              against emit_conditional_move.  */
5506       if (and_them)
5507         tem = emit_conditional_move (target, code, op0, op1, mode,
5508                                      tem, const0_rtx, GET_MODE (tem), 0);
5509       else
5510         tem = emit_conditional_move (target, code, op0, op1, mode,
5511                                      trueval, tem, GET_MODE (tem), 0);
5512
5513       if (tem == 0)
5514         delete_insns_since (last);
5515       return tem;
5516 #else
5517       return 0;
5518 #endif
5519     }
5520
5521   /* The remaining tricks only apply to integer comparisons.  */
5522
5523   if (GET_MODE_CLASS (mode) != MODE_INT)
5524     return 0;
5525
5526   /* If this is an equality comparison of integers, we can try to exclusive-or
5527      (or subtract) the two operands and use a recursive call to try the
5528      comparison with zero.  Don't do any of these cases if branches are
5529      very cheap.  */
5530
5531   if ((code == EQ || code == NE) && op1 != const0_rtx)
5532     {
5533       tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5534                           OPTAB_WIDEN);
5535
5536       if (tem == 0)
5537         tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5538                             OPTAB_WIDEN);
5539       if (tem != 0)
5540         tem = emit_store_flag (target, code, tem, const0_rtx,
5541                                mode, unsignedp, normalizep);
5542       if (tem != 0)
5543         return tem;
5544
5545       delete_insns_since (last);
5546     }
5547
5548   /* For integer comparisons, try the reverse comparison.  However, for
5549      small X and if we'd have anyway to extend, implementing "X != 0"
5550      as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0".  */
5551   rcode = reverse_condition (code);
5552   if (can_compare_p (rcode, mode, ccp_store_flag)
5553       && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5554             && code == NE
5555             && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5556             && op1 == const0_rtx))
5557     {
5558       int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5559                       || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5560
5561       /* Again, for the reverse comparison, use either an addition or a XOR.  */
5562       if (want_add
5563           && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5564                        optimize_insn_for_speed_p ()) == 0)
5565         {
5566           tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5567                                    STORE_FLAG_VALUE, target_mode);
5568           if (tem != 0)
5569             tem = expand_binop (target_mode, add_optab, tem,
5570                                 gen_int_mode (normalizep, target_mode),
5571                                 target, 0, OPTAB_WIDEN);
5572         }
5573       else if (!want_add
5574                && rtx_cost (trueval, XOR, 1,
5575                             optimize_insn_for_speed_p ()) == 0)
5576         {
5577           tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5578                                    normalizep, target_mode);
5579           if (tem != 0)
5580             tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5581                                 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5582         }
5583
5584       if (tem != 0)
5585         return tem;
5586       delete_insns_since (last);
5587     }
5588
5589   /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5590      the constant zero.  Reject all other comparisons at this point.  Only
5591      do LE and GT if branches are expensive since they are expensive on
5592      2-operand machines.  */
5593
5594   if (op1 != const0_rtx
5595       || (code != EQ && code != NE
5596           && (BRANCH_COST (optimize_insn_for_speed_p (),
5597                            false) <= 1 || (code != LE && code != GT))))
5598     return 0;
5599
5600   /* Try to put the result of the comparison in the sign bit.  Assume we can't
5601      do the necessary operation below.  */
5602
5603   tem = 0;
5604
5605   /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
5606      the sign bit set.  */
5607
5608   if (code == LE)
5609     {
5610       /* This is destructive, so SUBTARGET can't be OP0.  */
5611       if (rtx_equal_p (subtarget, op0))
5612         subtarget = 0;
5613
5614       tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5615                           OPTAB_WIDEN);
5616       if (tem)
5617         tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5618                             OPTAB_WIDEN);
5619     }
5620
5621   /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5622      number of bits in the mode of OP0, minus one.  The arithmetic shift
          gives 0 for nonnegative A and -1 for negative A, so the difference
          has the sign bit set iff A > 0.  */
5623
5624   if (code == GT)
5625     {
5626       if (rtx_equal_p (subtarget, op0))
5627         subtarget = 0;
5628
5629       tem = expand_shift (RSHIFT_EXPR, mode, op0,
5630                           GET_MODE_BITSIZE (mode) - 1,
5631                           subtarget, 0);
5632       tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5633                           OPTAB_WIDEN);
5634     }
5635
5636   if (code == EQ || code == NE)
5637     {
5638       /* For EQ or NE, one way to do the comparison is to apply an operation
5639          that converts the operand into a positive number if it is nonzero
5640          or zero if it was originally zero.  Then, for EQ, we subtract 1 and
5641          for NE we negate.  This puts the result in the sign bit.  Then we
5642          normalize with a shift, if needed.
5643
5644          Two operations that can do the above actions are ABS and FFS, so try
5645          them.  If that doesn't work, and MODE is smaller than a full word,
5646          we can use zero-extension to the wider mode (an unsigned conversion)
5647          as the operation.  */
5648
5649       /* Note that ABS doesn't yield a positive number for INT_MIN, but
5650          that is compensated by the subsequent overflow when subtracting
5651          one / negating.  */
5652
5653       if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5654         tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5655       else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5656         tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5657       else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5658         {
               /* From here on the computation is carried out in word_mode.  */
5659           tem = convert_modes (word_mode, mode, op0, 1);
5660           mode = word_mode;
5661         }
5662
5663       if (tem != 0)
5664         {
5665           if (code == EQ)
5666             tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5667                                 0, OPTAB_WIDEN);
5668           else
5669             tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5670         }
5671
5672       /* If we couldn't do it that way, for NE we can "or" the two's complement
5673          of the value with itself.  For EQ, we take the one's complement of
5674          that "or", which is an extra insn, so we only handle EQ if branches
5675          are expensive.  */
5676
5677       if (tem == 0
5678           && (code == NE
5679               || BRANCH_COST (optimize_insn_for_speed_p (),
5680                               false) > 1))
5681         {
5682           if (rtx_equal_p (subtarget, op0))
5683             subtarget = 0;
5684
5685           tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5686           tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5687                               OPTAB_WIDEN);
5688
5689           if (tem && code == EQ)
5690             tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5691         }
5692     }
5693
       /* The answer now sits in the sign bit of TEM.  When a normalized value
          is wanted, shift it down: logically for 0/1, arithmetically for
          0/-1.  */
5694   if (tem && normalizep)
5695     tem = expand_shift (RSHIFT_EXPR, mode, tem,
5696                         GET_MODE_BITSIZE (mode) - 1,
5697                         subtarget, normalizep == 1);
5698
5699   if (tem)
5700     {
           /* With no TARGET, return TEM as is.  Otherwise make sure the value
              ends up in TARGET, converting if the modes differ, or copying
              if no SUBTARGET was in use.  */
5701       if (!target)
5702         ;
5703       else if (GET_MODE (tem) != target_mode)
5704         {
5705           convert_move (target, tem, 0);
5706           tem = target;
5707         }
5708       else if (!subtarget)
5709         {
5710           emit_move_insn (target, tem);
5711           tem = target;
5712         }
5713     }
5714   else
5715     delete_insns_since (last);
5716
5717   return tem;
5718 }
5719
5720 /* Like emit_store_flag, but always succeeds.

        TARGET, CODE, OP0, OP1, MODE, UNSIGNEDP and NORMALIZEP are passed on
        to emit_store_flag unchanged.  If that returns 0, the result is
        produced with a store, a compare-and-jump and a second store.  In
        that fallback a null TARGET is replaced by a new word_mode pseudo,
        and a TARGET that is not a register or is mentioned in OP0 or OP1 is
        replaced by a new pseudo of the same mode.  With NORMALIZEP zero the
        fallback stores 1 when the comparison holds.

        Return the rtx that holds the result.  */
5721
5722 rtx
5723 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5724                        enum machine_mode mode, int unsignedp, int normalizep)
5725 {
5726   rtx tem, label;
5727   rtx trueval, falseval;
5728
5729   /* First see if emit_store_flag can do the job.  */
5730   tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5731   if (tem != 0)
5732     return tem;
5733
5734   if (!target)
5735     target = gen_reg_rtx (word_mode);
5736
5737   /* If this failed, we have to do this with set/compare/jump/set code.
5738      For foo != 0, if foo is in OP0, just replace it with 1 if nonzero.  */
5739   trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5740   if (code == NE
5741       && GET_MODE_CLASS (mode) == MODE_INT
5742       && REG_P (target)
5743       && op0 == target
5744       && op1 == const0_rtx)
5745     {
           /* TARGET already holds OP0.  Jump over the store when it is zero,
              so only a nonzero value gets overwritten with TRUEVAL.  */
5746       label = gen_label_rtx ();
5747       do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp,
5748                                mode, NULL_RTX, NULL_RTX, label, -1);
5749       emit_move_insn (target, trueval);
5750       emit_label (label);
5751       return target;
5752     }
5753
       /* TARGET is written before the comparison is emitted, so it must be a
          register that neither operand refers to.  */
5754   if (!REG_P (target)
5755       || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5756     target = gen_reg_rtx (GET_MODE (target));
5757
5758   /* Jump in the right direction if the target cannot implement CODE
5759      but can jump on its reverse condition.  */
5760   falseval = const0_rtx;
5761   if (! can_compare_p (code, mode, ccp_jump)
5762       && (! FLOAT_MODE_P (mode)
5763           || code == ORDERED || code == UNORDERED
5764           || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5765           || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5766     {
5767       enum rtx_code rcode;
5768       if (FLOAT_MODE_P (mode))
5769         rcode = reverse_condition_maybe_unordered (code);
5770       else
5771         rcode = reverse_condition (code);
5772
5773       /* Canonicalize to UNORDERED for the libcall.  */
5774       if (can_compare_p (rcode, mode, ccp_jump)
5775           || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5776         {
               /* Test RCODE instead and exchange the two values stored, so
                  the final contents of TARGET are unchanged.  */
5777           falseval = trueval;
5778           trueval = const0_rtx;
5779           code = rcode;
5780         }
5781     }
5782
       /* Store TRUEVAL, jump to LABEL when the comparison holds, and
          otherwise fall through to overwrite TARGET with FALSEVAL.  */
5783   emit_move_insn (target, trueval);
5784   label = gen_label_rtx ();
5785   do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5786                            NULL_RTX, label, -1);
5787
5788   emit_move_insn (target, falseval);
5789   emit_label (label);
5790
5791   return target;
5792 }
5793 \f
5794 /* Emit a comparison of ARG1 with ARG2, both of mode MODE and possibly
5795    spanning several words, followed by a jump to LABEL that is taken when
5796    ARG1 OP ARG2 holds.  All the work is left to do_compare_rtx_and_jump.  */
5797
5798 static void
5799 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
5800                  rtx label)
5801 {
       int unsigned_cmp;

       /* Only the four unsigned comparison codes ask for an unsigned
          compare.  */
       switch (op)
         {
         case LTU:
         case LEU:
         case GTU:
         case GEU:
           unsigned_cmp = 1;
           break;

         default:
           unsigned_cmp = 0;
           break;
         }

       do_compare_rtx_and_jump (arg1, arg2, op, unsigned_cmp, mode, NULL_RTX,
                                NULL_RTX, label, -1);
5805 }