gcc/expmed.c

   1 /* Medium-level subroutines: convert bit-field store and extract
   2    and shifts, multiplies and divides to rtl instructions.
   3    Copyright (C) 1987-2013 Free Software Foundation, Inc.
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 3, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21
  22 #include "config.h"
  23 #include "system.h"
  24 #include "coretypes.h"
  25 #include "tm.h"
  26 #include "diagnostic-core.h"
  27 #include "rtl.h"
  28 #include "tree.h"
  29 #include "stor-layout.h"
  30 #include "tm_p.h"
  31 #include "flags.h"
  32 #include "insn-config.h"
  33 #include "expr.h"
  34 #include "optabs.h"
  35 #include "recog.h"
  36 #include "langhooks.h"
  37 #include "df.h"
  38 #include "target.h"
  39 #include "expmed.h"
  40
     /* Default instance of struct target_expmed.  When SWITCHABLE_TARGET
        is nonzero, this_target_expmed is also defined here and initially
        points at that default instance.  */
  41 struct target_expmed default_target_expmed;
  42 #if SWITCHABLE_TARGET
  43 struct target_expmed *this_target_expmed = &default_target_expmed;
  44 #endif
  45
  46 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
  47                                    unsigned HOST_WIDE_INT,
  48                                    unsigned HOST_WIDE_INT,
  49                                    unsigned HOST_WIDE_INT,
  50                                    rtx);
  51 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  52                                    unsigned HOST_WIDE_INT,
  53                                    unsigned HOST_WIDE_INT,
  54                                    unsigned HOST_WIDE_INT,
  55                                    rtx);
  56 static rtx extract_fixed_bit_field (enum machine_mode, rtx,
  57                                     unsigned HOST_WIDE_INT,
  58                                     unsigned HOST_WIDE_INT, rtx, int);
  59 static rtx mask_rtx (enum machine_mode, int, int, int);
  60 static rtx lshift_value (enum machine_mode, unsigned HOST_WIDE_INT, int);
  61 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  62                                     unsigned HOST_WIDE_INT, int);
  63 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
  64 static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
  65 static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
  66
  67 /* Test whether a value is zero or a power of two.  X is expanded twice,
        so the argument must not have side effects.  */
  68 #define EXACT_POWER_OF_2_OR_ZERO_P(x) \
  69   (((x) & ((x) - (unsigned HOST_WIDE_INT) 1)) == 0)
  70
     /* Scratch rtxes used while filling in the cost tables.  init_expmed
        builds one of these on its stack and wires the expressions together
        once; init_expmed_one_mode and init_expmed_one_conv then only change
        modes and constant operands before costing each expression.  The
        shapes noted below are the ones init_expmed sets up.  */
  71 struct init_expmed_rtl
  72 {
  73   struct rtx_def reg;		/* REG numbered LAST_VIRTUAL_REGISTER + 1.  */
  74   struct rtx_def plus;		/* (plus reg reg)  */
  75   struct rtx_def neg;		/* (neg reg)  */
  76   struct rtx_def mult;		/* (mult reg reg)  */
  77   struct rtx_def sdiv;		/* (div reg reg)  */
  78   struct rtx_def udiv;		/* (udiv reg reg)  */
  79   struct rtx_def sdiv_32;		/* (div reg 32)  */
  80   struct rtx_def smod_32;		/* (mod reg 32)  */
  81   struct rtx_def wide_mult;	/* (mult zext zext)  */
  82   struct rtx_def wide_lshr;	/* (lshiftrt wide_mult N); N is set per mode.  */
  83   struct rtx_def wide_trunc;	/* (truncate wide_lshr)  */
  84   struct rtx_def shift;		/* (ashift reg N); N is set per shift count.  */
  85   struct rtx_def shift_mult;	/* (mult reg 2**N); operand 1 is set per count.  */
  86   struct rtx_def shift_add;	/* (plus shift_mult reg)  */
  87   struct rtx_def shift_sub0;	/* (minus shift_mult reg)  */
  88   struct rtx_def shift_sub1;	/* (minus reg shift_mult)  */
  89   struct rtx_def zext;		/* (zero_extend reg)  */
  90   struct rtx_def trunc;		/* (truncate reg)  */
  91
       /* Constant operands: init_expmed stores GEN_INT (1 << M) in pow2[M]
          and GEN_INT (M) in cint[M] for 1 <= M < MAX_BITS_PER_WORD.  Index 0
          is only zeroed by the memset there.  */
  92   rtx pow2[MAX_BITS_PER_WORD];
  93   rtx cint[MAX_BITS_PER_WORD];
  94 };
  95
  96 static void
  97 init_expmed_one_conv (struct init_expmed_rtl *all, enum machine_mode to_mode,
  98                       enum machine_mode from_mode, bool speed)
  99 {
 100   int to_size, from_size;
 101   rtx which;
 102
 103   /* We're given no information about the true size of a partial integer,
 104      only the size of the "full" integer it requires for storage.  For
 105      comparison purposes here, reduce the bit size by one in that case.  */
 106   to_size = (GET_MODE_BITSIZE (to_mode)
 107              - (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT));
 108   from_size = (GET_MODE_BITSIZE (from_mode)
 109                - (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT));
 110
 111   /* Assume cost of zero-extend and sign-extend is the same.  */
 112   which = (to_size < from_size ? &all->trunc : &all->zext);
 113
 114   PUT_MODE (&all->reg, from_mode);
 115   set_convert_cost (to_mode, from_mode, speed, set_src_cost (which, speed));
 116 }
 117
 118 static void
 119 init_expmed_one_mode (struct init_expmed_rtl *all,
 120                       enum machine_mode mode, int speed)
 121 {
 122   int m, n, mode_bitsize;
 123   enum machine_mode mode_from;
 124
 125   mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
 126
 127   PUT_MODE (&all->reg, mode);
 128   PUT_MODE (&all->plus, mode);
 129   PUT_MODE (&all->neg, mode);
 130   PUT_MODE (&all->mult, mode);
 131   PUT_MODE (&all->sdiv, mode);
 132   PUT_MODE (&all->udiv, mode);
 133   PUT_MODE (&all->sdiv_32, mode);
 134   PUT_MODE (&all->smod_32, mode);
 135   PUT_MODE (&all->wide_trunc, mode);
 136   PUT_MODE (&all->shift, mode);
 137   PUT_MODE (&all->shift_mult, mode);
 138   PUT_MODE (&all->shift_add, mode);
 139   PUT_MODE (&all->shift_sub0, mode);
 140   PUT_MODE (&all->shift_sub1, mode);
 141   PUT_MODE (&all->zext, mode);
 142   PUT_MODE (&all->trunc, mode);
 143
 144   set_add_cost (speed, mode, set_src_cost (&all->plus, speed));
 145   set_neg_cost (speed, mode, set_src_cost (&all->neg, speed));
 146   set_mul_cost (speed, mode, set_src_cost (&all->mult, speed));
 147   set_sdiv_cost (speed, mode, set_src_cost (&all->sdiv, speed));
 148   set_udiv_cost (speed, mode, set_src_cost (&all->udiv, speed));
 149
 150   set_sdiv_pow2_cheap (speed, mode, (set_src_cost (&all->sdiv_32, speed)
 151                                      <= 2 * add_cost (speed, mode)));
 152   set_smod_pow2_cheap (speed, mode, (set_src_cost (&all->smod_32, speed)
 153                                      <= 4 * add_cost (speed, mode)));
 154
 155   set_shift_cost (speed, mode, 0, 0);
 156   {
 157     int cost = add_cost (speed, mode);
 158     set_shiftadd_cost (speed, mode, 0, cost);
 159     set_shiftsub0_cost (speed, mode, 0, cost);
 160     set_shiftsub1_cost (speed, mode, 0, cost);
 161   }
 162
 163   n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
 164   for (m = 1; m < n; m++)
 165     {
 166       XEXP (&all->shift, 1) = all->cint[m];
 167       XEXP (&all->shift_mult, 1) = all->pow2[m];
 168
 169       set_shift_cost (speed, mode, m, set_src_cost (&all->shift, speed));
 170       set_shiftadd_cost (speed, mode, m, set_src_cost (&all->shift_add, speed));
 171       set_shiftsub0_cost (speed, mode, m, set_src_cost (&all->shift_sub0, speed));
 172       set_shiftsub1_cost (speed, mode, m, set_src_cost (&all->shift_sub1, speed));
 173     }
 174
 175   if (SCALAR_INT_MODE_P (mode))
 176     {
 177       for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
 178            mode_from = (enum machine_mode)(mode_from + 1))
 179         init_expmed_one_conv (all, mode, mode_from, speed);
 180     }
 181   if (GET_MODE_CLASS (mode) == MODE_INT)
 182     {
 183       enum machine_mode  wider_mode = GET_MODE_WIDER_MODE (mode);
 184       if (wider_mode != VOIDmode)
 185         {
 186           PUT_MODE (&all->zext, wider_mode);
 187           PUT_MODE (&all->wide_mult, wider_mode);
 188           PUT_MODE (&all->wide_lshr, wider_mode);
 189           XEXP (&all->wide_lshr, 1) = GEN_INT (mode_bitsize);
 190
 191           set_mul_widen_cost (speed, wider_mode,
 192                               set_src_cost (&all->wide_mult, speed));
 193           set_mul_highpart_cost (speed, mode,
 194                                  set_src_cost (&all->wide_trunc, speed));
 195         }
 196     }
 197 }
 198
 199 void
 200 init_expmed (void)
 201 {
 202   struct init_expmed_rtl all;
 203   enum machine_mode mode;
 204   int m, speed;
 205
 206   memset (&all, 0, sizeof all);
 207   for (m = 1; m < MAX_BITS_PER_WORD; m++)
 208     {
 209       all.pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
 210       all.cint[m] = GEN_INT (m);
 211     }
 212
 213   PUT_CODE (&all.reg, REG);
 214   /* Avoid using hard regs in ways which may be unsupported.  */
 215   SET_REGNO (&all.reg, LAST_VIRTUAL_REGISTER + 1);
 216
 217   PUT_CODE (&all.plus, PLUS);
 218   XEXP (&all.plus, 0) = &all.reg;
 219   XEXP (&all.plus, 1) = &all.reg;
 220
 221   PUT_CODE (&all.neg, NEG);
 222   XEXP (&all.neg, 0) = &all.reg;
 223
 224   PUT_CODE (&all.mult, MULT);
 225   XEXP (&all.mult, 0) = &all.reg;
 226   XEXP (&all.mult, 1) = &all.reg;
 227
 228   PUT_CODE (&all.sdiv, DIV);
 229   XEXP (&all.sdiv, 0) = &all.reg;
 230   XEXP (&all.sdiv, 1) = &all.reg;
 231
 232   PUT_CODE (&all.udiv, UDIV);
 233   XEXP (&all.udiv, 0) = &all.reg;
 234   XEXP (&all.udiv, 1) = &all.reg;
 235
 236   PUT_CODE (&all.sdiv_32, DIV);
 237   XEXP (&all.sdiv_32, 0) = &all.reg;
 238   XEXP (&all.sdiv_32, 1) = 32 < MAX_BITS_PER_WORD ? all.cint[32] : GEN_INT (32);
 239
 240   PUT_CODE (&all.smod_32, MOD);
 241   XEXP (&all.smod_32, 0) = &all.reg;
 242   XEXP (&all.smod_32, 1) = XEXP (&all.sdiv_32, 1);
 243
 244   PUT_CODE (&all.zext, ZERO_EXTEND);
 245   XEXP (&all.zext, 0) = &all.reg;
 246
 247   PUT_CODE (&all.wide_mult, MULT);
 248   XEXP (&all.wide_mult, 0) = &all.zext;
 249   XEXP (&all.wide_mult, 1) = &all.zext;
 250
 251   PUT_CODE (&all.wide_lshr, LSHIFTRT);
 252   XEXP (&all.wide_lshr, 0) = &all.wide_mult;
 253
 254   PUT_CODE (&all.wide_trunc, TRUNCATE);
 255   XEXP (&all.wide_trunc, 0) = &all.wide_lshr;
 256
 257   PUT_CODE (&all.shift, ASHIFT);
 258   XEXP (&all.shift, 0) = &all.reg;
 259
 260   PUT_CODE (&all.shift_mult, MULT);
 261   XEXP (&all.shift_mult, 0) = &all.reg;
 262
 263   PUT_CODE (&all.shift_add, PLUS);
 264   XEXP (&all.shift_add, 0) = &all.shift_mult;
 265   XEXP (&all.shift_add, 1) = &all.reg;
 266
 267   PUT_CODE (&all.shift_sub0, MINUS);
 268   XEXP (&all.shift_sub0, 0) = &all.shift_mult;
 269   XEXP (&all.shift_sub0, 1) = &all.reg;
 270
 271   PUT_CODE (&all.shift_sub1, MINUS);
 272   XEXP (&all.shift_sub1, 0) = &all.reg;
 273   XEXP (&all.shift_sub1, 1) = &all.shift_mult;
 274
 275   PUT_CODE (&all.trunc, TRUNCATE);
 276   XEXP (&all.trunc, 0) = &all.reg;
 277
 278   for (speed = 0; speed < 2; speed++)
 279     {
 280       crtl->maybe_hot_insn_p = speed;
 281       set_zero_cost (speed, set_src_cost (const0_rtx, speed));
 282
 283       for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
 284            mode = (enum machine_mode)(mode + 1))
 285         init_expmed_one_mode (&all, mode, speed);
 286
 287       if (MIN_MODE_PARTIAL_INT != VOIDmode)
 288         for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
 289              mode = (enum machine_mode)(mode + 1))
 290           init_expmed_one_mode (&all, mode, speed);
 291
 292       if (MIN_MODE_VECTOR_INT != VOIDmode)
 293         for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
 294              mode = (enum machine_mode)(mode + 1))
 295           init_expmed_one_mode (&all, mode, speed);
 296     }
 297
 298   if (alg_hash_used_p ())
 299     {
 300       struct alg_hash_entry *p = alg_hash_entry_ptr (0);
 301       memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
 302     }
 303   else
 304     set_alg_hash_used_p (true);
 305   default_rtl_profile ();
 306 }
 307
 308 /* Return an rtx representing minus the value of X.
 309    MODE is the intended mode of the result,
 310    useful if X is a CONST_INT.  */
 311
 312 rtx
 313 negate_rtx (enum machine_mode mode, rtx x)
 314 {
 315   rtx result = simplify_unary_operation (NEG, mode, x, mode);
 316
 317   if (result == 0)
 318     result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
 319
 320   return result;
 321 }
 322
 323 /* Adjust bitfield memory MEM so that it points to the first unit of mode
 324    MODE that contains a bitfield of size BITSIZE at bit position BITNUM.
 325    If MODE is BLKmode, return a reference to every byte in the bitfield.
 326    Set *NEW_BITNUM to the bit position of the field within the new memory.  */
 327
 328 static rtx
 329 narrow_bit_field_mem (rtx mem, enum machine_mode mode,
 330                       unsigned HOST_WIDE_INT bitsize,
 331                       unsigned HOST_WIDE_INT bitnum,
 332                       unsigned HOST_WIDE_INT *new_bitnum)
 333 {
 334   if (mode == BLKmode)
 335     {
 336       *new_bitnum = bitnum % BITS_PER_UNIT;
 337       HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
 338       HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
 339                             / BITS_PER_UNIT);
 340       return adjust_bitfield_address_size (mem, mode, offset, size);
 341     }
 342   else
 343     {
 344       unsigned int unit = GET_MODE_BITSIZE (mode);
 345       *new_bitnum = bitnum % unit;
 346       HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
 347       return adjust_bitfield_address (mem, mode, offset);
 348     }
 349 }
 350
 351 /* The caller wants to perform insertion or extraction PATTERN on a
 352    bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
 353    BITREGION_START and BITREGION_END are as for store_bit_field
 354    and FIELDMODE is the natural mode of the field.
 355
 356    Search for a mode that is compatible with the memory access
 357    restrictions and (where applicable) with a register insertion or
 358    extraction.  Return the new memory on success, storing the adjusted
 359    bit position in *NEW_BITNUM.  Return null otherwise.  */
 360
 361 static rtx
 362 adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
 363                               rtx op0, HOST_WIDE_INT bitsize,
 364                               HOST_WIDE_INT bitnum,
 365                               unsigned HOST_WIDE_INT bitregion_start,
 366                               unsigned HOST_WIDE_INT bitregion_end,
 367                               enum machine_mode fieldmode,
 368                               unsigned HOST_WIDE_INT *new_bitnum)
 369 {
 370   bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
 371                                 bitregion_end, MEM_ALIGN (op0),
 372                                 MEM_VOLATILE_P (op0));
 373   enum machine_mode best_mode;
 374   if (iter.next_mode (&best_mode))
 375     {
 376       /* We can use a memory in BEST_MODE.  See whether this is true for
 377          any wider modes.  All other things being equal, we prefer to
 378          use the widest mode possible because it tends to expose more
 379          CSE opportunities.  */
 380       if (!iter.prefer_smaller_modes ())
 381         {
 382           /* Limit the search to the mode required by the corresponding
 383              register insertion or extraction instruction, if any.  */
 384           enum machine_mode limit_mode = word_mode;
 385           extraction_insn insn;
 386           if (get_best_reg_extraction_insn (&insn, pattern,
 387                                             GET_MODE_BITSIZE (best_mode),
 388                                             fieldmode))
 389             limit_mode = insn.field_mode;
 390
 391           enum machine_mode wider_mode;
 392           while (iter.next_mode (&wider_mode)
 393                  && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
 394             best_mode = wider_mode;
 395         }
 396       return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
 397                                    new_bitnum);
 398     }
 399   return NULL_RTX;
 400 }
 401
 402 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
 403    a structure of mode STRUCT_MODE represents a lowpart subreg.   The subreg
 404    offset is then BITNUM / BITS_PER_UNIT.  */
 405
 406 static bool
 407 lowpart_bit_field_p (unsigned HOST_WIDE_INT bitnum,
 408                      unsigned HOST_WIDE_INT bitsize,
 409                      enum machine_mode struct_mode)
 410 {
 411   if (BYTES_BIG_ENDIAN)
 412     return (bitnum % BITS_PER_UNIT == 0
 413             && (bitnum + bitsize == GET_MODE_BITSIZE (struct_mode)
 414                 || (bitnum + bitsize) % BITS_PER_WORD == 0));
 415   else
 416     return bitnum % BITS_PER_WORD == 0;
 417 }
 418
 419 /* Return true if OP is a memory and if a bitfield of size BITSIZE at
 420    bit number BITNUM can be treated as a simple value of mode MODE.  */
 421
 422 static bool
 423 simple_mem_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
 424                        unsigned HOST_WIDE_INT bitnum, enum machine_mode mode)
 425 {
 426   return (MEM_P (op0)
 427           && bitnum % BITS_PER_UNIT == 0
 428           && bitsize == GET_MODE_BITSIZE (mode)
 429           && (!SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
 430               || (bitnum % GET_MODE_ALIGNMENT (mode) == 0
 431                   && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
 432 }
 433 \f
 434 /* Try to use instruction INSV to store VALUE into a field of OP0.
 435    BITSIZE and BITNUM are as for store_bit_field.

        Return false straight away if BITSIZE is zero or wider than the
        field mode of INSV.  Otherwise return true if the insv pattern was
        expanded; if it could not be, delete every insn emitted since entry
        and return false.  */
 436
 437 static bool
 438 store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
 439                             unsigned HOST_WIDE_INT bitsize,
 440                             unsigned HOST_WIDE_INT bitnum, rtx value)
 441 {
 442   struct expand_operand ops[4];
 443   rtx value1;
 444   rtx xop0 = op0;
       /* Rollback point used by delete_insns_since if expansion fails.  */
 445   rtx last = get_last_insn ();
       /* Set when the insertion is done on a temporary that has to be
          converted back into OP0 afterwards.  */
 446   bool copy_back = false;
 447
 448   enum machine_mode op_mode = insv->field_mode;
 449   unsigned int unit = GET_MODE_BITSIZE (op_mode);
 450   if (bitsize == 0 || bitsize > unit)
 451     return false;
 452
 453   if (MEM_P (xop0))
 454     /* Get a reference to the first byte of the field.  */
 455     xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
 456                                  &bitnum);
 457   else
 458     {
 459       /* Convert from counting within OP0 to counting in OP_MODE.  */
 460       if (BYTES_BIG_ENDIAN)
 461         bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
 462
 463       /* If xop0 is a register, we need it in OP_MODE
 464          to make it acceptable to the format of insv.  */
 465       if (GET_CODE (xop0) == SUBREG)
 466         /* We can't just change the mode, because this might clobber op0,
 467            and we will need the original value of op0 if insv fails.  */
 468         xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
 469       if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
 470         xop0 = gen_lowpart_SUBREG (op_mode, xop0);
 471     }
 472
 473   /* If the destination is a paradoxical subreg such that we need a
 474      truncate to the inner mode, perform the insertion on a temporary and
 475      truncate the result to the original destination.  Note that we can't
 476      just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
 477      X) 0)) is (reg:N X).  */
 478   if (GET_CODE (xop0) == SUBREG
 479       && REG_P (SUBREG_REG (xop0))
 480       && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
 481                                          op_mode))
 482     {
 483       rtx tem = gen_reg_rtx (op_mode);
 484       emit_move_insn (tem, xop0);
 485       xop0 = tem;
 486       copy_back = true;
 487     }
 488
 489   /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
 490      "backwards" from the size of the unit we are inserting into.
 491      Otherwise, we count bits from the most significant on a
 492      BYTES/BITS_BIG_ENDIAN machine.  */
 493
 494   if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
 495     bitnum = unit - bitsize - bitnum;
 496
 497   /* Convert VALUE to op_mode (which insv insn wants) in VALUE1.  */
 498   value1 = value;
 499   if (GET_MODE (value) != op_mode)
 500     {
 501       if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
 502         {
 503           /* Optimization: Don't bother really extending VALUE
 504              if it has all the bits we will actually use.  However,
 505              if we must narrow it, be sure we do it correctly.  */
 506
 507           if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
 508             {
 509               rtx tmp;
 510
 511               tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
 512               if (! tmp)
 513                 tmp = simplify_gen_subreg (op_mode,
 514                                            force_reg (GET_MODE (value),
 515                                                       value1),
 516                                            GET_MODE (value), 0);
 517               value1 = tmp;
 518             }
 519           else
 520             value1 = gen_lowpart (op_mode, value1);
 521         }
 522       else if (CONST_INT_P (value))
 523         value1 = gen_int_mode (INTVAL (value), op_mode);
 524       else
 525         /* Parse phase is supposed to make VALUE's data type
 526            match that of the component reference, which is a type
 527            at least as wide as the field; so VALUE should have
 528            a mode that corresponds to that type.  */
 529         gcc_assert (CONSTANT_P (value));
 530     }
 531
       /* Operands handed to the pattern, in order: destination, field
          width, bit position, value in OP_MODE.  */
 532   create_fixed_operand (&ops[0], xop0);
 533   create_integer_operand (&ops[1], bitsize);
 534   create_integer_operand (&ops[2], bitnum);
 535   create_input_operand (&ops[3], value1, op_mode);
 536   if (maybe_expand_insn (insv->icode, 4, ops))
 537     {
 538       if (copy_back)
 539         convert_move (op0, xop0, true);
 540       return true;
 541     }
       /* Expansion failed: discard anything emitted above, including the
          copy into the temporary.  */
 542   delete_insns_since (last);
 543   return false;
 544 }
 545
 546 /* A subroutine of store_bit_field, with the same arguments.  Return true
 547    if the operation could be implemented.
 548
 549    If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
 550    no other way of implementing the operation.  If FALLBACK_P is false,
 551    return false instead.  */
 552
 553 static bool
 554 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 555                    unsigned HOST_WIDE_INT bitnum,
 556                    unsigned HOST_WIDE_INT bitregion_start,
 557                    unsigned HOST_WIDE_INT bitregion_end,
 558                    enum machine_mode fieldmode,
 559                    rtx value, bool fallback_p)
 560 {
 561   rtx op0 = str_rtx;
 562   rtx orig_value;
 563
 564   while (GET_CODE (op0) == SUBREG)
 565     {
 566       /* The following line once was done only if WORDS_BIG_ENDIAN,
 567          but I think that is a mistake.  WORDS_BIG_ENDIAN is
 568          meaningful at a much higher level; when structures are copied
 569          between memory and regs, the higher-numbered regs
 570          always get higher addresses.  */
 571       int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
 572       int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
 573       int byte_offset = 0;
 574
 575       /* Paradoxical subregs need special handling on big endian machines.  */
 576       if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
 577         {
 578           int difference = inner_mode_size - outer_mode_size;
 579
 580           if (WORDS_BIG_ENDIAN)
 581             byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
 582           if (BYTES_BIG_ENDIAN)
 583             byte_offset += difference % UNITS_PER_WORD;
 584         }
 585       else
 586         byte_offset = SUBREG_BYTE (op0);
 587
 588       bitnum += byte_offset * BITS_PER_UNIT;
 589       op0 = SUBREG_REG (op0);
 590     }
 591
 592   /* No action is needed if the target is a register and if the field
 593      lies completely outside that register.  This can occur if the source
 594      code contains an out-of-bounds access to a small array.  */
 595   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
 596     return true;
 597
 598   /* Use vec_set patterns for inserting parts of vectors whenever
 599      available.  */
 600   if (VECTOR_MODE_P (GET_MODE (op0))
 601       && !MEM_P (op0)
 602       && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
 603       && fieldmode == GET_MODE_INNER (GET_MODE (op0))
 604       && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
 605       && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
 606     {
 607       struct expand_operand ops[3];
 608       enum machine_mode outermode = GET_MODE (op0);
 609       enum machine_mode innermode = GET_MODE_INNER (outermode);
 610       enum insn_code icode = optab_handler (vec_set_optab, outermode);
 611       int pos = bitnum / GET_MODE_BITSIZE (innermode);
 612
 613       create_fixed_operand (&ops[0], op0);
 614       create_input_operand (&ops[1], value, innermode);
 615       create_integer_operand (&ops[2], pos);
 616       if (maybe_expand_insn (icode, 3, ops))
 617         return true;
 618     }
 619
 620   /* If the target is a register, overwriting the entire object, or storing
 621      a full-word or multi-word field can be done with just a SUBREG.  */
 622   if (!MEM_P (op0)
 623       && bitsize == GET_MODE_BITSIZE (fieldmode)
 624       && ((bitsize == GET_MODE_BITSIZE (GET_MODE (op0)) && bitnum == 0)
 625           || (bitsize % BITS_PER_WORD == 0 && bitnum % BITS_PER_WORD == 0)))
 626     {
 627       /* Use the subreg machinery either to narrow OP0 to the required
 628          words or to cope with mode punning between equal-sized modes.
 629          In the latter case, use subreg on the rhs side, not lhs.  */
 630       rtx sub;
 631
 632       if (bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
 633         {
 634           sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
 635           if (sub)
 636             {
 637               emit_move_insn (op0, sub);
 638               return true;
 639             }
 640         }
 641       else
 642         {
 643           sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
 644                                      bitnum / BITS_PER_UNIT);
 645           if (sub)
 646             {
 647               emit_move_insn (sub, value);
 648               return true;
 649             }
 650         }
 651     }
 652
 653   /* If the target is memory, storing any naturally aligned field can be
 654      done with a simple store.  For targets that support fast unaligned
 655      memory, any naturally sized, unit aligned field can be done directly.  */
 656   if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode))
 657     {
 658       op0 = adjust_bitfield_address (op0, fieldmode, bitnum / BITS_PER_UNIT);
 659       emit_move_insn (op0, value);
 660       return true;
 661     }
 662
 663   /* Make sure we are playing with integral modes.  Pun with subregs
 664      if we aren't.  This must come after the entire register case above,
 665      since that case is valid for any mode.  The following cases are only
 666      valid for integral modes.  */
 667   {
 668     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
 669     if (imode != GET_MODE (op0))
 670       {
 671         if (MEM_P (op0))
 672           op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
 673         else
 674           {
 675             gcc_assert (imode != BLKmode);
 676             op0 = gen_lowpart (imode, op0);
 677           }
 678       }
 679   }
 680
 681   /* Storing an lsb-aligned field in a register
 682      can be done with a movstrict instruction.  */
 683
 684   if (!MEM_P (op0)
 685       && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
 686       && bitsize == GET_MODE_BITSIZE (fieldmode)
 687       && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
 688     {
 689       struct expand_operand ops[2];
 690       enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
 691       rtx arg0 = op0;
 692       unsigned HOST_WIDE_INT subreg_off;
 693
 694       if (GET_CODE (arg0) == SUBREG)
 695         {
 696           /* Else we've got some float mode source being extracted into
 697              a different float mode destination -- this combination of
 698              subregs results in Severe Tire Damage.  */
 699           gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
 700                       || GET_MODE_CLASS (fieldmode) == MODE_INT
 701                       || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
 702           arg0 = SUBREG_REG (arg0);
 703         }
 704
 705       subreg_off = bitnum / BITS_PER_UNIT;
 706       if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
 707         {
 708           arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
 709
 710           create_fixed_operand (&ops[0], arg0);
 711           /* Shrink the source operand to FIELDMODE.  */
 712           create_convert_operand_to (&ops[1], value, fieldmode, false);
 713           if (maybe_expand_insn (icode, 2, ops))
 714             return true;
 715         }
 716     }
 717
 718   /* Handle fields bigger than a word.  */
 719
 720   if (bitsize > BITS_PER_WORD)
 721     {
 722       /* Here we transfer the words of the field
 723          in the order least significant first.
 724          This is because the most significant word is the one which may
 725          be less than full.
 726          However, only do that if the value is not BLKmode.  */
 727
 728       unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
 729       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
 730       unsigned int i;
 731       rtx last;
 732
 733       /* This is the mode we must force value to, so that there will be enough
 734          subwords to extract.  Note that fieldmode will often (always?) be
 735          VOIDmode, because that is what store_field uses to indicate that this
 736          is a bit field, but passing VOIDmode to operand_subword_force
 737          is not allowed.  */
 738       fieldmode = GET_MODE (value);
 739       if (fieldmode == VOIDmode)
 740         fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
 741
 742       last = get_last_insn ();
 743       for (i = 0; i < nwords; i++)
 744         {
 745           /* If I is 0, use the low-order word in both field and target;
 746              if I is 1, use the next to lowest word; and so on.  */
 747           unsigned int wordnum = (backwards
 748                                   ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD
 749                                   - i - 1
 750                                   : i);
 751           unsigned int bit_offset = (backwards
 752                                      ? MAX ((int) bitsize - ((int) i + 1)
 753                                             * BITS_PER_WORD,
 754                                             0)
 755                                      : (int) i * BITS_PER_WORD);
 756           rtx value_word = operand_subword_force (value, wordnum, fieldmode);
 757           unsigned HOST_WIDE_INT new_bitsize =
 758             MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
 759
 760           /* If the remaining chunk doesn't have full wordsize we have
 761              to make sure that for big endian machines the higher order
 762              bits are used.  */
 763           if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
 764             value_word = simplify_expand_binop (word_mode, lshr_optab,
 765                                                 value_word,
 766                                                 GEN_INT (BITS_PER_WORD
 767                                                          - new_bitsize),
 768                                                 NULL_RTX, true,
 769                                                 OPTAB_LIB_WIDEN);
 770
 771           if (!store_bit_field_1 (op0, new_bitsize,
 772                                   bitnum + bit_offset,
 773                                   bitregion_start, bitregion_end,
 774                                   word_mode,
 775                                   value_word, fallback_p))
 776             {
 777               delete_insns_since (last);
 778               return false;
 779             }
 780         }
 781       return true;
 782     }
 783
 784   /* If VALUE has a floating-point or complex mode, access it as an
 785      integer of the corresponding size.  This can occur on a machine
 786      with 64 bit registers that uses SFmode for float.  It can also
 787      occur for unaligned float or complex fields.  */
 788   orig_value = value;
 789   if (GET_MODE (value) != VOIDmode
 790       && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
 791       && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
 792     {
 793       value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
 794       emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
 795     }
 796
 797   /* If OP0 is a multi-word register, narrow it to the affected word.
 798      If the region spans two words, defer to store_split_bit_field.  */
 799   if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
 800     {
 801       op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
 802                                  bitnum / BITS_PER_WORD * UNITS_PER_WORD);
 803       gcc_assert (op0);
 804       bitnum %= BITS_PER_WORD;
 805       if (bitnum + bitsize > BITS_PER_WORD)
 806         {
 807           if (!fallback_p)
 808             return false;
 809
 810           store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
 811                                  bitregion_end, value);
 812           return true;
 813         }
 814     }
 815
 816   /* From here on we can assume that the field to be stored in fits
 817      within a word.  If the destination is a register, it too fits
 818      in a word.  */
 819
 820   extraction_insn insv;
 821   if (!MEM_P (op0)
 822       && get_best_reg_extraction_insn (&insv, EP_insv,
 823                                        GET_MODE_BITSIZE (GET_MODE (op0)),
 824                                        fieldmode)
 825       && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
 826     return true;
 827
 828   /* If OP0 is a memory, try copying it to a register and seeing if a
 829      cheap register alternative is available.  */
 830   if (MEM_P (op0))
 831     {
 832       /* Do not use unaligned memory insvs for volatile bitfields when
 833          -fstrict-volatile-bitfields is in effect.  */
 834       if (!(MEM_VOLATILE_P (op0)
 835             && flag_strict_volatile_bitfields > 0)
 836           && get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
 837                                            fieldmode)
 838           && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
 839         return true;
 840
 841       rtx last = get_last_insn ();
 842
 843       /* Try loading part of OP0 into a register, inserting the bitfield
 844          into that, and then copying the result back to OP0.  */
 845       unsigned HOST_WIDE_INT bitpos;
 846       rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
 847                                                bitregion_start, bitregion_end,
 848                                                fieldmode, &bitpos);
 849       if (xop0)
 850         {
 851           rtx tempreg = copy_to_reg (xop0);
 852           if (store_bit_field_1 (tempreg, bitsize, bitpos,
 853                                  bitregion_start, bitregion_end,
 854                                  fieldmode, orig_value, false))
 855             {
 856               emit_move_insn (xop0, tempreg);
 857               return true;
 858             }
 859           delete_insns_since (last);
 860         }
 861     }
 862
 863   if (!fallback_p)
 864     return false;
 865
 866   store_fixed_bit_field (op0, bitsize, bitnum, bitregion_start,
 867                          bitregion_end, value);
 868   return true;
 869 }
 870
 871 /* Generate code to store value from rtx VALUE
 872    into a bit-field within structure STR_RTX
 873    containing BITSIZE bits starting at bit BITNUM.
 874
 875    BITREGION_START is bitpos of the first bitfield in this region.
 876    BITREGION_END is the bitpos of the ending bitfield in this region.
 877    These two fields are 0, if the C++ memory model does not apply,
 878    or we are not interested in keeping track of bitfield regions.
 879
 880    FIELDMODE is the machine-mode of the FIELD_DECL node for this field.

        This is the entry point that wraps store_bit_field_1: it calls it
        with FALLBACK_P set to true and treats a false return as
        unreachable.  Before that call, if STR_RTX is a MEM and
        BITREGION_START is nonzero, the MEM is re-addressed to start at
        the bit region, and BITNUM and BITREGION_END are made relative to
        that new start.  */
 881
 882 void
 883 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 884                  unsigned HOST_WIDE_INT bitnum,
 885                  unsigned HOST_WIDE_INT bitregion_start,
 886                  unsigned HOST_WIDE_INT bitregion_end,
 887                  enum machine_mode fieldmode,
 888                  rtx value)
 889 {
 890   /* Under the C++0x memory model, we must not touch bits outside the
 891      bit region.  Adjust the address to start at the beginning of the
 892      bit region.  */
 893   if (MEM_P (str_rtx) && bitregion_start > 0)
 894     {
 895       enum machine_mode bestmode;
 896       HOST_WIDE_INT offset, size;
 897
           /* The adjustment below is done in whole bytes, so the region
              must begin on a byte boundary.  */
 898       gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);
 899
           /* OFFSET is the byte offset of the region start.  BITNUM is made
              region-relative before SIZE is computed, so SIZE is the number
              of bytes from the region start through the last bit of the
              field, rounded up.  */
 900       offset = bitregion_start / BITS_PER_UNIT;
 901       bitnum -= bitregion_start;
 902       size = (bitnum + bitsize + BITS_PER_UNIT - 1) / BITS_PER_UNIT;
 903       bitregion_end -= bitregion_start;
 904       bitregion_start = 0;
 905       bestmode = get_best_mode (bitsize, bitnum,
 906                                 bitregion_start, bitregion_end,
 907                                 MEM_ALIGN (str_rtx), VOIDmode,
 908                                 MEM_VOLATILE_P (str_rtx));
 909       str_rtx = adjust_bitfield_address_size (str_rtx, bestmode, offset, size);
 910     }
 911
       /* FALLBACK_P is true here, so the store is not expected to fail.  */
 912   if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
 913                           bitregion_start, bitregion_end,
 914                           fieldmode, value, true))
 915     gcc_unreachable ();
 916 }
 917 \f
 918 /* Use shifts and boolean operations to store VALUE into a bit field of
 919    width BITSIZE in OP0, starting at bit BITNUM.

        BITREGION_START and BITREGION_END are only used when OP0 is a MEM:
        they are passed to get_best_mode and, if the store has to be split,
        on to store_split_bit_field.  A nonzero BITREGION_END also bounds
        the mode width accepted on the strict-volatile path.

        When OP0 is a MEM for which no access mode is found, the whole
        store is handed to store_split_bit_field.  Otherwise VALUE is
        brought into OP0's mode and moved to the field position, the old
        contents of OP0 are ANDed with a mask for the field, VALUE is IORed
        in, and the result is moved back to OP0.  The AND is skipped when
        VALUE is a CONST_INT with every field bit set, and the IOR is
        skipped when VALUE is a CONST_INT whose field bits are all zero.  */
 920
 921 static void
 922 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
 923                        unsigned HOST_WIDE_INT bitnum,
 924                        unsigned HOST_WIDE_INT bitregion_start,
 925                        unsigned HOST_WIDE_INT bitregion_end,
 926                        rtx value)
 927 {
 928   enum machine_mode mode;
 929   rtx temp;
       /* ALL_ZERO / ALL_ONE are set only for a CONST_INT VALUE whose low
          BITSIZE bits are all clear / all set; they let the IOR / AND
          step below be omitted.  */
 930   int all_zero = 0;
 931   int all_one = 0;
 932
 933   /* There is a case not handled here:
 934      a structure with a known alignment of just a halfword
 935      and a field split across two aligned halfwords within the structure.
 936      Or likewise a structure with a known alignment of just a byte
 937      and a field split across two bytes.
 938      Such cases are not supposed to be able to occur.  */
 939
 940   if (MEM_P (op0))
 941     {
           /* MAXBITS is the widest access allowed on the strict-volatile
              path below: the size of the bit region when one is given,
              MAX_FIXED_MODE_SIZE otherwise.  */
 942       unsigned HOST_WIDE_INT maxbits = MAX_FIXED_MODE_SIZE;
 943
 944       if (bitregion_end)
 945         maxbits = bitregion_end - bitregion_start + 1;
 946
 947       /* Get the proper mode to use for this field.  We want a mode that
 948          includes the entire field.  If such a mode would be larger than
 949          a word, we won't be doing the extraction the normal way.
 950          We don't want a mode bigger than the destination.  */
 951
 952       mode = GET_MODE (op0);
 953       if (GET_MODE_BITSIZE (mode) == 0
 954           || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
 955         mode = word_mode;
 956
           /* With -fstrict-volatile-bitfields, a volatile MEM is accessed
              in its own mode as long as that mode has a nonzero size that
              fits in MAXBITS.  Otherwise ask get_best_mode, using the MODE
              chosen above as the largest mode to consider.  */
 957       if (MEM_VOLATILE_P (op0)
 958           && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
 959           && GET_MODE_BITSIZE (GET_MODE (op0)) <= maxbits
 960           && flag_strict_volatile_bitfields > 0)
 961         mode = GET_MODE (op0);
 962       else
 963         mode = get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
 964                               MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
 965
 966       if (mode == VOIDmode)
 967         {
 968           /* The only way this should occur is if the field spans word
 969              boundaries.  */
 970           store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
 971                                  bitregion_end, value);
 972           return;
 973         }
 974
           /* Replace OP0 by a MODE-sized reference and update BITNUM to
              match it.  */
 975       op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
 976     }
 977
 978   mode = GET_MODE (op0);
 979   gcc_assert (SCALAR_INT_MODE_P (mode));
 980
 981   /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
 982      for invalid input, such as f5 from gcc.dg/pr48335-2.c.  */
 983
 984   if (BYTES_BIG_ENDIAN)
 985     /* BITNUM is the distance between our msb
 986        and that of the containing datum.
 987        Convert it to the distance from the lsb.  */
 988     bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
 989
 990   /* Now BITNUM is always the distance between our lsb
 991      and that of OP0.  */
 992
 993   /* Shift VALUE left by BITNUM bits.  If VALUE is not constant,
 994      we must first convert its mode to MODE.  */
 995
 996   if (CONST_INT_P (value))
 997     {
 998       HOST_WIDE_INT v = INTVAL (value);
 999
           /* Keep only the low BITSIZE bits of the constant.  The shift is
              guarded so that it is never done by the full width of
              HOST_WIDE_INT.  */
1000       if (bitsize < HOST_BITS_PER_WIDE_INT)
1001         v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
1002
1003       if (v == 0)
1004         all_zero = 1;
1005       else if ((bitsize < HOST_BITS_PER_WIDE_INT
1006                 && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
1007                || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
1008         all_one = 1;
1009
1010       value = lshift_value (mode, v, bitnum);
1011     }
1012   else
1013     {
           /* VALUE needs masking to BITSIZE bits unless its mode is exactly
              BITSIZE bits wide or the field ends at the top bit of MODE
              (presumably because the left shift then discards any excess
              high bits).  This is computed before VALUE is converted,
              since it looks at VALUE's original mode.  */
1014       int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
1015                       && bitnum + bitsize != GET_MODE_BITSIZE (mode));
1016
1017       if (GET_MODE (value) != mode)
1018         value = convert_to_mode (mode, value, 1);
1019
1020       if (must_and)
1021         value = expand_binop (mode, and_optab, value,
1022                               mask_rtx (mode, 0, bitsize, 0),
1023                               NULL_RTX, 1, OPTAB_LIB_WIDEN);
1024       if (bitnum > 0)
1025         value = expand_shift (LSHIFT_EXPR, mode, value,
1026                               bitnum, NULL_RTX, 1);
1027     }
1028
1029   /* Now clear the chosen bits in OP0,
1030      except that if VALUE is -1 we need not bother.  */
1031   /* We keep the intermediates in registers to allow CSE to combine
1032      consecutive bitfield assignments.  */
1033
1034   temp = force_reg (mode, op0);
1035
1036   if (! all_one)
1037     {
1038       temp = expand_binop (mode, and_optab, temp,
1039                            mask_rtx (mode, bitnum, bitsize, 1),
1040                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
1041       temp = force_reg (mode, temp);
1042     }
1043
1044   /* Now logical-or VALUE into OP0, unless it is zero.  */
1045
1046   if (! all_zero)
1047     {
1048       temp = expand_binop (mode, ior_optab, temp, value,
1049                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
1050       temp = force_reg (mode, temp);
1051     }
1052
       /* Write the combined value back, unless TEMP is OP0 itself.  */
1053   if (op0 != temp)
1054     {
1055       op0 = copy_rtx (op0);
1056       emit_move_insn (op0, temp);
1057     }
1058 }
1059 \f
1060 /* Store a bit field that is split across multiple accessible memory objects.
1061
1062    OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1063    BITSIZE is the field width; BITPOS the position of its first bit
1064    (within the word).
1065    VALUE is the value to store.
        BITREGION_START and BITREGION_END are forwarded to
        store_fixed_bit_field.  When BITREGION_END is nonzero and OP0 is
        neither a REG nor a SUBREG of a REG, it also limits how far each
        individual access may reach.

        The field is stored piece by piece.  Each piece lies within a single
        UNIT-bit chunk of OP0 and is written with store_fixed_bit_field.
        UNIT starts as BITS_PER_WORD for a REG or SUBREG, and as the smaller
        of MEM_ALIGN (OP0) and BITS_PER_WORD otherwise.
1066
1067    This does not yet handle fields wider than BITS_PER_WORD.  */
1068
1069 static void
1070 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1071                        unsigned HOST_WIDE_INT bitpos,
1072                        unsigned HOST_WIDE_INT bitregion_start,
1073                        unsigned HOST_WIDE_INT bitregion_end,
1074                        rtx value)
1075 {
       /* UNIT is the chunk size in bits; BITSDONE counts the bits of the
          field that have been stored so far.  */
1076   unsigned int unit;
1077   unsigned int bitsdone = 0;
1078
1079   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1080      much at a time.  */
1081   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1082     unit = BITS_PER_WORD;
1083   else
1084     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1085
1086   /* If VALUE is a constant other than a CONST_INT, get it into a register in
1087      WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
1088      that VALUE might be a floating-point constant.  */
1089   if (CONSTANT_P (value) && !CONST_INT_P (value))
1090     {
1091       rtx word = gen_lowpart_common (word_mode, value);
1092
1093       if (word && (value != word))
1094         value = word;
1095       else
1096         value = gen_lowpart_common (word_mode,
1097                                     force_reg (GET_MODE (value) != VOIDmode
1098                                                ? GET_MODE (value)
1099                                                : word_mode, value));
1100     }
1101
1102   while (bitsdone < bitsize)
1103     {
1104       unsigned HOST_WIDE_INT thissize;
1105       rtx part, word;
1106       unsigned HOST_WIDE_INT thispos;
1107       unsigned HOST_WIDE_INT offset;
1108
           /* OFFSET is the index of the UNIT-bit chunk holding the next bit
              to store; THISPOS is that bit's position within the chunk.  */
1109       offset = (bitpos + bitsdone) / unit;
1110       thispos = (bitpos + bitsdone) % unit;
1111
1112       /* When region of bytes we can touch is restricted, decrease
1113          UNIT close to the end of the region as needed.  If op0 is a REG
1114          or SUBREG of REG, don't do this, as there can't be data races
1115          on a register and we can expand shorter code in some cases.
                The loop is restarted without storing anything so that OFFSET
                and THISPOS are recomputed for the halved UNIT.  */
1116       if (bitregion_end
1117           && unit > BITS_PER_UNIT
1118           && bitpos + bitsdone - thispos + unit > bitregion_end + 1
1119           && !REG_P (op0)
1120           && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
1121         {
1122           unit = unit / 2;
1123           continue;
1124         }
1125
1126       /* THISSIZE must not overrun a word boundary.  Otherwise,
1127          store_fixed_bit_field will call us again, and we will mutually
1128          recurse forever.  */
1129       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1130       thissize = MIN (thissize, unit - thispos);
1131
           /* NOTE(review): the constant masks below shift 1 left by
              THISSIZE, which assumes THISSIZE is smaller than
              HOST_BITS_PER_WIDE_INT -- confirm this holds on hosts where
              BITS_PER_WORD equals HOST_BITS_PER_WIDE_INT.  */
1132       if (BYTES_BIG_ENDIAN)
1133         {
1134           /* Fetch successively less significant portions.  */
1135           if (CONST_INT_P (value))
1136             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1137                              >> (bitsize - bitsdone - thissize))
1138                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
1139           else
1140             {
1141               int total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1142               /* The args are chosen so that the last part includes the
1143                  lsb.  Give extract_fixed_bit_field the position it needs
1144                  (with endianness compensation) to fetch the piece we want.  */
1145               part = extract_fixed_bit_field (word_mode, value, thissize,
1146                                               total_bits - bitsize + bitsdone,
1147                                               NULL_RTX, 1);
1148             }
1149         }
1150       else
1151         {
1152           /* Fetch successively more significant portions.  */
1153           if (CONST_INT_P (value))
1154             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1155                              >> bitsdone)
1156                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
1157           else
1158             part = extract_fixed_bit_field (word_mode, value, thissize,
1159                                             bitsdone, NULL_RTX, 1);
1160         }
1161
1162       /* If OP0 is a register, then handle OFFSET here.
1163
1164          When handling multiword bitfields, store_bit_field_1 may pass
1165          down a word_mode SUBREG of a larger REG for a bitfield that actually
1166          crosses a word boundary.  Thus, for a SUBREG, we must find
1167          the current word starting from the base register.
 
                In both register cases, an inner mode that is not BLKmode and is
                narrower than a word has only one word: any nonzero word index
                is then marked with const0_rtx and skipped below.  After WORD
                has been chosen, OFFSET is reduced to the index of the chunk
                within that word.  */
1168       if (GET_CODE (op0) == SUBREG)
1169         {
1170           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD)
1171                             + (offset * unit / BITS_PER_WORD);
1172           enum machine_mode sub_mode = GET_MODE (SUBREG_REG (op0));
1173           if (sub_mode != BLKmode && GET_MODE_SIZE (sub_mode) < UNITS_PER_WORD)
1174             word = word_offset ? const0_rtx : op0;
1175           else
1176             word = operand_subword_force (SUBREG_REG (op0), word_offset,
1177                                           GET_MODE (SUBREG_REG (op0)));
1178           offset &= BITS_PER_WORD / unit - 1;
1179         }
1180       else if (REG_P (op0))
1181         {
1182           enum machine_mode op0_mode = GET_MODE (op0);
1183           if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
1184             word = offset ? const0_rtx : op0;
1185           else
1186             word = operand_subword_force (op0, offset * unit / BITS_PER_WORD,
1187                                           GET_MODE (op0));
               /* UNIT is never halved for a REG, so it is still
                  BITS_PER_WORD here and this leaves OFFSET at zero.  */
1188           offset &= BITS_PER_WORD / unit - 1;
1189         }
1190       else
1191         word = op0;
1192
1193       /* OFFSET is in UNITs, and UNIT is in bits.  If WORD is const0_rtx,
1194          it is just an out-of-bounds access.  Ignore it.  */
1195       if (word != const0_rtx)
1196         store_fixed_bit_field (word, thissize, offset * unit + thispos,
1197                                bitregion_start, bitregion_end, part);
1198       bitsdone += thissize;
1199     }
1200 }
1201 \f
1202 /* A subroutine of extract_bit_field_1 that converts return value X
1203    to either MODE or TMODE.  MODE, TMODE and UNSIGNEDP are arguments
1204    to extract_bit_field.
        X is returned unchanged if it already has mode MODE or TMODE;
        otherwise it is converted to TMODE.  */
1205
1206 static rtx
1207 convert_extracted_bit_field (rtx x, enum machine_mode mode,
1208                              enum machine_mode tmode, bool unsignedp)
1209 {
1210   if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1211     return x;
1212
1213   /* If TMODE is not a scalar integer mode, first convert X to the
1214      integer mode with TMODE's bit size, force it into a register, and
1215      then take its low part in TMODE (e.g. as a floating-point value).  */
1216   if (!SCALAR_INT_MODE_P (tmode))
1217     {
1218       enum machine_mode smode;
1219
1220       smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1221       x = convert_to_mode (smode, x, unsignedp);
1222       x = force_reg (smode, x);
1223       return gen_lowpart (tmode, x);
1224     }
1225
       /* TMODE is a scalar integer mode: an ordinary mode conversion,
          honouring UNSIGNEDP, is enough.  */
1226   return convert_to_mode (tmode, x, unsignedp);
1227 }
1228
1229 /* Try to use an ext(z)v pattern to extract a field from OP0.
1230    Return the extracted value on success, otherwise return null.
1231    EXTV describes the pattern to use; its field_mode is the mode of the
1232    extraction (EXT_MODE below).  The other arguments are as for
        extract_bit_field.

        Null is returned when BITSIZE is zero or wider than EXT_MODE, when
        OP0 is a SUBREG whose mode is not EXT_MODE, or when the pattern
        cannot be expanded for these operands.  */
1233
1234 static rtx
1235 extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
1236                               unsigned HOST_WIDE_INT bitsize,
1237                               unsigned HOST_WIDE_INT bitnum,
1238                               int unsignedp, rtx target,
1239                               enum machine_mode mode, enum machine_mode tmode)
1240 {
1241   struct expand_operand ops[4];
1242   rtx spec_target = target;
1243   rtx spec_target_subreg = 0;
1244   enum machine_mode ext_mode = extv->field_mode;
1245   unsigned unit = GET_MODE_BITSIZE (ext_mode);
1246
       /* SPEC_TARGET is the rtx to hand back when the pattern writes the
          requested target directly.  SPEC_TARGET_SUBREG, when set, is an
          EXT_MODE low part of SPEC_TARGET through which the pattern writes.
          UNIT is the width of EXT_MODE in bits.  */
1247   if (bitsize == 0 || unit < bitsize)
1248     return NULL_RTX;
1249
1250   if (MEM_P (op0))
1251     /* Get a reference to the first byte of the field.  */
1252     op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
1253                                 &bitnum);
1254   else
1255     {
1256       /* Convert from counting within OP0 to counting in EXT_MODE.  */
1257       if (BYTES_BIG_ENDIAN)
1258         bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1259
1260       /* If op0 is a register, we need it in EXT_MODE to make it
1261          acceptable to the format of ext(z)v.  A SUBREG in another mode
                is not rewrapped; we give up instead.  */
1262       if (GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1263         return NULL_RTX;
1264       if (REG_P (op0) && GET_MODE (op0) != ext_mode)
1265         op0 = gen_lowpart_SUBREG (ext_mode, op0);
1266     }
1267
1268   /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1269      "backwards" from the size of the unit we are extracting from.
1270      Otherwise, we count bits from the most significant on a
1271      BYTES/BITS_BIG_ENDIAN machine.  */
1272
1273   if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1274     bitnum = unit - bitsize - bitnum;
1275
       /* With no target supplied, extract into a fresh TMODE register.  */
1276   if (target == 0)
1277     target = spec_target = gen_reg_rtx (tmode);
1278
1279   if (GET_MODE (target) != ext_mode)
1280     {
1281       /* Don't use LHS paradoxical subreg if explicit truncation is needed
1282          between the mode of the extraction (word_mode) and the target
1283          mode.  Instead, create a temporary and use convert_move to set
1284          the target.  */
1285       if (REG_P (target)
1286           && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode))
1287         {
1288           target = gen_lowpart (ext_mode, target);
               /* Remember the low part only when EXT_MODE has more
                  precision than SPEC_TARGET's mode, so that a result
                  written through it can be recognised below.  */
1289           if (GET_MODE_PRECISION (ext_mode)
1290               > GET_MODE_PRECISION (GET_MODE (spec_target)))
1291             spec_target_subreg = target;
1292         }
1293       else
1294         target = gen_reg_rtx (ext_mode);
1295     }
1296
       /* Operands of the pattern: output, source, field width, and field
          position.  */
1297   create_output_operand (&ops[0], target, ext_mode);
1298   create_fixed_operand (&ops[1], op0);
1299   create_integer_operand (&ops[2], bitsize);
1300   create_integer_operand (&ops[3], bitnum);
1301   if (maybe_expand_insn (extv->icode, 4, ops))
1302     {
           /* Use the output operand recorded in OPS[0].  If it is
              SPEC_TARGET, or the EXT_MODE low part of SPEC_TARGET, the
              result is SPEC_TARGET itself.  Anything else still has to be
              converted to MODE or TMODE.  */
1303       target = ops[0].value;
1304       if (target == spec_target)
1305         return target;
1306       if (target == spec_target_subreg)
1307         return spec_target;
1308       return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1309     }
1310   return NULL_RTX;
1311 }
1312
1313 /* A subroutine of extract_bit_field, with the same arguments.
1314    If FALLBACK_P is true, fall back to extract_fixed_bit_field
1315    if we can find no other means of implementing the operation.
1316    If FALLBACK_P is false, return NULL instead.  */
1317
1318 static rtx
1319 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1320                      unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1321                      enum machine_mode mode, enum machine_mode tmode,
1322                      bool fallback_p)
1323 {
1324   rtx op0 = str_rtx;
1325   enum machine_mode int_mode;
1326   enum machine_mode mode1;
1327
1328   if (tmode == VOIDmode)
1329     tmode = mode;
1330
1331   while (GET_CODE (op0) == SUBREG)
1332     {
1333       bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1334       op0 = SUBREG_REG (op0);
1335     }
1336
1337   /* If we have an out-of-bounds access to a register, just return an
1338      uninitialized register of the required mode.  This can occur if the
1339      source code contains an out-of-bounds access to a small array.  */
1340   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1341     return gen_reg_rtx (tmode);
1342
1343   if (REG_P (op0)
1344       && mode == GET_MODE (op0)
1345       && bitnum == 0
1346       && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1347     {
1348       /* We're trying to extract a full register from itself.  */
1349       return op0;
1350     }
1351
1352   /* See if we can get a better vector mode before extracting.  */
1353   if (VECTOR_MODE_P (GET_MODE (op0))
1354       && !MEM_P (op0)
1355       && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1356     {
1357       enum machine_mode new_mode;
1358
1359       if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1360         new_mode = MIN_MODE_VECTOR_FLOAT;
1361       else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1362         new_mode = MIN_MODE_VECTOR_FRACT;
1363       else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1364         new_mode = MIN_MODE_VECTOR_UFRACT;
1365       else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1366         new_mode = MIN_MODE_VECTOR_ACCUM;
1367       else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1368         new_mode = MIN_MODE_VECTOR_UACCUM;
1369       else
1370         new_mode = MIN_MODE_VECTOR_INT;
1371
1372       for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1373         if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1374             && targetm.vector_mode_supported_p (new_mode))
1375           break;
1376       if (new_mode != VOIDmode)
1377         op0 = gen_lowpart (new_mode, op0);
1378     }
1379
1380   /* Use vec_extract patterns for extracting parts of vectors whenever
1381      available.  */
1382   if (VECTOR_MODE_P (GET_MODE (op0))
1383       && !MEM_P (op0)
1384       && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing
1385       && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1386           == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1387     {
1388       struct expand_operand ops[3];
1389       enum machine_mode outermode = GET_MODE (op0);
1390       enum machine_mode innermode = GET_MODE_INNER (outermode);
1391       enum insn_code icode = optab_handler (vec_extract_optab, outermode);
1392       unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1393
1394       create_output_operand (&ops[0], target, innermode);
1395       create_input_operand (&ops[1], op0, outermode);
1396       create_integer_operand (&ops[2], pos);
1397       if (maybe_expand_insn (icode, 3, ops))
1398         {
1399           target = ops[0].value;
1400           if (GET_MODE (target) != mode)
1401             return gen_lowpart (tmode, target);
1402           return target;
1403         }
1404     }
1405
1406   /* Make sure we are playing with integral modes.  Pun with subregs
1407      if we aren't.  */
1408   {
1409     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1410     if (imode != GET_MODE (op0))
1411       {
1412         if (MEM_P (op0))
1413           op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
1414         else if (imode != BLKmode)
1415           {
1416             op0 = gen_lowpart (imode, op0);
1417
1418             /* If we got a SUBREG, force it into a register since we
1419                aren't going to be able to do another SUBREG on it.  */
1420             if (GET_CODE (op0) == SUBREG)
1421               op0 = force_reg (imode, op0);
1422           }
1423         else if (REG_P (op0))
1424           {
1425             rtx reg, subreg;
1426             imode = smallest_mode_for_size (GET_MODE_BITSIZE (GET_MODE (op0)),
1427                                             MODE_INT);
1428             reg = gen_reg_rtx (imode);
1429             subreg = gen_lowpart_SUBREG (GET_MODE (op0), reg);
1430             emit_move_insn (subreg, op0);
1431             op0 = reg;
1432             bitnum += SUBREG_BYTE (subreg) * BITS_PER_UNIT;
1433           }
1434         else
1435           {
1436             HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (op0));
1437             rtx mem = assign_stack_temp (GET_MODE (op0), size);
1438             emit_move_insn (mem, op0);
1439             op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
1440           }
1441       }
1442   }
1443
1444   /* ??? We currently assume TARGET is at least as big as BITSIZE.
1445      If that's wrong, the solution is to test for it and set TARGET to 0
1446      if needed.  */
1447
1448   /* If the bitfield is volatile, we need to make sure the access
1449      remains on a type-aligned boundary.  */
1450   if (GET_CODE (op0) == MEM
1451       && MEM_VOLATILE_P (op0)
1452       && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
1453       && flag_strict_volatile_bitfields > 0)
1454     goto no_subreg_mode_swap;
1455
1456   /* Only scalar integer modes can be converted via subregs.  There is an
1457      additional problem for FP modes here in that they can have a precision
1458      which is different from the size.  mode_for_size uses precision, but
1459      we want a mode based on the size, so we must avoid calling it for FP
1460      modes.  */
1461   mode1 = mode;
1462   if (SCALAR_INT_MODE_P (tmode))
1463     {
1464       enum machine_mode try_mode = mode_for_size (bitsize,
1465                                                   GET_MODE_CLASS (tmode), 0);
1466       if (try_mode != BLKmode)
1467         mode1 = try_mode;
1468     }
1469   gcc_assert (mode1 != BLKmode);
1470
1471   /* Extraction of a full MODE1 value can be done with a subreg as long
1472      as the least significant bit of the value is the least significant
1473      bit of either OP0 or a word of OP0.  */
1474   if (!MEM_P (op0)
1475       && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
1476       && bitsize == GET_MODE_BITSIZE (mode1)
1477       && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0)))
1478     {
1479       rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1480                                      bitnum / BITS_PER_UNIT);
1481       if (sub)
1482         return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1483     }
1484
1485   /* Extraction of a full MODE1 value can be done with a load as long as
1486      the field is on a byte boundary and is sufficiently aligned.  */
1487   if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1))
1488     {
1489       op0 = adjust_bitfield_address (op0, mode1, bitnum / BITS_PER_UNIT);
1490       return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1491     }
1492
1493  no_subreg_mode_swap:
1494
1495   /* Handle fields bigger than a word.  */
1496
1497   if (bitsize > BITS_PER_WORD)
1498     {
1499       /* Here we transfer the words of the field
1500          in the order least significant first.
1501          This is because the most significant word is the one which may
1502          be less than full.  */
1503
1504       unsigned int backwards = WORDS_BIG_ENDIAN;
1505       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1506       unsigned int i;
1507       rtx last;
1508
1509       if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1510         target = gen_reg_rtx (mode);
1511
1512       /* Indicate for flow that the entire target reg is being set.  */
1513       emit_clobber (target);
1514
1515       last = get_last_insn ();
1516       for (i = 0; i < nwords; i++)
1517         {
1518           /* If I is 0, use the low-order word in both field and target;
1519              if I is 1, use the next to lowest word; and so on.  */
1520           /* Word number in TARGET to use.  */
1521           unsigned int wordnum
1522             = (backwards
1523                ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1524                : i);
1525           /* Offset from start of field in OP0.  */
1526           unsigned int bit_offset = (backwards
1527                                      ? MAX ((int) bitsize - ((int) i + 1)
1528                                             * BITS_PER_WORD,
1529                                             0)
1530                                      : (int) i * BITS_PER_WORD);
1531           rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1532           rtx result_part
1533             = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1534                                              bitsize - i * BITS_PER_WORD),
1535                                    bitnum + bit_offset, 1, target_part,
1536                                    mode, word_mode, fallback_p);
1537
1538           gcc_assert (target_part);
1539           if (!result_part)
1540             {
1541               delete_insns_since (last);
1542               return NULL;
1543             }
1544
1545           if (result_part != target_part)
1546             emit_move_insn (target_part, result_part);
1547         }
1548
1549       if (unsignedp)
1550         {
1551           /* Unless we've filled TARGET, the upper regs in a multi-reg value
1552              need to be zero'd out.  */
1553           if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1554             {
1555               unsigned int i, total_words;
1556
1557               total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1558               for (i = nwords; i < total_words; i++)
1559                 emit_move_insn
1560                   (operand_subword (target,
1561                                     backwards ? total_words - i - 1 : i,
1562                                     1, VOIDmode),
1563                    const0_rtx);
1564             }
1565           return target;
1566         }
1567
1568       /* Signed bit field: sign-extend with two arithmetic shifts.  */
1569       target = expand_shift (LSHIFT_EXPR, mode, target,
1570                              GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1571       return expand_shift (RSHIFT_EXPR, mode, target,
1572                            GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1573     }
1574
1575   /* If OP0 is a multi-word register, narrow it to the affected word.
1576      If the region spans two words, defer to extract_split_bit_field.  */
1577   if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1578     {
1579       op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
1580                                  bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1581       bitnum %= BITS_PER_WORD;
1582       if (bitnum + bitsize > BITS_PER_WORD)
1583         {
1584           if (!fallback_p)
1585             return NULL_RTX;
1586           target = extract_split_bit_field (op0, bitsize, bitnum, unsignedp);
1587           return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1588         }
1589     }
1590
1591   /* From here on we know the desired field is smaller than a word.
1592      If OP0 is a register, it too fits within a word.  */
1593   enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
1594   extraction_insn extv;
1595   if (!MEM_P (op0)
1596       /* ??? We could limit the structure size to the part of OP0 that
1597          contains the field, with appropriate checks for endianness
1598          and TRULY_NOOP_TRUNCATION.  */
1599       && get_best_reg_extraction_insn (&extv, pattern,
1600                                        GET_MODE_BITSIZE (GET_MODE (op0)),
1601                                        tmode))
1602     {
1603       rtx result = extract_bit_field_using_extv (&extv, op0, bitsize, bitnum,
1604                                                  unsignedp, target, mode,
1605                                                  tmode);
1606       if (result)
1607         return result;
1608     }
1609
1610   /* If OP0 is a memory, try copying it to a register and seeing if a
1611      cheap register alternative is available.  */
1612   if (MEM_P (op0))
1613     {
1614       /* Do not use extv/extzv for volatile bitfields when
1615          -fstrict-volatile-bitfields is in effect.  */
1616       if (!(MEM_VOLATILE_P (op0) && flag_strict_volatile_bitfields > 0)
1617           && get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
1618                                            tmode))
1619         {
1620           rtx result = extract_bit_field_using_extv (&extv, op0, bitsize,
1621                                                      bitnum, unsignedp,
1622                                                      target, mode,
1623                                                      tmode);
1624           if (result)
1625             return result;
1626         }
1627
1628       rtx last = get_last_insn ();
1629
1630       /* Try loading part of OP0 into a register and extracting the
1631          bitfield from that.  */
1632       unsigned HOST_WIDE_INT bitpos;
1633       rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
1634                                                0, 0, tmode, &bitpos);
1635       if (xop0)
1636         {
1637           xop0 = copy_to_reg (xop0);
1638           rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
1639                                             unsignedp, target,
1640                                             mode, tmode, false);
1641           if (result)
1642             return result;
1643           delete_insns_since (last);
1644         }
1645     }
1646
1647   if (!fallback_p)
1648     return NULL;
1649
1650   /* Find a correspondingly-sized integer field, so we can apply
1651      shifts and masks to it.  */
1652   int_mode = int_mode_for_mode (tmode);
1653   if (int_mode == BLKmode)
1654     int_mode = int_mode_for_mode (mode);
1655   /* Should probably push op0 out to memory and then do a load.  */
1656   gcc_assert (int_mode != BLKmode);
1657
1658   target = extract_fixed_bit_field (int_mode, op0, bitsize, bitnum,
1659                                     target, unsignedp);
1660   return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1661 }
1662
1663 /* Generate code to extract a byte-field from STR_RTX
1664    containing BITSIZE bits, starting at BITNUM,
1665    and put it in TARGET if possible (if TARGET is nonzero).
1666    Regardless of TARGET, we return the rtx for where the value is placed.
1667
1668    STR_RTX is the structure containing the byte (a REG or MEM).
1669    UNSIGNEDP is nonzero if this is an unsigned bit field.
1670    MODE is the natural mode of the field value once extracted.
1671    TMODE is the mode the caller would like the value to have;
1672    but the value may be returned with type MODE instead.
1673
1674    If a TARGET is specified and we can store in it at no extra cost,
1675    we do so, and return TARGET.
1676    Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1677    if they are equally easy.  */
1678
1679 rtx
1680 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1681                    unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1682                    enum machine_mode mode, enum machine_mode tmode)
1683 {
1684   return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
1685                               target, mode, tmode, true);
1686 }
1687 \f
/* Use shifts and boolean operations to extract a field of BITSIZE bits
   from bit BITNUM of OP0.

   TMODE is the mode the caller would like the value to have.
   UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
   If TARGET is nonzero, attempts to store the value there
   and return TARGET, but this is not guaranteed.
   If TARGET is not used, create a pseudo-reg of mode TMODE for the value.

   When OP0 is a MEM, an access mode is chosen first and OP0 is narrowed
   to the unit of that mode containing the field; if no such mode exists
   the work is handed to extract_split_bit_field.

   NOTE(review): on the signed path the final shift is done in the
   narrowest integer mode that holds the field, which need not be TMODE;
   callers appear to convert the result afterwards -- confirm.  */

static rtx
extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
                         unsigned HOST_WIDE_INT bitsize,
                         unsigned HOST_WIDE_INT bitnum, rtx target,
                         int unsignedp)
{
  enum machine_mode mode;

  if (MEM_P (op0))
    {
      /* Get the proper mode to use for this field.  We want a mode that
         includes the entire field.  If such a mode would be larger than
         a word, we won't be doing the extraction the normal way.  */

      /* For a volatile MEM under -fstrict-volatile-bitfields, take the
         mode of OP0 itself, else that of TARGET, else TMODE, using the
         first candidate whose bit size is nonzero.  */
      if (MEM_VOLATILE_P (op0)
          && flag_strict_volatile_bitfields > 0)
        {
          if (GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
            mode = GET_MODE (op0);
          else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
            mode = GET_MODE (target);
          else
            mode = tmode;
        }
      else
        mode = get_best_mode (bitsize, bitnum, 0, 0,
                              MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));

      if (mode == VOIDmode)
        /* The only way this should occur is if the field spans word
           boundaries.  */
        return extract_split_bit_field (op0, bitsize, bitnum, unsignedp);

      /* BIT_OFFSET is BITNUM rounded down to a multiple of the access
         size, i.e. the start of the MODE-sized unit holding the field.  */
      unsigned int total_bits = GET_MODE_BITSIZE (mode);
      HOST_WIDE_INT bit_offset = bitnum - bitnum % total_bits;

      /* If we're accessing a volatile MEM, we can't apply BIT_OFFSET
         if it results in a multi-word access where we otherwise wouldn't
         have one.  So, check for that case here.
         NOTE(review): the MEM_P test below is redundant, since we are
         already inside the enclosing MEM_P (op0) block.  */
      if (MEM_P (op0)
          && MEM_VOLATILE_P (op0)
          && flag_strict_volatile_bitfields > 0
          && bitnum % BITS_PER_UNIT + bitsize <= total_bits
          && bitnum % GET_MODE_BITSIZE (mode) + bitsize > total_bits)
        {
          /* If the target doesn't support unaligned access, give up and
             split the access into two.  */
          if (STRICT_ALIGNMENT)
            return extract_split_bit_field (op0, bitsize, bitnum, unsignedp);
          /* Otherwise start the access at the byte containing the
             field rather than at a MODE-aligned boundary.  */
          bit_offset = bitnum - bitnum % BITS_PER_UNIT;
        }
      /* Re-address OP0 in MODE at the chosen offset and make BITNUM
         relative to that new start.  */
      op0 = adjust_bitfield_address (op0, mode, bit_offset / BITS_PER_UNIT);
      bitnum -= bit_offset;
    }

  /* From here on MODE is the mode of the (possibly adjusted) OP0.  */
  mode = GET_MODE (op0);
  gcc_assert (SCALAR_INT_MODE_P (mode));

  /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
     for invalid input, such as extract equivalent of f5 from
     gcc.dg/pr48335-2.c.  */

  if (BYTES_BIG_ENDIAN)
    /* BITNUM is the distance between our msb and that of OP0.
       Convert it to the distance from the lsb.  */
    bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;

  /* Now BITNUM is always the distance between the field's lsb and that of OP0.
     We have reduced the big-endian case to the little-endian case.  */

  if (unsignedp)
    {
      if (bitnum)
        {
          /* If the field does not already start at the lsb,
             shift it so it does.  */
          /* Maybe propagate the target for the shift.  TARGET is only
             offered to the shift when it is a REG and the shift is
             already being done in TMODE.  */
          rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
          if (tmode != mode)
            subtarget = 0;
          op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
        }
      /* Convert the value to the desired mode.  */
      if (mode != tmode)
        op0 = convert_to_mode (tmode, op0, 1);

      /* Unless the msb of the field used to be the msb when we shifted,
         mask out the upper bits.  The test uses MODE, the mode the shift
         was done in; the AND itself is done in the mode OP0 has now.  */

      if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
        return expand_binop (GET_MODE (op0), and_optab, op0,
                             mask_rtx (GET_MODE (op0), 0, bitsize, 0),
                             target, 1, OPTAB_LIB_WIDEN);
      return op0;
    }

  /* To extract a signed bit-field, first shift its msb to the msb of the word,
     then arithmetic-shift its lsb to the lsb of the word.  */
  op0 = force_reg (mode, op0);

  /* Find the narrowest integer mode that contains the field.
     NOTE(review): if no integer mode is wide enough the loop ends with
     MODE == VOIDmode and OP0 unconverted; presumably this cannot happen
     for valid input -- confirm.  */

  for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
       mode = GET_MODE_WIDER_MODE (mode))
    if (GET_MODE_BITSIZE (mode) >= bitsize + bitnum)
      {
        op0 = convert_to_mode (mode, op0, 0);
        break;
      }

  /* TARGET is only used when the working mode is TMODE.  */
  if (mode != tmode)
    target = 0;

  /* Left-shift so that the field's msb becomes the msb of MODE, unless
     it is already there.  */
  if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
    {
      int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
      /* Maybe propagate the target for the shift.  */
      rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
      op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
    }

  /* The arithmetic right shift brings the field's lsb down to bit 0 and
     replicates the sign bit above it.  */
  return expand_shift (RSHIFT_EXPR, mode, op0,
                       GET_MODE_BITSIZE (mode) - bitsize, target, 0);
}
1820 \f
1821 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1822    of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1823    complement of that if COMPLEMENT.  The mask is truncated if
1824    necessary to the width of mode MODE.  The mask is zero-extended if
1825    BITSIZE+BITPOS is too small for MODE.  */
1826
1827 static rtx
1828 mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
1829 {
1830   double_int mask;
1831
1832   mask = double_int::mask (bitsize);
1833   mask = mask.llshift (bitpos, HOST_BITS_PER_DOUBLE_INT);
1834
1835   if (complement)
1836     mask = ~mask;
1837
1838   return immed_double_int_const (mask, mode);
1839 }
1840
1841 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1842    VALUE << BITPOS.  */
1843
1844 static rtx
1845 lshift_value (enum machine_mode mode, unsigned HOST_WIDE_INT value,
1846               int bitpos)
1847 {
1848   double_int val;
1849
1850   val = double_int::from_uhwi (value);
1851   val = val.llshift (bitpos, HOST_BITS_PER_DOUBLE_INT);
1852
1853   return immed_double_int_const (val, mode);
1854 }
1855 \f
/* Extract a bit field that is split across two words
   and return an RTX for the result.

   OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
   BITSIZE is the field width; BITPOS, position of its first bit, in the word.
   UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.

   The field is read in pieces that never cross a UNIT boundary; each
   piece is extracted unsigned, shifted into position and IORed into a
   word_mode result.  */

static rtx
extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
                         unsigned HOST_WIDE_INT bitpos, int unsignedp)
{
  unsigned int unit;
  /* Number of bits of the field extracted so far.  */
  unsigned int bitsdone = 0;
  rtx result = NULL_RTX;
  /* Nonzero until the first piece has been stored in RESULT.  */
  int first = 1;

  /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
     much at a time.  For a MEM, UNIT is further limited by the MEM's
     alignment.  */
  if (REG_P (op0) || GET_CODE (op0) == SUBREG)
    unit = BITS_PER_WORD;
  else
    unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);

  while (bitsdone < bitsize)
    {
      unsigned HOST_WIDE_INT thissize;
      rtx part, word;
      unsigned HOST_WIDE_INT thispos;
      unsigned HOST_WIDE_INT offset;

      /* OFFSET is the index of the UNIT holding the next piece and
         THISPOS the bit position of the piece within that UNIT.  */
      offset = (bitpos + bitsdone) / unit;
      thispos = (bitpos + bitsdone) % unit;

      /* THISSIZE must not overrun a word boundary.  Otherwise,
         extract_fixed_bit_field will call us again, and we will mutually
         recurse forever.  */
      thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
      thissize = MIN (thissize, unit - thispos);

      /* If OP0 is a register, then handle OFFSET here.

         When handling multiword bitfields, extract_bit_field may pass
         down a word_mode SUBREG of a larger REG for a bitfield that actually
         crosses a word boundary.  Thus, for a SUBREG, we must find
         the current word starting from the base register.  */
      if (GET_CODE (op0) == SUBREG)
        {
          int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
          word = operand_subword_force (SUBREG_REG (op0), word_offset,
                                        GET_MODE (SUBREG_REG (op0)));
          offset = 0;
        }
      else if (REG_P (op0))
        {
          word = operand_subword_force (op0, offset, GET_MODE (op0));
          offset = 0;
        }
      else
        /* For a MEM, OFFSET is kept and folded into the bit position
           passed to extract_fixed_bit_field below.  */
        word = op0;

      /* Extract the parts in bit-counting order,
         whose meaning is determined by BYTES_BIG_ENDIAN.
         OFFSET is in UNITs, and UNIT is in bits.  */
      part = extract_fixed_bit_field (word_mode, word, thissize,
                                      offset * unit + thispos, 0, 1);
      bitsdone += thissize;

      /* Shift this part into place for the result.  With
         BYTES_BIG_ENDIAN the earlier pieces are the more significant
         ones, so each piece is shifted left by the number of bits still
         to come; otherwise it is shifted left by the number of bits
         already extracted before it.  */
      if (BYTES_BIG_ENDIAN)
        {
          if (bitsize != bitsdone)
            part = expand_shift (LSHIFT_EXPR, word_mode, part,
                                 bitsize - bitsdone, 0, 1);
        }
      else
        {
          if (bitsdone != thissize)
            part = expand_shift (LSHIFT_EXPR, word_mode, part,
                                 bitsdone - thissize, 0, 1);
        }

      if (first)
        result = part;
      else
        /* Combine the parts with bitwise or.  This works
           because we extracted each part as an unsigned bit field.  */
        result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
                               OPTAB_LIB_WIDEN);

      first = 0;
    }

  /* Unsigned bit field: we are done.  */
  if (unsignedp)
    return result;
  /* Signed bit field: sign-extend with two arithmetic shifts.  */
  result = expand_shift (LSHIFT_EXPR, word_mode, result,
                         BITS_PER_WORD - bitsize, NULL_RTX, 0);
  return expand_shift (RSHIFT_EXPR, word_mode, result,
                       BITS_PER_WORD - bitsize, NULL_RTX, 0);
}
1957 \f
1958 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
1959    the bit pattern.  SRC_MODE is the mode of SRC; if this is smaller than
1960    MODE, fill the upper bits with zeros.  Fail if the layout of either
1961    mode is unknown (as for CC modes) or if the extraction would involve
1962    unprofitable mode punning.  Return the value on success, otherwise
1963    return null.
1964
1965    This is different from gen_lowpart* in these respects:
1966
1967      - the returned value must always be considered an rvalue
1968
1969      - when MODE is wider than SRC_MODE, the extraction involves
1970        a zero extension
1971
1972      - when MODE is smaller than SRC_MODE, the extraction involves
1973        a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
1974
1975    In other words, this routine performs a computation, whereas the
1976    gen_lowpart* routines are conceptually lvalue or rvalue subreg
1977    operations.  */
1978
1979 rtx
1980 extract_low_bits (enum machine_mode mode, enum machine_mode src_mode, rtx src)
1981 {
1982   enum machine_mode int_mode, src_int_mode;
1983
1984   if (mode == src_mode)
1985     return src;
1986
1987   if (CONSTANT_P (src))
1988     {
1989       /* simplify_gen_subreg can't be used here, as if simplify_subreg
1990          fails, it will happily create (subreg (symbol_ref)) or similar
1991          invalid SUBREGs.  */
1992       unsigned int byte = subreg_lowpart_offset (mode, src_mode);
1993       rtx ret = simplify_subreg (mode, src, src_mode, byte);
1994       if (ret)
1995         return ret;
1996
1997       if (GET_MODE (src) == VOIDmode
1998           || !validate_subreg (mode, src_mode, src, byte))
1999         return NULL_RTX;
2000
2001       src = force_reg (GET_MODE (src), src);
2002       return gen_rtx_SUBREG (mode, src, byte);
2003     }
2004
2005   if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2006     return NULL_RTX;
2007
2008   if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2009       && MODES_TIEABLE_P (mode, src_mode))
2010     {
2011       rtx x = gen_lowpart_common (mode, src);
2012       if (x)
2013         return x;
2014     }
2015
2016   src_int_mode = int_mode_for_mode (src_mode);
2017   int_mode = int_mode_for_mode (mode);
2018   if (src_int_mode == BLKmode || int_mode == BLKmode)
2019     return NULL_RTX;
2020
2021   if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2022     return NULL_RTX;
2023   if (!MODES_TIEABLE_P (int_mode, mode))
2024     return NULL_RTX;
2025
2026   src = gen_lowpart (src_int_mode, src);
2027   src = convert_modes (int_mode, src_int_mode, src, true);
2028   src = gen_lowpart (mode, src);
2029   return src;
2030 }
2031 \f
2032 /* Add INC into TARGET.  */
2033
2034 void
2035 expand_inc (rtx target, rtx inc)
2036 {
2037   rtx value = expand_binop (GET_MODE (target), add_optab,
2038                             target, inc,
2039                             target, 0, OPTAB_LIB_WIDEN);
2040   if (value != target)
2041     emit_move_insn (target, value);
2042 }
2043
2044 /* Subtract DEC from TARGET.  */
2045
2046 void
2047 expand_dec (rtx target, rtx dec)
2048 {
2049   rtx value = expand_binop (GET_MODE (target), sub_optab,
2050                             target, dec,
2051                             target, 0, OPTAB_LIB_WIDEN);
2052   if (value != target)
2053     emit_move_insn (target, value);
2054 }
2055 \f
/* Output a shift instruction for expression code CODE,
   with SHIFTED being the rtx for the value to shift,
   and AMOUNT the rtx for the amount to shift by.
   Store the result in the rtx TARGET, if that is convenient.
   If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
   Return the rtx for where the value is.

   CODE is treated as a left operation when it is LSHIFT_EXPR or
   LROTATE_EXPR and as a rotate when it is LROTATE_EXPR or RROTATE_EXPR;
   any other code is handled as a right shift.  MODE is the mode in
   which the operation is performed.  */

static rtx
expand_shift_1 (enum tree_code code, enum machine_mode mode, rtx shifted,
                rtx amount, rtx target, int unsignedp)
{
  rtx op1, temp = 0;
  int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
  int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
  /* Optabs to use; replaced by the vector/vector variants below when
     both MODE and the mode of the amount are vector modes.  */
  optab lshift_optab = ashl_optab;
  optab rshift_arith_optab = ashr_optab;
  optab rshift_uns_optab = lshr_optab;
  optab lrotate_optab = rotl_optab;
  optab rrotate_optab = rotr_optab;
  enum machine_mode op1_mode;
  int attempt;
  bool speed = optimize_insn_for_speed_p ();

  op1 = amount;
  op1_mode = GET_MODE (op1);

  /* Determine whether the shift/rotate amount is a vector, or scalar.  If the
     shift amount is a vector, use the vector/vector shift patterns.  */
  if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
    {
      lshift_optab = vashl_optab;
      rshift_arith_optab = vashr_optab;
      rshift_uns_optab = vlshr_optab;
      lrotate_optab = vrotl_optab;
      rrotate_optab = vrotr_optab;
    }

  /* Previously detected shift-counts computed by NEGATE_EXPR
     and shifted in the other direction; but that does not work
     on all machines.  */

  /* When SHIFT_COUNT_TRUNCATED is set, reduce an out-of-range constant
     count modulo the bit size of MODE, or strip a lowpart SUBREG
     between scalar integer modes from the count.  */
  if (SHIFT_COUNT_TRUNCATED)
    {
      if (CONST_INT_P (op1)
          && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
              (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
        op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
                       % GET_MODE_BITSIZE (mode));
      else if (GET_CODE (op1) == SUBREG
               && subreg_lowpart_p (op1)
               && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
               && SCALAR_INT_MODE_P (GET_MODE (op1)))
        op1 = SUBREG_REG (op1);
    }

  /* Canonicalize rotates by constant amount.  If op1 is bitsize / 2,
     prefer left rotation, if op1 is from bitsize / 2 + 1 to
     bitsize - 1, use other direction of rotate with 1 .. bitsize / 2 - 1
     amount instead.  */
  if (rotate
      && CONST_INT_P (op1)
      && IN_RANGE (INTVAL (op1), GET_MODE_BITSIZE (mode) / 2 + left,
                   GET_MODE_BITSIZE (mode) - 1))
    {
      op1 = GEN_INT (GET_MODE_BITSIZE (mode) - INTVAL (op1));
      left = !left;
      code = left ? LROTATE_EXPR : RROTATE_EXPR;
    }

  /* A shift or rotate by zero leaves the value unchanged.  */
  if (op1 == const0_rtx)
    return shifted;

  /* Check whether its cheaper to implement a left shift by a constant
     bit count by a sequence of additions.  */
  if (code == LSHIFT_EXPR
      && CONST_INT_P (op1)
      && INTVAL (op1) > 0
      && INTVAL (op1) < GET_MODE_PRECISION (mode)
      && INTVAL (op1) < MAX_BITS_PER_WORD
      && (shift_cost (speed, mode, INTVAL (op1))
          > INTVAL (op1) * add_cost (speed, mode))
      && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
    {
      int i;
      /* Each iteration doubles the value, i.e. shifts it left by one.  */
      for (i = 0; i < INTVAL (op1); i++)
        {
          temp = force_reg (mode, shifted);
          shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
                                  unsignedp, OPTAB_LIB_WIDEN);
        }
      return shifted;
    }

  /* Try increasingly permissive expansion methods until one of them
     produces a result: direct, then widening, then library call or
     widening.  */
  for (attempt = 0; temp == 0 && attempt < 3; attempt++)
    {
      enum optab_methods methods;

      if (attempt == 0)
        methods = OPTAB_DIRECT;
      else if (attempt == 1)
        methods = OPTAB_WIDEN;
      else
        methods = OPTAB_LIB_WIDEN;

      if (rotate)
        {
          /* Widening does not work for rotation.  */
          if (methods == OPTAB_WIDEN)
            continue;
          else if (methods == OPTAB_LIB_WIDEN)
            {
              /* If we have been unable to open-code this by a rotation,
                 do it as the IOR of two shifts.  I.e., to rotate A
                 by N bits, compute
                 (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
                 where C is the bitsize of A.

                 It is theoretically possible that the target machine might
                 not be able to perform either shift and hence we would
                 be making two libcalls rather than just the one for the
                 shift (similarly if IOR could not be done).  We will allow
                 this extremely unlikely lossage to avoid complicating the
                 code below.  */

              /* Do not let the second shift write TARGET when TARGET is
                 the very rtx being shifted.  */
              rtx subtarget = target == shifted ? 0 : target;
              rtx new_amount, other_amount;
              rtx temp1;

              new_amount = op1;
              /* NOTE(review): OP1 == const0_rtx was already handled
                 before the loop and OP1 is not modified in between, so
                 this test looks redundant.  */
              if (op1 == const0_rtx)
                return shifted;
              else if (CONST_INT_P (op1))
                other_amount = GEN_INT (GET_MODE_BITSIZE (mode)
                                        - INTVAL (op1));
              else
                {
                  /* Variable count: the opposite shift amount is
                     (-OP1) & (precision - 1).  */
                  other_amount
                    = simplify_gen_unary (NEG, GET_MODE (op1),
                                          op1, GET_MODE (op1));
                  HOST_WIDE_INT mask = GET_MODE_PRECISION (mode) - 1;
                  other_amount
                    = simplify_gen_binary (AND, GET_MODE (op1), other_amount,
                                           gen_int_mode (mask, GET_MODE (op1)));
                }

              shifted = force_reg (mode, shifted);

              /* Both halves are logical shifts (UNSIGNEDP == 1), in
                 opposite directions.  */
              temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
                                     mode, shifted, new_amount, 0, 1);
              temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
                                      mode, shifted, other_amount,
                                      subtarget, 1);
              return expand_binop (mode, ior_optab, temp, temp1, target,
                                   unsignedp, methods);
            }

          temp = expand_binop (mode,
                               left ? lrotate_optab : rrotate_optab,
                               shifted, op1, target, unsignedp, methods);
        }
      else if (unsignedp)
        temp = expand_binop (mode,
                             left ? lshift_optab : rshift_uns_optab,
                             shifted, op1, target, unsignedp, methods);

      /* Do arithmetic shifts.
         Also, if we are going to widen the operand, we can just as well
         use an arithmetic right-shift instead of a logical one.  */
      if (temp == 0 && ! rotate
          && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
        {
          enum optab_methods methods1 = methods;

          /* If trying to widen a log shift to an arithmetic shift,
             don't accept an arithmetic shift of the same size.  */
          if (unsignedp)
            methods1 = OPTAB_MUST_WIDEN;

          /* Arithmetic shift */

          temp = expand_binop (mode,
                               left ? lshift_optab : rshift_arith_optab,
                               shifted, op1, target, unsignedp, methods1);
        }

      /* We used to try extzv here for logical right shifts, but that was
         only useful for one machine, the VAX, and caused poor code
         generation there for lshrdi3, so the code was deleted and a
         define_expand for lshrsi3 was added to vax.md.  */
    }

  /* One of the attempts above must have produced a result.  */
  gcc_assert (temp);
  return temp;
}
2250
2251 /* Output a shift instruction for expression code CODE,
2252    with SHIFTED being the rtx for the value to shift,
2253    and AMOUNT the amount to shift by.
2254    Store the result in the rtx TARGET, if that is convenient.
2255    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2256    Return the rtx for where the value is.  */
2257
2258 rtx
2259 expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2260               int amount, rtx target, int unsignedp)
2261 {
2262   return expand_shift_1 (code, mode,
2263                          shifted, GEN_INT (amount), target, unsignedp);
2264 }
2265
2266 /* Output a shift instruction for expression code CODE,
2267    with SHIFTED being the rtx for the value to shift,
2268    and AMOUNT the tree for the amount to shift by.
2269    Store the result in the rtx TARGET, if that is convenient.
2270    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2271    Return the rtx for where the value is.  */
2272
2273 rtx
2274 expand_variable_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2275                        tree amount, rtx target, int unsignedp)
2276 {
2277   return expand_shift_1 (code, mode,
2278                          shifted, expand_normal (amount), target, unsignedp);
2279 }
2280
2281 \f
/* The kind of fixup required after a multiplication by a constant.  */
enum mult_variant
{
  /* No fixup is needed.  */
  basic_variant,
  /* The result should be negated.  */
  negate_variant,
  /* The multiplicand should be added to the result.  */
  add_variant
};
2287
/* Forward declarations of static helper routines; being static, each
   must be defined in this translation unit.  */
static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
                        const struct mult_cost *, enum machine_mode mode);
static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
                                 struct algorithm *, enum mult_variant *, int);
static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
                              const struct algorithm *, enum mult_variant);
static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
static rtx extract_high_half (enum machine_mode, rtx);
static rtx expmed_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
static rtx expmed_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
                                       int, int);
2299 /* Compute the best algorithm for multiplying by T and store it in
2300    *ALG_OUT.  The algorithm must cost less than COST_LIMIT.  If
2301    ALG_OUT->cost is not less than COST_LIMIT on return, no algorithm
2302    was found and all other fields of *ALG_OUT are undefined.
2303    MODE is the machine mode of the multiplication.  */
2304
2305 static void
2306 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2307             const struct mult_cost *cost_limit, enum machine_mode mode)
2308 {
2309   int m;
2310   struct algorithm *alg_in, *best_alg;
2311   struct mult_cost best_cost;
2312   struct mult_cost new_limit;
2313   int op_cost, op_latency;
2314   unsigned HOST_WIDE_INT orig_t = t;
2315   unsigned HOST_WIDE_INT q;
2316   int maxm, hash_index;
2317   bool cache_hit = false;
2318   enum alg_code cache_alg = alg_zero;
2319   bool speed = optimize_insn_for_speed_p ();
2320   enum machine_mode imode;
2321   struct alg_hash_entry *entry_ptr;
2322
2323   /* Indicate that no algorithm is yet found.  If no algorithm
2324      is found, this value will be returned and indicate failure.  */
2325   alg_out->cost.cost = cost_limit->cost + 1;
2326   alg_out->cost.latency = cost_limit->latency + 1;
2327
2328   if (cost_limit->cost < 0
2329       || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2330     return;
2331
2332   /* Be prepared for vector modes.  */
2333   imode = GET_MODE_INNER (mode);
2334   if (imode == VOIDmode)
2335     imode = mode;
2336
2337   maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2338
2339   /* Restrict the bits of "t" to the multiplication's mode.  */
2340   t &= GET_MODE_MASK (imode);
2341
2342   /* t == 1 can be done in zero cost.  */
2343   if (t == 1)
2344     {
2345       alg_out->ops = 1;
2346       alg_out->cost.cost = 0;
2347       alg_out->cost.latency = 0;
2348       alg_out->op[0] = alg_m;
2349       return;
2350     }
2351
2352   /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
2353      fail now.  */
2354   if (t == 0)
2355     {
2356       if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2357         return;
2358       else
2359         {
2360           alg_out->ops = 1;
2361           alg_out->cost.cost = zero_cost (speed);
2362           alg_out->cost.latency = zero_cost (speed);
2363           alg_out->op[0] = alg_zero;
2364           return;
2365         }
2366     }
2367
2368   /* We'll be needing a couple extra algorithm structures now.  */
2369
2370   alg_in = XALLOCA (struct algorithm);
2371   best_alg = XALLOCA (struct algorithm);
2372   best_cost = *cost_limit;
2373
2374   /* Compute the hash index.  */
2375   hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2376
2377   /* See if we already know what to do for T.  The entry is only usable
2378      if it was recorded for the same T, MODE and SPEED setting.  */
2379   entry_ptr = alg_hash_entry_ptr (hash_index);
2380   if (entry_ptr->t == t
2381       && entry_ptr->mode == mode
2382       && entry_ptr->speed == speed
2383       && entry_ptr->alg != alg_unknown)
2384     {
2385       cache_alg = entry_ptr->alg;
2386
2387       if (cache_alg == alg_impossible)
2388         {
2389           /* The cache tells us that it's impossible to synthesize
2390              multiplication by T within entry_ptr->cost.  */
2391           if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2392             /* COST_LIMIT is at least as restrictive as the one
2393                recorded in the hash table, in which case we have no
2394                hope of synthesizing a multiplication.  Just
2395                return.  */
2396             return;
2397
2398           /* If we get here, COST_LIMIT is less restrictive than the
2399              one recorded in the hash table, so we may be able to
2400              synthesize a multiplication.  Proceed as if we didn't
2401              have the cache entry.  */
2402         }
2403       else
2404         {
2405           if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2406             /* The cached algorithm shows that this multiplication
2407                requires more cost than COST_LIMIT.  Just return.  This
2408                way, we don't clobber this cache entry with
2409                alg_impossible but retain useful information.  */
2410             return;
2411
2412           cache_hit = true;
2413           /* Retry only the strategy that the cache recorded.  */
2414           switch (cache_alg)
2415             {
2416             case alg_shift:
2417               goto do_alg_shift;
2418
2419             case alg_add_t_m2:
2420             case alg_sub_t_m2:
2421               goto do_alg_addsub_t_m2;
2422
2423             case alg_add_factor:
2424             case alg_sub_factor:
2425               goto do_alg_addsub_factor;
2426
2427             case alg_add_t2_m:
2428               goto do_alg_add_t2_m;
2429
2430             case alg_sub_t2_m:
2431               goto do_alg_sub_t2_m;
2432
2433             default:
2434               gcc_unreachable ();
2435             }
2436         }
2437     }
2438
2439   /* If we have a group of zero bits at the low-order part of T, try
2440      multiplying by the remaining bits and then doing a shift.  */
2441
2442   if ((t & 1) == 0)
2443     {
2444     do_alg_shift:
2445       m = floor_log2 (t & -t);  /* m = number of low zero bits */
2446       if (m < maxm)
2447         {
2448           q = t >> m;
2449           /* The function expand_shift will choose between a shift and
2450              a sequence of additions, so the observed cost is given as
2451              MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)).  */
2452           op_cost = m * add_cost (speed, mode);
2453           if (shift_cost (speed, mode, m) < op_cost)
2454             op_cost = shift_cost (speed, mode, m);
2455           new_limit.cost = best_cost.cost - op_cost;
2456           new_limit.latency = best_cost.latency - op_cost;
2457           synth_mult (alg_in, q, &new_limit, mode);
2458
2459           alg_in->cost.cost += op_cost;
2460           alg_in->cost.latency += op_cost;
2461           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2462             {
2463               struct algorithm *x;
2464               best_cost = alg_in->cost;
2465               x = alg_in, alg_in = best_alg, best_alg = x;
2466               best_alg->log[best_alg->ops] = m;
2467               best_alg->op[best_alg->ops] = alg_shift;
2468             }
2469
2470           /* See if treating ORIG_T as a signed number yields a better
2471              sequence.  Try this sequence only for a negative ORIG_T
2472              as it would be useless for a non-negative ORIG_T.  */
2473           if ((HOST_WIDE_INT) orig_t < 0)
2474             {
2475               /* Shift ORIG_T as follows because a right shift of a
2476                  negative-valued signed type is implementation
2477                  defined.  */
2478               q = ~(~orig_t >> m);
2479               /* The function expand_shift will choose between a shift
2480                  and a sequence of additions, so the observed cost is
2481                  given as MIN (m * add_cost(speed, mode),
2482                  shift_cost(speed, mode, m)).  */
2483               op_cost = m * add_cost (speed, mode);
2484               if (shift_cost (speed, mode, m) < op_cost)
2485                 op_cost = shift_cost (speed, mode, m);
2486               new_limit.cost = best_cost.cost - op_cost;
2487               new_limit.latency = best_cost.latency - op_cost;
2488               synth_mult (alg_in, q, &new_limit, mode);
2489
2490               alg_in->cost.cost += op_cost;
2491               alg_in->cost.latency += op_cost;
2492               if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2493                 {
2494                   struct algorithm *x;
2495                   best_cost = alg_in->cost;
2496                   x = alg_in, alg_in = best_alg, best_alg = x;
2497                   best_alg->log[best_alg->ops] = m;
2498                   best_alg->op[best_alg->ops] = alg_shift;
2499                 }
2500             }
2501         }
2502       if (cache_hit)
2503         goto done;
2504     }
2505
2506   /* If we have an odd number, add or subtract one.  */
2507   if ((t & 1) != 0)
2508     {
2509       unsigned HOST_WIDE_INT w;
2510
2511     do_alg_addsub_t_m2:
2512       for (w = 1; (w & t) != 0; w <<= 1)
2513         ;
2514       /* If T was -1, then W will be zero after the loop.  This is another
2515          case where T ends with ...111.  Handling this with (T + 1) and
2516          subtract 1 produces slightly better code and results in algorithm
2517          selection much faster than treating it like the ...0111 case
2518          below.  */
2519       if (w == 0
2520           || (w > 2
2521               /* Reject the case where t is 3.
2522                  Thus we prefer addition in that case.  */
2523               && t != 3))
2524         {
2525           /* T ends with ...111.  Multiply by (T + 1) and subtract 1.  */
2526
2527           op_cost = add_cost (speed, mode);
2528           new_limit.cost = best_cost.cost - op_cost;
2529           new_limit.latency = best_cost.latency - op_cost;
2530           synth_mult (alg_in, t + 1, &new_limit, mode);
2531
2532           alg_in->cost.cost += op_cost;
2533           alg_in->cost.latency += op_cost;
2534           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2535             {
2536               struct algorithm *x;
2537               best_cost = alg_in->cost;
2538               x = alg_in, alg_in = best_alg, best_alg = x;
2539               best_alg->log[best_alg->ops] = 0;
2540               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2541             }
2542         }
2543       else
2544         {
2545           /* T ends with ...01 or ...011.  Multiply by (T - 1) and add 1.  */
2546
2547           op_cost = add_cost (speed, mode);
2548           new_limit.cost = best_cost.cost - op_cost;
2549           new_limit.latency = best_cost.latency - op_cost;
2550           synth_mult (alg_in, t - 1, &new_limit, mode);
2551
2552           alg_in->cost.cost += op_cost;
2553           alg_in->cost.latency += op_cost;
2554           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2555             {
2556               struct algorithm *x;
2557               best_cost = alg_in->cost;
2558               x = alg_in, alg_in = best_alg, best_alg = x;
2559               best_alg->log[best_alg->ops] = 0;
2560               best_alg->op[best_alg->ops] = alg_add_t_m2;
2561             }
2562         }
2563
2564       /* We may be able to calculate a * -7, a * -15, a * -31, etc
2565          quickly with a - a * n for some appropriate constant n.  */
2566       m = exact_log2 (-orig_t + 1);
2567       if (m >= 0 && m < maxm)
2568         {
2569           op_cost = shiftsub1_cost (speed, mode, m);
2570           new_limit.cost = best_cost.cost - op_cost;
2571           new_limit.latency = best_cost.latency - op_cost;
2572           synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2573                       &new_limit, mode);
2574
2575           alg_in->cost.cost += op_cost;
2576           alg_in->cost.latency += op_cost;
2577           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2578             {
2579               struct algorithm *x;
2580               best_cost = alg_in->cost;
2581               x = alg_in, alg_in = best_alg, best_alg = x;
2582               best_alg->log[best_alg->ops] = m;
2583               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2584             }
2585         }
2586
2587       if (cache_hit)
2588         goto done;
2589     }
2590
2591   /* Look for factors of t of the form
2592      t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2593      If we find such a factor, we can multiply by t using an algorithm that
2594      multiplies by q, shift the result by m and add/subtract it to itself.
2595
2596      We search for large factors first and loop down, even if large factors
2597      are less probable than small; if we find a large factor we will find a
2598      good sequence quickly, and therefore be able to prune (by decreasing
2599      COST_LIMIT) the search.  */
2600
2601  do_alg_addsub_factor:
2602   for (m = floor_log2 (t - 1); m >= 2; m--)
2603     {
2604       unsigned HOST_WIDE_INT d;
2605
2606       d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2607       if (t % d == 0 && t > d && m < maxm
2608           && (!cache_hit || cache_alg == alg_add_factor))
2609         {
2610           /* If the target has a cheap shift-and-add instruction use
2611              that in preference to a shift insn followed by an add insn.
2612              Assume that the shift-and-add is "atomic" with a latency
2613              equal to its cost, otherwise assume that on superscalar
2614              hardware the shift may be executed concurrently with the
2615              earlier steps in the algorithm.  */
2616           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2617           if (shiftadd_cost (speed, mode, m) < op_cost)
2618             {
2619               op_cost = shiftadd_cost (speed, mode, m);
2620               op_latency = op_cost;
2621             }
2622           else
2623             op_latency = add_cost (speed, mode);
2624
2625           new_limit.cost = best_cost.cost - op_cost;
2626           new_limit.latency = best_cost.latency - op_latency;
2627           synth_mult (alg_in, t / d, &new_limit, mode);
2628
2629           alg_in->cost.cost += op_cost;
2630           alg_in->cost.latency += op_latency;
2631           if (alg_in->cost.latency < op_cost)
2632             alg_in->cost.latency = op_cost;
2633           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2634             {
2635               struct algorithm *x;
2636               best_cost = alg_in->cost;
2637               x = alg_in, alg_in = best_alg, best_alg = x;
2638               best_alg->log[best_alg->ops] = m;
2639               best_alg->op[best_alg->ops] = alg_add_factor;
2640             }
2641           /* Other factors will have been taken care of in the recursion.  */
2642           break;
2643         }
2644
2645       d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2646       if (t % d == 0 && t > d && m < maxm
2647           && (!cache_hit || cache_alg == alg_sub_factor))
2648         {
2649           /* If the target has a cheap shift-and-subtract insn use
2650              that in preference to a shift insn followed by a sub insn.
2651              Assume that the shift-and-sub is "atomic" with a latency
2652              equal to its cost, otherwise assume that on superscalar
2653              hardware the shift may be executed concurrently with the
2654              earlier steps in the algorithm.  */
2655           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2656           if (shiftsub0_cost (speed, mode, m) < op_cost)
2657             {
2658               op_cost = shiftsub0_cost (speed, mode, m);
2659               op_latency = op_cost;
2660             }
2661           else
2662             op_latency = add_cost (speed, mode);
2663
2664           new_limit.cost = best_cost.cost - op_cost;
2665           new_limit.latency = best_cost.latency - op_latency;
2666           synth_mult (alg_in, t / d, &new_limit, mode);
2667
2668           alg_in->cost.cost += op_cost;
2669           alg_in->cost.latency += op_latency;
2670           if (alg_in->cost.latency < op_cost)
2671             alg_in->cost.latency = op_cost;
2672           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2673             {
2674               struct algorithm *x;
2675               best_cost = alg_in->cost;
2676               x = alg_in, alg_in = best_alg, best_alg = x;
2677               best_alg->log[best_alg->ops] = m;
2678               best_alg->op[best_alg->ops] = alg_sub_factor;
2679             }
2680           break;
2681         }
2682     }
2683   if (cache_hit)
2684     goto done;
2685
2686   /* Try shift-and-add (load effective address) instructions,
2687      i.e. do a*3, a*5, a*9.  */
2688   if ((t & 1) != 0)
2689     {
2690     do_alg_add_t2_m:
2691       q = t - 1;
2692       q = q & -q;
2693       m = exact_log2 (q);
2694       if (m >= 0 && m < maxm)
2695         {
2696           op_cost = shiftadd_cost (speed, mode, m);
2697           new_limit.cost = best_cost.cost - op_cost;
2698           new_limit.latency = best_cost.latency - op_cost;
2699           synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2700
2701           alg_in->cost.cost += op_cost;
2702           alg_in->cost.latency += op_cost;
2703           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2704             {
2705               struct algorithm *x;
2706               best_cost = alg_in->cost;
2707               x = alg_in, alg_in = best_alg, best_alg = x;
2708               best_alg->log[best_alg->ops] = m;
2709               best_alg->op[best_alg->ops] = alg_add_t2_m;
2710             }
2711         }
2712       if (cache_hit)
2713         goto done;
2714
2715     do_alg_sub_t2_m:
2716       q = t + 1;
2717       q = q & -q;
2718       m = exact_log2 (q);
2719       if (m >= 0 && m < maxm)
2720         {
2721           op_cost = shiftsub0_cost (speed, mode, m);
2722           new_limit.cost = best_cost.cost - op_cost;
2723           new_limit.latency = best_cost.latency - op_cost;
2724           synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2725
2726           alg_in->cost.cost += op_cost;
2727           alg_in->cost.latency += op_cost;
2728           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2729             {
2730               struct algorithm *x;
2731               best_cost = alg_in->cost;
2732               x = alg_in, alg_in = best_alg, best_alg = x;
2733               best_alg->log[best_alg->ops] = m;
2734               best_alg->op[best_alg->ops] = alg_sub_t2_m;
2735             }
2736         }
2737       if (cache_hit)
2738         goto done;
2739     }
2740
2741  done:
2742   /* If best_cost has not decreased, we have not found any algorithm.  */
2743   if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2744     {
2745       /* We failed to find an algorithm.  Record alg_impossible for
2746          this case (that is, <T, MODE, COST_LIMIT>) so that next time
2747          we are asked to find an algorithm for T within the same or
2748          lower COST_LIMIT, we can immediately return to the
2749          caller.  */
2750       entry_ptr->t = t;
2751       entry_ptr->mode = mode;
2752       entry_ptr->speed = speed;
2753       entry_ptr->alg = alg_impossible;
2754       entry_ptr->cost = *cost_limit;
2755       return;
2756     }
2757
2758   /* Cache the result.  */
2759   if (!cache_hit)
2760     {
2761       entry_ptr->t = t;
2762       entry_ptr->mode = mode;
2763       entry_ptr->speed = speed;
2764       entry_ptr->alg = best_alg->op[best_alg->ops];
2765       entry_ptr->cost.cost = best_cost.cost;
2766       entry_ptr->cost.latency = best_cost.latency;
2767     }
2768
2769   /* If we are getting a too long sequence for `struct algorithm'
2770      to record, make this search fail.  */
2771   if (best_alg->ops == MAX_BITS_PER_WORD)
2772     return;
2773
2774   /* Copy the algorithm from temporary space to the space at alg_out.
2775      We avoid using structure assignment because the majority of
2776      best_alg is normally undefined, and this is a critical function.  */
2777   alg_out->ops = best_alg->ops + 1;
2778   alg_out->cost = best_cost;
2779   memcpy (alg_out->op, best_alg->op,
2780           alg_out->ops * sizeof *alg_out->op);
2781   memcpy (alg_out->log, best_alg->log,
2782           alg_out->ops * sizeof *alg_out->log);
2783 }
2784 \f
2785 /* Pick the cheapest shift/add strategy for multiplying a MODE value by VAL.
2786    Three candidates are costed with synth_mult:
2787
2788        - VAL itself, needing no fixup (basic_variant)
2789        - -VAL, with the product negated afterwards (negate_variant)
2790        - VAL - 1, with the multiplicand added afterwards (add_variant).
2791
2792    The winner is stored in *ALG and its fixup kind in *VARIANT.  Return
2793    true if the winner costs less than MULT_COST.  */
2794
2795 static bool
2796 choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
2797                      struct algorithm *alg, enum mult_variant *variant,
2798                      int mult_cost)
2799 {
2800   struct algorithm trial;
2801   struct mult_cost bound;
2802   int fixup_cost, ceiling;
2803   bool found;
2804   bool speed = optimize_insn_for_speed_p ();
2805
2806   /* A negative budget can never be met.  */
2807   if (mult_cost < 0)
2808     return false;
2809
2810   /* Clamp MULT_COST to a reasonable upper bound.  Any constant
2811      multiplication can be performed with less than 2 * bits
2812      additions, so a larger budget is never needed.  */
2813   ceiling = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
2814   if (ceiling < mult_cost)
2815     mult_cost = ceiling;
2816
2817   /* First candidate: synthesize VAL directly.  */
2818   bound.cost = mult_cost;
2819   bound.latency = mult_cost;
2820   *variant = basic_variant;
2821   synth_mult (alg, val, &bound, mode);
2822
2823   /* Second candidate: synthesize -VAL and negate.  This works only if
2824      the inverted value actually fits in an `unsigned int'.  */
2825   if (GET_MODE_UNIT_BITSIZE (mode) <= HOST_BITS_PER_INT)
2826     {
2827       fixup_cost = neg_cost (speed, mode);
2828       found = MULT_COST_LESS (&alg->cost, mult_cost);
2829
2830       /* The search must beat the best algorithm so far, or MULT_COST if
2831          there is none, after the negation has been paid for.  */
2832       bound.cost = (found ? alg->cost.cost : mult_cost) - fixup_cost;
2833       bound.latency = (found ? alg->cost.latency : mult_cost) - fixup_cost;
2834       synth_mult (&trial, -val, &bound, mode);
2835
2836       trial.cost.cost += fixup_cost;
2837       trial.cost.latency += fixup_cost;
2838       if (CHEAPER_MULT_COST (&trial.cost, &alg->cost))
2839         {
2840           *alg = trial;
2841           *variant = negate_variant;
2842         }
2843     }
2844
2845   /* Third candidate: synthesize VAL - 1 and add the multiplicand.
2846      This proves very useful for division-by-constant.  */
2847   fixup_cost = add_cost (speed, mode);
2848   found = MULT_COST_LESS (&alg->cost, mult_cost);
2849   /* Same bound computation as above, now charging for the addition.  */
2850   bound.cost = (found ? alg->cost.cost : mult_cost) - fixup_cost;
2851   bound.latency = (found ? alg->cost.latency : mult_cost) - fixup_cost;
2852   synth_mult (&trial, val - 1, &bound, mode);
2853
2854   trial.cost.cost += fixup_cost;
2855   trial.cost.latency += fixup_cost;
2856   if (CHEAPER_MULT_COST (&trial.cost, &alg->cost))
2857     {
2858       *alg = trial;
2859       *variant = add_variant;
2860     }
2861
2862   /* *ALG now holds the best candidate found, if any.  */
2863   return MULT_COST_LESS (&alg->cost, mult_cost);
2864 }
2865
2866 /* A subroutine of expand_mult, used for constant multiplications.
2867    Multiply OP0 by VAL in mode MODE using the shift/add sequence in ALG
2868    plus the final fixup given by VARIANT; TARGET is a suggested place for
2869    the result.  Return the product and assert that it matches VAL.  */
2870
2871 static rtx
2872 expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
2873                    rtx target, const struct algorithm *alg,
2874                    enum mult_variant variant)
2875 {
2876   /* Unsigned, so the arithmetic below wraps instead of overflowing.  */
2877   unsigned HOST_WIDE_INT val_so_far;
2878   rtx insn, accum, tem;
2879   int opno;
2880   enum machine_mode nmode;
2881
2882   /* Avoid referencing memory over and over and invalid sharing on SUBREGs.  */
2883   op0 = force_reg (mode, op0);
2884
2885   /* ACCUM starts out either as OP0 or as a zero, depending on
2886      the first operation.  */
2887
2888   if (alg->op[0] == alg_zero)
2889     {
2890       accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
2891       val_so_far = 0;
2892     }
2893   else if (alg->op[0] == alg_m)
2894     {
2895       accum = copy_to_mode_reg (mode, op0);
2896       val_so_far = 1;
2897     }
2898   else
2899     gcc_unreachable ();
2900
2901   for (opno = 1; opno < alg->ops; opno++)
2902     {
2903       int log = alg->log[opno];
2904       rtx shift_subtarget = optimize ? 0 : accum;
2905       rtx add_target
2906         = (opno == alg->ops - 1 && target != 0 && variant != add_variant
2907            && !optimize)
2908           ? target : 0;
2909       rtx accum_target = optimize ? 0 : accum;
2910       rtx accum_inner;
2911
2912       switch (alg->op[opno])
2913         {
2914         case alg_shift:
2915           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2916           /* REG_EQUAL note will be attached to the following insn.  */
2917           emit_move_insn (accum, tem);
2918           val_so_far <<= log;
2919           break;
2920
2921         case alg_add_t_m2:
2922           tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
2923           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2924                                  add_target ? add_target : accum_target);
2925           val_so_far += (unsigned HOST_WIDE_INT) 1 << log;
2926           break;
2927
2928         case alg_sub_t_m2:
2929           tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
2930           accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
2931                                  add_target ? add_target : accum_target);
2932           val_so_far -= (unsigned HOST_WIDE_INT) 1 << log;
2933           break;
2934
2935         case alg_add_t2_m:
2936           accum = expand_shift (LSHIFT_EXPR, mode, accum,
2937                                 log, shift_subtarget, 0);
2938           accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
2939                                  add_target ? add_target : accum_target);
2940           val_so_far = (val_so_far << log) + 1;
2941           break;
2942
2943         case alg_sub_t2_m:
2944           accum = expand_shift (LSHIFT_EXPR, mode, accum,
2945                                 log, shift_subtarget, 0);
2946           accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
2947                                  add_target ? add_target : accum_target);
2948           val_so_far = (val_so_far << log) - 1;
2949           break;
2950
2951         case alg_add_factor:
2952           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2953           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2954                                  add_target ? add_target : accum_target);
2955           val_so_far += val_so_far << log;
2956           break;
2957
2958         case alg_sub_factor:
2959           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2960           accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
2961                                  (add_target
2962                                   ? add_target : (optimize ? 0 : tem)));
2963           val_so_far = (val_so_far << log) - val_so_far;
2964           break;
2965
2966         default:
2967           gcc_unreachable ();
2968         }
2969
2970       if (SCALAR_INT_MODE_P (mode))
2971         {
2972           /* Write a REG_EQUAL note on the last insn so that we can cse
2973              multiplication sequences.  Note that if ACCUM is a SUBREG,
2974              we've set the inner register and must properly indicate that.  */
2975           tem = op0, nmode = mode;
2976           accum_inner = accum;
2977           if (GET_CODE (accum) == SUBREG)
2978             {
2979               accum_inner = SUBREG_REG (accum);
2980               nmode = GET_MODE (accum_inner);
2981               tem = gen_lowpart (nmode, op0);
2982             }
2983
2984           insn = get_last_insn ();
2985           set_dst_reg_note (insn, REG_EQUAL,
2986                             gen_rtx_MULT (nmode, tem,
2987                                           gen_int_mode (val_so_far, nmode)),
2988                             accum_inner);
2989         }
2990     }
2991
2992   if (variant == negate_variant)
2993     {
2994       val_so_far = -val_so_far;
2995       accum = expand_unop (mode, neg_optab, accum, target, 0);
2996     }
2997   else if (variant == add_variant)
2998     {
2999       val_so_far = val_so_far + 1;
3000       accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3001     }
3002
3003   /* Compare only the bits of val and val_so_far that are significant
3004      in the result mode, to avoid sign-/zero-extension confusion.  */
3005   nmode = GET_MODE_INNER (mode);
3006   if (nmode == VOIDmode)
3007     nmode = mode;
3008   val &= GET_MODE_MASK (nmode);
3009   val_so_far &= GET_MODE_MASK (nmode);
3010   gcc_assert (val == (HOST_WIDE_INT) val_so_far);
3011
3012   return accum;
3013 }
3014
3015 /* Perform a multiplication and return an rtx for the result.
3016    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3017    TARGET is a suggestion for where to store the result (an rtx).
3018
3019    We check specially for a constant integer as OP1.
3020    If you want this check for OP0 as well, then before calling
3021    you should swap the two operands if OP0 would be constant.  */
3022
3023 rtx
3024 expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3025              int unsignedp)
3026 {
3027   enum mult_variant variant;
3028   struct algorithm algorithm;
3029   rtx scalar_op1;
3030   int max_cost;
3031   bool speed = optimize_insn_for_speed_p ();
3032   bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3033
3034   if (CONSTANT_P (op0))
3035     {
3036       rtx temp = op0;
3037       op0 = op1;
3038       op1 = temp;
3039     }
3040
3041   /* For vectors, there are several simplifications that can be made if
3042      all elements of the vector constant are identical.  */
3043   scalar_op1 = op1;
3044   if (GET_CODE (op1) == CONST_VECTOR)
3045     {
3046       int i, n = CONST_VECTOR_NUNITS (op1);
3047       scalar_op1 = CONST_VECTOR_ELT (op1, 0);
3048       for (i = 1; i < n; ++i)
3049         if (!rtx_equal_p (scalar_op1, CONST_VECTOR_ELT (op1, i)))
3050           goto skip_scalar;
3051     }
3052
3053   if (INTEGRAL_MODE_P (mode))
3054     {
3055       rtx fake_reg;
3056       HOST_WIDE_INT coeff;
3057       bool is_neg;
3058       int mode_bitsize;
3059
3060       if (op1 == CONST0_RTX (mode))
3061         return op1;
3062       if (op1 == CONST1_RTX (mode))
3063         return op0;
3064       if (op1 == CONSTM1_RTX (mode))
3065         return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3066                             op0, target, 0);
3067
3068       if (do_trapv)
3069         goto skip_synth;
3070
3071       /* These are the operations that are potentially turned into
3072          a sequence of shifts and additions.  */
3073       mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3074
3075       /* synth_mult does an `unsigned int' multiply.  As long as the mode is
3076          less than or equal in size to `unsigned int' this doesn't matter.
3077          If the mode is larger than `unsigned int', then synth_mult works
3078          only if the constant value exactly fits in an `unsigned int' without
3079          any truncation.  This means that multiplying by negative values does
3080          not work; results are off by 2^32 on a 32 bit machine.  */
3081
3082       if (CONST_INT_P (scalar_op1))
3083         {
3084           coeff = INTVAL (scalar_op1);
3085           is_neg = coeff < 0;
3086         }
3087       else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3088         {
3089           /* If we are multiplying in DImode, it may still be a win
3090              to try to work with shifts and adds.  */
3091           if (CONST_DOUBLE_HIGH (scalar_op1) == 0
3092               && (CONST_DOUBLE_LOW (scalar_op1) > 0
3093                   || (CONST_DOUBLE_LOW (scalar_op1) < 0
3094                       && EXACT_POWER_OF_2_OR_ZERO_P
3095                            (CONST_DOUBLE_LOW (scalar_op1)))))
3096             {
3097               coeff = CONST_DOUBLE_LOW (scalar_op1);
3098               is_neg = false;
3099             }
3100           else if (CONST_DOUBLE_LOW (scalar_op1) == 0)
3101             {
3102               coeff = CONST_DOUBLE_HIGH (scalar_op1);
3103               if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3104                 {
3105                   int shift = floor_log2 (coeff) + HOST_BITS_PER_WIDE_INT;
3106                   if (shift < HOST_BITS_PER_DOUBLE_INT - 1
3107                       || mode_bitsize <= HOST_BITS_PER_DOUBLE_INT)
3108                     return expand_shift (LSHIFT_EXPR, mode, op0,
3109                                          shift, target, unsignedp);
3110                 }
3111               goto skip_synth;
3112             }
3113           else
3114             goto skip_synth;
3115         }
3116       else
3117         goto skip_synth;
3118
3119       /* We used to test optimize here, on the grounds that it's better to
3120          produce a smaller program when -O is not used.  But this causes
3121          such a terrible slowdown sometimes that it seems better to always
3122          use synth_mult.  */
3123
3124       /* Special case powers of two.  */
3125       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
3126           && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
3127         return expand_shift (LSHIFT_EXPR, mode, op0,
3128                              floor_log2 (coeff), target, unsignedp);
3129
3130       fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3131
3132       /* Attempt to handle multiplication of DImode values by negative
3133          coefficients, by performing the multiplication by a positive
3134          multiplier and then inverting the result.  */
3135       if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3136         {
3137           /* Its safe to use -coeff even for INT_MIN, as the
3138              result is interpreted as an unsigned coefficient.
3139              Exclude cost of op0 from max_cost to match the cost
3140              calculation of the synth_mult.  */
3141           coeff = -(unsigned HOST_WIDE_INT) coeff;
3142           max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed)
3143                       - neg_cost (speed, mode));
3144           if (max_cost <= 0)
3145             goto skip_synth;
3146
3147           /* Special case powers of two.  */
3148           if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3149             {
3150               rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
3151                                        floor_log2 (coeff), target, unsignedp);
3152               return expand_unop (mode, neg_optab, temp, target, 0);
3153             }
3154
3155           if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3156                                    max_cost))
3157             {
3158               rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
3159                                             &algorithm, variant);
3160               return expand_unop (mode, neg_optab, temp, target, 0);
3161             }
3162           goto skip_synth;
3163         }
3164
3165       /* Exclude cost of op0 from max_cost to match the cost
3166          calculation of the synth_mult.  */
3167       max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed);
3168       if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3169         return expand_mult_const (mode, op0, coeff, target,
3170                                   &algorithm, variant);
3171     }
3172  skip_synth:
3173
3174   /* Expand x*2.0 as x+x.  */
3175   if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1))
3176     {
3177       REAL_VALUE_TYPE d;
3178       REAL_VALUE_FROM_CONST_DOUBLE (d, scalar_op1);
3179
3180       if (REAL_VALUES_EQUAL (d, dconst2))
3181         {
3182           op0 = force_reg (GET_MODE (op0), op0);
3183           return expand_binop (mode, add_optab, op0, op0,
3184                                target, unsignedp, OPTAB_LIB_WIDEN);
3185         }
3186     }
3187  skip_scalar:
3188
3189   /* This used to use umul_optab if unsigned, but for non-widening multiply
3190      there is no difference between signed and unsigned.  */
3191   op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3192                       op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3193   gcc_assert (op0);
3194   return op0;
3195 }
3196
3197 /* Return a cost estimate for multiplying a register by the given
3198    COEFFicient in the given MODE and SPEED.  */
3199
3200 int
3201 mult_by_coeff_cost (HOST_WIDE_INT coeff, enum machine_mode mode, bool speed)
3202 {
3203   int max_cost;
3204   struct algorithm algorithm;
3205   enum mult_variant variant;
3206
3207   rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3208   max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg), speed);
3209   if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3210     return algorithm.cost.cost;
3211   else
3212     return max_cost;
3213 }
3214
3215 /* Perform a widening multiplication and return an rtx for the result.
3216    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3217    TARGET is a suggestion for where to store the result (an rtx).
3218    THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3219    or smul_widen_optab.
3220
3221    We check specially for a constant integer as OP1, comparing the
3222    cost of a widening multiply against the cost of a sequence of shifts
3223    and adds.  */
3224
3225 rtx
3226 expand_widening_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3227                       int unsignedp, optab this_optab)
3228 {
3229   bool speed = optimize_insn_for_speed_p ();
3230   rtx cop1;
3231
3232   if (CONST_INT_P (op1)
3233       && GET_MODE (op0) != VOIDmode
3234       && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3235                                 this_optab == umul_widen_optab))
3236       && CONST_INT_P (cop1)
3237       && (INTVAL (cop1) >= 0
3238           || HWI_COMPUTABLE_MODE_P (mode)))
3239     {
3240       HOST_WIDE_INT coeff = INTVAL (cop1);
3241       int max_cost;
3242       enum mult_variant variant;
3243       struct algorithm algorithm;
3244
3245       /* Special case powers of two.  */
3246       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3247         {
3248           op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3249           return expand_shift (LSHIFT_EXPR, mode, op0,
3250                                floor_log2 (coeff), target, unsignedp);
3251         }
3252
3253       /* Exclude cost of op0 from max_cost to match the cost
3254          calculation of the synth_mult.  */
3255       max_cost = mul_widen_cost (speed, mode);
3256       if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3257                                max_cost))
3258         {
3259           op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3260           return expand_mult_const (mode, op0, coeff, target,
3261                                     &algorithm, variant);
3262         }
3263     }
3264   return expand_binop (mode, this_optab, op0, op1, target,
3265                        unsignedp, OPTAB_LIB_WIDEN);
3266 }
3267 \f
3268 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3269    replace division by D, and put the least significant N bits of the result
3270    in *MULTIPLIER_PTR and return the most significant bit.
3271
3272    The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3273    needed precision is in PRECISION (should be <= N).
3274
3275    PRECISION should be as small as possible so this function can choose
3276    multiplier more freely.
3277
3278    The rounded-up logarithm of D is placed in *lgup_ptr.  A shift count that
3279    is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3280
3281    Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3282    where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.  */
3283
3284 unsigned HOST_WIDE_INT
3285 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3286                    unsigned HOST_WIDE_INT *multiplier_ptr,
3287                    int *post_shift_ptr, int *lgup_ptr)
3288 {
3289   double_int mhigh, mlow;
3290   int lgup, post_shift;
3291   int pow, pow2;
3292
3293   /* lgup = ceil(log2(divisor)); */
3294   lgup = ceil_log2 (d);
3295
3296   gcc_assert (lgup <= n);
3297
3298   pow = n + lgup;
3299   pow2 = n + lgup - precision;
3300
3301   /* We could handle this with some effort, but this case is much
3302      better handled directly with a scc insn, so rely on caller using
3303      that.  */
3304   gcc_assert (pow != HOST_BITS_PER_DOUBLE_INT);
3305
3306   /* mlow = 2^(N + lgup)/d */
3307   double_int val = double_int_zero.set_bit (pow);
3308   mlow = val.div (double_int::from_uhwi (d), true, TRUNC_DIV_EXPR);
3309
3310   /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3311   val |= double_int_zero.set_bit (pow2);
3312   mhigh = val.div (double_int::from_uhwi (d), true, TRUNC_DIV_EXPR);
3313
3314   gcc_assert (!mhigh.high || val.high - d < d);
3315   gcc_assert (mhigh.high <= 1 && mlow.high <= 1);
3316   /* Assert that mlow < mhigh.  */
3317   gcc_assert (mlow.ult (mhigh));
3318
3319   /* If precision == N, then mlow, mhigh exceed 2^N
3320      (but they do not exceed 2^(N+1)).  */
3321
3322   /* Reduce to lowest terms.  */
3323   for (post_shift = lgup; post_shift > 0; post_shift--)
3324     {
3325       int shft = HOST_BITS_PER_WIDE_INT - 1;
3326       unsigned HOST_WIDE_INT ml_lo = (mlow.high << shft) | (mlow.low >> 1);
3327       unsigned HOST_WIDE_INT mh_lo = (mhigh.high << shft) | (mhigh.low >> 1);
3328       if (ml_lo >= mh_lo)
3329         break;
3330
3331       mlow = double_int::from_uhwi (ml_lo);
3332       mhigh = double_int::from_uhwi (mh_lo);
3333     }
3334
3335   *post_shift_ptr = post_shift;
3336   *lgup_ptr = lgup;
3337   if (n < HOST_BITS_PER_WIDE_INT)
3338     {
3339       unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3340       *multiplier_ptr = mhigh.low & mask;
3341       return mhigh.low >= mask;
3342     }
3343   else
3344     {
3345       *multiplier_ptr = mhigh.low;
3346       return mhigh.high;
3347     }
3348 }
3349
3350 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3351    congruent to 1 (mod 2**N).  */
3352
3353 static unsigned HOST_WIDE_INT
3354 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3355 {
3356   /* Solve x*y == 1 (mod 2^n), where x is odd.  Return y.  */
3357
3358   /* The algorithm notes that the choice y = x satisfies
3359      x*y == 1 mod 2^3, since x is assumed odd.
3360      Each iteration doubles the number of bits of significance in y.  */
3361
3362   unsigned HOST_WIDE_INT mask;
3363   unsigned HOST_WIDE_INT y = x;
3364   int nbit = 3;
3365
3366   mask = (n == HOST_BITS_PER_WIDE_INT
3367           ? ~(unsigned HOST_WIDE_INT) 0
3368           : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3369
3370   while (nbit < n)
3371     {
3372       y = y * (2 - x*y) & mask;         /* Modulo 2^N */
3373       nbit *= 2;
3374     }
3375   return y;
3376 }
3377
3378 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3379    flavor of OP0 and OP1.  ADJ_OPERAND is already the high half of the
3380    product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed product
3381    to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3382    become signed.
3383
3384    The result is put in TARGET if that is convenient.
3385
3386    MODE is the mode of operation.  */
3387
3388 rtx
3389 expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
3390                              rtx op1, rtx target, int unsignedp)
3391 {
3392   rtx tem;
3393   enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3394
3395   tem = expand_shift (RSHIFT_EXPR, mode, op0,
3396                       GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3397   tem = expand_and (mode, tem, op1, NULL_RTX);
3398   adj_operand
3399     = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3400                      adj_operand);
3401
3402   tem = expand_shift (RSHIFT_EXPR, mode, op1,
3403                       GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3404   tem = expand_and (mode, tem, op0, NULL_RTX);
3405   target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3406                           target);
3407
3408   return target;
3409 }
3410
3411 /* Subroutine of expmed_mult_highpart.  Return the MODE high part of OP.  */
3412
3413 static rtx
3414 extract_high_half (enum machine_mode mode, rtx op)
3415 {
3416   enum machine_mode wider_mode;
3417
3418   if (mode == word_mode)
3419     return gen_highpart (mode, op);
3420
3421   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3422
3423   wider_mode = GET_MODE_WIDER_MODE (mode);
3424   op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3425                      GET_MODE_BITSIZE (mode), 0, 1);
3426   return convert_modes (mode, wider_mode, op, 0);
3427 }
3428
3429 /* Like expmed_mult_highpart, but only consider using a multiplication
3430    optab.  OP1 is an rtx for the constant operand.  */
3431
3432 static rtx
3433 expmed_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
3434                             rtx target, int unsignedp, int max_cost)
3435 {
3436   rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3437   enum machine_mode wider_mode;
3438   optab moptab;
3439   rtx tem;
3440   int size;
3441   bool speed = optimize_insn_for_speed_p ();
3442
3443   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3444
3445   wider_mode = GET_MODE_WIDER_MODE (mode);
3446   size = GET_MODE_BITSIZE (mode);
3447
3448   /* Firstly, try using a multiplication insn that only generates the needed
3449      high part of the product, and in the sign flavor of unsignedp.  */
3450   if (mul_highpart_cost (speed, mode) < max_cost)
3451     {
3452       moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3453       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3454                           unsignedp, OPTAB_DIRECT);
3455       if (tem)
3456         return tem;
3457     }
3458
3459   /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3460      Need to adjust the result after the multiplication.  */
3461   if (size - 1 < BITS_PER_WORD
3462       && (mul_highpart_cost (speed, mode)
3463           + 2 * shift_cost (speed, mode, size-1)
3464           + 4 * add_cost (speed, mode) < max_cost))
3465     {
3466       moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3467       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3468                           unsignedp, OPTAB_DIRECT);
3469       if (tem)
3470         /* We used the wrong signedness.  Adjust the result.  */
3471         return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3472                                             tem, unsignedp);
3473     }
3474
3475   /* Try widening multiplication.  */
3476   moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3477   if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3478       && mul_widen_cost (speed, wider_mode) < max_cost)
3479     {
3480       tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3481                           unsignedp, OPTAB_WIDEN);
3482       if (tem)
3483         return extract_high_half (mode, tem);
3484     }
3485
3486   /* Try widening the mode and perform a non-widening multiplication.  */
3487   if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3488       && size - 1 < BITS_PER_WORD
3489       && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3490           < max_cost))
3491     {
3492       rtx insns, wop0, wop1;
3493
3494       /* We need to widen the operands, for example to ensure the
3495          constant multiplier is correctly sign or zero extended.
3496          Use a sequence to clean-up any instructions emitted by
3497          the conversions if things don't work out.  */
3498       start_sequence ();
3499       wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3500       wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3501       tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3502                           unsignedp, OPTAB_WIDEN);
3503       insns = get_insns ();
3504       end_sequence ();
3505
3506       if (tem)
3507         {
3508           emit_insn (insns);
3509           return extract_high_half (mode, tem);
3510         }
3511     }
3512
3513   /* Try widening multiplication of opposite signedness, and adjust.  */
3514   moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3515   if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3516       && size - 1 < BITS_PER_WORD
3517       && (mul_widen_cost (speed, wider_mode)
3518           + 2 * shift_cost (speed, mode, size-1)
3519           + 4 * add_cost (speed, mode) < max_cost))
3520     {
3521       tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3522                           NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3523       if (tem != 0)
3524         {
3525           tem = extract_high_half (mode, tem);
3526           /* We used the wrong signedness.  Adjust the result.  */
3527           return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3528                                               target, unsignedp);
3529         }
3530     }
3531
3532   return 0;
3533 }
3534
3535 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3536    putting the high half of the result in TARGET if that is convenient,
3537    and return where the result is.  If the operation can not be performed,
3538    0 is returned.
3539
3540    MODE is the mode of operation and result.
3541
3542    UNSIGNEDP nonzero means unsigned multiply.
3543
3544    MAX_COST is the total allowed cost for the expanded RTL.  */
3545
3546 static rtx
3547 expmed_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
3548                       rtx target, int unsignedp, int max_cost)
3549 {
3550   enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3551   unsigned HOST_WIDE_INT cnst1;
3552   int extra_cost;
3553   bool sign_adjust = false;
3554   enum mult_variant variant;
3555   struct algorithm alg;
3556   rtx tem;
3557   bool speed = optimize_insn_for_speed_p ();
3558
3559   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3560   /* We can't support modes wider than HOST_BITS_PER_INT.  */
3561   gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3562
3563   cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3564
3565   /* We can't optimize modes wider than BITS_PER_WORD.
3566      ??? We might be able to perform double-word arithmetic if
3567      mode == word_mode, however all the cost calculations in
3568      synth_mult etc. assume single-word operations.  */
3569   if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3570     return expmed_mult_highpart_optab (mode, op0, op1, target,
3571                                        unsignedp, max_cost);
3572
3573   extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3574
3575   /* Check whether we try to multiply by a negative constant.  */
3576   if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3577     {
3578       sign_adjust = true;
3579       extra_cost += add_cost (speed, mode);
3580     }
3581
3582   /* See whether shift/add multiplication is cheap enough.  */
3583   if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3584                            max_cost - extra_cost))
3585     {
3586       /* See whether the specialized multiplication optabs are
3587          cheaper than the shift/add version.  */
3588       tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3589                                         alg.cost.cost + extra_cost);
3590       if (tem)
3591         return tem;
3592
3593       tem = convert_to_mode (wider_mode, op0, unsignedp);
3594       tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3595       tem = extract_high_half (mode, tem);
3596
3597       /* Adjust result for signedness.  */
3598       if (sign_adjust)
3599         tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3600
3601       return tem;
3602     }
3603   return expmed_mult_highpart_optab (mode, op0, op1, target,
3604                                      unsignedp, max_cost);
3605 }
3606
3607
3608 /* Expand signed modulus of OP0 by a power of two D in mode MODE.  */
3609
3610 static rtx
3611 expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3612 {
3613   unsigned HOST_WIDE_INT masklow, maskhigh;
3614   rtx result, temp, shift, label;
3615   int logd;
3616
3617   logd = floor_log2 (d);
3618   result = gen_reg_rtx (mode);
3619
3620   /* Avoid conditional branches when they're expensive.  */
3621   if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3622       && optimize_insn_for_speed_p ())
3623     {
3624       rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3625                                       mode, 0, -1);
3626       if (signmask)
3627         {
3628           signmask = force_reg (mode, signmask);
3629           masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3630           shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3631
3632           /* Use the rtx_cost of a LSHIFTRT instruction to determine
3633              which instruction sequence to use.  If logical right shifts
3634              are expensive the use 2 XORs, 2 SUBs and an AND, otherwise
3635              use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */
3636
3637           temp = gen_rtx_LSHIFTRT (mode, result, shift);
3638           if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3639               || (set_src_cost (temp, optimize_insn_for_speed_p ())
3640                   > COSTS_N_INSNS (2)))
3641             {
3642               temp = expand_binop (mode, xor_optab, op0, signmask,
3643                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3644               temp = expand_binop (mode, sub_optab, temp, signmask,
3645                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3646               temp = expand_binop (mode, and_optab, temp,
3647                                    gen_int_mode (masklow, mode),
3648                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3649               temp = expand_binop (mode, xor_optab, temp, signmask,
3650                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3651               temp = expand_binop (mode, sub_optab, temp, signmask,
3652                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3653             }
3654           else
3655             {
3656               signmask = expand_binop (mode, lshr_optab, signmask, shift,
3657                                        NULL_RTX, 1, OPTAB_LIB_WIDEN);
3658               signmask = force_reg (mode, signmask);
3659
3660               temp = expand_binop (mode, add_optab, op0, signmask,
3661                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3662               temp = expand_binop (mode, and_optab, temp,
3663                                    gen_int_mode (masklow, mode),
3664                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3665               temp = expand_binop (mode, sub_optab, temp, signmask,
3666                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3667             }
3668           return temp;
3669         }
3670     }
3671
3672   /* Mask contains the mode's signbit and the significant bits of the
3673      modulus.  By including the signbit in the operation, many targets
3674      can avoid an explicit compare operation in the following comparison
3675      against zero.  */
3676
3677   masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3678   if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
3679     {
3680       masklow |= HOST_WIDE_INT_M1U << (GET_MODE_BITSIZE (mode) - 1);
3681       maskhigh = -1;
3682     }
3683   else
3684     maskhigh = HOST_WIDE_INT_M1U
3685                  << (GET_MODE_BITSIZE (mode) - HOST_BITS_PER_WIDE_INT - 1);
3686
3687   temp = expand_binop (mode, and_optab, op0,
3688                        immed_double_const (masklow, maskhigh, mode),
3689                        result, 1, OPTAB_LIB_WIDEN);
3690   if (temp != result)
3691     emit_move_insn (result, temp);
3692
3693   label = gen_label_rtx ();
3694   do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3695
3696   temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3697                        0, OPTAB_LIB_WIDEN);
3698   masklow = HOST_WIDE_INT_M1U << logd;
3699   maskhigh = -1;
3700   temp = expand_binop (mode, ior_optab, temp,
3701                        immed_double_const (masklow, maskhigh, mode),
3702                        result, 1, OPTAB_LIB_WIDEN);
3703   temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3704                        0, OPTAB_LIB_WIDEN);
3705   if (temp != result)
3706     emit_move_insn (result, temp);
3707   emit_label (label);
3708   return result;
3709 }
3710
3711 /* Expand signed division of OP0 by a power of two D in mode MODE.
3712    This routine is only called for positive values of D.  */
3713
3714 static rtx
3715 expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3716 {
3717   rtx temp, label;
3718   int logd;
3719
3720   logd = floor_log2 (d);
3721
3722   if (d == 2
3723       && BRANCH_COST (optimize_insn_for_speed_p (),
3724                       false) >= 1)
3725     {
3726       temp = gen_reg_rtx (mode);
3727       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3728       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3729                            0, OPTAB_LIB_WIDEN);
3730       return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3731     }
3732
3733 #ifdef HAVE_conditional_move
3734   if (BRANCH_COST (optimize_insn_for_speed_p (), false)
3735       >= 2)
3736     {
3737       rtx temp2;
3738
3739       /* ??? emit_conditional_move forces a stack adjustment via
3740          compare_from_rtx so, if the sequence is discarded, it will
3741          be lost.  Do it now instead.  */
3742       do_pending_stack_adjust ();
3743
3744       start_sequence ();
3745       temp2 = copy_to_mode_reg (mode, op0);
3746       temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
3747                            NULL_RTX, 0, OPTAB_LIB_WIDEN);
3748       temp = force_reg (mode, temp);
3749
3750       /* Construct "temp2 = (temp2 < 0) ? temp : temp2".  */
3751       temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3752                                      mode, temp, temp2, mode, 0);
3753       if (temp2)
3754         {
3755           rtx seq = get_insns ();
3756           end_sequence ();
3757           emit_insn (seq);
3758           return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
3759         }
3760       end_sequence ();
3761     }
3762 #endif
3763
3764   if (BRANCH_COST (optimize_insn_for_speed_p (),
3765                    false) >= 2)
3766     {
3767       int ushift = GET_MODE_BITSIZE (mode) - logd;
3768
3769       temp = gen_reg_rtx (mode);
3770       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3771       if (shift_cost (optimize_insn_for_speed_p (), mode, ushift)
3772           > COSTS_N_INSNS (1))
3773         temp = expand_binop (mode, and_optab, temp, gen_int_mode (d - 1, mode),
3774                              NULL_RTX, 0, OPTAB_LIB_WIDEN);
3775       else
3776         temp = expand_shift (RSHIFT_EXPR, mode, temp,
3777                              ushift, NULL_RTX, 1);
3778       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3779                            0, OPTAB_LIB_WIDEN);
3780       return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3781     }
3782
3783   label = gen_label_rtx ();
3784   temp = copy_to_mode_reg (mode, op0);
3785   do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3786   expand_inc (temp, gen_int_mode (d - 1, mode));
3787   emit_label (label);
3788   return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3789 }
3790 \f
3791 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3792    if that is convenient, and returning where the result is.
3793    You may request either the quotient or the remainder as the result;
3794    specify REM_FLAG nonzero to get the remainder.
3795
3796    CODE is the expression code for which kind of division this is;
3797    it controls how rounding is done.  MODE is the machine mode to use.
3798    UNSIGNEDP nonzero means do unsigned division.  */
3799
3800 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3801    and then correct it by or'ing in missing high bits
3802    if result of ANDI is nonzero.
3803    For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3804    This could optimize to a bfexts instruction.
3805    But C doesn't use these operations, so their optimizations are
3806    left for later.  */
3807 /* ??? For modulo, we don't actually need the highpart of the first product,
3808    the low part will do nicely.  And for small divisors, the second multiply
3809    can also be a low-part only multiply or even be completely left out.
3810    E.g. to calculate the remainder of a division by 3 with a 32 bit
3811    multiply, multiply with 0x55555556 and extract the upper two bits;
3812    the result is exact for inputs up to 0x1fffffff.
3813    The input range can be reduced by using cross-sum rules.
3814    For odd divisors >= 3, the following table gives right shift counts
3815    so that if a number is shifted by an integer multiple of the given
3816    amount, the remainder stays the same:
3817    2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3818    14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3819    0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3820    20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3821    0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3822
3823    Cross-sum rules for even numbers can be derived by leaving as many bits
3824    to the right alone as the divisor has zeros to the right.
3825    E.g. if x is an unsigned 32 bit number:
3826    (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3827    */
3828
3829 rtx
3830 expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3831                rtx op0, rtx op1, rtx target, int unsignedp)
3832 {
3833   enum machine_mode compute_mode;
3834   rtx tquotient;
3835   rtx quotient = 0, remainder = 0;
3836   rtx last;
3837   int size;
3838   rtx insn;
3839   optab optab1, optab2;
3840   int op1_is_constant, op1_is_pow2 = 0;
3841   int max_cost, extra_cost;
3842   static HOST_WIDE_INT last_div_const = 0;
3843   bool speed = optimize_insn_for_speed_p ();
3844
3845   op1_is_constant = CONST_INT_P (op1);
3846   if (op1_is_constant)
3847     {
3848       unsigned HOST_WIDE_INT ext_op1 = UINTVAL (op1);
3849       if (unsignedp)
3850         ext_op1 &= GET_MODE_MASK (mode);
3851       op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3852                      || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3853     }
3854
3855   /*
3856      This is the structure of expand_divmod:
3857
3858      First comes code to fix up the operands so we can perform the operations
3859      correctly and efficiently.
3860
3861      Second comes a switch statement with code specific for each rounding mode.
3862      For some special operands this code emits all RTL for the desired
3863      operation, for other cases, it generates only a quotient and stores it in
3864      QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
3865      to indicate that it has not done anything.
3866
3867      Last comes code that finishes the operation.  If QUOTIENT is set and
3868      REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
3869      QUOTIENT is not set, it is computed using trunc rounding.
3870
3871      We try to generate special code for division and remainder when OP1 is a
3872      constant.  If |OP1| = 2**n we can use shifts and some other fast
3873      operations.  For other values of OP1, we compute a carefully selected
3874      fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3875      by m.
3876
3877      In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3878      half of the product.  Different strategies for generating the product are
3879      implemented in expmed_mult_highpart.
3880
3881      If what we actually want is the remainder, we generate that by another
3882      by-constant multiplication and a subtraction.  */
3883
3884   /* We shouldn't be called with OP1 == const1_rtx, but some of the
3885      code below will malfunction if we are, so check here and handle
3886      the special case if so.  */
3887   if (op1 == const1_rtx)
3888     return rem_flag ? const0_rtx : op0;
3889
3890     /* When dividing by -1, we could get an overflow.
3891      negv_optab can handle overflows.  */
3892   if (! unsignedp && op1 == constm1_rtx)
3893     {
3894       if (rem_flag)
3895         return const0_rtx;
3896       return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT
3897                           ? negv_optab : neg_optab, op0, target, 0);
3898     }
3899
3900   if (target
3901       /* Don't use the function value register as a target
3902          since we have to read it as well as write it,
3903          and function-inlining gets confused by this.  */
3904       && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3905           /* Don't clobber an operand while doing a multi-step calculation.  */
3906           || ((rem_flag || op1_is_constant)
3907               && (reg_mentioned_p (target, op0)
3908                   || (MEM_P (op0) && MEM_P (target))))
3909           || reg_mentioned_p (target, op1)
3910           || (MEM_P (op1) && MEM_P (target))))
3911     target = 0;
3912
3913   /* Get the mode in which to perform this computation.  Normally it will
3914      be MODE, but sometimes we can't do the desired operation in MODE.
3915      If so, pick a wider mode in which we can do the operation.  Convert
3916      to that mode at the start to avoid repeated conversions.
3917
3918      First see what operations we need.  These depend on the expression
3919      we are evaluating.  (We assume that divxx3 insns exist under the
3920      same conditions that modxx3 insns and that these insns don't normally
3921      fail.  If these assumptions are not correct, we may generate less
3922      efficient code in some cases.)
3923
3924      Then see if we find a mode in which we can open-code that operation
3925      (either a division, modulus, or shift).  Finally, check for the smallest
3926      mode for which we can do the operation with a library call.  */
3927
3928   /* We might want to refine this now that we have division-by-constant
3929      optimization.  Since expmed_mult_highpart tries so many variants, it is
3930      not straightforward to generalize this.  Maybe we should make an array
3931      of possible modes in init_expmed?  Save this for GCC 2.7.  */
3932
3933   optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3934             ? (unsignedp ? lshr_optab : ashr_optab)
3935             : (unsignedp ? udiv_optab : sdiv_optab));
3936   optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3937             ? optab1
3938             : (unsignedp ? udivmod_optab : sdivmod_optab));
3939
3940   for (compute_mode = mode; compute_mode != VOIDmode;
3941        compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3942     if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
3943         || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
3944       break;
3945
3946   if (compute_mode == VOIDmode)
3947     for (compute_mode = mode; compute_mode != VOIDmode;
3948          compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3949       if (optab_libfunc (optab1, compute_mode)
3950           || optab_libfunc (optab2, compute_mode))
3951         break;
3952
3953   /* If we still couldn't find a mode, use MODE, but expand_binop will
3954      probably die.  */
3955   if (compute_mode == VOIDmode)
3956     compute_mode = mode;
3957
3958   if (target && GET_MODE (target) == compute_mode)
3959     tquotient = target;
3960   else
3961     tquotient = gen_reg_rtx (compute_mode);
3962
3963   size = GET_MODE_BITSIZE (compute_mode);
3964 #if 0
3965   /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3966      (mode), and thereby get better code when OP1 is a constant.  Do that
3967      later.  It will require going over all usages of SIZE below.  */
3968   size = GET_MODE_BITSIZE (mode);
3969 #endif
3970
3971   /* Only deduct the multiply-and-add cost for a REM if the last divide done
3972      was not by this same constant.  Then record the constant of this divide
3973      (or 0 for a REM or a non-constant OP1) for the next call.  */
3974   max_cost = (unsignedp
3975               ? udiv_cost (speed, compute_mode)
3976               : sdiv_cost (speed, compute_mode));
3977   if (rem_flag && ! (last_div_const != 0 && op1_is_constant
3978                      && INTVAL (op1) == last_div_const))
3979     max_cost -= (mul_cost (speed, compute_mode)
3980                  + add_cost (speed, compute_mode));
3981
3982   last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
3983
3984   /* Now convert to the best mode to use.  */
3985   if (compute_mode != mode)
3986     {
3987       op0 = convert_modes (compute_mode, mode, op0, unsignedp);
3988       op1 = convert_modes (compute_mode, mode, op1, unsignedp);
3989
3990       /* convert_modes may have placed op1 into a register, so we
3991          must recompute the following.  */
3992       op1_is_constant = CONST_INT_P (op1);
3993       op1_is_pow2 = (op1_is_constant
3994                      && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
3995                           || (! unsignedp
3996                               && EXACT_POWER_OF_2_OR_ZERO_P (-UINTVAL (op1))))));
3997     }
3998
3999   /* If one of the operands is a volatile MEM, copy it into a register.  */
4000
4001   if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4002     op0 = force_reg (compute_mode, op0);
4003   if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4004     op1 = force_reg (compute_mode, op1);
4005
4006   /* If we need the remainder or if OP1 is constant, we need to
4007      put OP0 in a register in case it has any queued subexpressions.  */
4008   if (rem_flag || op1_is_constant)
4009     op0 = force_reg (compute_mode, op0);
4010
4011   last = get_last_insn ();
4012
4013   /* For unsigned ops, promote floor (and pow2 exact-div) to trunc rounding.  */
4014   if (unsignedp)
4015     {
4016       if (code == FLOOR_DIV_EXPR)
4017         code = TRUNC_DIV_EXPR;
4018       if (code == FLOOR_MOD_EXPR)
4019         code = TRUNC_MOD_EXPR;
4020       if (code == EXACT_DIV_EXPR && op1_is_pow2)
4021         code = TRUNC_DIV_EXPR;
4022     }
4023
4024   if (op1 != const0_rtx)
4025     switch (code)
4026       {
4027       case TRUNC_MOD_EXPR:
4028       case TRUNC_DIV_EXPR:
4029         if (op1_is_constant)
4030           {
4031             if (unsignedp)
4032               {
4033                 unsigned HOST_WIDE_INT mh, ml;
4034                 int pre_shift, post_shift;
4035                 int dummy;
4036                 unsigned HOST_WIDE_INT d = (INTVAL (op1)
4037                                             & GET_MODE_MASK (compute_mode));
4038
4039                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4040                   {
4041                     pre_shift = floor_log2 (d);
4042                     if (rem_flag)
4043                       {
4044                         unsigned HOST_WIDE_INT mask
4045                           = ((unsigned HOST_WIDE_INT) 1 << pre_shift) - 1;
4046                         remainder
4047                           = expand_binop (compute_mode, and_optab, op0,
4048                                           gen_int_mode (mask, compute_mode),
4049                                           remainder, 1,
4050                                           OPTAB_LIB_WIDEN);
4051                         if (remainder)
4052                           return gen_lowpart (mode, remainder);
4053                       }
4054                     quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4055                                              pre_shift, tquotient, 1);
4056                   }
4057                 else if (size <= HOST_BITS_PER_WIDE_INT)
4058                   {
4059                     if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
4060                       {
4061                         /* Most significant bit of divisor is set; emit an scc
4062                            insn.  */
4063                         quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4064                                                           compute_mode, 1, 1);
4065                       }
4066                     else
4067                       {
4068                         /* Find a suitable multiplier and right shift count
4069                            instead of multiplying with D.  */
4070
4071                         mh = choose_multiplier (d, size, size,
4072                                                 &ml, &post_shift, &dummy);
4073
4074                         /* If the suggested multiplier is more than SIZE bits,
4075                            we can do better for even divisors, using an
4076                            initial right shift.  */
4077                         if (mh != 0 && (d & 1) == 0)
4078                           {
4079                             pre_shift = floor_log2 (d & -d);
4080                             mh = choose_multiplier (d >> pre_shift, size,
4081                                                     size - pre_shift,
4082                                                     &ml, &post_shift, &dummy);
4083                             gcc_assert (!mh);
4084                           }
4085                         else
4086                           pre_shift = 0;
4087
4088                         if (mh != 0)
4089                           {
4090                             rtx t1, t2, t3, t4;
4091
4092                             if (post_shift - 1 >= BITS_PER_WORD)
4093                               goto fail1;
4094
4095                             extra_cost
4096                               = (shift_cost (speed, compute_mode, post_shift - 1)
4097                                  + shift_cost (speed, compute_mode, 1)
4098                                  + 2 * add_cost (speed, compute_mode));
4099                             t1 = expmed_mult_highpart
4100                               (compute_mode, op0,
4101                                gen_int_mode (ml, compute_mode),
4102                                NULL_RTX, 1, max_cost - extra_cost);
4103                             if (t1 == 0)
4104                               goto fail1;
4105                             t2 = force_operand (gen_rtx_MINUS (compute_mode,
4106                                                                op0, t1),
4107                                                 NULL_RTX);
4108                             t3 = expand_shift (RSHIFT_EXPR, compute_mode,
4109                                                t2, 1, NULL_RTX, 1);
4110                             t4 = force_operand (gen_rtx_PLUS (compute_mode,
4111                                                               t1, t3),
4112                                                 NULL_RTX);
4113                             quotient = expand_shift
4114                               (RSHIFT_EXPR, compute_mode, t4,
4115                                post_shift - 1, tquotient, 1);
4116                           }
4117                         else
4118                           {
4119                             rtx t1, t2;
4120
4121                             if (pre_shift >= BITS_PER_WORD
4122                                 || post_shift >= BITS_PER_WORD)
4123                               goto fail1;
4124
4125                             t1 = expand_shift
4126                               (RSHIFT_EXPR, compute_mode, op0,
4127                                pre_shift, NULL_RTX, 1);
4128                             extra_cost
4129                               = (shift_cost (speed, compute_mode, pre_shift)
4130                                  + shift_cost (speed, compute_mode, post_shift));
4131                             t2 = expmed_mult_highpart
4132                               (compute_mode, t1,
4133                                gen_int_mode (ml, compute_mode),
4134                                NULL_RTX, 1, max_cost - extra_cost);
4135                             if (t2 == 0)
4136                               goto fail1;
4137                             quotient = expand_shift
4138                               (RSHIFT_EXPR, compute_mode, t2,
4139                                post_shift, tquotient, 1);
4140                           }
4141                       }
4142                   }
4143                 else            /* Too wide mode to use tricky code */
4144                   break;
4145
4146                 insn = get_last_insn ();
4147                 if (insn != last)
4148                   set_dst_reg_note (insn, REG_EQUAL,
4149                                     gen_rtx_UDIV (compute_mode, op0, op1),
4150                                     quotient);
4151               }
4152             else                /* TRUNC_DIV, signed */
4153               {
4154                 unsigned HOST_WIDE_INT ml;
4155                 int lgup, post_shift;
4156                 rtx mlr;
4157                 HOST_WIDE_INT d = INTVAL (op1);
4158                 unsigned HOST_WIDE_INT abs_d;
4159
4160                 /* Since d might be INT_MIN, we have to cast to
4161                    unsigned HOST_WIDE_INT before negating to avoid
4162                    undefined signed overflow.  */
4163                 abs_d = (d >= 0
4164                          ? (unsigned HOST_WIDE_INT) d
4165                          : - (unsigned HOST_WIDE_INT) d);
4166
4167                 /* n rem d = n rem -d */
4168                 if (rem_flag && d < 0)
4169                   {
4170                     d = abs_d;
4171                     op1 = gen_int_mode (abs_d, compute_mode);
4172                   }
4173
4174                 if (d == 1)
4175                   quotient = op0;
4176                 else if (d == -1)
4177                   quotient = expand_unop (compute_mode, neg_optab, op0,
4178                                           tquotient, 0);
4179                 else if (HOST_BITS_PER_WIDE_INT >= size
4180                          && abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4181                   {
4182                     /* This case is not handled correctly below.  */
4183                     quotient = emit_store_flag (tquotient, EQ, op0, op1,
4184                                                 compute_mode, 1, 1);
4185                     if (quotient == 0)
4186                       goto fail1;
4187                   }
4188                 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4189                          && (rem_flag
4190                              ? smod_pow2_cheap (speed, compute_mode)
4191                              : sdiv_pow2_cheap (speed, compute_mode))
4192                          /* We assume that cheap metric is true if the
4193                             optab has an expander for this mode.  */
4194                          && ((optab_handler ((rem_flag ? smod_optab
4195                                               : sdiv_optab),
4196                                              compute_mode)
4197                               != CODE_FOR_nothing)
4198                              || (optab_handler (sdivmod_optab,
4199                                                 compute_mode)
4200                                  != CODE_FOR_nothing)))
4201                   ;
4202                 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4203                   {
4204                     if (rem_flag)
4205                       {
4206                         remainder = expand_smod_pow2 (compute_mode, op0, d);
4207                         if (remainder)
4208                           return gen_lowpart (mode, remainder);
4209                       }
4210
4211                     if (sdiv_pow2_cheap (speed, compute_mode)
4212                         && ((optab_handler (sdiv_optab, compute_mode)
4213                              != CODE_FOR_nothing)
4214                             || (optab_handler (sdivmod_optab, compute_mode)
4215                                 != CODE_FOR_nothing)))
4216                       quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4217                                                 compute_mode, op0,
4218                                                 gen_int_mode (abs_d,
4219                                                               compute_mode),
4220                                                 NULL_RTX, 0);
4221                     else
4222                       quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4223
4224                     /* We have computed OP0 / abs(OP1).  If OP1 is negative,
4225                        negate the quotient.  */
4226                     if (d < 0)
4227                       {
4228                         insn = get_last_insn ();
4229                         if (insn != last
4230                             && abs_d < ((unsigned HOST_WIDE_INT) 1
4231                                         << (HOST_BITS_PER_WIDE_INT - 1)))
4232                           set_dst_reg_note (insn, REG_EQUAL,
4233                                             gen_rtx_DIV (compute_mode, op0,
4234                                                          gen_int_mode
4235                                                            (abs_d,
4236                                                             compute_mode)),
4237                                             quotient);
4238
4239                         quotient = expand_unop (compute_mode, neg_optab,
4240                                                 quotient, quotient, 0);
4241                       }
4242                   }
4243                 else if (size <= HOST_BITS_PER_WIDE_INT)
4244                   {
4245                     choose_multiplier (abs_d, size, size - 1,
4246                                        &ml, &post_shift, &lgup);
4247                     if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4248                       {
4249                         rtx t1, t2, t3;
4250
4251                         if (post_shift >= BITS_PER_WORD
4252                             || size - 1 >= BITS_PER_WORD)
4253                           goto fail1;
4254
4255                         extra_cost = (shift_cost (speed, compute_mode, post_shift)
4256                                       + shift_cost (speed, compute_mode, size - 1)
4257                                       + add_cost (speed, compute_mode));
4258                         t1 = expmed_mult_highpart
4259                           (compute_mode, op0, gen_int_mode (ml, compute_mode),
4260                            NULL_RTX, 0, max_cost - extra_cost);
4261                         if (t1 == 0)
4262                           goto fail1;
4263                         t2 = expand_shift
4264                           (RSHIFT_EXPR, compute_mode, t1,
4265                            post_shift, NULL_RTX, 0);
4266                         t3 = expand_shift
4267                           (RSHIFT_EXPR, compute_mode, op0,
4268                            size - 1, NULL_RTX, 0);
4269                         if (d < 0)
4270                           quotient
4271                             = force_operand (gen_rtx_MINUS (compute_mode,
4272                                                             t3, t2),
4273                                              tquotient);
4274                         else
4275                           quotient
4276                             = force_operand (gen_rtx_MINUS (compute_mode,
4277                                                             t2, t3),
4278                                              tquotient);
4279                       }
4280                     else
4281                       {
4282                         rtx t1, t2, t3, t4;
4283
4284                         if (post_shift >= BITS_PER_WORD
4285                             || size - 1 >= BITS_PER_WORD)
4286                           goto fail1;
4287
4288                         ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4289                         mlr = gen_int_mode (ml, compute_mode);
4290                         extra_cost = (shift_cost (speed, compute_mode, post_shift)
4291                                       + shift_cost (speed, compute_mode, size - 1)
4292                                       + 2 * add_cost (speed, compute_mode));
4293                         t1 = expmed_mult_highpart (compute_mode, op0, mlr,
4294                                                    NULL_RTX, 0,
4295                                                    max_cost - extra_cost);
4296                         if (t1 == 0)
4297                           goto fail1;
4298                         t2 = force_operand (gen_rtx_PLUS (compute_mode,
4299                                                           t1, op0),
4300                                             NULL_RTX);
4301                         t3 = expand_shift
4302                           (RSHIFT_EXPR, compute_mode, t2,
4303                            post_shift, NULL_RTX, 0);
4304                         t4 = expand_shift
4305                           (RSHIFT_EXPR, compute_mode, op0,
4306                            size - 1, NULL_RTX, 0);
4307                         if (d < 0)
4308                           quotient
4309                             = force_operand (gen_rtx_MINUS (compute_mode,
4310                                                             t4, t3),
4311                                              tquotient);
4312                         else
4313                           quotient
4314                             = force_operand (gen_rtx_MINUS (compute_mode,
4315                                                             t3, t4),
4316                                              tquotient);
4317                       }
4318                   }
4319                 else            /* Too wide mode to use tricky code */
4320                   break;
4321
4322                 insn = get_last_insn ();
4323                 if (insn != last)
4324                   set_dst_reg_note (insn, REG_EQUAL,
4325                                     gen_rtx_DIV (compute_mode, op0, op1),
4326                                     quotient);
4327               }
4328             break;
4329           }
4330       fail1:
4331         delete_insns_since (last);
4332         break;
4333
4334       case FLOOR_DIV_EXPR:
4335       case FLOOR_MOD_EXPR:
4336       /* We will come here only for signed operations.  */
4337         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4338           {
4339             unsigned HOST_WIDE_INT mh, ml;
4340             int pre_shift, lgup, post_shift;
4341             HOST_WIDE_INT d = INTVAL (op1);
4342
4343             if (d > 0)
4344               {
4345                 /* We could just as easily deal with negative constants here,
4346                    but it does not seem worth the trouble for GCC 2.6.  */
4347                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4348                   {
4349                     pre_shift = floor_log2 (d);
4350                     if (rem_flag)
4351                       {
4352                         unsigned HOST_WIDE_INT mask
4353                           = ((unsigned HOST_WIDE_INT) 1 << pre_shift) - 1;
4354                         remainder = expand_binop
4355                           (compute_mode, and_optab, op0,
4356                            gen_int_mode (mask, compute_mode),
4357                            remainder, 0, OPTAB_LIB_WIDEN);
4358                         if (remainder)
4359                           return gen_lowpart (mode, remainder);
4360                       }
4361                     quotient = expand_shift
4362                       (RSHIFT_EXPR, compute_mode, op0,
4363                        pre_shift, tquotient, 0);
4364                   }
4365                 else
4366                   {
4367                     rtx t1, t2, t3, t4;
4368
4369                     mh = choose_multiplier (d, size, size - 1,
4370                                             &ml, &post_shift, &lgup);
4371                     gcc_assert (!mh);
4372
4373                     if (post_shift < BITS_PER_WORD
4374                         && size - 1 < BITS_PER_WORD)
4375                       {
4376                         t1 = expand_shift
4377                           (RSHIFT_EXPR, compute_mode, op0,
4378                            size - 1, NULL_RTX, 0);
4379                         t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4380                                            NULL_RTX, 0, OPTAB_WIDEN);
4381                         extra_cost = (shift_cost (speed, compute_mode, post_shift)
4382                                       + shift_cost (speed, compute_mode, size - 1)
4383                                       + 2 * add_cost (speed, compute_mode));
4384                         t3 = expmed_mult_highpart
4385                           (compute_mode, t2, gen_int_mode (ml, compute_mode),
4386                            NULL_RTX, 1, max_cost - extra_cost);
4387                         if (t3 != 0)
4388                           {
4389                             t4 = expand_shift
4390                               (RSHIFT_EXPR, compute_mode, t3,
4391                                post_shift, NULL_RTX, 1);
4392                             quotient = expand_binop (compute_mode, xor_optab,
4393                                                      t4, t1, tquotient, 0,
4394                                                      OPTAB_WIDEN);
4395                           }
4396                       }
4397                   }
4398               }
4399             else
4400               {
4401                 rtx nsign, t1, t2, t3, t4;
4402                 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4403                                                   op0, constm1_rtx), NULL_RTX);
4404                 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4405                                    0, OPTAB_WIDEN);
4406                 nsign = expand_shift
4407                   (RSHIFT_EXPR, compute_mode, t2,
4408                    size - 1, NULL_RTX, 0);
4409                 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4410                                     NULL_RTX);
4411                 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4412                                     NULL_RTX, 0);
4413                 if (t4)
4414                   {
4415                     rtx t5;
4416                     t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4417                                       NULL_RTX, 0);
4418                     quotient = force_operand (gen_rtx_PLUS (compute_mode,
4419                                                             t4, t5),
4420                                               tquotient);
4421                   }
4422               }
4423           }
4424
4425         if (quotient != 0)
4426           break;
4427         delete_insns_since (last);
4428
4429         /* Try using an instruction that produces both the quotient and
4430            remainder, using truncation.  We can easily compensate the quotient
4431            or remainder to get floor rounding, once we have the remainder.
4432            Notice that we compute also the final remainder value here,
4433            and return the result right away.  */
4434         if (target == 0 || GET_MODE (target) != compute_mode)
4435           target = gen_reg_rtx (compute_mode);
4436
4437         if (rem_flag)
4438           {
4439             remainder
4440               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4441             quotient = gen_reg_rtx (compute_mode);
4442           }
4443         else
4444           {
4445             quotient
4446               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4447             remainder = gen_reg_rtx (compute_mode);
4448           }
4449
4450         if (expand_twoval_binop (sdivmod_optab, op0, op1,
4451                                  quotient, remainder, 0))
4452           {
4453             /* This could be computed with a branch-less sequence.
4454                Save that for later.  */
4455             rtx tem;
4456             rtx label = gen_label_rtx ();
4457             do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4458             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4459                                 NULL_RTX, 0, OPTAB_WIDEN);
4460             do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4461             expand_dec (quotient, const1_rtx);
4462             expand_inc (remainder, op1);
4463             emit_label (label);
4464             return gen_lowpart (mode, rem_flag ? remainder : quotient);
4465           }
4466
4467         /* No luck with division elimination or divmod.  Have to do it
4468            by conditionally adjusting op0 *and* the result.  */
4469         {
4470           rtx label1, label2, label3, label4, label5;
4471           rtx adjusted_op0;
4472           rtx tem;
4473
4474           quotient = gen_reg_rtx (compute_mode);
4475           adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4476           label1 = gen_label_rtx ();
4477           label2 = gen_label_rtx ();
4478           label3 = gen_label_rtx ();
4479           label4 = gen_label_rtx ();
4480           label5 = gen_label_rtx ();
4481           do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4482           do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4483           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4484                               quotient, 0, OPTAB_LIB_WIDEN);
4485           if (tem != quotient)
4486             emit_move_insn (quotient, tem);
4487           emit_jump_insn (gen_jump (label5));
4488           emit_barrier ();
4489           emit_label (label1);
4490           expand_inc (adjusted_op0, const1_rtx);
4491           emit_jump_insn (gen_jump (label4));
4492           emit_barrier ();
4493           emit_label (label2);
4494           do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4495           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4496                               quotient, 0, OPTAB_LIB_WIDEN);
4497           if (tem != quotient)
4498             emit_move_insn (quotient, tem);
4499           emit_jump_insn (gen_jump (label5));
4500           emit_barrier ();
4501           emit_label (label3);
4502           expand_dec (adjusted_op0, const1_rtx);
4503           emit_label (label4);
4504           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4505                               quotient, 0, OPTAB_LIB_WIDEN);
4506           if (tem != quotient)
4507             emit_move_insn (quotient, tem);
4508           expand_dec (quotient, const1_rtx);
4509           emit_label (label5);
4510         }
4511         break;
4512
4513       case CEIL_DIV_EXPR:
4514       case CEIL_MOD_EXPR:
4515         if (unsignedp)
4516           {
4517             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4518               {
4519                 rtx t1, t2, t3;
4520                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4521                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4522                                    floor_log2 (d), tquotient, 1);
4523                 t2 = expand_binop (compute_mode, and_optab, op0,
4524                                    gen_int_mode (d - 1, compute_mode),
4525                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4526                 t3 = gen_reg_rtx (compute_mode);
4527                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4528                                       compute_mode, 1, 1);
4529                 if (t3 == 0)
4530                   {
4531                     rtx lab;
4532                     lab = gen_label_rtx ();
4533                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4534                     expand_inc (t1, const1_rtx);
4535                     emit_label (lab);
4536                     quotient = t1;
4537                   }
4538                 else
4539                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4540                                                           t1, t3),
4541                                             tquotient);
4542                 break;
4543               }
4544
4545             /* Try using an instruction that produces both the quotient and
4546                remainder, using truncation.  We can easily compensate the
4547                quotient or remainder to get ceiling rounding, once we have the
4548                remainder.  Notice that we compute also the final remainder
4549                value here, and return the result right away.  */
4550             if (target == 0 || GET_MODE (target) != compute_mode)
4551               target = gen_reg_rtx (compute_mode);
4552
4553             if (rem_flag)
4554               {
4555                 remainder = (REG_P (target)
4556                              ? target : gen_reg_rtx (compute_mode));
4557                 quotient = gen_reg_rtx (compute_mode);
4558               }
4559             else
4560               {
4561                 quotient = (REG_P (target)
4562                             ? target : gen_reg_rtx (compute_mode));
4563                 remainder = gen_reg_rtx (compute_mode);
4564               }
4565
4566             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4567                                      remainder, 1))
4568               {
4569                 /* This could be computed with a branch-less sequence.
4570                    Save that for later.  */
4571                 rtx label = gen_label_rtx ();
4572                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4573                                  compute_mode, label);
4574                 expand_inc (quotient, const1_rtx);
4575                 expand_dec (remainder, op1);
4576                 emit_label (label);
4577                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4578               }
4579
4580             /* No luck with division elimination or divmod.  Have to do it
4581                by conditionally adjusting op0 *and* the result.  */
4582             {
4583               rtx label1, label2;
4584               rtx adjusted_op0, tem;
4585
4586               quotient = gen_reg_rtx (compute_mode);
4587               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4588               label1 = gen_label_rtx ();
4589               label2 = gen_label_rtx ();
4590               do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4591                                compute_mode, label1);
4592               emit_move_insn  (quotient, const0_rtx);
4593               emit_jump_insn (gen_jump (label2));
4594               emit_barrier ();
4595               emit_label (label1);
4596               expand_dec (adjusted_op0, const1_rtx);
4597               tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4598                                   quotient, 1, OPTAB_LIB_WIDEN);
4599               if (tem != quotient)
4600                 emit_move_insn (quotient, tem);
4601               expand_inc (quotient, const1_rtx);
4602               emit_label (label2);
4603             }
4604           }
4605         else /* signed */
4606           {
4607             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4608                 && INTVAL (op1) >= 0)
4609               {
4610                 /* This is extremely similar to the code for the unsigned case
4611                    above.  For 2.7 we should merge these variants, but for
4612                    2.6.1 I don't want to touch the code for unsigned since that
4613                    get used in C.  The signed case will only be used by other
4614                    languages (Ada).  */
4615
4616                 rtx t1, t2, t3;
4617                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4618                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4619                                    floor_log2 (d), tquotient, 0);
4620                 t2 = expand_binop (compute_mode, and_optab, op0,
4621                                    gen_int_mode (d - 1, compute_mode),
4622                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4623                 t3 = gen_reg_rtx (compute_mode);
4624                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4625                                       compute_mode, 1, 1);
4626                 if (t3 == 0)
4627                   {
4628                     rtx lab;
4629                     lab = gen_label_rtx ();
4630                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4631                     expand_inc (t1, const1_rtx);
4632                     emit_label (lab);
4633                     quotient = t1;
4634                   }
4635                 else
4636                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4637                                                           t1, t3),
4638                                             tquotient);
4639                 break;
4640               }
4641
4642             /* Try using an instruction that produces both the quotient and
4643                remainder, using truncation.  We can easily compensate the
4644                quotient or remainder to get ceiling rounding, once we have the
4645                remainder.  Notice that we compute also the final remainder
4646                value here, and return the result right away.  */
4647             if (target == 0 || GET_MODE (target) != compute_mode)
4648               target = gen_reg_rtx (compute_mode);
4649             if (rem_flag)
4650               {
4651                 remainder= (REG_P (target)
4652                             ? target : gen_reg_rtx (compute_mode));
4653                 quotient = gen_reg_rtx (compute_mode);
4654               }
4655             else
4656               {
4657                 quotient = (REG_P (target)
4658                             ? target : gen_reg_rtx (compute_mode));
4659                 remainder = gen_reg_rtx (compute_mode);
4660               }
4661
4662             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4663                                      remainder, 0))
4664               {
4665                 /* This could be computed with a branch-less sequence.
4666                    Save that for later.  */
4667                 rtx tem;
4668                 rtx label = gen_label_rtx ();
4669                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4670                                  compute_mode, label);
4671                 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4672                                     NULL_RTX, 0, OPTAB_WIDEN);
4673                 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4674                 expand_inc (quotient, const1_rtx);
4675                 expand_dec (remainder, op1);
4676                 emit_label (label);
4677                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4678               }
4679
4680             /* No luck with division elimination or divmod.  Have to do it
4681                by conditionally adjusting op0 *and* the result.  */
4682             {
4683               rtx label1, label2, label3, label4, label5;
4684               rtx adjusted_op0;
4685               rtx tem;
4686
4687               quotient = gen_reg_rtx (compute_mode);
4688               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4689               label1 = gen_label_rtx ();
4690               label2 = gen_label_rtx ();
4691               label3 = gen_label_rtx ();
4692               label4 = gen_label_rtx ();
4693               label5 = gen_label_rtx ();
4694               do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4695               do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4696                                compute_mode, label1);
4697               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4698                                   quotient, 0, OPTAB_LIB_WIDEN);
4699               if (tem != quotient)
4700                 emit_move_insn (quotient, tem);
4701               emit_jump_insn (gen_jump (label5));
4702               emit_barrier ();
4703               emit_label (label1);
4704               expand_dec (adjusted_op0, const1_rtx);
4705               emit_jump_insn (gen_jump (label4));
4706               emit_barrier ();
4707               emit_label (label2);
4708               do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4709                                compute_mode, label3);
4710               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4711                                   quotient, 0, OPTAB_LIB_WIDEN);
4712               if (tem != quotient)
4713                 emit_move_insn (quotient, tem);
4714               emit_jump_insn (gen_jump (label5));
4715               emit_barrier ();
4716               emit_label (label3);
4717               expand_inc (adjusted_op0, const1_rtx);
4718               emit_label (label4);
4719               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4720                                   quotient, 0, OPTAB_LIB_WIDEN);
4721               if (tem != quotient)
4722                 emit_move_insn (quotient, tem);
4723               expand_inc (quotient, const1_rtx);
4724               emit_label (label5);
4725             }
4726           }
4727         break;
4728
4729       case EXACT_DIV_EXPR:
4730         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4731           {
4732             HOST_WIDE_INT d = INTVAL (op1);
4733             unsigned HOST_WIDE_INT ml;
4734             int pre_shift;
4735             rtx t1;
4736
4737             pre_shift = floor_log2 (d & -d);
4738             ml = invert_mod2n (d >> pre_shift, size);
4739             t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4740                                pre_shift, NULL_RTX, unsignedp);
4741             quotient = expand_mult (compute_mode, t1,
4742                                     gen_int_mode (ml, compute_mode),
4743                                     NULL_RTX, 1);
4744
4745             insn = get_last_insn ();
4746             set_dst_reg_note (insn, REG_EQUAL,
4747                               gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4748                                               compute_mode, op0, op1),
4749                               quotient);
4750           }
4751         break;
4752
4753       case ROUND_DIV_EXPR:
4754       case ROUND_MOD_EXPR:
4755         if (unsignedp)
4756           {
4757             rtx tem;
4758             rtx label;
4759             label = gen_label_rtx ();
4760             quotient = gen_reg_rtx (compute_mode);
4761             remainder = gen_reg_rtx (compute_mode);
4762             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4763               {
4764                 rtx tem;
4765                 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4766                                          quotient, 1, OPTAB_LIB_WIDEN);
4767                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4768                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4769                                           remainder, 1, OPTAB_LIB_WIDEN);
4770               }
4771             tem = plus_constant (compute_mode, op1, -1);
4772             tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1);
4773             do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4774             expand_inc (quotient, const1_rtx);
4775             expand_dec (remainder, op1);
4776             emit_label (label);
4777           }
4778         else
4779           {
4780             rtx abs_rem, abs_op1, tem, mask;
4781             rtx label;
4782             label = gen_label_rtx ();
4783             quotient = gen_reg_rtx (compute_mode);
4784             remainder = gen_reg_rtx (compute_mode);
4785             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4786               {
4787                 rtx tem;
4788                 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4789                                          quotient, 0, OPTAB_LIB_WIDEN);
4790                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4791                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4792                                           remainder, 0, OPTAB_LIB_WIDEN);
4793               }
4794             abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4795             abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4796             tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4797                                 1, NULL_RTX, 1);
4798             do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4799             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4800                                 NULL_RTX, 0, OPTAB_WIDEN);
4801             mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4802                                  size - 1, NULL_RTX, 0);
4803             tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4804                                 NULL_RTX, 0, OPTAB_WIDEN);
4805             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4806                                 NULL_RTX, 0, OPTAB_WIDEN);
4807             expand_inc (quotient, tem);
4808             tem = expand_binop (compute_mode, xor_optab, mask, op1,
4809                                 NULL_RTX, 0, OPTAB_WIDEN);
4810             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4811                                 NULL_RTX, 0, OPTAB_WIDEN);
4812             expand_dec (remainder, tem);
4813             emit_label (label);
4814           }
4815         return gen_lowpart (mode, rem_flag ? remainder : quotient);
4816
4817       default:
4818         gcc_unreachable ();
4819       }
4820
4821   if (quotient == 0)
4822     {
4823       if (target && GET_MODE (target) != compute_mode)
4824         target = 0;
4825
4826       if (rem_flag)
4827         {
4828           /* Try to produce the remainder without producing the quotient.
4829              If we seem to have a divmod pattern that does not require widening,
4830              don't try widening here.  We should really have a WIDEN argument
4831              to expand_twoval_binop, since what we'd really like to do here is
4832              1) try a mod insn in compute_mode
4833              2) try a divmod insn in compute_mode
4834              3) try a div insn in compute_mode and multiply-subtract to get
4835                 remainder
4836              4) try the same things with widening allowed.  */
4837           remainder
4838             = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4839                                  op0, op1, target,
4840                                  unsignedp,
4841                                  ((optab_handler (optab2, compute_mode)
4842                                    != CODE_FOR_nothing)
4843                                   ? OPTAB_DIRECT : OPTAB_WIDEN));
4844           if (remainder == 0)
4845             {
4846               /* No luck there.  Can we do remainder and divide at once
4847                  without a library call?  */
4848               remainder = gen_reg_rtx (compute_mode);
4849               if (! expand_twoval_binop ((unsignedp
4850                                           ? udivmod_optab
4851                                           : sdivmod_optab),
4852                                          op0, op1,
4853                                          NULL_RTX, remainder, unsignedp))
4854                 remainder = 0;
4855             }
4856
4857           if (remainder)
4858             return gen_lowpart (mode, remainder);
4859         }
4860
4861       /* Produce the quotient.  Try a quotient insn, but not a library call.
4862          If we have a divmod in this mode, use it in preference to widening
4863          the div (for this test we assume it will not fail). Note that optab2
4864          is set to the one of the two optabs that the call below will use.  */
4865       quotient
4866         = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4867                              op0, op1, rem_flag ? NULL_RTX : target,
4868                              unsignedp,
4869                              ((optab_handler (optab2, compute_mode)
4870                                != CODE_FOR_nothing)
4871                               ? OPTAB_DIRECT : OPTAB_WIDEN));
4872
4873       if (quotient == 0)
4874         {
4875           /* No luck there.  Try a quotient-and-remainder insn,
4876              keeping the quotient alone.  */
4877           quotient = gen_reg_rtx (compute_mode);
4878           if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4879                                      op0, op1,
4880                                      quotient, NULL_RTX, unsignedp))
4881             {
4882               quotient = 0;
4883               if (! rem_flag)
4884                 /* Still no luck.  If we are not computing the remainder,
4885                    use a library call for the quotient.  */
4886                 quotient = sign_expand_binop (compute_mode,
4887                                               udiv_optab, sdiv_optab,
4888                                               op0, op1, target,
4889                                               unsignedp, OPTAB_LIB_WIDEN);
4890             }
4891         }
4892     }
4893
4894   if (rem_flag)
4895     {
4896       if (target && GET_MODE (target) != compute_mode)
4897         target = 0;
4898
4899       if (quotient == 0)
4900         {
4901           /* No divide instruction either.  Use library for remainder.  */
4902           remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4903                                          op0, op1, target,
4904                                          unsignedp, OPTAB_LIB_WIDEN);
4905           /* No remainder function.  Try a quotient-and-remainder
4906              function, keeping the remainder.  */
4907           if (!remainder)
4908             {
4909               remainder = gen_reg_rtx (compute_mode);
4910               if (!expand_twoval_binop_libfunc
4911                   (unsignedp ? udivmod_optab : sdivmod_optab,
4912                    op0, op1,
4913                    NULL_RTX, remainder,
4914                    unsignedp ? UMOD : MOD))
4915                 remainder = NULL_RTX;
4916             }
4917         }
4918       else
4919         {
4920           /* We divided.  Now finish doing X - Y * (X / Y).  */
4921           remainder = expand_mult (compute_mode, quotient, op1,
4922                                    NULL_RTX, unsignedp);
4923           remainder = expand_binop (compute_mode, sub_optab, op0,
4924                                     remainder, target, unsignedp,
4925                                     OPTAB_LIB_WIDEN);
4926         }
4927     }
4928
4929   return gen_lowpart (mode, rem_flag ? remainder : quotient);
4930 }
4931 \f
4932 /* Return a tree node with data type TYPE, describing the value of X.
4933    Usually this is an VAR_DECL, if there is no obvious better choice.
4934    X may be an expression, however we only support those expressions
4935    generated by loop.c.  */
4936
4937 tree
4938 make_tree (tree type, rtx x)
4939 {
4940   tree t;
4941
4942   switch (GET_CODE (x))
4943     {
4944     case CONST_INT:
4945       {
4946         HOST_WIDE_INT hi = 0;
4947
4948         if (INTVAL (x) < 0
4949             && !(TYPE_UNSIGNED (type)
4950                  && (GET_MODE_BITSIZE (TYPE_MODE (type))
4951                      < HOST_BITS_PER_WIDE_INT)))
4952           hi = -1;
4953
4954         t = build_int_cst_wide (type, INTVAL (x), hi);
4955
4956         return t;
4957       }
4958
4959     case CONST_DOUBLE:
4960       if (GET_MODE (x) == VOIDmode)
4961         t = build_int_cst_wide (type,
4962                                 CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
4963       else
4964         {
4965           REAL_VALUE_TYPE d;
4966
4967           REAL_VALUE_FROM_CONST_DOUBLE (d, x);
4968           t = build_real (type, d);
4969         }
4970
4971       return t;
4972
4973     case CONST_VECTOR:
4974       {
4975         int units = CONST_VECTOR_NUNITS (x);
4976         tree itype = TREE_TYPE (type);
4977         tree *elts;
4978         int i;
4979
4980         /* Build a tree with vector elements.  */
4981         elts = XALLOCAVEC (tree, units);
4982         for (i = units - 1; i >= 0; --i)
4983           {
4984             rtx elt = CONST_VECTOR_ELT (x, i);
4985             elts[i] = make_tree (itype, elt);
4986           }
4987
4988         return build_vector (type, elts);
4989       }
4990
4991     case PLUS:
4992       return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4993                           make_tree (type, XEXP (x, 1)));
4994
4995     case MINUS:
4996       return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4997                           make_tree (type, XEXP (x, 1)));
4998
4999     case NEG:
5000       return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5001
5002     case MULT:
5003       return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5004                           make_tree (type, XEXP (x, 1)));
5005
5006     case ASHIFT:
5007       return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5008                           make_tree (type, XEXP (x, 1)));
5009
5010     case LSHIFTRT:
5011       t = unsigned_type_for (type);
5012       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5013                                          make_tree (t, XEXP (x, 0)),
5014                                          make_tree (type, XEXP (x, 1))));
5015
5016     case ASHIFTRT:
5017       t = signed_type_for (type);
5018       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5019                                          make_tree (t, XEXP (x, 0)),
5020                                          make_tree (type, XEXP (x, 1))));
5021
5022     case DIV:
5023       if (TREE_CODE (type) != REAL_TYPE)
5024         t = signed_type_for (type);
5025       else
5026         t = type;
5027
5028       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5029                                          make_tree (t, XEXP (x, 0)),
5030                                          make_tree (t, XEXP (x, 1))));
5031     case UDIV:
5032       t = unsigned_type_for (type);
5033       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5034                                          make_tree (t, XEXP (x, 0)),
5035                                          make_tree (t, XEXP (x, 1))));
5036
5037     case SIGN_EXTEND:
5038     case ZERO_EXTEND:
5039       t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5040                                           GET_CODE (x) == ZERO_EXTEND);
5041       return fold_convert (type, make_tree (t, XEXP (x, 0)));
5042
5043     case CONST:
5044       return make_tree (type, XEXP (x, 0));
5045
5046     case SYMBOL_REF:
5047       t = SYMBOL_REF_DECL (x);
5048       if (t)
5049         return fold_convert (type, build_fold_addr_expr (t));
5050       /* else fall through.  */
5051
5052     default:
5053       t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5054
5055       /* If TYPE is a POINTER_TYPE, we might need to convert X from
5056          address mode to pointer mode.  */
5057       if (POINTER_TYPE_P (type))
5058         x = convert_memory_address_addr_space
5059               (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5060
5061       /* Note that we do *not* use SET_DECL_RTL here, because we do not
5062          want set_decl_rtl to go adjusting REG_ATTRS for this temporary.  */
5063       t->decl_with_rtl.rtl = x;
5064
5065       return t;
5066     }
5067 }
5068 \f
5069 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5070    and returning TARGET.
5071
5072    If TARGET is 0, a pseudo-register or constant is returned.  */
5073
5074 rtx
5075 expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
5076 {
5077   rtx tem = 0;
5078
5079   if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5080     tem = simplify_binary_operation (AND, mode, op0, op1);
5081   if (tem == 0)
5082     tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5083
5084   if (target == 0)
5085     target = tem;
5086   else if (tem != target)
5087     emit_move_insn (target, tem);
5088   return target;
5089 }
5090
5091 /* Helper function for emit_store_flag.  */
5092 static rtx
5093 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5094              enum machine_mode mode, enum machine_mode compare_mode,
5095              int unsignedp, rtx x, rtx y, int normalizep,
5096              enum machine_mode target_mode)
5097 {
5098   struct expand_operand ops[4];
5099   rtx op0, last, comparison, subtarget;
5100   enum machine_mode result_mode = targetm.cstore_mode (icode);
5101
5102   last = get_last_insn ();
5103   x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5104   y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5105   if (!x || !y)
5106     {
5107       delete_insns_since (last);
5108       return NULL_RTX;
5109     }
5110
5111   if (target_mode == VOIDmode)
5112     target_mode = result_mode;
5113   if (!target)
5114     target = gen_reg_rtx (target_mode);
5115
5116   comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5117
5118   create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5119   create_fixed_operand (&ops[1], comparison);
5120   create_fixed_operand (&ops[2], x);
5121   create_fixed_operand (&ops[3], y);
5122   if (!maybe_expand_insn (icode, 4, ops))
5123     {
5124       delete_insns_since (last);
5125       return NULL_RTX;
5126     }
5127   subtarget = ops[0].value;
5128
5129   /* If we are converting to a wider mode, first convert to
5130      TARGET_MODE, then normalize.  This produces better combining
5131      opportunities on machines that have a SIGN_EXTRACT when we are
5132      testing a single bit.  This mostly benefits the 68k.
5133
5134      If STORE_FLAG_VALUE does not have the sign bit set when
5135      interpreted in MODE, we can do this conversion as unsigned, which
5136      is usually more efficient.  */
5137   if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
5138     {
5139       convert_move (target, subtarget,
5140                     val_signbit_known_clear_p (result_mode,
5141                                                STORE_FLAG_VALUE));
5142       op0 = target;
5143       result_mode = target_mode;
5144     }
5145   else
5146     op0 = subtarget;
5147
5148   /* If we want to keep subexpressions around, don't reuse our last
5149      target.  */
5150   if (optimize)
5151     subtarget = 0;
5152
5153   /* Now normalize to the proper value in MODE.  Sometimes we don't
5154      have to do anything.  */
5155   if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5156     ;
5157   /* STORE_FLAG_VALUE might be the most negative number, so write
5158      the comparison this way to avoid a compiler-time warning.  */
5159   else if (- normalizep == STORE_FLAG_VALUE)
5160     op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5161
5162   /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5163      it hard to use a value of just the sign bit due to ANSI integer
5164      constant typing rules.  */
5165   else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5166     op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5167                         GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5168                         normalizep == 1);
5169   else
5170     {
5171       gcc_assert (STORE_FLAG_VALUE & 1);
5172
5173       op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5174       if (normalizep == -1)
5175         op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5176     }
5177
5178   /* If we were converting to a smaller mode, do the conversion now.  */
5179   if (target_mode != result_mode)
5180     {
5181       convert_move (target, op0, 0);
5182       return target;
5183     }
5184   else
5185     return op0;
5186 }
5187
5188
5189 /* A subroutine of emit_store_flag only including "tricks" that do not
5190    need a recursive call.  These are kept separate to avoid infinite
5191    loops.  */
5192
5193 static rtx
5194 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5195                    enum machine_mode mode, int unsignedp, int normalizep,
5196                    enum machine_mode target_mode)
5197 {
5198   rtx subtarget;
5199   enum insn_code icode;
5200   enum machine_mode compare_mode;
5201   enum mode_class mclass;
5202   enum rtx_code scode;
5203   rtx tem;
5204
5205   if (unsignedp)
5206     code = unsigned_condition (code);
5207   scode = swap_condition (code);
5208
5209   /* If one operand is constant, make it the second one.  Only do this
5210      if the other operand is not constant as well.  */
5211
5212   if (swap_commutative_operands_p (op0, op1))
5213     {
5214       tem = op0;
5215       op0 = op1;
5216       op1 = tem;
5217       code = swap_condition (code);
5218     }
5219
5220   if (mode == VOIDmode)
5221     mode = GET_MODE (op0);
5222
5223   /* For some comparisons with 1 and -1, we can convert this to
5224      comparisons with zero.  This will often produce more opportunities for
5225      store-flag insns.  */
5226
5227   switch (code)
5228     {
5229     case LT:
5230       if (op1 == const1_rtx)
5231         op1 = const0_rtx, code = LE;
5232       break;
5233     case LE:
5234       if (op1 == constm1_rtx)
5235         op1 = const0_rtx, code = LT;
5236       break;
5237     case GE:
5238       if (op1 == const1_rtx)
5239         op1 = const0_rtx, code = GT;
5240       break;
5241     case GT:
5242       if (op1 == constm1_rtx)
5243         op1 = const0_rtx, code = GE;
5244       break;
5245     case GEU:
5246       if (op1 == const1_rtx)
5247         op1 = const0_rtx, code = NE;
5248       break;
5249     case LTU:
5250       if (op1 == const1_rtx)
5251         op1 = const0_rtx, code = EQ;
5252       break;
5253     default:
5254       break;
5255     }
5256
5257   /* If we are comparing a double-word integer with zero or -1, we can
5258      convert the comparison into one involving a single word.  */
5259   if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5260       && GET_MODE_CLASS (mode) == MODE_INT
5261       && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5262     {
5263       if ((code == EQ || code == NE)
5264           && (op1 == const0_rtx || op1 == constm1_rtx))
5265         {
5266           rtx op00, op01;
5267
5268           /* Do a logical OR or AND of the two words and compare the
5269              result.  */
5270           op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5271           op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5272           tem = expand_binop (word_mode,
5273                               op1 == const0_rtx ? ior_optab : and_optab,
5274                               op00, op01, NULL_RTX, unsignedp,
5275                               OPTAB_DIRECT);
5276
5277           if (tem != 0)
5278             tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5279                                    unsignedp, normalizep);
5280         }
5281       else if ((code == LT || code == GE) && op1 == const0_rtx)
5282         {
5283           rtx op0h;
5284
5285           /* If testing the sign bit, can just test on high word.  */
5286           op0h = simplify_gen_subreg (word_mode, op0, mode,
5287                                       subreg_highpart_offset (word_mode,
5288                                                               mode));
5289           tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5290                                  unsignedp, normalizep);
5291         }
5292       else
5293         tem = NULL_RTX;
5294
5295       if (tem)
5296         {
5297           if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5298             return tem;
5299           if (!target)
5300             target = gen_reg_rtx (target_mode);
5301
5302           convert_move (target, tem,
5303                         !val_signbit_known_set_p (word_mode,
5304                                                   (normalizep ? normalizep
5305                                                    : STORE_FLAG_VALUE)));
5306           return target;
5307         }
5308     }
5309
5310   /* If this is A < 0 or A >= 0, we can do this by taking the ones
5311      complement of A (for GE) and shifting the sign bit to the low bit.  */
5312   if (op1 == const0_rtx && (code == LT || code == GE)
5313       && GET_MODE_CLASS (mode) == MODE_INT
5314       && (normalizep || STORE_FLAG_VALUE == 1
5315           || val_signbit_p (mode, STORE_FLAG_VALUE)))
5316     {
5317       subtarget = target;
5318
5319       if (!target)
5320         target_mode = mode;
5321
5322       /* If the result is to be wider than OP0, it is best to convert it
5323          first.  If it is to be narrower, it is *incorrect* to convert it
5324          first.  */
5325       else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5326         {
5327           op0 = convert_modes (target_mode, mode, op0, 0);
5328           mode = target_mode;
5329         }
5330
5331       if (target_mode != mode)
5332         subtarget = 0;
5333
5334       if (code == GE)
5335         op0 = expand_unop (mode, one_cmpl_optab, op0,
5336                            ((STORE_FLAG_VALUE == 1 || normalizep)
5337                             ? 0 : subtarget), 0);
5338
5339       if (STORE_FLAG_VALUE == 1 || normalizep)
5340         /* If we are supposed to produce a 0/1 value, we want to do
5341            a logical shift from the sign bit to the low-order bit; for
5342            a -1/0 value, we do an arithmetic shift.  */
5343         op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5344                             GET_MODE_BITSIZE (mode) - 1,
5345                             subtarget, normalizep != -1);
5346
5347       if (mode != target_mode)
5348         op0 = convert_modes (target_mode, mode, op0, 0);
5349
5350       return op0;
5351     }
5352
5353   mclass = GET_MODE_CLASS (mode);
5354   for (compare_mode = mode; compare_mode != VOIDmode;
5355        compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5356     {
5357      enum machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5358      icode = optab_handler (cstore_optab, optab_mode);
5359      if (icode != CODE_FOR_nothing)
5360         {
5361           do_pending_stack_adjust ();
5362           tem = emit_cstore (target, icode, code, mode, compare_mode,
5363                              unsignedp, op0, op1, normalizep, target_mode);
5364           if (tem)
5365             return tem;
5366
5367           if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5368             {
5369               tem = emit_cstore (target, icode, scode, mode, compare_mode,
5370                                  unsignedp, op1, op0, normalizep, target_mode);
5371               if (tem)
5372                 return tem;
5373             }
5374           break;
5375         }
5376     }
5377
5378   return 0;
5379 }
5380
5381 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5382    and storing in TARGET.  Normally return TARGET.
5383    Return 0 if that cannot be done.
5384
5385    MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
5386    it is VOIDmode, they cannot both be CONST_INT.
5387
5388    UNSIGNEDP is for the case where we have to widen the operands
5389    to perform the operation.  It says to use zero-extension.
5390
5391    NORMALIZEP is 1 if we should convert the result to be either zero
5392    or one.  Normalize is -1 if we should convert the result to be
5393    either zero or -1.  If NORMALIZEP is zero, the result will be left
5394    "raw" out of the scc insn.  */
5395
5396 rtx
5397 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5398                  enum machine_mode mode, int unsignedp, int normalizep)
5399 {
5400   enum machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5401   enum rtx_code rcode;
5402   rtx subtarget;
5403   rtx tem, last, trueval;
5404
5405   /* If we compare constants, we shouldn't use a store-flag operation,
5406      but a constant load.  We can get there via the vanilla route that
5407      usually generates a compare-branch sequence, but will in this case
5408      fold the comparison to a constant, and thus elide the branch.  */
5409   if (CONSTANT_P (op0) && CONSTANT_P (op1))
5410     return NULL_RTX;
5411
5412   tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5413                            target_mode);
5414   if (tem)
5415     return tem;
5416
5417   /* If we reached here, we can't do this with a scc insn, however there
5418      are some comparisons that can be done in other ways.  Don't do any
5419      of these cases if branches are very cheap.  */
5420   if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5421     return 0;
5422
5423   /* See what we need to return.  We can only return a 1, -1, or the
5424      sign bit.  */
5425
5426   if (normalizep == 0)
5427     {
5428       if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5429         normalizep = STORE_FLAG_VALUE;
5430
5431       else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5432         ;
5433       else
5434         return 0;
5435     }
5436
5437   last = get_last_insn ();
5438
5439   /* If optimizing, use different pseudo registers for each insn, instead
5440      of reusing the same pseudo.  This leads to better CSE, but slows
5441      down the compiler, since there are more pseudos */
5442   subtarget = (!optimize
5443                && (target_mode == mode)) ? target : NULL_RTX;
5444   trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5445
5446   /* For floating-point comparisons, try the reverse comparison or try
5447      changing the "orderedness" of the comparison.  */
5448   if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5449     {
5450       enum rtx_code first_code;
5451       bool and_them;
5452
5453       rcode = reverse_condition_maybe_unordered (code);
5454       if (can_compare_p (rcode, mode, ccp_store_flag)
5455           && (code == ORDERED || code == UNORDERED
5456               || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5457               || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5458         {
5459           int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5460                           || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5461
5462           /* For the reverse comparison, use either an addition or a XOR.  */
5463           if (want_add
5464               && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5465                            optimize_insn_for_speed_p ()) == 0)
5466             {
5467               tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5468                                        STORE_FLAG_VALUE, target_mode);
5469               if (tem)
5470                 return expand_binop (target_mode, add_optab, tem,
5471                                      gen_int_mode (normalizep, target_mode),
5472                                      target, 0, OPTAB_WIDEN);
5473             }
5474           else if (!want_add
5475                    && rtx_cost (trueval, XOR, 1,
5476                                 optimize_insn_for_speed_p ()) == 0)
5477             {
5478               tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5479                                        normalizep, target_mode);
5480               if (tem)
5481                 return expand_binop (target_mode, xor_optab, tem, trueval,
5482                                      target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
5483             }
5484         }
5485
5486       delete_insns_since (last);
5487
5488       /* Cannot split ORDERED and UNORDERED, only try the above trick.   */
5489       if (code == ORDERED || code == UNORDERED)
5490         return 0;
5491
5492       and_them = split_comparison (code, mode, &first_code, &code);
5493
5494       /* If there are no NaNs, the first comparison should always fall through.
5495          Effectively change the comparison to the other one.  */
5496       if (!HONOR_NANS (mode))
5497         {
5498           gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5499           return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5500                                     target_mode);
5501         }
5502
5503 #ifdef HAVE_conditional_move
5504       /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5505          conditional move.  */
5506       tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5507                                normalizep, target_mode);
5508       if (tem == 0)
5509         return 0;
5510
5511       if (and_them)
5512         tem = emit_conditional_move (target, code, op0, op1, mode,
5513                                      tem, const0_rtx, GET_MODE (tem), 0);
5514       else
5515         tem = emit_conditional_move (target, code, op0, op1, mode,
5516                                      trueval, tem, GET_MODE (tem), 0);
5517
5518       if (tem == 0)
5519         delete_insns_since (last);
5520       return tem;
5521 #else
5522       return 0;
5523 #endif
5524     }
5525
5526   /* The remaining tricks only apply to integer comparisons.  */
5527
5528   if (GET_MODE_CLASS (mode) != MODE_INT)
5529     return 0;
5530
5531   /* If this is an equality comparison of integers, we can try to exclusive-or
5532      (or subtract) the two operands and use a recursive call to try the
5533      comparison with zero.  Don't do any of these cases if branches are
5534      very cheap.  */
5535
5536   if ((code == EQ || code == NE) && op1 != const0_rtx)
5537     {
5538       tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5539                           OPTAB_WIDEN);
5540
5541       if (tem == 0)
5542         tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5543                             OPTAB_WIDEN);
5544       if (tem != 0)
5545         tem = emit_store_flag (target, code, tem, const0_rtx,
5546                                mode, unsignedp, normalizep);
5547       if (tem != 0)
5548         return tem;
5549
5550       delete_insns_since (last);
5551     }
5552
5553   /* For integer comparisons, try the reverse comparison.  However, for
5554      small X and if we'd have anyway to extend, implementing "X != 0"
5555      as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0".  */
5556   rcode = reverse_condition (code);
5557   if (can_compare_p (rcode, mode, ccp_store_flag)
5558       && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5559             && code == NE
5560             && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5561             && op1 == const0_rtx))
5562     {
5563       int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5564                       || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5565
5566       /* Again, for the reverse comparison, use either an addition or a XOR.  */
5567       if (want_add
5568           && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5569                        optimize_insn_for_speed_p ()) == 0)
5570         {
5571           tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5572                                    STORE_FLAG_VALUE, target_mode);
5573           if (tem != 0)
5574             tem = expand_binop (target_mode, add_optab, tem,
5575                                 gen_int_mode (normalizep, target_mode),
5576                                 target, 0, OPTAB_WIDEN);
5577         }
5578       else if (!want_add
5579                && rtx_cost (trueval, XOR, 1,
5580                             optimize_insn_for_speed_p ()) == 0)
5581         {
5582           tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5583                                    normalizep, target_mode);
5584           if (tem != 0)
5585             tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5586                                 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5587         }
5588
5589       if (tem != 0)
5590         return tem;
5591       delete_insns_since (last);
5592     }
5593
5594   /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5595      the constant zero.  Reject all other comparisons at this point.  Only
5596      do LE and GT if branches are expensive since they are expensive on
5597      2-operand machines.  */
5598
5599   if (op1 != const0_rtx
5600       || (code != EQ && code != NE
5601           && (BRANCH_COST (optimize_insn_for_speed_p (),
5602                            false) <= 1 || (code != LE && code != GT))))
5603     return 0;
5604
5605   /* Try to put the result of the comparison in the sign bit.  Assume we can't
5606      do the necessary operation below.  */
5607
5608   tem = 0;
5609
5610   /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
5611      the sign bit set.  */
5612
5613   if (code == LE)
5614     {
5615       /* This is destructive, so SUBTARGET can't be OP0.  */
5616       if (rtx_equal_p (subtarget, op0))
5617         subtarget = 0;
5618
5619       tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5620                           OPTAB_WIDEN);
5621       if (tem)
5622         tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5623                             OPTAB_WIDEN);
5624     }
5625
5626   /* To see if A > 0, compute (((signed) A) << BITS) - A, where BITS is the
5627      number of bits in the mode of OP0, minus one.  */
5628
5629   if (code == GT)
5630     {
5631       if (rtx_equal_p (subtarget, op0))
5632         subtarget = 0;
5633
5634       tem = expand_shift (RSHIFT_EXPR, mode, op0,
5635                           GET_MODE_BITSIZE (mode) - 1,
5636                           subtarget, 0);
5637       tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5638                           OPTAB_WIDEN);
5639     }
5640
5641   if (code == EQ || code == NE)
5642     {
5643       /* For EQ or NE, one way to do the comparison is to apply an operation
5644          that converts the operand into a positive number if it is nonzero
5645          or zero if it was originally zero.  Then, for EQ, we subtract 1 and
5646          for NE we negate.  This puts the result in the sign bit.  Then we
5647          normalize with a shift, if needed.
5648
5649          Two operations that can do the above actions are ABS and FFS, so try
5650          them.  If that doesn't work, and MODE is smaller than a full word,
5651          we can use zero-extension to the wider mode (an unsigned conversion)
5652          as the operation.  */
5653
5654       /* Note that ABS doesn't yield a positive number for INT_MIN, but
5655          that is compensated by the subsequent overflow when subtracting
5656          one / negating.  */
5657
5658       if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5659         tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5660       else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5661         tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5662       else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5663         {
5664           tem = convert_modes (word_mode, mode, op0, 1);
5665           mode = word_mode;
5666         }
5667
5668       if (tem != 0)
5669         {
5670           if (code == EQ)
5671             tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5672                                 0, OPTAB_WIDEN);
5673           else
5674             tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5675         }
5676
5677       /* If we couldn't do it that way, for NE we can "or" the two's complement
5678          of the value with itself.  For EQ, we take the one's complement of
5679          that "or", which is an extra insn, so we only handle EQ if branches
5680          are expensive.  */
5681
5682       if (tem == 0
5683           && (code == NE
5684               || BRANCH_COST (optimize_insn_for_speed_p (),
5685                               false) > 1))
5686         {
5687           if (rtx_equal_p (subtarget, op0))
5688             subtarget = 0;
5689
5690           tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5691           tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5692                               OPTAB_WIDEN);
5693
5694           if (tem && code == EQ)
5695             tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5696         }
5697     }
5698
5699   if (tem && normalizep)
5700     tem = expand_shift (RSHIFT_EXPR, mode, tem,
5701                         GET_MODE_BITSIZE (mode) - 1,
5702                         subtarget, normalizep == 1);
5703
5704   if (tem)
5705     {
5706       if (!target)
5707         ;
5708       else if (GET_MODE (tem) != target_mode)
5709         {
5710           convert_move (target, tem, 0);
5711           tem = target;
5712         }
5713       else if (!subtarget)
5714         {
5715           emit_move_insn (target, tem);
5716           tem = target;
5717         }
5718     }
5719   else
5720     delete_insns_since (last);
5721
5722   return tem;
5723 }
5724
5725 /* Like emit_store_flag, but always succeeds.  */
5726
5727 rtx
5728 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5729                        enum machine_mode mode, int unsignedp, int normalizep)
5730 {
5731   rtx tem, label;
5732   rtx trueval, falseval;
5733
5734   /* First see if emit_store_flag can do the job.  */
5735   tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5736   if (tem != 0)
5737     return tem;
5738
5739   if (!target)
5740     target = gen_reg_rtx (word_mode);
5741
5742   /* If this failed, we have to do this with set/compare/jump/set code.
5743      For foo != 0, if foo is in OP0, just replace it with 1 if nonzero.  */
5744   trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5745   if (code == NE
5746       && GET_MODE_CLASS (mode) == MODE_INT
5747       && REG_P (target)
5748       && op0 == target
5749       && op1 == const0_rtx)
5750     {
5751       label = gen_label_rtx ();
5752       do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp,
5753                                mode, NULL_RTX, NULL_RTX, label, -1);
5754       emit_move_insn (target, trueval);
5755       emit_label (label);
5756       return target;
5757     }
5758
5759   if (!REG_P (target)
5760       || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5761     target = gen_reg_rtx (GET_MODE (target));
5762
5763   /* Jump in the right direction if the target cannot implement CODE
5764      but can jump on its reverse condition.  */
5765   falseval = const0_rtx;
5766   if (! can_compare_p (code, mode, ccp_jump)
5767       && (! FLOAT_MODE_P (mode)
5768           || code == ORDERED || code == UNORDERED
5769           || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5770           || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5771     {
5772       enum rtx_code rcode;
5773       if (FLOAT_MODE_P (mode))
5774         rcode = reverse_condition_maybe_unordered (code);
5775       else
5776         rcode = reverse_condition (code);
5777
5778       /* Canonicalize to UNORDERED for the libcall.  */
5779       if (can_compare_p (rcode, mode, ccp_jump)
5780           || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5781         {
5782           falseval = trueval;
5783           trueval = const0_rtx;
5784           code = rcode;
5785         }
5786     }
5787
5788   emit_move_insn (target, trueval);
5789   label = gen_label_rtx ();
5790   do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5791                            NULL_RTX, label, -1);
5792
5793   emit_move_insn (target, falseval);
5794   emit_label (label);
5795
5796   return target;
5797 }
5798 \f
5799 /* Perform possibly multi-word comparison and conditional jump to LABEL
5800    if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE.  This is
5801    now a thin wrapper around do_compare_rtx_and_jump.  */
5802
5803 static void
5804 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
5805                  rtx label)
5806 {
5807   int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5808   do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode,
5809                            NULL_RTX, NULL_RTX, label, -1);
5810 }