1 /* Medium-level subroutines: convert bit-field store and extract
2 and shifts, multiplies and divides to rtl instructions.
3 Copyright (C) 1987-2013 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "diagnostic-core.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "tm_p.h"
30 #include "flags.h"
31 #include "insn-config.h"
32 #include "expr.h"
33 #include "optabs.h"
34 #include "recog.h"
35 #include "langhooks.h"
36 #include "df.h"
37 #include "target.h"
38 #include "expmed.h"
40 struct target_expmed default_target_expmed;
41 #if SWITCHABLE_TARGET
42 struct target_expmed *this_target_expmed = &default_target_expmed;
43 #endif
45 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
46 unsigned HOST_WIDE_INT,
47 unsigned HOST_WIDE_INT,
48 unsigned HOST_WIDE_INT,
49 rtx);
50 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
51 unsigned HOST_WIDE_INT,
52 unsigned HOST_WIDE_INT,
53 unsigned HOST_WIDE_INT,
54 rtx);
55 static rtx extract_fixed_bit_field (enum machine_mode, rtx,
56 unsigned HOST_WIDE_INT,
57 unsigned HOST_WIDE_INT, rtx, int, bool);
58 static rtx mask_rtx (enum machine_mode, int, int, int);
59 static rtx lshift_value (enum machine_mode, rtx, int, int);
60 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
61 unsigned HOST_WIDE_INT, int);
62 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
63 static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
64 static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
66 /* Test whether a value is zero or a power of two. */
67 #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
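/* For illustration: 0, 1, 2, 4 and 8 all satisfy (x & (x - 1)) == 0,
   whereas 6 does not, since 6 & 5 == 4. */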
69 struct init_expmed_rtl
71 struct rtx_def reg; rtunion reg_fld[2];
72 struct rtx_def plus; rtunion plus_fld1;
73 struct rtx_def neg;
74 struct rtx_def mult; rtunion mult_fld1;
75 struct rtx_def sdiv; rtunion sdiv_fld1;
76 struct rtx_def udiv; rtunion udiv_fld1;
77 struct rtx_def sdiv_32; rtunion sdiv_32_fld1;
78 struct rtx_def smod_32; rtunion smod_32_fld1;
79 struct rtx_def wide_mult; rtunion wide_mult_fld1;
80 struct rtx_def wide_lshr; rtunion wide_lshr_fld1;
81 struct rtx_def wide_trunc;
82 struct rtx_def shift; rtunion shift_fld1;
83 struct rtx_def shift_mult; rtunion shift_mult_fld1;
84 struct rtx_def shift_add; rtunion shift_add_fld1;
85 struct rtx_def shift_sub0; rtunion shift_sub0_fld1;
86 struct rtx_def shift_sub1; rtunion shift_sub1_fld1;
87 struct rtx_def zext;
88 struct rtx_def trunc;
90 rtx pow2[MAX_BITS_PER_WORD];
91 rtx cint[MAX_BITS_PER_WORD];
94 static void
95 init_expmed_one_conv (struct init_expmed_rtl *all, enum machine_mode to_mode,
96 enum machine_mode from_mode, bool speed)
98 int to_size, from_size;
99 rtx which;
101 /* We're given no information about the true size of a partial integer,
102 only the size of the "full" integer it requires for storage. For
103 comparison purposes here, reduce the bit size by one in that case. */
104 to_size = (GET_MODE_BITSIZE (to_mode)
105 - (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT));
106 from_size = (GET_MODE_BITSIZE (from_mode)
107 - (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT));
109 /* Assume cost of zero-extend and sign-extend is the same. */
110 which = (to_size < from_size ? &all->trunc : &all->zext);
112 PUT_MODE (&all->reg, from_mode);
113 set_convert_cost (to_mode, from_mode, speed, set_src_cost (which, speed));
116 static void
117 init_expmed_one_mode (struct init_expmed_rtl *all,
118 enum machine_mode mode, int speed)
120 int m, n, mode_bitsize;
121 enum machine_mode mode_from;
123 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
125 PUT_MODE (&all->reg, mode);
126 PUT_MODE (&all->plus, mode);
127 PUT_MODE (&all->neg, mode);
128 PUT_MODE (&all->mult, mode);
129 PUT_MODE (&all->sdiv, mode);
130 PUT_MODE (&all->udiv, mode);
131 PUT_MODE (&all->sdiv_32, mode);
132 PUT_MODE (&all->smod_32, mode);
133 PUT_MODE (&all->wide_trunc, mode);
134 PUT_MODE (&all->shift, mode);
135 PUT_MODE (&all->shift_mult, mode);
136 PUT_MODE (&all->shift_add, mode);
137 PUT_MODE (&all->shift_sub0, mode);
138 PUT_MODE (&all->shift_sub1, mode);
139 PUT_MODE (&all->zext, mode);
140 PUT_MODE (&all->trunc, mode);
142 set_add_cost (speed, mode, set_src_cost (&all->plus, speed));
143 set_neg_cost (speed, mode, set_src_cost (&all->neg, speed));
144 set_mul_cost (speed, mode, set_src_cost (&all->mult, speed));
145 set_sdiv_cost (speed, mode, set_src_cost (&all->sdiv, speed));
146 set_udiv_cost (speed, mode, set_src_cost (&all->udiv, speed));
148 set_sdiv_pow2_cheap (speed, mode, (set_src_cost (&all->sdiv_32, speed)
149 <= 2 * add_cost (speed, mode)));
150 set_smod_pow2_cheap (speed, mode, (set_src_cost (&all->smod_32, speed)
151 <= 4 * add_cost (speed, mode)));
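/* Note: division and modulus by the constant 32 above stand in for an
   arbitrary power of two. Roughly speaking, when the direct operation
   costs no more than a few additions, the shift-based expansions for
   power-of-two divisors are not treated as cheaper. */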
153 set_shift_cost (speed, mode, 0, 0);
155 int cost = add_cost (speed, mode);
156 set_shiftadd_cost (speed, mode, 0, cost);
157 set_shiftsub0_cost (speed, mode, 0, cost);
158 set_shiftsub1_cost (speed, mode, 0, cost);
161 n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
162 for (m = 1; m < n; m++)
164 XEXP (&all->shift, 1) = all->cint[m];
165 XEXP (&all->shift_mult, 1) = all->pow2[m];
167 set_shift_cost (speed, mode, m, set_src_cost (&all->shift, speed));
168 set_shiftadd_cost (speed, mode, m, set_src_cost (&all->shift_add, speed));
169 set_shiftsub0_cost (speed, mode, m, set_src_cost (&all->shift_sub0, speed));
170 set_shiftsub1_cost (speed, mode, m, set_src_cost (&all->shift_sub1, speed));
173 if (SCALAR_INT_MODE_P (mode))
175 for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
176 mode_from = (enum machine_mode)(mode_from + 1))
177 init_expmed_one_conv (all, mode, mode_from, speed);
179 if (GET_MODE_CLASS (mode) == MODE_INT)
181 enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
182 if (wider_mode != VOIDmode)
184 PUT_MODE (&all->zext, wider_mode);
185 PUT_MODE (&all->wide_mult, wider_mode);
186 PUT_MODE (&all->wide_lshr, wider_mode);
187 XEXP (&all->wide_lshr, 1) = GEN_INT (mode_bitsize);
189 set_mul_widen_cost (speed, wider_mode,
190 set_src_cost (&all->wide_mult, speed));
191 set_mul_highpart_cost (speed, mode,
192 set_src_cost (&all->wide_trunc, speed));
197 void
198 init_expmed (void)
200 struct init_expmed_rtl all;
201 enum machine_mode mode;
202 int m, speed;
204 memset (&all, 0, sizeof all);
205 for (m = 1; m < MAX_BITS_PER_WORD; m++)
207 all.pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
208 all.cint[m] = GEN_INT (m);
211 PUT_CODE (&all.reg, REG);
212 /* Avoid using hard regs in ways which may be unsupported. */
213 SET_REGNO (&all.reg, LAST_VIRTUAL_REGISTER + 1);
215 PUT_CODE (&all.plus, PLUS);
216 XEXP (&all.plus, 0) = &all.reg;
217 XEXP (&all.plus, 1) = &all.reg;
219 PUT_CODE (&all.neg, NEG);
220 XEXP (&all.neg, 0) = &all.reg;
222 PUT_CODE (&all.mult, MULT);
223 XEXP (&all.mult, 0) = &all.reg;
224 XEXP (&all.mult, 1) = &all.reg;
226 PUT_CODE (&all.sdiv, DIV);
227 XEXP (&all.sdiv, 0) = &all.reg;
228 XEXP (&all.sdiv, 1) = &all.reg;
230 PUT_CODE (&all.udiv, UDIV);
231 XEXP (&all.udiv, 0) = &all.reg;
232 XEXP (&all.udiv, 1) = &all.reg;
234 PUT_CODE (&all.sdiv_32, DIV);
235 XEXP (&all.sdiv_32, 0) = &all.reg;
236 XEXP (&all.sdiv_32, 1) = 32 < MAX_BITS_PER_WORD ? all.cint[32] : GEN_INT (32);
238 PUT_CODE (&all.smod_32, MOD);
239 XEXP (&all.smod_32, 0) = &all.reg;
240 XEXP (&all.smod_32, 1) = XEXP (&all.sdiv_32, 1);
242 PUT_CODE (&all.zext, ZERO_EXTEND);
243 XEXP (&all.zext, 0) = &all.reg;
245 PUT_CODE (&all.wide_mult, MULT);
246 XEXP (&all.wide_mult, 0) = &all.zext;
247 XEXP (&all.wide_mult, 1) = &all.zext;
249 PUT_CODE (&all.wide_lshr, LSHIFTRT);
250 XEXP (&all.wide_lshr, 0) = &all.wide_mult;
252 PUT_CODE (&all.wide_trunc, TRUNCATE);
253 XEXP (&all.wide_trunc, 0) = &all.wide_lshr;
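/* At this point all.wide_trunc represents
   (truncate (lshiftrt (mult (zero_extend reg) (zero_extend reg)) N)),
   i.e. the high part of a widening multiplication; init_expmed_one_mode
   records its cost as the mul_highpart cost. */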
255 PUT_CODE (&all.shift, ASHIFT);
256 XEXP (&all.shift, 0) = &all.reg;
258 PUT_CODE (&all.shift_mult, MULT);
259 XEXP (&all.shift_mult, 0) = &all.reg;
261 PUT_CODE (&all.shift_add, PLUS);
262 XEXP (&all.shift_add, 0) = &all.shift_mult;
263 XEXP (&all.shift_add, 1) = &all.reg;
265 PUT_CODE (&all.shift_sub0, MINUS);
266 XEXP (&all.shift_sub0, 0) = &all.shift_mult;
267 XEXP (&all.shift_sub0, 1) = &all.reg;
269 PUT_CODE (&all.shift_sub1, MINUS);
270 XEXP (&all.shift_sub1, 0) = &all.reg;
271 XEXP (&all.shift_sub1, 1) = &all.shift_mult;
273 PUT_CODE (&all.trunc, TRUNCATE);
274 XEXP (&all.trunc, 0) = &all.reg;
276 for (speed = 0; speed < 2; speed++)
278 crtl->maybe_hot_insn_p = speed;
279 set_zero_cost (speed, set_src_cost (const0_rtx, speed));
281 for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
282 mode = (enum machine_mode)(mode + 1))
283 init_expmed_one_mode (&all, mode, speed);
285 if (MIN_MODE_PARTIAL_INT != VOIDmode)
286 for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
287 mode = (enum machine_mode)(mode + 1))
288 init_expmed_one_mode (&all, mode, speed);
290 if (MIN_MODE_VECTOR_INT != VOIDmode)
291 for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
292 mode = (enum machine_mode)(mode + 1))
293 init_expmed_one_mode (&all, mode, speed);
296 if (alg_hash_used_p ())
298 struct alg_hash_entry *p = alg_hash_entry_ptr (0);
299 memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
301 else
302 set_alg_hash_used_p (true);
303 default_rtl_profile ();
306 /* Return an rtx representing minus the value of X.
307 MODE is the intended mode of the result,
308 useful if X is a CONST_INT. */
310 rtx
311 negate_rtx (enum machine_mode mode, rtx x)
313 rtx result = simplify_unary_operation (NEG, mode, x, mode);
315 if (result == 0)
316 result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
318 return result;
321 /* Adjust bitfield memory MEM so that it points to the first unit of mode
322 MODE that contains a bitfield of size BITSIZE at bit position BITNUM.
323 If MODE is BLKmode, return a reference to every byte in the bitfield.
324 Set *NEW_BITNUM to the bit position of the field within the new memory. */
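/* For illustration: narrowing a 3-bit field at bit position 10 to QImode
   gives *NEW_BITNUM == 10 % 8 == 2 and a QImode reference at byte offset
   (10 - 2) / 8 == 1. */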
326 static rtx
327 narrow_bit_field_mem (rtx mem, enum machine_mode mode,
328 unsigned HOST_WIDE_INT bitsize,
329 unsigned HOST_WIDE_INT bitnum,
330 unsigned HOST_WIDE_INT *new_bitnum)
332 if (mode == BLKmode)
334 *new_bitnum = bitnum % BITS_PER_UNIT;
335 HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
336 HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
337 / BITS_PER_UNIT);
338 return adjust_bitfield_address_size (mem, mode, offset, size);
340 else
342 unsigned int unit = GET_MODE_BITSIZE (mode);
343 *new_bitnum = bitnum % unit;
344 HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
345 return adjust_bitfield_address (mem, mode, offset);
349 /* The caller wants to perform insertion or extraction PATTERN on a
350 bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
351 BITREGION_START and BITREGION_END are as for store_bit_field
352 and FIELDMODE is the natural mode of the field.
354 Search for a mode that is compatible with the memory access
355 restrictions and (where applicable) with a register insertion or
356 extraction. Return the new memory on success, storing the adjusted
357 bit position in *NEW_BITNUM. Return null otherwise. */
359 static rtx
360 adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
361 rtx op0, HOST_WIDE_INT bitsize,
362 HOST_WIDE_INT bitnum,
363 unsigned HOST_WIDE_INT bitregion_start,
364 unsigned HOST_WIDE_INT bitregion_end,
365 enum machine_mode fieldmode,
366 unsigned HOST_WIDE_INT *new_bitnum)
368 bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
369 bitregion_end, MEM_ALIGN (op0),
370 MEM_VOLATILE_P (op0));
371 enum machine_mode best_mode;
372 if (iter.next_mode (&best_mode))
374 /* We can use a memory in BEST_MODE. See whether this is true for
375 any wider modes. All other things being equal, we prefer to
376 use the widest mode possible because it tends to expose more
377 CSE opportunities. */
378 if (!iter.prefer_smaller_modes ())
380 /* Limit the search to the mode required by the corresponding
381 register insertion or extraction instruction, if any. */
382 enum machine_mode limit_mode = word_mode;
383 extraction_insn insn;
384 if (get_best_reg_extraction_insn (&insn, pattern,
385 GET_MODE_BITSIZE (best_mode),
386 fieldmode))
387 limit_mode = insn.field_mode;
389 enum machine_mode wider_mode;
390 while (iter.next_mode (&wider_mode)
391 && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
392 best_mode = wider_mode;
394 return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
395 new_bitnum);
397 return NULL_RTX;
400 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
401 a structure of mode STRUCT_MODE represents a lowpart subreg. The subreg
402 offset is then BITNUM / BITS_PER_UNIT. */
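/* For illustration (assuming a little-endian target with 32-bit words):
   a 16-bit field at bit 32 of a DImode value starts on a word boundary,
   so it is the lowpart of the second word and the subreg byte offset is
   32 / BITS_PER_UNIT == 4. */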
404 static bool
405 lowpart_bit_field_p (unsigned HOST_WIDE_INT bitnum,
406 unsigned HOST_WIDE_INT bitsize,
407 enum machine_mode struct_mode)
409 if (BYTES_BIG_ENDIAN)
410 return (bitnum % BITS_PER_UNIT == 0
411 && (bitnum + bitsize == GET_MODE_BITSIZE (struct_mode)
412 || (bitnum + bitsize) % BITS_PER_WORD == 0));
413 else
414 return bitnum % BITS_PER_WORD == 0;
417 /* Return true if OP is a memory and if a bitfield of size BITSIZE at
418 bit number BITNUM can be treated as a simple value of mode MODE. */
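/* For illustration (assuming 32-bit SImode and no slow unaligned access):
   a 32-bit field at bit 64 of a MEM starts on a byte boundary and exactly
   fills SImode, so it can be accessed as a single SImode load or store at
   byte offset 8. */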
420 static bool
421 simple_mem_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
422 unsigned HOST_WIDE_INT bitnum, enum machine_mode mode)
424 return (MEM_P (op0)
425 && bitnum % BITS_PER_UNIT == 0
426 && bitsize == GET_MODE_BITSIZE (mode)
427 && (!SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
428 || (bitnum % GET_MODE_ALIGNMENT (mode) == 0
429 && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
432 /* Try to use instruction INSV to store VALUE into a field of OP0.
433 BITSIZE and BITNUM are as for store_bit_field. */
435 static bool
436 store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
437 unsigned HOST_WIDE_INT bitsize,
438 unsigned HOST_WIDE_INT bitnum, rtx value)
440 struct expand_operand ops[4];
441 rtx value1;
442 rtx xop0 = op0;
443 rtx last = get_last_insn ();
444 bool copy_back = false;
446 enum machine_mode op_mode = insv->field_mode;
447 unsigned int unit = GET_MODE_BITSIZE (op_mode);
448 if (bitsize == 0 || bitsize > unit)
449 return false;
451 if (MEM_P (xop0))
452 /* Get a reference to the first byte of the field. */
453 xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
454 &bitnum);
455 else
457 /* Convert from counting within OP0 to counting in OP_MODE. */
458 if (BYTES_BIG_ENDIAN)
459 bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
461 /* If xop0 is a register, we need it in OP_MODE
462 to make it acceptable to the format of insv. */
463 if (GET_CODE (xop0) == SUBREG)
464 /* We can't just change the mode, because this might clobber op0,
465 and we will need the original value of op0 if insv fails. */
466 xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
467 if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
468 xop0 = gen_lowpart_SUBREG (op_mode, xop0);
471 /* If the destination is a paradoxical subreg such that we need a
472 truncate to the inner mode, perform the insertion on a temporary and
473 truncate the result to the original destination. Note that we can't
474 just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
475 X) 0)) is (reg:N X). */
476 if (GET_CODE (xop0) == SUBREG
477 && REG_P (SUBREG_REG (xop0))
478 && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
479 op_mode))
481 rtx tem = gen_reg_rtx (op_mode);
482 emit_move_insn (tem, xop0);
483 xop0 = tem;
484 copy_back = true;
487 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
488 "backwards" from the size of the unit we are inserting into.
489 Otherwise, we count bits from the most significant on a
490 BYTES/BITS_BIG_ENDIAN machine. */
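/* For illustration: with unit == 32, bitsize == 8 and bitnum == 4, the
   converted bit position below is 32 - 8 - 4 == 20. */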
492 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
493 bitnum = unit - bitsize - bitnum;
495 /* Convert VALUE to op_mode (which insv insn wants) in VALUE1. */
496 value1 = value;
497 if (GET_MODE (value) != op_mode)
499 if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
501 /* Optimization: Don't bother really extending VALUE
502 if it has all the bits we will actually use. However,
503 if we must narrow it, be sure we do it correctly. */
505 if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
507 rtx tmp;
509 tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
510 if (! tmp)
511 tmp = simplify_gen_subreg (op_mode,
512 force_reg (GET_MODE (value),
513 value1),
514 GET_MODE (value), 0);
515 value1 = tmp;
517 else
518 value1 = gen_lowpart (op_mode, value1);
520 else if (CONST_INT_P (value))
521 value1 = gen_int_mode (INTVAL (value), op_mode);
522 else
523 /* Parse phase is supposed to make VALUE's data type
524 match that of the component reference, which is a type
525 at least as wide as the field; so VALUE should have
526 a mode that corresponds to that type. */
527 gcc_assert (CONSTANT_P (value));
530 create_fixed_operand (&ops[0], xop0);
531 create_integer_operand (&ops[1], bitsize);
532 create_integer_operand (&ops[2], bitnum);
533 create_input_operand (&ops[3], value1, op_mode);
534 if (maybe_expand_insn (insv->icode, 4, ops))
536 if (copy_back)
537 convert_move (op0, xop0, true);
538 return true;
540 delete_insns_since (last);
541 return false;
544 /* A subroutine of store_bit_field, with the same arguments. Return true
545 if the operation could be implemented.
547 If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
548 no other way of implementing the operation. If FALLBACK_P is false,
549 return false instead. */
551 static bool
552 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
553 unsigned HOST_WIDE_INT bitnum,
554 unsigned HOST_WIDE_INT bitregion_start,
555 unsigned HOST_WIDE_INT bitregion_end,
556 enum machine_mode fieldmode,
557 rtx value, bool fallback_p)
559 rtx op0 = str_rtx;
560 rtx orig_value;
562 while (GET_CODE (op0) == SUBREG)
564 /* The following line once was done only if WORDS_BIG_ENDIAN,
565 but I think that is a mistake. WORDS_BIG_ENDIAN is
566 meaningful at a much higher level; when structures are copied
567 between memory and regs, the higher-numbered regs
568 always get higher addresses. */
569 int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
570 int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
571 int byte_offset = 0;
573 /* Paradoxical subregs need special handling on big endian machines. */
574 if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
576 int difference = inner_mode_size - outer_mode_size;
578 if (WORDS_BIG_ENDIAN)
579 byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
580 if (BYTES_BIG_ENDIAN)
581 byte_offset += difference % UNITS_PER_WORD;
583 else
584 byte_offset = SUBREG_BYTE (op0);
586 bitnum += byte_offset * BITS_PER_UNIT;
587 op0 = SUBREG_REG (op0);
590 /* No action is needed if the target is a register and if the field
591 lies completely outside that register. This can occur if the source
592 code contains an out-of-bounds access to a small array. */
593 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
594 return true;
596 /* Use vec_set patterns for inserting parts of vectors whenever
597 available. */
598 if (VECTOR_MODE_P (GET_MODE (op0))
599 && !MEM_P (op0)
600 && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
601 && fieldmode == GET_MODE_INNER (GET_MODE (op0))
602 && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
603 && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
605 struct expand_operand ops[3];
606 enum machine_mode outermode = GET_MODE (op0);
607 enum machine_mode innermode = GET_MODE_INNER (outermode);
608 enum insn_code icode = optab_handler (vec_set_optab, outermode);
609 int pos = bitnum / GET_MODE_BITSIZE (innermode);
611 create_fixed_operand (&ops[0], op0);
612 create_input_operand (&ops[1], value, innermode);
613 create_integer_operand (&ops[2], pos);
614 if (maybe_expand_insn (icode, 3, ops))
615 return true;
618 /* If the target is a register, overwriting the entire object, or storing
619 a full-word or multi-word field can be done with just a SUBREG. */
620 if (!MEM_P (op0)
621 && bitsize == GET_MODE_BITSIZE (fieldmode)
622 && ((bitsize == GET_MODE_BITSIZE (GET_MODE (op0)) && bitnum == 0)
623 || (bitsize % BITS_PER_WORD == 0 && bitnum % BITS_PER_WORD == 0)))
625 /* Use the subreg machinery either to narrow OP0 to the required
626 words or to cope with mode punning between equal-sized modes. */
627 rtx sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
628 bitnum / BITS_PER_UNIT);
629 if (sub)
631 emit_move_insn (sub, value);
632 return true;
636 /* If the target is memory, storing any naturally aligned field can be
637 done with a simple store. For targets that support fast unaligned
638 memory, any naturally sized, unit aligned field can be done directly. */
639 if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode))
641 op0 = adjust_bitfield_address (op0, fieldmode, bitnum / BITS_PER_UNIT);
642 emit_move_insn (op0, value);
643 return true;
646 /* Make sure we are playing with integral modes. Pun with subregs
647 if we aren't. This must come after the entire register case above,
648 since that case is valid for any mode. The following cases are only
649 valid for integral modes. */
651 enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
652 if (imode != GET_MODE (op0))
654 if (MEM_P (op0))
655 op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
656 else
658 gcc_assert (imode != BLKmode);
659 op0 = gen_lowpart (imode, op0);
664 /* Storing an lsb-aligned field in a register
665 can be done with a movstrict instruction. */
667 if (!MEM_P (op0)
668 && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
669 && bitsize == GET_MODE_BITSIZE (fieldmode)
670 && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
672 struct expand_operand ops[2];
673 enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
674 rtx arg0 = op0;
675 unsigned HOST_WIDE_INT subreg_off;
677 if (GET_CODE (arg0) == SUBREG)
679 /* Else we've got some float mode source being extracted into
680 a different float mode destination -- this combination of
681 subregs results in Severe Tire Damage. */
682 gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
683 || GET_MODE_CLASS (fieldmode) == MODE_INT
684 || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
685 arg0 = SUBREG_REG (arg0);
688 subreg_off = bitnum / BITS_PER_UNIT;
689 if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
691 arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
693 create_fixed_operand (&ops[0], arg0);
694 /* Shrink the source operand to FIELDMODE. */
695 create_convert_operand_to (&ops[1], value, fieldmode, false);
696 if (maybe_expand_insn (icode, 2, ops))
697 return true;
701 /* Handle fields bigger than a word. */
703 if (bitsize > BITS_PER_WORD)
705 /* Here we transfer the words of the field
706 in the order least significant first.
707 This is because the most significant word is the one which may
708 be less than full.
709 However, only do that if the value is not BLKmode. */
711 unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
712 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
713 unsigned int i;
714 rtx last;
716 /* This is the mode we must force value to, so that there will be enough
717 subwords to extract. Note that fieldmode will often (always?) be
718 VOIDmode, because that is what store_field uses to indicate that this
719 is a bit field, but passing VOIDmode to operand_subword_force
720 is not allowed. */
721 fieldmode = GET_MODE (value);
722 if (fieldmode == VOIDmode)
723 fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
725 last = get_last_insn ();
726 for (i = 0; i < nwords; i++)
728 /* If I is 0, use the low-order word in both field and target;
729 if I is 1, use the next to lowest word; and so on. */
730 unsigned int wordnum = (backwards
731 ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD
732 - i - 1
733 : i);
734 unsigned int bit_offset = (backwards
735 ? MAX ((int) bitsize - ((int) i + 1)
736 * BITS_PER_WORD,
737 0)
738 : (int) i * BITS_PER_WORD);
739 rtx value_word = operand_subword_force (value, wordnum, fieldmode);
740 unsigned HOST_WIDE_INT new_bitsize =
741 MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
743 /* If the remaining chunk doesn't have full wordsize we have
744 to make sure that for big endian machines the higher order
745 bits are used. */
746 if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
747 value_word = simplify_expand_binop (word_mode, lshr_optab,
748 value_word,
749 GEN_INT (BITS_PER_WORD
750 - new_bitsize),
751 NULL_RTX, true,
752 OPTAB_LIB_WIDEN);
754 if (!store_bit_field_1 (op0, new_bitsize,
755 bitnum + bit_offset,
756 bitregion_start, bitregion_end,
757 word_mode,
758 value_word, fallback_p))
760 delete_insns_since (last);
761 return false;
764 return true;
767 /* If VALUE has a floating-point or complex mode, access it as an
768 integer of the corresponding size. This can occur on a machine
769 with 64 bit registers that uses SFmode for float. It can also
770 occur for unaligned float or complex fields. */
771 orig_value = value;
772 if (GET_MODE (value) != VOIDmode
773 && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
774 && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
776 value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
777 emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
780 /* If OP0 is a multi-word register, narrow it to the affected word.
781 If the region spans two words, defer to store_split_bit_field. */
782 if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
784 op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
785 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
786 gcc_assert (op0);
787 bitnum %= BITS_PER_WORD;
788 if (bitnum + bitsize > BITS_PER_WORD)
790 if (!fallback_p)
791 return false;
793 store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
794 bitregion_end, value);
795 return true;
799 /* From here on we can assume that the field to be stored in fits
800 within a word. If the destination is a register, it too fits
801 in a word. */
803 extraction_insn insv;
804 if (!MEM_P (op0)
805 && get_best_reg_extraction_insn (&insv, EP_insv,
806 GET_MODE_BITSIZE (GET_MODE (op0)),
807 fieldmode)
808 && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
809 return true;
811 /* If OP0 is a memory, try copying it to a register and seeing if a
812 cheap register alternative is available. */
813 if (MEM_P (op0))
815 /* Do not use unaligned memory insvs for volatile bitfields when
816 -fstrict-volatile-bitfields is in effect. */
817 if (!(MEM_VOLATILE_P (op0)
818 && flag_strict_volatile_bitfields > 0)
819 && get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
820 fieldmode)
821 && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
822 return true;
824 rtx last = get_last_insn ();
826 /* Try loading part of OP0 into a register, inserting the bitfield
827 into that, and then copying the result back to OP0. */
828 unsigned HOST_WIDE_INT bitpos;
829 rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
830 bitregion_start, bitregion_end,
831 fieldmode, &bitpos);
832 if (xop0)
834 rtx tempreg = copy_to_reg (xop0);
835 if (store_bit_field_1 (tempreg, bitsize, bitpos,
836 bitregion_start, bitregion_end,
837 fieldmode, orig_value, false))
839 emit_move_insn (xop0, tempreg);
840 return true;
842 delete_insns_since (last);
846 if (!fallback_p)
847 return false;
849 store_fixed_bit_field (op0, bitsize, bitnum, bitregion_start,
850 bitregion_end, value);
851 return true;
854 /* Generate code to store value from rtx VALUE
855 into a bit-field within structure STR_RTX
856 containing BITSIZE bits starting at bit BITNUM.
858 BITREGION_START is bitpos of the first bitfield in this region.
859 BITREGION_END is the bitpos of the ending bitfield in this region.
860 These two fields are 0, if the C++ memory model does not apply,
861 or we are not interested in keeping track of bitfield regions.
863 FIELDMODE is the machine-mode of the FIELD_DECL node for this field. */
865 void
866 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
867 unsigned HOST_WIDE_INT bitnum,
868 unsigned HOST_WIDE_INT bitregion_start,
869 unsigned HOST_WIDE_INT bitregion_end,
870 enum machine_mode fieldmode,
871 rtx value)
873 /* Under the C++0x memory model, we must not touch bits outside the
874 bit region. Adjust the address to start at the beginning of the
875 bit region. */
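/* For illustration: with bitregion_start == 32, bitnum == 40 and
   bitsize == 16, the code below advances the address by 4 bytes, rebases
   BITNUM to 8, and uses an access size of (8 + 16 + 7) / 8 == 3 bytes. */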
876 if (MEM_P (str_rtx) && bitregion_start > 0)
878 enum machine_mode bestmode;
879 HOST_WIDE_INT offset, size;
881 gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);
883 offset = bitregion_start / BITS_PER_UNIT;
884 bitnum -= bitregion_start;
885 size = (bitnum + bitsize + BITS_PER_UNIT - 1) / BITS_PER_UNIT;
886 bitregion_end -= bitregion_start;
887 bitregion_start = 0;
888 bestmode = get_best_mode (bitsize, bitnum,
889 bitregion_start, bitregion_end,
890 MEM_ALIGN (str_rtx), VOIDmode,
891 MEM_VOLATILE_P (str_rtx));
892 str_rtx = adjust_bitfield_address_size (str_rtx, bestmode, offset, size);
895 if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
896 bitregion_start, bitregion_end,
897 fieldmode, value, true))
898 gcc_unreachable ();
901 /* Use shifts and boolean operations to store VALUE into a bit field of
902 width BITSIZE in OP0, starting at bit BITNUM. */
904 static void
905 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
906 unsigned HOST_WIDE_INT bitnum,
907 unsigned HOST_WIDE_INT bitregion_start,
908 unsigned HOST_WIDE_INT bitregion_end,
909 rtx value)
911 enum machine_mode mode;
912 rtx temp;
913 int all_zero = 0;
914 int all_one = 0;
916 /* There is a case not handled here:
917 a structure with a known alignment of just a halfword
918 and a field split across two aligned halfwords within the structure.
919 Or likewise a structure with a known alignment of just a byte
920 and a field split across two bytes.
921 Such cases are not supposed to be able to occur. */
923 if (MEM_P (op0))
925 unsigned HOST_WIDE_INT maxbits = MAX_FIXED_MODE_SIZE;
927 if (bitregion_end)
928 maxbits = bitregion_end - bitregion_start + 1;
930 /* Get the proper mode to use for this field. We want a mode that
931 includes the entire field. If such a mode would be larger than
932 a word, we won't be doing the extraction the normal way.
933 We don't want a mode bigger than the destination. */
935 mode = GET_MODE (op0);
936 if (GET_MODE_BITSIZE (mode) == 0
937 || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
938 mode = word_mode;
940 if (MEM_VOLATILE_P (op0)
941 && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
942 && GET_MODE_BITSIZE (GET_MODE (op0)) <= maxbits
943 && flag_strict_volatile_bitfields > 0)
944 mode = GET_MODE (op0);
945 else
946 mode = get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
947 MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
949 if (mode == VOIDmode)
951 /* The only way this should occur is if the field spans word
952 boundaries. */
953 store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
954 bitregion_end, value);
955 return;
958 op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
961 mode = GET_MODE (op0);
962 gcc_assert (SCALAR_INT_MODE_P (mode));
964 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
965 for invalid input, such as f5 from gcc.dg/pr48335-2.c. */
967 if (BYTES_BIG_ENDIAN)
968 /* BITNUM is the distance between our msb
969 and that of the containing datum.
970 Convert it to the distance from the lsb. */
971 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
973 /* Now BITNUM is always the distance between our lsb
974 and that of OP0. */
976 /* Shift VALUE left by BITNUM bits. If VALUE is not constant,
977 we must first convert its mode to MODE. */
979 if (CONST_INT_P (value))
981 HOST_WIDE_INT v = INTVAL (value);
983 if (bitsize < HOST_BITS_PER_WIDE_INT)
984 v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
986 if (v == 0)
987 all_zero = 1;
988 else if ((bitsize < HOST_BITS_PER_WIDE_INT
989 && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
990 || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
991 all_one = 1;
993 value = lshift_value (mode, value, bitnum, bitsize);
995 else
997 int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
998 && bitnum + bitsize != GET_MODE_BITSIZE (mode));
1000 if (GET_MODE (value) != mode)
1001 value = convert_to_mode (mode, value, 1);
1003 if (must_and)
1004 value = expand_binop (mode, and_optab, value,
1005 mask_rtx (mode, 0, bitsize, 0),
1006 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1007 if (bitnum > 0)
1008 value = expand_shift (LSHIFT_EXPR, mode, value,
1009 bitnum, NULL_RTX, 1);
1012 /* Now clear the chosen bits in OP0,
1013 except that if VALUE is -1 we need not bother. */
1014 /* We keep the intermediates in registers to allow CSE to combine
1015 consecutive bitfield assignments. */
1017 temp = force_reg (mode, op0);
1019 if (! all_one)
1021 temp = expand_binop (mode, and_optab, temp,
1022 mask_rtx (mode, bitnum, bitsize, 1),
1023 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1024 temp = force_reg (mode, temp);
1027 /* Now logical-or VALUE into OP0, unless it is zero. */
1029 if (! all_zero)
1031 temp = expand_binop (mode, ior_optab, temp, value,
1032 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1033 temp = force_reg (mode, temp);
1036 if (op0 != temp)
1038 op0 = copy_rtx (op0);
1039 emit_move_insn (op0, temp);
1043 /* Store a bit field that is split across multiple accessible memory objects.
1045 OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1046 BITSIZE is the field width; BITPOS the position of its first bit
1047 (within the word).
1048 VALUE is the value to store.
1050 This does not yet handle fields wider than BITS_PER_WORD. */
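/* For illustration (assuming a MEM aligned to 32 bits on a target with
   32-bit or wider words): a 20-bit field at bit position 26 is stored as
   a 6-bit piece at bit 26 of the first unit followed by a 14-bit piece at
   bit 0 of the next unit. */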
1052 static void
1053 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1054 unsigned HOST_WIDE_INT bitpos,
1055 unsigned HOST_WIDE_INT bitregion_start,
1056 unsigned HOST_WIDE_INT bitregion_end,
1057 rtx value)
1059 unsigned int unit;
1060 unsigned int bitsdone = 0;
1062 /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1063 much at a time. */
1064 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1065 unit = BITS_PER_WORD;
1066 else
1067 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1069 /* If VALUE is a constant other than a CONST_INT, get it into a register in
1070 WORD_MODE. If we can do this using gen_lowpart_common, do so. Note
1071 that VALUE might be a floating-point constant. */
1072 if (CONSTANT_P (value) && !CONST_INT_P (value))
1074 rtx word = gen_lowpart_common (word_mode, value);
1076 if (word && (value != word))
1077 value = word;
1078 else
1079 value = gen_lowpart_common (word_mode,
1080 force_reg (GET_MODE (value) != VOIDmode
1081 ? GET_MODE (value)
1082 : word_mode, value));
1085 while (bitsdone < bitsize)
1087 unsigned HOST_WIDE_INT thissize;
1088 rtx part, word;
1089 unsigned HOST_WIDE_INT thispos;
1090 unsigned HOST_WIDE_INT offset;
1092 offset = (bitpos + bitsdone) / unit;
1093 thispos = (bitpos + bitsdone) % unit;
1095 /* When region of bytes we can touch is restricted, decrease
1096 UNIT close to the end of the region as needed. */
1097 if (bitregion_end
1098 && unit > BITS_PER_UNIT
1099 && bitpos + bitsdone - thispos + unit > bitregion_end + 1)
1101 unit = unit / 2;
1102 continue;
1105 /* THISSIZE must not overrun a word boundary. Otherwise,
1106 store_fixed_bit_field will call us again, and we will mutually
1107 recurse forever. */
1108 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1109 thissize = MIN (thissize, unit - thispos);
1111 if (BYTES_BIG_ENDIAN)
1113 /* Fetch successively less significant portions. */
1114 if (CONST_INT_P (value))
1115 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1116 >> (bitsize - bitsdone - thissize))
1117 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1118 else
1120 int total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1121 /* The args are chosen so that the last part includes the
1122 lsb. Give extract_bit_field the value it needs (with
1123 endianness compensation) to fetch the piece we want. */
1124 part = extract_fixed_bit_field (word_mode, value, thissize,
1125 total_bits - bitsize + bitsdone,
1126 NULL_RTX, 1, false);
1129 else
1131 /* Fetch successively more significant portions. */
1132 if (CONST_INT_P (value))
1133 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1134 >> bitsdone)
1135 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1136 else
1137 part = extract_fixed_bit_field (word_mode, value, thissize,
1138 bitsdone, NULL_RTX, 1, false);
1141 /* If OP0 is a register, then handle OFFSET here.
1143 When handling multiword bitfields, extract_bit_field may pass
1144 down a word_mode SUBREG of a larger REG for a bitfield that actually
1145 crosses a word boundary. Thus, for a SUBREG, we must find
1146 the current word starting from the base register. */
1147 if (GET_CODE (op0) == SUBREG)
1149 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1150 enum machine_mode sub_mode = GET_MODE (SUBREG_REG (op0));
1151 if (sub_mode != BLKmode && GET_MODE_SIZE (sub_mode) < UNITS_PER_WORD)
1152 word = word_offset ? const0_rtx : op0;
1153 else
1154 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1155 GET_MODE (SUBREG_REG (op0)));
1156 offset = 0;
1158 else if (REG_P (op0))
1160 enum machine_mode op0_mode = GET_MODE (op0);
1161 if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
1162 word = offset ? const0_rtx : op0;
1163 else
1164 word = operand_subword_force (op0, offset, GET_MODE (op0));
1165 offset = 0;
1167 else
1168 word = op0;
1170 /* OFFSET is in UNITs, and UNIT is in bits. If WORD is const0_rtx,
1171 it is just an out-of-bounds access. Ignore it. */
1172 if (word != const0_rtx)
1173 store_fixed_bit_field (word, thissize, offset * unit + thispos,
1174 bitregion_start, bitregion_end, part);
1175 bitsdone += thissize;
1179 /* A subroutine of extract_bit_field_1 that converts return value X
1180 to either MODE or TMODE. MODE, TMODE and UNSIGNEDP are arguments
1181 to extract_bit_field. */
1183 static rtx
1184 convert_extracted_bit_field (rtx x, enum machine_mode mode,
1185 enum machine_mode tmode, bool unsignedp)
1187 if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1188 return x;
1190 /* If the x mode is not a scalar integral, first convert to the
1191 integer mode of that size and then access it as a floating-point
1192 value via a SUBREG. */
1193 if (!SCALAR_INT_MODE_P (tmode))
1195 enum machine_mode smode;
1197 smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1198 x = convert_to_mode (smode, x, unsignedp);
1199 x = force_reg (smode, x);
1200 return gen_lowpart (tmode, x);
1203 return convert_to_mode (tmode, x, unsignedp);
1206 /* Try to use an ext(z)v pattern to extract a field from OP0.
1207 Return the extracted value on success, otherwise return null.
1208 EXT_MODE is the mode of the extraction and the other arguments
1209 are as for extract_bit_field. */
1211 static rtx
1212 extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
1213 unsigned HOST_WIDE_INT bitsize,
1214 unsigned HOST_WIDE_INT bitnum,
1215 int unsignedp, rtx target,
1216 enum machine_mode mode, enum machine_mode tmode)
1218 struct expand_operand ops[4];
1219 rtx spec_target = target;
1220 rtx spec_target_subreg = 0;
1221 enum machine_mode ext_mode = extv->field_mode;
1222 unsigned unit = GET_MODE_BITSIZE (ext_mode);
1224 if (bitsize == 0 || unit < bitsize)
1225 return NULL_RTX;
1227 if (MEM_P (op0))
1228 /* Get a reference to the first byte of the field. */
1229 op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
1230 &bitnum);
1231 else
1233 /* Convert from counting within OP0 to counting in EXT_MODE. */
1234 if (BYTES_BIG_ENDIAN)
1235 bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1237 /* If op0 is a register, we need it in EXT_MODE to make it
1238 acceptable to the format of ext(z)v. */
1239 if (GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1240 return NULL_RTX;
1241 if (REG_P (op0) && GET_MODE (op0) != ext_mode)
1242 op0 = gen_lowpart_SUBREG (ext_mode, op0);
1245 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1246 "backwards" from the size of the unit we are extracting from.
1247 Otherwise, we count bits from the most significant on a
1248 BYTES/BITS_BIG_ENDIAN machine. */
1250 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1251 bitnum = unit - bitsize - bitnum;
1253 if (target == 0)
1254 target = spec_target = gen_reg_rtx (tmode);
1256 if (GET_MODE (target) != ext_mode)
1258 /* Don't use LHS paradoxical subreg if explicit truncation is needed
1259 between the mode of the extraction (word_mode) and the target
1260 mode. Instead, create a temporary and use convert_move to set
1261 the target. */
1262 if (REG_P (target)
1263 && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode))
1265 target = gen_lowpart (ext_mode, target);
1266 if (GET_MODE_PRECISION (ext_mode)
1267 > GET_MODE_PRECISION (GET_MODE (spec_target)))
1268 spec_target_subreg = target;
1270 else
1271 target = gen_reg_rtx (ext_mode);
1274 create_output_operand (&ops[0], target, ext_mode);
1275 create_fixed_operand (&ops[1], op0);
1276 create_integer_operand (&ops[2], bitsize);
1277 create_integer_operand (&ops[3], bitnum);
1278 if (maybe_expand_insn (extv->icode, 4, ops))
1280 target = ops[0].value;
1281 if (target == spec_target)
1282 return target;
1283 if (target == spec_target_subreg)
1284 return spec_target;
1285 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1287 return NULL_RTX;
1290 /* A subroutine of extract_bit_field, with the same arguments.
1291 If FALLBACK_P is true, fall back to extract_fixed_bit_field
1292 if we can find no other means of implementing the operation.
1293 If FALLBACK_P is false, return NULL instead. */
1295 static rtx
1296 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1297 unsigned HOST_WIDE_INT bitnum,
1298 int unsignedp, bool packedp, rtx target,
1299 enum machine_mode mode, enum machine_mode tmode,
1300 bool fallback_p)
1302 rtx op0 = str_rtx;
1303 enum machine_mode int_mode;
1304 enum machine_mode mode1;
1306 if (tmode == VOIDmode)
1307 tmode = mode;
1309 while (GET_CODE (op0) == SUBREG)
1311 bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1312 op0 = SUBREG_REG (op0);
1315 /* If we have an out-of-bounds access to a register, just return an
1316 uninitialized register of the required mode. This can occur if the
1317 source code contains an out-of-bounds access to a small array. */
1318 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1319 return gen_reg_rtx (tmode);
1321 if (REG_P (op0)
1322 && mode == GET_MODE (op0)
1323 && bitnum == 0
1324 && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1326 /* We're trying to extract a full register from itself. */
1327 return op0;
1330 /* See if we can get a better vector mode before extracting. */
1331 if (VECTOR_MODE_P (GET_MODE (op0))
1332 && !MEM_P (op0)
1333 && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1335 enum machine_mode new_mode;
1337 if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1338 new_mode = MIN_MODE_VECTOR_FLOAT;
1339 else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1340 new_mode = MIN_MODE_VECTOR_FRACT;
1341 else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1342 new_mode = MIN_MODE_VECTOR_UFRACT;
1343 else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1344 new_mode = MIN_MODE_VECTOR_ACCUM;
1345 else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1346 new_mode = MIN_MODE_VECTOR_UACCUM;
1347 else
1348 new_mode = MIN_MODE_VECTOR_INT;
1350 for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1351 if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1352 && targetm.vector_mode_supported_p (new_mode))
1353 break;
1354 if (new_mode != VOIDmode)
1355 op0 = gen_lowpart (new_mode, op0);
1358 /* Use vec_extract patterns for extracting parts of vectors whenever
1359 available. */
1360 if (VECTOR_MODE_P (GET_MODE (op0))
1361 && !MEM_P (op0)
1362 && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing
1363 && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1364 == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1366 struct expand_operand ops[3];
1367 enum machine_mode outermode = GET_MODE (op0);
1368 enum machine_mode innermode = GET_MODE_INNER (outermode);
1369 enum insn_code icode = optab_handler (vec_extract_optab, outermode);
1370 unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1372 create_output_operand (&ops[0], target, innermode);
1373 create_input_operand (&ops[1], op0, outermode);
1374 create_integer_operand (&ops[2], pos);
1375 if (maybe_expand_insn (icode, 3, ops))
1377 target = ops[0].value;
1378 if (GET_MODE (target) != mode)
1379 return gen_lowpart (tmode, target);
1380 return target;
1384 /* Make sure we are playing with integral modes. Pun with subregs
1385 if we aren't. */
1387 enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1388 if (imode != GET_MODE (op0))
1390 if (MEM_P (op0))
1391 op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
1392 else if (imode != BLKmode)
1394 op0 = gen_lowpart (imode, op0);
1396 /* If we got a SUBREG, force it into a register since we
1397 aren't going to be able to do another SUBREG on it. */
1398 if (GET_CODE (op0) == SUBREG)
1399 op0 = force_reg (imode, op0);
1401 else if (REG_P (op0))
1403 rtx reg, subreg;
1404 imode = smallest_mode_for_size (GET_MODE_BITSIZE (GET_MODE (op0)),
1405 MODE_INT);
1406 reg = gen_reg_rtx (imode);
1407 subreg = gen_lowpart_SUBREG (GET_MODE (op0), reg);
1408 emit_move_insn (subreg, op0);
1409 op0 = reg;
1410 bitnum += SUBREG_BYTE (subreg) * BITS_PER_UNIT;
1412 else
1414 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (op0));
1415 rtx mem = assign_stack_temp (GET_MODE (op0), size);
1416 emit_move_insn (mem, op0);
1417 op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
1422 /* ??? We currently assume TARGET is at least as big as BITSIZE.
1423 If that's wrong, the solution is to test for it and set TARGET to 0
1424 if needed. */
1426 /* If the bitfield is volatile, we need to make sure the access
1427 remains on a type-aligned boundary. */
1428 if (GET_CODE (op0) == MEM
1429 && MEM_VOLATILE_P (op0)
1430 && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
1431 && flag_strict_volatile_bitfields > 0)
1432 goto no_subreg_mode_swap;
1434 /* Only scalar integer modes can be converted via subregs. There is an
1435 additional problem for FP modes here in that they can have a precision
1436 which is different from the size. mode_for_size uses precision, but
1437 we want a mode based on the size, so we must avoid calling it for FP
1438 modes. */
1439 mode1 = mode;
1440 if (SCALAR_INT_MODE_P (tmode))
1442 enum machine_mode try_mode = mode_for_size (bitsize,
1443 GET_MODE_CLASS (tmode), 0);
1444 if (try_mode != BLKmode)
1445 mode1 = try_mode;
1447 gcc_assert (mode1 != BLKmode);
1449 /* Extraction of a full MODE1 value can be done with a subreg as long
1450 as the least significant bit of the value is the least significant
1451 bit of either OP0 or a word of OP0. */
1452 if (!MEM_P (op0)
1453 && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
1454 && bitsize == GET_MODE_BITSIZE (mode1)
1455 && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0)))
1457 rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1458 bitnum / BITS_PER_UNIT);
1459 if (sub)
1460 return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1463 /* Extraction of a full MODE1 value can be done with a load as long as
1464 the field is on a byte boundary and is sufficiently aligned. */
1465 if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1))
1467 op0 = adjust_bitfield_address (op0, mode1, bitnum / BITS_PER_UNIT);
1468 return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1471 no_subreg_mode_swap:
1473 /* Handle fields bigger than a word. */
1475 if (bitsize > BITS_PER_WORD)
1477 /* Here we transfer the words of the field
1478 in the order least significant first.
1479 This is because the most significant word is the one which may
1480 be less than full. */
1482 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1483 unsigned int i;
1484 rtx last;
1486 if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1487 target = gen_reg_rtx (mode);
1489 /* Indicate for flow that the entire target reg is being set. */
1490 emit_clobber (target);
1492 last = get_last_insn ();
1493 for (i = 0; i < nwords; i++)
1495 /* If I is 0, use the low-order word in both field and target;
1496 if I is 1, use the next to lowest word; and so on. */
1497 /* Word number in TARGET to use. */
1498 unsigned int wordnum
1499 = (WORDS_BIG_ENDIAN
1500 ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1501 : i);
1502 /* Offset from start of field in OP0. */
1503 unsigned int bit_offset = (WORDS_BIG_ENDIAN
1504 ? MAX (0, ((int) bitsize - ((int) i + 1)
1505 * (int) BITS_PER_WORD))
1506 : (int) i * BITS_PER_WORD);
1507 rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1508 rtx result_part
1509 = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1510 bitsize - i * BITS_PER_WORD),
1511 bitnum + bit_offset, 1, false, target_part,
1512 mode, word_mode, fallback_p);
1514 gcc_assert (target_part);
1515 if (!result_part)
1517 delete_insns_since (last);
1518 return NULL;
1521 if (result_part != target_part)
1522 emit_move_insn (target_part, result_part);
1525 if (unsignedp)
1527 /* Unless we've filled TARGET, the upper regs in a multi-reg value
1528 need to be zero'd out. */
1529 if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1531 unsigned int i, total_words;
1533 total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1534 for (i = nwords; i < total_words; i++)
1535 emit_move_insn
1536 (operand_subword (target,
1537 WORDS_BIG_ENDIAN ? total_words - i - 1 : i,
1538 1, VOIDmode),
1539 const0_rtx);
1541 return target;
1544 /* Signed bit field: sign-extend with two arithmetic shifts. */
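/* For illustration (assuming 64-bit DImode): a signed 48-bit field is
   shifted left by 16 and then arithmetically right by 16, so that bit 47
   of the field becomes the sign bit of the result. */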
1545 target = expand_shift (LSHIFT_EXPR, mode, target,
1546 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1547 return expand_shift (RSHIFT_EXPR, mode, target,
1548 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1551 /* If OP0 is a multi-word register, narrow it to the affected word.
1552 If the region spans two words, defer to extract_split_bit_field. */
1553 if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1555 op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
1556 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1557 bitnum %= BITS_PER_WORD;
1558 if (bitnum + bitsize > BITS_PER_WORD)
1560 if (!fallback_p)
1561 return NULL_RTX;
1562 target = extract_split_bit_field (op0, bitsize, bitnum, unsignedp);
1563 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1567 /* From here on we know the desired field is smaller than a word.
1568 If OP0 is a register, it too fits within a word. */
1569 enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
1570 extraction_insn extv;
1571 if (!MEM_P (op0)
1572 && get_best_reg_extraction_insn (&extv, pattern, bitnum + bitsize,
1573 tmode))
1575 rtx result = extract_bit_field_using_extv (&extv, op0, bitsize, bitnum,
1576 unsignedp, target, mode,
1577 tmode);
1578 if (result)
1579 return result;
1582 /* If OP0 is a memory, try copying it to a register and seeing if a
1583 cheap register alternative is available. */
1584 if (MEM_P (op0))
1586 /* Do not use extv/extzv for volatile bitfields when
1587 -fstrict-volatile-bitfields is in effect. */
1588 if (!(MEM_VOLATILE_P (op0) && flag_strict_volatile_bitfields > 0)
1589 && get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
1590 tmode))
1592 rtx result = extract_bit_field_using_extv (&extv, op0, bitsize,
1593 bitnum, unsignedp,
1594 target, mode,
1595 tmode);
1596 if (result)
1597 return result;
1600 rtx last = get_last_insn ();
1602 /* Try loading part of OP0 into a register and extracting the
1603 bitfield from that. */
1604 unsigned HOST_WIDE_INT bitpos;
1605 rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
1606 0, 0, tmode, &bitpos);
1607 if (xop0)
1609 xop0 = copy_to_reg (xop0);
1610 rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
1611 unsignedp, packedp, target,
1612 mode, tmode, false);
1613 if (result)
1614 return result;
1615 delete_insns_since (last);
1619 if (!fallback_p)
1620 return NULL;
1622 /* Find a correspondingly-sized integer field, so we can apply
1623 shifts and masks to it. */
1624 int_mode = int_mode_for_mode (tmode);
1625 if (int_mode == BLKmode)
1626 int_mode = int_mode_for_mode (mode);
1627 /* Should probably push op0 out to memory and then do a load. */
1628 gcc_assert (int_mode != BLKmode);
1630 target = extract_fixed_bit_field (int_mode, op0, bitsize, bitnum,
1631 target, unsignedp, packedp);
1632 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1635 /* Generate code to extract a byte-field from STR_RTX
1636 containing BITSIZE bits, starting at BITNUM,
1637 and put it in TARGET if possible (if TARGET is nonzero).
1638 Regardless of TARGET, we return the rtx for where the value is placed.
1640 STR_RTX is the structure containing the byte (a REG or MEM).
1641 UNSIGNEDP is nonzero if this is an unsigned bit field.
1642 PACKEDP is nonzero if the field has the packed attribute.
1643 MODE is the natural mode of the field value once extracted.
1644 TMODE is the mode the caller would like the value to have;
1645 but the value may be returned with type MODE instead.
1647 If a TARGET is specified and we can store in it at no extra cost,
1648 we do so, and return TARGET.
1649 Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1650 if they are equally easy. */
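/* For illustration (hypothetical call): extract_bit_field (x, 12, 4, 1,
   false, NULL_RTX, SImode, SImode) would extract an unsigned 12-bit field
   starting at bit 4 of X as an SImode value. */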
1652 rtx
1653 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1654 unsigned HOST_WIDE_INT bitnum, int unsignedp, bool packedp,
1655 rtx target, enum machine_mode mode, enum machine_mode tmode)
1657 return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp, packedp,
1658 target, mode, tmode, true);
1661 /* Use shifts and boolean operations to extract a field of BITSIZE bits
1662 from bit BITNUM of OP0.
1664 UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1665 PACKEDP is true if the field has the packed attribute.
1667 If TARGET is nonzero, attempts to store the value there
1668 and return TARGET, but this is not guaranteed.
1669 If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */
1671 static rtx
1672 extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
1673 unsigned HOST_WIDE_INT bitsize,
1674 unsigned HOST_WIDE_INT bitnum, rtx target,
1675 int unsignedp, bool packedp)
1677 enum machine_mode mode;
1679 if (MEM_P (op0))
1681 /* Get the proper mode to use for this field. We want a mode that
1682 includes the entire field. If such a mode would be larger than
1683 a word, we won't be doing the extraction the normal way. */
1685 if (MEM_VOLATILE_P (op0)
1686 && flag_strict_volatile_bitfields > 0)
1688 if (GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
1689 mode = GET_MODE (op0);
1690 else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1691 mode = GET_MODE (target);
1692 else
1693 mode = tmode;
1695 else
1696 mode = get_best_mode (bitsize, bitnum, 0, 0,
1697 MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));
1699 if (mode == VOIDmode)
1700 /* The only way this should occur is if the field spans word
1701 boundaries. */
1702 return extract_split_bit_field (op0, bitsize, bitnum, unsignedp);
1704 unsigned int total_bits = GET_MODE_BITSIZE (mode);
1705 HOST_WIDE_INT bit_offset = bitnum - bitnum % total_bits;
1707 /* If we're accessing a volatile MEM, we can't apply BIT_OFFSET
1708 if it results in a multi-word access where we otherwise wouldn't
1709 have one. So, check for that case here. */
1710 if (MEM_P (op0)
1711 && MEM_VOLATILE_P (op0)
1712 && flag_strict_volatile_bitfields > 0
1713 && bitnum % BITS_PER_UNIT + bitsize <= total_bits
1714 && bitnum % GET_MODE_BITSIZE (mode) + bitsize > total_bits)
1716 if (STRICT_ALIGNMENT)
1718 static bool informed_about_misalignment = false;
1720 if (packedp)
1722 if (bitsize == total_bits)
1723 warning_at (input_location, OPT_fstrict_volatile_bitfields,
1724 "multiple accesses to volatile structure"
1725 " member because of packed attribute");
1726 else
1727 warning_at (input_location, OPT_fstrict_volatile_bitfields,
1728 "multiple accesses to volatile structure"
1729 " bitfield because of packed attribute");
1731 return extract_split_bit_field (op0, bitsize, bitnum,
1732 unsignedp);
1735 if (bitsize == total_bits)
1736 warning_at (input_location, OPT_fstrict_volatile_bitfields,
1737 "mis-aligned access used for structure member");
1738 else
1739 warning_at (input_location, OPT_fstrict_volatile_bitfields,
1740 "mis-aligned access used for structure bitfield");
1742 if (! informed_about_misalignment)
1744 informed_about_misalignment = true;
1745 inform (input_location,
1746 "when a volatile object spans multiple type-sized"
1747 " locations, the compiler must choose between using"
1748 " a single mis-aligned access to preserve the"
1749 " volatility, or using multiple aligned accesses"
1750 " to avoid runtime faults; this code may fail at"
1751 " runtime if the hardware does not allow this"
1752 " access");
1755 bit_offset = bitnum - bitnum % BITS_PER_UNIT;
1757 op0 = adjust_bitfield_address (op0, mode, bit_offset / BITS_PER_UNIT);
1758 bitnum -= bit_offset;
1761 mode = GET_MODE (op0);
1762 gcc_assert (SCALAR_INT_MODE_P (mode));
1764 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1765 for invalid input, such as extract equivalent of f5 from
1766 gcc.dg/pr48335-2.c. */
1768 if (BYTES_BIG_ENDIAN)
1769 /* BITNUM is the distance between our msb and that of OP0.
1770 Convert it to the distance from the lsb. */
1771 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1773 /* Now BITNUM is always the distance between the field's lsb and that of OP0.
1774 We have reduced the big-endian case to the little-endian case. */
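/* For example, in a 32-bit mode a 5-bit field whose msb lies 3 bits below
   OP0's msb (big-endian BITNUM == 3) becomes BITNUM == 32 - 5 - 3 == 24,
   counted from the lsb.  */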
1776 if (unsignedp)
1778 if (bitnum)
1780 /* If the field does not already start at the lsb,
1781 shift it so it does. */
1782 /* Maybe propagate the target for the shift. */
1783 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1784 if (tmode != mode)
1785 subtarget = 0;
1786 op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
1788 /* Convert the value to the desired mode. */
1789 if (mode != tmode)
1790 op0 = convert_to_mode (tmode, op0, 1);
1792 /* Unless the msb of the field used to be the msb when we shifted,
1793 mask out the upper bits. */
1795 if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
1796 return expand_binop (GET_MODE (op0), and_optab, op0,
1797 mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1798 target, 1, OPTAB_LIB_WIDEN);
1799 return op0;
1802 /* To extract a signed bit-field, first shift its msb to the msb of the word,
1803 then arithmetic-shift its lsb to the lsb of the word. */
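/* For example (with the field handled in SImode): an 8-bit signed field at
   BITNUM == 20 is shifted left by 32 - (8 + 20) == 4 to bring its msb to
   bit 31, then arithmetic-shifted right by 32 - 8 == 24, yielding the
   sign-extended value in the low 8 bits.  */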
1804 op0 = force_reg (mode, op0);
1806 /* Find the narrowest integer mode that contains the field. */
1808 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1809 mode = GET_MODE_WIDER_MODE (mode))
1810 if (GET_MODE_BITSIZE (mode) >= bitsize + bitnum)
1812 op0 = convert_to_mode (mode, op0, 0);
1813 break;
1816 if (mode != tmode)
1817 target = 0;
1819 if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
1821 int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
1822 /* Maybe propagate the target for the shift. */
1823 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1824 op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1827 return expand_shift (RSHIFT_EXPR, mode, op0,
1828 GET_MODE_BITSIZE (mode) - bitsize, target, 0);
1831 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1832 of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1833 complement of that if COMPLEMENT. The mask is truncated if
1834 necessary to the width of mode MODE. The mask is zero-extended if
1835 BITSIZE+BITPOS is too small for MODE. */
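/* For instance, mask_rtx (SImode, 4, 8, 0) yields 0x00000ff0 and
   mask_rtx (SImode, 4, 8, 1) yields 0xfffff00f.  */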
1837 static rtx
1838 mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
1840 double_int mask;
1842 mask = double_int::mask (bitsize);
1843 mask = mask.llshift (bitpos, HOST_BITS_PER_DOUBLE_INT);
1845 if (complement)
1846 mask = ~mask;
1848 return immed_double_int_const (mask, mode);
1851 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1852 VALUE truncated to BITSIZE bits and then shifted left BITPOS bits. */
1854 static rtx
1855 lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize)
1857 double_int val;
1859 val = double_int::from_uhwi (INTVAL (value)).zext (bitsize);
1860 val = val.llshift (bitpos, HOST_BITS_PER_DOUBLE_INT);
1862 return immed_double_int_const (val, mode);
1865 /* Extract a bit field that is split across two words
1866 and return an RTX for the result.
1868 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1869 BITSIZE is the field width; BITPOS, position of its first bit, in the word.
1870 UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend. */
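/* For example, with 32-bit words a 12-bit field at BITPOS == 26 is read as
   two parts: 6 bits at bit 26 of the first word and 6 bits at bit 0 of the
   second word; each part is extracted unsigned, shifted into position and
   combined with IOR.  */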
1872 static rtx
1873 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1874 unsigned HOST_WIDE_INT bitpos, int unsignedp)
1876 unsigned int unit;
1877 unsigned int bitsdone = 0;
1878 rtx result = NULL_RTX;
1879 int first = 1;
1881 /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1882 much at a time. */
1883 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1884 unit = BITS_PER_WORD;
1885 else
1886 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1888 while (bitsdone < bitsize)
1890 unsigned HOST_WIDE_INT thissize;
1891 rtx part, word;
1892 unsigned HOST_WIDE_INT thispos;
1893 unsigned HOST_WIDE_INT offset;
1895 offset = (bitpos + bitsdone) / unit;
1896 thispos = (bitpos + bitsdone) % unit;
1898 /* THISSIZE must not overrun a word boundary. Otherwise,
1899 extract_fixed_bit_field will call us again, and we will mutually
1900 recurse forever. */
1901 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1902 thissize = MIN (thissize, unit - thispos);
1904 /* If OP0 is a register, then handle OFFSET here.
1906 When handling multiword bitfields, extract_bit_field may pass
1907 down a word_mode SUBREG of a larger REG for a bitfield that actually
1908 crosses a word boundary. Thus, for a SUBREG, we must find
1909 the current word starting from the base register. */
1910 if (GET_CODE (op0) == SUBREG)
1912 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1913 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1914 GET_MODE (SUBREG_REG (op0)));
1915 offset = 0;
1917 else if (REG_P (op0))
1919 word = operand_subword_force (op0, offset, GET_MODE (op0));
1920 offset = 0;
1922 else
1923 word = op0;
1925 /* Extract the parts in bit-counting order,
1926 whose meaning is determined by BYTES_PER_UNIT.
1927 OFFSET is in UNITs, and UNIT is in bits. */
1928 part = extract_fixed_bit_field (word_mode, word, thissize,
1929 offset * unit + thispos, 0, 1, false);
1930 bitsdone += thissize;
1932 /* Shift this part into place for the result. */
1933 if (BYTES_BIG_ENDIAN)
1935 if (bitsize != bitsdone)
1936 part = expand_shift (LSHIFT_EXPR, word_mode, part,
1937 bitsize - bitsdone, 0, 1);
1939 else
1941 if (bitsdone != thissize)
1942 part = expand_shift (LSHIFT_EXPR, word_mode, part,
1943 bitsdone - thissize, 0, 1);
1946 if (first)
1947 result = part;
1948 else
1949 /* Combine the parts with bitwise or. This works
1950 because we extracted each part as an unsigned bit field. */
1951 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
1952 OPTAB_LIB_WIDEN);
1954 first = 0;
1957 /* Unsigned bit field: we are done. */
1958 if (unsignedp)
1959 return result;
1960 /* Signed bit field: sign-extend with two arithmetic shifts. */
1961 result = expand_shift (LSHIFT_EXPR, word_mode, result,
1962 BITS_PER_WORD - bitsize, NULL_RTX, 0);
1963 return expand_shift (RSHIFT_EXPR, word_mode, result,
1964 BITS_PER_WORD - bitsize, NULL_RTX, 0);
1967 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
1968 the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than
1969 MODE, fill the upper bits with zeros. Fail if the layout of either
1970 mode is unknown (as for CC modes) or if the extraction would involve
1971 unprofitable mode punning. Return the value on success, otherwise
1972 return null.
1974 This is different from gen_lowpart* in these respects:
1976 - the returned value must always be considered an rvalue
1978 - when MODE is wider than SRC_MODE, the extraction involves
1979 a zero extension
1981 - when MODE is smaller than SRC_MODE, the extraction involves
1982 a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
1984 In other words, this routine performs a computation, whereas the
1985 gen_lowpart* routines are conceptually lvalue or rvalue subreg
1986 operations. */
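/* Typical uses (illustrative): reading the low SImode half of a DImode
   register, or reinterpreting an SFmode value as SImode bits, provided the
   target considers the corresponding integer modes tieable.  */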
1989 extract_low_bits (enum machine_mode mode, enum machine_mode src_mode, rtx src)
1991 enum machine_mode int_mode, src_int_mode;
1993 if (mode == src_mode)
1994 return src;
1996 if (CONSTANT_P (src))
1998 /* simplify_gen_subreg can't be used here, as if simplify_subreg
1999 fails, it will happily create (subreg (symbol_ref)) or similar
2000 invalid SUBREGs. */
2001 unsigned int byte = subreg_lowpart_offset (mode, src_mode);
2002 rtx ret = simplify_subreg (mode, src, src_mode, byte);
2003 if (ret)
2004 return ret;
2006 if (GET_MODE (src) == VOIDmode
2007 || !validate_subreg (mode, src_mode, src, byte))
2008 return NULL_RTX;
2010 src = force_reg (GET_MODE (src), src);
2011 return gen_rtx_SUBREG (mode, src, byte);
2014 if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2015 return NULL_RTX;
2017 if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2018 && MODES_TIEABLE_P (mode, src_mode))
2020 rtx x = gen_lowpart_common (mode, src);
2021 if (x)
2022 return x;
2025 src_int_mode = int_mode_for_mode (src_mode);
2026 int_mode = int_mode_for_mode (mode);
2027 if (src_int_mode == BLKmode || int_mode == BLKmode)
2028 return NULL_RTX;
2030 if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2031 return NULL_RTX;
2032 if (!MODES_TIEABLE_P (int_mode, mode))
2033 return NULL_RTX;
2035 src = gen_lowpart (src_int_mode, src);
2036 src = convert_modes (int_mode, src_int_mode, src, true);
2037 src = gen_lowpart (mode, src);
2038 return src;
2041 /* Add INC into TARGET. */
2043 void
2044 expand_inc (rtx target, rtx inc)
2046 rtx value = expand_binop (GET_MODE (target), add_optab,
2047 target, inc,
2048 target, 0, OPTAB_LIB_WIDEN);
2049 if (value != target)
2050 emit_move_insn (target, value);
2053 /* Subtract DEC from TARGET. */
2055 void
2056 expand_dec (rtx target, rtx dec)
2058 rtx value = expand_binop (GET_MODE (target), sub_optab,
2059 target, dec,
2060 target, 0, OPTAB_LIB_WIDEN);
2061 if (value != target)
2062 emit_move_insn (target, value);
2065 /* Output a shift instruction for expression code CODE,
2066 with SHIFTED being the rtx for the value to shift,
2067 and AMOUNT the rtx for the amount to shift by.
2068 Store the result in the rtx TARGET, if that is convenient.
2069 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2070 Return the rtx for where the value is. */
2072 static rtx
2073 expand_shift_1 (enum tree_code code, enum machine_mode mode, rtx shifted,
2074 rtx amount, rtx target, int unsignedp)
2076 rtx op1, temp = 0;
2077 int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2078 int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2079 optab lshift_optab = ashl_optab;
2080 optab rshift_arith_optab = ashr_optab;
2081 optab rshift_uns_optab = lshr_optab;
2082 optab lrotate_optab = rotl_optab;
2083 optab rrotate_optab = rotr_optab;
2084 enum machine_mode op1_mode;
2085 int attempt;
2086 bool speed = optimize_insn_for_speed_p ();
2088 op1 = amount;
2089 op1_mode = GET_MODE (op1);
2091 /* Determine whether the shift/rotate amount is a vector, or scalar. If the
2092 shift amount is a vector, use the vector/vector shift patterns. */
2093 if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2095 lshift_optab = vashl_optab;
2096 rshift_arith_optab = vashr_optab;
2097 rshift_uns_optab = vlshr_optab;
2098 lrotate_optab = vrotl_optab;
2099 rrotate_optab = vrotr_optab;
2102 /* We used to detect shift counts computed by NEGATE_EXPR
2103 and shift in the other direction, but that does not work
2104 on all machines. */
2106 if (SHIFT_COUNT_TRUNCATED)
2108 if (CONST_INT_P (op1)
2109 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2110 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
2111 op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2112 % GET_MODE_BITSIZE (mode));
2113 else if (GET_CODE (op1) == SUBREG
2114 && subreg_lowpart_p (op1)
2115 && INTEGRAL_MODE_P (GET_MODE (SUBREG_REG (op1)))
2116 && INTEGRAL_MODE_P (GET_MODE (op1)))
2117 op1 = SUBREG_REG (op1);
2120 if (op1 == const0_rtx)
2121 return shifted;
2123 /* Check whether it's cheaper to implement a left shift by a constant
2124 bit count as a sequence of additions. */
2125 if (code == LSHIFT_EXPR
2126 && CONST_INT_P (op1)
2127 && INTVAL (op1) > 0
2128 && INTVAL (op1) < GET_MODE_PRECISION (mode)
2129 && INTVAL (op1) < MAX_BITS_PER_WORD
2130 && (shift_cost (speed, mode, INTVAL (op1))
2131 > INTVAL (op1) * add_cost (speed, mode))
2132 && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2134 int i;
2135 for (i = 0; i < INTVAL (op1); i++)
2137 temp = force_reg (mode, shifted);
2138 shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2139 unsignedp, OPTAB_LIB_WIDEN);
2141 return shifted;
2144 for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2146 enum optab_methods methods;
2148 if (attempt == 0)
2149 methods = OPTAB_DIRECT;
2150 else if (attempt == 1)
2151 methods = OPTAB_WIDEN;
2152 else
2153 methods = OPTAB_LIB_WIDEN;
2155 if (rotate)
2157 /* Widening does not work for rotation. */
2158 if (methods == OPTAB_WIDEN)
2159 continue;
2160 else if (methods == OPTAB_LIB_WIDEN)
2162 /* If we have been unable to open-code this by a rotation,
2163 do it as the IOR of two shifts. I.e., to rotate A
2164 by N bits, compute (A << N) | ((unsigned) A >> (C - N))
2165 where C is the bitsize of A.
2167 It is theoretically possible that the target machine might
2168 not be able to perform either shift and hence we would
2169 be making two libcalls rather than just the one for the
2170 shift (similarly if IOR could not be done). We will allow
2171 this extremely unlikely lossage to avoid complicating the
2172 code below. */
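/* E.g. an SImode rotate-left by 5 becomes (A << 5) | ((unsigned) A >> 27).  */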
2174 rtx subtarget = target == shifted ? 0 : target;
2175 rtx new_amount, other_amount;
2176 rtx temp1;
2178 new_amount = op1;
2179 if (CONST_INT_P (op1))
2180 other_amount = GEN_INT (GET_MODE_BITSIZE (mode)
2181 - INTVAL (op1));
2182 else
2183 other_amount
2184 = simplify_gen_binary (MINUS, GET_MODE (op1),
2185 GEN_INT (GET_MODE_PRECISION (mode)),
2186 op1);
2188 shifted = force_reg (mode, shifted);
2190 temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2191 mode, shifted, new_amount, 0, 1);
2192 temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2193 mode, shifted, other_amount,
2194 subtarget, 1);
2195 return expand_binop (mode, ior_optab, temp, temp1, target,
2196 unsignedp, methods);
2199 temp = expand_binop (mode,
2200 left ? lrotate_optab : rrotate_optab,
2201 shifted, op1, target, unsignedp, methods);
2203 else if (unsignedp)
2204 temp = expand_binop (mode,
2205 left ? lshift_optab : rshift_uns_optab,
2206 shifted, op1, target, unsignedp, methods);
2208 /* Do arithmetic shifts.
2209 Also, if we are going to widen the operand, we can just as well
2210 use an arithmetic right-shift instead of a logical one. */
2211 if (temp == 0 && ! rotate
2212 && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2214 enum optab_methods methods1 = methods;
2216 /* If trying to widen a log shift to an arithmetic shift,
2217 don't accept an arithmetic shift of the same size. */
2218 if (unsignedp)
2219 methods1 = OPTAB_MUST_WIDEN;
2221 /* Arithmetic shift */
2223 temp = expand_binop (mode,
2224 left ? lshift_optab : rshift_arith_optab,
2225 shifted, op1, target, unsignedp, methods1);
2228 /* We used to try extzv here for logical right shifts, but that was
2229 only useful for one machine, the VAX, and caused poor code
2230 generation there for lshrdi3, so the code was deleted and a
2231 define_expand for lshrsi3 was added to vax.md. */
2234 gcc_assert (temp);
2235 return temp;
2238 /* Output a shift instruction for expression code CODE,
2239 with SHIFTED being the rtx for the value to shift,
2240 and AMOUNT the amount to shift by.
2241 Store the result in the rtx TARGET, if that is convenient.
2242 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2243 Return the rtx for where the value is. */
2246 expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2247 int amount, rtx target, int unsignedp)
2249 return expand_shift_1 (code, mode,
2250 shifted, GEN_INT (amount), target, unsignedp);
2253 /* Output a shift instruction for expression code CODE,
2254 with SHIFTED being the rtx for the value to shift,
2255 and AMOUNT the tree for the amount to shift by.
2256 Store the result in the rtx TARGET, if that is convenient.
2257 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2258 Return the rtx for where the value is. */
2261 expand_variable_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2262 tree amount, rtx target, int unsignedp)
2264 return expand_shift_1 (code, mode,
2265 shifted, expand_normal (amount), target, unsignedp);
2269 /* Indicates the type of fixup needed after a constant multiplication.
2270 BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2271 the result should be negated, and ADD_VARIANT means that the
2272 multiplicand should be added to the result. */
2273 enum mult_variant {basic_variant, negate_variant, add_variant};
2275 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2276 const struct mult_cost *, enum machine_mode mode);
2277 static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
2278 struct algorithm *, enum mult_variant *, int);
2279 static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
2280 const struct algorithm *, enum mult_variant);
2281 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2282 static rtx extract_high_half (enum machine_mode, rtx);
2283 static rtx expmed_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
2284 static rtx expmed_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
2285 int, int);
2286 /* Compute and return the best algorithm for multiplying by T.
2287 The algorithm must cost less than COST_LIMIT.
2288 If retval.cost >= COST_LIMIT, no algorithm was found and all
2289 other fields of the returned struct are undefined.
2290 MODE is the machine mode of the multiplication. */
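/* As an illustration, for T == 10 this search can find sequences such as
   x * 10 == ((x << 2) + x) << 1 (synthesize 5, then one final shift) or
   x * 10 == (x << 1) + ((x << 1) << 2) (alg_shift then alg_add_factor);
   which one wins depends on the target's add, shift and shift-add costs.  */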
2292 static void
2293 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2294 const struct mult_cost *cost_limit, enum machine_mode mode)
2296 int m;
2297 struct algorithm *alg_in, *best_alg;
2298 struct mult_cost best_cost;
2299 struct mult_cost new_limit;
2300 int op_cost, op_latency;
2301 unsigned HOST_WIDE_INT orig_t = t;
2302 unsigned HOST_WIDE_INT q;
2303 int maxm, hash_index;
2304 bool cache_hit = false;
2305 enum alg_code cache_alg = alg_zero;
2306 bool speed = optimize_insn_for_speed_p ();
2307 enum machine_mode imode;
2308 struct alg_hash_entry *entry_ptr;
2310 /* Indicate that no algorithm is yet found. If no algorithm
2311 is found, this value will be returned and indicate failure. */
2312 alg_out->cost.cost = cost_limit->cost + 1;
2313 alg_out->cost.latency = cost_limit->latency + 1;
2315 if (cost_limit->cost < 0
2316 || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2317 return;
2319 /* Be prepared for vector modes. */
2320 imode = GET_MODE_INNER (mode);
2321 if (imode == VOIDmode)
2322 imode = mode;
2324 maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2326 /* Restrict the bits of "t" to the multiplication's mode. */
2327 t &= GET_MODE_MASK (imode);
2329 /* t == 1 can be done in zero cost. */
2330 if (t == 1)
2332 alg_out->ops = 1;
2333 alg_out->cost.cost = 0;
2334 alg_out->cost.latency = 0;
2335 alg_out->op[0] = alg_m;
2336 return;
2339 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2340 fail now. */
2341 if (t == 0)
2343 if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2344 return;
2345 else
2347 alg_out->ops = 1;
2348 alg_out->cost.cost = zero_cost (speed);
2349 alg_out->cost.latency = zero_cost (speed);
2350 alg_out->op[0] = alg_zero;
2351 return;
2355 /* We'll be needing a couple extra algorithm structures now. */
2357 alg_in = XALLOCA (struct algorithm);
2358 best_alg = XALLOCA (struct algorithm);
2359 best_cost = *cost_limit;
2361 /* Compute the hash index. */
2362 hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2364 /* See if we already know what to do for T. */
2365 entry_ptr = alg_hash_entry_ptr (hash_index);
2366 if (entry_ptr->t == t
2367 && entry_ptr->mode == mode
2369 && entry_ptr->speed == speed
2370 && entry_ptr->alg != alg_unknown)
2372 cache_alg = entry_ptr->alg;
2374 if (cache_alg == alg_impossible)
2376 /* The cache tells us that it's impossible to synthesize
2377 multiplication by T within entry_ptr->cost. */
2378 if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2379 /* COST_LIMIT is at least as restrictive as the one
2380 recorded in the hash table, in which case we have no
2381 hope of synthesizing a multiplication. Just
2382 return. */
2383 return;
2385 /* If we get here, COST_LIMIT is less restrictive than the
2386 one recorded in the hash table, so we may be able to
2387 synthesize a multiplication. Proceed as if we didn't
2388 have the cache entry. */
2390 else
2392 if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2393 /* The cached algorithm shows that this multiplication
2394 requires more cost than COST_LIMIT. Just return. This
2395 way, we don't clobber this cache entry with
2396 alg_impossible but retain useful information. */
2397 return;
2399 cache_hit = true;
2401 switch (cache_alg)
2403 case alg_shift:
2404 goto do_alg_shift;
2406 case alg_add_t_m2:
2407 case alg_sub_t_m2:
2408 goto do_alg_addsub_t_m2;
2410 case alg_add_factor:
2411 case alg_sub_factor:
2412 goto do_alg_addsub_factor;
2414 case alg_add_t2_m:
2415 goto do_alg_add_t2_m;
2417 case alg_sub_t2_m:
2418 goto do_alg_sub_t2_m;
2420 default:
2421 gcc_unreachable ();
2426 /* If we have a group of zero bits at the low-order part of T, try
2427 multiplying by the remaining bits and then doing a shift. */
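/* E.g. T == 40 == 5 << 3: synthesize the multiplication by 5, then shift
   the result left by 3.  */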
2429 if ((t & 1) == 0)
2431 do_alg_shift:
2432 m = floor_log2 (t & -t); /* m = number of low zero bits */
2433 if (m < maxm)
2435 q = t >> m;
2436 /* The function expand_shift will choose between a shift and
2437 a sequence of additions, so the observed cost is given as
2438 MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)). */
2439 op_cost = m * add_cost (speed, mode);
2440 if (shift_cost (speed, mode, m) < op_cost)
2441 op_cost = shift_cost (speed, mode, m);
2442 new_limit.cost = best_cost.cost - op_cost;
2443 new_limit.latency = best_cost.latency - op_cost;
2444 synth_mult (alg_in, q, &new_limit, mode);
2446 alg_in->cost.cost += op_cost;
2447 alg_in->cost.latency += op_cost;
2448 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2450 struct algorithm *x;
2451 best_cost = alg_in->cost;
2452 x = alg_in, alg_in = best_alg, best_alg = x;
2453 best_alg->log[best_alg->ops] = m;
2454 best_alg->op[best_alg->ops] = alg_shift;
2457 /* See if treating ORIG_T as a signed number yields a better
2458 sequence. Try this sequence only for a negative ORIG_T
2459 as it would be useless for a non-negative ORIG_T. */
2460 if ((HOST_WIDE_INT) orig_t < 0)
2462 /* Shift ORIG_T as follows because a right shift of a
2463 negative-valued signed type is implementation
2464 defined. */
2465 q = ~(~orig_t >> m);
2466 /* The function expand_shift will choose between a shift
2467 and a sequence of additions, so the observed cost is
2468 given as MIN (m * add_cost(speed, mode),
2469 shift_cost(speed, mode, m)). */
2470 op_cost = m * add_cost (speed, mode);
2471 if (shift_cost (speed, mode, m) < op_cost)
2472 op_cost = shift_cost (speed, mode, m);
2473 new_limit.cost = best_cost.cost - op_cost;
2474 new_limit.latency = best_cost.latency - op_cost;
2475 synth_mult (alg_in, q, &new_limit, mode);
2477 alg_in->cost.cost += op_cost;
2478 alg_in->cost.latency += op_cost;
2479 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2481 struct algorithm *x;
2482 best_cost = alg_in->cost;
2483 x = alg_in, alg_in = best_alg, best_alg = x;
2484 best_alg->log[best_alg->ops] = m;
2485 best_alg->op[best_alg->ops] = alg_shift;
2489 if (cache_hit)
2490 goto done;
2493 /* If we have an odd number, add or subtract one. */
2494 if ((t & 1) != 0)
2496 unsigned HOST_WIDE_INT w;
2498 do_alg_addsub_t_m2:
2499 for (w = 1; (w & t) != 0; w <<= 1)
2501 /* If T was -1, then W will be zero after the loop. This is another
2502 case where T ends with ...111. Handling this by multiplying by
2503 (T + 1) and subtracting 1 produces slightly better code and makes
2504 algorithm selection much faster than treating it like the
2505 ...0111 case below. */
2506 if (w == 0
2507 || (w > 2
2508 /* Reject the case where t is 3.
2509 Thus we prefer addition in that case. */
2510 && t != 3))
2512 /* T ends with ...111. Multiply by (T + 1) and subtract 1. */
2514 op_cost = add_cost (speed, mode);
2515 new_limit.cost = best_cost.cost - op_cost;
2516 new_limit.latency = best_cost.latency - op_cost;
2517 synth_mult (alg_in, t + 1, &new_limit, mode);
2519 alg_in->cost.cost += op_cost;
2520 alg_in->cost.latency += op_cost;
2521 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2523 struct algorithm *x;
2524 best_cost = alg_in->cost;
2525 x = alg_in, alg_in = best_alg, best_alg = x;
2526 best_alg->log[best_alg->ops] = 0;
2527 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2530 else
2532 /* T ends with ...01 or ...011. Multiply by (T - 1) and add 1. */
2534 op_cost = add_cost (speed, mode);
2535 new_limit.cost = best_cost.cost - op_cost;
2536 new_limit.latency = best_cost.latency - op_cost;
2537 synth_mult (alg_in, t - 1, &new_limit, mode);
2539 alg_in->cost.cost += op_cost;
2540 alg_in->cost.latency += op_cost;
2541 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2543 struct algorithm *x;
2544 best_cost = alg_in->cost;
2545 x = alg_in, alg_in = best_alg, best_alg = x;
2546 best_alg->log[best_alg->ops] = 0;
2547 best_alg->op[best_alg->ops] = alg_add_t_m2;
2551 /* We may be able to calculate a * -7, a * -15, a * -31, etc
2552 quickly with a - a * n for some appropriate constant n. */
2553 m = exact_log2 (-orig_t + 1);
2554 if (m >= 0 && m < maxm)
2556 op_cost = shiftsub1_cost (speed, mode, m);
2557 new_limit.cost = best_cost.cost - op_cost;
2558 new_limit.latency = best_cost.latency - op_cost;
2559 synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2560 &new_limit, mode);
2562 alg_in->cost.cost += op_cost;
2563 alg_in->cost.latency += op_cost;
2564 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2566 struct algorithm *x;
2567 best_cost = alg_in->cost;
2568 x = alg_in, alg_in = best_alg, best_alg = x;
2569 best_alg->log[best_alg->ops] = m;
2570 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2574 if (cache_hit)
2575 goto done;
2578 /* Look for factors of t of the form
2579 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2580 If we find such a factor, we can multiply by t using an algorithm that
2581 multiplies by q, shift the result by m and add/subtract it to itself.
2583 We search for large factors first and loop down, even if large factors
2584 are less probable than small; if we find a large factor we will find a
2585 good sequence quickly, and therefore be able to prune (by decreasing
2586 COST_LIMIT) the search. */
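/* E.g. T == 55 == 11 * (2^2 + 1): synthesize the multiplication by 11, then
   add that result shifted left by 2 to itself (alg_add_factor, log 2).
   T == 45 == 3 * (2^4 - 1) is matched the same way as a sub_factor.  */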
2588 do_alg_addsub_factor:
2589 for (m = floor_log2 (t - 1); m >= 2; m--)
2591 unsigned HOST_WIDE_INT d;
2593 d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2594 if (t % d == 0 && t > d && m < maxm
2595 && (!cache_hit || cache_alg == alg_add_factor))
2597 /* If the target has a cheap shift-and-add instruction use
2598 that in preference to a shift insn followed by an add insn.
2599 Assume that the shift-and-add is "atomic" with a latency
2600 equal to its cost, otherwise assume that on superscalar
2601 hardware the shift may be executed concurrently with the
2602 earlier steps in the algorithm. */
2603 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2604 if (shiftadd_cost (speed, mode, m) < op_cost)
2606 op_cost = shiftadd_cost (speed, mode, m);
2607 op_latency = op_cost;
2609 else
2610 op_latency = add_cost (speed, mode);
2612 new_limit.cost = best_cost.cost - op_cost;
2613 new_limit.latency = best_cost.latency - op_latency;
2614 synth_mult (alg_in, t / d, &new_limit, mode);
2616 alg_in->cost.cost += op_cost;
2617 alg_in->cost.latency += op_latency;
2618 if (alg_in->cost.latency < op_cost)
2619 alg_in->cost.latency = op_cost;
2620 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2622 struct algorithm *x;
2623 best_cost = alg_in->cost;
2624 x = alg_in, alg_in = best_alg, best_alg = x;
2625 best_alg->log[best_alg->ops] = m;
2626 best_alg->op[best_alg->ops] = alg_add_factor;
2628 /* Other factors will have been taken care of in the recursion. */
2629 break;
2632 d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2633 if (t % d == 0 && t > d && m < maxm
2634 && (!cache_hit || cache_alg == alg_sub_factor))
2636 /* If the target has a cheap shift-and-subtract insn use
2637 that in preference to a shift insn followed by a sub insn.
2638 Assume that the shift-and-sub is "atomic" with a latency
2639 equal to its cost, otherwise assume that on superscalar
2640 hardware the shift may be executed concurrently with the
2641 earlier steps in the algorithm. */
2642 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2643 if (shiftsub0_cost (speed, mode, m) < op_cost)
2645 op_cost = shiftsub0_cost (speed, mode, m);
2646 op_latency = op_cost;
2648 else
2649 op_latency = add_cost (speed, mode);
2651 new_limit.cost = best_cost.cost - op_cost;
2652 new_limit.latency = best_cost.latency - op_latency;
2653 synth_mult (alg_in, t / d, &new_limit, mode);
2655 alg_in->cost.cost += op_cost;
2656 alg_in->cost.latency += op_latency;
2657 if (alg_in->cost.latency < op_cost)
2658 alg_in->cost.latency = op_cost;
2659 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2661 struct algorithm *x;
2662 best_cost = alg_in->cost;
2663 x = alg_in, alg_in = best_alg, best_alg = x;
2664 best_alg->log[best_alg->ops] = m;
2665 best_alg->op[best_alg->ops] = alg_sub_factor;
2667 break;
2670 if (cache_hit)
2671 goto done;
2673 /* Try shift-and-add (load effective address) instructions,
2674 i.e. do a*3, a*5, a*9. */
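/* E.g. T == 9: q == T - 1 == 8 gives m == 3, so after synthesizing
   (T - 1) >> 3 == 1 the result is (x << 3) + x, which a single
   shift-and-add (lea-style) instruction can compute on some targets.  */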
2675 if ((t & 1) != 0)
2677 do_alg_add_t2_m:
2678 q = t - 1;
2679 q = q & -q;
2680 m = exact_log2 (q);
2681 if (m >= 0 && m < maxm)
2683 op_cost = shiftadd_cost (speed, mode, m);
2684 new_limit.cost = best_cost.cost - op_cost;
2685 new_limit.latency = best_cost.latency - op_cost;
2686 synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2688 alg_in->cost.cost += op_cost;
2689 alg_in->cost.latency += op_cost;
2690 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2692 struct algorithm *x;
2693 best_cost = alg_in->cost;
2694 x = alg_in, alg_in = best_alg, best_alg = x;
2695 best_alg->log[best_alg->ops] = m;
2696 best_alg->op[best_alg->ops] = alg_add_t2_m;
2699 if (cache_hit)
2700 goto done;
2702 do_alg_sub_t2_m:
2703 q = t + 1;
2704 q = q & -q;
2705 m = exact_log2 (q);
2706 if (m >= 0 && m < maxm)
2708 op_cost = shiftsub0_cost (speed, mode, m);
2709 new_limit.cost = best_cost.cost - op_cost;
2710 new_limit.latency = best_cost.latency - op_cost;
2711 synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2713 alg_in->cost.cost += op_cost;
2714 alg_in->cost.latency += op_cost;
2715 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2717 struct algorithm *x;
2718 best_cost = alg_in->cost;
2719 x = alg_in, alg_in = best_alg, best_alg = x;
2720 best_alg->log[best_alg->ops] = m;
2721 best_alg->op[best_alg->ops] = alg_sub_t2_m;
2724 if (cache_hit)
2725 goto done;
2728 done:
2729 /* If best_cost has not decreased, we have not found any algorithm. */
2730 if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2732 /* We failed to find an algorithm. Record alg_impossible for
2733 this case (that is, <T, MODE, COST_LIMIT>) so that next time
2734 we are asked to find an algorithm for T within the same or
2735 lower COST_LIMIT, we can immediately return to the
2736 caller. */
2737 entry_ptr->t = t;
2738 entry_ptr->mode = mode;
2739 entry_ptr->speed = speed;
2740 entry_ptr->alg = alg_impossible;
2741 entry_ptr->cost = *cost_limit;
2742 return;
2745 /* Cache the result. */
2746 if (!cache_hit)
2748 entry_ptr->t = t;
2749 entry_ptr->mode = mode;
2750 entry_ptr->speed = speed;
2751 entry_ptr->alg = best_alg->op[best_alg->ops];
2752 entry_ptr->cost.cost = best_cost.cost;
2753 entry_ptr->cost.latency = best_cost.latency;
2756 /* If we are getting too long a sequence for `struct algorithm'
2757 to record, make this search fail. */
2758 if (best_alg->ops == MAX_BITS_PER_WORD)
2759 return;
2761 /* Copy the algorithm from temporary space to the space at alg_out.
2762 We avoid using structure assignment because the majority of
2763 best_alg is normally undefined, and this is a critical function. */
2764 alg_out->ops = best_alg->ops + 1;
2765 alg_out->cost = best_cost;
2766 memcpy (alg_out->op, best_alg->op,
2767 alg_out->ops * sizeof *alg_out->op);
2768 memcpy (alg_out->log, best_alg->log,
2769 alg_out->ops * sizeof *alg_out->log);
2772 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2773 Try three variations:
2775 - a shift/add sequence based on VAL itself
2776 - a shift/add sequence based on -VAL, followed by a negation
2777 - a shift/add sequence based on VAL - 1, followed by an addition.
2779 Return true if the cheapest of these cost less than MULT_COST,
2780 describing the algorithm in *ALG and final fixup in *VARIANT. */
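/* For example, multiplying by -3 may come out as the sequence for 3
   (x + (x << 1)) followed by a negation (negate_variant), when that is
   cheaper than a direct sequence for -3.  */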
2782 static bool
2783 choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
2784 struct algorithm *alg, enum mult_variant *variant,
2785 int mult_cost)
2787 struct algorithm alg2;
2788 struct mult_cost limit;
2789 int op_cost;
2790 bool speed = optimize_insn_for_speed_p ();
2792 /* Fail quickly for impossible bounds. */
2793 if (mult_cost < 0)
2794 return false;
2796 /* Ensure that mult_cost provides a reasonable upper bound.
2797 Any constant multiplication can be performed with less
2798 than 2 * bits additions. */
2799 op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
2800 if (mult_cost > op_cost)
2801 mult_cost = op_cost;
2803 *variant = basic_variant;
2804 limit.cost = mult_cost;
2805 limit.latency = mult_cost;
2806 synth_mult (alg, val, &limit, mode);
2808 /* This works only if the inverted value actually fits in an
2809 `unsigned int' */
2810 if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
2812 op_cost = neg_cost(speed, mode);
2813 if (MULT_COST_LESS (&alg->cost, mult_cost))
2815 limit.cost = alg->cost.cost - op_cost;
2816 limit.latency = alg->cost.latency - op_cost;
2818 else
2820 limit.cost = mult_cost - op_cost;
2821 limit.latency = mult_cost - op_cost;
2824 synth_mult (&alg2, -val, &limit, mode);
2825 alg2.cost.cost += op_cost;
2826 alg2.cost.latency += op_cost;
2827 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2828 *alg = alg2, *variant = negate_variant;
2831 /* This proves very useful for division-by-constant. */
2832 op_cost = add_cost (speed, mode);
2833 if (MULT_COST_LESS (&alg->cost, mult_cost))
2835 limit.cost = alg->cost.cost - op_cost;
2836 limit.latency = alg->cost.latency - op_cost;
2838 else
2840 limit.cost = mult_cost - op_cost;
2841 limit.latency = mult_cost - op_cost;
2844 synth_mult (&alg2, val - 1, &limit, mode);
2845 alg2.cost.cost += op_cost;
2846 alg2.cost.latency += op_cost;
2847 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2848 *alg = alg2, *variant = add_variant;
2850 return MULT_COST_LESS (&alg->cost, mult_cost);
2853 /* A subroutine of expand_mult, used for constant multiplications.
2854 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2855 convenient. Use the shift/add sequence described by ALG and apply
2856 the final fixup specified by VARIANT. */
2858 static rtx
2859 expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
2860 rtx target, const struct algorithm *alg,
2861 enum mult_variant variant)
2863 HOST_WIDE_INT val_so_far;
2864 rtx insn, accum, tem;
2865 int opno;
2866 enum machine_mode nmode;
2868 /* Avoid referencing memory over and over and invalid sharing
2869 on SUBREGs. */
2870 op0 = force_reg (mode, op0);
2872 /* ACCUM starts out either as OP0 or as a zero, depending on
2873 the first operation. */
2875 if (alg->op[0] == alg_zero)
2877 accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
2878 val_so_far = 0;
2880 else if (alg->op[0] == alg_m)
2882 accum = copy_to_mode_reg (mode, op0);
2883 val_so_far = 1;
2885 else
2886 gcc_unreachable ();
2888 for (opno = 1; opno < alg->ops; opno++)
2890 int log = alg->log[opno];
2891 rtx shift_subtarget = optimize ? 0 : accum;
2892 rtx add_target
2893 = (opno == alg->ops - 1 && target != 0 && variant != add_variant
2894 && !optimize)
2895 ? target : 0;
2896 rtx accum_target = optimize ? 0 : accum;
2897 rtx accum_inner;
2899 switch (alg->op[opno])
2901 case alg_shift:
2902 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2903 /* REG_EQUAL note will be attached to the following insn. */
2904 emit_move_insn (accum, tem);
2905 val_so_far <<= log;
2906 break;
2908 case alg_add_t_m2:
2909 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
2910 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2911 add_target ? add_target : accum_target);
2912 val_so_far += (HOST_WIDE_INT) 1 << log;
2913 break;
2915 case alg_sub_t_m2:
2916 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
2917 accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
2918 add_target ? add_target : accum_target);
2919 val_so_far -= (HOST_WIDE_INT) 1 << log;
2920 break;
2922 case alg_add_t2_m:
2923 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2924 log, shift_subtarget, 0);
2925 accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
2926 add_target ? add_target : accum_target);
2927 val_so_far = (val_so_far << log) + 1;
2928 break;
2930 case alg_sub_t2_m:
2931 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2932 log, shift_subtarget, 0);
2933 accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
2934 add_target ? add_target : accum_target);
2935 val_so_far = (val_so_far << log) - 1;
2936 break;
2938 case alg_add_factor:
2939 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2940 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2941 add_target ? add_target : accum_target);
2942 val_so_far += val_so_far << log;
2943 break;
2945 case alg_sub_factor:
2946 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2947 accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
2948 (add_target
2949 ? add_target : (optimize ? 0 : tem)));
2950 val_so_far = (val_so_far << log) - val_so_far;
2951 break;
2953 default:
2954 gcc_unreachable ();
2957 if (SCALAR_INT_MODE_P (mode))
2959 /* Write a REG_EQUAL note on the last insn so that we can cse
2960 multiplication sequences. Note that if ACCUM is a SUBREG,
2961 we've set the inner register and must properly indicate that. */
2962 tem = op0, nmode = mode;
2963 accum_inner = accum;
2964 if (GET_CODE (accum) == SUBREG)
2966 accum_inner = SUBREG_REG (accum);
2967 nmode = GET_MODE (accum_inner);
2968 tem = gen_lowpart (nmode, op0);
2971 insn = get_last_insn ();
2972 set_dst_reg_note (insn, REG_EQUAL,
2973 gen_rtx_MULT (nmode, tem, GEN_INT (val_so_far)),
2974 accum_inner);
2978 if (variant == negate_variant)
2980 val_so_far = -val_so_far;
2981 accum = expand_unop (mode, neg_optab, accum, target, 0);
2983 else if (variant == add_variant)
2985 val_so_far = val_so_far + 1;
2986 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
2989 /* Compare only the bits of val and val_so_far that are significant
2990 in the result mode, to avoid sign-/zero-extension confusion. */
2991 nmode = GET_MODE_INNER (mode);
2992 if (nmode == VOIDmode)
2993 nmode = mode;
2994 val &= GET_MODE_MASK (nmode);
2995 val_so_far &= GET_MODE_MASK (nmode);
2996 gcc_assert (val == val_so_far);
2998 return accum;
3001 /* Perform a multiplication and return an rtx for the result.
3002 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3003 TARGET is a suggestion for where to store the result (an rtx).
3005 We check specially for a constant integer as OP1.
3006 If you want this check for OP0 as well, then before calling
3007 you should swap the two operands if OP0 would be constant. */
3010 expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3011 int unsignedp)
3013 enum mult_variant variant;
3014 struct algorithm algorithm;
3015 rtx scalar_op1;
3016 int max_cost;
3017 bool speed = optimize_insn_for_speed_p ();
3018 bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3020 if (CONSTANT_P (op0))
3022 rtx temp = op0;
3023 op0 = op1;
3024 op1 = temp;
3027 /* For vectors, there are several simplifications that can be made if
3028 all elements of the vector constant are identical. */
3029 scalar_op1 = op1;
3030 if (GET_CODE (op1) == CONST_VECTOR)
3032 int i, n = CONST_VECTOR_NUNITS (op1);
3033 scalar_op1 = CONST_VECTOR_ELT (op1, 0);
3034 for (i = 1; i < n; ++i)
3035 if (!rtx_equal_p (scalar_op1, CONST_VECTOR_ELT (op1, i)))
3036 goto skip_scalar;
3039 if (INTEGRAL_MODE_P (mode))
3041 rtx fake_reg;
3042 HOST_WIDE_INT coeff;
3043 bool is_neg;
3044 int mode_bitsize;
3046 if (op1 == CONST0_RTX (mode))
3047 return op1;
3048 if (op1 == CONST1_RTX (mode))
3049 return op0;
3050 if (op1 == CONSTM1_RTX (mode))
3051 return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3052 op0, target, 0);
3054 if (do_trapv)
3055 goto skip_synth;
3057 /* These are the operations that are potentially turned into
3058 a sequence of shifts and additions. */
3059 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3061 /* synth_mult does an `unsigned int' multiply. As long as the mode is
3062 less than or equal in size to `unsigned int' this doesn't matter.
3063 If the mode is larger than `unsigned int', then synth_mult works
3064 only if the constant value exactly fits in an `unsigned int' without
3065 any truncation. This means that multiplying by negative values does
3066 not work; results are off by 2^32 on a 32-bit machine.
3068 if (CONST_INT_P (scalar_op1))
3070 coeff = INTVAL (scalar_op1);
3071 is_neg = coeff < 0;
3073 else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3075 /* If we are multiplying in DImode, it may still be a win
3076 to try to work with shifts and adds. */
3077 if (CONST_DOUBLE_HIGH (scalar_op1) == 0
3078 && CONST_DOUBLE_LOW (scalar_op1) > 0)
3080 coeff = CONST_DOUBLE_LOW (scalar_op1);
3081 is_neg = false;
3083 else if (CONST_DOUBLE_LOW (scalar_op1) == 0)
3085 coeff = CONST_DOUBLE_HIGH (scalar_op1);
3086 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3088 int shift = floor_log2 (coeff) + HOST_BITS_PER_WIDE_INT;
3089 if (shift < HOST_BITS_PER_DOUBLE_INT - 1
3090 || mode_bitsize <= HOST_BITS_PER_DOUBLE_INT)
3091 return expand_shift (LSHIFT_EXPR, mode, op0,
3092 shift, target, unsignedp);
3094 goto skip_synth;
3096 else
3097 goto skip_synth;
3099 else
3100 goto skip_synth;
3102 /* We used to test optimize here, on the grounds that it's better to
3103 produce a smaller program when -O is not used. But this causes
3104 such a terrible slowdown sometimes that it seems better to always
3105 use synth_mult. */
3107 /* Special case powers of two. */
3108 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3109 return expand_shift (LSHIFT_EXPR, mode, op0,
3110 floor_log2 (coeff), target, unsignedp);
3112 fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3114 /* Attempt to handle multiplication of DImode values by negative
3115 coefficients, by performing the multiplication by a positive
3116 multiplier and then inverting the result. */
3117 if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3119 /* It's safe to use -coeff even for INT_MIN, as the
3120 result is interpreted as an unsigned coefficient.
3121 Exclude cost of op0 from max_cost to match the cost
3122 calculation of the synth_mult. */
3123 max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed)
3124 - neg_cost(speed, mode));
3125 if (max_cost > 0
3126 && choose_mult_variant (mode, -coeff, &algorithm,
3127 &variant, max_cost))
3129 rtx temp = expand_mult_const (mode, op0, -coeff, NULL_RTX,
3130 &algorithm, variant);
3131 return expand_unop (mode, neg_optab, temp, target, 0);
3133 goto skip_synth;
3136 /* Exclude cost of op0 from max_cost to match the cost
3137 calculation of the synth_mult. */
3138 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed);
3139 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3140 return expand_mult_const (mode, op0, coeff, target,
3141 &algorithm, variant);
3143 skip_synth:
3145 /* Expand x*2.0 as x+x. */
3146 if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1))
3148 REAL_VALUE_TYPE d;
3149 REAL_VALUE_FROM_CONST_DOUBLE (d, scalar_op1);
3151 if (REAL_VALUES_EQUAL (d, dconst2))
3153 op0 = force_reg (GET_MODE (op0), op0);
3154 return expand_binop (mode, add_optab, op0, op0,
3155 target, unsignedp, OPTAB_LIB_WIDEN);
3158 skip_scalar:
3160 /* This used to use umul_optab if unsigned, but for non-widening multiply
3161 there is no difference between signed and unsigned. */
3162 op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3163 op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3164 gcc_assert (op0);
3165 return op0;
3168 /* Return a cost estimate for multiplying a register by the given
3169 COEFFicient in the given MODE and SPEED. */
3172 mult_by_coeff_cost (HOST_WIDE_INT coeff, enum machine_mode mode, bool speed)
3174 int max_cost;
3175 struct algorithm algorithm;
3176 enum mult_variant variant;
3178 rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3179 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg), speed);
3180 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3181 return algorithm.cost.cost;
3182 else
3183 return max_cost;
3186 /* Perform a widening multiplication and return an rtx for the result.
3187 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3188 TARGET is a suggestion for where to store the result (an rtx).
3189 THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3190 or smul_widen_optab.
3192 We check specially for a constant integer as OP1, comparing the
3193 cost of a widening multiply against the cost of a sequence of shifts
3194 and adds. */
3197 expand_widening_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3198 int unsignedp, optab this_optab)
3200 bool speed = optimize_insn_for_speed_p ();
3201 rtx cop1;
3203 if (CONST_INT_P (op1)
3204 && GET_MODE (op0) != VOIDmode
3205 && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3206 this_optab == umul_widen_optab))
3207 && CONST_INT_P (cop1)
3208 && (INTVAL (cop1) >= 0
3209 || HWI_COMPUTABLE_MODE_P (mode)))
3211 HOST_WIDE_INT coeff = INTVAL (cop1);
3212 int max_cost;
3213 enum mult_variant variant;
3214 struct algorithm algorithm;
3216 /* Special case powers of two. */
3217 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3219 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3220 return expand_shift (LSHIFT_EXPR, mode, op0,
3221 floor_log2 (coeff), target, unsignedp);
3224 /* Exclude cost of op0 from max_cost to match the cost
3225 calculation of the synth_mult. */
3226 max_cost = mul_widen_cost (speed, mode);
3227 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3228 max_cost))
3230 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3231 return expand_mult_const (mode, op0, coeff, target,
3232 &algorithm, variant);
3235 return expand_binop (mode, this_optab, op0, op1, target,
3236 unsignedp, OPTAB_LIB_WIDEN);
3239 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3240 replace division by D, and put the least significant N bits of the result
3241 in *MULTIPLIER_PTR and return the most significant bit.
3243 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3244 needed precision is in PRECISION (should be <= N).
3246 PRECISION should be as small as possible so this function can choose
3247 the multiplier more freely.
3249 The rounded-up logarithm of D is placed in *lgup_ptr. A shift count that
3250 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3252 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3253 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
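/* A small illustration (a tiny width for clarity; callers normally pass the
   mode's bit width): for D == 3, N == PRECISION == 4 we get lgup == 2,
   mlow == 2^6/3 == 21 and mhigh == (2^6 + 2^2)/3 == 22, which reduce to a
   multiplier of 11 with *POST_SHIFT_PTR == 1; the high half of the 4x4-bit
   product x * 11, shifted right by 1, equals x / 3 for all 0 <= x < 16.  */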
3255 unsigned HOST_WIDE_INT
3256 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3257 unsigned HOST_WIDE_INT *multiplier_ptr,
3258 int *post_shift_ptr, int *lgup_ptr)
3260 double_int mhigh, mlow;
3261 int lgup, post_shift;
3262 int pow, pow2;
3264 /* lgup = ceil(log2(divisor)); */
3265 lgup = ceil_log2 (d);
3267 gcc_assert (lgup <= n);
3269 pow = n + lgup;
3270 pow2 = n + lgup - precision;
3272 /* We could handle this with some effort, but this case is much
3273 better handled directly with a scc insn, so rely on caller using
3274 that. */
3275 gcc_assert (pow != HOST_BITS_PER_DOUBLE_INT);
3277 /* mlow = 2^(N + lgup)/d */
3278 double_int val = double_int_zero.set_bit (pow);
3279 mlow = val.div (double_int::from_uhwi (d), true, TRUNC_DIV_EXPR);
3281 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3282 val |= double_int_zero.set_bit (pow2);
3283 mhigh = val.div (double_int::from_uhwi (d), true, TRUNC_DIV_EXPR);
3285 gcc_assert (!mhigh.high || val.high - d < d);
3286 gcc_assert (mhigh.high <= 1 && mlow.high <= 1);
3287 /* Assert that mlow < mhigh. */
3288 gcc_assert (mlow.ult (mhigh));
3290 /* If precision == N, then mlow, mhigh exceed 2^N
3291 (but they do not exceed 2^(N+1)). */
3293 /* Reduce to lowest terms. */
3294 for (post_shift = lgup; post_shift > 0; post_shift--)
3296 int shft = HOST_BITS_PER_WIDE_INT - 1;
3297 unsigned HOST_WIDE_INT ml_lo = (mlow.high << shft) | (mlow.low >> 1);
3298 unsigned HOST_WIDE_INT mh_lo = (mhigh.high << shft) | (mhigh.low >> 1);
3299 if (ml_lo >= mh_lo)
3300 break;
3302 mlow = double_int::from_uhwi (ml_lo);
3303 mhigh = double_int::from_uhwi (mh_lo);
3306 *post_shift_ptr = post_shift;
3307 *lgup_ptr = lgup;
3308 if (n < HOST_BITS_PER_WIDE_INT)
3310 unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3311 *multiplier_ptr = mhigh.low & mask;
3312 return mhigh.low >= mask;
3314 else
3316 *multiplier_ptr = mhigh.low;
3317 return mhigh.high;
3321 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3322 congruent to 1 (mod 2**N). */
3324 static unsigned HOST_WIDE_INT
3325 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3327 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
3329 /* The algorithm notes that the choice y = x satisfies
3330 x*y == 1 mod 2^3, since x is assumed odd.
3331 Each iteration doubles the number of bits of significance in y. */
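/* For example, x == 3, n == 8: y starts at 3 (3 * 3 == 9 == 1 mod 2^3);
   one iteration gives y == 3 * (2 - 9) == 235 (mod 256), correct mod 2^6,
   and a second gives y == 235 * (2 - 3 * 235) == 171 (mod 256), with
   3 * 171 == 513 == 1 (mod 256).  */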
3333 unsigned HOST_WIDE_INT mask;
3334 unsigned HOST_WIDE_INT y = x;
3335 int nbit = 3;
3337 mask = (n == HOST_BITS_PER_WIDE_INT
3338 ? ~(unsigned HOST_WIDE_INT) 0
3339 : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3341 while (nbit < n)
3343 y = y * (2 - x*y) & mask; /* Modulo 2^N */
3344 nbit *= 2;
3346 return y;
3349 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3350 flavor of OP0 and OP1. ADJ_OPERAND is already the high half of the
3351 product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed product
3352 to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3353 become signed.
3355 The result is put in TARGET if that is convenient.
3357 MODE is the mode of operation. */
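/* Sketch of the math behind the code below: for N-bit operands A and B,
   unsigned_highpart (A, B) == signed_highpart (A, B)
   + (A < 0 ? B : 0) + (B < 0 ? A : 0)  (mod 2^N),
   so the correction terms are formed as (A >> (N-1)) & B and
   (B >> (N-1)) & A and added or subtracted according to UNSIGNEDP.  */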
3360 expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
3361 rtx op1, rtx target, int unsignedp)
3363 rtx tem;
3364 enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3366 tem = expand_shift (RSHIFT_EXPR, mode, op0,
3367 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3368 tem = expand_and (mode, tem, op1, NULL_RTX);
3369 adj_operand
3370 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3371 adj_operand);
3373 tem = expand_shift (RSHIFT_EXPR, mode, op1,
3374 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3375 tem = expand_and (mode, tem, op0, NULL_RTX);
3376 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3377 target);
3379 return target;
3382 /* Subroutine of expmed_mult_highpart. Return the MODE high part of OP. */
3384 static rtx
3385 extract_high_half (enum machine_mode mode, rtx op)
3387 enum machine_mode wider_mode;
3389 if (mode == word_mode)
3390 return gen_highpart (mode, op);
3392 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3394 wider_mode = GET_MODE_WIDER_MODE (mode);
3395 op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3396 GET_MODE_BITSIZE (mode), 0, 1);
3397 return convert_modes (mode, wider_mode, op, 0);
3400 /* Like expmed_mult_highpart, but only consider using a multiplication
3401 optab. OP1 is an rtx for the constant operand. */
3403 static rtx
3404 expmed_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
3405 rtx target, int unsignedp, int max_cost)
3407 rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3408 enum machine_mode wider_mode;
3409 optab moptab;
3410 rtx tem;
3411 int size;
3412 bool speed = optimize_insn_for_speed_p ();
3414 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3416 wider_mode = GET_MODE_WIDER_MODE (mode);
3417 size = GET_MODE_BITSIZE (mode);
3419 /* Firstly, try using a multiplication insn that only generates the needed
3420 high part of the product, and in the sign flavor of unsignedp. */
3421 if (mul_highpart_cost (speed, mode) < max_cost)
3423 moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3424 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3425 unsignedp, OPTAB_DIRECT);
3426 if (tem)
3427 return tem;
3430 /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3431 Need to adjust the result after the multiplication. */
3432 if (size - 1 < BITS_PER_WORD
3433 && (mul_highpart_cost (speed, mode)
3434 + 2 * shift_cost (speed, mode, size-1)
3435 + 4 * add_cost (speed, mode) < max_cost))
3437 moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3438 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3439 unsignedp, OPTAB_DIRECT);
3440 if (tem)
3441 /* We used the wrong signedness. Adjust the result. */
3442 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3443 tem, unsignedp);
3446 /* Try widening multiplication. */
3447 moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3448 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3449 && mul_widen_cost (speed, wider_mode) < max_cost)
3451 tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3452 unsignedp, OPTAB_WIDEN);
3453 if (tem)
3454 return extract_high_half (mode, tem);
3457 /* Try widening the mode and perform a non-widening multiplication. */
3458 if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3459 && size - 1 < BITS_PER_WORD
3460 && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3461 < max_cost))
3463 rtx insns, wop0, wop1;
3465 /* We need to widen the operands, for example to ensure the
3466 constant multiplier is correctly sign or zero extended.
3467 Use a sequence to clean up any instructions emitted by
3468 the conversions if things don't work out. */
3469 start_sequence ();
3470 wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3471 wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3472 tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3473 unsignedp, OPTAB_WIDEN);
3474 insns = get_insns ();
3475 end_sequence ();
3477 if (tem)
3479 emit_insn (insns);
3480 return extract_high_half (mode, tem);
3484 /* Try widening multiplication of opposite signedness, and adjust. */
3485 moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3486 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3487 && size - 1 < BITS_PER_WORD
3488 && (mul_widen_cost (speed, wider_mode)
3489 + 2 * shift_cost (speed, mode, size-1)
3490 + 4 * add_cost (speed, mode) < max_cost))
3492 tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3493 NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3494 if (tem != 0)
3496 tem = extract_high_half (mode, tem);
3497 /* We used the wrong signedness. Adjust the result. */
3498 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3499 target, unsignedp);
3503 return 0;
3506 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3507 putting the high half of the result in TARGET if that is convenient,
3508 and return where the result is. If the operation cannot be performed,
3509 0 is returned.
3511 MODE is the mode of operation and result.
3513 UNSIGNEDP nonzero means unsigned multiply.
3515 MAX_COST is the total allowed cost for the expanded RTL. */
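/* As a concrete sketch of what "high half" means here (illustration
   only, assuming a 32-bit mode and 64-bit intermediate arithmetic):

     static unsigned int
     umulh32_sketch (unsigned int x, unsigned int y)
     {
       return (unsigned int) (((unsigned long long) x * y) >> 32);
     }

   i.e. bits [63:32] of the double-width product; the signed flavor is
   the same computation on sign-extended operands.  */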
3517 static rtx
3518 expmed_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
3519 rtx target, int unsignedp, int max_cost)
3521 enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3522 unsigned HOST_WIDE_INT cnst1;
3523 int extra_cost;
3524 bool sign_adjust = false;
3525 enum mult_variant variant;
3526 struct algorithm alg;
3527 rtx tem;
3528 bool speed = optimize_insn_for_speed_p ();
3530 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3531 /* We can't support modes wider than HOST_BITS_PER_WIDE_INT. */
3532 gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3534 cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3536 /* We can't optimize modes wider than BITS_PER_WORD.
3537 ??? We might be able to perform double-word arithmetic if
3538 mode == word_mode, however all the cost calculations in
3539 synth_mult etc. assume single-word operations. */
3540 if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3541 return expmed_mult_highpart_optab (mode, op0, op1, target,
3542 unsignedp, max_cost);
3544 extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3546 /* Check whether we try to multiply by a negative constant. */
3547 if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3549 sign_adjust = true;
3550 extra_cost += add_cost (speed, mode);
3553 /* See whether shift/add multiplication is cheap enough. */
3554 if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3555 max_cost - extra_cost))
3557 /* See whether the specialized multiplication optabs are
3558 cheaper than the shift/add version. */
3559 tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3560 alg.cost.cost + extra_cost);
3561 if (tem)
3562 return tem;
3564 tem = convert_to_mode (wider_mode, op0, unsignedp);
3565 tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3566 tem = extract_high_half (mode, tem);
3568 /* Adjust result for signedness. */
3569 if (sign_adjust)
3570 tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3572 return tem;
3574 return expmed_mult_highpart_optab (mode, op0, op1, target,
3575 unsignedp, max_cost);
3579 /* Expand signed modulus of OP0 by a power of two D in mode MODE. */
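/* A minimal C sketch of the branch-free sequence built below
   (illustration only; assumes a 32-bit int, two's complement and an
   arithmetic right shift).  For x % 8 with C truncating semantics:

     static int
     smod8_sketch (int x)
     {
       int sign = x >> 31;
       unsigned bias = (unsigned) sign >> (32 - 3);
       return (int) (((unsigned) x + bias) & 7u) - (int) bias;
     }

   where sign is 0 or -1 and bias is 0 or 7.  The XOR-based variant
   chosen when logical shifts are expensive computes the same value as
   ((x ^ sign) - sign) & 7, undone afterwards by another XOR and
   subtract.  */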
3581 static rtx
3582 expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3584 unsigned HOST_WIDE_INT masklow, maskhigh;
3585 rtx result, temp, shift, label;
3586 int logd;
3588 logd = floor_log2 (d);
3589 result = gen_reg_rtx (mode);
3591 /* Avoid conditional branches when they're expensive. */
3592 if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3593 && optimize_insn_for_speed_p ())
3595 rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3596 mode, 0, -1);
3597 if (signmask)
3599 signmask = force_reg (mode, signmask);
3600 masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3601 shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3603 /* Use the rtx_cost of a LSHIFTRT instruction to determine
3604 which instruction sequence to use. If logical right shifts
3605 are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
3606 use a LSHIFTRT, 1 ADD, 1 SUB and an AND. */
3608 temp = gen_rtx_LSHIFTRT (mode, result, shift);
3609 if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3610 || (set_src_cost (temp, optimize_insn_for_speed_p ())
3611 > COSTS_N_INSNS (2)))
3613 temp = expand_binop (mode, xor_optab, op0, signmask,
3614 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3615 temp = expand_binop (mode, sub_optab, temp, signmask,
3616 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3617 temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3618 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3619 temp = expand_binop (mode, xor_optab, temp, signmask,
3620 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3621 temp = expand_binop (mode, sub_optab, temp, signmask,
3622 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3624 else
3626 signmask = expand_binop (mode, lshr_optab, signmask, shift,
3627 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3628 signmask = force_reg (mode, signmask);
3630 temp = expand_binop (mode, add_optab, op0, signmask,
3631 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3632 temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3633 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3634 temp = expand_binop (mode, sub_optab, temp, signmask,
3635 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3637 return temp;
3641 /* Mask contains the mode's signbit and the significant bits of the
3642 modulus. By including the signbit in the operation, many targets
3643 can avoid an explicit compare operation in the following comparison
3644 against zero. */
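/* Illustrative numbers (not from the original sources): with 32-bit
   SImode and d == 8 the mask below is 0x80000007.  A nonnegative OP0 is
   reduced directly to OP0 & 7, which is already the answer, while a
   negative OP0 keeps its sign bit set, so the GE test falls through to
   the fix-up that sign-extends the low bits; e.g. -13 & 0x80000007 is
   0x80000003, which the fix-up turns into -5.  */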
3646 masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3647 if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
3649 masklow |= (HOST_WIDE_INT) -1 << (GET_MODE_BITSIZE (mode) - 1);
3650 maskhigh = -1;
3652 else
3653 maskhigh = (HOST_WIDE_INT) -1
3654 << (GET_MODE_BITSIZE (mode) - HOST_BITS_PER_WIDE_INT - 1);
3656 temp = expand_binop (mode, and_optab, op0,
3657 immed_double_const (masklow, maskhigh, mode),
3658 result, 1, OPTAB_LIB_WIDEN);
3659 if (temp != result)
3660 emit_move_insn (result, temp);
3662 label = gen_label_rtx ();
3663 do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3665 temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3666 0, OPTAB_LIB_WIDEN);
3667 masklow = (HOST_WIDE_INT) -1 << logd;
3668 maskhigh = -1;
3669 temp = expand_binop (mode, ior_optab, temp,
3670 immed_double_const (masklow, maskhigh, mode),
3671 result, 1, OPTAB_LIB_WIDEN);
3672 temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3673 0, OPTAB_LIB_WIDEN);
3674 if (temp != result)
3675 emit_move_insn (result, temp);
3676 emit_label (label);
3677 return result;
3680 /* Expand signed division of OP0 by a power of two D in mode MODE.
3681 This routine is only called for positive values of D. */
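/* A minimal C sketch of the transformation used below (illustration
   only; assumes a 32-bit int and an arithmetic right shift): division
   by 2**k that truncates toward zero adds 2**k - 1 to negative
   dividends before shifting, e.g. for x / 8

     static int
     sdiv8_sketch (int x)
     {
       int bias = (x >> 31) & 7;
       return (x + bias) >> 3;
     }

   which is what the store-flag, conditional-move and branch sequences
   below all compute.  */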
3683 static rtx
3684 expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3686 rtx temp, label;
3687 int logd;
3689 logd = floor_log2 (d);
3691 if (d == 2
3692 && BRANCH_COST (optimize_insn_for_speed_p (),
3693 false) >= 1)
3695 temp = gen_reg_rtx (mode);
3696 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3697 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3698 0, OPTAB_LIB_WIDEN);
3699 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3702 #ifdef HAVE_conditional_move
3703 if (BRANCH_COST (optimize_insn_for_speed_p (), false)
3704 >= 2)
3706 rtx temp2;
3708 /* ??? emit_conditional_move forces a stack adjustment via
3709 compare_from_rtx so, if the sequence is discarded, it will
3710 be lost. Do it now instead. */
3711 do_pending_stack_adjust ();
3713 start_sequence ();
3714 temp2 = copy_to_mode_reg (mode, op0);
3715 temp = expand_binop (mode, add_optab, temp2, GEN_INT (d-1),
3716 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3717 temp = force_reg (mode, temp);
3719 /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */
3720 temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3721 mode, temp, temp2, mode, 0);
3722 if (temp2)
3724 rtx seq = get_insns ();
3725 end_sequence ();
3726 emit_insn (seq);
3727 return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
3729 end_sequence ();
3731 #endif
3733 if (BRANCH_COST (optimize_insn_for_speed_p (),
3734 false) >= 2)
3736 int ushift = GET_MODE_BITSIZE (mode) - logd;
3738 temp = gen_reg_rtx (mode);
3739 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3740 if (shift_cost (optimize_insn_for_speed_p (), mode, ushift)
3741 > COSTS_N_INSNS (1))
3742 temp = expand_binop (mode, and_optab, temp, GEN_INT (d - 1),
3743 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3744 else
3745 temp = expand_shift (RSHIFT_EXPR, mode, temp,
3746 ushift, NULL_RTX, 1);
3747 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3748 0, OPTAB_LIB_WIDEN);
3749 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3752 label = gen_label_rtx ();
3753 temp = copy_to_mode_reg (mode, op0);
3754 do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3755 expand_inc (temp, GEN_INT (d - 1));
3756 emit_label (label);
3757 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3760 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3761 if that is convenient, and returning where the result is.
3762 You may request either the quotient or the remainder as the result;
3763 specify REM_FLAG nonzero to get the remainder.
3765 CODE is the expression code for which kind of division this is;
3766 it controls how rounding is done. MODE is the machine mode to use.
3767 UNSIGNEDP nonzero means do unsigned division. */
3769 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3770 and then correct it by or'ing in missing high bits
3771 if result of ANDI is nonzero.
3772 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3773 This could optimize to a bfexts instruction.
3774 But C doesn't use these operations, so their optimizations are
3775 left for later. */
3776 /* ??? For modulo, we don't actually need the highpart of the first product,
3777 the low part will do nicely. And for small divisors, the second multiply
3778 can also be a low-part only multiply or even be completely left out.
3779 E.g. to calculate the remainder of a division by 3 with a 32 bit
3780 multiply, multiply with 0x55555556 and extract the upper two bits;
3781 the result is exact for inputs up to 0x1fffffff.
3782 The input range can be reduced by using cross-sum rules.
3783 For odd divisors >= 3, the following table gives right shift counts
3784 so that if a number is shifted by an integer multiple of the given
3785 amount, the remainder stays the same:
3786 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3787 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3788 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3789 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3790 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3792 Cross-sum rules for even numbers can be derived by leaving as many bits
3793 to the right alone as the divisor has zeros to the right.
3794 E.g. if x is an unsigned 32 bit number:
3795 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
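/* A concrete sketch of the by-3 example above (illustration only,
   assuming 32-bit unsigned arithmetic that wraps):

     static unsigned
     rem3_sketch (unsigned n)
     {
       return (n * 0x55555556u) >> 30;
     }

   is exact for n <= 0x1fffffff, because 0x55555556 == (2**32 + 2) / 3,
   so the top two bits of the wrapped 32-bit product carry n mod 3 for
   small enough n.  */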
3799 expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3800 rtx op0, rtx op1, rtx target, int unsignedp)
3802 enum machine_mode compute_mode;
3803 rtx tquotient;
3804 rtx quotient = 0, remainder = 0;
3805 rtx last;
3806 int size;
3807 rtx insn;
3808 optab optab1, optab2;
3809 int op1_is_constant, op1_is_pow2 = 0;
3810 int max_cost, extra_cost;
3811 static HOST_WIDE_INT last_div_const = 0;
3812 static HOST_WIDE_INT ext_op1;
3813 bool speed = optimize_insn_for_speed_p ();
3815 op1_is_constant = CONST_INT_P (op1);
3816 if (op1_is_constant)
3818 ext_op1 = INTVAL (op1);
3819 if (unsignedp)
3820 ext_op1 &= GET_MODE_MASK (mode);
3821 op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3822 || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3826 This is the structure of expand_divmod:
3828 First comes code to fix up the operands so we can perform the operations
3829 correctly and efficiently.
3831 Second comes a switch statement with code specific for each rounding mode.
3832 For some special operands this code emits all RTL for the desired
3833 operation, for other cases, it generates only a quotient and stores it in
3834 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
3835 to indicate that it has not done anything.
3837 Last comes code that finishes the operation. If QUOTIENT is set and
3838 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
3839 QUOTIENT is not set, it is computed using trunc rounding.
3841 We try to generate special code for division and remainder when OP1 is a
3842 constant. If |OP1| = 2**n we can use shifts and some other fast
3843 operations. For other values of OP1, we compute a carefully selected
3844 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3845 by m.
3847 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3848 half of the product. Different strategies for generating the product are
3849 implemented in expmed_mult_highpart.
3851 If what we actually want is the remainder, we generate that by another
3852 by-constant multiplication and a subtraction. */
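/* A worked example of the multiply-by-reciprocal path (illustrative
   figures, assuming a 32-bit unsigned SImode): for OP0 / 7 the exact
   multiplier is 0x124924925 with a post-shift of 3.  It needs 33 bits,
   so the TRUNC_DIV code below multiplies by the low word 0x24924925 and
   compensates:

     t1 = high32 (n * 0x24924925);
     q  = (t1 + ((n - t1) >> 1)) >> 2;

   where high32 stands for the upper 32 bits of the 64-bit product;
   e.g. n == 100 gives t1 == 14 and then (14 + 43) >> 2 == 14 == 100 / 7.  */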
3854 /* We shouldn't be called with OP1 == const1_rtx, but some of the
3855 code below will malfunction if we are, so check here and handle
3856 the special case if so. */
3857 if (op1 == const1_rtx)
3858 return rem_flag ? const0_rtx : op0;
3860 /* When dividing by -1, we could get an overflow.
3861 negv_optab can handle overflows. */
3862 if (! unsignedp && op1 == constm1_rtx)
3864 if (rem_flag)
3865 return const0_rtx;
3866 return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT
3867 ? negv_optab : neg_optab, op0, target, 0);
3870 if (target
3871 /* Don't use the function value register as a target
3872 since we have to read it as well as write it,
3873 and function-inlining gets confused by this. */
3874 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3875 /* Don't clobber an operand while doing a multi-step calculation. */
3876 || ((rem_flag || op1_is_constant)
3877 && (reg_mentioned_p (target, op0)
3878 || (MEM_P (op0) && MEM_P (target))))
3879 || reg_mentioned_p (target, op1)
3880 || (MEM_P (op1) && MEM_P (target))))
3881 target = 0;
3883 /* Get the mode in which to perform this computation. Normally it will
3884 be MODE, but sometimes we can't do the desired operation in MODE.
3885 If so, pick a wider mode in which we can do the operation. Convert
3886 to that mode at the start to avoid repeated conversions.
3888 First see what operations we need. These depend on the expression
3889 we are evaluating. (We assume that divxx3 insns exist under the
3890 same conditions as modxx3 insns do and that these insns don't normally
3891 fail. If these assumptions are not correct, we may generate less
3892 efficient code in some cases.)
3894 Then see if we find a mode in which we can open-code that operation
3895 (either a division, modulus, or shift). Finally, check for the smallest
3896 mode for which we can do the operation with a library call. */
3898 /* We might want to refine this now that we have division-by-constant
3899 optimization. Since expmed_mult_highpart tries so many variants, it is
3900 not straightforward to generalize this. Maybe we should make an array
3901 of possible modes in init_expmed? Save this for GCC 2.7. */
3903 optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3904 ? (unsignedp ? lshr_optab : ashr_optab)
3905 : (unsignedp ? udiv_optab : sdiv_optab));
3906 optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3907 ? optab1
3908 : (unsignedp ? udivmod_optab : sdivmod_optab));
3910 for (compute_mode = mode; compute_mode != VOIDmode;
3911 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3912 if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
3913 || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
3914 break;
3916 if (compute_mode == VOIDmode)
3917 for (compute_mode = mode; compute_mode != VOIDmode;
3918 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3919 if (optab_libfunc (optab1, compute_mode)
3920 || optab_libfunc (optab2, compute_mode))
3921 break;
3923 /* If we still couldn't find a mode, use MODE, but expand_binop will
3924 probably die. */
3925 if (compute_mode == VOIDmode)
3926 compute_mode = mode;
3928 if (target && GET_MODE (target) == compute_mode)
3929 tquotient = target;
3930 else
3931 tquotient = gen_reg_rtx (compute_mode);
3933 size = GET_MODE_BITSIZE (compute_mode);
3934 #if 0
3935 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3936 (mode), and thereby get better code when OP1 is a constant. Do that
3937 later. It will require going over all usages of SIZE below. */
3938 size = GET_MODE_BITSIZE (mode);
3939 #endif
3941 /* Only deduct something for a REM if the last divide done was
3942 for a different constant. Then set the constant of the last
3943 divide. */
3944 max_cost = (unsignedp
3945 ? udiv_cost (speed, compute_mode)
3946 : sdiv_cost (speed, compute_mode));
3947 if (rem_flag && ! (last_div_const != 0 && op1_is_constant
3948 && INTVAL (op1) == last_div_const))
3949 max_cost -= (mul_cost (speed, compute_mode)
3950 + add_cost (speed, compute_mode));
3952 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
3954 /* Now convert to the best mode to use. */
3955 if (compute_mode != mode)
3957 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
3958 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
3960 /* convert_modes may have placed op1 into a register, so we
3961 must recompute the following. */
3962 op1_is_constant = CONST_INT_P (op1);
3963 op1_is_pow2 = (op1_is_constant
3964 && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
3965 || (! unsignedp
3966 && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1)))))) ;
3969 /* If one of the operands is a volatile MEM, copy it into a register. */
3971 if (MEM_P (op0) && MEM_VOLATILE_P (op0))
3972 op0 = force_reg (compute_mode, op0);
3973 if (MEM_P (op1) && MEM_VOLATILE_P (op1))
3974 op1 = force_reg (compute_mode, op1);
3976 /* If we need the remainder or if OP1 is constant, we need to
3977 put OP0 in a register in case it has any queued subexpressions. */
3978 if (rem_flag || op1_is_constant)
3979 op0 = force_reg (compute_mode, op0);
3981 last = get_last_insn ();
3983 /* Promote floor rounding to trunc rounding for unsigned operations. */
3984 if (unsignedp)
3986 if (code == FLOOR_DIV_EXPR)
3987 code = TRUNC_DIV_EXPR;
3988 if (code == FLOOR_MOD_EXPR)
3989 code = TRUNC_MOD_EXPR;
3990 if (code == EXACT_DIV_EXPR && op1_is_pow2)
3991 code = TRUNC_DIV_EXPR;
3994 if (op1 != const0_rtx)
3995 switch (code)
3997 case TRUNC_MOD_EXPR:
3998 case TRUNC_DIV_EXPR:
3999 if (op1_is_constant)
4001 if (unsignedp)
4003 unsigned HOST_WIDE_INT mh, ml;
4004 int pre_shift, post_shift;
4005 int dummy;
4006 unsigned HOST_WIDE_INT d = (INTVAL (op1)
4007 & GET_MODE_MASK (compute_mode));
4009 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4011 pre_shift = floor_log2 (d);
4012 if (rem_flag)
4014 remainder
4015 = expand_binop (compute_mode, and_optab, op0,
4016 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4017 remainder, 1,
4018 OPTAB_LIB_WIDEN);
4019 if (remainder)
4020 return gen_lowpart (mode, remainder);
4022 quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4023 pre_shift, tquotient, 1);
4025 else if (size <= HOST_BITS_PER_WIDE_INT)
4027 if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
4029 /* Most significant bit of divisor is set; emit an scc
4030 insn. */
4031 quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4032 compute_mode, 1, 1);
4034 else
4036 /* Find a suitable multiplier and right shift count
4037 instead of multiplying with D. */
4039 mh = choose_multiplier (d, size, size,
4040 &ml, &post_shift, &dummy);
4042 /* If the suggested multiplier is more than SIZE bits,
4043 we can do better for even divisors, using an
4044 initial right shift. */
4045 if (mh != 0 && (d & 1) == 0)
4047 pre_shift = floor_log2 (d & -d);
4048 mh = choose_multiplier (d >> pre_shift, size,
4049 size - pre_shift,
4050 &ml, &post_shift, &dummy);
4051 gcc_assert (!mh);
4053 else
4054 pre_shift = 0;
4056 if (mh != 0)
4058 rtx t1, t2, t3, t4;
4060 if (post_shift - 1 >= BITS_PER_WORD)
4061 goto fail1;
4063 extra_cost
4064 = (shift_cost (speed, compute_mode, post_shift - 1)
4065 + shift_cost (speed, compute_mode, 1)
4066 + 2 * add_cost (speed, compute_mode));
4067 t1 = expmed_mult_highpart (compute_mode, op0,
4068 GEN_INT (ml),
4069 NULL_RTX, 1,
4070 max_cost - extra_cost);
4071 if (t1 == 0)
4072 goto fail1;
4073 t2 = force_operand (gen_rtx_MINUS (compute_mode,
4074 op0, t1),
4075 NULL_RTX);
4076 t3 = expand_shift (RSHIFT_EXPR, compute_mode,
4077 t2, 1, NULL_RTX, 1);
4078 t4 = force_operand (gen_rtx_PLUS (compute_mode,
4079 t1, t3),
4080 NULL_RTX);
4081 quotient = expand_shift
4082 (RSHIFT_EXPR, compute_mode, t4,
4083 post_shift - 1, tquotient, 1);
4085 else
4087 rtx t1, t2;
4089 if (pre_shift >= BITS_PER_WORD
4090 || post_shift >= BITS_PER_WORD)
4091 goto fail1;
4093 t1 = expand_shift
4094 (RSHIFT_EXPR, compute_mode, op0,
4095 pre_shift, NULL_RTX, 1);
4096 extra_cost
4097 = (shift_cost (speed, compute_mode, pre_shift)
4098 + shift_cost (speed, compute_mode, post_shift));
4099 t2 = expmed_mult_highpart (compute_mode, t1,
4100 GEN_INT (ml),
4101 NULL_RTX, 1,
4102 max_cost - extra_cost);
4103 if (t2 == 0)
4104 goto fail1;
4105 quotient = expand_shift
4106 (RSHIFT_EXPR, compute_mode, t2,
4107 post_shift, tquotient, 1);
4111 else /* Too wide mode to use tricky code */
4112 break;
4114 insn = get_last_insn ();
4115 if (insn != last)
4116 set_dst_reg_note (insn, REG_EQUAL,
4117 gen_rtx_UDIV (compute_mode, op0, op1),
4118 quotient);
4120 else /* TRUNC_DIV, signed */
4122 unsigned HOST_WIDE_INT ml;
4123 int lgup, post_shift;
4124 rtx mlr;
4125 HOST_WIDE_INT d = INTVAL (op1);
4126 unsigned HOST_WIDE_INT abs_d;
4128 /* Since d might be INT_MIN, we have to cast to
4129 unsigned HOST_WIDE_INT before negating to avoid
4130 undefined signed overflow. */
4131 abs_d = (d >= 0
4132 ? (unsigned HOST_WIDE_INT) d
4133 : - (unsigned HOST_WIDE_INT) d);
4135 /* n rem d = n rem -d */
4136 if (rem_flag && d < 0)
4138 d = abs_d;
4139 op1 = gen_int_mode (abs_d, compute_mode);
4142 if (d == 1)
4143 quotient = op0;
4144 else if (d == -1)
4145 quotient = expand_unop (compute_mode, neg_optab, op0,
4146 tquotient, 0);
4147 else if (HOST_BITS_PER_WIDE_INT >= size
4148 && abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4150 /* This case is not handled correctly below. */
4151 quotient = emit_store_flag (tquotient, EQ, op0, op1,
4152 compute_mode, 1, 1);
4153 if (quotient == 0)
4154 goto fail1;
4156 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4157 && (rem_flag
4158 ? smod_pow2_cheap (speed, compute_mode)
4159 : sdiv_pow2_cheap (speed, compute_mode))
4160 /* We assume that the cheap metric is true if the
4161 optab has an expander for this mode. */
4162 && ((optab_handler ((rem_flag ? smod_optab
4163 : sdiv_optab),
4164 compute_mode)
4165 != CODE_FOR_nothing)
4166 || (optab_handler (sdivmod_optab,
4167 compute_mode)
4168 != CODE_FOR_nothing)))
4170 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4172 if (rem_flag)
4174 remainder = expand_smod_pow2 (compute_mode, op0, d);
4175 if (remainder)
4176 return gen_lowpart (mode, remainder);
4179 if (sdiv_pow2_cheap (speed, compute_mode)
4180 && ((optab_handler (sdiv_optab, compute_mode)
4181 != CODE_FOR_nothing)
4182 || (optab_handler (sdivmod_optab, compute_mode)
4183 != CODE_FOR_nothing)))
4184 quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4185 compute_mode, op0,
4186 gen_int_mode (abs_d,
4187 compute_mode),
4188 NULL_RTX, 0);
4189 else
4190 quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4192 /* We have computed OP0 / abs(OP1). If OP1 is negative,
4193 negate the quotient. */
4194 if (d < 0)
4196 insn = get_last_insn ();
4197 if (insn != last
4198 && abs_d < ((unsigned HOST_WIDE_INT) 1
4199 << (HOST_BITS_PER_WIDE_INT - 1)))
4200 set_dst_reg_note (insn, REG_EQUAL,
4201 gen_rtx_DIV (compute_mode, op0,
4202 gen_int_mode
4203 (abs_d,
4204 compute_mode)),
4205 quotient);
4207 quotient = expand_unop (compute_mode, neg_optab,
4208 quotient, quotient, 0);
4211 else if (size <= HOST_BITS_PER_WIDE_INT)
4213 choose_multiplier (abs_d, size, size - 1,
4214 &ml, &post_shift, &lgup);
4215 if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4217 rtx t1, t2, t3;
4219 if (post_shift >= BITS_PER_WORD
4220 || size - 1 >= BITS_PER_WORD)
4221 goto fail1;
4223 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4224 + shift_cost (speed, compute_mode, size - 1)
4225 + add_cost (speed, compute_mode));
4226 t1 = expmed_mult_highpart (compute_mode, op0,
4227 GEN_INT (ml), NULL_RTX, 0,
4228 max_cost - extra_cost);
4229 if (t1 == 0)
4230 goto fail1;
4231 t2 = expand_shift
4232 (RSHIFT_EXPR, compute_mode, t1,
4233 post_shift, NULL_RTX, 0);
4234 t3 = expand_shift
4235 (RSHIFT_EXPR, compute_mode, op0,
4236 size - 1, NULL_RTX, 0);
4237 if (d < 0)
4238 quotient
4239 = force_operand (gen_rtx_MINUS (compute_mode,
4240 t3, t2),
4241 tquotient);
4242 else
4243 quotient
4244 = force_operand (gen_rtx_MINUS (compute_mode,
4245 t2, t3),
4246 tquotient);
4248 else
4250 rtx t1, t2, t3, t4;
4252 if (post_shift >= BITS_PER_WORD
4253 || size - 1 >= BITS_PER_WORD)
4254 goto fail1;
4256 ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4257 mlr = gen_int_mode (ml, compute_mode);
4258 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4259 + shift_cost (speed, compute_mode, size - 1)
4260 + 2 * add_cost (speed, compute_mode));
4261 t1 = expmed_mult_highpart (compute_mode, op0, mlr,
4262 NULL_RTX, 0,
4263 max_cost - extra_cost);
4264 if (t1 == 0)
4265 goto fail1;
4266 t2 = force_operand (gen_rtx_PLUS (compute_mode,
4267 t1, op0),
4268 NULL_RTX);
4269 t3 = expand_shift
4270 (RSHIFT_EXPR, compute_mode, t2,
4271 post_shift, NULL_RTX, 0);
4272 t4 = expand_shift
4273 (RSHIFT_EXPR, compute_mode, op0,
4274 size - 1, NULL_RTX, 0);
4275 if (d < 0)
4276 quotient
4277 = force_operand (gen_rtx_MINUS (compute_mode,
4278 t4, t3),
4279 tquotient);
4280 else
4281 quotient
4282 = force_operand (gen_rtx_MINUS (compute_mode,
4283 t3, t4),
4284 tquotient);
4287 else /* Too wide mode to use tricky code */
4288 break;
4290 insn = get_last_insn ();
4291 if (insn != last)
4292 set_dst_reg_note (insn, REG_EQUAL,
4293 gen_rtx_DIV (compute_mode, op0, op1),
4294 quotient);
4296 break;
4298 fail1:
4299 delete_insns_since (last);
4300 break;
4302 case FLOOR_DIV_EXPR:
4303 case FLOOR_MOD_EXPR:
4304 /* We will come here only for signed operations. */
4305 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4307 unsigned HOST_WIDE_INT mh, ml;
4308 int pre_shift, lgup, post_shift;
4309 HOST_WIDE_INT d = INTVAL (op1);
4311 if (d > 0)
4313 /* We could just as easily deal with negative constants here,
4314 but it does not seem worth the trouble for GCC 2.6. */
4315 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4317 pre_shift = floor_log2 (d);
4318 if (rem_flag)
4320 remainder = expand_binop (compute_mode, and_optab, op0,
4321 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4322 remainder, 0, OPTAB_LIB_WIDEN);
4323 if (remainder)
4324 return gen_lowpart (mode, remainder);
4326 quotient = expand_shift
4327 (RSHIFT_EXPR, compute_mode, op0,
4328 pre_shift, tquotient, 0);
4330 else
4332 rtx t1, t2, t3, t4;
4334 mh = choose_multiplier (d, size, size - 1,
4335 &ml, &post_shift, &lgup);
4336 gcc_assert (!mh);
4338 if (post_shift < BITS_PER_WORD
4339 && size - 1 < BITS_PER_WORD)
4341 t1 = expand_shift
4342 (RSHIFT_EXPR, compute_mode, op0,
4343 size - 1, NULL_RTX, 0);
4344 t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4345 NULL_RTX, 0, OPTAB_WIDEN);
4346 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4347 + shift_cost (speed, compute_mode, size - 1)
4348 + 2 * add_cost (speed, compute_mode));
4349 t3 = expmed_mult_highpart (compute_mode, t2,
4350 GEN_INT (ml), NULL_RTX, 1,
4351 max_cost - extra_cost);
4352 if (t3 != 0)
4354 t4 = expand_shift
4355 (RSHIFT_EXPR, compute_mode, t3,
4356 post_shift, NULL_RTX, 1);
4357 quotient = expand_binop (compute_mode, xor_optab,
4358 t4, t1, tquotient, 0,
4359 OPTAB_WIDEN);
4364 else
4366 rtx nsign, t1, t2, t3, t4;
4367 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4368 op0, constm1_rtx), NULL_RTX);
4369 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4370 0, OPTAB_WIDEN);
4371 nsign = expand_shift
4372 (RSHIFT_EXPR, compute_mode, t2,
4373 size - 1, NULL_RTX, 0);
4374 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4375 NULL_RTX);
4376 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4377 NULL_RTX, 0);
4378 if (t4)
4380 rtx t5;
4381 t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4382 NULL_RTX, 0);
4383 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4384 t4, t5),
4385 tquotient);
4390 if (quotient != 0)
4391 break;
4392 delete_insns_since (last);
4394 /* Try using an instruction that produces both the quotient and
4395 remainder, using truncation. We can easily compensate the quotient
4396 or remainder to get floor rounding, once we have the remainder.
4397 Notice that we compute also the final remainder value here,
4398 and return the result right away. */
4399 if (target == 0 || GET_MODE (target) != compute_mode)
4400 target = gen_reg_rtx (compute_mode);
4402 if (rem_flag)
4404 remainder
4405 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4406 quotient = gen_reg_rtx (compute_mode);
4408 else
4410 quotient
4411 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4412 remainder = gen_reg_rtx (compute_mode);
4415 if (expand_twoval_binop (sdivmod_optab, op0, op1,
4416 quotient, remainder, 0))
4418 /* This could be computed with a branch-less sequence.
4419 Save that for later. */
4420 rtx tem;
4421 rtx label = gen_label_rtx ();
4422 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4423 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4424 NULL_RTX, 0, OPTAB_WIDEN);
4425 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4426 expand_dec (quotient, const1_rtx);
4427 expand_inc (remainder, op1);
4428 emit_label (label);
4429 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4432 /* No luck with division elimination or divmod. Have to do it
4433 by conditionally adjusting op0 *and* the result. */
4435 rtx label1, label2, label3, label4, label5;
4436 rtx adjusted_op0;
4437 rtx tem;
4439 quotient = gen_reg_rtx (compute_mode);
4440 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4441 label1 = gen_label_rtx ();
4442 label2 = gen_label_rtx ();
4443 label3 = gen_label_rtx ();
4444 label4 = gen_label_rtx ();
4445 label5 = gen_label_rtx ();
4446 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4447 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4448 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4449 quotient, 0, OPTAB_LIB_WIDEN);
4450 if (tem != quotient)
4451 emit_move_insn (quotient, tem);
4452 emit_jump_insn (gen_jump (label5));
4453 emit_barrier ();
4454 emit_label (label1);
4455 expand_inc (adjusted_op0, const1_rtx);
4456 emit_jump_insn (gen_jump (label4));
4457 emit_barrier ();
4458 emit_label (label2);
4459 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4460 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4461 quotient, 0, OPTAB_LIB_WIDEN);
4462 if (tem != quotient)
4463 emit_move_insn (quotient, tem);
4464 emit_jump_insn (gen_jump (label5));
4465 emit_barrier ();
4466 emit_label (label3);
4467 expand_dec (adjusted_op0, const1_rtx);
4468 emit_label (label4);
4469 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4470 quotient, 0, OPTAB_LIB_WIDEN);
4471 if (tem != quotient)
4472 emit_move_insn (quotient, tem);
4473 expand_dec (quotient, const1_rtx);
4474 emit_label (label5);
4476 break;
4478 case CEIL_DIV_EXPR:
4479 case CEIL_MOD_EXPR:
4480 if (unsignedp)
4482 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4484 rtx t1, t2, t3;
4485 unsigned HOST_WIDE_INT d = INTVAL (op1);
4486 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4487 floor_log2 (d), tquotient, 1);
4488 t2 = expand_binop (compute_mode, and_optab, op0,
4489 GEN_INT (d - 1),
4490 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4491 t3 = gen_reg_rtx (compute_mode);
4492 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4493 compute_mode, 1, 1);
4494 if (t3 == 0)
4496 rtx lab;
4497 lab = gen_label_rtx ();
4498 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4499 expand_inc (t1, const1_rtx);
4500 emit_label (lab);
4501 quotient = t1;
4503 else
4504 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4505 t1, t3),
4506 tquotient);
4507 break;
4510 /* Try using an instruction that produces both the quotient and
4511 remainder, using truncation. We can easily compensate the
4512 quotient or remainder to get ceiling rounding, once we have the
4513 remainder. Notice that we compute also the final remainder
4514 value here, and return the result right away. */
4515 if (target == 0 || GET_MODE (target) != compute_mode)
4516 target = gen_reg_rtx (compute_mode);
4518 if (rem_flag)
4520 remainder = (REG_P (target)
4521 ? target : gen_reg_rtx (compute_mode));
4522 quotient = gen_reg_rtx (compute_mode);
4524 else
4526 quotient = (REG_P (target)
4527 ? target : gen_reg_rtx (compute_mode));
4528 remainder = gen_reg_rtx (compute_mode);
4531 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4532 remainder, 1))
4534 /* This could be computed with a branch-less sequence.
4535 Save that for later. */
4536 rtx label = gen_label_rtx ();
4537 do_cmp_and_jump (remainder, const0_rtx, EQ,
4538 compute_mode, label);
4539 expand_inc (quotient, const1_rtx);
4540 expand_dec (remainder, op1);
4541 emit_label (label);
4542 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4545 /* No luck with division elimination or divmod. Have to do it
4546 by conditionally adjusting op0 *and* the result. */
4548 rtx label1, label2;
4549 rtx adjusted_op0, tem;
4551 quotient = gen_reg_rtx (compute_mode);
4552 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4553 label1 = gen_label_rtx ();
4554 label2 = gen_label_rtx ();
4555 do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4556 compute_mode, label1);
4557 emit_move_insn (quotient, const0_rtx);
4558 emit_jump_insn (gen_jump (label2));
4559 emit_barrier ();
4560 emit_label (label1);
4561 expand_dec (adjusted_op0, const1_rtx);
4562 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4563 quotient, 1, OPTAB_LIB_WIDEN);
4564 if (tem != quotient)
4565 emit_move_insn (quotient, tem);
4566 expand_inc (quotient, const1_rtx);
4567 emit_label (label2);
4570 else /* signed */
4572 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4573 && INTVAL (op1) >= 0)
4575 /* This is extremely similar to the code for the unsigned case
4576 above. For 2.7 we should merge these variants, but for
4577 2.6.1 I don't want to touch the code for unsigned since that
4578 gets used in C. The signed case will only be used by other
4579 languages (Ada). */
4581 rtx t1, t2, t3;
4582 unsigned HOST_WIDE_INT d = INTVAL (op1);
4583 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4584 floor_log2 (d), tquotient, 0);
4585 t2 = expand_binop (compute_mode, and_optab, op0,
4586 GEN_INT (d - 1),
4587 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4588 t3 = gen_reg_rtx (compute_mode);
4589 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4590 compute_mode, 1, 1);
4591 if (t3 == 0)
4593 rtx lab;
4594 lab = gen_label_rtx ();
4595 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4596 expand_inc (t1, const1_rtx);
4597 emit_label (lab);
4598 quotient = t1;
4600 else
4601 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4602 t1, t3),
4603 tquotient);
4604 break;
4607 /* Try using an instruction that produces both the quotient and
4608 remainder, using truncation. We can easily compensate the
4609 quotient or remainder to get ceiling rounding, once we have the
4610 remainder. Notice that we compute also the final remainder
4611 value here, and return the result right away. */
4612 if (target == 0 || GET_MODE (target) != compute_mode)
4613 target = gen_reg_rtx (compute_mode);
4614 if (rem_flag)
4616 remainder= (REG_P (target)
4617 ? target : gen_reg_rtx (compute_mode));
4618 quotient = gen_reg_rtx (compute_mode);
4620 else
4622 quotient = (REG_P (target)
4623 ? target : gen_reg_rtx (compute_mode));
4624 remainder = gen_reg_rtx (compute_mode);
4627 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4628 remainder, 0))
4630 /* This could be computed with a branch-less sequence.
4631 Save that for later. */
4632 rtx tem;
4633 rtx label = gen_label_rtx ();
4634 do_cmp_and_jump (remainder, const0_rtx, EQ,
4635 compute_mode, label);
4636 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4637 NULL_RTX, 0, OPTAB_WIDEN);
4638 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4639 expand_inc (quotient, const1_rtx);
4640 expand_dec (remainder, op1);
4641 emit_label (label);
4642 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4645 /* No luck with division elimination or divmod. Have to do it
4646 by conditionally adjusting op0 *and* the result. */
4648 rtx label1, label2, label3, label4, label5;
4649 rtx adjusted_op0;
4650 rtx tem;
4652 quotient = gen_reg_rtx (compute_mode);
4653 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4654 label1 = gen_label_rtx ();
4655 label2 = gen_label_rtx ();
4656 label3 = gen_label_rtx ();
4657 label4 = gen_label_rtx ();
4658 label5 = gen_label_rtx ();
4659 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4660 do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4661 compute_mode, label1);
4662 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4663 quotient, 0, OPTAB_LIB_WIDEN);
4664 if (tem != quotient)
4665 emit_move_insn (quotient, tem);
4666 emit_jump_insn (gen_jump (label5));
4667 emit_barrier ();
4668 emit_label (label1);
4669 expand_dec (adjusted_op0, const1_rtx);
4670 emit_jump_insn (gen_jump (label4));
4671 emit_barrier ();
4672 emit_label (label2);
4673 do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4674 compute_mode, label3);
4675 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4676 quotient, 0, OPTAB_LIB_WIDEN);
4677 if (tem != quotient)
4678 emit_move_insn (quotient, tem);
4679 emit_jump_insn (gen_jump (label5));
4680 emit_barrier ();
4681 emit_label (label3);
4682 expand_inc (adjusted_op0, const1_rtx);
4683 emit_label (label4);
4684 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4685 quotient, 0, OPTAB_LIB_WIDEN);
4686 if (tem != quotient)
4687 emit_move_insn (quotient, tem);
4688 expand_inc (quotient, const1_rtx);
4689 emit_label (label5);
4692 break;
4694 case EXACT_DIV_EXPR:
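/* An exact quotient lets us divide by the odd part of D with a multiply
   by its multiplicative inverse modulo 2**size (invert_mod2n), after
   shifting out D's trailing zero bits.  As an illustration (numbers not
   taken from the sources): with 32-bit operands an exact division by 3
   is n * 0xAAAAAAAB, since 3 * 0xAAAAAAAB == 1 (mod 2**32); for
   instance 21 * 0xAAAAAAAB == 7 modulo 2**32.  */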
4695 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4697 HOST_WIDE_INT d = INTVAL (op1);
4698 unsigned HOST_WIDE_INT ml;
4699 int pre_shift;
4700 rtx t1;
4702 pre_shift = floor_log2 (d & -d);
4703 ml = invert_mod2n (d >> pre_shift, size);
4704 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4705 pre_shift, NULL_RTX, unsignedp);
4706 quotient = expand_mult (compute_mode, t1,
4707 gen_int_mode (ml, compute_mode),
4708 NULL_RTX, 1);
4710 insn = get_last_insn ();
4711 set_dst_reg_note (insn, REG_EQUAL,
4712 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4713 compute_mode, op0, op1),
4714 quotient);
4716 break;
4718 case ROUND_DIV_EXPR:
4719 case ROUND_MOD_EXPR:
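/* Round the quotient to the nearest integer, halves away from zero:
   after a truncating divmod, bump the quotient (and correct the
   remainder by OP1) whenever twice the remainder's magnitude is at
   least |OP1|; the unsigned path below tests this as
   remainder > (OP1 - 1) / 2.  */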
4720 if (unsignedp)
4722 rtx tem;
4723 rtx label;
4724 label = gen_label_rtx ();
4725 quotient = gen_reg_rtx (compute_mode);
4726 remainder = gen_reg_rtx (compute_mode);
4727 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4729 rtx tem;
4730 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4731 quotient, 1, OPTAB_LIB_WIDEN);
4732 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4733 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4734 remainder, 1, OPTAB_LIB_WIDEN);
4736 tem = plus_constant (compute_mode, op1, -1);
4737 tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1);
4738 do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4739 expand_inc (quotient, const1_rtx);
4740 expand_dec (remainder, op1);
4741 emit_label (label);
4743 else
4745 rtx abs_rem, abs_op1, tem, mask;
4746 rtx label;
4747 label = gen_label_rtx ();
4748 quotient = gen_reg_rtx (compute_mode);
4749 remainder = gen_reg_rtx (compute_mode);
4750 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4752 rtx tem;
4753 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4754 quotient, 0, OPTAB_LIB_WIDEN);
4755 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4756 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4757 remainder, 0, OPTAB_LIB_WIDEN);
4759 abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4760 abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4761 tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4762 1, NULL_RTX, 1);
4763 do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4764 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4765 NULL_RTX, 0, OPTAB_WIDEN);
4766 mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4767 size - 1, NULL_RTX, 0);
4768 tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4769 NULL_RTX, 0, OPTAB_WIDEN);
4770 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4771 NULL_RTX, 0, OPTAB_WIDEN);
4772 expand_inc (quotient, tem);
4773 tem = expand_binop (compute_mode, xor_optab, mask, op1,
4774 NULL_RTX, 0, OPTAB_WIDEN);
4775 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4776 NULL_RTX, 0, OPTAB_WIDEN);
4777 expand_dec (remainder, tem);
4778 emit_label (label);
4780 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4782 default:
4783 gcc_unreachable ();
4786 if (quotient == 0)
4788 if (target && GET_MODE (target) != compute_mode)
4789 target = 0;
4791 if (rem_flag)
4793 /* Try to produce the remainder without producing the quotient.
4794 If we seem to have a divmod pattern that does not require widening,
4795 don't try widening here. We should really have a WIDEN argument
4796 to expand_twoval_binop, since what we'd really like to do here is
4797 1) try a mod insn in compute_mode
4798 2) try a divmod insn in compute_mode
4799 3) try a div insn in compute_mode and multiply-subtract to get
4800 remainder
4801 4) try the same things with widening allowed. */
4802 remainder
4803 = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4804 op0, op1, target,
4805 unsignedp,
4806 ((optab_handler (optab2, compute_mode)
4807 != CODE_FOR_nothing)
4808 ? OPTAB_DIRECT : OPTAB_WIDEN));
4809 if (remainder == 0)
4811 /* No luck there. Can we do remainder and divide at once
4812 without a library call? */
4813 remainder = gen_reg_rtx (compute_mode);
4814 if (! expand_twoval_binop ((unsignedp
4815 ? udivmod_optab
4816 : sdivmod_optab),
4817 op0, op1,
4818 NULL_RTX, remainder, unsignedp))
4819 remainder = 0;
4822 if (remainder)
4823 return gen_lowpart (mode, remainder);
4826 /* Produce the quotient. Try a quotient insn, but not a library call.
4827 If we have a divmod in this mode, use it in preference to widening
4828 the div (for this test we assume it will not fail). Note that optab2
4829 is set to the one of the two optabs that the call below will use. */
4830 quotient
4831 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4832 op0, op1, rem_flag ? NULL_RTX : target,
4833 unsignedp,
4834 ((optab_handler (optab2, compute_mode)
4835 != CODE_FOR_nothing)
4836 ? OPTAB_DIRECT : OPTAB_WIDEN));
4838 if (quotient == 0)
4840 /* No luck there. Try a quotient-and-remainder insn,
4841 keeping the quotient alone. */
4842 quotient = gen_reg_rtx (compute_mode);
4843 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4844 op0, op1,
4845 quotient, NULL_RTX, unsignedp))
4847 quotient = 0;
4848 if (! rem_flag)
4849 /* Still no luck. If we are not computing the remainder,
4850 use a library call for the quotient. */
4851 quotient = sign_expand_binop (compute_mode,
4852 udiv_optab, sdiv_optab,
4853 op0, op1, target,
4854 unsignedp, OPTAB_LIB_WIDEN);
4859 if (rem_flag)
4861 if (target && GET_MODE (target) != compute_mode)
4862 target = 0;
4864 if (quotient == 0)
4866 /* No divide instruction either. Use library for remainder. */
4867 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4868 op0, op1, target,
4869 unsignedp, OPTAB_LIB_WIDEN);
4870 /* No remainder function. Try a quotient-and-remainder
4871 function, keeping the remainder. */
4872 if (!remainder)
4874 remainder = gen_reg_rtx (compute_mode);
4875 if (!expand_twoval_binop_libfunc
4876 (unsignedp ? udivmod_optab : sdivmod_optab,
4877 op0, op1,
4878 NULL_RTX, remainder,
4879 unsignedp ? UMOD : MOD))
4880 remainder = NULL_RTX;
4883 else
4885 /* We divided. Now finish doing X - Y * (X / Y). */
4886 remainder = expand_mult (compute_mode, quotient, op1,
4887 NULL_RTX, unsignedp);
4888 remainder = expand_binop (compute_mode, sub_optab, op0,
4889 remainder, target, unsignedp,
4890 OPTAB_LIB_WIDEN);
4894 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4897 /* Return a tree node with data type TYPE, describing the value of X.
4898 Usually this is a VAR_DECL, if there is no obvious better choice.
4899 X may be an expression; however, we only support those expressions
4900 generated by loop.c. */
4902 tree
4903 make_tree (tree type, rtx x)
4905 tree t;
4907 switch (GET_CODE (x))
4909 case CONST_INT:
4911 HOST_WIDE_INT hi = 0;
4913 if (INTVAL (x) < 0
4914 && !(TYPE_UNSIGNED (type)
4915 && (GET_MODE_BITSIZE (TYPE_MODE (type))
4916 < HOST_BITS_PER_WIDE_INT)))
4917 hi = -1;
4919 t = build_int_cst_wide (type, INTVAL (x), hi);
4921 return t;
4924 case CONST_DOUBLE:
4925 if (GET_MODE (x) == VOIDmode)
4926 t = build_int_cst_wide (type,
4927 CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
4928 else
4930 REAL_VALUE_TYPE d;
4932 REAL_VALUE_FROM_CONST_DOUBLE (d, x);
4933 t = build_real (type, d);
4936 return t;
4938 case CONST_VECTOR:
4940 int units = CONST_VECTOR_NUNITS (x);
4941 tree itype = TREE_TYPE (type);
4942 tree *elts;
4943 int i;
4945 /* Build a tree with vector elements. */
4946 elts = XALLOCAVEC (tree, units);
4947 for (i = units - 1; i >= 0; --i)
4949 rtx elt = CONST_VECTOR_ELT (x, i);
4950 elts[i] = make_tree (itype, elt);
4953 return build_vector (type, elts);
4956 case PLUS:
4957 return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4958 make_tree (type, XEXP (x, 1)));
4960 case MINUS:
4961 return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4962 make_tree (type, XEXP (x, 1)));
4964 case NEG:
4965 return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
4967 case MULT:
4968 return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
4969 make_tree (type, XEXP (x, 1)));
4971 case ASHIFT:
4972 return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
4973 make_tree (type, XEXP (x, 1)));
4975 case LSHIFTRT:
4976 t = unsigned_type_for (type);
4977 return fold_convert (type, build2 (RSHIFT_EXPR, t,
4978 make_tree (t, XEXP (x, 0)),
4979 make_tree (type, XEXP (x, 1))));
4981 case ASHIFTRT:
4982 t = signed_type_for (type);
4983 return fold_convert (type, build2 (RSHIFT_EXPR, t,
4984 make_tree (t, XEXP (x, 0)),
4985 make_tree (type, XEXP (x, 1))));
4987 case DIV:
4988 if (TREE_CODE (type) != REAL_TYPE)
4989 t = signed_type_for (type);
4990 else
4991 t = type;
4993 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
4994 make_tree (t, XEXP (x, 0)),
4995 make_tree (t, XEXP (x, 1))));
4996 case UDIV:
4997 t = unsigned_type_for (type);
4998 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
4999 make_tree (t, XEXP (x, 0)),
5000 make_tree (t, XEXP (x, 1))));
5002 case SIGN_EXTEND:
5003 case ZERO_EXTEND:
5004 t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5005 GET_CODE (x) == ZERO_EXTEND);
5006 return fold_convert (type, make_tree (t, XEXP (x, 0)));
5008 case CONST:
5009 return make_tree (type, XEXP (x, 0));
5011 case SYMBOL_REF:
5012 t = SYMBOL_REF_DECL (x);
5013 if (t)
5014 return fold_convert (type, build_fold_addr_expr (t));
5015 /* else fall through. */
5017 default:
5018 t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5020 /* If TYPE is a POINTER_TYPE, we might need to convert X from
5021 address mode to pointer mode. */
5022 if (POINTER_TYPE_P (type))
5023 x = convert_memory_address_addr_space
5024 (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5026 /* Note that we do *not* use SET_DECL_RTL here, because we do not
5027 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
5028 t->decl_with_rtl.rtl = x;
5030 return t;
5034 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5035 and returning TARGET.
5037 If TARGET is 0, a pseudo-register or constant is returned. */
5040 expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
5042 rtx tem = 0;
5044 if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5045 tem = simplify_binary_operation (AND, mode, op0, op1);
5046 if (tem == 0)
5047 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5049 if (target == 0)
5050 target = tem;
5051 else if (tem != target)
5052 emit_move_insn (target, tem);
5053 return target;
5056 /* Helper function for emit_store_flag. */
5057 static rtx
5058 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5059 enum machine_mode mode, enum machine_mode compare_mode,
5060 int unsignedp, rtx x, rtx y, int normalizep,
5061 enum machine_mode target_mode)
5063 struct expand_operand ops[4];
5064 rtx op0, last, comparison, subtarget;
5065 enum machine_mode result_mode = insn_data[(int) icode].operand[0].mode;
5067 last = get_last_insn ();
5068 x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5069 y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5070 if (!x || !y)
5072 delete_insns_since (last);
5073 return NULL_RTX;
5076 if (target_mode == VOIDmode)
5077 target_mode = result_mode;
5078 if (!target)
5079 target = gen_reg_rtx (target_mode);
5081 comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5083 create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5084 create_fixed_operand (&ops[1], comparison);
5085 create_fixed_operand (&ops[2], x);
5086 create_fixed_operand (&ops[3], y);
5087 if (!maybe_expand_insn (icode, 4, ops))
5089 delete_insns_since (last);
5090 return NULL_RTX;
5092 subtarget = ops[0].value;
5094 /* If we are converting to a wider mode, first convert to
5095 TARGET_MODE, then normalize. This produces better combining
5096 opportunities on machines that have a SIGN_EXTRACT when we are
5097 testing a single bit. This mostly benefits the 68k.
5099 If STORE_FLAG_VALUE does not have the sign bit set when
5100 interpreted in MODE, we can do this conversion as unsigned, which
5101 is usually more efficient. */
5102 if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
5104 convert_move (target, subtarget,
5105 val_signbit_known_clear_p (result_mode,
5106 STORE_FLAG_VALUE));
5107 op0 = target;
5108 result_mode = target_mode;
5110 else
5111 op0 = subtarget;
5113 /* If we want to keep subexpressions around, don't reuse our last
5114 target. */
5115 if (optimize)
5116 subtarget = 0;
5118 /* Now normalize to the proper value in MODE. Sometimes we don't
5119 have to do anything. */
5120 if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5122 /* STORE_FLAG_VALUE might be the most negative number, so write
5123 the comparison this way to avoid a compile-time warning. */
5124 else if (- normalizep == STORE_FLAG_VALUE)
5125 op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5127 /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5128 it hard to use a value of just the sign bit due to ANSI integer
5129 constant typing rules. */
5130 else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5131 op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5132 GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5133 normalizep == 1);
5134 else
5136 gcc_assert (STORE_FLAG_VALUE & 1);
5138 op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5139 if (normalizep == -1)
5140 op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5143 /* If we were converting to a smaller mode, do the conversion now. */
5144 if (target_mode != result_mode)
5146 convert_move (target, op0, 0);
5147 return target;
5149 else
5150 return op0;
5154 /* A subroutine of emit_store_flag only including "tricks" that do not
5155 need a recursive call. These are kept separate to avoid infinite
5156 loops. */
5158 static rtx
5159 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5160 enum machine_mode mode, int unsignedp, int normalizep,
5161 enum machine_mode target_mode)
5163 rtx subtarget;
5164 enum insn_code icode;
5165 enum machine_mode compare_mode;
5166 enum mode_class mclass;
5167 enum rtx_code scode;
5168 rtx tem;
5170 if (unsignedp)
5171 code = unsigned_condition (code);
5172 scode = swap_condition (code);
5174 /* If one operand is constant, make it the second one. Only do this
5175 if the other operand is not constant as well. */
5177 if (swap_commutative_operands_p (op0, op1))
5179 tem = op0;
5180 op0 = op1;
5181 op1 = tem;
5182 code = swap_condition (code);
5185 if (mode == VOIDmode)
5186 mode = GET_MODE (op0);
5188 /* For some comparisons with 1 and -1, we can convert this to
5189 comparisons with zero. This will often produce more opportunities for
5190 store-flag insns. */
5192 switch (code)
5194 case LT:
5195 if (op1 == const1_rtx)
5196 op1 = const0_rtx, code = LE;
5197 break;
5198 case LE:
5199 if (op1 == constm1_rtx)
5200 op1 = const0_rtx, code = LT;
5201 break;
5202 case GE:
5203 if (op1 == const1_rtx)
5204 op1 = const0_rtx, code = GT;
5205 break;
5206 case GT:
5207 if (op1 == constm1_rtx)
5208 op1 = const0_rtx, code = GE;
5209 break;
5210 case GEU:
5211 if (op1 == const1_rtx)
5212 op1 = const0_rtx, code = NE;
5213 break;
5214 case LTU:
5215 if (op1 == const1_rtx)
5216 op1 = const0_rtx, code = EQ;
5217 break;
5218 default:
5219 break;
5222 /* If we are comparing a double-word integer with zero or -1, we can
5223 convert the comparison into one involving a single word. */
5224 if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5225 && GET_MODE_CLASS (mode) == MODE_INT
5226 && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5228 if ((code == EQ || code == NE)
5229 && (op1 == const0_rtx || op1 == constm1_rtx))
5231 rtx op00, op01;
5233 /* Do a logical OR or AND of the two words and compare the
5234 result. */
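/* For instance, a double-word value is zero iff (high | low) == 0 and
   is -1 iff (high & low) == -1, so a single word-mode store-flag on the
   combined word suffices.  */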
5235 op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5236 op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5237 tem = expand_binop (word_mode,
5238 op1 == const0_rtx ? ior_optab : and_optab,
5239 op00, op01, NULL_RTX, unsignedp,
5240 OPTAB_DIRECT);
5242 if (tem != 0)
5243 tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5244 unsignedp, normalizep);
5246 else if ((code == LT || code == GE) && op1 == const0_rtx)
5248 rtx op0h;
5250 /* If testing the sign bit, can just test on high word. */
5251 op0h = simplify_gen_subreg (word_mode, op0, mode,
5252 subreg_highpart_offset (word_mode,
5253 mode));
5254 tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5255 unsignedp, normalizep);
5257 else
5258 tem = NULL_RTX;
5260 if (tem)
5262 if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5263 return tem;
5264 if (!target)
5265 target = gen_reg_rtx (target_mode);
5267 convert_move (target, tem,
5268 !val_signbit_known_set_p (word_mode,
5269 (normalizep ? normalizep
5270 : STORE_FLAG_VALUE)));
5271 return target;
5275 /* If this is A < 0 or A >= 0, we can do this by taking the ones
5276 complement of A (for GE) and shifting the sign bit to the low bit. */
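/* For example, with 32-bit operands (A < 0) is (unsigned) A >> 31 and
   (A >= 0) is (unsigned) ~A >> 31; using an arithmetic shift instead
   yields the 0 / -1 forms when a -1/0 result is wanted.  */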
5277 if (op1 == const0_rtx && (code == LT || code == GE)
5278 && GET_MODE_CLASS (mode) == MODE_INT
5279 && (normalizep || STORE_FLAG_VALUE == 1
5280 || val_signbit_p (mode, STORE_FLAG_VALUE)))
5282 subtarget = target;
5284 if (!target)
5285 target_mode = mode;
5287 /* If the result is to be wider than OP0, it is best to convert it
5288 first. If it is to be narrower, it is *incorrect* to convert it
5289 first. */
5290 else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5292 op0 = convert_modes (target_mode, mode, op0, 0);
5293 mode = target_mode;
5296 if (target_mode != mode)
5297 subtarget = 0;
5299 if (code == GE)
5300 op0 = expand_unop (mode, one_cmpl_optab, op0,
5301 ((STORE_FLAG_VALUE == 1 || normalizep)
5302 ? 0 : subtarget), 0);
5304 if (STORE_FLAG_VALUE == 1 || normalizep)
5305 /* If we are supposed to produce a 0/1 value, we want to do
5306 a logical shift from the sign bit to the low-order bit; for
5307 a -1/0 value, we do an arithmetic shift. */
5308 op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5309 GET_MODE_BITSIZE (mode) - 1,
5310 subtarget, normalizep != -1);
5312 if (mode != target_mode)
5313 op0 = convert_modes (target_mode, mode, op0, 0);
5315 return op0;
5318 mclass = GET_MODE_CLASS (mode);
5319 for (compare_mode = mode; compare_mode != VOIDmode;
5320 compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5322 enum machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5323 icode = optab_handler (cstore_optab, optab_mode);
5324 if (icode != CODE_FOR_nothing)
5326 do_pending_stack_adjust ();
5327 tem = emit_cstore (target, icode, code, mode, compare_mode,
5328 unsignedp, op0, op1, normalizep, target_mode);
5329 if (tem)
5330 return tem;
5332 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5334 tem = emit_cstore (target, icode, scode, mode, compare_mode,
5335 unsignedp, op1, op0, normalizep, target_mode);
5336 if (tem)
5337 return tem;
5339 break;
5343 return 0;
5346 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5347 and storing in TARGET. Normally return TARGET.
5348 Return 0 if that cannot be done.
5350 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
5351 it is VOIDmode, they cannot both be CONST_INT.
5353 UNSIGNEDP is for the case where we have to widen the operands
5354 to perform the operation. It says to use zero-extension.
5356 NORMALIZEP is 1 if we should convert the result to be either zero
5357 or one. NORMALIZEP is -1 if we should convert the result to be
5358 either zero or -1. If NORMALIZEP is zero, the result will be left
5359 "raw" out of the scc insn. */
5361 rtx
5362 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5363 enum machine_mode mode, int unsignedp, int normalizep)
5365 enum machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5366 enum rtx_code rcode;
5367 rtx subtarget;
5368 rtx tem, last, trueval;
5370 tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5371 target_mode);
5372 if (tem)
5373 return tem;
5375 /* If we reached here, we can't do this with a scc insn; however, there
5376 are some comparisons that can be done in other ways. Don't do any
5377 of these cases if branches are very cheap. */
5378 if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5379 return 0;
5381 /* See what we need to return. We can only return 1, -1, or the
5382 sign bit. */
5384 if (normalizep == 0)
5386 if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5387 normalizep = STORE_FLAG_VALUE;
5389 else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5390 ;
5391 else
5392 return 0;
5395 last = get_last_insn ();
5397 /* If optimizing, use different pseudo registers for each insn, instead
5398 of reusing the same pseudo. This leads to better CSE, but slows
5399 down the compiler, since there are more pseudos. */
5400 subtarget = (!optimize
5401 && (target_mode == mode)) ? target : NULL_RTX;
5402 trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5404 /* For floating-point comparisons, try the reverse comparison or try
5405 changing the "orderedness" of the comparison. */
5406 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5408 enum rtx_code first_code;
5409 bool and_them;
5411 rcode = reverse_condition_maybe_unordered (code);
5412 if (can_compare_p (rcode, mode, ccp_store_flag)
5413 && (code == ORDERED || code == UNORDERED
5414 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5415 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5417 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5418 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5420 /* For the reverse comparison, use either an addition or a XOR. */
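/* E.g. to get a 0/-1 result for "a < b" when STORE_FLAG_VALUE is 1:
   compute the reversed test t = (a UNGE b) as 0/1; t + (-1) is then -1
   exactly when a < b.  In the XOR case the reversed result is computed
   directly as 0/TRUEVAL and XORed with TRUEVAL to invert it.  */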
5421 if (want_add
5422 && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5423 optimize_insn_for_speed_p ()) == 0)
5425 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5426 STORE_FLAG_VALUE, target_mode);
5427 if (tem)
5428 return expand_binop (target_mode, add_optab, tem,
5429 GEN_INT (normalizep),
5430 target, 0, OPTAB_WIDEN);
5432 else if (!want_add
5433 && rtx_cost (trueval, XOR, 1,
5434 optimize_insn_for_speed_p ()) == 0)
5436 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5437 normalizep, target_mode);
5438 if (tem)
5439 return expand_binop (target_mode, xor_optab, tem, trueval,
5440 target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
5444 delete_insns_since (last);
5446 /* Cannot split ORDERED and UNORDERED; only try the above trick. */
5447 if (code == ORDERED || code == UNORDERED)
5448 return 0;
5450 and_them = split_comparison (code, mode, &first_code, &code);
5452 /* If there are no NaNs, the first comparison should always fall through.
5453 Effectively change the comparison to the other one. */
5454 if (!HONOR_NANS (mode))
5456 gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5457 return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5458 target_mode);
5461 #ifdef HAVE_conditional_move
5462 /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5463 conditional move. */
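/* For instance "a UNLT b" splits into UNORDERED (a, b) || LT (a, b):
   TEM holds the UNORDERED flag, and the conditional move yields TRUEVAL
   when a < b and TEM otherwise.  In the "and them" case the move yields
   TEM when the second comparison holds and 0 otherwise.  */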
5464 tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5465 normalizep, target_mode);
5466 if (tem == 0)
5467 return 0;
5469 if (and_them)
5470 tem = emit_conditional_move (target, code, op0, op1, mode,
5471 tem, const0_rtx, GET_MODE (tem), 0);
5472 else
5473 tem = emit_conditional_move (target, code, op0, op1, mode,
5474 trueval, tem, GET_MODE (tem), 0);
5476 if (tem == 0)
5477 delete_insns_since (last);
5478 return tem;
5479 #else
5480 return 0;
5481 #endif
5484 /* The remaining tricks only apply to integer comparisons. */
5486 if (GET_MODE_CLASS (mode) != MODE_INT)
5487 return 0;
5489 /* If this is an equality comparison of integers, we can try to exclusive-or
5490 (or subtract) the two operands and use a recursive call to try the
5491 comparison with zero. Don't do any of these cases if branches are
5492 very cheap. */
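/* In other words, "a == b" becomes "(a ^ b) == 0" (or "(a - b) == 0" when
   no XOR pattern is available), and likewise for "a != b".  */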
5494 if ((code == EQ || code == NE) && op1 != const0_rtx)
5496 tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5497 OPTAB_WIDEN);
5499 if (tem == 0)
5500 tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5501 OPTAB_WIDEN);
5502 if (tem != 0)
5503 tem = emit_store_flag (target, code, tem, const0_rtx,
5504 mode, unsignedp, normalizep);
5505 if (tem != 0)
5506 return tem;
5508 delete_insns_since (last);
5511 /* For integer comparisons, try the reverse comparison. However, for
5512 small X, and if we would have to extend it anyway, implementing "X != 0"
5513 as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0". */
5514 rcode = reverse_condition (code);
5515 if (can_compare_p (rcode, mode, ccp_store_flag)
5516 && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5517 && code == NE
5518 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5519 && op1 == const0_rtx))
5521 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5522 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5524 /* Again, for the reverse comparison, use either an addition or a XOR. */
5525 if (want_add
5526 && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5527 optimize_insn_for_speed_p ()) == 0)
5529 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5530 STORE_FLAG_VALUE, target_mode);
5531 if (tem != 0)
5532 tem = expand_binop (target_mode, add_optab, tem,
5533 GEN_INT (normalizep), target, 0, OPTAB_WIDEN);
5535 else if (!want_add
5536 && rtx_cost (trueval, XOR, 1,
5537 optimize_insn_for_speed_p ()) == 0)
5539 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5540 normalizep, target_mode);
5541 if (tem != 0)
5542 tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5543 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5546 if (tem != 0)
5547 return tem;
5548 delete_insns_since (last);
5551 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5552 the constant zero. Reject all other comparisons at this point. Only
5553 do LE and GT if branches are expensive, since these comparisons are
5554 expensive on 2-operand machines. */
5556 if (op1 != const0_rtx
5557 || (code != EQ && code != NE
5558 && (BRANCH_COST (optimize_insn_for_speed_p (),
5559 false) <= 1 || (code != LE && code != GT))))
5560 return 0;
5562 /* Try to put the result of the comparison in the sign bit. Assume we can't
5563 do the necessary operation below. */
5565 tem = 0;
5567 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
5568 the sign bit set. */
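/* Worked example in 8 bits:  A = 3  ->  3 | 2 = 0x03, sign clear;
   A = 0  ->  0 | 0xff = 0xff, sign set;  A = -5  ->  sign already set.  */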
5570 if (code == LE)
5572 /* This is destructive, so SUBTARGET can't be OP0. */
5573 if (rtx_equal_p (subtarget, op0))
5574 subtarget = 0;
5576 tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5577 OPTAB_WIDEN);
5578 if (tem)
5579 tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5580 OPTAB_WIDEN);
5583 /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5584 number of bits in the mode of OP0, minus one. */
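/* Worked example in 8 bits (BITS == 7):  A = 5  ->  (5 >> 7) - 5 = -5,
   sign set;  A = 0  ->  0, sign clear;  A = -3  ->  (-1) - (-3) = 2,
   sign clear.  */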
5586 if (code == GT)
5588 if (rtx_equal_p (subtarget, op0))
5589 subtarget = 0;
5591 tem = expand_shift (RSHIFT_EXPR, mode, op0,
5592 GET_MODE_BITSIZE (mode) - 1,
5593 subtarget, 0);
5594 tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5595 OPTAB_WIDEN);
5598 if (code == EQ || code == NE)
5600 /* For EQ or NE, one way to do the comparison is to apply an operation
5601 that converts the operand into a positive number if it is nonzero
5602 or zero if it was originally zero. Then, for EQ, we subtract 1 and
5603 for NE we negate. This puts the result in the sign bit. Then we
5604 normalize with a shift, if needed.
5606 Two operations that can do the above actions are ABS and FFS, so try
5607 them. If that doesn't work, and MODE is smaller than a full word,
5608 we can use zero-extension to the wider mode (an unsigned conversion)
5609 as the operation. */
5611 /* Note that ABS doesn't yield a positive number for INT_MIN, but
5612 that is compensated by the subsequent overflow when subtracting
5613 one / negating. */
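/* With the ABS path, for instance, "A == 0" becomes "ABS (A) - 1", whose
   sign bit is set only when A is zero, and "A != 0" becomes "- ABS (A)",
   which is negative exactly for nonzero A (including A == INT_MIN, where
   ABS wraps but the sign still comes out right).  */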
5615 if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5616 tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5617 else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5618 tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5619 else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5621 tem = convert_modes (word_mode, mode, op0, 1);
5622 mode = word_mode;
5625 if (tem != 0)
5627 if (code == EQ)
5628 tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5629 0, OPTAB_WIDEN);
5630 else
5631 tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5634 /* If we couldn't do it that way, for NE we can "or" the two's complement
5635 of the value with itself. For EQ, we take the one's complement of
5636 that "or", which is an extra insn, so we only handle EQ if branches
5637 are expensive. */
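/* E.g. in 8 bits:  A = 3  ->  (-3) | 3 = 0xfd | 0x03 = 0xff, sign set;
   A = 0  ->  0 | 0 = 0, sign clear.  The sign bit of (-A | A) is thus the
   "A != 0" flag, and its one's complement gives "A == 0".  */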
5639 if (tem == 0
5640 && (code == NE
5641 || BRANCH_COST (optimize_insn_for_speed_p (),
5642 false) > 1))
5644 if (rtx_equal_p (subtarget, op0))
5645 subtarget = 0;
5647 tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5648 tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5649 OPTAB_WIDEN);
5651 if (tem && code == EQ)
5652 tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5656 if (tem && normalizep)
5657 tem = expand_shift (RSHIFT_EXPR, mode, tem,
5658 GET_MODE_BITSIZE (mode) - 1,
5659 subtarget, normalizep == 1);
5661 if (tem)
5663 if (!target)
5664 ;
5665 else if (GET_MODE (tem) != target_mode)
5667 convert_move (target, tem, 0);
5668 tem = target;
5670 else if (!subtarget)
5672 emit_move_insn (target, tem);
5673 tem = target;
5676 else
5677 delete_insns_since (last);
5679 return tem;
5682 /* Like emit_store_flag, but always succeeds. */
5684 rtx
5685 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5686 enum machine_mode mode, int unsignedp, int normalizep)
5688 rtx tem, label;
5689 rtx trueval, falseval;
5691 /* First see if emit_store_flag can do the job. */
5692 tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5693 if (tem != 0)
5694 return tem;
5696 if (!target)
5697 target = gen_reg_rtx (word_mode);
5699 /* If this failed, we have to do this with set/compare/jump/set code.
5700 For foo != 0, if foo is in OP0, just replace it with 1 if nonzero. */
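/* The sequence emitted for that special case is essentially:
     if (target == 0) goto L;
     target = TRUEVAL;
   L:
   so a zero TARGET stays zero and any nonzero TARGET is replaced by the
   normalized true value.  */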
5701 trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5702 if (code == NE
5703 && GET_MODE_CLASS (mode) == MODE_INT
5704 && REG_P (target)
5705 && op0 == target
5706 && op1 == const0_rtx)
5708 label = gen_label_rtx ();
5709 do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp,
5710 mode, NULL_RTX, NULL_RTX, label, -1);
5711 emit_move_insn (target, trueval);
5712 emit_label (label);
5713 return target;
5716 if (!REG_P (target)
5717 || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5718 target = gen_reg_rtx (GET_MODE (target));
5720 /* Jump in the right direction if the target cannot implement CODE
5721 but can jump on its reverse condition. */
5722 falseval = const0_rtx;
5723 if (! can_compare_p (code, mode, ccp_jump)
5724 && (! FLOAT_MODE_P (mode)
5725 || code == ORDERED || code == UNORDERED
5726 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5727 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5729 enum rtx_code rcode;
5730 if (FLOAT_MODE_P (mode))
5731 rcode = reverse_condition_maybe_unordered (code);
5732 else
5733 rcode = reverse_condition (code);
5735 /* Canonicalize to UNORDERED for the libcall. */
5736 if (can_compare_p (rcode, mode, ccp_jump)
5737 || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5739 falseval = trueval;
5740 trueval = const0_rtx;
5741 code = rcode;
5745 emit_move_insn (target, trueval);
5746 label = gen_label_rtx ();
5747 do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5748 NULL_RTX, label, -1);
5750 emit_move_insn (target, falseval);
5751 emit_label (label);
5753 return target;
5756 /* Perform possibly multi-word comparison and conditional jump to LABEL
5757 if ARG1 OP ARG2 is true, where ARG1 and ARG2 are of mode MODE. This is
5758 now a thin wrapper around do_compare_rtx_and_jump. */
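/* E.g. do_cmp_and_jump (x, y, LTU, SImode, label) branches to LABEL when
   x < y as unsigned SImode values; the unsignedness is deduced from the
   comparison code.  */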
5760 static void
5761 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
5762 rtx label)
5764 int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5765 do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode,
5766 NULL_RTX, NULL_RTX, label, -1);