gcc/expmed.c

   1 /* Medium-level subroutines: convert bit-field store and extract
   2    and shifts, multiplies and divides to rtl instructions.
   3    Copyright (C) 1987-2013 Free Software Foundation, Inc.
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 3, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21
  22 #include "config.h"
  23 #include "system.h"
  24 #include "coretypes.h"
  25 #include "tm.h"
  26 #include "diagnostic-core.h"
  27 #include "rtl.h"
  28 #include "tree.h"
  29 #include "tm_p.h"
  30 #include "flags.h"
  31 #include "insn-config.h"
  32 #include "expr.h"
  33 #include "optabs.h"
  34 #include "recog.h"
  35 #include "langhooks.h"
  36 #include "df.h"
  37 #include "target.h"
  38 #include "expmed.h"
   39
      /* Default instance of struct target_expmed.  When SWITCHABLE_TARGET is
         nonzero, this_target_expmed is a pointer initialized to its address.  */
   40 struct target_expmed default_target_expmed;
   41 #if SWITCHABLE_TARGET
   42 struct target_expmed *this_target_expmed = &default_target_expmed;
   43 #endif
  44
  45 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
  46                                    unsigned HOST_WIDE_INT,
  47                                    unsigned HOST_WIDE_INT,
  48                                    unsigned HOST_WIDE_INT,
  49                                    rtx);
  50 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  51                                    unsigned HOST_WIDE_INT,
  52                                    unsigned HOST_WIDE_INT,
  53                                    unsigned HOST_WIDE_INT,
  54                                    rtx);
  55 static rtx extract_fixed_bit_field (enum machine_mode, rtx,
  56                                     unsigned HOST_WIDE_INT,
  57                                     unsigned HOST_WIDE_INT, rtx, int);
  58 static rtx mask_rtx (enum machine_mode, int, int, int);
  59 static rtx lshift_value (enum machine_mode, unsigned HOST_WIDE_INT, int);
  60 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  61                                     unsigned HOST_WIDE_INT, int);
  62 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
  63 static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
  64 static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
  65
   66 /* Test whether a value is zero or a power of two.  X & (X - 1) clears
         the lowest set bit of X, so the result is zero exactly when X has at
         most one bit set.  X is evaluated twice, so it must not have side
         effects.  */
   67 #define EXACT_POWER_OF_2_OR_ZERO_P(x) \
   68   (((x) & ((x) - (unsigned HOST_WIDE_INT) 1)) == 0)
   69
      /* Scratch expressions used by init_expmed to query operation costs.
         The rtx_def members are objects embedded directly in this struct;
         init_expmed sets their codes and operands once, and
         init_expmed_one_mode changes their modes before each cost query.
         The shapes noted below are the ones init_expmed builds.  */
   70 struct init_expmed_rtl
   71 {
   72   struct rtx_def reg;         /* REG used as the leaf operand.  */
   73   struct rtx_def plus;        /* (plus reg reg)  */
   74   struct rtx_def neg;         /* (neg reg)  */
   75   struct rtx_def mult;        /* (mult reg reg)  */
   76   struct rtx_def sdiv;        /* (div reg reg)  */
   77   struct rtx_def udiv;        /* (udiv reg reg)  */
   78   struct rtx_def sdiv_32;     /* (div reg 32)  */
   79   struct rtx_def smod_32;     /* (mod reg 32)  */
   80   struct rtx_def wide_mult;   /* (mult zext zext)  */
   81   struct rtx_def wide_lshr;   /* (lshiftrt wide_mult N)  */
   82   struct rtx_def wide_trunc;  /* (truncate wide_lshr)  */
   83   struct rtx_def shift;       /* (ashift reg N)  */
   84   struct rtx_def shift_mult;  /* (mult reg 2**N)  */
   85   struct rtx_def shift_add;   /* (plus shift_mult reg)  */
   86   struct rtx_def shift_sub0;  /* (minus shift_mult reg)  */
   87   struct rtx_def shift_sub1;  /* (minus reg shift_mult)  */
   88   struct rtx_def zext;        /* (zero_extend reg)  */
   89   struct rtx_def trunc;       /* (truncate reg)  */
   90
        /* pow2[M] is the constant 1 << M and cint[M] the constant M, for
           M >= 1; init_expmed leaves index 0 of both arrays null.  */
   91   rtx pow2[MAX_BITS_PER_WORD];
   92   rtx cint[MAX_BITS_PER_WORD];
   93 };
  94
  95 static void
  96 init_expmed_one_conv (struct init_expmed_rtl *all, enum machine_mode to_mode,
  97                       enum machine_mode from_mode, bool speed)
  98 {
  99   int to_size, from_size;
 100   rtx which;
 101
 102   /* We're given no information about the true size of a partial integer,
 103      only the size of the "full" integer it requires for storage.  For
 104      comparison purposes here, reduce the bit size by one in that case.  */
 105   to_size = (GET_MODE_BITSIZE (to_mode)
 106              - (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT));
 107   from_size = (GET_MODE_BITSIZE (from_mode)
 108                - (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT));
 109
 110   /* Assume cost of zero-extend and sign-extend is the same.  */
 111   which = (to_size < from_size ? &all->trunc : &all->zext);
 112
 113   PUT_MODE (&all->reg, from_mode);
 114   set_convert_cost (to_mode, from_mode, speed, set_src_cost (which, speed));
 115 }
  116
      /* Compute and record the operation costs for MODE at the SPEED setting,
         using the scratch expressions in ALL.

         This records the add, neg, mult, sdiv and udiv costs; whether signed
         division and modulus by a power of two (probed with the constant 32)
         count as cheap; the shift, shift-add and shift-sub costs for each
         shift count; the conversion cost from every mode between
         MIN_MODE_INT and MAX_MODE_INT when MODE is a scalar integer mode; and
         the widening and highpart multiply costs when MODE is a MODE_INT mode
         that has a wider mode.  */
  117 static void
  118 init_expmed_one_mode (struct init_expmed_rtl *all,
  119                       enum machine_mode mode, int speed)
  120 {
  121   int m, n, mode_bitsize;
  122   enum machine_mode mode_from;
  123
  124   mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
  125
        /* Give the scratch expressions mode MODE.  wide_mult and wide_lshr
           are not touched here; they get the wider mode further down.  */
  126   PUT_MODE (&all->reg, mode);
  127   PUT_MODE (&all->plus, mode);
  128   PUT_MODE (&all->neg, mode);
  129   PUT_MODE (&all->mult, mode);
  130   PUT_MODE (&all->sdiv, mode);
  131   PUT_MODE (&all->udiv, mode);
  132   PUT_MODE (&all->sdiv_32, mode);
  133   PUT_MODE (&all->smod_32, mode);
  134   PUT_MODE (&all->wide_trunc, mode);
  135   PUT_MODE (&all->shift, mode);
  136   PUT_MODE (&all->shift_mult, mode);
  137   PUT_MODE (&all->shift_add, mode);
  138   PUT_MODE (&all->shift_sub0, mode);
  139   PUT_MODE (&all->shift_sub1, mode);
  140   PUT_MODE (&all->zext, mode);
  141   PUT_MODE (&all->trunc, mode);
  142
  143   set_add_cost (speed, mode, set_src_cost (&all->plus, speed));
  144   set_neg_cost (speed, mode, set_src_cost (&all->neg, speed));
  145   set_mul_cost (speed, mode, set_src_cost (&all->mult, speed));
  146   set_sdiv_cost (speed, mode, set_src_cost (&all->sdiv, speed));
  147   set_udiv_cost (speed, mode, set_src_cost (&all->udiv, speed));
  148
        /* Division by 32 counts as cheap if it costs no more than two adds,
           modulus by 32 if it costs no more than four adds.  */
  149   set_sdiv_pow2_cheap (speed, mode, (set_src_cost (&all->sdiv_32, speed)
  150                                      <= 2 * add_cost (speed, mode)));
  151   set_smod_pow2_cheap (speed, mode, (set_src_cost (&all->smod_32, speed)
  152                                      <= 4 * add_cost (speed, mode)));
  153
        /* A shift count of zero: the shift itself is recorded as free and the
           shift-add/shift-sub forms cost one add.  */
  154   set_shift_cost (speed, mode, 0, 0);
  155   {
  156     int cost = add_cost (speed, mode);
  157     set_shiftadd_cost (speed, mode, 0, cost);
  158     set_shiftsub0_cost (speed, mode, 0, cost);
  159     set_shiftsub1_cost (speed, mode, 0, cost);
  160   }
  161
        /* Shift counts 1 .. N-1.  N is capped at MAX_BITS_PER_WORD, the size
           of the cint and pow2 tables.  Updating shift_mult's second operand
           also changes shift_add, shift_sub0 and shift_sub1, which have
           shift_mult as an operand.  */
  162   n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
  163   for (m = 1; m < n; m++)
  164     {
  165       XEXP (&all->shift, 1) = all->cint[m];
  166       XEXP (&all->shift_mult, 1) = all->pow2[m];
  167
  168       set_shift_cost (speed, mode, m, set_src_cost (&all->shift, speed));
  169       set_shiftadd_cost (speed, mode, m, set_src_cost (&all->shift_add, speed));
  170       set_shiftsub0_cost (speed, mode, m, set_src_cost (&all->shift_sub0, speed));
  171       set_shiftsub1_cost (speed, mode, m, set_src_cost (&all->shift_sub1, speed));
  172     }
  173
        /* Conversion costs into MODE from each mode in the MODE_INT range.  */
  174   if (SCALAR_INT_MODE_P (mode))
  175     {
  176       for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
  177            mode_from = (enum machine_mode)(mode_from + 1))
  178         init_expmed_one_conv (all, mode, mode_from, speed);
  179     }
        /* Widening multiply is costed as wide_mult in the wider mode, and
           highpart multiply as wide_trunc, i.e. the truncation to MODE of
           wide_mult shifted right by MODE_BITSIZE.  */
  180   if (GET_MODE_CLASS (mode) == MODE_INT)
  181     {
  182       enum machine_mode  wider_mode = GET_MODE_WIDER_MODE (mode);
  183       if (wider_mode != VOIDmode)
  184         {
  185           PUT_MODE (&all->zext, wider_mode);
  186           PUT_MODE (&all->wide_mult, wider_mode);
  187           PUT_MODE (&all->wide_lshr, wider_mode);
  188           XEXP (&all->wide_lshr, 1) = GEN_INT (mode_bitsize);
  189
  190           set_mul_widen_cost (speed, wider_mode,
  191                               set_src_cost (&all->wide_mult, speed));
  192           set_mul_highpart_cost (speed, mode,
  193                                  set_src_cost (&all->wide_trunc, speed));
  194         }
  195     }
  196 }
  197
      /* Initialize the cost tables used by this file.

         Build the scratch expressions in a local init_expmed_rtl.  Then, for
         each of the two SPEED settings, record the cost of zero and call
         init_expmed_one_mode for every mode from MIN_MODE_INT to
         MAX_MODE_INT, and likewise for the MODE_PARTIAL_INT and
         MODE_VECTOR_INT ranges when their MIN_ mode is not VOIDmode.
         Finally clear the alg hash entries if the hash was already in use
         (otherwise mark it as in use) and call default_rtl_profile.  */
  198 void
  199 init_expmed (void)
  200 {
  201   struct init_expmed_rtl all;
  202   enum machine_mode mode;
  203   int m, speed;
  204
        /* Index 0 of pow2 and cint stays null: the loop starts at 1.  */
  205   memset (&all, 0, sizeof all);
  206   for (m = 1; m < MAX_BITS_PER_WORD; m++)
  207     {
  208       all.pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
  209       all.cint[m] = GEN_INT (m);
  210     }
  211
  212   PUT_CODE (&all.reg, REG);
  213   /* Avoid using hard regs in ways which may be unsupported.  */
  214   SET_REGNO (&all.reg, LAST_VIRTUAL_REGISTER + 1);
  215
  216   PUT_CODE (&all.plus, PLUS);
  217   XEXP (&all.plus, 0) = &all.reg;
  218   XEXP (&all.plus, 1) = &all.reg;
  219
  220   PUT_CODE (&all.neg, NEG);
  221   XEXP (&all.neg, 0) = &all.reg;
  222
  223   PUT_CODE (&all.mult, MULT);
  224   XEXP (&all.mult, 0) = &all.reg;
  225   XEXP (&all.mult, 1) = &all.reg;
  226
  227   PUT_CODE (&all.sdiv, DIV);
  228   XEXP (&all.sdiv, 0) = &all.reg;
  229   XEXP (&all.sdiv, 1) = &all.reg;
  230
  231   PUT_CODE (&all.udiv, UDIV);
  232   XEXP (&all.udiv, 0) = &all.reg;
  233   XEXP (&all.udiv, 1) = &all.reg;
  234
        /* Division and modulus by the constant 32; the table entry is reused
           when the cint table is large enough to contain it.  */
  235   PUT_CODE (&all.sdiv_32, DIV);
  236   XEXP (&all.sdiv_32, 0) = &all.reg;
  237   XEXP (&all.sdiv_32, 1) = 32 < MAX_BITS_PER_WORD ? all.cint[32] : GEN_INT (32);
  238
  239   PUT_CODE (&all.smod_32, MOD);
  240   XEXP (&all.smod_32, 0) = &all.reg;
  241   XEXP (&all.smod_32, 1) = XEXP (&all.sdiv_32, 1);
  242
  243   PUT_CODE (&all.zext, ZERO_EXTEND);
  244   XEXP (&all.zext, 0) = &all.reg;
  245
        /* wide_trunc wraps wide_lshr, which wraps wide_mult.  The shift count
           of wide_lshr is filled in by init_expmed_one_mode.  */
  246   PUT_CODE (&all.wide_mult, MULT);
  247   XEXP (&all.wide_mult, 0) = &all.zext;
  248   XEXP (&all.wide_mult, 1) = &all.zext;
  249
  250   PUT_CODE (&all.wide_lshr, LSHIFTRT);
  251   XEXP (&all.wide_lshr, 0) = &all.wide_mult;
  252
  253   PUT_CODE (&all.wide_trunc, TRUNCATE);
  254   XEXP (&all.wide_trunc, 0) = &all.wide_lshr;
  255
        /* The second operands of shift and shift_mult are set per shift
           count by init_expmed_one_mode.  */
  256   PUT_CODE (&all.shift, ASHIFT);
  257   XEXP (&all.shift, 0) = &all.reg;
  258
  259   PUT_CODE (&all.shift_mult, MULT);
  260   XEXP (&all.shift_mult, 0) = &all.reg;
  261
  262   PUT_CODE (&all.shift_add, PLUS);
  263   XEXP (&all.shift_add, 0) = &all.shift_mult;
  264   XEXP (&all.shift_add, 1) = &all.reg;
  265
  266   PUT_CODE (&all.shift_sub0, MINUS);
  267   XEXP (&all.shift_sub0, 0) = &all.shift_mult;
  268   XEXP (&all.shift_sub0, 1) = &all.reg;
  269
  270   PUT_CODE (&all.shift_sub1, MINUS);
  271   XEXP (&all.shift_sub1, 0) = &all.reg;
  272   XEXP (&all.shift_sub1, 1) = &all.shift_mult;
  273
  274   PUT_CODE (&all.trunc, TRUNCATE);
  275   XEXP (&all.trunc, 0) = &all.reg;
  276
  277   for (speed = 0; speed < 2; speed++)
  278     {
            /* NOTE(review): presumably set so that cost queries made below
               see a profile matching SPEED -- confirm against the cost
               hooks.  */
  279       crtl->maybe_hot_insn_p = speed;
  280       set_zero_cost (speed, set_src_cost (const0_rtx, speed));
  281
  282       for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
  283            mode = (enum machine_mode)(mode + 1))
  284         init_expmed_one_mode (&all, mode, speed);
  285
  286       if (MIN_MODE_PARTIAL_INT != VOIDmode)
  287         for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
  288              mode = (enum machine_mode)(mode + 1))
  289           init_expmed_one_mode (&all, mode, speed);
  290
  291       if (MIN_MODE_VECTOR_INT != VOIDmode)
  292         for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
  293              mode = (enum machine_mode)(mode + 1))
  294           init_expmed_one_mode (&all, mode, speed);
  295     }
  296
        /* If the alg hash was already in use, zero all of its entries;
           otherwise just mark it as in use.  NOTE(review): presumably the
           entries are cleared because they were computed from the previous
           cost tables -- confirm.  */
  297   if (alg_hash_used_p ())
  298     {
  299       struct alg_hash_entry *p = alg_hash_entry_ptr (0);
  300       memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
  301     }
  302   else
  303     set_alg_hash_used_p (true);
  304   default_rtl_profile ();
  305 }
 306
  307 /* Return an rtx representing minus the value of X.
  308    MODE is the intended mode of the result,
  309    useful if X is a CONST_INT.
         The negation is first attempted with simplify_unary_operation; only
         if that returns null is expand_unop called with neg_optab.  */
  310
  311 rtx
  312 negate_rtx (enum machine_mode mode, rtx x)
  313 {
  314   rtx result = simplify_unary_operation (NEG, mode, x, mode);
  315
  316   if (result == 0)
  317     result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
  318
  319   return result;
  320 }
 321
  322 /* Adjust bitfield memory MEM so that it points to the first unit of mode
  323    MODE that contains a bitfield of size BITSIZE at bit position BITNUM.
  324    If MODE is BLKmode, return a reference to every byte in the bitfield.
  325    Set *NEW_BITNUM to the bit position of the field within the new memory.
         BITSIZE is only used in the BLKmode case, to size the reference.  */
  326
  327 static rtx
  328 narrow_bit_field_mem (rtx mem, enum machine_mode mode,
  329                       unsigned HOST_WIDE_INT bitsize,
  330                       unsigned HOST_WIDE_INT bitnum,
  331                       unsigned HOST_WIDE_INT *new_bitnum)
  332 {
  333   if (mode == BLKmode)
  334     {
            /* Start at the byte containing BITNUM; SIZE is the number of
               bytes needed to cover the field from there, rounded up.  */
  335       *new_bitnum = bitnum % BITS_PER_UNIT;
  336       HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
  337       HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
  338                             / BITS_PER_UNIT);
  339       return adjust_bitfield_address_size (mem, mode, offset, size);
  340     }
  341   else
  342     {
            /* Round BITNUM down to a multiple of the mode's size in bits and
               convert that to a byte offset.  */
  343       unsigned int unit = GET_MODE_BITSIZE (mode);
  344       *new_bitnum = bitnum % unit;
  345       HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
  346       return adjust_bitfield_address (mem, mode, offset);
  347     }
  348 }
 349
  350 /* The caller wants to perform insertion or extraction PATTERN on a
  351    bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
  352    BITREGION_START and BITREGION_END are as for store_bit_field
  353    and FIELDMODE is the natural mode of the field.
  354
  355    Search for a mode that is compatible with the memory access
  356    restrictions and (where applicable) with a register insertion or
  357    extraction.  Return the new memory on success, storing the adjusted
  358    bit position in *NEW_BITNUM.  Return null otherwise.
         Candidate modes come from a bit_field_mode_iterator built from the
         field position, the bit region, and OP0's alignment and
         volatility; null is returned when it yields no mode at all.  */
  359
  360 static rtx
  361 adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
  362                               rtx op0, HOST_WIDE_INT bitsize,
  363                               HOST_WIDE_INT bitnum,
  364                               unsigned HOST_WIDE_INT bitregion_start,
  365                               unsigned HOST_WIDE_INT bitregion_end,
  366                               enum machine_mode fieldmode,
  367                               unsigned HOST_WIDE_INT *new_bitnum)
  368 {
  369   bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
  370                                 bitregion_end, MEM_ALIGN (op0),
  371                                 MEM_VOLATILE_P (op0));
  372   enum machine_mode best_mode;
  373   if (iter.next_mode (&best_mode))
  374     {
  375       /* We can use a memory in BEST_MODE.  See whether this is true for
  376          any wider modes.  All other things being equal, we prefer to
  377          use the widest mode possible because it tends to expose more
  378          CSE opportunities.  */
  379       if (!iter.prefer_smaller_modes ())
  380         {
  381           /* Limit the search to the mode required by the corresponding
  382              register insertion or extraction instruction, if any.  */
  383           enum machine_mode limit_mode = word_mode;
  384           extraction_insn insn;
  385           if (get_best_reg_extraction_insn (&insn, pattern,
  386                                             GET_MODE_BITSIZE (best_mode),
  387                                             fieldmode))
  388             limit_mode = insn.field_mode;
  389
                /* Keep taking modes from the iterator while they are no
                   larger than LIMIT_MODE; the last one accepted wins.  */
  390           enum machine_mode wider_mode;
  391           while (iter.next_mode (&wider_mode)
  392                  && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
  393             best_mode = wider_mode;
  394         }
  395       return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
  396                                    new_bitnum);
  397     }
  398   return NULL_RTX;
  399 }
 400
  401 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
  402    a structure of mode STRUCT_MODE represents a lowpart subreg.  The subreg
  403    offset is then BITNUM / BITS_PER_UNIT.  Note that BITNUM is the first
         parameter and BITSIZE the second.  */
  404
  405 static bool
  406 lowpart_bit_field_p (unsigned HOST_WIDE_INT bitnum,
  407                      unsigned HOST_WIDE_INT bitsize,
  408                      enum machine_mode struct_mode)
  409 {
        /* With BYTES_BIG_ENDIAN the field must start on a byte boundary and
           end either at the end of STRUCT_MODE or on a word boundary.
           Otherwise it only has to start on a word boundary; BITSIZE and
           STRUCT_MODE are not consulted.  */
  410   if (BYTES_BIG_ENDIAN)
  411     return (bitnum % BITS_PER_UNIT == 0
  412             && (bitnum + bitsize == GET_MODE_BITSIZE (struct_mode)
  413                 || (bitnum + bitsize) % BITS_PER_WORD == 0));
  414   else
  415     return bitnum % BITS_PER_WORD == 0;
  416 }
 417
  418 /* Return true if OP0 is a memory and if a bitfield of size BITSIZE at
  419    bit number BITNUM can be treated as a simple value of mode MODE.
         That requires the field to start on a byte boundary and to be
         exactly as wide as MODE.  In addition, either unaligned accesses in
         MODE at OP0's alignment must not be slow, or both BITNUM and OP0's
         alignment must satisfy MODE's alignment.  */
  420
  421 static bool
  422 simple_mem_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
  423                        unsigned HOST_WIDE_INT bitnum, enum machine_mode mode)
  424 {
  425   return (MEM_P (op0)
  426           && bitnum % BITS_PER_UNIT == 0
  427           && bitsize == GET_MODE_BITSIZE (mode)
  428           && (!SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
  429               || (bitnum % GET_MODE_ALIGNMENT (mode) == 0
  430                   && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
  431 }
 432 \f
  433 /* Try to use instruction INSV to store VALUE into a field of OP0.
  434    BITSIZE and BITNUM are as for store_bit_field.
         Return true if the insertion was expanded.  Return false if BITSIZE
         is zero or wider than INSV's field mode, or if the instruction could
         not be expanded; in the latter case every instruction emitted since
         entry is deleted first.  */
  435
  436 static bool
  437 store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
  438                             unsigned HOST_WIDE_INT bitsize,
  439                             unsigned HOST_WIDE_INT bitnum, rtx value)
  440 {
  441   struct expand_operand ops[4];
  442   rtx value1;
  443   rtx xop0 = op0;
        /* Remember where we started so a failed attempt can be undone.  */
  444   rtx last = get_last_insn ();
  445   bool copy_back = false;
  446
  447   enum machine_mode op_mode = insv->field_mode;
  448   unsigned int unit = GET_MODE_BITSIZE (op_mode);
  449   if (bitsize == 0 || bitsize > unit)
  450     return false;
  451
  452   if (MEM_P (xop0))
  453     /* Get a reference to the first byte of the field.  */
  454     xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
  455                                  &bitnum);
  456   else
  457     {
  458       /* Convert from counting within OP0 to counting in OP_MODE.  */
  459       if (BYTES_BIG_ENDIAN)
  460         bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
  461
  462       /* If xop0 is a register, we need it in OP_MODE
  463          to make it acceptable to the format of insv.  */
  464       if (GET_CODE (xop0) == SUBREG)
  465         /* We can't just change the mode, because this might clobber op0,
  466            and we will need the original value of op0 if insv fails.  */
  467         xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
  468       if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
  469         xop0 = gen_lowpart_SUBREG (op_mode, xop0);
  470     }
  471
  472   /* If the destination is a paradoxical subreg such that we need a
  473      truncate to the inner mode, perform the insertion on a temporary and
  474      truncate the result to the original destination.  Note that we can't
  475      just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
  476      X) 0)) is (reg:N X).  */
  477   if (GET_CODE (xop0) == SUBREG
  478       && REG_P (SUBREG_REG (xop0))
  479       && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
  480                                          op_mode))
  481     {
  482       rtx tem = gen_reg_rtx (op_mode);
  483       emit_move_insn (tem, xop0);
  484       xop0 = tem;
  485       copy_back = true;
  486     }
  487
  488   /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
  489      "backwards" from the size of the unit we are inserting into.
  490      Otherwise, we count bits from the most significant on a
  491      BYTES/BITS_BIG_ENDIAN machine.  */
  492
  493   if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
  494     bitnum = unit - bitsize - bitnum;
  495
  496   /* Convert VALUE to op_mode (which insv insn wants) in VALUE1.  */
  497   value1 = value;
  498   if (GET_MODE (value) != op_mode)
  499     {
  500       if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
  501         {
  502           /* Optimization: Don't bother really extending VALUE
  503              if it has all the bits we will actually use.  However,
  504              if we must narrow it, be sure we do it correctly.  */
  505
  506           if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
  507             {
  508               rtx tmp;
  509
                    /* Try to simplify a subreg of VALUE first; if that
                       fails, force VALUE into a register and take the
                       subreg of that.  */
  510               tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
  511               if (! tmp)
  512                 tmp = simplify_gen_subreg (op_mode,
  513                                            force_reg (GET_MODE (value),
  514                                                       value1),
  515                                            GET_MODE (value), 0);
  516               value1 = tmp;
  517             }
  518           else
  519             value1 = gen_lowpart (op_mode, value1);
  520         }
  521       else if (CONST_INT_P (value))
  522         value1 = gen_int_mode (INTVAL (value), op_mode);
  523       else
  524         /* Parse phase is supposed to make VALUE's data type
  525            match that of the component reference, which is a type
  526            at least as wide as the field; so VALUE should have
  527            a mode that corresponds to that type.  */
  528         gcc_assert (CONSTANT_P (value));
  529     }
  530
        /* Operands passed to the insv pattern: destination, field width,
           bit position, and the value to insert.  */
  531   create_fixed_operand (&ops[0], xop0);
  532   create_integer_operand (&ops[1], bitsize);
  533   create_integer_operand (&ops[2], bitnum);
  534   create_input_operand (&ops[3], value1, op_mode);
  535   if (maybe_expand_insn (insv->icode, 4, ops))
  536     {
            /* The insertion was done on a temporary; move the result back
               into the original destination.  */
  537       if (copy_back)
  538         convert_move (op0, xop0, true);
  539       return true;
  540     }
  541   delete_insns_since (last);
  542   return false;
  543 }
 544
 545 /* A subroutine of store_bit_field, with the same arguments.  Return true
 546    if the operation could be implemented.
 547
 548    If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
 549    no other way of implementing the operation.  If FALLBACK_P is false,
 550    return false instead.  */
 551
 552 static bool
 553 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 554                    unsigned HOST_WIDE_INT bitnum,
 555                    unsigned HOST_WIDE_INT bitregion_start,
 556                    unsigned HOST_WIDE_INT bitregion_end,
 557                    enum machine_mode fieldmode,
 558                    rtx value, bool fallback_p)
 559 {
 560   rtx op0 = str_rtx;
 561   rtx orig_value;
 562
 563   while (GET_CODE (op0) == SUBREG)
 564     {
 565       /* The following line once was done only if WORDS_BIG_ENDIAN,
 566          but I think that is a mistake.  WORDS_BIG_ENDIAN is
 567          meaningful at a much higher level; when structures are copied
 568          between memory and regs, the higher-numbered regs
 569          always get higher addresses.  */
 570       int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
 571       int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
 572       int byte_offset = 0;
 573
 574       /* Paradoxical subregs need special handling on big endian machines.  */
 575       if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
 576         {
 577           int difference = inner_mode_size - outer_mode_size;
 578
 579           if (WORDS_BIG_ENDIAN)
 580             byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
 581           if (BYTES_BIG_ENDIAN)
 582             byte_offset += difference % UNITS_PER_WORD;
 583         }
 584       else
 585         byte_offset = SUBREG_BYTE (op0);
 586
 587       bitnum += byte_offset * BITS_PER_UNIT;
 588       op0 = SUBREG_REG (op0);
 589     }
 590
 591   /* No action is needed if the target is a register and if the field
 592      lies completely outside that register.  This can occur if the source
 593      code contains an out-of-bounds access to a small array.  */
 594   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
 595     return true;
 596
 597   /* Use vec_set patterns for inserting parts of vectors whenever
 598      available.  */
 599   if (VECTOR_MODE_P (GET_MODE (op0))
 600       && !MEM_P (op0)
 601       && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
 602       && fieldmode == GET_MODE_INNER (GET_MODE (op0))
 603       && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
 604       && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
 605     {
 606       struct expand_operand ops[3];
 607       enum machine_mode outermode = GET_MODE (op0);
 608       enum machine_mode innermode = GET_MODE_INNER (outermode);
 609       enum insn_code icode = optab_handler (vec_set_optab, outermode);
 610       int pos = bitnum / GET_MODE_BITSIZE (innermode);
 611
 612       create_fixed_operand (&ops[0], op0);
 613       create_input_operand (&ops[1], value, innermode);
 614       create_integer_operand (&ops[2], pos);
 615       if (maybe_expand_insn (icode, 3, ops))
 616         return true;
 617     }
 618
 619   /* If the target is a register, overwriting the entire object, or storing
 620      a full-word or multi-word field can be done with just a SUBREG.  */
 621   if (!MEM_P (op0)
 622       && bitsize == GET_MODE_BITSIZE (fieldmode)
 623       && ((bitsize == GET_MODE_BITSIZE (GET_MODE (op0)) && bitnum == 0)
 624           || (bitsize % BITS_PER_WORD == 0 && bitnum % BITS_PER_WORD == 0)))
 625     {
 626       /* Use the subreg machinery either to narrow OP0 to the required
 627          words or to cope with mode punning between equal-sized modes.
 628          In the latter case, use subreg on the rhs side, not lhs.  */
 629       rtx sub;
 630
 631       if (bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
 632         {
 633           sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
 634           if (sub)
 635             {
 636               emit_move_insn (op0, sub);
 637               return true;
 638             }
 639         }
 640       else
 641         {
 642           sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
 643                                      bitnum / BITS_PER_UNIT);
 644           if (sub)
 645             {
 646               emit_move_insn (sub, value);
 647               return true;
 648             }
 649         }
 650     }
 651
 652   /* If the target is memory, storing any naturally aligned field can be
 653      done with a simple store.  For targets that support fast unaligned
 654      memory, any naturally sized, unit aligned field can be done directly.  */
 655   if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode))
 656     {
 657       op0 = adjust_bitfield_address (op0, fieldmode, bitnum / BITS_PER_UNIT);
 658       emit_move_insn (op0, value);
 659       return true;
 660     }
 661
 662   /* Make sure we are playing with integral modes.  Pun with subregs
 663      if we aren't.  This must come after the entire register case above,
 664      since that case is valid for any mode.  The following cases are only
 665      valid for integral modes.  */
 666   {
 667     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
 668     if (imode != GET_MODE (op0))
 669       {
 670         if (MEM_P (op0))
 671           op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
 672         else
 673           {
 674             gcc_assert (imode != BLKmode);
 675             op0 = gen_lowpart (imode, op0);
 676           }
 677       }
 678   }
 679
 680   /* Storing an lsb-aligned field in a register
 681      can be done with a movstrict instruction.  */
 682
 683   if (!MEM_P (op0)
 684       && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
 685       && bitsize == GET_MODE_BITSIZE (fieldmode)
 686       && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
 687     {
 688       struct expand_operand ops[2];
 689       enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
 690       rtx arg0 = op0;
 691       unsigned HOST_WIDE_INT subreg_off;
 692
 693       if (GET_CODE (arg0) == SUBREG)
 694         {
 695           /* Else we've got some float mode source being extracted into
 696              a different float mode destination -- this combination of
 697              subregs results in Severe Tire Damage.  */
 698           gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
 699                       || GET_MODE_CLASS (fieldmode) == MODE_INT
 700                       || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
 701           arg0 = SUBREG_REG (arg0);
 702         }
 703
 704       subreg_off = bitnum / BITS_PER_UNIT;
 705       if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
 706         {
 707           arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
 708
 709           create_fixed_operand (&ops[0], arg0);
 710           /* Shrink the source operand to FIELDMODE.  */
 711           create_convert_operand_to (&ops[1], value, fieldmode, false);
 712           if (maybe_expand_insn (icode, 2, ops))
 713             return true;
 714         }
 715     }
 716
 717   /* Handle fields bigger than a word.  */
 718
 719   if (bitsize > BITS_PER_WORD)
 720     {
 721       /* Here we transfer the words of the field
 722          in the order least significant first.
 723          This is because the most significant word is the one which may
 724          be less than full.
 725          However, only do that if the value is not BLKmode.  */
 726
 727       unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
 728       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
 729       unsigned int i;
 730       rtx last;
 731
 732       /* This is the mode we must force value to, so that there will be enough
 733          subwords to extract.  Note that fieldmode will often (always?) be
 734          VOIDmode, because that is what store_field uses to indicate that this
 735          is a bit field, but passing VOIDmode to operand_subword_force
 736          is not allowed.  */
 737       fieldmode = GET_MODE (value);
 738       if (fieldmode == VOIDmode)
 739         fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
 740
 741       last = get_last_insn ();
 742       for (i = 0; i < nwords; i++)
 743         {
 744           /* If I is 0, use the low-order word in both field and target;
 745              if I is 1, use the next to lowest word; and so on.  */
 746           unsigned int wordnum = (backwards
 747                                   ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD
 748                                   - i - 1
 749                                   : i);
 750           unsigned int bit_offset = (backwards
 751                                      ? MAX ((int) bitsize - ((int) i + 1)
 752                                             * BITS_PER_WORD,
 753                                             0)
 754                                      : (int) i * BITS_PER_WORD);
 755           rtx value_word = operand_subword_force (value, wordnum, fieldmode);
 756           unsigned HOST_WIDE_INT new_bitsize =
 757             MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
 758
 759           /* If the remaining chunk doesn't have full wordsize we have
 760              to make sure that for big endian machines the higher order
 761              bits are used.  */
 762           if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
 763             value_word = simplify_expand_binop (word_mode, lshr_optab,
 764                                                 value_word,
 765                                                 GEN_INT (BITS_PER_WORD
 766                                                          - new_bitsize),
 767                                                 NULL_RTX, true,
 768                                                 OPTAB_LIB_WIDEN);
 769
 770           if (!store_bit_field_1 (op0, new_bitsize,
 771                                   bitnum + bit_offset,
 772                                   bitregion_start, bitregion_end,
 773                                   word_mode,
 774                                   value_word, fallback_p))
 775             {
 776               delete_insns_since (last);
 777               return false;
 778             }
 779         }
 780       return true;
 781     }
 782
 783   /* If VALUE has a floating-point or complex mode, access it as an
 784      integer of the corresponding size.  This can occur on a machine
 785      with 64 bit registers that uses SFmode for float.  It can also
 786      occur for unaligned float or complex fields.  */
 787   orig_value = value;
 788   if (GET_MODE (value) != VOIDmode
 789       && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
 790       && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
 791     {
 792       value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
 793       emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
 794     }
 795
 796   /* If OP0 is a multi-word register, narrow it to the affected word.
 797      If the region spans two words, defer to store_split_bit_field.  */
 798   if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
 799     {
 800       op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
 801                                  bitnum / BITS_PER_WORD * UNITS_PER_WORD);
 802       gcc_assert (op0);
 803       bitnum %= BITS_PER_WORD;
 804       if (bitnum + bitsize > BITS_PER_WORD)
 805         {
 806           if (!fallback_p)
 807             return false;
 808
 809           store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
 810                                  bitregion_end, value);
 811           return true;
 812         }
 813     }
 814
 815   /* From here on we can assume that the field to be stored in fits
 816      within a word.  If the destination is a register, it too fits
 817      in a word.  */
 818
 819   extraction_insn insv;
 820   if (!MEM_P (op0)
 821       && get_best_reg_extraction_insn (&insv, EP_insv,
 822                                        GET_MODE_BITSIZE (GET_MODE (op0)),
 823                                        fieldmode)
 824       && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
 825     return true;
 826
 827   /* If OP0 is a memory, try copying it to a register and seeing if a
 828      cheap register alternative is available.  */
 829   if (MEM_P (op0))
 830     {
 831       /* Do not use unaligned memory insvs for volatile bitfields when
 832          -fstrict-volatile-bitfields is in effect.  */
 833       if (!(MEM_VOLATILE_P (op0)
 834             && flag_strict_volatile_bitfields > 0)
 835           && get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
 836                                            fieldmode)
 837           && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
 838         return true;
 839
 840       rtx last = get_last_insn ();
 841
 842       /* Try loading part of OP0 into a register, inserting the bitfield
 843          into that, and then copying the result back to OP0.  */
 844       unsigned HOST_WIDE_INT bitpos;
 845       rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
 846                                                bitregion_start, bitregion_end,
 847                                                fieldmode, &bitpos);
 848       if (xop0)
 849         {
 850           rtx tempreg = copy_to_reg (xop0);
 851           if (store_bit_field_1 (tempreg, bitsize, bitpos,
 852                                  bitregion_start, bitregion_end,
 853                                  fieldmode, orig_value, false))
 854             {
 855               emit_move_insn (xop0, tempreg);
 856               return true;
 857             }
 858           delete_insns_since (last);
 859         }
 860     }
 861
 862   if (!fallback_p)
 863     return false;
 864
 865   store_fixed_bit_field (op0, bitsize, bitnum, bitregion_start,
 866                          bitregion_end, value);
 867   return true;
 868 }
 869
 870 /* Emit RTL that stores VALUE into the bit-field of STR_RTX that is
 871    BITSIZE bits wide and begins at bit BITNUM.
 872
 873    BITREGION_START and BITREGION_END are the bit positions of the first
 874    and of the ending bitfield in the region this field belongs to.  Both
 875    are 0 if the C++ memory model does not apply, or if we are not
 876    interested in keeping track of bitfield regions.
 877
 878    FIELDMODE is the machine mode of the FIELD_DECL node for this field;
 879    it is handed unchanged to store_bit_field_1.  */
 880
 881 void
 882 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 883                  unsigned HOST_WIDE_INT bitnum,
 884                  unsigned HOST_WIDE_INT bitregion_start,
 885                  unsigned HOST_WIDE_INT bitregion_end,
 886                  enum machine_mode fieldmode,
 887                  rtx value)
 888 {
 889   /* The C++0x memory model forbids touching bits outside the bit
 890      region, so for a MEM with a nonzero region start rebase the address
 891      and every bit position onto the beginning of that region.  */
 892   if (MEM_P (str_rtx) && bitregion_start > 0)
 893     {
 894       gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);
 895       HOST_WIDE_INT region_byte = bitregion_start / BITS_PER_UNIT;
 896       /* Make BITNUM and BITREGION_END relative to the region start.  */
 897       bitnum -= bitregion_start;
 898       bitregion_end -= bitregion_start;
 899       bitregion_start = 0;
 900
 901       HOST_WIDE_INT nbytes
 902         = (bitnum + bitsize + BITS_PER_UNIT - 1) / BITS_PER_UNIT;
 903       enum machine_mode access_mode
 904         = get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
 905                          MEM_ALIGN (str_rtx), VOIDmode,
 906                          MEM_VOLATILE_P (str_rtx));
 907       str_rtx = adjust_bitfield_address_size (str_rtx, access_mode,
 908                                               region_byte, nbytes);
 909     }
 910
 911   bool stored = store_bit_field_1 (str_rtx, bitsize, bitnum, bitregion_start,
 912                                    bitregion_end, fieldmode, value, true);
 913   if (!stored)
 914     gcc_unreachable ();
 915 }
 916 \f
 917 /* Use shifts and boolean operations to store VALUE into the BITSIZE-bit
 918    field of OP0 at bit BITNUM; BITREGION_* matter only if OP0 is a MEM.  */
 919
 920 static void
 921 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
 922                        unsigned HOST_WIDE_INT bitnum,
 923                        unsigned HOST_WIDE_INT bitregion_start,
 924                        unsigned HOST_WIDE_INT bitregion_end,
 925                        rtx value)
 926 {
 927   enum machine_mode mode;
 928   rtx temp;
 929   int all_zero = 0;
 930   int all_one = 0;
 931
 932   /* There is a case not handled here:
 933      a structure with a known alignment of just a halfword
 934      and a field split across two aligned halfwords within the structure.
 935      Or likewise a structure with a known alignment of just a byte
 936      and a field split across two bytes.
 937      Such cases are not supposed to be able to occur.  */
 938
 939   if (MEM_P (op0))
 940     {
 941       unsigned HOST_WIDE_INT maxbits = MAX_FIXED_MODE_SIZE;
 942       /* With a bit region, MAXBITS is the width of that region.  */
 943       if (bitregion_end)
 944         maxbits = bitregion_end - bitregion_start + 1;
 945
 946       /* Get the proper mode to use for this field.  We want a mode that
 947          includes the entire field.  If such a mode would be larger than
 948          a word, we won't be doing the store the normal way.
 949          We don't want a mode bigger than the destination.  */
 950
 951       mode = GET_MODE (op0);
 952       if (GET_MODE_BITSIZE (mode) == 0
 953           || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
 954         mode = word_mode;
 955       /* Strict volatile bitfields keep OP0's own mode if it fits MAXBITS.  */
 956       if (MEM_VOLATILE_P (op0)
 957           && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
 958           && GET_MODE_BITSIZE (GET_MODE (op0)) <= maxbits
 959           && flag_strict_volatile_bitfields > 0)
 960         mode = GET_MODE (op0);
 961       else
 962         mode = get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
 963                               MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
 964
 965       if (mode == VOIDmode)
 966         {
 967           /* The only way this should occur is if the field spans word
 968              boundaries.  */
 969           store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
 970                                  bitregion_end, value);
 971           return;
 972         }
 973
 974       op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
 975     }
 976
 977   mode = GET_MODE (op0);
 978   gcc_assert (SCALAR_INT_MODE_P (mode));
 979
 980   /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
 981      for invalid input, such as f5 from gcc.dg/pr48335-2.c.  */
 982
 983   if (BYTES_BIG_ENDIAN)
 984     /* BITNUM is the distance between our msb
 985        and that of the containing datum.
 986        Convert it to the distance from the lsb.  */
 987     bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
 988
 989   /* Now BITNUM is always the distance between our lsb
 990      and that of OP0.  */
 991
 992   /* Shift VALUE left by BITNUM bits.  If VALUE is not constant,
 993      we must first convert its mode to MODE.  */
 994
 995   if (CONST_INT_P (value))
 996     {
 997       /* Unsigned, so the (1 << BITSIZE) - 1 masks cannot overflow.  */
 998       unsigned HOST_WIDE_INT v = (unsigned HOST_WIDE_INT) INTVAL (value);
 999       if (bitsize < HOST_BITS_PER_WIDE_INT)
1000         v &= ((unsigned HOST_WIDE_INT) 1 << bitsize) - 1;
1001
1002       if (v == 0)
1003         all_zero = 1;
1004       else if ((bitsize < HOST_BITS_PER_WIDE_INT
1005                 && v == ((unsigned HOST_WIDE_INT) 1 << bitsize) - 1)
1006                || (bitsize == HOST_BITS_PER_WIDE_INT && ~v == 0))
1007         all_one = 1;
1008
1009       value = lshift_value (mode, v, bitnum);
1010     }
1011   else
1012     {
1013       int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
1014                       && bitnum + bitsize != GET_MODE_BITSIZE (mode));
1015       /* MUST_AND was computed from VALUE's mode before this conversion.  */
1016       if (GET_MODE (value) != mode)
1017         value = convert_to_mode (mode, value, 1);
1018
1019       if (must_and)
1020         value = expand_binop (mode, and_optab, value,
1021                               mask_rtx (mode, 0, bitsize, 0),
1022                               NULL_RTX, 1, OPTAB_LIB_WIDEN);
1023       if (bitnum > 0)
1024         value = expand_shift (LSHIFT_EXPR, mode, value,
1025                               bitnum, NULL_RTX, 1);
1026     }
1027
1028   /* Now clear the chosen bits in OP0, except that if every bit of the
1029      field is being set (ALL_ONE) we need not bother.  */
1030   /* We keep the intermediates in registers to allow CSE to combine
1031      consecutive bitfield assignments.  */
1032
1033   temp = force_reg (mode, op0);
1034
1035   if (! all_one)
1036     {
1037       temp = expand_binop (mode, and_optab, temp,
1038                            mask_rtx (mode, bitnum, bitsize, 1),
1039                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
1040       temp = force_reg (mode, temp);
1041     }
1042
1043   /* Now logical-or VALUE into OP0, unless it is zero.  */
1044
1045   if (! all_zero)
1046     {
1047       temp = expand_binop (mode, ior_optab, temp, value,
1048                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
1049       temp = force_reg (mode, temp);
1050     }
1051   /* Copy the result back unless TEMP is OP0 itself.  */
1052   if (op0 != temp)
1053     {
1054       op0 = copy_rtx (op0);
1055       emit_move_insn (op0, temp);
1056     }
1057 }
1058 \f
1059 /* Store a bit field that is split across multiple accessible memory objects.
1060
1061    OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1062    BITSIZE is the field width; BITPOS the position of its first bit
1063    (within the word).  VALUE is the value to store.  BITREGION_START
1064    and BITREGION_END are forwarded to store_fixed_bit_field.
1065
1066    This does not yet handle fields wider than BITS_PER_WORD.  */
1067
1068 static void
1069 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1070                        unsigned HOST_WIDE_INT bitpos,
1071                        unsigned HOST_WIDE_INT bitregion_start,
1072                        unsigned HOST_WIDE_INT bitregion_end,
1073                        rtx value)
1074 {
1075   unsigned int unit;
1076   unsigned int bitsdone = 0;
1077
1078   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1079      much at a time.  */
1080   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1081     unit = BITS_PER_WORD;
1082   else
1083     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1084
1085   /* If VALUE is a constant other than a CONST_INT, get it into a register in
1086      WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
1087      that VALUE might be a floating-point constant.  */
1088   if (CONSTANT_P (value) && !CONST_INT_P (value))
1089     {
1090       rtx word = gen_lowpart_common (word_mode, value);
1091
1092       if (word && (value != word))
1093         value = word;
1094       else
1095         value = gen_lowpart_common (word_mode,
1096                                     force_reg (GET_MODE (value) != VOIDmode
1097                                                ? GET_MODE (value)
1098                                                : word_mode, value));
1099     }
1100   /* Handle one piece per iteration; BITSDONE counts the bits handled.  */
1101   while (bitsdone < bitsize)
1102     {
1103       unsigned HOST_WIDE_INT thissize;
1104       rtx part, word;
1105       unsigned HOST_WIDE_INT thispos;
1106       unsigned HOST_WIDE_INT offset;
1107       /* The UNIT holding the next piece, and the bit position within it.  */
1108       offset = (bitpos + bitsdone) / unit;
1109       thispos = (bitpos + bitsdone) % unit;
1110
1111       /* When region of bytes we can touch is restricted, decrease
1112          UNIT close to the end of the region as needed.  If op0 is a REG
1113          or SUBREG of REG, don't do this, as there can't be data races
1114          on a register and we can expand shorter code in some cases.  */
1115       if (bitregion_end
1116           && unit > BITS_PER_UNIT
1117           && bitpos + bitsdone - thispos + unit > bitregion_end + 1
1118           && !REG_P (op0)
1119           && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
1120         {
1121           unit = unit / 2;
1122           continue;
1123         }
1124
1125       /* THISSIZE must not overrun a word boundary; otherwise
1126          store_fixed_bit_field would call us again and recurse forever.
1127          The constant masks below are unsigned to avoid signed overflow.  */
1128       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1129       thissize = MIN (thissize, unit - thispos);
1130
1131       if (BYTES_BIG_ENDIAN)
1132         {
1133           /* Fetch successively less significant portions.  */
1134           if (CONST_INT_P (value))
1135             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1136                              >> (bitsize - bitsdone - thissize))
1137                             & (((unsigned HOST_WIDE_INT) 1 << thissize) - 1));
1138           else
1139             {
1140               int total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1141               /* The args are chosen so that the last part includes the
1142                  lsb.  Give extract_fixed_bit_field the position it needs
1143                  (with endianness compensation) to fetch the piece we want.  */
1144               part = extract_fixed_bit_field (word_mode, value, thissize,
1145                                               total_bits - bitsize + bitsdone,
1146                                               NULL_RTX, 1);
1147             }
1148         }
1149       else
1150         {
1151           /* Fetch successively more significant portions.  */
1152           if (CONST_INT_P (value))
1153             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1154                              >> bitsdone)
1155                             & (((unsigned HOST_WIDE_INT) 1 << thissize) - 1));
1156           else
1157             part = extract_fixed_bit_field (word_mode, value, thissize,
1158                                             bitsdone, NULL_RTX, 1);
1159         }
1160
1161       /* If OP0 is a register, then handle OFFSET here.
1162
1163          When handling multiword bitfields, store_bit_field_1 may pass
1164          down a word_mode SUBREG of a larger REG for a bitfield that actually
1165          crosses a word boundary.  Thus, for a SUBREG, we must find
1166          the current word starting from the base register.  */
1167       if (GET_CODE (op0) == SUBREG)
1168         {
1169           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD)
1170                             + (offset * unit / BITS_PER_WORD);
1171           enum machine_mode sub_mode = GET_MODE (SUBREG_REG (op0));
1172           if (sub_mode != BLKmode && GET_MODE_SIZE (sub_mode) < UNITS_PER_WORD)
1173             word = word_offset ? const0_rtx : op0;
1174           else
1175             word = operand_subword_force (SUBREG_REG (op0), word_offset,
1176                                           GET_MODE (SUBREG_REG (op0)));
1177           offset &= BITS_PER_WORD / unit - 1;
1178         }
1179       else if (REG_P (op0))
1180         {
1181           enum machine_mode op0_mode = GET_MODE (op0);
1182           if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
1183             word = offset ? const0_rtx : op0;
1184           else
1185             word = operand_subword_force (op0, offset * unit / BITS_PER_WORD,
1186                                           GET_MODE (op0));
1187           offset &= BITS_PER_WORD / unit - 1;
1188         }
1189       else
1190         word = op0;
1191
1192       /* OFFSET is in UNITs, and UNIT is in bits.  If WORD is const0_rtx,
1193          it is just an out-of-bounds access.  Ignore it.  */
1194       if (word != const0_rtx)
1195         store_fixed_bit_field (word, thissize, offset * unit + thispos,
1196                                bitregion_start, bitregion_end, part);
1197       bitsdone += thissize;
1198     }
1199 }
1200 \f
1201 /* Helper for extract_bit_field_1: return X unchanged if it already has
1202    mode TMODE or MODE, otherwise return X converted to TMODE.  MODE,
1203    TMODE and UNSIGNEDP are the arguments given to extract_bit_field.  */
1204
1205 static rtx
1206 convert_extracted_bit_field (rtx x, enum machine_mode mode,
1207                              enum machine_mode tmode, bool unsignedp)
1208 {
1209   enum machine_mode xmode = GET_MODE (x);
1210
1211   /* Nothing to do when X is already in one of the acceptable modes.  */
1212   if (xmode == mode || xmode == tmode)
1213     return x;
1214
1215   /* A scalar integer TMODE can be reached by a plain conversion.  */
1216   if (SCALAR_INT_MODE_P (tmode))
1217     return convert_to_mode (tmode, x, unsignedp);
1218
1219   /* Otherwise convert to the integer mode as wide as TMODE, put that
1220      in a register and reinterpret its low part as TMODE.  */
1221   enum machine_mode int_mode
1222     = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1223   rtx int_value = convert_to_mode (int_mode, x, unsignedp);
1224   int_value = force_reg (int_mode, int_value);
1225   return gen_lowpart (tmode, int_value);
1226 }
1227
1228 /* Try to use the ext(z)v pattern described by EXTV to extract a field
1229    from OP0.  Return the extracted value on success, otherwise return
1230    null.  EXTV->field_mode is the mode of the extraction and the other
1231    arguments are as for extract_bit_field.  */
1232
1233 static rtx
1234 extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
1235                               unsigned HOST_WIDE_INT bitsize,
1236                               unsigned HOST_WIDE_INT bitnum,
1237                               int unsignedp, rtx target,
1238                               enum machine_mode mode, enum machine_mode tmode)
1239 {
1240   struct expand_operand ops[4];
1241   rtx spec_target = target;
1242   rtx spec_target_subreg = 0;
1243   enum machine_mode ext_mode = extv->field_mode;
1244   unsigned unit = GET_MODE_BITSIZE (ext_mode);
1245   /* Give up on an empty field or one wider than EXT_MODE.  */
1246   if (bitsize == 0 || unit < bitsize)
1247     return NULL_RTX;
1248
1249   if (MEM_P (op0))
1250     /* Get a reference to the first byte of the field.  */
1251     op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
1252                                 &bitnum);
1253   else
1254     {
1255       /* Convert from counting within OP0 to counting in EXT_MODE.  */
1256       if (BYTES_BIG_ENDIAN)
1257         bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1258
1259       /* A register must be in EXT_MODE to be acceptable to ext(z)v: give
1260          up on a SUBREG in another mode, wrap a REG in a lowpart SUBREG.  */
1261       if (GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1262         return NULL_RTX;
1263       if (REG_P (op0) && GET_MODE (op0) != ext_mode)
1264         op0 = gen_lowpart_SUBREG (ext_mode, op0);
1265     }
1266
1267   /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1268      "backwards" from the size of the unit we are extracting from.
1269      Otherwise, we count bits from the most significant on a
1270      BYTES/BITS_BIG_ENDIAN machine.  */
1271
1272   if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1273     bitnum = unit - bitsize - bitnum;
1274   /* With no suggested TARGET, extract into a new TMODE register.  */
1275   if (target == 0)
1276     target = spec_target = gen_reg_rtx (tmode);
1277
1278   if (GET_MODE (target) != ext_mode)
1279     {
1280       /* Don't use LHS paradoxical subreg if explicit truncation is needed
1281          between the mode of the extraction (EXT_MODE) and the target
1282          mode.  Instead, extract into a temporary EXT_MODE register and
1283          let the code after the expansion convert the result.  */
1284       if (REG_P (target)
1285           && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode))
1286         {
1287           target = gen_lowpart (ext_mode, target);
1288           if (GET_MODE_PRECISION (ext_mode)
1289               > GET_MODE_PRECISION (GET_MODE (spec_target)))
1290             spec_target_subreg = target;
1291         }
1292       else
1293         target = gen_reg_rtx (ext_mode);
1294     }
1295   /* The operands are: output, source, BITSIZE and BITNUM.  */
1296   create_output_operand (&ops[0], target, ext_mode);
1297   create_fixed_operand (&ops[1], op0);
1298   create_integer_operand (&ops[2], bitsize);
1299   create_integer_operand (&ops[3], bitnum);
1300   if (maybe_expand_insn (extv->icode, 4, ops))
1301     {
1302       target = ops[0].value;
1303       if (target == spec_target)
1304         return target;
1305       if (target == spec_target_subreg)
1306         return spec_target;
1307       return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1308     }
1309   return NULL_RTX;
1310 }
1311
1312 /* A subroutine of extract_bit_field, with the same arguments.
1313    If FALLBACK_P is true, fall back to extract_fixed_bit_field
1314    if we can find no other means of implementing the operation.
1315    If FALLBACK_P is false, return NULL instead.  */
1316
1317 static rtx
1318 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1319                      unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1320                      enum machine_mode mode, enum machine_mode tmode,
1321                      bool fallback_p)
1322 {
1323   rtx op0 = str_rtx;
1324   enum machine_mode int_mode;
1325   enum machine_mode mode1;
1326
1327   if (tmode == VOIDmode)
1328     tmode = mode;
1329
1330   while (GET_CODE (op0) == SUBREG)
1331     {
1332       bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1333       op0 = SUBREG_REG (op0);
1334     }
1335
1336   /* If we have an out-of-bounds access to a register, just return an
1337      uninitialized register of the required mode.  This can occur if the
1338      source code contains an out-of-bounds access to a small array.  */
1339   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1340     return gen_reg_rtx (tmode);
1341
1342   if (REG_P (op0)
1343       && mode == GET_MODE (op0)
1344       && bitnum == 0
1345       && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1346     {
1347       /* We're trying to extract a full register from itself.  */
1348       return op0;
1349     }
1350
1351   /* See if we can get a better vector mode before extracting.  */
1352   if (VECTOR_MODE_P (GET_MODE (op0))
1353       && !MEM_P (op0)
1354       && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1355     {
1356       enum machine_mode new_mode;
1357
1358       if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1359         new_mode = MIN_MODE_VECTOR_FLOAT;
1360       else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1361         new_mode = MIN_MODE_VECTOR_FRACT;
1362       else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1363         new_mode = MIN_MODE_VECTOR_UFRACT;
1364       else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1365         new_mode = MIN_MODE_VECTOR_ACCUM;
1366       else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1367         new_mode = MIN_MODE_VECTOR_UACCUM;
1368       else
1369         new_mode = MIN_MODE_VECTOR_INT;
1370
1371       for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1372         if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1373             && targetm.vector_mode_supported_p (new_mode))
1374           break;
1375       if (new_mode != VOIDmode)
1376         op0 = gen_lowpart (new_mode, op0);
1377     }
1378
1379   /* Use vec_extract patterns for extracting parts of vectors whenever
1380      available.  */
1381   if (VECTOR_MODE_P (GET_MODE (op0))
1382       && !MEM_P (op0)
1383       && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing
1384       && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1385           == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1386     {
1387       struct expand_operand ops[3];
1388       enum machine_mode outermode = GET_MODE (op0);
1389       enum machine_mode innermode = GET_MODE_INNER (outermode);
1390       enum insn_code icode = optab_handler (vec_extract_optab, outermode);
1391       unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1392
1393       create_output_operand (&ops[0], target, innermode);
1394       create_input_operand (&ops[1], op0, outermode);
1395       create_integer_operand (&ops[2], pos);
1396       if (maybe_expand_insn (icode, 3, ops))
1397         {
1398           target = ops[0].value;
1399           if (GET_MODE (target) != mode)
1400             return gen_lowpart (tmode, target);
1401           return target;
1402         }
1403     }
1404
1405   /* Make sure we are playing with integral modes.  Pun with subregs
1406      if we aren't.  */
1407   {
1408     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1409     if (imode != GET_MODE (op0))
1410       {
1411         if (MEM_P (op0))
1412           op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
1413         else if (imode != BLKmode)
1414           {
1415             op0 = gen_lowpart (imode, op0);
1416
1417             /* If we got a SUBREG, force it into a register since we
1418                aren't going to be able to do another SUBREG on it.  */
1419             if (GET_CODE (op0) == SUBREG)
1420               op0 = force_reg (imode, op0);
1421           }
1422         else if (REG_P (op0))
1423           {
1424             rtx reg, subreg;
1425             imode = smallest_mode_for_size (GET_MODE_BITSIZE (GET_MODE (op0)),
1426                                             MODE_INT);
1427             reg = gen_reg_rtx (imode);
1428             subreg = gen_lowpart_SUBREG (GET_MODE (op0), reg);
1429             emit_move_insn (subreg, op0);
1430             op0 = reg;
1431             bitnum += SUBREG_BYTE (subreg) * BITS_PER_UNIT;
1432           }
1433         else
1434           {
1435             HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (op0));
1436             rtx mem = assign_stack_temp (GET_MODE (op0), size);
1437             emit_move_insn (mem, op0);
1438             op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
1439           }
1440       }
1441   }
1442
1443   /* ??? We currently assume TARGET is at least as big as BITSIZE.
1444      If that's wrong, the solution is to test for it and set TARGET to 0
1445      if needed.  */
1446
1447   /* If the bitfield is volatile, we need to make sure the access
1448      remains on a type-aligned boundary.  */
1449   if (GET_CODE (op0) == MEM
1450       && MEM_VOLATILE_P (op0)
1451       && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
1452       && flag_strict_volatile_bitfields > 0)
1453     goto no_subreg_mode_swap;
1454
1455   /* Only scalar integer modes can be converted via subregs.  There is an
1456      additional problem for FP modes here in that they can have a precision
1457      which is different from the size.  mode_for_size uses precision, but
1458      we want a mode based on the size, so we must avoid calling it for FP
1459      modes.  */
1460   mode1 = mode;
1461   if (SCALAR_INT_MODE_P (tmode))
1462     {
1463       enum machine_mode try_mode = mode_for_size (bitsize,
1464                                                   GET_MODE_CLASS (tmode), 0);
1465       if (try_mode != BLKmode)
1466         mode1 = try_mode;
1467     }
1468   gcc_assert (mode1 != BLKmode);
1469
1470   /* Extraction of a full MODE1 value can be done with a subreg as long
1471      as the least significant bit of the value is the least significant
1472      bit of either OP0 or a word of OP0.  */
1473   if (!MEM_P (op0)
1474       && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
1475       && bitsize == GET_MODE_BITSIZE (mode1)
1476       && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0)))
1477     {
1478       rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1479                                      bitnum / BITS_PER_UNIT);
1480       if (sub)
1481         return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1482     }
1483
1484   /* Extraction of a full MODE1 value can be done with a load as long as
1485      the field is on a byte boundary and is sufficiently aligned.  */
1486   if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1))
1487     {
1488       op0 = adjust_bitfield_address (op0, mode1, bitnum / BITS_PER_UNIT);
1489       return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1490     }
1491
1492  no_subreg_mode_swap:
1493
1494   /* Handle fields bigger than a word.  */
1495
1496   if (bitsize > BITS_PER_WORD)
1497     {
1498       /* Here we transfer the words of the field
1499          in the order least significant first.
1500          This is because the most significant word is the one which may
1501          be less than full.  */
1502
1503       unsigned int backwards = WORDS_BIG_ENDIAN;
1504       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1505       unsigned int i;
1506       rtx last;
1507
1508       if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1509         target = gen_reg_rtx (mode);
1510
1511       /* Indicate for flow that the entire target reg is being set.  */
1512       emit_clobber (target);
1513
1514       last = get_last_insn ();
1515       for (i = 0; i < nwords; i++)
1516         {
1517           /* If I is 0, use the low-order word in both field and target;
1518              if I is 1, use the next to lowest word; and so on.  */
1519           /* Word number in TARGET to use.  */
1520           unsigned int wordnum
1521             = (backwards
1522                ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1523                : i);
1524           /* Offset from start of field in OP0.  */
1525           unsigned int bit_offset = (backwards
1526                                      ? MAX ((int) bitsize - ((int) i + 1)
1527                                             * BITS_PER_WORD,
1528                                             0)
1529                                      : (int) i * BITS_PER_WORD);
1530           rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1531           rtx result_part
1532             = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1533                                              bitsize - i * BITS_PER_WORD),
1534                                    bitnum + bit_offset, 1, target_part,
1535                                    mode, word_mode, fallback_p);
1536
1537           gcc_assert (target_part);
1538           if (!result_part)
1539             {
1540               delete_insns_since (last);
1541               return NULL;
1542             }
1543
1544           if (result_part != target_part)
1545             emit_move_insn (target_part, result_part);
1546         }
1547
1548       if (unsignedp)
1549         {
1550           /* Unless we've filled TARGET, the upper regs in a multi-reg value
1551              need to be zero'd out.  */
1552           if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1553             {
1554               unsigned int i, total_words;
1555
1556               total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1557               for (i = nwords; i < total_words; i++)
1558                 emit_move_insn
1559                   (operand_subword (target,
1560                                     backwards ? total_words - i - 1 : i,
1561                                     1, VOIDmode),
1562                    const0_rtx);
1563             }
1564           return target;
1565         }
1566
1567       /* Signed bit field: sign-extend with two arithmetic shifts.  */
1568       target = expand_shift (LSHIFT_EXPR, mode, target,
1569                              GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1570       return expand_shift (RSHIFT_EXPR, mode, target,
1571                            GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1572     }
1573
1574   /* If OP0 is a multi-word register, narrow it to the affected word.
1575      If the region spans two words, defer to extract_split_bit_field.  */
1576   if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1577     {
1578       op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
1579                                  bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1580       bitnum %= BITS_PER_WORD;
1581       if (bitnum + bitsize > BITS_PER_WORD)
1582         {
1583           if (!fallback_p)
1584             return NULL_RTX;
1585           target = extract_split_bit_field (op0, bitsize, bitnum, unsignedp);
1586           return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1587         }
1588     }
1589
1590   /* From here on we know the desired field is smaller than a word.
1591      If OP0 is a register, it too fits within a word.  */
1592   enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
1593   extraction_insn extv;
1594   if (!MEM_P (op0)
1595       /* ??? We could limit the structure size to the part of OP0 that
1596          contains the field, with appropriate checks for endianness
1597          and TRULY_NOOP_TRUNCATION.  */
1598       && get_best_reg_extraction_insn (&extv, pattern,
1599                                        GET_MODE_BITSIZE (GET_MODE (op0)),
1600                                        tmode))
1601     {
1602       rtx result = extract_bit_field_using_extv (&extv, op0, bitsize, bitnum,
1603                                                  unsignedp, target, mode,
1604                                                  tmode);
1605       if (result)
1606         return result;
1607     }
1608
1609   /* If OP0 is a memory, try copying it to a register and seeing if a
1610      cheap register alternative is available.  */
1611   if (MEM_P (op0))
1612     {
1613       /* Do not use extv/extzv for volatile bitfields when
1614          -fstrict-volatile-bitfields is in effect.  */
1615       if (!(MEM_VOLATILE_P (op0) && flag_strict_volatile_bitfields > 0)
1616           && get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
1617                                            tmode))
1618         {
1619           rtx result = extract_bit_field_using_extv (&extv, op0, bitsize,
1620                                                      bitnum, unsignedp,
1621                                                      target, mode,
1622                                                      tmode);
1623           if (result)
1624             return result;
1625         }
1626
1627       rtx last = get_last_insn ();
1628
1629       /* Try loading part of OP0 into a register and extracting the
1630          bitfield from that.  */
1631       unsigned HOST_WIDE_INT bitpos;
1632       rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
1633                                                0, 0, tmode, &bitpos);
1634       if (xop0)
1635         {
1636           xop0 = copy_to_reg (xop0);
1637           rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
1638                                             unsignedp, target,
1639                                             mode, tmode, false);
1640           if (result)
1641             return result;
1642           delete_insns_since (last);
1643         }
1644     }
1645
1646   if (!fallback_p)
1647     return NULL;
1648
1649   /* Find a correspondingly-sized integer field, so we can apply
1650      shifts and masks to it.  */
1651   int_mode = int_mode_for_mode (tmode);
1652   if (int_mode == BLKmode)
1653     int_mode = int_mode_for_mode (mode);
1654   /* Should probably push op0 out to memory and then do a load.  */
1655   gcc_assert (int_mode != BLKmode);
1656
1657   target = extract_fixed_bit_field (int_mode, op0, bitsize, bitnum,
1658                                     target, unsignedp);
1659   return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1660 }
1661
1662 /* Generate code to extract a byte-field from STR_RTX
1663    containing BITSIZE bits, starting at BITNUM,
1664    and put it in TARGET if possible (if TARGET is nonzero).
1665    Regardless of TARGET, we return the rtx for where the value is placed.
1666
1667    STR_RTX is the structure containing the byte (a REG or MEM).
1668    UNSIGNEDP is nonzero if this is an unsigned bit field.
1669    MODE is the natural mode of the field value once extracted.
1670    TMODE is the mode the caller would like the value to have;
1671    but the value may be returned with type MODE instead.
1672
1673    If a TARGET is specified and we can store in it at no extra cost,
1674    we do so, and return TARGET.
1675    Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1676    if they are equally easy.  */
1677
1678 rtx
1679 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1680                    unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1681                    enum machine_mode mode, enum machine_mode tmode)
1682 {
1683   return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
1684                               target, mode, tmode, true);
1685 }
1686 \f
1687 /* Use shifts and boolean operations to extract a field of BITSIZE bits
1688    from bit BITNUM of OP0.
1689
1690    UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1691    If TARGET is nonzero, attempts to store the value there
1692    and return TARGET, but this is not guaranteed.
1693    If TARGET is not used, create a pseudo-reg of mode TMODE for the value.  */
1694
1695 static rtx
1696 extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
1697                          unsigned HOST_WIDE_INT bitsize,
1698                          unsigned HOST_WIDE_INT bitnum, rtx target,
1699                          int unsignedp)
1700 {
1701   enum machine_mode mode;
1702
1703   if (MEM_P (op0))
1704     {
1705       /* Get the proper mode to use for this field.  We want a mode that
1706          includes the entire field.  If such a mode would be larger than
1707          a word, we won't be doing the extraction the normal way.  */
1708
1709       if (MEM_VOLATILE_P (op0)
1710           && flag_strict_volatile_bitfields > 0)
1711         {
1712           if (GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
1713             mode = GET_MODE (op0);
1714           else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1715             mode = GET_MODE (target);
1716           else
1717             mode = tmode;
1718         }
1719       else
1720         mode = get_best_mode (bitsize, bitnum, 0, 0,
1721                               MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));
1722
1723       if (mode == VOIDmode)
1724         /* The only way this should occur is if the field spans word
1725            boundaries.  */
1726         return extract_split_bit_field (op0, bitsize, bitnum, unsignedp);
1727
1728       unsigned int total_bits = GET_MODE_BITSIZE (mode);
1729       HOST_WIDE_INT bit_offset = bitnum - bitnum % total_bits;
1730
1731       /* If we're accessing a volatile MEM, we can't apply BIT_OFFSET
1732          if it results in a multi-word access where we otherwise wouldn't
1733          have one.  So, check for that case here.  */
1734       if (MEM_P (op0)
1735           && MEM_VOLATILE_P (op0)
1736           && flag_strict_volatile_bitfields > 0
1737           && bitnum % BITS_PER_UNIT + bitsize <= total_bits
1738           && bitnum % GET_MODE_BITSIZE (mode) + bitsize > total_bits)
1739         {
1740           /* If the target doesn't support unaligned access, give up and
1741              split the access into two.  */
1742           if (STRICT_ALIGNMENT)
1743             return extract_split_bit_field (op0, bitsize, bitnum, unsignedp);
1744           bit_offset = bitnum - bitnum % BITS_PER_UNIT;
1745         }
1746       op0 = adjust_bitfield_address (op0, mode, bit_offset / BITS_PER_UNIT);
1747       bitnum -= bit_offset;
1748     }
1749
1750   mode = GET_MODE (op0);
1751   gcc_assert (SCALAR_INT_MODE_P (mode));
1752
1753   /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1754      for invalid input, such as extract equivalent of f5 from
1755      gcc.dg/pr48335-2.c.  */
1756
1757   if (BYTES_BIG_ENDIAN)
1758     /* BITNUM is the distance between our msb and that of OP0.
1759        Convert it to the distance from the lsb.  */
1760     bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1761
1762   /* Now BITNUM is always the distance between the field's lsb and that of OP0.
1763      We have reduced the big-endian case to the little-endian case.  */
1764
1765   if (unsignedp)
1766     {
1767       if (bitnum)
1768         {
1769           /* If the field does not already start at the lsb,
1770              shift it so it does.  */
1771           /* Maybe propagate the target for the shift.  */
1772           rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1773           if (tmode != mode)
1774             subtarget = 0;
1775           op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
1776         }
1777       /* Convert the value to the desired mode.  */
1778       if (mode != tmode)
1779         op0 = convert_to_mode (tmode, op0, 1);
1780
1781       /* Unless the msb of the field used to be the msb when we shifted,
1782          mask out the upper bits.  */
1783
1784       if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
1785         return expand_binop (GET_MODE (op0), and_optab, op0,
1786                              mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1787                              target, 1, OPTAB_LIB_WIDEN);
1788       return op0;
1789     }
1790
1791   /* To extract a signed bit-field, first shift its msb to the msb of the word,
1792      then arithmetic-shift its lsb to the lsb of the word.  */
1793   op0 = force_reg (mode, op0);
1794
1795   /* Find the narrowest integer mode that contains the field.  */
1796
1797   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1798        mode = GET_MODE_WIDER_MODE (mode))
1799     if (GET_MODE_BITSIZE (mode) >= bitsize + bitnum)
1800       {
1801         op0 = convert_to_mode (mode, op0, 0);
1802         break;
1803       }
1804
1805   if (mode != tmode)
1806     target = 0;
1807
1808   if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
1809     {
1810       int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
1811       /* Maybe propagate the target for the shift.  */
1812       rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1813       op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1814     }
1815
1816   return expand_shift (RSHIFT_EXPR, mode, op0,
1817                        GET_MODE_BITSIZE (mode) - bitsize, target, 0);
1818 }
1819 \f
1820 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1821    of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1822    complement of that if COMPLEMENT.  The mask is truncated if
1823    necessary to the width of mode MODE.  The mask is zero-extended if
1824    BITSIZE+BITPOS is too small for MODE.  */
1825
1826 static rtx
1827 mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
1828 {
1829   double_int mask;
1830
1831   mask = double_int::mask (bitsize);
1832   mask = mask.llshift (bitpos, HOST_BITS_PER_DOUBLE_INT);
1833
1834   if (complement)
1835     mask = ~mask;
1836
1837   return immed_double_int_const (mask, mode);
1838 }
1839
1840 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1841    VALUE << BITPOS.  */
1842
1843 static rtx
1844 lshift_value (enum machine_mode mode, unsigned HOST_WIDE_INT value,
1845               int bitpos)
1846 {
1847   double_int val;
1848
1849   val = double_int::from_uhwi (value);
1850   val = val.llshift (bitpos, HOST_BITS_PER_DOUBLE_INT);
1851
1852   return immed_double_int_const (val, mode);
1853 }
1854 \f
1855 /* Extract a bit field that is split across two words
1856    and return an RTX for the result.
1857
1858    OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1859    BITSIZE is the field width; BITPOS, position of its first bit, in the word.
1860    UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.  */
1861
1862 static rtx
1863 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1864                          unsigned HOST_WIDE_INT bitpos, int unsignedp)
1865 {
1866   unsigned int unit;
1867   unsigned int bitsdone = 0;
1868   rtx result = NULL_RTX;
1869   int first = 1;
1870
1871   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1872      much at a time.  */
1873   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1874     unit = BITS_PER_WORD;
1875   else
1876     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1877
1878   while (bitsdone < bitsize)
1879     {
1880       unsigned HOST_WIDE_INT thissize;
1881       rtx part, word;
1882       unsigned HOST_WIDE_INT thispos;
1883       unsigned HOST_WIDE_INT offset;
1884
1885       offset = (bitpos + bitsdone) / unit;
1886       thispos = (bitpos + bitsdone) % unit;
1887
1888       /* THISSIZE must not overrun a word boundary.  Otherwise,
1889          extract_fixed_bit_field will call us again, and we will mutually
1890          recurse forever.  */
1891       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1892       thissize = MIN (thissize, unit - thispos);
1893
1894       /* If OP0 is a register, then handle OFFSET here.
1895
1896          When handling multiword bitfields, extract_bit_field may pass
1897          down a word_mode SUBREG of a larger REG for a bitfield that actually
1898          crosses a word boundary.  Thus, for a SUBREG, we must find
1899          the current word starting from the base register.  */
1900       if (GET_CODE (op0) == SUBREG)
1901         {
1902           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1903           word = operand_subword_force (SUBREG_REG (op0), word_offset,
1904                                         GET_MODE (SUBREG_REG (op0)));
1905           offset = 0;
1906         }
1907       else if (REG_P (op0))
1908         {
1909           word = operand_subword_force (op0, offset, GET_MODE (op0));
1910           offset = 0;
1911         }
1912       else
1913         word = op0;
1914
1915       /* Extract the parts in bit-counting order,
1916          whose meaning is determined by BYTES_PER_UNIT.
1917          OFFSET is in UNITs, and UNIT is in bits.  */
1918       part = extract_fixed_bit_field (word_mode, word, thissize,
1919                                       offset * unit + thispos, 0, 1);
1920       bitsdone += thissize;
1921
1922       /* Shift this part into place for the result.  */
1923       if (BYTES_BIG_ENDIAN)
1924         {
1925           if (bitsize != bitsdone)
1926             part = expand_shift (LSHIFT_EXPR, word_mode, part,
1927                                  bitsize - bitsdone, 0, 1);
1928         }
1929       else
1930         {
1931           if (bitsdone != thissize)
1932             part = expand_shift (LSHIFT_EXPR, word_mode, part,
1933                                  bitsdone - thissize, 0, 1);
1934         }
1935
1936       if (first)
1937         result = part;
1938       else
1939         /* Combine the parts with bitwise or.  This works
1940            because we extracted each part as an unsigned bit field.  */
1941         result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
1942                                OPTAB_LIB_WIDEN);
1943
1944       first = 0;
1945     }
1946
1947   /* Unsigned bit field: we are done.  */
1948   if (unsignedp)
1949     return result;
1950   /* Signed bit field: sign-extend with two arithmetic shifts.  */
1951   result = expand_shift (LSHIFT_EXPR, word_mode, result,
1952                          BITS_PER_WORD - bitsize, NULL_RTX, 0);
1953   return expand_shift (RSHIFT_EXPR, word_mode, result,
1954                        BITS_PER_WORD - bitsize, NULL_RTX, 0);
1955 }
1956 \f
1957 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
1958    the bit pattern.  SRC_MODE is the mode of SRC; if this is smaller than
1959    MODE, fill the upper bits with zeros.  Fail if the layout of either
1960    mode is unknown (as for CC modes) or if the extraction would involve
1961    unprofitable mode punning.  Return the value on success, otherwise
1962    return null.
1963
1964    This is different from gen_lowpart* in these respects:
1965
1966      - the returned value must always be considered an rvalue
1967
1968      - when MODE is wider than SRC_MODE, the extraction involves
1969        a zero extension
1970
1971      - when MODE is smaller than SRC_MODE, the extraction involves
1972        a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
1973
1974    In other words, this routine performs a computation, whereas the
1975    gen_lowpart* routines are conceptually lvalue or rvalue subreg
1976    operations.  */
1977
1978 rtx
1979 extract_low_bits (enum machine_mode mode, enum machine_mode src_mode, rtx src)
1980 {
1981   enum machine_mode int_mode, src_int_mode;
1982
1983   if (mode == src_mode)
1984     return src;
1985
1986   if (CONSTANT_P (src))
1987     {
1988       /* simplify_gen_subreg can't be used here, as if simplify_subreg
1989          fails, it will happily create (subreg (symbol_ref)) or similar
1990          invalid SUBREGs.  */
1991       unsigned int byte = subreg_lowpart_offset (mode, src_mode);
1992       rtx ret = simplify_subreg (mode, src, src_mode, byte);
1993       if (ret)
1994         return ret;
1995
1996       if (GET_MODE (src) == VOIDmode
1997           || !validate_subreg (mode, src_mode, src, byte))
1998         return NULL_RTX;
1999
2000       src = force_reg (GET_MODE (src), src);
2001       return gen_rtx_SUBREG (mode, src, byte);
2002     }
2003
2004   if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2005     return NULL_RTX;
2006
2007   if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2008       && MODES_TIEABLE_P (mode, src_mode))
2009     {
2010       rtx x = gen_lowpart_common (mode, src);
2011       if (x)
2012         return x;
2013     }
2014
2015   src_int_mode = int_mode_for_mode (src_mode);
2016   int_mode = int_mode_for_mode (mode);
2017   if (src_int_mode == BLKmode || int_mode == BLKmode)
2018     return NULL_RTX;
2019
2020   if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2021     return NULL_RTX;
2022   if (!MODES_TIEABLE_P (int_mode, mode))
2023     return NULL_RTX;
2024
2025   src = gen_lowpart (src_int_mode, src);
2026   src = convert_modes (int_mode, src_int_mode, src, true);
2027   src = gen_lowpart (mode, src);
2028   return src;
2029 }
2030 \f
2031 /* Add INC into TARGET.  */
2032
2033 void
2034 expand_inc (rtx target, rtx inc)
2035 {
2036   rtx value = expand_binop (GET_MODE (target), add_optab,
2037                             target, inc,
2038                             target, 0, OPTAB_LIB_WIDEN);
2039   if (value != target)
2040     emit_move_insn (target, value);
2041 }
2042
2043 /* Subtract DEC from TARGET.  */
2044
2045 void
2046 expand_dec (rtx target, rtx dec)
2047 {
2048   rtx value = expand_binop (GET_MODE (target), sub_optab,
2049                             target, dec,
2050                             target, 0, OPTAB_LIB_WIDEN);
2051   if (value != target)
2052     emit_move_insn (target, value);
2053 }
2054 \f
2055 /* Output a shift instruction for expression code CODE,
2056    with SHIFTED being the rtx for the value to shift,
2057    and AMOUNT the rtx for the amount to shift by.
2058    Store the result in the rtx TARGET, if that is convenient.
2059    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2060    Return the rtx for where the value is.  */
2061
2062 static rtx
2063 expand_shift_1 (enum tree_code code, enum machine_mode mode, rtx shifted,
2064                 rtx amount, rtx target, int unsignedp)
2065 {
2066   rtx op1, temp = 0;
2067   int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2068   int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2069   optab lshift_optab = ashl_optab;
2070   optab rshift_arith_optab = ashr_optab;
2071   optab rshift_uns_optab = lshr_optab;
2072   optab lrotate_optab = rotl_optab;
2073   optab rrotate_optab = rotr_optab;
2074   enum machine_mode op1_mode;
2075   int attempt;
2076   bool speed = optimize_insn_for_speed_p ();
2077
2078   op1 = amount;
2079   op1_mode = GET_MODE (op1);
2080
2081   /* Determine whether the shift/rotate amount is a vector, or scalar.  If the
2082      shift amount is a vector, use the vector/vector shift patterns.  */
2083   if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2084     {
2085       lshift_optab = vashl_optab;
2086       rshift_arith_optab = vashr_optab;
2087       rshift_uns_optab = vlshr_optab;
2088       lrotate_optab = vrotl_optab;
2089       rrotate_optab = vrotr_optab;
2090     }
2091
2092   /* Previously detected shift-counts computed by NEGATE_EXPR
2093      and shifted in the other direction; but that does not work
2094      on all machines.  */
2095
2096   if (SHIFT_COUNT_TRUNCATED)
2097     {
2098       if (CONST_INT_P (op1)
2099           && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2100               (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
2101         op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2102                        % GET_MODE_BITSIZE (mode));
2103       else if (GET_CODE (op1) == SUBREG
2104                && subreg_lowpart_p (op1)
2105                && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
2106                && SCALAR_INT_MODE_P (GET_MODE (op1)))
2107         op1 = SUBREG_REG (op1);
2108     }
2109
2110   /* Canonicalize rotates by constant amount.  If op1 is bitsize / 2,
2111      prefer left rotation, if op1 is from bitsize / 2 + 1 to
2112      bitsize - 1, use other direction of rotate with 1 .. bitsize / 2 - 1
2113      amount instead.  */
2114   if (rotate
2115       && CONST_INT_P (op1)
2116       && IN_RANGE (INTVAL (op1), GET_MODE_BITSIZE (mode) / 2 + left,
2117                    GET_MODE_BITSIZE (mode) - 1))
2118     {
2119       op1 = GEN_INT (GET_MODE_BITSIZE (mode) - INTVAL (op1));
2120       left = !left;
2121       code = left ? LROTATE_EXPR : RROTATE_EXPR;
2122     }
2123
2124   if (op1 == const0_rtx)
2125     return shifted;
2126
2127   /* Check whether its cheaper to implement a left shift by a constant
2128      bit count by a sequence of additions.  */
2129   if (code == LSHIFT_EXPR
2130       && CONST_INT_P (op1)
2131       && INTVAL (op1) > 0
2132       && INTVAL (op1) < GET_MODE_PRECISION (mode)
2133       && INTVAL (op1) < MAX_BITS_PER_WORD
2134       && (shift_cost (speed, mode, INTVAL (op1))
2135           > INTVAL (op1) * add_cost (speed, mode))
2136       && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2137     {
2138       int i;
2139       for (i = 0; i < INTVAL (op1); i++)
2140         {
2141           temp = force_reg (mode, shifted);
2142           shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2143                                   unsignedp, OPTAB_LIB_WIDEN);
2144         }
2145       return shifted;
2146     }
2147
2148   for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2149     {
2150       enum optab_methods methods;
2151
2152       if (attempt == 0)
2153         methods = OPTAB_DIRECT;
2154       else if (attempt == 1)
2155         methods = OPTAB_WIDEN;
2156       else
2157         methods = OPTAB_LIB_WIDEN;
2158
2159       if (rotate)
2160         {
2161           /* Widening does not work for rotation.  */
2162           if (methods == OPTAB_WIDEN)
2163             continue;
2164           else if (methods == OPTAB_LIB_WIDEN)
2165             {
2166               /* If we have been unable to open-code this by a rotation,
2167                  do it as the IOR of two shifts.  I.e., to rotate A
2168                  by N bits, compute
2169                  (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
2170                  where C is the bitsize of A.
2171
2172                  It is theoretically possible that the target machine might
2173                  not be able to perform either shift and hence we would
2174                  be making two libcalls rather than just the one for the
2175                  shift (similarly if IOR could not be done).  We will allow
2176                  this extremely unlikely lossage to avoid complicating the
2177                  code below.  */
2178
2179               rtx subtarget = target == shifted ? 0 : target;
2180               rtx new_amount, other_amount;
2181               rtx temp1;
2182
2183               new_amount = op1;
2184               if (op1 == const0_rtx)
2185                 return shifted;
2186               else if (CONST_INT_P (op1))
2187                 other_amount = GEN_INT (GET_MODE_BITSIZE (mode)
2188                                         - INTVAL (op1));
2189               else
2190                 {
2191                   other_amount
2192                     = simplify_gen_unary (NEG, GET_MODE (op1),
2193                                           op1, GET_MODE (op1));
2194                   HOST_WIDE_INT mask = GET_MODE_PRECISION (mode) - 1;
2195                   other_amount
2196                     = simplify_gen_binary (AND, GET_MODE (op1), other_amount,
2197                                            gen_int_mode (mask, GET_MODE (op1)));
2198                 }
2199
2200               shifted = force_reg (mode, shifted);
2201
2202               temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2203                                      mode, shifted, new_amount, 0, 1);
2204               temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2205                                       mode, shifted, other_amount,
2206                                       subtarget, 1);
2207               return expand_binop (mode, ior_optab, temp, temp1, target,
2208                                    unsignedp, methods);
2209             }
2210
2211           temp = expand_binop (mode,
2212                                left ? lrotate_optab : rrotate_optab,
2213                                shifted, op1, target, unsignedp, methods);
2214         }
2215       else if (unsignedp)
2216         temp = expand_binop (mode,
2217                              left ? lshift_optab : rshift_uns_optab,
2218                              shifted, op1, target, unsignedp, methods);
2219
2220       /* Do arithmetic shifts.
2221          Also, if we are going to widen the operand, we can just as well
2222          use an arithmetic right-shift instead of a logical one.  */
2223       if (temp == 0 && ! rotate
2224           && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2225         {
2226           enum optab_methods methods1 = methods;
2227
2228           /* If trying to widen a log shift to an arithmetic shift,
2229              don't accept an arithmetic shift of the same size.  */
2230           if (unsignedp)
2231             methods1 = OPTAB_MUST_WIDEN;
2232
2233           /* Arithmetic shift */
2234
2235           temp = expand_binop (mode,
2236                                left ? lshift_optab : rshift_arith_optab,
2237                                shifted, op1, target, unsignedp, methods1);
2238         }
2239
2240       /* We used to try extzv here for logical right shifts, but that was
2241          only useful for one machine, the VAX, and caused poor code
2242          generation there for lshrdi3, so the code was deleted and a
2243          define_expand for lshrsi3 was added to vax.md.  */
2244     }
2245
2246   gcc_assert (temp);
2247   return temp;
2248 }
2249
2250 /* Output a shift instruction for expression code CODE,
2251    with SHIFTED being the rtx for the value to shift,
2252    and AMOUNT the amount to shift by.
2253    Store the result in the rtx TARGET, if that is convenient.
2254    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2255    Return the rtx for where the value is.  */
2256
2257 rtx
2258 expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2259               int amount, rtx target, int unsignedp)
2260 {
2261   return expand_shift_1 (code, mode,
2262                          shifted, GEN_INT (amount), target, unsignedp);
2263 }
2264
2265 /* Output a shift instruction for expression code CODE,
2266    with SHIFTED being the rtx for the value to shift,
2267    and AMOUNT the tree for the amount to shift by.
2268    Store the result in the rtx TARGET, if that is convenient.
2269    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2270    Return the rtx for where the value is.  */
2271
2272 rtx
2273 expand_variable_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2274                        tree amount, rtx target, int unsignedp)
2275 {
2276   return expand_shift_1 (code, mode,
2277                          shifted, expand_normal (amount), target, unsignedp);
2278 }
2279
2280 \f
2281 /* Indicates the type of fixup needed after a constant multiplication,
2282    as chosen by choose_mult_variant and applied by expand_mult_const:
2283    basic_variant means none, negate_variant means negate the result,
2284    add_variant means add the multiplicand to the result.  */
2285 enum mult_variant {basic_variant, negate_variant, add_variant};
2286
2287 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2288                         const struct mult_cost *, enum machine_mode mode);
2289 static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
2290                                  struct algorithm *, enum mult_variant *, int);
2291 static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
2292                               const struct algorithm *, enum mult_variant);
2293 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2294 static rtx extract_high_half (enum machine_mode, rtx);
2295 static rtx expmed_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
2296 static rtx expmed_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
2297                                        int, int);
2298 /* Compute the best algorithm for multiplying by T and store it in
2299    *ALG_OUT.  The algorithm must cost less than COST_LIMIT.
2300    If ALG_OUT->cost >= COST_LIMIT, no algorithm was found and all
2301    other fields of *ALG_OUT are undefined.
2302    MODE is the machine mode of the multiplication.  */
2303
2304 static void
2305 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2306             const struct mult_cost *cost_limit, enum machine_mode mode)
2307 {
2308   int m;
2309   struct algorithm *alg_in, *best_alg;
2310   struct mult_cost best_cost;
2311   struct mult_cost new_limit;
2312   int op_cost, op_latency;
2313   unsigned HOST_WIDE_INT orig_t = t;  /* T before it is masked to IMODE.  */
2314   unsigned HOST_WIDE_INT q;
2315   int maxm, hash_index;
2316   bool cache_hit = false;  /* Set when a cached algorithm is reused.  */
2317   enum alg_code cache_alg = alg_zero;
2318   bool speed = optimize_insn_for_speed_p ();
2319   enum machine_mode imode;
2320   struct alg_hash_entry *entry_ptr;
2321
2322   /* Indicate that no algorithm is yet found.  If no algorithm
2323      is found, this value will be returned and indicate failure.  */
2324   alg_out->cost.cost = cost_limit->cost + 1;
2325   alg_out->cost.latency = cost_limit->latency + 1;
2326
2327   if (cost_limit->cost < 0
2328       || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2329     return;
2330
2331   /* Be prepared for vector modes.  */
2332   imode = GET_MODE_INNER (mode);
2333   if (imode == VOIDmode)
2334     imode = mode;
2335
2336   maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2337
2338   /* Restrict the bits of "t" to the multiplication's mode.  */
2339   t &= GET_MODE_MASK (imode);
2340
2341   /* t == 1 can be done in zero cost.  */
2342   if (t == 1)
2343     {
2344       alg_out->ops = 1;
2345       alg_out->cost.cost = 0;
2346       alg_out->cost.latency = 0;
2347       alg_out->op[0] = alg_m;
2348       return;
2349     }
2350
2351   /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
2352      fail now.  */
2353   if (t == 0)
2354     {
2355       if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2356         return;
2357       else
2358         {
2359           alg_out->ops = 1;
2360           alg_out->cost.cost = zero_cost (speed);
2361           alg_out->cost.latency = zero_cost (speed);
2362           alg_out->op[0] = alg_zero;
2363           return;
2364         }
2365     }
2366
2367   /* We'll be needing a couple extra algorithm structures now.  */
2368
2369   alg_in = XALLOCA (struct algorithm);
2370   best_alg = XALLOCA (struct algorithm);
2371   best_cost = *cost_limit;
2372
2373   /* Compute the hash index.  */
2374   hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2375
2376   /* See if we already know what to do for T.  */
2377   entry_ptr = alg_hash_entry_ptr (hash_index);
2378   if (entry_ptr->t == t
2379       && entry_ptr->mode == mode
2380       && entry_ptr->mode == mode  /* NOTE(review): repeats the test above; redundant.  */
2381       && entry_ptr->speed == speed
2382       && entry_ptr->alg != alg_unknown)
2383     {
2384       cache_alg = entry_ptr->alg;
2385
2386       if (cache_alg == alg_impossible)
2387         {
2388           /* The cache tells us that it's impossible to synthesize
2389              multiplication by T within entry_ptr->cost.  */
2390           if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2391             /* COST_LIMIT is at least as restrictive as the one
2392                recorded in the hash table, in which case we have no
2393                hope of synthesizing a multiplication.  Just
2394                return.  */
2395             return;
2396
2397           /* If we get here, COST_LIMIT is less restrictive than the
2398              one recorded in the hash table, so we may be able to
2399              synthesize a multiplication.  Proceed as if we didn't
2400              have the cache entry.  */
2401         }
2402       else
2403         {
2404           if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2405             /* The cached algorithm shows that this multiplication
2406                requires more cost than COST_LIMIT.  Just return.  This
2407                way, we don't clobber this cache entry with
2408                alg_impossible but retain useful information.  */
2409             return;
2410
2411           cache_hit = true;
2412
2413           switch (cache_alg)
2414             {
2415             case alg_shift:
2416               goto do_alg_shift;
2417
2418             case alg_add_t_m2:
2419             case alg_sub_t_m2:
2420               goto do_alg_addsub_t_m2;
2421
2422             case alg_add_factor:
2423             case alg_sub_factor:
2424               goto do_alg_addsub_factor;
2425
2426             case alg_add_t2_m:
2427               goto do_alg_add_t2_m;
2428
2429             case alg_sub_t2_m:
2430               goto do_alg_sub_t2_m;
2431
2432             default:
2433               gcc_unreachable ();
2434             }
2435         }
2436     }
2437
2438   /* If we have a group of zero bits at the low-order part of T, try
2439      multiplying by the remaining bits and then doing a shift.  */
2440
2441   if ((t & 1) == 0)
2442     {
2443     do_alg_shift:
2444       m = floor_log2 (t & -t);  /* m = number of low zero bits */
2445       if (m < maxm)
2446         {
2447           q = t >> m;
2448           /* The function expand_shift will choose between a shift and
2449              a sequence of additions, so the observed cost is given as
2450              MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)).  */
2451           op_cost = m * add_cost (speed, mode);
2452           if (shift_cost (speed, mode, m) < op_cost)
2453             op_cost = shift_cost (speed, mode, m);
2454           new_limit.cost = best_cost.cost - op_cost;
2455           new_limit.latency = best_cost.latency - op_cost;
2456           synth_mult (alg_in, q, &new_limit, mode);
2457
2458           alg_in->cost.cost += op_cost;
2459           alg_in->cost.latency += op_cost;
2460           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2461             {
2462               struct algorithm *x;
2463               best_cost = alg_in->cost;
2464               x = alg_in, alg_in = best_alg, best_alg = x;
2465               best_alg->log[best_alg->ops] = m;
2466               best_alg->op[best_alg->ops] = alg_shift;
2467             }
2468
2469           /* See if treating ORIG_T as a signed number yields a better
2470              sequence.  Try this sequence only for a negative ORIG_T
2471              as it would be useless for a non-negative ORIG_T.  */
2472           if ((HOST_WIDE_INT) orig_t < 0)
2473             {
2474               /* Shift ORIG_T as follows because a right shift of a
2475                  negative-valued signed type is implementation
2476                  defined.  */
2477               q = ~(~orig_t >> m);
2478               /* The function expand_shift will choose between a shift
2479                  and a sequence of additions, so the observed cost is
2480                  given as MIN (m * add_cost(speed, mode),
2481                  shift_cost(speed, mode, m)).  */
2482               op_cost = m * add_cost (speed, mode);
2483               if (shift_cost (speed, mode, m) < op_cost)
2484                 op_cost = shift_cost (speed, mode, m);
2485               new_limit.cost = best_cost.cost - op_cost;
2486               new_limit.latency = best_cost.latency - op_cost;
2487               synth_mult (alg_in, q, &new_limit, mode);
2488
2489               alg_in->cost.cost += op_cost;
2490               alg_in->cost.latency += op_cost;
2491               if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2492                 {
2493                   struct algorithm *x;
2494                   best_cost = alg_in->cost;
2495                   x = alg_in, alg_in = best_alg, best_alg = x;
2496                   best_alg->log[best_alg->ops] = m;
2497                   best_alg->op[best_alg->ops] = alg_shift;
2498                 }
2499             }
2500         }
2501       if (cache_hit)
2502         goto done;
2503     }
2504
2505   /* If we have an odd number, add or subtract one.  */
2506   if ((t & 1) != 0)
2507     {
2508       unsigned HOST_WIDE_INT w;
2509
2510     do_alg_addsub_t_m2:
2511       for (w = 1; (w & t) != 0; w <<= 1)
2512         ;
2513       /* If T was -1, then W will be zero after the loop.  This is another
2514          case where T ends with ...111.  Handling this with (T + 1) and
2515          subtract 1 produces slightly better code and results in algorithm
2516          selection much faster than treating it like the ...0111 case
2517          below.  */
2518       if (w == 0
2519           || (w > 2
2520               /* Reject the case where t is 3.
2521                  Thus we prefer addition in that case.  */
2522               && t != 3))
2523         {
2524           /* T ends with ...111.  Multiply by (T + 1) and subtract 1.  */
2525
2526           op_cost = add_cost (speed, mode);
2527           new_limit.cost = best_cost.cost - op_cost;
2528           new_limit.latency = best_cost.latency - op_cost;
2529           synth_mult (alg_in, t + 1, &new_limit, mode);
2530
2531           alg_in->cost.cost += op_cost;
2532           alg_in->cost.latency += op_cost;
2533           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2534             {
2535               struct algorithm *x;
2536               best_cost = alg_in->cost;
2537               x = alg_in, alg_in = best_alg, best_alg = x;
2538               best_alg->log[best_alg->ops] = 0;
2539               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2540             }
2541         }
2542       else
2543         {
2544           /* T ends with ...01 or ...011.  Multiply by (T - 1) and add 1.  */
2545
2546           op_cost = add_cost (speed, mode);
2547           new_limit.cost = best_cost.cost - op_cost;
2548           new_limit.latency = best_cost.latency - op_cost;
2549           synth_mult (alg_in, t - 1, &new_limit, mode);
2550
2551           alg_in->cost.cost += op_cost;
2552           alg_in->cost.latency += op_cost;
2553           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2554             {
2555               struct algorithm *x;
2556               best_cost = alg_in->cost;
2557               x = alg_in, alg_in = best_alg, best_alg = x;
2558               best_alg->log[best_alg->ops] = 0;
2559               best_alg->op[best_alg->ops] = alg_add_t_m2;
2560             }
2561         }
2562
2563       /* We may be able to calculate a * -7, a * -15, a * -31, etc
2564          quickly with a - a * n for some appropriate constant n.  */
2565       m = exact_log2 (-orig_t + 1);
2566       if (m >= 0 && m < maxm)
2567         {
2568           op_cost = shiftsub1_cost (speed, mode, m);
2569           new_limit.cost = best_cost.cost - op_cost;
2570           new_limit.latency = best_cost.latency - op_cost;
2571           synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2572                       &new_limit, mode);
2573
2574           alg_in->cost.cost += op_cost;
2575           alg_in->cost.latency += op_cost;
2576           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2577             {
2578               struct algorithm *x;
2579               best_cost = alg_in->cost;
2580               x = alg_in, alg_in = best_alg, best_alg = x;
2581               best_alg->log[best_alg->ops] = m;
2582               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2583             }
2584         }
2585
2586       if (cache_hit)
2587         goto done;
2588     }
2589
2590   /* Look for factors of t of the form
2591      t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2592      If we find such a factor, we can multiply by t using an algorithm that
2593      multiplies by q, shift the result by m and add/subtract it to itself.
2594
2595      We search for large factors first and loop down, even if large factors
2596      are less probable than small; if we find a large factor we will find a
2597      good sequence quickly, and therefore be able to prune (by decreasing
2598      COST_LIMIT) the search.  */
2599
2600  do_alg_addsub_factor:
2601   for (m = floor_log2 (t - 1); m >= 2; m--)
2602     {
2603       unsigned HOST_WIDE_INT d;
2604
2605       d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2606       if (t % d == 0 && t > d && m < maxm
2607           && (!cache_hit || cache_alg == alg_add_factor))
2608         {
2609           /* If the target has a cheap shift-and-add instruction use
2610              that in preference to a shift insn followed by an add insn.
2611              Assume that the shift-and-add is "atomic" with a latency
2612              equal to its cost, otherwise assume that on superscalar
2613              hardware the shift may be executed concurrently with the
2614              earlier steps in the algorithm.  */
2615           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2616           if (shiftadd_cost (speed, mode, m) < op_cost)
2617             {
2618               op_cost = shiftadd_cost (speed, mode, m);
2619               op_latency = op_cost;
2620             }
2621           else
2622             op_latency = add_cost (speed, mode);
2623
2624           new_limit.cost = best_cost.cost - op_cost;
2625           new_limit.latency = best_cost.latency - op_latency;
2626           synth_mult (alg_in, t / d, &new_limit, mode);
2627
2628           alg_in->cost.cost += op_cost;
2629           alg_in->cost.latency += op_latency;
2630           if (alg_in->cost.latency < op_cost)  /* Clamp latency to at least OP_COST.  */
2631             alg_in->cost.latency = op_cost;
2632           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2633             {
2634               struct algorithm *x;
2635               best_cost = alg_in->cost;
2636               x = alg_in, alg_in = best_alg, best_alg = x;
2637               best_alg->log[best_alg->ops] = m;
2638               best_alg->op[best_alg->ops] = alg_add_factor;
2639             }
2640           /* Other factors will have been taken care of in the recursion.  */
2641           break;
2642         }
2643
2644       d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2645       if (t % d == 0 && t > d && m < maxm
2646           && (!cache_hit || cache_alg == alg_sub_factor))
2647         {
2648           /* If the target has a cheap shift-and-subtract insn use
2649              that in preference to a shift insn followed by a sub insn.
2650              Assume that the shift-and-sub is "atomic" with a latency
2651              equal to its cost, otherwise assume that on superscalar
2652              hardware the shift may be executed concurrently with the
2653              earlier steps in the algorithm.  */
2654           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2655           if (shiftsub0_cost (speed, mode, m) < op_cost)
2656             {
2657               op_cost = shiftsub0_cost (speed, mode, m);
2658               op_latency = op_cost;
2659             }
2660           else
2661             op_latency = add_cost (speed, mode);
2662
2663           new_limit.cost = best_cost.cost - op_cost;
2664           new_limit.latency = best_cost.latency - op_latency;
2665           synth_mult (alg_in, t / d, &new_limit, mode);
2666
2667           alg_in->cost.cost += op_cost;
2668           alg_in->cost.latency += op_latency;
2669           if (alg_in->cost.latency < op_cost)  /* Clamp latency to at least OP_COST.  */
2670             alg_in->cost.latency = op_cost;
2671           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2672             {
2673               struct algorithm *x;
2674               best_cost = alg_in->cost;
2675               x = alg_in, alg_in = best_alg, best_alg = x;
2676               best_alg->log[best_alg->ops] = m;
2677               best_alg->op[best_alg->ops] = alg_sub_factor;
2678             }
2679           break;  /* As above: stop at the first factor found.  */
2680         }
2681     }
2682   if (cache_hit)
2683     goto done;
2684
2685   /* Try shift-and-add (load effective address) instructions,
2686      i.e. do a*3, a*5, a*9.  */
2687   if ((t & 1) != 0)
2688     {
2689     do_alg_add_t2_m:
2690       q = t - 1;
2691       q = q & -q;
2692       m = exact_log2 (q);
2693       if (m >= 0 && m < maxm)
2694         {
2695           op_cost = shiftadd_cost (speed, mode, m);
2696           new_limit.cost = best_cost.cost - op_cost;
2697           new_limit.latency = best_cost.latency - op_cost;
2698           synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2699
2700           alg_in->cost.cost += op_cost;
2701           alg_in->cost.latency += op_cost;
2702           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2703             {
2704               struct algorithm *x;
2705               best_cost = alg_in->cost;
2706               x = alg_in, alg_in = best_alg, best_alg = x;
2707               best_alg->log[best_alg->ops] = m;
2708               best_alg->op[best_alg->ops] = alg_add_t2_m;
2709             }
2710         }
2711       if (cache_hit)
2712         goto done;
2713
2714     do_alg_sub_t2_m:
2715       q = t + 1;
2716       q = q & -q;
2717       m = exact_log2 (q);
2718       if (m >= 0 && m < maxm)
2719         {
2720           op_cost = shiftsub0_cost (speed, mode, m);
2721           new_limit.cost = best_cost.cost - op_cost;
2722           new_limit.latency = best_cost.latency - op_cost;
2723           synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2724
2725           alg_in->cost.cost += op_cost;
2726           alg_in->cost.latency += op_cost;
2727           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2728             {
2729               struct algorithm *x;
2730               best_cost = alg_in->cost;
2731               x = alg_in, alg_in = best_alg, best_alg = x;
2732               best_alg->log[best_alg->ops] = m;
2733               best_alg->op[best_alg->ops] = alg_sub_t2_m;
2734             }
2735         }
2736       if (cache_hit)
2737         goto done;
2738     }
2739
2740  done:
2741   /* If best_cost has not decreased, we have not found any algorithm.  */
2742   if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2743     {
2744       /* We failed to find an algorithm.  Record alg_impossible for
2745          this case (that is, <T, MODE, COST_LIMIT>) so that next time
2746          we are asked to find an algorithm for T within the same or
2747          lower COST_LIMIT, we can immediately return to the
2748          caller.  */
2749       entry_ptr->t = t;
2750       entry_ptr->mode = mode;
2751       entry_ptr->speed = speed;
2752       entry_ptr->alg = alg_impossible;
2753       entry_ptr->cost = *cost_limit;
2754       return;
2755     }
2756
2757   /* Cache the result.  */
2758   if (!cache_hit)
2759     {
2760       entry_ptr->t = t;
2761       entry_ptr->mode = mode;
2762       entry_ptr->speed = speed;
2763       entry_ptr->alg = best_alg->op[best_alg->ops];
2764       entry_ptr->cost.cost = best_cost.cost;
2765       entry_ptr->cost.latency = best_cost.latency;
2766     }
2767
2768   /* If we are getting a too long sequence for `struct algorithm'
2769      to record, make this search fail.  */
2770   if (best_alg->ops == MAX_BITS_PER_WORD)
2771     return;
2772
2773   /* Copy the algorithm from temporary space to the space at alg_out.
2774      We avoid using structure assignment because the majority of
2775      best_alg is normally undefined, and this is a critical function.  */
2776   alg_out->ops = best_alg->ops + 1;
2777   alg_out->cost = best_cost;
2778   memcpy (alg_out->op, best_alg->op,
2779           alg_out->ops * sizeof *alg_out->op);
2780   memcpy (alg_out->log, best_alg->log,
2781           alg_out->ops * sizeof *alg_out->log);
2782 }
2783 \f
2784 /* Pick the cheapest shift/add strategy for multiplying a value of mode
2785    MODE by the constant VAL.  Three candidates are costed via synth_mult:
2786
2787        - a sequence computing VAL directly (basic_variant)
2788        - a sequence for -VAL whose result is negated (negate_variant)
2789        - a sequence for VAL - 1 followed by one addition (add_variant).
2790
2791    The winner is left in *ALG and its fixup kind in *VARIANT.  Return
2792    true if the winner costs less than MULT_COST.  */
2793
2794 static bool
2795 choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
2796                      struct algorithm *alg, enum mult_variant *variant,
2797                      int mult_cost)
2798 {
2799   struct algorithm trial;
2800   struct mult_cost bound;
2801   int step_cost, base_cost, base_latency;
2802   bool speed = optimize_insn_for_speed_p ();
2803
2804   /* A negative budget can never be met.  */
2805   if (mult_cost < 0)
2806     return false;
2807
2808   /* Cap MULT_COST: any constant multiplication can be performed with
2809      less than 2 * bits additions.  */
2810   step_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
2811   if (step_cost < mult_cost)
2812     mult_cost = step_cost;
2813
2814   /* Candidate 1: synthesize VAL itself within the full budget.  */
2815   *variant = basic_variant;
2816   bound.cost = mult_cost;
2817   bound.latency = mult_cost;
2818   synth_mult (alg, val, &bound, mode);
2819
2820   /* Candidate 2: synthesize -VAL, then negate.  Only tried when the
2821      mode's unit is no wider than a host `int'.  */
2822   if (GET_MODE_UNIT_BITSIZE (mode) <= HOST_BITS_PER_INT)
2823     {
2824       step_cost = neg_cost (speed, mode);
2825       base_cost = base_latency = mult_cost;
2826       if (MULT_COST_LESS (&alg->cost, mult_cost))
2827         {
2828           base_cost = alg->cost.cost;
2829           base_latency = alg->cost.latency;
2830         }
2831       bound.cost = base_cost - step_cost;
2832       bound.latency = base_latency - step_cost;
2833       synth_mult (&trial, -val, &bound, mode);
2834       trial.cost.cost += step_cost;
2835       trial.cost.latency += step_cost;
2836       if (CHEAPER_MULT_COST (&trial.cost, &alg->cost))
2837         {
2838           *alg = trial;
2839           *variant = negate_variant;
2840         }
2841     }
2842
2843   /* Candidate 3: VAL - 1 plus an add; useful for division-by-constant.  */
2844   step_cost = add_cost (speed, mode);
2845   base_cost = base_latency = mult_cost;
2846   if (MULT_COST_LESS (&alg->cost, mult_cost))
2847     {
2848       base_cost = alg->cost.cost;
2849       base_latency = alg->cost.latency;
2850     }
2851   bound.cost = base_cost - step_cost;
2852   bound.latency = base_latency - step_cost;
2853   synth_mult (&trial, val - 1, &bound, mode);
2854   trial.cost.cost += step_cost;
2855   trial.cost.latency += step_cost;
2856   if (CHEAPER_MULT_COST (&trial.cost, &alg->cost))
2857     {
2858       *alg = trial;
2859       *variant = add_variant;
2860     }
2861
2862   return MULT_COST_LESS (&alg->cost, mult_cost);
2863 }
2864
2865 /* A subroutine of expand_mult, used for constant multiplications.
2866    Multiply OP0 by VAL in mode MODE following the shift/add sequence ALG
2867    and the final fixup VARIANT; TARGET is used for the result if convenient.
2868    Return the rtx holding the product; asserts that the sequence yields VAL.  */
2869
2870 static rtx
2871 expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
2872                    rtx target, const struct algorithm *alg,
2873                    enum mult_variant variant)
2874 {
2875   unsigned HOST_WIDE_INT val_so_far;  /* Unsigned so shifts wrap, never overflow.  */
2876   rtx insn, accum, tem;
2877   int opno;
2878   enum machine_mode nmode;
2879
2880   /* Avoid referencing memory over and over and invalid sharing
2881      on SUBREGs.  */
2882   op0 = force_reg (mode, op0);
2883
2884   /* ACCUM starts out either as OP0 or as a zero, depending on
2885      the first operation.  */
2886
2887   if (alg->op[0] == alg_zero)
2888     {
2889       accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
2890       val_so_far = 0;
2891     }
2892   else if (alg->op[0] == alg_m)
2893     {
2894       accum = copy_to_mode_reg (mode, op0);
2895       val_so_far = 1;
2896     }
2897   else
2898     gcc_unreachable ();
2899
2900   for (opno = 1; opno < alg->ops; opno++)
2901     {
2902       int log = alg->log[opno];
2903       rtx shift_subtarget = optimize ? 0 : accum;
2904       rtx add_target
2905         = (opno == alg->ops - 1 && target != 0 && variant != add_variant
2906            && !optimize)
2907           ? target : 0;
2908       rtx accum_target = optimize ? 0 : accum;
2909       rtx accum_inner;
2910
2911       switch (alg->op[opno])
2912         {
2913         case alg_shift:  /* accum = accum << log  */
2914           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2915           /* REG_EQUAL note will be attached to the following insn.  */
2916           emit_move_insn (accum, tem);
2917           val_so_far <<= log;
2918           break;
2919
2920         case alg_add_t_m2:  /* accum += op0 << log  */
2921           tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
2922           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2923                                  add_target ? add_target : accum_target);
2924           val_so_far += (unsigned HOST_WIDE_INT) 1 << log;
2925           break;
2926
2927         case alg_sub_t_m2:  /* accum -= op0 << log  */
2928           tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
2929           accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
2930                                  add_target ? add_target : accum_target);
2931           val_so_far -= (unsigned HOST_WIDE_INT) 1 << log;
2932           break;
2933
2934         case alg_add_t2_m:  /* accum = (accum << log) + op0  */
2935           accum = expand_shift (LSHIFT_EXPR, mode, accum,
2936                                 log, shift_subtarget, 0);
2937           accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
2938                                  add_target ? add_target : accum_target);
2939           val_so_far = (val_so_far << log) + 1;
2940           break;
2941
2942         case alg_sub_t2_m:  /* accum = (accum << log) - op0  */
2943           accum = expand_shift (LSHIFT_EXPR, mode, accum,
2944                                 log, shift_subtarget, 0);
2945           accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
2946                                  add_target ? add_target : accum_target);
2947           val_so_far = (val_so_far << log) - 1;
2948           break;
2949
2950         case alg_add_factor:  /* accum += accum << log  */
2951           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2952           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2953                                  add_target ? add_target : accum_target);
2954           val_so_far += val_so_far << log;
2955           break;
2956
2957         case alg_sub_factor:  /* accum = (accum << log) - accum  */
2958           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2959           accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
2960                                  (add_target
2961                                   ? add_target : (optimize ? 0 : tem)));
2962           val_so_far = (val_so_far << log) - val_so_far;
2963           break;
2964
2965         default:
2966           gcc_unreachable ();
2967         }
2968
2969       if (SCALAR_INT_MODE_P (mode))
2970         {
2971           /* Write a REG_EQUAL note on the last insn so that we can cse
2972              multiplication sequences.  Note that if ACCUM is a SUBREG,
2973              we've set the inner register and must properly indicate that.  */
2974           tem = op0, nmode = mode;
2975           accum_inner = accum;
2976           if (GET_CODE (accum) == SUBREG)
2977             {
2978               accum_inner = SUBREG_REG (accum);
2979               nmode = GET_MODE (accum_inner);
2980               tem = gen_lowpart (nmode, op0);
2981             }
2982
2983           insn = get_last_insn ();
2984           set_dst_reg_note (insn, REG_EQUAL,
2985                             gen_rtx_MULT (nmode, tem,
2986                                           gen_int_mode (val_so_far, nmode)),
2987                             accum_inner);
2988         }
2989     }
2990
2991   if (variant == negate_variant)
2992     {
2993       val_so_far = -val_so_far;
2994       accum = expand_unop (mode, neg_optab, accum, target, 0);
2995     }
2996   else if (variant == add_variant)
2997     {
2998       val_so_far = val_so_far + 1;
2999       accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3000     }
3001
3002   /* Compare only the bits of val and val_so_far that are significant
3003      in the result mode, to avoid sign-/zero-extension confusion.  */
3004   nmode = GET_MODE_INNER (mode);
3005   if (nmode == VOIDmode)
3006     nmode = mode;
3007   val &= GET_MODE_MASK (nmode);
3008   val_so_far &= GET_MODE_MASK (nmode);
3009   gcc_assert (val == (HOST_WIDE_INT) val_so_far);
3010
3011   return accum;
3012 }
3013
3014 /* Perform a multiplication and return an rtx for the result.
3015    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3016    TARGET is a suggestion for where to store the result (an rtx).
3017
3018    We check specially for a constant integer as OP1.
3019    If you want this check for OP0 as well, then before calling
3020    you should swap the two operands if OP0 would be constant.  */
3021
3022 rtx
3023 expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3024              int unsignedp)
3025 {
3026   enum mult_variant variant;
3027   struct algorithm algorithm;
3028   rtx scalar_op1;
3029   int max_cost;
3030   bool speed = optimize_insn_for_speed_p ();
3031   bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3032
3033   if (CONSTANT_P (op0))
3034     {
3035       rtx temp = op0;
3036       op0 = op1;
3037       op1 = temp;
3038     }
3039
3040   /* For vectors, there are several simplifications that can be made if
3041      all elements of the vector constant are identical.  */
3042   scalar_op1 = op1;
3043   if (GET_CODE (op1) == CONST_VECTOR)
3044     {
3045       int i, n = CONST_VECTOR_NUNITS (op1);
3046       scalar_op1 = CONST_VECTOR_ELT (op1, 0);
3047       for (i = 1; i < n; ++i)
3048         if (!rtx_equal_p (scalar_op1, CONST_VECTOR_ELT (op1, i)))
3049           goto skip_scalar;
3050     }
3051
3052   if (INTEGRAL_MODE_P (mode))
3053     {
3054       rtx fake_reg;
3055       HOST_WIDE_INT coeff;
3056       bool is_neg;
3057       int mode_bitsize;
3058
3059       if (op1 == CONST0_RTX (mode))
3060         return op1;
3061       if (op1 == CONST1_RTX (mode))
3062         return op0;
3063       if (op1 == CONSTM1_RTX (mode))
3064         return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3065                             op0, target, 0);
3066
3067       if (do_trapv)
3068         goto skip_synth;
3069
3070       /* These are the operations that are potentially turned into
3071          a sequence of shifts and additions.  */
3072       mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3073
3074       /* synth_mult does an `unsigned int' multiply.  As long as the mode is
3075          less than or equal in size to `unsigned int' this doesn't matter.
3076          If the mode is larger than `unsigned int', then synth_mult works
3077          only if the constant value exactly fits in an `unsigned int' without
3078          any truncation.  This means that multiplying by negative values does
3079          not work; results are off by 2^32 on a 32 bit machine.  */
3080
3081       if (CONST_INT_P (scalar_op1))
3082         {
3083           coeff = INTVAL (scalar_op1);
3084           is_neg = coeff < 0;
3085         }
3086       else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3087         {
3088           /* If we are multiplying in DImode, it may still be a win
3089              to try to work with shifts and adds.  */
3090           if (CONST_DOUBLE_HIGH (scalar_op1) == 0
3091               && (CONST_DOUBLE_LOW (scalar_op1) > 0
3092                   || (CONST_DOUBLE_LOW (scalar_op1) < 0
3093                       && EXACT_POWER_OF_2_OR_ZERO_P
3094                            (CONST_DOUBLE_LOW (scalar_op1)))))
3095             {
3096               coeff = CONST_DOUBLE_LOW (scalar_op1);
3097               is_neg = false;
3098             }
3099           else if (CONST_DOUBLE_LOW (scalar_op1) == 0)
3100             {
3101               coeff = CONST_DOUBLE_HIGH (scalar_op1);
3102               if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3103                 {
3104                   int shift = floor_log2 (coeff) + HOST_BITS_PER_WIDE_INT;
3105                   if (shift < HOST_BITS_PER_DOUBLE_INT - 1
3106                       || mode_bitsize <= HOST_BITS_PER_DOUBLE_INT)
3107                     return expand_shift (LSHIFT_EXPR, mode, op0,
3108                                          shift, target, unsignedp);
3109                 }
3110               goto skip_synth;
3111             }
3112           else
3113             goto skip_synth;
3114         }
3115       else
3116         goto skip_synth;
3117
3118       /* We used to test optimize here, on the grounds that it's better to
3119          produce a smaller program when -O is not used.  But this causes
3120          such a terrible slowdown sometimes that it seems better to always
3121          use synth_mult.  */
3122
3123       /* Special case powers of two.  */
3124       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
3125           && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
3126         return expand_shift (LSHIFT_EXPR, mode, op0,
3127                              floor_log2 (coeff), target, unsignedp);
3128
3129       fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3130
3131       /* Attempt to handle multiplication of DImode values by negative
3132          coefficients, by performing the multiplication by a positive
3133          multiplier and then inverting the result.  */
3134       if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3135         {
3136           /* Its safe to use -coeff even for INT_MIN, as the
3137              result is interpreted as an unsigned coefficient.
3138              Exclude cost of op0 from max_cost to match the cost
3139              calculation of the synth_mult.  */
3140           coeff = -(unsigned HOST_WIDE_INT) coeff;
3141           max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed)
3142                       - neg_cost (speed, mode));
3143           if (max_cost <= 0)
3144             goto skip_synth;
3145
3146           /* Special case powers of two.  */
3147           if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3148             {
3149               rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
3150                                        floor_log2 (coeff), target, unsignedp);
3151               return expand_unop (mode, neg_optab, temp, target, 0);
3152             }
3153
3154           if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3155                                    max_cost))
3156             {
3157               rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
3158                                             &algorithm, variant);
3159               return expand_unop (mode, neg_optab, temp, target, 0);
3160             }
3161           goto skip_synth;
3162         }
3163
3164       /* Exclude cost of op0 from max_cost to match the cost
3165          calculation of the synth_mult.  */
3166       max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed);
3167       if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3168         return expand_mult_const (mode, op0, coeff, target,
3169                                   &algorithm, variant);
3170     }
3171  skip_synth:
3172
3173   /* Expand x*2.0 as x+x.  */
3174   if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1))
3175     {
3176       REAL_VALUE_TYPE d;
3177       REAL_VALUE_FROM_CONST_DOUBLE (d, scalar_op1);
3178
3179       if (REAL_VALUES_EQUAL (d, dconst2))
3180         {
3181           op0 = force_reg (GET_MODE (op0), op0);
3182           return expand_binop (mode, add_optab, op0, op0,
3183                                target, unsignedp, OPTAB_LIB_WIDEN);
3184         }
3185     }
3186  skip_scalar:
3187
3188   /* This used to use umul_optab if unsigned, but for non-widening multiply
3189      there is no difference between signed and unsigned.  */
3190   op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3191                       op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3192   gcc_assert (op0);
3193   return op0;
3194 }
3195
3196 /* Return a cost estimate for multiplying a register by the given
3197    COEFFicient in the given MODE and SPEED.  */
3198
3199 int
3200 mult_by_coeff_cost (HOST_WIDE_INT coeff, enum machine_mode mode, bool speed)
3201 {
3202   int max_cost;
3203   struct algorithm algorithm;
3204   enum mult_variant variant;
3205
3206   rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3207   max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg), speed);
3208   if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3209     return algorithm.cost.cost;
3210   else
3211     return max_cost;
3212 }
3213
3214 /* Perform a widening multiplication and return an rtx for the result.
3215    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3216    TARGET is a suggestion for where to store the result (an rtx).
3217    THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3218    or smul_widen_optab.
3219
3220    We check specially for a constant integer as OP1, comparing the
3221    cost of a widening multiply against the cost of a sequence of shifts
3222    and adds.  */
3223
3224 rtx
3225 expand_widening_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3226                       int unsignedp, optab this_optab)
3227 {
3228   bool speed = optimize_insn_for_speed_p ();
3229   rtx cop1;
3230
3231   if (CONST_INT_P (op1)
3232       && GET_MODE (op0) != VOIDmode
3233       && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3234                                 this_optab == umul_widen_optab))
3235       && CONST_INT_P (cop1)
3236       && (INTVAL (cop1) >= 0
3237           || HWI_COMPUTABLE_MODE_P (mode)))
3238     {
3239       HOST_WIDE_INT coeff = INTVAL (cop1);
3240       int max_cost;
3241       enum mult_variant variant;
3242       struct algorithm algorithm;
3243
3244       /* Special case powers of two.  */
3245       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3246         {
3247           op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3248           return expand_shift (LSHIFT_EXPR, mode, op0,
3249                                floor_log2 (coeff), target, unsignedp);
3250         }
3251
3252       /* Exclude cost of op0 from max_cost to match the cost
3253          calculation of the synth_mult.  */
3254       max_cost = mul_widen_cost (speed, mode);
3255       if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3256                                max_cost))
3257         {
3258           op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3259           return expand_mult_const (mode, op0, coeff, target,
3260                                     &algorithm, variant);
3261         }
3262     }
3263   return expand_binop (mode, this_optab, op0, op1, target,
3264                        unsignedp, OPTAB_LIB_WIDEN);
3265 }
3266 \f
/* Choose a minimal N + 1 bit approximation to 1/D that can be used to
   replace division by D, and put the least significant N bits of the result
   in *MULTIPLIER_PTR and return the most significant bit.

   The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
   needed precision is in PRECISION (should be <= N).

   PRECISION should be as small as possible so this function can choose
   multiplier more freely.

   The rounded-up logarithm of D is placed in *LGUP_PTR.  A shift count that
   is to be used for a final right shift is placed in *POST_SHIFT_PTR.

   Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
   where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.

   The function asserts that ceil_log2 (D) <= N and that
   N + ceil_log2 (D) != HOST_BITS_PER_DOUBLE_INT; callers must avoid
   inputs that violate either condition.  */

unsigned HOST_WIDE_INT
choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
                   unsigned HOST_WIDE_INT *multiplier_ptr,
                   int *post_shift_ptr, int *lgup_ptr)
{
  /* Lower and upper candidate multipliers, kept as two-word values.  */
  double_int mhigh, mlow;
  int lgup, post_shift;
  /* Exponents of the two powers of two that form the dividends below.  */
  int pow, pow2;

  /* lgup = ceil(log2(divisor)); */
  lgup = ceil_log2 (d);

  gcc_assert (lgup <= n);

  pow = n + lgup;
  pow2 = n + lgup - precision;

  /* We could handle this with some effort, but this case is much
     better handled directly with a scc insn, so rely on caller using
     that.  */
  gcc_assert (pow != HOST_BITS_PER_DOUBLE_INT);

  /* mlow = 2^(N + lgup)/d */
  double_int val = double_int_zero.set_bit (pow);
  mlow = val.div (double_int::from_uhwi (d), true, TRUNC_DIV_EXPR);

  /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
  val |= double_int_zero.set_bit (pow2);
  mhigh = val.div (double_int::from_uhwi (d), true, TRUNC_DIV_EXPR);

  /* Sanity checks on the quotients: neither may need more than one bit
     of the high word.  */
  gcc_assert (!mhigh.high || val.high - d < d);
  gcc_assert (mhigh.high <= 1 && mlow.high <= 1);
  /* Assert that mlow < mhigh.  */
  gcc_assert (mlow.ult (mhigh));

  /* If precision == N, then mlow, mhigh exceed 2^N
     (but they do not exceed 2^(N+1)).  */

  /* Reduce to lowest terms.  Each iteration computes the low word of
     mlow and mhigh shifted right by one bit; the shifted values are
     adopted, and POST_SHIFT decremented, only while the shifted mlow is
     still strictly below the shifted mhigh.  */
  for (post_shift = lgup; post_shift > 0; post_shift--)
    {
      int shft = HOST_BITS_PER_WIDE_INT - 1;
      unsigned HOST_WIDE_INT ml_lo = (mlow.high << shft) | (mlow.low >> 1);
      unsigned HOST_WIDE_INT mh_lo = (mhigh.high << shft) | (mhigh.low >> 1);
      if (ml_lo >= mh_lo)
        break;

      mlow = double_int::from_uhwi (ml_lo);
      mhigh = double_int::from_uhwi (mh_lo);
    }

  *post_shift_ptr = post_shift;
  *lgup_ptr = lgup;
  if (n < HOST_BITS_PER_WIDE_INT)
    {
      /* The multiplier fits in the low word: store its low N bits and
         report whether the low word reaches the N-bit mask.  */
      unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
      *multiplier_ptr = mhigh.low & mask;
      return mhigh.low >= mask;
    }
  else
    {
      /* N covers the whole low word, so the extra bit is the high word.  */
      *multiplier_ptr = mhigh.low;
      return mhigh.high;
    }
}
3348
3349 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3350    congruent to 1 (mod 2**N).  */
3351
3352 static unsigned HOST_WIDE_INT
3353 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3354 {
3355   /* Solve x*y == 1 (mod 2^n), where x is odd.  Return y.  */
3356
3357   /* The algorithm notes that the choice y = x satisfies
3358      x*y == 1 mod 2^3, since x is assumed odd.
3359      Each iteration doubles the number of bits of significance in y.  */
3360
3361   unsigned HOST_WIDE_INT mask;
3362   unsigned HOST_WIDE_INT y = x;
3363   int nbit = 3;
3364
3365   mask = (n == HOST_BITS_PER_WIDE_INT
3366           ? ~(unsigned HOST_WIDE_INT) 0
3367           : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3368
3369   while (nbit < n)
3370     {
3371       y = y * (2 - x*y) & mask;         /* Modulo 2^N */
3372       nbit *= 2;
3373     }
3374   return y;
3375 }
3376
3377 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3378    flavor of OP0 and OP1.  ADJ_OPERAND is already the high half of the
3379    product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed product
3380    to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3381    become signed.
3382
3383    The result is put in TARGET if that is convenient.
3384
3385    MODE is the mode of operation.  */
3386
3387 rtx
3388 expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
3389                              rtx op1, rtx target, int unsignedp)
3390 {
3391   rtx tem;
3392   enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3393
3394   tem = expand_shift (RSHIFT_EXPR, mode, op0,
3395                       GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3396   tem = expand_and (mode, tem, op1, NULL_RTX);
3397   adj_operand
3398     = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3399                      adj_operand);
3400
3401   tem = expand_shift (RSHIFT_EXPR, mode, op1,
3402                       GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3403   tem = expand_and (mode, tem, op0, NULL_RTX);
3404   target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3405                           target);
3406
3407   return target;
3408 }
3409
3410 /* Subroutine of expmed_mult_highpart.  Return the MODE high part of OP.  */
3411
3412 static rtx
3413 extract_high_half (enum machine_mode mode, rtx op)
3414 {
3415   enum machine_mode wider_mode;
3416
3417   if (mode == word_mode)
3418     return gen_highpart (mode, op);
3419
3420   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3421
3422   wider_mode = GET_MODE_WIDER_MODE (mode);
3423   op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3424                      GET_MODE_BITSIZE (mode), 0, 1);
3425   return convert_modes (mode, wider_mode, op, 0);
3426 }
3427
/* Like expmed_mult_highpart, but only consider using a multiplication
   optab.  OP1 is an rtx for the constant operand.

   MODE is the mode of the operation and of the returned high part, OP0
   is the other operand, TARGET is a suggested destination, UNSIGNEDP is
   nonzero for an unsigned multiply, and MAX_COST is the cost limit: a
   strategy is attempted only when its estimated cost is strictly below
   MAX_COST.

   The strategies are tried in this order and the first one that yields
   a result is used:
     1. a highpart multiply of the requested signedness;
     2. a highpart multiply of the opposite signedness, corrected with
        expand_mult_highpart_adjust;
     3. a widening multiply of the requested signedness;
     4. an ordinary multiply in the wider mode on explicitly widened
        operands;
     5. a widening multiply of the opposite signedness, corrected with
        expand_mult_highpart_adjust.

   Return 0 if no strategy produced a result.  */

static rtx
expmed_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
                            rtx target, int unsignedp, int max_cost)
{
  /* OP1 re-expressed as a constant of mode MODE.  */
  rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
  enum machine_mode wider_mode;
  optab moptab;
  rtx tem;
  int size;
  bool speed = optimize_insn_for_speed_p ();

  gcc_assert (!SCALAR_FLOAT_MODE_P (mode));

  wider_mode = GET_MODE_WIDER_MODE (mode);
  size = GET_MODE_BITSIZE (mode);

  /* Firstly, try using a multiplication insn that only generates the needed
     high part of the product, and in the sign flavor of unsignedp.  */
  if (mul_highpart_cost (speed, mode) < max_cost)
    {
      moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
      tem = expand_binop (mode, moptab, op0, narrow_op1, target,
                          unsignedp, OPTAB_DIRECT);
      if (tem)
        return tem;
    }

  /* Secondly, same as above, but use sign flavor opposite of unsignedp.
     Need to adjust the result after the multiplication.  The cost test
     charges two shifts and four additions for that adjustment.  */
  if (size - 1 < BITS_PER_WORD
      && (mul_highpart_cost (speed, mode)
          + 2 * shift_cost (speed, mode, size-1)
          + 4 * add_cost (speed, mode) < max_cost))
    {
      moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
      tem = expand_binop (mode, moptab, op0, narrow_op1, target,
                          unsignedp, OPTAB_DIRECT);
      if (tem)
        /* We used the wrong signedness.  Adjust the result.  */
        return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
                                            tem, unsignedp);
    }

  /* Try widening multiplication.  */
  moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
  if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
      && mul_widen_cost (speed, wider_mode) < max_cost)
    {
      tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
                          unsignedp, OPTAB_WIDEN);
      if (tem)
        return extract_high_half (mode, tem);
    }

  /* Try widening the mode and perform a non-widening multiplication.  */
  if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
      && size - 1 < BITS_PER_WORD
      && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
          < max_cost))
    {
      rtx insns, wop0, wop1;

      /* We need to widen the operands, for example to ensure the
         constant multiplier is correctly sign or zero extended.
         Use a sequence to clean-up any instructions emitted by
         the conversions if things don't work out.  Note that the
         original OP1, not NARROW_OP1, is what gets widened here.  */
      start_sequence ();
      wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
      wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
      tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
                          unsignedp, OPTAB_WIDEN);
      insns = get_insns ();
      end_sequence ();

      /* Only emit the collected insns when the multiply succeeded;
         otherwise they are simply not emitted.  */
      if (tem)
        {
          emit_insn (insns);
          return extract_high_half (mode, tem);
        }
    }

  /* Try widening multiplication of opposite signedness, and adjust.  */
  moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
  if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
      && size - 1 < BITS_PER_WORD
      && (mul_widen_cost (speed, wider_mode)
          + 2 * shift_cost (speed, mode, size-1)
          + 4 * add_cost (speed, mode) < max_cost))
    {
      tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
                          NULL_RTX, ! unsignedp, OPTAB_WIDEN);
      if (tem != 0)
        {
          tem = extract_high_half (mode, tem);
          /* We used the wrong signedness.  Adjust the result.  */
          return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
                                              target, unsignedp);
        }
    }

  /* Nothing was both available and cheap enough.  */
  return 0;
}
3533
3534 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3535    putting the high half of the result in TARGET if that is convenient,
3536    and return where the result is.  If the operation can not be performed,
3537    0 is returned.
3538
3539    MODE is the mode of operation and result.
3540
3541    UNSIGNEDP nonzero means unsigned multiply.
3542
3543    MAX_COST is the total allowed cost for the expanded RTL.  */
3544
3545 static rtx
3546 expmed_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
3547                       rtx target, int unsignedp, int max_cost)
3548 {
3549   enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3550   unsigned HOST_WIDE_INT cnst1;
3551   int extra_cost;
3552   bool sign_adjust = false;
3553   enum mult_variant variant;
3554   struct algorithm alg;
3555   rtx tem;
3556   bool speed = optimize_insn_for_speed_p ();
3557
3558   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3559   /* We can't support modes wider than HOST_BITS_PER_INT.  */
3560   gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3561
3562   cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3563
3564   /* We can't optimize modes wider than BITS_PER_WORD.
3565      ??? We might be able to perform double-word arithmetic if
3566      mode == word_mode, however all the cost calculations in
3567      synth_mult etc. assume single-word operations.  */
3568   if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3569     return expmed_mult_highpart_optab (mode, op0, op1, target,
3570                                        unsignedp, max_cost);
3571
3572   extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3573
3574   /* Check whether we try to multiply by a negative constant.  */
3575   if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3576     {
3577       sign_adjust = true;
3578       extra_cost += add_cost (speed, mode);
3579     }
3580
3581   /* See whether shift/add multiplication is cheap enough.  */
3582   if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3583                            max_cost - extra_cost))
3584     {
3585       /* See whether the specialized multiplication optabs are
3586          cheaper than the shift/add version.  */
3587       tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3588                                         alg.cost.cost + extra_cost);
3589       if (tem)
3590         return tem;
3591
3592       tem = convert_to_mode (wider_mode, op0, unsignedp);
3593       tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3594       tem = extract_high_half (mode, tem);
3595
3596       /* Adjust result for signedness.  */
3597       if (sign_adjust)
3598         tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3599
3600       return tem;
3601     }
3602   return expmed_mult_highpart_optab (mode, op0, op1, target,
3603                                      unsignedp, max_cost);
3604 }
3605
3606
/* Expand signed modulus of OP0 by a power of two D in mode MODE.

   Return an rtx holding the remainder.  A branch-free sequence is used
   when branches are costly, the insn is optimized for speed and a sign
   mask of OP0 can be produced with emit_store_flag; otherwise a
   sequence with one conditional jump is emitted.  */

static rtx
expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
{
  unsigned HOST_WIDE_INT masklow, maskhigh;
  rtx result, temp, shift, label;
  int logd;

  logd = floor_log2 (d);
  result = gen_reg_rtx (mode);

  /* Avoid conditional branches when they're expensive.  */
  if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
      && optimize_insn_for_speed_p ())
    {
      /* NOTE(review): with the final argument -1 the flag is presumably
         normalized to -1 when OP0 < 0 and 0 otherwise -- confirm against
         emit_store_flag.  */
      rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
                                      mode, 0, -1);
      if (signmask)
        {
          signmask = force_reg (mode, signmask);
          /* The low LOGD bits, i.e. D - 1.  */
          masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
          shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);

          /* Use the rtx_cost of a LSHIFTRT instruction to determine
             which instruction sequence to use.  If logical right shifts
             are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
             use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */

          /* This rtx is built only so that its cost can be queried.  */
          temp = gen_rtx_LSHIFTRT (mode, result, shift);
          if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
              || (set_src_cost (temp, optimize_insn_for_speed_p ())
                  > COSTS_N_INSNS (2)))
            {
              /* ((((OP0 ^ SIGNMASK) - SIGNMASK) & MASKLOW) ^ SIGNMASK)
                 - SIGNMASK.  */
              temp = expand_binop (mode, xor_optab, op0, signmask,
                                   NULL_RTX, 1, OPTAB_LIB_WIDEN);
              temp = expand_binop (mode, sub_optab, temp, signmask,
                                   NULL_RTX, 1, OPTAB_LIB_WIDEN);
              temp = expand_binop (mode, and_optab, temp,
                                   gen_int_mode (masklow, mode),
                                   NULL_RTX, 1, OPTAB_LIB_WIDEN);
              temp = expand_binop (mode, xor_optab, temp, signmask,
                                   NULL_RTX, 1, OPTAB_LIB_WIDEN);
              temp = expand_binop (mode, sub_optab, temp, signmask,
                                   NULL_RTX, 1, OPTAB_LIB_WIDEN);
            }
          else
            {
              /* Shift SIGNMASK right logically by (bitsize - LOGD),
                 then compute ((OP0 + SIGNMASK) & MASKLOW) - SIGNMASK.  */
              signmask = expand_binop (mode, lshr_optab, signmask, shift,
                                       NULL_RTX, 1, OPTAB_LIB_WIDEN);
              signmask = force_reg (mode, signmask);

              temp = expand_binop (mode, add_optab, op0, signmask,
                                   NULL_RTX, 1, OPTAB_LIB_WIDEN);
              temp = expand_binop (mode, and_optab, temp,
                                   gen_int_mode (masklow, mode),
                                   NULL_RTX, 1, OPTAB_LIB_WIDEN);
              temp = expand_binop (mode, sub_optab, temp, signmask,
                                   NULL_RTX, 1, OPTAB_LIB_WIDEN);
            }
          return temp;
        }
    }

  /* Mask contains the mode's signbit and the significant bits of the
     modulus.  By including the signbit in the operation, many targets
     can avoid an explicit compare operation in the following comparison
     against zero.  */

  masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
  if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
    {
      /* The sign bit lives in the low word of the constant.  */
      masklow |= HOST_WIDE_INT_M1U << (GET_MODE_BITSIZE (mode) - 1);
      maskhigh = -1;
    }
  else
    /* The sign bit lives in the high word of the constant.  */
    maskhigh = HOST_WIDE_INT_M1U
                 << (GET_MODE_BITSIZE (mode) - HOST_BITS_PER_WIDE_INT - 1);

  temp = expand_binop (mode, and_optab, op0,
                       immed_double_const (masklow, maskhigh, mode),
                       result, 1, OPTAB_LIB_WIDEN);
  if (temp != result)
    emit_move_insn (result, temp);

  /* A non-negative masked value is already the answer; jump over the
     fix-up code.  */
  label = gen_label_rtx ();
  do_cmp_and_jump (result, const0_rtx, GE, mode, label);

  /* Otherwise compute ((RESULT - 1) | (-1 << LOGD)) + 1.  */
  temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
                       0, OPTAB_LIB_WIDEN);
  masklow = HOST_WIDE_INT_M1U << logd;
  maskhigh = -1;
  temp = expand_binop (mode, ior_optab, temp,
                       immed_double_const (masklow, maskhigh, mode),
                       result, 1, OPTAB_LIB_WIDEN);
  temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
                       0, OPTAB_LIB_WIDEN);
  if (temp != result)
    emit_move_insn (result, temp);
  emit_label (label);
  return result;
}
3709
3710 /* Expand signed division of OP0 by a power of two D in mode MODE.
3711    This routine is only called for positive values of D.  */
3712
3713 static rtx
3714 expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3715 {
3716   rtx temp, label;
3717   int logd;
3718
3719   logd = floor_log2 (d);
3720
3721   if (d == 2
3722       && BRANCH_COST (optimize_insn_for_speed_p (),
3723                       false) >= 1)
3724     {
3725       temp = gen_reg_rtx (mode);
3726       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3727       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3728                            0, OPTAB_LIB_WIDEN);
3729       return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3730     }
3731
3732 #ifdef HAVE_conditional_move
3733   if (BRANCH_COST (optimize_insn_for_speed_p (), false)
3734       >= 2)
3735     {
3736       rtx temp2;
3737
3738       /* ??? emit_conditional_move forces a stack adjustment via
3739          compare_from_rtx so, if the sequence is discarded, it will
3740          be lost.  Do it now instead.  */
3741       do_pending_stack_adjust ();
3742
3743       start_sequence ();
3744       temp2 = copy_to_mode_reg (mode, op0);
3745       temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
3746                            NULL_RTX, 0, OPTAB_LIB_WIDEN);
3747       temp = force_reg (mode, temp);
3748
3749       /* Construct "temp2 = (temp2 < 0) ? temp : temp2".  */
3750       temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3751                                      mode, temp, temp2, mode, 0);
3752       if (temp2)
3753         {
3754           rtx seq = get_insns ();
3755           end_sequence ();
3756           emit_insn (seq);
3757           return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
3758         }
3759       end_sequence ();
3760     }
3761 #endif
3762
3763   if (BRANCH_COST (optimize_insn_for_speed_p (),
3764                    false) >= 2)
3765     {
3766       int ushift = GET_MODE_BITSIZE (mode) - logd;
3767
3768       temp = gen_reg_rtx (mode);
3769       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3770       if (shift_cost (optimize_insn_for_speed_p (), mode, ushift)
3771           > COSTS_N_INSNS (1))
3772         temp = expand_binop (mode, and_optab, temp, gen_int_mode (d - 1, mode),
3773                              NULL_RTX, 0, OPTAB_LIB_WIDEN);
3774       else
3775         temp = expand_shift (RSHIFT_EXPR, mode, temp,
3776                              ushift, NULL_RTX, 1);
3777       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3778                            0, OPTAB_LIB_WIDEN);
3779       return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3780     }
3781
3782   label = gen_label_rtx ();
3783   temp = copy_to_mode_reg (mode, op0);
3784   do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3785   expand_inc (temp, gen_int_mode (d - 1, mode));
3786   emit_label (label);
3787   return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3788 }
3789 \f
3790 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3791    if that is convenient, and returning where the result is.
3792    You may request either the quotient or the remainder as the result;
3793    specify REM_FLAG nonzero to get the remainder.
3794
3795    CODE is the expression code for which kind of division this is;
3796    it controls how rounding is done.  MODE is the machine mode to use.
3797    UNSIGNEDP nonzero means do unsigned division.  */
3798
3799 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3800    and then correct it by or'ing in missing high bits
3801    if result of ANDI is nonzero.
3802    For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3803    This could optimize to a bfexts instruction.
3804    But C doesn't use these operations, so their optimizations are
3805    left for later.  */
3806 /* ??? For modulo, we don't actually need the highpart of the first product,
3807    the low part will do nicely.  And for small divisors, the second multiply
3808    can also be a low-part only multiply or even be completely left out.
3809    E.g. to calculate the remainder of a division by 3 with a 32 bit
3810    multiply, multiply with 0x55555556 and extract the upper two bits;
3811    the result is exact for inputs up to 0x1fffffff.
3812    The input range can be reduced by using cross-sum rules.
3813    For odd divisors >= 3, the following table gives right shift counts
3814    so that if a number is shifted by an integer multiple of the given
3815    amount, the remainder stays the same:
3816    2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3817    14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3818    0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3819    20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3820    0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3821
3822    Cross-sum rules for even numbers can be derived by leaving as many bits
3823    to the right alone as the divisor has zeros to the right.
3824    E.g. if x is an unsigned 32 bit number:
3825    (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3826    */
3827
3828 rtx
3829 expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3830                rtx op0, rtx op1, rtx target, int unsignedp)
3831 {
3832   enum machine_mode compute_mode;
3833   rtx tquotient;
3834   rtx quotient = 0, remainder = 0;
3835   rtx last;
3836   int size;
3837   rtx insn;
3838   optab optab1, optab2;
3839   int op1_is_constant, op1_is_pow2 = 0;
3840   int max_cost, extra_cost;
3841   static HOST_WIDE_INT last_div_const = 0;
3842   bool speed = optimize_insn_for_speed_p ();
3843
3844   op1_is_constant = CONST_INT_P (op1);
3845   if (op1_is_constant)
3846     {
3847       unsigned HOST_WIDE_INT ext_op1 = UINTVAL (op1);
3848       if (unsignedp)
3849         ext_op1 &= GET_MODE_MASK (mode);
3850       op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3851                      || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3852     }
3853
3854   /*
3855      This is the structure of expand_divmod:
3856
3857      First comes code to fix up the operands so we can perform the operations
3858      correctly and efficiently.
3859
3860      Second comes a switch statement with code specific for each rounding mode.
3861      For some special operands this code emits all RTL for the desired
3862      operation, for other cases, it generates only a quotient and stores it in
3863      QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
3864      to indicate that it has not done anything.
3865
3866      Last comes code that finishes the operation.  If QUOTIENT is set and
3867      REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
3868      QUOTIENT is not set, it is computed using trunc rounding.
3869
3870      We try to generate special code for division and remainder when OP1 is a
3871      constant.  If |OP1| = 2**n we can use shifts and some other fast
3872      operations.  For other values of OP1, we compute a carefully selected
3873      fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3874      by m.
3875
3876      In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3877      half of the product.  Different strategies for generating the product are
3878      implemented in expmed_mult_highpart.
3879
3880      If what we actually want is the remainder, we generate that by another
3881      by-constant multiplication and a subtraction.  */
3882
3883   /* We shouldn't be called with OP1 == const1_rtx, but some of the
3884      code below will malfunction if we are, so check here and handle
3885      the special case if so.  */
3886   if (op1 == const1_rtx)
3887     return rem_flag ? const0_rtx : op0;
3888
3889   /* When dividing by -1, we could get an overflow.
3890      negv_optab can handle overflows.  */
3891   if (! unsignedp && op1 == constm1_rtx)
3892     {
3893       if (rem_flag)
3894         return const0_rtx;
3895       return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT
3896                           ? negv_optab : neg_optab, op0, target, 0);
3897     }
3898
3899   if (target
3900       /* Don't use the function value register as a target
3901          since we have to read it as well as write it,
3902          and function-inlining gets confused by this.  */
3903       && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3904           /* Don't clobber an operand while doing a multi-step calculation.  */
3905           || ((rem_flag || op1_is_constant)
3906               && (reg_mentioned_p (target, op0)
3907                   || (MEM_P (op0) && MEM_P (target))))
3908           || reg_mentioned_p (target, op1)
3909           || (MEM_P (op1) && MEM_P (target))))
3910     target = 0;
3911
3912   /* Get the mode in which to perform this computation.  Normally it will
3913      be MODE, but sometimes we can't do the desired operation in MODE.
3914      If so, pick a wider mode in which we can do the operation.  Convert
3915      to that mode at the start to avoid repeated conversions.
3916
3917      First see what operations we need.  These depend on the expression
3918      we are evaluating.  (We assume that divxx3 insns exist under the
3919      same conditions as modxx3 insns and that these insns don't normally
3920      fail.  If these assumptions are not correct, we may generate less
3921      efficient code in some cases.)
3922
3923      Then see if we find a mode in which we can open-code that operation
3924      (either a division, modulus, or shift).  Finally, check for the smallest
3925      mode for which we can do the operation with a library call.  */
3926
3927   /* We might want to refine this now that we have division-by-constant
3928      optimization.  Since expmed_mult_highpart tries so many variants, it is
3929      not straightforward to generalize this.  Maybe we should make an array
3930      of possible modes in init_expmed?  Save this for GCC 2.7.  */
3931
3932   optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3933             ? (unsignedp ? lshr_optab : ashr_optab)
3934             : (unsignedp ? udiv_optab : sdiv_optab));
3935   optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3936             ? optab1
3937             : (unsignedp ? udivmod_optab : sdivmod_optab));
3938
3939   for (compute_mode = mode; compute_mode != VOIDmode;
3940        compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3941     if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
3942         || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
3943       break;
3944
3945   if (compute_mode == VOIDmode)
3946     for (compute_mode = mode; compute_mode != VOIDmode;
3947          compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3948       if (optab_libfunc (optab1, compute_mode)
3949           || optab_libfunc (optab2, compute_mode))
3950         break;
3951
3952   /* If we still couldn't find a mode, use MODE, but expand_binop will
3953      probably die.  */
3954   if (compute_mode == VOIDmode)
3955     compute_mode = mode;
3956
3957   if (target && GET_MODE (target) == compute_mode)
3958     tquotient = target;
3959   else
3960     tquotient = gen_reg_rtx (compute_mode);
3961
3962   size = GET_MODE_BITSIZE (compute_mode);
3963 #if 0
3964   /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3965      (mode), and thereby get better code when OP1 is a constant.  Do that
3966      later.  It will require going over all usages of SIZE below.  */
3967   size = GET_MODE_BITSIZE (mode);
3968 #endif
3969
3970   /* Only deduct something for a REM if the last divide done was
3971      for a different constant.   Then set the constant of the last
3972      divide.  */
3973   max_cost = (unsignedp
3974               ? udiv_cost (speed, compute_mode)
3975               : sdiv_cost (speed, compute_mode));
3976   if (rem_flag && ! (last_div_const != 0 && op1_is_constant
3977                      && INTVAL (op1) == last_div_const))
3978     max_cost -= (mul_cost (speed, compute_mode)
3979                  + add_cost (speed, compute_mode));
3980
3981   last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
3982
3983   /* Now convert to the best mode to use.  */
3984   if (compute_mode != mode)
3985     {
3986       op0 = convert_modes (compute_mode, mode, op0, unsignedp);
3987       op1 = convert_modes (compute_mode, mode, op1, unsignedp);
3988
3989       /* convert_modes may have placed op1 into a register, so we
3990          must recompute the following.  */
3991       op1_is_constant = CONST_INT_P (op1);
3992       op1_is_pow2 = (op1_is_constant
3993                      && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
3994                           || (! unsignedp
3995                               && EXACT_POWER_OF_2_OR_ZERO_P (-UINTVAL (op1))))));
3996     }
3997
3998   /* If one of the operands is a volatile MEM, copy it into a register.  */
3999
4000   if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4001     op0 = force_reg (compute_mode, op0);
4002   if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4003     op1 = force_reg (compute_mode, op1);
4004
4005   /* If we need the remainder or if OP1 is constant, we need to
4006      put OP0 in a register in case it has any queued subexpressions.  */
4007   if (rem_flag || op1_is_constant)
4008     op0 = force_reg (compute_mode, op0);
4009
4010   last = get_last_insn ();
4011
4012   /* Promote floor rounding to trunc rounding for unsigned operations.  */
4013   if (unsignedp)
4014     {
4015       if (code == FLOOR_DIV_EXPR)
4016         code = TRUNC_DIV_EXPR;
4017       if (code == FLOOR_MOD_EXPR)
4018         code = TRUNC_MOD_EXPR;
4019       if (code == EXACT_DIV_EXPR && op1_is_pow2)
4020         code = TRUNC_DIV_EXPR;
4021     }
4022
4023   if (op1 != const0_rtx)
4024     switch (code)
4025       {
4026       case TRUNC_MOD_EXPR:
4027       case TRUNC_DIV_EXPR:
4028         if (op1_is_constant)
4029           {
4030             if (unsignedp)
4031               {
4032                 unsigned HOST_WIDE_INT mh, ml;
4033                 int pre_shift, post_shift;
4034                 int dummy;
4035                 unsigned HOST_WIDE_INT d = (INTVAL (op1)
4036                                             & GET_MODE_MASK (compute_mode));
4037
4038                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4039                   {
4040                     pre_shift = floor_log2 (d);
4041                     if (rem_flag)
4042                       {
4043                         unsigned HOST_WIDE_INT mask
4044                           = ((unsigned HOST_WIDE_INT) 1 << pre_shift) - 1;
4045                         remainder
4046                           = expand_binop (compute_mode, and_optab, op0,
4047                                           gen_int_mode (mask, compute_mode),
4048                                           remainder, 1,
4049                                           OPTAB_LIB_WIDEN);
4050                         if (remainder)
4051                           return gen_lowpart (mode, remainder);
4052                       }
4053                     quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4054                                              pre_shift, tquotient, 1);
4055                   }
4056                 else if (size <= HOST_BITS_PER_WIDE_INT)
4057                   {
4058                     if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
4059                       {
4060                         /* Most significant bit of divisor is set; emit an scc
4061                            insn.  */
4062                         quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4063                                                           compute_mode, 1, 1);
4064                       }
4065                     else
4066                       {
4067                         /* Find a suitable multiplier and right shift count
4068                            instead of multiplying with D.  */
4069
4070                         mh = choose_multiplier (d, size, size,
4071                                                 &ml, &post_shift, &dummy);
4072
4073                         /* If the suggested multiplier is more than SIZE bits,
4074                            we can do better for even divisors, using an
4075                            initial right shift.  */
4076                         if (mh != 0 && (d & 1) == 0)
4077                           {
4078                             pre_shift = floor_log2 (d & -d);
4079                             mh = choose_multiplier (d >> pre_shift, size,
4080                                                     size - pre_shift,
4081                                                     &ml, &post_shift, &dummy);
4082                             gcc_assert (!mh);
4083                           }
4084                         else
4085                           pre_shift = 0;
4086
4087                         if (mh != 0)
4088                           {
4089                             rtx t1, t2, t3, t4;
4090
4091                             if (post_shift - 1 >= BITS_PER_WORD)
4092                               goto fail1;
4093
4094                             extra_cost
4095                               = (shift_cost (speed, compute_mode, post_shift - 1)
4096                                  + shift_cost (speed, compute_mode, 1)
4097                                  + 2 * add_cost (speed, compute_mode));
4098                             t1 = expmed_mult_highpart
4099                               (compute_mode, op0,
4100                                gen_int_mode (ml, compute_mode),
4101                                NULL_RTX, 1, max_cost - extra_cost);
4102                             if (t1 == 0)
4103                               goto fail1;
4104                             t2 = force_operand (gen_rtx_MINUS (compute_mode,
4105                                                                op0, t1),
4106                                                 NULL_RTX);
4107                             t3 = expand_shift (RSHIFT_EXPR, compute_mode,
4108                                                t2, 1, NULL_RTX, 1);
4109                             t4 = force_operand (gen_rtx_PLUS (compute_mode,
4110                                                               t1, t3),
4111                                                 NULL_RTX);
4112                             quotient = expand_shift
4113                               (RSHIFT_EXPR, compute_mode, t4,
4114                                post_shift - 1, tquotient, 1);
4115                           }
4116                         else
4117                           {
4118                             rtx t1, t2;
4119
4120                             if (pre_shift >= BITS_PER_WORD
4121                                 || post_shift >= BITS_PER_WORD)
4122                               goto fail1;
4123
4124                             t1 = expand_shift
4125                               (RSHIFT_EXPR, compute_mode, op0,
4126                                pre_shift, NULL_RTX, 1);
4127                             extra_cost
4128                               = (shift_cost (speed, compute_mode, pre_shift)
4129                                  + shift_cost (speed, compute_mode, post_shift));
4130                             t2 = expmed_mult_highpart
4131                               (compute_mode, t1,
4132                                gen_int_mode (ml, compute_mode),
4133                                NULL_RTX, 1, max_cost - extra_cost);
4134                             if (t2 == 0)
4135                               goto fail1;
4136                             quotient = expand_shift
4137                               (RSHIFT_EXPR, compute_mode, t2,
4138                                post_shift, tquotient, 1);
4139                           }
4140                       }
4141                   }
4142                 else            /* Too wide mode to use tricky code */
4143                   break;
4144
4145                 insn = get_last_insn ();
4146                 if (insn != last)
4147                   set_dst_reg_note (insn, REG_EQUAL,
4148                                     gen_rtx_UDIV (compute_mode, op0, op1),
4149                                     quotient);
4150               }
4151             else                /* TRUNC_DIV, signed */
4152               {
4153                 unsigned HOST_WIDE_INT ml;
4154                 int lgup, post_shift;
4155                 rtx mlr;
4156                 HOST_WIDE_INT d = INTVAL (op1);
4157                 unsigned HOST_WIDE_INT abs_d;
4158
4159                 /* Since d might be INT_MIN, we have to cast to
4160                    unsigned HOST_WIDE_INT before negating to avoid
4161                    undefined signed overflow.  */
4162                 abs_d = (d >= 0
4163                          ? (unsigned HOST_WIDE_INT) d
4164                          : - (unsigned HOST_WIDE_INT) d);
4165
4166                 /* n rem d = n rem -d */
4167                 if (rem_flag && d < 0)
4168                   {
4169                     d = abs_d;
4170                     op1 = gen_int_mode (abs_d, compute_mode);
4171                   }
4172
4173                 if (d == 1)
4174                   quotient = op0;
4175                 else if (d == -1)
4176                   quotient = expand_unop (compute_mode, neg_optab, op0,
4177                                           tquotient, 0);
4178                 else if (HOST_BITS_PER_WIDE_INT >= size
4179                          && abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4180                   {
4181                     /* This case is not handled correctly below.  */
4182                     quotient = emit_store_flag (tquotient, EQ, op0, op1,
4183                                                 compute_mode, 1, 1);
4184                     if (quotient == 0)
4185                       goto fail1;
4186                   }
4187                 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4188                          && (rem_flag
4189                              ? smod_pow2_cheap (speed, compute_mode)
4190                              : sdiv_pow2_cheap (speed, compute_mode))
4191                          /* We assume that cheap metric is true if the
4192                             optab has an expander for this mode.  */
4193                          && ((optab_handler ((rem_flag ? smod_optab
4194                                               : sdiv_optab),
4195                                              compute_mode)
4196                               != CODE_FOR_nothing)
4197                              || (optab_handler (sdivmod_optab,
4198                                                 compute_mode)
4199                                  != CODE_FOR_nothing)))
4200                   ;
4201                 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4202                   {
4203                     if (rem_flag)
4204                       {
4205                         remainder = expand_smod_pow2 (compute_mode, op0, d);
4206                         if (remainder)
4207                           return gen_lowpart (mode, remainder);
4208                       }
4209
4210                     if (sdiv_pow2_cheap (speed, compute_mode)
4211                         && ((optab_handler (sdiv_optab, compute_mode)
4212                              != CODE_FOR_nothing)
4213                             || (optab_handler (sdivmod_optab, compute_mode)
4214                                 != CODE_FOR_nothing)))
4215                       quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4216                                                 compute_mode, op0,
4217                                                 gen_int_mode (abs_d,
4218                                                               compute_mode),
4219                                                 NULL_RTX, 0);
4220                     else
4221                       quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4222
4223                     /* We have computed OP0 / abs(OP1).  If OP1 is negative,
4224                        negate the quotient.  */
4225                     if (d < 0)
4226                       {
4227                         insn = get_last_insn ();
4228                         if (insn != last
4229                             && abs_d < ((unsigned HOST_WIDE_INT) 1
4230                                         << (HOST_BITS_PER_WIDE_INT - 1)))
4231                           set_dst_reg_note (insn, REG_EQUAL,
4232                                             gen_rtx_DIV (compute_mode, op0,
4233                                                          gen_int_mode
4234                                                            (abs_d,
4235                                                             compute_mode)),
4236                                             quotient);
4237
4238                         quotient = expand_unop (compute_mode, neg_optab,
4239                                                 quotient, quotient, 0);
4240                       }
4241                   }
4242                 else if (size <= HOST_BITS_PER_WIDE_INT)
4243                   {
4244                     choose_multiplier (abs_d, size, size - 1,
4245                                        &ml, &post_shift, &lgup);
4246                     if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4247                       {
4248                         rtx t1, t2, t3;
4249
4250                         if (post_shift >= BITS_PER_WORD
4251                             || size - 1 >= BITS_PER_WORD)
4252                           goto fail1;
4253
4254                         extra_cost = (shift_cost (speed, compute_mode, post_shift)
4255                                       + shift_cost (speed, compute_mode, size - 1)
4256                                       + add_cost (speed, compute_mode));
4257                         t1 = expmed_mult_highpart
4258                           (compute_mode, op0, gen_int_mode (ml, compute_mode),
4259                            NULL_RTX, 0, max_cost - extra_cost);
4260                         if (t1 == 0)
4261                           goto fail1;
4262                         t2 = expand_shift
4263                           (RSHIFT_EXPR, compute_mode, t1,
4264                            post_shift, NULL_RTX, 0);
4265                         t3 = expand_shift
4266                           (RSHIFT_EXPR, compute_mode, op0,
4267                            size - 1, NULL_RTX, 0);
4268                         if (d < 0)
4269                           quotient
4270                             = force_operand (gen_rtx_MINUS (compute_mode,
4271                                                             t3, t2),
4272                                              tquotient);
4273                         else
4274                           quotient
4275                             = force_operand (gen_rtx_MINUS (compute_mode,
4276                                                             t2, t3),
4277                                              tquotient);
4278                       }
4279                     else
4280                       {
4281                         rtx t1, t2, t3, t4;
4282
4283                         if (post_shift >= BITS_PER_WORD
4284                             || size - 1 >= BITS_PER_WORD)
4285                           goto fail1;
4286
4287                         ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4288                         mlr = gen_int_mode (ml, compute_mode);
4289                         extra_cost = (shift_cost (speed, compute_mode, post_shift)
4290                                       + shift_cost (speed, compute_mode, size - 1)
4291                                       + 2 * add_cost (speed, compute_mode));
4292                         t1 = expmed_mult_highpart (compute_mode, op0, mlr,
4293                                                    NULL_RTX, 0,
4294                                                    max_cost - extra_cost);
4295                         if (t1 == 0)
4296                           goto fail1;
4297                         t2 = force_operand (gen_rtx_PLUS (compute_mode,
4298                                                           t1, op0),
4299                                             NULL_RTX);
4300                         t3 = expand_shift
4301                           (RSHIFT_EXPR, compute_mode, t2,
4302                            post_shift, NULL_RTX, 0);
4303                         t4 = expand_shift
4304                           (RSHIFT_EXPR, compute_mode, op0,
4305                            size - 1, NULL_RTX, 0);
4306                         if (d < 0)
4307                           quotient
4308                             = force_operand (gen_rtx_MINUS (compute_mode,
4309                                                             t4, t3),
4310                                              tquotient);
4311                         else
4312                           quotient
4313                             = force_operand (gen_rtx_MINUS (compute_mode,
4314                                                             t3, t4),
4315                                              tquotient);
4316                       }
4317                   }
4318                 else            /* Too wide mode to use tricky code */
4319                   break;
4320
4321                 insn = get_last_insn ();
4322                 if (insn != last)
4323                   set_dst_reg_note (insn, REG_EQUAL,
4324                                     gen_rtx_DIV (compute_mode, op0, op1),
4325                                     quotient);
4326               }
4327             break;
4328           }
4329       fail1:
4330         delete_insns_since (last);
4331         break;
4332
4333       case FLOOR_DIV_EXPR:
4334       case FLOOR_MOD_EXPR:
4335       /* We will come here only for signed operations.  */
4336         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4337           {
4338             unsigned HOST_WIDE_INT mh, ml;
4339             int pre_shift, lgup, post_shift;
4340             HOST_WIDE_INT d = INTVAL (op1);
4341
4342             if (d > 0)
4343               {
4344                 /* We could just as easily deal with negative constants here,
4345                    but it does not seem worth the trouble for GCC 2.6.  */
4346                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4347                   {
4348                     pre_shift = floor_log2 (d);
4349                     if (rem_flag)
4350                       {
4351                         unsigned HOST_WIDE_INT mask
4352                           = ((unsigned HOST_WIDE_INT) 1 << pre_shift) - 1;
4353                         remainder = expand_binop
4354                           (compute_mode, and_optab, op0,
4355                            gen_int_mode (mask, compute_mode),
4356                            remainder, 0, OPTAB_LIB_WIDEN);
4357                         if (remainder)
4358                           return gen_lowpart (mode, remainder);
4359                       }
4360                     quotient = expand_shift
4361                       (RSHIFT_EXPR, compute_mode, op0,
4362                        pre_shift, tquotient, 0);
4363                   }
4364                 else
4365                   {
4366                     rtx t1, t2, t3, t4;
4367
4368                     mh = choose_multiplier (d, size, size - 1,
4369                                             &ml, &post_shift, &lgup);
4370                     gcc_assert (!mh);
4371
4372                     if (post_shift < BITS_PER_WORD
4373                         && size - 1 < BITS_PER_WORD)
4374                       {
4375                         t1 = expand_shift
4376                           (RSHIFT_EXPR, compute_mode, op0,
4377                            size - 1, NULL_RTX, 0);
4378                         t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4379                                            NULL_RTX, 0, OPTAB_WIDEN);
4380                         extra_cost = (shift_cost (speed, compute_mode, post_shift)
4381                                       + shift_cost (speed, compute_mode, size - 1)
4382                                       + 2 * add_cost (speed, compute_mode));
4383                         t3 = expmed_mult_highpart
4384                           (compute_mode, t2, gen_int_mode (ml, compute_mode),
4385                            NULL_RTX, 1, max_cost - extra_cost);
4386                         if (t3 != 0)
4387                           {
4388                             t4 = expand_shift
4389                               (RSHIFT_EXPR, compute_mode, t3,
4390                                post_shift, NULL_RTX, 1);
4391                             quotient = expand_binop (compute_mode, xor_optab,
4392                                                      t4, t1, tquotient, 0,
4393                                                      OPTAB_WIDEN);
4394                           }
4395                       }
4396                   }
4397               }
4398             else
4399               {
4400                 rtx nsign, t1, t2, t3, t4;
4401                 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4402                                                   op0, constm1_rtx), NULL_RTX);
4403                 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4404                                    0, OPTAB_WIDEN);
4405                 nsign = expand_shift
4406                   (RSHIFT_EXPR, compute_mode, t2,
4407                    size - 1, NULL_RTX, 0);
4408                 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4409                                     NULL_RTX);
4410                 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4411                                     NULL_RTX, 0);
4412                 if (t4)
4413                   {
4414                     rtx t5;
4415                     t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4416                                       NULL_RTX, 0);
4417                     quotient = force_operand (gen_rtx_PLUS (compute_mode,
4418                                                             t4, t5),
4419                                               tquotient);
4420                   }
4421               }
4422           }
4423
4424         if (quotient != 0)
4425           break;
4426         delete_insns_since (last);
4427
4428         /* Try using an instruction that produces both the quotient and
4429            remainder, using truncation.  We can easily adjust the quotient
4430            or remainder to get floor rounding, once we have the remainder.
4431            Note that we also compute the final remainder value here,
4432            and return the result right away.  */
4433         if (target == 0 || GET_MODE (target) != compute_mode)
4434           target = gen_reg_rtx (compute_mode);
4435
4436         if (rem_flag)
4437           {
4438             remainder
4439               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4440             quotient = gen_reg_rtx (compute_mode);
4441           }
4442         else
4443           {
4444             quotient
4445               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4446             remainder = gen_reg_rtx (compute_mode);
4447           }
4448
4449         if (expand_twoval_binop (sdivmod_optab, op0, op1,
4450                                  quotient, remainder, 0))
4451           {
4452             /* This could be computed with a branch-less sequence.
4453                Save that for later.  */
4454             rtx tem;
4455             rtx label = gen_label_rtx ();
4456             do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4457             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4458                                 NULL_RTX, 0, OPTAB_WIDEN);
4459             do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4460             expand_dec (quotient, const1_rtx);
4461             expand_inc (remainder, op1);
4462             emit_label (label);
4463             return gen_lowpart (mode, rem_flag ? remainder : quotient);
4464           }
4465
4466         /* No luck with division elimination or divmod.  Have to do it
4467            by conditionally adjusting op0 *and* the result.  */
4468         {
4469           rtx label1, label2, label3, label4, label5;
4470           rtx adjusted_op0;
4471           rtx tem;
4472
4473           quotient = gen_reg_rtx (compute_mode);
4474           adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4475           label1 = gen_label_rtx ();
4476           label2 = gen_label_rtx ();
4477           label3 = gen_label_rtx ();
4478           label4 = gen_label_rtx ();
4479           label5 = gen_label_rtx ();
4480           do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4481           do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4482           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4483                               quotient, 0, OPTAB_LIB_WIDEN);
4484           if (tem != quotient)
4485             emit_move_insn (quotient, tem);
4486           emit_jump_insn (gen_jump (label5));
4487           emit_barrier ();
4488           emit_label (label1);
4489           expand_inc (adjusted_op0, const1_rtx);
4490           emit_jump_insn (gen_jump (label4));
4491           emit_barrier ();
4492           emit_label (label2);
4493           do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4494           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4495                               quotient, 0, OPTAB_LIB_WIDEN);
4496           if (tem != quotient)
4497             emit_move_insn (quotient, tem);
4498           emit_jump_insn (gen_jump (label5));
4499           emit_barrier ();
4500           emit_label (label3);
4501           expand_dec (adjusted_op0, const1_rtx);
4502           emit_label (label4);
4503           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4504                               quotient, 0, OPTAB_LIB_WIDEN);
4505           if (tem != quotient)
4506             emit_move_insn (quotient, tem);
4507           expand_dec (quotient, const1_rtx);
4508           emit_label (label5);
4509         }
4510         break;
4511
4512       case CEIL_DIV_EXPR:
4513       case CEIL_MOD_EXPR:
4514         if (unsignedp)
4515           {
4516             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4517               {
4518                 rtx t1, t2, t3;
4519                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4520                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4521                                    floor_log2 (d), tquotient, 1);
4522                 t2 = expand_binop (compute_mode, and_optab, op0,
4523                                    gen_int_mode (d - 1, compute_mode),
4524                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4525                 t3 = gen_reg_rtx (compute_mode);
4526                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4527                                       compute_mode, 1, 1);
4528                 if (t3 == 0)
4529                   {
4530                     rtx lab;
4531                     lab = gen_label_rtx ();
4532                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4533                     expand_inc (t1, const1_rtx);
4534                     emit_label (lab);
4535                     quotient = t1;
4536                   }
4537                 else
4538                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4539                                                           t1, t3),
4540                                             tquotient);
4541                 break;
4542               }
4543
4544             /* Try using an instruction that produces both the quotient and
4545                remainder, using truncation.  We can easily compensate the
4546                quotient or remainder to get ceiling rounding, once we have the
4547                remainder.  Notice that we compute also the final remainder
4548                value here, and return the result right away.  */
4549             if (target == 0 || GET_MODE (target) != compute_mode)
4550               target = gen_reg_rtx (compute_mode);
4551
4552             if (rem_flag)
4553               {
4554                 remainder = (REG_P (target)
4555                              ? target : gen_reg_rtx (compute_mode));
4556                 quotient = gen_reg_rtx (compute_mode);
4557               }
4558             else
4559               {
4560                 quotient = (REG_P (target)
4561                             ? target : gen_reg_rtx (compute_mode));
4562                 remainder = gen_reg_rtx (compute_mode);
4563               }
4564
4565             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4566                                      remainder, 1))
4567               {
4568                 /* This could be computed with a branch-less sequence.
4569                    Save that for later.  */
4570                 rtx label = gen_label_rtx ();
4571                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4572                                  compute_mode, label);
4573                 expand_inc (quotient, const1_rtx);
4574                 expand_dec (remainder, op1);
4575                 emit_label (label);
4576                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4577               }
4578
4579             /* No luck with division elimination or divmod.  Have to do it
4580                by conditionally adjusting op0 *and* the result.  */
4581             {
4582               rtx label1, label2;
4583               rtx adjusted_op0, tem;
4584
4585               quotient = gen_reg_rtx (compute_mode);
4586               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4587               label1 = gen_label_rtx ();
4588               label2 = gen_label_rtx ();
4589               do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4590                                compute_mode, label1);
4591               emit_move_insn  (quotient, const0_rtx);
4592               emit_jump_insn (gen_jump (label2));
4593               emit_barrier ();
4594               emit_label (label1);
4595               expand_dec (adjusted_op0, const1_rtx);
4596               tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4597                                   quotient, 1, OPTAB_LIB_WIDEN);
4598               if (tem != quotient)
4599                 emit_move_insn (quotient, tem);
4600               expand_inc (quotient, const1_rtx);
4601               emit_label (label2);
4602             }
4603           }
4604         else /* signed */
4605           {
4606             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4607                 && INTVAL (op1) >= 0)
4608               {
4609                 /* This is extremely similar to the code for the unsigned case
4610                    above.  For 2.7 we should merge these variants, but for
4611                    2.6.1 I don't want to touch the code for unsigned since that
4612                    get used in C.  The signed case will only be used by other
4613                    languages (Ada).  */
4614
4615                 rtx t1, t2, t3;
4616                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4617                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4618                                    floor_log2 (d), tquotient, 0);
4619                 t2 = expand_binop (compute_mode, and_optab, op0,
4620                                    gen_int_mode (d - 1, compute_mode),
4621                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4622                 t3 = gen_reg_rtx (compute_mode);
4623                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4624                                       compute_mode, 1, 1);
4625                 if (t3 == 0)
4626                   {
4627                     rtx lab;
4628                     lab = gen_label_rtx ();
4629                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4630                     expand_inc (t1, const1_rtx);
4631                     emit_label (lab);
4632                     quotient = t1;
4633                   }
4634                 else
4635                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4636                                                           t1, t3),
4637                                             tquotient);
4638                 break;
4639               }
4640
4641             /* Try using an instruction that produces both the quotient and
4642                remainder, using truncation.  We can easily compensate the
4643                quotient or remainder to get ceiling rounding, once we have the
4644                remainder.  Notice that we compute also the final remainder
4645                value here, and return the result right away.  */
4646             if (target == 0 || GET_MODE (target) != compute_mode)
4647               target = gen_reg_rtx (compute_mode);
4648             if (rem_flag)
4649               {
4650                 remainder= (REG_P (target)
4651                             ? target : gen_reg_rtx (compute_mode));
4652                 quotient = gen_reg_rtx (compute_mode);
4653               }
4654             else
4655               {
4656                 quotient = (REG_P (target)
4657                             ? target : gen_reg_rtx (compute_mode));
4658                 remainder = gen_reg_rtx (compute_mode);
4659               }
4660
4661             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4662                                      remainder, 0))
4663               {
4664                 /* This could be computed with a branch-less sequence.
4665                    Save that for later.  */
4666                 rtx tem;
4667                 rtx label = gen_label_rtx ();
4668                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4669                                  compute_mode, label);
4670                 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4671                                     NULL_RTX, 0, OPTAB_WIDEN);
4672                 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4673                 expand_inc (quotient, const1_rtx);
4674                 expand_dec (remainder, op1);
4675                 emit_label (label);
4676                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4677               }
4678
4679             /* No luck with division elimination or divmod.  Have to do it
4680                by conditionally adjusting op0 *and* the result.  */
4681             {
4682               rtx label1, label2, label3, label4, label5;
4683               rtx adjusted_op0;
4684               rtx tem;
4685
4686               quotient = gen_reg_rtx (compute_mode);
4687               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4688               label1 = gen_label_rtx ();
4689               label2 = gen_label_rtx ();
4690               label3 = gen_label_rtx ();
4691               label4 = gen_label_rtx ();
4692               label5 = gen_label_rtx ();
4693               do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4694               do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4695                                compute_mode, label1);
4696               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4697                                   quotient, 0, OPTAB_LIB_WIDEN);
4698               if (tem != quotient)
4699                 emit_move_insn (quotient, tem);
4700               emit_jump_insn (gen_jump (label5));
4701               emit_barrier ();
4702               emit_label (label1);
4703               expand_dec (adjusted_op0, const1_rtx);
4704               emit_jump_insn (gen_jump (label4));
4705               emit_barrier ();
4706               emit_label (label2);
4707               do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4708                                compute_mode, label3);
4709               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4710                                   quotient, 0, OPTAB_LIB_WIDEN);
4711               if (tem != quotient)
4712                 emit_move_insn (quotient, tem);
4713               emit_jump_insn (gen_jump (label5));
4714               emit_barrier ();
4715               emit_label (label3);
4716               expand_inc (adjusted_op0, const1_rtx);
4717               emit_label (label4);
4718               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4719                                   quotient, 0, OPTAB_LIB_WIDEN);
4720               if (tem != quotient)
4721                 emit_move_insn (quotient, tem);
4722               expand_inc (quotient, const1_rtx);
4723               emit_label (label5);
4724             }
4725           }
4726         break;
4727
4728       case EXACT_DIV_EXPR:
4729         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4730           {
4731             HOST_WIDE_INT d = INTVAL (op1);
4732             unsigned HOST_WIDE_INT ml;
4733             int pre_shift;
4734             rtx t1;
4735
4736             pre_shift = floor_log2 (d & -d);
4737             ml = invert_mod2n (d >> pre_shift, size);
4738             t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4739                                pre_shift, NULL_RTX, unsignedp);
4740             quotient = expand_mult (compute_mode, t1,
4741                                     gen_int_mode (ml, compute_mode),
4742                                     NULL_RTX, 1);
4743
4744             insn = get_last_insn ();
4745             set_dst_reg_note (insn, REG_EQUAL,
4746                               gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4747                                               compute_mode, op0, op1),
4748                               quotient);
4749           }
4750         break;
4751
4752       case ROUND_DIV_EXPR:
4753       case ROUND_MOD_EXPR:
4754         if (unsignedp)
4755           {
4756             rtx tem;
4757             rtx label;
4758             label = gen_label_rtx ();
4759             quotient = gen_reg_rtx (compute_mode);
4760             remainder = gen_reg_rtx (compute_mode);
4761             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4762               {
4763                 rtx tem;
4764                 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4765                                          quotient, 1, OPTAB_LIB_WIDEN);
4766                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4767                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4768                                           remainder, 1, OPTAB_LIB_WIDEN);
4769               }
4770             tem = plus_constant (compute_mode, op1, -1);
4771             tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1);
4772             do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4773             expand_inc (quotient, const1_rtx);
4774             expand_dec (remainder, op1);
4775             emit_label (label);
4776           }
4777         else
4778           {
4779             rtx abs_rem, abs_op1, tem, mask;
4780             rtx label;
4781             label = gen_label_rtx ();
4782             quotient = gen_reg_rtx (compute_mode);
4783             remainder = gen_reg_rtx (compute_mode);
4784             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4785               {
4786                 rtx tem;
4787                 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4788                                          quotient, 0, OPTAB_LIB_WIDEN);
4789                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4790                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4791                                           remainder, 0, OPTAB_LIB_WIDEN);
4792               }
4793             abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4794             abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4795             tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4796                                 1, NULL_RTX, 1);
4797             do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4798             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4799                                 NULL_RTX, 0, OPTAB_WIDEN);
4800             mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4801                                  size - 1, NULL_RTX, 0);
4802             tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4803                                 NULL_RTX, 0, OPTAB_WIDEN);
4804             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4805                                 NULL_RTX, 0, OPTAB_WIDEN);
4806             expand_inc (quotient, tem);
4807             tem = expand_binop (compute_mode, xor_optab, mask, op1,
4808                                 NULL_RTX, 0, OPTAB_WIDEN);
4809             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4810                                 NULL_RTX, 0, OPTAB_WIDEN);
4811             expand_dec (remainder, tem);
4812             emit_label (label);
4813           }
4814         return gen_lowpart (mode, rem_flag ? remainder : quotient);
4815
4816       default:
4817         gcc_unreachable ();
4818       }
4819
4820   if (quotient == 0)
4821     {
4822       if (target && GET_MODE (target) != compute_mode)
4823         target = 0;
4824
4825       if (rem_flag)
4826         {
4827           /* Try to produce the remainder without producing the quotient.
4828              If we seem to have a divmod pattern that does not require widening,
4829              don't try widening here.  We should really have a WIDEN argument
4830              to expand_twoval_binop, since what we'd really like to do here is
4831              1) try a mod insn in compute_mode
4832              2) try a divmod insn in compute_mode
4833              3) try a div insn in compute_mode and multiply-subtract to get
4834                 remainder
4835              4) try the same things with widening allowed.  */
4836           remainder
4837             = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4838                                  op0, op1, target,
4839                                  unsignedp,
4840                                  ((optab_handler (optab2, compute_mode)
4841                                    != CODE_FOR_nothing)
4842                                   ? OPTAB_DIRECT : OPTAB_WIDEN));
4843           if (remainder == 0)
4844             {
4845               /* No luck there.  Can we do remainder and divide at once
4846                  without a library call?  */
4847               remainder = gen_reg_rtx (compute_mode);
4848               if (! expand_twoval_binop ((unsignedp
4849                                           ? udivmod_optab
4850                                           : sdivmod_optab),
4851                                          op0, op1,
4852                                          NULL_RTX, remainder, unsignedp))
4853                 remainder = 0;
4854             }
4855
4856           if (remainder)
4857             return gen_lowpart (mode, remainder);
4858         }
4859
4860       /* Produce the quotient.  Try a quotient insn, but not a library call.
4861          If we have a divmod in this mode, use it in preference to widening
4862          the div (for this test we assume it will not fail). Note that optab2
4863          is set to the one of the two optabs that the call below will use.  */
4864       quotient
4865         = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4866                              op0, op1, rem_flag ? NULL_RTX : target,
4867                              unsignedp,
4868                              ((optab_handler (optab2, compute_mode)
4869                                != CODE_FOR_nothing)
4870                               ? OPTAB_DIRECT : OPTAB_WIDEN));
4871
4872       if (quotient == 0)
4873         {
4874           /* No luck there.  Try a quotient-and-remainder insn,
4875              keeping the quotient alone.  */
4876           quotient = gen_reg_rtx (compute_mode);
4877           if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4878                                      op0, op1,
4879                                      quotient, NULL_RTX, unsignedp))
4880             {
4881               quotient = 0;
4882               if (! rem_flag)
4883                 /* Still no luck.  If we are not computing the remainder,
4884                    use a library call for the quotient.  */
4885                 quotient = sign_expand_binop (compute_mode,
4886                                               udiv_optab, sdiv_optab,
4887                                               op0, op1, target,
4888                                               unsignedp, OPTAB_LIB_WIDEN);
4889             }
4890         }
4891     }
4892
4893   if (rem_flag)
4894     {
4895       if (target && GET_MODE (target) != compute_mode)
4896         target = 0;
4897
4898       if (quotient == 0)
4899         {
4900           /* No divide instruction either.  Use library for remainder.  */
4901           remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4902                                          op0, op1, target,
4903                                          unsignedp, OPTAB_LIB_WIDEN);
4904           /* No remainder function.  Try a quotient-and-remainder
4905              function, keeping the remainder.  */
4906           if (!remainder)
4907             {
4908               remainder = gen_reg_rtx (compute_mode);
4909               if (!expand_twoval_binop_libfunc
4910                   (unsignedp ? udivmod_optab : sdivmod_optab,
4911                    op0, op1,
4912                    NULL_RTX, remainder,
4913                    unsignedp ? UMOD : MOD))
4914                 remainder = NULL_RTX;
4915             }
4916         }
4917       else
4918         {
4919           /* We divided.  Now finish doing X - Y * (X / Y).  */
4920           remainder = expand_mult (compute_mode, quotient, op1,
4921                                    NULL_RTX, unsignedp);
4922           remainder = expand_binop (compute_mode, sub_optab, op0,
4923                                     remainder, target, unsignedp,
4924                                     OPTAB_LIB_WIDEN);
4925         }
4926     }
4927
4928   return gen_lowpart (mode, rem_flag ? remainder : quotient);
4929 }
4930 \f
4931 /* Return a tree node with data type TYPE, describing the value of X.
4932    Usually this is an VAR_DECL, if there is no obvious better choice.
4933    X may be an expression, however we only support those expressions
4934    generated by loop.c.  */
4935
4936 tree
4937 make_tree (tree type, rtx x)
4938 {
4939   tree t;
4940
4941   switch (GET_CODE (x))
4942     {
4943     case CONST_INT:
4944       {
4945         HOST_WIDE_INT hi = 0;
4946
4947         if (INTVAL (x) < 0
4948             && !(TYPE_UNSIGNED (type)
4949                  && (GET_MODE_BITSIZE (TYPE_MODE (type))
4950                      < HOST_BITS_PER_WIDE_INT)))
4951           hi = -1;
4952
4953         t = build_int_cst_wide (type, INTVAL (x), hi);
4954
4955         return t;
4956       }
4957
4958     case CONST_DOUBLE:
4959       if (GET_MODE (x) == VOIDmode)
4960         t = build_int_cst_wide (type,
4961                                 CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
4962       else
4963         {
4964           REAL_VALUE_TYPE d;
4965
4966           REAL_VALUE_FROM_CONST_DOUBLE (d, x);
4967           t = build_real (type, d);
4968         }
4969
4970       return t;
4971
4972     case CONST_VECTOR:
4973       {
4974         int units = CONST_VECTOR_NUNITS (x);
4975         tree itype = TREE_TYPE (type);
4976         tree *elts;
4977         int i;
4978
4979         /* Build a tree with vector elements.  */
4980         elts = XALLOCAVEC (tree, units);
4981         for (i = units - 1; i >= 0; --i)
4982           {
4983             rtx elt = CONST_VECTOR_ELT (x, i);
4984             elts[i] = make_tree (itype, elt);
4985           }
4986
4987         return build_vector (type, elts);
4988       }
4989
4990     case PLUS:
4991       return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4992                           make_tree (type, XEXP (x, 1)));
4993
4994     case MINUS:
4995       return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4996                           make_tree (type, XEXP (x, 1)));
4997
4998     case NEG:
4999       return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5000
5001     case MULT:
5002       return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5003                           make_tree (type, XEXP (x, 1)));
5004
5005     case ASHIFT:
5006       return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5007                           make_tree (type, XEXP (x, 1)));
5008
5009     case LSHIFTRT:
5010       t = unsigned_type_for (type);
5011       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5012                                          make_tree (t, XEXP (x, 0)),
5013                                          make_tree (type, XEXP (x, 1))));
5014
5015     case ASHIFTRT:
5016       t = signed_type_for (type);
5017       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5018                                          make_tree (t, XEXP (x, 0)),
5019                                          make_tree (type, XEXP (x, 1))));
5020
5021     case DIV:
5022       if (TREE_CODE (type) != REAL_TYPE)
5023         t = signed_type_for (type);
5024       else
5025         t = type;
5026
5027       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5028                                          make_tree (t, XEXP (x, 0)),
5029                                          make_tree (t, XEXP (x, 1))));
5030     case UDIV:
5031       t = unsigned_type_for (type);
5032       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5033                                          make_tree (t, XEXP (x, 0)),
5034                                          make_tree (t, XEXP (x, 1))));
5035
5036     case SIGN_EXTEND:
5037     case ZERO_EXTEND:
5038       t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5039                                           GET_CODE (x) == ZERO_EXTEND);
5040       return fold_convert (type, make_tree (t, XEXP (x, 0)));
5041
5042     case CONST:
5043       return make_tree (type, XEXP (x, 0));
5044
5045     case SYMBOL_REF:
5046       t = SYMBOL_REF_DECL (x);
5047       if (t)
5048         return fold_convert (type, build_fold_addr_expr (t));
5049       /* else fall through.  */
5050
5051     default:
5052       t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5053
5054       /* If TYPE is a POINTER_TYPE, we might need to convert X from
5055          address mode to pointer mode.  */
5056       if (POINTER_TYPE_P (type))
5057         x = convert_memory_address_addr_space
5058               (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5059
5060       /* Note that we do *not* use SET_DECL_RTL here, because we do not
5061          want set_decl_rtl to go adjusting REG_ATTRS for this temporary.  */
5062       t->decl_with_rtl.rtl = x;
5063
5064       return t;
5065     }
5066 }
5067 \f
5068 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5069    and returning TARGET.
5070
5071    If TARGET is 0, a pseudo-register or constant is returned.  */
5072
5073 rtx
5074 expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
5075 {
5076   rtx tem = 0;
5077
5078   if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5079     tem = simplify_binary_operation (AND, mode, op0, op1);
5080   if (tem == 0)
5081     tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5082
5083   if (target == 0)
5084     target = tem;
5085   else if (tem != target)
5086     emit_move_insn (target, tem);
5087   return target;
5088 }
5089
5090 /* Helper function for emit_store_flag.  */
5091 static rtx
5092 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5093              enum machine_mode mode, enum machine_mode compare_mode,
5094              int unsignedp, rtx x, rtx y, int normalizep,
5095              enum machine_mode target_mode)
5096 {
5097   struct expand_operand ops[4];
5098   rtx op0, last, comparison, subtarget;
5099   enum machine_mode result_mode = targetm.cstore_mode (icode);
5100
5101   last = get_last_insn ();
5102   x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5103   y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5104   if (!x || !y)
5105     {
5106       delete_insns_since (last);
5107       return NULL_RTX;
5108     }
5109
5110   if (target_mode == VOIDmode)
5111     target_mode = result_mode;
5112   if (!target)
5113     target = gen_reg_rtx (target_mode);
5114
5115   comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5116
5117   create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5118   create_fixed_operand (&ops[1], comparison);
5119   create_fixed_operand (&ops[2], x);
5120   create_fixed_operand (&ops[3], y);
5121   if (!maybe_expand_insn (icode, 4, ops))
5122     {
5123       delete_insns_since (last);
5124       return NULL_RTX;
5125     }
5126   subtarget = ops[0].value;
5127
5128   /* If we are converting to a wider mode, first convert to
5129      TARGET_MODE, then normalize.  This produces better combining
5130      opportunities on machines that have a SIGN_EXTRACT when we are
5131      testing a single bit.  This mostly benefits the 68k.
5132
5133      If STORE_FLAG_VALUE does not have the sign bit set when
5134      interpreted in MODE, we can do this conversion as unsigned, which
5135      is usually more efficient.  */
5136   if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
5137     {
5138       convert_move (target, subtarget,
5139                     val_signbit_known_clear_p (result_mode,
5140                                                STORE_FLAG_VALUE));
5141       op0 = target;
5142       result_mode = target_mode;
5143     }
5144   else
5145     op0 = subtarget;
5146
5147   /* If we want to keep subexpressions around, don't reuse our last
5148      target.  */
5149   if (optimize)
5150     subtarget = 0;
5151
5152   /* Now normalize to the proper value in MODE.  Sometimes we don't
5153      have to do anything.  */
5154   if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5155     ;
5156   /* STORE_FLAG_VALUE might be the most negative number, so write
5157      the comparison this way to avoid a compiler-time warning.  */
5158   else if (- normalizep == STORE_FLAG_VALUE)
5159     op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5160
5161   /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5162      it hard to use a value of just the sign bit due to ANSI integer
5163      constant typing rules.  */
5164   else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5165     op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5166                         GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5167                         normalizep == 1);
5168   else
5169     {
5170       gcc_assert (STORE_FLAG_VALUE & 1);
5171
5172       op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5173       if (normalizep == -1)
5174         op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5175     }
5176
5177   /* If we were converting to a smaller mode, do the conversion now.  */
5178   if (target_mode != result_mode)
5179     {
5180       convert_move (target, op0, 0);
5181       return target;
5182     }
5183   else
5184     return op0;
5185 }
5186
5187
5188 /* A subroutine of emit_store_flag only including "tricks" that do not
5189    need a recursive call.  These are kept separate to avoid infinite
5190    loops.  (The double-word case below does call emit_store_flag, but
        only with word_mode operands.)

        TARGET, CODE, OP0, OP1, MODE, UNSIGNEDP and NORMALIZEP are as for
        emit_store_flag.  TARGET_MODE is the mode wanted for the result;
        emit_store_flag passes VOIDmode when it was given no TARGET.

        Return an rtx holding the flag value, or 0 if none of the
        strategies tried here produced one.  */
5191
5192 static rtx
5193 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5194                    enum machine_mode mode, int unsignedp, int normalizep,
5195                    enum machine_mode target_mode)
5196 {
5197   rtx subtarget;
5198   enum insn_code icode;
5199   enum machine_mode compare_mode;
5200   enum mode_class mclass;
5201   enum rtx_code scode;
5202   rtx tem;
5203
5204   if (unsignedp)
5205     code = unsigned_condition (code);
       /* SCODE is CODE with its operands exchanged.  It is only used at the
          end of this function, to retry a floating-point cstore with OP1
          and OP0 swapped.  */
5206   scode = swap_condition (code);
5207
5208   /* If one operand is constant, make it the second one.  Only do this
5209      if the other operand is not constant as well.  */
5210
5211   if (swap_commutative_operands_p (op0, op1))
5212     {
5213       tem = op0;
5214       op0 = op1;
5215       op1 = tem;
5216       code = swap_condition (code);
           /* CODE now describes the exchanged operands, so SCODE must be
              recomputed from it.  Otherwise SCODE would equal CODE and the
              SCODE/OP1/OP0 retry below would test the caller's comparison
              with its operands reversed.  */
           scode = swap_condition (code);
5217     }
5218
5219   if (mode == VOIDmode)
5220     mode = GET_MODE (op0);
5221
5222   /* For some comparisons with 1 and -1, we can convert this to
5223      comparisons with zero.  This will often produce more opportunities for
5224      store-flag insns.  */
5225
5226   switch (code)
5227     {
5228     case LT:
5229       if (op1 == const1_rtx)
5230         op1 = const0_rtx, code = LE;
5231       break;
5232     case LE:
5233       if (op1 == constm1_rtx)
5234         op1 = const0_rtx, code = LT;
5235       break;
5236     case GE:
5237       if (op1 == const1_rtx)
5238         op1 = const0_rtx, code = GT;
5239       break;
5240     case GT:
5241       if (op1 == constm1_rtx)
5242         op1 = const0_rtx, code = GE;
5243       break;
5244     case GEU:
5245       if (op1 == const1_rtx)
5246         op1 = const0_rtx, code = NE;
5247       break;
5248     case LTU:
5249       if (op1 == const1_rtx)
5250         op1 = const0_rtx, code = EQ;
5251       break;
5252     default:
5253       break;
5254     }
5255
5256   /* If we are comparing a double-word integer with zero or -1, we can
5257      convert the comparison into one involving a single word.  */
5258   if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5259       && GET_MODE_CLASS (mode) == MODE_INT
5260       && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5261     {
5262       if ((code == EQ || code == NE)
5263           && (op1 == const0_rtx || op1 == constm1_rtx))
5264         {
5265           rtx op00, op01;
5266
5267           /* Do a logical OR or AND of the two words and compare the
5268              result.  OR is used for the comparison with zero, AND for
                  the comparison with -1.  */
5269           op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5270           op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5271           tem = expand_binop (word_mode,
5272                               op1 == const0_rtx ? ior_optab : and_optab,
5273                               op00, op01, NULL_RTX, unsignedp,
5274                               OPTAB_DIRECT);
5275
5276           if (tem != 0)
5277             tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5278                                    unsignedp, normalizep);
5279         }
5280       else if ((code == LT || code == GE) && op1 == const0_rtx)
5281         {
5282           rtx op0h;
5283
5284           /* If testing the sign bit, can just test on high word.  */
5285           op0h = simplify_gen_subreg (word_mode, op0, mode,
5286                                       subreg_highpart_offset (word_mode,
5287                                                               mode));
5288           tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5289                                  unsignedp, normalizep);
5290         }
5291       else
5292         tem = NULL_RTX;
5293
5294       if (tem)
5295         {
               /* The single-word result can be returned as is unless a
                  different TARGET_MODE was requested; in that case convert
                  it into TARGET, creating a register if none was given.  */
5296           if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5297             return tem;
5298           if (!target)
5299             target = gen_reg_rtx (target_mode);
5300
5301           convert_move (target, tem,
5302                         !val_signbit_known_set_p (word_mode,
5303                                                   (normalizep ? normalizep
5304                                                    : STORE_FLAG_VALUE)));
5305           return target;
5306         }
5307     }
5308
5309   /* If this is A < 0 or A >= 0, we can do this by taking the ones
5310      complement of A (for GE) and shifting the sign bit to the low bit.  */
5311   if (op1 == const0_rtx && (code == LT || code == GE)
5312       && GET_MODE_CLASS (mode) == MODE_INT
5313       && (normalizep || STORE_FLAG_VALUE == 1
5314           || val_signbit_p (mode, STORE_FLAG_VALUE)))
5315     {
5316       subtarget = target;
5317
5318       if (!target)
5319         target_mode = mode;
5320
5321       /* If the result is to be wider than OP0, it is best to convert it
5322          first.  If it is to be narrower, it is *incorrect* to convert it
5323          first.  */
5324       else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5325         {
5326           op0 = convert_modes (target_mode, mode, op0, 0);
5327           mode = target_mode;
5328         }
5329
           /* TARGET cannot be used as an intermediate if the computation is
              done in a different mode.  */
5330       if (target_mode != mode)
5331         subtarget = 0;
5332
5333       if (code == GE)
5334         op0 = expand_unop (mode, one_cmpl_optab, op0,
5335                            ((STORE_FLAG_VALUE == 1 || normalizep)
5336                             ? 0 : subtarget), 0);
5337
5338       if (STORE_FLAG_VALUE == 1 || normalizep)
5339         /* If we are supposed to produce a 0/1 value, we want to do
5340            a logical shift from the sign bit to the low-order bit; for
5341            a -1/0 value, we do an arithmetic shift.  */
5342         op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5343                             GET_MODE_BITSIZE (mode) - 1,
5344                             subtarget, normalizep != -1);
5345
5346       if (mode != target_mode)
5347         op0 = convert_modes (target_mode, mode, op0, 0);
5348
5349       return op0;
5350     }
5351
       /* Look for a cstore pattern in MODE and then in successively wider
          modes (CCmode is used for the lookup when MODE is of class
          MODE_CC).  Only the first mode that has a pattern is tried.  */
5352   mclass = GET_MODE_CLASS (mode);
5353   for (compare_mode = mode; compare_mode != VOIDmode;
5354        compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5355     {
5356      enum machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5357      icode = optab_handler (cstore_optab, optab_mode);
5358      if (icode != CODE_FOR_nothing)
5359         {
5360           do_pending_stack_adjust ();
5361           tem = emit_cstore (target, icode, code, mode, compare_mode,
5362                              unsignedp, op0, op1, normalizep, target_mode);
5363           if (tem)
5364             return tem;
5365
               /* For floating-point modes, also try the equivalent
                  comparison with the operands exchanged.  */
5366           if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5367             {
5368               tem = emit_cstore (target, icode, scode, mode, compare_mode,
5369                                  unsignedp, op1, op0, normalizep, target_mode);
5370               if (tem)
5371                 return tem;
5372             }
5373           break;
5374         }
5375     }
5376
5377   return 0;
5378 }
5379
5380 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5381    and storing in TARGET.  Normally return TARGET.
5382    Return 0 if that cannot be done.
5383
        TARGET may be 0; in that case there is no preferred result mode.
        0 is also returned, without emitting anything, when both OP0 and
        OP1 are constant.

5384    MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
5385    it is VOIDmode, they cannot both be CONST_INT.
5386
5387    UNSIGNEDP is for the case where we have to widen the operands
5388    to perform the operation.  It says to use zero-extension.
5389
5390    NORMALIZEP is 1 if we should convert the result to be either zero
5391    or one.  NORMALIZEP is -1 if we should convert the result to be
5392    either zero or -1.  If NORMALIZEP is zero, the result will be left
5393    "raw" out of the scc insn.  */
5394
5395 rtx
5396 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5397                  enum machine_mode mode, int unsignedp, int normalizep)
5398 {
5399   enum machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5400   enum rtx_code rcode;
5401   rtx subtarget;
5402   rtx tem, last, trueval;
5403
5404   /* If we compare constants, we shouldn't use a store-flag operation,
5405      but a constant load.  We can get there via the vanilla route that
5406      usually generates a compare-branch sequence, but will in this case
5407      fold the comparison to a constant, and thus elide the branch.  */
5408   if (CONSTANT_P (op0) && CONSTANT_P (op1))
5409     return NULL_RTX;
5410
       /* First try the strategies of emit_store_flag_1.  */
5411   tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5412                            target_mode);
5413   if (tem)
5414     return tem;
5415
5416   /* If we reached here, we can't do this with a scc insn, however there
5417      are some comparisons that can be done in other ways.  Don't do any
5418      of these cases if branches are very cheap.  */
5419   if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5420     return 0;
5421
5422   /* See what we need to return.  We can only return a 1, -1, or the
5423      sign bit.  */
5424
5425   if (normalizep == 0)
5426     {
5427       if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5428         normalizep = STORE_FLAG_VALUE;
5429
5430       else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5431         ;
5432       else
5433         return 0;
5434     }
5435
       /* Remember the last insn so that failed attempts below can be
          discarded with delete_insns_since.  */
5436   last = get_last_insn ();
5437
5438   /* If optimizing, use different pseudo registers for each insn, instead
5439      of reusing the same pseudo.  This leads to better CSE, but slows
5440      down the compiler, since there are more pseudos.  */
5441   subtarget = (!optimize
5442                && (target_mode == mode)) ? target : NULL_RTX;
5443   trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5444
5445   /* For floating-point comparisons, try the reverse comparison or try
5446      changing the "orderedness" of the comparison.  */
5447   if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5448     {
5449       enum rtx_code first_code;
5450       bool and_them;
5451
5452       rcode = reverse_condition_maybe_unordered (code);
5453       if (can_compare_p (rcode, mode, ccp_store_flag)
5454           && (code == ORDERED || code == UNORDERED
5455               || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5456               || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5457         {
               /* WANT_ADD is set when STORE_FLAG_VALUE and NORMALIZEP are 1
                  and -1 in either order.  Adding NORMALIZEP to the raw
                  flag of the reverse comparison (0 or STORE_FLAG_VALUE)
                  then gives NORMALIZEP or 0 for the original comparison.  */
5458           int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5459                           || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5460
5461           /* For the reverse comparison, use either an addition or a XOR.  */
5462           if (want_add
5463               && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5464                            optimize_insn_for_speed_p ()) == 0)
5465             {
5466               tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5467                                        STORE_FLAG_VALUE, target_mode);
5468               if (tem)
5469                 return expand_binop (target_mode, add_optab, tem,
5470                                      gen_int_mode (normalizep, target_mode),
5471                                      target, 0, OPTAB_WIDEN);
5472             }
5473           else if (!want_add
5474                    && rtx_cost (trueval, XOR, 1,
5475                                 optimize_insn_for_speed_p ()) == 0)
5476             {
5477               tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5478                                        normalizep, target_mode);
5479               if (tem)
5480                 return expand_binop (target_mode, xor_optab, tem, trueval,
5481                                      target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
5482             }
5483         }
5484
5485       delete_insns_since (last);
5486
5487       /* Cannot split ORDERED and UNORDERED, only try the above trick.   */
5488       if (code == ORDERED || code == UNORDERED)
5489         return 0;
5490
           /* Split CODE into FIRST_CODE and a new CODE; AND_THEM says
              whether the two are to be combined with AND rather than OR
              (see the conditional moves below).  */
5491       and_them = split_comparison (code, mode, &first_code, &code);
5492
5493       /* If there are no NaNs, the first comparison should always fall through.
5494          Effectively change the comparison to the other one.  */
5495       if (!HONOR_NANS (mode))
5496         {
5497           gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5498           return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5499                                     target_mode);
5500         }
5501
5502 #ifdef HAVE_conditional_move
5503       /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5504          conditional move.  */
5505       tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5506                                normalizep, target_mode);
5507       if (tem == 0)
5508         return 0;
5509
           /* When ANDing, select the first result if CODE holds and 0
              otherwise; when ORing, select TRUEVAL if CODE holds and the
              first result otherwise.  */
5510       if (and_them)
5511         tem = emit_conditional_move (target, code, op0, op1, mode,
5512                                      tem, const0_rtx, GET_MODE (tem), 0);
5513       else
5514         tem = emit_conditional_move (target, code, op0, op1, mode,
5515                                      trueval, tem, GET_MODE (tem), 0);
5516
5517       if (tem == 0)
5518         delete_insns_since (last);
5519       return tem;
5520 #else
5521       return 0;
5522 #endif
5523     }
5524
5525   /* The remaining tricks only apply to integer comparisons.  */
5526
5527   if (GET_MODE_CLASS (mode) != MODE_INT)
5528     return 0;
5529
5530   /* If this is an equality comparison of integers, we can try to exclusive-or
5531      (or subtract) the two operands and use a recursive call to try the
5532      comparison with zero.  Don't do any of these cases if branches are
5533      very cheap.  */
5534
5535   if ((code == EQ || code == NE) && op1 != const0_rtx)
5536     {
5537       tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5538                           OPTAB_WIDEN);
5539
5540       if (tem == 0)
5541         tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5542                             OPTAB_WIDEN);
5543       if (tem != 0)
5544         tem = emit_store_flag (target, code, tem, const0_rtx,
5545                                mode, unsignedp, normalizep);
5546       if (tem != 0)
5547         return tem;
5548
5549       delete_insns_since (last);
5550     }
5551
5552   /* For integer comparisons, try the reverse comparison.  However, for
5553      small X and if we'd have anyway to extend, implementing "X != 0"
5554      as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0".  */
5555   rcode = reverse_condition (code);
5556   if (can_compare_p (rcode, mode, ccp_store_flag)
5557       && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5558             && code == NE
5559             && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5560             && op1 == const0_rtx))
5561     {
5562       int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5563                       || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5564
5565       /* Again, for the reverse comparison, use either an addition or a XOR.  */
5566       if (want_add
5567           && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5568                        optimize_insn_for_speed_p ()) == 0)
5569         {
5570           tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5571                                    STORE_FLAG_VALUE, target_mode);
5572           if (tem != 0)
5573             tem = expand_binop (target_mode, add_optab, tem,
5574                                 gen_int_mode (normalizep, target_mode),
5575                                 target, 0, OPTAB_WIDEN);
5576         }
5577       else if (!want_add
5578                && rtx_cost (trueval, XOR, 1,
5579                             optimize_insn_for_speed_p ()) == 0)
5580         {
5581           tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5582                                    normalizep, target_mode);
5583           if (tem != 0)
5584             tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5585                                 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5586         }
5587
           /* TEM is 0 here if neither branch above was taken, since every
              earlier path that left it nonzero has already returned.  */
5588       if (tem != 0)
5589         return tem;
5590       delete_insns_since (last);
5591     }
5592
5593   /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5594      the constant zero.  Reject all other comparisons at this point.  Only
5595      do LE and GT if branches are expensive since they are expensive on
5596      2-operand machines.  */
5597
5598   if (op1 != const0_rtx
5599       || (code != EQ && code != NE
5600           && (BRANCH_COST (optimize_insn_for_speed_p (),
5601                            false) <= 1 || (code != LE && code != GT))))
5602     return 0;
5603
5604   /* Try to put the result of the comparison in the sign bit.  Assume we can't
5605      do the necessary operation below.  */
5606
5607   tem = 0;
5608
5609   /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
5610      the sign bit set.  */
5611
5612   if (code == LE)
5613     {
5614       /* This is destructive, so SUBTARGET can't be OP0.  */
5615       if (rtx_equal_p (subtarget, op0))
5616         subtarget = 0;
5617
5618       tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5619                           OPTAB_WIDEN);
5620       if (tem)
5621         tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5622                             OPTAB_WIDEN);
5623     }
5624
5625   /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5626      number of bits in the mode of OP0, minus one.  A > 0 iff that result
          has the sign bit set.  */
5627
5628   if (code == GT)
5629     {
           /* As for LE, OP0 is needed again after the shift, so SUBTARGET
              can't be OP0.  */
5630       if (rtx_equal_p (subtarget, op0))
5631         subtarget = 0;
5632
5633       tem = expand_shift (RSHIFT_EXPR, mode, op0,
5634                           GET_MODE_BITSIZE (mode) - 1,
5635                           subtarget, 0);
5636       tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5637                           OPTAB_WIDEN);
5638     }
5639
5640   if (code == EQ || code == NE)
5641     {
5642       /* For EQ or NE, one way to do the comparison is to apply an operation
5643          that converts the operand into a positive number if it is nonzero
5644          or zero if it was originally zero.  Then, for EQ, we subtract 1 and
5645          for NE we negate.  This puts the result in the sign bit.  Then we
5646          normalize with a shift, if needed.
5647
5648          Two operations that can do the above actions are ABS and FFS, so try
5649          them.  If that doesn't work, and MODE is smaller than a full word,
5650          we can use zero-extension to the wider mode (an unsigned conversion)
5651          as the operation.  */
5652
5653       /* Note that ABS doesn't yield a positive number for INT_MIN, but
5654          that is compensated by the subsequent overflow when subtracting
5655          one / negating.  */
5656
5657       if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5658         tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5659       else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5660         tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5661       else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5662         {
5663           tem = convert_modes (word_mode, mode, op0, 1);
               /* From here on the computation, including the final shift,
                  is done in word_mode.  */
5664           mode = word_mode;
5665         }
5666
5667       if (tem != 0)
5668         {
5669           if (code == EQ)
5670             tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5671                                 0, OPTAB_WIDEN);
5672           else
5673             tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5674         }
5675
5676       /* If we couldn't do it that way, for NE we can "or" the two's complement
5677          of the value with itself.  For EQ, we take the one's complement of
5678          that "or", which is an extra insn, so we only handle EQ if branches
5679          are expensive.  */
5680
5681       if (tem == 0
5682           && (code == NE
5683               || BRANCH_COST (optimize_insn_for_speed_p (),
5684                               false) > 1))
5685         {
5686           if (rtx_equal_p (subtarget, op0))
5687             subtarget = 0;
5688
5689           tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5690           tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5691                               OPTAB_WIDEN);
5692
5693           if (tem && code == EQ)
5694             tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5695         }
5696     }
5697
       /* The answer is now in the sign bit of TEM.  If a normalized value
          was requested, shift it down to the low-order bit: a logical
          shift for 0/1, an arithmetic shift for 0/-1.  */
5698   if (tem && normalizep)
5699     tem = expand_shift (RSHIFT_EXPR, mode, tem,
5700                         GET_MODE_BITSIZE (mode) - 1,
5701                         subtarget, normalizep == 1);
5702
5703   if (tem)
5704     {
           /* With no TARGET, return TEM as it is.  Otherwise deliver the
              value in TARGET: convert if the modes differ, or copy if no
              SUBTARGET was in use.  */
5705       if (!target)
5706         ;
5707       else if (GET_MODE (tem) != target_mode)
5708         {
5709           convert_move (target, tem, 0);
5710           tem = target;
5711         }
5712       else if (!subtarget)
5713         {
5714           emit_move_insn (target, tem);
5715           tem = target;
5716         }
5717     }
5718   else
5719     delete_insns_since (last);
5720
5721   return tem;
5722 }
5723
5724 /* Like emit_store_flag, but always succeeds: if emit_store_flag returns
        0, fall back to a set/compare/jump/set sequence.

        The arguments are as for emit_store_flag.  If TARGET is 0, the
        fallback uses a register made by gen_reg_rtx (word_mode).  The
        value stored for "true" by the fallback is NORMALIZEP if that is
        nonzero, otherwise 1.

        Return the rtx holding the result, which need not be the TARGET
        that was passed in.  */
5725
5726 rtx
5727 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5728                        enum machine_mode mode, int unsignedp, int normalizep)
5729 {
5730   rtx tem, label;
5731   rtx trueval, falseval;
5732
5733   /* First see if emit_store_flag can do the job.  */
5734   tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5735   if (tem != 0)
5736     return tem;
5737
5738   if (!target)
5739     target = gen_reg_rtx (word_mode);
5740
5741   /* If this failed, we have to do this with set/compare/jump/set code.
5742      For foo != 0, if foo is in OP0, just replace it with 1 if nonzero.  */
5743   trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5744   if (code == NE
5745       && GET_MODE_CLASS (mode) == MODE_INT
5746       && REG_P (target)
5747       && op0 == target
5748       && op1 == const0_rtx)
5749     {
           /* TARGET already holds the operand: skip the store when it is
              zero, otherwise overwrite it with TRUEVAL.  */
5750       label = gen_label_rtx ();
5751       do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp,
5752                                mode, NULL_RTX, NULL_RTX, label, -1);
5753       emit_move_insn (target, trueval);
5754       emit_label (label);
5755       return target;
5756     }
5757
       /* TARGET is written before the comparison is made, so it must be a
          register that neither operand mentions; otherwise use a new
          register of the same mode.  */
5758   if (!REG_P (target)
5759       || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5760     target = gen_reg_rtx (GET_MODE (target));
5761
5762   /* Jump in the right direction if the target cannot implement CODE
5763      but can jump on its reverse condition.  */
5764   falseval = const0_rtx;
5765   if (! can_compare_p (code, mode, ccp_jump)
5766       && (! FLOAT_MODE_P (mode)
5767           || code == ORDERED || code == UNORDERED
5768           || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5769           || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5770     {
5771       enum rtx_code rcode;
5772       if (FLOAT_MODE_P (mode))
5773         rcode = reverse_condition_maybe_unordered (code);
5774       else
5775         rcode = reverse_condition (code);
5776
5777       /* Canonicalize to UNORDERED for the libcall.  */
5778       if (can_compare_p (rcode, mode, ccp_jump)
5779           || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5780         {
               /* Test the reverse condition instead, exchanging the two
                  values to be stored.  */
5781           falseval = trueval;
5782           trueval = const0_rtx;
5783           code = rcode;
5784         }
5785     }
5786
       /* Store TRUEVAL, then jump over the store of FALSEVAL if the
          comparison holds.  */
5787   emit_move_insn (target, trueval);
5788   label = gen_label_rtx ();
5789   do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5790                            NULL_RTX, label, -1);
5791
5792   emit_move_insn (target, falseval);
5793   emit_label (label);
5794
5795   return target;
5796 }
5797 \f
5798 /* Perform possibly multi-word comparison and conditional jump to LABEL
5799    if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE.  This is
5800    now a thin wrapper around do_compare_rtx_and_jump.  The comparison is
        treated as unsigned exactly when OP is one of LTU, LEU, GTU or GEU.  */
5801
5802 static void
5803 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
5804                  rtx label)
5805 {
       /* Derive the signedness from the comparison code itself.  */
5806   int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5807   do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode,
5808                            NULL_RTX, NULL_RTX, label, -1);
5809 }