/* Medium-level subroutines: convert bit-field store and extract
   and shifts, multiplies and divides to rtl instructions.
   Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
   1999, 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Software Foundation, 59 Temple Place - Suite 330, Boston, MA
02111-1307, USA.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "toplev.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "flags.h"
#include "insn-config.h"
#include "expr.h"
#include "optabs.h"
#include "real.h"
#include "recog.h"
#include "langhooks.h"

static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
                                   unsigned HOST_WIDE_INT,
                                   unsigned HOST_WIDE_INT, rtx);
static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
                                   unsigned HOST_WIDE_INT, rtx);
static rtx extract_fixed_bit_field (enum machine_mode, rtx,
                                    unsigned HOST_WIDE_INT,
                                    unsigned HOST_WIDE_INT,
                                    unsigned HOST_WIDE_INT, rtx, int);
static rtx mask_rtx (enum machine_mode, int, int, int);
static rtx lshift_value (enum machine_mode, rtx, int, int);
static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
                                    unsigned HOST_WIDE_INT, int);
static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);

/* Nonzero means divide or modulus operations are relatively cheap for
   powers of two, so don't use branches; emit the operation instead.
   Usually, this will mean that the MD file will emit non-branch
   sequences.  */

static int sdiv_pow2_cheap[NUM_MACHINE_MODES];
static int smod_pow2_cheap[NUM_MACHINE_MODES];

#ifndef SLOW_UNALIGNED_ACCESS
#define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
#endif

/* For compilers that support multiple targets with different word sizes,
   MAX_BITS_PER_WORD contains the biggest value of BITS_PER_WORD.  An example
   is the H8/300(H) compiler.  */

#ifndef MAX_BITS_PER_WORD
#define MAX_BITS_PER_WORD BITS_PER_WORD
#endif

/* Reduce conditional compilation elsewhere.  */
#ifndef HAVE_insv
#define HAVE_insv	0
#define CODE_FOR_insv	CODE_FOR_nothing
#define gen_insv(a,b,c,d) NULL_RTX
#endif
#ifndef HAVE_extv
#define HAVE_extv	0
#define CODE_FOR_extv	CODE_FOR_nothing
#define gen_extv(a,b,c,d) NULL_RTX
#endif
#ifndef HAVE_extzv
#define HAVE_extzv	0
#define CODE_FOR_extzv	CODE_FOR_nothing
#define gen_extzv(a,b,c,d) NULL_RTX
#endif

/* Cost of various pieces of RTL.  Note that some of these are indexed by
   shift count and some by mode.  */
static int zero_cost;
static int add_cost[NUM_MACHINE_MODES];
static int neg_cost[NUM_MACHINE_MODES];
static int shift_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
static int shiftadd_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
static int shiftsub_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
static int mul_cost[NUM_MACHINE_MODES];
static int div_cost[NUM_MACHINE_MODES];
static int mul_widen_cost[NUM_MACHINE_MODES];
static int mul_highpart_cost[NUM_MACHINE_MODES];
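
/* Illustrative note, not part of the original source: once init_expmed
   below has run, shift_cost[SImode][3] holds the cost of an SImode
   ASHIFT by 3, and shiftadd_cost[SImode][3] the cost of
   (plus (mult reg 8) reg), so later code in this file can weigh
   shift-and-add sequences against mul_cost[SImode] when synthesizing
   multiplications by constants.  */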

void
init_expmed (void)
{
  struct
  {
    struct rtx_def reg;
    struct rtx_def plus;	rtunion plus_fld1;
    struct rtx_def neg;
    struct rtx_def udiv;	rtunion udiv_fld1;
    struct rtx_def mult;	rtunion mult_fld1;
    struct rtx_def div;		rtunion div_fld1;
    struct rtx_def mod;		rtunion mod_fld1;
    struct rtx_def zext;
    struct rtx_def wide_mult;	rtunion wide_mult_fld1;
    struct rtx_def wide_lshr;	rtunion wide_lshr_fld1;
    struct rtx_def wide_trunc;
    struct rtx_def shift;	rtunion shift_fld1;
    struct rtx_def shift_mult;	rtunion shift_mult_fld1;
    struct rtx_def shift_add;	rtunion shift_add_fld1;
    struct rtx_def shift_sub;	rtunion shift_sub_fld1;
  } all;

  rtx pow2[MAX_BITS_PER_WORD];
  rtx cint[MAX_BITS_PER_WORD];
  int m, n;
  enum machine_mode mode, wider_mode;

  zero_cost = rtx_cost (const0_rtx, 0);

  for (m = 1; m < MAX_BITS_PER_WORD; m++)
    {
      pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
      cint[m] = GEN_INT (m);
    }

  memset (&all, 0, sizeof all);

  PUT_CODE (&all.reg, REG);
  REGNO (&all.reg) = 10000;

  PUT_CODE (&all.plus, PLUS);
  XEXP (&all.plus, 0) = &all.reg;
  XEXP (&all.plus, 1) = &all.reg;

  PUT_CODE (&all.neg, NEG);
  XEXP (&all.neg, 0) = &all.reg;

  PUT_CODE (&all.udiv, UDIV);
  XEXP (&all.udiv, 0) = &all.reg;
  XEXP (&all.udiv, 1) = &all.reg;

  PUT_CODE (&all.mult, MULT);
  XEXP (&all.mult, 0) = &all.reg;
  XEXP (&all.mult, 1) = &all.reg;

  PUT_CODE (&all.div, DIV);
  XEXP (&all.div, 0) = &all.reg;
  XEXP (&all.div, 1) = 32 < MAX_BITS_PER_WORD ? cint[32] : GEN_INT (32);

  PUT_CODE (&all.mod, MOD);
  XEXP (&all.mod, 0) = &all.reg;
  XEXP (&all.mod, 1) = XEXP (&all.div, 1);

  PUT_CODE (&all.zext, ZERO_EXTEND);
  XEXP (&all.zext, 0) = &all.reg;

  PUT_CODE (&all.wide_mult, MULT);
  XEXP (&all.wide_mult, 0) = &all.zext;
  XEXP (&all.wide_mult, 1) = &all.zext;

  PUT_CODE (&all.wide_lshr, LSHIFTRT);
  XEXP (&all.wide_lshr, 0) = &all.wide_mult;

  PUT_CODE (&all.wide_trunc, TRUNCATE);
  XEXP (&all.wide_trunc, 0) = &all.wide_lshr;

  PUT_CODE (&all.shift, ASHIFT);
  XEXP (&all.shift, 0) = &all.reg;

  PUT_CODE (&all.shift_mult, MULT);
  XEXP (&all.shift_mult, 0) = &all.reg;

  PUT_CODE (&all.shift_add, PLUS);
  XEXP (&all.shift_add, 0) = &all.shift_mult;
  XEXP (&all.shift_add, 1) = &all.reg;

  PUT_CODE (&all.shift_sub, MINUS);
  XEXP (&all.shift_sub, 0) = &all.shift_mult;
  XEXP (&all.shift_sub, 1) = &all.reg;

  for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
       mode != VOIDmode;
       mode = GET_MODE_WIDER_MODE (mode))
    {
      PUT_MODE (&all.reg, mode);
      PUT_MODE (&all.plus, mode);
      PUT_MODE (&all.neg, mode);
      PUT_MODE (&all.udiv, mode);
      PUT_MODE (&all.mult, mode);
      PUT_MODE (&all.div, mode);
      PUT_MODE (&all.mod, mode);
      PUT_MODE (&all.wide_trunc, mode);
      PUT_MODE (&all.shift, mode);
      PUT_MODE (&all.shift_mult, mode);
      PUT_MODE (&all.shift_add, mode);
      PUT_MODE (&all.shift_sub, mode);

      add_cost[mode] = rtx_cost (&all.plus, SET);
      neg_cost[mode] = rtx_cost (&all.neg, SET);
      div_cost[mode] = rtx_cost (&all.udiv, SET);
      mul_cost[mode] = rtx_cost (&all.mult, SET);

      sdiv_pow2_cheap[mode] = (rtx_cost (&all.div, SET) <= 2 * add_cost[mode]);
      smod_pow2_cheap[mode] = (rtx_cost (&all.mod, SET) <= 2 * add_cost[mode]);

      wider_mode = GET_MODE_WIDER_MODE (mode);
      if (wider_mode != VOIDmode)
        {
          PUT_MODE (&all.zext, wider_mode);
          PUT_MODE (&all.wide_mult, wider_mode);
          PUT_MODE (&all.wide_lshr, wider_mode);
          XEXP (&all.wide_lshr, 1) = GEN_INT (GET_MODE_BITSIZE (mode));

          mul_widen_cost[wider_mode] = rtx_cost (&all.wide_mult, SET);
          mul_highpart_cost[mode] = rtx_cost (&all.wide_trunc, SET);
        }

      shift_cost[mode][0] = 0;
      shiftadd_cost[mode][0] = shiftsub_cost[mode][0] = add_cost[mode];

      n = MIN (MAX_BITS_PER_WORD, GET_MODE_BITSIZE (mode));
      for (m = 1; m < n; m++)
        {
          XEXP (&all.shift, 1) = cint[m];
          XEXP (&all.shift_mult, 1) = pow2[m];

          shift_cost[mode][m] = rtx_cost (&all.shift, SET);
          shiftadd_cost[mode][m] = rtx_cost (&all.shift_add, SET);
          shiftsub_cost[mode][m] = rtx_cost (&all.shift_sub, SET);
        }
    }
}
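
/* Sketch of what the skeletons above price, for illustration only:
   with mode == SImode and wider_mode == DImode, mul_highpart_cost[SImode]
   is the cost of
     (truncate:SI (lshiftrt:DI (mult:DI (zero_extend:DI reg)
                                        (zero_extend:DI reg))
                               (const_int 32)))
   i.e. the high half of a widening multiply.  */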

/* Return an rtx representing minus the value of X.
   MODE is the intended mode of the result,
   useful if X is a CONST_INT.  */

rtx
negate_rtx (enum machine_mode mode, rtx x)
{
  rtx result = simplify_unary_operation (NEG, mode, x, mode);

  if (result == 0)
    result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);

  return result;
}

/* Report on the availability of insv/extv/extzv and the desired mode
   of each of their operands.  Returns MAX_MACHINE_MODE if HAVE_foo
   is false; else the mode of the specified operand.  If OPNO is -1,
   all the caller cares about is whether the insn is available.  */

enum machine_mode
mode_for_extraction (enum extraction_pattern pattern, int opno)
{
  const struct insn_data *data;

  switch (pattern)
    {
    case EP_insv:
      if (HAVE_insv)
        {
          data = &insn_data[CODE_FOR_insv];
          break;
        }
      return MAX_MACHINE_MODE;

    case EP_extv:
      if (HAVE_extv)
        {
          data = &insn_data[CODE_FOR_extv];
          break;
        }
      return MAX_MACHINE_MODE;

    case EP_extzv:
      if (HAVE_extzv)
        {
          data = &insn_data[CODE_FOR_extzv];
          break;
        }
      return MAX_MACHINE_MODE;

    default:
      abort ();
    }

  if (opno == -1)
    return VOIDmode;

  /* Everyone who uses this function used to follow it with
     if (result == VOIDmode) result = word_mode; */
  if (data->operand[opno].mode == VOIDmode)
    return word_mode;
  return data->operand[opno].mode;
}
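
/* Usage sketch (illustrative): store_bit_field below calls
   mode_for_extraction (EP_insv, 3) to learn the mode the insv pattern
   wants for its value operand; a caller that only needs to know
   whether the pattern exists at all can pass OPNO == -1.  */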

/* Generate code to store value from rtx VALUE
   into a bit-field within structure STR_RTX
   containing BITSIZE bits starting at bit BITNUM.
   FIELDMODE is the machine-mode of the FIELD_DECL node for this field.  */

/* ??? Note that there are two different ideas here for how
   to determine the size to count bits within, for a register.
   One is BITS_PER_WORD, and the other is the size of operand 3
   of the insv pattern.

   If operand 3 of the insv pattern is VOIDmode, then we use BITS_PER_WORD;
   else, we use the mode of operand 3.  */

rtx
store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
                 unsigned HOST_WIDE_INT bitnum, enum machine_mode fieldmode,
                 rtx value)
{
  unsigned int unit
    = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
  unsigned HOST_WIDE_INT offset = bitnum / unit;
  unsigned HOST_WIDE_INT bitpos = bitnum % unit;
  rtx op0 = str_rtx;
  int byte_offset;

  enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);

  while (GET_CODE (op0) == SUBREG)
    {
      /* The following line once was done only if WORDS_BIG_ENDIAN,
         but I think that is a mistake.  WORDS_BIG_ENDIAN is
         meaningful at a much higher level; when structures are copied
         between memory and regs, the higher-numbered regs
         always get higher addresses.  */
      offset += (SUBREG_BYTE (op0) / UNITS_PER_WORD);
      /* We used to adjust BITPOS here, but now we do the whole adjustment
         right after the loop.  */
      op0 = SUBREG_REG (op0);
    }

  value = protect_from_queue (value, 0);

  /* Use vec_set patterns for inserting parts of vectors whenever
     available.  */
  if (VECTOR_MODE_P (GET_MODE (op0))
      && !MEM_P (op0)
      && (vec_set_optab->handlers[GET_MODE (op0)].insn_code
          != CODE_FOR_nothing)
      && fieldmode == GET_MODE_INNER (GET_MODE (op0))
      && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
      && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
    {
      enum machine_mode outermode = GET_MODE (op0);
      enum machine_mode innermode = GET_MODE_INNER (outermode);
      int icode = (int) vec_set_optab->handlers[outermode].insn_code;
      int pos = bitnum / GET_MODE_BITSIZE (innermode);
      rtx rtxpos = GEN_INT (pos);
      rtx src = value;
      rtx dest = op0;
      rtx pat, seq;
      enum machine_mode mode0 = insn_data[icode].operand[0].mode;
      enum machine_mode mode1 = insn_data[icode].operand[1].mode;
      enum machine_mode mode2 = insn_data[icode].operand[2].mode;

      start_sequence ();

      if (! (*insn_data[icode].operand[1].predicate) (src, mode1))
        src = copy_to_mode_reg (mode1, src);

      if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
        rtxpos = copy_to_mode_reg (mode2, rtxpos);

      /* We could handle this, but we should always be called with a pseudo
         for our targets and all insns should take them as outputs.  */
      if (! (*insn_data[icode].operand[0].predicate) (dest, mode0)
          || ! (*insn_data[icode].operand[1].predicate) (src, mode1)
          || ! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
        abort ();

      pat = GEN_FCN (icode) (dest, src, rtxpos);
      seq = get_insns ();
      end_sequence ();
      if (pat)
        {
          emit_insn (seq);
          emit_insn (pat);
          return dest;
        }
    }

  if (flag_force_mem)
    {
      int old_generating_concat_p = generating_concat_p;
      generating_concat_p = 0;
      value = force_not_mem (value);
      generating_concat_p = old_generating_concat_p;
    }

  /* If the target is a register, overwriting the entire object, or storing
     a full-word or multi-word field can be done with just a SUBREG.

     If the target is memory, storing any naturally aligned field can be
     done with a simple store.  For targets that support fast unaligned
     memory, any naturally sized, unit aligned field can be done directly.  */

  byte_offset = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
                + (offset * UNITS_PER_WORD);

  if (bitpos == 0
      && bitsize == GET_MODE_BITSIZE (fieldmode)
      && (!MEM_P (op0)
          ? ((GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD
              || GET_MODE_SIZE (GET_MODE (op0)) == GET_MODE_SIZE (fieldmode))
             && byte_offset % GET_MODE_SIZE (fieldmode) == 0)
          : (! SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0))
             || (offset * BITS_PER_UNIT % bitsize == 0
                 && MEM_ALIGN (op0) % GET_MODE_BITSIZE (fieldmode) == 0))))
    {
      if (GET_MODE (op0) != fieldmode)
        {
          if (GET_CODE (op0) == SUBREG)
            {
              if (GET_MODE (SUBREG_REG (op0)) == fieldmode
                  || GET_MODE_CLASS (fieldmode) == MODE_INT
                  || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT)
                op0 = SUBREG_REG (op0);
              else
                /* Else we've got some float mode source being extracted into
                   a different float mode destination -- this combination of
                   subregs results in Severe Tire Damage.  */
                abort ();
            }
          if (REG_P (op0))
            op0 = gen_rtx_SUBREG (fieldmode, op0, byte_offset);
          else
            op0 = adjust_address (op0, fieldmode, offset);
        }
      emit_move_insn (op0, value);
      return value;
    }

  /* Make sure we are playing with integral modes.  Pun with subregs
     if we aren't.  This must come after the entire register case above,
     since that case is valid for any mode.  The following cases are only
     valid for integral modes.  */
  {
    enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
    if (imode != GET_MODE (op0))
      {
        if (MEM_P (op0))
          op0 = adjust_address (op0, imode, 0);
        else if (imode != BLKmode)
          op0 = gen_lowpart (imode, op0);
        else
          abort ();
      }
  }

  /* We may be accessing data outside the field, which means
     we can alias adjacent data.  */
  if (MEM_P (op0))
    {
      op0 = shallow_copy_rtx (op0);
      set_mem_alias_set (op0, 0);
      set_mem_expr (op0, 0);
    }

  /* If OP0 is a register, BITPOS must count within a word.
     But as we have it, it counts within whatever size OP0 now has.
     On a bigendian machine, these are not the same, so convert.  */
  if (BYTES_BIG_ENDIAN
      && !MEM_P (op0)
      && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
    bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));

  /* Storing an lsb-aligned field in a register
     can be done with a movstrict instruction.  */

  if (!MEM_P (op0)
      && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
      && bitsize == GET_MODE_BITSIZE (fieldmode)
      && (movstrict_optab->handlers[fieldmode].insn_code
          != CODE_FOR_nothing))
    {
      int icode = movstrict_optab->handlers[fieldmode].insn_code;

      /* Get appropriate low part of the value being stored.  */
      if (GET_CODE (value) == CONST_INT || REG_P (value))
        value = gen_lowpart (fieldmode, value);
      else if (!(GET_CODE (value) == SYMBOL_REF
                 || GET_CODE (value) == LABEL_REF
                 || GET_CODE (value) == CONST))
        value = convert_to_mode (fieldmode, value, 0);

      if (! (*insn_data[icode].operand[1].predicate) (value, fieldmode))
        value = copy_to_mode_reg (fieldmode, value);

      if (GET_CODE (op0) == SUBREG)
        {
          if (GET_MODE (SUBREG_REG (op0)) == fieldmode
              || GET_MODE_CLASS (fieldmode) == MODE_INT
              || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT)
            op0 = SUBREG_REG (op0);
          else
            /* Else we've got some float mode source being extracted into
               a different float mode destination -- this combination of
               subregs results in Severe Tire Damage.  */
            abort ();
        }

      emit_insn (GEN_FCN (icode)
                 (gen_rtx_SUBREG (fieldmode, op0,
                                  (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
                                  + (offset * UNITS_PER_WORD)),
                  value));

      return value;
    }

  /* Handle fields bigger than a word.  */

  if (bitsize > BITS_PER_WORD)
    {
      /* Here we transfer the words of the field
         in the order least significant first.
         This is because the most significant word is the one which may
         be less than full.
         However, only do that if the value is not BLKmode.  */

      unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
      unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
      unsigned int i;

      /* This is the mode we must force value to, so that there will be enough
         subwords to extract.  Note that fieldmode will often (always?) be
         VOIDmode, because that is what store_field uses to indicate that this
         is a bit field, but passing VOIDmode to operand_subword_force will
         result in an abort.  */
      fieldmode = GET_MODE (value);
      if (fieldmode == VOIDmode)
        fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);

      for (i = 0; i < nwords; i++)
        {
          /* If I is 0, use the low-order word in both field and target;
             if I is 1, use the next to lowest word; and so on.  */
          unsigned int wordnum = (backwards ? nwords - i - 1 : i);
          unsigned int bit_offset = (backwards
                                     ? MAX ((int) bitsize - ((int) i + 1)
                                            * BITS_PER_WORD,
                                            0)
                                     : (int) i * BITS_PER_WORD);

          store_bit_field (op0, MIN (BITS_PER_WORD,
                                     bitsize - i * BITS_PER_WORD),
                           bitnum + bit_offset, word_mode,
                           operand_subword_force (value, wordnum, fieldmode));
        }
      return value;
    }
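
  /* Worked example for the loop above (illustrative, not from the
     original source): on a 32-bit little-endian target, storing a
     64-bit VALUE at BITNUM 0 iterates twice, storing the low word at
     bit 0 and the high word at bit 32, least significant word first.  */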

  /* From here on we can assume that the field to be stored
     fits within a single word; the wider-than-word case was handled
     above.  */

  /* OFFSET is the number of words or bytes (UNIT says which)
     from STR_RTX to the first word or byte containing part of the field.  */

  if (!MEM_P (op0))
    {
      if (offset != 0
          || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
        {
          if (!REG_P (op0))
            {
              /* Since this is a destination (lvalue), we can't copy it to a
                 pseudo.  We can trivially remove a SUBREG that does not
                 change the size of the operand.  Such a SUBREG may have been
                 added above.  Otherwise, abort.  */
              if (GET_CODE (op0) == SUBREG
                  && (GET_MODE_SIZE (GET_MODE (op0))
                      == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)))))
                op0 = SUBREG_REG (op0);
              else
                abort ();
            }
          op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
                                op0, (offset * UNITS_PER_WORD));
        }
      offset = 0;
    }
  else
    op0 = protect_from_queue (op0, 1);

  /* If VALUE is a floating-point mode, access it as an integer of the
     corresponding size.  This can occur on a machine with 64 bit registers
     that uses SFmode for float.  This can also occur for unaligned float
     structure fields.  */
  if (GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
      && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
    value = gen_lowpart ((GET_MODE (value) == VOIDmode
                          ? word_mode : int_mode_for_mode (GET_MODE (value))),
                         value);

  /* Now OFFSET is nonzero only if OP0 is memory
     and is therefore always measured in bytes.  */

  if (HAVE_insv
      && GET_MODE (value) != BLKmode
      && !(bitsize == 1 && GET_CODE (value) == CONST_INT)
      /* Ensure insv's size is wide enough for this field.  */
      && (GET_MODE_BITSIZE (op_mode) >= bitsize)
      && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
            && (bitsize + bitpos > GET_MODE_BITSIZE (op_mode))))
    {
      int xbitpos = bitpos;
      rtx value1;
      rtx xop0 = op0;
      rtx last = get_last_insn ();
      rtx pat;
      enum machine_mode maxmode = mode_for_extraction (EP_insv, 3);
      int save_volatile_ok = volatile_ok;

      volatile_ok = 1;

      /* If this machine's insv can only insert into a register, copy OP0
         into a register and save it back later.  */
      /* This used to check flag_force_mem, but that was a serious
         de-optimization now that flag_force_mem is enabled by -O2.  */
      if (MEM_P (op0)
          && ! ((*insn_data[(int) CODE_FOR_insv].operand[0].predicate)
                (op0, VOIDmode)))
        {
          rtx tempreg;
          enum machine_mode bestmode;

          /* Get the mode to use for inserting into this field.  If OP0 is
             BLKmode, get the smallest mode consistent with the alignment.  If
             OP0 is a non-BLKmode object that is no wider than MAXMODE, use its
             mode.  Otherwise, use the smallest mode containing the field.  */

          if (GET_MODE (op0) == BLKmode
              || GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (maxmode))
            bestmode
              = get_best_mode (bitsize, bitnum, MEM_ALIGN (op0), maxmode,
                               MEM_VOLATILE_P (op0));
          else
            bestmode = GET_MODE (op0);

          if (bestmode == VOIDmode
              || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
                  && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
            goto insv_loses;

          /* Adjust address to point to the containing unit of that mode.
             Compute offset as multiple of this unit, counting in bytes.  */
          unit = GET_MODE_BITSIZE (bestmode);
          offset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
          bitpos = bitnum % unit;
          op0 = adjust_address (op0, bestmode, offset);

          /* Fetch that unit, store the bitfield in it, then store
             the unit.  */
          tempreg = copy_to_reg (op0);
          store_bit_field (tempreg, bitsize, bitpos, fieldmode, value);
          emit_move_insn (op0, tempreg);
          return value;
        }
      volatile_ok = save_volatile_ok;

      /* Add OFFSET into OP0's address.  */
      if (MEM_P (xop0))
        xop0 = adjust_address (xop0, byte_mode, offset);

      /* If xop0 is a register, we need it in MAXMODE
         to make it acceptable to the format of insv.  */
      if (GET_CODE (xop0) == SUBREG)
        /* We can't just change the mode, because this might clobber op0,
           and we will need the original value of op0 if insv fails.  */
        xop0 = gen_rtx_SUBREG (maxmode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
      if (REG_P (xop0) && GET_MODE (xop0) != maxmode)
        xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);

      /* On big-endian machines, we count bits from the most significant.
         If the bit field insn does not, we must invert.  */

      if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
        xbitpos = unit - bitsize - xbitpos;

      /* We have been counting XBITPOS within UNIT.
         Count instead within the size of the register.  */
      if (BITS_BIG_ENDIAN && !MEM_P (xop0))
        xbitpos += GET_MODE_BITSIZE (maxmode) - unit;

      unit = GET_MODE_BITSIZE (maxmode);

      /* Convert VALUE to maxmode (which insv insn wants) in VALUE1.  */
      value1 = value;
      if (GET_MODE (value) != maxmode)
        {
          if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
            {
              /* Optimization: Don't bother really extending VALUE
                 if it has all the bits we will actually use.  However,
                 if we must narrow it, be sure we do it correctly.  */

              if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (maxmode))
                {
                  rtx tmp;

                  tmp = simplify_subreg (maxmode, value1, GET_MODE (value), 0);
                  if (! tmp)
                    tmp = simplify_gen_subreg (maxmode,
                                               force_reg (GET_MODE (value),
                                                          value1),
                                               GET_MODE (value), 0);
                  value1 = tmp;
                }
              else
                value1 = gen_lowpart (maxmode, value1);
            }
          else if (GET_CODE (value) == CONST_INT)
            value1 = gen_int_mode (INTVAL (value), maxmode);
          else if (!CONSTANT_P (value))
            /* Parse phase is supposed to make VALUE's data type
               match that of the component reference, which is a type
               at least as wide as the field; so VALUE should have
               a mode that corresponds to that type.  */
            abort ();
        }

      /* If this machine's insv insists on a register,
         get VALUE1 into a register.  */
      if (! ((*insn_data[(int) CODE_FOR_insv].operand[3].predicate)
             (value1, maxmode)))
        value1 = force_reg (maxmode, value1);

      pat = gen_insv (xop0, GEN_INT (bitsize), GEN_INT (xbitpos), value1);
      if (pat)
        emit_insn (pat);
      else
        {
          delete_insns_since (last);
          store_fixed_bit_field (op0, offset, bitsize, bitpos, value);
        }
    }
  else
    insv_loses:
    /* Insv is not available; store using shifts and boolean ops.  */
    store_fixed_bit_field (op0, offset, bitsize, bitpos, value);

  return value;
}

/* Use shifts and boolean operations to store VALUE
   into a bit field of width BITSIZE
   in a memory location specified by OP0, offset by OFFSET bytes.
   (OFFSET must be 0 if OP0 is a register.)
   The field starts at position BITPOS within the byte.
   (If OP0 is a register, it may be a full word or a narrower mode,
   but BITPOS still counts within a full word,
   which is significant on bigendian machines.)

   Note that protect_from_queue has already been done on OP0 and VALUE.  */
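
/* In outline (illustrative, not part of the original comment), for a
   field mask MASK of BITSIZE ones the code below computes

       OP0 = (OP0 & ~(MASK << BITPOS)) | ((VALUE & MASK) << BITPOS)

   skipping the clearing AND when the constant field value is all ones
   and the IOR when it is zero.  */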

static void
store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT offset,
                       unsigned HOST_WIDE_INT bitsize,
                       unsigned HOST_WIDE_INT bitpos, rtx value)
{
  enum machine_mode mode;
  unsigned int total_bits = BITS_PER_WORD;
  rtx subtarget, temp;
  int all_zero = 0;
  int all_one = 0;

  /* There is a case not handled here:
     a structure with a known alignment of just a halfword
     and a field split across two aligned halfwords within the structure.
     Or likewise a structure with a known alignment of just a byte
     and a field split across two bytes.
     Such cases are not supposed to be able to occur.  */

  if (REG_P (op0) || GET_CODE (op0) == SUBREG)
    {
      if (offset != 0)
        abort ();
      /* Special treatment for a bit field split across two registers.  */
      if (bitsize + bitpos > BITS_PER_WORD)
        {
          store_split_bit_field (op0, bitsize, bitpos, value);
          return;
        }
    }
  else
    {
      /* Get the proper mode to use for this field.  We want a mode that
         includes the entire field.  If such a mode would be larger than
         a word, we won't be doing the extraction the normal way.
         We don't want a mode bigger than the destination.  */

      mode = GET_MODE (op0);
      if (GET_MODE_BITSIZE (mode) == 0
          || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
        mode = word_mode;
      mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
                            MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));

      if (mode == VOIDmode)
        {
          /* The only way this should occur is if the field spans word
             boundaries.  */
          store_split_bit_field (op0, bitsize, bitpos + offset * BITS_PER_UNIT,
                                 value);
          return;
        }

      total_bits = GET_MODE_BITSIZE (mode);

      /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
         be in the range 0 to total_bits-1, and put any excess bytes in
         OFFSET.  */
      if (bitpos >= total_bits)
        {
          offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
          bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
                     * BITS_PER_UNIT);
        }

      /* Get ref to an aligned byte, halfword, or word containing the field.
         Adjust BITPOS to be position within a word,
         and OFFSET to be the offset of that word.
         Then alter OP0 to refer to that word.  */
      bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
      offset -= (offset % (total_bits / BITS_PER_UNIT));
      op0 = adjust_address (op0, mode, offset);
    }

  mode = GET_MODE (op0);

  /* Now MODE is either some integral mode for a MEM as OP0,
     or is a full-word for a REG as OP0.  TOTAL_BITS corresponds.
     The bit field is contained entirely within OP0.
     BITPOS is the starting bit number within OP0.
     (OP0's mode may actually be narrower than MODE.)  */

  if (BYTES_BIG_ENDIAN)
    /* BITPOS is the distance between our msb
       and that of the containing datum.
       Convert it to the distance from the lsb.  */
    bitpos = total_bits - bitsize - bitpos;

  /* Now BITPOS is always the distance between our lsb
     and that of OP0.  */

  /* Shift VALUE left by BITPOS bits.  If VALUE is not constant,
     we must first convert its mode to MODE.  */

  if (GET_CODE (value) == CONST_INT)
    {
      HOST_WIDE_INT v = INTVAL (value);

      if (bitsize < HOST_BITS_PER_WIDE_INT)
        v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;

      if (v == 0)
        all_zero = 1;
      else if ((bitsize < HOST_BITS_PER_WIDE_INT
                && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
               || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
        all_one = 1;

      value = lshift_value (mode, value, bitpos, bitsize);
    }
  else
    {
      int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
                      && bitpos + bitsize != GET_MODE_BITSIZE (mode));

      if (GET_MODE (value) != mode)
        {
          if ((REG_P (value) || GET_CODE (value) == SUBREG)
              && GET_MODE_SIZE (mode) < GET_MODE_SIZE (GET_MODE (value)))
            value = gen_lowpart (mode, value);
          else
            value = convert_to_mode (mode, value, 1);
        }

      if (must_and)
        value = expand_binop (mode, and_optab, value,
                              mask_rtx (mode, 0, bitsize, 0),
                              NULL_RTX, 1, OPTAB_LIB_WIDEN);
      if (bitpos > 0)
        value = expand_shift (LSHIFT_EXPR, mode, value,
                              build_int_2 (bitpos, 0), NULL_RTX, 1);
    }

  /* Now clear the chosen bits in OP0,
     except that if VALUE is -1 we need not bother.  */

  subtarget = (REG_P (op0) || ! flag_force_mem) ? op0 : 0;

  if (! all_one)
    {
      temp = expand_binop (mode, and_optab, op0,
                           mask_rtx (mode, bitpos, bitsize, 1),
                           subtarget, 1, OPTAB_LIB_WIDEN);
      subtarget = temp;
    }
  else
    temp = op0;

  /* Now logical-or VALUE into OP0, unless it is zero.  */

  if (! all_zero)
    temp = expand_binop (mode, ior_optab, temp, value,
                         subtarget, 1, OPTAB_LIB_WIDEN);
  if (op0 != temp)
    emit_move_insn (op0, temp);
}

/* Store a bit field that is split across multiple accessible memory objects.

   OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
   BITSIZE is the field width; BITPOS the position of its first bit
   (within the word).
   VALUE is the value to store.

   This does not yet handle fields wider than BITS_PER_WORD.  */
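
/* Worked example (illustrative, not from the original comment): with
   32-bit words, a 10-bit field at BITPOS 28 of a register is stored in
   two pieces by the loop below: 4 bits at position 28 of the first
   word, then 6 bits at position 0 of the next word.  */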

static void
store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
                       unsigned HOST_WIDE_INT bitpos, rtx value)
{
  unsigned int unit;
  unsigned int bitsdone = 0;

  /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
     much at a time.  */
  if (REG_P (op0) || GET_CODE (op0) == SUBREG)
    unit = BITS_PER_WORD;
  else
    unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);

  /* If VALUE is a constant other than a CONST_INT, get it into a register in
     WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
     that VALUE might be a floating-point constant.  */
  if (CONSTANT_P (value) && GET_CODE (value) != CONST_INT)
    {
      rtx word = gen_lowpart_common (word_mode, value);

      if (word && (value != word))
        value = word;
      else
        value = gen_lowpart_common (word_mode,
                                    force_reg (GET_MODE (value) != VOIDmode
                                               ? GET_MODE (value)
                                               : word_mode, value));
    }

  while (bitsdone < bitsize)
    {
      unsigned HOST_WIDE_INT thissize;
      rtx part, word;
      unsigned HOST_WIDE_INT thispos;
      unsigned HOST_WIDE_INT offset;

      offset = (bitpos + bitsdone) / unit;
      thispos = (bitpos + bitsdone) % unit;

      /* THISSIZE must not overrun a word boundary.  Otherwise,
         store_fixed_bit_field will call us again, and we will mutually
         recurse forever.  */
      thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
      thissize = MIN (thissize, unit - thispos);

      if (BYTES_BIG_ENDIAN)
        {
          int total_bits;

          /* We must do an endian conversion exactly the same way as it is
             done in extract_bit_field, so that the two calls to
             extract_fixed_bit_field will have comparable arguments.  */
          if (!MEM_P (value) || GET_MODE (value) == BLKmode)
            total_bits = BITS_PER_WORD;
          else
            total_bits = GET_MODE_BITSIZE (GET_MODE (value));

          /* Fetch successively less significant portions.  */
          if (GET_CODE (value) == CONST_INT)
            part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
                             >> (bitsize - bitsdone - thissize))
                            & (((HOST_WIDE_INT) 1 << thissize) - 1));
          else
            /* The args are chosen so that the last part includes the
               lsb.  Give extract_bit_field the value it needs (with
               endianness compensation) to fetch the piece we want.  */
            part = extract_fixed_bit_field (word_mode, value, 0, thissize,
                                            total_bits - bitsize + bitsdone,
                                            NULL_RTX, 1);
        }
      else
        {
          /* Fetch successively more significant portions.  */
          if (GET_CODE (value) == CONST_INT)
            part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
                             >> bitsdone)
                            & (((HOST_WIDE_INT) 1 << thissize) - 1));
          else
            part = extract_fixed_bit_field (word_mode, value, 0, thissize,
                                            bitsdone, NULL_RTX, 1);
        }

      /* If OP0 is a register, then handle OFFSET here.

         When handling multiword bitfields, extract_bit_field may pass
         down a word_mode SUBREG of a larger REG for a bitfield that actually
         crosses a word boundary.  Thus, for a SUBREG, we must find
         the current word starting from the base register.  */
      if (GET_CODE (op0) == SUBREG)
        {
          int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
          word = operand_subword_force (SUBREG_REG (op0), word_offset,
                                        GET_MODE (SUBREG_REG (op0)));
          offset = 0;
        }
      else if (REG_P (op0))
        {
          word = operand_subword_force (op0, offset, GET_MODE (op0));
          offset = 0;
        }
      else
        word = op0;

      /* OFFSET is in UNITs, and UNIT is in bits.
         store_fixed_bit_field wants offset in bytes.  */
      store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT, thissize,
                             thispos, part);
      bitsdone += thissize;
    }
}

/* Generate code to extract a bit-field from STR_RTX
   containing BITSIZE bits, starting at BITNUM,
   and put it in TARGET if possible (if TARGET is nonzero).
   Regardless of TARGET, we return the rtx for where the value is placed.
   It may be a QUEUED.

   STR_RTX is the structure containing the field (a REG or MEM).
   UNSIGNEDP is nonzero if this is an unsigned bit field.
   MODE is the natural mode of the field value once extracted.
   TMODE is the mode the caller would like the value to have;
   but the value may be returned with type MODE instead.

   If a TARGET is specified and we can store in it at no extra cost,
   we do so, and return TARGET.
   Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
   if they are equally easy.  */

rtx
extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
                   unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
                   enum machine_mode mode, enum machine_mode tmode)
{
  unsigned int unit
    = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
  unsigned HOST_WIDE_INT offset = bitnum / unit;
  unsigned HOST_WIDE_INT bitpos = bitnum % unit;
  rtx op0 = str_rtx;
  rtx spec_target = target;
  rtx spec_target_subreg = 0;
  enum machine_mode int_mode;
  enum machine_mode extv_mode = mode_for_extraction (EP_extv, 0);
  enum machine_mode extzv_mode = mode_for_extraction (EP_extzv, 0);
  enum machine_mode mode1;
  int byte_offset;

  if (tmode == VOIDmode)
    tmode = mode;

  while (GET_CODE (op0) == SUBREG)
    {
      bitpos += SUBREG_BYTE (op0) * BITS_PER_UNIT;
      if (bitpos > unit)
        {
          offset += (bitpos / unit);
          bitpos %= unit;
        }
      op0 = SUBREG_REG (op0);
    }

  if (REG_P (op0)
      && mode == GET_MODE (op0)
      && bitnum == 0
      && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
    {
      /* We're trying to extract a full register from itself.  */
      return op0;
    }

  /* Use vec_extract patterns for extracting parts of vectors whenever
     available.  */
  if (VECTOR_MODE_P (GET_MODE (op0))
      && !MEM_P (op0)
      && (vec_extract_optab->handlers[GET_MODE (op0)].insn_code
          != CODE_FOR_nothing)
      && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
          == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
    {
      enum machine_mode outermode = GET_MODE (op0);
      enum machine_mode innermode = GET_MODE_INNER (outermode);
      int icode = (int) vec_extract_optab->handlers[outermode].insn_code;
      unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
      rtx rtxpos = GEN_INT (pos);
      rtx src = op0;
      rtx dest = NULL, pat, seq;
      enum machine_mode mode0 = insn_data[icode].operand[0].mode;
      enum machine_mode mode1 = insn_data[icode].operand[1].mode;
      enum machine_mode mode2 = insn_data[icode].operand[2].mode;

      if (innermode == tmode || innermode == mode)
        dest = target;

      if (!dest)
        dest = gen_reg_rtx (innermode);

      start_sequence ();

      if (! (*insn_data[icode].operand[0].predicate) (dest, mode0))
        dest = copy_to_mode_reg (mode0, dest);

      if (! (*insn_data[icode].operand[1].predicate) (src, mode1))
        src = copy_to_mode_reg (mode1, src);

      if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
        rtxpos = copy_to_mode_reg (mode2, rtxpos);

      /* We could handle this, but we should always be called with a pseudo
         for our targets and all insns should take them as outputs.  */
      if (! (*insn_data[icode].operand[0].predicate) (dest, mode0)
          || ! (*insn_data[icode].operand[1].predicate) (src, mode1)
          || ! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
        abort ();

      pat = GEN_FCN (icode) (dest, src, rtxpos);
      seq = get_insns ();
      end_sequence ();
      if (pat)
        {
          emit_insn (seq);
          emit_insn (pat);
          return dest;
        }
    }

  /* Make sure we are playing with integral modes.  Pun with subregs
     if we aren't.  */
  {
    enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
    if (imode != GET_MODE (op0))
      {
        if (MEM_P (op0))
          op0 = adjust_address (op0, imode, 0);
        else if (imode != BLKmode)
          op0 = gen_lowpart (imode, op0);
        else
          abort ();
      }
  }

  /* We may be accessing data outside the field, which means
     we can alias adjacent data.  */
  if (MEM_P (op0))
    {
      op0 = shallow_copy_rtx (op0);
      set_mem_alias_set (op0, 0);
      set_mem_expr (op0, 0);
    }

  /* Extraction of a full-word or multi-word value from a structure
     in a register or aligned memory can be done with just a SUBREG.
     A subword value in the least significant part of a register
     can also be extracted with a SUBREG.  For this, we need the
     byte offset of the value in op0.  */

  byte_offset = bitpos / BITS_PER_UNIT + offset * UNITS_PER_WORD;

  /* If OP0 is a register, BITPOS must count within a word.
     But as we have it, it counts within whatever size OP0 now has.
     On a bigendian machine, these are not the same, so convert.  */
  if (BYTES_BIG_ENDIAN
      && !MEM_P (op0)
      && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
    bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));

  /* ??? We currently assume TARGET is at least as big as BITSIZE.
     If that's wrong, the solution is to test for it and set TARGET to 0
     if needed.  */

  /* Only scalar integer modes can be converted via subregs.  There is an
     additional problem for FP modes here in that they can have a precision
     which is different from the size.  mode_for_size uses precision, but
     we want a mode based on the size, so we must avoid calling it for FP
     modes.  */
  mode1 = (SCALAR_INT_MODE_P (tmode)
           ? mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0)
           : mode);

  if (((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode)
        && bitpos % BITS_PER_WORD == 0)
       || (mode1 != BLKmode
           /* ??? The big endian test here is wrong.  This is correct
              if the value is in a register, and if mode_for_size is not
              the same mode as op0.  This causes us to get unnecessarily
              inefficient code from the Thumb port when -mbig-endian.  */
           && (BYTES_BIG_ENDIAN
               ? bitpos + bitsize == BITS_PER_WORD
               : bitpos == 0)))
      && ((!MEM_P (op0)
           && TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (mode),
                                     GET_MODE_BITSIZE (GET_MODE (op0)))
           && GET_MODE_SIZE (mode1) != 0
           && byte_offset % GET_MODE_SIZE (mode1) == 0)
          || (MEM_P (op0)
              && (! SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
                  || (offset * BITS_PER_UNIT % bitsize == 0
                      && MEM_ALIGN (op0) % bitsize == 0)))))
    {
      if (mode1 != GET_MODE (op0))
        {
          if (GET_CODE (op0) == SUBREG)
            {
              if (GET_MODE (SUBREG_REG (op0)) == mode1
                  || GET_MODE_CLASS (mode1) == MODE_INT
                  || GET_MODE_CLASS (mode1) == MODE_PARTIAL_INT)
                op0 = SUBREG_REG (op0);
              else
                /* Else we've got some float mode source being extracted into
                   a different float mode destination -- this combination of
                   subregs results in Severe Tire Damage.  */
                goto no_subreg_mode_swap;
            }
          if (REG_P (op0))
            op0 = gen_rtx_SUBREG (mode1, op0, byte_offset);
          else
            op0 = adjust_address (op0, mode1, offset);
        }
      if (mode1 != mode)
        return convert_to_mode (tmode, op0, unsignedp);
      return op0;
    }
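
  /* Illustrative example for the path above (not from the original
     source): extracting bits 0..31 of a DImode pseudo as SImode on a
     little-endian target satisfies the conditions and reduces to a
     single lowpart SUBREG, with no shifts or masks.  */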

 no_subreg_mode_swap:

  /* Handle fields bigger than a word.  */

  if (bitsize > BITS_PER_WORD)
    {
      /* Here we transfer the words of the field
         in the order least significant first.
         This is because the most significant word is the one which may
         be less than full.  */

      unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
      unsigned int i;

      if (target == 0 || !REG_P (target))
        target = gen_reg_rtx (mode);

      /* Indicate for flow that the entire target reg is being set.  */
      emit_insn (gen_rtx_CLOBBER (VOIDmode, target));

      for (i = 0; i < nwords; i++)
        {
          /* If I is 0, use the low-order word in both field and target;
             if I is 1, use the next to lowest word; and so on.  */
          /* Word number in TARGET to use.  */
          unsigned int wordnum
            = (WORDS_BIG_ENDIAN
               ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
               : i);
          /* Offset from start of field in OP0.  */
          unsigned int bit_offset = (WORDS_BIG_ENDIAN
                                     ? MAX (0, ((int) bitsize - ((int) i + 1)
                                                * (int) BITS_PER_WORD))
                                     : (int) i * BITS_PER_WORD);
          rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
          rtx result_part
            = extract_bit_field (op0, MIN (BITS_PER_WORD,
                                           bitsize - i * BITS_PER_WORD),
                                 bitnum + bit_offset, 1, target_part, mode,
                                 word_mode);

          if (target_part == 0)
            abort ();

          if (result_part != target_part)
            emit_move_insn (target_part, result_part);
        }

      if (unsignedp)
        {
          /* Unless we've filled TARGET, the upper regs in a multi-reg value
             need to be zero'd out.  */
          if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
            {
              unsigned int i, total_words;

              total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
              for (i = nwords; i < total_words; i++)
                emit_move_insn
                  (operand_subword (target,
                                    WORDS_BIG_ENDIAN ? total_words - i - 1 : i,
                                    1, VOIDmode),
                   const0_rtx);
            }
          return target;
        }

      /* Signed bit field: sign-extend with two arithmetic shifts.  */
      target = expand_shift (LSHIFT_EXPR, mode, target,
                             build_int_2 (GET_MODE_BITSIZE (mode) - bitsize, 0),
                             NULL_RTX, 0);
      return expand_shift (RSHIFT_EXPR, mode, target,
                           build_int_2 (GET_MODE_BITSIZE (mode) - bitsize, 0),
                           NULL_RTX, 0);
    }
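
  /* Illustrative example for the shifts above (not from the original
     source): extracting a signed 48-bit field into a 64-bit mode
     shifts left by 16 and then arithmetic-shifts right by 16,
     duplicating the field's sign bit through the upper 16 bits.  */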

  /* From here on we know the desired field is smaller than a word.  */

  /* Check if there is a correspondingly-sized integer field, so we can
     safely extract it as one size of integer, if necessary; then
     truncate or extend to the size that is wanted; then use SUBREGs or
     convert_to_mode to get one of the modes we really wanted.  */

  int_mode = int_mode_for_mode (tmode);
  if (int_mode == BLKmode)
    int_mode = int_mode_for_mode (mode);
  if (int_mode == BLKmode)
    abort ();	/* Should probably push op0 out to memory and then
                   do a load.  */

  /* OFFSET is the number of words or bytes (UNIT says which)
     from STR_RTX to the first word or byte containing part of the field.  */

  if (!MEM_P (op0))
    {
      if (offset != 0
          || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
        {
          if (!REG_P (op0))
            op0 = copy_to_reg (op0);
          op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
                                op0, (offset * UNITS_PER_WORD));
        }
      offset = 0;
    }
  else
    op0 = protect_from_queue (str_rtx, 1);

  /* Now OFFSET is nonzero only for memory operands.  */

  if (unsignedp)
    {
      if (HAVE_extzv
          && (GET_MODE_BITSIZE (extzv_mode) >= bitsize)
          && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
                && (bitsize + bitpos > GET_MODE_BITSIZE (extzv_mode))))
        {
          unsigned HOST_WIDE_INT xbitpos = bitpos, xoffset = offset;
          rtx bitsize_rtx, bitpos_rtx;
          rtx last = get_last_insn ();
          rtx xop0 = op0;
          rtx xtarget = target;
          rtx xspec_target = spec_target;
          rtx xspec_target_subreg = spec_target_subreg;
          rtx pat;
          enum machine_mode maxmode = mode_for_extraction (EP_extzv, 0);

          if (MEM_P (xop0))
            {
              int save_volatile_ok = volatile_ok;
              volatile_ok = 1;

              /* Is the memory operand acceptable?  */
              if (! ((*insn_data[(int) CODE_FOR_extzv].operand[1].predicate)
                     (xop0, GET_MODE (xop0))))
                {
                  /* No, load into a reg and extract from there.  */
                  enum machine_mode bestmode;

                  /* Get the mode to use for extracting from this field.  If
                     OP0 is BLKmode, get the smallest mode consistent with the
                     alignment.  If OP0 is a non-BLKmode object that is no
                     wider than MAXMODE, use its mode.  Otherwise, use the
                     smallest mode containing the field.  */

                  if (GET_MODE (xop0) == BLKmode
                      || (GET_MODE_SIZE (GET_MODE (op0))
                          > GET_MODE_SIZE (maxmode)))
                    bestmode = get_best_mode (bitsize, bitnum,
                                              MEM_ALIGN (xop0), maxmode,
                                              MEM_VOLATILE_P (xop0));
                  else
                    bestmode = GET_MODE (xop0);

                  if (bestmode == VOIDmode
                      || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (xop0))
                          && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (xop0)))
                    goto extzv_loses;

                  /* Compute offset as multiple of this unit,
                     counting in bytes.  */
                  unit = GET_MODE_BITSIZE (bestmode);
                  xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
                  xbitpos = bitnum % unit;
                  xop0 = adjust_address (xop0, bestmode, xoffset);

                  /* Fetch it to a register in that size.  */
                  xop0 = force_reg (bestmode, xop0);

                  /* XBITPOS counts within UNIT, which is what is expected.  */
                }
              else
                /* Get ref to first byte containing part of the field.  */
                xop0 = adjust_address (xop0, byte_mode, xoffset);

              volatile_ok = save_volatile_ok;
            }

          /* If op0 is a register, we need it in MAXMODE (which is usually
             SImode) to make it acceptable to the format of extzv.  */
          if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
            goto extzv_loses;
          if (REG_P (xop0) && GET_MODE (xop0) != maxmode)
            xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);

          /* On big-endian machines, we count bits from the most significant.
             If the bit field insn does not, we must invert.  */
          if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
            xbitpos = unit - bitsize - xbitpos;

          /* Now convert from counting within UNIT to counting in MAXMODE.  */
          if (BITS_BIG_ENDIAN && !MEM_P (xop0))
            xbitpos += GET_MODE_BITSIZE (maxmode) - unit;

          unit = GET_MODE_BITSIZE (maxmode);

          if (xtarget == 0
              || (flag_force_mem && MEM_P (xtarget)))
            xtarget = xspec_target = gen_reg_rtx (tmode);

          if (GET_MODE (xtarget) != maxmode)
            {
              if (REG_P (xtarget))
                {
                  int wider = (GET_MODE_SIZE (maxmode)
                               > GET_MODE_SIZE (GET_MODE (xtarget)));
                  xtarget = gen_lowpart (maxmode, xtarget);
                  if (wider)
                    xspec_target_subreg = xtarget;
                }
              else
                xtarget = gen_reg_rtx (maxmode);
            }

          /* If this machine's extzv insists on a register target,
             make sure we have one.  */
          if (! ((*insn_data[(int) CODE_FOR_extzv].operand[0].predicate)
                 (xtarget, maxmode)))
            xtarget = gen_reg_rtx (maxmode);

          bitsize_rtx = GEN_INT (bitsize);
          bitpos_rtx = GEN_INT (xbitpos);

          pat = gen_extzv (protect_from_queue (xtarget, 1),
                           xop0, bitsize_rtx, bitpos_rtx);
          if (pat)
            {
              emit_insn (pat);
              target = xtarget;
              spec_target = xspec_target;
              spec_target_subreg = xspec_target_subreg;
            }
          else
            {
              delete_insns_since (last);
              target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
                                                bitpos, target, 1);
            }
        }
      else
        extzv_loses:
        target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
                                          bitpos, target, 1);
    }
  else
    {
      if (HAVE_extv
          && (GET_MODE_BITSIZE (extv_mode) >= bitsize)
          && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
                && (bitsize + bitpos > GET_MODE_BITSIZE (extv_mode))))
        {
          int xbitpos = bitpos, xoffset = offset;
          rtx bitsize_rtx, bitpos_rtx;
          rtx last = get_last_insn ();
          rtx xop0 = op0, xtarget = target;
          rtx xspec_target = spec_target;
          rtx xspec_target_subreg = spec_target_subreg;
          rtx pat;
          enum machine_mode maxmode = mode_for_extraction (EP_extv, 0);

          if (MEM_P (xop0))
            {
              /* Is the memory operand acceptable?  */
              if (! ((*insn_data[(int) CODE_FOR_extv].operand[1].predicate)
                     (xop0, GET_MODE (xop0))))
                {
                  /* No, load into a reg and extract from there.  */
                  enum machine_mode bestmode;

                  /* Get the mode to use for extracting from this field.  If
                     OP0 is BLKmode, get the smallest mode consistent with the
                     alignment.  If OP0 is a non-BLKmode object that is no
                     wider than MAXMODE, use its mode.  Otherwise, use the
                     smallest mode containing the field.  */

                  if (GET_MODE (xop0) == BLKmode
                      || (GET_MODE_SIZE (GET_MODE (op0))
                          > GET_MODE_SIZE (maxmode)))
                    bestmode = get_best_mode (bitsize, bitnum,
                                              MEM_ALIGN (xop0), maxmode,
                                              MEM_VOLATILE_P (xop0));
                  else
                    bestmode = GET_MODE (xop0);

                  if (bestmode == VOIDmode
                      || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (xop0))
                          && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (xop0)))
                    goto extv_loses;

                  /* Compute offset as multiple of this unit,
                     counting in bytes.  */
                  unit = GET_MODE_BITSIZE (bestmode);
                  xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
                  xbitpos = bitnum % unit;
                  xop0 = adjust_address (xop0, bestmode, xoffset);

                  /* Fetch it to a register in that size.  */
                  xop0 = force_reg (bestmode, xop0);

                  /* XBITPOS counts within UNIT, which is what is expected.  */
                }
              else
                /* Get ref to first byte containing part of the field.  */
                xop0 = adjust_address (xop0, byte_mode, xoffset);
            }

          /* If op0 is a register, we need it in MAXMODE (which is usually
             SImode) to make it acceptable to the format of extv.  */
          if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
            goto extv_loses;
          if (REG_P (xop0) && GET_MODE (xop0) != maxmode)
            xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);

          /* On big-endian machines, we count bits from the most significant.
             If the bit field insn does not, we must invert.  */
          if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
            xbitpos = unit - bitsize - xbitpos;

          /* XBITPOS counts within a size of UNIT.
             Adjust to count within a size of MAXMODE.  */
          if (BITS_BIG_ENDIAN && !MEM_P (xop0))
            xbitpos += (GET_MODE_BITSIZE (maxmode) - unit);

          unit = GET_MODE_BITSIZE (maxmode);

          if (xtarget == 0
              || (flag_force_mem && MEM_P (xtarget)))
            xtarget = xspec_target = gen_reg_rtx (tmode);

          if (GET_MODE (xtarget) != maxmode)
            {
              if (REG_P (xtarget))
                {
                  int wider = (GET_MODE_SIZE (maxmode)
                               > GET_MODE_SIZE (GET_MODE (xtarget)));
                  xtarget = gen_lowpart (maxmode, xtarget);
                  if (wider)
                    xspec_target_subreg = xtarget;
                }
              else
                xtarget = gen_reg_rtx (maxmode);
            }

          /* If this machine's extv insists on a register target,
             make sure we have one.  */
          if (! ((*insn_data[(int) CODE_FOR_extv].operand[0].predicate)
                 (xtarget, maxmode)))
            xtarget = gen_reg_rtx (maxmode);

          bitsize_rtx = GEN_INT (bitsize);
          bitpos_rtx = GEN_INT (xbitpos);

          pat = gen_extv (protect_from_queue (xtarget, 1),
                          xop0, bitsize_rtx, bitpos_rtx);
          if (pat)
            {
              emit_insn (pat);
              target = xtarget;
              spec_target = xspec_target;
              spec_target_subreg = xspec_target_subreg;
            }
          else
            {
              delete_insns_since (last);
              target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
                                                bitpos, target, 0);
            }
        }
      else
        extv_loses:
        target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
                                          bitpos, target, 0);
    }

  if (target == spec_target)
    return target;
  if (target == spec_target_subreg)
    return spec_target;
  if (GET_MODE (target) != tmode && GET_MODE (target) != mode)
    {
      /* If the target mode is floating-point, first convert to the
         integer mode of that size and then access it as a floating-point
         value via a SUBREG.  */
      if (GET_MODE_CLASS (tmode) != MODE_INT
          && GET_MODE_CLASS (tmode) != MODE_PARTIAL_INT)
        {
          target = convert_to_mode (mode_for_size (GET_MODE_BITSIZE (tmode),
                                                   MODE_INT, 0),
                                    target, unsignedp);
          return gen_lowpart (tmode, target);
        }
      else
        return convert_to_mode (tmode, target, unsignedp);
    }
  return target;
}

/* Extract a bit field using shifts and boolean operations.
   Returns an rtx to represent the value.
   OP0 addresses a register (word) or memory (byte).
   BITPOS says which bit within the word or byte the bit field starts in.
   OFFSET says how many bytes farther the bit field starts;
   it is 0 if OP0 is a register.
   BITSIZE says how many bits long the bit field is.
   (If OP0 is a register, it may be narrower than a full word,
   but BITPOS still counts within a full word,
   which is significant on bigendian machines.)

   UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
   If TARGET is nonzero, attempts to store the value there
   and return TARGET, but this is not guaranteed.
   If TARGET is not used, create a pseudo-reg of mode TMODE for the value.  */
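
/* For the unsigned case the code below amounts to the familiar

       (OP0 >> BITPOS) & ((1 << BITSIZE) - 1)

   after the big-endian adjustment of BITPOS; this outline is
   illustrative and not part of the original comment.  */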
1679 static rtx
1680 extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
1681 unsigned HOST_WIDE_INT offset,
1682 unsigned HOST_WIDE_INT bitsize,
1683 unsigned HOST_WIDE_INT bitpos, rtx target,
1684 int unsignedp)
1686 unsigned int total_bits = BITS_PER_WORD;
1687 enum machine_mode mode;
1689 if (GET_CODE (op0) == SUBREG || REG_P (op0))
1691 /* Special treatment for a bit field split across two registers. */
1692 if (bitsize + bitpos > BITS_PER_WORD)
1693 return extract_split_bit_field (op0, bitsize, bitpos, unsignedp);
1695 else
1697 /* Get the proper mode to use for this field. We want a mode that
1698 includes the entire field. If such a mode would be larger than
1699 a word, we won't be doing the extraction the normal way. */
1701 mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
1702 MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));
1704 if (mode == VOIDmode)
1705 /* The only way this should occur is if the field spans word
1706 boundaries. */
1707 return extract_split_bit_field (op0, bitsize,
1708 bitpos + offset * BITS_PER_UNIT,
1709 unsignedp);
1711 total_bits = GET_MODE_BITSIZE (mode);
1713 /* Make sure bitpos is valid for the chosen mode. Adjust BITPOS to
1714 be in the range 0 to total_bits-1, and put any excess bytes in
1715 OFFSET. */
1716 if (bitpos >= total_bits)
1718 offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
1719 bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
1720 * BITS_PER_UNIT);
1723 /* Get ref to an aligned byte, halfword, or word containing the field.
1724 Adjust BITPOS to be position within a word,
1725 and OFFSET to be the offset of that word.
1726 Then alter OP0 to refer to that word. */
1727 bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
1728 offset -= (offset % (total_bits / BITS_PER_UNIT));
1729 op0 = adjust_address (op0, mode, offset);
1732 mode = GET_MODE (op0);
1734 if (BYTES_BIG_ENDIAN)
1735 /* BITPOS is the distance between our msb and that of OP0.
1736 Convert it to the distance from the lsb. */
1737 bitpos = total_bits - bitsize - bitpos;
1739 /* Now BITPOS is always the distance between the field's lsb and that of OP0.
1740 We have reduced the big-endian case to the little-endian case. */
1742 if (unsignedp)
1744 if (bitpos)
1746 /* If the field does not already start at the lsb,
1747 shift it so it does. */
1748 tree amount = build_int_2 (bitpos, 0);
1749 /* Maybe propagate the target for the shift. */
1750 /* But not if we will return it--could confuse integrate.c. */
1751 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1752 if (tmode != mode) subtarget = 0;
1753 op0 = expand_shift (RSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1755 /* Convert the value to the desired mode. */
1756 if (mode != tmode)
1757 op0 = convert_to_mode (tmode, op0, 1);
1759 /* Unless the msb of the field used to be the msb when we shifted,
1760 mask out the upper bits. */
1762 if (GET_MODE_BITSIZE (mode) != bitpos + bitsize)
1763 return expand_binop (GET_MODE (op0), and_optab, op0,
1764 mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1765 target, 1, OPTAB_LIB_WIDEN);
1766 return op0;
1769 /* To extract a signed bit-field, first shift its msb to the msb of the word,
1770 then arithmetic-shift its lsb to the lsb of the word. */
1771 op0 = force_reg (mode, op0);
1772 if (mode != tmode)
1773 target = 0;
1775 /* Find the narrowest integer mode that contains the field. */
1777 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1778 mode = GET_MODE_WIDER_MODE (mode))
1779 if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos)
1781 op0 = convert_to_mode (mode, op0, 0);
1782 break;
1785 if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos))
1787 tree amount
1788 = build_int_2 (GET_MODE_BITSIZE (mode) - (bitsize + bitpos), 0);
1789 /* Maybe propagate the target for the shift. */
1790 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1791 op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1794 return expand_shift (RSHIFT_EXPR, mode, op0,
1795 build_int_2 (GET_MODE_BITSIZE (mode) - bitsize, 0),
1796 target, 0);
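/* Illustrative aside, not part of this file's interfaces: the two
   extraction idioms above written out in plain C for 32-bit values.
   The unsigned case is shift-then-mask; the signed case left-justifies
   the field and relies on an arithmetic right shift to replicate the
   sign bit (assumed two's-complement behavior, as on GCC's hosts).
   Preconditions: 1 <= bitsize and bitpos + bitsize <= 32.  */

#include <stdint.h>

static uint32_t
example_extract_unsigned (uint32_t word, unsigned bitpos, unsigned bitsize)
{
  word >>= bitpos;                 /* bring the field's lsb to bit 0 */
  if (bitsize < 32)                /* mask off everything above the field */
    word &= ((uint32_t) 1 << bitsize) - 1;
  return word;
}

static int32_t
example_extract_signed (int32_t word, unsigned bitpos, unsigned bitsize)
{
  word <<= 32 - (bitsize + bitpos);   /* field msb -> word msb */
  return word >> (32 - bitsize);      /* arithmetic shift sign-extends */
}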
1799 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1800 of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1801 complement of that if COMPLEMENT. The mask is truncated if
1802 necessary to the width of mode MODE. The mask is zero-extended if
1803 BITSIZE+BITPOS is too small for MODE. */
1805 static rtx
1806 mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
1808 HOST_WIDE_INT masklow, maskhigh;
1810 if (bitsize == 0)
1811 masklow = 0;
1812 else if (bitpos < HOST_BITS_PER_WIDE_INT)
1813 masklow = (HOST_WIDE_INT) -1 << bitpos;
1814 else
1815 masklow = 0;
1817 if (bitpos + bitsize < HOST_BITS_PER_WIDE_INT)
1818 masklow &= ((unsigned HOST_WIDE_INT) -1
1819 >> (HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1821 if (bitpos <= HOST_BITS_PER_WIDE_INT)
1822 maskhigh = -1;
1823 else
1824 maskhigh = (HOST_WIDE_INT) -1 << (bitpos - HOST_BITS_PER_WIDE_INT);
1826 if (bitsize == 0)
1827 maskhigh = 0;
1828 else if (bitpos + bitsize > HOST_BITS_PER_WIDE_INT)
1829 maskhigh &= ((unsigned HOST_WIDE_INT) -1
1830 >> (2 * HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1831 else
1832 maskhigh = 0;
1834 if (complement)
1836 maskhigh = ~maskhigh;
1837 masklow = ~masklow;
1840 return immed_double_const (masklow, maskhigh, mode);
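/* Illustrative aside, not part of this file's interfaces: on a single
   64-bit host word the constant built above is just BITSIZE one bits
   shifted up by BITPOS; the MASKLOW/MASKHIGH code only splits this same
   value across two host words.  Precondition: bitpos < 64.  */

#include <stdint.h>

static uint64_t
example_mask (unsigned bitpos, unsigned bitsize, int complement)
{
  uint64_t ones = (bitsize >= 64
                   ? ~(uint64_t) 0
                   : ((uint64_t) 1 << bitsize) - 1);
  uint64_t mask = ones << bitpos;   /* e.g. bitpos 4, bitsize 3 -> 0x70 */
  return complement ? ~mask : mask;
}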
1843 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1844 VALUE truncated to BITSIZE bits and then shifted left BITPOS bits. */
1846 static rtx
1847 lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize)
1849 unsigned HOST_WIDE_INT v = INTVAL (value);
1850 HOST_WIDE_INT low, high;
1852 if (bitsize < HOST_BITS_PER_WIDE_INT)
1853 v &= ~((HOST_WIDE_INT) -1 << bitsize);
1855 if (bitpos < HOST_BITS_PER_WIDE_INT)
1857 low = v << bitpos;
1858 high = (bitpos > 0 ? (v >> (HOST_BITS_PER_WIDE_INT - bitpos)) : 0);
1860 else
1862 low = 0;
1863 high = v << (bitpos - HOST_BITS_PER_WIDE_INT);
1866 return immed_double_const (low, high, mode);
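/* Illustrative aside, not part of this file's interfaces: the same
   double-word shift on a pair of 64-bit host words.  The bits shifted
   out of the low word land in the high word, exactly as LOW/HIGH are
   computed above.  */

#include <stdint.h>

static void
example_lshift_double (uint64_t v, unsigned bitpos,
                       uint64_t *low, uint64_t *high)
{
  if (bitpos < 64)
    {
      *low = v << bitpos;
      *high = bitpos ? v >> (64 - bitpos) : 0;
    }
  else
    {
      *low = 0;
      *high = v << (bitpos - 64);
    }
}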
1869 /* Extract a bit field that is split across two words
1870 and return an RTX for the result.
1872 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1873 BITSIZE is the field width; BITPOS is the position of its first bit within the word.
1874 UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend. */
1876 static rtx
1877 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1878 unsigned HOST_WIDE_INT bitpos, int unsignedp)
1880 unsigned int unit;
1881 unsigned int bitsdone = 0;
1882 rtx result = NULL_RTX;
1883 int first = 1;
1885 /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
1886 much at a time. */
1887 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1888 unit = BITS_PER_WORD;
1889 else
1890 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1892 while (bitsdone < bitsize)
1894 unsigned HOST_WIDE_INT thissize;
1895 rtx part, word;
1896 unsigned HOST_WIDE_INT thispos;
1897 unsigned HOST_WIDE_INT offset;
1899 offset = (bitpos + bitsdone) / unit;
1900 thispos = (bitpos + bitsdone) % unit;
1902 /* THISSIZE must not overrun a word boundary. Otherwise,
1903 extract_fixed_bit_field will call us again, and we will mutually
1904 recurse forever. */
1905 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1906 thissize = MIN (thissize, unit - thispos);
1908 /* If OP0 is a register, then handle OFFSET here.
1910 When handling multiword bitfields, extract_bit_field may pass
1911 down a word_mode SUBREG of a larger REG for a bitfield that actually
1912 crosses a word boundary. Thus, for a SUBREG, we must find
1913 the current word starting from the base register. */
1914 if (GET_CODE (op0) == SUBREG)
1916 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1917 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1918 GET_MODE (SUBREG_REG (op0)));
1919 offset = 0;
1921 else if (REG_P (op0))
1923 word = operand_subword_force (op0, offset, GET_MODE (op0));
1924 offset = 0;
1926 else
1927 word = op0;
1929 /* Extract the parts in bit-counting order,
1930 whose meaning is determined by BYTES_PER_UNIT.
1931 OFFSET is in UNITs, and UNIT is in bits.
1932 extract_fixed_bit_field wants offset in bytes. */
1933 part = extract_fixed_bit_field (word_mode, word,
1934 offset * unit / BITS_PER_UNIT,
1935 thissize, thispos, 0, 1);
1936 bitsdone += thissize;
1938 /* Shift this part into place for the result. */
1939 if (BYTES_BIG_ENDIAN)
1941 if (bitsize != bitsdone)
1942 part = expand_shift (LSHIFT_EXPR, word_mode, part,
1943 build_int_2 (bitsize - bitsdone, 0), 0, 1);
1945 else
1947 if (bitsdone != thissize)
1948 part = expand_shift (LSHIFT_EXPR, word_mode, part,
1949 build_int_2 (bitsdone - thissize, 0), 0, 1);
1952 if (first)
1953 result = part;
1954 else
1955 /* Combine the parts with bitwise or. This works
1956 because we extracted each part as an unsigned bit field. */
1957 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
1958 OPTAB_LIB_WIDEN);
1960 first = 0;
1963 /* Unsigned bit field: we are done. */
1964 if (unsignedp)
1965 return result;
1966 /* Signed bit field: sign-extend with two arithmetic shifts. */
1967 result = expand_shift (LSHIFT_EXPR, word_mode, result,
1968 build_int_2 (BITS_PER_WORD - bitsize, 0),
1969 NULL_RTX, 0);
1970 return expand_shift (RSHIFT_EXPR, word_mode, result,
1971 build_int_2 (BITS_PER_WORD - bitsize, 0), NULL_RTX, 0);
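/* Illustrative aside, not part of this file's interfaces: for a field
   straddling two 32-bit words the loop above reduces to taking the top
   bits of WORD0 and the bottom bits of WORD1 and OR-ing them together
   (little-endian bit numbering; preconditions: 0 < bitpos < 32,
   32 < bitpos + bitsize, bitsize <= 32).  */

#include <stdint.h>

static uint32_t
example_extract_split (uint32_t word0, uint32_t word1,
                       unsigned bitpos, unsigned bitsize)
{
  unsigned lo_bits = 32 - bitpos;   /* bits contributed by word0 */
  uint32_t lo = word0 >> bitpos;
  uint32_t hi = word1 & (((uint32_t) 1 << (bitsize - lo_bits)) - 1);
  return lo | (hi << lo_bits);      /* unsigned result, as in the loop */
}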
1974 /* Add INC into TARGET. */
1976 void
1977 expand_inc (rtx target, rtx inc)
1979 rtx value = expand_binop (GET_MODE (target), add_optab,
1980 target, inc,
1981 target, 0, OPTAB_LIB_WIDEN);
1982 if (value != target)
1983 emit_move_insn (target, value);
1986 /* Subtract DEC from TARGET. */
1988 void
1989 expand_dec (rtx target, rtx dec)
1991 rtx value = expand_binop (GET_MODE (target), sub_optab,
1992 target, dec,
1993 target, 0, OPTAB_LIB_WIDEN);
1994 if (value != target)
1995 emit_move_insn (target, value);
1998 /* Output a shift instruction for expression code CODE,
1999 with SHIFTED being the rtx for the value to shift,
2000 and AMOUNT the tree for the amount to shift by.
2001 Store the result in the rtx TARGET, if that is convenient.
2002 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2003 Return the rtx for where the value is. */
rtx
2006 expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2007 tree amount, rtx target, int unsignedp)
2009 rtx op1, temp = 0;
2010 int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2011 int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2012 int try;
2014 /* Previously detected shift-counts computed by NEGATE_EXPR
2015 and shifted in the other direction; but that does not work
2016 on all machines. */
2018 op1 = expand_expr (amount, NULL_RTX, VOIDmode, 0);
2020 if (SHIFT_COUNT_TRUNCATED)
2022 if (GET_CODE (op1) == CONST_INT
2023 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2024 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
2025 op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2026 % GET_MODE_BITSIZE (mode));
2027 else if (GET_CODE (op1) == SUBREG
2028 && subreg_lowpart_p (op1))
2029 op1 = SUBREG_REG (op1);
2032 if (op1 == const0_rtx)
2033 return shifted;
2035 /* Check whether it's cheaper to implement a left shift by a constant
2036 bit count as a sequence of additions. */
2037 if (code == LSHIFT_EXPR
2038 && GET_CODE (op1) == CONST_INT
2039 && INTVAL (op1) > 0
2040 && INTVAL (op1) < GET_MODE_BITSIZE (mode)
2041 && shift_cost[mode][INTVAL (op1)] > INTVAL (op1) * add_cost[mode])
2043 int i;
2044 for (i = 0; i < INTVAL (op1); i++)
2046 temp = force_reg (mode, shifted);
2047 shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2048 unsignedp, OPTAB_LIB_WIDEN);
2050 return shifted;
2053 for (try = 0; temp == 0 && try < 3; try++)
2055 enum optab_methods methods;
2057 if (try == 0)
2058 methods = OPTAB_DIRECT;
2059 else if (try == 1)
2060 methods = OPTAB_WIDEN;
2061 else
2062 methods = OPTAB_LIB_WIDEN;
2064 if (rotate)
2066 /* Widening does not work for rotation. */
2067 if (methods == OPTAB_WIDEN)
2068 continue;
2069 else if (methods == OPTAB_LIB_WIDEN)
2071 /* If we have been unable to open-code this by a rotation,
2072 do it as the IOR of two shifts. I.e., to rotate A
2073 by N bits, compute (A << N) | ((unsigned) A >> (C - N))
2074 where C is the bitsize of A.
2076 It is theoretically possible that the target machine might
2077 not be able to perform either shift and hence we would
2078 be making two libcalls rather than just the one for the
2079 shift (similarly if IOR could not be done). We will allow
2080 this extremely unlikely lossage to avoid complicating the
2081 code below. */
2083 rtx subtarget = target == shifted ? 0 : target;
2084 rtx temp1;
2085 tree type = TREE_TYPE (amount);
2086 tree new_amount = make_tree (type, op1);
2087 tree other_amount
2088 = fold (build (MINUS_EXPR, type,
2089 convert (type,
2090 build_int_2 (GET_MODE_BITSIZE (mode),
2091 0)),
2092 amount));
2094 shifted = force_reg (mode, shifted);
2096 temp = expand_shift (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2097 mode, shifted, new_amount, subtarget, 1);
2098 temp1 = expand_shift (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2099 mode, shifted, other_amount, 0, 1);
2100 return expand_binop (mode, ior_optab, temp, temp1, target,
2101 unsignedp, methods);
2104 temp = expand_binop (mode,
2105 left ? rotl_optab : rotr_optab,
2106 shifted, op1, target, unsignedp, methods);
2108 /* If we don't have the rotate, but we are rotating by a constant
2109 that is in range, try a rotate in the opposite direction. */
2111 if (temp == 0 && GET_CODE (op1) == CONST_INT
2112 && INTVAL (op1) > 0
2113 && (unsigned int) INTVAL (op1) < GET_MODE_BITSIZE (mode))
2114 temp = expand_binop (mode,
2115 left ? rotr_optab : rotl_optab,
2116 shifted,
2117 GEN_INT (GET_MODE_BITSIZE (mode)
2118 - INTVAL (op1)),
2119 target, unsignedp, methods);
2121 else if (unsignedp)
2122 temp = expand_binop (mode,
2123 left ? ashl_optab : lshr_optab,
2124 shifted, op1, target, unsignedp, methods);
2126 /* Do arithmetic shifts.
2127 Also, if we are going to widen the operand, we can just as well
2128 use an arithmetic right-shift instead of a logical one. */
2129 if (temp == 0 && ! rotate
2130 && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2132 enum optab_methods methods1 = methods;
2134 /* If trying to widen a logical shift to an arithmetic shift,
2135 don't accept an arithmetic shift of the same size. */
2136 if (unsignedp)
2137 methods1 = OPTAB_MUST_WIDEN;
2139 /* Arithmetic shift */
2141 temp = expand_binop (mode,
2142 left ? ashl_optab : ashr_optab,
2143 shifted, op1, target, unsignedp, methods1);
2146 /* We used to try extzv here for logical right shifts, but that was
2147 only useful for one machine, the VAX, and caused poor code
2148 generation there for lshrdi3, so the code was deleted and a
2149 define_expand for lshrsi3 was added to vax.md. */
2152 if (temp == 0)
2153 abort ();
2154 return temp;
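/* Illustrative aside, not part of this file's interfaces: the rotate
   fallback above is the standard identity
   rotl (a, n) == (a << n) | (a >> (C - n)) for 0 < n < C, with C the
   bitsize of a.  In plain C for 32-bit values:  */

#include <stdint.h>

static uint32_t
example_rotl32 (uint32_t a, unsigned n)   /* requires 0 < n < 32 */
{
  return (a << n) | (a >> (32 - n));
}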
2157 enum alg_code { alg_zero, alg_m, alg_shift,
2158 alg_add_t_m2, alg_sub_t_m2,
2159 alg_add_factor, alg_sub_factor,
2160 alg_add_t2_m, alg_sub_t2_m,
2161 alg_add, alg_subtract, alg_factor, alg_shiftop };
2163 /* This structure records a sequence of operations.
2164 `ops' is the number of operations recorded.
2165 `cost' is their total cost.
2166 The operations are stored in `op' and the corresponding
2167 logarithms of the integer coefficients in `log'.
2169 These are the operations:
2170 alg_zero total := 0;
2171 alg_m total := multiplicand;
2172 alg_shift total := total * coeff;
2173 alg_add_t_m2 total := total + multiplicand * coeff;
2174 alg_sub_t_m2 total := total - multiplicand * coeff;
2175 alg_add_factor total := total * coeff + total;
2176 alg_sub_factor total := total * coeff - total;
2177 alg_add_t2_m total := total * coeff + multiplicand;
2178 alg_sub_t2_m total := total * coeff - multiplicand;
2180 The first operand must be either alg_zero or alg_m. */
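/* Illustrative aside, not part of this file's interfaces: for example,
   t = 10 can be recorded as { alg_m, alg_add_t_m2 (log 2),
   alg_shift (log 1) }, which plays back as this straight-line code:  */

static long
example_times_10 (long x)
{
  long total = x;       /* alg_m:        total = multiplicand */
  total += x << 2;      /* alg_add_t_m2: total = x + 4x =  5x */
  total <<= 1;          /* alg_shift:    total = 2 * 5x = 10x */
  return total;
}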
2182 struct algorithm
2184 short cost;
2185 short ops;
2186 /* The sizes of the OP and LOG fields are not directly related to the
2187 word size, but the worst-case algorithms arise when we have few
2188 consecutive ones or zeros, i.e., a multiplicand like 10101010101...
2189 In that case we will generate shift-by-2, add, shift-by-2, add,...,
2190 in total wordsize operations. */
2191 enum alg_code op[MAX_BITS_PER_WORD];
2192 char log[MAX_BITS_PER_WORD];
2195 /* Indicates the type of fixup needed after a constant multiplication.
2196 BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2197 the result should be negated, and ADD_VARIANT means that the
2198 multiplicand should be added to the result. */
2199 enum mult_variant {basic_variant, negate_variant, add_variant};
2201 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2202 int, enum machine_mode mode);
2203 static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
2204 struct algorithm *, enum mult_variant *, int);
2205 static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
2206 const struct algorithm *, enum mult_variant);
2207 static unsigned HOST_WIDE_INT choose_multiplier (unsigned HOST_WIDE_INT, int,
2208 int, unsigned HOST_WIDE_INT *,
2209 int *, int *);
2210 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2211 static rtx extract_high_half (enum machine_mode, rtx);
2212 static rtx expand_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
2213 int, int);
2214 /* Compute and return the best algorithm for multiplying by T.
2215 The algorithm must cost less than COST_LIMIT.
2216 If retval.cost >= COST_LIMIT, no algorithm was found and all
2217 other fields of the returned struct are undefined.
2218 MODE is the machine mode of the multiplication. */
2220 static void
2221 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2222 int cost_limit, enum machine_mode mode)
2224 int m;
2225 struct algorithm *alg_in, *best_alg;
2226 int cost;
2227 unsigned HOST_WIDE_INT q;
2228 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
2230 /* Indicate that no algorithm has been found yet. If none
2231 is found, this value will be returned to indicate failure. */
2232 alg_out->cost = cost_limit;
2234 if (cost_limit <= 0)
2235 return;
2237 /* Restrict the bits of "t" to the multiplication's mode. */
2238 t &= GET_MODE_MASK (mode);
2240 /* t == 1 can be done at zero cost. */
2241 if (t == 1)
2243 alg_out->ops = 1;
2244 alg_out->cost = 0;
2245 alg_out->op[0] = alg_m;
2246 return;
2249 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2250 fail now. */
2251 if (t == 0)
2253 if (zero_cost >= cost_limit)
2254 return;
2255 else
2257 alg_out->ops = 1;
2258 alg_out->cost = zero_cost;
2259 alg_out->op[0] = alg_zero;
2260 return;
2264 /* We'll be needing a couple extra algorithm structures now. */
2266 alg_in = alloca (sizeof (struct algorithm));
2267 best_alg = alloca (sizeof (struct algorithm));
2269 /* If we have a group of zero bits at the low-order part of T, try
2270 multiplying by the remaining bits and then doing a shift. */
2272 if ((t & 1) == 0)
2274 m = floor_log2 (t & -t); /* m = number of low zero bits */
2275 if (m < maxm)
2277 q = t >> m;
2278 /* The function expand_shift will choose between a shift and
2279 a sequence of additions, so the observed cost is given as
2280 MIN (m * add_cost[mode], shift_cost[mode][m]). */
2281 cost = m * add_cost[mode];
2282 if (shift_cost[mode][m] < cost)
2283 cost = shift_cost[mode][m];
2284 synth_mult (alg_in, q, cost_limit - cost, mode);
2286 cost += alg_in->cost;
2287 if (cost < cost_limit)
2289 struct algorithm *x;
2290 x = alg_in, alg_in = best_alg, best_alg = x;
2291 best_alg->log[best_alg->ops] = m;
2292 best_alg->op[best_alg->ops] = alg_shift;
2293 cost_limit = cost;
2298 /* If we have an odd number, add or subtract one. */
2299 if ((t & 1) != 0)
2301 unsigned HOST_WIDE_INT w;
2303 for (w = 1; (w & t) != 0; w <<= 1)
2305 /* If T was -1, then W will be zero after the loop. This is another
2306 case where T ends with ...111. Handling this with (T + 1) and
2307 subtracting 1 produces slightly better code and makes algorithm
2308 selection much faster than treating it like the ...0111 case
2309 below. */
2310 if (w == 0
2311 || (w > 2
2312 /* Reject the case where t is 3.
2313 Thus we prefer addition in that case. */
2314 && t != 3))
2316 /* T ends with ...111. Multiply by (T + 1) and subtract 1. */
2318 cost = add_cost[mode];
2319 synth_mult (alg_in, t + 1, cost_limit - cost, mode);
2321 cost += alg_in->cost;
2322 if (cost < cost_limit)
2324 struct algorithm *x;
2325 x = alg_in, alg_in = best_alg, best_alg = x;
2326 best_alg->log[best_alg->ops] = 0;
2327 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2328 cost_limit = cost;
2331 else
2333 /* T ends with ...01 or ...011. Multiply by (T - 1) and add 1. */
2335 cost = add_cost[mode];
2336 synth_mult (alg_in, t - 1, cost_limit - cost, mode);
2338 cost += alg_in->cost;
2339 if (cost < cost_limit)
2341 struct algorithm *x;
2342 x = alg_in, alg_in = best_alg, best_alg = x;
2343 best_alg->log[best_alg->ops] = 0;
2344 best_alg->op[best_alg->ops] = alg_add_t_m2;
2345 cost_limit = cost;
2350 /* Look for factors of t of the form
2351 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2352 If we find such a factor, we can multiply by t using an algorithm that
2353 multiplies by q, shifts the result left by m, and adds or subtracts the unshifted value.
2355 We search for large factors first and loop down, even though large factors
2356 are less probable than small ones; if we find a large factor we will find a
2357 good sequence quickly, and therefore be able to prune (by decreasing
2358 COST_LIMIT) the search. */
2360 for (m = floor_log2 (t - 1); m >= 2; m--)
2362 unsigned HOST_WIDE_INT d;
2364 d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2365 if (t % d == 0 && t > d && m < maxm)
2367 cost = add_cost[mode] + shift_cost[mode][m];
2368 if (shiftadd_cost[mode][m] < cost)
2369 cost = shiftadd_cost[mode][m];
2370 synth_mult (alg_in, t / d, cost_limit - cost, mode);
2372 cost += alg_in->cost;
2373 if (cost < cost_limit)
2375 struct algorithm *x;
2376 x = alg_in, alg_in = best_alg, best_alg = x;
2377 best_alg->log[best_alg->ops] = m;
2378 best_alg->op[best_alg->ops] = alg_add_factor;
2379 cost_limit = cost;
2381 /* Other factors will have been taken care of in the recursion. */
2382 break;
2385 d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2386 if (t % d == 0 && t > d && m < maxm)
2388 cost = add_cost[mode] + shift_cost[mode][m];
2389 if (shiftsub_cost[mode][m] < cost)
2390 cost = shiftsub_cost[mode][m];
2391 synth_mult (alg_in, t / d, cost_limit - cost, mode);
2393 cost += alg_in->cost;
2394 if (cost < cost_limit)
2396 struct algorithm *x;
2397 x = alg_in, alg_in = best_alg, best_alg = x;
2398 best_alg->log[best_alg->ops] = m;
2399 best_alg->op[best_alg->ops] = alg_sub_factor;
2400 cost_limit = cost;
2402 break;
2406 /* Try shift-and-add (load effective address) instructions,
2407 i.e. do a*3, a*5, a*9. */
2408 if ((t & 1) != 0)
2410 q = t - 1;
2411 q = q & -q;
2412 m = exact_log2 (q);
2413 if (m >= 0 && m < maxm)
2415 cost = shiftadd_cost[mode][m];
2416 synth_mult (alg_in, (t - 1) >> m, cost_limit - cost, mode);
2418 cost += alg_in->cost;
2419 if (cost < cost_limit)
2421 struct algorithm *x;
2422 x = alg_in, alg_in = best_alg, best_alg = x;
2423 best_alg->log[best_alg->ops] = m;
2424 best_alg->op[best_alg->ops] = alg_add_t2_m;
2425 cost_limit = cost;
2429 q = t + 1;
2430 q = q & -q;
2431 m = exact_log2 (q);
2432 if (m >= 0 && m < maxm)
2434 cost = shiftsub_cost[mode][m];
2435 synth_mult (alg_in, (t + 1) >> m, cost_limit - cost, mode);
2437 cost += alg_in->cost;
2438 if (cost < cost_limit)
2440 struct algorithm *x;
2441 x = alg_in, alg_in = best_alg, best_alg = x;
2442 best_alg->log[best_alg->ops] = m;
2443 best_alg->op[best_alg->ops] = alg_sub_t2_m;
2444 cost_limit = cost;
2449 /* If cost_limit has not decreased since we stored it in alg_out->cost,
2450 we have not found any algorithm. */
2451 if (cost_limit == alg_out->cost)
2452 return;
2454 /* If we are getting too long a sequence for `struct algorithm'
2455 to record, make this search fail. */
2456 if (best_alg->ops == MAX_BITS_PER_WORD)
2457 return;
2459 /* Copy the algorithm from temporary space to the space at alg_out.
2460 We avoid using structure assignment because the majority of
2461 best_alg is normally undefined, and this is a critical function. */
2462 alg_out->ops = best_alg->ops + 1;
2463 alg_out->cost = cost_limit;
2464 memcpy (alg_out->op, best_alg->op,
2465 alg_out->ops * sizeof *alg_out->op);
2466 memcpy (alg_out->log, best_alg->log,
2467 alg_out->ops * sizeof *alg_out->log);
2470 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2471 Try three variations:
2473 - a shift/add sequence based on VAL itself
2474 - a shift/add sequence based on -VAL, followed by a negation
2475 - a shift/add sequence based on VAL - 1, followed by an addition.
2477 Return true if the cheapest of these costs less than MULT_COST,
2478 describing the algorithm in *ALG and the final fixup in *VARIANT. */
2480 static bool
2481 choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
2482 struct algorithm *alg, enum mult_variant *variant,
2483 int mult_cost)
2485 struct algorithm alg2;
2487 *variant = basic_variant;
2488 synth_mult (alg, val, mult_cost, mode);
2490 /* This works only if the inverted value actually fits in an
2491 `unsigned int'. */
2492 if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
2494 synth_mult (&alg2, -val, MIN (alg->cost, mult_cost) - neg_cost[mode],
2495 mode);
2496 alg2.cost += neg_cost[mode];
2497 if (alg2.cost < alg->cost)
2498 *alg = alg2, *variant = negate_variant;
2501 /* This proves very useful for division-by-constant. */
2502 synth_mult (&alg2, val - 1, MIN (alg->cost, mult_cost) - add_cost[mode],
2503 mode);
2504 alg2.cost += add_cost[mode];
2505 if (alg2.cost < alg->cost)
2506 *alg = alg2, *variant = add_variant;
2508 return alg->cost < mult_cost;
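/* Illustrative aside, not part of this file's interfaces: for val = 7
   the three candidates correspond to e.g. (x << 3) - x from VAL itself,
   a sequence for -7 followed by a negation, and -- sketched below --
   the sequence for VAL - 1 = 6 followed by one addition.  */

static long
example_times_7_add_variant (long x)
{
  long t = (x << 1) + x;   /* synth_mult (6), step 1: 3x */
  t <<= 1;                 /* synth_mult (6), step 2: 6x */
  return t + x;            /* add_variant fixup:      7x */
}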
2511 /* A subroutine of expand_mult, used for constant multiplications.
2512 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2513 convenient. Use the shift/add sequence described by ALG and apply
2514 the final fixup specified by VARIANT. */
2516 static rtx
2517 expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
2518 rtx target, const struct algorithm *alg,
2519 enum mult_variant variant)
2521 HOST_WIDE_INT val_so_far;
2522 rtx insn, accum, tem;
2523 int opno;
2524 enum machine_mode nmode;
2526 /* op0 must be a register to make mult_cost match the precomputed
2527 shiftadd_cost array. */
2528 op0 = protect_from_queue (op0, 0);
2530 /* Avoid referencing memory over and over.
2531 For speed, but also for correctness when mem is volatile. */
2532 if (MEM_P (op0))
2533 op0 = force_reg (mode, op0);
2535 /* ACCUM starts out either as OP0 or as a zero, depending on
2536 the first operation. */
2538 if (alg->op[0] == alg_zero)
2540 accum = copy_to_mode_reg (mode, const0_rtx);
2541 val_so_far = 0;
2543 else if (alg->op[0] == alg_m)
2545 accum = copy_to_mode_reg (mode, op0);
2546 val_so_far = 1;
2548 else
2549 abort ();
2551 for (opno = 1; opno < alg->ops; opno++)
2553 int log = alg->log[opno];
2554 int preserve = preserve_subexpressions_p ();
2555 rtx shift_subtarget = preserve ? 0 : accum;
2556 rtx add_target
2557 = (opno == alg->ops - 1 && target != 0 && variant != add_variant
2558 && ! preserve)
2559 ? target : 0;
2560 rtx accum_target = preserve ? 0 : accum;
2562 switch (alg->op[opno])
2564 case alg_shift:
2565 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2566 build_int_2 (log, 0), NULL_RTX, 0);
2567 val_so_far <<= log;
2568 break;
2570 case alg_add_t_m2:
2571 tem = expand_shift (LSHIFT_EXPR, mode, op0,
2572 build_int_2 (log, 0), NULL_RTX, 0);
2573 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2574 add_target ? add_target : accum_target);
2575 val_so_far += (HOST_WIDE_INT) 1 << log;
2576 break;
2578 case alg_sub_t_m2:
2579 tem = expand_shift (LSHIFT_EXPR, mode, op0,
2580 build_int_2 (log, 0), NULL_RTX, 0);
2581 accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
2582 add_target ? add_target : accum_target);
2583 val_so_far -= (HOST_WIDE_INT) 1 << log;
2584 break;
2586 case alg_add_t2_m:
2587 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2588 build_int_2 (log, 0), shift_subtarget, 0);
2590 accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
2591 add_target ? add_target : accum_target);
2592 val_so_far = (val_so_far << log) + 1;
2593 break;
2595 case alg_sub_t2_m:
2596 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2597 build_int_2 (log, 0), shift_subtarget, 0);
2598 accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
2599 add_target ? add_target : accum_target);
2600 val_so_far = (val_so_far << log) - 1;
2601 break;
2603 case alg_add_factor:
2604 tem = expand_shift (LSHIFT_EXPR, mode, accum,
2605 build_int_2 (log, 0), NULL_RTX, 0);
2606 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2607 add_target ? add_target : accum_target);
2608 val_so_far += val_so_far << log;
2609 break;
2611 case alg_sub_factor:
2612 tem = expand_shift (LSHIFT_EXPR, mode, accum,
2613 build_int_2 (log, 0), NULL_RTX, 0);
2614 accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
2615 (add_target ? add_target
2616 : preserve ? 0 : tem));
2617 val_so_far = (val_so_far << log) - val_so_far;
2618 break;
2620 default:
2621 abort ();
2624 /* Write a REG_EQUAL note on the last insn so that we can cse
2625 multiplication sequences. Note that if ACCUM is a SUBREG,
2626 we've set the inner register and must properly indicate
2627 that. */
2629 tem = op0, nmode = mode;
2630 if (GET_CODE (accum) == SUBREG)
2632 nmode = GET_MODE (SUBREG_REG (accum));
2633 tem = gen_lowpart (nmode, op0);
2636 insn = get_last_insn ();
2637 set_unique_reg_note (insn, REG_EQUAL,
2638 gen_rtx_MULT (nmode, tem, GEN_INT (val_so_far)));
2641 if (variant == negate_variant)
2643 val_so_far = -val_so_far;
2644 accum = expand_unop (mode, neg_optab, accum, target, 0);
2646 else if (variant == add_variant)
2648 val_so_far = val_so_far + 1;
2649 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
2652 /* Compare only the bits of val and val_so_far that are significant
2653 in the result mode, to avoid sign-/zero-extension confusion. */
2654 val &= GET_MODE_MASK (mode);
2655 val_so_far &= GET_MODE_MASK (mode);
2656 if (val != val_so_far)
2657 abort ();
2659 return accum;
2662 /* Perform a multiplication and return an rtx for the result.
2663 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
2664 TARGET is a suggestion for where to store the result (an rtx).
2666 We check specially for a constant integer as OP1.
2667 If you want this check for OP0 as well, then before calling
2668 you should swap the two operands if OP0 would be constant. */
rtx
2671 expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
2672 int unsignedp)
2674 rtx const_op1 = op1;
2675 enum mult_variant variant;
2676 struct algorithm algorithm;
2678 /* synth_mult does an `unsigned int' multiply. As long as the mode is
2679 less than or equal in size to `unsigned int' this doesn't matter.
2680 If the mode is larger than `unsigned int', then synth_mult works only
2681 if the constant value exactly fits in an `unsigned int' without any
2682 truncation. This means that multiplying by negative values does
2683 not work; results are off by 2^32 on a 32-bit machine. */
2685 /* If we are multiplying in DImode, it may still be a win
2686 to try to work with shifts and adds. */
2687 if (GET_CODE (op1) == CONST_DOUBLE
2688 && GET_MODE_CLASS (GET_MODE (op1)) == MODE_INT
2689 && HOST_BITS_PER_INT >= BITS_PER_WORD
2690 && CONST_DOUBLE_HIGH (op1) == 0)
2691 const_op1 = GEN_INT (CONST_DOUBLE_LOW (op1));
2692 else if (HOST_BITS_PER_INT < GET_MODE_BITSIZE (mode)
2693 && GET_CODE (op1) == CONST_INT
2694 && INTVAL (op1) < 0)
2695 const_op1 = 0;
2697 /* We used to test optimize here, on the grounds that it's better to
2698 produce a smaller program when -O is not used.
2699 But this causes such a terrible slowdown sometimes
2700 that it seems better to use synth_mult always. */
2702 if (const_op1 && GET_CODE (const_op1) == CONST_INT
2703 && (unsignedp || !flag_trapv))
2705 int mult_cost = rtx_cost (gen_rtx_MULT (mode, op0, op1), SET);
2707 if (choose_mult_variant (mode, INTVAL (const_op1), &algorithm, &variant,
2708 mult_cost))
2709 return expand_mult_const (mode, op0, INTVAL (const_op1), target,
2710 &algorithm, variant);
2713 if (GET_CODE (op0) == CONST_DOUBLE)
2715 rtx temp = op0;
2716 op0 = op1;
2717 op1 = temp;
2720 /* Expand x*2.0 as x+x. */
2721 if (GET_CODE (op1) == CONST_DOUBLE
2722 && GET_MODE_CLASS (mode) == MODE_FLOAT)
2724 REAL_VALUE_TYPE d;
2725 REAL_VALUE_FROM_CONST_DOUBLE (d, op1);
2727 if (REAL_VALUES_EQUAL (d, dconst2))
2729 op0 = force_reg (GET_MODE (op0), op0);
2730 return expand_binop (mode, add_optab, op0, op0,
2731 target, unsignedp, OPTAB_LIB_WIDEN);
2735 /* This used to use umul_optab if unsigned, but for non-widening multiply
2736 there is no difference between signed and unsigned. */
2737 op0 = expand_binop (mode,
2738 ! unsignedp
2739 && flag_trapv && (GET_MODE_CLASS(mode) == MODE_INT)
2740 ? smulv_optab : smul_optab,
2741 op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
2742 if (op0 == 0)
2743 abort ();
2744 return op0;
2747 /* Return the smallest n such that 2**n >= X. */
int
2750 ceil_log2 (unsigned HOST_WIDE_INT x)
2752 return floor_log2 (x - 1) + 1;
2755 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
2756 replace division by D, and put the least significant N bits of the result
2757 in *MULTIPLIER_PTR and return the most significant bit.
2759 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
2760 needed precision is in PRECISION (should be <= N).
2762 PRECISION should be as small as possible so this function can choose
2763 a multiplier more freely.
2765 The rounded-up logarithm of D is placed in *LGUP_PTR. A shift count that
2766 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
2768 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
2769 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
2771 static
2772 unsigned HOST_WIDE_INT
2773 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
2774 unsigned HOST_WIDE_INT *multiplier_ptr,
2775 int *post_shift_ptr, int *lgup_ptr)
2777 HOST_WIDE_INT mhigh_hi, mlow_hi;
2778 unsigned HOST_WIDE_INT mhigh_lo, mlow_lo;
2779 int lgup, post_shift;
2780 int pow, pow2;
2781 unsigned HOST_WIDE_INT nl, dummy1;
2782 HOST_WIDE_INT nh, dummy2;
2784 /* lgup = ceil(log2(divisor)); */
2785 lgup = ceil_log2 (d);
2787 if (lgup > n)
2788 abort ();
2790 pow = n + lgup;
2791 pow2 = n + lgup - precision;
2793 if (pow == 2 * HOST_BITS_PER_WIDE_INT)
2795 /* We could handle this with some effort, but this case is much better
2796 handled directly with a scc insn, so rely on caller using that. */
2797 abort ();
2800 /* mlow = 2^(N + lgup)/d */
2801 if (pow >= HOST_BITS_PER_WIDE_INT)
2803 nh = (HOST_WIDE_INT) 1 << (pow - HOST_BITS_PER_WIDE_INT);
2804 nl = 0;
2806 else
2808 nh = 0;
2809 nl = (unsigned HOST_WIDE_INT) 1 << pow;
2811 div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
2812 &mlow_lo, &mlow_hi, &dummy1, &dummy2);
2814 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
2815 if (pow2 >= HOST_BITS_PER_WIDE_INT)
2816 nh |= (HOST_WIDE_INT) 1 << (pow2 - HOST_BITS_PER_WIDE_INT);
2817 else
2818 nl |= (unsigned HOST_WIDE_INT) 1 << pow2;
2819 div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
2820 &mhigh_lo, &mhigh_hi, &dummy1, &dummy2);
2822 if (mhigh_hi && nh - d >= d)
2823 abort ();
2824 if (mhigh_hi > 1 || mlow_hi > 1)
2825 abort ();
2826 /* Assert that mlow < mhigh. */
2827 if (! (mlow_hi < mhigh_hi || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo)))
2828 abort ();
2830 /* If precision == N, then mlow, mhigh exceed 2^N
2831 (but they do not exceed 2^(N+1)). */
2833 /* Reduce to lowest terms. */
2834 for (post_shift = lgup; post_shift > 0; post_shift--)
2836 unsigned HOST_WIDE_INT ml_lo = (mlow_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mlow_lo >> 1);
2837 unsigned HOST_WIDE_INT mh_lo = (mhigh_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mhigh_lo >> 1);
2838 if (ml_lo >= mh_lo)
2839 break;
2841 mlow_hi = 0;
2842 mlow_lo = ml_lo;
2843 mhigh_hi = 0;
2844 mhigh_lo = mh_lo;
2847 *post_shift_ptr = post_shift;
2848 *lgup_ptr = lgup;
2849 if (n < HOST_BITS_PER_WIDE_INT)
2851 unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
2852 *multiplier_ptr = mhigh_lo & mask;
2853 return mhigh_lo >= mask;
2855 else
2857 *multiplier_ptr = mhigh_lo;
2858 return mhigh_hi;
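/* Illustrative aside, not part of this file's interfaces: for d = 3 and
   N = 32 this routine arrives at the well-known multiplier 0xAAAAAAAB
   (= ceil (2^33 / 3)) with a post shift of 1, so an unsigned division
   by 3 becomes one highpart multiply and a shift:  */

#include <stdint.h>

static uint32_t
example_udiv3 (uint32_t x)
{
  /* High 32 bits of x * 0xAAAAAAAB, shifted right once more;
     exact x / 3 for every 32-bit x.  */
  return (uint32_t) (((uint64_t) x * 0xAAAAAAABu) >> 33);
}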
2862 /* Compute the inverse of X mod 2**N, i.e., find Y such that X * Y is
2863 congruent to 1 (mod 2**N). */
2865 static unsigned HOST_WIDE_INT
2866 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
2868 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
2870 /* The algorithm notes that the choice y = x satisfies
2871 x*y == 1 mod 2^3, since x is assumed odd.
2872 Each iteration doubles the number of bits of significance in y. */
2874 unsigned HOST_WIDE_INT mask;
2875 unsigned HOST_WIDE_INT y = x;
2876 int nbit = 3;
2878 mask = (n == HOST_BITS_PER_WIDE_INT
2879 ? ~(unsigned HOST_WIDE_INT) 0
2880 : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
2882 while (nbit < n)
2884 y = y * (2 - x*y) & mask; /* Modulo 2^N */
2885 nbit *= 2;
2887 return y;
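/* Illustrative aside, not part of this file's interfaces: the same
   Newton-style iteration on a 32-bit host word.  Starting from y = x
   (correct to 3 bits), each pass doubles the number of correct
   low-order bits, so four passes suffice for 32.  */

#include <stdint.h>

static uint32_t
example_invert_mod2_32 (uint32_t x)   /* x must be odd */
{
  uint32_t y = x;                     /* correct to 3 bits */
  int nbit;
  for (nbit = 3; nbit < 32; nbit *= 2)
    y = y * (2 - x * y);              /* correct bits double */
  return y;   /* x * y == 1 (mod 2^32); e.g. x = 3 -> y = 0xAAAAAAAB */
}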
2890 /* Emit code to adjust ADJ_OPERAND after a multiplication of the wrong signedness
2891 flavor of OP0 and OP1. ADJ_OPERAND is already the high half of the
2892 product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed product
2893 to become unsigned; if UNSIGNEDP is zero, adjust the unsigned product to
2894 become signed.
2896 The result is put in TARGET if that is convenient.
2898 MODE is the mode of operation. */
rtx
2901 expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
2902 rtx op1, rtx target, int unsignedp)
2904 rtx tem;
2905 enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
2907 tem = expand_shift (RSHIFT_EXPR, mode, op0,
2908 build_int_2 (GET_MODE_BITSIZE (mode) - 1, 0),
2909 NULL_RTX, 0);
2910 tem = expand_and (mode, tem, op1, NULL_RTX);
2911 adj_operand
2912 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
2913 adj_operand);
2915 tem = expand_shift (RSHIFT_EXPR, mode, op1,
2916 build_int_2 (GET_MODE_BITSIZE (mode) - 1, 0),
2917 NULL_RTX, 0);
2918 tem = expand_and (mode, tem, op0, NULL_RTX);
2919 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
2920 target);
2922 return target;
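/* Illustrative aside, not part of this file's interfaces: the identity
   used above.  Reading an N-bit value as signed subtracts 2^N whenever
   its sign bit is set, so the signed and unsigned highparts differ by
   the other operand for each negative input.  Checked here with a
   32x32->64 multiply (assumes arithmetic >> on signed values):  */

#include <stdint.h>

static int32_t
example_smul_high (uint32_t x, uint32_t y)
{
  uint32_t uhigh = (uint32_t) (((uint64_t) x * y) >> 32);
  uint32_t adj = uhigh - (((int32_t) x >> 31) & y)   /* - y if x < 0 */
                       - (((int32_t) y >> 31) & x);  /* - x if y < 0 */
  return (int32_t) adj;   /* == high half of the signed product */
}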
2925 /* Subroutine of expand_mult_highpart. Return the MODE high part of OP. */
2927 static rtx
2928 extract_high_half (enum machine_mode mode, rtx op)
2930 enum machine_mode wider_mode;
2932 if (mode == word_mode)
2933 return gen_highpart (mode, op);
2935 wider_mode = GET_MODE_WIDER_MODE (mode);
2936 op = expand_shift (RSHIFT_EXPR, wider_mode, op,
2937 build_int_2 (GET_MODE_BITSIZE (mode), 0), 0, 1);
2938 return convert_modes (mode, wider_mode, op, 0);
2941 /* Like expand_mult_highpart, but only consider using a multiplication
2942 optab. OP1 is an rtx for the constant operand. */
2944 static rtx
2945 expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
2946 rtx target, int unsignedp, int max_cost)
2948 rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
2949 enum machine_mode wider_mode;
2950 optab moptab;
2951 rtx tem;
2952 int size;
2954 wider_mode = GET_MODE_WIDER_MODE (mode);
2955 size = GET_MODE_BITSIZE (mode);
2957 /* Firstly, try using a multiplication insn that only generates the needed
2958 high part of the product, and in the sign flavor of unsignedp. */
2959 if (mul_highpart_cost[mode] < max_cost)
2961 moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
2962 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
2963 unsignedp, OPTAB_DIRECT);
2964 if (tem)
2965 return tem;
2968 /* Secondly, same as above, but use sign flavor opposite of unsignedp.
2969 Need to adjust the result after the multiplication. */
2970 if (size - 1 < BITS_PER_WORD
2971 && (mul_highpart_cost[mode] + 2 * shift_cost[mode][size-1]
2972 + 4 * add_cost[mode] < max_cost))
2974 moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
2975 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
2976 unsignedp, OPTAB_DIRECT);
2977 if (tem)
2978 /* We used the wrong signedness. Adjust the result. */
2979 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
2980 tem, unsignedp);
2983 /* Try widening multiplication. */
2984 moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
2985 if (moptab->handlers[wider_mode].insn_code != CODE_FOR_nothing
2986 && mul_widen_cost[wider_mode] < max_cost)
2988 tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
2989 unsignedp, OPTAB_WIDEN);
2990 if (tem)
2991 return extract_high_half (mode, tem);
2994 /* Try widening the mode and performing a non-widening multiplication. */
2995 moptab = smul_optab;
2996 if (smul_optab->handlers[wider_mode].insn_code != CODE_FOR_nothing
2997 && size - 1 < BITS_PER_WORD
2998 && mul_cost[wider_mode] + shift_cost[mode][size-1] < max_cost)
3000 tem = expand_binop (wider_mode, moptab, op0, op1, 0,
3001 unsignedp, OPTAB_WIDEN);
3002 if (tem)
3003 return extract_high_half (mode, tem);
3006 /* Try widening multiplication of opposite signedness, and adjust. */
3007 moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3008 if (moptab->handlers[wider_mode].insn_code != CODE_FOR_nothing
3009 && size - 1 < BITS_PER_WORD
3010 && (mul_widen_cost[wider_mode] + 2 * shift_cost[mode][size-1]
3011 + 4 * add_cost[mode] < max_cost))
3013 tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3014 NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3015 if (tem != 0)
3017 tem = extract_high_half (mode, tem);
3018 /* We used the wrong signedness. Adjust the result. */
3019 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3020 target, unsignedp);
3024 return 0;
3027 /* Emit code to multiply OP0 and CNST1, putting the high half of the result
3028 in TARGET if that is convenient, and returning where the result is. If the
3029 operation cannot be performed, 0 is returned.
3031 MODE is the mode of operation and result.
3033 UNSIGNEDP nonzero means unsigned multiply.
3035 MAX_COST is the total allowed cost for the expanded RTL. */
rtx
3038 expand_mult_highpart (enum machine_mode mode, rtx op0,
3039 unsigned HOST_WIDE_INT cnst1, rtx target,
3040 int unsignedp, int max_cost)
3042 enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3043 int extra_cost;
3044 bool sign_adjust = false;
3045 enum mult_variant variant;
3046 struct algorithm alg;
3047 rtx op1, tem;
3049 /* We can't support modes wider than HOST_BITS_PER_WIDE_INT. */
3050 if (GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT)
3051 abort ();
3053 op1 = gen_int_mode (cnst1, wider_mode);
3054 cnst1 &= GET_MODE_MASK (mode);
3056 /* We can't optimize modes wider than BITS_PER_WORD.
3057 ??? We might be able to perform double-word arithmetic if
3058 mode == word_mode, however all the cost calculations in
3059 synth_mult etc. assume single-word operations. */
3060 if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3061 return expand_mult_highpart_optab (mode, op0, op1, target,
3062 unsignedp, max_cost);
3064 extra_cost = shift_cost[mode][GET_MODE_BITSIZE (mode) - 1];
3066 /* Check whether we are trying to multiply by a negative constant. */
3067 if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3069 sign_adjust = true;
3070 extra_cost += add_cost[mode];
3073 /* See whether shift/add multiplication is cheap enough. */
3074 if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3075 max_cost - extra_cost))
3077 /* See whether the specialized multiplication optabs are
3078 cheaper than the shift/add version. */
3079 tem = expand_mult_highpart_optab (mode, op0, op1, target,
3080 unsignedp, alg.cost + extra_cost);
3081 if (tem)
3082 return tem;
3084 tem = convert_to_mode (wider_mode, op0, unsignedp);
3085 tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3086 tem = extract_high_half (mode, tem);
3088 /* Adjust result for signedness. */
3089 if (sign_adjust)
3090 tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3092 return tem;
3094 return expand_mult_highpart_optab (mode, op0, op1, target,
3095 unsignedp, max_cost);
3099 /* Expand signed modulus of OP0 by a power of two D in mode MODE. */
3101 static rtx
3102 expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3104 unsigned HOST_WIDE_INT mask;
3105 rtx result, temp, shift, label;
3106 int logd;
3108 logd = floor_log2 (d);
3109 result = gen_reg_rtx (mode);
3111 /* Avoid conditional branches when they're expensive. */
3112 if (BRANCH_COST >= 2
3113 && !optimize_size)
3115 rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3116 mode, 0, -1);
3117 if (signmask)
3119 signmask = force_reg (mode, signmask);
3120 mask = ((HOST_WIDE_INT) 1 << logd) - 1;
3121 shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3123 /* Use the rtx_cost of a LSHIFTRT instruction to determine
3124 which instruction sequence to use. If logical right shifts
3125 are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
3126 use a LSHIFTRT, 1 ADD, 1 SUB and an AND. */
3128 temp = gen_rtx_LSHIFTRT (mode, result, shift);
3129 if (lshr_optab->handlers[mode].insn_code == CODE_FOR_nothing
3130 || rtx_cost (temp, SET) > COSTS_N_INSNS (2))
3132 temp = expand_binop (mode, xor_optab, op0, signmask,
3133 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3134 temp = expand_binop (mode, sub_optab, temp, signmask,
3135 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3136 temp = expand_binop (mode, and_optab, temp, GEN_INT (mask),
3137 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3138 temp = expand_binop (mode, xor_optab, temp, signmask,
3139 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3140 temp = expand_binop (mode, sub_optab, temp, signmask,
3141 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3143 else
3145 signmask = expand_binop (mode, lshr_optab, signmask, shift,
3146 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3147 signmask = force_reg (mode, signmask);
3149 temp = expand_binop (mode, add_optab, op0, signmask,
3150 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3151 temp = expand_binop (mode, and_optab, temp, GEN_INT (mask),
3152 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3153 temp = expand_binop (mode, sub_optab, temp, signmask,
3154 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3156 return temp;
3160 /* Mask contains the mode's signbit and the significant bits of the
3161 modulus. By including the signbit in the operation, many targets
3162 can avoid an explicit compare operation in the following comparison
3163 against zero. */
3165 mask = (HOST_WIDE_INT) -1 << (GET_MODE_BITSIZE (mode) - 1)
3166 | (((HOST_WIDE_INT) 1 << logd) - 1);
3168 temp = expand_binop (mode, and_optab, op0, GEN_INT (mask), result,
3169 1, OPTAB_LIB_WIDEN);
3170 if (temp != result)
3171 emit_move_insn (result, temp);
3173 label = gen_label_rtx ();
3174 do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3176 temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3177 0, OPTAB_LIB_WIDEN);
3178 mask = (HOST_WIDE_INT) -1 << logd;
3179 temp = expand_binop (mode, ior_optab, temp, GEN_INT (mask), result,
3180 1, OPTAB_LIB_WIDEN);
3181 temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3182 0, OPTAB_LIB_WIDEN);
3183 if (temp != result)
3184 emit_move_insn (result, temp);
3185 emit_label (label);
3186 return result;
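/* Illustrative aside, not part of this file's interfaces: the cheaper
   branch-free sequence above for a 32-bit X and D = 2^LOGD,
   0 < logd < 32.  The result has the sign of X, as C's % does.  */

#include <stdint.h>

static int32_t
example_smod_pow2 (int32_t x, int logd)
{
  uint32_t mask = ((uint32_t) 1 << logd) - 1;
  /* 0 if x >= 0, otherwise the low logd bits all set.  */
  uint32_t smask = (uint32_t) (x >> 31) >> (32 - logd);
  return (int32_t) ((((uint32_t) x + smask) & mask) - smask);
}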
3189 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3190 if that is convenient, and returning where the result is.
3191 You may request either the quotient or the remainder as the result;
3192 specify REM_FLAG nonzero to get the remainder.
3194 CODE is the expression code for which kind of division this is;
3195 it controls how rounding is done. MODE is the machine mode to use.
3196 UNSIGNEDP nonzero means do unsigned division. */
3198 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3199 and then correct it by or'ing in missing high bits
3200 if result of ANDI is nonzero.
3201 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3202 This could optimize to a bfexts instruction.
3203 But C doesn't use these operations, so their optimizations are
3204 left for later. */
3205 /* ??? For modulo, we don't actually need the highpart of the first product,
3206 the low part will do nicely. And for small divisors, the second multiply
3207 can also be a low-part only multiply or even be completely left out.
3208 E.g. to calculate the remainder of a division by 3 with a 32 bit
3209 multiply, multiply with 0x55555556 and extract the upper two bits;
3210 the result is exact for inputs up to 0x1fffffff.
3211 The input range can be reduced by using cross-sum rules.
3212 For odd divisors >= 3, the following table gives right shift counts
3213 so that if a number is shifted by an integer multiple of the given
3214 amount, the remainder stays the same:
3215 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3216 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3217 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3218 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3219 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3221 Cross-sum rules for even numbers can be derived by leaving as many bits
3222 to the right alone as the divisor has zeros to the right.
3223 E.g. if x is an unsigned 32 bit number:
3224 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28 */
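/* Illustrative aside, not part of this file's interfaces: the mod-3
   remark above in plain C.  0x55555556 is 2^32/3 rounded to nearest, so
   the top two bits of the 32-bit product hold the residue; per the note
   above this is exact for inputs up to 0x1fffffff.  */

#include <stdint.h>

static unsigned
example_mod3 (uint32_t x)   /* x <= 0x1fffffff */
{
  return (x * 0x55555556u) >> 30;   /* upper two bits of the product */
}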
3227 #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
rtx
3230 expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3231 rtx op0, rtx op1, rtx target, int unsignedp)
3233 enum machine_mode compute_mode;
3234 rtx tquotient;
3235 rtx quotient = 0, remainder = 0;
3236 rtx last;
3237 int size;
3238 rtx insn, set;
3239 optab optab1, optab2;
3240 int op1_is_constant, op1_is_pow2 = 0;
3241 int max_cost, extra_cost;
3242 static HOST_WIDE_INT last_div_const = 0;
3243 static HOST_WIDE_INT ext_op1;
3245 op1_is_constant = GET_CODE (op1) == CONST_INT;
3246 if (op1_is_constant)
3248 ext_op1 = INTVAL (op1);
3249 if (unsignedp)
3250 ext_op1 &= GET_MODE_MASK (mode);
3251 op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3252 || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3256 /* This is the structure of expand_divmod:
3258 First comes code to fix up the operands so we can perform the operations
3259 correctly and efficiently.
3261 Second comes a switch statement with code specific for each rounding mode.
3262 For some special operands this code emits all RTL for the desired
3263 operation, for other cases, it generates only a quotient and stores it in
3264 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
3265 to indicate that it has not done anything.
3267 Last comes code that finishes the operation. If QUOTIENT is set and
3268 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
3269 QUOTIENT is not set, it is computed using trunc rounding.
3271 We try to generate special code for division and remainder when OP1 is a
3272 constant. If |OP1| = 2**n we can use shifts and some other fast
3273 operations. For other values of OP1, we compute a carefully selected
3274 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3275 by m.
3277 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3278 half of the product. Different strategies for generating the product are
3279 implemented in expand_mult_highpart.
3281 If what we actually want is the remainder, we generate that by another
3282 by-constant multiplication and a subtraction. */
3284 /* We shouldn't be called with OP1 == const1_rtx, but some of the
3285 code below will malfunction if we are, so check here and handle
3286 the special case if so. */
3287 if (op1 == const1_rtx)
3288 return rem_flag ? const0_rtx : op0;
3290 /* When dividing by -1, we could get an overflow.
3291 negv_optab can handle overflows. */
3292 if (! unsignedp && op1 == constm1_rtx)
3294 if (rem_flag)
3295 return const0_rtx;
3296 return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT
3297 ? negv_optab : neg_optab, op0, target, 0);
3300 if (target
3301 /* Don't use the function value register as a target
3302 since we have to read it as well as write it,
3303 and function-inlining gets confused by this. */
3304 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3305 /* Don't clobber an operand while doing a multi-step calculation. */
3306 || ((rem_flag || op1_is_constant)
3307 && (reg_mentioned_p (target, op0)
3308 || (MEM_P (op0) && MEM_P (target))))
3309 || reg_mentioned_p (target, op1)
3310 || (MEM_P (op1) && MEM_P (target))))
3311 target = 0;
3313 /* Get the mode in which to perform this computation. Normally it will
3314 be MODE, but sometimes we can't do the desired operation in MODE.
3315 If so, pick a wider mode in which we can do the operation. Convert
3316 to that mode at the start to avoid repeated conversions.
3318 First see what operations we need. These depend on the expression
3319 we are evaluating. (We assume that divxx3 insns exist under the
3320 same conditions as modxx3 insns do and that these insns don't normally
3321 fail. If these assumptions are not correct, we may generate less
3322 efficient code in some cases.)
3324 Then see if we find a mode in which we can open-code that operation
3325 (either a division, modulus, or shift). Finally, check for the smallest
3326 mode for which we can do the operation with a library call. */
3328 /* We might want to refine this now that we have division-by-constant
3329 optimization. Since expand_mult_highpart tries so many variants, it is
3330 not straightforward to generalize this. Maybe we should make an array
3331 of possible modes in init_expmed? Save this for GCC 2.7. */
3333 optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3334 ? (unsignedp ? lshr_optab : ashr_optab)
3335 : (unsignedp ? udiv_optab : sdiv_optab));
3336 optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3337 ? optab1
3338 : (unsignedp ? udivmod_optab : sdivmod_optab));
3340 for (compute_mode = mode; compute_mode != VOIDmode;
3341 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3342 if (optab1->handlers[compute_mode].insn_code != CODE_FOR_nothing
3343 || optab2->handlers[compute_mode].insn_code != CODE_FOR_nothing)
3344 break;
3346 if (compute_mode == VOIDmode)
3347 for (compute_mode = mode; compute_mode != VOIDmode;
3348 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3349 if (optab1->handlers[compute_mode].libfunc
3350 || optab2->handlers[compute_mode].libfunc)
3351 break;
3353 /* If we still couldn't find a mode, use MODE, but we'll probably abort
3354 in expand_binop. */
3355 if (compute_mode == VOIDmode)
3356 compute_mode = mode;
3358 if (target && GET_MODE (target) == compute_mode)
3359 tquotient = target;
3360 else
3361 tquotient = gen_reg_rtx (compute_mode);
3363 size = GET_MODE_BITSIZE (compute_mode);
3364 #if 0
3365 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3366 (mode), and thereby get better code when OP1 is a constant. Do that
3367 later. It will require going over all usages of SIZE below. */
3368 size = GET_MODE_BITSIZE (mode);
3369 #endif
3371 /* Only deduct something for a REM if the last divide done was
3372 for a different constant. Then set the constant of the last
3373 divide. */
3374 max_cost = div_cost[compute_mode]
3375 - (rem_flag && ! (last_div_const != 0 && op1_is_constant
3376 && INTVAL (op1) == last_div_const)
3377 ? mul_cost[compute_mode] + add_cost[compute_mode]
3378 : 0);
3380 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
3382 /* Now convert to the best mode to use. */
3383 if (compute_mode != mode)
3385 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
3386 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
3388 /* convert_modes may have placed op1 into a register, so we
3389 must recompute the following. */
3390 op1_is_constant = GET_CODE (op1) == CONST_INT;
3391 op1_is_pow2 = (op1_is_constant
3392 && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
3393 || (! unsignedp
3394 && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1)))))) ;
3397 /* If one of the operands is a volatile MEM, copy it into a register. */
3399 if (MEM_P (op0) && MEM_VOLATILE_P (op0))
3400 op0 = force_reg (compute_mode, op0);
3401 if (MEM_P (op1) && MEM_VOLATILE_P (op1))
3402 op1 = force_reg (compute_mode, op1);
3404 /* If we need the remainder or if OP1 is constant, we need to
3405 put OP0 in a register in case it has any queued subexpressions. */
3406 if (rem_flag || op1_is_constant)
3407 op0 = force_reg (compute_mode, op0);
3409 last = get_last_insn ();
3411 /* Promote floor rounding to trunc rounding for unsigned operations. */
3412 if (unsignedp)
3414 if (code == FLOOR_DIV_EXPR)
3415 code = TRUNC_DIV_EXPR;
3416 if (code == FLOOR_MOD_EXPR)
3417 code = TRUNC_MOD_EXPR;
3418 if (code == EXACT_DIV_EXPR && op1_is_pow2)
3419 code = TRUNC_DIV_EXPR;
3422 if (op1 != const0_rtx)
3423 switch (code)
3425 case TRUNC_MOD_EXPR:
3426 case TRUNC_DIV_EXPR:
3427 if (op1_is_constant)
3429 if (unsignedp)
3431 unsigned HOST_WIDE_INT mh, ml;
3432 int pre_shift, post_shift;
3433 int dummy;
3434 unsigned HOST_WIDE_INT d = (INTVAL (op1)
3435 & GET_MODE_MASK (compute_mode));
3437 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
3439 pre_shift = floor_log2 (d);
3440 if (rem_flag)
3442 remainder
3443 = expand_binop (compute_mode, and_optab, op0,
3444 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
3445 remainder, 1,
3446 OPTAB_LIB_WIDEN);
3447 if (remainder)
3448 return gen_lowpart (mode, remainder);
3450 quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3451 build_int_2 (pre_shift, 0),
3452 tquotient, 1);
3454 else if (size <= HOST_BITS_PER_WIDE_INT)
3456 if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
3458 /* Most significant bit of divisor is set; emit an scc
3459 insn. */
3460 quotient = emit_store_flag (tquotient, GEU, op0, op1,
3461 compute_mode, 1, 1);
3462 if (quotient == 0)
3463 goto fail1;
3465 else
3467 /* Find a suitable multiplier and right shift count
3468 instead of dividing by D. */
3470 mh = choose_multiplier (d, size, size,
3471 &ml, &post_shift, &dummy);
3473 /* If the suggested multiplier is more than SIZE bits,
3474 we can do better for even divisors, using an
3475 initial right shift. */
3476 if (mh != 0 && (d & 1) == 0)
3478 pre_shift = floor_log2 (d & -d);
3479 mh = choose_multiplier (d >> pre_shift, size,
3480 size - pre_shift,
3481 &ml, &post_shift, &dummy);
3482 if (mh)
3483 abort ();
3485 else
3486 pre_shift = 0;
3488 if (mh != 0)
3490 rtx t1, t2, t3, t4;
3492 if (post_shift - 1 >= BITS_PER_WORD)
3493 goto fail1;
3495 extra_cost
3496 = (shift_cost[compute_mode][post_shift - 1]
3497 + shift_cost[compute_mode][1]
3498 + 2 * add_cost[compute_mode]);
3499 t1 = expand_mult_highpart (compute_mode, op0, ml,
3500 NULL_RTX, 1,
3501 max_cost - extra_cost);
3502 if (t1 == 0)
3503 goto fail1;
3504 t2 = force_operand (gen_rtx_MINUS (compute_mode,
3505 op0, t1),
3506 NULL_RTX);
3507 t3 = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3508 build_int_2 (1, 0), NULL_RTX, 1);
3509 t4 = force_operand (gen_rtx_PLUS (compute_mode,
3510 t1, t3),
3511 NULL_RTX);
3512 quotient
3513 = expand_shift (RSHIFT_EXPR, compute_mode, t4,
3514 build_int_2 (post_shift - 1, 0),
3515 tquotient, 1);
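      /* Worked example, shown for illustration only (the real values come
         from choose_multiplier): 32-bit unsigned division by d == 7 yields
         ml == 0x24924925 and post_shift == 3, so the sequence above
         computes
             q = (t1 + ((op0 - t1) >> 1)) >> (post_shift - 1);
         op0 == 14 gives t1 == 2, t2 == 12, t3 == 6, t4 == 8, q == 2.  */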
3517 else
3519 rtx t1, t2;
3521 if (pre_shift >= BITS_PER_WORD
3522 || post_shift >= BITS_PER_WORD)
3523 goto fail1;
3525 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3526 build_int_2 (pre_shift, 0),
3527 NULL_RTX, 1);
3528 extra_cost
3529 = (shift_cost[compute_mode][pre_shift]
3530 + shift_cost[compute_mode][post_shift]);
3531 t2 = expand_mult_highpart (compute_mode, t1, ml,
3532 NULL_RTX, 1,
3533 max_cost - extra_cost);
3534 if (t2 == 0)
3535 goto fail1;
3536 quotient
3537 = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3538 build_int_2 (post_shift, 0),
3539 tquotient, 1);
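      /* Worked example (illustrative): 32-bit unsigned division by the
         even divisor d == 14 uses pre_shift == 1, after which
         choose_multiplier for 7 at 31 bits of precision yields
         ml == 0x92492493 and post_shift == 2; op0 == 28 then gives
         t1 == 14, t2 == 8, quotient == 8 >> 2 == 2.  */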
3543 else /* Too wide mode to use tricky code */
3544 break;
3546 insn = get_last_insn ();
3547 if (insn != last
3548 && (set = single_set (insn)) != 0
3549 && SET_DEST (set) == quotient)
3550 set_unique_reg_note (insn,
3551 REG_EQUAL,
3552 gen_rtx_UDIV (compute_mode, op0, op1));
3554 else /* TRUNC_DIV, signed */
3556 unsigned HOST_WIDE_INT ml;
3557 int lgup, post_shift;
3558 HOST_WIDE_INT d = INTVAL (op1);
3559 unsigned HOST_WIDE_INT abs_d = d >= 0 ? d : -d;
3561 /* n rem d = n rem -d */
3562 if (rem_flag && d < 0)
3564 d = abs_d;
3565 op1 = gen_int_mode (abs_d, compute_mode);
3568 if (d == 1)
3569 quotient = op0;
3570 else if (d == -1)
3571 quotient = expand_unop (compute_mode, neg_optab, op0,
3572 tquotient, 0);
3573 else if (abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
3575 /* This case is not handled correctly below. */
3576 quotient = emit_store_flag (tquotient, EQ, op0, op1,
3577 compute_mode, 1, 1);
3578 if (quotient == 0)
3579 goto fail1;
3581 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
3582 && (rem_flag ? smod_pow2_cheap[compute_mode]
3583 : sdiv_pow2_cheap[compute_mode])
3584 /* We assume that cheap metric is true if the
3585 optab has an expander for this mode. */
3586 && (((rem_flag ? smod_optab : sdiv_optab)
3587 ->handlers[compute_mode].insn_code
3588 != CODE_FOR_nothing)
3589 || (sdivmod_optab->handlers[compute_mode]
3590 .insn_code != CODE_FOR_nothing)))
3592 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
3594 if (rem_flag)
3596 remainder = expand_smod_pow2 (compute_mode, op0, d);
3597 if (remainder)
3598 return gen_lowpart (mode, remainder);
3600 lgup = floor_log2 (abs_d);
3601 if (BRANCH_COST < 1 || (abs_d != 2 && BRANCH_COST < 3))
3603 rtx label = gen_label_rtx ();
3604 rtx t1;
3606 t1 = copy_to_mode_reg (compute_mode, op0);
3607 do_cmp_and_jump (t1, const0_rtx, GE,
3608 compute_mode, label);
3609 expand_inc (t1, gen_int_mode (abs_d - 1,
3610 compute_mode));
3611 emit_label (label);
3612 quotient = expand_shift (RSHIFT_EXPR, compute_mode, t1,
3613 build_int_2 (lgup, 0),
3614 tquotient, 0);
3616 else
3618 rtx t1, t2, t3;
3619 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3620 build_int_2 (size - 1, 0),
3621 NULL_RTX, 0);
3622 t2 = expand_shift (RSHIFT_EXPR, compute_mode, t1,
3623 build_int_2 (size - lgup, 0),
3624 NULL_RTX, 1);
3625 t3 = force_operand (gen_rtx_PLUS (compute_mode,
3626 op0, t2),
3627 NULL_RTX);
3628 quotient = expand_shift (RSHIFT_EXPR, compute_mode, t3,
3629 build_int_2 (lgup, 0),
3630 tquotient, 0);
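      /* Illustrative example (32-bit mode): for abs_d == 4 (lgup == 2)
         and op0 == -7, t1 == -1, t2 == (unsigned) -1 >> 30 == 3,
         t3 == -7 + 3 == -4, and the quotient is -4 >> 2 == -1, which
         matches trunc (-7 / 4).  */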
3633 /* We have computed OP0 / abs(OP1). If OP1 is negative,
3634 negate the quotient. */
3635 if (d < 0)
3637 insn = get_last_insn ();
3638 if (insn != last
3639 && (set = single_set (insn)) != 0
3640 && SET_DEST (set) == quotient
3641 && abs_d < ((unsigned HOST_WIDE_INT) 1
3642 << (HOST_BITS_PER_WIDE_INT - 1)))
3643 set_unique_reg_note (insn,
3644 REG_EQUAL,
3645 gen_rtx_DIV (compute_mode,
3646 op0,
3647 GEN_INT
3648 (trunc_int_for_mode
3649 (abs_d,
3650 compute_mode))));
3652 quotient = expand_unop (compute_mode, neg_optab,
3653 quotient, quotient, 0);
3656 else if (size <= HOST_BITS_PER_WIDE_INT)
3658 choose_multiplier (abs_d, size, size - 1,
3659 &ml, &post_shift, &lgup);
3660 if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
3662 rtx t1, t2, t3;
3664 if (post_shift >= BITS_PER_WORD
3665 || size - 1 >= BITS_PER_WORD)
3666 goto fail1;
3668 extra_cost = (shift_cost[compute_mode][post_shift]
3669 + shift_cost[compute_mode][size - 1]
3670 + add_cost[compute_mode]);
3671 t1 = expand_mult_highpart (compute_mode, op0, ml,
3672 NULL_RTX, 0,
3673 max_cost - extra_cost);
3674 if (t1 == 0)
3675 goto fail1;
3676 t2 = expand_shift (RSHIFT_EXPR, compute_mode, t1,
3677 build_int_2 (post_shift, 0), NULL_RTX, 0);
3678 t3 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3679 build_int_2 (size - 1, 0), NULL_RTX, 0);
3680 if (d < 0)
3681 quotient
3682 = force_operand (gen_rtx_MINUS (compute_mode,
3683 t3, t2),
3684 tquotient);
3685 else
3686 quotient
3687 = force_operand (gen_rtx_MINUS (compute_mode,
3688 t2, t3),
3689 tquotient);
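      /* Worked example (illustrative): 32-bit signed division by d == 5
         yields ml == 0x66666667 and post_shift == 1; op0 == -7 gives
         t1 == -3, t2 == -2, t3 == -1, and quotient == t2 - t3 == -1.  */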
3691 else
3693 rtx t1, t2, t3, t4;
3695 if (post_shift >= BITS_PER_WORD
3696 || size - 1 >= BITS_PER_WORD)
3697 goto fail1;
3699 ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
3700 extra_cost = (shift_cost[compute_mode][post_shift]
3701 + shift_cost[compute_mode][size - 1]
3702 + 2 * add_cost[compute_mode]);
3703 t1 = expand_mult_highpart (compute_mode, op0, ml,
3704 NULL_RTX, 0,
3705 max_cost - extra_cost);
3706 if (t1 == 0)
3707 goto fail1;
3708 t2 = force_operand (gen_rtx_PLUS (compute_mode,
3709 t1, op0),
3710 NULL_RTX);
3711 t3 = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3712 build_int_2 (post_shift, 0),
3713 NULL_RTX, 0);
3714 t4 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3715 build_int_2 (size - 1, 0),
3716 NULL_RTX, 0);
3717 if (d < 0)
3718 quotient
3719 = force_operand (gen_rtx_MINUS (compute_mode,
3720 t4, t3),
3721 tquotient);
3722 else
3723 quotient
3724 = force_operand (gen_rtx_MINUS (compute_mode,
3725 t3, t4),
3726 tquotient);
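      /* Worked example (illustrative): 32-bit signed division by d == 7
         reaches this branch with ml == 0x92492493 (negative once
         sign-extended) and post_shift == 2; op0 == -7 gives t1 == 2,
         t2 == -5, t3 == -2, t4 == -1, and quotient == t3 - t4 == -1.  */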
3729 else /* Too wide mode to use tricky code */
3730 break;
3732 insn = get_last_insn ();
3733 if (insn != last
3734 && (set = single_set (insn)) != 0
3735 && SET_DEST (set) == quotient)
3736 set_unique_reg_note (insn,
3737 REG_EQUAL,
3738 gen_rtx_DIV (compute_mode, op0, op1));
3740 break;
3742 fail1:
3743 delete_insns_since (last);
3744 break;
3746 case FLOOR_DIV_EXPR:
3747 case FLOOR_MOD_EXPR:
3748 /* We will come here only for signed operations. */
3749 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
3751 unsigned HOST_WIDE_INT mh, ml;
3752 int pre_shift, lgup, post_shift;
3753 HOST_WIDE_INT d = INTVAL (op1);
3755 if (d > 0)
3757 /* We could just as easily deal with negative constants here,
3758 but it does not seem worth the trouble for GCC 2.6. */
3759 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
3761 pre_shift = floor_log2 (d);
3762 if (rem_flag)
3764 remainder = expand_binop (compute_mode, and_optab, op0,
3765 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
3766 remainder, 0, OPTAB_LIB_WIDEN);
3767 if (remainder)
3768 return gen_lowpart (mode, remainder);
3770 quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3771 build_int_2 (pre_shift, 0),
3772 tquotient, 0);
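      /* Illustrative example: FLOOR_DIV by d == 4 is a plain arithmetic
         shift, since op0 >> 2 rounds toward minus infinity; op0 == -7
         gives quotient -2 and remainder (op0 & 3) == 1, so that
         -7 == 4 * -2 + 1.  */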
3774 else
3776 rtx t1, t2, t3, t4;
3778 mh = choose_multiplier (d, size, size - 1,
3779 &ml, &post_shift, &lgup);
3780 if (mh)
3781 abort ();
3783 if (post_shift < BITS_PER_WORD
3784 && size - 1 < BITS_PER_WORD)
3786 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3787 build_int_2 (size - 1, 0),
3788 NULL_RTX, 0);
3789 t2 = expand_binop (compute_mode, xor_optab, op0, t1,
3790 NULL_RTX, 0, OPTAB_WIDEN);
3791 extra_cost = (shift_cost[compute_mode][post_shift]
3792 + shift_cost[compute_mode][size - 1]
3793 + 2 * add_cost[compute_mode]);
3794 t3 = expand_mult_highpart (compute_mode, t2, ml,
3795 NULL_RTX, 1,
3796 max_cost - extra_cost);
3797 if (t3 != 0)
3799 t4 = expand_shift (RSHIFT_EXPR, compute_mode, t3,
3800 build_int_2 (post_shift, 0),
3801 NULL_RTX, 1);
3802 quotient = expand_binop (compute_mode, xor_optab,
3803 t4, t1, tquotient, 0,
3804 OPTAB_WIDEN);
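      /* Sketch of the identity used above (illustrative, 32-bit values):
         t1 is the sign mask of op0, and op0 ^ t1 is op0 for op0 >= 0 and
         -op0 - 1 otherwise, so the unsigned multiply-high and shift
         compute floor ((op0 ^ t1) / d) and the final xor restores the
         sign.  For d == 5 (ml == 0x66666667, post_shift == 1) and
         op0 == -7: t2 == 6, t4 == 1, quotient == ~1 == -2
         == floor (-7 / 5).  */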
3809 else
3811 rtx nsign, t1, t2, t3, t4;
3812 t1 = force_operand (gen_rtx_PLUS (compute_mode,
3813 op0, constm1_rtx), NULL_RTX);
3814 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
3815 0, OPTAB_WIDEN);
3816 nsign = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3817 build_int_2 (size - 1, 0), NULL_RTX, 0);
3818 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
3819 NULL_RTX);
3820 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
3821 NULL_RTX, 0);
3822 if (t4)
3824 rtx t5;
3825 t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
3826 NULL_RTX, 0);
3827 quotient = force_operand (gen_rtx_PLUS (compute_mode,
3828 t4, t5),
3829 tquotient);
3834 if (quotient != 0)
3835 break;
3836 delete_insns_since (last);
3838 /* Try using an instruction that produces both the quotient and
3839 remainder, using truncation. We can easily compensate the quotient
3840 or remainder to get floor rounding, once we have the remainder.
3841 Notice that we also compute the final remainder value here,
3842 and return the result right away. */
3843 if (target == 0 || GET_MODE (target) != compute_mode)
3844 target = gen_reg_rtx (compute_mode);
3846 if (rem_flag)
3848 remainder
3849 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
3850 quotient = gen_reg_rtx (compute_mode);
3852 else
3854 quotient
3855 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
3856 remainder = gen_reg_rtx (compute_mode);
3859 if (expand_twoval_binop (sdivmod_optab, op0, op1,
3860 quotient, remainder, 0))
3862 /* This could be computed with a branch-less sequence.
3863 Save that for later. */
3864 rtx tem;
3865 rtx label = gen_label_rtx ();
3866 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
3867 tem = expand_binop (compute_mode, xor_optab, op0, op1,
3868 NULL_RTX, 0, OPTAB_WIDEN);
3869 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
3870 expand_dec (quotient, const1_rtx);
3871 expand_inc (remainder, op1);
3872 emit_label (label);
3873 return gen_lowpart (mode, rem_flag ? remainder : quotient);
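      /* Illustrative example of the adjustment above: op0 == -7, op1 == 2
         gives truncating quotient -3 and remainder -1; the remainder is
         nonzero and the operands differ in sign, so the floor result is
         quotient -4, remainder 1 (-7 == 2 * -4 + 1).  */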
3876 /* No luck with division elimination or divmod. Have to do it
3877 by conditionally adjusting op0 *and* the result. */
3879 rtx label1, label2, label3, label4, label5;
3880 rtx adjusted_op0;
3881 rtx tem;
3883 quotient = gen_reg_rtx (compute_mode);
3884 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
3885 label1 = gen_label_rtx ();
3886 label2 = gen_label_rtx ();
3887 label3 = gen_label_rtx ();
3888 label4 = gen_label_rtx ();
3889 label5 = gen_label_rtx ();
3890 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
3891 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
3892 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3893 quotient, 0, OPTAB_LIB_WIDEN);
3894 if (tem != quotient)
3895 emit_move_insn (quotient, tem);
3896 emit_jump_insn (gen_jump (label5));
3897 emit_barrier ();
3898 emit_label (label1);
3899 expand_inc (adjusted_op0, const1_rtx);
3900 emit_jump_insn (gen_jump (label4));
3901 emit_barrier ();
3902 emit_label (label2);
3903 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
3904 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3905 quotient, 0, OPTAB_LIB_WIDEN);
3906 if (tem != quotient)
3907 emit_move_insn (quotient, tem);
3908 emit_jump_insn (gen_jump (label5));
3909 emit_barrier ();
3910 emit_label (label3);
3911 expand_dec (adjusted_op0, const1_rtx);
3912 emit_label (label4);
3913 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3914 quotient, 0, OPTAB_LIB_WIDEN);
3915 if (tem != quotient)
3916 emit_move_insn (quotient, tem);
3917 expand_dec (quotient, const1_rtx);
3918 emit_label (label5);
3920 break;
3922 case CEIL_DIV_EXPR:
3923 case CEIL_MOD_EXPR:
3924 if (unsignedp)
3926 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
3928 rtx t1, t2, t3;
3929 unsigned HOST_WIDE_INT d = INTVAL (op1);
3930 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3931 build_int_2 (floor_log2 (d), 0),
3932 tquotient, 1);
3933 t2 = expand_binop (compute_mode, and_optab, op0,
3934 GEN_INT (d - 1),
3935 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3936 t3 = gen_reg_rtx (compute_mode);
3937 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
3938 compute_mode, 1, 1);
3939 if (t3 == 0)
3941 rtx lab;
3942 lab = gen_label_rtx ();
3943 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
3944 expand_inc (t1, const1_rtx);
3945 emit_label (lab);
3946 quotient = t1;
3948 else
3949 quotient = force_operand (gen_rtx_PLUS (compute_mode,
3950 t1, t3),
3951 tquotient);
3952 break;
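      /* Illustrative example: unsigned CEIL_DIV by d == 4 computes
         t1 == op0 >> 2 and adds 1 when (op0 & 3) is nonzero, so
         op0 == 13 gives 3 + 1 == 4 == ceil (13 / 4).  */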
3955 /* Try using an instruction that produces both the quotient and
3956 remainder, using truncation. We can easily compensate the
3957 quotient or remainder to get ceiling rounding, once we have the
3958 remainder. Notice that we also compute the final remainder
3959 value here, and return the result right away. */
3960 if (target == 0 || GET_MODE (target) != compute_mode)
3961 target = gen_reg_rtx (compute_mode);
3963 if (rem_flag)
3965 remainder = (REG_P (target)
3966 ? target : gen_reg_rtx (compute_mode));
3967 quotient = gen_reg_rtx (compute_mode);
3969 else
3971 quotient = (REG_P (target)
3972 ? target : gen_reg_rtx (compute_mode));
3973 remainder = gen_reg_rtx (compute_mode);
3976 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
3977 remainder, 1))
3979 /* This could be computed with a branch-less sequence.
3980 Save that for later. */
3981 rtx label = gen_label_rtx ();
3982 do_cmp_and_jump (remainder, const0_rtx, EQ,
3983 compute_mode, label);
3984 expand_inc (quotient, const1_rtx);
3985 expand_dec (remainder, op1);
3986 emit_label (label);
3987 return gen_lowpart (mode, rem_flag ? remainder : quotient);
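      /* Illustrative example of the compensation above: op0 == 13,
         op1 == 4 gives truncating quotient 3, remainder 1; the remainder
         is nonzero, so the quotient becomes 4 and the remainder
         1 - 4 == -3, i.e. 13 == 4 * 4 - 3.  */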
3990 /* No luck with division elimination or divmod. Have to do it
3991 by conditionally adjusting op0 *and* the result. */
3993 rtx label1, label2;
3994 rtx adjusted_op0, tem;
3996 quotient = gen_reg_rtx (compute_mode);
3997 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
3998 label1 = gen_label_rtx ();
3999 label2 = gen_label_rtx ();
4000 do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4001 compute_mode, label1);
4002 emit_move_insn (quotient, const0_rtx);
4003 emit_jump_insn (gen_jump (label2));
4004 emit_barrier ();
4005 emit_label (label1);
4006 expand_dec (adjusted_op0, const1_rtx);
4007 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4008 quotient, 1, OPTAB_LIB_WIDEN);
4009 if (tem != quotient)
4010 emit_move_insn (quotient, tem);
4011 expand_inc (quotient, const1_rtx);
4012 emit_label (label2);
4015 else /* signed */
4017 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4018 && INTVAL (op1) >= 0)
4020 /* This is extremely similar to the code for the unsigned case
4021 above. For 2.7 we should merge these variants, but for
4022 2.6.1 I don't want to touch the code for unsigned since that
4023 gets used in C. The signed case will only be used by other
4024 languages (Ada). */
4026 rtx t1, t2, t3;
4027 unsigned HOST_WIDE_INT d = INTVAL (op1);
4028 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4029 build_int_2 (floor_log2 (d), 0),
4030 tquotient, 0);
4031 t2 = expand_binop (compute_mode, and_optab, op0,
4032 GEN_INT (d - 1),
4033 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4034 t3 = gen_reg_rtx (compute_mode);
4035 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4036 compute_mode, 1, 1);
4037 if (t3 == 0)
4039 rtx lab;
4040 lab = gen_label_rtx ();
4041 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4042 expand_inc (t1, const1_rtx);
4043 emit_label (lab);
4044 quotient = t1;
4046 else
4047 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4048 t1, t3),
4049 tquotient);
4050 break;
4053 /* Try using an instruction that produces both the quotient and
4054 remainder, using truncation. We can easily compensate the
4055 quotient or remainder to get ceiling rounding, once we have the
4056 remainder. Notice that we also compute the final remainder
4057 value here, and return the result right away. */
4058 if (target == 0 || GET_MODE (target) != compute_mode)
4059 target = gen_reg_rtx (compute_mode);
4060 if (rem_flag)
4062 remainder = (REG_P (target)
4063 ? target : gen_reg_rtx (compute_mode));
4064 quotient = gen_reg_rtx (compute_mode);
4066 else
4068 quotient = (REG_P (target)
4069 ? target : gen_reg_rtx (compute_mode));
4070 remainder = gen_reg_rtx (compute_mode);
4073 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4074 remainder, 0))
4076 /* This could be computed with a branch-less sequence.
4077 Save that for later. */
4078 rtx tem;
4079 rtx label = gen_label_rtx ();
4080 do_cmp_and_jump (remainder, const0_rtx, EQ,
4081 compute_mode, label);
4082 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4083 NULL_RTX, 0, OPTAB_WIDEN);
4084 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4085 expand_inc (quotient, const1_rtx);
4086 expand_dec (remainder, op1);
4087 emit_label (label);
4088 return gen_lowpart (mode, rem_flag ? remainder : quotient);
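      /* Illustrative example: op0 == 7, op1 == 2 gives truncating
         quotient 3, remainder 1; the remainder is nonzero and the
         operands have the same sign, so the quotient becomes 4 and the
         remainder -1 (7 == 2 * 4 - 1).  With mixed signs the truncating
         result already is the ceiling.  */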
4091 /* No luck with division elimination or divmod. Have to do it
4092 by conditionally adjusting op0 *and* the result. */
4094 rtx label1, label2, label3, label4, label5;
4095 rtx adjusted_op0;
4096 rtx tem;
4098 quotient = gen_reg_rtx (compute_mode);
4099 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4100 label1 = gen_label_rtx ();
4101 label2 = gen_label_rtx ();
4102 label3 = gen_label_rtx ();
4103 label4 = gen_label_rtx ();
4104 label5 = gen_label_rtx ();
4105 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4106 do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4107 compute_mode, label1);
4108 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4109 quotient, 0, OPTAB_LIB_WIDEN);
4110 if (tem != quotient)
4111 emit_move_insn (quotient, tem);
4112 emit_jump_insn (gen_jump (label5));
4113 emit_barrier ();
4114 emit_label (label1);
4115 expand_dec (adjusted_op0, const1_rtx);
4116 emit_jump_insn (gen_jump (label4));
4117 emit_barrier ();
4118 emit_label (label2);
4119 do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4120 compute_mode, label3);
4121 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4122 quotient, 0, OPTAB_LIB_WIDEN);
4123 if (tem != quotient)
4124 emit_move_insn (quotient, tem);
4125 emit_jump_insn (gen_jump (label5));
4126 emit_barrier ();
4127 emit_label (label3);
4128 expand_inc (adjusted_op0, const1_rtx);
4129 emit_label (label4);
4130 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4131 quotient, 0, OPTAB_LIB_WIDEN);
4132 if (tem != quotient)
4133 emit_move_insn (quotient, tem);
4134 expand_inc (quotient, const1_rtx);
4135 emit_label (label5);
4138 break;
4140 case EXACT_DIV_EXPR:
4141 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4143 HOST_WIDE_INT d = INTVAL (op1);
4144 unsigned HOST_WIDE_INT ml;
4145 int pre_shift;
4146 rtx t1;
4148 pre_shift = floor_log2 (d & -d);
4149 ml = invert_mod2n (d >> pre_shift, size);
4150 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4151 build_int_2 (pre_shift, 0), NULL_RTX, unsignedp);
4152 quotient = expand_mult (compute_mode, t1,
4153 gen_int_mode (ml, compute_mode),
4154 NULL_RTX, 1);
4156 insn = get_last_insn ();
4157 set_unique_reg_note (insn,
4158 REG_EQUAL,
4159 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4160 compute_mode,
4161 op0, op1));
4163 break;
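      /* Worked example (illustrative, 32-bit mode): EXACT_DIV by d == 6
         uses pre_shift == 1 and ml == invert_mod2n (3, 32) == 0xAAAAAAAB,
         since 3 * 0xAAAAAAAB == 2 * 2^32 + 1; op0 == 18 gives t1 == 9
         and quotient == (9 * 0xAAAAAAAB) mod 2^32 == 3.  */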
4165 case ROUND_DIV_EXPR:
4166 case ROUND_MOD_EXPR:
4167 if (unsignedp)
4169 rtx tem;
4170 rtx label;
4171 label = gen_label_rtx ();
4172 quotient = gen_reg_rtx (compute_mode);
4173 remainder = gen_reg_rtx (compute_mode);
4174 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4176 rtx tem;
4177 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4178 quotient, 1, OPTAB_LIB_WIDEN);
4179 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4180 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4181 remainder, 1, OPTAB_LIB_WIDEN);
4183 tem = plus_constant (op1, -1);
4184 tem = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4185 build_int_2 (1, 0), NULL_RTX, 1);
4186 do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4187 expand_inc (quotient, const1_rtx);
4188 expand_dec (remainder, op1);
4189 emit_label (label);
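      /* Illustrative example: unsigned ROUND_DIV of 13 by 5 first forms
         quotient 2, remainder 3; since (op1 - 1) >> 1 == 2 and 3 > 2,
         the quotient is bumped to 3 and the remainder to -2, rounding
         2.6 up to 3.  */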
4191 else
4193 rtx abs_rem, abs_op1, tem, mask;
4194 rtx label;
4195 label = gen_label_rtx ();
4196 quotient = gen_reg_rtx (compute_mode);
4197 remainder = gen_reg_rtx (compute_mode);
4198 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4200 rtx tem;
4201 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4202 quotient, 0, OPTAB_LIB_WIDEN);
4203 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4204 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4205 remainder, 0, OPTAB_LIB_WIDEN);
4207 abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4208 abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4209 tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4210 build_int_2 (1, 0), NULL_RTX, 1);
4211 do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4212 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4213 NULL_RTX, 0, OPTAB_WIDEN);
4214 mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4215 build_int_2 (size - 1, 0), NULL_RTX, 0);
4216 tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4217 NULL_RTX, 0, OPTAB_WIDEN);
4218 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4219 NULL_RTX, 0, OPTAB_WIDEN);
4220 expand_inc (quotient, tem);
4221 tem = expand_binop (compute_mode, xor_optab, mask, op1,
4222 NULL_RTX, 0, OPTAB_WIDEN);
4223 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4224 NULL_RTX, 0, OPTAB_WIDEN);
4225 expand_dec (remainder, tem);
4226 emit_label (label);
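      /* Sketch of the sign trick above (illustrative): MASK is
         (op0 ^ op1) >> (size - 1), i.e. -1 when the operands differ in
         sign and 0 otherwise, so (MASK ^ 1) - MASK is +1 or -1 and the
         quotient is adjusted toward the rounded result whenever
         2 * abs (remainder) >= abs (op1).  */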
4228 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4230 default:
4231 abort ();
4234 if (quotient == 0)
4236 if (target && GET_MODE (target) != compute_mode)
4237 target = 0;
4239 if (rem_flag)
4241 /* Try to produce the remainder without producing the quotient.
4242 If we seem to have a divmod pattern that does not require widening,
4243 don't try widening here. We should really have a WIDEN argument
4244 to expand_twoval_binop, since what we'd really like to do here is
4245 1) try a mod insn in compute_mode
4246 2) try a divmod insn in compute_mode
4247 3) try a div insn in compute_mode and multiply-subtract to get
4248 remainder
4249 4) try the same things with widening allowed. */
4250 remainder
4251 = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4252 op0, op1, target,
4253 unsignedp,
4254 ((optab2->handlers[compute_mode].insn_code
4255 != CODE_FOR_nothing)
4256 ? OPTAB_DIRECT : OPTAB_WIDEN));
4257 if (remainder == 0)
4259 /* No luck there. Can we do remainder and divide at once
4260 without a library call? */
4261 remainder = gen_reg_rtx (compute_mode);
4262 if (! expand_twoval_binop ((unsignedp
4263 ? udivmod_optab
4264 : sdivmod_optab),
4265 op0, op1,
4266 NULL_RTX, remainder, unsignedp))
4267 remainder = 0;
4270 if (remainder)
4271 return gen_lowpart (mode, remainder);
4274 /* Produce the quotient. Try a quotient insn, but not a library call.
4275 If we have a divmod in this mode, use it in preference to widening
4276 the div (for this test we assume it will not fail). Note that optab2
4277 is set to the one of the two optabs that the call below will use. */
4278 quotient
4279 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4280 op0, op1, rem_flag ? NULL_RTX : target,
4281 unsignedp,
4282 ((optab2->handlers[compute_mode].insn_code
4283 != CODE_FOR_nothing)
4284 ? OPTAB_DIRECT : OPTAB_WIDEN));
4286 if (quotient == 0)
4288 /* No luck there. Try a quotient-and-remainder insn,
4289 keeping the quotient alone. */
4290 quotient = gen_reg_rtx (compute_mode);
4291 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4292 op0, op1,
4293 quotient, NULL_RTX, unsignedp))
4295 quotient = 0;
4296 if (! rem_flag)
4297 /* Still no luck. If we are not computing the remainder,
4298 use a library call for the quotient. */
4299 quotient = sign_expand_binop (compute_mode,
4300 udiv_optab, sdiv_optab,
4301 op0, op1, target,
4302 unsignedp, OPTAB_LIB_WIDEN);
4307 if (rem_flag)
4309 if (target && GET_MODE (target) != compute_mode)
4310 target = 0;
4312 if (quotient == 0)
4313 /* No divide instruction either. Use library for remainder. */
4314 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4315 op0, op1, target,
4316 unsignedp, OPTAB_LIB_WIDEN);
4317 else
4319 /* We divided. Now finish doing X - Y * (X / Y). */
4320 remainder = expand_mult (compute_mode, quotient, op1,
4321 NULL_RTX, unsignedp);
4322 remainder = expand_binop (compute_mode, sub_optab, op0,
4323 remainder, target, unsignedp,
4324 OPTAB_LIB_WIDEN);
4328 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4331 /* Return a tree node with data type TYPE, describing the value of X.
4332 Usually this is a VAR_DECL, if there is no obvious better choice.
4333 X may be an expression; however, we only support those expressions
4334 generated by loop.c. */
4336 tree
4337 make_tree (tree type, rtx x)
4339 tree t;
4341 switch (GET_CODE (x))
4343 case CONST_INT:
4344 t = build_int_2 (INTVAL (x),
4345 (TYPE_UNSIGNED (type)
4346 && (GET_MODE_BITSIZE (TYPE_MODE (type))
4347 < HOST_BITS_PER_WIDE_INT))
4348 || INTVAL (x) >= 0 ? 0 : -1);
4349 TREE_TYPE (t) = type;
4350 return t;
4352 case CONST_DOUBLE:
4353 if (GET_MODE (x) == VOIDmode)
4355 t = build_int_2 (CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
4356 TREE_TYPE (t) = type;
4358 else
4360 REAL_VALUE_TYPE d;
4362 REAL_VALUE_FROM_CONST_DOUBLE (d, x);
4363 t = build_real (type, d);
4366 return t;
4368 case CONST_VECTOR:
4370 int i, units;
4371 rtx elt;
4372 tree t = NULL_TREE;
4374 units = CONST_VECTOR_NUNITS (x);
4376 /* Build a tree with vector elements. */
4377 for (i = units - 1; i >= 0; --i)
4379 elt = CONST_VECTOR_ELT (x, i);
4380 t = tree_cons (NULL_TREE, make_tree (type, elt), t);
4383 return build_vector (type, t);
4386 case PLUS:
4387 return fold (build (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4388 make_tree (type, XEXP (x, 1))));
4390 case MINUS:
4391 return fold (build (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4392 make_tree (type, XEXP (x, 1))));
4394 case NEG:
4395 return fold (build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0))));
4397 case MULT:
4398 return fold (build (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
4399 make_tree (type, XEXP (x, 1))));
4401 case ASHIFT:
4402 return fold (build (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
4403 make_tree (type, XEXP (x, 1))));
4405 case LSHIFTRT:
4406 t = lang_hooks.types.unsigned_type (type);
4407 return fold (convert (type,
4408 build (RSHIFT_EXPR, t,
4409 make_tree (t, XEXP (x, 0)),
4410 make_tree (type, XEXP (x, 1)))));
4412 case ASHIFTRT:
4413 t = lang_hooks.types.signed_type (type);
4414 return fold (convert (type,
4415 build (RSHIFT_EXPR, t,
4416 make_tree (t, XEXP (x, 0)),
4417 make_tree (type, XEXP (x, 1)))));
4419 case DIV:
4420 if (TREE_CODE (type) != REAL_TYPE)
4421 t = lang_hooks.types.signed_type (type);
4422 else
4423 t = type;
4425 return fold (convert (type,
4426 build (TRUNC_DIV_EXPR, t,
4427 make_tree (t, XEXP (x, 0)),
4428 make_tree (t, XEXP (x, 1)))));
4429 case UDIV:
4430 t = lang_hooks.types.unsigned_type (type);
4431 return fold (convert (type,
4432 build (TRUNC_DIV_EXPR, t,
4433 make_tree (t, XEXP (x, 0)),
4434 make_tree (t, XEXP (x, 1)))));
4436 case SIGN_EXTEND:
4437 case ZERO_EXTEND:
4438 t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
4439 GET_CODE (x) == ZERO_EXTEND);
4440 return fold (convert (type, make_tree (t, XEXP (x, 0))));
4442 default:
4443 t = build_decl (VAR_DECL, NULL_TREE, type);
4445 /* If TYPE is a POINTER_TYPE, X might be Pmode with TYPE_MODE being
4446 ptr_mode. So convert. */
4447 if (POINTER_TYPE_P (type))
4448 x = convert_memory_address (TYPE_MODE (type), x);
4450 /* Note that we do *not* use SET_DECL_RTL here, because we do not
4451 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
4452 t->decl.rtl = x;
4454 return t;
4458 /* Check whether the multiplication X * MULT + ADD overflows.
4459 X, MULT and ADD must be CONST_*.
4460 MODE is the machine mode for the computation.
4461 X and MULT must have mode MODE. ADD may have a different mode,
4462 or VOIDmode, in which case it is taken to have mode MODE.
4463 UNSIGNEDP is nonzero to do unsigned multiplication. */
4465 bool
4466 const_mult_add_overflow_p (rtx x, rtx mult, rtx add, enum machine_mode mode, int unsignedp)
4468 tree type, mult_type, add_type, result;
4470 type = lang_hooks.types.type_for_mode (mode, unsignedp);
4472 /* In order to get a proper overflow indication from an unsigned
4473 type, we have to pretend that it's a sizetype. */
4474 mult_type = type;
4475 if (unsignedp)
4477 mult_type = copy_node (type);
4478 TYPE_IS_SIZETYPE (mult_type) = 1;
4481 add_type = (GET_MODE (add) == VOIDmode ? mult_type
4482 : lang_hooks.types.type_for_mode (GET_MODE (add), unsignedp));
4484 result = fold (build (PLUS_EXPR, mult_type,
4485 fold (build (MULT_EXPR, mult_type,
4486 make_tree (mult_type, x),
4487 make_tree (mult_type, mult))),
4488 make_tree (add_type, add)));
4490 return TREE_CONSTANT_OVERFLOW (result);
4493 /* Return an rtx representing the value of X * MULT + ADD.
4494 TARGET is a suggestion for where to store the result (an rtx).
4495 MODE is the machine mode for the computation.
4496 X and MULT must have mode MODE. ADD may have a different mode,
4497 or VOIDmode, in which case it is taken to have mode MODE.
4498 UNSIGNEDP is nonzero to do unsigned multiplication.
4499 This may emit insns. */
4501 rtx
4502 expand_mult_add (rtx x, rtx target, rtx mult, rtx add, enum machine_mode mode,
4503 int unsignedp)
4505 tree type = lang_hooks.types.type_for_mode (mode, unsignedp);
4506 tree add_type = (GET_MODE (add) == VOIDmode
4507 ? type: lang_hooks.types.type_for_mode (GET_MODE (add),
4508 unsignedp));
4509 tree result = fold (build (PLUS_EXPR, type,
4510 fold (build (MULT_EXPR, type,
4511 make_tree (type, x),
4512 make_tree (type, mult))),
4513 make_tree (add_type, add)));
4515 return expand_expr (result, target, VOIDmode, 0);
4518 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
4519 and returning TARGET.
4521 If TARGET is 0, a pseudo-register or constant is returned. */
4523 rtx
4524 expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
4526 rtx tem = 0;
4528 if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
4529 tem = simplify_binary_operation (AND, mode, op0, op1);
4530 if (tem == 0)
4531 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
4533 if (target == 0)
4534 target = tem;
4535 else if (tem != target)
4536 emit_move_insn (target, tem);
4537 return target;
4540 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
4541 and storing in TARGET. Normally return TARGET.
4542 Return 0 if that cannot be done.
4544 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
4545 it is VOIDmode, they cannot both be CONST_INT.
4547 UNSIGNEDP is for the case where we have to widen the operands
4548 to perform the operation. It says to use zero-extension.
4550 NORMALIZEP is 1 if we should convert the result to be either zero
4551 or one. NORMALIZEP is -1 if we should convert the result to be
4552 either zero or -1. If NORMALIZEP is zero, the result will be left
4553 "raw" out of the scc insn. */
4555 rtx
4556 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
4557 enum machine_mode mode, int unsignedp, int normalizep)
4559 rtx subtarget;
4560 enum insn_code icode;
4561 enum machine_mode compare_mode;
4562 enum machine_mode target_mode = GET_MODE (target);
4563 rtx tem;
4564 rtx last = get_last_insn ();
4565 rtx pattern, comparison;
4567 /* ??? Ok to do this and then fail? */
4568 op0 = protect_from_queue (op0, 0);
4569 op1 = protect_from_queue (op1, 0);
4571 if (unsignedp)
4572 code = unsigned_condition (code);
4574 /* If one operand is constant, make it the second one. Only do this
4575 if the other operand is not constant as well. */
4577 if (swap_commutative_operands_p (op0, op1))
4579 tem = op0;
4580 op0 = op1;
4581 op1 = tem;
4582 code = swap_condition (code);
4585 if (mode == VOIDmode)
4586 mode = GET_MODE (op0);
4588 /* For some comparisons with 1 and -1, we can convert this to
4589 comparisons with zero. This will often produce more opportunities for
4590 store-flag insns. */
4592 switch (code)
4594 case LT:
4595 if (op1 == const1_rtx)
4596 op1 = const0_rtx, code = LE;
4597 break;
4598 case LE:
4599 if (op1 == constm1_rtx)
4600 op1 = const0_rtx, code = LT;
4601 break;
4602 case GE:
4603 if (op1 == const1_rtx)
4604 op1 = const0_rtx, code = GT;
4605 break;
4606 case GT:
4607 if (op1 == constm1_rtx)
4608 op1 = const0_rtx, code = GE;
4609 break;
4610 case GEU:
4611 if (op1 == const1_rtx)
4612 op1 = const0_rtx, code = NE;
4613 break;
4614 case LTU:
4615 if (op1 == const1_rtx)
4616 op1 = const0_rtx, code = EQ;
4617 break;
4618 default:
4619 break;
4622 /* If we are comparing a double-word integer with zero or -1, we can
4623 convert the comparison into one involving a single word. */
4624 if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
4625 && GET_MODE_CLASS (mode) == MODE_INT
4626 && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
4628 if ((code == EQ || code == NE)
4629 && (op1 == const0_rtx || op1 == constm1_rtx))
4631 rtx op00, op01, op0both;
4633 /* Do a logical OR or AND of the two words and compare the result. */
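      /* (lo | hi) is zero exactly when the double-word value is zero,
         and (lo & hi) is all ones exactly when it is -1, so one
         word_mode comparison suffices.  */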
4634 op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
4635 op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
4636 op0both = expand_binop (word_mode,
4637 op1 == const0_rtx ? ior_optab : and_optab,
4638 op00, op01, NULL_RTX, unsignedp, OPTAB_DIRECT);
4640 if (op0both != 0)
4641 return emit_store_flag (target, code, op0both, op1, word_mode,
4642 unsignedp, normalizep);
4644 else if ((code == LT || code == GE) && op1 == const0_rtx)
4646 rtx op0h;
4648 /* If testing the sign bit, can just test on high word. */
4649 op0h = simplify_gen_subreg (word_mode, op0, mode,
4650 subreg_highpart_offset (word_mode, mode));
4651 return emit_store_flag (target, code, op0h, op1, word_mode,
4652 unsignedp, normalizep);
4656 /* From now on, we won't change CODE, so set ICODE now. */
4657 icode = setcc_gen_code[(int) code];
4659 /* If this is A < 0 or A >= 0, we can do this by taking the ones
4660 complement of A (for GE) and shifting the sign bit to the low bit. */
4661 if (op1 == const0_rtx && (code == LT || code == GE)
4662 && GET_MODE_CLASS (mode) == MODE_INT
4663 && (normalizep || STORE_FLAG_VALUE == 1
4664 || (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
4665 && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
4666 == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))))
4668 subtarget = target;
4670 /* If the result is to be wider than OP0, it is best to convert it
4671 first. If it is to be narrower, it is *incorrect* to convert it
4672 first. */
4673 if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
4675 op0 = protect_from_queue (op0, 0);
4676 op0 = convert_modes (target_mode, mode, op0, 0);
4677 mode = target_mode;
4680 if (target_mode != mode)
4681 subtarget = 0;
4683 if (code == GE)
4684 op0 = expand_unop (mode, one_cmpl_optab, op0,
4685 ((STORE_FLAG_VALUE == 1 || normalizep)
4686 ? 0 : subtarget), 0);
4688 if (STORE_FLAG_VALUE == 1 || normalizep)
4689 /* If we are supposed to produce a 0/1 value, we want to do
4690 a logical shift from the sign bit to the low-order bit; for
4691 a -1/0 value, we do an arithmetic shift. */
4692 op0 = expand_shift (RSHIFT_EXPR, mode, op0,
4693 size_int (GET_MODE_BITSIZE (mode) - 1),
4694 subtarget, normalizep != -1);
4696 if (mode != target_mode)
4697 op0 = convert_modes (target_mode, mode, op0, 0);
4699 return op0;
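  /* Illustrative example (32-bit mode, normalizep == 1): "A < 0" reduces
     to (unsigned) A >> 31, which is 1 exactly when the sign bit is set;
     "A >= 0" complements A first, giving (unsigned) ~A >> 31.  */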
4702 if (icode != CODE_FOR_nothing)
4704 insn_operand_predicate_fn pred;
4706 /* We think we may be able to do this with a scc insn. Emit the
4707 comparison and then the scc insn.
4709 compare_from_rtx may call emit_queue, which would be deleted below
4710 if the scc insn fails. So call it ourselves before setting LAST.
4711 Likewise for do_pending_stack_adjust. */
4713 emit_queue ();
4714 do_pending_stack_adjust ();
4715 last = get_last_insn ();
4717 comparison
4718 = compare_from_rtx (op0, op1, code, unsignedp, mode, NULL_RTX);
4719 if (CONSTANT_P (comparison))
4721 if (GET_CODE (comparison) == CONST_INT)
4723 if (comparison == const0_rtx)
4724 return const0_rtx;
4726 #ifdef FLOAT_STORE_FLAG_VALUE
4727 else if (GET_CODE (comparison) == CONST_DOUBLE)
4729 if (comparison == CONST0_RTX (GET_MODE (comparison)))
4730 return const0_rtx;
4732 #endif
4733 else
4734 abort ();
4735 if (normalizep == 1)
4736 return const1_rtx;
4737 if (normalizep == -1)
4738 return constm1_rtx;
4739 return const_true_rtx;
4742 /* The code of COMPARISON may not match CODE if compare_from_rtx
4743 decided to swap its operands and reverse the original code.
4745 We know that compare_from_rtx returns either a CONST_INT or
4746 a new comparison code, so it is safe to just extract the
4747 code from COMPARISON. */
4748 code = GET_CODE (comparison);
4750 /* Get a reference to the target in the proper mode for this insn. */
4751 compare_mode = insn_data[(int) icode].operand[0].mode;
4752 subtarget = target;
4753 pred = insn_data[(int) icode].operand[0].predicate;
4754 if (preserve_subexpressions_p ()
4755 || ! (*pred) (subtarget, compare_mode))
4756 subtarget = gen_reg_rtx (compare_mode);
4758 pattern = GEN_FCN (icode) (subtarget);
4759 if (pattern)
4761 emit_insn (pattern);
4763 /* If we are converting to a wider mode, first convert to
4764 TARGET_MODE, then normalize. This produces better combining
4765 opportunities on machines that have a SIGN_EXTRACT when we are
4766 testing a single bit. This mostly benefits the 68k.
4768 If STORE_FLAG_VALUE does not have the sign bit set when
4769 interpreted in COMPARE_MODE, we can do this conversion as
4770 unsigned, which is usually more efficient. */
4771 if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (compare_mode))
4773 convert_move (target, subtarget,
4774 (GET_MODE_BITSIZE (compare_mode)
4775 <= HOST_BITS_PER_WIDE_INT)
4776 && 0 == (STORE_FLAG_VALUE
4777 & ((HOST_WIDE_INT) 1
4778 << (GET_MODE_BITSIZE (compare_mode) -1))));
4779 op0 = target;
4780 compare_mode = target_mode;
4782 else
4783 op0 = subtarget;
4785 /* If we want to keep subexpressions around, don't reuse our
4786 last target. */
4788 if (preserve_subexpressions_p ())
4789 subtarget = 0;
4791 /* Now normalize to the proper value in COMPARE_MODE. Sometimes
4792 we don't have to do anything. */
4793 if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
4795 /* STORE_FLAG_VALUE might be the most negative number, so write
4796 the comparison this way to avoid a compile-time warning. */
4797 else if (- normalizep == STORE_FLAG_VALUE)
4798 op0 = expand_unop (compare_mode, neg_optab, op0, subtarget, 0);
4800 /* We don't want to use STORE_FLAG_VALUE < 0 below since this
4801 makes it hard to use a value of just the sign bit due to
4802 ANSI integer constant typing rules. */
4803 else if (GET_MODE_BITSIZE (compare_mode) <= HOST_BITS_PER_WIDE_INT
4804 && (STORE_FLAG_VALUE
4805 & ((HOST_WIDE_INT) 1
4806 << (GET_MODE_BITSIZE (compare_mode) - 1))))
4807 op0 = expand_shift (RSHIFT_EXPR, compare_mode, op0,
4808 size_int (GET_MODE_BITSIZE (compare_mode) - 1),
4809 subtarget, normalizep == 1);
4810 else if (STORE_FLAG_VALUE & 1)
4812 op0 = expand_and (compare_mode, op0, const1_rtx, subtarget);
4813 if (normalizep == -1)
4814 op0 = expand_unop (compare_mode, neg_optab, op0, op0, 0);
4816 else
4817 abort ();
4819 /* If we were converting to a smaller mode, do the
4820 conversion now. */
4821 if (target_mode != compare_mode)
4823 convert_move (target, op0, 0);
4824 return target;
4826 else
4827 return op0;
4831 delete_insns_since (last);
4833 /* If expensive optimizations, use different pseudo registers for each
4834 insn, instead of reusing the same pseudo. This leads to better CSE,
4835 but slows down the compiler, since there are more pseudos. */
4836 subtarget = (!flag_expensive_optimizations
4837 && (target_mode == mode)) ? target : NULL_RTX;
4839 /* If we reached here, we can't do this with a scc insn. However, there
4840 are some comparisons that can be done directly. For example, if
4841 this is an equality comparison of integers, we can try to exclusive-or
4842 (or subtract) the two operands and use a recursive call to try the
4843 comparison with zero. Don't do any of these cases if branches are
4844 very cheap. */
4846 if (BRANCH_COST > 0
4847 && GET_MODE_CLASS (mode) == MODE_INT && (code == EQ || code == NE)
4848 && op1 != const0_rtx)
4850 tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
4851 OPTAB_WIDEN);
4853 if (tem == 0)
4854 tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
4855 OPTAB_WIDEN);
4856 if (tem != 0)
4857 tem = emit_store_flag (target, code, tem, const0_rtx,
4858 mode, unsignedp, normalizep);
4859 if (tem == 0)
4860 delete_insns_since (last);
4861 return tem;
4864 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
4865 the constant zero. Reject all other comparisons at this point. Only
4866 do LE and GT if branches are expensive since they are expensive on
4867 2-operand machines. */
4869 if (BRANCH_COST == 0
4870 || GET_MODE_CLASS (mode) != MODE_INT || op1 != const0_rtx
4871 || (code != EQ && code != NE
4872 && (BRANCH_COST <= 1 || (code != LE && code != GT))))
4873 return 0;
4875 /* See what we need to return. We can only return a 1, -1, or the
4876 sign bit. */
4878 if (normalizep == 0)
4880 if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
4881 normalizep = STORE_FLAG_VALUE;
4883 else if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
4884 && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
4885 == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))
4887 else
4888 return 0;
4891 /* Try to put the result of the comparison in the sign bit. Assume we can't
4892 do the necessary operation below. */
4894 tem = 0;
4896 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
4897 the sign bit set. */
4899 if (code == LE)
4901 /* This is destructive, so SUBTARGET can't be OP0. */
4902 if (rtx_equal_p (subtarget, op0))
4903 subtarget = 0;
4905 tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
4906 OPTAB_WIDEN);
4907 if (tem)
4908 tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
4909 OPTAB_WIDEN);
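  /* Illustrative check of the identity: A == 0 gives 0 | -1 == -1 (sign
     bit set), A == 5 gives 5 | 4 == 5 (sign bit clear), and A == -3
     gives a negative result (set), so the sign bit is set exactly when
     A <= 0.  */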
4912 /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
4913 number of bits in the mode of OP0, minus one. */
4915 if (code == GT)
4917 if (rtx_equal_p (subtarget, op0))
4918 subtarget = 0;
4920 tem = expand_shift (RSHIFT_EXPR, mode, op0,
4921 size_int (GET_MODE_BITSIZE (mode) - 1),
4922 subtarget, 0);
4923 tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
4924 OPTAB_WIDEN);
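  /* Illustrative check: with S == ((signed) A) >> BITS, S - A is -A < 0
     when A > 0, 0 when A == 0, and -1 - A >= 0 when A < 0, so the sign
     bit of S - A is set exactly when A > 0.  */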
4927 if (code == EQ || code == NE)
4929 /* For EQ or NE, one way to do the comparison is to apply an operation
4930 that converts the operand into a positive number if it is nonzero
4931 or zero if it was originally zero. Then, for EQ, we subtract 1 and
4932 for NE we negate. This puts the result in the sign bit. Then we
4933 normalize with a shift, if needed.
4935 Two operations that can do the above actions are ABS and FFS, so try
4936 them. If that doesn't work, and MODE is smaller than a full word,
4937 we can use zero-extension to the wider mode (an unsigned conversion)
4938 as the operation. */
4940 /* Note that ABS doesn't yield a positive number for INT_MIN, but
4941 that is compensated by the subsequent overflow when subtracting
4942 one / negating. */
4944 if (abs_optab->handlers[mode].insn_code != CODE_FOR_nothing)
4945 tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
4946 else if (ffs_optab->handlers[mode].insn_code != CODE_FOR_nothing)
4947 tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
4948 else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
4950 op0 = protect_from_queue (op0, 0);
4951 tem = convert_modes (word_mode, mode, op0, 1);
4952 mode = word_mode;
4955 if (tem != 0)
4957 if (code == EQ)
4958 tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
4959 0, OPTAB_WIDEN);
4960 else
4961 tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
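  /* Illustrative check: for EQ, abs (A) - 1 is negative only when
     A == 0 (A == 6 gives 5, sign clear; A == 0 gives -1, sign set);
     for NE, - abs (A) is negative exactly when A != 0.  */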
4964 /* If we couldn't do it that way, for NE we can "or" the two's complement
4965 of the value with itself. For EQ, we take the one's complement of
4966 that "or", which is an extra insn, so we only handle EQ if branches
4967 are expensive. */
4969 if (tem == 0 && (code == NE || BRANCH_COST > 1))
4971 if (rtx_equal_p (subtarget, op0))
4972 subtarget = 0;
4974 tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
4975 tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
4976 OPTAB_WIDEN);
4978 if (tem && code == EQ)
4979 tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
4983 if (tem && normalizep)
4984 tem = expand_shift (RSHIFT_EXPR, mode, tem,
4985 size_int (GET_MODE_BITSIZE (mode) - 1),
4986 subtarget, normalizep == 1);
4988 if (tem)
4990 if (GET_MODE (tem) != target_mode)
4992 convert_move (target, tem, 0);
4993 tem = target;
4995 else if (!subtarget)
4997 emit_move_insn (target, tem);
4998 tem = target;
5001 else
5002 delete_insns_since (last);
5004 return tem;
5007 /* Like emit_store_flag, but always succeeds. */
5009 rtx
5010 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5011 enum machine_mode mode, int unsignedp, int normalizep)
5013 rtx tem, label;
5015 /* First see if emit_store_flag can do the job. */
5016 tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5017 if (tem != 0)
5018 return tem;
5020 if (normalizep == 0)
5021 normalizep = 1;
5023 /* If this failed, we have to do this with set/compare/jump/set code. */
5025 if (!REG_P (target)
5026 || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5027 target = gen_reg_rtx (GET_MODE (target));
5029 emit_move_insn (target, const1_rtx);
5030 label = gen_label_rtx ();
5031 do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5032 NULL_RTX, label);
5034 emit_move_insn (target, const0_rtx);
5035 emit_label (label);
5037 return target;
5040 /* Perform a possibly multi-word comparison and conditional jump to LABEL
5041 if ARG1 OP ARG2 is true, where ARG1 and ARG2 are of mode MODE.
5043 The algorithm is based on the code in expr.c:do_jump.
5045 Note that this does not perform a general comparison. Only variants
5046 generated within expmed.c are correctly handled, others abort (but could
5047 be handled if needed). */
5049 static void
5050 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
5051 rtx label)
5053 /* If this mode is an integer too wide to compare properly,
5054 compare word by word. Rely on cse to optimize constant cases. */
5056 if (GET_MODE_CLASS (mode) == MODE_INT
5057 && ! can_compare_p (op, mode, ccp_jump))
5059 rtx label2 = gen_label_rtx ();
5061 switch (op)
5063 case LTU:
5064 do_jump_by_parts_greater_rtx (mode, 1, arg2, arg1, label2, label);
5065 break;
5067 case LEU:
5068 do_jump_by_parts_greater_rtx (mode, 1, arg1, arg2, label, label2);
5069 break;
5071 case LT:
5072 do_jump_by_parts_greater_rtx (mode, 0, arg2, arg1, label2, label);
5073 break;
5075 case GT:
5076 do_jump_by_parts_greater_rtx (mode, 0, arg1, arg2, label2, label);
5077 break;
5079 case GE:
5080 do_jump_by_parts_greater_rtx (mode, 0, arg2, arg1, label, label2);
5081 break;
5083 /* do_jump_by_parts_equality_rtx compares with zero. Luckily
5084 those are the only equality operations we do. */
5085 case EQ:
5086 if (arg2 != const0_rtx || mode != GET_MODE(arg1))
5087 abort ();
5088 do_jump_by_parts_equality_rtx (arg1, label2, label);
5089 break;
5091 case NE:
5092 if (arg2 != const0_rtx || mode != GET_MODE(arg1))
5093 abort ();
5094 do_jump_by_parts_equality_rtx (arg1, label, label2);
5095 break;
5097 default:
5098 abort ();
5101 emit_label (label2);
5103 else
5104 emit_cmp_and_jump_insns (arg1, arg2, op, NULL_RTX, mode, 0, label);