2002-11-21 Phil Edwards <pme@gcc.gnu.org>
[official-gcc.git] / gcc / expmed.c
blob6b365b9e0ad0f8dbe12ff69e7ed0cba320aa5c44
1 /* Medium-level subroutines: convert bit-field store and extract
2 and shifts, multiplies and divides to rtl instructions.
3 Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
4 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 2, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING. If not, write to the Free
20 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
21 02111-1307, USA. */
24 #include "config.h"
25 #include "system.h"
26 #include "toplev.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "tm_p.h"
30 #include "flags.h"
31 #include "insn-config.h"
32 #include "expr.h"
33 #include "optabs.h"
34 #include "real.h"
35 #include "recog.h"
36 #include "langhooks.h"
/* Forward declarations of the file-local helpers defined below.
   (PARAMS is GCC's pre-ISO-C prototype wrapper of this era.)  */
38 static void store_fixed_bit_field PARAMS ((rtx, unsigned HOST_WIDE_INT,
39 unsigned HOST_WIDE_INT,
40 unsigned HOST_WIDE_INT, rtx));
41 static void store_split_bit_field PARAMS ((rtx, unsigned HOST_WIDE_INT,
42 unsigned HOST_WIDE_INT, rtx));
43 static rtx extract_fixed_bit_field PARAMS ((enum machine_mode, rtx,
44 unsigned HOST_WIDE_INT,
45 unsigned HOST_WIDE_INT,
46 unsigned HOST_WIDE_INT,
47 rtx, int));
48 static rtx mask_rtx PARAMS ((enum machine_mode, int,
49 int, int));
50 static rtx lshift_value PARAMS ((enum machine_mode, rtx,
51 int, int));
52 static rtx extract_split_bit_field PARAMS ((rtx, unsigned HOST_WIDE_INT,
53 unsigned HOST_WIDE_INT, int));
54 static void do_cmp_and_jump PARAMS ((rtx, rtx, enum rtx_code,
55 enum machine_mode, rtx));
57 /* Nonzero means divides or modulus operations are relatively cheap for
58 powers of two, so don't use branches; emit the operation instead.
59 Usually, this will mean that the MD file will emit non-branch
60 sequences. */
/* Both flags are computed in init_expmed by comparing the rtx_cost of a
   DIV/MOD against twice the cost of an addition.  */
62 static int sdiv_pow2_cheap, smod_pow2_cheap;
/* Default: unaligned accesses are "slow" exactly when the target
   requires strict alignment.  Targets may override this macro.  */
64 #ifndef SLOW_UNALIGNED_ACCESS
65 #define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
66 #endif
68 /* For compilers that support multiple targets with different word sizes,
69 MAX_BITS_PER_WORD contains the biggest value of BITS_PER_WORD. An example
70 is the H8/300(H) compiler. */
72 #ifndef MAX_BITS_PER_WORD
73 #define MAX_BITS_PER_WORD BITS_PER_WORD
74 #endif
76 /* Reduce conditional compilation elsewhere. */
/* When the target lacks insv/extv/extzv insn patterns, supply dummy
   definitions so the code below can test HAVE_foo at run time rather
   than wrapping every use in #ifdef.  The gen_foo dummies are never
   reached when HAVE_foo is 0.  */
77 #ifndef HAVE_insv
78 #define HAVE_insv 0
79 #define CODE_FOR_insv CODE_FOR_nothing
80 #define gen_insv(a,b,c,d) NULL_RTX
81 #endif
82 #ifndef HAVE_extv
83 #define HAVE_extv 0
84 #define CODE_FOR_extv CODE_FOR_nothing
85 #define gen_extv(a,b,c,d) NULL_RTX
86 #endif
87 #ifndef HAVE_extzv
88 #define HAVE_extzv 0
89 #define CODE_FOR_extzv CODE_FOR_nothing
90 #define gen_extzv(a,b,c,d) NULL_RTX
91 #endif
93 /* Cost of various pieces of RTL. Note that some of these are indexed by
94 shift count and some by mode. */
/* All of these tables are filled in once by init_expmed.  */
95 static int add_cost, negate_cost, zero_cost;
96 static int shift_cost[MAX_BITS_PER_WORD];
97 static int shiftadd_cost[MAX_BITS_PER_WORD];
98 static int shiftsub_cost[MAX_BITS_PER_WORD];
99 static int mul_cost[NUM_MACHINE_MODES];
100 static int div_cost[NUM_MACHINE_MODES];
101 static int mul_widen_cost[NUM_MACHINE_MODES];
102 static int mul_highpart_cost[NUM_MACHINE_MODES];
/* One-time initialization of the cost tables above.  Builds candidate
   insns in a scratch sequence, asks recog whether the target matches
   them, and records rtx_cost for each shift count and machine mode;
   also decides sdiv_pow2_cheap/smod_pow2_cheap.  The scratch sequence
   is discarded at the end, so no insns are actually emitted.  */
104 void
105 init_expmed ()
107 rtx reg, shift_insn, shiftadd_insn, shiftsub_insn;
108 int dummy;
109 int m;
110 enum machine_mode mode, wider_mode;
/* Collect everything into a throwaway sequence; only the costs matter.  */
112 start_sequence ();
114 /* This is "some random pseudo register" for purposes of calling recog
115 to see what insns exist. */
116 reg = gen_rtx_REG (word_mode, 10000);
118 zero_cost = rtx_cost (const0_rtx, 0);
119 add_cost = rtx_cost (gen_rtx_PLUS (word_mode, reg, reg), SET);
/* Template insns.  The constant operand in each is overwritten with
   the actual shift count (or 1<<count multiplier) in the loop below,
   then the pattern is re-recognized.  */
121 shift_insn = emit_insn (gen_rtx_SET (VOIDmode, reg,
122 gen_rtx_ASHIFT (word_mode, reg,
123 const0_rtx)));
125 shiftadd_insn
126 = emit_insn (gen_rtx_SET (VOIDmode, reg,
127 gen_rtx_PLUS (word_mode,
128 gen_rtx_MULT (word_mode,
129 reg, const0_rtx),
130 reg)));
132 shiftsub_insn
133 = emit_insn (gen_rtx_SET (VOIDmode, reg,
134 gen_rtx_MINUS (word_mode,
135 gen_rtx_MULT (word_mode,
136 reg, const0_rtx),
137 reg)));
139 init_recog ();
141 shift_cost[0] = 0;
142 shiftadd_cost[0] = shiftsub_cost[0] = add_cost;
144 for (m = 1; m < MAX_BITS_PER_WORD; m++)
/* Shift-add/shift-sub use the mult-by-power-of-two form, so the
   multiplier for a shift count of M is 1 << M.  */
146 rtx c_int = GEN_INT ((HOST_WIDE_INT) 1 << m);
/* Default to a prohibitively large cost when the insn is unmatched.  */
147 shift_cost[m] = shiftadd_cost[m] = shiftsub_cost[m] = 32000;
149 XEXP (SET_SRC (PATTERN (shift_insn)), 1) = GEN_INT (m);
150 if (recog (PATTERN (shift_insn), shift_insn, &dummy) >= 0)
151 shift_cost[m] = rtx_cost (SET_SRC (PATTERN (shift_insn)), SET);
153 XEXP (XEXP (SET_SRC (PATTERN (shiftadd_insn)), 0), 1) = c_int;
154 if (recog (PATTERN (shiftadd_insn), shiftadd_insn, &dummy) >= 0)
155 shiftadd_cost[m] = rtx_cost (SET_SRC (PATTERN (shiftadd_insn)), SET);
157 XEXP (XEXP (SET_SRC (PATTERN (shiftsub_insn)), 0), 1) = c_int;
158 if (recog (PATTERN (shiftsub_insn), shiftsub_insn, &dummy) >= 0)
159 shiftsub_cost[m] = rtx_cost (SET_SRC (PATTERN (shiftsub_insn)), SET);
162 negate_cost = rtx_cost (gen_rtx_NEG (word_mode, reg), SET);
/* Power-of-two division/modulus count as "cheap" when they cost no
   more than two additions; 32 is just a representative power of two.  */
164 sdiv_pow2_cheap
165 = (rtx_cost (gen_rtx_DIV (word_mode, reg, GEN_INT (32)), SET)
166 <= 2 * add_cost);
167 smod_pow2_cheap
168 = (rtx_cost (gen_rtx_MOD (word_mode, reg, GEN_INT (32)), SET)
169 <= 2 * add_cost);
/* Walk every integer mode, recording divide and multiply costs; where
   a wider mode exists, also record widening-multiply and high-part
   multiply costs (the latter modeled as a truncated shifted widening
   multiply).  */
171 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
172 mode != VOIDmode;
173 mode = GET_MODE_WIDER_MODE (mode))
175 reg = gen_rtx_REG (mode, 10000);
176 div_cost[(int) mode] = rtx_cost (gen_rtx_UDIV (mode, reg, reg), SET);
177 mul_cost[(int) mode] = rtx_cost (gen_rtx_MULT (mode, reg, reg), SET);
178 wider_mode = GET_MODE_WIDER_MODE (mode);
179 if (wider_mode != VOIDmode)
181 mul_widen_cost[(int) wider_mode]
182 = rtx_cost (gen_rtx_MULT (wider_mode,
183 gen_rtx_ZERO_EXTEND (wider_mode, reg),
184 gen_rtx_ZERO_EXTEND (wider_mode, reg)),
185 SET);
186 mul_highpart_cost[(int) mode]
187 = rtx_cost (gen_rtx_TRUNCATE
188 (mode,
189 gen_rtx_LSHIFTRT (wider_mode,
190 gen_rtx_MULT (wider_mode,
191 gen_rtx_ZERO_EXTEND
192 (wider_mode, reg),
193 gen_rtx_ZERO_EXTEND
194 (wider_mode, reg)),
195 GEN_INT (GET_MODE_BITSIZE (mode)))),
196 SET);
/* Discard the scratch sequence built above.  */
200 end_sequence ();
203 /* Return an rtx representing minus the value of X.
204 MODE is the intended mode of the result,
205 useful if X is a CONST_INT. */
/* NOTE(review): the "rtx" return-type line (original line ~207) seems
   to have been lost in this dump of the file; the function returns an
   rtx.  */
208 negate_rtx (mode, x)
209 enum machine_mode mode;
210 rtx x;
/* Try to fold the negation at compile time first ...  */
212 rtx result = simplify_unary_operation (NEG, mode, x, mode);
/* ... and fall back to emitting a real NEG insn when folding fails.  */
214 if (result == 0)
215 result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
217 return result;
220 /* Report on the availability of insv/extv/extzv and the desired mode
221 of each of their operands. Returns MAX_MACHINE_MODE if HAVE_foo
222 is false; else the mode of the specified operand. If OPNO is -1,
223 all the caller cares about is whether the insn is available. */
224 enum machine_mode
225 mode_for_extraction (pattern, opno)
226 enum extraction_pattern pattern;
227 int opno;
229 const struct insn_data *data;
/* Pick the insn_data entry for the requested pattern; return
   MAX_MACHINE_MODE when the target lacks that pattern (the HAVE_foo
   macros default to 0 near the top of this file).  */
231 switch (pattern)
233 case EP_insv:
234 if (HAVE_insv)
236 data = &insn_data[CODE_FOR_insv];
237 break;
239 return MAX_MACHINE_MODE;
241 case EP_extv:
242 if (HAVE_extv)
244 data = &insn_data[CODE_FOR_extv];
245 break;
247 return MAX_MACHINE_MODE;
249 case EP_extzv:
250 if (HAVE_extzv)
252 data = &insn_data[CODE_FOR_extzv];
253 break;
255 return MAX_MACHINE_MODE;
/* Unknown extraction pattern: internal compiler error.  */
257 default:
258 abort ();
/* Caller only asked about availability, not a specific operand.  */
261 if (opno == -1)
262 return VOIDmode;
264 /* Everyone who uses this function used to follow it with
265 if (result == VOIDmode) result = word_mode; */
266 if (data->operand[opno].mode == VOIDmode)
267 return word_mode;
268 return data->operand[opno].mode;
272 /* Generate code to store value from rtx VALUE
273 into a bit-field within structure STR_RTX
274 containing BITSIZE bits starting at bit BITNUM.
275 FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
276 ALIGN is the alignment that STR_RTX is known to have.
277 TOTAL_SIZE is the size of the structure in bytes, or -1 if varying. */
279 /* ??? Note that there are two different ideas here for how
280 to determine the size to count bits within, for a register.
281 One is BITS_PER_WORD, and the other is the size of operand 3
282 of the insv pattern.
284 If operand 3 of the insv pattern is VOIDmode, then we will use BITS_PER_WORD
285 else, we use the mode of operand 3. */
/* NOTE(review): the "rtx" return-type line (original line ~287) appears
   to be missing from this dump; the function returns VALUE as an rtx.  */
288 store_bit_field (str_rtx, bitsize, bitnum, fieldmode, value, total_size)
289 rtx str_rtx;
290 unsigned HOST_WIDE_INT bitsize;
291 unsigned HOST_WIDE_INT bitnum;
292 enum machine_mode fieldmode;
293 rtx value;
294 HOST_WIDE_INT total_size;
/* UNIT is the number of bits we count BITNUM within: bytes for memory,
   words for registers.  OFFSET/BITPOS split BITNUM accordingly.  */
296 unsigned int unit
297 = (GET_CODE (str_rtx) == MEM) ? BITS_PER_UNIT : BITS_PER_WORD;
298 unsigned HOST_WIDE_INT offset = bitnum / unit;
299 unsigned HOST_WIDE_INT bitpos = bitnum % unit;
300 rtx op0 = str_rtx;
301 int byte_offset;
303 enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);
305 /* Discount the part of the structure before the desired byte.
306 We need to know how many bytes are safe to reference after it. */
307 if (total_size >= 0)
308 total_size -= (bitpos / BIGGEST_ALIGNMENT
309 * (BIGGEST_ALIGNMENT / BITS_PER_UNIT));
/* Strip SUBREGs, folding their byte offsets into OFFSET.  */
311 while (GET_CODE (op0) == SUBREG)
313 /* The following line once was done only if WORDS_BIG_ENDIAN,
314 but I think that is a mistake. WORDS_BIG_ENDIAN is
315 meaningful at a much higher level; when structures are copied
316 between memory and regs, the higher-numbered regs
317 always get higher addresses. */
318 offset += (SUBREG_BYTE (op0) / UNITS_PER_WORD);
319 /* We used to adjust BITPOS here, but now we do the whole adjustment
320 right after the loop. */
321 op0 = SUBREG_REG (op0);
324 value = protect_from_queue (value, 0);
326 if (flag_force_mem)
/* Temporarily disable CONCAT generation while forcing VALUE out of
   memory.  */
328 int old_generating_concat_p = generating_concat_p;
329 generating_concat_p = 0;
330 value = force_not_mem (value);
331 generating_concat_p = old_generating_concat_p;
334 /* If the target is a register, overwriting the entire object, or storing
335 a full-word or multi-word field can be done with just a SUBREG.
337 If the target is memory, storing any naturally aligned field can be
338 done with a simple store. For targets that support fast unaligned
339 memory, any naturally sized, unit aligned field can be done directly. */
341 byte_offset = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
342 + (offset * UNITS_PER_WORD);
/* Fast path: the store degenerates to a plain move insn.  */
344 if (bitpos == 0
345 && bitsize == GET_MODE_BITSIZE (fieldmode)
346 && (GET_CODE (op0) != MEM
347 ? ((GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD
348 || GET_MODE_SIZE (GET_MODE (op0)) == GET_MODE_SIZE (fieldmode))
349 && byte_offset % GET_MODE_SIZE (fieldmode) == 0)
350 : (! SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0))
351 || (offset * BITS_PER_UNIT % bitsize == 0
352 && MEM_ALIGN (op0) % GET_MODE_BITSIZE (fieldmode) == 0))))
354 if (GET_MODE (op0) != fieldmode)
356 if (GET_CODE (op0) == SUBREG)
358 if (GET_MODE (SUBREG_REG (op0)) == fieldmode
359 || GET_MODE_CLASS (fieldmode) == MODE_INT
360 || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT)
361 op0 = SUBREG_REG (op0);
362 else
363 /* Else we've got some float mode source being extracted into
364 a different float mode destination -- this combination of
365 subregs results in Severe Tire Damage. */
366 abort ();
368 if (GET_CODE (op0) == REG)
369 op0 = gen_rtx_SUBREG (fieldmode, op0, byte_offset);
370 else
371 op0 = adjust_address (op0, fieldmode, offset);
373 emit_move_insn (op0, value);
374 return value;
377 /* Make sure we are playing with integral modes. Pun with subregs
378 if we aren't. This must come after the entire register case above,
379 since that case is valid for any mode. The following cases are only
380 valid for integral modes. */
382 enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
383 if (imode != GET_MODE (op0))
385 if (GET_CODE (op0) == MEM)
386 op0 = adjust_address (op0, imode, 0);
387 else if (imode != BLKmode)
388 op0 = gen_lowpart (imode, op0);
389 else
390 abort ();
394 /* We may be accessing data outside the field, which means
395 we can alias adjacent data. */
396 if (GET_CODE (op0) == MEM)
/* Work on a copy so clearing the alias set/expr does not disturb the
   caller's MEM.  */
398 op0 = shallow_copy_rtx (op0);
399 set_mem_alias_set (op0, 0);
400 set_mem_expr (op0, 0);
403 /* If OP0 is a register, BITPOS must count within a word.
404 But as we have it, it counts within whatever size OP0 now has.
405 On a bigendian machine, these are not the same, so convert. */
406 if (BYTES_BIG_ENDIAN
407 && !FUNCTION_ARG_REG_LITTLE_ENDIAN
408 && GET_CODE (op0) != MEM
409 && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
410 bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
412 /* Storing an lsb-aligned field in a register
413 can be done with a movestrict instruction. */
415 if (GET_CODE (op0) != MEM
416 && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
417 && bitsize == GET_MODE_BITSIZE (fieldmode)
418 && (movstrict_optab->handlers[(int) fieldmode].insn_code
419 != CODE_FOR_nothing))
421 int icode = movstrict_optab->handlers[(int) fieldmode].insn_code;
423 /* Get appropriate low part of the value being stored. */
424 if (GET_CODE (value) == CONST_INT || GET_CODE (value) == REG)
425 value = gen_lowpart (fieldmode, value);
426 else if (!(GET_CODE (value) == SYMBOL_REF
427 || GET_CODE (value) == LABEL_REF
428 || GET_CODE (value) == CONST))
429 value = convert_to_mode (fieldmode, value, 0);
/* Satisfy the movstrict insn's predicate for the source operand.  */
431 if (! (*insn_data[icode].operand[1].predicate) (value, fieldmode))
432 value = copy_to_mode_reg (fieldmode, value);
434 if (GET_CODE (op0) == SUBREG)
436 if (GET_MODE (SUBREG_REG (op0)) == fieldmode
437 || GET_MODE_CLASS (fieldmode) == MODE_INT
438 || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT)
439 op0 = SUBREG_REG (op0);
440 else
441 /* Else we've got some float mode source being extracted into
442 a different float mode destination -- this combination of
443 subregs results in Severe Tire Damage. */
444 abort ();
447 emit_insn (GEN_FCN (icode)
448 (gen_rtx_SUBREG (fieldmode, op0,
449 (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
450 + (offset * UNITS_PER_WORD)),
451 value));
453 return value;
456 /* Handle fields bigger than a word. */
458 if (bitsize > BITS_PER_WORD)
460 /* Here we transfer the words of the field
461 in the order least significant first.
462 This is because the most significant word is the one which may
463 be less than full.
464 However, only do that if the value is not BLKmode. */
466 unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
467 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
468 unsigned int i;
470 /* This is the mode we must force value to, so that there will be enough
471 subwords to extract. Note that fieldmode will often (always?) be
472 VOIDmode, because that is what store_field uses to indicate that this
473 is a bit field, but passing VOIDmode to operand_subword_force will
474 result in an abort. */
475 fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
/* Recurse once per word; each recursive call hits a narrower case.  */
477 for (i = 0; i < nwords; i++)
479 /* If I is 0, use the low-order word in both field and target;
480 if I is 1, use the next to lowest word; and so on. */
481 unsigned int wordnum = (backwards ? nwords - i - 1 : i);
482 unsigned int bit_offset = (backwards
483 ? MAX ((int) bitsize - ((int) i + 1)
484 * BITS_PER_WORD,
486 : (int) i * BITS_PER_WORD);
488 store_bit_field (op0, MIN (BITS_PER_WORD,
489 bitsize - i * BITS_PER_WORD),
490 bitnum + bit_offset, word_mode,
491 operand_subword_force (value, wordnum,
492 (GET_MODE (value) == VOIDmode
493 ? fieldmode
494 : GET_MODE (value))),
495 total_size);
497 return value;
500 /* From here on we can assume that the field to be stored in is
501 a full-word (whatever type that is), since it is shorter than a word. */
503 /* OFFSET is the number of words or bytes (UNIT says which)
504 from STR_RTX to the first word or byte containing part of the field. */
506 if (GET_CODE (op0) != MEM)
508 if (offset != 0
509 || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
511 if (GET_CODE (op0) != REG)
513 /* Since this is a destination (lvalue), we can't copy it to a
514 pseudo. We can trivially remove a SUBREG that does not
515 change the size of the operand. Such a SUBREG may have been
516 added above. Otherwise, abort. */
517 if (GET_CODE (op0) == SUBREG
518 && (GET_MODE_SIZE (GET_MODE (op0))
519 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)))))
520 op0 = SUBREG_REG (op0);
521 else
522 abort ();
524 op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
525 op0, (offset * UNITS_PER_WORD));
527 offset = 0;
529 else
530 op0 = protect_from_queue (op0, 1);
532 /* If VALUE is a floating-point mode, access it as an integer of the
533 corresponding size. This can occur on a machine with 64 bit registers
534 that uses SFmode for float. This can also occur for unaligned float
535 structure fields. */
536 if (GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
537 && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
538 value = gen_lowpart (word_mode, value);
540 /* Now OFFSET is nonzero only if OP0 is memory
541 and is therefore always measured in bytes. */
/* Try the target's insv pattern; if anything disqualifies it we fall
   through (or goto insv_loses) to the shift-and-mask fallback.  */
543 if (HAVE_insv
544 && GET_MODE (value) != BLKmode
545 && !(bitsize == 1 && GET_CODE (value) == CONST_INT)
546 /* Ensure insv's size is wide enough for this field. */
547 && (GET_MODE_BITSIZE (op_mode) >= bitsize)
548 && ! ((GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
549 && (bitsize + bitpos > GET_MODE_BITSIZE (op_mode))))
551 int xbitpos = bitpos;
552 rtx value1;
553 rtx xop0 = op0;
/* Remember where we are so the whole attempt can be deleted if the
   insv pattern fails to generate.  */
554 rtx last = get_last_insn ();
555 rtx pat;
556 enum machine_mode maxmode = mode_for_extraction (EP_insv, 3);
557 int save_volatile_ok = volatile_ok;
559 volatile_ok = 1;
561 /* If this machine's insv can only insert into a register, copy OP0
562 into a register and save it back later. */
563 /* This used to check flag_force_mem, but that was a serious
564 de-optimization now that flag_force_mem is enabled by -O2. */
565 if (GET_CODE (op0) == MEM
566 && ! ((*insn_data[(int) CODE_FOR_insv].operand[0].predicate)
567 (op0, VOIDmode)))
569 rtx tempreg;
570 enum machine_mode bestmode;
572 /* Get the mode to use for inserting into this field. If OP0 is
573 BLKmode, get the smallest mode consistent with the alignment. If
574 OP0 is a non-BLKmode object that is no wider than MAXMODE, use its
575 mode. Otherwise, use the smallest mode containing the field. */
577 if (GET_MODE (op0) == BLKmode
578 || GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (maxmode))
579 bestmode
580 = get_best_mode (bitsize, bitnum, MEM_ALIGN (op0), maxmode,
581 MEM_VOLATILE_P (op0));
582 else
583 bestmode = GET_MODE (op0);
585 if (bestmode == VOIDmode
586 || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
587 && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
588 goto insv_loses;
590 /* Adjust address to point to the containing unit of that mode.
591 Compute offset as multiple of this unit, counting in bytes. */
592 unit = GET_MODE_BITSIZE (bestmode);
593 offset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
594 bitpos = bitnum % unit;
595 op0 = adjust_address (op0, bestmode, offset);
597 /* Fetch that unit, store the bitfield in it, then store
598 the unit. */
599 tempreg = copy_to_reg (op0);
600 store_bit_field (tempreg, bitsize, bitpos, fieldmode, value,
601 total_size);
602 emit_move_insn (op0, tempreg);
603 return value;
605 volatile_ok = save_volatile_ok;
607 /* Add OFFSET into OP0's address. */
608 if (GET_CODE (xop0) == MEM)
609 xop0 = adjust_address (xop0, byte_mode, offset);
611 /* If xop0 is a register, we need it in MAXMODE
612 to make it acceptable to the format of insv. */
613 if (GET_CODE (xop0) == SUBREG)
614 /* We can't just change the mode, because this might clobber op0,
615 and we will need the original value of op0 if insv fails. */
616 xop0 = gen_rtx_SUBREG (maxmode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
617 if (GET_CODE (xop0) == REG && GET_MODE (xop0) != maxmode)
618 xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
620 /* On big-endian machines, we count bits from the most significant.
621 If the bit field insn does not, we must invert. */
623 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
624 xbitpos = unit - bitsize - xbitpos;
626 /* We have been counting XBITPOS within UNIT.
627 Count instead within the size of the register. */
628 if (BITS_BIG_ENDIAN && GET_CODE (xop0) != MEM)
629 xbitpos += GET_MODE_BITSIZE (maxmode) - unit;
631 unit = GET_MODE_BITSIZE (maxmode);
633 /* Convert VALUE to maxmode (which insv insn wants) in VALUE1. */
634 value1 = value;
635 if (GET_MODE (value) != maxmode)
637 if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
639 /* Optimization: Don't bother really extending VALUE
640 if it has all the bits we will actually use. However,
641 if we must narrow it, be sure we do it correctly. */
643 if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (maxmode))
645 rtx tmp;
647 tmp = simplify_subreg (maxmode, value1, GET_MODE (value), 0);
648 if (! tmp)
649 tmp = simplify_gen_subreg (maxmode,
650 force_reg (GET_MODE (value),
651 value1),
652 GET_MODE (value), 0);
653 value1 = tmp;
655 else
656 value1 = gen_lowpart (maxmode, value1);
658 else if (GET_CODE (value) == CONST_INT)
659 value1 = gen_int_mode (INTVAL (value), maxmode);
660 else if (!CONSTANT_P (value))
661 /* Parse phase is supposed to make VALUE's data type
662 match that of the component reference, which is a type
663 at least as wide as the field; so VALUE should have
664 a mode that corresponds to that type. */
665 abort ();
668 /* If this machine's insv insists on a register,
669 get VALUE1 into a register. */
670 if (! ((*insn_data[(int) CODE_FOR_insv].operand[3].predicate)
671 (value1, maxmode)))
672 value1 = force_reg (maxmode, value1);
674 pat = gen_insv (xop0, GEN_INT (bitsize), GEN_INT (xbitpos), value1);
675 if (pat)
676 emit_insn (pat);
677 else
/* The insv pattern declined; undo everything since LAST and fall
   back to the shift-and-mask store.  */
679 delete_insns_since (last);
680 store_fixed_bit_field (op0, offset, bitsize, bitpos, value);
683 else
684 insv_loses:
685 /* Insv is not available; store using shifts and boolean ops. */
686 store_fixed_bit_field (op0, offset, bitsize, bitpos, value);
687 return value;
690 /* Use shifts and boolean operations to store VALUE
691 into a bit field of width BITSIZE
692 in a memory location specified by OP0 except offset by OFFSET bytes.
693 (OFFSET must be 0 if OP0 is a register.)
694 The field starts at position BITPOS within the byte.
695 (If OP0 is a register, it may be a full word or a narrower mode,
696 but BITPOS still counts within a full word,
697 which is significant on bigendian machines.)
699 Note that protect_from_queue has already been done on OP0 and VALUE. */
701 static void
702 store_fixed_bit_field (op0, offset, bitsize, bitpos, value)
703 rtx op0;
704 unsigned HOST_WIDE_INT offset, bitsize, bitpos;
705 rtx value;
707 enum machine_mode mode;
708 unsigned int total_bits = BITS_PER_WORD;
709 rtx subtarget, temp;
/* Flags for the constant-VALUE special cases: skip the IOR when the
   field value is all zeros, skip the AND-clear when it is all ones.  */
710 int all_zero = 0;
711 int all_one = 0;
713 /* There is a case not handled here:
714 a structure with a known alignment of just a halfword
715 and a field split across two aligned halfwords within the structure.
716 Or likewise a structure with a known alignment of just a byte
717 and a field split across two bytes.
718 Such cases are not supposed to be able to occur. */
720 if (GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
722 if (offset != 0)
723 abort ();
724 /* Special treatment for a bit field split across two registers. */
725 if (bitsize + bitpos > BITS_PER_WORD)
727 store_split_bit_field (op0, bitsize, bitpos, value);
728 return;
731 else
733 /* Get the proper mode to use for this field. We want a mode that
734 includes the entire field. If such a mode would be larger than
735 a word, we won't be doing the extraction the normal way.
736 We don't want a mode bigger than the destination. */
738 mode = GET_MODE (op0);
739 if (GET_MODE_BITSIZE (mode) == 0
740 || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
741 mode = word_mode;
742 mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
743 MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
745 if (mode == VOIDmode)
747 /* The only way this should occur is if the field spans word
748 boundaries. */
749 store_split_bit_field (op0, bitsize, bitpos + offset * BITS_PER_UNIT,
750 value);
751 return;
754 total_bits = GET_MODE_BITSIZE (mode);
756 /* Make sure bitpos is valid for the chosen mode. Adjust BITPOS to
757 be in the range 0 to total_bits-1, and put any excess bytes in
758 OFFSET. */
759 if (bitpos >= total_bits)
761 offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
762 bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
763 * BITS_PER_UNIT);
766 /* Get ref to an aligned byte, halfword, or word containing the field.
767 Adjust BITPOS to be position within a word,
768 and OFFSET to be the offset of that word.
769 Then alter OP0 to refer to that word. */
770 bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
771 offset -= (offset % (total_bits / BITS_PER_UNIT));
772 op0 = adjust_address (op0, mode, offset);
775 mode = GET_MODE (op0);
777 /* Now MODE is either some integral mode for a MEM as OP0,
778 or is a full-word for a REG as OP0. TOTAL_BITS corresponds.
779 The bit field is contained entirely within OP0.
780 BITPOS is the starting bit number within OP0.
781 (OP0's mode may actually be narrower than MODE.) */
783 if (BYTES_BIG_ENDIAN)
784 /* BITPOS is the distance between our msb
785 and that of the containing datum.
786 Convert it to the distance from the lsb. */
787 bitpos = total_bits - bitsize - bitpos;
789 /* Now BITPOS is always the distance between our lsb
790 and that of OP0. */
792 /* Shift VALUE left by BITPOS bits. If VALUE is not constant,
793 we must first convert its mode to MODE. */
795 if (GET_CODE (value) == CONST_INT)
797 HOST_WIDE_INT v = INTVAL (value);
/* Mask V down to the field width before testing for all-zero /
   all-one; guard the shift when bitsize equals the host word width.  */
799 if (bitsize < HOST_BITS_PER_WIDE_INT)
800 v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
802 if (v == 0)
803 all_zero = 1;
804 else if ((bitsize < HOST_BITS_PER_WIDE_INT
805 && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
806 || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
807 all_one = 1;
809 value = lshift_value (mode, value, bitpos, bitsize);
811 else
/* Non-constant VALUE: mask off excess high bits only when they could
   land inside OP0 (i.e. the field does not reach MODE's msb).  */
813 int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
814 && bitpos + bitsize != GET_MODE_BITSIZE (mode));
816 if (GET_MODE (value) != mode)
818 if ((GET_CODE (value) == REG || GET_CODE (value) == SUBREG)
819 && GET_MODE_SIZE (mode) < GET_MODE_SIZE (GET_MODE (value)))
820 value = gen_lowpart (mode, value);
821 else
822 value = convert_to_mode (mode, value, 1);
825 if (must_and)
826 value = expand_binop (mode, and_optab, value,
827 mask_rtx (mode, 0, bitsize, 0),
828 NULL_RTX, 1, OPTAB_LIB_WIDEN);
829 if (bitpos > 0)
830 value = expand_shift (LSHIFT_EXPR, mode, value,
831 build_int_2 (bitpos, 0), NULL_RTX, 1);
834 /* Now clear the chosen bits in OP0,
835 except that if VALUE is -1 we need not bother. */
837 subtarget = (GET_CODE (op0) == REG || ! flag_force_mem) ? op0 : 0;
839 if (! all_one)
/* AND with the complemented field mask clears the field bits.  */
841 temp = expand_binop (mode, and_optab, op0,
842 mask_rtx (mode, bitpos, bitsize, 1),
843 subtarget, 1, OPTAB_LIB_WIDEN);
844 subtarget = temp;
846 else
847 temp = op0;
849 /* Now logical-or VALUE into OP0, unless it is zero. */
851 if (! all_zero)
852 temp = expand_binop (mode, ior_optab, temp, value,
853 subtarget, 1, OPTAB_LIB_WIDEN);
854 if (op0 != temp)
855 emit_move_insn (op0, temp);
858 /* Store a bit field that is split across multiple accessible memory objects.
860 OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
861 BITSIZE is the field width; BITPOS the position of its first bit
862 (within the word).
863 VALUE is the value to store.
865 This does not yet handle fields wider than BITS_PER_WORD. */
867 static void
868 store_split_bit_field (op0, bitsize, bitpos, value)
869 rtx op0;
870 unsigned HOST_WIDE_INT bitsize, bitpos;
871 rtx value;
873 unsigned int unit;
/* Running count of bits already stored, from the field's lsb end.  */
874 unsigned int bitsdone = 0;
876 /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
877 much at a time. */
878 if (GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
879 unit = BITS_PER_WORD;
880 else
881 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
883 /* If VALUE is a constant other than a CONST_INT, get it into a register in
884 WORD_MODE. If we can do this using gen_lowpart_common, do so. Note
885 that VALUE might be a floating-point constant. */
886 if (CONSTANT_P (value) && GET_CODE (value) != CONST_INT)
888 rtx word = gen_lowpart_common (word_mode, value);
890 if (word && (value != word))
891 value = word;
892 else
893 value = gen_lowpart_common (word_mode,
894 force_reg (GET_MODE (value) != VOIDmode
895 ? GET_MODE (value)
896 : word_mode, value));
898 else if (GET_CODE (value) == ADDRESSOF)
899 value = copy_to_reg (value);
/* Store the field one accessible chunk at a time.  */
901 while (bitsdone < bitsize)
903 unsigned HOST_WIDE_INT thissize;
904 rtx part, word;
905 unsigned HOST_WIDE_INT thispos;
906 unsigned HOST_WIDE_INT offset;
908 offset = (bitpos + bitsdone) / unit;
909 thispos = (bitpos + bitsdone) % unit;
911 /* THISSIZE must not overrun a word boundary. Otherwise,
912 store_fixed_bit_field will call us again, and we will mutually
913 recurse forever. */
914 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
915 thissize = MIN (thissize, unit - thispos);
917 if (BYTES_BIG_ENDIAN)
919 int total_bits;
921 /* We must do an endian conversion exactly the same way as it is
922 done in extract_bit_field, so that the two calls to
923 extract_fixed_bit_field will have comparable arguments. */
924 if (GET_CODE (value) != MEM || GET_MODE (value) == BLKmode)
925 total_bits = BITS_PER_WORD;
926 else
927 total_bits = GET_MODE_BITSIZE (GET_MODE (value));
929 /* Fetch successively less significant portions. */
930 if (GET_CODE (value) == CONST_INT)
931 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
932 >> (bitsize - bitsdone - thissize))
933 & (((HOST_WIDE_INT) 1 << thissize) - 1));
934 else
935 /* The args are chosen so that the last part includes the
936 lsb. Give extract_bit_field the value it needs (with
937 endianness compensation) to fetch the piece we want. */
938 part = extract_fixed_bit_field (word_mode, value, 0, thissize,
939 total_bits - bitsize + bitsdone,
940 NULL_RTX, 1);
942 else
944 /* Fetch successively more significant portions. */
945 if (GET_CODE (value) == CONST_INT)
946 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
947 >> bitsdone)
948 & (((HOST_WIDE_INT) 1 << thissize) - 1));
949 else
950 part = extract_fixed_bit_field (word_mode, value, 0, thissize,
951 bitsdone, NULL_RTX, 1);
954 /* If OP0 is a register, then handle OFFSET here.
956 When handling multiword bitfields, extract_bit_field may pass
957 down a word_mode SUBREG of a larger REG for a bitfield that actually
958 crosses a word boundary. Thus, for a SUBREG, we must find
959 the current word starting from the base register. */
960 if (GET_CODE (op0) == SUBREG)
962 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
963 word = operand_subword_force (SUBREG_REG (op0), word_offset,
964 GET_MODE (SUBREG_REG (op0)));
965 offset = 0;
967 else if (GET_CODE (op0) == REG)
969 word = operand_subword_force (op0, offset, GET_MODE (op0));
970 offset = 0;
972 else
973 word = op0;
975 /* OFFSET is in UNITs, and UNIT is in bits.
976 store_fixed_bit_field wants offset in bytes. */
977 store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT, thissize,
978 thispos, part);
979 bitsdone += thissize;
983 /* Generate code to extract a byte-field from STR_RTX
984 containing BITSIZE bits, starting at BITNUM,
985 and put it in TARGET if possible (if TARGET is nonzero).
986 Regardless of TARGET, we return the rtx for where the value is placed.
987 It may be a QUEUED.
989 STR_RTX is the structure containing the byte (a REG or MEM).
990 UNSIGNEDP is nonzero if this is an unsigned bit field.
991 MODE is the natural mode of the field value once extracted.
992 TMODE is the mode the caller would like the value to have;
993 but the value may be returned with type MODE instead.
995 TOTAL_SIZE is the size in bytes of the containing structure,
996 or -1 if varying.
998 If a TARGET is specified and we can store in it at no extra cost,
999 we do so, and return TARGET.
1000 Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1001 if they are equally easy. */
rtx
extract_bit_field (str_rtx, bitsize, bitnum, unsignedp,
		   target, mode, tmode, total_size)
     rtx str_rtx;
     unsigned HOST_WIDE_INT bitsize;
     unsigned HOST_WIDE_INT bitnum;
     int unsignedp;
     rtx target;
     enum machine_mode mode, tmode;
     HOST_WIDE_INT total_size;
{
  /* UNIT is the granularity BITNUM is measured against: bits for MEM
     (byte-addressed), a full word for REG.  */
  unsigned int unit
    = (GET_CODE (str_rtx) == MEM) ? BITS_PER_UNIT : BITS_PER_WORD;
  unsigned HOST_WIDE_INT offset = bitnum / unit;
  unsigned HOST_WIDE_INT bitpos = bitnum % unit;
  rtx op0 = str_rtx;
  /* SPEC_TARGET / SPEC_TARGET_SUBREG remember the caller's TARGET (and a
     lowpart subreg of it) so we can return the caller's rtx when the
     extraction landed there.  */
  rtx spec_target = target;
  rtx spec_target_subreg = 0;
  enum machine_mode int_mode;
  enum machine_mode extv_mode = mode_for_extraction (EP_extv, 0);
  enum machine_mode extzv_mode = mode_for_extraction (EP_extzv, 0);
  enum machine_mode mode1;
  int byte_offset;

  /* Discount the part of the structure before the desired byte.
     We need to know how many bytes are safe to reference after it.  */
  if (total_size >= 0)
    total_size -= (bitpos / BIGGEST_ALIGNMENT
		   * (BIGGEST_ALIGNMENT / BITS_PER_UNIT));

  if (tmode == VOIDmode)
    tmode = mode;

  /* Fold any SUBREG wrappers into OFFSET/BITPOS so OP0 is the underlying
     object.  */
  while (GET_CODE (op0) == SUBREG)
    {
      bitpos += SUBREG_BYTE (op0) * BITS_PER_UNIT;
      /* NOTE(review): the test is '>' not '>=', so bitpos == unit is left
	 un-normalized here -- verify intent.  */
      if (bitpos > unit)
	{
	  offset += (bitpos / unit);
	  bitpos %= unit;
	}
      op0 = SUBREG_REG (op0);
    }

  if (GET_CODE (op0) == REG
      && mode == GET_MODE (op0)
      && bitnum == 0
      && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
    {
      /* We're trying to extract a full register from itself.  */
      return op0;
    }

  /* Make sure we are playing with integral modes.  Pun with subregs
     if we aren't.  */
  {
    enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
    if (imode != GET_MODE (op0))
      {
	if (GET_CODE (op0) == MEM)
	  op0 = adjust_address (op0, imode, 0);
	else if (imode != BLKmode)
	  op0 = gen_lowpart (imode, op0);
	else
	  abort ();
      }
  }

  /* We may be accessing data outside the field, which means
     we can alias adjacent data.  */
  if (GET_CODE (op0) == MEM)
    {
      op0 = shallow_copy_rtx (op0);
      set_mem_alias_set (op0, 0);
      set_mem_expr (op0, 0);
    }

  /* Extraction of a full-word or multi-word value from a structure
     in a register or aligned memory can be done with just a SUBREG.
     A subword value in the least significant part of a register
     can also be extracted with a SUBREG.  For this, we need the
     byte offset of the value in op0.  */

  byte_offset = bitpos / BITS_PER_UNIT + offset * UNITS_PER_WORD;

  /* If OP0 is a register, BITPOS must count within a word.
     But as we have it, it counts within whatever size OP0 now has.
     On a bigendian machine, these are not the same, so convert.  */
  if (BYTES_BIG_ENDIAN
      && GET_CODE (op0) != MEM
      && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
    bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));

  /* ??? We currently assume TARGET is at least as big as BITSIZE.
     If that's wrong, the solution is to test for it and set TARGET to 0
     if needed.  */

  mode1  = (VECTOR_MODE_P (tmode)
	    ? mode
	    : mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0));

  if (((GET_CODE (op0) != MEM
	&& TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (mode),
				  GET_MODE_BITSIZE (GET_MODE (op0)))
	&& GET_MODE_SIZE (mode1) != 0
	&& byte_offset % GET_MODE_SIZE (mode1) == 0)
       || (GET_CODE (op0) == MEM
	   && (! SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
	       || (offset * BITS_PER_UNIT % bitsize == 0
		   && MEM_ALIGN (op0) % bitsize == 0))))
      && ((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode)
	   && bitpos % BITS_PER_WORD == 0)
	  || (mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0) != BLKmode
	      /* ??? The big endian test here is wrong.  This is correct
		 if the value is in a register, and if mode_for_size is not
		 the same mode as op0.  This causes us to get unnecessarily
		 inefficient code from the Thumb port when -mbig-endian.  */
	      && (BYTES_BIG_ENDIAN
		  ? bitpos + bitsize == BITS_PER_WORD
		  : bitpos == 0))))
    {
      if (mode1 != GET_MODE (op0))
	{
	  if (GET_CODE (op0) == SUBREG)
	    {
	      if (GET_MODE (SUBREG_REG (op0)) == mode1
		  || GET_MODE_CLASS (mode1) == MODE_INT
		  || GET_MODE_CLASS (mode1) == MODE_PARTIAL_INT)
		op0 = SUBREG_REG (op0);
	      else
		/* Else we've got some float mode source being extracted into
		   a different float mode destination -- this combination of
		   subregs results in Severe Tire Damage.  */
		goto no_subreg_mode_swap;
	    }
	  if (GET_CODE (op0) == REG)
	    op0 = gen_rtx_SUBREG (mode1, op0, byte_offset);
	  else
	    op0 = adjust_address (op0, mode1, offset);
	}
      if (mode1 != mode)
	return convert_to_mode (tmode, op0, unsignedp);
      return op0;
    }
 no_subreg_mode_swap:

  /* Handle fields bigger than a word.  */

  if (bitsize > BITS_PER_WORD)
    {
      /* Here we transfer the words of the field
	 in the order least significant first.
	 This is because the most significant word is the one which may
	 be less than full.  */

      unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
      unsigned int i;

      if (target == 0 || GET_CODE (target) != REG)
	target = gen_reg_rtx (mode);

      /* Indicate for flow that the entire target reg is being set.  */
      emit_insn (gen_rtx_CLOBBER (VOIDmode, target));

      for (i = 0; i < nwords; i++)
	{
	  /* If I is 0, use the low-order word in both field and target;
	     if I is 1, use the next to lowest word; and so on.  */
	  /* Word number in TARGET to use.  */
	  unsigned int wordnum
	    = (WORDS_BIG_ENDIAN
	       ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
	       : i);
	  /* Offset from start of field in OP0.  */
	  unsigned int bit_offset = (WORDS_BIG_ENDIAN
				     ? MAX (0, ((int) bitsize - ((int) i + 1)
						* (int) BITS_PER_WORD))
				     : (int) i * BITS_PER_WORD);
	  rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
	  rtx result_part
	    = extract_bit_field (op0, MIN (BITS_PER_WORD,
					   bitsize - i * BITS_PER_WORD),
				 bitnum + bit_offset, 1, target_part, mode,
				 word_mode, total_size);

	  if (target_part == 0)
	    abort ();

	  if (result_part != target_part)
	    emit_move_insn (target_part, result_part);
	}

      if (unsignedp)
	{
	  /* Unless we've filled TARGET, the upper regs in a multi-reg value
	     need to be zero'd out.  */
	  if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
	    {
	      unsigned int i, total_words;

	      total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
	      for (i = nwords; i < total_words; i++)
		emit_move_insn
		  (operand_subword (target,
				    WORDS_BIG_ENDIAN ? total_words - i - 1 : i,
				    1, VOIDmode),
		   const0_rtx);
	    }
	  return target;
	}

      /* Signed bit field: sign-extend with two arithmetic shifts.  */
      target = expand_shift (LSHIFT_EXPR, mode, target,
			     build_int_2 (GET_MODE_BITSIZE (mode) - bitsize, 0),
			     NULL_RTX, 0);
      return expand_shift (RSHIFT_EXPR, mode, target,
			   build_int_2 (GET_MODE_BITSIZE (mode) - bitsize, 0),
			   NULL_RTX, 0);
    }

  /* From here on we know the desired field is smaller than a word.  */

  /* Check if there is a correspondingly-sized integer field, so we can
     safely extract it as one size of integer, if necessary; then
     truncate or extend to the size that is wanted; then use SUBREGs or
     convert_to_mode to get one of the modes we really wanted.  */

  int_mode = int_mode_for_mode (tmode);
  if (int_mode == BLKmode)
    int_mode = int_mode_for_mode (mode);
  if (int_mode == BLKmode)
    abort ();	/* Should probably push op0 out to memory and then
		   do a load.  */

  /* OFFSET is the number of words or bytes (UNIT says which)
     from STR_RTX to the first word or byte containing part of the field.  */

  if (GET_CODE (op0) != MEM)
    {
      if (offset != 0
	  || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
	{
	  if (GET_CODE (op0) != REG)
	    op0 = copy_to_reg (op0);
	  op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
			        op0, (offset * UNITS_PER_WORD));
	}
      offset = 0;
    }
  else
    op0 = protect_from_queue (str_rtx, 1);

  /* Now OFFSET is nonzero only for memory operands.  */

  if (unsignedp)
    {
      /* Try the target's extzv pattern first; fall back to shifts/masks.  */
      if (HAVE_extzv
	  && (GET_MODE_BITSIZE (extzv_mode) >= bitsize)
	  && ! ((GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
		&& (bitsize + bitpos > GET_MODE_BITSIZE (extzv_mode))))
	{
	  unsigned HOST_WIDE_INT xbitpos = bitpos, xoffset = offset;
	  rtx bitsize_rtx, bitpos_rtx;
	  rtx last = get_last_insn ();
	  rtx xop0 = op0;
	  rtx xtarget = target;
	  rtx xspec_target = spec_target;
	  rtx xspec_target_subreg = spec_target_subreg;
	  rtx pat;
	  enum machine_mode maxmode = mode_for_extraction (EP_extzv, 0);

	  if (GET_CODE (xop0) == MEM)
	    {
	      int save_volatile_ok = volatile_ok;
	      volatile_ok = 1;

	      /* Is the memory operand acceptable?  */
	      if (! ((*insn_data[(int) CODE_FOR_extzv].operand[1].predicate)
		     (xop0, GET_MODE (xop0))))
		{
		  /* No, load into a reg and extract from there.  */
		  enum machine_mode bestmode;

		  /* Get the mode to use for inserting into this field.  If
		     OP0 is BLKmode, get the smallest mode consistent with the
		     alignment.  If OP0 is a non-BLKmode object that is no
		     wider than MAXMODE, use its mode.  Otherwise, use the
		     smallest mode containing the field.  */

		  if (GET_MODE (xop0) == BLKmode
		      || (GET_MODE_SIZE (GET_MODE (op0))
			  > GET_MODE_SIZE (maxmode)))
		    bestmode = get_best_mode (bitsize, bitnum,
					      MEM_ALIGN (xop0), maxmode,
					      MEM_VOLATILE_P (xop0));
		  else
		    bestmode = GET_MODE (xop0);

		  if (bestmode == VOIDmode
		      || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (xop0))
			  && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (xop0)))
		    goto extzv_loses;

		  /* Compute offset as multiple of this unit,
		     counting in bytes.  */
		  unit = GET_MODE_BITSIZE (bestmode);
		  xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
		  xbitpos = bitnum % unit;
		  xop0 = adjust_address (xop0, bestmode, xoffset);

		  /* Fetch it to a register in that size.  */
		  xop0 = force_reg (bestmode, xop0);

		  /* XBITPOS counts within UNIT, which is what is expected.  */
		}
	      else
		/* Get ref to first byte containing part of the field.  */
		xop0 = adjust_address (xop0, byte_mode, xoffset);

	      volatile_ok = save_volatile_ok;
	    }

	  /* If op0 is a register, we need it in MAXMODE (which is usually
	     SImode). to make it acceptable to the format of extzv.  */
	  if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
	    goto extzv_loses;
	  if (GET_CODE (xop0) == REG && GET_MODE (xop0) != maxmode)
	    xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);

	  /* On big-endian machines, we count bits from the most significant.
	     If the bit field insn does not, we must invert.  */
	  if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
	    xbitpos = unit - bitsize - xbitpos;

	  /* Now convert from counting within UNIT to counting in MAXMODE.  */
	  if (BITS_BIG_ENDIAN && GET_CODE (xop0) != MEM)
	    xbitpos += GET_MODE_BITSIZE (maxmode) - unit;

	  unit = GET_MODE_BITSIZE (maxmode);

	  if (xtarget == 0
	      || (flag_force_mem && GET_CODE (xtarget) == MEM))
	    xtarget = xspec_target = gen_reg_rtx (tmode);

	  if (GET_MODE (xtarget) != maxmode)
	    {
	      if (GET_CODE (xtarget) == REG)
		{
		  int wider = (GET_MODE_SIZE (maxmode)
			       > GET_MODE_SIZE (GET_MODE (xtarget)));
		  xtarget = gen_lowpart (maxmode, xtarget);
		  if (wider)
		    xspec_target_subreg = xtarget;
		}
	      else
		xtarget = gen_reg_rtx (maxmode);
	    }

	  /* If this machine's extzv insists on a register target,
	     make sure we have one.  */
	  if (! ((*insn_data[(int) CODE_FOR_extzv].operand[0].predicate)
		 (xtarget, maxmode)))
	    xtarget = gen_reg_rtx (maxmode);

	  bitsize_rtx = GEN_INT (bitsize);
	  bitpos_rtx = GEN_INT (xbitpos);

	  pat = gen_extzv (protect_from_queue (xtarget, 1),
			   xop0, bitsize_rtx, bitpos_rtx);
	  if (pat)
	    {
	      emit_insn (pat);
	      target = xtarget;
	      spec_target = xspec_target;
	      spec_target_subreg = xspec_target_subreg;
	    }
	  else
	    {
	      /* The pattern refused the operands; undo and fall back.  */
	      delete_insns_since (last);
	      target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
						bitpos, target, 1);
	    }
	}
      else
	extzv_loses:
	target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
					  bitpos, target, 1);
    }
  else
    {
      /* Signed extraction: try the target's extv pattern first.  */
      if (HAVE_extv
	  && (GET_MODE_BITSIZE (extv_mode) >= bitsize)
	  && ! ((GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
		&& (bitsize + bitpos > GET_MODE_BITSIZE (extv_mode))))
	{
	  int xbitpos = bitpos, xoffset = offset;
	  rtx bitsize_rtx, bitpos_rtx;
	  rtx last = get_last_insn ();
	  rtx xop0 = op0, xtarget = target;
	  rtx xspec_target = spec_target;
	  rtx xspec_target_subreg = spec_target_subreg;
	  rtx pat;
	  enum machine_mode maxmode = mode_for_extraction (EP_extv, 0);

	  if (GET_CODE (xop0) == MEM)
	    {
	      /* Is the memory operand acceptable?  */
	      if (! ((*insn_data[(int) CODE_FOR_extv].operand[1].predicate)
		     (xop0, GET_MODE (xop0))))
		{
		  /* No, load into a reg and extract from there.  */
		  enum machine_mode bestmode;

		  /* Get the mode to use for inserting into this field.  If
		     OP0 is BLKmode, get the smallest mode consistent with the
		     alignment.  If OP0 is a non-BLKmode object that is no
		     wider than MAXMODE, use its mode.  Otherwise, use the
		     smallest mode containing the field.  */

		  if (GET_MODE (xop0) == BLKmode
		      || (GET_MODE_SIZE (GET_MODE (op0))
			  > GET_MODE_SIZE (maxmode)))
		    bestmode = get_best_mode (bitsize, bitnum,
					      MEM_ALIGN (xop0), maxmode,
					      MEM_VOLATILE_P (xop0));
		  else
		    bestmode = GET_MODE (xop0);

		  if (bestmode == VOIDmode
		      || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (xop0))
			  && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (xop0)))
		    goto extv_loses;

		  /* Compute offset as multiple of this unit,
		     counting in bytes.  */
		  unit = GET_MODE_BITSIZE (bestmode);
		  xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
		  xbitpos = bitnum % unit;
		  xop0 = adjust_address (xop0, bestmode, xoffset);

		  /* Fetch it to a register in that size.  */
		  xop0 = force_reg (bestmode, xop0);

		  /* XBITPOS counts within UNIT, which is what is expected.  */
		}
	      else
		/* Get ref to first byte containing part of the field.  */
		xop0 = adjust_address (xop0, byte_mode, xoffset);
	    }

	  /* If op0 is a register, we need it in MAXMODE (which is usually
	     SImode) to make it acceptable to the format of extv.  */
	  if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
	    goto extv_loses;
	  if (GET_CODE (xop0) == REG && GET_MODE (xop0) != maxmode)
	    xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);

	  /* On big-endian machines, we count bits from the most significant.
	     If the bit field insn does not, we must invert.  */
	  if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
	    xbitpos = unit - bitsize - xbitpos;

	  /* XBITPOS counts within a size of UNIT.
	     Adjust to count within a size of MAXMODE.  */
	  if (BITS_BIG_ENDIAN && GET_CODE (xop0) != MEM)
	    xbitpos += (GET_MODE_BITSIZE (maxmode) - unit);

	  unit = GET_MODE_BITSIZE (maxmode);

	  if (xtarget == 0
	      || (flag_force_mem && GET_CODE (xtarget) == MEM))
	    xtarget = xspec_target = gen_reg_rtx (tmode);

	  if (GET_MODE (xtarget) != maxmode)
	    {
	      if (GET_CODE (xtarget) == REG)
		{
		  int wider = (GET_MODE_SIZE (maxmode)
			       > GET_MODE_SIZE (GET_MODE (xtarget)));
		  xtarget = gen_lowpart (maxmode, xtarget);
		  if (wider)
		    xspec_target_subreg = xtarget;
		}
	      else
		xtarget = gen_reg_rtx (maxmode);
	    }

	  /* If this machine's extv insists on a register target,
	     make sure we have one.  */
	  if (! ((*insn_data[(int) CODE_FOR_extv].operand[0].predicate)
		 (xtarget, maxmode)))
	    xtarget = gen_reg_rtx (maxmode);

	  bitsize_rtx = GEN_INT (bitsize);
	  bitpos_rtx = GEN_INT (xbitpos);

	  pat = gen_extv (protect_from_queue (xtarget, 1),
			  xop0, bitsize_rtx, bitpos_rtx);
	  if (pat)
	    {
	      emit_insn (pat);
	      target = xtarget;
	      spec_target = xspec_target;
	      spec_target_subreg = xspec_target_subreg;
	    }
	  else
	    {
	      /* The pattern refused the operands; undo and fall back.  */
	      delete_insns_since (last);
	      target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
						bitpos, target, 0);
	    }
	}
      else
	extv_loses:
	target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
					  bitpos, target, 0);
    }
  if (target == spec_target)
    return target;
  if (target == spec_target_subreg)
    return spec_target;
  if (GET_MODE (target) != tmode && GET_MODE (target) != mode)
    {
      /* If the target mode is floating-point, first convert to the
	 integer mode of that size and then access it as a floating-point
	 value via a SUBREG.  */
      if (GET_MODE_CLASS (tmode) != MODE_INT
	  && GET_MODE_CLASS (tmode) != MODE_PARTIAL_INT)
	{
	  target = convert_to_mode (mode_for_size (GET_MODE_BITSIZE (tmode),
						   MODE_INT, 0),
				    target, unsignedp);
	  return gen_lowpart (tmode, target);
	}
      else
	return convert_to_mode (tmode, target, unsignedp);
    }
  return target;
}
1543 /* Extract a bit field using shifts and boolean operations
1544 Returns an rtx to represent the value.
1545 OP0 addresses a register (word) or memory (byte).
1546 BITPOS says which bit within the word or byte the bit field starts in.
1547 OFFSET says how many bytes farther the bit field starts;
1548 it is 0 if OP0 is a register.
1549 BITSIZE says how many bits long the bit field is.
1550 (If OP0 is a register, it may be narrower than a full word,
1551 but BITPOS still counts within a full word,
1552 which is significant on bigendian machines.)
1554 UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1555 If TARGET is nonzero, attempts to store the value there
1556 and return TARGET, but this is not guaranteed.
1557 If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */
static rtx
extract_fixed_bit_field (tmode, op0, offset, bitsize, bitpos,
			 target, unsignedp)
     enum machine_mode tmode;
     rtx op0, target;
     unsigned HOST_WIDE_INT offset, bitsize, bitpos;
     int unsignedp;
{
  unsigned int total_bits = BITS_PER_WORD;
  enum machine_mode mode;

  if (GET_CODE (op0) == SUBREG || GET_CODE (op0) == REG)
    {
      /* Special treatment for a bit field split across two registers.  */
      if (bitsize + bitpos > BITS_PER_WORD)
	return extract_split_bit_field (op0, bitsize, bitpos, unsignedp);
    }
  else
    {
      /* Get the proper mode to use for this field.  We want a mode that
	 includes the entire field.  If such a mode would be larger than
	 a word, we won't be doing the extraction the normal way.  */

      mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
			    MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));

      if (mode == VOIDmode)
	/* The only way this should occur is if the field spans word
	   boundaries.  */
	return extract_split_bit_field (op0, bitsize,
					bitpos + offset * BITS_PER_UNIT,
					unsignedp);

      total_bits = GET_MODE_BITSIZE (mode);

      /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
	 be in the range 0 to total_bits-1, and put any excess bytes in
	 OFFSET.  */
      if (bitpos >= total_bits)
	{
	  offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
	  bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
		     * BITS_PER_UNIT);
	}

      /* Get ref to an aligned byte, halfword, or word containing the field.
	 Adjust BITPOS to be position within a word,
	 and OFFSET to be the offset of that word.
	 Then alter OP0 to refer to that word.  */
      bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
      offset -= (offset % (total_bits / BITS_PER_UNIT));
      op0 = adjust_address (op0, mode, offset);
    }

  mode = GET_MODE (op0);

  if (BYTES_BIG_ENDIAN)
    /* BITPOS is the distance between our msb and that of OP0.
       Convert it to the distance from the lsb.  */
    bitpos = total_bits - bitsize - bitpos;

  /* Now BITPOS is always the distance between the field's lsb and that of OP0.
     We have reduced the big-endian case to the little-endian case.  */

  if (unsignedp)
    {
      if (bitpos)
	{
	  /* If the field does not already start at the lsb,
	     shift it so it does.  */
	  tree amount = build_int_2 (bitpos, 0);
	  /* Maybe propagate the target for the shift.  */
	  /* But not if we will return it--could confuse integrate.c.  */
	  rtx subtarget = (target != 0 && GET_CODE (target) == REG
			   && !REG_FUNCTION_VALUE_P (target)
			   ? target : 0);
	  if (tmode != mode) subtarget = 0;
	  op0 = expand_shift (RSHIFT_EXPR, mode, op0, amount, subtarget, 1);
	}
      /* Convert the value to the desired mode.  */
      if (mode != tmode)
	op0 = convert_to_mode (tmode, op0, 1);

      /* Unless the msb of the field used to be the msb when we shifted,
	 mask out the upper bits.  */

      if (GET_MODE_BITSIZE (mode) != bitpos + bitsize)
	return expand_binop (GET_MODE (op0), and_optab, op0,
			     mask_rtx (GET_MODE (op0), 0, bitsize, 0),
			     target, 1, OPTAB_LIB_WIDEN);
      return op0;
    }

  /* To extract a signed bit-field, first shift its msb to the msb of the word,
     then arithmetic-shift its lsb to the lsb of the word.  */
  op0 = force_reg (mode, op0);
  if (mode != tmode)
    target = 0;

  /* Find the narrowest integer mode that contains the field.  */

  for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
       mode = GET_MODE_WIDER_MODE (mode))
    if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos)
      {
	op0 = convert_to_mode (mode, op0, 0);
	break;
      }

  if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos))
    {
      tree amount
	= build_int_2 (GET_MODE_BITSIZE (mode) - (bitsize + bitpos), 0);
      /* Maybe propagate the target for the shift.  */
      /* But not if we will return the result--could confuse integrate.c.  */
      rtx subtarget = (target != 0 && GET_CODE (target) == REG
		       && ! REG_FUNCTION_VALUE_P (target)
		       ? target : 0);
      op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
    }

  return expand_shift (RSHIFT_EXPR, mode, op0,
		       build_int_2 (GET_MODE_BITSIZE (mode) - bitsize, 0),
		       target, 0);
}
1685 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1686 of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1687 complement of that if COMPLEMENT. The mask is truncated if
1688 necessary to the width of mode MODE. The mask is zero-extended if
1689 BITSIZE+BITPOS is too small for MODE. */
1691 static rtx
1692 mask_rtx (mode, bitpos, bitsize, complement)
1693 enum machine_mode mode;
1694 int bitpos, bitsize, complement;
1696 HOST_WIDE_INT masklow, maskhigh;
1698 if (bitpos < HOST_BITS_PER_WIDE_INT)
1699 masklow = (HOST_WIDE_INT) -1 << bitpos;
1700 else
1701 masklow = 0;
1703 if (bitpos + bitsize < HOST_BITS_PER_WIDE_INT)
1704 masklow &= ((unsigned HOST_WIDE_INT) -1
1705 >> (HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1707 if (bitpos <= HOST_BITS_PER_WIDE_INT)
1708 maskhigh = -1;
1709 else
1710 maskhigh = (HOST_WIDE_INT) -1 << (bitpos - HOST_BITS_PER_WIDE_INT);
1712 if (bitpos + bitsize > HOST_BITS_PER_WIDE_INT)
1713 maskhigh &= ((unsigned HOST_WIDE_INT) -1
1714 >> (2 * HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1715 else
1716 maskhigh = 0;
1718 if (complement)
1720 maskhigh = ~maskhigh;
1721 masklow = ~masklow;
1724 return immed_double_const (masklow, maskhigh, mode);
1727 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1728 VALUE truncated to BITSIZE bits and then shifted left BITPOS bits. */
1730 static rtx
1731 lshift_value (mode, value, bitpos, bitsize)
1732 enum machine_mode mode;
1733 rtx value;
1734 int bitpos, bitsize;
1736 unsigned HOST_WIDE_INT v = INTVAL (value);
1737 HOST_WIDE_INT low, high;
1739 if (bitsize < HOST_BITS_PER_WIDE_INT)
1740 v &= ~((HOST_WIDE_INT) -1 << bitsize);
1742 if (bitpos < HOST_BITS_PER_WIDE_INT)
1744 low = v << bitpos;
1745 high = (bitpos > 0 ? (v >> (HOST_BITS_PER_WIDE_INT - bitpos)) : 0);
1747 else
1749 low = 0;
1750 high = v << (bitpos - HOST_BITS_PER_WIDE_INT);
1753 return immed_double_const (low, high, mode);
1756 /* Extract a bit field that is split across two words
1757 and return an RTX for the result.
1759 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1760 BITSIZE is the field width; BITPOS, position of its first bit, in the word.
1761 UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend. */
static rtx
extract_split_bit_field (op0, bitsize, bitpos, unsignedp)
     rtx op0;
     unsigned HOST_WIDE_INT bitsize, bitpos;
     int unsignedp;
{
  unsigned int unit;
  unsigned int bitsdone = 0;
  rtx result = NULL_RTX;
  int first = 1;

  /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
     much at a time.  */
  if (GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
    unit = BITS_PER_WORD;
  else
    unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);

  /* Pull the field out one UNIT-bounded piece at a time, ORing the
     pieces together.  */
  while (bitsdone < bitsize)
    {
      unsigned HOST_WIDE_INT thissize;
      rtx part, word;
      unsigned HOST_WIDE_INT thispos;
      unsigned HOST_WIDE_INT offset;

      offset = (bitpos + bitsdone) / unit;
      thispos = (bitpos + bitsdone) % unit;

      /* THISSIZE must not overrun a word boundary.  Otherwise,
	 extract_fixed_bit_field will call us again, and we will mutually
	 recurse forever.  */
      thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
      thissize = MIN (thissize, unit - thispos);

      /* If OP0 is a register, then handle OFFSET here.

	 When handling multiword bitfields, extract_bit_field may pass
	 down a word_mode SUBREG of a larger REG for a bitfield that actually
	 crosses a word boundary.  Thus, for a SUBREG, we must find
	 the current word starting from the base register.  */
      if (GET_CODE (op0) == SUBREG)
	{
	  int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
	  word = operand_subword_force (SUBREG_REG (op0), word_offset,
					GET_MODE (SUBREG_REG (op0)));
	  offset = 0;
	}
      else if (GET_CODE (op0) == REG)
	{
	  word = operand_subword_force (op0, offset, GET_MODE (op0));
	  offset = 0;
	}
      else
	word = op0;

      /* Extract the parts in bit-counting order,
	 whose meaning is determined by BYTES_PER_UNIT.
	 OFFSET is in UNITs, and UNIT is in bits.
	 extract_fixed_bit_field wants offset in bytes.  */
      part = extract_fixed_bit_field (word_mode, word,
				      offset * unit / BITS_PER_UNIT,
				      thissize, thispos, 0, 1);
      bitsdone += thissize;

      /* Shift this part into place for the result.  */
      if (BYTES_BIG_ENDIAN)
	{
	  if (bitsize != bitsdone)
	    part = expand_shift (LSHIFT_EXPR, word_mode, part,
				 build_int_2 (bitsize - bitsdone, 0), 0, 1);
	}
      else
	{
	  if (bitsdone != thissize)
	    part = expand_shift (LSHIFT_EXPR, word_mode, part,
				 build_int_2 (bitsdone - thissize, 0), 0, 1);
	}

      if (first)
	result = part;
      else
	/* Combine the parts with bitwise or.  This works
	   because we extracted each part as an unsigned bit field.  */
	result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
			       OPTAB_LIB_WIDEN);

      first = 0;
    }

  /* Unsigned bit field: we are done.  */
  if (unsignedp)
    return result;
  /* Signed bit field: sign-extend with two arithmetic shifts.  */
  result = expand_shift (LSHIFT_EXPR, word_mode, result,
			 build_int_2 (BITS_PER_WORD - bitsize, 0),
			 NULL_RTX, 0);
  return expand_shift (RSHIFT_EXPR, word_mode, result,
		       build_int_2 (BITS_PER_WORD - bitsize, 0), NULL_RTX, 0);
}
1863 /* Add INC into TARGET. */
1865 void
1866 expand_inc (target, inc)
1867 rtx target, inc;
1869 rtx value = expand_binop (GET_MODE (target), add_optab,
1870 target, inc,
1871 target, 0, OPTAB_LIB_WIDEN);
1872 if (value != target)
1873 emit_move_insn (target, value);
1876 /* Subtract DEC from TARGET. */
1878 void
1879 expand_dec (target, dec)
1880 rtx target, dec;
1882 rtx value = expand_binop (GET_MODE (target), sub_optab,
1883 target, dec,
1884 target, 0, OPTAB_LIB_WIDEN);
1885 if (value != target)
1886 emit_move_insn (target, value);
1889 /* Output a shift instruction for expression code CODE,
1890 with SHIFTED being the rtx for the value to shift,
1891 and AMOUNT the tree for the amount to shift by.
1892 Store the result in the rtx TARGET, if that is convenient.
1893 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
1894 Return the rtx for where the value is. */
rtx
expand_shift (code, mode, shifted, amount, target, unsignedp)
     enum tree_code code;
     enum machine_mode mode;
     rtx shifted;
     tree amount;
     rtx target;
     int unsignedp;
{
  rtx op1, temp = 0;
  int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
  int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
  int try;

  /* Previously detected shift-counts computed by NEGATE_EXPR
     and shifted in the other direction; but that does not work
     on all machines.  */

  op1 = expand_expr (amount, NULL_RTX, VOIDmode, 0);

#ifdef SHIFT_COUNT_TRUNCATED
  if (SHIFT_COUNT_TRUNCATED)
    {
      /* Reduce an out-of-range constant count, and strip a lowpart
	 SUBREG from the count, since the machine truncates anyway.  */
      if (GET_CODE (op1) == CONST_INT
	  && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
	      (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
	op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
		       % GET_MODE_BITSIZE (mode));
      else if (GET_CODE (op1) == SUBREG
	       && subreg_lowpart_p (op1))
	op1 = SUBREG_REG (op1);
    }
#endif

  if (op1 == const0_rtx)
    return shifted;

  /* Try progressively more expensive optab strategies until one
     succeeds: direct, then widening, then library-call widening.  */
  for (try = 0; temp == 0 && try < 3; try++)
    {
      enum optab_methods methods;

      if (try == 0)
	methods = OPTAB_DIRECT;
      else if (try == 1)
	methods = OPTAB_WIDEN;
      else
	methods = OPTAB_LIB_WIDEN;

      if (rotate)
	{
	  /* Widening does not work for rotation.  */
	  if (methods == OPTAB_WIDEN)
	    continue;
	  else if (methods == OPTAB_LIB_WIDEN)
	    {
	      /* If we have been unable to open-code this by a rotation,
		 do it as the IOR of two shifts.  I.e., to rotate A
		 by N bits, compute (A << N) | ((unsigned) A >> (C - N))
		 where C is the bitsize of A.

		 It is theoretically possible that the target machine might
		 not be able to perform either shift and hence we would
		 be making two libcalls rather than just the one for the
		 shift (similarly if IOR could not be done).  We will allow
		 this extremely unlikely lossage to avoid complicating the
		 code below.  */

	      rtx subtarget = target == shifted ? 0 : target;
	      rtx temp1;
	      tree type = TREE_TYPE (amount);
	      tree new_amount = make_tree (type, op1);
	      tree other_amount
		= fold (build (MINUS_EXPR, type,
			       convert (type,
					build_int_2 (GET_MODE_BITSIZE (mode),
						     0)),
			       amount));

	      shifted = force_reg (mode, shifted);

	      temp = expand_shift (left ? LSHIFT_EXPR : RSHIFT_EXPR,
				   mode, shifted, new_amount, subtarget, 1);
	      temp1 = expand_shift (left ? RSHIFT_EXPR : LSHIFT_EXPR,
				    mode, shifted, other_amount, 0, 1);
	      return expand_binop (mode, ior_optab, temp, temp1, target,
				   unsignedp, methods);
	    }

	  temp = expand_binop (mode,
			       left ? rotl_optab : rotr_optab,
			       shifted, op1, target, unsignedp, methods);

	  /* If we don't have the rotate, but we are rotating by a constant
	     that is in range, try a rotate in the opposite direction.  */

	  if (temp == 0 && GET_CODE (op1) == CONST_INT
	      && INTVAL (op1) > 0
	      && (unsigned int) INTVAL (op1) < GET_MODE_BITSIZE (mode))
	    temp = expand_binop (mode,
				 left ? rotr_optab : rotl_optab,
				 shifted,
				 GEN_INT (GET_MODE_BITSIZE (mode)
					  - INTVAL (op1)),
				 target, unsignedp, methods);
	}
      else if (unsignedp)
	temp = expand_binop (mode,
			     left ? ashl_optab : lshr_optab,
			     shifted, op1, target, unsignedp, methods);

      /* Do arithmetic shifts.
	 Also, if we are going to widen the operand, we can just as well
	 use an arithmetic right-shift instead of a logical one.  */
      if (temp == 0 && ! rotate
	  && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
	{
	  enum optab_methods methods1 = methods;

	  /* If trying to widen a log shift to an arithmetic shift,
	     don't accept an arithmetic shift of the same size.  */
	  if (unsignedp)
	    methods1 = OPTAB_MUST_WIDEN;

	  /* Arithmetic shift */

	  temp = expand_binop (mode,
			       left ? ashl_optab : ashr_optab,
			       shifted, op1, target, unsignedp, methods1);
	}

      /* We used to try extzv here for logical right shifts, but that was
	 only useful for one machine, the VAX, and caused poor code
	 generation there for lshrdi3, so the code was deleted and a
	 define_expand for lshrsi3 was added to vax.md.  */
    }

  if (temp == 0)
    abort ();
  return temp;
}
/* Operation codes for the steps of a synthesized multiplication
   sequence; the semantics of each are spelled out in the comment that
   follows.  NOTE(review): alg_add, alg_subtract, alg_factor and
   alg_shiftop are not described there and are never stored by the
   code visible in this chunk -- confirm they are still used elsewhere
   before removing.  */
2037 enum alg_code { alg_zero, alg_m, alg_shift,
2038 alg_add_t_m2, alg_sub_t_m2,
2039 alg_add_factor, alg_sub_factor,
2040 alg_add_t2_m, alg_sub_t2_m,
2041 alg_add, alg_subtract, alg_factor, alg_shiftop };
2043 /* This structure records a sequence of operations.
2044 `ops' is the number of operations recorded.
2045 `cost' is their total cost.
2046 The operations are stored in `op' and the corresponding
2047 logarithms of the integer coefficients in `log'.
2049 These are the operations:
2050 alg_zero total := 0;
2051 alg_m total := multiplicand;
2052 alg_shift total := total * coeff
2053 alg_add_t_m2 total := total + multiplicand * coeff;
2054 alg_sub_t_m2 total := total - multiplicand * coeff;
2055 alg_add_factor total := total * coeff + total;
2056 alg_sub_factor total := total * coeff - total;
2057 alg_add_t2_m total := total * coeff + multiplicand;
2058 alg_sub_t2_m total := total * coeff - multiplicand;
2060 The first operand must be either alg_zero or alg_m. */
2062 struct algorithm
/* Total cost of the recorded sequence, in the units of the
   precomputed cost tables (add_cost, shift_cost, ...).  */
2064 short cost;
/* Number of entries of op[] and log[] actually in use.  */
2065 short ops;
2066 /* The size of the OP and LOG fields are not directly related to the
2067 word size, but the worst-case algorithms will be if we have few
2068 consecutive ones or zeros, i.e., a multiplicand like 10101010101...
2069 In that case we will generate shift-by-2, add, shift-by-2, add,...,
2070 in total wordsize operations. */
2071 enum alg_code op[MAX_BITS_PER_WORD];
2072 char log[MAX_BITS_PER_WORD];
2075 static void synth_mult PARAMS ((struct algorithm *,
2076 unsigned HOST_WIDE_INT,
2077 int));
2078 static unsigned HOST_WIDE_INT choose_multiplier PARAMS ((unsigned HOST_WIDE_INT,
2079 int, int,
2080 unsigned HOST_WIDE_INT *,
2081 int *, int *));
2082 static unsigned HOST_WIDE_INT invert_mod2n PARAMS ((unsigned HOST_WIDE_INT,
2083 int));
2084 /* Compute and return the best algorithm for multiplying by T.
2085 The algorithm must cost less than COST_LIMIT.
2086 If retval.cost >= COST_LIMIT, no algorithm was found and all
2087 other fields of the returned struct are undefined. */
2089 static void
2090 synth_mult (alg_out, t, cost_limit)
2091 struct algorithm *alg_out;
2092 unsigned HOST_WIDE_INT t;
2093 int cost_limit;
2095 int m;
2096 struct algorithm *alg_in, *best_alg;
2097 int cost;
2098 unsigned HOST_WIDE_INT q;
2100 /* Indicate that no algorithm is yet found. If no algorithm
2101 is found, this value will be returned and indicate failure. */
2102 alg_out->cost = cost_limit;
2104 if (cost_limit <= 0)
2105 return;
2107 /* t == 1 can be done in zero cost. */
2108 if (t == 1)
2110 alg_out->ops = 1;
2111 alg_out->cost = 0;
2112 alg_out->op[0] = alg_m;
2113 return;
2116 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2117 fail now. */
2118 if (t == 0)
/* NOTE(review): zero_cost is presumably the precomputed cost of
   loading the constant zero, from file-level cost tables not visible
   in this chunk -- confirm.  */
2120 if (zero_cost >= cost_limit)
2121 return;
2122 else
2124 alg_out->ops = 1;
2125 alg_out->cost = zero_cost;
2126 alg_out->op[0] = alg_zero;
2127 return;
/* Scratch buffers for recursive results; alloca keeps them on the
   stack so they are reclaimed when this level of the search returns.  */
2131 /* We'll be needing a couple extra algorithm structures now. */
2133 alg_in = (struct algorithm *)alloca (sizeof (struct algorithm));
2134 best_alg = (struct algorithm *)alloca (sizeof (struct algorithm));
2136 /* If we have a group of zero bits at the low-order part of T, try
2137 multiplying by the remaining bits and then doing a shift. */
2139 if ((t & 1) == 0)
2141 m = floor_log2 (t & -t); /* m = number of low zero bits */
2142 if (m < BITS_PER_WORD)
2144 q = t >> m;
2145 cost = shift_cost[m];
2146 synth_mult (alg_in, q, cost_limit - cost);
2148 cost += alg_in->cost;
2149 if (cost < cost_limit)
/* The recursion's winning sequence is in ALG_IN; swap the buffers so
   BEST_ALG holds it, then append this step's operation.  The same
   swap-and-append pattern repeats for every candidate below.  */
2151 struct algorithm *x;
2152 x = alg_in, alg_in = best_alg, best_alg = x;
2153 best_alg->log[best_alg->ops] = m;
2154 best_alg->op[best_alg->ops] = alg_shift;
2155 cost_limit = cost;
2160 /* If we have an odd number, add or subtract one. */
2161 if ((t & 1) != 0)
2163 unsigned HOST_WIDE_INT w;
/* Scan past the trailing one bits: afterwards W is the lowest zero
   bit of T, or zero if T was all ones.  */
2165 for (w = 1; (w & t) != 0; w <<= 1)
2167 /* If T was -1, then W will be zero after the loop. This is another
2168 case where T ends with ...111. Handling this with (T + 1) and
2169 subtract 1 produces slightly better code and results in algorithm
2170 selection much faster than treating it like the ...0111 case
2171 below. */
2172 if (w == 0
2173 || (w > 2
2174 /* Reject the case where t is 3.
2175 Thus we prefer addition in that case. */
2176 && t != 3))
2178 /* T ends with ...111. Multiply by (T + 1) and subtract 1. */
2180 cost = add_cost;
2181 synth_mult (alg_in, t + 1, cost_limit - cost);
2183 cost += alg_in->cost;
2184 if (cost < cost_limit)
2186 struct algorithm *x;
2187 x = alg_in, alg_in = best_alg, best_alg = x;
2188 best_alg->log[best_alg->ops] = 0;
2189 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2190 cost_limit = cost;
2193 else
2195 /* T ends with ...01 or ...011. Multiply by (T - 1) and add 1. */
2197 cost = add_cost;
2198 synth_mult (alg_in, t - 1, cost_limit - cost);
2200 cost += alg_in->cost;
2201 if (cost < cost_limit)
2203 struct algorithm *x;
2204 x = alg_in, alg_in = best_alg, best_alg = x;
2205 best_alg->log[best_alg->ops] = 0;
2206 best_alg->op[best_alg->ops] = alg_add_t_m2;
2207 cost_limit = cost;
2212 /* Look for factors of t of the form
2213 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2214 If we find such a factor, we can multiply by t using an algorithm that
2215 multiplies by q, shift the result by m and add/subtract it to itself.
2217 We search for large factors first and loop down, even if large factors
2218 are less probable than small; if we find a large factor we will find a
2219 good sequence quickly, and therefore be able to prune (by decreasing
2220 COST_LIMIT) the search. */
2222 for (m = floor_log2 (t - 1); m >= 2; m--)
2224 unsigned HOST_WIDE_INT d;
/* First try a factor of the form 2**m + 1.  */
2226 d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2227 if (t % d == 0 && t > d && m < BITS_PER_WORD)
2229 cost = MIN (shiftadd_cost[m], add_cost + shift_cost[m]);
2230 synth_mult (alg_in, t / d, cost_limit - cost);
2232 cost += alg_in->cost;
2233 if (cost < cost_limit)
2235 struct algorithm *x;
2236 x = alg_in, alg_in = best_alg, best_alg = x;
2237 best_alg->log[best_alg->ops] = m;
2238 best_alg->op[best_alg->ops] = alg_add_factor;
2239 cost_limit = cost;
2241 /* Other factors will have been taken care of in the recursion. */
2242 break;
/* Then a factor of the form 2**m - 1.  */
2245 d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2246 if (t % d == 0 && t > d && m < BITS_PER_WORD)
2248 cost = MIN (shiftsub_cost[m], add_cost + shift_cost[m]);
2249 synth_mult (alg_in, t / d, cost_limit - cost);
2251 cost += alg_in->cost;
2252 if (cost < cost_limit)
2254 struct algorithm *x;
2255 x = alg_in, alg_in = best_alg, best_alg = x;
2256 best_alg->log[best_alg->ops] = m;
2257 best_alg->op[best_alg->ops] = alg_sub_factor;
2258 cost_limit = cost;
2260 break;
2264 /* Try shift-and-add (load effective address) instructions,
2265 i.e. do a*3, a*5, a*9. */
2266 if ((t & 1) != 0)
/* Q isolates the lowest set bit of T - 1, so M is the number of
   trailing zero bits of T - 1.  */
2268 q = t - 1;
2269 q = q & -q;
2270 m = exact_log2 (q);
2271 if (m >= 0 && m < BITS_PER_WORD)
2273 cost = shiftadd_cost[m];
2274 synth_mult (alg_in, (t - 1) >> m, cost_limit - cost);
2276 cost += alg_in->cost;
2277 if (cost < cost_limit)
2279 struct algorithm *x;
2280 x = alg_in, alg_in = best_alg, best_alg = x;
2281 best_alg->log[best_alg->ops] = m;
2282 best_alg->op[best_alg->ops] = alg_add_t2_m;
2283 cost_limit = cost;
2287 q = t + 1;
2288 q = q & -q;
2289 m = exact_log2 (q);
2290 if (m >= 0 && m < BITS_PER_WORD)
2292 cost = shiftsub_cost[m];
2293 synth_mult (alg_in, (t + 1) >> m, cost_limit - cost);
2295 cost += alg_in->cost;
2296 if (cost < cost_limit)
2298 struct algorithm *x;
2299 x = alg_in, alg_in = best_alg, best_alg = x;
2300 best_alg->log[best_alg->ops] = m;
2301 best_alg->op[best_alg->ops] = alg_sub_t2_m;
2302 cost_limit = cost;
2307 /* If cost_limit has not decreased since we stored it in alg_out->cost,
2308 we have not found any algorithm. */
2309 if (cost_limit == alg_out->cost)
2310 return;
2312 /* If we are getting a too long sequence for `struct algorithm'
2313 to record, make this search fail. */
2314 if (best_alg->ops == MAX_BITS_PER_WORD)
2315 return;
2317 /* Copy the algorithm from temporary space to the space at alg_out.
2318 We avoid using structure assignment because the majority of
2319 best_alg is normally undefined, and this is a critical function. */
2320 alg_out->ops = best_alg->ops + 1;
2321 alg_out->cost = cost_limit;
2322 memcpy (alg_out->op, best_alg->op,
2323 alg_out->ops * sizeof *alg_out->op);
2324 memcpy (alg_out->log, best_alg->log,
2325 alg_out->ops * sizeof *alg_out->log);
2328 /* Perform a multiplication and return an rtx for the result.
2329 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
2330 TARGET is a suggestion for where to store the result (an rtx).
2332 We check specially for a constant integer as OP1.
2333 If you want this check for OP0 as well, then before calling
2334 you should swap the two operands if OP0 would be constant. */
2337 expand_mult (mode, op0, op1, target, unsignedp)
2338 enum machine_mode mode;
2339 rtx op0, op1, target;
2340 int unsignedp;
2342 rtx const_op1 = op1;
2344 /* synth_mult does an `unsigned int' multiply. As long as the mode is
2345 less than or equal in size to `unsigned int' this doesn't matter.
2346 If the mode is larger than `unsigned int', then synth_mult works only
2347 if the constant value exactly fits in an `unsigned int' without any
2348 truncation. This means that multiplying by negative values does
2349 not work; results are off by 2^32 on a 32 bit machine. */
2351 /* If we are multiplying in DImode, it may still be a win
2352 to try to work with shifts and adds. */
2353 if (GET_CODE (op1) == CONST_DOUBLE
2354 && GET_MODE_CLASS (GET_MODE (op1)) == MODE_INT
2355 && HOST_BITS_PER_INT >= BITS_PER_WORD
2356 && CONST_DOUBLE_HIGH (op1) == 0)
2357 const_op1 = GEN_INT (CONST_DOUBLE_LOW (op1));
2358 else if (HOST_BITS_PER_INT < GET_MODE_BITSIZE (mode)
2359 && GET_CODE (op1) == CONST_INT
2360 && INTVAL (op1) < 0)
2361 const_op1 = 0;
2363 /* We used to test optimize here, on the grounds that it's better to
2364 produce a smaller program when -O is not used.
2365 But this causes such a terrible slowdown sometimes
2366 that it seems better to use synth_mult always. */
2368 if (const_op1 && GET_CODE (const_op1) == CONST_INT
2369 && (unsignedp || ! flag_trapv))
2371 struct algorithm alg;
2372 struct algorithm alg2;
2373 HOST_WIDE_INT val = INTVAL (op1);
2374 HOST_WIDE_INT val_so_far;
2375 rtx insn;
2376 int mult_cost;
2377 enum {basic_variant, negate_variant, add_variant} variant = basic_variant;
2379 /* op0 must be register to make mult_cost match the precomputed
2380 shiftadd_cost array. */
2381 op0 = force_reg (mode, op0);
2383 /* Try to do the computation three ways: multiply by the negative of OP1
2384 and then negate, do the multiplication directly, or do multiplication
2385 by OP1 - 1. */
2387 mult_cost = rtx_cost (gen_rtx_MULT (mode, op0, op1), SET);
/* Cap the search budget: never consider a synthesized sequence
   costing more than a dozen additions.  */
2388 mult_cost = MIN (12 * add_cost, mult_cost);
2390 synth_mult (&alg, val, mult_cost);
2392 /* This works only if the inverted value actually fits in an
2393 `unsigned int' */
2394 if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
2396 synth_mult (&alg2, - val,
2397 (alg.cost < mult_cost ? alg.cost : mult_cost) - negate_cost);
2398 if (alg2.cost + negate_cost < alg.cost)
2399 alg = alg2, variant = negate_variant;
2402 /* This proves very useful for division-by-constant. */
2403 synth_mult (&alg2, val - 1,
2404 (alg.cost < mult_cost ? alg.cost : mult_cost) - add_cost);
2405 if (alg2.cost + add_cost < alg.cost)
2406 alg = alg2, variant = add_variant;
2408 if (alg.cost < mult_cost)
2410 /* We found something cheaper than a multiply insn. */
2411 int opno;
2412 rtx accum, tem;
2413 enum machine_mode nmode;
2415 op0 = protect_from_queue (op0, 0);
2417 /* Avoid referencing memory over and over.
2418 For speed, but also for correctness when mem is volatile. */
2419 if (GET_CODE (op0) == MEM)
2420 op0 = force_reg (mode, op0);
2422 /* ACCUM starts out either as OP0 or as a zero, depending on
2423 the first operation. */
2425 if (alg.op[0] == alg_zero)
2427 accum = copy_to_mode_reg (mode, const0_rtx);
2428 val_so_far = 0;
2430 else if (alg.op[0] == alg_m)
2432 accum = copy_to_mode_reg (mode, op0);
2433 val_so_far = 1;
2435 else
2436 abort ();
/* Replay the remaining steps of the chosen algorithm.  VAL_SO_FAR
   tracks the constant multiplied so far, for the sanity check at the
   end.  */
2438 for (opno = 1; opno < alg.ops; opno++)
2440 int log = alg.log[opno];
2441 int preserve = preserve_subexpressions_p ();
2442 rtx shift_subtarget = preserve ? 0 : accum;
2443 rtx add_target
2444 = (opno == alg.ops - 1 && target != 0 && variant != add_variant
2445 && ! preserve)
2446 ? target : 0;
2447 rtx accum_target = preserve ? 0 : accum;
2449 switch (alg.op[opno])
2451 case alg_shift:
2452 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2453 build_int_2 (log, 0), NULL_RTX, 0);
2454 val_so_far <<= log;
2455 break;
2457 case alg_add_t_m2:
2458 tem = expand_shift (LSHIFT_EXPR, mode, op0,
2459 build_int_2 (log, 0), NULL_RTX, 0);
2460 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2461 add_target
2462 ? add_target : accum_target);
2463 val_so_far += (HOST_WIDE_INT) 1 << log;
2464 break;
2466 case alg_sub_t_m2:
2467 tem = expand_shift (LSHIFT_EXPR, mode, op0,
2468 build_int_2 (log, 0), NULL_RTX, 0);
2469 accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
2470 add_target
2471 ? add_target : accum_target);
2472 val_so_far -= (HOST_WIDE_INT) 1 << log;
2473 break;
2475 case alg_add_t2_m:
2476 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2477 build_int_2 (log, 0), shift_subtarget,
2479 accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
2480 add_target
2481 ? add_target : accum_target);
2482 val_so_far = (val_so_far << log) + 1;
2483 break;
2485 case alg_sub_t2_m:
2486 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2487 build_int_2 (log, 0), shift_subtarget,
2489 accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
2490 add_target
2491 ? add_target : accum_target);
2492 val_so_far = (val_so_far << log) - 1;
2493 break;
2495 case alg_add_factor:
2496 tem = expand_shift (LSHIFT_EXPR, mode, accum,
2497 build_int_2 (log, 0), NULL_RTX, 0);
2498 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2499 add_target
2500 ? add_target : accum_target);
2501 val_so_far += val_so_far << log;
2502 break;
2504 case alg_sub_factor:
2505 tem = expand_shift (LSHIFT_EXPR, mode, accum,
2506 build_int_2 (log, 0), NULL_RTX, 0);
2507 accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
2508 (add_target ? add_target
2509 : preserve ? 0 : tem));
2510 val_so_far = (val_so_far << log) - val_so_far;
2511 break;
2513 default:
2514 abort ();
2517 /* Write a REG_EQUAL note on the last insn so that we can cse
2518 multiplication sequences. Note that if ACCUM is a SUBREG,
2519 we've set the inner register and must properly indicate
2520 that. */
2522 tem = op0, nmode = mode;
2523 if (GET_CODE (accum) == SUBREG)
2525 nmode = GET_MODE (SUBREG_REG (accum));
2526 tem = gen_lowpart (nmode, op0);
2529 insn = get_last_insn ();
2530 set_unique_reg_note (insn,
2531 REG_EQUAL,
2532 gen_rtx_MULT (nmode, tem,
2533 GEN_INT (val_so_far)));
2536 if (variant == negate_variant)
2538 val_so_far = - val_so_far;
2539 accum = expand_unop (mode, neg_optab, accum, target, 0);
2541 else if (variant == add_variant)
2543 val_so_far = val_so_far + 1;
2544 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
/* If the synthesized sequence did not compute exactly VAL, the
   search above is buggy.  */
2547 if (val != val_so_far)
2548 abort ();
2550 return accum;
2554 /* This used to use umul_optab if unsigned, but for non-widening multiply
2555 there is no difference between signed and unsigned. */
2556 op0 = expand_binop (mode,
2557 ! unsignedp
2558 && flag_trapv && (GET_MODE_CLASS(mode) == MODE_INT)
2559 ? smulv_optab : smul_optab,
2560 op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
2561 if (op0 == 0)
2562 abort ();
2563 return op0;
2566 /* Return the smallest n such that 2**n >= X. */
2569 ceil_log2 (x)
2570 unsigned HOST_WIDE_INT x;
2572 return floor_log2 (x - 1) + 1;
2575 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
2576 replace division by D, and put the least significant N bits of the result
2577 in *MULTIPLIER_PTR and return the most significant bit.
2579 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
2580 needed precision is in PRECISION (should be <= N).
2582 PRECISION should be as small as possible so this function can choose
2583 multiplier more freely.
2585 The rounded-up logarithm of D is placed in *lgup_ptr. A shift count that
2586 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
2588 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
2589 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
2591 static
2592 unsigned HOST_WIDE_INT
2593 choose_multiplier (d, n, precision, multiplier_ptr, post_shift_ptr, lgup_ptr)
2594 unsigned HOST_WIDE_INT d;
2595 int n;
2596 int precision;
2597 unsigned HOST_WIDE_INT *multiplier_ptr;
2598 int *post_shift_ptr;
2599 int *lgup_ptr;
/* MLOW and MHIGH are N+1-bit bounds on the multiplier, kept as
   double-word (hi, lo) pairs since they may not fit a single word.  */
2601 HOST_WIDE_INT mhigh_hi, mlow_hi;
2602 unsigned HOST_WIDE_INT mhigh_lo, mlow_lo;
2603 int lgup, post_shift;
2604 int pow, pow2;
2605 unsigned HOST_WIDE_INT nl, dummy1;
2606 HOST_WIDE_INT nh, dummy2;
2608 /* lgup = ceil(log2(divisor)); */
2609 lgup = ceil_log2 (d);
2611 if (lgup > n)
2612 abort ();
2614 pow = n + lgup;
2615 pow2 = n + lgup - precision;
2617 if (pow == 2 * HOST_BITS_PER_WIDE_INT)
2619 /* We could handle this with some effort, but this case is much better
2620 handled directly with a scc insn, so rely on caller using that. */
2621 abort ();
2624 /* mlow = 2^(N + lgup)/d */
2625 if (pow >= HOST_BITS_PER_WIDE_INT)
2627 nh = (HOST_WIDE_INT) 1 << (pow - HOST_BITS_PER_WIDE_INT);
2628 nl = 0;
2630 else
2632 nh = 0;
2633 nl = (unsigned HOST_WIDE_INT) 1 << pow;
2635 div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
2636 &mlow_lo, &mlow_hi, &dummy1, &dummy2);
2638 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
2639 if (pow2 >= HOST_BITS_PER_WIDE_INT)
2640 nh |= (HOST_WIDE_INT) 1 << (pow2 - HOST_BITS_PER_WIDE_INT);
2641 else
2642 nl |= (unsigned HOST_WIDE_INT) 1 << pow2;
2643 div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
2644 &mhigh_lo, &mhigh_hi, &dummy1, &dummy2);
/* Consistency checks on the computed bounds.  */
2646 if (mhigh_hi && nh - d >= d)
2647 abort ();
2648 if (mhigh_hi > 1 || mlow_hi > 1)
2649 abort ();
2650 /* assert that mlow < mhigh. */
2651 if (! (mlow_hi < mhigh_hi || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo)))
2652 abort ();
2654 /* If precision == N, then mlow, mhigh exceed 2^N
2655 (but they do not exceed 2^(N+1)). */
/* While halving both bounds still leaves them distinct, a shorter
   multiplier works: halve them and drop one bit of the post shift.  */
2657 /* Reduce to lowest terms */
2658 for (post_shift = lgup; post_shift > 0; post_shift--)
2660 unsigned HOST_WIDE_INT ml_lo = (mlow_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mlow_lo >> 1);
2661 unsigned HOST_WIDE_INT mh_lo = (mhigh_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mhigh_lo >> 1);
2662 if (ml_lo >= mh_lo)
2663 break;
2665 mlow_hi = 0;
2666 mlow_lo = ml_lo;
2667 mhigh_hi = 0;
2668 mhigh_lo = mh_lo;
2671 *post_shift_ptr = post_shift;
2672 *lgup_ptr = lgup;
2673 if (n < HOST_BITS_PER_WIDE_INT)
2675 unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
2676 *multiplier_ptr = mhigh_lo & mask;
2677 return mhigh_lo >= mask;
2679 else
2681 *multiplier_ptr = mhigh_lo;
2682 return mhigh_hi;
2686 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
2687 congruent to 1 (mod 2**N). */
2689 static unsigned HOST_WIDE_INT
2690 invert_mod2n (x, n)
2691 unsigned HOST_WIDE_INT x;
2692 int n;
2694 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
2696 /* The algorithm notes that the choice y = x satisfies
2697 x*y == 1 mod 2^3, since x is assumed odd.
2698 Each iteration doubles the number of bits of significance in y. */
2700 unsigned HOST_WIDE_INT mask;
2701 unsigned HOST_WIDE_INT y = x;
2702 int nbit = 3;
2704 mask = (n == HOST_BITS_PER_WIDE_INT
2705 ? ~(unsigned HOST_WIDE_INT) 0
2706 : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
2708 while (nbit < n)
2710 y = y * (2 - x*y) & mask; /* Modulo 2^N */
2711 nbit *= 2;
2713 return y;
2716 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
2717 flavor of OP0 and OP1. ADJ_OPERAND is already the high half of the
2718 product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed product
2719 to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
2720 become signed.
2722 The result is put in TARGET if that is convenient.
2724 MODE is the mode of operation. */
2727 expand_mult_highpart_adjust (mode, adj_operand, op0, op1, target, unsignedp)
2728 enum machine_mode mode;
2729 rtx adj_operand, op0, op1, target;
2730 int unsignedp;
2732 rtx tem;
2733 enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
2735 tem = expand_shift (RSHIFT_EXPR, mode, op0,
2736 build_int_2 (GET_MODE_BITSIZE (mode) - 1, 0),
2737 NULL_RTX, 0);
2738 tem = expand_and (mode, tem, op1, NULL_RTX);
2739 adj_operand
2740 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
2741 adj_operand);
2743 tem = expand_shift (RSHIFT_EXPR, mode, op1,
2744 build_int_2 (GET_MODE_BITSIZE (mode) - 1, 0),
2745 NULL_RTX, 0);
2746 tem = expand_and (mode, tem, op0, NULL_RTX);
2747 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
2748 target);
2750 return target;
2753 /* Emit code to multiply OP0 and CNST1, putting the high half of the result
2754 in TARGET if that is convenient, and return where the result is. If the
2755 operation can not be performed, 0 is returned.
2757 MODE is the mode of operation and result.
2759 UNSIGNEDP nonzero means unsigned multiply.
2761 MAX_COST is the total allowed cost for the expanded RTL. */
2764 expand_mult_highpart (mode, op0, cnst1, target, unsignedp, max_cost)
2765 enum machine_mode mode;
2766 rtx op0, target;
2767 unsigned HOST_WIDE_INT cnst1;
2768 int unsignedp;
2769 int max_cost;
2771 enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
2772 optab mul_highpart_optab;
2773 optab moptab;
2774 rtx tem;
2775 int size = GET_MODE_BITSIZE (mode);
2776 rtx op1, wide_op1;
2778 /* We can't support modes wider than HOST_BITS_PER_WIDE_INT. */
2779 if (size > HOST_BITS_PER_WIDE_INT)
2780 abort ();
2782 op1 = gen_int_mode (cnst1, mode);
/* WIDE_OP1 is CNST1 extended to WIDER_MODE: zero-extended when
   UNSIGNEDP, sign-extended otherwise.  */
2784 wide_op1
2785 = immed_double_const (cnst1,
2786 (unsignedp
2787 ? (HOST_WIDE_INT) 0
2788 : -(cnst1 >> (HOST_BITS_PER_WIDE_INT - 1))),
2789 wider_mode);
2791 /* expand_mult handles constant multiplication of word_mode
2792 or narrower. It does a poor job for large modes. */
2793 if (size < BITS_PER_WORD
2794 && mul_cost[(int) wider_mode] + shift_cost[size-1] < max_cost)
2796 /* We have to do this, since expand_binop doesn't do conversion for
2797 multiply. Maybe change expand_binop to handle widening multiply? */
2798 op0 = convert_to_mode (wider_mode, op0, unsignedp);
2800 /* We know that this can't have signed overflow, so pretend this is
2801 an unsigned multiply. */
2802 tem = expand_mult (wider_mode, op0, wide_op1, NULL_RTX, 0);
2803 tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
2804 build_int_2 (size, 0), NULL_RTX, 1);
2805 return convert_modes (mode, wider_mode, tem, unsignedp);
2808 if (target == 0)
2809 target = gen_reg_rtx (mode);
2811 /* Firstly, try using a multiplication insn that only generates the needed
2812 high part of the product, and in the sign flavor of unsignedp. */
2813 if (mul_highpart_cost[(int) mode] < max_cost)
2815 mul_highpart_optab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
2816 target = expand_binop (mode, mul_highpart_optab,
2817 op0, op1, target, unsignedp, OPTAB_DIRECT);
2818 if (target)
2819 return target;
2822 /* Secondly, same as above, but use sign flavor opposite of unsignedp.
2823 Need to adjust the result after the multiplication. */
2824 if (size - 1 < BITS_PER_WORD
2825 && (mul_highpart_cost[(int) mode] + 2 * shift_cost[size-1] + 4 * add_cost
2826 < max_cost))
2828 mul_highpart_optab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
2829 target = expand_binop (mode, mul_highpart_optab,
2830 op0, op1, target, unsignedp, OPTAB_DIRECT);
2831 if (target)
2832 /* We used the wrong signedness. Adjust the result. */
2833 return expand_mult_highpart_adjust (mode, target, op0,
2834 op1, target, unsignedp);
2837 /* Try widening multiplication. */
2838 moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
2839 if (moptab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
2840 && mul_widen_cost[(int) wider_mode] < max_cost)
2842 op1 = force_reg (mode, op1);
2843 goto try;
2846 /* Try widening the mode and perform a non-widening multiplication. */
2847 moptab = smul_optab;
2848 if (smul_optab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
2849 && size - 1 < BITS_PER_WORD
2850 && mul_cost[(int) wider_mode] + shift_cost[size-1] < max_cost)
2852 op1 = wide_op1;
2853 goto try;
2856 /* Try widening multiplication of opposite signedness, and adjust. */
2857 moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
2858 if (moptab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
2859 && size - 1 < BITS_PER_WORD
2860 && (mul_widen_cost[(int) wider_mode]
2861 + 2 * shift_cost[size-1] + 4 * add_cost < max_cost))
2863 rtx regop1 = force_reg (mode, op1);
2864 tem = expand_binop (wider_mode, moptab, op0, regop1,
2865 NULL_RTX, ! unsignedp, OPTAB_WIDEN);
2866 if (tem != 0)
2868 /* Extract the high half of the just generated product. */
2869 tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
2870 build_int_2 (size, 0), NULL_RTX, 1);
2871 tem = convert_modes (mode, wider_mode, tem, unsignedp);
2872 /* We used the wrong signedness. Adjust the result. */
2873 return expand_mult_highpart_adjust (mode, tem, op0, op1,
2874 target, unsignedp);
2878 return 0;
/* Common tail for the two widening strategies above: multiply in
   WIDER_MODE, then extract the high half.  */
2880 try:
2881 /* Pass NULL_RTX as target since TARGET has wrong mode. */
2882 tem = expand_binop (wider_mode, moptab, op0, op1,
2883 NULL_RTX, unsignedp, OPTAB_WIDEN);
2884 if (tem == 0)
2885 return 0;
2887 /* Extract the high half of the just generated product. */
2888 if (mode == word_mode)
2890 return gen_highpart (mode, tem);
2892 else
2894 tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
2895 build_int_2 (size, 0), NULL_RTX, 1);
2896 return convert_modes (mode, wider_mode, tem, unsignedp);
2900 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
2901 if that is convenient, and returning where the result is.
2902 You may request either the quotient or the remainder as the result;
2903 specify REM_FLAG nonzero to get the remainder.
2905 CODE is the expression code for which kind of division this is;
2906 it controls how rounding is done. MODE is the machine mode to use.
2907 UNSIGNEDP nonzero means do unsigned division. */
2909 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
2910 and then correct it by or'ing in missing high bits
2911 if result of ANDI is nonzero.
2912 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
2913 This could optimize to a bfexts instruction.
2914 But C doesn't use these operations, so their optimizations are
2915 left for later. */
2916 /* ??? For modulo, we don't actually need the highpart of the first product,
2917 the low part will do nicely. And for small divisors, the second multiply
2918 can also be a low-part only multiply or even be completely left out.
2919 E.g. to calculate the remainder of a division by 3 with a 32 bit
2920 multiply, multiply with 0x55555556 and extract the upper two bits;
2921 the result is exact for inputs up to 0x1fffffff.
2922 The input range can be reduced by using cross-sum rules.
2923 For odd divisors >= 3, the following table gives right shift counts
2924 so that if a number is shifted by an integer multiple of the given
2925 amount, the remainder stays the same:
2926 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
2927 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
2928 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
2929 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
2930 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
2932 Cross-sum rules for even numbers can be derived by leaving as many bits
2933 to the right alone as the divisor has zeros to the right.
2934 E.g. if x is an unsigned 32 bit number:
2935 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
2938 #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
2941 expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp)
2942 int rem_flag;
2943 enum tree_code code;
2944 enum machine_mode mode;
2945 rtx op0, op1, target;
2946 int unsignedp;
2948 enum machine_mode compute_mode;
2949 rtx tquotient;
2950 rtx quotient = 0, remainder = 0;
2951 rtx last;
2952 int size;
2953 rtx insn, set;
2954 optab optab1, optab2;
2955 int op1_is_constant, op1_is_pow2;
2956 int max_cost, extra_cost;
2957 static HOST_WIDE_INT last_div_const = 0;
2959 op1_is_constant = GET_CODE (op1) == CONST_INT;
2960 op1_is_pow2 = (op1_is_constant
2961 && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
2962 || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1))))));
2965 This is the structure of expand_divmod:
2967 First comes code to fix up the operands so we can perform the operations
2968 correctly and efficiently.
2970 Second comes a switch statement with code specific for each rounding mode.
2971 For some special operands this code emits all RTL for the desired
2972 operation, for other cases, it generates only a quotient and stores it in
2973 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
2974 to indicate that it has not done anything.
2976 Last comes code that finishes the operation. If QUOTIENT is set and
2977 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
2978 QUOTIENT is not set, it is computed using trunc rounding.
2980 We try to generate special code for division and remainder when OP1 is a
2981 constant. If |OP1| = 2**n we can use shifts and some other fast
2982 operations. For other values of OP1, we compute a carefully selected
2983 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
2984 by m.
2986 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
2987 half of the product. Different strategies for generating the product are
2988 implemented in expand_mult_highpart.
2990 If what we actually want is the remainder, we generate that by another
2991 by-constant multiplication and a subtraction. */
2993 /* We shouldn't be called with OP1 == const1_rtx, but some of the
2994 code below will malfunction if we are, so check here and handle
2995 the special case if so. */
2996 if (op1 == const1_rtx)
2997 return rem_flag ? const0_rtx : op0;
2999 /* When dividing by -1, we could get an overflow.
3000 negv_optab can handle overflows. */
3001 if (! unsignedp && op1 == constm1_rtx)
3003 if (rem_flag)
3004 return const0_rtx;
3005 return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT
3006 ? negv_optab : neg_optab, op0, target, 0);
3009 if (target
3010 /* Don't use the function value register as a target
3011 since we have to read it as well as write it,
3012 and function-inlining gets confused by this. */
3013 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3014 /* Don't clobber an operand while doing a multi-step calculation. */
3015 || ((rem_flag || op1_is_constant)
3016 && (reg_mentioned_p (target, op0)
3017 || (GET_CODE (op0) == MEM && GET_CODE (target) == MEM)))
3018 || reg_mentioned_p (target, op1)
3019 || (GET_CODE (op1) == MEM && GET_CODE (target) == MEM)))
3020 target = 0;
3022 /* Get the mode in which to perform this computation. Normally it will
3023 be MODE, but sometimes we can't do the desired operation in MODE.
3024 If so, pick a wider mode in which we can do the operation. Convert
3025 to that mode at the start to avoid repeated conversions.
3027 First see what operations we need. These depend on the expression
3028 we are evaluating. (We assume that divxx3 insns exist under the
3029 same conditions that modxx3 insns and that these insns don't normally
3030 fail. If these assumptions are not correct, we may generate less
3031 efficient code in some cases.)
3033 Then see if we find a mode in which we can open-code that operation
3034 (either a division, modulus, or shift). Finally, check for the smallest
3035 mode for which we can do the operation with a library call. */
3037 /* We might want to refine this now that we have division-by-constant
3038 optimization. Since expand_mult_highpart tries so many variants, it is
3039 not straightforward to generalize this. Maybe we should make an array
3040 of possible modes in init_expmed? Save this for GCC 2.7. */
3042 optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3043 ? (unsignedp ? lshr_optab : ashr_optab)
3044 : (unsignedp ? udiv_optab : sdiv_optab));
3045 optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3046 ? optab1
3047 : (unsignedp ? udivmod_optab : sdivmod_optab));
3049 for (compute_mode = mode; compute_mode != VOIDmode;
3050 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3051 if (optab1->handlers[(int) compute_mode].insn_code != CODE_FOR_nothing
3052 || optab2->handlers[(int) compute_mode].insn_code != CODE_FOR_nothing)
3053 break;
3055 if (compute_mode == VOIDmode)
3056 for (compute_mode = mode; compute_mode != VOIDmode;
3057 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3058 if (optab1->handlers[(int) compute_mode].libfunc
3059 || optab2->handlers[(int) compute_mode].libfunc)
3060 break;
3062 /* If we still couldn't find a mode, use MODE, but we'll probably abort
3063 in expand_binop. */
3064 if (compute_mode == VOIDmode)
3065 compute_mode = mode;
3067 if (target && GET_MODE (target) == compute_mode)
3068 tquotient = target;
3069 else
3070 tquotient = gen_reg_rtx (compute_mode);
3072 size = GET_MODE_BITSIZE (compute_mode);
3073 #if 0
3074 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3075 (mode), and thereby get better code when OP1 is a constant. Do that
3076 later. It will require going over all usages of SIZE below. */
3077 size = GET_MODE_BITSIZE (mode);
3078 #endif
3080 /* Only deduct something for a REM if the last divide done was
3081 for a different constant. Then set the constant of the last
3082 divide. */
3083 max_cost = div_cost[(int) compute_mode]
3084 - (rem_flag && ! (last_div_const != 0 && op1_is_constant
3085 && INTVAL (op1) == last_div_const)
3086 ? mul_cost[(int) compute_mode] + add_cost : 0);
3088 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
3090 /* Now convert to the best mode to use. */
3091 if (compute_mode != mode)
3093 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
3094 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
3096 /* convert_modes may have placed op1 into a register, so we
3097 must recompute the following. */
3098 op1_is_constant = GET_CODE (op1) == CONST_INT;
3099 op1_is_pow2 = (op1_is_constant
3100 && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
3101 || (! unsignedp
3102 && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1)))))) ;
3105 /* If one of the operands is a volatile MEM, copy it into a register. */
3107 if (GET_CODE (op0) == MEM && MEM_VOLATILE_P (op0))
3108 op0 = force_reg (compute_mode, op0);
3109 if (GET_CODE (op1) == MEM && MEM_VOLATILE_P (op1))
3110 op1 = force_reg (compute_mode, op1);
3112 /* If we need the remainder or if OP1 is constant, we need to
3113 put OP0 in a register in case it has any queued subexpressions. */
3114 if (rem_flag || op1_is_constant)
3115 op0 = force_reg (compute_mode, op0);
3117 last = get_last_insn ();
3119 /* Promote floor rounding to trunc rounding for unsigned operations. */
3120 if (unsignedp)
3122 if (code == FLOOR_DIV_EXPR)
3123 code = TRUNC_DIV_EXPR;
3124 if (code == FLOOR_MOD_EXPR)
3125 code = TRUNC_MOD_EXPR;
3126 if (code == EXACT_DIV_EXPR && op1_is_pow2)
3127 code = TRUNC_DIV_EXPR;
3130 if (op1 != const0_rtx)
3131 switch (code)
3133 case TRUNC_MOD_EXPR:
3134 case TRUNC_DIV_EXPR:
3135 if (op1_is_constant)
3137 if (unsignedp)
3139 unsigned HOST_WIDE_INT mh, ml;
3140 int pre_shift, post_shift;
3141 int dummy;
3142 unsigned HOST_WIDE_INT d = INTVAL (op1);
3144 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
3146 pre_shift = floor_log2 (d);
3147 if (rem_flag)
3149 remainder
3150 = expand_binop (compute_mode, and_optab, op0,
3151 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
3152 remainder, 1,
3153 OPTAB_LIB_WIDEN);
3154 if (remainder)
3155 return gen_lowpart (mode, remainder);
3157 quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3158 build_int_2 (pre_shift, 0),
3159 tquotient, 1);
3161 else if (size <= HOST_BITS_PER_WIDE_INT)
3163 if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
3165 /* Most significant bit of divisor is set; emit an scc
3166 insn. */
3167 quotient = emit_store_flag (tquotient, GEU, op0, op1,
3168 compute_mode, 1, 1);
3169 if (quotient == 0)
3170 goto fail1;
3172 else
3174 /* Find a suitable multiplier and right shift count
3175 instead of multiplying with D. */
3177 mh = choose_multiplier (d, size, size,
3178 &ml, &post_shift, &dummy);
3180 /* If the suggested multiplier is more than SIZE bits,
3181 we can do better for even divisors, using an
3182 initial right shift. */
3183 if (mh != 0 && (d & 1) == 0)
3185 pre_shift = floor_log2 (d & -d);
3186 mh = choose_multiplier (d >> pre_shift, size,
3187 size - pre_shift,
3188 &ml, &post_shift, &dummy);
3189 if (mh)
3190 abort ();
3192 else
3193 pre_shift = 0;
3195 if (mh != 0)
3197 rtx t1, t2, t3, t4;
3199 if (post_shift - 1 >= BITS_PER_WORD)
3200 goto fail1;
3202 extra_cost = (shift_cost[post_shift - 1]
3203 + shift_cost[1] + 2 * add_cost);
3204 t1 = expand_mult_highpart (compute_mode, op0, ml,
3205 NULL_RTX, 1,
3206 max_cost - extra_cost);
3207 if (t1 == 0)
3208 goto fail1;
3209 t2 = force_operand (gen_rtx_MINUS (compute_mode,
3210 op0, t1),
3211 NULL_RTX);
3212 t3 = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3213 build_int_2 (1, 0), NULL_RTX,1);
3214 t4 = force_operand (gen_rtx_PLUS (compute_mode,
3215 t1, t3),
3216 NULL_RTX);
3217 quotient
3218 = expand_shift (RSHIFT_EXPR, compute_mode, t4,
3219 build_int_2 (post_shift - 1, 0),
3220 tquotient, 1);
3222 else
3224 rtx t1, t2;
3226 if (pre_shift >= BITS_PER_WORD
3227 || post_shift >= BITS_PER_WORD)
3228 goto fail1;
3230 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3231 build_int_2 (pre_shift, 0),
3232 NULL_RTX, 1);
3233 extra_cost = (shift_cost[pre_shift]
3234 + shift_cost[post_shift]);
3235 t2 = expand_mult_highpart (compute_mode, t1, ml,
3236 NULL_RTX, 1,
3237 max_cost - extra_cost);
3238 if (t2 == 0)
3239 goto fail1;
3240 quotient
3241 = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3242 build_int_2 (post_shift, 0),
3243 tquotient, 1);
3247 else /* Too wide mode to use tricky code */
3248 break;
3250 insn = get_last_insn ();
3251 if (insn != last
3252 && (set = single_set (insn)) != 0
3253 && SET_DEST (set) == quotient)
3254 set_unique_reg_note (insn,
3255 REG_EQUAL,
3256 gen_rtx_UDIV (compute_mode, op0, op1));
3258 else /* TRUNC_DIV, signed */
3260 unsigned HOST_WIDE_INT ml;
3261 int lgup, post_shift;
3262 HOST_WIDE_INT d = INTVAL (op1);
3263 unsigned HOST_WIDE_INT abs_d = d >= 0 ? d : -d;
3265 /* n rem d = n rem -d */
3266 if (rem_flag && d < 0)
3268 d = abs_d;
3269 op1 = gen_int_mode (abs_d, compute_mode);
3272 if (d == 1)
3273 quotient = op0;
3274 else if (d == -1)
3275 quotient = expand_unop (compute_mode, neg_optab, op0,
3276 tquotient, 0);
3277 else if (abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
3279 /* This case is not handled correctly below. */
3280 quotient = emit_store_flag (tquotient, EQ, op0, op1,
3281 compute_mode, 1, 1);
3282 if (quotient == 0)
3283 goto fail1;
3285 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
3286 && (rem_flag ? smod_pow2_cheap : sdiv_pow2_cheap)
3287 /* ??? The cheap metric is computed only for
3288 word_mode. If this operation is wider, this may
3289 not be so. Assume true if the optab has an
3290 expander for this mode. */
3291 && (((rem_flag ? smod_optab : sdiv_optab)
3292 ->handlers[(int) compute_mode].insn_code
3293 != CODE_FOR_nothing)
3294 || (sdivmod_optab->handlers[(int) compute_mode]
3295 .insn_code != CODE_FOR_nothing)))
3297 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
3299 lgup = floor_log2 (abs_d);
3300 if (BRANCH_COST < 1 || (abs_d != 2 && BRANCH_COST < 3))
3302 rtx label = gen_label_rtx ();
3303 rtx t1;
3305 t1 = copy_to_mode_reg (compute_mode, op0);
3306 do_cmp_and_jump (t1, const0_rtx, GE,
3307 compute_mode, label);
3308 expand_inc (t1, gen_int_mode (abs_d - 1,
3309 compute_mode));
3310 emit_label (label);
3311 quotient = expand_shift (RSHIFT_EXPR, compute_mode, t1,
3312 build_int_2 (lgup, 0),
3313 tquotient, 0);
3315 else
3317 rtx t1, t2, t3;
3318 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3319 build_int_2 (size - 1, 0),
3320 NULL_RTX, 0);
3321 t2 = expand_shift (RSHIFT_EXPR, compute_mode, t1,
3322 build_int_2 (size - lgup, 0),
3323 NULL_RTX, 1);
3324 t3 = force_operand (gen_rtx_PLUS (compute_mode,
3325 op0, t2),
3326 NULL_RTX);
3327 quotient = expand_shift (RSHIFT_EXPR, compute_mode, t3,
3328 build_int_2 (lgup, 0),
3329 tquotient, 0);
3332 /* We have computed OP0 / abs(OP1). If OP1 is negative, negate
3333 the quotient. */
3334 if (d < 0)
3336 insn = get_last_insn ();
3337 if (insn != last
3338 && (set = single_set (insn)) != 0
3339 && SET_DEST (set) == quotient
3340 && abs_d < ((unsigned HOST_WIDE_INT) 1
3341 << (HOST_BITS_PER_WIDE_INT - 1)))
3342 set_unique_reg_note (insn,
3343 REG_EQUAL,
3344 gen_rtx_DIV (compute_mode,
3345 op0,
3346 GEN_INT
3347 (trunc_int_for_mode
3348 (abs_d,
3349 compute_mode))));
3351 quotient = expand_unop (compute_mode, neg_optab,
3352 quotient, quotient, 0);
3355 else if (size <= HOST_BITS_PER_WIDE_INT)
3357 choose_multiplier (abs_d, size, size - 1,
3358 &ml, &post_shift, &lgup);
3359 if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
3361 rtx t1, t2, t3;
3363 if (post_shift >= BITS_PER_WORD
3364 || size - 1 >= BITS_PER_WORD)
3365 goto fail1;
3367 extra_cost = (shift_cost[post_shift]
3368 + shift_cost[size - 1] + add_cost);
3369 t1 = expand_mult_highpart (compute_mode, op0, ml,
3370 NULL_RTX, 0,
3371 max_cost - extra_cost);
3372 if (t1 == 0)
3373 goto fail1;
3374 t2 = expand_shift (RSHIFT_EXPR, compute_mode, t1,
3375 build_int_2 (post_shift, 0), NULL_RTX, 0);
3376 t3 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3377 build_int_2 (size - 1, 0), NULL_RTX, 0);
3378 if (d < 0)
3379 quotient
3380 = force_operand (gen_rtx_MINUS (compute_mode,
3381 t3, t2),
3382 tquotient);
3383 else
3384 quotient
3385 = force_operand (gen_rtx_MINUS (compute_mode,
3386 t2, t3),
3387 tquotient);
3389 else
3391 rtx t1, t2, t3, t4;
3393 if (post_shift >= BITS_PER_WORD
3394 || size - 1 >= BITS_PER_WORD)
3395 goto fail1;
3397 ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
3398 extra_cost = (shift_cost[post_shift]
3399 + shift_cost[size - 1] + 2 * add_cost);
3400 t1 = expand_mult_highpart (compute_mode, op0, ml,
3401 NULL_RTX, 0,
3402 max_cost - extra_cost);
3403 if (t1 == 0)
3404 goto fail1;
3405 t2 = force_operand (gen_rtx_PLUS (compute_mode,
3406 t1, op0),
3407 NULL_RTX);
3408 t3 = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3409 build_int_2 (post_shift, 0),
3410 NULL_RTX, 0);
3411 t4 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3412 build_int_2 (size - 1, 0),
3413 NULL_RTX, 0);
3414 if (d < 0)
3415 quotient
3416 = force_operand (gen_rtx_MINUS (compute_mode,
3417 t4, t3),
3418 tquotient);
3419 else
3420 quotient
3421 = force_operand (gen_rtx_MINUS (compute_mode,
3422 t3, t4),
3423 tquotient);
3426 else /* Too wide mode to use tricky code */
3427 break;
3429 insn = get_last_insn ();
3430 if (insn != last
3431 && (set = single_set (insn)) != 0
3432 && SET_DEST (set) == quotient)
3433 set_unique_reg_note (insn,
3434 REG_EQUAL,
3435 gen_rtx_DIV (compute_mode, op0, op1));
3437 break;
3439 fail1:
3440 delete_insns_since (last);
3441 break;
3443 case FLOOR_DIV_EXPR:
3444 case FLOOR_MOD_EXPR:
3445 /* We will come here only for signed operations. */
3446 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
3448 unsigned HOST_WIDE_INT mh, ml;
3449 int pre_shift, lgup, post_shift;
3450 HOST_WIDE_INT d = INTVAL (op1);
3452 if (d > 0)
3454 /* We could just as easily deal with negative constants here,
3455 but it does not seem worth the trouble for GCC 2.6. */
3456 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
3458 pre_shift = floor_log2 (d);
3459 if (rem_flag)
3461 remainder = expand_binop (compute_mode, and_optab, op0,
3462 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
3463 remainder, 0, OPTAB_LIB_WIDEN);
3464 if (remainder)
3465 return gen_lowpart (mode, remainder);
3467 quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3468 build_int_2 (pre_shift, 0),
3469 tquotient, 0);
3471 else
3473 rtx t1, t2, t3, t4;
3475 mh = choose_multiplier (d, size, size - 1,
3476 &ml, &post_shift, &lgup);
3477 if (mh)
3478 abort ();
3480 if (post_shift < BITS_PER_WORD
3481 && size - 1 < BITS_PER_WORD)
3483 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3484 build_int_2 (size - 1, 0),
3485 NULL_RTX, 0);
3486 t2 = expand_binop (compute_mode, xor_optab, op0, t1,
3487 NULL_RTX, 0, OPTAB_WIDEN);
3488 extra_cost = (shift_cost[post_shift]
3489 + shift_cost[size - 1] + 2 * add_cost);
3490 t3 = expand_mult_highpart (compute_mode, t2, ml,
3491 NULL_RTX, 1,
3492 max_cost - extra_cost);
3493 if (t3 != 0)
3495 t4 = expand_shift (RSHIFT_EXPR, compute_mode, t3,
3496 build_int_2 (post_shift, 0),
3497 NULL_RTX, 1);
3498 quotient = expand_binop (compute_mode, xor_optab,
3499 t4, t1, tquotient, 0,
3500 OPTAB_WIDEN);
3505 else
3507 rtx nsign, t1, t2, t3, t4;
3508 t1 = force_operand (gen_rtx_PLUS (compute_mode,
3509 op0, constm1_rtx), NULL_RTX);
3510 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
3511 0, OPTAB_WIDEN);
3512 nsign = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3513 build_int_2 (size - 1, 0), NULL_RTX, 0);
3514 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
3515 NULL_RTX);
3516 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
3517 NULL_RTX, 0);
3518 if (t4)
3520 rtx t5;
3521 t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
3522 NULL_RTX, 0);
3523 quotient = force_operand (gen_rtx_PLUS (compute_mode,
3524 t4, t5),
3525 tquotient);
3530 if (quotient != 0)
3531 break;
3532 delete_insns_since (last);
3534 /* Try using an instruction that produces both the quotient and
3535 remainder, using truncation. We can easily compensate the quotient
3536 or remainder to get floor rounding, once we have the remainder.
3537 Notice that we compute also the final remainder value here,
3538 and return the result right away. */
3539 if (target == 0 || GET_MODE (target) != compute_mode)
3540 target = gen_reg_rtx (compute_mode);
3542 if (rem_flag)
3544 remainder
3545 = GET_CODE (target) == REG ? target : gen_reg_rtx (compute_mode);
3546 quotient = gen_reg_rtx (compute_mode);
3548 else
3550 quotient
3551 = GET_CODE (target) == REG ? target : gen_reg_rtx (compute_mode);
3552 remainder = gen_reg_rtx (compute_mode);
3555 if (expand_twoval_binop (sdivmod_optab, op0, op1,
3556 quotient, remainder, 0))
3558 /* This could be computed with a branch-less sequence.
3559 Save that for later. */
3560 rtx tem;
3561 rtx label = gen_label_rtx ();
3562 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
3563 tem = expand_binop (compute_mode, xor_optab, op0, op1,
3564 NULL_RTX, 0, OPTAB_WIDEN);
3565 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
3566 expand_dec (quotient, const1_rtx);
3567 expand_inc (remainder, op1);
3568 emit_label (label);
3569 return gen_lowpart (mode, rem_flag ? remainder : quotient);
3572 /* No luck with division elimination or divmod. Have to do it
3573 by conditionally adjusting op0 *and* the result. */
3575 rtx label1, label2, label3, label4, label5;
3576 rtx adjusted_op0;
3577 rtx tem;
3579 quotient = gen_reg_rtx (compute_mode);
3580 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
3581 label1 = gen_label_rtx ();
3582 label2 = gen_label_rtx ();
3583 label3 = gen_label_rtx ();
3584 label4 = gen_label_rtx ();
3585 label5 = gen_label_rtx ();
3586 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
3587 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
3588 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3589 quotient, 0, OPTAB_LIB_WIDEN);
3590 if (tem != quotient)
3591 emit_move_insn (quotient, tem);
3592 emit_jump_insn (gen_jump (label5));
3593 emit_barrier ();
3594 emit_label (label1);
3595 expand_inc (adjusted_op0, const1_rtx);
3596 emit_jump_insn (gen_jump (label4));
3597 emit_barrier ();
3598 emit_label (label2);
3599 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
3600 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3601 quotient, 0, OPTAB_LIB_WIDEN);
3602 if (tem != quotient)
3603 emit_move_insn (quotient, tem);
3604 emit_jump_insn (gen_jump (label5));
3605 emit_barrier ();
3606 emit_label (label3);
3607 expand_dec (adjusted_op0, const1_rtx);
3608 emit_label (label4);
3609 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3610 quotient, 0, OPTAB_LIB_WIDEN);
3611 if (tem != quotient)
3612 emit_move_insn (quotient, tem);
3613 expand_dec (quotient, const1_rtx);
3614 emit_label (label5);
3616 break;
3618 case CEIL_DIV_EXPR:
3619 case CEIL_MOD_EXPR:
3620 if (unsignedp)
3622 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
3624 rtx t1, t2, t3;
3625 unsigned HOST_WIDE_INT d = INTVAL (op1);
3626 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3627 build_int_2 (floor_log2 (d), 0),
3628 tquotient, 1);
3629 t2 = expand_binop (compute_mode, and_optab, op0,
3630 GEN_INT (d - 1),
3631 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3632 t3 = gen_reg_rtx (compute_mode);
3633 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
3634 compute_mode, 1, 1);
3635 if (t3 == 0)
3637 rtx lab;
3638 lab = gen_label_rtx ();
3639 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
3640 expand_inc (t1, const1_rtx);
3641 emit_label (lab);
3642 quotient = t1;
3644 else
3645 quotient = force_operand (gen_rtx_PLUS (compute_mode,
3646 t1, t3),
3647 tquotient);
3648 break;
3651 /* Try using an instruction that produces both the quotient and
3652 remainder, using truncation. We can easily compensate the
3653 quotient or remainder to get ceiling rounding, once we have the
3654 remainder. Notice that we compute also the final remainder
3655 value here, and return the result right away. */
3656 if (target == 0 || GET_MODE (target) != compute_mode)
3657 target = gen_reg_rtx (compute_mode);
3659 if (rem_flag)
3661 remainder = (GET_CODE (target) == REG
3662 ? target : gen_reg_rtx (compute_mode));
3663 quotient = gen_reg_rtx (compute_mode);
3665 else
3667 quotient = (GET_CODE (target) == REG
3668 ? target : gen_reg_rtx (compute_mode));
3669 remainder = gen_reg_rtx (compute_mode);
3672 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
3673 remainder, 1))
3675 /* This could be computed with a branch-less sequence.
3676 Save that for later. */
3677 rtx label = gen_label_rtx ();
3678 do_cmp_and_jump (remainder, const0_rtx, EQ,
3679 compute_mode, label);
3680 expand_inc (quotient, const1_rtx);
3681 expand_dec (remainder, op1);
3682 emit_label (label);
3683 return gen_lowpart (mode, rem_flag ? remainder : quotient);
3686 /* No luck with division elimination or divmod. Have to do it
3687 by conditionally adjusting op0 *and* the result. */
3689 rtx label1, label2;
3690 rtx adjusted_op0, tem;
3692 quotient = gen_reg_rtx (compute_mode);
3693 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
3694 label1 = gen_label_rtx ();
3695 label2 = gen_label_rtx ();
3696 do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
3697 compute_mode, label1);
3698 emit_move_insn (quotient, const0_rtx);
3699 emit_jump_insn (gen_jump (label2));
3700 emit_barrier ();
3701 emit_label (label1);
3702 expand_dec (adjusted_op0, const1_rtx);
3703 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
3704 quotient, 1, OPTAB_LIB_WIDEN);
3705 if (tem != quotient)
3706 emit_move_insn (quotient, tem);
3707 expand_inc (quotient, const1_rtx);
3708 emit_label (label2);
3711 else /* signed */
3713 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
3714 && INTVAL (op1) >= 0)
3716 /* This is extremely similar to the code for the unsigned case
3717 above. For 2.7 we should merge these variants, but for
3718 2.6.1 I don't want to touch the code for unsigned since that
3719 get used in C. The signed case will only be used by other
3720 languages (Ada). */
3722 rtx t1, t2, t3;
3723 unsigned HOST_WIDE_INT d = INTVAL (op1);
3724 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3725 build_int_2 (floor_log2 (d), 0),
3726 tquotient, 0);
3727 t2 = expand_binop (compute_mode, and_optab, op0,
3728 GEN_INT (d - 1),
3729 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3730 t3 = gen_reg_rtx (compute_mode);
3731 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
3732 compute_mode, 1, 1);
3733 if (t3 == 0)
3735 rtx lab;
3736 lab = gen_label_rtx ();
3737 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
3738 expand_inc (t1, const1_rtx);
3739 emit_label (lab);
3740 quotient = t1;
3742 else
3743 quotient = force_operand (gen_rtx_PLUS (compute_mode,
3744 t1, t3),
3745 tquotient);
3746 break;
3749 /* Try using an instruction that produces both the quotient and
3750 remainder, using truncation. We can easily compensate the
3751 quotient or remainder to get ceiling rounding, once we have the
3752 remainder. Notice that we compute also the final remainder
3753 value here, and return the result right away. */
3754 if (target == 0 || GET_MODE (target) != compute_mode)
3755 target = gen_reg_rtx (compute_mode);
3756 if (rem_flag)
3758 remainder= (GET_CODE (target) == REG
3759 ? target : gen_reg_rtx (compute_mode));
3760 quotient = gen_reg_rtx (compute_mode);
3762 else
3764 quotient = (GET_CODE (target) == REG
3765 ? target : gen_reg_rtx (compute_mode));
3766 remainder = gen_reg_rtx (compute_mode);
3769 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
3770 remainder, 0))
3772 /* This could be computed with a branch-less sequence.
3773 Save that for later. */
3774 rtx tem;
3775 rtx label = gen_label_rtx ();
3776 do_cmp_and_jump (remainder, const0_rtx, EQ,
3777 compute_mode, label);
3778 tem = expand_binop (compute_mode, xor_optab, op0, op1,
3779 NULL_RTX, 0, OPTAB_WIDEN);
3780 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
3781 expand_inc (quotient, const1_rtx);
3782 expand_dec (remainder, op1);
3783 emit_label (label);
3784 return gen_lowpart (mode, rem_flag ? remainder : quotient);
3787 /* No luck with division elimination or divmod. Have to do it
3788 by conditionally adjusting op0 *and* the result. */
3790 rtx label1, label2, label3, label4, label5;
3791 rtx adjusted_op0;
3792 rtx tem;
3794 quotient = gen_reg_rtx (compute_mode);
3795 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
3796 label1 = gen_label_rtx ();
3797 label2 = gen_label_rtx ();
3798 label3 = gen_label_rtx ();
3799 label4 = gen_label_rtx ();
3800 label5 = gen_label_rtx ();
3801 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
3802 do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
3803 compute_mode, label1);
3804 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3805 quotient, 0, OPTAB_LIB_WIDEN);
3806 if (tem != quotient)
3807 emit_move_insn (quotient, tem);
3808 emit_jump_insn (gen_jump (label5));
3809 emit_barrier ();
3810 emit_label (label1);
3811 expand_dec (adjusted_op0, const1_rtx);
3812 emit_jump_insn (gen_jump (label4));
3813 emit_barrier ();
3814 emit_label (label2);
3815 do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
3816 compute_mode, label3);
3817 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3818 quotient, 0, OPTAB_LIB_WIDEN);
3819 if (tem != quotient)
3820 emit_move_insn (quotient, tem);
3821 emit_jump_insn (gen_jump (label5));
3822 emit_barrier ();
3823 emit_label (label3);
3824 expand_inc (adjusted_op0, const1_rtx);
3825 emit_label (label4);
3826 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3827 quotient, 0, OPTAB_LIB_WIDEN);
3828 if (tem != quotient)
3829 emit_move_insn (quotient, tem);
3830 expand_inc (quotient, const1_rtx);
3831 emit_label (label5);
3834 break;
3836 case EXACT_DIV_EXPR:
3837 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
3839 HOST_WIDE_INT d = INTVAL (op1);
3840 unsigned HOST_WIDE_INT ml;
3841 int pre_shift;
3842 rtx t1;
3844 pre_shift = floor_log2 (d & -d);
3845 ml = invert_mod2n (d >> pre_shift, size);
3846 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3847 build_int_2 (pre_shift, 0), NULL_RTX, unsignedp);
3848 quotient = expand_mult (compute_mode, t1,
3849 gen_int_mode (ml, compute_mode),
3850 NULL_RTX, 0);
3852 insn = get_last_insn ();
3853 set_unique_reg_note (insn,
3854 REG_EQUAL,
3855 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
3856 compute_mode,
3857 op0, op1));
3859 break;
3861 case ROUND_DIV_EXPR:
3862 case ROUND_MOD_EXPR:
3863 if (unsignedp)
3865 rtx tem;
3866 rtx label;
3867 label = gen_label_rtx ();
3868 quotient = gen_reg_rtx (compute_mode);
3869 remainder = gen_reg_rtx (compute_mode);
3870 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
3872 rtx tem;
3873 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
3874 quotient, 1, OPTAB_LIB_WIDEN);
3875 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
3876 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
3877 remainder, 1, OPTAB_LIB_WIDEN);
3879 tem = plus_constant (op1, -1);
3880 tem = expand_shift (RSHIFT_EXPR, compute_mode, tem,
3881 build_int_2 (1, 0), NULL_RTX, 1);
3882 do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
3883 expand_inc (quotient, const1_rtx);
3884 expand_dec (remainder, op1);
3885 emit_label (label);
3887 else
3889 rtx abs_rem, abs_op1, tem, mask;
3890 rtx label;
3891 label = gen_label_rtx ();
3892 quotient = gen_reg_rtx (compute_mode);
3893 remainder = gen_reg_rtx (compute_mode);
3894 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
3896 rtx tem;
3897 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
3898 quotient, 0, OPTAB_LIB_WIDEN);
3899 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
3900 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
3901 remainder, 0, OPTAB_LIB_WIDEN);
3903 abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
3904 abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
3905 tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
3906 build_int_2 (1, 0), NULL_RTX, 1);
3907 do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
3908 tem = expand_binop (compute_mode, xor_optab, op0, op1,
3909 NULL_RTX, 0, OPTAB_WIDEN);
3910 mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
3911 build_int_2 (size - 1, 0), NULL_RTX, 0);
3912 tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
3913 NULL_RTX, 0, OPTAB_WIDEN);
3914 tem = expand_binop (compute_mode, sub_optab, tem, mask,
3915 NULL_RTX, 0, OPTAB_WIDEN);
3916 expand_inc (quotient, tem);
3917 tem = expand_binop (compute_mode, xor_optab, mask, op1,
3918 NULL_RTX, 0, OPTAB_WIDEN);
3919 tem = expand_binop (compute_mode, sub_optab, tem, mask,
3920 NULL_RTX, 0, OPTAB_WIDEN);
3921 expand_dec (remainder, tem);
3922 emit_label (label);
3924 return gen_lowpart (mode, rem_flag ? remainder : quotient);
3926 default:
3927 abort ();
3930 if (quotient == 0)
3932 if (target && GET_MODE (target) != compute_mode)
3933 target = 0;
3935 if (rem_flag)
3937 /* Try to produce the remainder without producing the quotient.
3938 If we seem to have a divmod pattern that does not require widening,
3939 don't try widening here. We should really have an WIDEN argument
3940 to expand_twoval_binop, since what we'd really like to do here is
3941 1) try a mod insn in compute_mode
3942 2) try a divmod insn in compute_mode
3943 3) try a div insn in compute_mode and multiply-subtract to get
3944 remainder
3945 4) try the same things with widening allowed. */
3946 remainder
3947 = sign_expand_binop (compute_mode, umod_optab, smod_optab,
3948 op0, op1, target,
3949 unsignedp,
3950 ((optab2->handlers[(int) compute_mode].insn_code
3951 != CODE_FOR_nothing)
3952 ? OPTAB_DIRECT : OPTAB_WIDEN));
3953 if (remainder == 0)
3955 /* No luck there. Can we do remainder and divide at once
3956 without a library call? */
3957 remainder = gen_reg_rtx (compute_mode);
3958 if (! expand_twoval_binop ((unsignedp
3959 ? udivmod_optab
3960 : sdivmod_optab),
3961 op0, op1,
3962 NULL_RTX, remainder, unsignedp))
3963 remainder = 0;
3966 if (remainder)
3967 return gen_lowpart (mode, remainder);
3970 /* Produce the quotient. Try a quotient insn, but not a library call.
3971 If we have a divmod in this mode, use it in preference to widening
3972 the div (for this test we assume it will not fail). Note that optab2
3973 is set to the one of the two optabs that the call below will use. */
3974 quotient
3975 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
3976 op0, op1, rem_flag ? NULL_RTX : target,
3977 unsignedp,
3978 ((optab2->handlers[(int) compute_mode].insn_code
3979 != CODE_FOR_nothing)
3980 ? OPTAB_DIRECT : OPTAB_WIDEN));
3982 if (quotient == 0)
3984 /* No luck there. Try a quotient-and-remainder insn,
3985 keeping the quotient alone. */
3986 quotient = gen_reg_rtx (compute_mode);
3987 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
3988 op0, op1,
3989 quotient, NULL_RTX, unsignedp))
3991 quotient = 0;
3992 if (! rem_flag)
3993 /* Still no luck. If we are not computing the remainder,
3994 use a library call for the quotient. */
3995 quotient = sign_expand_binop (compute_mode,
3996 udiv_optab, sdiv_optab,
3997 op0, op1, target,
3998 unsignedp, OPTAB_LIB_WIDEN);
4003 if (rem_flag)
4005 if (target && GET_MODE (target) != compute_mode)
4006 target = 0;
4008 if (quotient == 0)
4009 /* No divide instruction either. Use library for remainder. */
4010 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4011 op0, op1, target,
4012 unsignedp, OPTAB_LIB_WIDEN);
4013 else
4015 /* We divided. Now finish doing X - Y * (X / Y). */
4016 remainder = expand_mult (compute_mode, quotient, op1,
4017 NULL_RTX, unsignedp);
4018 remainder = expand_binop (compute_mode, sub_optab, op0,
4019 remainder, target, unsignedp,
4020 OPTAB_LIB_WIDEN);
4024 return gen_lowpart (mode, rem_flag ? remainder : quotient);
/* Return a tree node with data type TYPE, describing the value of X.
   Usually this is an RTL_EXPR, if there is no obvious better choice.
   X may be an expression, however we only support those expressions
   generated by loop.c.  */

tree
make_tree (type, x)
     tree type;
     rtx x;
{
  tree t;

  switch (GET_CODE (x))
    {
    case CONST_INT:
      /* Choose the high word of the INTEGER_CST: zero when the value is
	 nonnegative, or when TYPE is unsigned and narrower than a
	 HOST_WIDE_INT (so no sign-extension is wanted); -1 otherwise.  */
      t = build_int_2 (INTVAL (x),
		       (TREE_UNSIGNED (type)
			&& (GET_MODE_BITSIZE (TYPE_MODE (type)) < HOST_BITS_PER_WIDE_INT))
		       || INTVAL (x) >= 0 ? 0 : -1);
      TREE_TYPE (t) = type;
      return t;

    case CONST_DOUBLE:
      if (GET_MODE (x) == VOIDmode)
	{
	  /* A VOIDmode CONST_DOUBLE holds a two-word integer constant.  */
	  t = build_int_2 (CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
	  TREE_TYPE (t) = type;
	}
      else
	{
	  REAL_VALUE_TYPE d;

	  REAL_VALUE_FROM_CONST_DOUBLE (d, x);
	  t = build_real (type, d);
	}

      return t;

    case CONST_VECTOR:
      {
	int i, units;
	rtx elt;
	tree t = NULL_TREE;

	units = CONST_VECTOR_NUNITS (x);

	/* Build a tree with vector elements.  Iterate from the last
	   element down so the TREE_LIST ends up in ascending order.  */
	for (i = units - 1; i >= 0; --i)
	  {
	    elt = CONST_VECTOR_ELT (x, i);
	    t = tree_cons (NULL_TREE, make_tree (type, elt), t);
	  }

	return build_vector (type, t);
      }

    case PLUS:
      return fold (build (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
			  make_tree (type, XEXP (x, 1))));

    case MINUS:
      return fold (build (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
			  make_tree (type, XEXP (x, 1))));

    case NEG:
      return fold (build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0))));

    case MULT:
      return fold (build (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
			  make_tree (type, XEXP (x, 1))));

    case ASHIFT:
      return fold (build (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
			  make_tree (type, XEXP (x, 1))));

    case LSHIFTRT:
      /* A logical right shift is a signed/unsigned-sensitive operation:
	 perform it in the unsigned variant of TYPE, then convert back.  */
      t = (*lang_hooks.types.unsigned_type) (type);
      return fold (convert (type,
			    build (RSHIFT_EXPR, t,
				   make_tree (t, XEXP (x, 0)),
				   make_tree (type, XEXP (x, 1)))));

    case ASHIFTRT:
      /* Likewise, an arithmetic right shift must be done in a signed type.  */
      t = (*lang_hooks.types.signed_type) (type);
      return fold (convert (type,
			    build (RSHIFT_EXPR, t,
				   make_tree (t, XEXP (x, 0)),
				   make_tree (type, XEXP (x, 1)))));

    case DIV:
      /* RTL DIV is signed division; force a signed type unless TYPE is
	 a floating-point type (where signedness does not apply).  */
      if (TREE_CODE (type) != REAL_TYPE)
	t = (*lang_hooks.types.signed_type) (type);
      else
	t = type;

      return fold (convert (type,
			    build (TRUNC_DIV_EXPR, t,
				   make_tree (t, XEXP (x, 0)),
				   make_tree (t, XEXP (x, 1)))));
    case UDIV:
      /* RTL UDIV is unsigned division; force an unsigned type.  */
      t = (*lang_hooks.types.unsigned_type) (type);
      return fold (convert (type,
			    build (TRUNC_DIV_EXPR, t,
				   make_tree (t, XEXP (x, 0)),
				   make_tree (t, XEXP (x, 1)))));

    case SIGN_EXTEND:
    case ZERO_EXTEND:
      /* Express the extension as a conversion from the inner operand's
	 type (signed for SIGN_EXTEND, unsigned for ZERO_EXTEND).  */
      t = (*lang_hooks.types.type_for_mode) (GET_MODE (XEXP (x, 0)),
					     GET_CODE (x) == ZERO_EXTEND);
      return fold (convert (type, make_tree (t, XEXP (x, 0))));

    default:
      /* No direct tree equivalent: wrap the rtx in an RTL_EXPR.  */
      t = make_node (RTL_EXPR);
      TREE_TYPE (t) = type;

#ifdef POINTERS_EXTEND_UNSIGNED
      /* If TYPE is a POINTER_TYPE, X might be Pmode with TYPE_MODE being
	 ptr_mode.  So convert.  */
      if (POINTER_TYPE_P (type) && GET_MODE (x) != TYPE_MODE (type))
	x = convert_memory_address (TYPE_MODE (type), x);
#endif

      RTL_EXPR_RTL (t) = x;
      /* There are no insns to be output
	 when this rtl_expr is used.  */
      RTL_EXPR_SEQUENCE (t) = 0;
      return t;
    }
}
4158 /* Check whether the multiplication X * MULT + ADD overflows.
4159 X, MULT and ADD must be CONST_*.
4160 MODE is the machine mode for the computation.
4161 X and MULT must have mode MODE. ADD may have a different mode.
4162 So can X (defaults to same as MODE).
4163 UNSIGNEDP is nonzero to do unsigned multiplication. */
4165 bool
4166 const_mult_add_overflow_p (x, mult, add, mode, unsignedp)
4167 rtx x, mult, add;
4168 enum machine_mode mode;
4169 int unsignedp;
4171 tree type, mult_type, add_type, result;
4173 type = (*lang_hooks.types.type_for_mode) (mode, unsignedp);
4175 /* In order to get a proper overflow indication from an unsigned
4176 type, we have to pretend that it's a sizetype. */
4177 mult_type = type;
4178 if (unsignedp)
4180 mult_type = copy_node (type);
4181 TYPE_IS_SIZETYPE (mult_type) = 1;
4184 add_type = (GET_MODE (add) == VOIDmode ? mult_type
4185 : (*lang_hooks.types.type_for_mode) (GET_MODE (add), unsignedp));
4187 result = fold (build (PLUS_EXPR, mult_type,
4188 fold (build (MULT_EXPR, mult_type,
4189 make_tree (mult_type, x),
4190 make_tree (mult_type, mult))),
4191 make_tree (add_type, add)));
4193 return TREE_CONSTANT_OVERFLOW (result);
4196 /* Return an rtx representing the value of X * MULT + ADD.
4197 TARGET is a suggestion for where to store the result (an rtx).
4198 MODE is the machine mode for the computation.
4199 X and MULT must have mode MODE. ADD may have a different mode.
4200 So can X (defaults to same as MODE).
4201 UNSIGNEDP is nonzero to do unsigned multiplication.
4202 This may emit insns. */
4205 expand_mult_add (x, target, mult, add, mode, unsignedp)
4206 rtx x, target, mult, add;
4207 enum machine_mode mode;
4208 int unsignedp;
4210 tree type = (*lang_hooks.types.type_for_mode) (mode, unsignedp);
4211 tree add_type = (GET_MODE (add) == VOIDmode
4212 ? type: (*lang_hooks.types.type_for_mode) (GET_MODE (add),
4213 unsignedp));
4214 tree result = fold (build (PLUS_EXPR, type,
4215 fold (build (MULT_EXPR, type,
4216 make_tree (type, x),
4217 make_tree (type, mult))),
4218 make_tree (add_type, add)));
4220 return expand_expr (result, target, VOIDmode, 0);
4223 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
4224 and returning TARGET.
4226 If TARGET is 0, a pseudo-register or constant is returned. */
4229 expand_and (mode, op0, op1, target)
4230 enum machine_mode mode;
4231 rtx op0, op1, target;
4233 rtx tem = 0;
4235 if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
4236 tem = simplify_binary_operation (AND, mode, op0, op1);
4237 if (tem == 0)
4238 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
4240 if (target == 0)
4241 target = tem;
4242 else if (tem != target)
4243 emit_move_insn (target, tem);
4244 return target;
/* Emit a store-flags instruction for comparison CODE on OP0 and OP1
   and storing in TARGET.  Normally return TARGET.
   Return 0 if that cannot be done.

   MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
   it is VOIDmode, they cannot both be CONST_INT.

   UNSIGNEDP is for the case where we have to widen the operands
   to perform the operation.  It says to use zero-extension.

   NORMALIZEP is 1 if we should convert the result to be either zero
   or one.  NORMALIZEP is -1 if we should convert the result to be
   either zero or -1.  If NORMALIZEP is zero, the result will be left
   "raw" out of the scc insn.  */

rtx
emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep)
     rtx target;
     enum rtx_code code;
     rtx op0, op1;
     enum machine_mode mode;
     int unsignedp;
     int normalizep;
{
  rtx subtarget;
  enum insn_code icode;
  enum machine_mode compare_mode;
  enum machine_mode target_mode = GET_MODE (target);
  rtx tem;
  rtx last = get_last_insn ();
  rtx pattern, comparison;

  /* ??? Ok to do this and then fail? */
  op0 = protect_from_queue (op0, 0);
  op1 = protect_from_queue (op1, 0);

  if (unsignedp)
    code = unsigned_condition (code);

  /* If one operand is constant, make it the second one.  Only do this
     if the other operand is not constant as well.  */

  if (swap_commutative_operands_p (op0, op1))
    {
      tem = op0;
      op0 = op1;
      op1 = tem;
      code = swap_condition (code);
    }

  if (mode == VOIDmode)
    mode = GET_MODE (op0);

  /* For some comparisons with 1 and -1, we can convert this to
     comparisons with zero.  This will often produce more opportunities for
     store-flag insns.  */

  switch (code)
    {
    case LT:
      /* x < 1 is x <= 0.  */
      if (op1 == const1_rtx)
	op1 = const0_rtx, code = LE;
      break;
    case LE:
      /* x <= -1 is x < 0.  */
      if (op1 == constm1_rtx)
	op1 = const0_rtx, code = LT;
      break;
    case GE:
      /* x >= 1 is x > 0.  */
      if (op1 == const1_rtx)
	op1 = const0_rtx, code = GT;
      break;
    case GT:
      /* x > -1 is x >= 0.  */
      if (op1 == constm1_rtx)
	op1 = const0_rtx, code = GE;
      break;
    case GEU:
      /* (unsigned) x >= 1 is x != 0.  */
      if (op1 == const1_rtx)
	op1 = const0_rtx, code = NE;
      break;
    case LTU:
      /* (unsigned) x < 1 is x == 0.  */
      if (op1 == const1_rtx)
	op1 = const0_rtx, code = EQ;
      break;
    default:
      break;
    }

  /* If we are comparing a double-word integer with zero, we can convert
     the comparison into one involving a single word.  */
  if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
      && GET_MODE_CLASS (mode) == MODE_INT
      && op1 == const0_rtx
      && (GET_CODE (op0) != MEM || ! MEM_VOLATILE_P (op0)))
    {
      if (code == EQ || code == NE)
	{
	  /* Do a logical OR of the two words and compare the result.  */
	  rtx op0h = gen_highpart (word_mode, op0);
	  rtx op0l = gen_lowpart (word_mode, op0);
	  rtx op0both = expand_binop (word_mode, ior_optab, op0h, op0l,
				      NULL_RTX, unsignedp, OPTAB_DIRECT);
	  if (op0both != 0)
	    return emit_store_flag (target, code, op0both, op1, word_mode,
				    unsignedp, normalizep);
	}
      else if (code == LT || code == GE)
	/* If testing the sign bit, can just test on high word.  */
	return emit_store_flag (target, code, gen_highpart (word_mode, op0),
				op1, word_mode, unsignedp, normalizep);
    }

  /* From now on, we won't change CODE, so set ICODE now.  */
  icode = setcc_gen_code[(int) code];

  /* If this is A < 0 or A >= 0, we can do this by taking the ones
     complement of A (for GE) and shifting the sign bit to the low bit.  */
  if (op1 == const0_rtx && (code == LT || code == GE)
      && GET_MODE_CLASS (mode) == MODE_INT
      && (normalizep || STORE_FLAG_VALUE == 1
	  || (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
	      && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
		  == (HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))))
    {
      subtarget = target;

      /* If the result is to be wider than OP0, it is best to convert it
	 first.  If it is to be narrower, it is *incorrect* to convert it
	 first.  */
      if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
	{
	  op0 = protect_from_queue (op0, 0);
	  op0 = convert_modes (target_mode, mode, op0, 0);
	  mode = target_mode;
	}

      if (target_mode != mode)
	subtarget = 0;

      if (code == GE)
	op0 = expand_unop (mode, one_cmpl_optab, op0,
			   ((STORE_FLAG_VALUE == 1 || normalizep)
			    ? 0 : subtarget), 0);

      if (STORE_FLAG_VALUE == 1 || normalizep)
	/* If we are supposed to produce a 0/1 value, we want to do
	   a logical shift from the sign bit to the low-order bit; for
	   a -1/0 value, we do an arithmetic shift.  */
	op0 = expand_shift (RSHIFT_EXPR, mode, op0,
			    size_int (GET_MODE_BITSIZE (mode) - 1),
			    subtarget, normalizep != -1);

      if (mode != target_mode)
	op0 = convert_modes (target_mode, mode, op0, 0);

      return op0;
    }

  if (icode != CODE_FOR_nothing)
    {
      insn_operand_predicate_fn pred;

      /* We think we may be able to do this with a scc insn.  Emit the
	 comparison and then the scc insn.

	 compare_from_rtx may call emit_queue, which would be deleted below
	 if the scc insn fails.  So call it ourselves before setting LAST.
	 Likewise for do_pending_stack_adjust.  */

      emit_queue ();
      do_pending_stack_adjust ();
      last = get_last_insn ();

      comparison
	= compare_from_rtx (op0, op1, code, unsignedp, mode, NULL_RTX);
      if (GET_CODE (comparison) == CONST_INT)
	return (comparison == const0_rtx ? const0_rtx
		: normalizep == 1 ? const1_rtx
		: normalizep == -1 ? constm1_rtx
		: const_true_rtx);

      /* The code of COMPARISON may not match CODE if compare_from_rtx
	 decided to swap its operands and reverse the original code.

	 We know that compare_from_rtx returns either a CONST_INT or
	 a new comparison code, so it is safe to just extract the
	 code from COMPARISON.  */
      code = GET_CODE (comparison);

      /* Get a reference to the target in the proper mode for this insn.  */
      compare_mode = insn_data[(int) icode].operand[0].mode;
      subtarget = target;
      pred = insn_data[(int) icode].operand[0].predicate;
      if (preserve_subexpressions_p ()
	  || ! (*pred) (subtarget, compare_mode))
	subtarget = gen_reg_rtx (compare_mode);

      pattern = GEN_FCN (icode) (subtarget);
      if (pattern)
	{
	  emit_insn (pattern);

	  /* If we are converting to a wider mode, first convert to
	     TARGET_MODE, then normalize.  This produces better combining
	     opportunities on machines that have a SIGN_EXTRACT when we are
	     testing a single bit.  This mostly benefits the 68k.

	     If STORE_FLAG_VALUE does not have the sign bit set when
	     interpreted in COMPARE_MODE, we can do this conversion as
	     unsigned, which is usually more efficient.  */
	  if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (compare_mode))
	    {
	      convert_move (target, subtarget,
			    (GET_MODE_BITSIZE (compare_mode)
			     <= HOST_BITS_PER_WIDE_INT)
			    && 0 == (STORE_FLAG_VALUE
				     & ((HOST_WIDE_INT) 1
					<< (GET_MODE_BITSIZE (compare_mode) -1))));
	      op0 = target;
	      compare_mode = target_mode;
	    }
	  else
	    op0 = subtarget;

	  /* If we want to keep subexpressions around, don't reuse our
	     last target.  */

	  if (preserve_subexpressions_p ())
	    subtarget = 0;

	  /* Now normalize to the proper value in COMPARE_MODE.  Sometimes
	     we don't have to do anything.  */
	  if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
	    ;
	  /* STORE_FLAG_VALUE might be the most negative number, so write
	     the comparison this way to avoid a compiler-time warning.  */
	  else if (- normalizep == STORE_FLAG_VALUE)
	    op0 = expand_unop (compare_mode, neg_optab, op0, subtarget, 0);

	  /* We don't want to use STORE_FLAG_VALUE < 0 below since this
	     makes it hard to use a value of just the sign bit due to
	     ANSI integer constant typing rules.  */
	  else if (GET_MODE_BITSIZE (compare_mode) <= HOST_BITS_PER_WIDE_INT
		   && (STORE_FLAG_VALUE
		       & ((HOST_WIDE_INT) 1
			  << (GET_MODE_BITSIZE (compare_mode) - 1))))
	    op0 = expand_shift (RSHIFT_EXPR, compare_mode, op0,
				size_int (GET_MODE_BITSIZE (compare_mode) - 1),
				subtarget, normalizep == 1);
	  else if (STORE_FLAG_VALUE & 1)
	    {
	      op0 = expand_and (compare_mode, op0, const1_rtx, subtarget);
	      if (normalizep == -1)
		op0 = expand_unop (compare_mode, neg_optab, op0, op0, 0);
	    }
	  else
	    abort ();

	  /* If we were converting to a smaller mode, do the
	     conversion now.  */
	  if (target_mode != compare_mode)
	    {
	      convert_move (target, op0, 0);
	      return target;
	    }
	  else
	    return op0;
	}

      delete_insns_since (last);
    }

  /* If expensive optimizations, use different pseudo registers for each
     insn, instead of reusing the same pseudo.  This leads to better CSE,
     but slows down the compiler, since there are more pseudos */
  subtarget = (!flag_expensive_optimizations
	       && (target_mode == mode)) ? target : NULL_RTX;

  /* If we reached here, we can't do this with a scc insn.  However, there
     are some comparisons that can be done directly.  For example, if
     this is an equality comparison of integers, we can try to exclusive-or
     (or subtract) the two operands and use a recursive call to try the
     comparison with zero.  Don't do any of these cases if branches are
     very cheap.  */

  if (BRANCH_COST > 0
      && GET_MODE_CLASS (mode) == MODE_INT && (code == EQ || code == NE)
      && op1 != const0_rtx)
    {
      tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
			  OPTAB_WIDEN);

      if (tem == 0)
	tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
			    OPTAB_WIDEN);
      if (tem != 0)
	tem = emit_store_flag (target, code, tem, const0_rtx,
			       mode, unsignedp, normalizep);
      if (tem == 0)
	delete_insns_since (last);
      return tem;
    }

  /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
     the constant zero.  Reject all other comparisons at this point.  Only
     do LE and GT if branches are expensive since they are expensive on
     2-operand machines.  */

  if (BRANCH_COST == 0
      || GET_MODE_CLASS (mode) != MODE_INT || op1 != const0_rtx
      || (code != EQ && code != NE
	  && (BRANCH_COST <= 1 || (code != LE && code != GT))))
    return 0;

  /* See what we need to return.  We can only return a 1, -1, or the
     sign bit.  */

  if (normalizep == 0)
    {
      if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
	normalizep = STORE_FLAG_VALUE;

      else if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
	       && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
		   == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))
	;
      else
	return 0;
    }

  /* Try to put the result of the comparison in the sign bit.  Assume we can't
     do the necessary operation below.  */

  tem = 0;

  /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
     the sign bit set.  */

  if (code == LE)
    {
      /* This is destructive, so SUBTARGET can't be OP0.  */
      if (rtx_equal_p (subtarget, op0))
	subtarget = 0;

      tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
			  OPTAB_WIDEN);
      if (tem)
	tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
			    OPTAB_WIDEN);
    }

  /* To see if A > 0, compute (((signed) A) << BITS) - A, where BITS is the
     number of bits in the mode of OP0, minus one.  */

  if (code == GT)
    {
      if (rtx_equal_p (subtarget, op0))
	subtarget = 0;

      tem = expand_shift (RSHIFT_EXPR, mode, op0,
			  size_int (GET_MODE_BITSIZE (mode) - 1),
			  subtarget, 0);
      tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
			  OPTAB_WIDEN);
    }

  if (code == EQ || code == NE)
    {
      /* For EQ or NE, one way to do the comparison is to apply an operation
	 that converts the operand into a positive number if it is nonzero
	 or zero if it was originally zero.  Then, for EQ, we subtract 1 and
	 for NE we negate.  This puts the result in the sign bit.  Then we
	 normalize with a shift, if needed.

	 Two operations that can do the above actions are ABS and FFS, so try
	 them.  If that doesn't work, and MODE is smaller than a full word,
	 we can use zero-extension to the wider mode (an unsigned conversion)
	 as the operation.  */

      /* Note that ABS doesn't yield a positive number for INT_MIN, but
	 that is compensated by the subsequent overflow when subtracting
	 one / negating.  */

      if (abs_optab->handlers[(int) mode].insn_code != CODE_FOR_nothing)
	tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
      else if (ffs_optab->handlers[(int) mode].insn_code != CODE_FOR_nothing)
	tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
      else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
	{
	  op0 = protect_from_queue (op0, 0);
	  tem = convert_modes (word_mode, mode, op0, 1);
	  mode = word_mode;
	}

      if (tem != 0)
	{
	  if (code == EQ)
	    tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
				0, OPTAB_WIDEN);
	  else
	    tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
	}

      /* If we couldn't do it that way, for NE we can "or" the two's complement
	 of the value with itself.  For EQ, we take the one's complement of
	 that "or", which is an extra insn, so we only handle EQ if branches
	 are expensive.  */

      if (tem == 0 && (code == NE || BRANCH_COST > 1))
	{
	  if (rtx_equal_p (subtarget, op0))
	    subtarget = 0;

	  tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
	  tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
			      OPTAB_WIDEN);

	  if (tem && code == EQ)
	    tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
	}
    }

  /* The result is now in the sign bit; shift it down to the low bit
     (logical shift for 0/1, arithmetic shift for 0/-1).  */
  if (tem && normalizep)
    tem = expand_shift (RSHIFT_EXPR, mode, tem,
			size_int (GET_MODE_BITSIZE (mode) - 1),
			subtarget, normalizep == 1);

  if (tem)
    {
      if (GET_MODE (tem) != target_mode)
	{
	  convert_move (target, tem, 0);
	  tem = target;
	}
      else if (!subtarget)
	{
	  emit_move_insn (target, tem);
	  tem = target;
	}
    }
  else
    delete_insns_since (last);

  return tem;
}
4692 /* Like emit_store_flag, but always succeeds. */
4695 emit_store_flag_force (target, code, op0, op1, mode, unsignedp, normalizep)
4696 rtx target;
4697 enum rtx_code code;
4698 rtx op0, op1;
4699 enum machine_mode mode;
4700 int unsignedp;
4701 int normalizep;
4703 rtx tem, label;
4705 /* First see if emit_store_flag can do the job. */
4706 tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
4707 if (tem != 0)
4708 return tem;
4710 if (normalizep == 0)
4711 normalizep = 1;
4713 /* If this failed, we have to do this with set/compare/jump/set code. */
4715 if (GET_CODE (target) != REG
4716 || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
4717 target = gen_reg_rtx (GET_MODE (target));
4719 emit_move_insn (target, const1_rtx);
4720 label = gen_label_rtx ();
4721 do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
4722 NULL_RTX, label);
4724 emit_move_insn (target, const0_rtx);
4725 emit_label (label);
4727 return target;
/* Perform possibly multi-word comparison and conditional jump to LABEL
   if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE

   The algorithm is based on the code in expr.c:do_jump.

   Note that this does not perform a general comparison.  Only variants
   generated within expmed.c are correctly handled, others abort (but could
   be handled if needed).  */

static void
do_cmp_and_jump (arg1, arg2, op, mode, label)
     rtx arg1, arg2, label;
     enum rtx_code op;
     enum machine_mode mode;
{
  /* If this mode is an integer too wide to compare properly,
     compare word by word.  Rely on cse to optimize constant cases.  */

  if (GET_MODE_CLASS (mode) == MODE_INT
      && ! can_compare_p (op, mode, ccp_jump))
    {
      /* LABEL2 is the fall-through point reached when the condition
	 does not hold; it is emitted just past the jump code.  */
      rtx label2 = gen_label_rtx ();

      /* NOTE(review): each case maps OP onto a "greater than" test by
	 swapping the operands and/or the true/false labels as needed —
	 presumably matching do_jump_by_parts_greater_rtx's contract;
	 confirm against its definition in expr.c.  */
      switch (op)
	{
	case LTU:
	  do_jump_by_parts_greater_rtx (mode, 1, arg2, arg1, label2, label);
	  break;

	case LEU:
	  do_jump_by_parts_greater_rtx (mode, 1, arg1, arg2, label, label2);
	  break;

	case LT:
	  do_jump_by_parts_greater_rtx (mode, 0, arg2, arg1, label2, label);
	  break;

	case GT:
	  do_jump_by_parts_greater_rtx (mode, 0, arg1, arg2, label2, label);
	  break;

	case GE:
	  do_jump_by_parts_greater_rtx (mode, 0, arg2, arg1, label, label2);
	  break;

	  /* do_jump_by_parts_equality_rtx compares with zero.  Luckily
	     that's the only equality operations we do */
	case EQ:
	  if (arg2 != const0_rtx || mode != GET_MODE(arg1))
	    abort ();
	  do_jump_by_parts_equality_rtx (arg1, label2, label);
	  break;

	case NE:
	  if (arg2 != const0_rtx || mode != GET_MODE(arg1))
	    abort ();
	  do_jump_by_parts_equality_rtx (arg1, label, label2);
	  break;

	default:
	  abort ();
	}

      emit_label (label2);
    }
  else
    /* The mode is directly comparable: emit an ordinary compare/jump.  */
    emit_cmp_and_jump_insns (arg1, arg2, op, NULL_RTX, mode, 0, label);
}