/* Medium-level subroutines: convert bit-field store and extract
   and shifts, multiplies and divides to rtl instructions.
   Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
   1999, 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Software Foundation, 59 Temple Place - Suite 330, Boston, MA
02111-1307, USA.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "toplev.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "flags.h"
#include "insn-config.h"
#include "expr.h"
#include "optabs.h"
#include "real.h"
#include "recog.h"
#include "langhooks.h"

static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
                                   unsigned HOST_WIDE_INT,
                                   unsigned HOST_WIDE_INT, rtx);
static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
                                   unsigned HOST_WIDE_INT, rtx);
static rtx extract_fixed_bit_field (enum machine_mode, rtx,
                                    unsigned HOST_WIDE_INT,
                                    unsigned HOST_WIDE_INT,
                                    unsigned HOST_WIDE_INT, rtx, int);
static rtx mask_rtx (enum machine_mode, int, int, int);
static rtx lshift_value (enum machine_mode, rtx, int, int);
static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
                                    unsigned HOST_WIDE_INT, int);
static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);

/* Nonzero means divides or modulus operations are relatively cheap for
   powers of two, so don't use branches; emit the operation instead.
   Usually, this will mean that the MD file will emit non-branch
   sequences.  */

static int sdiv_pow2_cheap, smod_pow2_cheap;

#ifndef SLOW_UNALIGNED_ACCESS
#define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
#endif

/* For compilers that support multiple targets with different word sizes,
   MAX_BITS_PER_WORD contains the biggest value of BITS_PER_WORD.  An example
   is the H8/300(H) compiler.  */

#ifndef MAX_BITS_PER_WORD
#define MAX_BITS_PER_WORD BITS_PER_WORD
#endif

/* Reduce conditional compilation elsewhere.  */
#ifndef HAVE_insv
#define HAVE_insv 0
#define CODE_FOR_insv CODE_FOR_nothing
#define gen_insv(a,b,c,d) NULL_RTX
#endif
#ifndef HAVE_extv
#define HAVE_extv 0
#define CODE_FOR_extv CODE_FOR_nothing
#define gen_extv(a,b,c,d) NULL_RTX
#endif
#ifndef HAVE_extzv
#define HAVE_extzv 0
#define CODE_FOR_extzv CODE_FOR_nothing
#define gen_extzv(a,b,c,d) NULL_RTX
#endif

/* Cost of various pieces of RTL.  Note that some of these are indexed by
   shift count and some by mode.  */
static int add_cost, negate_cost, zero_cost;
static int shift_cost[MAX_BITS_PER_WORD];
static int shiftadd_cost[MAX_BITS_PER_WORD];
static int shiftsub_cost[MAX_BITS_PER_WORD];
static int mul_cost[NUM_MACHINE_MODES];
static int div_cost[NUM_MACHINE_MODES];
static int mul_widen_cost[NUM_MACHINE_MODES];
static int mul_highpart_cost[NUM_MACHINE_MODES];
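
/* For example, after init_expmed below runs, shift_cost[3] is the
   target's cost of shifting a word_mode register left by 3, and
   shiftadd_cost[3] the cost of the (plus (mult reg 8) reg)
   shift-and-add form; the multiply-synthesis code later in this file
   consults these tables when expanding multiplications by constants.  */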

void
init_expmed (void)
{
  rtx reg, shift_insn, shiftadd_insn, shiftsub_insn;
  int dummy;
  int m;
  enum machine_mode mode, wider_mode;

  start_sequence ();

  /* This is "some random pseudo register" for purposes of calling recog
     to see what insns exist.  */
  reg = gen_rtx_REG (word_mode, 10000);

  zero_cost = rtx_cost (const0_rtx, 0);
  add_cost = rtx_cost (gen_rtx_PLUS (word_mode, reg, reg), SET);

  shift_insn = emit_insn (gen_rtx_SET (VOIDmode, reg,
                                       gen_rtx_ASHIFT (word_mode, reg,
                                                       const0_rtx)));

  shiftadd_insn
    = emit_insn (gen_rtx_SET (VOIDmode, reg,
                              gen_rtx_PLUS (word_mode,
                                            gen_rtx_MULT (word_mode,
                                                          reg, const0_rtx),
                                            reg)));

  shiftsub_insn
    = emit_insn (gen_rtx_SET (VOIDmode, reg,
                              gen_rtx_MINUS (word_mode,
                                             gen_rtx_MULT (word_mode,
                                                           reg, const0_rtx),
                                             reg)));

  init_recog ();

  shift_cost[0] = 0;
  shiftadd_cost[0] = shiftsub_cost[0] = add_cost;

  for (m = 1; m < MAX_BITS_PER_WORD; m++)
    {
      rtx c_int = GEN_INT ((HOST_WIDE_INT) 1 << m);
      shift_cost[m] = shiftadd_cost[m] = shiftsub_cost[m] = 32000;

      XEXP (SET_SRC (PATTERN (shift_insn)), 1) = GEN_INT (m);
      if (recog (PATTERN (shift_insn), shift_insn, &dummy) >= 0)
        shift_cost[m] = rtx_cost (SET_SRC (PATTERN (shift_insn)), SET);

      XEXP (XEXP (SET_SRC (PATTERN (shiftadd_insn)), 0), 1) = c_int;
      if (recog (PATTERN (shiftadd_insn), shiftadd_insn, &dummy) >= 0)
        shiftadd_cost[m] = rtx_cost (SET_SRC (PATTERN (shiftadd_insn)), SET);

      XEXP (XEXP (SET_SRC (PATTERN (shiftsub_insn)), 0), 1) = c_int;
      if (recog (PATTERN (shiftsub_insn), shiftsub_insn, &dummy) >= 0)
        shiftsub_cost[m] = rtx_cost (SET_SRC (PATTERN (shiftsub_insn)), SET);
    }

  negate_cost = rtx_cost (gen_rtx_NEG (word_mode, reg), SET);

  sdiv_pow2_cheap
    = (rtx_cost (gen_rtx_DIV (word_mode, reg, GEN_INT (32)), SET)
       <= 2 * add_cost);
  smod_pow2_cheap
    = (rtx_cost (gen_rtx_MOD (word_mode, reg, GEN_INT (32)), SET)
       <= 2 * add_cost);

  for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
       mode != VOIDmode;
       mode = GET_MODE_WIDER_MODE (mode))
    {
      reg = gen_rtx_REG (mode, 10000);
      div_cost[(int) mode] = rtx_cost (gen_rtx_UDIV (mode, reg, reg), SET);
      mul_cost[(int) mode] = rtx_cost (gen_rtx_MULT (mode, reg, reg), SET);
      wider_mode = GET_MODE_WIDER_MODE (mode);
      if (wider_mode != VOIDmode)
        {
          mul_widen_cost[(int) wider_mode]
            = rtx_cost (gen_rtx_MULT (wider_mode,
                                      gen_rtx_ZERO_EXTEND (wider_mode, reg),
                                      gen_rtx_ZERO_EXTEND (wider_mode, reg)),
                        SET);
          mul_highpart_cost[(int) mode]
            = rtx_cost (gen_rtx_TRUNCATE
                        (mode,
                         gen_rtx_LSHIFTRT (wider_mode,
                                           gen_rtx_MULT (wider_mode,
                                                         gen_rtx_ZERO_EXTEND
                                                         (wider_mode, reg),
                                                         gen_rtx_ZERO_EXTEND
                                                         (wider_mode, reg)),
                                           GEN_INT (GET_MODE_BITSIZE (mode)))),
                        SET);
        }
    }

  end_sequence ();
}
/* Return an rtx representing minus the value of X.
   MODE is the intended mode of the result,
   useful if X is a CONST_INT.  */

rtx
negate_rtx (enum machine_mode mode, rtx x)
{
  rtx result = simplify_unary_operation (NEG, mode, x, mode);

  if (result == 0)
    result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);

  return result;
}

/* Report on the availability of insv/extv/extzv and the desired mode
   of each of their operands.  Returns MAX_MACHINE_MODE if HAVE_foo
   is false; else the mode of the specified operand.  If OPNO is -1,
   all the caller cares about is whether the insn is available.  */

enum machine_mode
mode_for_extraction (enum extraction_pattern pattern, int opno)
{
  const struct insn_data *data;

  switch (pattern)
    {
    case EP_insv:
      if (HAVE_insv)
        {
          data = &insn_data[CODE_FOR_insv];
          break;
        }
      return MAX_MACHINE_MODE;

    case EP_extv:
      if (HAVE_extv)
        {
          data = &insn_data[CODE_FOR_extv];
          break;
        }
      return MAX_MACHINE_MODE;

    case EP_extzv:
      if (HAVE_extzv)
        {
          data = &insn_data[CODE_FOR_extzv];
          break;
        }
      return MAX_MACHINE_MODE;

    default:
      abort ();
    }

  if (opno == -1)
    return VOIDmode;

  /* Everyone who uses this function used to follow it with
     if (result == VOIDmode) result = word_mode; */
  if (data->operand[opno].mode == VOIDmode)
    return word_mode;
  return data->operand[opno].mode;
}
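
/* A typical call, as in store_bit_field below:

     enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);

   yields the mode the target's insv pattern wants for its value
   operand (word_mode if the pattern declares it VOIDmode), or
   MAX_MACHINE_MODE when the target provides no insv at all.  */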

/* Generate code to store value from rtx VALUE
   into a bit-field within structure STR_RTX
   containing BITSIZE bits starting at bit BITNUM.
   FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
   TOTAL_SIZE is the size of the structure in bytes, or -1 if varying.  */

/* ??? Note that there are two different ideas here for how
   to determine the size to count bits within, for a register.
   One is BITS_PER_WORD, and the other is the size of operand 3
   of the insv pattern.

   If operand 3 of the insv pattern is VOIDmode, then we will use BITS_PER_WORD;
   else, we use the mode of operand 3.  */
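
/* For concreteness, on a 32-bit target

     store_bit_field (x, 8, 5, QImode, value, -1)

   stores the low 8 bits of VALUE into bits 5..12 of X, picking among
   a plain move, a vec_set pattern, a movstrict insn, the insv pattern,
   or the shift-and-mask fallback in store_fixed_bit_field.  */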

rtx
store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
                 unsigned HOST_WIDE_INT bitnum, enum machine_mode fieldmode,
                 rtx value, HOST_WIDE_INT total_size)
{
  unsigned int unit
    = (GET_CODE (str_rtx) == MEM) ? BITS_PER_UNIT : BITS_PER_WORD;
  unsigned HOST_WIDE_INT offset = bitnum / unit;
  unsigned HOST_WIDE_INT bitpos = bitnum % unit;
  rtx op0 = str_rtx;
  int byte_offset;
  rtx orig_value;

  enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);

  /* Discount the part of the structure before the desired byte.
     We need to know how many bytes are safe to reference after it.  */
  if (total_size >= 0)
    total_size -= (bitpos / BIGGEST_ALIGNMENT
                   * (BIGGEST_ALIGNMENT / BITS_PER_UNIT));

  while (GET_CODE (op0) == SUBREG)
    {
      /* The following line once was done only if WORDS_BIG_ENDIAN,
         but I think that is a mistake.  WORDS_BIG_ENDIAN is
         meaningful at a much higher level; when structures are copied
         between memory and regs, the higher-numbered regs
         always get higher addresses.  */
      offset += (SUBREG_BYTE (op0) / UNITS_PER_WORD);
      /* We used to adjust BITPOS here, but now we do the whole adjustment
         right after the loop.  */
      op0 = SUBREG_REG (op0);
    }

  value = protect_from_queue (value, 0);
  /* Use vec_set patterns for inserting parts of vectors whenever
     available.  */
  if (VECTOR_MODE_P (GET_MODE (op0))
      && GET_CODE (op0) != MEM
      && (vec_set_optab->handlers[(int)GET_MODE (op0)].insn_code
          != CODE_FOR_nothing)
      && fieldmode == GET_MODE_INNER (GET_MODE (op0))
      && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
      && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
    {
      enum machine_mode outermode = GET_MODE (op0);
      enum machine_mode innermode = GET_MODE_INNER (outermode);
      int icode = (int) vec_set_optab->handlers[(int) outermode].insn_code;
      int pos = bitnum / GET_MODE_BITSIZE (innermode);
      rtx rtxpos = GEN_INT (pos);
      rtx src = value;
      rtx dest = op0;
      rtx pat, seq;
      enum machine_mode mode0 = insn_data[icode].operand[0].mode;
      enum machine_mode mode1 = insn_data[icode].operand[1].mode;
      enum machine_mode mode2 = insn_data[icode].operand[2].mode;

      start_sequence ();

      if (! (*insn_data[icode].operand[1].predicate) (src, mode1))
        src = copy_to_mode_reg (mode1, src);

      if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
        rtxpos = copy_to_mode_reg (mode2, rtxpos);

      /* We could handle this, but we should always be called with a pseudo
         for our targets and all insns should take them as outputs.  */
      if (! (*insn_data[icode].operand[0].predicate) (dest, mode0)
          || ! (*insn_data[icode].operand[1].predicate) (src, mode1)
          || ! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
        abort ();
      pat = GEN_FCN (icode) (dest, src, rtxpos);
      seq = get_insns ();
      end_sequence ();
      if (pat)
        {
          emit_insn (seq);
          emit_insn (pat);
          return dest;
        }
    }
  if (flag_force_mem)
    {
      int old_generating_concat_p = generating_concat_p;
      generating_concat_p = 0;
      value = force_not_mem (value);
      generating_concat_p = old_generating_concat_p;
    }

  /* If the target is a register, overwriting the entire object, or storing
     a full-word or multi-word field can be done with just a SUBREG.

     If the target is memory, storing any naturally aligned field can be
     done with a simple store.  For targets that support fast unaligned
     memory, any naturally sized, unit aligned field can be done directly.  */

  byte_offset = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
                + (offset * UNITS_PER_WORD);

  if (bitpos == 0
      && bitsize == GET_MODE_BITSIZE (fieldmode)
      && (GET_CODE (op0) != MEM
          ? ((GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD
             || GET_MODE_SIZE (GET_MODE (op0)) == GET_MODE_SIZE (fieldmode))
             && byte_offset % GET_MODE_SIZE (fieldmode) == 0)
          : (! SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0))
             || (offset * BITS_PER_UNIT % bitsize == 0
                 && MEM_ALIGN (op0) % GET_MODE_BITSIZE (fieldmode) == 0))))
    {
      if (GET_MODE (op0) != fieldmode)
        {
          if (GET_CODE (op0) == SUBREG)
            {
              if (GET_MODE (SUBREG_REG (op0)) == fieldmode
                  || GET_MODE_CLASS (fieldmode) == MODE_INT
                  || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT)
                op0 = SUBREG_REG (op0);
              else
                /* Else we've got some float mode source being extracted into
                   a different float mode destination -- this combination of
                   subregs results in Severe Tire Damage.  */
                abort ();
            }
          if (GET_CODE (op0) == REG)
            op0 = gen_rtx_SUBREG (fieldmode, op0, byte_offset);
          else
            op0 = adjust_address (op0, fieldmode, offset);
        }
      emit_move_insn (op0, value);
      return value;
    }

  /* Make sure we are playing with integral modes.  Pun with subregs
     if we aren't.  This must come after the entire register case above,
     since that case is valid for any mode.  The following cases are only
     valid for integral modes.  */
  {
    enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
    if (imode != GET_MODE (op0))
      {
        if (GET_CODE (op0) == MEM)
          op0 = adjust_address (op0, imode, 0);
        else if (imode != BLKmode)
          op0 = gen_lowpart (imode, op0);
        else
          abort ();
      }
  }

  /* We may be accessing data outside the field, which means
     we can alias adjacent data.  */
  if (GET_CODE (op0) == MEM)
    {
      op0 = shallow_copy_rtx (op0);
      set_mem_alias_set (op0, 0);
      set_mem_expr (op0, 0);
    }

  /* If OP0 is a register, BITPOS must count within a word.
     But as we have it, it counts within whatever size OP0 now has.
     On a bigendian machine, these are not the same, so convert.  */
  if (BYTES_BIG_ENDIAN
      && GET_CODE (op0) != MEM
      && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
    bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
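  /* For example, if OP0 has been narrowed to a QImode register and
     UNIT is 32, the adjustment above adds 32 - 8 = 24 so that BITPOS
     counts from the msb of the byte actually present in OP0.  */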
  /* Storing an lsb-aligned field in a register
     can be done with a movestrict instruction.  */

  if (GET_CODE (op0) != MEM
      && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
      && bitsize == GET_MODE_BITSIZE (fieldmode)
      && (movstrict_optab->handlers[(int) fieldmode].insn_code
          != CODE_FOR_nothing))
    {
      int icode = movstrict_optab->handlers[(int) fieldmode].insn_code;

      /* Get appropriate low part of the value being stored.  */
      if (GET_CODE (value) == CONST_INT || GET_CODE (value) == REG)
        value = gen_lowpart (fieldmode, value);
      else if (!(GET_CODE (value) == SYMBOL_REF
                 || GET_CODE (value) == LABEL_REF
                 || GET_CODE (value) == CONST))
        value = convert_to_mode (fieldmode, value, 0);

      if (! (*insn_data[icode].operand[1].predicate) (value, fieldmode))
        value = copy_to_mode_reg (fieldmode, value);

      if (GET_CODE (op0) == SUBREG)
        {
          if (GET_MODE (SUBREG_REG (op0)) == fieldmode
              || GET_MODE_CLASS (fieldmode) == MODE_INT
              || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT)
            op0 = SUBREG_REG (op0);
          else
            /* Else we've got some float mode source being extracted into
               a different float mode destination -- this combination of
               subregs results in Severe Tire Damage.  */
            abort ();
        }

      emit_insn (GEN_FCN (icode)
                 (gen_rtx_SUBREG (fieldmode, op0,
                                  (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
                                  + (offset * UNITS_PER_WORD)),
                  value));

      return value;
    }

  /* Handle fields bigger than a word.  */

  if (bitsize > BITS_PER_WORD)
    {
      /* Here we transfer the words of the field
         in the order least significant first.
         This is because the most significant word is the one which may
         be less than full.
         However, only do that if the value is not BLKmode.  */

      unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
      unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
      unsigned int i;

      /* This is the mode we must force value to, so that there will be enough
         subwords to extract.  Note that fieldmode will often (always?) be
         VOIDmode, because that is what store_field uses to indicate that this
         is a bit field, but passing VOIDmode to operand_subword_force will
         result in an abort.  */
      fieldmode = GET_MODE (value);
      if (fieldmode == VOIDmode)
        fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);

      for (i = 0; i < nwords; i++)
        {
          /* If I is 0, use the low-order word in both field and target;
             if I is 1, use the next to lowest word; and so on.  */
          unsigned int wordnum = (backwards ? nwords - i - 1 : i);
          unsigned int bit_offset = (backwards
                                     ? MAX ((int) bitsize - ((int) i + 1)
                                            * BITS_PER_WORD,
                                            0)
                                     : (int) i * BITS_PER_WORD);

          store_bit_field (op0, MIN (BITS_PER_WORD,
                                     bitsize - i * BITS_PER_WORD),
                           bitnum + bit_offset, word_mode,
                           operand_subword_force (value, wordnum, fieldmode),
                           total_size);
        }
      return value;
    }
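  /* As an illustration, with 32-bit words a 40-bit field is stored by
     the loop above as a full low-order word plus an 8-bit remainder,
     each piece handled by a recursive call with a BITSIZE of at most
     BITS_PER_WORD.  */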
  /* From here on we can assume that the field to be stored in is
     a full-word (whatever type that is), since it is shorter than a word.  */

  /* OFFSET is the number of words or bytes (UNIT says which)
     from STR_RTX to the first word or byte containing part of the field.  */

  if (GET_CODE (op0) != MEM)
    {
      if (offset != 0
          || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
        {
          if (GET_CODE (op0) != REG)
            {
              /* Since this is a destination (lvalue), we can't copy it to a
                 pseudo.  We can trivially remove a SUBREG that does not
                 change the size of the operand.  Such a SUBREG may have been
                 added above.  Otherwise, abort.  */
              if (GET_CODE (op0) == SUBREG
                  && (GET_MODE_SIZE (GET_MODE (op0))
                      == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)))))
                op0 = SUBREG_REG (op0);
              else
                abort ();
            }
          op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
                                op0, (offset * UNITS_PER_WORD));
        }
      offset = 0;
    }
  else
    op0 = protect_from_queue (op0, 1);
  /* If VALUE is a floating-point mode, access it as an integer of the
     corresponding size.  This can occur on a machine with 64 bit registers
     that uses SFmode for float.  This can also occur for unaligned float
     structure fields.  */
  orig_value = value;
  if (GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
      && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
    value = gen_lowpart ((GET_MODE (value) == VOIDmode
                          ? word_mode : int_mode_for_mode (GET_MODE (value))),
                         value);

  /* Now OFFSET is nonzero only if OP0 is memory
     and is therefore always measured in bytes.  */

  if (HAVE_insv
      && GET_MODE (value) != BLKmode
      && !(bitsize == 1 && GET_CODE (value) == CONST_INT)
      /* Ensure insv's size is wide enough for this field.  */
      && (GET_MODE_BITSIZE (op_mode) >= bitsize)
      && ! ((GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
            && (bitsize + bitpos > GET_MODE_BITSIZE (op_mode))))
    {
      int xbitpos = bitpos;
      rtx value1;
      rtx xop0 = op0;
      rtx last = get_last_insn ();
      rtx pat;
      enum machine_mode maxmode = mode_for_extraction (EP_insv, 3);
      int save_volatile_ok = volatile_ok;

      volatile_ok = 1;

      /* If this machine's insv can only insert into a register, copy OP0
         into a register and save it back later.  */
      /* This used to check flag_force_mem, but that was a serious
         de-optimization now that flag_force_mem is enabled by -O2.  */
      if (GET_CODE (op0) == MEM
          && ! ((*insn_data[(int) CODE_FOR_insv].operand[0].predicate)
                (op0, VOIDmode)))
        {
          rtx tempreg;
          enum machine_mode bestmode;

          /* Get the mode to use for inserting into this field.  If OP0 is
             BLKmode, get the smallest mode consistent with the alignment.  If
             OP0 is a non-BLKmode object that is no wider than MAXMODE, use its
             mode.  Otherwise, use the smallest mode containing the field.  */

          if (GET_MODE (op0) == BLKmode
              || GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (maxmode))
            bestmode
              = get_best_mode (bitsize, bitnum, MEM_ALIGN (op0), maxmode,
                               MEM_VOLATILE_P (op0));
          else
            bestmode = GET_MODE (op0);

          if (bestmode == VOIDmode
              || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
                  && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
            goto insv_loses;

          /* Adjust address to point to the containing unit of that mode.
             Compute offset as multiple of this unit, counting in bytes.  */
          unit = GET_MODE_BITSIZE (bestmode);
          offset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
          bitpos = bitnum % unit;
          op0 = adjust_address (op0, bestmode, offset);

          /* Fetch that unit, store the bitfield in it, then store
             the unit.  */
          tempreg = copy_to_reg (op0);
          store_bit_field (tempreg, bitsize, bitpos, fieldmode, orig_value,
                           total_size);
          emit_move_insn (op0, tempreg);
          return value;
        }
      volatile_ok = save_volatile_ok;

      /* Add OFFSET into OP0's address.  */
      if (GET_CODE (xop0) == MEM)
        xop0 = adjust_address (xop0, byte_mode, offset);

      /* If xop0 is a register, we need it in MAXMODE
         to make it acceptable to the format of insv.  */
      if (GET_CODE (xop0) == SUBREG)
        /* We can't just change the mode, because this might clobber op0,
           and we will need the original value of op0 if insv fails.  */
        xop0 = gen_rtx_SUBREG (maxmode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
      if (GET_CODE (xop0) == REG && GET_MODE (xop0) != maxmode)
        xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);

      /* On big-endian machines, we count bits from the most significant.
         If the bit field insn does not, we must invert.  */

      if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
        xbitpos = unit - bitsize - xbitpos;

      /* We have been counting XBITPOS within UNIT.
         Count instead within the size of the register.  */
      if (BITS_BIG_ENDIAN && GET_CODE (xop0) != MEM)
        xbitpos += GET_MODE_BITSIZE (maxmode) - unit;

      unit = GET_MODE_BITSIZE (maxmode);

      /* Convert VALUE to maxmode (which insv insn wants) in VALUE1.  */
      value1 = value;
      if (GET_MODE (value) != maxmode)
        {
          if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
            {
              /* Optimization: Don't bother really extending VALUE
                 if it has all the bits we will actually use.  However,
                 if we must narrow it, be sure we do it correctly.  */

              if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (maxmode))
                {
                  rtx tmp;

                  tmp = simplify_subreg (maxmode, value1, GET_MODE (value), 0);
                  if (! tmp)
                    tmp = simplify_gen_subreg (maxmode,
                                               force_reg (GET_MODE (value),
                                                          value1),
                                               GET_MODE (value), 0);
                  value1 = tmp;
                }
              else
                value1 = gen_lowpart (maxmode, value1);
            }
          else if (GET_CODE (value) == CONST_INT)
            value1 = gen_int_mode (INTVAL (value), maxmode);
          else if (!CONSTANT_P (value))
            /* Parse phase is supposed to make VALUE's data type
               match that of the component reference, which is a type
               at least as wide as the field; so VALUE should have
               a mode that corresponds to that type.  */
            abort ();
        }

      /* If this machine's insv insists on a register,
         get VALUE1 into a register.  */
      if (! ((*insn_data[(int) CODE_FOR_insv].operand[3].predicate)
             (value1, maxmode)))
        value1 = force_reg (maxmode, value1);

      pat = gen_insv (xop0, GEN_INT (bitsize), GEN_INT (xbitpos), value1);
      if (pat)
        emit_insn (pat);
      else
        {
          delete_insns_since (last);
          store_fixed_bit_field (op0, offset, bitsize, bitpos, value);
        }
    }
  else
    insv_loses:
    /* Insv is not available; store using shifts and boolean ops.  */
    store_fixed_bit_field (op0, offset, bitsize, bitpos, value);
  return value;
}

/* Use shifts and boolean operations to store VALUE
   into a bit field of width BITSIZE
   in a memory location specified by OP0 except offset by OFFSET bytes.
     (OFFSET must be 0 if OP0 is a register.)
   The field starts at position BITPOS within the byte.
     (If OP0 is a register, it may be a full word or a narrower mode,
      but BITPOS still counts within a full word,
      which is significant on bigendian machines.)

   Note that protect_from_queue has already been done on OP0 and VALUE.  */
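
/* In outline, the code below performs the read-modify-write

     op0 = (op0 & ~(mask << bitpos)) | ((value & mask) << bitpos);

   where MASK has the low BITSIZE bits set, except that the AND is
   skipped when VALUE is known to be all ones and the IOR when it is
   known to be zero.  */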

static void
store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT offset,
                       unsigned HOST_WIDE_INT bitsize,
                       unsigned HOST_WIDE_INT bitpos, rtx value)
{
  enum machine_mode mode;
  unsigned int total_bits = BITS_PER_WORD;
  rtx subtarget, temp;
  int all_zero = 0;
  int all_one = 0;

  /* There is a case not handled here:
     a structure with a known alignment of just a halfword
     and a field split across two aligned halfwords within the structure.
     Or likewise a structure with a known alignment of just a byte
     and a field split across two bytes.
     Such cases are not supposed to be able to occur.  */

  if (GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
    {
      if (offset != 0)
        abort ();
      /* Special treatment for a bit field split across two registers.  */
      if (bitsize + bitpos > BITS_PER_WORD)
        {
          store_split_bit_field (op0, bitsize, bitpos, value);
          return;
        }
    }
  else
    {
      /* Get the proper mode to use for this field.  We want a mode that
         includes the entire field.  If such a mode would be larger than
         a word, we won't be doing the extraction the normal way.
         We don't want a mode bigger than the destination.  */

      mode = GET_MODE (op0);
      if (GET_MODE_BITSIZE (mode) == 0
          || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
        mode = word_mode;
      mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
                            MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));

      if (mode == VOIDmode)
        {
          /* The only way this should occur is if the field spans word
             boundaries.  */
          store_split_bit_field (op0, bitsize, bitpos + offset * BITS_PER_UNIT,
                                 value);
          return;
        }

      total_bits = GET_MODE_BITSIZE (mode);

      /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
         be in the range 0 to total_bits-1, and put any excess bytes in
         OFFSET.  */
      if (bitpos >= total_bits)
        {
          offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
          bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
                     * BITS_PER_UNIT);
        }

      /* Get ref to an aligned byte, halfword, or word containing the field.
         Adjust BITPOS to be position within a word,
         and OFFSET to be the offset of that word.
         Then alter OP0 to refer to that word.  */
      bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
      offset -= (offset % (total_bits / BITS_PER_UNIT));
      op0 = adjust_address (op0, mode, offset);
    }

  mode = GET_MODE (op0);

  /* Now MODE is either some integral mode for a MEM as OP0,
     or is a full-word for a REG as OP0.  TOTAL_BITS corresponds.
     The bit field is contained entirely within OP0.
     BITPOS is the starting bit number within OP0.
     (OP0's mode may actually be narrower than MODE.)  */

  if (BYTES_BIG_ENDIAN)
    /* BITPOS is the distance between our msb
       and that of the containing datum.
       Convert it to the distance from the lsb.  */
    bitpos = total_bits - bitsize - bitpos;

  /* Now BITPOS is always the distance between our lsb
     and that of OP0.  */

  /* Shift VALUE left by BITPOS bits.  If VALUE is not constant,
     we must first convert its mode to MODE.  */

  if (GET_CODE (value) == CONST_INT)
    {
      HOST_WIDE_INT v = INTVAL (value);

      if (bitsize < HOST_BITS_PER_WIDE_INT)
        v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;

      if (v == 0)
        all_zero = 1;
      else if ((bitsize < HOST_BITS_PER_WIDE_INT
                && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
               || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
        all_one = 1;

      value = lshift_value (mode, value, bitpos, bitsize);
    }
  else
    {
      int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
                      && bitpos + bitsize != GET_MODE_BITSIZE (mode));

      if (GET_MODE (value) != mode)
        {
          if ((GET_CODE (value) == REG || GET_CODE (value) == SUBREG)
              && GET_MODE_SIZE (mode) < GET_MODE_SIZE (GET_MODE (value)))
            value = gen_lowpart (mode, value);
          else
            value = convert_to_mode (mode, value, 1);
        }

      if (must_and)
        value = expand_binop (mode, and_optab, value,
                              mask_rtx (mode, 0, bitsize, 0),
                              NULL_RTX, 1, OPTAB_LIB_WIDEN);
      if (bitpos > 0)
        value = expand_shift (LSHIFT_EXPR, mode, value,
                              build_int_2 (bitpos, 0), NULL_RTX, 1);
    }

  /* Now clear the chosen bits in OP0,
     except that if VALUE is -1 we need not bother.  */

  subtarget = (GET_CODE (op0) == REG || ! flag_force_mem) ? op0 : 0;

  if (! all_one)
    {
      temp = expand_binop (mode, and_optab, op0,
                           mask_rtx (mode, bitpos, bitsize, 1),
                           subtarget, 1, OPTAB_LIB_WIDEN);
      subtarget = temp;
    }
  else
    temp = op0;

  /* Now logical-or VALUE into OP0, unless it is zero.  */

  if (! all_zero)
    temp = expand_binop (mode, ior_optab, temp, value,
                         subtarget, 1, OPTAB_LIB_WIDEN);
  if (op0 != temp)
    emit_move_insn (op0, temp);
}

/* Store a bit field that is split across multiple accessible memory objects.

   OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
   BITSIZE is the field width; BITPOS the position of its first bit
   (within the word).
   VALUE is the value to store.

   This does not yet handle fields wider than BITS_PER_WORD.  */
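
/* For instance, with 32-bit words a 20-bit field starting at bit 28
   of a register pair is stored as a 4-bit piece in the first word
   followed by a 16-bit piece at the bottom of the next, each piece
   narrow enough for store_fixed_bit_field to handle directly.  */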

static void
store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
                       unsigned HOST_WIDE_INT bitpos, rtx value)
{
  unsigned int unit;
  unsigned int bitsdone = 0;

  /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
     much at a time.  */
  if (GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
    unit = BITS_PER_WORD;
  else
    unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);

  /* If VALUE is a constant other than a CONST_INT, get it into a register in
     WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
     that VALUE might be a floating-point constant.  */
  if (CONSTANT_P (value) && GET_CODE (value) != CONST_INT)
    {
      rtx word = gen_lowpart_common (word_mode, value);

      if (word && (value != word))
        value = word;
      else
        value = gen_lowpart_common (word_mode,
                                    force_reg (GET_MODE (value) != VOIDmode
                                               ? GET_MODE (value)
                                               : word_mode, value));
    }
  else if (GET_CODE (value) == ADDRESSOF)
    value = copy_to_reg (value);

  while (bitsdone < bitsize)
    {
      unsigned HOST_WIDE_INT thissize;
      rtx part, word;
      unsigned HOST_WIDE_INT thispos;
      unsigned HOST_WIDE_INT offset;

      offset = (bitpos + bitsdone) / unit;
      thispos = (bitpos + bitsdone) % unit;

      /* THISSIZE must not overrun a word boundary.  Otherwise,
         store_fixed_bit_field will call us again, and we will mutually
         recurse forever.  */
      thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
      thissize = MIN (thissize, unit - thispos);

      if (BYTES_BIG_ENDIAN)
        {
          int total_bits;

          /* We must do an endian conversion exactly the same way as it is
             done in extract_bit_field, so that the two calls to
             extract_fixed_bit_field will have comparable arguments.  */
          if (GET_CODE (value) != MEM || GET_MODE (value) == BLKmode)
            total_bits = BITS_PER_WORD;
          else
            total_bits = GET_MODE_BITSIZE (GET_MODE (value));

          /* Fetch successively less significant portions.  */
          if (GET_CODE (value) == CONST_INT)
            part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
                             >> (bitsize - bitsdone - thissize))
                            & (((HOST_WIDE_INT) 1 << thissize) - 1));
          else
            /* The args are chosen so that the last part includes the
               lsb.  Give extract_bit_field the value it needs (with
               endianness compensation) to fetch the piece we want.  */
            part = extract_fixed_bit_field (word_mode, value, 0, thissize,
                                            total_bits - bitsize + bitsdone,
                                            NULL_RTX, 1);
        }
      else
        {
          /* Fetch successively more significant portions.  */
          if (GET_CODE (value) == CONST_INT)
            part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
                             >> bitsdone)
                            & (((HOST_WIDE_INT) 1 << thissize) - 1));
          else
            part = extract_fixed_bit_field (word_mode, value, 0, thissize,
                                            bitsdone, NULL_RTX, 1);
        }

      /* If OP0 is a register, then handle OFFSET here.

         When handling multiword bitfields, extract_bit_field may pass
         down a word_mode SUBREG of a larger REG for a bitfield that actually
         crosses a word boundary.  Thus, for a SUBREG, we must find
         the current word starting from the base register.  */
      if (GET_CODE (op0) == SUBREG)
        {
          int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
          word = operand_subword_force (SUBREG_REG (op0), word_offset,
                                        GET_MODE (SUBREG_REG (op0)));
          offset = 0;
        }
      else if (GET_CODE (op0) == REG)
        {
          word = operand_subword_force (op0, offset, GET_MODE (op0));
          offset = 0;
        }
      else
        word = op0;

      /* OFFSET is in UNITs, and UNIT is in bits.
         store_fixed_bit_field wants offset in bytes.  */
      store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT, thissize,
                             thispos, part);
      bitsdone += thissize;
    }
}

/* Generate code to extract a byte-field from STR_RTX
   containing BITSIZE bits, starting at BITNUM,
   and put it in TARGET if possible (if TARGET is nonzero).
   Regardless of TARGET, we return the rtx for where the value is placed.
   It may be a QUEUED.

   STR_RTX is the structure containing the byte (a REG or MEM).
   UNSIGNEDP is nonzero if this is an unsigned bit field.
   MODE is the natural mode of the field value once extracted.
   TMODE is the mode the caller would like the value to have;
   but the value may be returned with type MODE instead.

   TOTAL_SIZE is the size in bytes of the containing structure,
   or -1 if varying.

   If a TARGET is specified and we can store in it at no extra cost,
   we do so, and return TARGET.
   Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
   if they are equally easy.  */
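
/* For concreteness, on a 32-bit target

     rtx r = extract_bit_field (x, 8, 5, 1, NULL_RTX,
                                QImode, QImode, -1);

   yields bits 5..12 of X zero-extended (UNSIGNEDP is 1), trying a
   bare SUBREG, the vec_extract and extzv patterns, and finally the
   shift-and-mask fallback in extract_fixed_bit_field.  */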

rtx
extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
                   unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
                   enum machine_mode mode, enum machine_mode tmode,
                   HOST_WIDE_INT total_size)
{
  unsigned int unit
    = (GET_CODE (str_rtx) == MEM) ? BITS_PER_UNIT : BITS_PER_WORD;
  unsigned HOST_WIDE_INT offset = bitnum / unit;
  unsigned HOST_WIDE_INT bitpos = bitnum % unit;
  rtx op0 = str_rtx;
  rtx spec_target = target;
  rtx spec_target_subreg = 0;
  enum machine_mode int_mode;
  enum machine_mode extv_mode = mode_for_extraction (EP_extv, 0);
  enum machine_mode extzv_mode = mode_for_extraction (EP_extzv, 0);
  enum machine_mode mode1;
  int byte_offset;

  /* Discount the part of the structure before the desired byte.
     We need to know how many bytes are safe to reference after it.  */
  if (total_size >= 0)
    total_size -= (bitpos / BIGGEST_ALIGNMENT
                   * (BIGGEST_ALIGNMENT / BITS_PER_UNIT));

  if (tmode == VOIDmode)
    tmode = mode;

  while (GET_CODE (op0) == SUBREG)
    {
      bitpos += SUBREG_BYTE (op0) * BITS_PER_UNIT;
      if (bitpos > unit)
        {
          offset += (bitpos / unit);
          bitpos %= unit;
        }
      op0 = SUBREG_REG (op0);
    }

  if (GET_CODE (op0) == REG
      && mode == GET_MODE (op0)
      && bitnum == 0
      && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
    {
      /* We're trying to extract a full register from itself.  */
      return op0;
    }
  /* Use vec_extract patterns for extracting parts of vectors whenever
     available.  */
  if (VECTOR_MODE_P (GET_MODE (op0))
      && GET_CODE (op0) != MEM
      && (vec_extract_optab->handlers[(int)GET_MODE (op0)].insn_code
          != CODE_FOR_nothing)
      && ((bitsize + bitnum) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
          == bitsize / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
    {
      enum machine_mode outermode = GET_MODE (op0);
      enum machine_mode innermode = GET_MODE_INNER (outermode);
      int icode = (int) vec_extract_optab->handlers[(int) outermode].insn_code;
      int pos = bitnum / GET_MODE_BITSIZE (innermode);
      rtx rtxpos = GEN_INT (pos);
      rtx src = op0;
      rtx dest = NULL, pat, seq;
      enum machine_mode mode0 = insn_data[icode].operand[0].mode;
      enum machine_mode mode1 = insn_data[icode].operand[1].mode;
      enum machine_mode mode2 = insn_data[icode].operand[2].mode;

      if (innermode == tmode || innermode == mode)
        dest = target;

      if (!dest)
        dest = gen_reg_rtx (innermode);

      start_sequence ();

      if (! (*insn_data[icode].operand[0].predicate) (dest, mode0))
        dest = copy_to_mode_reg (mode0, dest);

      if (! (*insn_data[icode].operand[1].predicate) (src, mode1))
        src = copy_to_mode_reg (mode1, src);

      if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
        rtxpos = copy_to_mode_reg (mode2, rtxpos);

      /* We could handle this, but we should always be called with a pseudo
         for our targets and all insns should take them as outputs.  */
      if (! (*insn_data[icode].operand[0].predicate) (dest, mode0)
          || ! (*insn_data[icode].operand[1].predicate) (src, mode1)
          || ! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
        abort ();

      pat = GEN_FCN (icode) (dest, src, rtxpos);
      seq = get_insns ();
      end_sequence ();
      if (pat)
        {
          emit_insn (seq);
          emit_insn (pat);
          return dest;
        }
    }
  /* Make sure we are playing with integral modes.  Pun with subregs
     if we aren't.  */
  {
    enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
    if (imode != GET_MODE (op0))
      {
        if (GET_CODE (op0) == MEM)
          op0 = adjust_address (op0, imode, 0);
        else if (imode != BLKmode)
          op0 = gen_lowpart (imode, op0);
        else
          abort ();
      }
  }

  /* We may be accessing data outside the field, which means
     we can alias adjacent data.  */
  if (GET_CODE (op0) == MEM)
    {
      op0 = shallow_copy_rtx (op0);
      set_mem_alias_set (op0, 0);
      set_mem_expr (op0, 0);
    }

  /* Extraction of a full-word or multi-word value from a structure
     in a register or aligned memory can be done with just a SUBREG.
     A subword value in the least significant part of a register
     can also be extracted with a SUBREG.  For this, we need the
     byte offset of the value in op0.  */

  byte_offset = bitpos / BITS_PER_UNIT + offset * UNITS_PER_WORD;

  /* If OP0 is a register, BITPOS must count within a word.
     But as we have it, it counts within whatever size OP0 now has.
     On a bigendian machine, these are not the same, so convert.  */
  if (BYTES_BIG_ENDIAN
      && GET_CODE (op0) != MEM
      && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
    bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
  /* ??? We currently assume TARGET is at least as big as BITSIZE.
     If that's wrong, the solution is to test for it and set TARGET to 0
     if needed.  */

  /* Only scalar integer modes can be converted via subregs.  There is an
     additional problem for FP modes here in that they can have a precision
     which is different from the size.  mode_for_size uses precision, but
     we want a mode based on the size, so we must avoid calling it for FP
     modes.  */
  mode1 = (SCALAR_INT_MODE_P (tmode)
           ? mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0)
           : mode);

  if (((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode)
        && bitpos % BITS_PER_WORD == 0)
       || (mode1 != BLKmode
           /* ??? The big endian test here is wrong.  This is correct
              if the value is in a register, and if mode_for_size is not
              the same mode as op0.  This causes us to get unnecessarily
              inefficient code from the Thumb port when -mbig-endian.  */
           && (BYTES_BIG_ENDIAN
               ? bitpos + bitsize == BITS_PER_WORD
               : bitpos == 0)))
      && ((GET_CODE (op0) != MEM
           && TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (mode),
                                     GET_MODE_BITSIZE (GET_MODE (op0)))
           && GET_MODE_SIZE (mode1) != 0
           && byte_offset % GET_MODE_SIZE (mode1) == 0)
          || (GET_CODE (op0) == MEM
              && (! SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
                  || (offset * BITS_PER_UNIT % bitsize == 0
                      && MEM_ALIGN (op0) % bitsize == 0)))))
    {
      if (mode1 != GET_MODE (op0))
        {
          if (GET_CODE (op0) == SUBREG)
            {
              if (GET_MODE (SUBREG_REG (op0)) == mode1
                  || GET_MODE_CLASS (mode1) == MODE_INT
                  || GET_MODE_CLASS (mode1) == MODE_PARTIAL_INT)
                op0 = SUBREG_REG (op0);
              else
                /* Else we've got some float mode source being extracted into
                   a different float mode destination -- this combination of
                   subregs results in Severe Tire Damage.  */
                goto no_subreg_mode_swap;
            }
          if (GET_CODE (op0) == REG)
            op0 = gen_rtx_SUBREG (mode1, op0, byte_offset);
          else
            op0 = adjust_address (op0, mode1, offset);
        }
      if (mode1 != mode)
        return convert_to_mode (tmode, op0, unsignedp);
      return op0;
    }
 no_subreg_mode_swap:

  /* Handle fields bigger than a word.  */

  if (bitsize > BITS_PER_WORD)
    {
      /* Here we transfer the words of the field
         in the order least significant first.
         This is because the most significant word is the one which may
         be less than full.  */

      unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
      unsigned int i;

      if (target == 0 || GET_CODE (target) != REG)
        target = gen_reg_rtx (mode);

      /* Indicate for flow that the entire target reg is being set.  */
      emit_insn (gen_rtx_CLOBBER (VOIDmode, target));

      for (i = 0; i < nwords; i++)
        {
          /* If I is 0, use the low-order word in both field and target;
             if I is 1, use the next to lowest word; and so on.  */
          /* Word number in TARGET to use.  */
          unsigned int wordnum
            = (WORDS_BIG_ENDIAN
               ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
               : i);
          /* Offset from start of field in OP0.  */
          unsigned int bit_offset = (WORDS_BIG_ENDIAN
                                     ? MAX (0, ((int) bitsize - ((int) i + 1)
                                                * (int) BITS_PER_WORD))
                                     : (int) i * BITS_PER_WORD);
          rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
          rtx result_part
            = extract_bit_field (op0, MIN (BITS_PER_WORD,
                                           bitsize - i * BITS_PER_WORD),
                                 bitnum + bit_offset, 1, target_part, mode,
                                 word_mode, total_size);

          if (target_part == 0)
            abort ();

          if (result_part != target_part)
            emit_move_insn (target_part, result_part);
        }

      if (unsignedp)
        {
          /* Unless we've filled TARGET, the upper regs in a multi-reg value
             need to be zero'd out.  */
          if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
            {
              unsigned int i, total_words;

              total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
              for (i = nwords; i < total_words; i++)
                emit_move_insn
                  (operand_subword (target,
                                    WORDS_BIG_ENDIAN ? total_words - i - 1 : i,
                                    1, VOIDmode),
                   const0_rtx);
            }
          return target;
        }

      /* Signed bit field: sign-extend with two arithmetic shifts.  */
      target = expand_shift (LSHIFT_EXPR, mode, target,
                             build_int_2 (GET_MODE_BITSIZE (mode) - bitsize, 0),
                             NULL_RTX, 0);
      return expand_shift (RSHIFT_EXPR, mode, target,
                           build_int_2 (GET_MODE_BITSIZE (mode) - bitsize, 0),
                           NULL_RTX, 0);
    }
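  /* For the signed multiword case just above: a 40-bit field extracted
     into a 64-bit mode ends with (x << 24) >> 24, the arithmetic right
     shift smearing the field's sign bit across the upper 24 bits.  */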
  /* From here on we know the desired field is smaller than a word.  */

  /* Check if there is a correspondingly-sized integer field, so we can
     safely extract it as one size of integer, if necessary; then
     truncate or extend to the size that is wanted; then use SUBREGs or
     convert_to_mode to get one of the modes we really wanted.  */

  int_mode = int_mode_for_mode (tmode);
  if (int_mode == BLKmode)
    int_mode = int_mode_for_mode (mode);
  if (int_mode == BLKmode)
    abort ();	/* Should probably push op0 out to memory and then
                   do a load.  */

  /* OFFSET is the number of words or bytes (UNIT says which)
     from STR_RTX to the first word or byte containing part of the field.  */

  if (GET_CODE (op0) != MEM)
    {
      if (offset != 0
          || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
        {
          if (GET_CODE (op0) != REG)
            op0 = copy_to_reg (op0);
          op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
                                op0, (offset * UNITS_PER_WORD));
        }
      offset = 0;
    }
  else
    op0 = protect_from_queue (str_rtx, 1);
  /* Now OFFSET is nonzero only for memory operands.  */

  if (unsignedp)
    {
      if (HAVE_extzv
          && (GET_MODE_BITSIZE (extzv_mode) >= bitsize)
          && ! ((GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
                && (bitsize + bitpos > GET_MODE_BITSIZE (extzv_mode))))
        {
          unsigned HOST_WIDE_INT xbitpos = bitpos, xoffset = offset;
          rtx bitsize_rtx, bitpos_rtx;
          rtx last = get_last_insn ();
          rtx xop0 = op0;
          rtx xtarget = target;
          rtx xspec_target = spec_target;
          rtx xspec_target_subreg = spec_target_subreg;
          rtx pat;
          enum machine_mode maxmode = mode_for_extraction (EP_extzv, 0);

          if (GET_CODE (xop0) == MEM)
            {
              int save_volatile_ok = volatile_ok;
              volatile_ok = 1;

              /* Is the memory operand acceptable?  */
              if (! ((*insn_data[(int) CODE_FOR_extzv].operand[1].predicate)
                     (xop0, GET_MODE (xop0))))
                {
                  /* No, load into a reg and extract from there.  */
                  enum machine_mode bestmode;

                  /* Get the mode to use for extracting from this field.  If
                     OP0 is BLKmode, get the smallest mode consistent with the
                     alignment.  If OP0 is a non-BLKmode object that is no
                     wider than MAXMODE, use its mode.  Otherwise, use the
                     smallest mode containing the field.  */

                  if (GET_MODE (xop0) == BLKmode
                      || (GET_MODE_SIZE (GET_MODE (op0))
                          > GET_MODE_SIZE (maxmode)))
                    bestmode = get_best_mode (bitsize, bitnum,
                                              MEM_ALIGN (xop0), maxmode,
                                              MEM_VOLATILE_P (xop0));
                  else
                    bestmode = GET_MODE (xop0);

                  if (bestmode == VOIDmode
                      || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (xop0))
                          && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (xop0)))
                    goto extzv_loses;

                  /* Compute offset as multiple of this unit,
                     counting in bytes.  */
                  unit = GET_MODE_BITSIZE (bestmode);
                  xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
                  xbitpos = bitnum % unit;
                  xop0 = adjust_address (xop0, bestmode, xoffset);

                  /* Fetch it to a register in that size.  */
                  xop0 = force_reg (bestmode, xop0);

                  /* XBITPOS counts within UNIT, which is what is expected.  */
                }
              else
                /* Get ref to first byte containing part of the field.  */
                xop0 = adjust_address (xop0, byte_mode, xoffset);

              volatile_ok = save_volatile_ok;
            }

          /* If op0 is a register, we need it in MAXMODE (which is usually
             SImode) to make it acceptable to the format of extzv.  */
          if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
            goto extzv_loses;
          if (GET_CODE (xop0) == REG && GET_MODE (xop0) != maxmode)
            xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);

          /* On big-endian machines, we count bits from the most significant.
             If the bit field insn does not, we must invert.  */
          if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
            xbitpos = unit - bitsize - xbitpos;

          /* Now convert from counting within UNIT to counting in MAXMODE.  */
          if (BITS_BIG_ENDIAN && GET_CODE (xop0) != MEM)
            xbitpos += GET_MODE_BITSIZE (maxmode) - unit;

          unit = GET_MODE_BITSIZE (maxmode);

          if (xtarget == 0
              || (flag_force_mem && GET_CODE (xtarget) == MEM))
            xtarget = xspec_target = gen_reg_rtx (tmode);

          if (GET_MODE (xtarget) != maxmode)
            {
              if (GET_CODE (xtarget) == REG)
                {
                  int wider = (GET_MODE_SIZE (maxmode)
                               > GET_MODE_SIZE (GET_MODE (xtarget)));
                  xtarget = gen_lowpart (maxmode, xtarget);
                  if (wider)
                    xspec_target_subreg = xtarget;
                }
              else
                xtarget = gen_reg_rtx (maxmode);
            }

          /* If this machine's extzv insists on a register target,
             make sure we have one.  */
          if (! ((*insn_data[(int) CODE_FOR_extzv].operand[0].predicate)
                 (xtarget, maxmode)))
            xtarget = gen_reg_rtx (maxmode);

          bitsize_rtx = GEN_INT (bitsize);
          bitpos_rtx = GEN_INT (xbitpos);

          pat = gen_extzv (protect_from_queue (xtarget, 1),
                           xop0, bitsize_rtx, bitpos_rtx);
          if (pat)
            {
              emit_insn (pat);
              target = xtarget;
              spec_target = xspec_target;
              spec_target_subreg = xspec_target_subreg;
            }
          else
            {
              delete_insns_since (last);
              target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
                                                bitpos, target, 1);
            }
        }
      else
        extzv_loses:
        target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
                                          bitpos, target, 1);
    }
  else
    {
      if (HAVE_extv
          && (GET_MODE_BITSIZE (extv_mode) >= bitsize)
          && ! ((GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
                && (bitsize + bitpos > GET_MODE_BITSIZE (extv_mode))))
        {
          int xbitpos = bitpos, xoffset = offset;
          rtx bitsize_rtx, bitpos_rtx;
          rtx last = get_last_insn ();
          rtx xop0 = op0, xtarget = target;
          rtx xspec_target = spec_target;
          rtx xspec_target_subreg = spec_target_subreg;
          rtx pat;
          enum machine_mode maxmode = mode_for_extraction (EP_extv, 0);

          if (GET_CODE (xop0) == MEM)
            {
              /* Is the memory operand acceptable?  */
              if (! ((*insn_data[(int) CODE_FOR_extv].operand[1].predicate)
                     (xop0, GET_MODE (xop0))))
                {
                  /* No, load into a reg and extract from there.  */
                  enum machine_mode bestmode;

                  /* Get the mode to use for extracting from this field.  If
                     OP0 is BLKmode, get the smallest mode consistent with the
                     alignment.  If OP0 is a non-BLKmode object that is no
                     wider than MAXMODE, use its mode.  Otherwise, use the
                     smallest mode containing the field.  */

                  if (GET_MODE (xop0) == BLKmode
                      || (GET_MODE_SIZE (GET_MODE (op0))
                          > GET_MODE_SIZE (maxmode)))
                    bestmode = get_best_mode (bitsize, bitnum,
                                              MEM_ALIGN (xop0), maxmode,
                                              MEM_VOLATILE_P (xop0));
                  else
                    bestmode = GET_MODE (xop0);

                  if (bestmode == VOIDmode
                      || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (xop0))
                          && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (xop0)))
                    goto extv_loses;

                  /* Compute offset as multiple of this unit,
                     counting in bytes.  */
                  unit = GET_MODE_BITSIZE (bestmode);
                  xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
                  xbitpos = bitnum % unit;
                  xop0 = adjust_address (xop0, bestmode, xoffset);

                  /* Fetch it to a register in that size.  */
                  xop0 = force_reg (bestmode, xop0);

                  /* XBITPOS counts within UNIT, which is what is expected.  */
                }
              else
                /* Get ref to first byte containing part of the field.  */
                xop0 = adjust_address (xop0, byte_mode, xoffset);
            }

          /* If op0 is a register, we need it in MAXMODE (which is usually
             SImode) to make it acceptable to the format of extv.  */
          if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
            goto extv_loses;
          if (GET_CODE (xop0) == REG && GET_MODE (xop0) != maxmode)
            xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);

          /* On big-endian machines, we count bits from the most significant.
             If the bit field insn does not, we must invert.  */
          if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
            xbitpos = unit - bitsize - xbitpos;

          /* XBITPOS counts within a size of UNIT.
             Adjust to count within a size of MAXMODE.  */
          if (BITS_BIG_ENDIAN && GET_CODE (xop0) != MEM)
            xbitpos += (GET_MODE_BITSIZE (maxmode) - unit);

          unit = GET_MODE_BITSIZE (maxmode);

          if (xtarget == 0
              || (flag_force_mem && GET_CODE (xtarget) == MEM))
            xtarget = xspec_target = gen_reg_rtx (tmode);

          if (GET_MODE (xtarget) != maxmode)
            {
              if (GET_CODE (xtarget) == REG)
                {
                  int wider = (GET_MODE_SIZE (maxmode)
                               > GET_MODE_SIZE (GET_MODE (xtarget)));
                  xtarget = gen_lowpart (maxmode, xtarget);
                  if (wider)
                    xspec_target_subreg = xtarget;
                }
              else
                xtarget = gen_reg_rtx (maxmode);
            }

          /* If this machine's extv insists on a register target,
             make sure we have one.  */
          if (! ((*insn_data[(int) CODE_FOR_extv].operand[0].predicate)
                 (xtarget, maxmode)))
            xtarget = gen_reg_rtx (maxmode);

          bitsize_rtx = GEN_INT (bitsize);
          bitpos_rtx = GEN_INT (xbitpos);

          pat = gen_extv (protect_from_queue (xtarget, 1),
                          xop0, bitsize_rtx, bitpos_rtx);
          if (pat)
            {
              emit_insn (pat);
              target = xtarget;
              spec_target = xspec_target;
              spec_target_subreg = xspec_target_subreg;
            }
          else
            {
              delete_insns_since (last);
              target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
                                                bitpos, target, 0);
            }
        }
      else
        extv_loses:
        target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
                                          bitpos, target, 0);
    }
  if (target == spec_target)
    return target;
  if (target == spec_target_subreg)
    return spec_target;
  if (GET_MODE (target) != tmode && GET_MODE (target) != mode)
    {
      /* If the target mode is floating-point, first convert to the
         integer mode of that size and then access it as a floating-point
         value via a SUBREG.  */
      if (GET_MODE_CLASS (tmode) != MODE_INT
          && GET_MODE_CLASS (tmode) != MODE_PARTIAL_INT)
        {
          target = convert_to_mode (mode_for_size (GET_MODE_BITSIZE (tmode),
                                                   MODE_INT, 0),
                                    target, unsignedp);
          return gen_lowpart (tmode, target);
        }
      else
        return convert_to_mode (tmode, target, unsignedp);
    }
  return target;
}

/* Extract a bit field using shifts and boolean operations.
   Returns an rtx to represent the value.
   OP0 addresses a register (word) or memory (byte).
   BITPOS says which bit within the word or byte the bit field starts in.
   OFFSET says how many bytes farther the bit field starts;
    it is 0 if OP0 is a register.
   BITSIZE says how many bits long the bit field is.
    (If OP0 is a register, it may be narrower than a full word,
     but BITPOS still counts within a full word,
     which is significant on bigendian machines.)

   UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
   If TARGET is nonzero, attempts to store the value there
   and return TARGET, but this is not guaranteed.
   If TARGET is not used, create a pseudo-reg of mode TMODE for the value.  */
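
/* In C terms, the unsigned path below computes

     result = (op0 >> bitpos) & (((HOST_WIDE_INT) 1 << bitsize) - 1);

   and the signed path

     result = (op0 << (total - bitsize - bitpos)) >> (total - bitsize);

   with an arithmetic right shift, TOTAL being the width of MODE.  */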
1650 static rtx
1651 extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
1652 unsigned HOST_WIDE_INT offset,
1653 unsigned HOST_WIDE_INT bitsize,
1654 unsigned HOST_WIDE_INT bitpos, rtx target,
1655 int unsignedp)
1657 unsigned int total_bits = BITS_PER_WORD;
1658 enum machine_mode mode;
1660 if (GET_CODE (op0) == SUBREG || GET_CODE (op0) == REG)
1662 /* Special treatment for a bit field split across two registers. */
1663 if (bitsize + bitpos > BITS_PER_WORD)
1664 return extract_split_bit_field (op0, bitsize, bitpos, unsignedp);
1666 else
1668 /* Get the proper mode to use for this field. We want a mode that
1669 includes the entire field. If such a mode would be larger than
1670 a word, we won't be doing the extraction the normal way. */
1672 mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
1673 MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));
1675 if (mode == VOIDmode)
1676 /* The only way this should occur is if the field spans word
1677 boundaries. */
1678 return extract_split_bit_field (op0, bitsize,
1679 bitpos + offset * BITS_PER_UNIT,
1680 unsignedp);
1682 total_bits = GET_MODE_BITSIZE (mode);
1684 /* Make sure bitpos is valid for the chosen mode. Adjust BITPOS to
1685 be in the range 0 to total_bits-1, and put any excess bytes in
1686 OFFSET. */
1687 if (bitpos >= total_bits)
1689 offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
1690 bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
1691 * BITS_PER_UNIT);
1694 /* Get ref to an aligned byte, halfword, or word containing the field.
1695 Adjust BITPOS to be position within a word,
1696 and OFFSET to be the offset of that word.
1697 Then alter OP0 to refer to that word. */
1698 bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
1699 offset -= (offset % (total_bits / BITS_PER_UNIT));
1700 op0 = adjust_address (op0, mode, offset);
1703 mode = GET_MODE (op0);
1705 if (BYTES_BIG_ENDIAN)
1706 /* BITPOS is the distance between our msb and that of OP0.
1707 Convert it to the distance from the lsb. */
1708 bitpos = total_bits - bitsize - bitpos;
1710 /* Now BITPOS is always the distance between the field's lsb and that of OP0.
1711 We have reduced the big-endian case to the little-endian case. */
1713 if (unsignedp)
1715 if (bitpos)
1717 /* If the field does not already start at the lsb,
1718 shift it so it does. */
1719 tree amount = build_int_2 (bitpos, 0);
1720 /* Maybe propagate the target for the shift. */
1721 /* But not if we will return it--could confuse integrate.c. */
1722 rtx subtarget = (target != 0 && GET_CODE (target) == REG
1723 && !REG_FUNCTION_VALUE_P (target)
1724 ? target : 0);
1725 if (tmode != mode) subtarget = 0;
1726 op0 = expand_shift (RSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1728 /* Convert the value to the desired mode. */
1729 if (mode != tmode)
1730 op0 = convert_to_mode (tmode, op0, 1);
1732 /* Unless the msb of the field used to be the msb when we shifted,
1733 mask out the upper bits. */
1735 if (GET_MODE_BITSIZE (mode) != bitpos + bitsize)
1736 return expand_binop (GET_MODE (op0), and_optab, op0,
1737 mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1738 target, 1, OPTAB_LIB_WIDEN);
1739 return op0;
1742 /* To extract a signed bit-field, first shift its msb to the msb of the word,
1743 then arithmetic-shift its lsb to the lsb of the word. */
1744 op0 = force_reg (mode, op0);
1745 if (mode != tmode)
1746 target = 0;
1748 /* Find the narrowest integer mode that contains the field. */
1750 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1751 mode = GET_MODE_WIDER_MODE (mode))
1752 if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos)
1754 op0 = convert_to_mode (mode, op0, 0);
1755 break;
1758 if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos))
1760 tree amount
1761 = build_int_2 (GET_MODE_BITSIZE (mode) - (bitsize + bitpos), 0);
1762 /* Maybe propagate the target for the shift. */
1763 /* But not if we will return the result--could confuse integrate.c. */
1764 rtx subtarget = (target != 0 && GET_CODE (target) == REG
1765 && ! REG_FUNCTION_VALUE_P (target)
1766 ? target : 0);
1767 op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1770 return expand_shift (RSHIFT_EXPR, mode, op0,
1771 build_int_2 (GET_MODE_BITSIZE (mode) - bitsize, 0),
1772 target, 0);
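/* Editor's note (illustration, not part of the original source): on a
   32-bit word, the signed extract above reduces to the classic
   shift-left/arithmetic-shift-right pattern.  For a field of BITSIZE
   bits at BITPOS (lsb-relative):

     ((int) (v << (32 - bitsize - bitpos))) >> (32 - bitsize)

   The left shift discards the bits above the field and parks the
   field's msb in the word's sign bit; the arithmetic right shift then
   sign-extends while moving the field down to bit 0.  */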
1775 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1776 of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1777 complement of that if COMPLEMENT. The mask is truncated if
1778 necessary to the width of mode MODE. The mask is zero-extended if
1779 BITSIZE+BITPOS is too small for MODE. */
1781 static rtx
1782 mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
1784 HOST_WIDE_INT masklow, maskhigh;
1786 if (bitsize == 0)
1787 masklow = 0;
1788 else if (bitpos < HOST_BITS_PER_WIDE_INT)
1789 masklow = (HOST_WIDE_INT) -1 << bitpos;
1790 else
1791 masklow = 0;
1793 if (bitpos + bitsize < HOST_BITS_PER_WIDE_INT)
1794 masklow &= ((unsigned HOST_WIDE_INT) -1
1795 >> (HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1797 if (bitpos <= HOST_BITS_PER_WIDE_INT)
1798 maskhigh = -1;
1799 else
1800 maskhigh = (HOST_WIDE_INT) -1 << (bitpos - HOST_BITS_PER_WIDE_INT);
1802 if (bitsize == 0)
1803 maskhigh = 0;
1804 else if (bitpos + bitsize > HOST_BITS_PER_WIDE_INT)
1805 maskhigh &= ((unsigned HOST_WIDE_INT) -1
1806 >> (2 * HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1807 else
1808 maskhigh = 0;
1810 if (complement)
1812 maskhigh = ~maskhigh;
1813 masklow = ~masklow;
1816 return immed_double_const (masklow, maskhigh, mode);
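/* Editor's sketch (hypothetical helper, host arithmetic only): the
   single-word case of the mask built above, for BITPOS + BITSIZE <= 64.
   mask_rtx additionally splits the value across MASKLOW/MASKHIGH,
   handles COMPLEMENT, and truncates to MODE, all omitted here.  */
static unsigned long long
mask_sketch (int bitpos, int bitsize)
{
  /* BITSIZE ones, with BITPOS zeros below them.  */
  unsigned long long ones
    = bitsize >= 64 ? ~0ULL : (1ULL << bitsize) - 1;
  return ones << bitpos;
}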
1819 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1820 VALUE truncated to BITSIZE bits and then shifted left BITPOS bits. */
1822 static rtx
1823 lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize)
1825 unsigned HOST_WIDE_INT v = INTVAL (value);
1826 HOST_WIDE_INT low, high;
1828 if (bitsize < HOST_BITS_PER_WIDE_INT)
1829 v &= ~((HOST_WIDE_INT) -1 << bitsize);
1831 if (bitpos < HOST_BITS_PER_WIDE_INT)
1833 low = v << bitpos;
1834 high = (bitpos > 0 ? (v >> (HOST_BITS_PER_WIDE_INT - bitpos)) : 0);
1836 else
1838 low = 0;
1839 high = v << (bitpos - HOST_BITS_PER_WIDE_INT);
1842 return immed_double_const (low, high, mode);
1845 /* Extract a bit field that is split across two words
1846 and return an RTX for the result.
1848 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1849 BITSIZE is the field width; BITPOS is the position of its first bit in the word.
1850 UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend. */
1852 static rtx
1853 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1854 unsigned HOST_WIDE_INT bitpos, int unsignedp)
1856 unsigned int unit;
1857 unsigned int bitsdone = 0;
1858 rtx result = NULL_RTX;
1859 int first = 1;
1861 /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle
1862 that much at a time. */
1863 if (GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
1864 unit = BITS_PER_WORD;
1865 else
1866 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1868 while (bitsdone < bitsize)
1870 unsigned HOST_WIDE_INT thissize;
1871 rtx part, word;
1872 unsigned HOST_WIDE_INT thispos;
1873 unsigned HOST_WIDE_INT offset;
1875 offset = (bitpos + bitsdone) / unit;
1876 thispos = (bitpos + bitsdone) % unit;
1878 /* THISSIZE must not overrun a word boundary. Otherwise,
1879 extract_fixed_bit_field will call us again, and we will mutually
1880 recurse forever. */
1881 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1882 thissize = MIN (thissize, unit - thispos);
1884 /* If OP0 is a register, then handle OFFSET here.
1886 When handling multiword bitfields, extract_bit_field may pass
1887 down a word_mode SUBREG of a larger REG for a bitfield that actually
1888 crosses a word boundary. Thus, for a SUBREG, we must find
1889 the current word starting from the base register. */
1890 if (GET_CODE (op0) == SUBREG)
1892 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1893 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1894 GET_MODE (SUBREG_REG (op0)));
1895 offset = 0;
1897 else if (GET_CODE (op0) == REG)
1899 word = operand_subword_force (op0, offset, GET_MODE (op0));
1900 offset = 0;
1902 else
1903 word = op0;
1905 /* Extract the parts in bit-counting order,
1906 whose meaning is determined by BYTES_BIG_ENDIAN.
1907 OFFSET is in UNITs, and UNIT is in bits.
1908 extract_fixed_bit_field wants offset in bytes. */
1909 part = extract_fixed_bit_field (word_mode, word,
1910 offset * unit / BITS_PER_UNIT,
1911 thissize, thispos, 0, 1);
1912 bitsdone += thissize;
1914 /* Shift this part into place for the result. */
1915 if (BYTES_BIG_ENDIAN)
1917 if (bitsize != bitsdone)
1918 part = expand_shift (LSHIFT_EXPR, word_mode, part,
1919 build_int_2 (bitsize - bitsdone, 0), 0, 1);
1921 else
1923 if (bitsdone != thissize)
1924 part = expand_shift (LSHIFT_EXPR, word_mode, part,
1925 build_int_2 (bitsdone - thissize, 0), 0, 1);
1928 if (first)
1929 result = part;
1930 else
1931 /* Combine the parts with bitwise or. This works
1932 because we extracted each part as an unsigned bit field. */
1933 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
1934 OPTAB_LIB_WIDEN);
1936 first = 0;
1939 /* Unsigned bit field: we are done. */
1940 if (unsignedp)
1941 return result;
1942 /* Signed bit field: sign-extend with two arithmetic shifts. */
1943 result = expand_shift (LSHIFT_EXPR, word_mode, result,
1944 build_int_2 (BITS_PER_WORD - bitsize, 0),
1945 NULL_RTX, 0);
1946 return expand_shift (RSHIFT_EXPR, word_mode, result,
1947 build_int_2 (BITS_PER_WORD - bitsize, 0), NULL_RTX, 0);
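/* Editor's sketch (hypothetical helper, host arithmetic, little-endian
   bit numbering): how the loop above reassembles a field spanning two
   32-bit words, assuming BITPOS + BITSIZE > 32 and BITSIZE <= 32.  */
static unsigned long long
extract_split_sketch (unsigned int lo, unsigned int hi,
                      int bitpos, int bitsize)
{
  int lo_bits = 32 - bitpos;        /* bits taken from the first word */
  int hi_bits = bitsize - lo_bits;  /* bits taken from the second word */
  unsigned long long part0 = lo >> bitpos;
  unsigned long long part1 = hi & ((1ULL << hi_bits) - 1);

  /* Combine with IOR, exactly as the unsigned parts are combined above;
     sign extension, when wanted, is a separate final step.  */
  return part0 | (part1 << lo_bits);
}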
1950 /* Add INC into TARGET. */
1952 void
1953 expand_inc (rtx target, rtx inc)
1955 rtx value = expand_binop (GET_MODE (target), add_optab,
1956 target, inc,
1957 target, 0, OPTAB_LIB_WIDEN);
1958 if (value != target)
1959 emit_move_insn (target, value);
1962 /* Subtract DEC from TARGET. */
1964 void
1965 expand_dec (rtx target, rtx dec)
1967 rtx value = expand_binop (GET_MODE (target), sub_optab,
1968 target, dec,
1969 target, 0, OPTAB_LIB_WIDEN);
1970 if (value != target)
1971 emit_move_insn (target, value);
1974 /* Output a shift instruction for expression code CODE,
1975 with SHIFTED being the rtx for the value to shift,
1976 and AMOUNT the tree for the amount to shift by.
1977 Store the result in the rtx TARGET, if that is convenient.
1978 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
1979 Return the rtx for where the value is. */
1981 rtx
1982 expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
1983 tree amount, rtx target, int unsignedp)
1985 rtx op1, temp = 0;
1986 int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
1987 int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
1988 int try;
1990 /* Previously, shift counts computed by NEGATE_EXPR were detected
1991 and shifted in the other direction, but that does not work
1992 on all machines. */
1994 op1 = expand_expr (amount, NULL_RTX, VOIDmode, 0);
1996 #ifdef SHIFT_COUNT_TRUNCATED
1997 if (SHIFT_COUNT_TRUNCATED)
1999 if (GET_CODE (op1) == CONST_INT
2000 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2001 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
2002 op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2003 % GET_MODE_BITSIZE (mode));
2004 else if (GET_CODE (op1) == SUBREG
2005 && subreg_lowpart_p (op1))
2006 op1 = SUBREG_REG (op1);
2008 #endif
2010 if (op1 == const0_rtx)
2011 return shifted;
2013 for (try = 0; temp == 0 && try < 3; try++)
2015 enum optab_methods methods;
2017 if (try == 0)
2018 methods = OPTAB_DIRECT;
2019 else if (try == 1)
2020 methods = OPTAB_WIDEN;
2021 else
2022 methods = OPTAB_LIB_WIDEN;
2024 if (rotate)
2026 /* Widening does not work for rotation. */
2027 if (methods == OPTAB_WIDEN)
2028 continue;
2029 else if (methods == OPTAB_LIB_WIDEN)
2031 /* If we have been unable to open-code this by a rotation,
2032 do it as the IOR of two shifts. I.e., to rotate A
2033 by N bits, compute (A << N) | ((unsigned) A >> (C - N))
2034 where C is the bitsize of A.
2036 It is theoretically possible that the target machine might
2037 not be able to perform either shift and hence we would
2038 be making two libcalls rather than just the one for the
2039 shift (similarly if IOR could not be done). We will allow
2040 this extremely unlikely lossage to avoid complicating the
2041 code below. */
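	      /* Editor's illustration (not part of the original source):
		 with a 32-bit mode and a variable count N, the fallback
		 built below for a left rotate is

		   (A << N) | ((unsigned) A >> (32 - N))

		 computed by the two expand_shift calls and the
		 expand_binop IOR that follow.  */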
2043 rtx subtarget = target == shifted ? 0 : target;
2044 rtx temp1;
2045 tree type = TREE_TYPE (amount);
2046 tree new_amount = make_tree (type, op1);
2047 tree other_amount
2048 = fold (build (MINUS_EXPR, type,
2049 convert (type,
2050 build_int_2 (GET_MODE_BITSIZE (mode),
2051 0)),
2052 amount));
2054 shifted = force_reg (mode, shifted);
2056 temp = expand_shift (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2057 mode, shifted, new_amount, subtarget, 1);
2058 temp1 = expand_shift (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2059 mode, shifted, other_amount, 0, 1);
2060 return expand_binop (mode, ior_optab, temp, temp1, target,
2061 unsignedp, methods);
2064 temp = expand_binop (mode,
2065 left ? rotl_optab : rotr_optab,
2066 shifted, op1, target, unsignedp, methods);
2068 /* If we don't have the rotate, but we are rotating by a constant
2069 that is in range, try a rotate in the opposite direction. */
2071 if (temp == 0 && GET_CODE (op1) == CONST_INT
2072 && INTVAL (op1) > 0
2073 && (unsigned int) INTVAL (op1) < GET_MODE_BITSIZE (mode))
2074 temp = expand_binop (mode,
2075 left ? rotr_optab : rotl_optab,
2076 shifted,
2077 GEN_INT (GET_MODE_BITSIZE (mode)
2078 - INTVAL (op1)),
2079 target, unsignedp, methods);
2081 else if (unsignedp)
2082 temp = expand_binop (mode,
2083 left ? ashl_optab : lshr_optab,
2084 shifted, op1, target, unsignedp, methods);
2086 /* Do arithmetic shifts.
2087 Also, if we are going to widen the operand, we can just as well
2088 use an arithmetic right-shift instead of a logical one. */
2089 if (temp == 0 && ! rotate
2090 && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2092 enum optab_methods methods1 = methods;
2094 /* If trying to widen a log shift to an arithmetic shift,
2095 don't accept an arithmetic shift of the same size. */
2096 if (unsignedp)
2097 methods1 = OPTAB_MUST_WIDEN;
2099 /* Arithmetic shift */
2101 temp = expand_binop (mode,
2102 left ? ashl_optab : ashr_optab,
2103 shifted, op1, target, unsignedp, methods1);
2106 /* We used to try extzv here for logical right shifts, but that was
2107 only useful for one machine, the VAX, and caused poor code
2108 generation there for lshrdi3, so the code was deleted and a
2109 define_expand for lshrsi3 was added to vax.md. */
2112 if (temp == 0)
2113 abort ();
2114 return temp;
2117 enum alg_code { alg_zero, alg_m, alg_shift,
2118 alg_add_t_m2, alg_sub_t_m2,
2119 alg_add_factor, alg_sub_factor,
2120 alg_add_t2_m, alg_sub_t2_m,
2121 alg_add, alg_subtract, alg_factor, alg_shiftop };
2123 /* This structure records a sequence of operations.
2124 `ops' is the number of operations recorded.
2125 `cost' is their total cost.
2126 The operations are stored in `op' and the corresponding
2127 logarithms of the integer coefficients in `log'.
2129 These are the operations:
2130 alg_zero total := 0;
2131 alg_m total := multiplicand;
2132 alg_shift total := total * coeff;
2133 alg_add_t_m2 total := total + multiplicand * coeff;
2134 alg_sub_t_m2 total := total - multiplicand * coeff;
2135 alg_add_factor total := total * coeff + total;
2136 alg_sub_factor total := total * coeff - total;
2137 alg_add_t2_m total := total * coeff + multiplicand;
2138 alg_sub_t2_m total := total * coeff - multiplicand;
2140 The first operand must be either alg_zero or alg_m. */
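/* Editor's worked example (an addition to the original comment): one
   recipe synth_mult can find for t = 10 (binary 1010):

     total := multiplicand;                (alg_m)
     total := total * 4 + multiplicand;    (alg_add_t2_m, log == 2; total == 5x)
     total := total * 2;                   (alg_shift, log == 1; total == 10x)

   i.e. two shifts and one add instead of a multiply insn.  */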
2142 struct algorithm
2144 short cost;
2145 short ops;
2146 /* The sizes of the OP and LOG fields are not directly related to the
2147 word size, but the worst-case algorithms arise when we have few
2148 consecutive ones or zeros, i.e., a multiplicand like 10101010101...
2149 In that case we will generate shift-by-2, add, shift-by-2, add, ...,
2150 wordsize operations in total. */
2151 enum alg_code op[MAX_BITS_PER_WORD];
2152 char log[MAX_BITS_PER_WORD];
2155 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT, int);
2156 static unsigned HOST_WIDE_INT choose_multiplier (unsigned HOST_WIDE_INT, int,
2157 int, unsigned HOST_WIDE_INT *,
2158 int *, int *);
2159 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2160 /* Compute and return the best algorithm for multiplying by T.
2161 The algorithm must cost less than COST_LIMIT.
2162 If retval.cost >= COST_LIMIT, no algorithm was found and all
2163 other fields of the returned struct are undefined. */
2165 static void
2166 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2167 int cost_limit)
2169 int m;
2170 struct algorithm *alg_in, *best_alg;
2171 int cost;
2172 unsigned HOST_WIDE_INT q;
2174 /* Indicate that no algorithm is yet found. If no algorithm
2175 is found, this value will be returned and indicate failure. */
2176 alg_out->cost = cost_limit;
2178 if (cost_limit <= 0)
2179 return;
2181 /* t == 1 can be done in zero cost. */
2182 if (t == 1)
2184 alg_out->ops = 1;
2185 alg_out->cost = 0;
2186 alg_out->op[0] = alg_m;
2187 return;
2190 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2191 fail now. */
2192 if (t == 0)
2194 if (zero_cost >= cost_limit)
2195 return;
2196 else
2198 alg_out->ops = 1;
2199 alg_out->cost = zero_cost;
2200 alg_out->op[0] = alg_zero;
2201 return;
2205 /* We'll be needing a couple of extra algorithm structures now. */
2207 alg_in = alloca (sizeof (struct algorithm));
2208 best_alg = alloca (sizeof (struct algorithm));
2210 /* If we have a group of zero bits at the low-order part of T, try
2211 multiplying by the remaining bits and then doing a shift. */
2213 if ((t & 1) == 0)
2215 m = floor_log2 (t & -t); /* m = number of low zero bits */
2216 if (m < BITS_PER_WORD)
2218 q = t >> m;
2219 cost = shift_cost[m];
2220 synth_mult (alg_in, q, cost_limit - cost);
2222 cost += alg_in->cost;
2223 if (cost < cost_limit)
2225 struct algorithm *x;
2226 x = alg_in, alg_in = best_alg, best_alg = x;
2227 best_alg->log[best_alg->ops] = m;
2228 best_alg->op[best_alg->ops] = alg_shift;
2229 cost_limit = cost;
2234 /* If we have an odd number, add or subtract one. */
2235 if ((t & 1) != 0)
2237 unsigned HOST_WIDE_INT w;
2239 for (w = 1; (w & t) != 0; w <<= 1)
2241 /* If T was -1, then W will be zero after the loop. This is another
2242 case where T ends with ...111. Handling this by multiplying by
2243 (T + 1) and subtracting 1 produces slightly better code and makes
2244 algorithm selection much faster than treating it like the
2245 ...0111 case below. */
2246 if (w == 0
2247 || (w > 2
2248 /* Reject the case where t is 3.
2249 Thus we prefer addition in that case. */
2250 && t != 3))
2252 /* T ends with ...111. Multiply by (T + 1) and subtract 1. */
2254 cost = add_cost;
2255 synth_mult (alg_in, t + 1, cost_limit - cost);
2257 cost += alg_in->cost;
2258 if (cost < cost_limit)
2260 struct algorithm *x;
2261 x = alg_in, alg_in = best_alg, best_alg = x;
2262 best_alg->log[best_alg->ops] = 0;
2263 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2264 cost_limit = cost;
2267 else
2269 /* T ends with ...01 or ...011. Multiply by (T - 1) and add 1. */
2271 cost = add_cost;
2272 synth_mult (alg_in, t - 1, cost_limit - cost);
2274 cost += alg_in->cost;
2275 if (cost < cost_limit)
2277 struct algorithm *x;
2278 x = alg_in, alg_in = best_alg, best_alg = x;
2279 best_alg->log[best_alg->ops] = 0;
2280 best_alg->op[best_alg->ops] = alg_add_t_m2;
2281 cost_limit = cost;
2286 /* Look for factors of t of the form
2287 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2288 If we find such a factor, we can multiply by t using an algorithm that
2289 multiplies by q, shifts the result by m, and adds/subtracts it to/from itself.
2291 We search for large factors first and loop down, even if large factors
2292 are less probable than small; if we find a large factor we will find a
2293 good sequence quickly, and therefore be able to prune (by decreasing
2294 COST_LIMIT) the search. */
2296 for (m = floor_log2 (t - 1); m >= 2; m--)
2298 unsigned HOST_WIDE_INT d;
2300 d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2301 if (t % d == 0 && t > d && m < BITS_PER_WORD)
2303 cost = MIN (shiftadd_cost[m], add_cost + shift_cost[m]);
2304 synth_mult (alg_in, t / d, cost_limit - cost);
2306 cost += alg_in->cost;
2307 if (cost < cost_limit)
2309 struct algorithm *x;
2310 x = alg_in, alg_in = best_alg, best_alg = x;
2311 best_alg->log[best_alg->ops] = m;
2312 best_alg->op[best_alg->ops] = alg_add_factor;
2313 cost_limit = cost;
2315 /* Other factors will have been taken care of in the recursion. */
2316 break;
2319 d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2320 if (t % d == 0 && t > d && m < BITS_PER_WORD)
2322 cost = MIN (shiftsub_cost[m], add_cost + shift_cost[m]);
2323 synth_mult (alg_in, t / d, cost_limit - cost);
2325 cost += alg_in->cost;
2326 if (cost < cost_limit)
2328 struct algorithm *x;
2329 x = alg_in, alg_in = best_alg, best_alg = x;
2330 best_alg->log[best_alg->ops] = m;
2331 best_alg->op[best_alg->ops] = alg_sub_factor;
2332 cost_limit = cost;
2334 break;
2338 /* Try shift-and-add (load effective address) instructions,
2339 i.e., compute a*3, a*5, a*9.
2340 if ((t & 1) != 0)
2342 q = t - 1;
2343 q = q & -q;
2344 m = exact_log2 (q);
2345 if (m >= 0 && m < BITS_PER_WORD)
2347 cost = shiftadd_cost[m];
2348 synth_mult (alg_in, (t - 1) >> m, cost_limit - cost);
2350 cost += alg_in->cost;
2351 if (cost < cost_limit)
2353 struct algorithm *x;
2354 x = alg_in, alg_in = best_alg, best_alg = x;
2355 best_alg->log[best_alg->ops] = m;
2356 best_alg->op[best_alg->ops] = alg_add_t2_m;
2357 cost_limit = cost;
2361 q = t + 1;
2362 q = q & -q;
2363 m = exact_log2 (q);
2364 if (m >= 0 && m < BITS_PER_WORD)
2366 cost = shiftsub_cost[m];
2367 synth_mult (alg_in, (t + 1) >> m, cost_limit - cost);
2369 cost += alg_in->cost;
2370 if (cost < cost_limit)
2372 struct algorithm *x;
2373 x = alg_in, alg_in = best_alg, best_alg = x;
2374 best_alg->log[best_alg->ops] = m;
2375 best_alg->op[best_alg->ops] = alg_sub_t2_m;
2376 cost_limit = cost;
2381 /* If cost_limit has not decreased since we stored it in alg_out->cost,
2382 we have not found any algorithm. */
2383 if (cost_limit == alg_out->cost)
2384 return;
2386 /* If the sequence we are generating is too long for `struct algorithm'
2387 to record, make this search fail. */
2388 if (best_alg->ops == MAX_BITS_PER_WORD)
2389 return;
2391 /* Copy the algorithm from temporary space to the space at alg_out.
2392 We avoid using structure assignment because the majority of
2393 best_alg is normally undefined, and this is a critical function. */
2394 alg_out->ops = best_alg->ops + 1;
2395 alg_out->cost = cost_limit;
2396 memcpy (alg_out->op, best_alg->op,
2397 alg_out->ops * sizeof *alg_out->op);
2398 memcpy (alg_out->log, best_alg->log,
2399 alg_out->ops * sizeof *alg_out->log);
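/* Editor's sketch (hypothetical helper, not part of the original
   source): replay a recipe produced by synth_mult on host integers to
   check that it computes x * t.  It mirrors the val_so_far bookkeeping
   in expand_mult below; the enum values not described in the table
   above are never emitted by synth_mult.  */
static unsigned long long
synth_replay_sketch (const struct algorithm *alg, unsigned long long x)
{
  unsigned long long total = 0;
  int i;

  for (i = 0; i < alg->ops; i++)
    {
      int log = alg->log[i];
      switch (alg->op[i])
	{
	case alg_zero:       total = 0;                      break;
	case alg_m:          total = x;                      break;
	case alg_shift:      total <<= log;                  break;
	case alg_add_t_m2:   total += x << log;              break;
	case alg_sub_t_m2:   total -= x << log;              break;
	case alg_add_t2_m:   total = (total << log) + x;     break;
	case alg_sub_t2_m:   total = (total << log) - x;     break;
	case alg_add_factor: total = (total << log) + total; break;
	case alg_sub_factor: total = (total << log) - total; break;
	default:                                             break;
	}
    }
  return total;
}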
2402 /* Perform a multiplication and return an rtx for the result.
2403 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
2404 TARGET is a suggestion for where to store the result (an rtx).
2406 We check specially for a constant integer as OP1.
2407 If you want this check for OP0 as well, then before calling
2408 you should swap the two operands if OP0 would be constant. */
2410 rtx
2411 expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
2412 int unsignedp)
2414 rtx const_op1 = op1;
2416 /* synth_mult does an `unsigned int' multiply. As long as the mode is
2417 less than or equal in size to `unsigned int' this doesn't matter.
2418 If the mode is larger than `unsigned int', then synth_mult works only
2419 if the constant value exactly fits in an `unsigned int' without any
2420 truncation. This means that multiplying by negative values does
2421 not work; results are off by 2^32 on a 32-bit machine. */
2423 /* If we are multiplying in DImode, it may still be a win
2424 to try to work with shifts and adds. */
2425 if (GET_CODE (op1) == CONST_DOUBLE
2426 && GET_MODE_CLASS (GET_MODE (op1)) == MODE_INT
2427 && HOST_BITS_PER_INT >= BITS_PER_WORD
2428 && CONST_DOUBLE_HIGH (op1) == 0)
2429 const_op1 = GEN_INT (CONST_DOUBLE_LOW (op1));
2430 else if (HOST_BITS_PER_INT < GET_MODE_BITSIZE (mode)
2431 && GET_CODE (op1) == CONST_INT
2432 && INTVAL (op1) < 0)
2433 const_op1 = 0;
2435 /* We used to test optimize here, on the grounds that it's better to
2436 produce a smaller program when -O is not used.
2437 But this causes such a terrible slowdown sometimes
2438 that it seems better to use synth_mult always. */
2440 if (const_op1 && GET_CODE (const_op1) == CONST_INT
2441 && (unsignedp || ! flag_trapv))
2443 struct algorithm alg;
2444 struct algorithm alg2;
2445 HOST_WIDE_INT val = INTVAL (op1);
2446 HOST_WIDE_INT val_so_far;
2447 rtx insn;
2448 int mult_cost;
2449 enum {basic_variant, negate_variant, add_variant} variant = basic_variant;
2451 /* OP0 must be a register to make mult_cost match the precomputed
2452 shiftadd_cost array. */
2453 op0 = force_reg (mode, op0);
2455 /* Try to do the computation three ways: multiply by the negative of OP1
2456 and then negate, do the multiplication directly, or do multiplication
2457 by OP1 - 1. */
2459 mult_cost = rtx_cost (gen_rtx_MULT (mode, op0, op1), SET);
2460 mult_cost = MIN (12 * add_cost, mult_cost);
2462 synth_mult (&alg, val, mult_cost);
2464 /* This works only if the inverted value actually fits in an
2465 `unsigned int'. */
2466 if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
2468 synth_mult (&alg2, - val,
2469 (alg.cost < mult_cost ? alg.cost : mult_cost) - negate_cost);
2470 if (alg2.cost + negate_cost < alg.cost)
2471 alg = alg2, variant = negate_variant;
2474 /* This proves very useful for division-by-constant. */
2475 synth_mult (&alg2, val - 1,
2476 (alg.cost < mult_cost ? alg.cost : mult_cost) - add_cost);
2477 if (alg2.cost + add_cost < alg.cost)
2478 alg = alg2, variant = add_variant;
2480 if (alg.cost < mult_cost)
2482 /* We found something cheaper than a multiply insn. */
2483 int opno;
2484 rtx accum, tem;
2485 enum machine_mode nmode;
2487 op0 = protect_from_queue (op0, 0);
2489 /* Avoid referencing memory over and over.
2490 For speed, but also for correctness when mem is volatile. */
2491 if (GET_CODE (op0) == MEM)
2492 op0 = force_reg (mode, op0);
2494 /* ACCUM starts out either as OP0 or as a zero, depending on
2495 the first operation. */
2497 if (alg.op[0] == alg_zero)
2499 accum = copy_to_mode_reg (mode, const0_rtx);
2500 val_so_far = 0;
2502 else if (alg.op[0] == alg_m)
2504 accum = copy_to_mode_reg (mode, op0);
2505 val_so_far = 1;
2507 else
2508 abort ();
2510 for (opno = 1; opno < alg.ops; opno++)
2512 int log = alg.log[opno];
2513 int preserve = preserve_subexpressions_p ();
2514 rtx shift_subtarget = preserve ? 0 : accum;
2515 rtx add_target
2516 = (opno == alg.ops - 1 && target != 0 && variant != add_variant
2517 && ! preserve)
2518 ? target : 0;
2519 rtx accum_target = preserve ? 0 : accum;
2521 switch (alg.op[opno])
2523 case alg_shift:
2524 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2525 build_int_2 (log, 0), NULL_RTX, 0);
2526 val_so_far <<= log;
2527 break;
2529 case alg_add_t_m2:
2530 tem = expand_shift (LSHIFT_EXPR, mode, op0,
2531 build_int_2 (log, 0), NULL_RTX, 0);
2532 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2533 add_target
2534 ? add_target : accum_target);
2535 val_so_far += (HOST_WIDE_INT) 1 << log;
2536 break;
2538 case alg_sub_t_m2:
2539 tem = expand_shift (LSHIFT_EXPR, mode, op0,
2540 build_int_2 (log, 0), NULL_RTX, 0);
2541 accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
2542 add_target
2543 ? add_target : accum_target);
2544 val_so_far -= (HOST_WIDE_INT) 1 << log;
2545 break;
2547 case alg_add_t2_m:
2548 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2549 build_int_2 (log, 0), shift_subtarget,
2551 accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
2552 add_target
2553 ? add_target : accum_target);
2554 val_so_far = (val_so_far << log) + 1;
2555 break;
2557 case alg_sub_t2_m:
2558 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2559 build_int_2 (log, 0), shift_subtarget,
2561 accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
2562 add_target
2563 ? add_target : accum_target);
2564 val_so_far = (val_so_far << log) - 1;
2565 break;
2567 case alg_add_factor:
2568 tem = expand_shift (LSHIFT_EXPR, mode, accum,
2569 build_int_2 (log, 0), NULL_RTX, 0);
2570 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2571 add_target
2572 ? add_target : accum_target);
2573 val_so_far += val_so_far << log;
2574 break;
2576 case alg_sub_factor:
2577 tem = expand_shift (LSHIFT_EXPR, mode, accum,
2578 build_int_2 (log, 0), NULL_RTX, 0);
2579 accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
2580 (add_target ? add_target
2581 : preserve ? 0 : tem));
2582 val_so_far = (val_so_far << log) - val_so_far;
2583 break;
2585 default:
2586 abort ();
2589 /* Write a REG_EQUAL note on the last insn so that we can cse
2590 multiplication sequences. Note that if ACCUM is a SUBREG,
2591 we've set the inner register and must properly indicate
2592 that. */
2594 tem = op0, nmode = mode;
2595 if (GET_CODE (accum) == SUBREG)
2597 nmode = GET_MODE (SUBREG_REG (accum));
2598 tem = gen_lowpart (nmode, op0);
2601 insn = get_last_insn ();
2602 set_unique_reg_note (insn,
2603 REG_EQUAL,
2604 gen_rtx_MULT (nmode, tem,
2605 GEN_INT (val_so_far)));
2608 if (variant == negate_variant)
2610 val_so_far = - val_so_far;
2611 accum = expand_unop (mode, neg_optab, accum, target, 0);
2613 else if (variant == add_variant)
2615 val_so_far = val_so_far + 1;
2616 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
2619 if (val != val_so_far)
2620 abort ();
2622 return accum;
2626 if (GET_CODE (op0) == CONST_DOUBLE)
2628 rtx temp = op0;
2629 op0 = op1;
2630 op1 = temp;
2633 /* Expand x*2.0 as x+x. */
2634 if (GET_CODE (op1) == CONST_DOUBLE
2635 && GET_MODE_CLASS (mode) == MODE_FLOAT)
2637 REAL_VALUE_TYPE d;
2638 REAL_VALUE_FROM_CONST_DOUBLE (d, op1);
2640 if (REAL_VALUES_EQUAL (d, dconst2))
2642 op0 = force_reg (GET_MODE (op0), op0);
2643 return expand_binop (mode, add_optab, op0, op0,
2644 target, unsignedp, OPTAB_LIB_WIDEN);
2648 /* This used to use umul_optab if unsigned, but for non-widening multiply
2649 there is no difference between signed and unsigned. */
2650 op0 = expand_binop (mode,
2651 ! unsignedp
2652 && flag_trapv && (GET_MODE_CLASS(mode) == MODE_INT)
2653 ? smulv_optab : smul_optab,
2654 op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
2655 if (op0 == 0)
2656 abort ();
2657 return op0;
2660 /* Return the smallest n such that 2**n >= X. */
2662 int
2663 ceil_log2 (unsigned HOST_WIDE_INT x)
2665 return floor_log2 (x - 1) + 1;
2668 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
2669 replace division by D, and put the least significant N bits of the result
2670 in *MULTIPLIER_PTR and return the most significant bit.
2672 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
2673 needed precision is in PRECISION (should be <= N).
2675 PRECISION should be as small as possible so this function can choose
2676 a multiplier more freely.
2678 The rounded-up logarithm of D is placed in *LGUP_PTR. A shift count that
2679 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
2681 Using this function, x/D will be equal to (x * m) >> (N + *POST_SHIFT_PTR),
2682 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
2684 static
2685 unsigned HOST_WIDE_INT
2686 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
2687 unsigned HOST_WIDE_INT *multiplier_ptr,
2688 int *post_shift_ptr, int *lgup_ptr)
2690 HOST_WIDE_INT mhigh_hi, mlow_hi;
2691 unsigned HOST_WIDE_INT mhigh_lo, mlow_lo;
2692 int lgup, post_shift;
2693 int pow, pow2;
2694 unsigned HOST_WIDE_INT nl, dummy1;
2695 HOST_WIDE_INT nh, dummy2;
2697 /* lgup = ceil(log2(divisor)); */
2698 lgup = ceil_log2 (d);
2700 if (lgup > n)
2701 abort ();
2703 pow = n + lgup;
2704 pow2 = n + lgup - precision;
2706 if (pow == 2 * HOST_BITS_PER_WIDE_INT)
2708 /* We could handle this with some effort, but this case is much better
2709 handled directly with a scc insn, so rely on the caller using that. */
2710 abort ();
2713 /* mlow = 2^(N + lgup)/d */
2714 if (pow >= HOST_BITS_PER_WIDE_INT)
2716 nh = (HOST_WIDE_INT) 1 << (pow - HOST_BITS_PER_WIDE_INT);
2717 nl = 0;
2719 else
2721 nh = 0;
2722 nl = (unsigned HOST_WIDE_INT) 1 << pow;
2724 div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
2725 &mlow_lo, &mlow_hi, &dummy1, &dummy2);
2727 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
2728 if (pow2 >= HOST_BITS_PER_WIDE_INT)
2729 nh |= (HOST_WIDE_INT) 1 << (pow2 - HOST_BITS_PER_WIDE_INT);
2730 else
2731 nl |= (unsigned HOST_WIDE_INT) 1 << pow2;
2732 div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
2733 &mhigh_lo, &mhigh_hi, &dummy1, &dummy2);
2735 if (mhigh_hi && nh - d >= d)
2736 abort ();
2737 if (mhigh_hi > 1 || mlow_hi > 1)
2738 abort ();
2739 /* Assert that mlow < mhigh. */
2740 if (! (mlow_hi < mhigh_hi || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo)))
2741 abort ();
2743 /* If precision == N, then mlow, mhigh exceed 2^N
2744 (but they do not exceed 2^(N+1)). */
2746 /* Reduce to lowest terms. */
2747 for (post_shift = lgup; post_shift > 0; post_shift--)
2749 unsigned HOST_WIDE_INT ml_lo = (mlow_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mlow_lo >> 1);
2750 unsigned HOST_WIDE_INT mh_lo = (mhigh_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mhigh_lo >> 1);
2751 if (ml_lo >= mh_lo)
2752 break;
2754 mlow_hi = 0;
2755 mlow_lo = ml_lo;
2756 mhigh_hi = 0;
2757 mhigh_lo = mh_lo;
2760 *post_shift_ptr = post_shift;
2761 *lgup_ptr = lgup;
2762 if (n < HOST_BITS_PER_WIDE_INT)
2764 unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
2765 *multiplier_ptr = mhigh_lo & mask;
2766 return mhigh_lo >= mask;
2768 else
2770 *multiplier_ptr = mhigh_lo;
2771 return mhigh_hi;
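/* Editor's sketch (hypothetical helper, host arithmetic, D nonzero):
   the identity choose_multiplier establishes, checked by brute force
   at N == 8.  M is the full N + 1 bit multiplier, i.e. the low N bits
   stored in *MULTIPLIER_PTR with the returned bit on top; the extra 8
   in the shift is the high-part extraction done at expansion time by
   expand_mult_highpart.  */
static int
check_multiplier_sketch (unsigned int d, unsigned long long m,
                         int post_shift)
{
  unsigned int x;

  for (x = 0; x < 256; x++)
    if ((x * m) >> (8 + post_shift) != x / d)
      return 0;   /* identity failed */
  return 1;       /* x / d == (x * m) >> (8 + post_shift) for all x */
}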
2775 /* Compute the inverse of X mod 2**N, i.e., find Y such that X * Y is
2776 congruent to 1 (mod 2**N). */
2778 static unsigned HOST_WIDE_INT
2779 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
2781 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
2783 /* The algorithm notes that the choice y = x satisfies
2784 x*y == 1 mod 2^3, since x is assumed odd.
2785 Each iteration doubles the number of bits of significance in y. */
2787 unsigned HOST_WIDE_INT mask;
2788 unsigned HOST_WIDE_INT y = x;
2789 int nbit = 3;
2791 mask = (n == HOST_BITS_PER_WIDE_INT
2792 ? ~(unsigned HOST_WIDE_INT) 0
2793 : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
2795 while (nbit < n)
2797 y = y * (2 - x*y) & mask; /* Modulo 2^N */
2798 nbit *= 2;
2800 return y;
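/* Editor's sketch (hypothetical helper; assumes a 64-bit unsigned
   long long): the same Newton-style iteration on plain host integers.
   The wraparound of 64-bit arithmetic supplies the mod 2^64 reduction,
   so no explicit mask is needed.  */
static unsigned long long
invert_mod2_64_sketch (unsigned long long x)  /* X must be odd.  */
{
  unsigned long long y = x;  /* correct to 3 bits */
  int nbit;

  for (nbit = 3; nbit < 64; nbit *= 2)
    y = y * (2 - x * y);     /* doubles the number of correct bits */
  return y;
}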
2803 /* Emit code to adjust ADJ_OPERAND after a multiplication of OP0 and OP1
2804 done with the wrong signedness. ADJ_OPERAND is already the high half of
2805 the product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed product
2806 to become unsigned; if UNSIGNEDP is zero, adjust the unsigned product to
2807 become signed.
2809 The result is put in TARGET if that is convenient.
2811 MODE is the mode of operation. */
2813 rtx
2814 expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
2815 rtx op1, rtx target, int unsignedp)
2817 rtx tem;
2818 enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
2820 tem = expand_shift (RSHIFT_EXPR, mode, op0,
2821 build_int_2 (GET_MODE_BITSIZE (mode) - 1, 0),
2822 NULL_RTX, 0);
2823 tem = expand_and (mode, tem, op1, NULL_RTX);
2824 adj_operand
2825 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
2826 adj_operand);
2828 tem = expand_shift (RSHIFT_EXPR, mode, op1,
2829 build_int_2 (GET_MODE_BITSIZE (mode) - 1, 0),
2830 NULL_RTX, 0);
2831 tem = expand_and (mode, tem, op0, NULL_RTX);
2832 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
2833 target);
2835 return target;
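/* Editor's note on the identity used above (an addition to the original
   source): for N-bit operands, if S is the signed high part of
   OP0 * OP1 and U the unsigned one, then

     U == S + (OP0 < 0 ? OP1 : 0) + (OP1 < 0 ? OP0 : 0)   (mod 2^N)

   Each correction term is built as (OPi >> (N-1)) & OPj: the arithmetic
   shift yields all ones exactly when OPi is negative.  */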
2838 /* Emit code to multiply OP0 and CNST1, putting the high half of the result
2839 in TARGET if that is convenient, and return where the result is. If the
2840 operation cannot be performed, 0 is returned.
2842 MODE is the mode of operation and result.
2844 UNSIGNEDP nonzero means unsigned multiply.
2846 MAX_COST is the total allowed cost for the expanded RTL. */
2848 rtx
2849 expand_mult_highpart (enum machine_mode mode, rtx op0,
2850 unsigned HOST_WIDE_INT cnst1, rtx target,
2851 int unsignedp, int max_cost)
2853 enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
2854 optab mul_highpart_optab;
2855 optab moptab;
2856 rtx tem;
2857 int size = GET_MODE_BITSIZE (mode);
2858 rtx op1, wide_op1;
2860 /* We can't support modes wider than HOST_BITS_PER_WIDE_INT. */
2861 if (size > HOST_BITS_PER_WIDE_INT)
2862 abort ();
2864 op1 = gen_int_mode (cnst1, mode);
2866 wide_op1
2867 = immed_double_const (cnst1,
2868 (unsignedp
2869 ? (HOST_WIDE_INT) 0
2870 : -(cnst1 >> (HOST_BITS_PER_WIDE_INT - 1))),
2871 wider_mode);
2873 /* expand_mult handles constant multiplication of word_mode
2874 or narrower. It does a poor job for large modes. */
2875 if (size < BITS_PER_WORD
2876 && mul_cost[(int) wider_mode] + shift_cost[size-1] < max_cost)
2878 /* We have to do this, since expand_binop doesn't do conversion for
2879 multiply. Maybe change expand_binop to handle widening multiply? */
2880 op0 = convert_to_mode (wider_mode, op0, unsignedp);
2882 /* We know that this can't have signed overflow, so pretend this is
2883 an unsigned multiply. */
2884 tem = expand_mult (wider_mode, op0, wide_op1, NULL_RTX, 0);
2885 tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
2886 build_int_2 (size, 0), NULL_RTX, 1);
2887 return convert_modes (mode, wider_mode, tem, unsignedp);
2890 if (target == 0)
2891 target = gen_reg_rtx (mode);
2893 /* First, try using a multiplication insn that generates only the needed
2894 high part of the product, with the same signedness as UNSIGNEDP. */
2895 if (mul_highpart_cost[(int) mode] < max_cost)
2897 mul_highpart_optab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
2898 target = expand_binop (mode, mul_highpart_optab,
2899 op0, op1, target, unsignedp, OPTAB_DIRECT);
2900 if (target)
2901 return target;
2904 /* Second, same as above, but use the signedness opposite of UNSIGNEDP;
2905 we then need to adjust the result after the multiplication. */
2906 if (size - 1 < BITS_PER_WORD
2907 && (mul_highpart_cost[(int) mode] + 2 * shift_cost[size-1] + 4 * add_cost
2908 < max_cost))
2910 mul_highpart_optab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
2911 target = expand_binop (mode, mul_highpart_optab,
2912 op0, op1, target, unsignedp, OPTAB_DIRECT);
2913 if (target)
2914 /* We used the wrong signedness. Adjust the result. */
2915 return expand_mult_highpart_adjust (mode, target, op0,
2916 op1, target, unsignedp);
2919 /* Try widening multiplication. */
2920 moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
2921 if (moptab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
2922 && mul_widen_cost[(int) wider_mode] < max_cost)
2924 op1 = force_reg (mode, op1);
2925 goto try;
2928 /* Try widening the mode and performing a non-widening multiplication. */
2929 moptab = smul_optab;
2930 if (smul_optab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
2931 && size - 1 < BITS_PER_WORD
2932 && mul_cost[(int) wider_mode] + shift_cost[size-1] < max_cost)
2934 op1 = wide_op1;
2935 goto try;
2938 /* Try widening multiplication of opposite signedness, and adjust. */
2939 moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
2940 if (moptab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
2941 && size - 1 < BITS_PER_WORD
2942 && (mul_widen_cost[(int) wider_mode]
2943 + 2 * shift_cost[size-1] + 4 * add_cost < max_cost))
2945 rtx regop1 = force_reg (mode, op1);
2946 tem = expand_binop (wider_mode, moptab, op0, regop1,
2947 NULL_RTX, ! unsignedp, OPTAB_WIDEN);
2948 if (tem != 0)
2950 /* Extract the high half of the just generated product. */
2951 tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
2952 build_int_2 (size, 0), NULL_RTX, 1);
2953 tem = convert_modes (mode, wider_mode, tem, unsignedp);
2954 /* We used the wrong signedness. Adjust the result. */
2955 return expand_mult_highpart_adjust (mode, tem, op0, op1,
2956 target, unsignedp);
2960 return 0;
2962 try:
2963 /* Pass NULL_RTX as target since TARGET has the wrong mode. */
2964 tem = expand_binop (wider_mode, moptab, op0, op1,
2965 NULL_RTX, unsignedp, OPTAB_WIDEN);
2966 if (tem == 0)
2967 return 0;
2969 /* Extract the high half of the just generated product. */
2970 if (mode == word_mode)
2972 return gen_highpart (mode, tem);
2974 else
2976 tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
2977 build_int_2 (size, 0), NULL_RTX, 1);
2978 return convert_modes (mode, wider_mode, tem, unsignedp);
2982 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
2983 if that is convenient, and returning where the result is.
2984 You may request either the quotient or the remainder as the result;
2985 specify REM_FLAG nonzero to get the remainder.
2987 CODE is the expression code for which kind of division this is;
2988 it controls how rounding is done. MODE is the machine mode to use.
2989 UNSIGNEDP nonzero means do unsigned division. */
2991 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
2992 and then correct it by or'ing in missing high bits
2993 if result of ANDI is nonzero.
2994 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
2995 This could optimize to a bfexts instruction.
2996 But C doesn't use these operations, so their optimizations are
2997 left for later. */
2998 /* ??? For modulo, we don't actually need the highpart of the first product,
2999 the low part will do nicely. And for small divisors, the second multiply
3000 can also be a low-part only multiply or even be completely left out.
3001 E.g. to calculate the remainder of a division by 3 with a 32 bit
3002 multiply, multiply with 0x55555556 and extract the upper two bits;
3003 the result is exact for inputs up to 0x1fffffff.
3004 The input range can be reduced by using cross-sum rules.
3005 For odd divisors >= 3, the following table gives right shift counts
3006 so that if a number is shifted by an integer multiple of the given
3007 amount, the remainder stays the same:
3008 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3009 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3010 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3011 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3012 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3014 Cross-sum rules for even numbers can be derived by leaving as many bits
3015 to the right alone as the divisor has zeros to the right.
3016 E.g. if x is an unsigned 32 bit number:
3017 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3018 */
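/* Editor's sketch (hypothetical helper; assumes a 32-bit unsigned int):
   the mod-3 trick described above.  The top two bits of the low half
   of x * 0x55555556 are x mod 3, exact for x <= 0x1fffffff.  */
static unsigned int
mod3_sketch (unsigned int x)
{
  return (unsigned int) (x * 0x55555556ULL) >> 30;
}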
3020 #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
3022 rtx
3023 expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3024 rtx op0, rtx op1, rtx target, int unsignedp)
3026 enum machine_mode compute_mode;
3027 rtx tquotient;
3028 rtx quotient = 0, remainder = 0;
3029 rtx last;
3030 int size;
3031 rtx insn, set;
3032 optab optab1, optab2;
3033 int op1_is_constant, op1_is_pow2 = 0;
3034 int max_cost, extra_cost;
3035 static HOST_WIDE_INT last_div_const = 0;
3036 static HOST_WIDE_INT ext_op1;
3038 op1_is_constant = GET_CODE (op1) == CONST_INT;
3039 if (op1_is_constant)
3041 ext_op1 = INTVAL (op1);
3042 if (unsignedp)
3043 ext_op1 &= GET_MODE_MASK (mode);
3044 op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3045 || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3048 /*
3049 This is the structure of expand_divmod:
3051 First comes code to fix up the operands so we can perform the operations
3052 correctly and efficiently.
3054 Second comes a switch statement with code specific for each rounding mode.
3055 For some special operands this code emits all RTL for the desired
3056 operation, for other cases, it generates only a quotient and stores it in
3057 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
3058 to indicate that it has not done anything.
3060 Last comes code that finishes the operation. If QUOTIENT is set and
3061 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
3062 QUOTIENT is not set, it is computed using trunc rounding.
3064 We try to generate special code for division and remainder when OP1 is a
3065 constant. If |OP1| = 2**n we can use shifts and some other fast
3066 operations. For other values of OP1, we compute a carefully selected
3067 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3068 by m.
3070 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3071 half of the product. Different strategies for generating the product are
3072 implemented in expand_mult_highpart.
3074 If what we actually want is the remainder, we generate that by another
3075 by-constant multiplication and a subtraction. */
3077 /* We shouldn't be called with OP1 == const1_rtx, but some of the
3078 code below will malfunction if we are, so check here and handle
3079 the special case if so. */
3080 if (op1 == const1_rtx)
3081 return rem_flag ? const0_rtx : op0;
3083 /* When dividing by -1, we could get an overflow.
3084 negv_optab can handle overflows. */
3085 if (! unsignedp && op1 == constm1_rtx)
3087 if (rem_flag)
3088 return const0_rtx;
3089 return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT
3090 ? negv_optab : neg_optab, op0, target, 0);
3093 if (target
3094 /* Don't use the function value register as a target
3095 since we have to read it as well as write it,
3096 and function-inlining gets confused by this. */
3097 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3098 /* Don't clobber an operand while doing a multi-step calculation. */
3099 || ((rem_flag || op1_is_constant)
3100 && (reg_mentioned_p (target, op0)
3101 || (GET_CODE (op0) == MEM && GET_CODE (target) == MEM)))
3102 || reg_mentioned_p (target, op1)
3103 || (GET_CODE (op1) == MEM && GET_CODE (target) == MEM)))
3104 target = 0;
3106 /* Get the mode in which to perform this computation. Normally it will
3107 be MODE, but sometimes we can't do the desired operation in MODE.
3108 If so, pick a wider mode in which we can do the operation. Convert
3109 to that mode at the start to avoid repeated conversions.
3111 First see what operations we need. These depend on the expression
3112 we are evaluating. (We assume that divxx3 insns exist under the
3113 same conditions as modxx3 insns, and that these insns don't normally
3114 fail. If these assumptions are not correct, we may generate less
3115 efficient code in some cases.)
3117 Then see if we find a mode in which we can open-code that operation
3118 (either a division, modulus, or shift). Finally, check for the smallest
3119 mode for which we can do the operation with a library call. */
3121 /* We might want to refine this now that we have division-by-constant
3122 optimization. Since expand_mult_highpart tries so many variants, it is
3123 not straightforward to generalize this. Maybe we should make an array
3124 of possible modes in init_expmed? Save this for GCC 2.7. */
3126 optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3127 ? (unsignedp ? lshr_optab : ashr_optab)
3128 : (unsignedp ? udiv_optab : sdiv_optab));
3129 optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3130 ? optab1
3131 : (unsignedp ? udivmod_optab : sdivmod_optab));
3133 for (compute_mode = mode; compute_mode != VOIDmode;
3134 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3135 if (optab1->handlers[(int) compute_mode].insn_code != CODE_FOR_nothing
3136 || optab2->handlers[(int) compute_mode].insn_code != CODE_FOR_nothing)
3137 break;
3139 if (compute_mode == VOIDmode)
3140 for (compute_mode = mode; compute_mode != VOIDmode;
3141 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3142 if (optab1->handlers[(int) compute_mode].libfunc
3143 || optab2->handlers[(int) compute_mode].libfunc)
3144 break;
3146 /* If we still couldn't find a mode, use MODE, but we'll probably abort
3147 in expand_binop. */
3148 if (compute_mode == VOIDmode)
3149 compute_mode = mode;
3151 if (target && GET_MODE (target) == compute_mode)
3152 tquotient = target;
3153 else
3154 tquotient = gen_reg_rtx (compute_mode);
3156 size = GET_MODE_BITSIZE (compute_mode);
3157 #if 0
3158 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3159 (mode), and thereby get better code when OP1 is a constant. Do that
3160 later. It will require going over all usages of SIZE below. */
3161 size = GET_MODE_BITSIZE (mode);
3162 #endif
3164 /* Only deduct something for a REM if the last divide done was
3165 for a different constant. Then set the constant of the last
3166 divide. */
3167 max_cost = div_cost[(int) compute_mode]
3168 - (rem_flag && ! (last_div_const != 0 && op1_is_constant
3169 && INTVAL (op1) == last_div_const)
3170 ? mul_cost[(int) compute_mode] + add_cost : 0);
3172 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
3174 /* Now convert to the best mode to use. */
3175 if (compute_mode != mode)
3177 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
3178 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
3180 /* convert_modes may have placed op1 into a register, so we
3181 must recompute the following. */
3182 op1_is_constant = GET_CODE (op1) == CONST_INT;
3183 op1_is_pow2 = (op1_is_constant
3184 && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
3185 || (! unsignedp
3186 && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1)))))) ;
3189 /* If one of the operands is a volatile MEM, copy it into a register. */
3191 if (GET_CODE (op0) == MEM && MEM_VOLATILE_P (op0))
3192 op0 = force_reg (compute_mode, op0);
3193 if (GET_CODE (op1) == MEM && MEM_VOLATILE_P (op1))
3194 op1 = force_reg (compute_mode, op1);
3196 /* If we need the remainder or if OP1 is constant, we need to
3197 put OP0 in a register in case it has any queued subexpressions. */
3198 if (rem_flag || op1_is_constant)
3199 op0 = force_reg (compute_mode, op0);
3201 last = get_last_insn ();
3203 /* Promote floor rounding to trunc rounding for unsigned operations. */
3204 if (unsignedp)
3206 if (code == FLOOR_DIV_EXPR)
3207 code = TRUNC_DIV_EXPR;
3208 if (code == FLOOR_MOD_EXPR)
3209 code = TRUNC_MOD_EXPR;
3210 if (code == EXACT_DIV_EXPR && op1_is_pow2)
3211 code = TRUNC_DIV_EXPR;
3214 if (op1 != const0_rtx)
3215 switch (code)
3217 case TRUNC_MOD_EXPR:
3218 case TRUNC_DIV_EXPR:
3219 if (op1_is_constant)
3221 if (unsignedp)
3223 unsigned HOST_WIDE_INT mh, ml;
3224 int pre_shift, post_shift;
3225 int dummy;
3226 unsigned HOST_WIDE_INT d = (INTVAL (op1)
3227 & GET_MODE_MASK (compute_mode));
3229 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
3231 pre_shift = floor_log2 (d);
3232 if (rem_flag)
3234 remainder
3235 = expand_binop (compute_mode, and_optab, op0,
3236 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
3237 remainder, 1,
3238 OPTAB_LIB_WIDEN);
3239 if (remainder)
3240 return gen_lowpart (mode, remainder);
3242 quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3243 build_int_2 (pre_shift, 0),
3244 tquotient, 1);
3246 else if (size <= HOST_BITS_PER_WIDE_INT)
3248 if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
3250 /* Most significant bit of divisor is set; emit an scc
3251 insn. */
3252 quotient = emit_store_flag (tquotient, GEU, op0, op1,
3253 compute_mode, 1, 1);
3254 if (quotient == 0)
3255 goto fail1;
3257 else
3259 /* Find a suitable multiplier and right shift count
3260 instead of dividing directly by D. */
3262 mh = choose_multiplier (d, size, size,
3263 &ml, &post_shift, &dummy);
3265 /* If the suggested multiplier is more than SIZE bits,
3266 we can do better for even divisors, using an
3267 initial right shift. */
3268 if (mh != 0 && (d & 1) == 0)
3270 pre_shift = floor_log2 (d & -d);
3271 mh = choose_multiplier (d >> pre_shift, size,
3272 size - pre_shift,
3273 &ml, &post_shift, &dummy);
3274 if (mh)
3275 abort ();
3277 else
3278 pre_shift = 0;
3280 if (mh != 0)
3282 rtx t1, t2, t3, t4;
3284 if (post_shift - 1 >= BITS_PER_WORD)
3285 goto fail1;
3287 extra_cost = (shift_cost[post_shift - 1]
3288 + shift_cost[1] + 2 * add_cost);
3289 t1 = expand_mult_highpart (compute_mode, op0, ml,
3290 NULL_RTX, 1,
3291 max_cost - extra_cost);
3292 if (t1 == 0)
3293 goto fail1;
3294 t2 = force_operand (gen_rtx_MINUS (compute_mode,
3295 op0, t1),
3296 NULL_RTX);
3297 t3 = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3298 build_int_2 (1, 0), NULL_RTX,1);
3299 t4 = force_operand (gen_rtx_PLUS (compute_mode,
3300 t1, t3),
3301 NULL_RTX);
3302 quotient
3303 = expand_shift (RSHIFT_EXPR, compute_mode, t4,
3304 build_int_2 (post_shift - 1, 0),
3305 tquotient, 1);
3307 else
3309 rtx t1, t2;
3311 if (pre_shift >= BITS_PER_WORD
3312 || post_shift >= BITS_PER_WORD)
3313 goto fail1;
3315 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3316 build_int_2 (pre_shift, 0),
3317 NULL_RTX, 1);
3318 extra_cost = (shift_cost[pre_shift]
3319 + shift_cost[post_shift]);
3320 t2 = expand_mult_highpart (compute_mode, t1, ml,
3321 NULL_RTX, 1,
3322 max_cost - extra_cost);
3323 if (t2 == 0)
3324 goto fail1;
3325 quotient
3326 = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3327 build_int_2 (post_shift, 0),
3328 tquotient, 1);
3332 else /* Mode too wide to use the tricky code */
3333 break;
3335 insn = get_last_insn ();
3336 if (insn != last
3337 && (set = single_set (insn)) != 0
3338 && SET_DEST (set) == quotient)
3339 set_unique_reg_note (insn,
3340 REG_EQUAL,
3341 gen_rtx_UDIV (compute_mode, op0, op1));
3343 else /* TRUNC_DIV, signed */
3345 unsigned HOST_WIDE_INT ml;
3346 int lgup, post_shift;
3347 HOST_WIDE_INT d = INTVAL (op1);
3348 unsigned HOST_WIDE_INT abs_d = d >= 0 ? d : -d;
3350 /* n rem d = n rem -d */
3351 if (rem_flag && d < 0)
3353 d = abs_d;
3354 op1 = gen_int_mode (abs_d, compute_mode);
3357 if (d == 1)
3358 quotient = op0;
3359 else if (d == -1)
3360 quotient = expand_unop (compute_mode, neg_optab, op0,
3361 tquotient, 0);
3362 else if (abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
3364 /* This case is not handled correctly below. */
3365 quotient = emit_store_flag (tquotient, EQ, op0, op1,
3366 compute_mode, 1, 1);
3367 if (quotient == 0)
3368 goto fail1;
3370 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
3371 && (rem_flag ? smod_pow2_cheap : sdiv_pow2_cheap)
3372 /* ??? The cheap metric is computed only for
3373 word_mode. If this operation is wider, this may
3374 not be so. Assume true if the optab has an
3375 expander for this mode. */
3376 && (((rem_flag ? smod_optab : sdiv_optab)
3377 ->handlers[(int) compute_mode].insn_code
3378 != CODE_FOR_nothing)
3379 || (sdivmod_optab->handlers[(int) compute_mode]
3380 .insn_code != CODE_FOR_nothing)))
3382 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
3384 lgup = floor_log2 (abs_d);
3385 if (BRANCH_COST < 1 || (abs_d != 2 && BRANCH_COST < 3))
3387 rtx label = gen_label_rtx ();
3388 rtx t1;
3390 t1 = copy_to_mode_reg (compute_mode, op0);
3391 do_cmp_and_jump (t1, const0_rtx, GE,
3392 compute_mode, label);
3393 expand_inc (t1, gen_int_mode (abs_d - 1,
3394 compute_mode));
3395 emit_label (label);
3396 quotient = expand_shift (RSHIFT_EXPR, compute_mode, t1,
3397 build_int_2 (lgup, 0),
3398 tquotient, 0);
3400 else
3402 rtx t1, t2, t3;
3403 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3404 build_int_2 (size - 1, 0),
3405 NULL_RTX, 0);
3406 t2 = expand_shift (RSHIFT_EXPR, compute_mode, t1,
3407 build_int_2 (size - lgup, 0),
3408 NULL_RTX, 1);
3409 t3 = force_operand (gen_rtx_PLUS (compute_mode,
3410 op0, t2),
3411 NULL_RTX);
3412 quotient = expand_shift (RSHIFT_EXPR, compute_mode, t3,
3413 build_int_2 (lgup, 0),
3414 tquotient, 0);
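		  /* Editor's illustration (not part of the original
		     source) of the branch-free sequence just built, for
		     a 32-bit mode and d == 8 (lgup == 3):

		       t1 = x >> 31;              (arithmetic: -1 if x < 0, else 0)
		       t2 = (unsigned) t1 >> 29;  (7 if x < 0, else 0)
		       t3 = x + t2;
		       q  = t3 >> 3;              (arithmetic shift)

		     Adding d - 1 to negative operands turns the floor
		     behavior of the arithmetic shift into truncation
		     toward zero.  */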
3417 /* We have computed OP0 / abs(OP1). If OP1 is negative, negate
3418 the quotient. */
3419 if (d < 0)
3421 insn = get_last_insn ();
3422 if (insn != last
3423 && (set = single_set (insn)) != 0
3424 && SET_DEST (set) == quotient
3425 && abs_d < ((unsigned HOST_WIDE_INT) 1
3426 << (HOST_BITS_PER_WIDE_INT - 1)))
3427 set_unique_reg_note (insn,
3428 REG_EQUAL,
3429 gen_rtx_DIV (compute_mode,
3430 op0,
3431 GEN_INT
3432 (trunc_int_for_mode
3433 (abs_d,
3434 compute_mode))));
3436 quotient = expand_unop (compute_mode, neg_optab,
3437 quotient, quotient, 0);
3440 else if (size <= HOST_BITS_PER_WIDE_INT)
3442 choose_multiplier (abs_d, size, size - 1,
3443 &ml, &post_shift, &lgup);
3444 if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
3446 rtx t1, t2, t3;
3448 if (post_shift >= BITS_PER_WORD
3449 || size - 1 >= BITS_PER_WORD)
3450 goto fail1;
3452 extra_cost = (shift_cost[post_shift]
3453 + shift_cost[size - 1] + add_cost);
3454 t1 = expand_mult_highpart (compute_mode, op0, ml,
3455 NULL_RTX, 0,
3456 max_cost - extra_cost);
3457 if (t1 == 0)
3458 goto fail1;
3459 t2 = expand_shift (RSHIFT_EXPR, compute_mode, t1,
3460 build_int_2 (post_shift, 0), NULL_RTX, 0);
3461 t3 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3462 build_int_2 (size - 1, 0), NULL_RTX, 0);
3463 if (d < 0)
3464 quotient
3465 = force_operand (gen_rtx_MINUS (compute_mode,
3466 t3, t2),
3467 tquotient);
3468 else
3469 quotient
3470 = force_operand (gen_rtx_MINUS (compute_mode,
3471 t2, t3),
3472 tquotient);
3474 else
3476 rtx t1, t2, t3, t4;
3478 if (post_shift >= BITS_PER_WORD
3479 || size - 1 >= BITS_PER_WORD)
3480 goto fail1;
3482 ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
3483 extra_cost = (shift_cost[post_shift]
3484 + shift_cost[size - 1] + 2 * add_cost);
3485 t1 = expand_mult_highpart (compute_mode, op0, ml,
3486 NULL_RTX, 0,
3487 max_cost - extra_cost);
3488 if (t1 == 0)
3489 goto fail1;
3490 t2 = force_operand (gen_rtx_PLUS (compute_mode,
3491 t1, op0),
3492 NULL_RTX);
3493 t3 = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3494 build_int_2 (post_shift, 0),
3495 NULL_RTX, 0);
3496 t4 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3497 build_int_2 (size - 1, 0),
3498 NULL_RTX, 0);
3499 if (d < 0)
3500 quotient
3501 = force_operand (gen_rtx_MINUS (compute_mode,
3502 t4, t3),
3503 tquotient);
3504 else
3505 quotient
3506 = force_operand (gen_rtx_MINUS (compute_mode,
3507 t3, t4),
3508 tquotient);
3511 else /* Mode too wide to use the tricky code */
3512 break;
3514 insn = get_last_insn ();
3515 if (insn != last
3516 && (set = single_set (insn)) != 0
3517 && SET_DEST (set) == quotient)
3518 set_unique_reg_note (insn,
3519 REG_EQUAL,
3520 gen_rtx_DIV (compute_mode, op0, op1));
3522 break;
3524 fail1:
3525 delete_insns_since (last);
3526 break;
    case FLOOR_DIV_EXPR:
    case FLOOR_MOD_EXPR:
      /* We will come here only for signed operations.  */
      if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
        {
          unsigned HOST_WIDE_INT mh, ml;
          int pre_shift, lgup, post_shift;
          HOST_WIDE_INT d = INTVAL (op1);

          if (d > 0)
            {
              /* We could just as easily deal with negative constants here,
                 but it does not seem worth the trouble for GCC 2.6.  */
              if (EXACT_POWER_OF_2_OR_ZERO_P (d))
                {
                  pre_shift = floor_log2 (d);
                  if (rem_flag)
                    {
                      remainder = expand_binop
                        (compute_mode, and_optab, op0,
                         GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
                         remainder, 0, OPTAB_LIB_WIDEN);
                      if (remainder)
                        return gen_lowpart (mode, remainder);
                    }
                  quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
                                           build_int_2 (pre_shift, 0),
                                           tquotient, 0);
                }
              else
                {
                  rtx t1, t2, t3, t4;

                  mh = choose_multiplier (d, size, size - 1,
                                          &ml, &post_shift, &lgup);
                  if (mh)
                    abort ();

                  if (post_shift < BITS_PER_WORD
                      && size - 1 < BITS_PER_WORD)
                    {
                      t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
                                         build_int_2 (size - 1, 0),
                                         NULL_RTX, 0);
                      t2 = expand_binop (compute_mode, xor_optab, op0, t1,
                                         NULL_RTX, 0, OPTAB_WIDEN);
                      extra_cost = (shift_cost[post_shift]
                                    + shift_cost[size - 1] + 2 * add_cost);
                      t3 = expand_mult_highpart (compute_mode, t2, ml,
                                                 NULL_RTX, 1,
                                                 max_cost - extra_cost);
                      if (t3 != 0)
                        {
                          t4 = expand_shift (RSHIFT_EXPR, compute_mode, t3,
                                             build_int_2 (post_shift, 0),
                                             NULL_RTX, 1);
                          quotient = expand_binop (compute_mode, xor_optab,
                                                   t4, t1, tquotient, 0,
                                                   OPTAB_WIDEN);
                        }
                    }
                }
            }
          else
            {
              rtx nsign, t1, t2, t3, t4;
              t1 = force_operand (gen_rtx_PLUS (compute_mode,
                                                op0, constm1_rtx), NULL_RTX);
              t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
                                 0, OPTAB_WIDEN);
              nsign = expand_shift (RSHIFT_EXPR, compute_mode, t2,
                                    build_int_2 (size - 1, 0), NULL_RTX, 0);
              t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
                                  NULL_RTX);
              t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
                                  NULL_RTX, 0);
              if (t4)
                {
                  rtx t5;
                  t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
                                    NULL_RTX, 0);
                  quotient = force_operand (gen_rtx_PLUS (compute_mode,
                                                          t4, t5),
                                            tquotient);
                }
            }
        }

      if (quotient != 0)
        break;
      delete_insns_since (last);

      /* Try using an instruction that produces both the quotient and
         remainder, using truncation.  We can easily compensate the quotient
         or remainder to get floor rounding, once we have the remainder.
         Notice that we compute also the final remainder value here,
         and return the result right away.  */
      if (target == 0 || GET_MODE (target) != compute_mode)
        target = gen_reg_rtx (compute_mode);

      if (rem_flag)
        {
          remainder
            = GET_CODE (target) == REG ? target : gen_reg_rtx (compute_mode);
          quotient = gen_reg_rtx (compute_mode);
        }
      else
        {
          quotient
            = GET_CODE (target) == REG ? target : gen_reg_rtx (compute_mode);
          remainder = gen_reg_rtx (compute_mode);
        }

      if (expand_twoval_binop (sdivmod_optab, op0, op1,
                               quotient, remainder, 0))
        {
          /* This could be computed with a branch-less sequence.
             Save that for later.  */
          rtx tem;
          rtx label = gen_label_rtx ();
          do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
          tem = expand_binop (compute_mode, xor_optab, op0, op1,
                              NULL_RTX, 0, OPTAB_WIDEN);
          do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
          expand_dec (quotient, const1_rtx);
          expand_inc (remainder, op1);
          emit_label (label);
          return gen_lowpart (mode, rem_flag ? remainder : quotient);
        }
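
      /* Illustrative example: a truncating divmod rounds toward zero, so
         floor rounding only needs fixing up when the remainder is nonzero
         and the operand signs differ (OP0 ^ OP1 < 0).  E.g. -7 / 2
         truncates to q = -3, r = -1; the adjustment above gives q = -4,
         r = 1, and -4 * 2 + 1 = -7 with 0 <= r < 2.  */
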
      /* No luck with division elimination or divmod.  Have to do it
         by conditionally adjusting op0 *and* the result.  */
      {
        rtx label1, label2, label3, label4, label5;
        rtx adjusted_op0;
        rtx tem;

        quotient = gen_reg_rtx (compute_mode);
        adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
        label1 = gen_label_rtx ();
        label2 = gen_label_rtx ();
        label3 = gen_label_rtx ();
        label4 = gen_label_rtx ();
        label5 = gen_label_rtx ();
        do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
        do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
        tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
                            quotient, 0, OPTAB_LIB_WIDEN);
        if (tem != quotient)
          emit_move_insn (quotient, tem);
        emit_jump_insn (gen_jump (label5));
        emit_barrier ();
        emit_label (label1);
        expand_inc (adjusted_op0, const1_rtx);
        emit_jump_insn (gen_jump (label4));
        emit_barrier ();
        emit_label (label2);
        do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
        tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
                            quotient, 0, OPTAB_LIB_WIDEN);
        if (tem != quotient)
          emit_move_insn (quotient, tem);
        emit_jump_insn (gen_jump (label5));
        emit_barrier ();
        emit_label (label3);
        expand_dec (adjusted_op0, const1_rtx);
        emit_label (label4);
        tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
                            quotient, 0, OPTAB_LIB_WIDEN);
        if (tem != quotient)
          emit_move_insn (quotient, tem);
        expand_dec (quotient, const1_rtx);
        emit_label (label5);
      }
      break;
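
      /* Illustrative trace: the label scheme above pre-adjusts OP0 so that
         a truncating divide rounds the right way.  E.g. op0 = -7, op1 = 2:
         op1 is not negative and adjusted_op0 < 0, so label1 increments
         adjusted_op0 to -6, the divide yields -3, and the final expand_dec
         produces floor (-7 / 2) = -4.  */
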
    case CEIL_DIV_EXPR:
    case CEIL_MOD_EXPR:
      if (unsignedp)
        {
          if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
            {
              rtx t1, t2, t3;
              unsigned HOST_WIDE_INT d = INTVAL (op1);
              t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
                                 build_int_2 (floor_log2 (d), 0),
                                 tquotient, 1);
              t2 = expand_binop (compute_mode, and_optab, op0,
                                 GEN_INT (d - 1),
                                 NULL_RTX, 1, OPTAB_LIB_WIDEN);
              t3 = gen_reg_rtx (compute_mode);
              t3 = emit_store_flag (t3, NE, t2, const0_rtx,
                                    compute_mode, 1, 1);
              if (t3 == 0)
                {
                  rtx lab;
                  lab = gen_label_rtx ();
                  do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
                  expand_inc (t1, const1_rtx);
                  emit_label (lab);
                  quotient = t1;
                }
              else
                quotient = force_operand (gen_rtx_PLUS (compute_mode,
                                                        t1, t3),
                                          tquotient);
              break;
            }
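
          /* Illustrative example: for a power-of-two divisor the ceiling
             quotient is (OP0 >> log2 (d)), plus one when any low bits are
             set.  E.g. 13 / 4: 13 >> 2 = 3 and 13 & 3 = 1 != 0, so the
             quotient is 4 = ceil (13 / 4).  */
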
          /* Try using an instruction that produces both the quotient and
             remainder, using truncation.  We can easily compensate the
             quotient or remainder to get ceiling rounding, once we have the
             remainder.  Notice that we compute also the final remainder
             value here, and return the result right away.  */
          if (target == 0 || GET_MODE (target) != compute_mode)
            target = gen_reg_rtx (compute_mode);

          if (rem_flag)
            {
              remainder = (GET_CODE (target) == REG
                           ? target : gen_reg_rtx (compute_mode));
              quotient = gen_reg_rtx (compute_mode);
            }
          else
            {
              quotient = (GET_CODE (target) == REG
                          ? target : gen_reg_rtx (compute_mode));
              remainder = gen_reg_rtx (compute_mode);
            }

          if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
                                   remainder, 1))
            {
              /* This could be computed with a branch-less sequence.
                 Save that for later.  */
              rtx label = gen_label_rtx ();
              do_cmp_and_jump (remainder, const0_rtx, EQ,
                               compute_mode, label);
              expand_inc (quotient, const1_rtx);
              expand_dec (remainder, op1);
              emit_label (label);
              return gen_lowpart (mode, rem_flag ? remainder : quotient);
            }
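
          /* Illustrative example: starting from a truncating divmod,
             ceiling rounding bumps the quotient whenever the remainder is
             nonzero.  E.g. 7 / 4 gives q = 1, r = 3; the adjustment yields
             q = 2, r = -1, and 2 * 4 - 1 = 7 still holds.  */
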
          /* No luck with division elimination or divmod.  Have to do it
             by conditionally adjusting op0 *and* the result.  */
          {
            rtx label1, label2;
            rtx adjusted_op0, tem;

            quotient = gen_reg_rtx (compute_mode);
            adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
            label1 = gen_label_rtx ();
            label2 = gen_label_rtx ();
            do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
                             compute_mode, label1);
            emit_move_insn (quotient, const0_rtx);
            emit_jump_insn (gen_jump (label2));
            emit_barrier ();
            emit_label (label1);
            expand_dec (adjusted_op0, const1_rtx);
            tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
                                quotient, 1, OPTAB_LIB_WIDEN);
            if (tem != quotient)
              emit_move_insn (quotient, tem);
            expand_inc (quotient, const1_rtx);
            emit_label (label2);
          }
        }
      else /* signed */
        {
          if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
              && INTVAL (op1) >= 0)
            {
              /* This is extremely similar to the code for the unsigned case
                 above.  For 2.7 we should merge these variants, but for
                 2.6.1 I don't want to touch the code for unsigned since that
                 gets used in C.  The signed case will only be used by other
                 languages (Ada).  */

              rtx t1, t2, t3;
              unsigned HOST_WIDE_INT d = INTVAL (op1);
              t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
                                 build_int_2 (floor_log2 (d), 0),
                                 tquotient, 0);
              t2 = expand_binop (compute_mode, and_optab, op0,
                                 GEN_INT (d - 1),
                                 NULL_RTX, 1, OPTAB_LIB_WIDEN);
              t3 = gen_reg_rtx (compute_mode);
              t3 = emit_store_flag (t3, NE, t2, const0_rtx,
                                    compute_mode, 1, 1);
              if (t3 == 0)
                {
                  rtx lab;
                  lab = gen_label_rtx ();
                  do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
                  expand_inc (t1, const1_rtx);
                  emit_label (lab);
                  quotient = t1;
                }
              else
                quotient = force_operand (gen_rtx_PLUS (compute_mode,
                                                        t1, t3),
                                          tquotient);
              break;
            }

          /* Try using an instruction that produces both the quotient and
             remainder, using truncation.  We can easily compensate the
             quotient or remainder to get ceiling rounding, once we have the
             remainder.  Notice that we compute also the final remainder
             value here, and return the result right away.  */
          if (target == 0 || GET_MODE (target) != compute_mode)
            target = gen_reg_rtx (compute_mode);
          if (rem_flag)
            {
              remainder = (GET_CODE (target) == REG
                           ? target : gen_reg_rtx (compute_mode));
              quotient = gen_reg_rtx (compute_mode);
            }
          else
            {
              quotient = (GET_CODE (target) == REG
                          ? target : gen_reg_rtx (compute_mode));
              remainder = gen_reg_rtx (compute_mode);
            }

          if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
                                   remainder, 0))
            {
              /* This could be computed with a branch-less sequence.
                 Save that for later.  */
              rtx tem;
              rtx label = gen_label_rtx ();
              do_cmp_and_jump (remainder, const0_rtx, EQ,
                               compute_mode, label);
              tem = expand_binop (compute_mode, xor_optab, op0, op1,
                                  NULL_RTX, 0, OPTAB_WIDEN);
              do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
              expand_inc (quotient, const1_rtx);
              expand_dec (remainder, op1);
              emit_label (label);
              return gen_lowpart (mode, rem_flag ? remainder : quotient);
            }

          /* No luck with division elimination or divmod.  Have to do it
             by conditionally adjusting op0 *and* the result.  */
          {
            rtx label1, label2, label3, label4, label5;
            rtx adjusted_op0;
            rtx tem;

            quotient = gen_reg_rtx (compute_mode);
            adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
            label1 = gen_label_rtx ();
            label2 = gen_label_rtx ();
            label3 = gen_label_rtx ();
            label4 = gen_label_rtx ();
            label5 = gen_label_rtx ();
            do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
            do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
                             compute_mode, label1);
            tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
                                quotient, 0, OPTAB_LIB_WIDEN);
            if (tem != quotient)
              emit_move_insn (quotient, tem);
            emit_jump_insn (gen_jump (label5));
            emit_barrier ();
            emit_label (label1);
            expand_dec (adjusted_op0, const1_rtx);
            emit_jump_insn (gen_jump (label4));
            emit_barrier ();
            emit_label (label2);
            do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
                             compute_mode, label3);
            tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
                                quotient, 0, OPTAB_LIB_WIDEN);
            if (tem != quotient)
              emit_move_insn (quotient, tem);
            emit_jump_insn (gen_jump (label5));
            emit_barrier ();
            emit_label (label3);
            expand_inc (adjusted_op0, const1_rtx);
            emit_label (label4);
            tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
                                quotient, 0, OPTAB_LIB_WIDEN);
            if (tem != quotient)
              emit_move_insn (quotient, tem);
            expand_inc (quotient, const1_rtx);
            emit_label (label5);
          }
        }
      break;
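
      /* Illustrative note: in the signed case the quotient is only bumped
         when the remainder is nonzero and OP0 and OP1 have the same sign,
         i.e. when OP0 ^ OP1 is not negative.  E.g. 7 / 2 gives q = 3,
         r = 1 with equal signs, so q becomes 4 = ceil (3.5); -7 / 2 keeps
         the truncated q = -3 = ceil (-3.5).  */
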
    case EXACT_DIV_EXPR:
      if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
        {
          HOST_WIDE_INT d = INTVAL (op1);
          unsigned HOST_WIDE_INT ml;
          int pre_shift;
          rtx t1;

          pre_shift = floor_log2 (d & -d);
          ml = invert_mod2n (d >> pre_shift, size);
          t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
                             build_int_2 (pre_shift, 0), NULL_RTX, unsignedp);
          quotient = expand_mult (compute_mode, t1,
                                  gen_int_mode (ml, compute_mode),
                                  NULL_RTX, 1);

          insn = get_last_insn ();
          set_unique_reg_note (insn,
                               REG_EQUAL,
                               gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
                                               compute_mode,
                                               op0, op1));
        }
      break;
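
      /* Illustrative example: an exact division becomes a multiply by the
         inverse of the odd part of D modulo 2**size.  With an 8-bit mode
         and d = 7: 7 * 183 = 1281 == 1 (mod 256), so 21 / 7 is computed
         as (21 * 183) mod 256 = 3843 mod 256 = 3.  The initial shift
         removes the power-of-two factor d & -d.  */
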
    case ROUND_DIV_EXPR:
    case ROUND_MOD_EXPR:
      if (unsignedp)
        {
          rtx tem;
          rtx label;
          label = gen_label_rtx ();
          quotient = gen_reg_rtx (compute_mode);
          remainder = gen_reg_rtx (compute_mode);
          if (expand_twoval_binop (udivmod_optab, op0, op1,
                                   quotient, remainder, 1) == 0)
            {
              rtx tem;
              quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
                                       quotient, 1, OPTAB_LIB_WIDEN);
              tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
              remainder = expand_binop (compute_mode, sub_optab, op0, tem,
                                        remainder, 1, OPTAB_LIB_WIDEN);
            }
          tem = plus_constant (op1, -1);
          tem = expand_shift (RSHIFT_EXPR, compute_mode, tem,
                              build_int_2 (1, 0), NULL_RTX, 1);
          do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
          expand_inc (quotient, const1_rtx);
          expand_dec (remainder, op1);
          emit_label (label);
        }
      else
        {
          rtx abs_rem, abs_op1, tem, mask;
          rtx label;
          label = gen_label_rtx ();
          quotient = gen_reg_rtx (compute_mode);
          remainder = gen_reg_rtx (compute_mode);
          if (expand_twoval_binop (sdivmod_optab, op0, op1,
                                   quotient, remainder, 0) == 0)
            {
              rtx tem;
              quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
                                       quotient, 0, OPTAB_LIB_WIDEN);
              tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
              remainder = expand_binop (compute_mode, sub_optab, op0, tem,
                                        remainder, 0, OPTAB_LIB_WIDEN);
            }
          abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
          abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
          tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
                              build_int_2 (1, 0), NULL_RTX, 1);
          do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
          tem = expand_binop (compute_mode, xor_optab, op0, op1,
                              NULL_RTX, 0, OPTAB_WIDEN);
          mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
                               build_int_2 (size - 1, 0), NULL_RTX, 0);
          tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
                              NULL_RTX, 0, OPTAB_WIDEN);
          tem = expand_binop (compute_mode, sub_optab, tem, mask,
                              NULL_RTX, 0, OPTAB_WIDEN);
          expand_inc (quotient, tem);
          tem = expand_binop (compute_mode, xor_optab, mask, op1,
                              NULL_RTX, 0, OPTAB_WIDEN);
          tem = expand_binop (compute_mode, sub_optab, tem, mask,
                              NULL_RTX, 0, OPTAB_WIDEN);
          expand_dec (remainder, tem);
          emit_label (label);
        }
      return gen_lowpart (mode, rem_flag ? remainder : quotient);
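
      /* Illustrative note: MASK above is OP0 ^ OP1 arithmetically shifted
         down to all-zeros or all-ones, and (x ^ mask) - mask conditionally
         negates x: with mask = -1 it gives ~x + 1 = -x, with mask = 0 it
         leaves x alone.  This applies the quotient's sign to the +1/-1
         adjustment without a branch.  */
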
    default:
      abort ();
    }

  if (quotient == 0)
    {
      if (target && GET_MODE (target) != compute_mode)
        target = 0;

      if (rem_flag)
        {
          /* Try to produce the remainder without producing the quotient.
             If we seem to have a divmod pattern that does not require
             widening, don't try widening here.  We should really have a
             WIDEN argument to expand_twoval_binop, since what we'd really
             like to do here is
             1) try a mod insn in compute_mode
             2) try a divmod insn in compute_mode
             3) try a div insn in compute_mode and multiply-subtract to get
                remainder
             4) try the same things with widening allowed.  */
          remainder
            = sign_expand_binop (compute_mode, umod_optab, smod_optab,
                                 op0, op1, target,
                                 unsignedp,
                                 ((optab2->handlers[(int) compute_mode].insn_code
                                   != CODE_FOR_nothing)
                                  ? OPTAB_DIRECT : OPTAB_WIDEN));
          if (remainder == 0)
            {
              /* No luck there.  Can we do remainder and divide at once
                 without a library call?  */
              remainder = gen_reg_rtx (compute_mode);
              if (! expand_twoval_binop ((unsignedp
                                          ? udivmod_optab
                                          : sdivmod_optab),
                                         op0, op1,
                                         NULL_RTX, remainder, unsignedp))
                remainder = 0;
            }

          if (remainder)
            return gen_lowpart (mode, remainder);
        }

      /* Produce the quotient.  Try a quotient insn, but not a library call.
         If we have a divmod in this mode, use it in preference to widening
         the div (for this test we assume it will not fail).  Note that optab2
         is set to the one of the two optabs that the call below will use.  */
      quotient
        = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
                             op0, op1, rem_flag ? NULL_RTX : target,
                             unsignedp,
                             ((optab2->handlers[(int) compute_mode].insn_code
                               != CODE_FOR_nothing)
                              ? OPTAB_DIRECT : OPTAB_WIDEN));

      if (quotient == 0)
        {
          /* No luck there.  Try a quotient-and-remainder insn,
             keeping the quotient alone.  */
          quotient = gen_reg_rtx (compute_mode);
          if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
                                     op0, op1,
                                     quotient, NULL_RTX, unsignedp))
            {
              quotient = 0;
              if (! rem_flag)
                /* Still no luck.  If we are not computing the remainder,
                   use a library call for the quotient.  */
                quotient = sign_expand_binop (compute_mode,
                                              udiv_optab, sdiv_optab,
                                              op0, op1, target,
                                              unsignedp, OPTAB_LIB_WIDEN);
            }
        }
    }

  if (rem_flag)
    {
      if (target && GET_MODE (target) != compute_mode)
        target = 0;

      if (quotient == 0)
        /* No divide instruction either.  Use library for remainder.  */
        remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
                                       op0, op1, target,
                                       unsignedp, OPTAB_LIB_WIDEN);
      else
        {
          /* We divided.  Now finish doing X - Y * (X / Y).  */
          remainder = expand_mult (compute_mode, quotient, op1,
                                   NULL_RTX, unsignedp);
          remainder = expand_binop (compute_mode, sub_optab, op0,
                                    remainder, target, unsignedp,
                                    OPTAB_LIB_WIDEN);
        }
    }

  return gen_lowpart (mode, rem_flag ? remainder : quotient);
}

/* Return a tree node with data type TYPE, describing the value of X.
   Usually this is an RTL_EXPR, if there is no obvious better choice.
   X may be an expression, however we only support those expressions
   generated by loop.c.  */

tree
make_tree (tree type, rtx x)
{
  tree t;

  switch (GET_CODE (x))
    {
    case CONST_INT:
      t = build_int_2 (INTVAL (x),
                       (TREE_UNSIGNED (type)
                        && (GET_MODE_BITSIZE (TYPE_MODE (type))
                            < HOST_BITS_PER_WIDE_INT))
                       || INTVAL (x) >= 0 ? 0 : -1);
      TREE_TYPE (t) = type;
      return t;

    case CONST_DOUBLE:
      if (GET_MODE (x) == VOIDmode)
        {
          t = build_int_2 (CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
          TREE_TYPE (t) = type;
        }
      else
        {
          REAL_VALUE_TYPE d;

          REAL_VALUE_FROM_CONST_DOUBLE (d, x);
          t = build_real (type, d);
        }

      return t;

    case CONST_VECTOR:
      {
        int i, units;
        rtx elt;
        tree t = NULL_TREE;

        units = CONST_VECTOR_NUNITS (x);

        /* Build a tree with vector elements.  */
        for (i = units - 1; i >= 0; --i)
          {
            elt = CONST_VECTOR_ELT (x, i);
            t = tree_cons (NULL_TREE, make_tree (type, elt), t);
          }

        return build_vector (type, t);
      }

    case PLUS:
      return fold (build (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
                          make_tree (type, XEXP (x, 1))));

    case MINUS:
      return fold (build (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
                          make_tree (type, XEXP (x, 1))));

    case NEG:
      return fold (build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0))));

    case MULT:
      return fold (build (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
                          make_tree (type, XEXP (x, 1))));

    case ASHIFT:
      return fold (build (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
                          make_tree (type, XEXP (x, 1))));

    case LSHIFTRT:
      t = (*lang_hooks.types.unsigned_type) (type);
      return fold (convert (type,
                            build (RSHIFT_EXPR, t,
                                   make_tree (t, XEXP (x, 0)),
                                   make_tree (type, XEXP (x, 1)))));

    case ASHIFTRT:
      t = (*lang_hooks.types.signed_type) (type);
      return fold (convert (type,
                            build (RSHIFT_EXPR, t,
                                   make_tree (t, XEXP (x, 0)),
                                   make_tree (type, XEXP (x, 1)))));

    case DIV:
      if (TREE_CODE (type) != REAL_TYPE)
        t = (*lang_hooks.types.signed_type) (type);
      else
        t = type;

      return fold (convert (type,
                            build (TRUNC_DIV_EXPR, t,
                                   make_tree (t, XEXP (x, 0)),
                                   make_tree (t, XEXP (x, 1)))));
    case UDIV:
      t = (*lang_hooks.types.unsigned_type) (type);
      return fold (convert (type,
                            build (TRUNC_DIV_EXPR, t,
                                   make_tree (t, XEXP (x, 0)),
                                   make_tree (t, XEXP (x, 1)))));

    case SIGN_EXTEND:
    case ZERO_EXTEND:
      t = (*lang_hooks.types.type_for_mode) (GET_MODE (XEXP (x, 0)),
                                             GET_CODE (x) == ZERO_EXTEND);
      return fold (convert (type, make_tree (t, XEXP (x, 0))));

    default:
      t = make_node (RTL_EXPR);
      TREE_TYPE (t) = type;

      /* If TYPE is a POINTER_TYPE, X might be Pmode with TYPE_MODE being
         ptr_mode.  So convert.  */
      if (POINTER_TYPE_P (type))
        x = convert_memory_address (TYPE_MODE (type), x);

      RTL_EXPR_RTL (t) = x;
      /* There are no insns to be output
         when this rtl_expr is used.  */
      RTL_EXPR_SEQUENCE (t) = 0;
      return t;
    }
}
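
/* Illustrative example: for a 32-bit integer TYPE,
   make_tree (type, gen_rtx_PLUS (SImode, x, GEN_INT (4))) recurses
   through the PLUS and yields the folded tree for `x + 4'; RTL codes
   with no tree equivalent fall into the RTL_EXPR default case above.  */
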
/* Check whether the multiplication X * MULT + ADD overflows.
   X, MULT and ADD must be CONST_*.
   MODE is the machine mode for the computation.
   X and MULT must have mode MODE.  ADD may have a different mode.
   So can X (defaults to same as MODE).
   UNSIGNEDP is nonzero to do unsigned multiplication.  */

bool
const_mult_add_overflow_p (rtx x, rtx mult, rtx add,
                           enum machine_mode mode, int unsignedp)
{
  tree type, mult_type, add_type, result;

  type = (*lang_hooks.types.type_for_mode) (mode, unsignedp);

  /* In order to get a proper overflow indication from an unsigned
     type, we have to pretend that it's a sizetype.  */
  mult_type = type;
  if (unsignedp)
    {
      mult_type = copy_node (type);
      TYPE_IS_SIZETYPE (mult_type) = 1;
    }

  add_type = (GET_MODE (add) == VOIDmode ? mult_type
              : (*lang_hooks.types.type_for_mode) (GET_MODE (add), unsignedp));

  result = fold (build (PLUS_EXPR, mult_type,
                        fold (build (MULT_EXPR, mult_type,
                                     make_tree (mult_type, x),
                                     make_tree (mult_type, mult))),
                        make_tree (add_type, add)));

  return TREE_CONSTANT_OVERFLOW (result);
}
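
/* Illustrative example: with a 16-bit signed MODE, x = 300 and
   mult = 200 fold to the constant 60000, which does not fit in
   [-32768, 32767], so the overflow flag is set on the folded result
   and the function returns true.  */
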
/* Return an rtx representing the value of X * MULT + ADD.
   TARGET is a suggestion for where to store the result (an rtx).
   MODE is the machine mode for the computation.
   X and MULT must have mode MODE.  ADD may have a different mode.
   So can X (defaults to same as MODE).
   UNSIGNEDP is nonzero to do unsigned multiplication.
   This may emit insns.  */

rtx
expand_mult_add (rtx x, rtx target, rtx mult, rtx add, enum machine_mode mode,
                 int unsignedp)
{
  tree type = (*lang_hooks.types.type_for_mode) (mode, unsignedp);
  tree add_type = (GET_MODE (add) == VOIDmode
                   ? type : (*lang_hooks.types.type_for_mode) (GET_MODE (add),
                                                               unsignedp));
  tree result = fold (build (PLUS_EXPR, type,
                             fold (build (MULT_EXPR, type,
                                          make_tree (type, x),
                                          make_tree (type, mult))),
                             make_tree (add_type, add)));

  return expand_expr (result, target, VOIDmode, 0);
}

/* Compute the logical-and of OP0 and OP1, storing it in TARGET
   and returning TARGET.

   If TARGET is 0, a pseudo-register or constant is returned.  */

rtx
expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
{
  rtx tem = 0;

  if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
    tem = simplify_binary_operation (AND, mode, op0, op1);
  if (tem == 0)
    tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);

  if (target == 0)
    target = tem;
  else if (tem != target)
    emit_move_insn (target, tem);
  return target;
}

/* Emit a store-flags instruction for comparison CODE on OP0 and OP1
   and storing in TARGET.  Normally return TARGET.
   Return 0 if that cannot be done.

   MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
   it is VOIDmode, they cannot both be CONST_INT.

   UNSIGNEDP is for the case where we have to widen the operands
   to perform the operation.  It says to use zero-extension.

   NORMALIZEP is 1 if we should convert the result to be either zero
   or one.  NORMALIZEP is -1 if we should convert the result to be
   either zero or -1.  If NORMALIZEP is zero, the result will be left
   "raw" out of the scc insn.  */

rtx
emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
                 enum machine_mode mode, int unsignedp, int normalizep)
{
  rtx subtarget;
  enum insn_code icode;
  enum machine_mode compare_mode;
  enum machine_mode target_mode = GET_MODE (target);
  rtx tem;
  rtx last = get_last_insn ();
  rtx pattern, comparison;

  /* ??? Ok to do this and then fail?  */
  op0 = protect_from_queue (op0, 0);
  op1 = protect_from_queue (op1, 0);

  if (unsignedp)
    code = unsigned_condition (code);

  /* If one operand is constant, make it the second one.  Only do this
     if the other operand is not constant as well.  */

  if (swap_commutative_operands_p (op0, op1))
    {
      tem = op0;
      op0 = op1;
      op1 = tem;
      code = swap_condition (code);
    }

  if (mode == VOIDmode)
    mode = GET_MODE (op0);

  /* For some comparisons with 1 and -1, we can convert this to
     comparisons with zero.  This will often produce more opportunities for
     store-flag insns.  */

  switch (code)
    {
    case LT:
      if (op1 == const1_rtx)
        op1 = const0_rtx, code = LE;
      break;
    case LE:
      if (op1 == constm1_rtx)
        op1 = const0_rtx, code = LT;
      break;
    case GE:
      if (op1 == const1_rtx)
        op1 = const0_rtx, code = GT;
      break;
    case GT:
      if (op1 == constm1_rtx)
        op1 = const0_rtx, code = GE;
      break;
    case GEU:
      if (op1 == const1_rtx)
        op1 = const0_rtx, code = NE;
      break;
    case LTU:
      if (op1 == const1_rtx)
        op1 = const0_rtx, code = EQ;
      break;
    default:
      break;
    }

  /* If we are comparing a double-word integer with zero, we can convert
     the comparison into one involving a single word.  */
  if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
      && GET_MODE_CLASS (mode) == MODE_INT
      && op1 == const0_rtx
      && (GET_CODE (op0) != MEM || ! MEM_VOLATILE_P (op0)))
    {
      if (code == EQ || code == NE)
        {
          rtx op00, op01, op0both;

          /* Do a logical OR of the two words and compare the result.  */
          op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
          op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
          op0both = expand_binop (word_mode, ior_optab, op00, op01,
                                  NULL_RTX, unsignedp, OPTAB_DIRECT);
          if (op0both != 0)
            return emit_store_flag (target, code, op0both, op1, word_mode,
                                    unsignedp, normalizep);
        }
      else if (code == LT || code == GE)
        {
          rtx op0h;

          /* If testing the sign bit, can just test on high word.  */
          op0h = simplify_gen_subreg (word_mode, op0, mode,
                                      subreg_highpart_offset (word_mode, mode));
          return emit_store_flag (target, code, op0h, op1, word_mode,
                                  unsignedp, normalizep);
        }
    }
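
  /* Illustrative note: on a 32-bit target a DImode `x == 0' thus becomes
     `(lowpart | highpart) == 0' in SImode, and a DImode `x < 0' only
     examines the sign word, so neither needs a multi-word comparison
     sequence.  */
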
  /* From now on, we won't change CODE, so set ICODE now.  */
  icode = setcc_gen_code[(int) code];

  /* If this is A < 0 or A >= 0, we can do this by taking the ones
     complement of A (for GE) and shifting the sign bit to the low bit.  */
  if (op1 == const0_rtx && (code == LT || code == GE)
      && GET_MODE_CLASS (mode) == MODE_INT
      && (normalizep || STORE_FLAG_VALUE == 1
          || (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
              && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
                  == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))))
    {
      subtarget = target;

      /* If the result is to be wider than OP0, it is best to convert it
         first.  If it is to be narrower, it is *incorrect* to convert it
         first.  */
      if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
        {
          op0 = protect_from_queue (op0, 0);
          op0 = convert_modes (target_mode, mode, op0, 0);
          mode = target_mode;
        }

      if (target_mode != mode)
        subtarget = 0;

      if (code == GE)
        op0 = expand_unop (mode, one_cmpl_optab, op0,
                           ((STORE_FLAG_VALUE == 1 || normalizep)
                            ? 0 : subtarget), 0);

      if (STORE_FLAG_VALUE == 1 || normalizep)
        /* If we are supposed to produce a 0/1 value, we want to do
           a logical shift from the sign bit to the low-order bit; for
           a -1/0 value, we do an arithmetic shift.  */
        op0 = expand_shift (RSHIFT_EXPR, mode, op0,
                            size_int (GET_MODE_BITSIZE (mode) - 1),
                            subtarget, normalizep != -1);

      if (mode != target_mode)
        op0 = convert_modes (target_mode, mode, op0, 0);

      return op0;
    }
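
  /* Illustrative example: with 32-bit operands, `x < 0' is just the sign
     bit, so a logical right shift by 31 produces the 0/1 result directly;
     for `x >= 0' the ones complement is shifted instead, since the sign
     bit of ~x is set exactly when x >= 0.  The arithmetic shift gives the
     -1/0 form.  */
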
  if (icode != CODE_FOR_nothing)
    {
      insn_operand_predicate_fn pred;

      /* We think we may be able to do this with a scc insn.  Emit the
         comparison and then the scc insn.

         compare_from_rtx may call emit_queue, which would be deleted below
         if the scc insn fails.  So call it ourselves before setting LAST.
         Likewise for do_pending_stack_adjust.  */

      emit_queue ();
      do_pending_stack_adjust ();
      last = get_last_insn ();

      comparison
        = compare_from_rtx (op0, op1, code, unsignedp, mode, NULL_RTX);
      if (GET_CODE (comparison) == CONST_INT)
        return (comparison == const0_rtx ? const0_rtx
                : normalizep == 1 ? const1_rtx
                : normalizep == -1 ? constm1_rtx
                : const_true_rtx);

      /* The code of COMPARISON may not match CODE if compare_from_rtx
         decided to swap its operands and reverse the original code.

         We know that compare_from_rtx returns either a CONST_INT or
         a new comparison code, so it is safe to just extract the
         code from COMPARISON.  */
      code = GET_CODE (comparison);

      /* Get a reference to the target in the proper mode for this insn.  */
      compare_mode = insn_data[(int) icode].operand[0].mode;
      subtarget = target;
      pred = insn_data[(int) icode].operand[0].predicate;
      if (preserve_subexpressions_p ()
          || ! (*pred) (subtarget, compare_mode))
        subtarget = gen_reg_rtx (compare_mode);

      pattern = GEN_FCN (icode) (subtarget);
      if (pattern)
        {
          emit_insn (pattern);

          /* If we are converting to a wider mode, first convert to
             TARGET_MODE, then normalize.  This produces better combining
             opportunities on machines that have a SIGN_EXTRACT when we are
             testing a single bit.  This mostly benefits the 68k.

             If STORE_FLAG_VALUE does not have the sign bit set when
             interpreted in COMPARE_MODE, we can do this conversion as
             unsigned, which is usually more efficient.  */
          if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (compare_mode))
            {
              convert_move (target, subtarget,
                            (GET_MODE_BITSIZE (compare_mode)
                             <= HOST_BITS_PER_WIDE_INT)
                            && 0 == (STORE_FLAG_VALUE
                                     & ((HOST_WIDE_INT) 1
                                        << (GET_MODE_BITSIZE (compare_mode) - 1))));
              op0 = target;
              compare_mode = target_mode;
            }
          else
            op0 = subtarget;

          /* If we want to keep subexpressions around, don't reuse our
             last target.  */

          if (preserve_subexpressions_p ())
            subtarget = 0;

          /* Now normalize to the proper value in COMPARE_MODE.  Sometimes
             we don't have to do anything.  */
          if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
            ;
          /* STORE_FLAG_VALUE might be the most negative number, so write
             the comparison this way to avoid a compile-time warning.  */
          else if (- normalizep == STORE_FLAG_VALUE)
            op0 = expand_unop (compare_mode, neg_optab, op0, subtarget, 0);

          /* We don't want to use STORE_FLAG_VALUE < 0 below since this
             makes it hard to use a value of just the sign bit due to
             ANSI integer constant typing rules.  */
          else if (GET_MODE_BITSIZE (compare_mode) <= HOST_BITS_PER_WIDE_INT
                   && (STORE_FLAG_VALUE
                       & ((HOST_WIDE_INT) 1
                          << (GET_MODE_BITSIZE (compare_mode) - 1))))
            op0 = expand_shift (RSHIFT_EXPR, compare_mode, op0,
                                size_int (GET_MODE_BITSIZE (compare_mode) - 1),
                                subtarget, normalizep == 1);
          else if (STORE_FLAG_VALUE & 1)
            {
              op0 = expand_and (compare_mode, op0, const1_rtx, subtarget);
              if (normalizep == -1)
                op0 = expand_unop (compare_mode, neg_optab, op0, op0, 0);
            }
          else
            abort ();

          /* If we were converting to a smaller mode, do the
             conversion now.  */
          if (target_mode != compare_mode)
            {
              convert_move (target, op0, 0);
              return target;
            }
          else
            return op0;
        }
    }

  delete_insns_since (last);

  /* If expensive optimizations, use different pseudo registers for each
     insn, instead of reusing the same pseudo.  This leads to better CSE,
     but slows down the compiler, since there are more pseudos.  */
  subtarget = (!flag_expensive_optimizations
               && (target_mode == mode)) ? target : NULL_RTX;

  /* If we reached here, we can't do this with a scc insn.  However, there
     are some comparisons that can be done directly.  For example, if
     this is an equality comparison of integers, we can try to exclusive-or
     (or subtract) the two operands and use a recursive call to try the
     comparison with zero.  Don't do any of these cases if branches are
     very cheap.  */

  if (BRANCH_COST > 0
      && GET_MODE_CLASS (mode) == MODE_INT && (code == EQ || code == NE)
      && op1 != const0_rtx)
    {
      tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
                          OPTAB_WIDEN);

      if (tem == 0)
        tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
                            OPTAB_WIDEN);
      if (tem != 0)
        tem = emit_store_flag (target, code, tem, const0_rtx,
                               mode, unsignedp, normalizep);
      if (tem == 0)
        delete_insns_since (last);
      return tem;
    }

  /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
     the constant zero.  Reject all other comparisons at this point.  Only
     do LE and GT if branches are expensive since they are expensive on
     2-operand machines.  */

  if (BRANCH_COST == 0
      || GET_MODE_CLASS (mode) != MODE_INT || op1 != const0_rtx
      || (code != EQ && code != NE
          && (BRANCH_COST <= 1 || (code != LE && code != GT))))
    return 0;

  /* See what we need to return.  We can only return a 1, -1, or the
     sign bit.  */

  if (normalizep == 0)
    {
      if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
        normalizep = STORE_FLAG_VALUE;

      else if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
               && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
                   == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))
        ;
      else
        return 0;
    }

  /* Try to put the result of the comparison in the sign bit.  Assume we can't
     do the necessary operation below.  */

  tem = 0;

  /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
     the sign bit set.  */

  if (code == LE)
    {
      /* This is destructive, so SUBTARGET can't be OP0.  */
      if (rtx_equal_p (subtarget, op0))
        subtarget = 0;

      tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
                          OPTAB_WIDEN);
      if (tem)
        tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
                            OPTAB_WIDEN);
    }
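
  /* Illustrative check: A | (A - 1) has the sign bit set exactly when
     A <= 0.  For A = 0 it is 0 | -1 = -1; for A = -2 it is -2 | -3 = -1;
     for A = 3 it is 3 | 2 = 3 with the sign bit clear.  The boundary case
     works too: INT_MIN | INT_MAX = -1.  */
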
  /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
     number of bits in the mode of OP0, minus one.  */

  if (code == GT)
    {
      if (rtx_equal_p (subtarget, op0))
        subtarget = 0;

      tem = expand_shift (RSHIFT_EXPR, mode, op0,
                          size_int (GET_MODE_BITSIZE (mode) - 1),
                          subtarget, 0);
      tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
                          OPTAB_WIDEN);
    }
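
  /* Illustrative check: (A >> BITS) - A has the sign bit set exactly when
     A > 0.  For A = 3 it is 0 - 3 = -3; for A = 0 it is 0; for A = -2 it
     is -1 - (-2) = 1; and for A = INT_MIN it is -1 - INT_MIN = INT_MAX,
     sign bit clear.  */
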
  if (code == EQ || code == NE)
    {
      /* For EQ or NE, one way to do the comparison is to apply an operation
         that converts the operand into a positive number if it is nonzero
         or zero if it was originally zero.  Then, for EQ, we subtract 1 and
         for NE we negate.  This puts the result in the sign bit.  Then we
         normalize with a shift, if needed.

         Two operations that can do the above actions are ABS and FFS, so try
         them.  If that doesn't work, and MODE is smaller than a full word,
         we can use zero-extension to the wider mode (an unsigned conversion)
         as the operation.  */

      /* Note that ABS doesn't yield a positive number for INT_MIN, but
         that is compensated by the subsequent overflow when subtracting
         one / negating.  */

      if (abs_optab->handlers[(int) mode].insn_code != CODE_FOR_nothing)
        tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
      else if (ffs_optab->handlers[(int) mode].insn_code != CODE_FOR_nothing)
        tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
      else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
        {
          op0 = protect_from_queue (op0, 0);
          tem = convert_modes (word_mode, mode, op0, 1);
          mode = word_mode;
        }

      if (tem != 0)
        {
          if (code == EQ)
            tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
                                0, OPTAB_WIDEN);
          else
            tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
        }

      /* If we couldn't do it that way, for NE we can "or" the two's complement
         of the value with itself.  For EQ, we take the one's complement of
         that "or", which is an extra insn, so we only handle EQ if branches
         are expensive.  */

      if (tem == 0 && (code == NE || BRANCH_COST > 1))
        {
          if (rtx_equal_p (subtarget, op0))
            subtarget = 0;

          tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
          tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
                              OPTAB_WIDEN);

          if (tem && code == EQ)
            tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
        }
    }

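  /* Illustrative check: -A | A has the sign bit set exactly when A != 0,
     since for nonzero A at least one of A and -A is negative (both are
     when A = INT_MIN), while -0 | 0 = 0.  The extra one's complement then
     converts this NE result into the EQ one.  */
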
  if (tem && normalizep)
    tem = expand_shift (RSHIFT_EXPR, mode, tem,
                        size_int (GET_MODE_BITSIZE (mode) - 1),
                        subtarget, normalizep == 1);

  if (tem)
    {
      if (GET_MODE (tem) != target_mode)
        {
          convert_move (target, tem, 0);
          tem = target;
        }
      else if (!subtarget)
        {
          emit_move_insn (target, tem);
          tem = target;
        }
    }
  else
    delete_insns_since (last);

  return tem;
}

/* Like emit_store_flag, but always succeeds.  */

rtx
emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
                       enum machine_mode mode, int unsignedp, int normalizep)
{
  rtx tem, label;

  /* First see if emit_store_flag can do the job.  */
  tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
  if (tem != 0)
    return tem;

  if (normalizep == 0)
    normalizep = 1;

  /* If this failed, we have to do this with set/compare/jump/set code.  */

  if (GET_CODE (target) != REG
      || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
    target = gen_reg_rtx (GET_MODE (target));

  emit_move_insn (target, const1_rtx);
  label = gen_label_rtx ();
  do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
                           NULL_RTX, label);

  emit_move_insn (target, const0_rtx);
  emit_label (label);

  return target;
}
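
/* Illustrative usage: emit_store_flag_force (target, EQ, x, y, mode, 0, 1)
   always leaves TARGET holding 1 if x == y and 0 otherwise, either via a
   store-flag sequence or, failing that, via the move/compare/jump fallback
   above.  */
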
/* Perform possibly multi-word comparison and conditional jump to LABEL
   if ARG1 OP ARG2 is true, where ARG1 and ARG2 are of mode MODE.

   The algorithm is based on the code in expr.c:do_jump.

   Note that this does not perform a general comparison.  Only variants
   generated within expmed.c are correctly handled; others abort (but could
   be handled if needed).  */

static void
do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
                 rtx label)
{
  /* If this mode is an integer too wide to compare properly,
     compare word by word.  Rely on cse to optimize constant cases.  */

  if (GET_MODE_CLASS (mode) == MODE_INT
      && ! can_compare_p (op, mode, ccp_jump))
    {
      rtx label2 = gen_label_rtx ();

      switch (op)
        {
        case LTU:
          do_jump_by_parts_greater_rtx (mode, 1, arg2, arg1, label2, label);
          break;

        case LEU:
          do_jump_by_parts_greater_rtx (mode, 1, arg1, arg2, label, label2);
          break;

        case LT:
          do_jump_by_parts_greater_rtx (mode, 0, arg2, arg1, label2, label);
          break;

        case GT:
          do_jump_by_parts_greater_rtx (mode, 0, arg1, arg2, label2, label);
          break;

        case GE:
          do_jump_by_parts_greater_rtx (mode, 0, arg2, arg1, label, label2);
          break;

          /* do_jump_by_parts_equality_rtx compares with zero.  Luckily
             those are the only equality comparisons we do.  */
        case EQ:
          if (arg2 != const0_rtx || mode != GET_MODE (arg1))
            abort ();
          do_jump_by_parts_equality_rtx (arg1, label2, label);
          break;

        case NE:
          if (arg2 != const0_rtx || mode != GET_MODE (arg1))
            abort ();
          do_jump_by_parts_equality_rtx (arg1, label, label2);
          break;

        default:
          abort ();
        }

      emit_label (label2);
    }
  else
    emit_cmp_and_jump_insns (arg1, arg2, op, NULL_RTX, mode, 0, label);
}