/* Medium-level subroutines: convert bit-field store and extract
   and shifts, multiplies and divides to rtl instructions.
   Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
   1999, 2000, 2001 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Software Foundation, 59 Temple Place - Suite 330, Boston, MA
02111-1307, USA.  */

#include "config.h"
#include "system.h"
#include "toplev.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "flags.h"
#include "insn-config.h"
#include "expr.h"
#include "optabs.h"
#include "real.h"
#include "recog.h"

static void store_fixed_bit_field	PARAMS ((rtx, unsigned HOST_WIDE_INT,
						 unsigned HOST_WIDE_INT,
						 unsigned HOST_WIDE_INT, rtx,
						 unsigned int));
static void store_split_bit_field	PARAMS ((rtx, unsigned HOST_WIDE_INT,
						 unsigned HOST_WIDE_INT, rtx,
						 unsigned int));
static rtx extract_fixed_bit_field	PARAMS ((enum machine_mode, rtx,
						 unsigned HOST_WIDE_INT,
						 unsigned HOST_WIDE_INT,
						 unsigned HOST_WIDE_INT,
						 rtx, int, unsigned int));
static rtx mask_rtx			PARAMS ((enum machine_mode, int,
						 int, int));
static rtx lshift_value			PARAMS ((enum machine_mode, rtx,
						 int, int));
static rtx extract_split_bit_field	PARAMS ((rtx, unsigned HOST_WIDE_INT,
						 unsigned HOST_WIDE_INT, int,
						 unsigned int));
static void do_cmp_and_jump		PARAMS ((rtx, rtx, enum rtx_code,
						 enum machine_mode, rtx));

/* Non-zero means divides or modulus operations are relatively cheap for
   powers of two, so don't use branches; emit the operation instead.
   Usually, this will mean that the MD file will emit non-branch
   sequences.  */

static int sdiv_pow2_cheap, smod_pow2_cheap;

#ifndef SLOW_UNALIGNED_ACCESS
#define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
#endif

/* For compilers that support multiple targets with different word sizes,
   MAX_BITS_PER_WORD contains the biggest value of BITS_PER_WORD.  An example
   is the H8/300(H) compiler.  */

#ifndef MAX_BITS_PER_WORD
#define MAX_BITS_PER_WORD BITS_PER_WORD
#endif

/* Reduce conditional compilation elsewhere.  */
#ifndef HAVE_insv
#define HAVE_insv	0
#define CODE_FOR_insv	CODE_FOR_nothing
#define gen_insv(a,b,c,d) NULL_RTX
#endif
#ifndef HAVE_extv
#define HAVE_extv	0
#define CODE_FOR_extv	CODE_FOR_nothing
#define gen_extv(a,b,c,d) NULL_RTX
#endif
#ifndef HAVE_extzv
#define HAVE_extzv	0
#define CODE_FOR_extzv	CODE_FOR_nothing
#define gen_extzv(a,b,c,d) NULL_RTX
#endif

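/* With these defaults, code below can test "if (HAVE_insv)" and call
   gen_insv unconditionally on every target; when the machine description
   provides no insv pattern the condition folds to 0 and the unreachable
   call is removed, which is the "reduced conditional compilation"
   promised above.  (The same holds for extv and extzv.)  */
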
/* Cost of various pieces of RTL.  Note that some of these are indexed by
   shift count and some by mode.  */
static int add_cost, negate_cost, zero_cost;
static int shift_cost[MAX_BITS_PER_WORD];
static int shiftadd_cost[MAX_BITS_PER_WORD];
static int shiftsub_cost[MAX_BITS_PER_WORD];
static int mul_cost[NUM_MACHINE_MODES];
static int div_cost[NUM_MACHINE_MODES];
static int mul_widen_cost[NUM_MACHINE_MODES];
static int mul_highpart_cost[NUM_MACHINE_MODES];

void
init_expmed ()
{
  /* This is "some random pseudo register" for purposes of calling recog
     to see what insns exist.  */
  rtx reg = gen_rtx_REG (word_mode, 10000);
  rtx shift_insn, shiftadd_insn, shiftsub_insn;
  int dummy;
  int m;
  enum machine_mode mode, wider_mode;

  start_sequence ();

  reg = gen_rtx_REG (word_mode, 10000);

  zero_cost = rtx_cost (const0_rtx, 0);
  add_cost = rtx_cost (gen_rtx_PLUS (word_mode, reg, reg), SET);

  shift_insn = emit_insn (gen_rtx_SET (VOIDmode, reg,
				       gen_rtx_ASHIFT (word_mode, reg,
						       const0_rtx)));

  shiftadd_insn
    = emit_insn (gen_rtx_SET (VOIDmode, reg,
			      gen_rtx_PLUS (word_mode,
					    gen_rtx_MULT (word_mode,
							  reg, const0_rtx),
					    reg)));

  shiftsub_insn
    = emit_insn (gen_rtx_SET (VOIDmode, reg,
			      gen_rtx_MINUS (word_mode,
					     gen_rtx_MULT (word_mode,
							   reg, const0_rtx),
					     reg)));

  init_recog ();

  shift_cost[0] = 0;
  shiftadd_cost[0] = shiftsub_cost[0] = add_cost;

  for (m = 1; m < MAX_BITS_PER_WORD; m++)
    {
      shift_cost[m] = shiftadd_cost[m] = shiftsub_cost[m] = 32000;

      XEXP (SET_SRC (PATTERN (shift_insn)), 1) = GEN_INT (m);
      if (recog (PATTERN (shift_insn), shift_insn, &dummy) >= 0)
	shift_cost[m] = rtx_cost (SET_SRC (PATTERN (shift_insn)), SET);

      XEXP (XEXP (SET_SRC (PATTERN (shiftadd_insn)), 0), 1)
	= GEN_INT ((HOST_WIDE_INT) 1 << m);
      if (recog (PATTERN (shiftadd_insn), shiftadd_insn, &dummy) >= 0)
	shiftadd_cost[m] = rtx_cost (SET_SRC (PATTERN (shiftadd_insn)), SET);

      XEXP (XEXP (SET_SRC (PATTERN (shiftsub_insn)), 0), 1)
	= GEN_INT ((HOST_WIDE_INT) 1 << m);
      if (recog (PATTERN (shiftsub_insn), shiftsub_insn, &dummy) >= 0)
	shiftsub_cost[m] = rtx_cost (SET_SRC (PATTERN (shiftsub_insn)), SET);
    }

  negate_cost = rtx_cost (gen_rtx_NEG (word_mode, reg), SET);

  sdiv_pow2_cheap
    = (rtx_cost (gen_rtx_DIV (word_mode, reg, GEN_INT (32)), SET)
       <= 2 * add_cost);
  smod_pow2_cheap
    = (rtx_cost (gen_rtx_MOD (word_mode, reg, GEN_INT (32)), SET)
       <= 2 * add_cost);

  for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
       mode != VOIDmode;
       mode = GET_MODE_WIDER_MODE (mode))
    {
      reg = gen_rtx_REG (mode, 10000);
      div_cost[(int) mode] = rtx_cost (gen_rtx_UDIV (mode, reg, reg), SET);
      mul_cost[(int) mode] = rtx_cost (gen_rtx_MULT (mode, reg, reg), SET);
      wider_mode = GET_MODE_WIDER_MODE (mode);
      if (wider_mode != VOIDmode)
	{
	  mul_widen_cost[(int) wider_mode]
	    = rtx_cost (gen_rtx_MULT (wider_mode,
				      gen_rtx_ZERO_EXTEND (wider_mode, reg),
				      gen_rtx_ZERO_EXTEND (wider_mode, reg)),
			SET);
	  mul_highpart_cost[(int) mode]
	    = rtx_cost (gen_rtx_TRUNCATE
			(mode,
			 gen_rtx_LSHIFTRT (wider_mode,
					   gen_rtx_MULT (wider_mode,
							 gen_rtx_ZERO_EXTEND
							 (wider_mode, reg),
							 gen_rtx_ZERO_EXTEND
							 (wider_mode, reg)),
					   GEN_INT (GET_MODE_BITSIZE (mode)))),
			SET);
	}
    }

  end_sequence ();
}

/* Return an rtx representing minus the value of X.
   MODE is the intended mode of the result,
   useful if X is a CONST_INT.  */

rtx
negate_rtx (mode, x)
     enum machine_mode mode;
     rtx x;
{
  rtx result = simplify_unary_operation (NEG, mode, x, mode);

  if (result == 0)
    result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);

  return result;
}

/* Report on the availability of insv/extv/extzv and the desired mode
   of each of their operands.  Returns MAX_MACHINE_MODE if HAVE_foo
   is false; else the mode of the specified operand.  If OPNO is -1,
   all the caller cares about is whether the insn is available.  */

enum machine_mode
mode_for_extraction (pattern, opno)
     enum extraction_pattern pattern;
     int opno;
{
  const struct insn_data *data;

  switch (pattern)
    {
    case EP_insv:
      if (HAVE_insv)
	{
	  data = &insn_data[CODE_FOR_insv];
	  break;
	}
      return MAX_MACHINE_MODE;

    case EP_extv:
      if (HAVE_extv)
	{
	  data = &insn_data[CODE_FOR_extv];
	  break;
	}
      return MAX_MACHINE_MODE;

    case EP_extzv:
      if (HAVE_extzv)
	{
	  data = &insn_data[CODE_FOR_extzv];
	  break;
	}
      return MAX_MACHINE_MODE;

    default:
      abort ();
    }

  if (opno == -1)
    return VOIDmode;

  /* Everyone who uses this function used to follow it with
     if (result == VOIDmode) result = word_mode; */
  if (data->operand[opno].mode == VOIDmode)
    return word_mode;
  return data->operand[opno].mode;
}

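/* For example, store_bit_field below asks for the mode of operand 3 of
   the insv pattern via mode_for_extraction (EP_insv, 3); on a machine
   whose insv pattern gives that operand SImode, the field value must be
   supplied in SImode, while a VOIDmode operand means word_mode per the
   compatibility rule just above.  */
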
/* Generate code to store value from rtx VALUE
   into a bit-field within structure STR_RTX
   containing BITSIZE bits starting at bit BITNUM.
   FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
   ALIGN is the alignment that STR_RTX is known to have.
   TOTAL_SIZE is the size of the structure in bytes, or -1 if varying.  */

/* ??? Note that there are two different ideas here for how
   to determine the size to count bits within, for a register.
   One is BITS_PER_WORD, and the other is the size of operand 3
   of the insv pattern.

   If operand 3 of the insv pattern is VOIDmode, then we will use
   BITS_PER_WORD; else, we use the mode of operand 3.  */

rtx
store_bit_field (str_rtx, bitsize, bitnum, fieldmode, value, align, total_size)
     rtx str_rtx;
     unsigned HOST_WIDE_INT bitsize;
     unsigned HOST_WIDE_INT bitnum;
     enum machine_mode fieldmode;
     rtx value;
     unsigned int align;
     HOST_WIDE_INT total_size;
{
  unsigned int unit
    = (GET_CODE (str_rtx) == MEM) ? BITS_PER_UNIT : BITS_PER_WORD;
  unsigned HOST_WIDE_INT offset = bitnum / unit;
  unsigned HOST_WIDE_INT bitpos = bitnum % unit;
  rtx op0 = str_rtx;

  enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);

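  /* For example, a MEM reference with BITNUM == 37 gives UNIT ==
     BITS_PER_UNIT == 8, hence OFFSET == 4 and BITPOS == 5: the field
     starts five bits into the fifth byte.  For a REG, UNIT is
     BITS_PER_WORD instead.  (Illustrative numbers only.)  */
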
  /* It is wrong to have align==0, since every object is aligned at
     least at a bit boundary.  This usually means a bug elsewhere.  */
  if (align == 0)
    abort ();

  /* Discount the part of the structure before the desired byte.
     We need to know how many bytes are safe to reference after it.  */
  if (total_size >= 0)
    total_size -= (bitpos / BIGGEST_ALIGNMENT
		   * (BIGGEST_ALIGNMENT / BITS_PER_UNIT));

  while (GET_CODE (op0) == SUBREG)
    {
      /* The following line once was done only if WORDS_BIG_ENDIAN,
	 but I think that is a mistake.  WORDS_BIG_ENDIAN is
	 meaningful at a much higher level; when structures are copied
	 between memory and regs, the higher-numbered regs
	 always get higher addresses.  */
      offset += (SUBREG_BYTE (op0) / UNITS_PER_WORD);
      /* We used to adjust BITPOS here, but now we do the whole adjustment
	 right after the loop.  */
      op0 = SUBREG_REG (op0);
    }

  value = protect_from_queue (value, 0);

  if (flag_force_mem)
    value = force_not_mem (value);

  /* If the target is a register, overwriting the entire object, or storing
     a full-word or multi-word field can be done with just a SUBREG.

     If the target is memory, storing any naturally aligned field can be
     done with a simple store.  For targets that support fast unaligned
     memory, any naturally sized, unit aligned field can be done directly.  */

  if (bitpos == 0
      && bitsize == GET_MODE_BITSIZE (fieldmode)
      && (GET_CODE (op0) != MEM
	  ? (GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD
	     || GET_MODE_SIZE (GET_MODE (op0)) == GET_MODE_SIZE (fieldmode))
	  : (! SLOW_UNALIGNED_ACCESS (fieldmode, align)
	     || (offset * BITS_PER_UNIT % bitsize == 0
		 && align % GET_MODE_BITSIZE (fieldmode) == 0))))
    {
      if (GET_MODE (op0) != fieldmode)
	{
	  if (GET_CODE (op0) == SUBREG)
	    {
	      if (GET_MODE (SUBREG_REG (op0)) == fieldmode
		  || GET_MODE_CLASS (fieldmode) == MODE_INT
		  || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT)
		op0 = SUBREG_REG (op0);
	      else
		/* Else we've got some float mode source being extracted into
		   a different float mode destination -- this combination of
		   subregs results in Severe Tire Damage.  */
		abort ();
	    }
	  if (GET_CODE (op0) == REG)
	    op0 = gen_rtx_SUBREG (fieldmode, op0,
				  (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
				  + (offset * UNITS_PER_WORD));
	  else
	    op0 = adjust_address (op0, fieldmode, offset);
	}
      emit_move_insn (op0, value);
      return value;
    }

  /* Make sure we are playing with integral modes.  Pun with subregs
     if we aren't.  This must come after the entire register case above,
     since that case is valid for any mode.  The following cases are only
     valid for integral modes.  */
  {
    enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
    if (imode != GET_MODE (op0))
      {
	if (GET_CODE (op0) == MEM)
	  op0 = adjust_address (op0, imode, 0);
	else if (imode != BLKmode)
	  op0 = gen_lowpart (imode, op0);
	else
	  abort ();
      }
  }

  /* If OP0 is a register, BITPOS must count within a word.
     But as we have it, it counts within whatever size OP0 now has.
     On a bigendian machine, these are not the same, so convert.  */
  if (BYTES_BIG_ENDIAN
      && GET_CODE (op0) != MEM
      && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
    bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));

  /* Storing an lsb-aligned field in a register
     can be done with a movestrict instruction.  */

  if (GET_CODE (op0) != MEM
      && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
      && bitsize == GET_MODE_BITSIZE (fieldmode)
      && (movstrict_optab->handlers[(int) fieldmode].insn_code
	  != CODE_FOR_nothing))
    {
      int icode = movstrict_optab->handlers[(int) fieldmode].insn_code;

      /* Get appropriate low part of the value being stored.  */
      if (GET_CODE (value) == CONST_INT || GET_CODE (value) == REG)
	value = gen_lowpart (fieldmode, value);
      else if (!(GET_CODE (value) == SYMBOL_REF
		 || GET_CODE (value) == LABEL_REF
		 || GET_CODE (value) == CONST))
	value = convert_to_mode (fieldmode, value, 0);

      if (! (*insn_data[icode].operand[1].predicate) (value, fieldmode))
	value = copy_to_mode_reg (fieldmode, value);

      if (GET_CODE (op0) == SUBREG)
	{
	  if (GET_MODE (SUBREG_REG (op0)) == fieldmode
	      || GET_MODE_CLASS (fieldmode) == MODE_INT
	      || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT)
	    op0 = SUBREG_REG (op0);
	  else
	    /* Else we've got some float mode source being extracted into
	       a different float mode destination -- this combination of
	       subregs results in Severe Tire Damage.  */
	    abort ();
	}

      emit_insn (GEN_FCN (icode)
		 (gen_rtx_SUBREG (fieldmode, op0,
				  (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
				  + (offset * UNITS_PER_WORD)),
		  value));

      return value;
    }

  /* Handle fields bigger than a word.  */

  if (bitsize > BITS_PER_WORD)
    {
      /* Here we transfer the words of the field
	 in the order least significant first.
	 This is because the most significant word is the one which may
	 be less than full.
	 However, only do that if the value is not BLKmode.  */

      unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
      unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
      unsigned int i;

      /* This is the mode we must force value to, so that there will be enough
	 subwords to extract.  Note that fieldmode will often (always?) be
	 VOIDmode, because that is what store_field uses to indicate that this
	 is a bit field, but passing VOIDmode to operand_subword_force will
	 result in an abort.  */
      fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);

      for (i = 0; i < nwords; i++)
	{
	  /* If I is 0, use the low-order word in both field and target;
	     if I is 1, use the next to lowest word; and so on.  */
	  unsigned int wordnum = (backwards ? nwords - i - 1 : i);
	  unsigned int bit_offset = (backwards
				     ? MAX ((int) bitsize - ((int) i + 1)
					    * BITS_PER_WORD,
					    0)
				     : (int) i * BITS_PER_WORD);

	  store_bit_field (op0, MIN (BITS_PER_WORD,
				     bitsize - i * BITS_PER_WORD),
			   bitnum + bit_offset, word_mode,
			   operand_subword_force (value, wordnum,
						  (GET_MODE (value) == VOIDmode
						   ? fieldmode
						   : GET_MODE (value))),
			   align, total_size);
	}
      return value;
    }

  /* From here on we can assume that the field to be stored in is
     a full-word (whatever type that is), since it is shorter than a word.  */

  /* OFFSET is the number of words or bytes (UNIT says which)
     from STR_RTX to the first word or byte containing part of the field.  */

  if (GET_CODE (op0) != MEM)
    {
      if (offset != 0
	  || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
	{
	  if (GET_CODE (op0) != REG)
	    {
	      /* Since this is a destination (lvalue), we can't copy it to a
		 pseudo.  We can trivially remove a SUBREG that does not
		 change the size of the operand.  Such a SUBREG may have been
		 added above.  Otherwise, abort.  */
	      if (GET_CODE (op0) == SUBREG
		  && (GET_MODE_SIZE (GET_MODE (op0))
		      == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)))))
		op0 = SUBREG_REG (op0);
	      else
		abort ();
	    }
	  op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
				op0, (offset * UNITS_PER_WORD));
	}
      offset = 0;
    }
  else
    {
      op0 = protect_from_queue (op0, 1);
    }

  /* If VALUE is a floating-point mode, access it as an integer of the
     corresponding size.  This can occur on a machine with 64 bit registers
     that uses SFmode for float.  This can also occur for unaligned float
     structure fields.  */
  if (GET_MODE_CLASS (GET_MODE (value)) == MODE_FLOAT)
    {
      if (GET_CODE (value) != REG)
	value = copy_to_reg (value);
      value = gen_rtx_SUBREG (word_mode, value, 0);
    }

  /* Now OFFSET is nonzero only if OP0 is memory
     and is therefore always measured in bytes.  */

  if (HAVE_insv
      && GET_MODE (value) != BLKmode
      && !(bitsize == 1 && GET_CODE (value) == CONST_INT)
      /* Ensure insv's size is wide enough for this field.  */
      && (GET_MODE_BITSIZE (op_mode) >= bitsize)
      && ! ((GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
	    && (bitsize + bitpos > GET_MODE_BITSIZE (op_mode))))
    {
      int xbitpos = bitpos;
      rtx value1;
      rtx xop0 = op0;
      rtx last = get_last_insn ();
      rtx pat;
      enum machine_mode maxmode = mode_for_extraction (EP_insv, 3);
      int save_volatile_ok = volatile_ok;

      volatile_ok = 1;

      /* If this machine's insv can only insert into a register, copy OP0
	 into a register and save it back later.  */
      /* This used to check flag_force_mem, but that was a serious
	 de-optimization now that flag_force_mem is enabled by -O2.  */
      if (GET_CODE (op0) == MEM
	  && ! ((*insn_data[(int) CODE_FOR_insv].operand[0].predicate)
		(op0, VOIDmode)))
	{
	  rtx tempreg;
	  enum machine_mode bestmode;

	  /* Get the mode to use for inserting into this field.  If OP0 is
	     BLKmode, get the smallest mode consistent with the alignment. If
	     OP0 is a non-BLKmode object that is no wider than MAXMODE, use its
	     mode. Otherwise, use the smallest mode containing the field.  */

	  if (GET_MODE (op0) == BLKmode
	      || GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (maxmode))
	    bestmode
	      = get_best_mode (bitsize, bitnum, align, maxmode,
			       MEM_VOLATILE_P (op0));
	  else
	    bestmode = GET_MODE (op0);

	  if (bestmode == VOIDmode
	      || (SLOW_UNALIGNED_ACCESS (bestmode, align)
		  && GET_MODE_BITSIZE (bestmode) > align))
	    goto insv_loses;

	  /* Adjust address to point to the containing unit of that mode.  */
	  unit = GET_MODE_BITSIZE (bestmode);
	  /* Compute offset as multiple of this unit, counting in bytes.  */
	  offset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
	  bitpos = bitnum % unit;
	  op0 = adjust_address (op0, bestmode, offset);

	  /* Fetch that unit, store the bitfield in it, then store
	     the unit.  */
	  tempreg = copy_to_reg (op0);
	  store_bit_field (tempreg, bitsize, bitpos, fieldmode, value,
			   align, total_size);
	  emit_move_insn (op0, tempreg);
	  return value;
	}
      volatile_ok = save_volatile_ok;

      /* Add OFFSET into OP0's address.  */
      if (GET_CODE (xop0) == MEM)
	xop0 = adjust_address (xop0, byte_mode, offset);

      /* If xop0 is a register, we need it in MAXMODE
	 to make it acceptable to the format of insv.  */
      if (GET_CODE (xop0) == SUBREG)
	/* We can't just change the mode, because this might clobber op0,
	   and we will need the original value of op0 if insv fails.  */
	xop0 = gen_rtx_SUBREG (maxmode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
      if (GET_CODE (xop0) == REG && GET_MODE (xop0) != maxmode)
	xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);

      /* On big-endian machines, we count bits from the most significant.
	 If the bit field insn does not, we must invert.  */

      if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
	xbitpos = unit - bitsize - xbitpos;

      /* We have been counting XBITPOS within UNIT.
	 Count instead within the size of the register.  */
      if (BITS_BIG_ENDIAN && GET_CODE (xop0) != MEM)
	xbitpos += GET_MODE_BITSIZE (maxmode) - unit;

      unit = GET_MODE_BITSIZE (maxmode);

      /* Convert VALUE to maxmode (which insv insn wants) in VALUE1.  */
      value1 = value;
      if (GET_MODE (value) != maxmode)
	{
	  if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
	    {
	      /* Optimization: Don't bother really extending VALUE
		 if it has all the bits we will actually use.  However,
		 if we must narrow it, be sure we do it correctly.  */

	      if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (maxmode))
		{
		  /* Avoid making subreg of a subreg, or of a mem.  */
		  if (GET_CODE (value1) != REG)
		    value1 = copy_to_reg (value1);
		  value1 = gen_rtx_SUBREG (maxmode, value1, 0);
		}
	      else
		value1 = gen_lowpart (maxmode, value1);
	    }
	  else if (GET_CODE (value) == CONST_INT)
	    value1 = GEN_INT (trunc_int_for_mode (INTVAL (value), maxmode));
	  else if (!CONSTANT_P (value))
	    /* Parse phase is supposed to make VALUE's data type
	       match that of the component reference, which is a type
	       at least as wide as the field; so VALUE should have
	       a mode that corresponds to that type.  */
	    abort ();
	}

      /* If this machine's insv insists on a register,
	 get VALUE1 into a register.  */
      if (! ((*insn_data[(int) CODE_FOR_insv].operand[3].predicate)
	     (value1, maxmode)))
	value1 = force_reg (maxmode, value1);

      pat = gen_insv (xop0, GEN_INT (bitsize), GEN_INT (xbitpos), value1);
      if (pat)
	emit_insn (pat);
      else
	{
	  delete_insns_since (last);
	  store_fixed_bit_field (op0, offset, bitsize, bitpos, value, align);
	}
    }
  else
    insv_loses:
    /* Insv is not available; store using shifts and boolean ops.  */
    store_fixed_bit_field (op0, offset, bitsize, bitpos, value, align);
  return value;
}

/* Use shifts and boolean operations to store VALUE
   into a bit field of width BITSIZE
   in a memory location specified by OP0 except offset by OFFSET bytes.
     (OFFSET must be 0 if OP0 is a register.)
   The field starts at position BITPOS within the byte.
     (If OP0 is a register, it may be a full word or a narrower mode,
      but BITPOS still counts within a full word,
      which is significant on bigendian machines.)
   STRUCT_ALIGN is the alignment the structure is known to have.

   Note that protect_from_queue has already been done on OP0 and VALUE.  */

static void
store_fixed_bit_field (op0, offset, bitsize, bitpos, value, struct_align)
     rtx op0;
     unsigned HOST_WIDE_INT offset, bitsize, bitpos;
     rtx value;
     unsigned int struct_align;
{
  enum machine_mode mode;
  unsigned int total_bits = BITS_PER_WORD;
  rtx subtarget, temp;
  int all_zero = 0;
  int all_one = 0;

  if (! SLOW_UNALIGNED_ACCESS (word_mode, struct_align))
    struct_align = BIGGEST_ALIGNMENT;

  /* There is a case not handled here:
     a structure with a known alignment of just a halfword
     and a field split across two aligned halfwords within the structure.
     Or likewise a structure with a known alignment of just a byte
     and a field split across two bytes.
     Such cases are not supposed to be able to occur.  */

  if (GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
    {
      if (offset != 0)
	abort ();
      /* Special treatment for a bit field split across two registers.  */
      if (bitsize + bitpos > BITS_PER_WORD)
	{
	  store_split_bit_field (op0, bitsize, bitpos,
				 value, BITS_PER_WORD);
	  return;
	}
    }
  else
    {
      /* Get the proper mode to use for this field.  We want a mode that
	 includes the entire field.  If such a mode would be larger than
	 a word, we won't be doing the extraction the normal way.
	 We don't want a mode bigger than the destination.  */

      mode = GET_MODE (op0);
      if (GET_MODE_BITSIZE (mode) == 0
	  || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
	mode = word_mode;
      mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
			    struct_align, mode,
			    GET_CODE (op0) == MEM && MEM_VOLATILE_P (op0));

      if (mode == VOIDmode)
	{
	  /* The only way this should occur is if the field spans word
	     boundaries.  */
	  store_split_bit_field (op0,
				 bitsize, bitpos + offset * BITS_PER_UNIT,
				 value, struct_align);
	  return;
	}

      total_bits = GET_MODE_BITSIZE (mode);

      /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
	 be in the range 0 to total_bits-1, and put any excess bytes in
	 OFFSET.  */
      if (bitpos >= total_bits)
	{
	  offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
	  bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
		     * BITS_PER_UNIT);
	}

      /* Get ref to an aligned byte, halfword, or word containing the field.
	 Adjust BITPOS to be position within a word,
	 and OFFSET to be the offset of that word.
	 Then alter OP0 to refer to that word.  */
      bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
      offset -= (offset % (total_bits / BITS_PER_UNIT));
      op0 = adjust_address (op0, mode, offset);
    }

  mode = GET_MODE (op0);

  /* Now MODE is either some integral mode for a MEM as OP0,
     or is a full-word for a REG as OP0.  TOTAL_BITS corresponds.
     The bit field is contained entirely within OP0.
     BITPOS is the starting bit number within OP0.
     (OP0's mode may actually be narrower than MODE.)  */

  if (BYTES_BIG_ENDIAN)
    /* BITPOS is the distance between our msb
       and that of the containing datum.
       Convert it to the distance from the lsb.  */
    bitpos = total_bits - bitsize - bitpos;

  /* Now BITPOS is always the distance between our lsb
     and that of OP0.  */

  /* Shift VALUE left by BITPOS bits.  If VALUE is not constant,
     we must first convert its mode to MODE.  */

  if (GET_CODE (value) == CONST_INT)
    {
      HOST_WIDE_INT v = INTVAL (value);

      if (bitsize < HOST_BITS_PER_WIDE_INT)
	v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;

      if (v == 0)
	all_zero = 1;
      else if ((bitsize < HOST_BITS_PER_WIDE_INT
		&& v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
	       || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
	all_one = 1;

      value = lshift_value (mode, value, bitpos, bitsize);
    }
  else
    {
      int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
		      && bitpos + bitsize != GET_MODE_BITSIZE (mode));

      if (GET_MODE (value) != mode)
	{
	  if ((GET_CODE (value) == REG || GET_CODE (value) == SUBREG)
	      && GET_MODE_SIZE (mode) < GET_MODE_SIZE (GET_MODE (value)))
	    value = gen_lowpart (mode, value);
	  else
	    value = convert_to_mode (mode, value, 1);
	}

      if (must_and)
	value = expand_binop (mode, and_optab, value,
			      mask_rtx (mode, 0, bitsize, 0),
			      NULL_RTX, 1, OPTAB_LIB_WIDEN);
      if (bitpos > 0)
	value = expand_shift (LSHIFT_EXPR, mode, value,
			      build_int_2 (bitpos, 0), NULL_RTX, 1);
    }

  /* Now clear the chosen bits in OP0,
     except that if VALUE is -1 we need not bother.  */

  subtarget = (GET_CODE (op0) == REG || ! flag_force_mem) ? op0 : 0;

  if (! all_one)
    {
      temp = expand_binop (mode, and_optab, op0,
			   mask_rtx (mode, bitpos, bitsize, 1),
			   subtarget, 1, OPTAB_LIB_WIDEN);
      subtarget = temp;
    }
  else
    temp = op0;

  /* Now logical-or VALUE into OP0, unless it is zero.  */

  if (! all_zero)
    temp = expand_binop (mode, ior_optab, temp, value,
			 subtarget, 1, OPTAB_LIB_WIDEN);
  if (op0 != temp)
    emit_move_insn (op0, temp);
}

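/* A minimal sketch, excluded from compilation, of the word-level
   read-modify-write that the RTL emitted above performs.  The helper name
   is hypothetical; it assumes 0 < BITSIZE < 32 and BITPOS already counted
   from the lsb.  */
#if 0
static unsigned int
store_fixed_bit_field_sketch (word, value, bitpos, bitsize)
     unsigned int word, value;
     int bitpos, bitsize;
{
  unsigned int mask = ((1u << bitsize) - 1) << bitpos;

  /* Clear the field, then OR in the shifted, masked value.  */
  return (word & ~mask) | ((value << bitpos) & mask);
}
#endif
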
/* Store a bit field that is split across multiple accessible memory objects.

   OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
   BITSIZE is the field width; BITPOS the position of its first bit
   (within the word).
   VALUE is the value to store.
   ALIGN is the known alignment of OP0.
   This is also the size of the memory objects to be used.

   This does not yet handle fields wider than BITS_PER_WORD.  */

static void
store_split_bit_field (op0, bitsize, bitpos, value, align)
     rtx op0;
     unsigned HOST_WIDE_INT bitsize, bitpos;
     rtx value;
     unsigned int align;
{
  unsigned int unit;
  unsigned int bitsdone = 0;

  /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
     much at a time.  */
  if (GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
    unit = BITS_PER_WORD;
  else
    unit = MIN (align, BITS_PER_WORD);

  /* If VALUE is a constant other than a CONST_INT, get it into a register in
     WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
     that VALUE might be a floating-point constant.  */
  if (CONSTANT_P (value) && GET_CODE (value) != CONST_INT)
    {
      rtx word = gen_lowpart_common (word_mode, value);

      if (word && (value != word))
	value = word;
      else
	value = gen_lowpart_common (word_mode,
				    force_reg (GET_MODE (value) != VOIDmode
					       ? GET_MODE (value)
					       : word_mode, value));
    }
  else if (GET_CODE (value) == ADDRESSOF)
    value = copy_to_reg (value);

  while (bitsdone < bitsize)
    {
      unsigned HOST_WIDE_INT thissize;
      rtx part, word;
      unsigned HOST_WIDE_INT thispos;
      unsigned HOST_WIDE_INT offset;

      offset = (bitpos + bitsdone) / unit;
      thispos = (bitpos + bitsdone) % unit;

      /* THISSIZE must not overrun a word boundary.  Otherwise,
	 store_fixed_bit_field will call us again, and we will mutually
	 recurse forever.  */
      thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
      thissize = MIN (thissize, unit - thispos);

      if (BYTES_BIG_ENDIAN)
	{
	  int total_bits;

	  /* We must do an endian conversion exactly the same way as it is
	     done in extract_bit_field, so that the two calls to
	     extract_fixed_bit_field will have comparable arguments.  */
	  if (GET_CODE (value) != MEM || GET_MODE (value) == BLKmode)
	    total_bits = BITS_PER_WORD;
	  else
	    total_bits = GET_MODE_BITSIZE (GET_MODE (value));

	  /* Fetch successively less significant portions.  */
	  if (GET_CODE (value) == CONST_INT)
	    part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
			     >> (bitsize - bitsdone - thissize))
			    & (((HOST_WIDE_INT) 1 << thissize) - 1));
	  else
	    /* The args are chosen so that the last part includes the
	       lsb.  Give extract_bit_field the value it needs (with
	       endianness compensation) to fetch the piece we want.

	       ??? We have no idea what the alignment of VALUE is, so
	       we have to use a guess.  */
	    part
	      = extract_fixed_bit_field
		(word_mode, value, 0, thissize,
		 total_bits - bitsize + bitsdone, NULL_RTX, 1,
		 GET_MODE (value) == VOIDmode
		 ? UNITS_PER_WORD
		 : (GET_MODE (value) == BLKmode
		    ? 1 : GET_MODE_ALIGNMENT (GET_MODE (value))));
	}
      else
	{
	  /* Fetch successively more significant portions.  */
	  if (GET_CODE (value) == CONST_INT)
	    part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
			     >> bitsdone)
			    & (((HOST_WIDE_INT) 1 << thissize) - 1));
	  else
	    part
	      = extract_fixed_bit_field
		(word_mode, value, 0, thissize, bitsdone, NULL_RTX, 1,
		 GET_MODE (value) == VOIDmode
		 ? UNITS_PER_WORD
		 : (GET_MODE (value) == BLKmode
		    ? 1 : GET_MODE_ALIGNMENT (GET_MODE (value))));
	}

      /* If OP0 is a register, then handle OFFSET here.

	 When handling multiword bitfields, extract_bit_field may pass
	 down a word_mode SUBREG of a larger REG for a bitfield that actually
	 crosses a word boundary.  Thus, for a SUBREG, we must find
	 the current word starting from the base register.  */
      if (GET_CODE (op0) == SUBREG)
	{
	  int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
	  word = operand_subword_force (SUBREG_REG (op0), word_offset,
					GET_MODE (SUBREG_REG (op0)));
	  offset = 0;
	}
      else if (GET_CODE (op0) == REG)
	{
	  word = operand_subword_force (op0, offset, GET_MODE (op0));
	  offset = 0;
	}
      else
	word = op0;

      /* OFFSET is in UNITs, and UNIT is in bits.
	 store_fixed_bit_field wants offset in bytes.  */
      store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT,
			     thissize, thispos, part, align);
      bitsdone += thissize;
    }
}

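/* For example, with BITS_PER_WORD == 32, a little-endian target storing
   an 8-bit field at BITPOS 28 takes two iterations: the first stores the
   value's low 4 bits at bits 28..31 of word 0, the second its high 4
   bits at bits 0..3 of word 1, each through store_fixed_bit_field.
   (Illustrative numbers only.)  */
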
/* Generate code to extract a byte-field from STR_RTX
   containing BITSIZE bits, starting at BITNUM,
   and put it in TARGET if possible (if TARGET is nonzero).
   Regardless of TARGET, we return the rtx for where the value is placed.
   It may be a QUEUED.

   STR_RTX is the structure containing the byte (a REG or MEM).
   UNSIGNEDP is nonzero if this is an unsigned bit field.
   MODE is the natural mode of the field value once extracted.
   TMODE is the mode the caller would like the value to have;
   but the value may be returned with type MODE instead.

   ALIGN is the alignment that STR_RTX is known to have.
   TOTAL_SIZE is the size in bytes of the containing structure,
   or -1 if varying.

   If a TARGET is specified and we can store in it at no extra cost,
   we do so, and return TARGET.
   Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
   if they are equally easy.  */

rtx
extract_bit_field (str_rtx, bitsize, bitnum, unsignedp,
		   target, mode, tmode, align, total_size)
     rtx str_rtx;
     unsigned HOST_WIDE_INT bitsize;
     unsigned HOST_WIDE_INT bitnum;
     int unsignedp;
     rtx target;
     enum machine_mode mode, tmode;
     unsigned int align;
     HOST_WIDE_INT total_size;
{
  unsigned int unit
    = (GET_CODE (str_rtx) == MEM) ? BITS_PER_UNIT : BITS_PER_WORD;
  unsigned HOST_WIDE_INT offset = bitnum / unit;
  unsigned HOST_WIDE_INT bitpos = bitnum % unit;
  rtx op0 = str_rtx;
  rtx spec_target = target;
  rtx spec_target_subreg = 0;
  enum machine_mode int_mode;
  enum machine_mode extv_mode = mode_for_extraction (EP_extv, 0);
  enum machine_mode extzv_mode = mode_for_extraction (EP_extzv, 0);

  /* Discount the part of the structure before the desired byte.
     We need to know how many bytes are safe to reference after it.  */
  if (total_size >= 0)
    total_size -= (bitpos / BIGGEST_ALIGNMENT
		   * (BIGGEST_ALIGNMENT / BITS_PER_UNIT));

  if (tmode == VOIDmode)
    tmode = mode;
  while (GET_CODE (op0) == SUBREG)
    {
      int outer_size = GET_MODE_BITSIZE (GET_MODE (op0));
      int inner_size = GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op0)));

      offset += SUBREG_BYTE (op0) / UNITS_PER_WORD;

      inner_size = MIN (inner_size, BITS_PER_WORD);

      if (BYTES_BIG_ENDIAN && (outer_size < inner_size))
	{
	  bitpos += inner_size - outer_size;
	  if (bitpos > unit)
	    {
	      offset += (bitpos / unit);
	      bitpos %= unit;
	    }
	}

      op0 = SUBREG_REG (op0);
    }

  if (GET_CODE (op0) == REG
      && mode == GET_MODE (op0)
      && bitnum == 0
      && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
    {
      /* We're trying to extract a full register from itself.  */
      return op0;
    }

  /* Make sure we are playing with integral modes.  Pun with subregs
     if we aren't.  */
  {
    enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
    if (imode != GET_MODE (op0))
      {
	if (GET_CODE (op0) == MEM)
	  op0 = adjust_address (op0, imode, 0);
	else if (imode != BLKmode)
	  op0 = gen_lowpart (imode, op0);
	else
	  abort ();
      }
  }

  /* ??? We currently assume TARGET is at least as big as BITSIZE.
     If that's wrong, the solution is to test for it and set TARGET to 0
     if needed.  */

  /* If OP0 is a register, BITPOS must count within a word.
     But as we have it, it counts within whatever size OP0 now has.
     On a bigendian machine, these are not the same, so convert.  */
  if (BYTES_BIG_ENDIAN
      && GET_CODE (op0) != MEM
      && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
    bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));

  /* Extracting a full-word or multi-word value
     from a structure in a register or aligned memory.
     This can be done with just SUBREG.
     So too extracting a subword value in
     the least significant part of the register.  */

  if (((GET_CODE (op0) != MEM
	&& TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (mode),
				  GET_MODE_BITSIZE (GET_MODE (op0))))
       || (GET_CODE (op0) == MEM
	   && (! SLOW_UNALIGNED_ACCESS (mode, align)
	       || (offset * BITS_PER_UNIT % bitsize == 0
		   && align % bitsize == 0))))
      && ((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode)
	   && bitpos % BITS_PER_WORD == 0)
	  || (mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0) != BLKmode
	      /* ??? The big endian test here is wrong.  This is correct
		 if the value is in a register, and if mode_for_size is not
		 the same mode as op0.  This causes us to get unnecessarily
		 inefficient code from the Thumb port when -mbig-endian.  */
	      && (BYTES_BIG_ENDIAN
		  ? bitpos + bitsize == BITS_PER_WORD
		  : bitpos == 0))))
    {
      enum machine_mode mode1
	= (VECTOR_MODE_P (tmode) ? mode
	   : mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0));

      if (mode1 != GET_MODE (op0))
	{
	  if (GET_CODE (op0) == SUBREG)
	    {
	      if (GET_MODE (SUBREG_REG (op0)) == mode1
		  || GET_MODE_CLASS (mode1) == MODE_INT
		  || GET_MODE_CLASS (mode1) == MODE_PARTIAL_INT)
		op0 = SUBREG_REG (op0);
	      else
		/* Else we've got some float mode source being extracted into
		   a different float mode destination -- this combination of
		   subregs results in Severe Tire Damage.  */
		abort ();
	    }

	  if (GET_CODE (op0) == REG)
	    op0 = gen_rtx_SUBREG (mode1, op0,
				  (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
				  + (offset * UNITS_PER_WORD));
	  else
	    op0 = adjust_address (op0, mode1, offset);
	}
      if (mode1 != mode)
	return convert_to_mode (tmode, op0, unsignedp);
      return op0;
    }

  /* Handle fields bigger than a word.  */

  if (bitsize > BITS_PER_WORD)
    {
      /* Here we transfer the words of the field
	 in the order least significant first.
	 This is because the most significant word is the one which may
	 be less than full.  */

      unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
      unsigned int i;

      if (target == 0 || GET_CODE (target) != REG)
	target = gen_reg_rtx (mode);

      /* Indicate for flow that the entire target reg is being set.  */
      emit_insn (gen_rtx_CLOBBER (VOIDmode, target));

      for (i = 0; i < nwords; i++)
	{
	  /* If I is 0, use the low-order word in both field and target;
	     if I is 1, use the next to lowest word; and so on.  */
	  /* Word number in TARGET to use.  */
	  unsigned int wordnum
	    = (WORDS_BIG_ENDIAN
	       ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
	       : i);
	  /* Offset from start of field in OP0.  */
	  unsigned int bit_offset = (WORDS_BIG_ENDIAN
				     ? MAX (0, ((int) bitsize - ((int) i + 1)
						* (int) BITS_PER_WORD))
				     : (int) i * BITS_PER_WORD);
	  rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
	  rtx result_part
	    = extract_bit_field (op0, MIN (BITS_PER_WORD,
					   bitsize - i * BITS_PER_WORD),
				 bitnum + bit_offset, 1, target_part, mode,
				 word_mode, align, total_size);

	  if (target_part == 0)
	    abort ();

	  if (result_part != target_part)
	    emit_move_insn (target_part, result_part);
	}

      if (unsignedp)
	{
	  /* Unless we've filled TARGET, the upper regs in a multi-reg value
	     need to be zero'd out.  */
	  if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
	    {
	      unsigned int i, total_words;

	      total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
	      for (i = nwords; i < total_words; i++)
		{
		  int wordnum = WORDS_BIG_ENDIAN ? total_words - i - 1 : i;
		  rtx target_part = operand_subword (target, wordnum, 1,
						     VOIDmode);
		  emit_move_insn (target_part, const0_rtx);
		}
	    }
	  return target;
	}

      /* Signed bit field: sign-extend with two arithmetic shifts.  */
      target = expand_shift (LSHIFT_EXPR, mode, target,
			     build_int_2 (GET_MODE_BITSIZE (mode) - bitsize, 0),
			     NULL_RTX, 0);
      return expand_shift (RSHIFT_EXPR, mode, target,
			   build_int_2 (GET_MODE_BITSIZE (mode) - bitsize, 0),
			   NULL_RTX, 0);
    }

  /* From here on we know the desired field is smaller than a word.  */

  /* Check if there is a correspondingly-sized integer field, so we can
     safely extract it as one size of integer, if necessary; then
     truncate or extend to the size that is wanted; then use SUBREGs or
     convert_to_mode to get one of the modes we really wanted.  */

  int_mode = int_mode_for_mode (tmode);
  if (int_mode == BLKmode)
    int_mode = int_mode_for_mode (mode);
  if (int_mode == BLKmode)
    abort ();	/* Should probably push op0 out to memory and then
		   do a load.  */

  /* OFFSET is the number of words or bytes (UNIT says which)
     from STR_RTX to the first word or byte containing part of the field.  */

  if (GET_CODE (op0) != MEM)
    {
      if (offset != 0
	  || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
	{
	  if (GET_CODE (op0) != REG)
	    op0 = copy_to_reg (op0);
	  op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
				op0, (offset * UNITS_PER_WORD));
	}
      offset = 0;
    }
  else
    {
      op0 = protect_from_queue (str_rtx, 1);
    }

  /* Now OFFSET is nonzero only for memory operands.  */

  if (unsignedp)
    {
      if (HAVE_extzv
	  && (GET_MODE_BITSIZE (extzv_mode) >= bitsize)
	  && ! ((GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
		&& (bitsize + bitpos > GET_MODE_BITSIZE (extzv_mode))))
	{
	  unsigned HOST_WIDE_INT xbitpos = bitpos, xoffset = offset;
	  rtx bitsize_rtx, bitpos_rtx;
	  rtx last = get_last_insn ();
	  rtx xop0 = op0;
	  rtx xtarget = target;
	  rtx xspec_target = spec_target;
	  rtx xspec_target_subreg = spec_target_subreg;
	  rtx pat;
	  enum machine_mode maxmode = mode_for_extraction (EP_extzv, 0);

	  if (GET_CODE (xop0) == MEM)
	    {
	      int save_volatile_ok = volatile_ok;
	      volatile_ok = 1;

	      /* Is the memory operand acceptable?  */
	      if (! ((*insn_data[(int) CODE_FOR_extzv].operand[1].predicate)
		     (xop0, GET_MODE (xop0))))
		{
		  /* No, load into a reg and extract from there.  */
		  enum machine_mode bestmode;

		  /* Get the mode to use for inserting into this field.  If
		     OP0 is BLKmode, get the smallest mode consistent with the
		     alignment.  If OP0 is a non-BLKmode object that is no
		     wider than MAXMODE, use its mode.  Otherwise, use the
		     smallest mode containing the field.  */

		  if (GET_MODE (xop0) == BLKmode
		      || (GET_MODE_SIZE (GET_MODE (op0))
			  > GET_MODE_SIZE (maxmode)))
		    bestmode = get_best_mode (bitsize, bitnum, align, maxmode,
					      MEM_VOLATILE_P (xop0));
		  else
		    bestmode = GET_MODE (xop0);

		  if (bestmode == VOIDmode
		      || (SLOW_UNALIGNED_ACCESS (bestmode, align)
			  && GET_MODE_BITSIZE (bestmode) > align))
		    goto extzv_loses;

		  /* Compute offset as multiple of this unit,
		     counting in bytes.  */
		  unit = GET_MODE_BITSIZE (bestmode);
		  xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
		  xbitpos = bitnum % unit;
		  xop0 = adjust_address (xop0, bestmode, xoffset);

		  /* Fetch it to a register in that size.  */
		  xop0 = force_reg (bestmode, xop0);

		  /* XBITPOS counts within UNIT, which is what is expected.  */
		}
	      else
		/* Get ref to first byte containing part of the field.  */
		xop0 = adjust_address (xop0, byte_mode, xoffset);

	      volatile_ok = save_volatile_ok;
	    }

	  /* If op0 is a register, we need it in MAXMODE (which is usually
	     SImode) to make it acceptable to the format of extzv.  */
	  if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
	    goto extzv_loses;
	  if (GET_CODE (xop0) == REG && GET_MODE (xop0) != maxmode)
	    xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);

	  /* On big-endian machines, we count bits from the most significant.
	     If the bit field insn does not, we must invert.  */
	  if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
	    xbitpos = unit - bitsize - xbitpos;

	  /* Now convert from counting within UNIT to counting in MAXMODE.  */
	  if (BITS_BIG_ENDIAN && GET_CODE (xop0) != MEM)
	    xbitpos += GET_MODE_BITSIZE (maxmode) - unit;

	  unit = GET_MODE_BITSIZE (maxmode);

	  if (xtarget == 0
	      || (flag_force_mem && GET_CODE (xtarget) == MEM))
	    xtarget = xspec_target = gen_reg_rtx (tmode);

	  if (GET_MODE (xtarget) != maxmode)
	    {
	      if (GET_CODE (xtarget) == REG)
		{
		  int wider = (GET_MODE_SIZE (maxmode)
			       > GET_MODE_SIZE (GET_MODE (xtarget)));
		  xtarget = gen_lowpart (maxmode, xtarget);
		  if (wider)
		    xspec_target_subreg = xtarget;
		}
	      else
		xtarget = gen_reg_rtx (maxmode);
	    }

	  /* If this machine's extzv insists on a register target,
	     make sure we have one.  */
	  if (! ((*insn_data[(int) CODE_FOR_extzv].operand[0].predicate)
		 (xtarget, maxmode)))
	    xtarget = gen_reg_rtx (maxmode);

	  bitsize_rtx = GEN_INT (bitsize);
	  bitpos_rtx = GEN_INT (xbitpos);

	  pat = gen_extzv (protect_from_queue (xtarget, 1),
			   xop0, bitsize_rtx, bitpos_rtx);
	  if (pat)
	    {
	      emit_insn (pat);
	      target = xtarget;
	      spec_target = xspec_target;
	      spec_target_subreg = xspec_target_subreg;
	    }
	  else
	    {
	      delete_insns_since (last);
	      target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
						bitpos, target, 1, align);
	    }
	}
      else
	extzv_loses:
	target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
					  bitpos, target, 1, align);
    }
  else
    {
      if (HAVE_extv
	  && (GET_MODE_BITSIZE (extv_mode) >= bitsize)
	  && ! ((GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
		&& (bitsize + bitpos > GET_MODE_BITSIZE (extv_mode))))
	{
	  int xbitpos = bitpos, xoffset = offset;
	  rtx bitsize_rtx, bitpos_rtx;
	  rtx last = get_last_insn ();
	  rtx xop0 = op0, xtarget = target;
	  rtx xspec_target = spec_target;
	  rtx xspec_target_subreg = spec_target_subreg;
	  rtx pat;
	  enum machine_mode maxmode = mode_for_extraction (EP_extv, 0);

	  if (GET_CODE (xop0) == MEM)
	    {
	      /* Is the memory operand acceptable?  */
	      if (! ((*insn_data[(int) CODE_FOR_extv].operand[1].predicate)
		     (xop0, GET_MODE (xop0))))
		{
		  /* No, load into a reg and extract from there.  */
		  enum machine_mode bestmode;

		  /* Get the mode to use for inserting into this field.  If
		     OP0 is BLKmode, get the smallest mode consistent with the
		     alignment.  If OP0 is a non-BLKmode object that is no
		     wider than MAXMODE, use its mode.  Otherwise, use the
		     smallest mode containing the field.  */

		  if (GET_MODE (xop0) == BLKmode
		      || (GET_MODE_SIZE (GET_MODE (op0))
			  > GET_MODE_SIZE (maxmode)))
		    bestmode = get_best_mode (bitsize, bitnum, align, maxmode,
					      MEM_VOLATILE_P (xop0));
		  else
		    bestmode = GET_MODE (xop0);

		  if (bestmode == VOIDmode
		      || (SLOW_UNALIGNED_ACCESS (bestmode, align)
			  && GET_MODE_BITSIZE (bestmode) > align))
		    goto extv_loses;

		  /* Compute offset as multiple of this unit,
		     counting in bytes.  */
		  unit = GET_MODE_BITSIZE (bestmode);
		  xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
		  xbitpos = bitnum % unit;
		  xop0 = adjust_address (xop0, bestmode, xoffset);

		  /* Fetch it to a register in that size.  */
		  xop0 = force_reg (bestmode, xop0);

		  /* XBITPOS counts within UNIT, which is what is expected.  */
		}
	      else
		/* Get ref to first byte containing part of the field.  */
		xop0 = adjust_address (xop0, byte_mode, xoffset);
	    }

	  /* If op0 is a register, we need it in MAXMODE (which is usually
	     SImode) to make it acceptable to the format of extv.  */
	  if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
	    goto extv_loses;
	  if (GET_CODE (xop0) == REG && GET_MODE (xop0) != maxmode)
	    xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);

	  /* On big-endian machines, we count bits from the most significant.
	     If the bit field insn does not, we must invert.  */
	  if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
	    xbitpos = unit - bitsize - xbitpos;

	  /* XBITPOS counts within a size of UNIT.
	     Adjust to count within a size of MAXMODE.  */
	  if (BITS_BIG_ENDIAN && GET_CODE (xop0) != MEM)
	    xbitpos += (GET_MODE_BITSIZE (maxmode) - unit);

	  unit = GET_MODE_BITSIZE (maxmode);

	  if (xtarget == 0
	      || (flag_force_mem && GET_CODE (xtarget) == MEM))
	    xtarget = xspec_target = gen_reg_rtx (tmode);

	  if (GET_MODE (xtarget) != maxmode)
	    {
	      if (GET_CODE (xtarget) == REG)
		{
		  int wider = (GET_MODE_SIZE (maxmode)
			       > GET_MODE_SIZE (GET_MODE (xtarget)));
		  xtarget = gen_lowpart (maxmode, xtarget);
		  if (wider)
		    xspec_target_subreg = xtarget;
		}
	      else
		xtarget = gen_reg_rtx (maxmode);
	    }

	  /* If this machine's extv insists on a register target,
	     make sure we have one.  */
	  if (! ((*insn_data[(int) CODE_FOR_extv].operand[0].predicate)
		 (xtarget, maxmode)))
	    xtarget = gen_reg_rtx (maxmode);

	  bitsize_rtx = GEN_INT (bitsize);
	  bitpos_rtx = GEN_INT (xbitpos);

	  pat = gen_extv (protect_from_queue (xtarget, 1),
			  xop0, bitsize_rtx, bitpos_rtx);
	  if (pat)
	    {
	      emit_insn (pat);
	      target = xtarget;
	      spec_target = xspec_target;
	      spec_target_subreg = xspec_target_subreg;
	    }
	  else
	    {
	      delete_insns_since (last);
	      target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
						bitpos, target, 0, align);
	    }
	}
      else
	extv_loses:
	target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
					  bitpos, target, 0, align);
    }
  if (target == spec_target)
    return target;
  if (target == spec_target_subreg)
    return spec_target;
  if (GET_MODE (target) != tmode && GET_MODE (target) != mode)
    {
      /* If the target mode is floating-point, first convert to the
	 integer mode of that size and then access it as a floating-point
	 value via a SUBREG.  */
      if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
	{
	  target = convert_to_mode (mode_for_size (GET_MODE_BITSIZE (tmode),
						   MODE_INT, 0),
				    target, unsignedp);
	  if (GET_CODE (target) != REG)
	    target = copy_to_reg (target);
	  return gen_rtx_SUBREG (tmode, target, 0);
	}
      else
	return convert_to_mode (tmode, target, unsignedp);
    }
  return target;
}

/* Extract a bit field using shifts and boolean operations.
   Returns an rtx to represent the value.
   OP0 addresses a register (word) or memory (byte).
   BITPOS says which bit within the word or byte the bit field starts in.
   OFFSET says how many bytes farther the bit field starts;
     it is 0 if OP0 is a register.
   BITSIZE says how many bits long the bit field is.
     (If OP0 is a register, it may be narrower than a full word,
      but BITPOS still counts within a full word,
      which is significant on bigendian machines.)

   UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
   If TARGET is nonzero, attempts to store the value there
   and return TARGET, but this is not guaranteed.
   If TARGET is not used, create a pseudo-reg of mode TMODE for the value.

   ALIGN is the alignment that STR_RTX is known to have.  */

static rtx
extract_fixed_bit_field (tmode, op0, offset, bitsize, bitpos,
			 target, unsignedp, align)
     enum machine_mode tmode;
     rtx op0, target;
     unsigned HOST_WIDE_INT offset, bitsize, bitpos;
     int unsignedp;
     unsigned int align;
{
  unsigned int total_bits = BITS_PER_WORD;
  enum machine_mode mode;

  if (GET_CODE (op0) == SUBREG || GET_CODE (op0) == REG)
    {
      /* Special treatment for a bit field split across two registers.  */
      if (bitsize + bitpos > BITS_PER_WORD)
	return extract_split_bit_field (op0, bitsize, bitpos,
					unsignedp, align);
    }
  else
    {
      /* Get the proper mode to use for this field.  We want a mode that
	 includes the entire field.  If such a mode would be larger than
	 a word, we won't be doing the extraction the normal way.  */

      mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT, align,
			    word_mode,
			    GET_CODE (op0) == MEM && MEM_VOLATILE_P (op0));

      if (mode == VOIDmode)
	/* The only way this should occur is if the field spans word
	   boundaries.  */
	return extract_split_bit_field (op0, bitsize,
					bitpos + offset * BITS_PER_UNIT,
					unsignedp, align);

      total_bits = GET_MODE_BITSIZE (mode);

      /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
	 be in the range 0 to total_bits-1, and put any excess bytes in
	 OFFSET.  */
      if (bitpos >= total_bits)
	{
	  offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
	  bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
		     * BITS_PER_UNIT);
	}

      /* Get ref to an aligned byte, halfword, or word containing the field.
	 Adjust BITPOS to be position within a word,
	 and OFFSET to be the offset of that word.
	 Then alter OP0 to refer to that word.  */
      bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
      offset -= (offset % (total_bits / BITS_PER_UNIT));
      op0 = adjust_address (op0, mode, offset);
    }

  mode = GET_MODE (op0);

  if (BYTES_BIG_ENDIAN)
    {
      /* BITPOS is the distance between our msb and that of OP0.
	 Convert it to the distance from the lsb.  */

      bitpos = total_bits - bitsize - bitpos;
    }

  /* Now BITPOS is always the distance between the field's lsb and that of OP0.
     We have reduced the big-endian case to the little-endian case.  */

  if (unsignedp)
    {
      if (bitpos)
	{
	  /* If the field does not already start at the lsb,
	     shift it so it does.  */
	  tree amount = build_int_2 (bitpos, 0);
	  /* Maybe propagate the target for the shift.  */
	  /* But not if we will return it--could confuse integrate.c.  */
	  rtx subtarget = (target != 0 && GET_CODE (target) == REG
			   && !REG_FUNCTION_VALUE_P (target)
			   ? target : 0);
	  if (tmode != mode) subtarget = 0;
	  op0 = expand_shift (RSHIFT_EXPR, mode, op0, amount, subtarget, 1);
	}
      /* Convert the value to the desired mode.  */
      if (mode != tmode)
	op0 = convert_to_mode (tmode, op0, 1);

      /* Unless the msb of the field used to be the msb when we shifted,
	 mask out the upper bits.  */

      if (GET_MODE_BITSIZE (mode) != bitpos + bitsize
#if 0
#ifdef SLOW_ZERO_EXTEND
	  /* Always generate an `and' if
	     we just zero-extended op0 and SLOW_ZERO_EXTEND, since it
	     will combine fruitfully with the zero-extend.  */
	  || tmode != mode
#endif
#endif
	  )
	return expand_binop (GET_MODE (op0), and_optab, op0,
			     mask_rtx (GET_MODE (op0), 0, bitsize, 0),
			     target, 1, OPTAB_LIB_WIDEN);
      return op0;
    }

  /* To extract a signed bit-field, first shift its msb to the msb of the word,
     then arithmetic-shift its lsb to the lsb of the word.  */
  op0 = force_reg (mode, op0);
  if (mode != tmode)
    target = 0;

  /* Find the narrowest integer mode that contains the field.  */

  for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
       mode = GET_MODE_WIDER_MODE (mode))
    if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos)
      {
	op0 = convert_to_mode (mode, op0, 0);
	break;
      }

  if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos))
    {
      tree amount = build_int_2 (GET_MODE_BITSIZE (mode) - (bitsize + bitpos), 0);
      /* Maybe propagate the target for the shift.  */
      /* But not if we will return the result--could confuse integrate.c.  */
      rtx subtarget = (target != 0 && GET_CODE (target) == REG
		       && ! REG_FUNCTION_VALUE_P (target)
		       ? target : 0);
      op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
    }

  return expand_shift (RSHIFT_EXPR, mode, op0,
		       build_int_2 (GET_MODE_BITSIZE (mode) - bitsize, 0),
		       target, 0);
}

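/* A minimal sketch, excluded from compilation, of the two-shift signed
   extraction performed above.  The helper name is hypothetical; it
   assumes 32-bit ints, an arithmetic right shift of signed values, and
   BITPOS already counted from the lsb.  */
#if 0
static int
extract_signed_bit_field_sketch (word, bitpos, bitsize)
     unsigned int word;
     int bitpos, bitsize;
{
  /* Left-justify the field, then arithmetic-shift it back down so the
     field's sign bit fills the upper bits.  */
  return ((int) (word << (32 - bitsize - bitpos))) >> (32 - bitsize);
}
#endif
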
/* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
   of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
   complement of that if COMPLEMENT.  The mask is truncated if
   necessary to the width of mode MODE.  The mask is zero-extended if
   BITSIZE+BITPOS is too small for MODE.  */

static rtx
mask_rtx (mode, bitpos, bitsize, complement)
     enum machine_mode mode;
     int bitpos, bitsize, complement;
{
  HOST_WIDE_INT masklow, maskhigh;

  if (bitpos < HOST_BITS_PER_WIDE_INT)
    masklow = (HOST_WIDE_INT) -1 << bitpos;
  else
    masklow = 0;

  if (bitpos + bitsize < HOST_BITS_PER_WIDE_INT)
    masklow &= ((unsigned HOST_WIDE_INT) -1
		>> (HOST_BITS_PER_WIDE_INT - bitpos - bitsize));

  if (bitpos <= HOST_BITS_PER_WIDE_INT)
    maskhigh = -1;
  else
    maskhigh = (HOST_WIDE_INT) -1 << (bitpos - HOST_BITS_PER_WIDE_INT);

  if (bitpos + bitsize > HOST_BITS_PER_WIDE_INT)
    maskhigh &= ((unsigned HOST_WIDE_INT) -1
		 >> (2 * HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
  else
    maskhigh = 0;

  if (complement)
    {
      maskhigh = ~maskhigh;
      masklow = ~masklow;
    }

  return immed_double_const (masklow, maskhigh, mode);
}

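/* For example, on a host with HOST_BITS_PER_WIDE_INT == 32,
   mask_rtx (SImode, 8, 4, 0) computes masklow == 0x00000f00 and
   maskhigh == 0 (four ones starting at bit 8); with COMPLEMENT nonzero,
   masklow becomes 0xfffff0ff instead.  (Illustrative numbers only.)  */
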
/* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
   VALUE truncated to BITSIZE bits and then shifted left BITPOS bits.  */

static rtx
lshift_value (mode, value, bitpos, bitsize)
     enum machine_mode mode;
     rtx value;
     int bitpos, bitsize;
{
  unsigned HOST_WIDE_INT v = INTVAL (value);
  HOST_WIDE_INT low, high;

  if (bitsize < HOST_BITS_PER_WIDE_INT)
    v &= ~((HOST_WIDE_INT) -1 << bitsize);

  if (bitpos < HOST_BITS_PER_WIDE_INT)
    {
      low = v << bitpos;
      high = (bitpos > 0 ? (v >> (HOST_BITS_PER_WIDE_INT - bitpos)) : 0);
    }
  else
    {
      low = 0;
      high = v << (bitpos - HOST_BITS_PER_WIDE_INT);
    }

  return immed_double_const (low, high, mode);
}

1787 /* Extract a bit field that is split across two words
1788 and return an RTX for the result.
1790 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1791 BITSIZE is the field width; BITPOS is the position of its first bit in the word.
1792 UNSIGNEDP is 1 if the contents should be zero-extended; otherwise they are sign-extended.
1794 ALIGN is the known alignment of OP0. This is also the size of the
1795 memory objects to be used. */
1797 static rtx
1798 extract_split_bit_field (op0, bitsize, bitpos, unsignedp, align)
1799 rtx op0;
1800 unsigned HOST_WIDE_INT bitsize, bitpos;
1801 int unsignedp;
1802 unsigned int align;
1804 unsigned int unit;
1805 unsigned int bitsdone = 0;
1806 rtx result = NULL_RTX;
1807 int first = 1;
1809 /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
1810 much at a time. */
1811 if (GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
1812 unit = BITS_PER_WORD;
1813 else
1814 unit = MIN (align, BITS_PER_WORD);
1816 while (bitsdone < bitsize)
1818 unsigned HOST_WIDE_INT thissize;
1819 rtx part, word;
1820 unsigned HOST_WIDE_INT thispos;
1821 unsigned HOST_WIDE_INT offset;
1823 offset = (bitpos + bitsdone) / unit;
1824 thispos = (bitpos + bitsdone) % unit;
1826 /* THISSIZE must not overrun a word boundary. Otherwise,
1827 extract_fixed_bit_field will call us again, and we will mutually
1828 recurse forever. */
1829 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1830 thissize = MIN (thissize, unit - thispos);
1832 /* If OP0 is a register, then handle OFFSET here.
1834 When handling multiword bitfields, extract_bit_field may pass
1835 down a word_mode SUBREG of a larger REG for a bitfield that actually
1836 crosses a word boundary. Thus, for a SUBREG, we must find
1837 the current word starting from the base register. */
1838 if (GET_CODE (op0) == SUBREG)
1840 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1841 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1842 GET_MODE (SUBREG_REG (op0)));
1843 offset = 0;
1845 else if (GET_CODE (op0) == REG)
1847 word = operand_subword_force (op0, offset, GET_MODE (op0));
1848 offset = 0;
1850 else
1851 word = op0;
1853 /* Extract the parts in bit-counting order,
1854 whose meaning is determined by BYTES_PER_UNIT.
1855 OFFSET is in UNITs, and UNIT is in bits.
1856 extract_fixed_bit_field wants offset in bytes. */
1857 part = extract_fixed_bit_field (word_mode, word,
1858 offset * unit / BITS_PER_UNIT,
1859 thissize, thispos, 0, 1, align);
1860 bitsdone += thissize;
1862 /* Shift this part into place for the result. */
1863 if (BYTES_BIG_ENDIAN)
1865 if (bitsize != bitsdone)
1866 part = expand_shift (LSHIFT_EXPR, word_mode, part,
1867 build_int_2 (bitsize - bitsdone, 0), 0, 1);
1869 else
1871 if (bitsdone != thissize)
1872 part = expand_shift (LSHIFT_EXPR, word_mode, part,
1873 build_int_2 (bitsdone - thissize, 0), 0, 1);
1876 if (first)
1877 result = part;
1878 else
1879 /* Combine the parts with bitwise or. This works
1880 because we extracted each part as an unsigned bit field. */
1881 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
1882 OPTAB_LIB_WIDEN);
1884 first = 0;
1887 /* Unsigned bit field: we are done. */
1888 if (unsignedp)
1889 return result;
1890 /* Signed bit field: sign-extend with two arithmetic shifts. */
1891 result = expand_shift (LSHIFT_EXPR, word_mode, result,
1892 build_int_2 (BITS_PER_WORD - bitsize, 0),
1893 NULL_RTX, 0);
1894 return expand_shift (RSHIFT_EXPR, word_mode, result,
1895 build_int_2 (BITS_PER_WORD - bitsize, 0), NULL_RTX, 0);
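
/* Illustrative sketch (not part of the original source): extracting an
   unsigned field that straddles two 32-bit little-endian words using the
   strategy above -- fetch each part as an unsigned field, then OR the
   parts together.  Assumes 0 < bitpos < 32 < bitpos + bitsize; the
   helper name is hypothetical.  */
#if 0
static unsigned int
extract_split_sketch (lo_word, hi_word, bitpos, bitsize)
     unsigned int lo_word, hi_word;
     int bitpos, bitsize;
{
  unsigned int part0 = lo_word >> bitpos;         /* low part, at bit 0 */
  unsigned int part1 = hi_word << (32 - bitpos);  /* high part, above it */
  /* Mask to BITSIZE bits; a signed extract would instead sign-extend
     with the two arithmetic shifts used just above.  */
  return (part0 | part1) & ((bitsize < 32 ? (1u << bitsize) : 0) - 1);
}
#endif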
1898 /* Add INC into TARGET. */
1900 void
1901 expand_inc (target, inc)
1902 rtx target, inc;
1904 rtx value = expand_binop (GET_MODE (target), add_optab,
1905 target, inc,
1906 target, 0, OPTAB_LIB_WIDEN);
1907 if (value != target)
1908 emit_move_insn (target, value);
1911 /* Subtract DEC from TARGET. */
1913 void
1914 expand_dec (target, dec)
1915 rtx target, dec;
1917 rtx value = expand_binop (GET_MODE (target), sub_optab,
1918 target, dec,
1919 target, 0, OPTAB_LIB_WIDEN);
1920 if (value != target)
1921 emit_move_insn (target, value);
1924 /* Output a shift instruction for expression code CODE,
1925 with SHIFTED being the rtx for the value to shift,
1926 and AMOUNT the tree for the amount to shift by.
1927 Store the result in the rtx TARGET, if that is convenient.
1928 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
1929 Return the rtx for where the value is. */
1932 expand_shift (code, mode, shifted, amount, target, unsignedp)
1933 enum tree_code code;
1934 enum machine_mode mode;
1935 rtx shifted;
1936 tree amount;
1937 rtx target;
1938 int unsignedp;
1940 rtx op1, temp = 0;
1941 int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
1942 int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
1943 int try;
1945 /* Previously detected shift-counts computed by NEGATE_EXPR
1946 and shifted in the other direction; but that does not work
1947 on all machines. */
1949 op1 = expand_expr (amount, NULL_RTX, VOIDmode, 0);
1951 #ifdef SHIFT_COUNT_TRUNCATED
1952 if (SHIFT_COUNT_TRUNCATED)
1954 if (GET_CODE (op1) == CONST_INT
1955 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
1956 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
1957 op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
1958 % GET_MODE_BITSIZE (mode));
1959 else if (GET_CODE (op1) == SUBREG
1960 && SUBREG_BYTE (op1) == 0)
1961 op1 = SUBREG_REG (op1);
1963 #endif
1965 if (op1 == const0_rtx)
1966 return shifted;
1968 for (try = 0; temp == 0 && try < 3; try++)
1970 enum optab_methods methods;
1972 if (try == 0)
1973 methods = OPTAB_DIRECT;
1974 else if (try == 1)
1975 methods = OPTAB_WIDEN;
1976 else
1977 methods = OPTAB_LIB_WIDEN;
1979 if (rotate)
1981 /* Widening does not work for rotation. */
1982 if (methods == OPTAB_WIDEN)
1983 continue;
1984 else if (methods == OPTAB_LIB_WIDEN)
1986 /* If we have been unable to open-code this by a rotation,
1987 do it as the IOR of two shifts. I.e., to rotate A
1988 by N bits, compute (A << N) | ((unsigned) A >> (C - N))
1989 where C is the bitsize of A.
1991 It is theoretically possible that the target machine might
1992 not be able to perform either shift and hence we would
1993 be making two libcalls rather than just the one for the
1994 shift (similarly if IOR could not be done). We will allow
1995 this extremely unlikely lossage to avoid complicating the
1996 code below. */
1998 rtx subtarget = target == shifted ? 0 : target;
1999 rtx temp1;
2000 tree type = TREE_TYPE (amount);
2001 tree new_amount = make_tree (type, op1);
2002 tree other_amount
2003 = fold (build (MINUS_EXPR, type,
2004 convert (type,
2005 build_int_2 (GET_MODE_BITSIZE (mode),
2006 0)),
2007 amount));
2009 shifted = force_reg (mode, shifted);
2011 temp = expand_shift (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2012 mode, shifted, new_amount, subtarget, 1);
2013 temp1 = expand_shift (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2014 mode, shifted, other_amount, 0, 1);
2015 return expand_binop (mode, ior_optab, temp, temp1, target,
2016 unsignedp, methods);
2019 temp = expand_binop (mode,
2020 left ? rotl_optab : rotr_optab,
2021 shifted, op1, target, unsignedp, methods);
2023 /* If we don't have the rotate, but we are rotating by a constant
2024 that is in range, try a rotate in the opposite direction. */
2026 if (temp == 0 && GET_CODE (op1) == CONST_INT
2027 && INTVAL (op1) > 0
2028 && (unsigned int) INTVAL (op1) < GET_MODE_BITSIZE (mode))
2029 temp = expand_binop (mode,
2030 left ? rotr_optab : rotl_optab,
2031 shifted,
2032 GEN_INT (GET_MODE_BITSIZE (mode)
2033 - INTVAL (op1)),
2034 target, unsignedp, methods);
2036 else if (unsignedp)
2037 temp = expand_binop (mode,
2038 left ? ashl_optab : lshr_optab,
2039 shifted, op1, target, unsignedp, methods);
2041 /* Do arithmetic shifts.
2042 Also, if we are going to widen the operand, we can just as well
2043 use an arithmetic right-shift instead of a logical one. */
2044 if (temp == 0 && ! rotate
2045 && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2047 enum optab_methods methods1 = methods;
2049 /* If trying to widen a log shift to an arithmetic shift,
2050 don't accept an arithmetic shift of the same size. */
2051 if (unsignedp)
2052 methods1 = OPTAB_MUST_WIDEN;
2054 /* Arithmetic shift */
2056 temp = expand_binop (mode,
2057 left ? ashl_optab : ashr_optab,
2058 shifted, op1, target, unsignedp, methods1);
2061 /* We used to try extzv here for logical right shifts, but that was
2062 only useful for one machine, the VAX, and caused poor code
2063 generation there for lshrdi3, so the code was deleted and a
2064 define_expand for lshrsi3 was added to vax.md. */
2067 if (temp == 0)
2068 abort ();
2069 return temp;
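
/* Illustrative sketch (not part of the original source): the
   rotate-as-two-shifts fallback used above,
   (A << N) | ((unsigned) A >> (C - N)), for a 32-bit word and
   0 < n < 32.  The helper name is hypothetical.  */
#if 0
static unsigned int
rotl_sketch (a, n)
     unsigned int a;
     int n;
{
  return (a << n) | (a >> (32 - n));
}
#endif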
2072 enum alg_code { alg_zero, alg_m, alg_shift,
2073 alg_add_t_m2, alg_sub_t_m2,
2074 alg_add_factor, alg_sub_factor,
2075 alg_add_t2_m, alg_sub_t2_m,
2076 alg_add, alg_subtract, alg_factor, alg_shiftop };
2078 /* This structure records a sequence of operations.
2079 `ops' is the number of operations recorded.
2080 `cost' is their total cost.
2081 The operations are stored in `op' and the corresponding
2082 logarithms of the integer coefficients in `log'.
2084 These are the operations:
2085 alg_zero total := 0;
2086 alg_m total := multiplicand;
2087 alg_shift total := total * coeff
2088 alg_add_t_m2 total := total + multiplicand * coeff;
2089 alg_sub_t_m2 total := total - multiplicand * coeff;
2090 alg_add_factor total := total * coeff + total;
2091 alg_sub_factor total := total * coeff - total;
2092 alg_add_t2_m total := total * coeff + multiplicand;
2093 alg_sub_t2_m total := total * coeff - multiplicand;
2095 The first operand must be either alg_zero or alg_m. */
2097 struct algorithm
2099 short cost;
2100 short ops;
2101 /* The sizes of the OP and LOG fields are not directly related to the
2102 word size, but the worst case arises when we have few
2103 consecutive ones or zeros, i.e., a multiplicand like 10101010101...
2104 In that case we will generate shift-by-2, add, shift-by-2, add,...,
2105 in total wordsize operations. */
2106 enum alg_code op[MAX_BITS_PER_WORD];
2107 char log[MAX_BITS_PER_WORD];
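
/* Illustrative example (not part of the original source): one sequence
   this structure can describe for multiplying by 10, namely
   10*x == ((x << 2) + x) << 1:
   op[0] = alg_m          log[0] = 0   total = x
   op[1] = alg_add_factor log[1] = 2   total = total*4 + total = 5*x
   op[2] = alg_shift      log[2] = 1   total = total*2 = 10*x
   A plain-C rendering of that sequence, with a hypothetical name:  */
#if 0
static unsigned int
mult_by_10_sketch (x)
     unsigned int x;
{
  unsigned int total = x;        /* alg_m */
  total = (total << 2) + total;  /* alg_add_factor, log == 2 */
  total = total << 1;            /* alg_shift, log == 1 */
  return total;                  /* == 10 * x */
}
#endif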
2110 static void synth_mult PARAMS ((struct algorithm *,
2111 unsigned HOST_WIDE_INT,
2112 int));
2113 static unsigned HOST_WIDE_INT choose_multiplier PARAMS ((unsigned HOST_WIDE_INT,
2114 int, int,
2115 unsigned HOST_WIDE_INT *,
2116 int *, int *));
2117 static unsigned HOST_WIDE_INT invert_mod2n PARAMS ((unsigned HOST_WIDE_INT,
2118 int));
2119 /* Compute and return the best algorithm for multiplying by T.
2120 The algorithm must cost less than COST_LIMIT.
2121 If retval.cost >= COST_LIMIT, no algorithm was found and all
2122 other fields of the returned struct are undefined. */
2124 static void
2125 synth_mult (alg_out, t, cost_limit)
2126 struct algorithm *alg_out;
2127 unsigned HOST_WIDE_INT t;
2128 int cost_limit;
2130 int m;
2131 struct algorithm *alg_in, *best_alg;
2132 int cost;
2133 unsigned HOST_WIDE_INT q;
2135 /* Indicate that no algorithm is yet found. If no algorithm
2136 is found, this value will be returned, indicating failure. */
2137 alg_out->cost = cost_limit;
2139 if (cost_limit <= 0)
2140 return;
2142 /* t == 1 can be done at zero cost. */
2143 if (t == 1)
2145 alg_out->ops = 1;
2146 alg_out->cost = 0;
2147 alg_out->op[0] = alg_m;
2148 return;
2151 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2152 fail now. */
2153 if (t == 0)
2155 if (zero_cost >= cost_limit)
2156 return;
2157 else
2159 alg_out->ops = 1;
2160 alg_out->cost = zero_cost;
2161 alg_out->op[0] = alg_zero;
2162 return;
2166 /* We'll be needing a couple extra algorithm structures now. */
2168 alg_in = (struct algorithm *)alloca (sizeof (struct algorithm));
2169 best_alg = (struct algorithm *)alloca (sizeof (struct algorithm));
2171 /* If we have a group of zero bits at the low-order part of T, try
2172 multiplying by the remaining bits and then doing a shift. */
2174 if ((t & 1) == 0)
2176 m = floor_log2 (t & -t); /* m = number of low zero bits */
2177 if (m < BITS_PER_WORD)
2179 q = t >> m;
2180 cost = shift_cost[m];
2181 synth_mult (alg_in, q, cost_limit - cost);
2183 cost += alg_in->cost;
2184 if (cost < cost_limit)
2186 struct algorithm *x;
2187 x = alg_in, alg_in = best_alg, best_alg = x;
2188 best_alg->log[best_alg->ops] = m;
2189 best_alg->op[best_alg->ops] = alg_shift;
2190 cost_limit = cost;
2195 /* If we have an odd number, add or subtract one. */
2196 if ((t & 1) != 0)
2198 unsigned HOST_WIDE_INT w;
2200 for (w = 1; (w & t) != 0; w <<= 1)
2202 /* If T was -1, then W will be zero after the loop. This is another
2203 case where T ends with ...111. Handling this by multiplying by
2204 (T + 1) and subtracting 1 produces slightly better code and selects
2205 an algorithm much faster than treating it like the ...0111 case
2206 below. */
2207 if (w == 0
2208 || (w > 2
2209 /* Reject the case where t is 3.
2210 Thus we prefer addition in that case. */
2211 && t != 3))
2213 /* T ends with ...111. Multiply by (T + 1) and subtract 1. */
2215 cost = add_cost;
2216 synth_mult (alg_in, t + 1, cost_limit - cost);
2218 cost += alg_in->cost;
2219 if (cost < cost_limit)
2221 struct algorithm *x;
2222 x = alg_in, alg_in = best_alg, best_alg = x;
2223 best_alg->log[best_alg->ops] = 0;
2224 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2225 cost_limit = cost;
2228 else
2230 /* T ends with ...01 or ...011. Multiply by (T - 1) and add 1. */
2232 cost = add_cost;
2233 synth_mult (alg_in, t - 1, cost_limit - cost);
2235 cost += alg_in->cost;
2236 if (cost < cost_limit)
2238 struct algorithm *x;
2239 x = alg_in, alg_in = best_alg, best_alg = x;
2240 best_alg->log[best_alg->ops] = 0;
2241 best_alg->op[best_alg->ops] = alg_add_t_m2;
2242 cost_limit = cost;
2247 /* Look for factors of t of the form
2248 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2249 If we find such a factor, we can multiply by t using an algorithm that
2250 multiplies by q, shifts the result by m, and adds or subtracts the unshifted result.
2252 We search for large factors first and loop down, even if large factors
2253 are less probable than small; if we find a large factor we will find a
2254 good sequence quickly, and therefore be able to prune (by decreasing
2255 COST_LIMIT) the search. */
2257 for (m = floor_log2 (t - 1); m >= 2; m--)
2259 unsigned HOST_WIDE_INT d;
2261 d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2262 if (t % d == 0 && t > d && m < BITS_PER_WORD)
2264 cost = MIN (shiftadd_cost[m], add_cost + shift_cost[m]);
2265 synth_mult (alg_in, t / d, cost_limit - cost);
2267 cost += alg_in->cost;
2268 if (cost < cost_limit)
2270 struct algorithm *x;
2271 x = alg_in, alg_in = best_alg, best_alg = x;
2272 best_alg->log[best_alg->ops] = m;
2273 best_alg->op[best_alg->ops] = alg_add_factor;
2274 cost_limit = cost;
2276 /* Other factors will have been taken care of in the recursion. */
2277 break;
2280 d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2281 if (t % d == 0 && t > d && m < BITS_PER_WORD)
2283 cost = MIN (shiftsub_cost[m], add_cost + shift_cost[m]);
2284 synth_mult (alg_in, t / d, cost_limit - cost);
2286 cost += alg_in->cost;
2287 if (cost < cost_limit)
2289 struct algorithm *x;
2290 x = alg_in, alg_in = best_alg, best_alg = x;
2291 best_alg->log[best_alg->ops] = m;
2292 best_alg->op[best_alg->ops] = alg_sub_factor;
2293 cost_limit = cost;
2295 break;
2299 /* Try shift-and-add (load effective address) instructions,
2300 i.e. do a*3, a*5, a*9. */
2301 if ((t & 1) != 0)
2303 q = t - 1;
2304 q = q & -q;
2305 m = exact_log2 (q);
2306 if (m >= 0 && m < BITS_PER_WORD)
2308 cost = shiftadd_cost[m];
2309 synth_mult (alg_in, (t - 1) >> m, cost_limit - cost);
2311 cost += alg_in->cost;
2312 if (cost < cost_limit)
2314 struct algorithm *x;
2315 x = alg_in, alg_in = best_alg, best_alg = x;
2316 best_alg->log[best_alg->ops] = m;
2317 best_alg->op[best_alg->ops] = alg_add_t2_m;
2318 cost_limit = cost;
2322 q = t + 1;
2323 q = q & -q;
2324 m = exact_log2 (q);
2325 if (m >= 0 && m < BITS_PER_WORD)
2327 cost = shiftsub_cost[m];
2328 synth_mult (alg_in, (t + 1) >> m, cost_limit - cost);
2330 cost += alg_in->cost;
2331 if (cost < cost_limit)
2333 struct algorithm *x;
2334 x = alg_in, alg_in = best_alg, best_alg = x;
2335 best_alg->log[best_alg->ops] = m;
2336 best_alg->op[best_alg->ops] = alg_sub_t2_m;
2337 cost_limit = cost;
2342 /* If cost_limit has not decreased since we stored it in alg_out->cost,
2343 we have not found any algorithm. */
2344 if (cost_limit == alg_out->cost)
2345 return;
2347 /* If the sequence is getting too long for `struct algorithm'
2348 to record, make this search fail. */
2349 if (best_alg->ops == MAX_BITS_PER_WORD)
2350 return;
2352 /* Copy the algorithm from temporary space to the space at alg_out.
2353 We avoid using structure assignment because the majority of
2354 best_alg is normally undefined, and this is a critical function. */
2355 alg_out->ops = best_alg->ops + 1;
2356 alg_out->cost = cost_limit;
2357 memcpy (alg_out->op, best_alg->op,
2358 alg_out->ops * sizeof *alg_out->op);
2359 memcpy (alg_out->log, best_alg->log,
2360 alg_out->ops * sizeof *alg_out->log);
2363 /* Perform a multiplication and return an rtx for the result.
2364 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
2365 TARGET is a suggestion for where to store the result (an rtx).
2367 We check specially for a constant integer as OP1.
2368 If you want this check for OP0 as well, then before calling
2369 you should swap the two operands if OP0 would be constant. */
2372 expand_mult (mode, op0, op1, target, unsignedp)
2373 enum machine_mode mode;
2374 rtx op0, op1, target;
2375 int unsignedp;
2377 rtx const_op1 = op1;
2379 /* synth_mult does an `unsigned int' multiply. As long as the mode is
2380 less than or equal in size to `unsigned int' this doesn't matter.
2381 If the mode is larger than `unsigned int', then synth_mult works only
2382 if the constant value exactly fits in an `unsigned int' without any
2383 truncation. This means that multiplying by negative values does
2384 not work; results are off by 2^32 on a 32 bit machine. */
2386 /* If we are multiplying in DImode, it may still be a win
2387 to try to work with shifts and adds. */
2388 if (GET_CODE (op1) == CONST_DOUBLE
2389 && GET_MODE_CLASS (GET_MODE (op1)) == MODE_INT
2390 && HOST_BITS_PER_INT >= BITS_PER_WORD
2391 && CONST_DOUBLE_HIGH (op1) == 0)
2392 const_op1 = GEN_INT (CONST_DOUBLE_LOW (op1));
2393 else if (HOST_BITS_PER_INT < GET_MODE_BITSIZE (mode)
2394 && GET_CODE (op1) == CONST_INT
2395 && INTVAL (op1) < 0)
2396 const_op1 = 0;
2398 /* We used to test optimize here, on the grounds that it's better to
2399 produce a smaller program when -O is not used.
2400 But this causes such a terrible slowdown sometimes
2401 that it seems better to use synth_mult always. */
2403 if (const_op1 && GET_CODE (const_op1) == CONST_INT
2404 && (unsignedp || ! flag_trapv))
2406 struct algorithm alg;
2407 struct algorithm alg2;
2408 HOST_WIDE_INT val = INTVAL (op1);
2409 HOST_WIDE_INT val_so_far;
2410 rtx insn;
2411 int mult_cost;
2412 enum {basic_variant, negate_variant, add_variant} variant = basic_variant;
2414 /* Try to do the computation three ways: multiply by the negative of OP1
2415 and then negate, do the multiplication directly, or do multiplication
2416 by OP1 - 1. */
2418 mult_cost = rtx_cost (gen_rtx_MULT (mode, op0, op1), SET);
2419 mult_cost = MIN (12 * add_cost, mult_cost);
2421 synth_mult (&alg, val, mult_cost);
2423 /* This works only if the inverted value actually fits in an
2424 `unsigned int' */
2425 if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
2427 synth_mult (&alg2, - val,
2428 (alg.cost < mult_cost ? alg.cost : mult_cost) - negate_cost);
2429 if (alg2.cost + negate_cost < alg.cost)
2430 alg = alg2, variant = negate_variant;
2433 /* This proves very useful for division-by-constant. */
2434 synth_mult (&alg2, val - 1,
2435 (alg.cost < mult_cost ? alg.cost : mult_cost) - add_cost);
2436 if (alg2.cost + add_cost < alg.cost)
2437 alg = alg2, variant = add_variant;
2439 if (alg.cost < mult_cost)
2441 /* We found something cheaper than a multiply insn. */
2442 int opno;
2443 rtx accum, tem;
2444 enum machine_mode nmode;
2446 op0 = protect_from_queue (op0, 0);
2448 /* Avoid referencing memory over and over.
2449 For speed, but also for correctness when mem is volatile. */
2450 if (GET_CODE (op0) == MEM)
2451 op0 = force_reg (mode, op0);
2453 /* ACCUM starts out either as OP0 or as a zero, depending on
2454 the first operation. */
2456 if (alg.op[0] == alg_zero)
2458 accum = copy_to_mode_reg (mode, const0_rtx);
2459 val_so_far = 0;
2461 else if (alg.op[0] == alg_m)
2463 accum = copy_to_mode_reg (mode, op0);
2464 val_so_far = 1;
2466 else
2467 abort ();
2469 for (opno = 1; opno < alg.ops; opno++)
2471 int log = alg.log[opno];
2472 int preserve = preserve_subexpressions_p ();
2473 rtx shift_subtarget = preserve ? 0 : accum;
2474 rtx add_target
2475 = (opno == alg.ops - 1 && target != 0 && variant != add_variant
2476 && ! preserve)
2477 ? target : 0;
2478 rtx accum_target = preserve ? 0 : accum;
2480 switch (alg.op[opno])
2482 case alg_shift:
2483 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2484 build_int_2 (log, 0), NULL_RTX, 0);
2485 val_so_far <<= log;
2486 break;
2488 case alg_add_t_m2:
2489 tem = expand_shift (LSHIFT_EXPR, mode, op0,
2490 build_int_2 (log, 0), NULL_RTX, 0);
2491 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2492 add_target
2493 ? add_target : accum_target);
2494 val_so_far += (HOST_WIDE_INT) 1 << log;
2495 break;
2497 case alg_sub_t_m2:
2498 tem = expand_shift (LSHIFT_EXPR, mode, op0,
2499 build_int_2 (log, 0), NULL_RTX, 0);
2500 accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
2501 add_target
2502 ? add_target : accum_target);
2503 val_so_far -= (HOST_WIDE_INT) 1 << log;
2504 break;
2506 case alg_add_t2_m:
2507 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2508 build_int_2 (log, 0), shift_subtarget,
2509 0);
2510 accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
2511 add_target
2512 ? add_target : accum_target);
2513 val_so_far = (val_so_far << log) + 1;
2514 break;
2516 case alg_sub_t2_m:
2517 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2518 build_int_2 (log, 0), shift_subtarget,
2519 0);
2520 accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
2521 add_target
2522 ? add_target : accum_target);
2523 val_so_far = (val_so_far << log) - 1;
2524 break;
2526 case alg_add_factor:
2527 tem = expand_shift (LSHIFT_EXPR, mode, accum,
2528 build_int_2 (log, 0), NULL_RTX, 0);
2529 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2530 add_target
2531 ? add_target : accum_target);
2532 val_so_far += val_so_far << log;
2533 break;
2535 case alg_sub_factor:
2536 tem = expand_shift (LSHIFT_EXPR, mode, accum,
2537 build_int_2 (log, 0), NULL_RTX, 0);
2538 accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
2539 (add_target ? add_target
2540 : preserve ? 0 : tem));
2541 val_so_far = (val_so_far << log) - val_so_far;
2542 break;
2544 default:
2545 abort ();
2548 /* Write a REG_EQUAL note on the last insn so that we can cse
2549 multiplication sequences. Note that if ACCUM is a SUBREG,
2550 we've set the inner register and must properly indicate
2551 that. */
2553 tem = op0, nmode = mode;
2554 if (GET_CODE (accum) == SUBREG)
2556 nmode = GET_MODE (SUBREG_REG (accum));
2557 tem = gen_lowpart (nmode, op0);
2560 insn = get_last_insn ();
2561 set_unique_reg_note (insn,
2562 REG_EQUAL,
2563 gen_rtx_MULT (nmode, tem,
2564 GEN_INT (val_so_far)));
2567 if (variant == negate_variant)
2569 val_so_far = - val_so_far;
2570 accum = expand_unop (mode, neg_optab, accum, target, 0);
2572 else if (variant == add_variant)
2574 val_so_far = val_so_far + 1;
2575 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
2578 if (val != val_so_far)
2579 abort ();
2581 return accum;
2585 /* This used to use umul_optab if unsigned, but for non-widening multiply
2586 there is no difference between signed and unsigned. */
2587 op0 = expand_binop (mode,
2588 ! unsignedp
2589 && flag_trapv && (GET_MODE_CLASS(mode) == MODE_INT)
2590 ? smulv_optab : smul_optab,
2591 op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
2592 if (op0 == 0)
2593 abort ();
2594 return op0;
2597 /* Return the smallest n such that 2**n >= X. */
2600 ceil_log2 (x)
2601 unsigned HOST_WIDE_INT x;
2603 return floor_log2 (x - 1) + 1;
2606 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
2607 replace division by D, and put the least significant N bits of the result
2608 in *MULTIPLIER_PTR and return the most significant bit.
2610 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
2611 needed precision is in PRECISION (should be <= N).
2613 PRECISION should be as small as possible so this function can choose
2614 the multiplier more freely.
2616 The rounded-up logarithm of D is placed in *lgup_ptr. A shift count that
2617 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
2619 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
2620 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
2622 static
2623 unsigned HOST_WIDE_INT
2624 choose_multiplier (d, n, precision, multiplier_ptr, post_shift_ptr, lgup_ptr)
2625 unsigned HOST_WIDE_INT d;
2626 int n;
2627 int precision;
2628 unsigned HOST_WIDE_INT *multiplier_ptr;
2629 int *post_shift_ptr;
2630 int *lgup_ptr;
2632 HOST_WIDE_INT mhigh_hi, mlow_hi;
2633 unsigned HOST_WIDE_INT mhigh_lo, mlow_lo;
2634 int lgup, post_shift;
2635 int pow, pow2;
2636 unsigned HOST_WIDE_INT nl, dummy1;
2637 HOST_WIDE_INT nh, dummy2;
2639 /* lgup = ceil(log2(divisor)); */
2640 lgup = ceil_log2 (d);
2642 if (lgup > n)
2643 abort ();
2645 pow = n + lgup;
2646 pow2 = n + lgup - precision;
2648 if (pow == 2 * HOST_BITS_PER_WIDE_INT)
2650 /* We could handle this with some effort, but this case is much better
2651 handled directly with a scc insn, so rely on caller using that. */
2652 abort ();
2655 /* mlow = 2^(N + lgup)/d */
2656 if (pow >= HOST_BITS_PER_WIDE_INT)
2658 nh = (HOST_WIDE_INT) 1 << (pow - HOST_BITS_PER_WIDE_INT);
2659 nl = 0;
2661 else
2663 nh = 0;
2664 nl = (unsigned HOST_WIDE_INT) 1 << pow;
2666 div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
2667 &mlow_lo, &mlow_hi, &dummy1, &dummy2);
2669 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
2670 if (pow2 >= HOST_BITS_PER_WIDE_INT)
2671 nh |= (HOST_WIDE_INT) 1 << (pow2 - HOST_BITS_PER_WIDE_INT);
2672 else
2673 nl |= (unsigned HOST_WIDE_INT) 1 << pow2;
2674 div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
2675 &mhigh_lo, &mhigh_hi, &dummy1, &dummy2);
2677 if (mhigh_hi && nh - d >= d)
2678 abort ();
2679 if (mhigh_hi > 1 || mlow_hi > 1)
2680 abort ();
2681 /* assert that mlow < mhigh. */
2682 if (! (mlow_hi < mhigh_hi || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo)))
2683 abort();
2685 /* If precision == N, then mlow, mhigh exceed 2^N
2686 (but they do not exceed 2^(N+1)). */
2688 /* Reduce to lowest terms */
2689 for (post_shift = lgup; post_shift > 0; post_shift--)
2691 unsigned HOST_WIDE_INT ml_lo = (mlow_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mlow_lo >> 1);
2692 unsigned HOST_WIDE_INT mh_lo = (mhigh_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mhigh_lo >> 1);
2693 if (ml_lo >= mh_lo)
2694 break;
2696 mlow_hi = 0;
2697 mlow_lo = ml_lo;
2698 mhigh_hi = 0;
2699 mhigh_lo = mh_lo;
2702 *post_shift_ptr = post_shift;
2703 *lgup_ptr = lgup;
2704 if (n < HOST_BITS_PER_WIDE_INT)
2706 unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
2707 *multiplier_ptr = mhigh_lo & mask;
2708 return mhigh_lo >= mask;
2710 else
2712 *multiplier_ptr = mhigh_lo;
2713 return mhigh_hi;
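
/* Illustrative example (not part of the original source): for d == 7 and
   n == precision == 32 the construction above gives lgup == 3,
   mlow == 2^35/7 == 4908534052 and mhigh == (2^35 + 2^3)/7 == 4908534053;
   no halving is possible, so post_shift == 3, *MULTIPLIER_PTR gets the
   low 32 bits 0x24924925, and the returned "33rd bit" is 1.  The caller
   then uses the subtract/shift/add fixup, which in plain C (hypothetical
   helper name, 32-bit `unsigned int') reads:  */
#if 0
static unsigned int
udiv7_sketch (x)
     unsigned int x;
{
  /* High half of x * 0x24924925; the multiplier's implicit 33rd bit is
     folded back in by the steps below.  */
  unsigned int t1 = (unsigned int) (((unsigned long long) x * 0x24924925) >> 32);
  unsigned int t2 = x - t1;
  unsigned int t3 = t2 >> 1;
  unsigned int t4 = t1 + t3;     /* == floor (x * m / 2^33) */
  return t4 >> 2;                /* shift by post_shift - 1 == 2 */
}
#endif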
2717 /* Compute the inverse of X mod 2**N, i.e., find Y such that X * Y is
2718 congruent to 1 (mod 2**N). */
2720 static unsigned HOST_WIDE_INT
2721 invert_mod2n (x, n)
2722 unsigned HOST_WIDE_INT x;
2723 int n;
2725 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
2727 /* The algorithm notes that the choice y = x satisfies
2728 x*y == 1 mod 2^3, since x is assumed odd.
2729 Each iteration doubles the number of bits of significance in y. */
2731 unsigned HOST_WIDE_INT mask;
2732 unsigned HOST_WIDE_INT y = x;
2733 int nbit = 3;
2735 mask = (n == HOST_BITS_PER_WIDE_INT
2736 ? ~(unsigned HOST_WIDE_INT) 0
2737 : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
2739 while (nbit < n)
2741 y = y * (2 - x*y) & mask; /* Modulo 2^N */
2742 nbit *= 2;
2744 return y;
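
/* Illustrative sketch (not part of the original source): the same Newton
   iteration in plain C for a fixed 32-bit word, where the mask is
   implicit in the unsigned arithmetic.  Each step doubles the number of
   correct low-order bits; the helper name is hypothetical.  */
#if 0
static unsigned int
invert_mod2n_sketch (x)
     unsigned int x;          /* X must be odd.  */
{
  unsigned int y = x;        /* already correct to 3 bits */
  int nbit;
  for (nbit = 3; nbit < 32; nbit *= 2)
    y = y * (2 - x * y);     /* arithmetic wraps mod 2^32 */
  return y;
}
/* invert_mod2n_sketch (3) == 0xaaaaaaab, and 3 * 0xaaaaaaab == 1 mod 2^32. */
#endif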
2747 /* Emit code to adjust ADJ_OPERAND after a multiplication of the wrong
2748 signedness flavor of OP0 and OP1. ADJ_OPERAND is already the high half
2749 of the product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed
2750 product to become unsigned; if UNSIGNEDP is zero, adjust the unsigned
2751 product to become signed.
2753 The result is put in TARGET if that is convenient.
2755 MODE is the mode of operation. */
2758 expand_mult_highpart_adjust (mode, adj_operand, op0, op1, target, unsignedp)
2759 enum machine_mode mode;
2760 rtx adj_operand, op0, op1, target;
2761 int unsignedp;
2763 rtx tem;
2764 enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
2766 tem = expand_shift (RSHIFT_EXPR, mode, op0,
2767 build_int_2 (GET_MODE_BITSIZE (mode) - 1, 0),
2768 NULL_RTX, 0);
2769 tem = expand_and (tem, op1, NULL_RTX);
2770 adj_operand
2771 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
2772 adj_operand);
2774 tem = expand_shift (RSHIFT_EXPR, mode, op1,
2775 build_int_2 (GET_MODE_BITSIZE (mode) - 1, 0),
2776 NULL_RTX, 0);
2777 tem = expand_and (tem, op0, NULL_RTX);
2778 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
2779 target);
2781 return target;
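
/* Illustrative sketch (not part of the original source): the identity the
   code above implements.  For an N-bit word, modulo 2^N,
   uhigh (x, y) == shigh (x, y) + (x < 0 ? y : 0) + (y < 0 ? x : 0),
   which is what the two arithmetic-shift/AND/add steps compute.  Checked
   here for N == 32 with a 64-bit widening multiply; the helper name is
   hypothetical and relies on GCC's arithmetic shift of signed values.  */
#if 0
static unsigned int
uhigh_from_shigh_sketch (x, y)
     int x, y;
{
  unsigned int shigh = (unsigned int) (((long long) x * y) >> 32);
  shigh += (x < 0 ? (unsigned int) y : 0);   /* (x >> 31) & y */
  shigh += (y < 0 ? (unsigned int) x : 0);   /* (y >> 31) & x */
  return shigh;
}
#endif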
2784 /* Emit code to multiply OP0 and CNST1, putting the high half of the result
2785 in TARGET if that is convenient, and return where the result is. If the
2786 operation can not be performed, 0 is returned.
2788 MODE is the mode of operation and result.
2790 UNSIGNEDP nonzero means unsigned multiply.
2792 MAX_COST is the total allowed cost for the expanded RTL. */
2795 expand_mult_highpart (mode, op0, cnst1, target, unsignedp, max_cost)
2796 enum machine_mode mode;
2797 rtx op0, target;
2798 unsigned HOST_WIDE_INT cnst1;
2799 int unsignedp;
2800 int max_cost;
2802 enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
2803 optab mul_highpart_optab;
2804 optab moptab;
2805 rtx tem;
2806 int size = GET_MODE_BITSIZE (mode);
2807 rtx op1, wide_op1;
2809 /* We can't support modes wider than HOST_BITS_PER_WIDE_INT. */
2810 if (size > HOST_BITS_PER_WIDE_INT)
2811 abort ();
2813 op1 = GEN_INT (trunc_int_for_mode (cnst1, mode));
2815 if (GET_MODE_BITSIZE (wider_mode) <= HOST_BITS_PER_INT)
2816 wide_op1 = op1;
2817 else
2818 wide_op1
2819 = immed_double_const (cnst1,
2820 (unsignedp
2821 ? (HOST_WIDE_INT) 0
2822 : -(cnst1 >> (HOST_BITS_PER_WIDE_INT - 1))),
2823 wider_mode);
2825 /* expand_mult handles constant multiplication of word_mode
2826 or narrower. It does a poor job for large modes. */
2827 if (size < BITS_PER_WORD
2828 && mul_cost[(int) wider_mode] + shift_cost[size-1] < max_cost)
2830 /* We have to do this, since expand_binop doesn't do conversion for
2831 multiply. Maybe change expand_binop to handle widening multiply? */
2832 op0 = convert_to_mode (wider_mode, op0, unsignedp);
2834 /* We know that this can't have signed overflow, so pretend this is
2835 an unsigned multiply. */
2836 tem = expand_mult (wider_mode, op0, wide_op1, NULL_RTX, 0);
2837 tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
2838 build_int_2 (size, 0), NULL_RTX, 1);
2839 return convert_modes (mode, wider_mode, tem, unsignedp);
2842 if (target == 0)
2843 target = gen_reg_rtx (mode);
2845 /* First, try a multiplication insn that generates only the needed
2846 high part of the product, in the signedness flavor of UNSIGNEDP. */
2847 if (mul_highpart_cost[(int) mode] < max_cost)
2849 mul_highpart_optab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
2850 target = expand_binop (mode, mul_highpart_optab,
2851 op0, op1, target, unsignedp, OPTAB_DIRECT);
2852 if (target)
2853 return target;
2856 /* Second, the same as above, but use the sign flavor opposite of
2857 UNSIGNEDP, and adjust the result after the multiplication. */
2858 if (size - 1 < BITS_PER_WORD
2859 && (mul_highpart_cost[(int) mode] + 2 * shift_cost[size-1] + 4 * add_cost
2860 < max_cost))
2862 mul_highpart_optab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
2863 target = expand_binop (mode, mul_highpart_optab,
2864 op0, op1, target, unsignedp, OPTAB_DIRECT);
2865 if (target)
2866 /* We used the wrong signedness. Adjust the result. */
2867 return expand_mult_highpart_adjust (mode, target, op0,
2868 op1, target, unsignedp);
2871 /* Try widening multiplication. */
2872 moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
2873 if (moptab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
2874 && mul_widen_cost[(int) wider_mode] < max_cost)
2876 op1 = force_reg (mode, op1);
2877 goto try;
2880 /* Try widening the mode and performing a non-widening multiplication. */
2881 moptab = smul_optab;
2882 if (smul_optab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
2883 && size - 1 < BITS_PER_WORD
2884 && mul_cost[(int) wider_mode] + shift_cost[size-1] < max_cost)
2886 op1 = wide_op1;
2887 goto try;
2890 /* Try widening multiplication of opposite signedness, and adjust. */
2891 moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
2892 if (moptab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
2893 && size - 1 < BITS_PER_WORD
2894 && (mul_widen_cost[(int) wider_mode]
2895 + 2 * shift_cost[size-1] + 4 * add_cost < max_cost))
2897 rtx regop1 = force_reg (mode, op1);
2898 tem = expand_binop (wider_mode, moptab, op0, regop1,
2899 NULL_RTX, ! unsignedp, OPTAB_WIDEN);
2900 if (tem != 0)
2902 /* Extract the high half of the just generated product. */
2903 tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
2904 build_int_2 (size, 0), NULL_RTX, 1);
2905 tem = convert_modes (mode, wider_mode, tem, unsignedp);
2906 /* We used the wrong signedness. Adjust the result. */
2907 return expand_mult_highpart_adjust (mode, tem, op0, op1,
2908 target, unsignedp);
2912 return 0;
2914 try:
2915 /* Pass NULL_RTX as target since TARGET has wrong mode. */
2916 tem = expand_binop (wider_mode, moptab, op0, op1,
2917 NULL_RTX, unsignedp, OPTAB_WIDEN);
2918 if (tem == 0)
2919 return 0;
2921 /* Extract the high half of the just generated product. */
2922 if (mode == word_mode)
2924 return gen_highpart (mode, tem);
2926 else
2928 tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
2929 build_int_2 (size, 0), NULL_RTX, 1);
2930 return convert_modes (mode, wider_mode, tem, unsignedp);
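
/* Illustrative sketch (not part of the original source): the widening
   fallback above in plain C for a 32-bit mode -- multiply in the wider
   mode, then take the high half with a single shift.  The helper name is
   hypothetical.  */
#if 0
static unsigned int
umul_highpart_sketch (a, b)
     unsigned int a, b;
{
  return (unsigned int) (((unsigned long long) a * b) >> 32);
}
#endif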
2934 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
2935 if that is convenient, and returning where the result is.
2936 You may request either the quotient or the remainder as the result;
2937 specify REM_FLAG nonzero to get the remainder.
2939 CODE is the expression code for which kind of division this is;
2940 it controls how rounding is done. MODE is the machine mode to use.
2941 UNSIGNEDP nonzero means do unsigned division. */
2943 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
2944 and then correct it by or'ing in missing high bits
2945 if result of ANDI is nonzero.
2946 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
2947 This could optimize to a bfexts instruction.
2948 But C doesn't use these operations, so their optimizations are
2949 left for later. */
2950 /* ??? For modulo, we don't actually need the highpart of the first product,
2951 the low part will do nicely. And for small divisors, the second multiply
2952 can also be a low-part only multiply or even be completely left out.
2953 E.g. to calculate the remainder of a division by 3 with a 32 bit
2954 multiply, multiply with 0x55555556 and extract the upper two bits;
2955 the result is exact for inputs up to 0x1fffffff.
2956 The input range can be reduced by using cross-sum rules.
2957 For odd divisors >= 3, the following table gives right shift counts
2958 so that if a number is shifted by an integer multiple of the given
2959 amount, the remainder stays the same:
2960 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
2961 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
2962 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
2963 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
2964 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
2966 Cross-sum rules for even numbers can be derived by leaving as many bits
2967 to the right alone as the divisor has zeros to the right.
2968 E.g. if x is an unsigned 32 bit number:
2969 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28 */
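
/* Illustrative sketch (not part of the original source): the
   remainder-by-3 trick mentioned above, exact for x <= 0x1fffffff on a
   32-bit `unsigned int'.  The helper name is hypothetical.  */
#if 0
static unsigned int
umod3_sketch (x)
     unsigned int x;
{
  /* The product wraps mod 2^32; its top two bits are x mod 3.  */
  return (x * 0x55555556) >> 30;
}
#endif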
2972 #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
2975 expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp)
2976 int rem_flag;
2977 enum tree_code code;
2978 enum machine_mode mode;
2979 rtx op0, op1, target;
2980 int unsignedp;
2982 enum machine_mode compute_mode;
2983 rtx tquotient;
2984 rtx quotient = 0, remainder = 0;
2985 rtx last;
2986 int size;
2987 rtx insn, set;
2988 optab optab1, optab2;
2989 int op1_is_constant, op1_is_pow2;
2990 int max_cost, extra_cost;
2991 static HOST_WIDE_INT last_div_const = 0;
2993 op1_is_constant = GET_CODE (op1) == CONST_INT;
2994 op1_is_pow2 = (op1_is_constant
2995 && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
2996 || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1))))));
2999 /* This is the structure of expand_divmod:
3001 First comes code to fix up the operands so we can perform the operations
3002 correctly and efficiently.
3004 Second comes a switch statement with code specific for each rounding mode.
3005 For some special operands this code emits all RTL for the desired
3006 operation, for other cases, it generates only a quotient and stores it in
3007 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
3008 to indicate that it has not done anything.
3010 Last comes code that finishes the operation. If QUOTIENT is set and
3011 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
3012 QUOTIENT is not set, it is computed using trunc rounding.
3014 We try to generate special code for division and remainder when OP1 is a
3015 constant. If |OP1| = 2**n we can use shifts and some other fast
3016 operations. For other values of OP1, we compute a carefully selected
3017 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3018 by m.
3020 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3021 half of the product. Different strategies for generating the product are
3022 implemented in expand_mult_highpart.
3024 If what we actually want is the remainder, we generate that by another
3025 by-constant multiplication and a subtraction. */
3027 /* We shouldn't be called with OP1 == const1_rtx, but some of the
3028 code below will malfunction if we are, so check here and handle
3029 the special case if so. */
3030 if (op1 == const1_rtx)
3031 return rem_flag ? const0_rtx : op0;
3033 /* When dividing by -1, we could get an overflow.
3034 negv_optab can handle overflows. */
3035 if (! unsignedp && op1 == constm1_rtx)
3037 if (rem_flag)
3038 return const0_rtx;
3039 return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT
3040 ? negv_optab : neg_optab, op0, target, 0);
3043 if (target
3044 /* Don't use the function value register as a target
3045 since we have to read it as well as write it,
3046 and function-inlining gets confused by this. */
3047 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3048 /* Don't clobber an operand while doing a multi-step calculation. */
3049 || ((rem_flag || op1_is_constant)
3050 && (reg_mentioned_p (target, op0)
3051 || (GET_CODE (op0) == MEM && GET_CODE (target) == MEM)))
3052 || reg_mentioned_p (target, op1)
3053 || (GET_CODE (op1) == MEM && GET_CODE (target) == MEM)))
3054 target = 0;
3056 /* Get the mode in which to perform this computation. Normally it will
3057 be MODE, but sometimes we can't do the desired operation in MODE.
3058 If so, pick a wider mode in which we can do the operation. Convert
3059 to that mode at the start to avoid repeated conversions.
3061 First see what operations we need. These depend on the expression
3062 we are evaluating. (We assume that divxx3 insns exist under the
3063 same conditions as modxx3 insns, and that these insns don't normally
3064 fail. If these assumptions are not correct, we may generate less
3065 efficient code in some cases.)
3067 Then see if we find a mode in which we can open-code that operation
3068 (either a division, modulus, or shift). Finally, check for the smallest
3069 mode for which we can do the operation with a library call. */
3071 /* We might want to refine this now that we have division-by-constant
3072 optimization. Since expand_mult_highpart tries so many variants, it is
3073 not straightforward to generalize this. Maybe we should make an array
3074 of possible modes in init_expmed? Save this for GCC 2.7. */
3076 optab1 = (op1_is_pow2 ? (unsignedp ? lshr_optab : ashr_optab)
3077 : (unsignedp ? udiv_optab : sdiv_optab));
3078 optab2 = (op1_is_pow2 ? optab1 : (unsignedp ? udivmod_optab : sdivmod_optab));
3080 for (compute_mode = mode; compute_mode != VOIDmode;
3081 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3082 if (optab1->handlers[(int) compute_mode].insn_code != CODE_FOR_nothing
3083 || optab2->handlers[(int) compute_mode].insn_code != CODE_FOR_nothing)
3084 break;
3086 if (compute_mode == VOIDmode)
3087 for (compute_mode = mode; compute_mode != VOIDmode;
3088 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3089 if (optab1->handlers[(int) compute_mode].libfunc
3090 || optab2->handlers[(int) compute_mode].libfunc)
3091 break;
3093 /* If we still couldn't find a mode, use MODE, but we'll probably abort
3094 in expand_binop. */
3095 if (compute_mode == VOIDmode)
3096 compute_mode = mode;
3098 if (target && GET_MODE (target) == compute_mode)
3099 tquotient = target;
3100 else
3101 tquotient = gen_reg_rtx (compute_mode);
3103 size = GET_MODE_BITSIZE (compute_mode);
3104 #if 0
3105 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3106 (mode), and thereby get better code when OP1 is a constant. Do that
3107 later. It will require going over all usages of SIZE below. */
3108 size = GET_MODE_BITSIZE (mode);
3109 #endif
3111 /* Only deduct something for a REM if the last divide done was
3112 for a different constant. Then set the constant of the last
3113 divide. */
3114 max_cost = div_cost[(int) compute_mode]
3115 - (rem_flag && ! (last_div_const != 0 && op1_is_constant
3116 && INTVAL (op1) == last_div_const)
3117 ? mul_cost[(int) compute_mode] + add_cost : 0);
3119 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
3121 /* Now convert to the best mode to use. */
3122 if (compute_mode != mode)
3124 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
3125 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
3127 /* convert_modes may have placed op1 into a register, so we
3128 must recompute the following. */
3129 op1_is_constant = GET_CODE (op1) == CONST_INT;
3130 op1_is_pow2 = (op1_is_constant
3131 && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
3132 || (! unsignedp
3133 && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1)))))) ;
3136 /* If one of the operands is a volatile MEM, copy it into a register. */
3138 if (GET_CODE (op0) == MEM && MEM_VOLATILE_P (op0))
3139 op0 = force_reg (compute_mode, op0);
3140 if (GET_CODE (op1) == MEM && MEM_VOLATILE_P (op1))
3141 op1 = force_reg (compute_mode, op1);
3143 /* If we need the remainder or if OP1 is constant, we need to
3144 put OP0 in a register in case it has any queued subexpressions. */
3145 if (rem_flag || op1_is_constant)
3146 op0 = force_reg (compute_mode, op0);
3148 last = get_last_insn ();
3150 /* Promote floor rounding to trunc rounding for unsigned operations. */
3151 if (unsignedp)
3153 if (code == FLOOR_DIV_EXPR)
3154 code = TRUNC_DIV_EXPR;
3155 if (code == FLOOR_MOD_EXPR)
3156 code = TRUNC_MOD_EXPR;
3157 if (code == EXACT_DIV_EXPR && op1_is_pow2)
3158 code = TRUNC_DIV_EXPR;
3161 if (op1 != const0_rtx)
3162 switch (code)
3164 case TRUNC_MOD_EXPR:
3165 case TRUNC_DIV_EXPR:
3166 if (op1_is_constant)
3168 if (unsignedp)
3170 unsigned HOST_WIDE_INT mh, ml;
3171 int pre_shift, post_shift;
3172 int dummy;
3173 unsigned HOST_WIDE_INT d = INTVAL (op1);
3175 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
3177 pre_shift = floor_log2 (d);
3178 if (rem_flag)
3180 remainder
3181 = expand_binop (compute_mode, and_optab, op0,
3182 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
3183 remainder, 1,
3184 OPTAB_LIB_WIDEN);
3185 if (remainder)
3186 return gen_lowpart (mode, remainder);
3188 quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3189 build_int_2 (pre_shift, 0),
3190 tquotient, 1);
3192 else if (size <= HOST_BITS_PER_WIDE_INT)
3194 if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
3196 /* Most significant bit of divisor is set; emit an scc
3197 insn. */
3198 quotient = emit_store_flag (tquotient, GEU, op0, op1,
3199 compute_mode, 1, 1);
3200 if (quotient == 0)
3201 goto fail1;
3203 else
3205 /* Find a suitable multiplier and right shift count
3206 instead of dividing by D. */
3208 mh = choose_multiplier (d, size, size,
3209 &ml, &post_shift, &dummy);
3211 /* If the suggested multiplier is more than SIZE bits,
3212 we can do better for even divisors, using an
3213 initial right shift. */
3214 if (mh != 0 && (d & 1) == 0)
3216 pre_shift = floor_log2 (d & -d);
3217 mh = choose_multiplier (d >> pre_shift, size,
3218 size - pre_shift,
3219 &ml, &post_shift, &dummy);
3220 if (mh)
3221 abort ();
3223 else
3224 pre_shift = 0;
3226 if (mh != 0)
3228 rtx t1, t2, t3, t4;
3230 if (post_shift - 1 >= BITS_PER_WORD)
3231 goto fail1;
3233 extra_cost = (shift_cost[post_shift - 1]
3234 + shift_cost[1] + 2 * add_cost);
3235 t1 = expand_mult_highpart (compute_mode, op0, ml,
3236 NULL_RTX, 1,
3237 max_cost - extra_cost);
3238 if (t1 == 0)
3239 goto fail1;
3240 t2 = force_operand (gen_rtx_MINUS (compute_mode,
3241 op0, t1),
3242 NULL_RTX);
3243 t3 = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3244 build_int_2 (1, 0), NULL_RTX,1);
3245 t4 = force_operand (gen_rtx_PLUS (compute_mode,
3246 t1, t3),
3247 NULL_RTX);
3248 quotient
3249 = expand_shift (RSHIFT_EXPR, compute_mode, t4,
3250 build_int_2 (post_shift - 1, 0),
3251 tquotient, 1);
3253 else
3255 rtx t1, t2;
3257 if (pre_shift >= BITS_PER_WORD
3258 || post_shift >= BITS_PER_WORD)
3259 goto fail1;
3261 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3262 build_int_2 (pre_shift, 0),
3263 NULL_RTX, 1);
3264 extra_cost = (shift_cost[pre_shift]
3265 + shift_cost[post_shift]);
3266 t2 = expand_mult_highpart (compute_mode, t1, ml,
3267 NULL_RTX, 1,
3268 max_cost - extra_cost);
3269 if (t2 == 0)
3270 goto fail1;
3271 quotient
3272 = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3273 build_int_2 (post_shift, 0),
3274 tquotient, 1);
3278 else /* Too wide mode to use tricky code */
3279 break;
3281 insn = get_last_insn ();
3282 if (insn != last
3283 && (set = single_set (insn)) != 0
3284 && SET_DEST (set) == quotient)
3285 set_unique_reg_note (insn,
3286 REG_EQUAL,
3287 gen_rtx_UDIV (compute_mode, op0, op1));
3289 else /* TRUNC_DIV, signed */
3291 unsigned HOST_WIDE_INT ml;
3292 int lgup, post_shift;
3293 HOST_WIDE_INT d = INTVAL (op1);
3294 unsigned HOST_WIDE_INT abs_d = d >= 0 ? d : -d;
3296 /* n rem d = n rem -d */
3297 if (rem_flag && d < 0)
3299 d = abs_d;
3300 op1 = GEN_INT (trunc_int_for_mode (abs_d, compute_mode));
3303 if (d == 1)
3304 quotient = op0;
3305 else if (d == -1)
3306 quotient = expand_unop (compute_mode, neg_optab, op0,
3307 tquotient, 0);
3308 else if (abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
3310 /* This case is not handled correctly below. */
3311 quotient = emit_store_flag (tquotient, EQ, op0, op1,
3312 compute_mode, 1, 1);
3313 if (quotient == 0)
3314 goto fail1;
3316 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
3317 && (rem_flag ? smod_pow2_cheap : sdiv_pow2_cheap))
3318 ;
3319 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
3321 lgup = floor_log2 (abs_d);
3322 if (BRANCH_COST < 1 || (abs_d != 2 && BRANCH_COST < 3))
3324 rtx label = gen_label_rtx ();
3325 rtx t1;
3327 t1 = copy_to_mode_reg (compute_mode, op0);
3328 do_cmp_and_jump (t1, const0_rtx, GE,
3329 compute_mode, label);
3330 expand_inc (t1, GEN_INT (trunc_int_for_mode
3331 (abs_d - 1, compute_mode)));
3332 emit_label (label);
3333 quotient = expand_shift (RSHIFT_EXPR, compute_mode, t1,
3334 build_int_2 (lgup, 0),
3335 tquotient, 0);
3337 else
3339 rtx t1, t2, t3;
3340 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3341 build_int_2 (size - 1, 0),
3342 NULL_RTX, 0);
3343 t2 = expand_shift (RSHIFT_EXPR, compute_mode, t1,
3344 build_int_2 (size - lgup, 0),
3345 NULL_RTX, 1);
3346 t3 = force_operand (gen_rtx_PLUS (compute_mode,
3347 op0, t2),
3348 NULL_RTX);
3349 quotient = expand_shift (RSHIFT_EXPR, compute_mode, t3,
3350 build_int_2 (lgup, 0),
3351 tquotient, 0);
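
/* Illustrative sketch (not part of the original source): the branchless
   sequence above for signed division by 2^k, truncating toward zero, in
   plain C for a 32-bit word.  The helper name is hypothetical and the
   code relies on GCC's arithmetic right shift of signed values.  */
#if 0
static int
sdiv_pow2_sketch (x, k)
     int x, k;                /* divide by 2^k, 0 < k < 32 */
{
  int t1 = x >> 31;                               /* -1 if x < 0, else 0 */
  int t2 = (int) ((unsigned int) t1 >> (32 - k)); /* 2^k - 1, or 0 */
  return (x + t2) >> k;                           /* rounds toward zero */
}
#endif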
3354 /* We have computed OP0 / abs(OP1). If OP1 is negative, negate
3355 the quotient. */
3356 if (d < 0)
3358 insn = get_last_insn ();
3359 if (insn != last
3360 && (set = single_set (insn)) != 0
3361 && SET_DEST (set) == quotient
3362 && abs_d < ((unsigned HOST_WIDE_INT) 1
3363 << (HOST_BITS_PER_WIDE_INT - 1)))
3364 set_unique_reg_note (insn,
3365 REG_EQUAL,
3366 gen_rtx_DIV (compute_mode,
3367 op0,
3368 GEN_INT
3369 (trunc_int_for_mode
3370 (abs_d,
3371 compute_mode))));
3373 quotient = expand_unop (compute_mode, neg_optab,
3374 quotient, quotient, 0);
3377 else if (size <= HOST_BITS_PER_WIDE_INT)
3379 choose_multiplier (abs_d, size, size - 1,
3380 &ml, &post_shift, &lgup);
3381 if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
3383 rtx t1, t2, t3;
3385 if (post_shift >= BITS_PER_WORD
3386 || size - 1 >= BITS_PER_WORD)
3387 goto fail1;
3389 extra_cost = (shift_cost[post_shift]
3390 + shift_cost[size - 1] + add_cost);
3391 t1 = expand_mult_highpart (compute_mode, op0, ml,
3392 NULL_RTX, 0,
3393 max_cost - extra_cost);
3394 if (t1 == 0)
3395 goto fail1;
3396 t2 = expand_shift (RSHIFT_EXPR, compute_mode, t1,
3397 build_int_2 (post_shift, 0), NULL_RTX, 0);
3398 t3 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3399 build_int_2 (size - 1, 0), NULL_RTX, 0);
3400 if (d < 0)
3401 quotient
3402 = force_operand (gen_rtx_MINUS (compute_mode,
3403 t3, t2),
3404 tquotient);
3405 else
3406 quotient
3407 = force_operand (gen_rtx_MINUS (compute_mode,
3408 t2, t3),
3409 tquotient);
3411 else
3413 rtx t1, t2, t3, t4;
3415 if (post_shift >= BITS_PER_WORD
3416 || size - 1 >= BITS_PER_WORD)
3417 goto fail1;
3419 ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
3420 extra_cost = (shift_cost[post_shift]
3421 + shift_cost[size - 1] + 2 * add_cost);
3422 t1 = expand_mult_highpart (compute_mode, op0, ml,
3423 NULL_RTX, 0,
3424 max_cost - extra_cost);
3425 if (t1 == 0)
3426 goto fail1;
3427 t2 = force_operand (gen_rtx_PLUS (compute_mode,
3428 t1, op0),
3429 NULL_RTX);
3430 t3 = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3431 build_int_2 (post_shift, 0),
3432 NULL_RTX, 0);
3433 t4 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3434 build_int_2 (size - 1, 0),
3435 NULL_RTX, 0);
3436 if (d < 0)
3437 quotient
3438 = force_operand (gen_rtx_MINUS (compute_mode,
3439 t4, t3),
3440 tquotient);
3441 else
3442 quotient
3443 = force_operand (gen_rtx_MINUS (compute_mode,
3444 t3, t4),
3445 tquotient);
3448 else /* Too wide mode to use tricky code */
3449 break;
3451 insn = get_last_insn ();
3452 if (insn != last
3453 && (set = single_set (insn)) != 0
3454 && SET_DEST (set) == quotient)
3455 set_unique_reg_note (insn,
3456 REG_EQUAL,
3457 gen_rtx_DIV (compute_mode, op0, op1));
3459 break;
3461 fail1:
3462 delete_insns_since (last);
3463 break;
3465 case FLOOR_DIV_EXPR:
3466 case FLOOR_MOD_EXPR:
3467 /* We will come here only for signed operations. */
3468 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
3470 unsigned HOST_WIDE_INT mh, ml;
3471 int pre_shift, lgup, post_shift;
3472 HOST_WIDE_INT d = INTVAL (op1);
3474 if (d > 0)
3476 /* We could just as easily deal with negative constants here,
3477 but it does not seem worth the trouble for GCC 2.6. */
3478 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
3480 pre_shift = floor_log2 (d);
3481 if (rem_flag)
3483 remainder = expand_binop (compute_mode, and_optab, op0,
3484 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
3485 remainder, 0, OPTAB_LIB_WIDEN);
3486 if (remainder)
3487 return gen_lowpart (mode, remainder);
3489 quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3490 build_int_2 (pre_shift, 0),
3491 tquotient, 0);
3493 else
3495 rtx t1, t2, t3, t4;
3497 mh = choose_multiplier (d, size, size - 1,
3498 &ml, &post_shift, &lgup);
3499 if (mh)
3500 abort ();
3502 if (post_shift < BITS_PER_WORD
3503 && size - 1 < BITS_PER_WORD)
3505 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3506 build_int_2 (size - 1, 0),
3507 NULL_RTX, 0);
3508 t2 = expand_binop (compute_mode, xor_optab, op0, t1,
3509 NULL_RTX, 0, OPTAB_WIDEN);
3510 extra_cost = (shift_cost[post_shift]
3511 + shift_cost[size - 1] + 2 * add_cost);
3512 t3 = expand_mult_highpart (compute_mode, t2, ml,
3513 NULL_RTX, 1,
3514 max_cost - extra_cost);
3515 if (t3 != 0)
3517 t4 = expand_shift (RSHIFT_EXPR, compute_mode, t3,
3518 build_int_2 (post_shift, 0),
3519 NULL_RTX, 1);
3520 quotient = expand_binop (compute_mode, xor_optab,
3521 t4, t1, tquotient, 0,
3522 OPTAB_WIDEN);
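
/* Illustrative sketch (not part of the original source): the sign-xor
   identity behind the sequence above.  For d > 0,
   floor (x / d) == (((x ^ s) / d) ^ s) where s == x >> (N - 1),
   since x ^ s is x when x >= 0 and -x - 1 when x < 0.  Plain C for a
   32-bit word, with the multiply-high step replaced by a `/'; the helper
   name is hypothetical.  */
#if 0
static int
floor_div_sketch (x, d)
     int x, d;                /* d > 0 */
{
  int s = x >> 31;           /* -1 if x < 0, else 0 */
  return ((x ^ s) / d) ^ s;
}
#endif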
3527 else
3529 rtx nsign, t1, t2, t3, t4;
3530 t1 = force_operand (gen_rtx_PLUS (compute_mode,
3531 op0, constm1_rtx), NULL_RTX);
3532 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
3533 0, OPTAB_WIDEN);
3534 nsign = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3535 build_int_2 (size - 1, 0), NULL_RTX, 0);
3536 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
3537 NULL_RTX);
3538 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
3539 NULL_RTX, 0);
3540 if (t4)
3542 rtx t5;
3543 t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
3544 NULL_RTX, 0);
3545 quotient = force_operand (gen_rtx_PLUS (compute_mode,
3546 t4, t5),
3547 tquotient);
3552 if (quotient != 0)
3553 break;
3554 delete_insns_since (last);
3556 /* Try using an instruction that produces both the quotient and
3557 remainder, using truncation. We can easily compensate the quotient
3558 or remainder to get floor rounding, once we have the remainder.
3559 Notice that we also compute the final remainder value here,
3560 and return the result right away. */
3561 if (target == 0 || GET_MODE (target) != compute_mode)
3562 target = gen_reg_rtx (compute_mode);
3564 if (rem_flag)
3566 remainder
3567 = GET_CODE (target) == REG ? target : gen_reg_rtx (compute_mode);
3568 quotient = gen_reg_rtx (compute_mode);
3570 else
3572 quotient
3573 = GET_CODE (target) == REG ? target : gen_reg_rtx (compute_mode);
3574 remainder = gen_reg_rtx (compute_mode);
3577 if (expand_twoval_binop (sdivmod_optab, op0, op1,
3578 quotient, remainder, 0))
3580 /* This could be computed with a branch-less sequence.
3581 Save that for later. */
3582 rtx tem;
3583 rtx label = gen_label_rtx ();
3584 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
3585 tem = expand_binop (compute_mode, xor_optab, op0, op1,
3586 NULL_RTX, 0, OPTAB_WIDEN);
3587 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
3588 expand_dec (quotient, const1_rtx);
3589 expand_inc (remainder, op1);
3590 emit_label (label);
3591 return gen_lowpart (mode, rem_flag ? remainder : quotient);
3594 /* No luck with division elimination or divmod. Have to do it
3595 by conditionally adjusting op0 *and* the result. */
3597 rtx label1, label2, label3, label4, label5;
3598 rtx adjusted_op0;
3599 rtx tem;
3601 quotient = gen_reg_rtx (compute_mode);
3602 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
3603 label1 = gen_label_rtx ();
3604 label2 = gen_label_rtx ();
3605 label3 = gen_label_rtx ();
3606 label4 = gen_label_rtx ();
3607 label5 = gen_label_rtx ();
3608 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
3609 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
3610 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3611 quotient, 0, OPTAB_LIB_WIDEN);
3612 if (tem != quotient)
3613 emit_move_insn (quotient, tem);
3614 emit_jump_insn (gen_jump (label5));
3615 emit_barrier ();
3616 emit_label (label1);
3617 expand_inc (adjusted_op0, const1_rtx);
3618 emit_jump_insn (gen_jump (label4));
3619 emit_barrier ();
3620 emit_label (label2);
3621 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
3622 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3623 quotient, 0, OPTAB_LIB_WIDEN);
3624 if (tem != quotient)
3625 emit_move_insn (quotient, tem);
3626 emit_jump_insn (gen_jump (label5));
3627 emit_barrier ();
3628 emit_label (label3);
3629 expand_dec (adjusted_op0, const1_rtx);
3630 emit_label (label4);
3631 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3632 quotient, 0, OPTAB_LIB_WIDEN);
3633 if (tem != quotient)
3634 emit_move_insn (quotient, tem);
3635 expand_dec (quotient, const1_rtx);
3636 emit_label (label5);
3638 break;
3640 case CEIL_DIV_EXPR:
3641 case CEIL_MOD_EXPR:
3642 if (unsignedp)
3644 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
3646 rtx t1, t2, t3;
3647 unsigned HOST_WIDE_INT d = INTVAL (op1);
3648 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3649 build_int_2 (floor_log2 (d), 0),
3650 tquotient, 1);
3651 t2 = expand_binop (compute_mode, and_optab, op0,
3652 GEN_INT (d - 1),
3653 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3654 t3 = gen_reg_rtx (compute_mode);
3655 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
3656 compute_mode, 1, 1);
3657 if (t3 == 0)
3659 rtx lab;
3660 lab = gen_label_rtx ();
3661 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
3662 expand_inc (t1, const1_rtx);
3663 emit_label (lab);
3664 quotient = t1;
3666 else
3667 quotient = force_operand (gen_rtx_PLUS (compute_mode,
3668 t1, t3),
3669 tquotient);
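/* For a power of 2, the ceiling quotient is the logical shift plus a
   carry from any non-zero low bits: X >> log2(d) is floor(X/d) and
   (X & (d - 1)) != 0 contributes the extra 1.  E.g. X = 9, d = 4:
   (9 >> 2) + ((9 & 3) != 0) = 2 + 1 = 3 = ceil(9/4).  */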
3670 break;
3673 /* Try using an instruction that produces both the quotient and
3674 remainder, using truncation. We can easily compensate the
3675 quotient or remainder to get ceiling rounding, once we have the
3676 remainder. Notice that we also compute the final remainder
3677 value here, and return the result right away. */
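/* The fixup: any non-zero remainder means truncation rounded down, so
   bump the quotient and pull one divisor out of the remainder.
   E.g. 7 / 2 truncates to q = 3, r = 1; ceiling rounding wants
   q = 4, r = 1 - 2 = -1, keeping 7 = 4*2 - 1.  */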
3678 if (target == 0 || GET_MODE (target) != compute_mode)
3679 target = gen_reg_rtx (compute_mode);
3681 if (rem_flag)
3683 remainder = (GET_CODE (target) == REG
3684 ? target : gen_reg_rtx (compute_mode));
3685 quotient = gen_reg_rtx (compute_mode);
3687 else
3689 quotient = (GET_CODE (target) == REG
3690 ? target : gen_reg_rtx (compute_mode));
3691 remainder = gen_reg_rtx (compute_mode);
3694 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
3695 remainder, 1))
3697 /* This could be computed with a branch-less sequence.
3698 Save that for later. */
3699 rtx label = gen_label_rtx ();
3700 do_cmp_and_jump (remainder, const0_rtx, EQ,
3701 compute_mode, label);
3702 expand_inc (quotient, const1_rtx);
3703 expand_dec (remainder, op1);
3704 emit_label (label);
3705 return gen_lowpart (mode, rem_flag ? remainder : quotient);
3708 /* No luck with division elimination or divmod. Have to do it
3709 by conditionally adjusting op0 *and* the result. */
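/* Uses the identity ceil(X/Y) = (X - 1)/Y + 1, valid for unsigned
   X > 0; X == 0 is special-cased to a zero quotient.  E.g.
   (7 - 1)/2 + 1 = 3 + 1 = 4 = ceil(7/2).  */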
3711 rtx label1, label2;
3712 rtx adjusted_op0, tem;
3714 quotient = gen_reg_rtx (compute_mode);
3715 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
3716 label1 = gen_label_rtx ();
3717 label2 = gen_label_rtx ();
3718 do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
3719 compute_mode, label1);
3720 emit_move_insn (quotient, const0_rtx);
3721 emit_jump_insn (gen_jump (label2));
3722 emit_barrier ();
3723 emit_label (label1);
3724 expand_dec (adjusted_op0, const1_rtx);
3725 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
3726 quotient, 1, OPTAB_LIB_WIDEN);
3727 if (tem != quotient)
3728 emit_move_insn (quotient, tem);
3729 expand_inc (quotient, const1_rtx);
3730 emit_label (label2);
3733 else /* signed */
3735 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
3736 && INTVAL (op1) >= 0)
3738 /* This is extremely similar to the code for the unsigned case
3739 above. For 2.7 we should merge these variants, but for
3740 2.6.1 I don't want to touch the code for unsigned since that
3741 gets used in C. The signed case will only be used by other
3742 languages (Ada). */
3744 rtx t1, t2, t3;
3745 unsigned HOST_WIDE_INT d = INTVAL (op1);
3746 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3747 build_int_2 (floor_log2 (d), 0),
3748 tquotient, 0);
3749 t2 = expand_binop (compute_mode, and_optab, op0,
3750 GEN_INT (d - 1),
3751 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3752 t3 = gen_reg_rtx (compute_mode);
3753 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
3754 compute_mode, 1, 1);
3755 if (t3 == 0)
3757 rtx lab;
3758 lab = gen_label_rtx ();
3759 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
3760 expand_inc (t1, const1_rtx);
3761 emit_label (lab);
3762 quotient = t1;
3764 else
3765 quotient = force_operand (gen_rtx_PLUS (compute_mode,
3766 t1, t3),
3767 tquotient);
3768 break;
3771 /* Try using an instruction that produces both the quotient and
3772 remainder, using truncation. We can easily compensate the
3773 quotient or remainder to get ceiling rounding, once we have the
3774 remainder. Notice that we also compute the final remainder
3775 value here, and return the result right away. */
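/* Unlike the unsigned case, the quotient needs the fixup only when it
   is positive: bump it when the remainder is non-zero and op0 and op1
   agree in sign (their XOR is non-negative).  E.g. -7 / 2 truncates
   to -3, which already is ceil(-3.5).  */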
3776 if (target == 0 || GET_MODE (target) != compute_mode)
3777 target = gen_reg_rtx (compute_mode);
3778 if (rem_flag)
3780 remainder = (GET_CODE (target) == REG
3781 ? target : gen_reg_rtx (compute_mode));
3782 quotient = gen_reg_rtx (compute_mode);
3784 else
3786 quotient = (GET_CODE (target) == REG
3787 ? target : gen_reg_rtx (compute_mode));
3788 remainder = gen_reg_rtx (compute_mode);
3791 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
3792 remainder, 0))
3794 /* This could be computed with a branch-less sequence.
3795 Save that for later. */
3796 rtx tem;
3797 rtx label = gen_label_rtx ();
3798 do_cmp_and_jump (remainder, const0_rtx, EQ,
3799 compute_mode, label);
3800 tem = expand_binop (compute_mode, xor_optab, op0, op1,
3801 NULL_RTX, 0, OPTAB_WIDEN);
3802 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
3803 expand_inc (quotient, const1_rtx);
3804 expand_dec (remainder, op1);
3805 emit_label (label);
3806 return gen_lowpart (mode, rem_flag ? remainder : quotient);
3809 /* No luck with division elimination or divmod. Have to do it
3810 by conditionally adjusting op0 *and* the result. */
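/* Mirror image of the FLOOR_DIV_EXPR fallback above: when the signs
   agree, ceil(X/Y) = trunc((X - 1)/Y) + 1 for positive operands and
   trunc((X + 1)/Y) + 1 for negative ones; when they differ,
   truncation already rounds toward the ceiling.  E.g.
   trunc((7 - 1)/2) + 1 = 4 = ceil(7/2).  */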
3812 rtx label1, label2, label3, label4, label5;
3813 rtx adjusted_op0;
3814 rtx tem;
3816 quotient = gen_reg_rtx (compute_mode);
3817 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
3818 label1 = gen_label_rtx ();
3819 label2 = gen_label_rtx ();
3820 label3 = gen_label_rtx ();
3821 label4 = gen_label_rtx ();
3822 label5 = gen_label_rtx ();
3823 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
3824 do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
3825 compute_mode, label1);
3826 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3827 quotient, 0, OPTAB_LIB_WIDEN);
3828 if (tem != quotient)
3829 emit_move_insn (quotient, tem);
3830 emit_jump_insn (gen_jump (label5));
3831 emit_barrier ();
3832 emit_label (label1);
3833 expand_dec (adjusted_op0, const1_rtx);
3834 emit_jump_insn (gen_jump (label4));
3835 emit_barrier ();
3836 emit_label (label2);
3837 do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
3838 compute_mode, label3);
3839 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3840 quotient, 0, OPTAB_LIB_WIDEN);
3841 if (tem != quotient)
3842 emit_move_insn (quotient, tem);
3843 emit_jump_insn (gen_jump (label5));
3844 emit_barrier ();
3845 emit_label (label3);
3846 expand_inc (adjusted_op0, const1_rtx);
3847 emit_label (label4);
3848 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3849 quotient, 0, OPTAB_LIB_WIDEN);
3850 if (tem != quotient)
3851 emit_move_insn (quotient, tem);
3852 expand_inc (quotient, const1_rtx);
3853 emit_label (label5);
3856 break;
3858 case EXACT_DIV_EXPR:
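/* An exact division X/d can be done entirely with a multiplication:
   strip d's trailing zero bits with a shift, then multiply by the
   inverse of the remaining odd factor modulo 2^size, which
   invert_mod2n computes.  This only recovers quotients of exact
   multiples.  E.g. in 8 bits the inverse of 3 is 171 (3*171 = 513
   == 1 mod 256), and 9/3 comes out as 9*171 mod 256 = 3.  */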
3859 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
3861 HOST_WIDE_INT d = INTVAL (op1);
3862 unsigned HOST_WIDE_INT ml;
3863 int pre_shift;
3864 rtx t1;
3866 pre_shift = floor_log2 (d & -d);
3867 ml = invert_mod2n (d >> pre_shift, size);
3868 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3869 build_int_2 (pre_shift, 0), NULL_RTX, unsignedp);
3870 quotient = expand_mult (compute_mode, t1,
3871 GEN_INT (trunc_int_for_mode
3872 (ml, compute_mode)),
3873 NULL_RTX, 0);
3875 insn = get_last_insn ();
3876 set_unique_reg_note (insn,
3877 REG_EQUAL,
3878 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
3879 compute_mode,
3880 op0, op1));
3882 break;
3884 case ROUND_DIV_EXPR:
3885 case ROUND_MOD_EXPR:
3886 if (unsignedp)
3888 rtx tem;
3889 rtx label;
3890 label = gen_label_rtx ();
3891 quotient = gen_reg_rtx (compute_mode);
3892 remainder = gen_reg_rtx (compute_mode);
3893 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
3895 rtx tem;
3896 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
3897 quotient, 1, OPTAB_LIB_WIDEN);
3898 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
3899 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
3900 remainder, 1, OPTAB_LIB_WIDEN);
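/* Round-to-nearest fixup: bump the quotient exactly when the
   remainder exceeds (d - 1)/2, i.e. when it is closer to d than to
   zero.  E.g. 7 / 4 gives q = 1, r = 3; since 3 > (4 - 1)/2 = 1, the
   result becomes q = 2, r = 3 - 4 = -1.  */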
3902 tem = plus_constant (op1, -1);
3903 tem = expand_shift (RSHIFT_EXPR, compute_mode, tem,
3904 build_int_2 (1, 0), NULL_RTX, 1);
3905 do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
3906 expand_inc (quotient, const1_rtx);
3907 expand_dec (remainder, op1);
3908 emit_label (label);
3910 else
3912 rtx abs_rem, abs_op1, tem, mask;
3913 rtx label;
3914 label = gen_label_rtx ();
3915 quotient = gen_reg_rtx (compute_mode);
3916 remainder = gen_reg_rtx (compute_mode);
3917 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
3919 rtx tem;
3920 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
3921 quotient, 0, OPTAB_LIB_WIDEN);
3922 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
3923 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
3924 remainder, 0, OPTAB_LIB_WIDEN);
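/* Branch-free sign handling below: MASK is the sign mask of
   op0 ^ op1, so (1 ^ MASK) - MASK is +1 or -1 matching the quotient's
   sign and (MASK ^ op1) - MASK is op1 or -op1.  When 2*|r| >= |d| the
   quotient moves one step away from zero and the remainder absorbs a
   divisor of the right sign.  E.g. -7 / 2 gives q = -3, r = -1;
   2*|r| = 2 >= 2, so q becomes -4 and r becomes -1 - (-2) = 1.  */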
3926 abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
3927 abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
3928 tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
3929 build_int_2 (1, 0), NULL_RTX, 1);
3930 do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
3931 tem = expand_binop (compute_mode, xor_optab, op0, op1,
3932 NULL_RTX, 0, OPTAB_WIDEN);
3933 mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
3934 build_int_2 (size - 1, 0), NULL_RTX, 0);
3935 tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
3936 NULL_RTX, 0, OPTAB_WIDEN);
3937 tem = expand_binop (compute_mode, sub_optab, tem, mask,
3938 NULL_RTX, 0, OPTAB_WIDEN);
3939 expand_inc (quotient, tem);
3940 tem = expand_binop (compute_mode, xor_optab, mask, op1,
3941 NULL_RTX, 0, OPTAB_WIDEN);
3942 tem = expand_binop (compute_mode, sub_optab, tem, mask,
3943 NULL_RTX, 0, OPTAB_WIDEN);
3944 expand_dec (remainder, tem);
3945 emit_label (label);
3947 return gen_lowpart (mode, rem_flag ? remainder : quotient);
3949 default:
3950 abort ();
3953 if (quotient == 0)
3955 if (target && GET_MODE (target) != compute_mode)
3956 target = 0;
3958 if (rem_flag)
3960 /* Try to produce the remainder without producing the quotient.
3961 If we seem to have a divmod pattern that does not require widening,
3962 don't try widening here. We should really have a WIDEN argument
3963 to expand_twoval_binop, since what we'd really like to do here is
3964 1) try a mod insn in compute_mode
3965 2) try a divmod insn in compute_mode
3966 3) try a div insn in compute_mode and multiply-subtract to get
3967 remainder
3968 4) try the same things with widening allowed. */
3969 remainder
3970 = sign_expand_binop (compute_mode, umod_optab, smod_optab,
3971 op0, op1, target,
3972 unsignedp,
3973 ((optab2->handlers[(int) compute_mode].insn_code
3974 != CODE_FOR_nothing)
3975 ? OPTAB_DIRECT : OPTAB_WIDEN));
3976 if (remainder == 0)
3978 /* No luck there. Can we do remainder and divide at once
3979 without a library call? */
3980 remainder = gen_reg_rtx (compute_mode);
3981 if (! expand_twoval_binop ((unsignedp
3982 ? udivmod_optab
3983 : sdivmod_optab),
3984 op0, op1,
3985 NULL_RTX, remainder, unsignedp))
3986 remainder = 0;
3989 if (remainder)
3990 return gen_lowpart (mode, remainder);
3993 /* Produce the quotient. Try a quotient insn, but not a library call.
3994 If we have a divmod in this mode, use it in preference to widening
3995 the div (for this test we assume it will not fail). Note that optab2
3996 is set to the one of the two optabs that the call below will use. */
3997 quotient
3998 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
3999 op0, op1, rem_flag ? NULL_RTX : target,
4000 unsignedp,
4001 ((optab2->handlers[(int) compute_mode].insn_code
4002 != CODE_FOR_nothing)
4003 ? OPTAB_DIRECT : OPTAB_WIDEN));
4005 if (quotient == 0)
4007 /* No luck there. Try a quotient-and-remainder insn,
4008 keeping the quotient alone. */
4009 quotient = gen_reg_rtx (compute_mode);
4010 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4011 op0, op1,
4012 quotient, NULL_RTX, unsignedp))
4014 quotient = 0;
4015 if (! rem_flag)
4016 /* Still no luck. If we are not computing the remainder,
4017 use a library call for the quotient. */
4018 quotient = sign_expand_binop (compute_mode,
4019 udiv_optab, sdiv_optab,
4020 op0, op1, target,
4021 unsignedp, OPTAB_LIB_WIDEN);
4026 if (rem_flag)
4028 if (target && GET_MODE (target) != compute_mode)
4029 target = 0;
4031 if (quotient == 0)
4032 /* No divide instruction either. Use library for remainder. */
4033 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4034 op0, op1, target,
4035 unsignedp, OPTAB_LIB_WIDEN);
4036 else
4038 /* We divided. Now finish doing X - Y * (X / Y). */
4039 remainder = expand_mult (compute_mode, quotient, op1,
4040 NULL_RTX, unsignedp);
4041 remainder = expand_binop (compute_mode, sub_optab, op0,
4042 remainder, target, unsignedp,
4043 OPTAB_LIB_WIDEN);
4047 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4050 /* Return a tree node with data type TYPE, describing the value of X.
4051 Usually this is an RTL_EXPR, if there is no obvious better choice.
4052 X may be an expression; however, we only support those expressions
4053 generated by loop.c.
4055 tree
4056 make_tree (type, x)
4057 tree type;
4058 rtx x;
4060 tree t;
4062 switch (GET_CODE (x))
4064 case CONST_INT:
4065 t = build_int_2 (INTVAL (x),
4066 (TREE_UNSIGNED (type)
4067 && (GET_MODE_BITSIZE (TYPE_MODE (type)) < HOST_BITS_PER_WIDE_INT))
4068 || INTVAL (x) >= 0 ? 0 : -1);
4069 TREE_TYPE (t) = type;
4070 return t;
4072 case CONST_DOUBLE:
4073 if (GET_MODE (x) == VOIDmode)
4075 t = build_int_2 (CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
4076 TREE_TYPE (t) = type;
4078 else
4080 REAL_VALUE_TYPE d;
4082 REAL_VALUE_FROM_CONST_DOUBLE (d, x);
4083 t = build_real (type, d);
4086 return t;
4088 case PLUS:
4089 return fold (build (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4090 make_tree (type, XEXP (x, 1))));
4092 case MINUS:
4093 return fold (build (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4094 make_tree (type, XEXP (x, 1))));
4096 case NEG:
4097 return fold (build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0))));
4099 case MULT:
4100 return fold (build (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
4101 make_tree (type, XEXP (x, 1))));
4103 case ASHIFT:
4104 return fold (build (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
4105 make_tree (type, XEXP (x, 1))));
4107 case LSHIFTRT:
4108 return fold (convert (type,
4109 build (RSHIFT_EXPR, unsigned_type (type),
4110 make_tree (unsigned_type (type),
4111 XEXP (x, 0)),
4112 make_tree (type, XEXP (x, 1)))));
4114 case ASHIFTRT:
4115 return fold (convert (type,
4116 build (RSHIFT_EXPR, signed_type (type),
4117 make_tree (signed_type (type), XEXP (x, 0)),
4118 make_tree (type, XEXP (x, 1)))));
4120 case DIV:
4121 if (TREE_CODE (type) != REAL_TYPE)
4122 t = signed_type (type);
4123 else
4124 t = type;
4126 return fold (convert (type,
4127 build (TRUNC_DIV_EXPR, t,
4128 make_tree (t, XEXP (x, 0)),
4129 make_tree (t, XEXP (x, 1)))));
4130 case UDIV:
4131 t = unsigned_type (type);
4132 return fold (convert (type,
4133 build (TRUNC_DIV_EXPR, t,
4134 make_tree (t, XEXP (x, 0)),
4135 make_tree (t, XEXP (x, 1)))));
4136 default:
4137 t = make_node (RTL_EXPR);
4138 TREE_TYPE (t) = type;
4140 #ifdef POINTERS_EXTEND_UNSIGNED
4141 /* If TYPE is a POINTER_TYPE, X might be Pmode with TYPE_MODE being
4142 ptr_mode. So convert. */
4143 if (POINTER_TYPE_P (type) && GET_MODE (x) != TYPE_MODE (type))
4144 x = convert_memory_address (TYPE_MODE (type), x);
4145 #endif
4147 RTL_EXPR_RTL (t) = x;
4148 /* There are no insns to be output
4149 when this rtl_expr is used. */
4150 RTL_EXPR_SEQUENCE (t) = 0;
4151 return t;
4155 /* Return an rtx representing the value of X * MULT + ADD.
4156 TARGET is a suggestion for where to store the result (an rtx).
4157 MODE is the machine mode for the computation.
4158 X and MULT must have mode MODE. ADD may have a different mode
4159 (or VOIDmode, in which case it is treated as having mode MODE).
4160 UNSIGNEDP is non-zero to do unsigned multiplication.
4161 This may emit insns. */
4163 rtx
4164 expand_mult_add (x, target, mult, add, mode, unsignedp)
4165 rtx x, target, mult, add;
4166 enum machine_mode mode;
4167 int unsignedp;
4169 tree type = type_for_mode (mode, unsignedp);
4170 tree add_type = (GET_MODE (add) == VOIDmode
4171 ? type : type_for_mode (GET_MODE (add), unsignedp));
4172 tree result = fold (build (PLUS_EXPR, type,
4173 fold (build (MULT_EXPR, type,
4174 make_tree (type, x),
4175 make_tree (type, mult))),
4176 make_tree (add_type, add)));
4178 return expand_expr (result, target, VOIDmode, 0);
4181 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
4182 and returning TARGET.
4184 If TARGET is 0, a pseudo-register or constant is returned. */
4186 rtx
4187 expand_and (op0, op1, target)
4188 rtx op0, op1, target;
4190 enum machine_mode mode = VOIDmode;
4191 rtx tem;
4193 if (GET_MODE (op0) != VOIDmode)
4194 mode = GET_MODE (op0);
4195 else if (GET_MODE (op1) != VOIDmode)
4196 mode = GET_MODE (op1);
4198 if (mode != VOIDmode)
4199 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
4200 else if (GET_CODE (op0) == CONST_INT && GET_CODE (op1) == CONST_INT)
4201 tem = GEN_INT (INTVAL (op0) & INTVAL (op1));
4202 else
4203 abort ();
4205 if (target == 0)
4206 target = tem;
4207 else if (tem != target)
4208 emit_move_insn (target, tem);
4209 return target;
4212 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
4213 and storing in TARGET. Normally return TARGET.
4214 Return 0 if that cannot be done.
4216 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
4217 it is VOIDmode, they cannot both be CONST_INT.
4219 UNSIGNEDP is for the case where we have to widen the operands
4220 to perform the operation. It says to use zero-extension.
4222 NORMALIZEP is 1 if we should convert the result to be either zero
4223 or one. NORMALIZEP is -1 if we should convert the result to be
4224 either zero or -1. If NORMALIZEP is zero, the result will be left
4225 "raw" out of the scc insn. */
4227 rtx
4228 emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep)
4229 rtx target;
4230 enum rtx_code code;
4231 rtx op0, op1;
4232 enum machine_mode mode;
4233 int unsignedp;
4234 int normalizep;
4236 rtx subtarget;
4237 enum insn_code icode;
4238 enum machine_mode compare_mode;
4239 enum machine_mode target_mode = GET_MODE (target);
4240 rtx tem;
4241 rtx last = get_last_insn ();
4242 rtx pattern, comparison;
4244 if (unsignedp)
4245 code = unsigned_condition (code);
4247 /* If one operand is constant, make it the second one. Only do this
4248 if the other operand is not constant as well. */
4250 if (swap_commutative_operands_p (op0, op1))
4252 tem = op0;
4253 op0 = op1;
4254 op1 = tem;
4255 code = swap_condition (code);
4258 if (mode == VOIDmode)
4259 mode = GET_MODE (op0);
4261 /* For some comparisons with 1 and -1, we can convert this to
4262 comparisons with zero. This will often produce more opportunities for
4263 store-flag insns. */
4265 switch (code)
4267 case LT:
4268 if (op1 == const1_rtx)
4269 op1 = const0_rtx, code = LE;
4270 break;
4271 case LE:
4272 if (op1 == constm1_rtx)
4273 op1 = const0_rtx, code = LT;
4274 break;
4275 case GE:
4276 if (op1 == const1_rtx)
4277 op1 = const0_rtx, code = GT;
4278 break;
4279 case GT:
4280 if (op1 == constm1_rtx)
4281 op1 = const0_rtx, code = GE;
4282 break;
4283 case GEU:
4284 if (op1 == const1_rtx)
4285 op1 = const0_rtx, code = NE;
4286 break;
4287 case LTU:
4288 if (op1 == const1_rtx)
4289 op1 = const0_rtx, code = EQ;
4290 break;
4291 default:
4292 break;
4295 /* If we are comparing a double-word integer with zero, we can convert
4296 the comparison into one involving a single word. */
4297 if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
4298 && GET_MODE_CLASS (mode) == MODE_INT
4299 && op1 == const0_rtx)
4301 if (code == EQ || code == NE)
4303 /* Do a logical OR of the two words and compare the result. */
4304 rtx op0h = gen_highpart (word_mode, op0);
4305 rtx op0l = gen_lowpart (word_mode, op0);
4306 rtx op0both = expand_binop (word_mode, ior_optab, op0h, op0l,
4307 NULL_RTX, unsignedp, OPTAB_DIRECT);
4308 if (op0both != 0)
4309 return emit_store_flag (target, code, op0both, op1, word_mode,
4310 unsignedp, normalizep);
4312 else if (code == LT || code == GE)
4313 /* If testing the sign bit, can just test on high word. */
4314 return emit_store_flag (target, code, gen_highpart (word_mode, op0),
4315 op1, word_mode, unsignedp, normalizep);
4318 /* From now on, we won't change CODE, so set ICODE now. */
4319 icode = setcc_gen_code[(int) code];
4321 /* If this is A < 0 or A >= 0, we can do this by taking the ones
4322 complement of A (for GE) and shifting the sign bit to the low bit. */
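/* E.g. A < 0 becomes just (unsigned) A >> (size - 1), yielding 0 or
   1; with NORMALIZEP == -1 an arithmetic shift yields 0 or -1
   instead.  A >= 0 applies the same shifts to ~A.  */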
4323 if (op1 == const0_rtx && (code == LT || code == GE)
4324 && GET_MODE_CLASS (mode) == MODE_INT
4325 && (normalizep || STORE_FLAG_VALUE == 1
4326 || (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
4327 && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
4328 == (HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))))
4330 subtarget = target;
4332 /* If the result is to be wider than OP0, it is best to convert it
4333 first. If it is to be narrower, it is *incorrect* to convert it
4334 first. */
4335 if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
4337 op0 = protect_from_queue (op0, 0);
4338 op0 = convert_modes (target_mode, mode, op0, 0);
4339 mode = target_mode;
4342 if (target_mode != mode)
4343 subtarget = 0;
4345 if (code == GE)
4346 op0 = expand_unop (mode, one_cmpl_optab, op0,
4347 ((STORE_FLAG_VALUE == 1 || normalizep)
4348 ? 0 : subtarget), 0);
4350 if (STORE_FLAG_VALUE == 1 || normalizep)
4351 /* If we are supposed to produce a 0/1 value, we want to do
4352 a logical shift from the sign bit to the low-order bit; for
4353 a -1/0 value, we do an arithmetic shift. */
4354 op0 = expand_shift (RSHIFT_EXPR, mode, op0,
4355 size_int (GET_MODE_BITSIZE (mode) - 1),
4356 subtarget, normalizep != -1);
4358 if (mode != target_mode)
4359 op0 = convert_modes (target_mode, mode, op0, 0);
4361 return op0;
4364 if (icode != CODE_FOR_nothing)
4366 insn_operand_predicate_fn pred;
4368 /* We think we may be able to do this with a scc insn. Emit the
4369 comparison and then the scc insn.
4371 compare_from_rtx may call emit_queue, which would be deleted below
4372 if the scc insn fails. So call it ourselves before setting LAST.
4373 Likewise for do_pending_stack_adjust. */
4375 emit_queue ();
4376 do_pending_stack_adjust ();
4377 last = get_last_insn ();
4379 comparison
4380 = compare_from_rtx (op0, op1, code, unsignedp, mode, NULL_RTX, 0);
4381 if (GET_CODE (comparison) == CONST_INT)
4382 return (comparison == const0_rtx ? const0_rtx
4383 : normalizep == 1 ? const1_rtx
4384 : normalizep == -1 ? constm1_rtx
4385 : const_true_rtx);
4387 /* If the code of COMPARISON doesn't match CODE, something is
4388 wrong; we can no longer be sure that we have the operation.
4389 We could handle this case, but it should not happen. */
4391 if (GET_CODE (comparison) != code)
4392 abort ();
4394 /* Get a reference to the target in the proper mode for this insn. */
4395 compare_mode = insn_data[(int) icode].operand[0].mode;
4396 subtarget = target;
4397 pred = insn_data[(int) icode].operand[0].predicate;
4398 if (preserve_subexpressions_p ()
4399 || ! (*pred) (subtarget, compare_mode))
4400 subtarget = gen_reg_rtx (compare_mode);
4402 pattern = GEN_FCN (icode) (subtarget);
4403 if (pattern)
4405 emit_insn (pattern);
4407 /* If we are converting to a wider mode, first convert to
4408 TARGET_MODE, then normalize. This produces better combining
4409 opportunities on machines that have a SIGN_EXTRACT when we are
4410 testing a single bit. This mostly benefits the 68k.
4412 If STORE_FLAG_VALUE does not have the sign bit set when
4413 interpreted in COMPARE_MODE, we can do this conversion as
4414 unsigned, which is usually more efficient. */
4415 if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (compare_mode))
4417 convert_move (target, subtarget,
4418 (GET_MODE_BITSIZE (compare_mode)
4419 <= HOST_BITS_PER_WIDE_INT)
4420 && 0 == (STORE_FLAG_VALUE
4421 & ((HOST_WIDE_INT) 1
4422 << (GET_MODE_BITSIZE (compare_mode) -1))));
4423 op0 = target;
4424 compare_mode = target_mode;
4426 else
4427 op0 = subtarget;
4429 /* If we want to keep subexpressions around, don't reuse our
4430 last target. */
4432 if (preserve_subexpressions_p ())
4433 subtarget = 0;
4435 /* Now normalize to the proper value in COMPARE_MODE. Sometimes
4436 we don't have to do anything. */
4437 if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
4439 /* STORE_FLAG_VALUE might be the most negative number, so write
4440 the comparison this way to avoid a compile-time warning. */
4441 else if (- normalizep == STORE_FLAG_VALUE)
4442 op0 = expand_unop (compare_mode, neg_optab, op0, subtarget, 0);
4444 /* We don't want to use STORE_FLAG_VALUE < 0 below since this
4445 makes it hard to use a value of just the sign bit due to
4446 ANSI integer constant typing rules. */
4447 else if (GET_MODE_BITSIZE (compare_mode) <= HOST_BITS_PER_WIDE_INT
4448 && (STORE_FLAG_VALUE
4449 & ((HOST_WIDE_INT) 1
4450 << (GET_MODE_BITSIZE (compare_mode) - 1))))
4451 op0 = expand_shift (RSHIFT_EXPR, compare_mode, op0,
4452 size_int (GET_MODE_BITSIZE (compare_mode) - 1),
4453 subtarget, normalizep == 1);
4454 else if (STORE_FLAG_VALUE & 1)
4456 op0 = expand_and (op0, const1_rtx, subtarget);
4457 if (normalizep == -1)
4458 op0 = expand_unop (compare_mode, neg_optab, op0, op0, 0);
4460 else
4461 abort ();
4463 /* If we were converting to a smaller mode, do the
4464 conversion now. */
4465 if (target_mode != compare_mode)
4467 convert_move (target, op0, 0);
4468 return target;
4470 else
4471 return op0;
4475 delete_insns_since (last);
4477 /* If expensive optimizations, use different pseudo registers for each
4478 insn, instead of reusing the same pseudo. This leads to better CSE,
4479 but slows down the compiler, since there are more pseudos. */
4480 subtarget = (!flag_expensive_optimizations
4481 && (target_mode == mode)) ? target : NULL_RTX;
4483 /* If we reached here, we can't do this with a scc insn. However, there
4484 are some comparisons that can be done directly. For example, if
4485 this is an equality comparison of integers, we can try to exclusive-or
4486 (or subtract) the two operands and use a recursive call to try the
4487 comparison with zero. Don't do any of these cases if branches are
4488 very cheap. */
4490 if (BRANCH_COST > 0
4491 && GET_MODE_CLASS (mode) == MODE_INT && (code == EQ || code == NE)
4492 && op1 != const0_rtx)
4494 tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
4495 OPTAB_WIDEN);
4497 if (tem == 0)
4498 tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
4499 OPTAB_WIDEN);
4500 if (tem != 0)
4501 tem = emit_store_flag (target, code, tem, const0_rtx,
4502 mode, unsignedp, normalizep);
4503 if (tem == 0)
4504 delete_insns_since (last);
4505 return tem;
4508 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
4509 the constant zero. Reject all other comparisons at this point. Only
4510 do LE and GT if branches are expensive since they are expensive on
4511 2-operand machines. */
4513 if (BRANCH_COST == 0
4514 || GET_MODE_CLASS (mode) != MODE_INT || op1 != const0_rtx
4515 || (code != EQ && code != NE
4516 && (BRANCH_COST <= 1 || (code != LE && code != GT))))
4517 return 0;
4519 /* See what we need to return. We can only return a 1, -1, or the
4520 sign bit. */
4522 if (normalizep == 0)
4524 if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
4525 normalizep = STORE_FLAG_VALUE;
4527 else if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
4528 && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
4529 == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))
4531 else
4532 return 0;
4535 /* Try to put the result of the comparison in the sign bit. Assume we can't
4536 do the necessary operation below. */
4538 tem = 0;
4540 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
4541 the sign bit set. */
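/* A == 0 gives 0 | -1 = -1 and A < 0 keeps its own sign bit, while
   for A > 0 neither A nor A - 1 is negative, e.g. 5 | 4 = 5.  */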
4543 if (code == LE)
4545 /* This is destructive, so SUBTARGET can't be OP0. */
4546 if (rtx_equal_p (subtarget, op0))
4547 subtarget = 0;
4549 tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
4550 OPTAB_WIDEN);
4551 if (tem)
4552 tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
4553 OPTAB_WIDEN);
4556 /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
4557 number of bits in the mode of OP0, minus one. */
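/* The arithmetic shift yields 0 for A >= 0 and -1 for A < 0, so the
   difference is -A < 0 when A > 0, 0 when A == 0, and
   -1 - A >= 0 when A < 0: the sign bit is set exactly for A > 0.  */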
4559 if (code == GT)
4561 if (rtx_equal_p (subtarget, op0))
4562 subtarget = 0;
4564 tem = expand_shift (RSHIFT_EXPR, mode, op0,
4565 size_int (GET_MODE_BITSIZE (mode) - 1),
4566 subtarget, 0);
4567 tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
4568 OPTAB_WIDEN);
4571 if (code == EQ || code == NE)
4573 /* For EQ or NE, one way to do the comparison is to apply an operation
4574 that converts the operand into a positive number if it is non-zero
4575 or zero if it was originally zero. Then, for EQ, we subtract 1 and
4576 for NE we negate. This puts the result in the sign bit. Then we
4577 normalize with a shift, if needed.
4579 Two operations that can do the above actions are ABS and FFS, so try
4580 them. If that doesn't work, and MODE is smaller than a full word,
4581 we can use zero-extension to the wider mode (an unsigned conversion)
4582 as the operation. */
4584 /* Note that ABS doesn't yield a positive number for INT_MIN, but
4585 that is compensated by the subsequent overflow when subtracting
4586 one / negating. */
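/* Concretely, with ABS: for EQ, |A| - 1 is negative exactly when
   A == 0 (the INT_MIN overflow mentioned above wraps to INT_MAX,
   leaving the sign clear as desired); for NE, -|A| is negative
   exactly when A != 0.  Either way the answer lands in the sign bit
   for the normalizing shift at the end.  */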
4588 if (abs_optab->handlers[(int) mode].insn_code != CODE_FOR_nothing)
4589 tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
4590 else if (ffs_optab->handlers[(int) mode].insn_code != CODE_FOR_nothing)
4591 tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
4592 else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
4594 op0 = protect_from_queue (op0, 0);
4595 tem = convert_modes (word_mode, mode, op0, 1);
4596 mode = word_mode;
4599 if (tem != 0)
4601 if (code == EQ)
4602 tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
4603 0, OPTAB_WIDEN);
4604 else
4605 tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
4608 /* If we couldn't do it that way, for NE we can "or" the two's complement
4609 of the value with itself. For EQ, we take the one's complement of
4610 that "or", which is an extra insn, so we only handle EQ if branches
4611 are expensive. */
4613 if (tem == 0 && (code == NE || BRANCH_COST > 1))
4615 if (rtx_equal_p (subtarget, op0))
4616 subtarget = 0;
4618 tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
4619 tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
4620 OPTAB_WIDEN);
4622 if (tem && code == EQ)
4623 tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
4627 if (tem && normalizep)
4628 tem = expand_shift (RSHIFT_EXPR, mode, tem,
4629 size_int (GET_MODE_BITSIZE (mode) - 1),
4630 subtarget, normalizep == 1);
4632 if (tem)
4634 if (GET_MODE (tem) != target_mode)
4636 convert_move (target, tem, 0);
4637 tem = target;
4639 else if (!subtarget)
4641 emit_move_insn (target, tem);
4642 tem = target;
4645 else
4646 delete_insns_since (last);
4648 return tem;
4651 /* Like emit_store_flag, but always succeeds. */
4653 rtx
4654 emit_store_flag_force (target, code, op0, op1, mode, unsignedp, normalizep)
4655 rtx target;
4656 enum rtx_code code;
4657 rtx op0, op1;
4658 enum machine_mode mode;
4659 int unsignedp;
4660 int normalizep;
4662 rtx tem, label;
4664 /* First see if emit_store_flag can do the job. */
4665 tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
4666 if (tem != 0)
4667 return tem;
4669 if (normalizep == 0)
4670 normalizep = 1;
4672 /* If this failed, we have to do this with set/compare/jump/set code. */
4674 if (GET_CODE (target) != REG
4675 || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
4676 target = gen_reg_rtx (GET_MODE (target));
4678 emit_move_insn (target, const1_rtx);
4679 label = gen_label_rtx ();
4680 do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX, 0,
4681 NULL_RTX, label);
4683 emit_move_insn (target, const0_rtx);
4684 emit_label (label);
4686 return target;
4689 /* Perform a possibly multi-word comparison and conditional jump to LABEL
4690 if ARG1 OP ARG2 is true, where ARG1 and ARG2 are of mode MODE.
4692 The algorithm is based on the code in expr.c:do_jump.
4694 Note that this does not perform a general comparison. Only variants
4695 generated within expmed.c are correctly handled; others abort (but could
4696 be handled if needed). */
4698 static void
4699 do_cmp_and_jump (arg1, arg2, op, mode, label)
4700 rtx arg1, arg2, label;
4701 enum rtx_code op;
4702 enum machine_mode mode;
4704 /* If this mode is an integer too wide to compare properly,
4705 compare word by word. Rely on cse to optimize constant cases. */
4707 if (GET_MODE_CLASS (mode) == MODE_INT
4708 && ! can_compare_p (op, mode, ccp_jump))
4710 rtx label2 = gen_label_rtx ();
4712 switch (op)
4714 case LTU:
4715 do_jump_by_parts_greater_rtx (mode, 1, arg2, arg1, label2, label);
4716 break;
4718 case LEU:
4719 do_jump_by_parts_greater_rtx (mode, 1, arg1, arg2, label, label2);
4720 break;
4722 case LT:
4723 do_jump_by_parts_greater_rtx (mode, 0, arg2, arg1, label2, label);
4724 break;
4726 case GT:
4727 do_jump_by_parts_greater_rtx (mode, 0, arg1, arg2, label2, label);
4728 break;
4730 case GE:
4731 do_jump_by_parts_greater_rtx (mode, 0, arg2, arg1, label, label2);
4732 break;
4734 /* do_jump_by_parts_equality_rtx compares with zero. Luckily
4735 those are the only equality operations we do. */
4736 case EQ:
4737 if (arg2 != const0_rtx || mode != GET_MODE (arg1))
4738 abort ();
4739 do_jump_by_parts_equality_rtx (arg1, label2, label);
4740 break;
4742 case NE:
4743 if (arg2 != const0_rtx || mode != GET_MODE (arg1))
4744 abort ();
4745 do_jump_by_parts_equality_rtx (arg1, label, label2);
4746 break;
4748 default:
4749 abort ();
4752 emit_label (label2);
4754 else
4756 emit_cmp_and_jump_insns (arg1, arg2, op, NULL_RTX, mode, 0, 0, label);