/* Medium-level subroutines: convert bit-field store and extract
   and shifts, multiplies and divides to rtl instructions.
   Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
   1999, 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Software Foundation, 59 Temple Place - Suite 330, Boston, MA
02111-1307, USA.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "toplev.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "flags.h"
#include "insn-config.h"
#include "expr.h"
#include "optabs.h"
#include "real.h"
#include "recog.h"
#include "langhooks.h"

static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
                                   unsigned HOST_WIDE_INT,
                                   unsigned HOST_WIDE_INT, rtx);
static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
                                   unsigned HOST_WIDE_INT, rtx);
static rtx extract_fixed_bit_field (enum machine_mode, rtx,
                                    unsigned HOST_WIDE_INT,
                                    unsigned HOST_WIDE_INT,
                                    unsigned HOST_WIDE_INT, rtx, int);
static rtx mask_rtx (enum machine_mode, int, int, int);
static rtx lshift_value (enum machine_mode, rtx, int, int);
static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
                                    unsigned HOST_WIDE_INT, int);
static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);

/* Nonzero means divide or modulus operations are relatively cheap for
   powers of two, so don't use branches; emit the operation instead.
   Usually, this will mean that the MD file will emit non-branch
   sequences.  */

static int sdiv_pow2_cheap[NUM_MACHINE_MODES];
static int smod_pow2_cheap[NUM_MACHINE_MODES];

#ifndef SLOW_UNALIGNED_ACCESS
#define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
#endif

/* For compilers that support multiple targets with different word sizes,
   MAX_BITS_PER_WORD contains the biggest value of BITS_PER_WORD.  An example
   is the H8/300(H) compiler.  */

#ifndef MAX_BITS_PER_WORD
#define MAX_BITS_PER_WORD BITS_PER_WORD
#endif

/* Reduce conditional compilation elsewhere.  */
#ifndef HAVE_insv
#define HAVE_insv	0
#define CODE_FOR_insv	CODE_FOR_nothing
#define gen_insv(a,b,c,d) NULL_RTX
#endif
#ifndef HAVE_extv
#define HAVE_extv	0
#define CODE_FOR_extv	CODE_FOR_nothing
#define gen_extv(a,b,c,d) NULL_RTX
#endif
#ifndef HAVE_extzv
#define HAVE_extzv	0
#define CODE_FOR_extzv	CODE_FOR_nothing
#define gen_extzv(a,b,c,d) NULL_RTX
#endif

/* Cost of various pieces of RTL.  Note that some of these are indexed by
   shift count and some by mode.  */
static int zero_cost;
static int add_cost[NUM_MACHINE_MODES];
static int neg_cost[NUM_MACHINE_MODES];
static int shift_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
static int shiftadd_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
static int shiftsub_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
static int mul_cost[NUM_MACHINE_MODES];
static int div_cost[NUM_MACHINE_MODES];
static int mul_widen_cost[NUM_MACHINE_MODES];
static int mul_highpart_cost[NUM_MACHINE_MODES];
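
/* Illustrative note, not part of the original source: once init_expmed
   below has run, shift_cost[SImode][3] holds the cost of an SImode
   ASHIFT by 3, and shiftadd_cost[SImode][3] the cost of
   (plus (mult reg 8) reg), so later code in this file can weigh
   shift-and-add sequences against mul_cost[SImode] when synthesizing
   multiplications by constants.  */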

void
init_expmed (void)
{
  struct
  {
    struct rtx_def reg;
    struct rtx_def plus;	rtunion plus_fld1;
    struct rtx_def neg;
    struct rtx_def udiv;	rtunion udiv_fld1;
    struct rtx_def mult;	rtunion mult_fld1;
    struct rtx_def div;		rtunion div_fld1;
    struct rtx_def mod;		rtunion mod_fld1;
    struct rtx_def zext;
    struct rtx_def wide_mult;	rtunion wide_mult_fld1;
    struct rtx_def wide_lshr;	rtunion wide_lshr_fld1;
    struct rtx_def wide_trunc;
    struct rtx_def shift;	rtunion shift_fld1;
    struct rtx_def shift_mult;	rtunion shift_mult_fld1;
    struct rtx_def shift_add;	rtunion shift_add_fld1;
    struct rtx_def shift_sub;	rtunion shift_sub_fld1;
  } all;

  rtx pow2[MAX_BITS_PER_WORD];
  rtx cint[MAX_BITS_PER_WORD];
  int m, n;
  enum machine_mode mode, wider_mode;

  zero_cost = rtx_cost (const0_rtx, 0);

  for (m = 1; m < MAX_BITS_PER_WORD; m++)
    {
      pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
      cint[m] = GEN_INT (m);
    }

  memset (&all, 0, sizeof all);

  PUT_CODE (&all.reg, REG);
  REGNO (&all.reg) = 10000;

  PUT_CODE (&all.plus, PLUS);
  XEXP (&all.plus, 0) = &all.reg;
  XEXP (&all.plus, 1) = &all.reg;

  PUT_CODE (&all.neg, NEG);
  XEXP (&all.neg, 0) = &all.reg;

  PUT_CODE (&all.udiv, UDIV);
  XEXP (&all.udiv, 0) = &all.reg;
  XEXP (&all.udiv, 1) = &all.reg;

  PUT_CODE (&all.mult, MULT);
  XEXP (&all.mult, 0) = &all.reg;
  XEXP (&all.mult, 1) = &all.reg;

  PUT_CODE (&all.div, DIV);
  XEXP (&all.div, 0) = &all.reg;
  XEXP (&all.div, 1) = 32 < MAX_BITS_PER_WORD ? cint[32] : GEN_INT (32);

  PUT_CODE (&all.mod, MOD);
  XEXP (&all.mod, 0) = &all.reg;
  XEXP (&all.mod, 1) = XEXP (&all.div, 1);

  PUT_CODE (&all.zext, ZERO_EXTEND);
  XEXP (&all.zext, 0) = &all.reg;

  PUT_CODE (&all.wide_mult, MULT);
  XEXP (&all.wide_mult, 0) = &all.zext;
  XEXP (&all.wide_mult, 1) = &all.zext;

  PUT_CODE (&all.wide_lshr, LSHIFTRT);
  XEXP (&all.wide_lshr, 0) = &all.wide_mult;

  PUT_CODE (&all.wide_trunc, TRUNCATE);
  XEXP (&all.wide_trunc, 0) = &all.wide_lshr;

  PUT_CODE (&all.shift, ASHIFT);
  XEXP (&all.shift, 0) = &all.reg;

  PUT_CODE (&all.shift_mult, MULT);
  XEXP (&all.shift_mult, 0) = &all.reg;

  PUT_CODE (&all.shift_add, PLUS);
  XEXP (&all.shift_add, 0) = &all.shift_mult;
  XEXP (&all.shift_add, 1) = &all.reg;

  PUT_CODE (&all.shift_sub, MINUS);
  XEXP (&all.shift_sub, 0) = &all.shift_mult;
  XEXP (&all.shift_sub, 1) = &all.reg;

  for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
       mode != VOIDmode;
       mode = GET_MODE_WIDER_MODE (mode))
    {
      PUT_MODE (&all.reg, mode);
      PUT_MODE (&all.plus, mode);
      PUT_MODE (&all.neg, mode);
      PUT_MODE (&all.udiv, mode);
      PUT_MODE (&all.mult, mode);
      PUT_MODE (&all.div, mode);
      PUT_MODE (&all.mod, mode);
      PUT_MODE (&all.wide_trunc, mode);
      PUT_MODE (&all.shift, mode);
      PUT_MODE (&all.shift_mult, mode);
      PUT_MODE (&all.shift_add, mode);
      PUT_MODE (&all.shift_sub, mode);

      add_cost[mode] = rtx_cost (&all.plus, SET);
      neg_cost[mode] = rtx_cost (&all.neg, SET);
      div_cost[mode] = rtx_cost (&all.udiv, SET);
      mul_cost[mode] = rtx_cost (&all.mult, SET);

      sdiv_pow2_cheap[mode] = (rtx_cost (&all.div, SET) <= 2 * add_cost[mode]);
      smod_pow2_cheap[mode] = (rtx_cost (&all.mod, SET) <= 2 * add_cost[mode]);

      wider_mode = GET_MODE_WIDER_MODE (mode);
      if (wider_mode != VOIDmode)
        {
          PUT_MODE (&all.zext, wider_mode);
          PUT_MODE (&all.wide_mult, wider_mode);
          PUT_MODE (&all.wide_lshr, wider_mode);
          XEXP (&all.wide_lshr, 1) = GEN_INT (GET_MODE_BITSIZE (mode));

          mul_widen_cost[wider_mode] = rtx_cost (&all.wide_mult, SET);
          mul_highpart_cost[mode] = rtx_cost (&all.wide_trunc, SET);
        }

      shift_cost[mode][0] = 0;
      shiftadd_cost[mode][0] = shiftsub_cost[mode][0] = add_cost[mode];

      n = MIN (MAX_BITS_PER_WORD, GET_MODE_BITSIZE (mode));
      for (m = 1; m < n; m++)
        {
          XEXP (&all.shift, 1) = cint[m];
          XEXP (&all.shift_mult, 1) = pow2[m];

          shift_cost[mode][m] = rtx_cost (&all.shift, SET);
          shiftadd_cost[mode][m] = rtx_cost (&all.shift_add, SET);
          shiftsub_cost[mode][m] = rtx_cost (&all.shift_sub, SET);
        }
    }
}
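
/* Sketch of what the skeletons above price, for illustration only:
   with mode == SImode and wider_mode == DImode, mul_highpart_cost[SImode]
   is the cost of
     (truncate:SI (lshiftrt:DI (mult:DI (zero_extend:DI reg)
                                        (zero_extend:DI reg))
                               (const_int 32)))
   i.e. the high half of a widening multiply.  */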

/* Return an rtx representing minus the value of X.
   MODE is the intended mode of the result,
   useful if X is a CONST_INT.  */

rtx
negate_rtx (enum machine_mode mode, rtx x)
{
  rtx result = simplify_unary_operation (NEG, mode, x, mode);

  if (result == 0)
    result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);

  return result;
}

/* Report on the availability of insv/extv/extzv and the desired mode
   of each of their operands.  Returns MAX_MACHINE_MODE if HAVE_foo
   is false; else the mode of the specified operand.  If OPNO is -1,
   all the caller cares about is whether the insn is available.  */

enum machine_mode
mode_for_extraction (enum extraction_pattern pattern, int opno)
{
  const struct insn_data *data;

  switch (pattern)
    {
    case EP_insv:
      if (HAVE_insv)
        {
          data = &insn_data[CODE_FOR_insv];
          break;
        }
      return MAX_MACHINE_MODE;

    case EP_extv:
      if (HAVE_extv)
        {
          data = &insn_data[CODE_FOR_extv];
          break;
        }
      return MAX_MACHINE_MODE;

    case EP_extzv:
      if (HAVE_extzv)
        {
          data = &insn_data[CODE_FOR_extzv];
          break;
        }
      return MAX_MACHINE_MODE;

    default:
      abort ();
    }

  if (opno == -1)
    return VOIDmode;

  /* Everyone who uses this function used to follow it with
     if (result == VOIDmode) result = word_mode; */
  if (data->operand[opno].mode == VOIDmode)
    return word_mode;
  return data->operand[opno].mode;
}
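
/* Usage sketch (illustrative): store_bit_field below calls
   mode_for_extraction (EP_insv, 3) to learn the mode the insv pattern
   wants for its value operand; a caller that only needs to know
   whether the pattern exists at all can pass OPNO == -1.  */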

/* Generate code to store value from rtx VALUE
   into a bit-field within structure STR_RTX
   containing BITSIZE bits starting at bit BITNUM.
   FIELDMODE is the machine-mode of the FIELD_DECL node for this field.  */

/* ??? Note that there are two different ideas here for how
   to determine the size to count bits within, for a register.
   One is BITS_PER_WORD, and the other is the size of operand 3
   of the insv pattern.

   If operand 3 of the insv pattern is VOIDmode, then we use BITS_PER_WORD;
   else, we use the mode of operand 3.  */

rtx
store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
                 unsigned HOST_WIDE_INT bitnum, enum machine_mode fieldmode,
                 rtx value)
{
  unsigned int unit
    = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
  unsigned HOST_WIDE_INT offset = bitnum / unit;
  unsigned HOST_WIDE_INT bitpos = bitnum % unit;
  rtx op0 = str_rtx;
  int byte_offset;

  enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);

  while (GET_CODE (op0) == SUBREG)
    {
      /* The following line once was done only if WORDS_BIG_ENDIAN,
         but I think that is a mistake.  WORDS_BIG_ENDIAN is
         meaningful at a much higher level; when structures are copied
         between memory and regs, the higher-numbered regs
         always get higher addresses.  */
      offset += (SUBREG_BYTE (op0) / UNITS_PER_WORD);
      /* We used to adjust BITPOS here, but now we do the whole adjustment
         right after the loop.  */
      op0 = SUBREG_REG (op0);
    }

  value = protect_from_queue (value, 0);

  /* Use vec_set patterns for inserting parts of vectors whenever
     available.  */
  if (VECTOR_MODE_P (GET_MODE (op0))
      && !MEM_P (op0)
      && (vec_set_optab->handlers[GET_MODE (op0)].insn_code
          != CODE_FOR_nothing)
      && fieldmode == GET_MODE_INNER (GET_MODE (op0))
      && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
      && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
    {
      enum machine_mode outermode = GET_MODE (op0);
      enum machine_mode innermode = GET_MODE_INNER (outermode);
      int icode = (int) vec_set_optab->handlers[outermode].insn_code;
      int pos = bitnum / GET_MODE_BITSIZE (innermode);
      rtx rtxpos = GEN_INT (pos);
      rtx src = value;
      rtx dest = op0;
      rtx pat, seq;
      enum machine_mode mode0 = insn_data[icode].operand[0].mode;
      enum machine_mode mode1 = insn_data[icode].operand[1].mode;
      enum machine_mode mode2 = insn_data[icode].operand[2].mode;

      start_sequence ();

      if (! (*insn_data[icode].operand[1].predicate) (src, mode1))
        src = copy_to_mode_reg (mode1, src);

      if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
        rtxpos = copy_to_mode_reg (mode2, rtxpos);

      /* We could handle this, but we should always be called with a pseudo
         for our targets and all insns should take them as outputs.  */
      if (! (*insn_data[icode].operand[0].predicate) (dest, mode0)
          || ! (*insn_data[icode].operand[1].predicate) (src, mode1)
          || ! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
        abort ();

      pat = GEN_FCN (icode) (dest, src, rtxpos);
      seq = get_insns ();
      end_sequence ();
      if (pat)
        {
          emit_insn (seq);
          emit_insn (pat);
          return dest;
        }
    }

  if (flag_force_mem)
    {
      int old_generating_concat_p = generating_concat_p;
      generating_concat_p = 0;
      value = force_not_mem (value);
      generating_concat_p = old_generating_concat_p;
    }

  /* If the target is a register, overwriting the entire object, or storing
     a full-word or multi-word field can be done with just a SUBREG.

     If the target is memory, storing any naturally aligned field can be
     done with a simple store.  For targets that support fast unaligned
     memory, any naturally sized, unit aligned field can be done directly.  */

  byte_offset = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
                + (offset * UNITS_PER_WORD);

  if (bitpos == 0
      && bitsize == GET_MODE_BITSIZE (fieldmode)
      && (!MEM_P (op0)
          ? ((GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD
              || GET_MODE_SIZE (GET_MODE (op0)) == GET_MODE_SIZE (fieldmode))
             && byte_offset % GET_MODE_SIZE (fieldmode) == 0)
          : (! SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0))
             || (offset * BITS_PER_UNIT % bitsize == 0
                 && MEM_ALIGN (op0) % GET_MODE_BITSIZE (fieldmode) == 0))))
    {
      if (GET_MODE (op0) != fieldmode)
        {
          if (GET_CODE (op0) == SUBREG)
            {
              if (GET_MODE (SUBREG_REG (op0)) == fieldmode
                  || GET_MODE_CLASS (fieldmode) == MODE_INT
                  || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT)
                op0 = SUBREG_REG (op0);
              else
                /* Else we've got some float mode source being extracted into
                   a different float mode destination -- this combination of
                   subregs results in Severe Tire Damage.  */
                abort ();
            }
          if (REG_P (op0))
            op0 = gen_rtx_SUBREG (fieldmode, op0, byte_offset);
          else
            op0 = adjust_address (op0, fieldmode, offset);
        }
      emit_move_insn (op0, value);
      return value;
    }

  /* Make sure we are playing with integral modes.  Pun with subregs
     if we aren't.  This must come after the entire register case above,
     since that case is valid for any mode.  The following cases are only
     valid for integral modes.  */
  {
    enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
    if (imode != GET_MODE (op0))
      {
        if (MEM_P (op0))
          op0 = adjust_address (op0, imode, 0);
        else if (imode != BLKmode)
          op0 = gen_lowpart (imode, op0);
        else
          abort ();
      }
  }

  /* We may be accessing data outside the field, which means
     we can alias adjacent data.  */
  if (MEM_P (op0))
    {
      op0 = shallow_copy_rtx (op0);
      set_mem_alias_set (op0, 0);
      set_mem_expr (op0, 0);
    }

  /* If OP0 is a register, BITPOS must count within a word.
     But as we have it, it counts within whatever size OP0 now has.
     On a bigendian machine, these are not the same, so convert.  */
  if (BYTES_BIG_ENDIAN
      && !MEM_P (op0)
      && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
    bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));

  /* Storing an lsb-aligned field in a register
     can be done with a movstrict instruction.  */

  if (!MEM_P (op0)
      && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
      && bitsize == GET_MODE_BITSIZE (fieldmode)
      && (movstrict_optab->handlers[fieldmode].insn_code
          != CODE_FOR_nothing))
    {
      int icode = movstrict_optab->handlers[fieldmode].insn_code;

      /* Get appropriate low part of the value being stored.  */
      if (GET_CODE (value) == CONST_INT || REG_P (value))
        value = gen_lowpart (fieldmode, value);
      else if (!(GET_CODE (value) == SYMBOL_REF
                 || GET_CODE (value) == LABEL_REF
                 || GET_CODE (value) == CONST))
        value = convert_to_mode (fieldmode, value, 0);

      if (! (*insn_data[icode].operand[1].predicate) (value, fieldmode))
        value = copy_to_mode_reg (fieldmode, value);

      if (GET_CODE (op0) == SUBREG)
        {
          if (GET_MODE (SUBREG_REG (op0)) == fieldmode
              || GET_MODE_CLASS (fieldmode) == MODE_INT
              || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT)
            op0 = SUBREG_REG (op0);
          else
            /* Else we've got some float mode source being extracted into
               a different float mode destination -- this combination of
               subregs results in Severe Tire Damage.  */
            abort ();
        }

      emit_insn (GEN_FCN (icode)
                 (gen_rtx_SUBREG (fieldmode, op0,
                                  (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
                                  + (offset * UNITS_PER_WORD)),
                  value));

      return value;
    }

  /* Handle fields bigger than a word.  */

  if (bitsize > BITS_PER_WORD)
    {
      /* Here we transfer the words of the field
         in the order least significant first.
         This is because the most significant word is the one which may
         be less than full.
         However, only do that if the value is not BLKmode.  */

      unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
      unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
      unsigned int i;

      /* This is the mode we must force value to, so that there will be enough
         subwords to extract.  Note that fieldmode will often (always?) be
         VOIDmode, because that is what store_field uses to indicate that this
         is a bit field, but passing VOIDmode to operand_subword_force will
         result in an abort.  */
      fieldmode = GET_MODE (value);
      if (fieldmode == VOIDmode)
        fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);

      for (i = 0; i < nwords; i++)
        {
          /* If I is 0, use the low-order word in both field and target;
             if I is 1, use the next to lowest word; and so on.  */
          unsigned int wordnum = (backwards ? nwords - i - 1 : i);
          unsigned int bit_offset = (backwards
                                     ? MAX ((int) bitsize - ((int) i + 1)
                                            * BITS_PER_WORD,
                                            0)
                                     : (int) i * BITS_PER_WORD);

          store_bit_field (op0, MIN (BITS_PER_WORD,
                                     bitsize - i * BITS_PER_WORD),
                           bitnum + bit_offset, word_mode,
                           operand_subword_force (value, wordnum, fieldmode));
        }
      return value;
    }
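
  /* Worked example for the loop above (illustrative, not from the
     original source): on a 32-bit little-endian target, storing a
     64-bit VALUE at BITNUM 0 iterates twice, storing the low word at
     bit 0 and the high word at bit 32, least significant word first.  */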

  /* From here on we can assume that the field to be stored
     fits within a single word; the wider-than-word case was handled
     above.  */

  /* OFFSET is the number of words or bytes (UNIT says which)
     from STR_RTX to the first word or byte containing part of the field.  */

  if (!MEM_P (op0))
    {
      if (offset != 0
          || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
        {
          if (!REG_P (op0))
            {
              /* Since this is a destination (lvalue), we can't copy it to a
                 pseudo.  We can trivially remove a SUBREG that does not
                 change the size of the operand.  Such a SUBREG may have been
                 added above.  Otherwise, abort.  */
              if (GET_CODE (op0) == SUBREG
                  && (GET_MODE_SIZE (GET_MODE (op0))
                      == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)))))
                op0 = SUBREG_REG (op0);
              else
                abort ();
            }
          op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
                                op0, (offset * UNITS_PER_WORD));
        }
      offset = 0;
    }
  else
    op0 = protect_from_queue (op0, 1);

  /* If VALUE is a floating-point mode, access it as an integer of the
     corresponding size.  This can occur on a machine with 64 bit registers
     that uses SFmode for float.  This can also occur for unaligned float
     structure fields.  */
  if (GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
      && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
    value = gen_lowpart ((GET_MODE (value) == VOIDmode
                          ? word_mode : int_mode_for_mode (GET_MODE (value))),
                         value);

  /* Now OFFSET is nonzero only if OP0 is memory
     and is therefore always measured in bytes.  */

  if (HAVE_insv
      && GET_MODE (value) != BLKmode
      && !(bitsize == 1 && GET_CODE (value) == CONST_INT)
      /* Ensure insv's size is wide enough for this field.  */
      && (GET_MODE_BITSIZE (op_mode) >= bitsize)
      && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
            && (bitsize + bitpos > GET_MODE_BITSIZE (op_mode))))
    {
      int xbitpos = bitpos;
      rtx value1;
      rtx xop0 = op0;
      rtx last = get_last_insn ();
      rtx pat;
      enum machine_mode maxmode = mode_for_extraction (EP_insv, 3);
      int save_volatile_ok = volatile_ok;

      volatile_ok = 1;

      /* If this machine's insv can only insert into a register, copy OP0
         into a register and save it back later.  */
      /* This used to check flag_force_mem, but that was a serious
         de-optimization now that flag_force_mem is enabled by -O2.  */
      if (MEM_P (op0)
          && ! ((*insn_data[(int) CODE_FOR_insv].operand[0].predicate)
                (op0, VOIDmode)))
        {
          rtx tempreg;
          enum machine_mode bestmode;

          /* Get the mode to use for inserting into this field.  If OP0 is
             BLKmode, get the smallest mode consistent with the alignment.  If
             OP0 is a non-BLKmode object that is no wider than MAXMODE, use its
             mode.  Otherwise, use the smallest mode containing the field.  */

          if (GET_MODE (op0) == BLKmode
              || GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (maxmode))
            bestmode
              = get_best_mode (bitsize, bitnum, MEM_ALIGN (op0), maxmode,
                               MEM_VOLATILE_P (op0));
          else
            bestmode = GET_MODE (op0);

          if (bestmode == VOIDmode
              || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
                  && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
            goto insv_loses;

          /* Adjust address to point to the containing unit of that mode.
             Compute offset as multiple of this unit, counting in bytes.  */
          unit = GET_MODE_BITSIZE (bestmode);
          offset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
          bitpos = bitnum % unit;
          op0 = adjust_address (op0, bestmode, offset);

          /* Fetch that unit, store the bitfield in it, then store
             the unit.  */
          tempreg = copy_to_reg (op0);
          store_bit_field (tempreg, bitsize, bitpos, fieldmode, value);
          emit_move_insn (op0, tempreg);
          return value;
        }
      volatile_ok = save_volatile_ok;

      /* Add OFFSET into OP0's address.  */
      if (MEM_P (xop0))
        xop0 = adjust_address (xop0, byte_mode, offset);

      /* If xop0 is a register, we need it in MAXMODE
         to make it acceptable to the format of insv.  */
      if (GET_CODE (xop0) == SUBREG)
        /* We can't just change the mode, because this might clobber op0,
           and we will need the original value of op0 if insv fails.  */
        xop0 = gen_rtx_SUBREG (maxmode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
      if (REG_P (xop0) && GET_MODE (xop0) != maxmode)
        xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);

      /* On big-endian machines, we count bits from the most significant.
         If the bit field insn does not, we must invert.  */

      if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
        xbitpos = unit - bitsize - xbitpos;

      /* We have been counting XBITPOS within UNIT.
         Count instead within the size of the register.  */
      if (BITS_BIG_ENDIAN && !MEM_P (xop0))
        xbitpos += GET_MODE_BITSIZE (maxmode) - unit;

      unit = GET_MODE_BITSIZE (maxmode);

      /* Convert VALUE to maxmode (which insv insn wants) in VALUE1.  */
      value1 = value;
      if (GET_MODE (value) != maxmode)
        {
          if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
            {
              /* Optimization: Don't bother really extending VALUE
                 if it has all the bits we will actually use.  However,
                 if we must narrow it, be sure we do it correctly.  */

              if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (maxmode))
                {
                  rtx tmp;

                  tmp = simplify_subreg (maxmode, value1, GET_MODE (value), 0);
                  if (! tmp)
                    tmp = simplify_gen_subreg (maxmode,
                                               force_reg (GET_MODE (value),
                                                          value1),
                                               GET_MODE (value), 0);
                  value1 = tmp;
                }
              else
                value1 = gen_lowpart (maxmode, value1);
            }
          else if (GET_CODE (value) == CONST_INT)
            value1 = gen_int_mode (INTVAL (value), maxmode);
          else if (!CONSTANT_P (value))
            /* Parse phase is supposed to make VALUE's data type
               match that of the component reference, which is a type
               at least as wide as the field; so VALUE should have
               a mode that corresponds to that type.  */
            abort ();
        }

      /* If this machine's insv insists on a register,
         get VALUE1 into a register.  */
      if (! ((*insn_data[(int) CODE_FOR_insv].operand[3].predicate)
             (value1, maxmode)))
        value1 = force_reg (maxmode, value1);

      pat = gen_insv (xop0, GEN_INT (bitsize), GEN_INT (xbitpos), value1);
      if (pat)
        emit_insn (pat);
      else
        {
          delete_insns_since (last);
          store_fixed_bit_field (op0, offset, bitsize, bitpos, value);
        }
    }
  else
    insv_loses:
    /* Insv is not available; store using shifts and boolean ops.  */
    store_fixed_bit_field (op0, offset, bitsize, bitpos, value);

  return value;
}

/* Use shifts and boolean operations to store VALUE
   into a bit field of width BITSIZE
   in a memory location specified by OP0, offset by OFFSET bytes.
   (OFFSET must be 0 if OP0 is a register.)
   The field starts at position BITPOS within the byte.
   (If OP0 is a register, it may be a full word or a narrower mode,
   but BITPOS still counts within a full word,
   which is significant on bigendian machines.)

   Note that protect_from_queue has already been done on OP0 and VALUE.  */
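
/* In outline (illustrative, not part of the original comment), for a
   field mask MASK of BITSIZE ones the code below computes

       OP0 = (OP0 & ~(MASK << BITPOS)) | ((VALUE & MASK) << BITPOS)

   skipping the clearing AND when the constant field value is all ones
   and the IOR when it is zero.  */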

static void
store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT offset,
                       unsigned HOST_WIDE_INT bitsize,
                       unsigned HOST_WIDE_INT bitpos, rtx value)
{
  enum machine_mode mode;
  unsigned int total_bits = BITS_PER_WORD;
  rtx subtarget, temp;
  int all_zero = 0;
  int all_one = 0;

  /* There is a case not handled here:
     a structure with a known alignment of just a halfword
     and a field split across two aligned halfwords within the structure.
     Or likewise a structure with a known alignment of just a byte
     and a field split across two bytes.
     Such cases are not supposed to be able to occur.  */

  if (REG_P (op0) || GET_CODE (op0) == SUBREG)
    {
      if (offset != 0)
        abort ();
      /* Special treatment for a bit field split across two registers.  */
      if (bitsize + bitpos > BITS_PER_WORD)
        {
          store_split_bit_field (op0, bitsize, bitpos, value);
          return;
        }
    }
  else
    {
      /* Get the proper mode to use for this field.  We want a mode that
         includes the entire field.  If such a mode would be larger than
         a word, we won't be doing the extraction the normal way.
         We don't want a mode bigger than the destination.  */

      mode = GET_MODE (op0);
      if (GET_MODE_BITSIZE (mode) == 0
          || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
        mode = word_mode;
      mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
                            MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));

      if (mode == VOIDmode)
        {
          /* The only way this should occur is if the field spans word
             boundaries.  */
          store_split_bit_field (op0, bitsize, bitpos + offset * BITS_PER_UNIT,
                                 value);
          return;
        }

      total_bits = GET_MODE_BITSIZE (mode);

      /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
         be in the range 0 to total_bits-1, and put any excess bytes in
         OFFSET.  */
      if (bitpos >= total_bits)
        {
          offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
          bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
                     * BITS_PER_UNIT);
        }

      /* Get ref to an aligned byte, halfword, or word containing the field.
         Adjust BITPOS to be position within a word,
         and OFFSET to be the offset of that word.
         Then alter OP0 to refer to that word.  */
      bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
      offset -= (offset % (total_bits / BITS_PER_UNIT));
      op0 = adjust_address (op0, mode, offset);
    }

  mode = GET_MODE (op0);

  /* Now MODE is either some integral mode for a MEM as OP0,
     or is a full-word for a REG as OP0.  TOTAL_BITS corresponds.
     The bit field is contained entirely within OP0.
     BITPOS is the starting bit number within OP0.
     (OP0's mode may actually be narrower than MODE.)  */

  if (BYTES_BIG_ENDIAN)
    /* BITPOS is the distance between our msb
       and that of the containing datum.
       Convert it to the distance from the lsb.  */
    bitpos = total_bits - bitsize - bitpos;

  /* Now BITPOS is always the distance between our lsb
     and that of OP0.  */

  /* Shift VALUE left by BITPOS bits.  If VALUE is not constant,
     we must first convert its mode to MODE.  */

  if (GET_CODE (value) == CONST_INT)
    {
      HOST_WIDE_INT v = INTVAL (value);

      if (bitsize < HOST_BITS_PER_WIDE_INT)
        v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;

      if (v == 0)
        all_zero = 1;
      else if ((bitsize < HOST_BITS_PER_WIDE_INT
                && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
               || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
        all_one = 1;

      value = lshift_value (mode, value, bitpos, bitsize);
    }
  else
    {
      int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
                      && bitpos + bitsize != GET_MODE_BITSIZE (mode));

      if (GET_MODE (value) != mode)
        {
          if ((REG_P (value) || GET_CODE (value) == SUBREG)
              && GET_MODE_SIZE (mode) < GET_MODE_SIZE (GET_MODE (value)))
            value = gen_lowpart (mode, value);
          else
            value = convert_to_mode (mode, value, 1);
        }

      if (must_and)
        value = expand_binop (mode, and_optab, value,
                              mask_rtx (mode, 0, bitsize, 0),
                              NULL_RTX, 1, OPTAB_LIB_WIDEN);
      if (bitpos > 0)
        value = expand_shift (LSHIFT_EXPR, mode, value,
                              build_int_2 (bitpos, 0), NULL_RTX, 1);
    }

  /* Now clear the chosen bits in OP0,
     except that if VALUE is -1 we need not bother.  */

  subtarget = (REG_P (op0) || ! flag_force_mem) ? op0 : 0;

  if (! all_one)
    {
      temp = expand_binop (mode, and_optab, op0,
                           mask_rtx (mode, bitpos, bitsize, 1),
                           subtarget, 1, OPTAB_LIB_WIDEN);
      subtarget = temp;
    }
  else
    temp = op0;

  /* Now logical-or VALUE into OP0, unless it is zero.  */

  if (! all_zero)
    temp = expand_binop (mode, ior_optab, temp, value,
                         subtarget, 1, OPTAB_LIB_WIDEN);
  if (op0 != temp)
    emit_move_insn (op0, temp);
}

/* Store a bit field that is split across multiple accessible memory objects.

   OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
   BITSIZE is the field width; BITPOS the position of its first bit
   (within the word).
   VALUE is the value to store.

   This does not yet handle fields wider than BITS_PER_WORD.  */
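
/* Worked example (illustrative, not from the original comment): with
   32-bit words, a 10-bit field at BITPOS 28 of a register is stored in
   two pieces by the loop below: 4 bits at position 28 of the first
   word, then 6 bits at position 0 of the next word.  */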

static void
store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
                       unsigned HOST_WIDE_INT bitpos, rtx value)
{
  unsigned int unit;
  unsigned int bitsdone = 0;

  /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
     much at a time.  */
  if (REG_P (op0) || GET_CODE (op0) == SUBREG)
    unit = BITS_PER_WORD;
  else
    unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);

  /* If VALUE is a constant other than a CONST_INT, get it into a register in
     WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
     that VALUE might be a floating-point constant.  */
  if (CONSTANT_P (value) && GET_CODE (value) != CONST_INT)
    {
      rtx word = gen_lowpart_common (word_mode, value);

      if (word && (value != word))
        value = word;
      else
        value = gen_lowpart_common (word_mode,
                                    force_reg (GET_MODE (value) != VOIDmode
                                               ? GET_MODE (value)
                                               : word_mode, value));
    }

  while (bitsdone < bitsize)
    {
      unsigned HOST_WIDE_INT thissize;
      rtx part, word;
      unsigned HOST_WIDE_INT thispos;
      unsigned HOST_WIDE_INT offset;

      offset = (bitpos + bitsdone) / unit;
      thispos = (bitpos + bitsdone) % unit;

      /* THISSIZE must not overrun a word boundary.  Otherwise,
         store_fixed_bit_field will call us again, and we will mutually
         recurse forever.  */
      thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
      thissize = MIN (thissize, unit - thispos);

      if (BYTES_BIG_ENDIAN)
        {
          int total_bits;

          /* We must do an endian conversion exactly the same way as it is
             done in extract_bit_field, so that the two calls to
             extract_fixed_bit_field will have comparable arguments.  */
          if (!MEM_P (value) || GET_MODE (value) == BLKmode)
            total_bits = BITS_PER_WORD;
          else
            total_bits = GET_MODE_BITSIZE (GET_MODE (value));

          /* Fetch successively less significant portions.  */
          if (GET_CODE (value) == CONST_INT)
            part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
                             >> (bitsize - bitsdone - thissize))
                            & (((HOST_WIDE_INT) 1 << thissize) - 1));
          else
            /* The args are chosen so that the last part includes the
               lsb.  Give extract_bit_field the value it needs (with
               endianness compensation) to fetch the piece we want.  */
            part = extract_fixed_bit_field (word_mode, value, 0, thissize,
                                            total_bits - bitsize + bitsdone,
                                            NULL_RTX, 1);
        }
      else
        {
          /* Fetch successively more significant portions.  */
          if (GET_CODE (value) == CONST_INT)
            part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
                             >> bitsdone)
                            & (((HOST_WIDE_INT) 1 << thissize) - 1));
          else
            part = extract_fixed_bit_field (word_mode, value, 0, thissize,
                                            bitsdone, NULL_RTX, 1);
        }

      /* If OP0 is a register, then handle OFFSET here.

         When handling multiword bitfields, extract_bit_field may pass
         down a word_mode SUBREG of a larger REG for a bitfield that actually
         crosses a word boundary.  Thus, for a SUBREG, we must find
         the current word starting from the base register.  */
      if (GET_CODE (op0) == SUBREG)
        {
          int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
          word = operand_subword_force (SUBREG_REG (op0), word_offset,
                                        GET_MODE (SUBREG_REG (op0)));
          offset = 0;
        }
      else if (REG_P (op0))
        {
          word = operand_subword_force (op0, offset, GET_MODE (op0));
          offset = 0;
        }
      else
        word = op0;

      /* OFFSET is in UNITs, and UNIT is in bits.
         store_fixed_bit_field wants offset in bytes.  */
      store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT, thissize,
                             thispos, part);
      bitsdone += thissize;
    }
}

/* Generate code to extract a bit-field from STR_RTX
   containing BITSIZE bits, starting at BITNUM,
   and put it in TARGET if possible (if TARGET is nonzero).
   Regardless of TARGET, we return the rtx for where the value is placed.
   It may be a QUEUED.

   STR_RTX is the structure containing the field (a REG or MEM).
   UNSIGNEDP is nonzero if this is an unsigned bit field.
   MODE is the natural mode of the field value once extracted.
   TMODE is the mode the caller would like the value to have;
   but the value may be returned with type MODE instead.

   If a TARGET is specified and we can store in it at no extra cost,
   we do so, and return TARGET.
   Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
   if they are equally easy.  */

rtx
extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
                   unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
                   enum machine_mode mode, enum machine_mode tmode)
{
  unsigned int unit
    = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
  unsigned HOST_WIDE_INT offset = bitnum / unit;
  unsigned HOST_WIDE_INT bitpos = bitnum % unit;
  rtx op0 = str_rtx;
  rtx spec_target = target;
  rtx spec_target_subreg = 0;
  enum machine_mode int_mode;
  enum machine_mode extv_mode = mode_for_extraction (EP_extv, 0);
  enum machine_mode extzv_mode = mode_for_extraction (EP_extzv, 0);
  enum machine_mode mode1;
  int byte_offset;

  if (tmode == VOIDmode)
    tmode = mode;

  while (GET_CODE (op0) == SUBREG)
    {
      bitpos += SUBREG_BYTE (op0) * BITS_PER_UNIT;
      if (bitpos > unit)
        {
          offset += (bitpos / unit);
          bitpos %= unit;
        }
      op0 = SUBREG_REG (op0);
    }

  if (REG_P (op0)
      && mode == GET_MODE (op0)
      && bitnum == 0
      && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
    {
      /* We're trying to extract a full register from itself.  */
      return op0;
    }

  /* Use vec_extract patterns for extracting parts of vectors whenever
     available.  */
  if (VECTOR_MODE_P (GET_MODE (op0))
      && !MEM_P (op0)
      && (vec_extract_optab->handlers[GET_MODE (op0)].insn_code
          != CODE_FOR_nothing)
      && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
          == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
    {
      enum machine_mode outermode = GET_MODE (op0);
      enum machine_mode innermode = GET_MODE_INNER (outermode);
      int icode = (int) vec_extract_optab->handlers[outermode].insn_code;
      unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
      rtx rtxpos = GEN_INT (pos);
      rtx src = op0;
      rtx dest = NULL, pat, seq;
      enum machine_mode mode0 = insn_data[icode].operand[0].mode;
      enum machine_mode mode1 = insn_data[icode].operand[1].mode;
      enum machine_mode mode2 = insn_data[icode].operand[2].mode;

      if (innermode == tmode || innermode == mode)
        dest = target;

      if (!dest)
        dest = gen_reg_rtx (innermode);

      start_sequence ();

      if (! (*insn_data[icode].operand[0].predicate) (dest, mode0))
        dest = copy_to_mode_reg (mode0, dest);

      if (! (*insn_data[icode].operand[1].predicate) (src, mode1))
        src = copy_to_mode_reg (mode1, src);

      if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
        rtxpos = copy_to_mode_reg (mode2, rtxpos);

      /* We could handle this, but we should always be called with a pseudo
         for our targets and all insns should take them as outputs.  */
      if (! (*insn_data[icode].operand[0].predicate) (dest, mode0)
          || ! (*insn_data[icode].operand[1].predicate) (src, mode1)
          || ! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
        abort ();

      pat = GEN_FCN (icode) (dest, src, rtxpos);
      seq = get_insns ();
      end_sequence ();
      if (pat)
        {
          emit_insn (seq);
          emit_insn (pat);
          return dest;
        }
    }

  /* Make sure we are playing with integral modes.  Pun with subregs
     if we aren't.  */
  {
    enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
    if (imode != GET_MODE (op0))
      {
        if (MEM_P (op0))
          op0 = adjust_address (op0, imode, 0);
        else if (imode != BLKmode)
          op0 = gen_lowpart (imode, op0);
        else
          abort ();
      }
  }

  /* We may be accessing data outside the field, which means
     we can alias adjacent data.  */
  if (MEM_P (op0))
    {
      op0 = shallow_copy_rtx (op0);
      set_mem_alias_set (op0, 0);
      set_mem_expr (op0, 0);
    }

  /* Extraction of a full-word or multi-word value from a structure
     in a register or aligned memory can be done with just a SUBREG.
     A subword value in the least significant part of a register
     can also be extracted with a SUBREG.  For this, we need the
     byte offset of the value in op0.  */

  byte_offset = bitpos / BITS_PER_UNIT + offset * UNITS_PER_WORD;

  /* If OP0 is a register, BITPOS must count within a word.
     But as we have it, it counts within whatever size OP0 now has.
     On a bigendian machine, these are not the same, so convert.  */
  if (BYTES_BIG_ENDIAN
      && !MEM_P (op0)
      && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
    bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));

  /* ??? We currently assume TARGET is at least as big as BITSIZE.
     If that's wrong, the solution is to test for it and set TARGET to 0
     if needed.  */

  /* Only scalar integer modes can be converted via subregs.  There is an
     additional problem for FP modes here in that they can have a precision
     which is different from the size.  mode_for_size uses precision, but
     we want a mode based on the size, so we must avoid calling it for FP
     modes.  */
  mode1 = (SCALAR_INT_MODE_P (tmode)
           ? mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0)
           : mode);

  if (((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode)
        && bitpos % BITS_PER_WORD == 0)
       || (mode1 != BLKmode
           /* ??? The big endian test here is wrong.  This is correct
              if the value is in a register, and if mode_for_size is not
              the same mode as op0.  This causes us to get unnecessarily
              inefficient code from the Thumb port when -mbig-endian.  */
           && (BYTES_BIG_ENDIAN
               ? bitpos + bitsize == BITS_PER_WORD
               : bitpos == 0)))
      && ((!MEM_P (op0)
           && TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (mode),
                                     GET_MODE_BITSIZE (GET_MODE (op0)))
           && GET_MODE_SIZE (mode1) != 0
           && byte_offset % GET_MODE_SIZE (mode1) == 0)
          || (MEM_P (op0)
              && (! SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
                  || (offset * BITS_PER_UNIT % bitsize == 0
                      && MEM_ALIGN (op0) % bitsize == 0)))))
    {
      if (mode1 != GET_MODE (op0))
        {
          if (GET_CODE (op0) == SUBREG)
            {
              if (GET_MODE (SUBREG_REG (op0)) == mode1
                  || GET_MODE_CLASS (mode1) == MODE_INT
                  || GET_MODE_CLASS (mode1) == MODE_PARTIAL_INT)
                op0 = SUBREG_REG (op0);
              else
                /* Else we've got some float mode source being extracted into
                   a different float mode destination -- this combination of
                   subregs results in Severe Tire Damage.  */
                goto no_subreg_mode_swap;
            }
          if (REG_P (op0))
            op0 = gen_rtx_SUBREG (mode1, op0, byte_offset);
          else
            op0 = adjust_address (op0, mode1, offset);
        }
      if (mode1 != mode)
        return convert_to_mode (tmode, op0, unsignedp);
      return op0;
    }
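
  /* Illustrative example for the path above (not from the original
     source): extracting bits 0..31 of a DImode pseudo as SImode on a
     little-endian target satisfies the conditions and reduces to a
     single lowpart SUBREG, with no shifts or masks.  */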

 no_subreg_mode_swap:

  /* Handle fields bigger than a word.  */

  if (bitsize > BITS_PER_WORD)
    {
      /* Here we transfer the words of the field
         in the order least significant first.
         This is because the most significant word is the one which may
         be less than full.  */

      unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
      unsigned int i;

      if (target == 0 || !REG_P (target))
        target = gen_reg_rtx (mode);

      /* Indicate for flow that the entire target reg is being set.  */
      emit_insn (gen_rtx_CLOBBER (VOIDmode, target));

      for (i = 0; i < nwords; i++)
        {
          /* If I is 0, use the low-order word in both field and target;
             if I is 1, use the next to lowest word; and so on.  */
          /* Word number in TARGET to use.  */
          unsigned int wordnum
            = (WORDS_BIG_ENDIAN
               ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
               : i);
          /* Offset from start of field in OP0.  */
          unsigned int bit_offset = (WORDS_BIG_ENDIAN
                                     ? MAX (0, ((int) bitsize - ((int) i + 1)
                                                * (int) BITS_PER_WORD))
                                     : (int) i * BITS_PER_WORD);
          rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
          rtx result_part
            = extract_bit_field (op0, MIN (BITS_PER_WORD,
                                           bitsize - i * BITS_PER_WORD),
                                 bitnum + bit_offset, 1, target_part, mode,
                                 word_mode);

          if (target_part == 0)
            abort ();

          if (result_part != target_part)
            emit_move_insn (target_part, result_part);
        }

      if (unsignedp)
        {
          /* Unless we've filled TARGET, the upper regs in a multi-reg value
             need to be zero'd out.  */
          if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
            {
              unsigned int i, total_words;

              total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
              for (i = nwords; i < total_words; i++)
                emit_move_insn
                  (operand_subword (target,
                                    WORDS_BIG_ENDIAN ? total_words - i - 1 : i,
                                    1, VOIDmode),
                   const0_rtx);
            }
          return target;
        }

      /* Signed bit field: sign-extend with two arithmetic shifts.  */
      target = expand_shift (LSHIFT_EXPR, mode, target,
                             build_int_2 (GET_MODE_BITSIZE (mode) - bitsize, 0),
                             NULL_RTX, 0);
      return expand_shift (RSHIFT_EXPR, mode, target,
                           build_int_2 (GET_MODE_BITSIZE (mode) - bitsize, 0),
                           NULL_RTX, 0);
    }
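
  /* Illustrative example for the shifts above (not from the original
     source): extracting a signed 48-bit field into a 64-bit mode
     shifts left by 16 and then arithmetic-shifts right by 16,
     duplicating the field's sign bit through the upper 16 bits.  */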

  /* From here on we know the desired field is smaller than a word.  */

  /* Check if there is a correspondingly-sized integer field, so we can
     safely extract it as one size of integer, if necessary; then
     truncate or extend to the size that is wanted; then use SUBREGs or
     convert_to_mode to get one of the modes we really wanted.  */

  int_mode = int_mode_for_mode (tmode);
  if (int_mode == BLKmode)
    int_mode = int_mode_for_mode (mode);
  if (int_mode == BLKmode)
    abort ();	/* Should probably push op0 out to memory and then
                   do a load.  */

  /* OFFSET is the number of words or bytes (UNIT says which)
     from STR_RTX to the first word or byte containing part of the field.  */

  if (!MEM_P (op0))
    {
      if (offset != 0
          || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
        {
          if (!REG_P (op0))
            op0 = copy_to_reg (op0);
          op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
                                op0, (offset * UNITS_PER_WORD));
        }
      offset = 0;
    }
  else
    op0 = protect_from_queue (str_rtx, 1);

  /* Now OFFSET is nonzero only for memory operands.  */

  if (unsignedp)
    {
      if (HAVE_extzv
          && (GET_MODE_BITSIZE (extzv_mode) >= bitsize)
          && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
                && (bitsize + bitpos > GET_MODE_BITSIZE (extzv_mode))))
        {
          unsigned HOST_WIDE_INT xbitpos = bitpos, xoffset = offset;
          rtx bitsize_rtx, bitpos_rtx;
          rtx last = get_last_insn ();
          rtx xop0 = op0;
          rtx xtarget = target;
          rtx xspec_target = spec_target;
          rtx xspec_target_subreg = spec_target_subreg;
          rtx pat;
          enum machine_mode maxmode = mode_for_extraction (EP_extzv, 0);

          if (MEM_P (xop0))
            {
              int save_volatile_ok = volatile_ok;
              volatile_ok = 1;

              /* Is the memory operand acceptable?  */
              if (! ((*insn_data[(int) CODE_FOR_extzv].operand[1].predicate)
                     (xop0, GET_MODE (xop0))))
                {
                  /* No, load into a reg and extract from there.  */
                  enum machine_mode bestmode;

                  /* Get the mode to use for extracting from this field.  If
                     OP0 is BLKmode, get the smallest mode consistent with the
                     alignment.  If OP0 is a non-BLKmode object that is no
                     wider than MAXMODE, use its mode.  Otherwise, use the
                     smallest mode containing the field.  */

                  if (GET_MODE (xop0) == BLKmode
                      || (GET_MODE_SIZE (GET_MODE (op0))
                          > GET_MODE_SIZE (maxmode)))
                    bestmode = get_best_mode (bitsize, bitnum,
                                              MEM_ALIGN (xop0), maxmode,
                                              MEM_VOLATILE_P (xop0));
                  else
                    bestmode = GET_MODE (xop0);

                  if (bestmode == VOIDmode
                      || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (xop0))
                          && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (xop0)))
                    goto extzv_loses;

                  /* Compute offset as multiple of this unit,
                     counting in bytes.  */
                  unit = GET_MODE_BITSIZE (bestmode);
                  xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
                  xbitpos = bitnum % unit;
                  xop0 = adjust_address (xop0, bestmode, xoffset);

                  /* Fetch it to a register in that size.  */
                  xop0 = force_reg (bestmode, xop0);

                  /* XBITPOS counts within UNIT, which is what is expected.  */
                }
              else
                /* Get ref to first byte containing part of the field.  */
                xop0 = adjust_address (xop0, byte_mode, xoffset);

              volatile_ok = save_volatile_ok;
            }

          /* If op0 is a register, we need it in MAXMODE (which is usually
             SImode) to make it acceptable to the format of extzv.  */
          if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
            goto extzv_loses;
          if (REG_P (xop0) && GET_MODE (xop0) != maxmode)
            xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);

          /* On big-endian machines, we count bits from the most significant.
             If the bit field insn does not, we must invert.  */
          if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
            xbitpos = unit - bitsize - xbitpos;

          /* Now convert from counting within UNIT to counting in MAXMODE.  */
          if (BITS_BIG_ENDIAN && !MEM_P (xop0))
            xbitpos += GET_MODE_BITSIZE (maxmode) - unit;

          unit = GET_MODE_BITSIZE (maxmode);

          if (xtarget == 0
              || (flag_force_mem && MEM_P (xtarget)))
            xtarget = xspec_target = gen_reg_rtx (tmode);

          if (GET_MODE (xtarget) != maxmode)
            {
              if (REG_P (xtarget))
                {
                  int wider = (GET_MODE_SIZE (maxmode)
                               > GET_MODE_SIZE (GET_MODE (xtarget)));
                  xtarget = gen_lowpart (maxmode, xtarget);
                  if (wider)
                    xspec_target_subreg = xtarget;
                }
              else
                xtarget = gen_reg_rtx (maxmode);
            }

          /* If this machine's extzv insists on a register target,
             make sure we have one.  */
          if (! ((*insn_data[(int) CODE_FOR_extzv].operand[0].predicate)
                 (xtarget, maxmode)))
            xtarget = gen_reg_rtx (maxmode);

          bitsize_rtx = GEN_INT (bitsize);
          bitpos_rtx = GEN_INT (xbitpos);

          pat = gen_extzv (protect_from_queue (xtarget, 1),
                           xop0, bitsize_rtx, bitpos_rtx);
          if (pat)
            {
              emit_insn (pat);
              target = xtarget;
              spec_target = xspec_target;
              spec_target_subreg = xspec_target_subreg;
            }
          else
            {
              delete_insns_since (last);
              target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
                                                bitpos, target, 1);
            }
        }
      else
        extzv_loses:
        target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
                                          bitpos, target, 1);
    }
  else
    {
      if (HAVE_extv
          && (GET_MODE_BITSIZE (extv_mode) >= bitsize)
          && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
                && (bitsize + bitpos > GET_MODE_BITSIZE (extv_mode))))
        {
          int xbitpos = bitpos, xoffset = offset;
          rtx bitsize_rtx, bitpos_rtx;
          rtx last = get_last_insn ();
          rtx xop0 = op0, xtarget = target;
          rtx xspec_target = spec_target;
          rtx xspec_target_subreg = spec_target_subreg;
          rtx pat;
          enum machine_mode maxmode = mode_for_extraction (EP_extv, 0);

          if (MEM_P (xop0))
            {
              /* Is the memory operand acceptable?  */
              if (! ((*insn_data[(int) CODE_FOR_extv].operand[1].predicate)
                     (xop0, GET_MODE (xop0))))
                {
                  /* No, load into a reg and extract from there.  */
                  enum machine_mode bestmode;

                  /* Get the mode to use for extracting from this field.  If
                     OP0 is BLKmode, get the smallest mode consistent with the
                     alignment.  If OP0 is a non-BLKmode object that is no
                     wider than MAXMODE, use its mode.  Otherwise, use the
                     smallest mode containing the field.  */

                  if (GET_MODE (xop0) == BLKmode
                      || (GET_MODE_SIZE (GET_MODE (op0))
                          > GET_MODE_SIZE (maxmode)))
                    bestmode = get_best_mode (bitsize, bitnum,
                                              MEM_ALIGN (xop0), maxmode,
                                              MEM_VOLATILE_P (xop0));
                  else
                    bestmode = GET_MODE (xop0);

                  if (bestmode == VOIDmode
                      || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (xop0))
                          && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (xop0)))
                    goto extv_loses;

                  /* Compute offset as multiple of this unit,
                     counting in bytes.  */
                  unit = GET_MODE_BITSIZE (bestmode);
                  xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
                  xbitpos = bitnum % unit;
                  xop0 = adjust_address (xop0, bestmode, xoffset);

                  /* Fetch it to a register in that size.  */
                  xop0 = force_reg (bestmode, xop0);

                  /* XBITPOS counts within UNIT, which is what is expected.  */
                }
              else
                /* Get ref to first byte containing part of the field.  */
                xop0 = adjust_address (xop0, byte_mode, xoffset);
            }

          /* If op0 is a register, we need it in MAXMODE (which is usually
             SImode) to make it acceptable to the format of extv.  */
          if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
            goto extv_loses;
          if (REG_P (xop0) && GET_MODE (xop0) != maxmode)
            xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);

          /* On big-endian machines, we count bits from the most significant.
             If the bit field insn does not, we must invert.  */
          if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
            xbitpos = unit - bitsize - xbitpos;

          /* XBITPOS counts within a size of UNIT.
             Adjust to count within a size of MAXMODE.  */
          if (BITS_BIG_ENDIAN && !MEM_P (xop0))
            xbitpos += (GET_MODE_BITSIZE (maxmode) - unit);

          unit = GET_MODE_BITSIZE (maxmode);

          if (xtarget == 0
              || (flag_force_mem && MEM_P (xtarget)))
            xtarget = xspec_target = gen_reg_rtx (tmode);

          if (GET_MODE (xtarget) != maxmode)
            {
              if (REG_P (xtarget))
                {
                  int wider = (GET_MODE_SIZE (maxmode)
                               > GET_MODE_SIZE (GET_MODE (xtarget)));
                  xtarget = gen_lowpart (maxmode, xtarget);
                  if (wider)
                    xspec_target_subreg = xtarget;
                }
              else
                xtarget = gen_reg_rtx (maxmode);
            }

          /* If this machine's extv insists on a register target,
             make sure we have one.  */
          if (! ((*insn_data[(int) CODE_FOR_extv].operand[0].predicate)
                 (xtarget, maxmode)))
            xtarget = gen_reg_rtx (maxmode);

          bitsize_rtx = GEN_INT (bitsize);
          bitpos_rtx = GEN_INT (xbitpos);

          pat = gen_extv (protect_from_queue (xtarget, 1),
                          xop0, bitsize_rtx, bitpos_rtx);
          if (pat)
            {
              emit_insn (pat);
              target = xtarget;
              spec_target = xspec_target;
              spec_target_subreg = xspec_target_subreg;
            }
          else
            {
              delete_insns_since (last);
              target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
                                                bitpos, target, 0);
            }
        }
      else
        extv_loses:
        target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
                                          bitpos, target, 0);
    }

  if (target == spec_target)
    return target;
  if (target == spec_target_subreg)
    return spec_target;
  if (GET_MODE (target) != tmode && GET_MODE (target) != mode)
    {
      /* If the target mode is floating-point, first convert to the
         integer mode of that size and then access it as a floating-point
         value via a SUBREG.  */
      if (GET_MODE_CLASS (tmode) != MODE_INT
          && GET_MODE_CLASS (tmode) != MODE_PARTIAL_INT)
        {
          target = convert_to_mode (mode_for_size (GET_MODE_BITSIZE (tmode),
                                                   MODE_INT, 0),
                                    target, unsignedp);
          return gen_lowpart (tmode, target);
        }
      else
        return convert_to_mode (tmode, target, unsignedp);
    }
  return target;
}

/* Extract a bit field using shifts and boolean operations.
   Returns an rtx to represent the value.
   OP0 addresses a register (word) or memory (byte).
   BITPOS says which bit within the word or byte the bit field starts in.
   OFFSET says how many bytes farther the bit field starts;
   it is 0 if OP0 is a register.
   BITSIZE says how many bits long the bit field is.
   (If OP0 is a register, it may be narrower than a full word,
   but BITPOS still counts within a full word,
   which is significant on bigendian machines.)

   UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
   If TARGET is nonzero, attempts to store the value there
   and return TARGET, but this is not guaranteed.
   If TARGET is not used, create a pseudo-reg of mode TMODE for the value.  */
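
/* For the unsigned case the code below amounts to the familiar

       (OP0 >> BITPOS) & ((1 << BITSIZE) - 1)

   after the big-endian adjustment of BITPOS; this outline is
   illustrative and not part of the original comment.  */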
1679 static rtx
1680 extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
1681 unsigned HOST_WIDE_INT offset,
1682 unsigned HOST_WIDE_INT bitsize,
1683 unsigned HOST_WIDE_INT bitpos, rtx target,
1684 int unsignedp)
1686 unsigned int total_bits = BITS_PER_WORD;
1687 enum machine_mode mode;
1689 if (GET_CODE (op0) == SUBREG || REG_P (op0))
1691 /* Special treatment for a bit field split across two registers. */
1692 if (bitsize + bitpos > BITS_PER_WORD)
1693 return extract_split_bit_field (op0, bitsize, bitpos, unsignedp);
1695 else
1697 /* Get the proper mode to use for this field. We want a mode that
1698 includes the entire field. If such a mode would be larger than
1699 a word, we won't be doing the extraction the normal way. */
1701 mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
1702 MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));
1704 if (mode == VOIDmode)
1705 /* The only way this should occur is if the field spans word
1706 boundaries. */
1707 return extract_split_bit_field (op0, bitsize,
1708 bitpos + offset * BITS_PER_UNIT,
1709 unsignedp);
1711 total_bits = GET_MODE_BITSIZE (mode);
1713 /* Make sure bitpos is valid for the chosen mode. Adjust BITPOS to
1714 be in the range 0 to total_bits-1, and put any excess bytes in
1715 OFFSET. */
1716 if (bitpos >= total_bits)
1718 offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
1719 bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
1720 * BITS_PER_UNIT);
1723 /* Get ref to an aligned byte, halfword, or word containing the field.
1724 Adjust BITPOS to be position within a word,
1725 and OFFSET to be the offset of that word.
1726 Then alter OP0 to refer to that word. */
1727 bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
1728 offset -= (offset % (total_bits / BITS_PER_UNIT));
1729 op0 = adjust_address (op0, mode, offset);
1732 mode = GET_MODE (op0);
1734 if (BYTES_BIG_ENDIAN)
1735 /* BITPOS is the distance between our msb and that of OP0.
1736 Convert it to the distance from the lsb. */
1737 bitpos = total_bits - bitsize - bitpos;
1739 /* Now BITPOS is always the distance between the field's lsb and that of OP0.
1740 We have reduced the big-endian case to the little-endian case. */
1742 if (unsignedp)
1744 if (bitpos)
1746 /* If the field does not already start at the lsb,
1747 shift it so it does. */
1748 tree amount = build_int_2 (bitpos, 0);
1749 /* Maybe propagate the target for the shift. */
1750 /* But not if we will return it--could confuse integrate.c. */
1751 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1752 if (tmode != mode) subtarget = 0;
1753 op0 = expand_shift (RSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1755 /* Convert the value to the desired mode. */
1756 if (mode != tmode)
1757 op0 = convert_to_mode (tmode, op0, 1);
1759 /* Unless the msb of the field used to be the msb when we shifted,
1760 mask out the upper bits. */
1762 if (GET_MODE_BITSIZE (mode) != bitpos + bitsize)
1763 return expand_binop (GET_MODE (op0), and_optab, op0,
1764 mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1765 target, 1, OPTAB_LIB_WIDEN);
1766 return op0;
1769 /* To extract a signed bit-field, first shift its msb to the msb of the word,
1770 then arithmetic-shift its lsb to the lsb of the word. */
1771 op0 = force_reg (mode, op0);
1772 if (mode != tmode)
1773 target = 0;
1775 /* Find the narrowest integer mode that contains the field. */
1777 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1778 mode = GET_MODE_WIDER_MODE (mode))
1779 if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos)
1781 op0 = convert_to_mode (mode, op0, 0);
1782 break;
1785 if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos))
1787 tree amount
1788 = build_int_2 (GET_MODE_BITSIZE (mode) - (bitsize + bitpos), 0);
1789 /* Maybe propagate the target for the shift. */
1790 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1791 op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1794 return expand_shift (RSHIFT_EXPR, mode, op0,
1795 build_int_2 (GET_MODE_BITSIZE (mode) - bitsize, 0),
1796 target, 0);
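/* Illustrative aside, not part of this file's interfaces: the two
   extraction idioms above written out in plain C for 32-bit values.
   The unsigned case is shift-then-mask; the signed case left-justifies
   the field and relies on an arithmetic right shift to replicate the
   sign bit (assumed two's-complement behavior, as on GCC's hosts).
   Preconditions: 1 <= bitsize and bitpos + bitsize <= 32.  */

#include <stdint.h>

static uint32_t
example_extract_unsigned (uint32_t word, unsigned bitpos, unsigned bitsize)
{
  word >>= bitpos;                 /* bring the field's lsb to bit 0 */
  if (bitsize < 32)                /* mask off everything above the field */
    word &= ((uint32_t) 1 << bitsize) - 1;
  return word;
}

static int32_t
example_extract_signed (int32_t word, unsigned bitpos, unsigned bitsize)
{
  word <<= 32 - (bitsize + bitpos);   /* field msb -> word msb */
  return word >> (32 - bitsize);      /* arithmetic shift sign-extends */
}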
1799 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1800 of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1801 complement of that if COMPLEMENT. The mask is truncated if
1802 necessary to the width of mode MODE. The mask is zero-extended if
1803 BITSIZE+BITPOS is too small for MODE. */
1805 static rtx
1806 mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
1808 HOST_WIDE_INT masklow, maskhigh;
1810 if (bitsize == 0)
1811 masklow = 0;
1812 else if (bitpos < HOST_BITS_PER_WIDE_INT)
1813 masklow = (HOST_WIDE_INT) -1 << bitpos;
1814 else
1815 masklow = 0;
1817 if (bitpos + bitsize < HOST_BITS_PER_WIDE_INT)
1818 masklow &= ((unsigned HOST_WIDE_INT) -1
1819 >> (HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1821 if (bitpos <= HOST_BITS_PER_WIDE_INT)
1822 maskhigh = -1;
1823 else
1824 maskhigh = (HOST_WIDE_INT) -1 << (bitpos - HOST_BITS_PER_WIDE_INT);
1826 if (bitsize == 0)
1827 maskhigh = 0;
1828 else if (bitpos + bitsize > HOST_BITS_PER_WIDE_INT)
1829 maskhigh &= ((unsigned HOST_WIDE_INT) -1
1830 >> (2 * HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1831 else
1832 maskhigh = 0;
1834 if (complement)
1836 maskhigh = ~maskhigh;
1837 masklow = ~masklow;
1840 return immed_double_const (masklow, maskhigh, mode);
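/* Illustrative aside, not part of this file's interfaces: on a single
   64-bit host word the constant built above is just BITSIZE one bits
   shifted up by BITPOS; the MASKLOW/MASKHIGH code only splits this same
   value across two host words.  Precondition: bitpos < 64.  */

#include <stdint.h>

static uint64_t
example_mask (unsigned bitpos, unsigned bitsize, int complement)
{
  uint64_t ones = (bitsize >= 64
                   ? ~(uint64_t) 0
                   : ((uint64_t) 1 << bitsize) - 1);
  uint64_t mask = ones << bitpos;   /* e.g. bitpos 4, bitsize 3 -> 0x70 */
  return complement ? ~mask : mask;
}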
1843 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1844 VALUE truncated to BITSIZE bits and then shifted left BITPOS bits. */
1846 static rtx
1847 lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize)
1849 unsigned HOST_WIDE_INT v = INTVAL (value);
1850 HOST_WIDE_INT low, high;
1852 if (bitsize < HOST_BITS_PER_WIDE_INT)
1853 v &= ~((HOST_WIDE_INT) -1 << bitsize);
1855 if (bitpos < HOST_BITS_PER_WIDE_INT)
1857 low = v << bitpos;
1858 high = (bitpos > 0 ? (v >> (HOST_BITS_PER_WIDE_INT - bitpos)) : 0);
1860 else
1862 low = 0;
1863 high = v << (bitpos - HOST_BITS_PER_WIDE_INT);
1866 return immed_double_const (low, high, mode);
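/* Illustrative aside, not part of this file's interfaces: the same
   double-word shift on a pair of 64-bit host words.  The bits shifted
   out of the low word land in the high word, exactly as LOW/HIGH are
   computed above.  */

#include <stdint.h>

static void
example_lshift_double (uint64_t v, unsigned bitpos,
                       uint64_t *low, uint64_t *high)
{
  if (bitpos < 64)
    {
      *low = v << bitpos;
      *high = bitpos ? v >> (64 - bitpos) : 0;
    }
  else
    {
      *low = 0;
      *high = v << (bitpos - 64);
    }
}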
1869 /* Extract a bit field that is split across two words
1870 and return an RTX for the result.
1872 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1873 BITSIZE is the field width; BITPOS is the position of its first bit within the word.
1874 UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend. */
1876 static rtx
1877 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1878 unsigned HOST_WIDE_INT bitpos, int unsignedp)
1880 unsigned int unit;
1881 unsigned int bitsdone = 0;
1882 rtx result = NULL_RTX;
1883 int first = 1;
1885 /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
1886 much at a time. */
1887 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1888 unit = BITS_PER_WORD;
1889 else
1890 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1892 while (bitsdone < bitsize)
1894 unsigned HOST_WIDE_INT thissize;
1895 rtx part, word;
1896 unsigned HOST_WIDE_INT thispos;
1897 unsigned HOST_WIDE_INT offset;
1899 offset = (bitpos + bitsdone) / unit;
1900 thispos = (bitpos + bitsdone) % unit;
1902 /* THISSIZE must not overrun a word boundary. Otherwise,
1903 extract_fixed_bit_field will call us again, and we will mutually
1904 recurse forever. */
1905 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1906 thissize = MIN (thissize, unit - thispos);
1908 /* If OP0 is a register, then handle OFFSET here.
1910 When handling multiword bitfields, extract_bit_field may pass
1911 down a word_mode SUBREG of a larger REG for a bitfield that actually
1912 crosses a word boundary. Thus, for a SUBREG, we must find
1913 the current word starting from the base register. */
1914 if (GET_CODE (op0) == SUBREG)
1916 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1917 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1918 GET_MODE (SUBREG_REG (op0)));
1919 offset = 0;
1921 else if (REG_P (op0))
1923 word = operand_subword_force (op0, offset, GET_MODE (op0));
1924 offset = 0;
1926 else
1927 word = op0;
1929 /* Extract the parts in bit-counting order,
1930 whose meaning is determined by BYTES_PER_UNIT.
1931 OFFSET is in UNITs, and UNIT is in bits.
1932 extract_fixed_bit_field wants offset in bytes. */
1933 part = extract_fixed_bit_field (word_mode, word,
1934 offset * unit / BITS_PER_UNIT,
1935 thissize, thispos, 0, 1);
1936 bitsdone += thissize;
1938 /* Shift this part into place for the result. */
1939 if (BYTES_BIG_ENDIAN)
1941 if (bitsize != bitsdone)
1942 part = expand_shift (LSHIFT_EXPR, word_mode, part,
1943 build_int_2 (bitsize - bitsdone, 0), 0, 1);
1945 else
1947 if (bitsdone != thissize)
1948 part = expand_shift (LSHIFT_EXPR, word_mode, part,
1949 build_int_2 (bitsdone - thissize, 0), 0, 1);
1952 if (first)
1953 result = part;
1954 else
1955 /* Combine the parts with bitwise or. This works
1956 because we extracted each part as an unsigned bit field. */
1957 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
1958 OPTAB_LIB_WIDEN);
1960 first = 0;
1963 /* Unsigned bit field: we are done. */
1964 if (unsignedp)
1965 return result;
1966 /* Signed bit field: sign-extend with two arithmetic shifts. */
1967 result = expand_shift (LSHIFT_EXPR, word_mode, result,
1968 build_int_2 (BITS_PER_WORD - bitsize, 0),
1969 NULL_RTX, 0);
1970 return expand_shift (RSHIFT_EXPR, word_mode, result,
1971 build_int_2 (BITS_PER_WORD - bitsize, 0), NULL_RTX, 0);
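/* Illustrative aside, not part of this file's interfaces: for a field
   straddling two 32-bit words the loop above reduces to taking the top
   bits of WORD0 and the bottom bits of WORD1 and OR-ing them together
   (little-endian bit numbering; preconditions: 0 < bitpos < 32,
   32 < bitpos + bitsize, bitsize <= 32).  */

#include <stdint.h>

static uint32_t
example_extract_split (uint32_t word0, uint32_t word1,
                       unsigned bitpos, unsigned bitsize)
{
  unsigned lo_bits = 32 - bitpos;   /* bits contributed by word0 */
  uint32_t lo = word0 >> bitpos;
  uint32_t hi = word1 & (((uint32_t) 1 << (bitsize - lo_bits)) - 1);
  return lo | (hi << lo_bits);      /* unsigned result, as in the loop */
}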
1974 /* Add INC into TARGET. */
1976 void
1977 expand_inc (rtx target, rtx inc)
1979 rtx value = expand_binop (GET_MODE (target), add_optab,
1980 target, inc,
1981 target, 0, OPTAB_LIB_WIDEN);
1982 if (value != target)
1983 emit_move_insn (target, value);
1986 /* Subtract DEC from TARGET. */
1988 void
1989 expand_dec (rtx target, rtx dec)
1991 rtx value = expand_binop (GET_MODE (target), sub_optab,
1992 target, dec,
1993 target, 0, OPTAB_LIB_WIDEN);
1994 if (value != target)
1995 emit_move_insn (target, value);
1998 /* Output a shift instruction for expression code CODE,
1999 with SHIFTED being the rtx for the value to shift,
2000 and AMOUNT the tree for the amount to shift by.
2001 Store the result in the rtx TARGET, if that is convenient.
2002 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2003 Return the rtx for where the value is. */
rtx
2006 expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2007 tree amount, rtx target, int unsignedp)
2009 rtx op1, temp = 0;
2010 int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2011 int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2012 int try;
2014 /* Previously detected shift-counts computed by NEGATE_EXPR
2015 and shifted in the other direction; but that does not work
2016 on all machines. */
2018 op1 = expand_expr (amount, NULL_RTX, VOIDmode, 0);
2020 if (SHIFT_COUNT_TRUNCATED)
2022 if (GET_CODE (op1) == CONST_INT
2023 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2024 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
2025 op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2026 % GET_MODE_BITSIZE (mode));
2027 else if (GET_CODE (op1) == SUBREG
2028 && subreg_lowpart_p (op1))
2029 op1 = SUBREG_REG (op1);
2032 if (op1 == const0_rtx)
2033 return shifted;
2035 /* Check whether it's cheaper to implement a left shift by a constant
2036 bit count as a sequence of additions. */
2037 if (code == LSHIFT_EXPR
2038 && GET_CODE (op1) == CONST_INT
2039 && INTVAL (op1) > 0
2040 && INTVAL (op1) < GET_MODE_BITSIZE (mode)
2041 && shift_cost[mode][INTVAL (op1)] > INTVAL (op1) * add_cost[mode])
2043 int i;
2044 for (i = 0; i < INTVAL (op1); i++)
2046 temp = force_reg (mode, shifted);
2047 shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2048 unsignedp, OPTAB_LIB_WIDEN);
2050 return shifted;
2053 for (try = 0; temp == 0 && try < 3; try++)
2055 enum optab_methods methods;
2057 if (try == 0)
2058 methods = OPTAB_DIRECT;
2059 else if (try == 1)
2060 methods = OPTAB_WIDEN;
2061 else
2062 methods = OPTAB_LIB_WIDEN;
2064 if (rotate)
2066 /* Widening does not work for rotation. */
2067 if (methods == OPTAB_WIDEN)
2068 continue;
2069 else if (methods == OPTAB_LIB_WIDEN)
2071 /* If we have been unable to open-code this by a rotation,
2072 do it as the IOR of two shifts. I.e., to rotate A
2073 by N bits, compute (A << N) | ((unsigned) A >> (C - N))
2074 where C is the bitsize of A.
2076 It is theoretically possible that the target machine might
2077 not be able to perform either shift and hence we would
2078 be making two libcalls rather than just the one for the
2079 shift (similarly if IOR could not be done). We will allow
2080 this extremely unlikely lossage to avoid complicating the
2081 code below. */
2083 rtx subtarget = target == shifted ? 0 : target;
2084 rtx temp1;
2085 tree type = TREE_TYPE (amount);
2086 tree new_amount = make_tree (type, op1);
2087 tree other_amount
2088 = fold (build (MINUS_EXPR, type,
2089 convert (type,
2090 build_int_2 (GET_MODE_BITSIZE (mode),
2091 0)),
2092 amount));
2094 shifted = force_reg (mode, shifted);
2096 temp = expand_shift (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2097 mode, shifted, new_amount, subtarget, 1);
2098 temp1 = expand_shift (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2099 mode, shifted, other_amount, 0, 1);
2100 return expand_binop (mode, ior_optab, temp, temp1, target,
2101 unsignedp, methods);
2104 temp = expand_binop (mode,
2105 left ? rotl_optab : rotr_optab,
2106 shifted, op1, target, unsignedp, methods);
2108 /* If we don't have the rotate, but we are rotating by a constant
2109 that is in range, try a rotate in the opposite direction. */
2111 if (temp == 0 && GET_CODE (op1) == CONST_INT
2112 && INTVAL (op1) > 0
2113 && (unsigned int) INTVAL (op1) < GET_MODE_BITSIZE (mode))
2114 temp = expand_binop (mode,
2115 left ? rotr_optab : rotl_optab,
2116 shifted,
2117 GEN_INT (GET_MODE_BITSIZE (mode)
2118 - INTVAL (op1)),
2119 target, unsignedp, methods);
2121 else if (unsignedp)
2122 temp = expand_binop (mode,
2123 left ? ashl_optab : lshr_optab,
2124 shifted, op1, target, unsignedp, methods);
2126 /* Do arithmetic shifts.
2127 Also, if we are going to widen the operand, we can just as well
2128 use an arithmetic right-shift instead of a logical one. */
2129 if (temp == 0 && ! rotate
2130 && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2132 enum optab_methods methods1 = methods;
2134 /* If trying to widen a logical shift to an arithmetic shift,
2135 don't accept an arithmetic shift of the same size. */
2136 if (unsignedp)
2137 methods1 = OPTAB_MUST_WIDEN;
2139 /* Arithmetic shift */
2141 temp = expand_binop (mode,
2142 left ? ashl_optab : ashr_optab,
2143 shifted, op1, target, unsignedp, methods1);
2146 /* We used to try extzv here for logical right shifts, but that was
2147 only useful for one machine, the VAX, and caused poor code
2148 generation there for lshrdi3, so the code was deleted and a
2149 define_expand for lshrsi3 was added to vax.md. */
2152 if (temp == 0)
2153 abort ();
2154 return temp;
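/* Illustrative aside, not part of this file's interfaces: the rotate
   fallback above is the standard identity
   rotl (a, n) == (a << n) | (a >> (C - n)) for 0 < n < C, with C the
   bitsize of a.  In plain C for 32-bit values:  */

#include <stdint.h>

static uint32_t
example_rotl32 (uint32_t a, unsigned n)   /* requires 0 < n < 32 */
{
  return (a << n) | (a >> (32 - n));
}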
2157 enum alg_code { alg_zero, alg_m, alg_shift,
2158 alg_add_t_m2, alg_sub_t_m2,
2159 alg_add_factor, alg_sub_factor,
2160 alg_add_t2_m, alg_sub_t2_m,
2161 alg_add, alg_subtract, alg_factor, alg_shiftop };
2163 /* This structure records a sequence of operations.
2164 `ops' is the number of operations recorded.
2165 `cost' is their total cost.
2166 The operations are stored in `op' and the corresponding
2167 logarithms of the integer coefficients in `log'.
2169 These are the operations:
2170 alg_zero total := 0;
2171 alg_m total := multiplicand;
2172 alg_shift total := total * coeff;
2173 alg_add_t_m2 total := total + multiplicand * coeff;
2174 alg_sub_t_m2 total := total - multiplicand * coeff;
2175 alg_add_factor total := total * coeff + total;
2176 alg_sub_factor total := total * coeff - total;
2177 alg_add_t2_m total := total * coeff + multiplicand;
2178 alg_sub_t2_m total := total * coeff - multiplicand;
2180 The first operand must be either alg_zero or alg_m. */
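/* Illustrative aside, not part of this file's interfaces: for example,
   t = 10 can be recorded as { alg_m, alg_add_t_m2 (log 2),
   alg_shift (log 1) }, which plays back as this straight-line code:  */

static long
example_times_10 (long x)
{
  long total = x;       /* alg_m:        total = multiplicand */
  total += x << 2;      /* alg_add_t_m2: total = x + 4x =  5x */
  total <<= 1;          /* alg_shift:    total = 2 * 5x = 10x */
  return total;
}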
2182 struct algorithm
2184 short cost;
2185 short ops;
2186 /* The sizes of the OP and LOG fields are not directly related to the
2187 word size, but the worst-case algorithms arise when we have few
2188 consecutive ones or zeros, i.e., a multiplicand like 10101010101...
2189 In that case we will generate shift-by-2, add, shift-by-2, add,...,
2190 in total wordsize operations. */
2191 enum alg_code op[MAX_BITS_PER_WORD];
2192 char log[MAX_BITS_PER_WORD];
2195 /* Indicates the type of fixup needed after a constant multiplication.
2196 BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2197 the result should be negated, and ADD_VARIANT means that the
2198 multiplicand should be added to the result. */
2199 enum mult_variant {basic_variant, negate_variant, add_variant};
2201 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2202 int, enum machine_mode mode);
2203 static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
2204 struct algorithm *, enum mult_variant *, int);
2205 static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
2206 const struct algorithm *, enum mult_variant);
2207 static unsigned HOST_WIDE_INT choose_multiplier (unsigned HOST_WIDE_INT, int,
2208 int, unsigned HOST_WIDE_INT *,
2209 int *, int *);
2210 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2211 static rtx extract_high_half (enum machine_mode, rtx);
2212 static rtx expand_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
2213 int, int);
2214 /* Compute and return the best algorithm for multiplying by T.
2215 The algorithm must cost less than COST_LIMIT.
2216 If retval.cost >= COST_LIMIT, no algorithm was found and all
2217 other fields of the returned struct are undefined.
2218 MODE is the machine mode of the multiplication. */
2220 static void
2221 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2222 int cost_limit, enum machine_mode mode)
2224 int m;
2225 struct algorithm *alg_in, *best_alg;
2226 int cost;
2227 unsigned HOST_WIDE_INT q;
2228 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
2230 /* Indicate that no algorithm has been found yet. If none
2231 is found, this value will be returned to indicate failure. */
2232 alg_out->cost = cost_limit;
2234 if (cost_limit <= 0)
2235 return;
2237 /* Restrict the bits of "t" to the multiplication's mode. */
2238 t &= GET_MODE_MASK (mode);
2240 /* t == 1 can be done at zero cost. */
2241 if (t == 1)
2243 alg_out->ops = 1;
2244 alg_out->cost = 0;
2245 alg_out->op[0] = alg_m;
2246 return;
2249 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2250 fail now. */
2251 if (t == 0)
2253 if (zero_cost >= cost_limit)
2254 return;
2255 else
2257 alg_out->ops = 1;
2258 alg_out->cost = zero_cost;
2259 alg_out->op[0] = alg_zero;
2260 return;
2264 /* We'll be needing a couple extra algorithm structures now. */
2266 alg_in = alloca (sizeof (struct algorithm));
2267 best_alg = alloca (sizeof (struct algorithm));
2269 /* If we have a group of zero bits at the low-order part of T, try
2270 multiplying by the remaining bits and then doing a shift. */
2272 if ((t & 1) == 0)
2274 m = floor_log2 (t & -t); /* m = number of low zero bits */
2275 if (m < maxm)
2277 q = t >> m;
2278 /* The function expand_shift will choose between a shift and
2279 a sequence of additions, so the observed cost is given as
2280 MIN (m * add_cost[mode], shift_cost[mode][m]). */
2281 cost = m * add_cost[mode];
2282 if (shift_cost[mode][m] < cost)
2283 cost = shift_cost[mode][m];
2284 synth_mult (alg_in, q, cost_limit - cost, mode);
2286 cost += alg_in->cost;
2287 if (cost < cost_limit)
2289 struct algorithm *x;
2290 x = alg_in, alg_in = best_alg, best_alg = x;
2291 best_alg->log[best_alg->ops] = m;
2292 best_alg->op[best_alg->ops] = alg_shift;
2293 cost_limit = cost;
2298 /* If we have an odd number, add or subtract one. */
2299 if ((t & 1) != 0)
2301 unsigned HOST_WIDE_INT w;
2303 for (w = 1; (w & t) != 0; w <<= 1)
2305 /* If T was -1, then W will be zero after the loop. This is another
2306 case where T ends with ...111. Handling this with (T + 1) and
2307 subtracting 1 produces slightly better code and makes algorithm
2308 selection much faster than treating it like the ...0111 case
2309 below. */
2310 if (w == 0
2311 || (w > 2
2312 /* Reject the case where t is 3.
2313 Thus we prefer addition in that case. */
2314 && t != 3))
2316 /* T ends with ...111. Multiply by (T + 1) and subtract 1. */
2318 cost = add_cost[mode];
2319 synth_mult (alg_in, t + 1, cost_limit - cost, mode);
2321 cost += alg_in->cost;
2322 if (cost < cost_limit)
2324 struct algorithm *x;
2325 x = alg_in, alg_in = best_alg, best_alg = x;
2326 best_alg->log[best_alg->ops] = 0;
2327 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2328 cost_limit = cost;
2331 else
2333 /* T ends with ...01 or ...011. Multiply by (T - 1) and add 1. */
2335 cost = add_cost[mode];
2336 synth_mult (alg_in, t - 1, cost_limit - cost, mode);
2338 cost += alg_in->cost;
2339 if (cost < cost_limit)
2341 struct algorithm *x;
2342 x = alg_in, alg_in = best_alg, best_alg = x;
2343 best_alg->log[best_alg->ops] = 0;
2344 best_alg->op[best_alg->ops] = alg_add_t_m2;
2345 cost_limit = cost;
2350 /* Look for factors of t of the form
2351 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2352 If we find such a factor, we can multiply by t using an algorithm that
2353 multiplies by q, shifts the result left by m, and adds or subtracts the unshifted value.
2355 We search for large factors first and loop down, even though large factors
2356 are less probable than small ones; if we find a large factor we will find a
2357 good sequence quickly, and therefore be able to prune (by decreasing
2358 COST_LIMIT) the search. */
2360 for (m = floor_log2 (t - 1); m >= 2; m--)
2362 unsigned HOST_WIDE_INT d;
2364 d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2365 if (t % d == 0 && t > d && m < maxm)
2367 cost = add_cost[mode] + shift_cost[mode][m];
2368 if (shiftadd_cost[mode][m] < cost)
2369 cost = shiftadd_cost[mode][m];
2370 synth_mult (alg_in, t / d, cost_limit - cost, mode);
2372 cost += alg_in->cost;
2373 if (cost < cost_limit)
2375 struct algorithm *x;
2376 x = alg_in, alg_in = best_alg, best_alg = x;
2377 best_alg->log[best_alg->ops] = m;
2378 best_alg->op[best_alg->ops] = alg_add_factor;
2379 cost_limit = cost;
2381 /* Other factors will have been taken care of in the recursion. */
2382 break;
2385 d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2386 if (t % d == 0 && t > d && m < maxm)
2388 cost = add_cost[mode] + shift_cost[mode][m];
2389 if (shiftsub_cost[mode][m] < cost)
2390 cost = shiftsub_cost[mode][m];
2391 synth_mult (alg_in, t / d, cost_limit - cost, mode);
2393 cost += alg_in->cost;
2394 if (cost < cost_limit)
2396 struct algorithm *x;
2397 x = alg_in, alg_in = best_alg, best_alg = x;
2398 best_alg->log[best_alg->ops] = m;
2399 best_alg->op[best_alg->ops] = alg_sub_factor;
2400 cost_limit = cost;
2402 break;
2406 /* Try shift-and-add (load effective address) instructions,
2407 i.e. do a*3, a*5, a*9. */
2408 if ((t & 1) != 0)
2410 q = t - 1;
2411 q = q & -q;
2412 m = exact_log2 (q);
2413 if (m >= 0 && m < maxm)
2415 cost = shiftadd_cost[mode][m];
2416 synth_mult (alg_in, (t - 1) >> m, cost_limit - cost, mode);
2418 cost += alg_in->cost;
2419 if (cost < cost_limit)
2421 struct algorithm *x;
2422 x = alg_in, alg_in = best_alg, best_alg = x;
2423 best_alg->log[best_alg->ops] = m;
2424 best_alg->op[best_alg->ops] = alg_add_t2_m;
2425 cost_limit = cost;
2429 q = t + 1;
2430 q = q & -q;
2431 m = exact_log2 (q);
2432 if (m >= 0 && m < maxm)
2434 cost = shiftsub_cost[mode][m];
2435 synth_mult (alg_in, (t + 1) >> m, cost_limit - cost, mode);
2437 cost += alg_in->cost;
2438 if (cost < cost_limit)
2440 struct algorithm *x;
2441 x = alg_in, alg_in = best_alg, best_alg = x;
2442 best_alg->log[best_alg->ops] = m;
2443 best_alg->op[best_alg->ops] = alg_sub_t2_m;
2444 cost_limit = cost;
2449 /* If cost_limit has not decreased since we stored it in alg_out->cost,
2450 we have not found any algorithm. */
2451 if (cost_limit == alg_out->cost)
2452 return;
2454 /* If we are getting too long a sequence for `struct algorithm'
2455 to record, make this search fail. */
2456 if (best_alg->ops == MAX_BITS_PER_WORD)
2457 return;
2459 /* Copy the algorithm from temporary space to the space at alg_out.
2460 We avoid using structure assignment because the majority of
2461 best_alg is normally undefined, and this is a critical function. */
2462 alg_out->ops = best_alg->ops + 1;
2463 alg_out->cost = cost_limit;
2464 memcpy (alg_out->op, best_alg->op,
2465 alg_out->ops * sizeof *alg_out->op);
2466 memcpy (alg_out->log, best_alg->log,
2467 alg_out->ops * sizeof *alg_out->log);
2470 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2471 Try three variations:
2473 - a shift/add sequence based on VAL itself
2474 - a shift/add sequence based on -VAL, followed by a negation
2475 - a shift/add sequence based on VAL - 1, followed by an addition.
2477 Return true if the cheapest of these costs less than MULT_COST,
2478 describing the algorithm in *ALG and the final fixup in *VARIANT. */
2480 static bool
2481 choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
2482 struct algorithm *alg, enum mult_variant *variant,
2483 int mult_cost)
2485 struct algorithm alg2;
2487 *variant = basic_variant;
2488 synth_mult (alg, val, mult_cost, mode);
2490 /* This works only if the inverted value actually fits in an
2491 `unsigned int'. */
2492 if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
2494 synth_mult (&alg2, -val, MIN (alg->cost, mult_cost) - neg_cost[mode],
2495 mode);
2496 alg2.cost += neg_cost[mode];
2497 if (alg2.cost < alg->cost)
2498 *alg = alg2, *variant = negate_variant;
2501 /* This proves very useful for division-by-constant. */
2502 synth_mult (&alg2, val - 1, MIN (alg->cost, mult_cost) - add_cost[mode],
2503 mode);
2504 alg2.cost += add_cost[mode];
2505 if (alg2.cost < alg->cost)
2506 *alg = alg2, *variant = add_variant;
2508 return alg->cost < mult_cost;
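/* Illustrative aside, not part of this file's interfaces: for val = 7
   the three candidates correspond to e.g. (x << 3) - x from VAL itself,
   a sequence for -7 followed by a negation, and -- sketched below --
   the sequence for VAL - 1 = 6 followed by one addition.  */

static long
example_times_7_add_variant (long x)
{
  long t = (x << 1) + x;   /* synth_mult (6), step 1: 3x */
  t <<= 1;                 /* synth_mult (6), step 2: 6x */
  return t + x;            /* add_variant fixup:      7x */
}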
2511 /* A subroutine of expand_mult, used for constant multiplications.
2512 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2513 convenient. Use the shift/add sequence described by ALG and apply
2514 the final fixup specified by VARIANT. */
2516 static rtx
2517 expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
2518 rtx target, const struct algorithm *alg,
2519 enum mult_variant variant)
2521 HOST_WIDE_INT val_so_far;
2522 rtx insn, accum, tem;
2523 int opno;
2524 enum machine_mode nmode;
2526 /* op0 must be a register to make mult_cost match the precomputed
2527 shiftadd_cost array. */
2528 op0 = protect_from_queue (op0, 0);
2530 /* Avoid referencing memory over and over.
2531 For speed, but also for correctness when mem is volatile. */
2532 if (MEM_P (op0))
2533 op0 = force_reg (mode, op0);
2535 /* ACCUM starts out either as OP0 or as a zero, depending on
2536 the first operation. */
2538 if (alg->op[0] == alg_zero)
2540 accum = copy_to_mode_reg (mode, const0_rtx);
2541 val_so_far = 0;
2543 else if (alg->op[0] == alg_m)
2545 accum = copy_to_mode_reg (mode, op0);
2546 val_so_far = 1;
2548 else
2549 abort ();
2551 for (opno = 1; opno < alg->ops; opno++)
2553 int log = alg->log[opno];
2554 int preserve = preserve_subexpressions_p ();
2555 rtx shift_subtarget = preserve ? 0 : accum;
2556 rtx add_target
2557 = (opno == alg->ops - 1 && target != 0 && variant != add_variant
2558 && ! preserve)
2559 ? target : 0;
2560 rtx accum_target = preserve ? 0 : accum;
2562 switch (alg->op[opno])
2564 case alg_shift:
2565 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2566 build_int_2 (log, 0), NULL_RTX, 0);
2567 val_so_far <<= log;
2568 break;
2570 case alg_add_t_m2:
2571 tem = expand_shift (LSHIFT_EXPR, mode, op0,
2572 build_int_2 (log, 0), NULL_RTX, 0);
2573 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2574 add_target ? add_target : accum_target);
2575 val_so_far += (HOST_WIDE_INT) 1 << log;
2576 break;
2578 case alg_sub_t_m2:
2579 tem = expand_shift (LSHIFT_EXPR, mode, op0,
2580 build_int_2 (log, 0), NULL_RTX, 0);
2581 accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
2582 add_target ? add_target : accum_target);
2583 val_so_far -= (HOST_WIDE_INT) 1 << log;
2584 break;
2586 case alg_add_t2_m:
2587 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2588 build_int_2 (log, 0), shift_subtarget, 0);
2590 accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
2591 add_target ? add_target : accum_target);
2592 val_so_far = (val_so_far << log) + 1;
2593 break;
2595 case alg_sub_t2_m:
2596 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2597 build_int_2 (log, 0), shift_subtarget, 0);
2598 accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
2599 add_target ? add_target : accum_target);
2600 val_so_far = (val_so_far << log) - 1;
2601 break;
2603 case alg_add_factor:
2604 tem = expand_shift (LSHIFT_EXPR, mode, accum,
2605 build_int_2 (log, 0), NULL_RTX, 0);
2606 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2607 add_target ? add_target : accum_target);
2608 val_so_far += val_so_far << log;
2609 break;
2611 case alg_sub_factor:
2612 tem = expand_shift (LSHIFT_EXPR, mode, accum,
2613 build_int_2 (log, 0), NULL_RTX, 0);
2614 accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
2615 (add_target ? add_target
2616 : preserve ? 0 : tem));
2617 val_so_far = (val_so_far << log) - val_so_far;
2618 break;
2620 default:
2621 abort ();
2624 /* Write a REG_EQUAL note on the last insn so that we can cse
2625 multiplication sequences. Note that if ACCUM is a SUBREG,
2626 we've set the inner register and must properly indicate
2627 that. */
2629 tem = op0, nmode = mode;
2630 if (GET_CODE (accum) == SUBREG)
2632 nmode = GET_MODE (SUBREG_REG (accum));
2633 tem = gen_lowpart (nmode, op0);
2636 insn = get_last_insn ();
2637 set_unique_reg_note (insn, REG_EQUAL,
2638 gen_rtx_MULT (nmode, tem, GEN_INT (val_so_far)));
2641 if (variant == negate_variant)
2643 val_so_far = -val_so_far;
2644 accum = expand_unop (mode, neg_optab, accum, target, 0);
2646 else if (variant == add_variant)
2648 val_so_far = val_so_far + 1;
2649 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
2652 /* Compare only the bits of val and val_so_far that are significant
2653 in the result mode, to avoid sign-/zero-extension confusion. */
2654 val &= GET_MODE_MASK (mode);
2655 val_so_far &= GET_MODE_MASK (mode);
2656 if (val != val_so_far)
2657 abort ();
2659 return accum;
2662 /* Perform a multiplication and return an rtx for the result.
2663 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
2664 TARGET is a suggestion for where to store the result (an rtx).
2666 We check specially for a constant integer as OP1.
2667 If you want this check for OP0 as well, then before calling
2668 you should swap the two operands if OP0 would be constant. */
rtx
2671 expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
2672 int unsignedp)
2674 rtx const_op1 = op1;
2675 enum mult_variant variant;
2676 struct algorithm algorithm;
2678 /* synth_mult does an `unsigned int' multiply. As long as the mode is
2679 less than or equal in size to `unsigned int' this doesn't matter.
2680 If the mode is larger than `unsigned int', then synth_mult works only
2681 if the constant value exactly fits in an `unsigned int' without any
2682 truncation. This means that multiplying by negative values does
2683 not work; results are off by 2^32 on a 32-bit machine. */
2685 /* If we are multiplying in DImode, it may still be a win
2686 to try to work with shifts and adds. */
2687 if (GET_CODE (op1) == CONST_DOUBLE
2688 && GET_MODE_CLASS (GET_MODE (op1)) == MODE_INT
2689 && HOST_BITS_PER_INT >= BITS_PER_WORD
2690 && CONST_DOUBLE_HIGH (op1) == 0)
2691 const_op1 = GEN_INT (CONST_DOUBLE_LOW (op1));
2692 else if (HOST_BITS_PER_INT < GET_MODE_BITSIZE (mode)
2693 && GET_CODE (op1) == CONST_INT
2694 && INTVAL (op1) < 0)
2695 const_op1 = 0;
2697 /* We used to test optimize here, on the grounds that it's better to
2698 produce a smaller program when -O is not used.
2699 But this causes such a terrible slowdown sometimes
2700 that it seems better to use synth_mult always. */
2702 if (const_op1 && GET_CODE (const_op1) == CONST_INT
2703 && (unsignedp || !flag_trapv))
2705 int mult_cost = rtx_cost (gen_rtx_MULT (mode, op0, op1), SET);
2707 if (choose_mult_variant (mode, INTVAL (const_op1), &algorithm, &variant,
2708 mult_cost))
2709 return expand_mult_const (mode, op0, INTVAL (const_op1), target,
2710 &algorithm, variant);
2713 if (GET_CODE (op0) == CONST_DOUBLE)
2715 rtx temp = op0;
2716 op0 = op1;
2717 op1 = temp;
2720 /* Expand x*2.0 as x+x. */
2721 if (GET_CODE (op1) == CONST_DOUBLE
2722 && GET_MODE_CLASS (mode) == MODE_FLOAT)
2724 REAL_VALUE_TYPE d;
2725 REAL_VALUE_FROM_CONST_DOUBLE (d, op1);
2727 if (REAL_VALUES_EQUAL (d, dconst2))
2729 op0 = force_reg (GET_MODE (op0), op0);
2730 return expand_binop (mode, add_optab, op0, op0,
2731 target, unsignedp, OPTAB_LIB_WIDEN);
2735 /* This used to use umul_optab if unsigned, but for non-widening multiply
2736 there is no difference between signed and unsigned. */
2737 op0 = expand_binop (mode,
2738 ! unsignedp
2739 && flag_trapv && (GET_MODE_CLASS(mode) == MODE_INT)
2740 ? smulv_optab : smul_optab,
2741 op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
2742 if (op0 == 0)
2743 abort ();
2744 return op0;
2747 /* Return the smallest n such that 2**n >= X. */
int
2750 ceil_log2 (unsigned HOST_WIDE_INT x)
2752 return floor_log2 (x - 1) + 1;
2755 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
2756 replace division by D, and put the least significant N bits of the result
2757 in *MULTIPLIER_PTR and return the most significant bit.
2759 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
2760 needed precision is in PRECISION (should be <= N).
2762 PRECISION should be as small as possible so this function can choose
2763 a multiplier more freely.
2765 The rounded-up logarithm of D is placed in *LGUP_PTR. A shift count that
2766 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
2768 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
2769 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
2771 static
2772 unsigned HOST_WIDE_INT
2773 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
2774 unsigned HOST_WIDE_INT *multiplier_ptr,
2775 int *post_shift_ptr, int *lgup_ptr)
2777 HOST_WIDE_INT mhigh_hi, mlow_hi;
2778 unsigned HOST_WIDE_INT mhigh_lo, mlow_lo;
2779 int lgup, post_shift;
2780 int pow, pow2;
2781 unsigned HOST_WIDE_INT nl, dummy1;
2782 HOST_WIDE_INT nh, dummy2;
2784 /* lgup = ceil(log2(divisor)); */
2785 lgup = ceil_log2 (d);
2787 if (lgup > n)
2788 abort ();
2790 pow = n + lgup;
2791 pow2 = n + lgup - precision;
2793 if (pow == 2 * HOST_BITS_PER_WIDE_INT)
2795 /* We could handle this with some effort, but this case is much better
2796 handled directly with a scc insn, so rely on caller using that. */
2797 abort ();
2800 /* mlow = 2^(N + lgup)/d */
2801 if (pow >= HOST_BITS_PER_WIDE_INT)
2803 nh = (HOST_WIDE_INT) 1 << (pow - HOST_BITS_PER_WIDE_INT);
2804 nl = 0;
2806 else
2808 nh = 0;
2809 nl = (unsigned HOST_WIDE_INT) 1 << pow;
2811 div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
2812 &mlow_lo, &mlow_hi, &dummy1, &dummy2);
2814 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
2815 if (pow2 >= HOST_BITS_PER_WIDE_INT)
2816 nh |= (HOST_WIDE_INT) 1 << (pow2 - HOST_BITS_PER_WIDE_INT);
2817 else
2818 nl |= (unsigned HOST_WIDE_INT) 1 << pow2;
2819 div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
2820 &mhigh_lo, &mhigh_hi, &dummy1, &dummy2);
2822 if (mhigh_hi && nh - d >= d)
2823 abort ();
2824 if (mhigh_hi > 1 || mlow_hi > 1)
2825 abort ();
2826 /* Assert that mlow < mhigh. */
2827 if (! (mlow_hi < mhigh_hi || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo)))
2828 abort ();
2830 /* If precision == N, then mlow, mhigh exceed 2^N
2831 (but they do not exceed 2^(N+1)). */
2833 /* Reduce to lowest terms. */
2834 for (post_shift = lgup; post_shift > 0; post_shift--)
2836 unsigned HOST_WIDE_INT ml_lo = (mlow_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mlow_lo >> 1);
2837 unsigned HOST_WIDE_INT mh_lo = (mhigh_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mhigh_lo >> 1);
2838 if (ml_lo >= mh_lo)
2839 break;
2841 mlow_hi = 0;
2842 mlow_lo = ml_lo;
2843 mhigh_hi = 0;
2844 mhigh_lo = mh_lo;
2847 *post_shift_ptr = post_shift;
2848 *lgup_ptr = lgup;
2849 if (n < HOST_BITS_PER_WIDE_INT)
2851 unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
2852 *multiplier_ptr = mhigh_lo & mask;
2853 return mhigh_lo >= mask;
2855 else
2857 *multiplier_ptr = mhigh_lo;
2858 return mhigh_hi;
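/* Illustrative aside, not part of this file's interfaces: for d = 3 and
   N = 32 this routine arrives at the well-known multiplier 0xAAAAAAAB
   (= ceil (2^33 / 3)) with a post shift of 1, so an unsigned division
   by 3 becomes one highpart multiply and a shift:  */

#include <stdint.h>

static uint32_t
example_udiv3 (uint32_t x)
{
  /* High 32 bits of x * 0xAAAAAAAB, shifted right once more;
     exact x / 3 for every 32-bit x.  */
  return (uint32_t) (((uint64_t) x * 0xAAAAAAABu) >> 33);
}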
2862 /* Compute the inverse of X mod 2**N, i.e., find Y such that X * Y is
2863 congruent to 1 (mod 2**N). */
2865 static unsigned HOST_WIDE_INT
2866 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
2868 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
2870 /* The algorithm notes that the choice y = x satisfies
2871 x*y == 1 mod 2^3, since x is assumed odd.
2872 Each iteration doubles the number of bits of significance in y. */
2874 unsigned HOST_WIDE_INT mask;
2875 unsigned HOST_WIDE_INT y = x;
2876 int nbit = 3;
2878 mask = (n == HOST_BITS_PER_WIDE_INT
2879 ? ~(unsigned HOST_WIDE_INT) 0
2880 : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
2882 while (nbit < n)
2884 y = y * (2 - x*y) & mask; /* Modulo 2^N */
2885 nbit *= 2;
2887 return y;
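/* Illustrative aside, not part of this file's interfaces: the same
   Newton-style iteration on a 32-bit host word.  Starting from y = x
   (correct to 3 bits), each pass doubles the number of correct
   low-order bits, so four passes suffice for 32.  */

#include <stdint.h>

static uint32_t
example_invert_mod2_32 (uint32_t x)   /* x must be odd */
{
  uint32_t y = x;                     /* correct to 3 bits */
  int nbit;
  for (nbit = 3; nbit < 32; nbit *= 2)
    y = y * (2 - x * y);              /* correct bits double */
  return y;   /* x * y == 1 (mod 2^32); e.g. x = 3 -> y = 0xAAAAAAAB */
}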
2890 /* Emit code to adjust ADJ_OPERAND after a multiplication of the wrong signedness
2891 flavor of OP0 and OP1. ADJ_OPERAND is already the high half of the
2892 product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed product
2893 to become unsigned; if UNSIGNEDP is zero, adjust the unsigned product to
2894 become signed.
2896 The result is put in TARGET if that is convenient.
2898 MODE is the mode of operation. */
rtx
2901 expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
2902 rtx op1, rtx target, int unsignedp)
2904 rtx tem;
2905 enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
2907 tem = expand_shift (RSHIFT_EXPR, mode, op0,
2908 build_int_2 (GET_MODE_BITSIZE (mode) - 1, 0),
2909 NULL_RTX, 0);
2910 tem = expand_and (mode, tem, op1, NULL_RTX);
2911 adj_operand
2912 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
2913 adj_operand);
2915 tem = expand_shift (RSHIFT_EXPR, mode, op1,
2916 build_int_2 (GET_MODE_BITSIZE (mode) - 1, 0),
2917 NULL_RTX, 0);
2918 tem = expand_and (mode, tem, op0, NULL_RTX);
2919 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
2920 target);
2922 return target;
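/* Illustrative aside, not part of this file's interfaces: the identity
   used above.  Reading an N-bit value as signed subtracts 2^N whenever
   its sign bit is set, so the signed and unsigned highparts differ by
   the other operand for each negative input.  Checked here with a
   32x32->64 multiply (assumes arithmetic >> on signed values):  */

#include <stdint.h>

static int32_t
example_smul_high (uint32_t x, uint32_t y)
{
  uint32_t uhigh = (uint32_t) (((uint64_t) x * y) >> 32);
  uint32_t adj = uhigh - (((int32_t) x >> 31) & y)   /* - y if x < 0 */
                       - (((int32_t) y >> 31) & x);  /* - x if y < 0 */
  return (int32_t) adj;   /* == high half of the signed product */
}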
2925 /* Subroutine of expand_mult_highpart. Return the MODE high part of OP. */
2927 static rtx
2928 extract_high_half (enum machine_mode mode, rtx op)
2930 enum machine_mode wider_mode;
2932 if (mode == word_mode)
2933 return gen_highpart (mode, op);
2935 wider_mode = GET_MODE_WIDER_MODE (mode);
2936 op = expand_shift (RSHIFT_EXPR, wider_mode, op,
2937 build_int_2 (GET_MODE_BITSIZE (mode), 0), 0, 1);
2938 return convert_modes (mode, wider_mode, op, 0);
2941 /* Like expand_mult_highpart, but only consider using a multiplication
2942 optab. OP1 is an rtx for the constant operand. */
2944 static rtx
2945 expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
2946 rtx target, int unsignedp, int max_cost)
2948 rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
2949 enum machine_mode wider_mode;
2950 optab moptab;
2951 rtx tem;
2952 int size;
2954 wider_mode = GET_MODE_WIDER_MODE (mode);
2955 size = GET_MODE_BITSIZE (mode);
2957 /* Firstly, try using a multiplication insn that only generates the needed
2958 high part of the product, and in the sign flavor of unsignedp. */
2959 if (mul_highpart_cost[mode] < max_cost)
2961 moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
2962 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
2963 unsignedp, OPTAB_DIRECT);
2964 if (tem)
2965 return tem;
2968 /* Secondly, same as above, but use sign flavor opposite of unsignedp.
2969 Need to adjust the result after the multiplication. */
2970 if (size - 1 < BITS_PER_WORD
2971 && (mul_highpart_cost[mode] + 2 * shift_cost[mode][size-1]
2972 + 4 * add_cost[mode] < max_cost))
2974 moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
2975 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
2976 unsignedp, OPTAB_DIRECT);
2977 if (tem)
2978 /* We used the wrong signedness. Adjust the result. */
2979 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
2980 tem, unsignedp);
2983 /* Try widening multiplication. */
2984 moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
2985 if (moptab->handlers[wider_mode].insn_code != CODE_FOR_nothing
2986 && mul_widen_cost[wider_mode] < max_cost)
2988 tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
2989 unsignedp, OPTAB_WIDEN);
2990 if (tem)
2991 return extract_high_half (mode, tem);
2994 /* Try widening the mode and performing a non-widening multiplication. */
2995 moptab = smul_optab;
2996 if (smul_optab->handlers[wider_mode].insn_code != CODE_FOR_nothing
2997 && size - 1 < BITS_PER_WORD
2998 && mul_cost[wider_mode] + shift_cost[mode][size-1] < max_cost)
3000 tem = expand_binop (wider_mode, moptab, op0, op1, 0,
3001 unsignedp, OPTAB_WIDEN);
3002 if (tem)
3003 return extract_high_half (mode, tem);
3006 /* Try widening multiplication of opposite signedness, and adjust. */
3007 moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3008 if (moptab->handlers[wider_mode].insn_code != CODE_FOR_nothing
3009 && size - 1 < BITS_PER_WORD
3010 && (mul_widen_cost[wider_mode] + 2 * shift_cost[mode][size-1]
3011 + 4 * add_cost[mode] < max_cost))
3013 tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3014 NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3015 if (tem != 0)
3017 tem = extract_high_half (mode, tem);
3018 /* We used the wrong signedness. Adjust the result. */
3019 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3020 target, unsignedp);
3024 return 0;
3027 /* Emit code to multiply OP0 and CNST1, putting the high half of the result
3028 in TARGET if that is convenient, and returning where the result is. If the
3029 operation cannot be performed, 0 is returned.
3031 MODE is the mode of operation and result.
3033 UNSIGNEDP nonzero means unsigned multiply.
3035 MAX_COST is the total allowed cost for the expanded RTL. */
rtx
3038 expand_mult_highpart (enum machine_mode mode, rtx op0,
3039 unsigned HOST_WIDE_INT cnst1, rtx target,
3040 int unsignedp, int max_cost)
3042 enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3043 int extra_cost;
3044 bool sign_adjust = false;
3045 enum mult_variant variant;
3046 struct algorithm alg;
3047 rtx op1, tem;
3049 /* We can't support modes wider than HOST_BITS_PER_WIDE_INT. */
3050 if (GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT)
3051 abort ();
3053 op1 = gen_int_mode (cnst1, wider_mode);
3054 cnst1 &= GET_MODE_MASK (mode);
3056 /* We can't optimize modes wider than BITS_PER_WORD.
3057 ??? We might be able to perform double-word arithmetic if
3058 mode == word_mode, however all the cost calculations in
3059 synth_mult etc. assume single-word operations. */
3060 if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3061 return expand_mult_highpart_optab (mode, op0, op1, target,
3062 unsignedp, max_cost);
3064 extra_cost = shift_cost[mode][GET_MODE_BITSIZE (mode) - 1];
3066 /* Check whether we are trying to multiply by a negative constant. */
3067 if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3069 sign_adjust = true;
3070 extra_cost += add_cost[mode];
3073 /* See whether shift/add multiplication is cheap enough. */
3074 if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3075 max_cost - extra_cost))
3077 /* See whether the specialized multiplication optabs are
3078 cheaper than the shift/add version. */
3079 tem = expand_mult_highpart_optab (mode, op0, op1, target,
3080 unsignedp, alg.cost + extra_cost);
3081 if (tem)
3082 return tem;
3084 tem = convert_to_mode (wider_mode, op0, unsignedp);
3085 tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3086 tem = extract_high_half (mode, tem);
3088 /* Adjust result for signedness. */
3089 if (sign_adjust)
3090 tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3092 return tem;
3094 return expand_mult_highpart_optab (mode, op0, op1, target,
3095 unsignedp, max_cost);
3099 /* Expand signed modulus of OP0 by a power of two D in mode MODE. */
3101 static rtx
3102 expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3104 unsigned HOST_WIDE_INT mask;
3105 rtx result, temp, shift, label;
3106 int logd;
3108 logd = floor_log2 (d);
3109 result = gen_reg_rtx (mode);
3111 /* Avoid conditional branches when they're expensive. */
3112 if (BRANCH_COST >= 2
3113 && !optimize_size)
3115 rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3116 mode, 0, -1);
3117 if (signmask)
3119 signmask = force_reg (mode, signmask);
3120 mask = ((HOST_WIDE_INT) 1 << logd) - 1;
3121 shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3123 /* Use the rtx_cost of a LSHIFTRT instruction to determine
3124 which instruction sequence to use. If logical right shifts
3125 are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
3126 use a LSHIFTRT, 1 ADD, 1 SUB and an AND. */
3128 temp = gen_rtx_LSHIFTRT (mode, result, shift);
3129 if (lshr_optab->handlers[mode].insn_code == CODE_FOR_nothing
3130 || rtx_cost (temp, SET) > COSTS_N_INSNS (2))
3132 temp = expand_binop (mode, xor_optab, op0, signmask,
3133 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3134 temp = expand_binop (mode, sub_optab, temp, signmask,
3135 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3136 temp = expand_binop (mode, and_optab, temp, GEN_INT (mask),
3137 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3138 temp = expand_binop (mode, xor_optab, temp, signmask,
3139 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3140 temp = expand_binop (mode, sub_optab, temp, signmask,
3141 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3143 else
3145 signmask = expand_binop (mode, lshr_optab, signmask, shift,
3146 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3147 signmask = force_reg (mode, signmask);
3149 temp = expand_binop (mode, add_optab, op0, signmask,
3150 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3151 temp = expand_binop (mode, and_optab, temp, GEN_INT (mask),
3152 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3153 temp = expand_binop (mode, sub_optab, temp, signmask,
3154 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3156 return temp;
3160 /* Mask contains the mode's signbit and the significant bits of the
3161 modulus. By including the signbit in the operation, many targets
3162 can avoid an explicit compare operation in the following comparison
3163 against zero. */
3165 mask = (HOST_WIDE_INT) -1 << (GET_MODE_BITSIZE (mode) - 1)
3166 | (((HOST_WIDE_INT) 1 << logd) - 1);
3168 temp = expand_binop (mode, and_optab, op0, GEN_INT (mask), result,
3169 1, OPTAB_LIB_WIDEN);
3170 if (temp != result)
3171 emit_move_insn (result, temp);
3173 label = gen_label_rtx ();
3174 do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3176 temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3177 0, OPTAB_LIB_WIDEN);
3178 mask = (HOST_WIDE_INT) -1 << logd;
3179 temp = expand_binop (mode, ior_optab, temp, GEN_INT (mask), result,
3180 1, OPTAB_LIB_WIDEN);
3181 temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3182 0, OPTAB_LIB_WIDEN);
3183 if (temp != result)
3184 emit_move_insn (result, temp);
3185 emit_label (label);
3186 return result;
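/* Illustrative aside, not part of this file's interfaces: the cheaper
   branch-free sequence above for a 32-bit X and D = 2^LOGD,
   0 < logd < 32.  The result has the sign of X, as C's % does.  */

#include <stdint.h>

static int32_t
example_smod_pow2 (int32_t x, int logd)
{
  uint32_t mask = ((uint32_t) 1 << logd) - 1;
  /* 0 if x >= 0, otherwise the low logd bits all set.  */
  uint32_t smask = (uint32_t) (x >> 31) >> (32 - logd);
  return (int32_t) ((((uint32_t) x + smask) & mask) - smask);
}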
3189 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3190 if that is convenient, and returning where the result is.
3191 You may request either the quotient or the remainder as the result;
3192 specify REM_FLAG nonzero to get the remainder.
3194 CODE is the expression code for which kind of division this is;
3195 it controls how rounding is done. MODE is the machine mode to use.
3196 UNSIGNEDP nonzero means do unsigned division. */
3198 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3199 and then correct it by or'ing in missing high bits
3200 if result of ANDI is nonzero.
3201 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3202 This could optimize to a bfexts instruction.
3203 But C doesn't use these operations, so their optimizations are
3204 left for later. */
3205 /* ??? For modulo, we don't actually need the highpart of the first product,
3206 the low part will do nicely. And for small divisors, the second multiply
3207 can also be a low-part only multiply or even be completely left out.
3208 E.g. to calculate the remainder of a division by 3 with a 32 bit
3209 multiply, multiply with 0x55555556 and extract the upper two bits;
3210 the result is exact for inputs up to 0x1fffffff.
3211 The input range can be reduced by using cross-sum rules.
3212 For odd divisors >= 3, the following table gives right shift counts
3213 so that if a number is shifted by an integer multiple of the given
3214 amount, the remainder stays the same:
3215 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3216 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3217 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3218 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3219 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3221 Cross-sum rules for even numbers can be derived by leaving as many bits
3222 to the right alone as the divisor has zeros to the right.
3223 E.g. if x is an unsigned 32 bit number:
3224 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28 */
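/* Illustrative aside, not part of this file's interfaces: the mod-3
   remark above in plain C.  0x55555556 is 2^32/3 rounded to nearest, so
   the top two bits of the 32-bit product hold the residue; per the note
   above this is exact for inputs up to 0x1fffffff.  */

#include <stdint.h>

static unsigned
example_mod3 (uint32_t x)   /* x <= 0x1fffffff */
{
  return (x * 0x55555556u) >> 30;   /* upper two bits of the product */
}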
3227 #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
rtx
3230 expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3231 rtx op0, rtx op1, rtx target, int unsignedp)
3233 enum machine_mode compute_mode;
3234 rtx tquotient;
3235 rtx quotient = 0, remainder = 0;
3236 rtx last;
3237 int size;
3238 rtx insn, set;
3239 optab optab1, optab2;
3240 int op1_is_constant, op1_is_pow2 = 0;
3241 int max_cost, extra_cost;
3242 static HOST_WIDE_INT last_div_const = 0;
3243 static HOST_WIDE_INT ext_op1;
3245 op1_is_constant = GET_CODE (op1) == CONST_INT;
3246 if (op1_is_constant)
3248 ext_op1 = INTVAL (op1);
3249 if (unsignedp)
3250 ext_op1 &= GET_MODE_MASK (mode);
3251 op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3252 || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3256 /* This is the structure of expand_divmod:
3258 First comes code to fix up the operands so we can perform the operations
3259 correctly and efficiently.
3261 Second comes a switch statement with code specific for each rounding mode.
3262 For some special operands this code emits all RTL for the desired
3263 operation, for other cases, it generates only a quotient and stores it in
3264 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
3265 to indicate that it has not done anything.
3267 Last comes code that finishes the operation. If QUOTIENT is set and
3268 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
3269 QUOTIENT is not set, it is computed using trunc rounding.
3271 We try to generate special code for division and remainder when OP1 is a
3272 constant. If |OP1| = 2**n we can use shifts and some other fast
3273 operations. For other values of OP1, we compute a carefully selected
3274 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3275 by m.
3277 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3278 half of the product. Different strategies for generating the product are
3279 implemented in expand_mult_highpart.
3281 If what we actually want is the remainder, we generate that by another
3282 by-constant multiplication and a subtraction. */
3284 /* We shouldn't be called with OP1 == const1_rtx, but some of the
3285 code below will malfunction if we are, so check here and handle
3286 the special case if so. */
3287 if (op1 == const1_rtx)
3288 return rem_flag ? const0_rtx : op0;
3290 /* When dividing by -1, we could get an overflow.
3291 negv_optab can handle overflows. */
3292 if (! unsignedp && op1 == constm1_rtx)
3294 if (rem_flag)
3295 return const0_rtx;
3296 return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT
3297 ? negv_optab : neg_optab, op0, target, 0);
3300 if (target
3301 /* Don't use the function value register as a target
3302 since we have to read it as well as write it,
3303 and function-inlining gets confused by this. */
3304 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3305 /* Don't clobber an operand while doing a multi-step calculation. */
3306 || ((rem_flag || op1_is_constant)
3307 && (reg_mentioned_p (target, op0)
3308 || (MEM_P (op0) && MEM_P (target))))
3309 || reg_mentioned_p (target, op1)
3310 || (MEM_P (op1) && MEM_P (target))))
3311 target = 0;
3313 /* Get the mode in which to perform this computation. Normally it will
3314 be MODE, but sometimes we can't do the desired operation in MODE.
3315 If so, pick a wider mode in which we can do the operation. Convert
3316 to that mode at the start to avoid repeated conversions.
3318 First see what operations we need. These depend on the expression
3319 we are evaluating. (We assume that divxx3 insns exist under the
3320 same conditions as modxx3 insns do and that these insns don't normally
3321 fail. If these assumptions are not correct, we may generate less
3322 efficient code in some cases.)
3324 Then see if we find a mode in which we can open-code that operation
3325 (either a division, modulus, or shift). Finally, check for the smallest
3326 mode for which we can do the operation with a library call. */
3328 /* We might want to refine this now that we have division-by-constant
3329 optimization. Since expand_mult_highpart tries so many variants, it is
3330 not straightforward to generalize this. Maybe we should make an array
3331 of possible modes in init_expmed? Save this for GCC 2.7. */
3333 optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3334 ? (unsignedp ? lshr_optab : ashr_optab)
3335 : (unsignedp ? udiv_optab : sdiv_optab));
3336 optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3337 ? optab1
3338 : (unsignedp ? udivmod_optab : sdivmod_optab));
3340 for (compute_mode = mode; compute_mode != VOIDmode;
3341 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3342 if (optab1->handlers[compute_mode].insn_code != CODE_FOR_nothing
3343 || optab2->handlers[compute_mode].insn_code != CODE_FOR_nothing)
3344 break;
3346 if (compute_mode == VOIDmode)
3347 for (compute_mode = mode; compute_mode != VOIDmode;
3348 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3349 if (optab1->handlers[compute_mode].libfunc
3350 || optab2->handlers[compute_mode].libfunc)
3351 break;
3353 /* If we still couldn't find a mode, use MODE, but we'll probably abort
3354 in expand_binop. */
3355 if (compute_mode == VOIDmode)
3356 compute_mode = mode;
3358 if (target && GET_MODE (target) == compute_mode)
3359 tquotient = target;
3360 else
3361 tquotient = gen_reg_rtx (compute_mode);
3363 size = GET_MODE_BITSIZE (compute_mode);
3364 #if 0
3365 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3366 (mode), and thereby get better code when OP1 is a constant. Do that
3367 later. It will require going over all usages of SIZE below. */
3368 size = GET_MODE_BITSIZE (mode);
3369 #endif
3371 /* Only deduct something for a REM if the last divide done was
3372 for a different constant. Then set the constant of the last
3373 divide. */
3374 max_cost = div_cost[compute_mode]
3375 - (rem_flag && ! (last_div_const != 0 && op1_is_constant
3376 && INTVAL (op1) == last_div_const)
3377 ? mul_cost[compute_mode] + add_cost[compute_mode]
3378 : 0);
3380 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
3382 /* Now convert to the best mode to use. */
3383 if (compute_mode != mode)
3385 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
3386 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
3388 /* convert_modes may have placed op1 into a register, so we
3389 must recompute the following. */
3390 op1_is_constant = GET_CODE (op1) == CONST_INT;
3391 op1_is_pow2 = (op1_is_constant
3392 && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
3393 || (! unsignedp
3394 && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1)))))) ;
3397 /* If one of the operands is a volatile MEM, copy it into a register. */
3399 if (MEM_P (op0) && MEM_VOLATILE_P (op0))
3400 op0 = force_reg (compute_mode, op0);
3401 if (MEM_P (op1) && MEM_VOLATILE_P (op1))
3402 op1 = force_reg (compute_mode, op1);
3404 /* If we need the remainder or if OP1 is constant, we need to
3405 put OP0 in a register in case it has any queued subexpressions. */
3406 if (rem_flag || op1_is_constant)
3407 op0 = force_reg (compute_mode, op0);
3409 last = get_last_insn ();
3411 /* Promote floor rounding to trunc rounding for unsigned operations. */
3412 if (unsignedp)
3414 if (code == FLOOR_DIV_EXPR)
3415 code = TRUNC_DIV_EXPR;
3416 if (code == FLOOR_MOD_EXPR)
3417 code = TRUNC_MOD_EXPR;
3418 if (code == EXACT_DIV_EXPR && op1_is_pow2)
3419 code = TRUNC_DIV_EXPR;
3422 if (op1 != const0_rtx)
3423 switch (code)
3425 case TRUNC_MOD_EXPR:
3426 case TRUNC_DIV_EXPR:
3427 if (op1_is_constant)
3429 if (unsignedp)
3431 unsigned HOST_WIDE_INT mh, ml;
3432 int pre_shift, post_shift;
3433 int dummy;
3434 unsigned HOST_WIDE_INT d = (INTVAL (op1)
3435 & GET_MODE_MASK (compute_mode));
3437 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
3439 pre_shift = floor_log2 (d);
3440 if (rem_flag)
3442 remainder
3443 = expand_binop (compute_mode, and_optab, op0,
3444 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
3445 remainder, 1,
3446 OPTAB_LIB_WIDEN);
3447 if (remainder)
3448 return gen_lowpart (mode, remainder);
3450 quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3451 build_int_2 (pre_shift, 0),
3452 tquotient, 1);
3454 else if (size <= HOST_BITS_PER_WIDE_INT)
3456 if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
3458 /* Most significant bit of divisor is set; emit an scc
3459 insn. */
3460 quotient = emit_store_flag (tquotient, GEU, op0, op1,
3461 compute_mode, 1, 1);
3462 if (quotient == 0)
3463 goto fail1;
3465 else
3467 /* Find a suitable multiplier and right shift count
3468 instead of dividing by D. */
3470 mh = choose_multiplier (d, size, size,
3471 &ml, &post_shift, &dummy);
3473 /* If the suggested multiplier is more than SIZE bits,
3474 we can do better for even divisors, using an
3475 initial right shift. */
3476 if (mh != 0 && (d & 1) == 0)
3478 pre_shift = floor_log2 (d & -d);
3479 mh = choose_multiplier (d >> pre_shift, size,
3480 size - pre_shift,
3481 &ml, &post_shift, &dummy);
3482 if (mh)
3483 abort ();
3485 else
3486 pre_shift = 0;
3488 if (mh != 0)
3490 rtx t1, t2, t3, t4;
3492 if (post_shift - 1 >= BITS_PER_WORD)
3493 goto fail1;
3495 extra_cost
3496 = (shift_cost[compute_mode][post_shift - 1]
3497 + shift_cost[compute_mode][1]
3498 + 2 * add_cost[compute_mode]);
3499 t1 = expand_mult_highpart (compute_mode, op0, ml,
3500 NULL_RTX, 1,
3501 max_cost - extra_cost);
3502 if (t1 == 0)
3503 goto fail1;
3504 t2 = force_operand (gen_rtx_MINUS (compute_mode,
3505 op0, t1),
3506 NULL_RTX);
3507 t3 = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3508 build_int_2 (1, 0), NULL_RTX, 1);
3509 t4 = force_operand (gen_rtx_PLUS (compute_mode,
3510 t1, t3),
3511 NULL_RTX);
3512 quotient
3513 = expand_shift (RSHIFT_EXPR, compute_mode, t4,
3514 build_int_2 (post_shift - 1, 0),
3515 tquotient, 1);
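      /* Worked example, shown for illustration only (the real values come
         from choose_multiplier): 32-bit unsigned division by d == 7 yields
         ml == 0x24924925 and post_shift == 3, so the sequence above
         computes
             q = (t1 + ((op0 - t1) >> 1)) >> (post_shift - 1);
         op0 == 14 gives t1 == 2, t2 == 12, t3 == 6, t4 == 8, q == 2.  */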
3517 else
3519 rtx t1, t2;
3521 if (pre_shift >= BITS_PER_WORD
3522 || post_shift >= BITS_PER_WORD)
3523 goto fail1;
3525 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3526 build_int_2 (pre_shift, 0),
3527 NULL_RTX, 1);
3528 extra_cost
3529 = (shift_cost[compute_mode][pre_shift]
3530 + shift_cost[compute_mode][post_shift]);
3531 t2 = expand_mult_highpart (compute_mode, t1, ml,
3532 NULL_RTX, 1,
3533 max_cost - extra_cost);
3534 if (t2 == 0)
3535 goto fail1;
3536 quotient
3537 = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3538 build_int_2 (post_shift, 0),
3539 tquotient, 1);
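      /* Worked example (illustrative): 32-bit unsigned division by the
         even divisor d == 14 uses pre_shift == 1, after which
         choose_multiplier for 7 at 31 bits of precision yields
         ml == 0x92492493 and post_shift == 2; op0 == 28 then gives
         t1 == 14, t2 == 8, quotient == 8 >> 2 == 2.  */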
3543 else /* Too wide mode to use tricky code */
3544 break;
3546 insn = get_last_insn ();
3547 if (insn != last
3548 && (set = single_set (insn)) != 0
3549 && SET_DEST (set) == quotient)
3550 set_unique_reg_note (insn,
3551 REG_EQUAL,
3552 gen_rtx_UDIV (compute_mode, op0, op1));
3554 else /* TRUNC_DIV, signed */
3556 unsigned HOST_WIDE_INT ml;
3557 int lgup, post_shift;
3558 HOST_WIDE_INT d = INTVAL (op1);
3559 unsigned HOST_WIDE_INT abs_d = d >= 0 ? d : -d;
3561 /* n rem d = n rem -d */
3562 if (rem_flag && d < 0)
3564 d = abs_d;
3565 op1 = gen_int_mode (abs_d, compute_mode);
3568 if (d == 1)
3569 quotient = op0;
3570 else if (d == -1)
3571 quotient = expand_unop (compute_mode, neg_optab, op0,
3572 tquotient, 0);
3573 else if (abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
3575 /* This case is not handled correctly below. */
3576 quotient = emit_store_flag (tquotient, EQ, op0, op1,
3577 compute_mode, 1, 1);
3578 if (quotient == 0)
3579 goto fail1;
3581 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
3582 && (rem_flag ? smod_pow2_cheap[compute_mode]
3583 : sdiv_pow2_cheap[compute_mode])
3584 /* We assume that cheap metric is true if the
3585 optab has an expander for this mode. */
3586 && (((rem_flag ? smod_optab : sdiv_optab)
3587 ->handlers[compute_mode].insn_code
3588 != CODE_FOR_nothing)
3589 || (sdivmod_optab->handlers[compute_mode]
3590 .insn_code != CODE_FOR_nothing)))
3592 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
3594 if (rem_flag)
3596 remainder = expand_smod_pow2 (compute_mode, op0, d);
3597 if (remainder)
3598 return gen_lowpart (mode, remainder);
3600 lgup = floor_log2 (abs_d);
3601 if (BRANCH_COST < 1 || (abs_d != 2 && BRANCH_COST < 3))
3603 rtx label = gen_label_rtx ();
3604 rtx t1;
3606 t1 = copy_to_mode_reg (compute_mode, op0);
3607 do_cmp_and_jump (t1, const0_rtx, GE,
3608 compute_mode, label);
3609 expand_inc (t1, gen_int_mode (abs_d - 1,
3610 compute_mode));
3611 emit_label (label);
3612 quotient = expand_shift (RSHIFT_EXPR, compute_mode, t1,
3613 build_int_2 (lgup, 0),
3614 tquotient, 0);
3616 else
3618 rtx t1, t2, t3;
3619 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3620 build_int_2 (size - 1, 0),
3621 NULL_RTX, 0);
3622 t2 = expand_shift (RSHIFT_EXPR, compute_mode, t1,
3623 build_int_2 (size - lgup, 0),
3624 NULL_RTX, 1);
3625 t3 = force_operand (gen_rtx_PLUS (compute_mode,
3626 op0, t2),
3627 NULL_RTX);
3628 quotient = expand_shift (RSHIFT_EXPR, compute_mode, t3,
3629 build_int_2 (lgup, 0),
3630 tquotient, 0);
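      /* Illustrative example (32-bit mode): for abs_d == 4 (lgup == 2)
         and op0 == -7, t1 == -1, t2 == (unsigned) -1 >> 30 == 3,
         t3 == -7 + 3 == -4, and the quotient is -4 >> 2 == -1, which
         matches trunc (-7 / 4).  */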
3633 /* We have computed OP0 / abs(OP1). If OP1 is negative,
3634 negate the quotient. */
3635 if (d < 0)
3637 insn = get_last_insn ();
3638 if (insn != last
3639 && (set = single_set (insn)) != 0
3640 && SET_DEST (set) == quotient
3641 && abs_d < ((unsigned HOST_WIDE_INT) 1
3642 << (HOST_BITS_PER_WIDE_INT - 1)))
3643 set_unique_reg_note (insn,
3644 REG_EQUAL,
3645 gen_rtx_DIV (compute_mode,
3646 op0,
3647 GEN_INT
3648 (trunc_int_for_mode
3649 (abs_d,
3650 compute_mode))));
3652 quotient = expand_unop (compute_mode, neg_optab,
3653 quotient, quotient, 0);
3656 else if (size <= HOST_BITS_PER_WIDE_INT)
3658 choose_multiplier (abs_d, size, size - 1,
3659 &ml, &post_shift, &lgup);
3660 if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
3662 rtx t1, t2, t3;
3664 if (post_shift >= BITS_PER_WORD
3665 || size - 1 >= BITS_PER_WORD)
3666 goto fail1;
3668 extra_cost = (shift_cost[compute_mode][post_shift]
3669 + shift_cost[compute_mode][size - 1]
3670 + add_cost[compute_mode]);
3671 t1 = expand_mult_highpart (compute_mode, op0, ml,
3672 NULL_RTX, 0,
3673 max_cost - extra_cost);
3674 if (t1 == 0)
3675 goto fail1;
3676 t2 = expand_shift (RSHIFT_EXPR, compute_mode, t1,
3677 build_int_2 (post_shift, 0), NULL_RTX, 0);
3678 t3 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3679 build_int_2 (size - 1, 0), NULL_RTX, 0);
3680 if (d < 0)
3681 quotient
3682 = force_operand (gen_rtx_MINUS (compute_mode,
3683 t3, t2),
3684 tquotient);
3685 else
3686 quotient
3687 = force_operand (gen_rtx_MINUS (compute_mode,
3688 t2, t3),
3689 tquotient);
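      /* Worked example (illustrative): 32-bit signed division by d == 5
         yields ml == 0x66666667 and post_shift == 1; op0 == -7 gives
         t1 == -3, t2 == -2, t3 == -1, and quotient == t2 - t3 == -1.  */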
3691 else
3693 rtx t1, t2, t3, t4;
3695 if (post_shift >= BITS_PER_WORD
3696 || size - 1 >= BITS_PER_WORD)
3697 goto fail1;
3699 ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
3700 extra_cost = (shift_cost[compute_mode][post_shift]
3701 + shift_cost[compute_mode][size - 1]
3702 + 2 * add_cost[compute_mode]);
3703 t1 = expand_mult_highpart (compute_mode, op0, ml,
3704 NULL_RTX, 0,
3705 max_cost - extra_cost);
3706 if (t1 == 0)
3707 goto fail1;
3708 t2 = force_operand (gen_rtx_PLUS (compute_mode,
3709 t1, op0),
3710 NULL_RTX);
3711 t3 = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3712 build_int_2 (post_shift, 0),
3713 NULL_RTX, 0);
3714 t4 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3715 build_int_2 (size - 1, 0),
3716 NULL_RTX, 0);
3717 if (d < 0)
3718 quotient
3719 = force_operand (gen_rtx_MINUS (compute_mode,
3720 t4, t3),
3721 tquotient);
3722 else
3723 quotient
3724 = force_operand (gen_rtx_MINUS (compute_mode,
3725 t3, t4),
3726 tquotient);
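      /* Worked example (illustrative): 32-bit signed division by d == 7
         reaches this branch with ml == 0x92492493 (negative once
         sign-extended) and post_shift == 2; op0 == -7 gives t1 == 2,
         t2 == -5, t3 == -2, t4 == -1, and quotient == t3 - t4 == -1.  */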
3729 else /* Too wide mode to use tricky code */
3730 break;
3732 insn = get_last_insn ();
3733 if (insn != last
3734 && (set = single_set (insn)) != 0
3735 && SET_DEST (set) == quotient)
3736 set_unique_reg_note (insn,
3737 REG_EQUAL,
3738 gen_rtx_DIV (compute_mode, op0, op1));
3740 break;
3742 fail1:
3743 delete_insns_since (last);
3744 break;
3746 case FLOOR_DIV_EXPR:
3747 case FLOOR_MOD_EXPR:
3748 /* We will come here only for signed operations. */
3749 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
3751 unsigned HOST_WIDE_INT mh, ml;
3752 int pre_shift, lgup, post_shift;
3753 HOST_WIDE_INT d = INTVAL (op1);
3755 if (d > 0)
3757 /* We could just as easily deal with negative constants here,
3758 but it does not seem worth the trouble for GCC 2.6. */
3759 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
3761 pre_shift = floor_log2 (d);
3762 if (rem_flag)
3764 remainder = expand_binop (compute_mode, and_optab, op0,
3765 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
3766 remainder, 0, OPTAB_LIB_WIDEN);
3767 if (remainder)
3768 return gen_lowpart (mode, remainder);
3770 quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3771 build_int_2 (pre_shift, 0),
3772 tquotient, 0);
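      /* Illustrative example: FLOOR_DIV by d == 4 is a plain arithmetic
         shift, since op0 >> 2 rounds toward minus infinity; op0 == -7
         gives quotient -2 and remainder (op0 & 3) == 1, so that
         -7 == 4 * -2 + 1.  */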
3774 else
3776 rtx t1, t2, t3, t4;
3778 mh = choose_multiplier (d, size, size - 1,
3779 &ml, &post_shift, &lgup);
3780 if (mh)
3781 abort ();
3783 if (post_shift < BITS_PER_WORD
3784 && size - 1 < BITS_PER_WORD)
3786 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3787 build_int_2 (size - 1, 0),
3788 NULL_RTX, 0);
3789 t2 = expand_binop (compute_mode, xor_optab, op0, t1,
3790 NULL_RTX, 0, OPTAB_WIDEN);
3791 extra_cost = (shift_cost[compute_mode][post_shift]
3792 + shift_cost[compute_mode][size - 1]
3793 + 2 * add_cost[compute_mode]);
3794 t3 = expand_mult_highpart (compute_mode, t2, ml,
3795 NULL_RTX, 1,
3796 max_cost - extra_cost);
3797 if (t3 != 0)
3799 t4 = expand_shift (RSHIFT_EXPR, compute_mode, t3,
3800 build_int_2 (post_shift, 0),
3801 NULL_RTX, 1);
3802 quotient = expand_binop (compute_mode, xor_optab,
3803 t4, t1, tquotient, 0,
3804 OPTAB_WIDEN);
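      /* Sketch of the identity used above (illustrative, 32-bit values):
         t1 is the sign mask of op0, and op0 ^ t1 is op0 for op0 >= 0 and
         -op0 - 1 otherwise, so the unsigned multiply-high and shift
         compute floor ((op0 ^ t1) / d) and the final xor restores the
         sign.  For d == 5 (ml == 0x66666667, post_shift == 1) and
         op0 == -7: t2 == 6, t4 == 1, quotient == ~1 == -2
         == floor (-7 / 5).  */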
3809 else
3811 rtx nsign, t1, t2, t3, t4;
3812 t1 = force_operand (gen_rtx_PLUS (compute_mode,
3813 op0, constm1_rtx), NULL_RTX);
3814 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
3815 0, OPTAB_WIDEN);
3816 nsign = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3817 build_int_2 (size - 1, 0), NULL_RTX, 0);
3818 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
3819 NULL_RTX);
3820 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
3821 NULL_RTX, 0);
3822 if (t4)
3824 rtx t5;
3825 t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
3826 NULL_RTX, 0);
3827 quotient = force_operand (gen_rtx_PLUS (compute_mode,
3828 t4, t5),
3829 tquotient);
3834 if (quotient != 0)
3835 break;
3836 delete_insns_since (last);
3838 /* Try using an instruction that produces both the quotient and
3839 remainder, using truncation. We can easily compensate the quotient
3840 or remainder to get floor rounding, once we have the remainder.
3841 Notice that we also compute the final remainder value here,
3842 and return the result right away. */
3843 if (target == 0 || GET_MODE (target) != compute_mode)
3844 target = gen_reg_rtx (compute_mode);
3846 if (rem_flag)
3848 remainder
3849 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
3850 quotient = gen_reg_rtx (compute_mode);
3852 else
3854 quotient
3855 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
3856 remainder = gen_reg_rtx (compute_mode);
3859 if (expand_twoval_binop (sdivmod_optab, op0, op1,
3860 quotient, remainder, 0))
3862 /* This could be computed with a branch-less sequence.
3863 Save that for later. */
3864 rtx tem;
3865 rtx label = gen_label_rtx ();
3866 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
3867 tem = expand_binop (compute_mode, xor_optab, op0, op1,
3868 NULL_RTX, 0, OPTAB_WIDEN);
3869 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
3870 expand_dec (quotient, const1_rtx);
3871 expand_inc (remainder, op1);
3872 emit_label (label);
3873 return gen_lowpart (mode, rem_flag ? remainder : quotient);
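      /* Illustrative example of the adjustment above: op0 == -7, op1 == 2
         gives truncating quotient -3 and remainder -1; the remainder is
         nonzero and the operands differ in sign, so the floor result is
         quotient -4, remainder 1 (-7 == 2 * -4 + 1).  */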
3876 /* No luck with division elimination or divmod. Have to do it
3877 by conditionally adjusting op0 *and* the result. */
3879 rtx label1, label2, label3, label4, label5;
3880 rtx adjusted_op0;
3881 rtx tem;
3883 quotient = gen_reg_rtx (compute_mode);
3884 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
3885 label1 = gen_label_rtx ();
3886 label2 = gen_label_rtx ();
3887 label3 = gen_label_rtx ();
3888 label4 = gen_label_rtx ();
3889 label5 = gen_label_rtx ();
3890 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
3891 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
3892 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3893 quotient, 0, OPTAB_LIB_WIDEN);
3894 if (tem != quotient)
3895 emit_move_insn (quotient, tem);
3896 emit_jump_insn (gen_jump (label5));
3897 emit_barrier ();
3898 emit_label (label1);
3899 expand_inc (adjusted_op0, const1_rtx);
3900 emit_jump_insn (gen_jump (label4));
3901 emit_barrier ();
3902 emit_label (label2);
3903 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
3904 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3905 quotient, 0, OPTAB_LIB_WIDEN);
3906 if (tem != quotient)
3907 emit_move_insn (quotient, tem);
3908 emit_jump_insn (gen_jump (label5));
3909 emit_barrier ();
3910 emit_label (label3);
3911 expand_dec (adjusted_op0, const1_rtx);
3912 emit_label (label4);
3913 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3914 quotient, 0, OPTAB_LIB_WIDEN);
3915 if (tem != quotient)
3916 emit_move_insn (quotient, tem);
3917 expand_dec (quotient, const1_rtx);
3918 emit_label (label5);
3920 break;
3922 case CEIL_DIV_EXPR:
3923 case CEIL_MOD_EXPR:
3924 if (unsignedp)
3926 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
3928 rtx t1, t2, t3;
3929 unsigned HOST_WIDE_INT d = INTVAL (op1);
3930 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3931 build_int_2 (floor_log2 (d), 0),
3932 tquotient, 1);
3933 t2 = expand_binop (compute_mode, and_optab, op0,
3934 GEN_INT (d - 1),
3935 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3936 t3 = gen_reg_rtx (compute_mode);
3937 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
3938 compute_mode, 1, 1);
3939 if (t3 == 0)
3941 rtx lab;
3942 lab = gen_label_rtx ();
3943 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
3944 expand_inc (t1, const1_rtx);
3945 emit_label (lab);
3946 quotient = t1;
3948 else
3949 quotient = force_operand (gen_rtx_PLUS (compute_mode,
3950 t1, t3),
3951 tquotient);
3952 break;
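      /* Illustrative example: unsigned CEIL_DIV by d == 4 computes
         t1 == op0 >> 2 and adds 1 when (op0 & 3) is nonzero, so
         op0 == 13 gives 3 + 1 == 4 == ceil (13 / 4).  */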
3955 /* Try using an instruction that produces both the quotient and
3956 remainder, using truncation. We can easily compensate the
3957 quotient or remainder to get ceiling rounding, once we have the
3958 remainder. Notice that we also compute the final remainder
3959 value here, and return the result right away. */
3960 if (target == 0 || GET_MODE (target) != compute_mode)
3961 target = gen_reg_rtx (compute_mode);
3963 if (rem_flag)
3965 remainder = (REG_P (target)
3966 ? target : gen_reg_rtx (compute_mode));
3967 quotient = gen_reg_rtx (compute_mode);
3969 else
3971 quotient = (REG_P (target)
3972 ? target : gen_reg_rtx (compute_mode));
3973 remainder = gen_reg_rtx (compute_mode);
3976 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
3977 remainder, 1))
3979 /* This could be computed with a branch-less sequence.
3980 Save that for later. */
3981 rtx label = gen_label_rtx ();
3982 do_cmp_and_jump (remainder, const0_rtx, EQ,
3983 compute_mode, label);
3984 expand_inc (quotient, const1_rtx);
3985 expand_dec (remainder, op1);
3986 emit_label (label);
3987 return gen_lowpart (mode, rem_flag ? remainder : quotient);
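      /* Illustrative example of the compensation above: op0 == 13,
         op1 == 4 gives truncating quotient 3, remainder 1; the remainder
         is nonzero, so the quotient becomes 4 and the remainder
         1 - 4 == -3, i.e. 13 == 4 * 4 - 3.  */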
3990 /* No luck with division elimination or divmod. Have to do it
3991 by conditionally adjusting op0 *and* the result. */
3993 rtx label1, label2;
3994 rtx adjusted_op0, tem;
3996 quotient = gen_reg_rtx (compute_mode);
3997 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
3998 label1 = gen_label_rtx ();
3999 label2 = gen_label_rtx ();
4000 do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4001 compute_mode, label1);
4002 emit_move_insn (quotient, const0_rtx);
4003 emit_jump_insn (gen_jump (label2));
4004 emit_barrier ();
4005 emit_label (label1);
4006 expand_dec (adjusted_op0, const1_rtx);
4007 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4008 quotient, 1, OPTAB_LIB_WIDEN);
4009 if (tem != quotient)
4010 emit_move_insn (quotient, tem);
4011 expand_inc (quotient, const1_rtx);
4012 emit_label (label2);
4015 else /* signed */
4017 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4018 && INTVAL (op1) >= 0)
4020 /* This is extremely similar to the code for the unsigned case
4021 above. For 2.7 we should merge these variants, but for
4022 2.6.1 I don't want to touch the code for unsigned since that
4023 gets used in C. The signed case will only be used by other
4024 languages (Ada). */
4026 rtx t1, t2, t3;
4027 unsigned HOST_WIDE_INT d = INTVAL (op1);
4028 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4029 build_int_2 (floor_log2 (d), 0),
4030 tquotient, 0);
4031 t2 = expand_binop (compute_mode, and_optab, op0,
4032 GEN_INT (d - 1),
4033 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4034 t3 = gen_reg_rtx (compute_mode);
4035 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4036 compute_mode, 1, 1);
4037 if (t3 == 0)
4039 rtx lab;
4040 lab = gen_label_rtx ();
4041 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4042 expand_inc (t1, const1_rtx);
4043 emit_label (lab);
4044 quotient = t1;
4046 else
4047 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4048 t1, t3),
4049 tquotient);
4050 break;
4053 /* Try using an instruction that produces both the quotient and
4054 remainder, using truncation. We can easily compensate the
4055 quotient or remainder to get ceiling rounding, once we have the
4056 remainder. Notice that we also compute the final remainder
4057 value here, and return the result right away. */
4058 if (target == 0 || GET_MODE (target) != compute_mode)
4059 target = gen_reg_rtx (compute_mode);
4060 if (rem_flag)
4062 remainder = (REG_P (target)
4063 ? target : gen_reg_rtx (compute_mode));
4064 quotient = gen_reg_rtx (compute_mode);
4066 else
4068 quotient = (REG_P (target)
4069 ? target : gen_reg_rtx (compute_mode));
4070 remainder = gen_reg_rtx (compute_mode);
4073 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4074 remainder, 0))
4076 /* This could be computed with a branch-less sequence.
4077 Save that for later. */
4078 rtx tem;
4079 rtx label = gen_label_rtx ();
4080 do_cmp_and_jump (remainder, const0_rtx, EQ,
4081 compute_mode, label);
4082 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4083 NULL_RTX, 0, OPTAB_WIDEN);
4084 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4085 expand_inc (quotient, const1_rtx);
4086 expand_dec (remainder, op1);
4087 emit_label (label);
4088 return gen_lowpart (mode, rem_flag ? remainder : quotient);
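      /* Illustrative example: op0 == 7, op1 == 2 gives truncating
         quotient 3, remainder 1; the remainder is nonzero and the
         operands have the same sign, so the quotient becomes 4 and the
         remainder -1 (7 == 2 * 4 - 1).  With mixed signs the truncating
         result already is the ceiling.  */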
4091 /* No luck with division elimination or divmod. Have to do it
4092 by conditionally adjusting op0 *and* the result. */
4094 rtx label1, label2, label3, label4, label5;
4095 rtx adjusted_op0;
4096 rtx tem;
4098 quotient = gen_reg_rtx (compute_mode);
4099 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4100 label1 = gen_label_rtx ();
4101 label2 = gen_label_rtx ();
4102 label3 = gen_label_rtx ();
4103 label4 = gen_label_rtx ();
4104 label5 = gen_label_rtx ();
4105 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4106 do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4107 compute_mode, label1);
4108 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4109 quotient, 0, OPTAB_LIB_WIDEN);
4110 if (tem != quotient)
4111 emit_move_insn (quotient, tem);
4112 emit_jump_insn (gen_jump (label5));
4113 emit_barrier ();
4114 emit_label (label1);
4115 expand_dec (adjusted_op0, const1_rtx);
4116 emit_jump_insn (gen_jump (label4));
4117 emit_barrier ();
4118 emit_label (label2);
4119 do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4120 compute_mode, label3);
4121 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4122 quotient, 0, OPTAB_LIB_WIDEN);
4123 if (tem != quotient)
4124 emit_move_insn (quotient, tem);
4125 emit_jump_insn (gen_jump (label5));
4126 emit_barrier ();
4127 emit_label (label3);
4128 expand_inc (adjusted_op0, const1_rtx);
4129 emit_label (label4);
4130 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4131 quotient, 0, OPTAB_LIB_WIDEN);
4132 if (tem != quotient)
4133 emit_move_insn (quotient, tem);
4134 expand_inc (quotient, const1_rtx);
4135 emit_label (label5);
4138 break;
4140 case EXACT_DIV_EXPR:
4141 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4143 HOST_WIDE_INT d = INTVAL (op1);
4144 unsigned HOST_WIDE_INT ml;
4145 int pre_shift;
4146 rtx t1;
4148 pre_shift = floor_log2 (d & -d);
4149 ml = invert_mod2n (d >> pre_shift, size);
4150 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4151 build_int_2 (pre_shift, 0), NULL_RTX, unsignedp);
4152 quotient = expand_mult (compute_mode, t1,
4153 gen_int_mode (ml, compute_mode),
4154 NULL_RTX, 1);
4156 insn = get_last_insn ();
4157 set_unique_reg_note (insn,
4158 REG_EQUAL,
4159 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4160 compute_mode,
4161 op0, op1));
4163 break;
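      /* Worked example (illustrative, 32-bit mode): EXACT_DIV by d == 6
         uses pre_shift == 1 and ml == invert_mod2n (3, 32) == 0xAAAAAAAB,
         since 3 * 0xAAAAAAAB == 2 * 2^32 + 1; op0 == 18 gives t1 == 9
         and quotient == (9 * 0xAAAAAAAB) mod 2^32 == 3.  */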
4165 case ROUND_DIV_EXPR:
4166 case ROUND_MOD_EXPR:
4167 if (unsignedp)
4169 rtx tem;
4170 rtx label;
4171 label = gen_label_rtx ();
4172 quotient = gen_reg_rtx (compute_mode);
4173 remainder = gen_reg_rtx (compute_mode);
4174 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4176 rtx tem;
4177 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4178 quotient, 1, OPTAB_LIB_WIDEN);
4179 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4180 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4181 remainder, 1, OPTAB_LIB_WIDEN);
4183 tem = plus_constant (op1, -1);
4184 tem = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4185 build_int_2 (1, 0), NULL_RTX, 1);
4186 do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4187 expand_inc (quotient, const1_rtx);
4188 expand_dec (remainder, op1);
4189 emit_label (label);
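      /* Illustrative example: unsigned ROUND_DIV of 13 by 5 first forms
         quotient 2, remainder 3; since (op1 - 1) >> 1 == 2 and 3 > 2,
         the quotient is bumped to 3 and the remainder to -2, rounding
         2.6 up to 3.  */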
4191 else
4193 rtx abs_rem, abs_op1, tem, mask;
4194 rtx label;
4195 label = gen_label_rtx ();
4196 quotient = gen_reg_rtx (compute_mode);
4197 remainder = gen_reg_rtx (compute_mode);
4198 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4200 rtx tem;
4201 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4202 quotient, 0, OPTAB_LIB_WIDEN);
4203 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4204 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4205 remainder, 0, OPTAB_LIB_WIDEN);
4207 abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4208 abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4209 tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4210 build_int_2 (1, 0), NULL_RTX, 1);
4211 do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4212 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4213 NULL_RTX, 0, OPTAB_WIDEN);
4214 mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4215 build_int_2 (size - 1, 0), NULL_RTX, 0);
4216 tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4217 NULL_RTX, 0, OPTAB_WIDEN);
4218 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4219 NULL_RTX, 0, OPTAB_WIDEN);
4220 expand_inc (quotient, tem);
4221 tem = expand_binop (compute_mode, xor_optab, mask, op1,
4222 NULL_RTX, 0, OPTAB_WIDEN);
4223 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4224 NULL_RTX, 0, OPTAB_WIDEN);
4225 expand_dec (remainder, tem);
4226 emit_label (label);
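      /* Sketch of the sign trick above (illustrative): MASK is
         (op0 ^ op1) >> (size - 1), i.e. -1 when the operands differ in
         sign and 0 otherwise, so (MASK ^ 1) - MASK is +1 or -1 and the
         quotient is adjusted toward the rounded result whenever
         2 * abs (remainder) >= abs (op1).  */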
4228 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4230 default:
4231 abort ();
4234 if (quotient == 0)
4236 if (target && GET_MODE (target) != compute_mode)
4237 target = 0;
4239 if (rem_flag)
4241 /* Try to produce the remainder without producing the quotient.
4242 If we seem to have a divmod pattern that does not require widening,
4243 don't try widening here. We should really have a WIDEN argument
4244 to expand_twoval_binop, since what we'd really like to do here is
4245 1) try a mod insn in compute_mode
4246 2) try a divmod insn in compute_mode
4247 3) try a div insn in compute_mode and multiply-subtract to get
4248 remainder
4249 4) try the same things with widening allowed. */
4250 remainder
4251 = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4252 op0, op1, target,
4253 unsignedp,
4254 ((optab2->handlers[compute_mode].insn_code
4255 != CODE_FOR_nothing)
4256 ? OPTAB_DIRECT : OPTAB_WIDEN));
4257 if (remainder == 0)
4259 /* No luck there. Can we do remainder and divide at once
4260 without a library call? */
4261 remainder = gen_reg_rtx (compute_mode);
4262 if (! expand_twoval_binop ((unsignedp
4263 ? udivmod_optab
4264 : sdivmod_optab),
4265 op0, op1,
4266 NULL_RTX, remainder, unsignedp))
4267 remainder = 0;
4270 if (remainder)
4271 return gen_lowpart (mode, remainder);
4274 /* Produce the quotient. Try a quotient insn, but not a library call.
4275 If we have a divmod in this mode, use it in preference to widening
4276 the div (for this test we assume it will not fail). Note that optab2
4277 is set to the one of the two optabs that the call below will use. */
4278 quotient
4279 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4280 op0, op1, rem_flag ? NULL_RTX : target,
4281 unsignedp,
4282 ((optab2->handlers[compute_mode].insn_code
4283 != CODE_FOR_nothing)
4284 ? OPTAB_DIRECT : OPTAB_WIDEN));
4286 if (quotient == 0)
4288 /* No luck there. Try a quotient-and-remainder insn,
4289 keeping the quotient alone. */
4290 quotient = gen_reg_rtx (compute_mode);
4291 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4292 op0, op1,
4293 quotient, NULL_RTX, unsignedp))
4295 quotient = 0;
4296 if (! rem_flag)
4297 /* Still no luck. If we are not computing the remainder,
4298 use a library call for the quotient. */
4299 quotient = sign_expand_binop (compute_mode,
4300 udiv_optab, sdiv_optab,
4301 op0, op1, target,
4302 unsignedp, OPTAB_LIB_WIDEN);
4307 if (rem_flag)
4309 if (target && GET_MODE (target) != compute_mode)
4310 target = 0;
4312 if (quotient == 0)
4313 /* No divide instruction either. Use library for remainder. */
4314 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4315 op0, op1, target,
4316 unsignedp, OPTAB_LIB_WIDEN);
4317 else
4319 /* We divided. Now finish doing X - Y * (X / Y). */
4320 remainder = expand_mult (compute_mode, quotient, op1,
4321 NULL_RTX, unsignedp);
4322 remainder = expand_binop (compute_mode, sub_optab, op0,
4323 remainder, target, unsignedp,
4324 OPTAB_LIB_WIDEN);
4328 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4331 /* Return a tree node with data type TYPE, describing the value of X.
4332 Usually this is a VAR_DECL, if there is no obvious better choice.
4333 X may be an expression; however, we only support those expressions
4334 generated by loop.c. */
4336 tree
4337 make_tree (tree type, rtx x)
4339 tree t;
4341 switch (GET_CODE (x))
4343 case CONST_INT:
4344 t = build_int_2 (INTVAL (x),
4345 (TYPE_UNSIGNED (type)
4346 && (GET_MODE_BITSIZE (TYPE_MODE (type))
4347 < HOST_BITS_PER_WIDE_INT))
4348 || INTVAL (x) >= 0 ? 0 : -1);
4349 TREE_TYPE (t) = type;
4350 return t;
4352 case CONST_DOUBLE:
4353 if (GET_MODE (x) == VOIDmode)
4355 t = build_int_2 (CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
4356 TREE_TYPE (t) = type;
4358 else
4360 REAL_VALUE_TYPE d;
4362 REAL_VALUE_FROM_CONST_DOUBLE (d, x);
4363 t = build_real (type, d);
4366 return t;
4368 case CONST_VECTOR:
4370 int i, units;
4371 rtx elt;
4372 tree t = NULL_TREE;
4374 units = CONST_VECTOR_NUNITS (x);
4376 /* Build a tree with vector elements. */
4377 for (i = units - 1; i >= 0; --i)
4379 elt = CONST_VECTOR_ELT (x, i);
4380 t = tree_cons (NULL_TREE, make_tree (type, elt), t);
4383 return build_vector (type, t);
4386 case PLUS:
4387 return fold (build (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4388 make_tree (type, XEXP (x, 1))));
4390 case MINUS:
4391 return fold (build (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4392 make_tree (type, XEXP (x, 1))));
4394 case NEG:
4395 return fold (build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0))));
4397 case MULT:
4398 return fold (build (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
4399 make_tree (type, XEXP (x, 1))));
4401 case ASHIFT:
4402 return fold (build (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
4403 make_tree (type, XEXP (x, 1))));
4405 case LSHIFTRT:
4406 t = lang_hooks.types.unsigned_type (type);
4407 return fold (convert (type,
4408 build (RSHIFT_EXPR, t,
4409 make_tree (t, XEXP (x, 0)),
4410 make_tree (type, XEXP (x, 1)))));
4412 case ASHIFTRT:
4413 t = lang_hooks.types.signed_type (type);
4414 return fold (convert (type,
4415 build (RSHIFT_EXPR, t,
4416 make_tree (t, XEXP (x, 0)),
4417 make_tree (type, XEXP (x, 1)))));
4419 case DIV:
4420 if (TREE_CODE (type) != REAL_TYPE)
4421 t = lang_hooks.types.signed_type (type);
4422 else
4423 t = type;
4425 return fold (convert (type,
4426 build (TRUNC_DIV_EXPR, t,
4427 make_tree (t, XEXP (x, 0)),
4428 make_tree (t, XEXP (x, 1)))));
4429 case UDIV:
4430 t = lang_hooks.types.unsigned_type (type);
4431 return fold (convert (type,
4432 build (TRUNC_DIV_EXPR, t,
4433 make_tree (t, XEXP (x, 0)),
4434 make_tree (t, XEXP (x, 1)))));
4436 case SIGN_EXTEND:
4437 case ZERO_EXTEND:
4438 t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
4439 GET_CODE (x) == ZERO_EXTEND);
4440 return fold (convert (type, make_tree (t, XEXP (x, 0))));
4442 default:
4443 t = build_decl (VAR_DECL, NULL_TREE, type);
4445 /* If TYPE is a POINTER_TYPE, X might be Pmode with TYPE_MODE being
4446 ptr_mode. So convert. */
4447 if (POINTER_TYPE_P (type))
4448 x = convert_memory_address (TYPE_MODE (type), x);
4450 /* Note that we do *not* use SET_DECL_RTL here, because we do not
4451 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
4452 t->decl.rtl = x;
4454 return t;
4458 /* Check whether the multiplication X * MULT + ADD overflows.
4459 X, MULT and ADD must be CONST_*.
4460 MODE is the machine mode for the computation.
4461 X and MULT must have mode MODE. ADD may have a different mode,
4462 or VOIDmode, in which case it is taken to have mode MODE.
4463 UNSIGNEDP is nonzero to do unsigned multiplication. */
4465 bool
4466 const_mult_add_overflow_p (rtx x, rtx mult, rtx add, enum machine_mode mode, int unsignedp)
4468 tree type, mult_type, add_type, result;
4470 type = lang_hooks.types.type_for_mode (mode, unsignedp);
4472 /* In order to get a proper overflow indication from an unsigned
4473 type, we have to pretend that it's a sizetype. */
4474 mult_type = type;
4475 if (unsignedp)
4477 mult_type = copy_node (type);
4478 TYPE_IS_SIZETYPE (mult_type) = 1;
4481 add_type = (GET_MODE (add) == VOIDmode ? mult_type
4482 : lang_hooks.types.type_for_mode (GET_MODE (add), unsignedp));
4484 result = fold (build (PLUS_EXPR, mult_type,
4485 fold (build (MULT_EXPR, mult_type,
4486 make_tree (mult_type, x),
4487 make_tree (mult_type, mult))),
4488 make_tree (add_type, add)));
4490 return TREE_CONSTANT_OVERFLOW (result);
4493 /* Return an rtx representing the value of X * MULT + ADD.
4494 TARGET is a suggestion for where to store the result (an rtx).
4495 MODE is the machine mode for the computation.
4496 X and MULT must have mode MODE. ADD may have a different mode,
4497 or VOIDmode, in which case it is taken to have mode MODE.
4498 UNSIGNEDP is nonzero to do unsigned multiplication.
4499 This may emit insns. */
4501 rtx
4502 expand_mult_add (rtx x, rtx target, rtx mult, rtx add, enum machine_mode mode,
4503 int unsignedp)
4505 tree type = lang_hooks.types.type_for_mode (mode, unsignedp);
4506 tree add_type = (GET_MODE (add) == VOIDmode
4507 ? type: lang_hooks.types.type_for_mode (GET_MODE (add),
4508 unsignedp));
4509 tree result = fold (build (PLUS_EXPR, type,
4510 fold (build (MULT_EXPR, type,
4511 make_tree (type, x),
4512 make_tree (type, mult))),
4513 make_tree (add_type, add)));
4515 return expand_expr (result, target, VOIDmode, 0);
4518 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
4519 and returning TARGET.
4521 If TARGET is 0, a pseudo-register or constant is returned. */
4523 rtx
4524 expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
4526 rtx tem = 0;
4528 if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
4529 tem = simplify_binary_operation (AND, mode, op0, op1);
4530 if (tem == 0)
4531 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
4533 if (target == 0)
4534 target = tem;
4535 else if (tem != target)
4536 emit_move_insn (target, tem);
4537 return target;
4540 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
4541 and storing in TARGET. Normally return TARGET.
4542 Return 0 if that cannot be done.
4544 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
4545 it is VOIDmode, they cannot both be CONST_INT.
4547 UNSIGNEDP is for the case where we have to widen the operands
4548 to perform the operation. It says to use zero-extension.
4550 NORMALIZEP is 1 if we should convert the result to be either zero
4551 or one. NORMALIZEP is -1 if we should convert the result to be
4552 either zero or -1. If NORMALIZEP is zero, the result will be left
4553 "raw" out of the scc insn. */
4555 rtx
4556 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
4557 enum machine_mode mode, int unsignedp, int normalizep)
4559 rtx subtarget;
4560 enum insn_code icode;
4561 enum machine_mode compare_mode;
4562 enum machine_mode target_mode = GET_MODE (target);
4563 rtx tem;
4564 rtx last = get_last_insn ();
4565 rtx pattern, comparison;
4567 /* ??? Ok to do this and then fail? */
4568 op0 = protect_from_queue (op0, 0);
4569 op1 = protect_from_queue (op1, 0);
4571 if (unsignedp)
4572 code = unsigned_condition (code);
4574 /* If one operand is constant, make it the second one. Only do this
4575 if the other operand is not constant as well. */
4577 if (swap_commutative_operands_p (op0, op1))
4579 tem = op0;
4580 op0 = op1;
4581 op1 = tem;
4582 code = swap_condition (code);
4585 if (mode == VOIDmode)
4586 mode = GET_MODE (op0);
4588 /* For some comparisons with 1 and -1, we can convert this to
4589 comparisons with zero. This will often produce more opportunities for
4590 store-flag insns. */
4592 switch (code)
4594 case LT:
4595 if (op1 == const1_rtx)
4596 op1 = const0_rtx, code = LE;
4597 break;
4598 case LE:
4599 if (op1 == constm1_rtx)
4600 op1 = const0_rtx, code = LT;
4601 break;
4602 case GE:
4603 if (op1 == const1_rtx)
4604 op1 = const0_rtx, code = GT;
4605 break;
4606 case GT:
4607 if (op1 == constm1_rtx)
4608 op1 = const0_rtx, code = GE;
4609 break;
4610 case GEU:
4611 if (op1 == const1_rtx)
4612 op1 = const0_rtx, code = NE;
4613 break;
4614 case LTU:
4615 if (op1 == const1_rtx)
4616 op1 = const0_rtx, code = EQ;
4617 break;
4618 default:
4619 break;
4622 /* If we are comparing a double-word integer with zero or -1, we can
4623 convert the comparison into one involving a single word. */
4624 if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
4625 && GET_MODE_CLASS (mode) == MODE_INT
4626 && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
4628 if ((code == EQ || code == NE)
4629 && (op1 == const0_rtx || op1 == constm1_rtx))
4631 rtx op00, op01, op0both;
4633 /* Do a logical OR or AND of the two words and compare the result. */
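      /* (lo | hi) is zero exactly when the double-word value is zero,
         and (lo & hi) is all ones exactly when it is -1, so one
         word_mode comparison suffices.  */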
4634 op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
4635 op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
4636 op0both = expand_binop (word_mode,
4637 op1 == const0_rtx ? ior_optab : and_optab,
4638 op00, op01, NULL_RTX, unsignedp, OPTAB_DIRECT);
4640 if (op0both != 0)
4641 return emit_store_flag (target, code, op0both, op1, word_mode,
4642 unsignedp, normalizep);
4644 else if ((code == LT || code == GE) && op1 == const0_rtx)
4646 rtx op0h;
4648 /* If testing the sign bit, can just test on high word. */
4649 op0h = simplify_gen_subreg (word_mode, op0, mode,
4650 subreg_highpart_offset (word_mode, mode));
4651 return emit_store_flag (target, code, op0h, op1, word_mode,
4652 unsignedp, normalizep);
4656 /* From now on, we won't change CODE, so set ICODE now. */
4657 icode = setcc_gen_code[(int) code];
4659 /* If this is A < 0 or A >= 0, we can do this by taking the ones
4660 complement of A (for GE) and shifting the sign bit to the low bit. */
4661 if (op1 == const0_rtx && (code == LT || code == GE)
4662 && GET_MODE_CLASS (mode) == MODE_INT
4663 && (normalizep || STORE_FLAG_VALUE == 1
4664 || (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
4665 && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
4666 == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))))
4668 subtarget = target;
4670 /* If the result is to be wider than OP0, it is best to convert it
4671 first. If it is to be narrower, it is *incorrect* to convert it
4672 first. */
4673 if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
4675 op0 = protect_from_queue (op0, 0);
4676 op0 = convert_modes (target_mode, mode, op0, 0);
4677 mode = target_mode;
4680 if (target_mode != mode)
4681 subtarget = 0;
4683 if (code == GE)
4684 op0 = expand_unop (mode, one_cmpl_optab, op0,
4685 ((STORE_FLAG_VALUE == 1 || normalizep)
4686 ? 0 : subtarget), 0);
4688 if (STORE_FLAG_VALUE == 1 || normalizep)
4689 /* If we are supposed to produce a 0/1 value, we want to do
4690 a logical shift from the sign bit to the low-order bit; for
4691 a -1/0 value, we do an arithmetic shift. */
4692 op0 = expand_shift (RSHIFT_EXPR, mode, op0,
4693 size_int (GET_MODE_BITSIZE (mode) - 1),
4694 subtarget, normalizep != -1);
4696 if (mode != target_mode)
4697 op0 = convert_modes (target_mode, mode, op0, 0);
4699 return op0;
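  /* Illustrative example (32-bit mode, normalizep == 1): "A < 0" reduces
     to (unsigned) A >> 31, which is 1 exactly when the sign bit is set;
     "A >= 0" complements A first, giving (unsigned) ~A >> 31.  */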
4702 if (icode != CODE_FOR_nothing)
4704 insn_operand_predicate_fn pred;
4706 /* We think we may be able to do this with a scc insn. Emit the
4707 comparison and then the scc insn.
4709 compare_from_rtx may call emit_queue, which would be deleted below
4710 if the scc insn fails. So call it ourselves before setting LAST.
4711 Likewise for do_pending_stack_adjust. */
4713 emit_queue ();
4714 do_pending_stack_adjust ();
4715 last = get_last_insn ();
4717 comparison
4718 = compare_from_rtx (op0, op1, code, unsignedp, mode, NULL_RTX);
4719 if (CONSTANT_P (comparison))
4721 if (GET_CODE (comparison) == CONST_INT)
4723 if (comparison == const0_rtx)
4724 return const0_rtx;
4726 #ifdef FLOAT_STORE_FLAG_VALUE
4727 else if (GET_CODE (comparison) == CONST_DOUBLE)
4729 if (comparison == CONST0_RTX (GET_MODE (comparison)))
4730 return const0_rtx;
4732 #endif
4733 else
4734 abort ();
4735 if (normalizep == 1)
4736 return const1_rtx;
4737 if (normalizep == -1)
4738 return constm1_rtx;
4739 return const_true_rtx;
4742 /* The code of COMPARISON may not match CODE if compare_from_rtx
4743 decided to swap its operands and reverse the original code.
4745 We know that compare_from_rtx returns either a CONST_INT or
4746 a new comparison code, so it is safe to just extract the
4747 code from COMPARISON. */
4748 code = GET_CODE (comparison);
4750 /* Get a reference to the target in the proper mode for this insn. */
4751 compare_mode = insn_data[(int) icode].operand[0].mode;
4752 subtarget = target;
4753 pred = insn_data[(int) icode].operand[0].predicate;
4754 if (preserve_subexpressions_p ()
4755 || ! (*pred) (subtarget, compare_mode))
4756 subtarget = gen_reg_rtx (compare_mode);
4758 pattern = GEN_FCN (icode) (subtarget);
4759 if (pattern)
4761 emit_insn (pattern);
4763 /* If we are converting to a wider mode, first convert to
4764 TARGET_MODE, then normalize. This produces better combining
4765 opportunities on machines that have a SIGN_EXTRACT when we are
4766 testing a single bit. This mostly benefits the 68k.
4768 If STORE_FLAG_VALUE does not have the sign bit set when
4769 interpreted in COMPARE_MODE, we can do this conversion as
4770 unsigned, which is usually more efficient. */
4771 if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (compare_mode))
4773 convert_move (target, subtarget,
4774 (GET_MODE_BITSIZE (compare_mode)
4775 <= HOST_BITS_PER_WIDE_INT)
4776 && 0 == (STORE_FLAG_VALUE
4777 & ((HOST_WIDE_INT) 1
4778 << (GET_MODE_BITSIZE (compare_mode) -1))));
4779 op0 = target;
4780 compare_mode = target_mode;
4782 else
4783 op0 = subtarget;
4785 /* If we want to keep subexpressions around, don't reuse our
4786 last target. */
4788 if (preserve_subexpressions_p ())
4789 subtarget = 0;
4791 /* Now normalize to the proper value in COMPARE_MODE. Sometimes
4792 we don't have to do anything. */
4793 if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
4795 /* STORE_FLAG_VALUE might be the most negative number, so write
4796 the comparison this way to avoid a compile-time warning. */
4797 else if (- normalizep == STORE_FLAG_VALUE)
4798 op0 = expand_unop (compare_mode, neg_optab, op0, subtarget, 0);
4800 /* We don't want to use STORE_FLAG_VALUE < 0 below since this
4801 makes it hard to use a value of just the sign bit due to
4802 ANSI integer constant typing rules. */
4803 else if (GET_MODE_BITSIZE (compare_mode) <= HOST_BITS_PER_WIDE_INT
4804 && (STORE_FLAG_VALUE
4805 & ((HOST_WIDE_INT) 1
4806 << (GET_MODE_BITSIZE (compare_mode) - 1))))
4807 op0 = expand_shift (RSHIFT_EXPR, compare_mode, op0,
4808 size_int (GET_MODE_BITSIZE (compare_mode) - 1),
4809 subtarget, normalizep == 1);
4810 else if (STORE_FLAG_VALUE & 1)
4812 op0 = expand_and (compare_mode, op0, const1_rtx, subtarget);
4813 if (normalizep == -1)
4814 op0 = expand_unop (compare_mode, neg_optab, op0, op0, 0);
4816 else
4817 abort ();
4819 /* If we were converting to a smaller mode, do the
4820 conversion now. */
4821 if (target_mode != compare_mode)
4823 convert_move (target, op0, 0);
4824 return target;
4826 else
4827 return op0;
4831 delete_insns_since (last);
4833 /* If expensive optimizations, use different pseudo registers for each
4834 insn, instead of reusing the same pseudo. This leads to better CSE,
4835 but slows down the compiler, since there are more pseudos. */
4836 subtarget = (!flag_expensive_optimizations
4837 && (target_mode == mode)) ? target : NULL_RTX;
4839 /* If we reached here, we can't do this with a scc insn. However, there
4840 are some comparisons that can be done directly. For example, if
4841 this is an equality comparison of integers, we can try to exclusive-or
4842 (or subtract) the two operands and use a recursive call to try the
4843 comparison with zero. Don't do any of these cases if branches are
4844 very cheap. */
4846 if (BRANCH_COST > 0
4847 && GET_MODE_CLASS (mode) == MODE_INT && (code == EQ || code == NE)
4848 && op1 != const0_rtx)
4850 tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
4851 OPTAB_WIDEN);
4853 if (tem == 0)
4854 tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
4855 OPTAB_WIDEN);
4856 if (tem != 0)
4857 tem = emit_store_flag (target, code, tem, const0_rtx,
4858 mode, unsignedp, normalizep);
4859 if (tem == 0)
4860 delete_insns_since (last);
4861 return tem;
4864 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
4865 the constant zero. Reject all other comparisons at this point. Only
4866 do LE and GT if branches are expensive since they are expensive on
4867 2-operand machines. */
4869 if (BRANCH_COST == 0
4870 || GET_MODE_CLASS (mode) != MODE_INT || op1 != const0_rtx
4871 || (code != EQ && code != NE
4872 && (BRANCH_COST <= 1 || (code != LE && code != GT))))
4873 return 0;
4875 /* See what we need to return. We can only return a 1, -1, or the
4876 sign bit. */
4878 if (normalizep == 0)
4880 if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
4881 normalizep = STORE_FLAG_VALUE;
4883 else if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
4884 && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
4885 == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))
4887 else
4888 return 0;
4891 /* Try to put the result of the comparison in the sign bit. Assume we can't
4892 do the necessary operation below. */
4894 tem = 0;
4896 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
4897 the sign bit set. */
4899 if (code == LE)
4901 /* This is destructive, so SUBTARGET can't be OP0. */
4902 if (rtx_equal_p (subtarget, op0))
4903 subtarget = 0;
4905 tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
4906 OPTAB_WIDEN);
4907 if (tem)
4908 tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
4909 OPTAB_WIDEN);
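  /* Illustrative check of the identity: A == 0 gives 0 | -1 == -1 (sign
     bit set), A == 5 gives 5 | 4 == 5 (sign bit clear), and A == -3
     gives a negative result (set), so the sign bit is set exactly when
     A <= 0.  */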
4912 /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
4913 number of bits in the mode of OP0, minus one. */
4915 if (code == GT)
4917 if (rtx_equal_p (subtarget, op0))
4918 subtarget = 0;
4920 tem = expand_shift (RSHIFT_EXPR, mode, op0,
4921 size_int (GET_MODE_BITSIZE (mode) - 1),
4922 subtarget, 0);
4923 tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
4924 OPTAB_WIDEN);
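  /* Illustrative check: with S == ((signed) A) >> BITS, S - A is -A < 0
     when A > 0, 0 when A == 0, and -1 - A >= 0 when A < 0, so the sign
     bit of S - A is set exactly when A > 0.  */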
4927 if (code == EQ || code == NE)
4929 /* For EQ or NE, one way to do the comparison is to apply an operation
4930 that converts the operand into a positive number if it is nonzero
4931 or zero if it was originally zero. Then, for EQ, we subtract 1 and
4932 for NE we negate. This puts the result in the sign bit. Then we
4933 normalize with a shift, if needed.
4935 Two operations that can do the above actions are ABS and FFS, so try
4936 them. If that doesn't work, and MODE is smaller than a full word,
4937 we can use zero-extension to the wider mode (an unsigned conversion)
4938 as the operation. */
4940 /* Note that ABS doesn't yield a positive number for INT_MIN, but
4941 that is compensated by the subsequent overflow when subtracting
4942 one / negating. */
4944 if (abs_optab->handlers[mode].insn_code != CODE_FOR_nothing)
4945 tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
4946 else if (ffs_optab->handlers[mode].insn_code != CODE_FOR_nothing)
4947 tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
4948 else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
4950 op0 = protect_from_queue (op0, 0);
4951 tem = convert_modes (word_mode, mode, op0, 1);
4952 mode = word_mode;
4955 if (tem != 0)
4957 if (code == EQ)
4958 tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
4959 0, OPTAB_WIDEN);
4960 else
4961 tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
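  /* Illustrative check: for EQ, abs (A) - 1 is negative only when
     A == 0 (A == 6 gives 5, sign clear; A == 0 gives -1, sign set);
     for NE, - abs (A) is negative exactly when A != 0.  */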
4964 /* If we couldn't do it that way, for NE we can "or" the two's complement
4965 of the value with itself. For EQ, we take the one's complement of
4966 that "or", which is an extra insn, so we only handle EQ if branches
4967 are expensive. */
4969 if (tem == 0 && (code == NE || BRANCH_COST > 1))
4971 if (rtx_equal_p (subtarget, op0))
4972 subtarget = 0;
4974 tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
4975 tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
4976 OPTAB_WIDEN);
4978 if (tem && code == EQ)
4979 tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
4983 if (tem && normalizep)
4984 tem = expand_shift (RSHIFT_EXPR, mode, tem,
4985 size_int (GET_MODE_BITSIZE (mode) - 1),
4986 subtarget, normalizep == 1);
4988 if (tem)
4990 if (GET_MODE (tem) != target_mode)
4992 convert_move (target, tem, 0);
4993 tem = target;
4995 else if (!subtarget)
4997 emit_move_insn (target, tem);
4998 tem = target;
5001 else
5002 delete_insns_since (last);
5004 return tem;
5007 /* Like emit_store_flag, but always succeeds. */
5009 rtx
5010 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5011 enum machine_mode mode, int unsignedp, int normalizep)
5013 rtx tem, label;
5015 /* First see if emit_store_flag can do the job. */
5016 tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5017 if (tem != 0)
5018 return tem;
5020 if (normalizep == 0)
5021 normalizep = 1;
5023 /* If this failed, we have to do this with set/compare/jump/set code. */
5025 if (!REG_P (target)
5026 || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5027 target = gen_reg_rtx (GET_MODE (target));
5029 emit_move_insn (target, const1_rtx);
5030 label = gen_label_rtx ();
5031 do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5032 NULL_RTX, label);
5034 emit_move_insn (target, const0_rtx);
5035 emit_label (label);
5037 return target;
5040 /* Perform a possibly multi-word comparison and conditional jump to LABEL
5041 if ARG1 OP ARG2 is true, where ARG1 and ARG2 are of mode MODE.
5043 The algorithm is based on the code in expr.c:do_jump.
5045 Note that this does not perform a general comparison. Only variants
5046 generated within expmed.c are correctly handled, others abort (but could
5047 be handled if needed). */
5049 static void
5050 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
5051 rtx label)
5053 /* If this mode is an integer too wide to compare properly,
5054 compare word by word. Rely on cse to optimize constant cases. */
5056 if (GET_MODE_CLASS (mode) == MODE_INT
5057 && ! can_compare_p (op, mode, ccp_jump))
5059 rtx label2 = gen_label_rtx ();
5061 switch (op)
5063 case LTU:
5064 do_jump_by_parts_greater_rtx (mode, 1, arg2, arg1, label2, label);
5065 break;
5067 case LEU:
5068 do_jump_by_parts_greater_rtx (mode, 1, arg1, arg2, label, label2);
5069 break;
5071 case LT:
5072 do_jump_by_parts_greater_rtx (mode, 0, arg2, arg1, label2, label);
5073 break;
5075 case GT:
5076 do_jump_by_parts_greater_rtx (mode, 0, arg1, arg2, label2, label);
5077 break;
5079 case GE:
5080 do_jump_by_parts_greater_rtx (mode, 0, arg2, arg1, label, label2);
5081 break;
5083 /* do_jump_by_parts_equality_rtx compares with zero. Luckily
5084 those are the only equality operations we do. */
5085 case EQ:
5086 if (arg2 != const0_rtx || mode != GET_MODE(arg1))
5087 abort ();
5088 do_jump_by_parts_equality_rtx (arg1, label2, label);
5089 break;
5091 case NE:
5092 if (arg2 != const0_rtx || mode != GET_MODE(arg1))
5093 abort ();
5094 do_jump_by_parts_equality_rtx (arg1, label, label2);
5095 break;
5097 default:
5098 abort ();
5101 emit_label (label2);
5103 else
5104 emit_cmp_and_jump_insns (arg1, arg2, op, NULL_RTX, mode, 0, label);