/* Medium-level subroutines: convert bit-field store and extract
   and shifts, multiplies and divides to rtl instructions.
   Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
   1999, 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Software Foundation, 59 Temple Place - Suite 330, Boston, MA
02111-1307, USA.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "toplev.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "flags.h"
#include "insn-config.h"
#include "expr.h"
#include "optabs.h"
#include "real.h"
#include "recog.h"
#include "langhooks.h"
static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
				   unsigned HOST_WIDE_INT,
				   unsigned HOST_WIDE_INT, rtx);
static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
				   unsigned HOST_WIDE_INT, rtx);
static rtx extract_fixed_bit_field (enum machine_mode, rtx,
				    unsigned HOST_WIDE_INT,
				    unsigned HOST_WIDE_INT,
				    unsigned HOST_WIDE_INT, rtx, int);
static rtx mask_rtx (enum machine_mode, int, int, int);
static rtx lshift_value (enum machine_mode, rtx, int, int);
static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
				    unsigned HOST_WIDE_INT, int);
static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
/* Nonzero means divides or modulus operations are relatively cheap for
   powers of two, so don't use branches; emit the operation instead.
   Usually, this will mean that the MD file will emit non-branch
   sequences.  */

static bool sdiv_pow2_cheap[NUM_MACHINE_MODES];
static bool smod_pow2_cheap[NUM_MACHINE_MODES];
#ifndef SLOW_UNALIGNED_ACCESS
#define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
#endif

/* For compilers that support multiple targets with different word sizes,
   MAX_BITS_PER_WORD contains the biggest value of BITS_PER_WORD.  An example
   is the H8/300(H) compiler.  */

#ifndef MAX_BITS_PER_WORD
#define MAX_BITS_PER_WORD BITS_PER_WORD
#endif
/* Reduce conditional compilation elsewhere.  */
#ifndef HAVE_insv
#define HAVE_insv	0
#define CODE_FOR_insv	CODE_FOR_nothing
#define gen_insv(a,b,c,d) NULL_RTX
#endif
#ifndef HAVE_extv
#define HAVE_extv	0
#define CODE_FOR_extv	CODE_FOR_nothing
#define gen_extv(a,b,c,d) NULL_RTX
#endif
#ifndef HAVE_extzv
#define HAVE_extzv	0
#define CODE_FOR_extzv	CODE_FOR_nothing
#define gen_extzv(a,b,c,d) NULL_RTX
#endif
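
/* With the fallback definitions above, code later in this file can test
   HAVE_insv and call gen_insv unconditionally; on a target without the
   pattern, HAVE_insv is the constant 0 and the dead arm is compiled
   away.  A sketch of the shape this enables (see store_bit_field below
   for the real use; dst, size, pos and val are hypothetical):

	if (HAVE_insv)
	  pat = gen_insv (dst, GEN_INT (size), GEN_INT (pos), val);
	else
	  ... fall back to shifts and masks ...
 */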
/* Cost of various pieces of RTL.  Note that some of these are indexed by
   shift count and some by mode.  */
static int zero_cost;
static int add_cost[NUM_MACHINE_MODES];
static int neg_cost[NUM_MACHINE_MODES];
static int shift_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
static int shiftadd_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
static int shiftsub_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
static int mul_cost[NUM_MACHINE_MODES];
static int div_cost[NUM_MACHINE_MODES];
static int mul_widen_cost[NUM_MACHINE_MODES];
static int mul_highpart_cost[NUM_MACHINE_MODES];
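
/* A sketch of how these tables are consulted once init_expmed has
   filled them in; the real decisions of this kind belong to the
   multiply-synthesis machinery elsewhere in this file, and `mode' and
   `m' here are hypothetical:

	if (shift_cost[mode][m] <= mul_cost[mode])
	  ... prefer "x << m" over "x * (1 << m)" ...
 */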
void
init_expmed (void)
{
  struct
  {
    struct rtx_def reg;		rtunion reg_fld[2];
    struct rtx_def plus;	rtunion plus_fld1;
    struct rtx_def neg;
    struct rtx_def udiv;	rtunion udiv_fld1;
    struct rtx_def mult;	rtunion mult_fld1;
    struct rtx_def div;		rtunion div_fld1;
    struct rtx_def mod;		rtunion mod_fld1;
    struct rtx_def zext;
    struct rtx_def wide_mult;	rtunion wide_mult_fld1;
    struct rtx_def wide_lshr;	rtunion wide_lshr_fld1;
    struct rtx_def wide_trunc;
    struct rtx_def shift;	rtunion shift_fld1;
    struct rtx_def shift_mult;	rtunion shift_mult_fld1;
    struct rtx_def shift_add;	rtunion shift_add_fld1;
    struct rtx_def shift_sub;	rtunion shift_sub_fld1;
  } all;

  rtx pow2[MAX_BITS_PER_WORD];
  rtx cint[MAX_BITS_PER_WORD];
  int m, n;
  enum machine_mode mode, wider_mode;

  zero_cost = rtx_cost (const0_rtx, 0);

  for (m = 1; m < MAX_BITS_PER_WORD; m++)
    {
      pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
      cint[m] = GEN_INT (m);
    }

  memset (&all, 0, sizeof all);

  PUT_CODE (&all.reg, REG);
  REGNO (&all.reg) = 10000;

  PUT_CODE (&all.plus, PLUS);
  XEXP (&all.plus, 0) = &all.reg;
  XEXP (&all.plus, 1) = &all.reg;

  PUT_CODE (&all.neg, NEG);
  XEXP (&all.neg, 0) = &all.reg;

  PUT_CODE (&all.udiv, UDIV);
  XEXP (&all.udiv, 0) = &all.reg;
  XEXP (&all.udiv, 1) = &all.reg;

  PUT_CODE (&all.mult, MULT);
  XEXP (&all.mult, 0) = &all.reg;
  XEXP (&all.mult, 1) = &all.reg;

  PUT_CODE (&all.div, DIV);
  XEXP (&all.div, 0) = &all.reg;
  XEXP (&all.div, 1) = 32 < MAX_BITS_PER_WORD ? cint[32] : GEN_INT (32);

  PUT_CODE (&all.mod, MOD);
  XEXP (&all.mod, 0) = &all.reg;
  XEXP (&all.mod, 1) = XEXP (&all.div, 1);

  PUT_CODE (&all.zext, ZERO_EXTEND);
  XEXP (&all.zext, 0) = &all.reg;

  PUT_CODE (&all.wide_mult, MULT);
  XEXP (&all.wide_mult, 0) = &all.zext;
  XEXP (&all.wide_mult, 1) = &all.zext;

  PUT_CODE (&all.wide_lshr, LSHIFTRT);
  XEXP (&all.wide_lshr, 0) = &all.wide_mult;

  PUT_CODE (&all.wide_trunc, TRUNCATE);
  XEXP (&all.wide_trunc, 0) = &all.wide_lshr;

  PUT_CODE (&all.shift, ASHIFT);
  XEXP (&all.shift, 0) = &all.reg;

  PUT_CODE (&all.shift_mult, MULT);
  XEXP (&all.shift_mult, 0) = &all.reg;

  PUT_CODE (&all.shift_add, PLUS);
  XEXP (&all.shift_add, 0) = &all.shift_mult;
  XEXP (&all.shift_add, 1) = &all.reg;

  PUT_CODE (&all.shift_sub, MINUS);
  XEXP (&all.shift_sub, 0) = &all.shift_mult;
  XEXP (&all.shift_sub, 1) = &all.reg;

  for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
       mode != VOIDmode;
       mode = GET_MODE_WIDER_MODE (mode))
    {
      PUT_MODE (&all.reg, mode);
      PUT_MODE (&all.plus, mode);
      PUT_MODE (&all.neg, mode);
      PUT_MODE (&all.udiv, mode);
      PUT_MODE (&all.mult, mode);
      PUT_MODE (&all.div, mode);
      PUT_MODE (&all.mod, mode);
      PUT_MODE (&all.wide_trunc, mode);
      PUT_MODE (&all.shift, mode);
      PUT_MODE (&all.shift_mult, mode);
      PUT_MODE (&all.shift_add, mode);
      PUT_MODE (&all.shift_sub, mode);

      add_cost[mode] = rtx_cost (&all.plus, SET);
      neg_cost[mode] = rtx_cost (&all.neg, SET);
      div_cost[mode] = rtx_cost (&all.udiv, SET);
      mul_cost[mode] = rtx_cost (&all.mult, SET);

      sdiv_pow2_cheap[mode] = (rtx_cost (&all.div, SET) <= 2 * add_cost[mode]);
      smod_pow2_cheap[mode] = (rtx_cost (&all.mod, SET) <= 4 * add_cost[mode]);

      wider_mode = GET_MODE_WIDER_MODE (mode);
      if (wider_mode != VOIDmode)
	{
	  PUT_MODE (&all.zext, wider_mode);
	  PUT_MODE (&all.wide_mult, wider_mode);
	  PUT_MODE (&all.wide_lshr, wider_mode);
	  XEXP (&all.wide_lshr, 1) = GEN_INT (GET_MODE_BITSIZE (mode));

	  mul_widen_cost[wider_mode] = rtx_cost (&all.wide_mult, SET);
	  mul_highpart_cost[mode] = rtx_cost (&all.wide_trunc, SET);
	}

      shift_cost[mode][0] = 0;
      shiftadd_cost[mode][0] = shiftsub_cost[mode][0] = add_cost[mode];

      n = MIN (MAX_BITS_PER_WORD, GET_MODE_BITSIZE (mode));
      for (m = 1; m < n; m++)
	{
	  XEXP (&all.shift, 1) = cint[m];
	  XEXP (&all.shift_mult, 1) = pow2[m];

	  shift_cost[mode][m] = rtx_cost (&all.shift, SET);
	  shiftadd_cost[mode][m] = rtx_cost (&all.shift_add, SET);
	  shiftsub_cost[mode][m] = rtx_cost (&all.shift_sub, SET);
	}
    }
}
/* Return an rtx representing minus the value of X.
   MODE is the intended mode of the result,
   useful if X is a CONST_INT.  */

rtx
negate_rtx (enum machine_mode mode, rtx x)
{
  rtx result = simplify_unary_operation (NEG, mode, x, mode);

  if (result == 0)
    result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);

  return result;
}
/* Report on the availability of insv/extv/extzv and the desired mode
   of each of their operands.  Returns MAX_MACHINE_MODE if HAVE_foo
   is false; else the mode of the specified operand.  If OPNO is -1,
   all the caller cares about is whether the insn is available.  */

enum machine_mode
mode_for_extraction (enum extraction_pattern pattern, int opno)
{
  const struct insn_data *data;

  switch (pattern)
    {
    case EP_insv:
      if (HAVE_insv)
	{
	  data = &insn_data[CODE_FOR_insv];
	  break;
	}
      return MAX_MACHINE_MODE;

    case EP_extv:
      if (HAVE_extv)
	{
	  data = &insn_data[CODE_FOR_extv];
	  break;
	}
      return MAX_MACHINE_MODE;

    case EP_extzv:
      if (HAVE_extzv)
	{
	  data = &insn_data[CODE_FOR_extzv];
	  break;
	}
      return MAX_MACHINE_MODE;

    default:
      abort ();
    }

  if (opno == -1)
    return VOIDmode;

  /* Everyone who uses this function used to follow it with
     if (result == VOIDmode) result = word_mode; */
  if (data->operand[opno].mode == VOIDmode)
    return word_mode;
  return data->operand[opno].mode;
}
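
/* Typical use: callers either probe availability with OPNO == -1 or ask
   for the mode of one operand of the optional pattern.  For example,
   store_bit_field below does

	enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);

   and then compares field sizes against GET_MODE_BITSIZE (op_mode).  */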
/* Generate code to store value from rtx VALUE
   into a bit-field within structure STR_RTX
   containing BITSIZE bits starting at bit BITNUM.
   FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
   ALIGN is the alignment that STR_RTX is known to have.
   TOTAL_SIZE is the size of the structure in bytes, or -1 if varying.  */

/* ??? Note that there are two different ideas here for how
   to determine the size to count bits within, for a register.
   One is BITS_PER_WORD, and the other is the size of operand 3
   of the insv pattern.

   If operand 3 of the insv pattern is VOIDmode, then we will use BITS_PER_WORD;
   else, we use the mode of operand 3.  */

rtx
store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
		 unsigned HOST_WIDE_INT bitnum, enum machine_mode fieldmode,
		 rtx value)
{
  unsigned int unit
    = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
  unsigned HOST_WIDE_INT offset = bitnum / unit;
  unsigned HOST_WIDE_INT bitpos = bitnum % unit;
  rtx op0 = str_rtx;
  int byte_offset;

  enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);

  while (GET_CODE (op0) == SUBREG)
    {
      /* The following line once was done only if WORDS_BIG_ENDIAN,
	 but I think that is a mistake.  WORDS_BIG_ENDIAN is
	 meaningful at a much higher level; when structures are copied
	 between memory and regs, the higher-numbered regs
	 always get higher addresses.  */
      offset += (SUBREG_BYTE (op0) / UNITS_PER_WORD);
      /* We used to adjust BITPOS here, but now we do the whole adjustment
	 right after the loop.  */
      op0 = SUBREG_REG (op0);
    }

  /* Use vec_set patterns for inserting parts of vectors whenever
     available.  */
  if (VECTOR_MODE_P (GET_MODE (op0))
      && !MEM_P (op0)
      && (vec_set_optab->handlers[GET_MODE (op0)].insn_code
	  != CODE_FOR_nothing)
      && fieldmode == GET_MODE_INNER (GET_MODE (op0))
      && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
      && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
    {
      enum machine_mode outermode = GET_MODE (op0);
      enum machine_mode innermode = GET_MODE_INNER (outermode);
      int icode = (int) vec_set_optab->handlers[outermode].insn_code;
      int pos = bitnum / GET_MODE_BITSIZE (innermode);
      rtx rtxpos = GEN_INT (pos);
      rtx src = value;
      rtx dest = op0;
      rtx pat, seq;
      enum machine_mode mode0 = insn_data[icode].operand[0].mode;
      enum machine_mode mode1 = insn_data[icode].operand[1].mode;
      enum machine_mode mode2 = insn_data[icode].operand[2].mode;

      start_sequence ();

      if (! (*insn_data[icode].operand[1].predicate) (src, mode1))
	src = copy_to_mode_reg (mode1, src);

      if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
	rtxpos = copy_to_mode_reg (mode1, rtxpos);

      /* We could handle this, but we should always be called with a pseudo
	 for our targets and all insns should take them as outputs.  */
      if (! (*insn_data[icode].operand[0].predicate) (dest, mode0)
	  || ! (*insn_data[icode].operand[1].predicate) (src, mode1)
	  || ! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
	abort ();
      pat = GEN_FCN (icode) (dest, src, rtxpos);
      seq = get_insns ();
      end_sequence ();
      if (pat)
	{
	  emit_insn (seq);
	  emit_insn (pat);
	  return dest;
	}
    }
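
  /* Worked example of the position arithmetic above: storing an SImode
     element into a V4SImode register at bitnum 64 gives
     pos = 64 / GET_MODE_BITSIZE (SImode) = 2, i.e. the third element.
     (Illustrative numbers, not taken from a particular target.)  */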
  if (flag_force_mem)
    {
      int old_generating_concat_p = generating_concat_p;
      generating_concat_p = 0;
      value = force_not_mem (value);
      generating_concat_p = old_generating_concat_p;
    }

  /* If the target is a register, overwriting the entire object, or storing
     a full-word or multi-word field can be done with just a SUBREG.

     If the target is memory, storing any naturally aligned field can be
     done with a simple store.  For targets that support fast unaligned
     memory, any naturally sized, unit aligned field can be done directly.  */

  byte_offset = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
		+ (offset * UNITS_PER_WORD);

  if (bitpos == 0
      && bitsize == GET_MODE_BITSIZE (fieldmode)
      && (!MEM_P (op0)
	  ? ((GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD
	      || GET_MODE_SIZE (GET_MODE (op0)) == GET_MODE_SIZE (fieldmode))
	     && byte_offset % GET_MODE_SIZE (fieldmode) == 0)
	  : (! SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0))
	     || (offset * BITS_PER_UNIT % bitsize == 0
		 && MEM_ALIGN (op0) % GET_MODE_BITSIZE (fieldmode) == 0))))
    {
      if (GET_MODE (op0) != fieldmode)
	{
	  if (GET_CODE (op0) == SUBREG)
	    {
	      if (GET_MODE (SUBREG_REG (op0)) == fieldmode
		  || GET_MODE_CLASS (fieldmode) == MODE_INT
		  || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT)
		op0 = SUBREG_REG (op0);
	      else
		/* Else we've got some float mode source being extracted into
		   a different float mode destination -- this combination of
		   subregs results in Severe Tire Damage.  */
		abort ();
	    }
	  if (REG_P (op0))
	    op0 = gen_rtx_SUBREG (fieldmode, op0, byte_offset);
	  else
	    op0 = adjust_address (op0, fieldmode, offset);
	}
      emit_move_insn (op0, value);
      return value;
    }
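
  /* Worked example of the fast path above, on a 32-bit little-endian
     target: storing an SImode value at bitnum 32 of a DImode pseudo
     gives bitpos == 0 and byte_offset == 4, so the store becomes a
     plain move into (subreg:SI (reg:DI ...) 4).  (Illustrative
     numbers.)  */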
  /* Make sure we are playing with integral modes.  Pun with subregs
     if we aren't.  This must come after the entire register case above,
     since that case is valid for any mode.  The following cases are only
     valid for integral modes.  */
  {
    enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
    if (imode != GET_MODE (op0))
      {
	if (MEM_P (op0))
	  op0 = adjust_address (op0, imode, 0);
	else if (imode != BLKmode)
	  op0 = gen_lowpart (imode, op0);
	else
	  abort ();
      }
  }

  /* We may be accessing data outside the field, which means
     we can alias adjacent data.  */
  if (MEM_P (op0))
    {
      op0 = shallow_copy_rtx (op0);
      set_mem_alias_set (op0, 0);
      set_mem_expr (op0, 0);
    }

  /* If OP0 is a register, BITPOS must count within a word.
     But as we have it, it counts within whatever size OP0 now has.
     On a bigendian machine, these are not the same, so convert.  */
  if (BYTES_BIG_ENDIAN
      && !MEM_P (op0)
      && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
    bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));

  /* Storing an lsb-aligned field in a register
     can be done with a movestrict instruction.  */

  if (!MEM_P (op0)
      && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
      && bitsize == GET_MODE_BITSIZE (fieldmode)
      && (movstrict_optab->handlers[fieldmode].insn_code
	  != CODE_FOR_nothing))
    {
      int icode = movstrict_optab->handlers[fieldmode].insn_code;

      /* Get appropriate low part of the value being stored.  */
      if (GET_CODE (value) == CONST_INT || REG_P (value))
	value = gen_lowpart (fieldmode, value);
      else if (!(GET_CODE (value) == SYMBOL_REF
		 || GET_CODE (value) == LABEL_REF
		 || GET_CODE (value) == CONST))
	value = convert_to_mode (fieldmode, value, 0);

      if (! (*insn_data[icode].operand[1].predicate) (value, fieldmode))
	value = copy_to_mode_reg (fieldmode, value);

      if (GET_CODE (op0) == SUBREG)
	{
	  if (GET_MODE (SUBREG_REG (op0)) == fieldmode
	      || GET_MODE_CLASS (fieldmode) == MODE_INT
	      || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT)
	    op0 = SUBREG_REG (op0);
	  else
	    /* Else we've got some float mode source being extracted into
	       a different float mode destination -- this combination of
	       subregs results in Severe Tire Damage.  */
	    abort ();
	}

      emit_insn (GEN_FCN (icode)
		 (gen_rtx_SUBREG (fieldmode, op0,
				  (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
				  + (offset * UNITS_PER_WORD)),
		  value));

      return value;
    }

  /* Handle fields bigger than a word.  */

  if (bitsize > BITS_PER_WORD)
    {
      /* Here we transfer the words of the field
	 in the order least significant first.
	 This is because the most significant word is the one which may
	 be less than full.
	 However, only do that if the value is not BLKmode.  */

      unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
      unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
      unsigned int i;

      /* This is the mode we must force value to, so that there will be enough
	 subwords to extract.  Note that fieldmode will often (always?) be
	 VOIDmode, because that is what store_field uses to indicate that this
	 is a bit field, but passing VOIDmode to operand_subword_force will
	 result in an abort.  */
      fieldmode = GET_MODE (value);
      if (fieldmode == VOIDmode)
	fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);

      for (i = 0; i < nwords; i++)
	{
	  /* If I is 0, use the low-order word in both field and target;
	     if I is 1, use the next to lowest word; and so on.  */
	  unsigned int wordnum = (backwards ? nwords - i - 1 : i);
	  unsigned int bit_offset = (backwards
				     ? MAX ((int) bitsize - ((int) i + 1)
					    * BITS_PER_WORD,
					    0)
				     : (int) i * BITS_PER_WORD);

	  store_bit_field (op0, MIN (BITS_PER_WORD,
				     bitsize - i * BITS_PER_WORD),
			   bitnum + bit_offset, word_mode,
			   operand_subword_force (value, wordnum, fieldmode));
	}
      return value;
    }
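
  /* Worked example of the multiword loop above, on a 32-bit
     little-endian target: a 48-bit field gives nwords == 2; iteration
     i == 0 stores the full low word at bit_offset 0, and i == 1 stores
     the remaining 16 bits at bit_offset 32, so the word that may be
     less than full is written last.  (Illustrative numbers.)  */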
  /* From here on we can assume that the field to be stored in is
     a full-word (whatever type that is), since it is shorter than a word.  */

  /* OFFSET is the number of words or bytes (UNIT says which)
     from STR_RTX to the first word or byte containing part of the field.  */

  if (!MEM_P (op0))
    {
      if (offset != 0
	  || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
	{
	  if (!REG_P (op0))
	    {
	      /* Since this is a destination (lvalue), we can't copy it to a
		 pseudo.  We can trivially remove a SUBREG that does not
		 change the size of the operand.  Such a SUBREG may have been
		 added above.  Otherwise, abort.  */
	      if (GET_CODE (op0) == SUBREG
		  && (GET_MODE_SIZE (GET_MODE (op0))
		      == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)))))
		op0 = SUBREG_REG (op0);
	      else
		abort ();
	    }
	  op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
				op0, (offset * UNITS_PER_WORD));
	}
      offset = 0;
    }

  /* If VALUE is a floating-point mode, access it as an integer of the
     corresponding size.  This can occur on a machine with 64 bit registers
     that uses SFmode for float.  This can also occur for unaligned float
     structure fields.  */
  if (GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
      && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
    value = gen_lowpart ((GET_MODE (value) == VOIDmode
			  ? word_mode : int_mode_for_mode (GET_MODE (value))),
			 value);

  /* Now OFFSET is nonzero only if OP0 is memory
     and is therefore always measured in bytes.  */

  if (HAVE_insv
      && GET_MODE (value) != BLKmode
      && !(bitsize == 1 && GET_CODE (value) == CONST_INT)
      /* Ensure insv's size is wide enough for this field.  */
      && (GET_MODE_BITSIZE (op_mode) >= bitsize)
      && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
	    && (bitsize + bitpos > GET_MODE_BITSIZE (op_mode))))
    {
      int xbitpos = bitpos;
      rtx value1;
      rtx xop0 = op0;
      rtx last = get_last_insn ();
      rtx pat;
      enum machine_mode maxmode = mode_for_extraction (EP_insv, 3);
      int save_volatile_ok = volatile_ok;

      volatile_ok = 1;

      /* If this machine's insv can only insert into a register, copy OP0
	 into a register and save it back later.  */
      /* This used to check flag_force_mem, but that was a serious
	 de-optimization now that flag_force_mem is enabled by -O2.  */
      if (MEM_P (op0)
	  && ! ((*insn_data[(int) CODE_FOR_insv].operand[0].predicate)
		(op0, VOIDmode)))
	{
	  rtx tempreg;
	  enum machine_mode bestmode;

	  /* Get the mode to use for inserting into this field.  If OP0 is
	     BLKmode, get the smallest mode consistent with the alignment.  If
	     OP0 is a non-BLKmode object that is no wider than MAXMODE, use its
	     mode.  Otherwise, use the smallest mode containing the field.  */

	  if (GET_MODE (op0) == BLKmode
	      || GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (maxmode))
	    bestmode
	      = get_best_mode (bitsize, bitnum, MEM_ALIGN (op0), maxmode,
			       MEM_VOLATILE_P (op0));
	  else
	    bestmode = GET_MODE (op0);

	  if (bestmode == VOIDmode
	      || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
		  && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
	    goto insv_loses;

	  /* Adjust address to point to the containing unit of that mode.
	     Compute offset as multiple of this unit, counting in bytes.  */
	  unit = GET_MODE_BITSIZE (bestmode);
	  offset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
	  bitpos = bitnum % unit;
	  op0 = adjust_address (op0, bestmode, offset);

	  /* Fetch that unit, store the bitfield in it, then store
	     the unit.  */
	  tempreg = copy_to_reg (op0);
	  store_bit_field (tempreg, bitsize, bitpos, fieldmode, value);
	  emit_move_insn (op0, tempreg);
	  return value;
	}
      volatile_ok = save_volatile_ok;

      /* Add OFFSET into OP0's address.  */
      if (MEM_P (xop0))
	xop0 = adjust_address (xop0, byte_mode, offset);

      /* If xop0 is a register, we need it in MAXMODE
	 to make it acceptable to the format of insv.  */
      if (GET_CODE (xop0) == SUBREG)
	/* We can't just change the mode, because this might clobber op0,
	   and we will need the original value of op0 if insv fails.  */
	xop0 = gen_rtx_SUBREG (maxmode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
      if (REG_P (xop0) && GET_MODE (xop0) != maxmode)
	xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);

      /* On big-endian machines, we count bits from the most significant.
	 If the bit field insn does not, we must invert.  */

      if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
	xbitpos = unit - bitsize - xbitpos;

      /* We have been counting XBITPOS within UNIT.
	 Count instead within the size of the register.  */
      if (BITS_BIG_ENDIAN && !MEM_P (xop0))
	xbitpos += GET_MODE_BITSIZE (maxmode) - unit;

      unit = GET_MODE_BITSIZE (maxmode);

      /* Convert VALUE to maxmode (which insv insn wants) in VALUE1.  */
      value1 = value;
      if (GET_MODE (value) != maxmode)
	{
	  if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
	    {
	      /* Optimization: Don't bother really extending VALUE
		 if it has all the bits we will actually use.  However,
		 if we must narrow it, be sure we do it correctly.  */

	      if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (maxmode))
		{
		  rtx tmp;

		  tmp = simplify_subreg (maxmode, value1, GET_MODE (value), 0);
		  if (! tmp)
		    tmp = simplify_gen_subreg (maxmode,
					       force_reg (GET_MODE (value),
							  value1),
					       GET_MODE (value), 0);
		  value1 = tmp;
		}
	      else
		value1 = gen_lowpart (maxmode, value1);
	    }
	  else if (GET_CODE (value) == CONST_INT)
	    value1 = gen_int_mode (INTVAL (value), maxmode);
	  else if (!CONSTANT_P (value))
	    /* Parse phase is supposed to make VALUE's data type
	       match that of the component reference, which is a type
	       at least as wide as the field; so VALUE should have
	       a mode that corresponds to that type.  */
	    abort ();
	}

      /* If this machine's insv insists on a register,
	 get VALUE1 into a register.  */
      if (! ((*insn_data[(int) CODE_FOR_insv].operand[3].predicate)
	     (value1, maxmode)))
	value1 = force_reg (maxmode, value1);

      pat = gen_insv (xop0, GEN_INT (bitsize), GEN_INT (xbitpos), value1);
      if (pat)
	emit_insn (pat);
      else
	{
	  delete_insns_since (last);
	  store_fixed_bit_field (op0, offset, bitsize, bitpos, value);
	}
    }
  else
    insv_loses:
    /* Insv is not available; store using shifts and boolean ops.  */
    store_fixed_bit_field (op0, offset, bitsize, bitpos, value);
  return value;
}
/* Use shifts and boolean operations to store VALUE
   into a bit field of width BITSIZE
   in a memory location specified by OP0 except offset by OFFSET bytes.
     (OFFSET must be 0 if OP0 is a register.)
   The field starts at position BITPOS within the byte.
     (If OP0 is a register, it may be a full word or a narrower mode,
      but BITPOS still counts within a full word,
      which is significant on bigendian machines.)  */

static void
store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT offset,
		       unsigned HOST_WIDE_INT bitsize,
		       unsigned HOST_WIDE_INT bitpos, rtx value)
{
  enum machine_mode mode;
  unsigned int total_bits = BITS_PER_WORD;
  rtx subtarget, temp;
  int all_zero = 0;
  int all_one = 0;

  /* There is a case not handled here:
     a structure with a known alignment of just a halfword
     and a field split across two aligned halfwords within the structure.
     Or likewise a structure with a known alignment of just a byte
     and a field split across two bytes.
     Such cases are not supposed to be able to occur.  */

  if (REG_P (op0) || GET_CODE (op0) == SUBREG)
    {
      if (offset != 0)
	abort ();
      /* Special treatment for a bit field split across two registers.  */
      if (bitsize + bitpos > BITS_PER_WORD)
	{
	  store_split_bit_field (op0, bitsize, bitpos, value);
	  return;
	}
    }
  else
    {
      /* Get the proper mode to use for this field.  We want a mode that
	 includes the entire field.  If such a mode would be larger than
	 a word, we won't be doing the extraction the normal way.
	 We don't want a mode bigger than the destination.  */

      mode = GET_MODE (op0);
      if (GET_MODE_BITSIZE (mode) == 0
	  || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
	mode = word_mode;
      mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
			    MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));

      if (mode == VOIDmode)
	{
	  /* The only way this should occur is if the field spans word
	     boundaries.  */
	  store_split_bit_field (op0, bitsize, bitpos + offset * BITS_PER_UNIT,
				 value);
	  return;
	}

      total_bits = GET_MODE_BITSIZE (mode);

      /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
	 be in the range 0 to total_bits-1, and put any excess bytes in
	 OFFSET.  */
      if (bitpos >= total_bits)
	{
	  offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
	  bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
		     * BITS_PER_UNIT);
	}

      /* Get ref to an aligned byte, halfword, or word containing the field.
	 Adjust BITPOS to be position within a word,
	 and OFFSET to be the offset of that word.
	 Then alter OP0 to refer to that word.  */
      bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
      offset -= (offset % (total_bits / BITS_PER_UNIT));
      op0 = adjust_address (op0, mode, offset);
    }

  mode = GET_MODE (op0);

  /* Now MODE is either some integral mode for a MEM as OP0,
     or is a full-word for a REG as OP0.  TOTAL_BITS corresponds.
     The bit field is contained entirely within OP0.
     BITPOS is the starting bit number within OP0.
     (OP0's mode may actually be narrower than MODE.)  */

  if (BYTES_BIG_ENDIAN)
    /* BITPOS is the distance between our msb
       and that of the containing datum.
       Convert it to the distance from the lsb.  */
    bitpos = total_bits - bitsize - bitpos;

  /* Now BITPOS is always the distance between our lsb
     and that of OP0.  */
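
  /* Worked example of the endian conversion above: with total_bits == 32,
     bitsize == 8 and a big-endian BITPOS of 5 (distance between msbs),
     the little-endian position becomes 32 - 8 - 5 == 19, so the field
     occupies bits 19..26 counting from the lsb.  (Illustrative
     numbers.)  */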
  /* Shift VALUE left by BITPOS bits.  If VALUE is not constant,
     we must first convert its mode to MODE.  */

  if (GET_CODE (value) == CONST_INT)
    {
      HOST_WIDE_INT v = INTVAL (value);

      if (bitsize < HOST_BITS_PER_WIDE_INT)
	v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;

      if (v == 0)
	all_zero = 1;
      else if ((bitsize < HOST_BITS_PER_WIDE_INT
		&& v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
	       || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
	all_one = 1;

      value = lshift_value (mode, value, bitpos, bitsize);
    }
  else
    {
      int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
		      && bitpos + bitsize != GET_MODE_BITSIZE (mode));

      if (GET_MODE (value) != mode)
	{
	  if ((REG_P (value) || GET_CODE (value) == SUBREG)
	      && GET_MODE_SIZE (mode) < GET_MODE_SIZE (GET_MODE (value)))
	    value = gen_lowpart (mode, value);
	  else
	    value = convert_to_mode (mode, value, 1);
	}

      if (must_and)
	value = expand_binop (mode, and_optab, value,
			      mask_rtx (mode, 0, bitsize, 0),
			      NULL_RTX, 1, OPTAB_LIB_WIDEN);
      if (bitpos > 0)
	value = expand_shift (LSHIFT_EXPR, mode, value,
			      build_int_cst (NULL_TREE, bitpos), NULL_RTX, 1);
    }

  /* Now clear the chosen bits in OP0,
     except that if VALUE is -1 we need not bother.  */

  subtarget = (REG_P (op0) || ! flag_force_mem) ? op0 : 0;

  if (! all_one)
    {
      temp = expand_binop (mode, and_optab, op0,
			   mask_rtx (mode, bitpos, bitsize, 1),
			   subtarget, 1, OPTAB_LIB_WIDEN);
      subtarget = temp;
    }
  else
    temp = op0;

  /* Now logical-or VALUE into OP0, unless it is zero.  */

  if (! all_zero)
    temp = expand_binop (mode, ior_optab, temp, value,
			 subtarget, 1, OPTAB_LIB_WIDEN);
  if (op0 != temp)
    emit_move_insn (op0, temp);
}
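
/* In essence, the function above is the classic read-modify-write
   idiom.  A self-contained sketch of the same computation for a 32-bit
   word, using hypothetical names and little-endian bit numbering
   (C99 uint32_t; not part of this file's logic):  */
#if 0
static uint32_t
store_field_32 (uint32_t word, uint32_t value,
		unsigned int bitpos, unsigned int bitsize)
{
  uint32_t mask = ((bitsize < 32 ? ((uint32_t) 1 << bitsize) : 0) - 1)
		  << bitpos;
  /* Clear the field (the AND step above), then OR the shifted value in;
     all_zero and all_one merely let the code above skip a no-op step.  */
  return (word & ~mask) | ((value << bitpos) & mask);
}
#endif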
/* Store a bit field that is split across multiple accessible memory objects.

   OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
   BITSIZE is the field width; BITPOS the position of its first bit
   (within the word).
   VALUE is the value to store.

   This does not yet handle fields wider than BITS_PER_WORD.  */

static void
store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
		       unsigned HOST_WIDE_INT bitpos, rtx value)
{
  unsigned int unit;
  unsigned int bitsdone = 0;

  /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
     much at a time.  */
  if (REG_P (op0) || GET_CODE (op0) == SUBREG)
    unit = BITS_PER_WORD;
  else
    unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);

  /* If VALUE is a constant other than a CONST_INT, get it into a register in
     WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
     that VALUE might be a floating-point constant.  */
  if (CONSTANT_P (value) && GET_CODE (value) != CONST_INT)
    {
      rtx word = gen_lowpart_common (word_mode, value);

      if (word && (value != word))
	value = word;
      else
	value = gen_lowpart_common (word_mode,
				    force_reg (GET_MODE (value) != VOIDmode
					       ? GET_MODE (value)
					       : word_mode, value));
    }

  while (bitsdone < bitsize)
    {
      unsigned HOST_WIDE_INT thissize;
      rtx part, word;
      unsigned HOST_WIDE_INT thispos;
      unsigned HOST_WIDE_INT offset;

      offset = (bitpos + bitsdone) / unit;
      thispos = (bitpos + bitsdone) % unit;

      /* THISSIZE must not overrun a word boundary.  Otherwise,
	 store_fixed_bit_field will call us again, and we will mutually
	 recurse forever.  */
      thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
      thissize = MIN (thissize, unit - thispos);

      if (BYTES_BIG_ENDIAN)
	{
	  int total_bits;

	  /* We must do an endian conversion exactly the same way as it is
	     done in extract_bit_field, so that the two calls to
	     extract_fixed_bit_field will have comparable arguments.  */
	  if (!MEM_P (value) || GET_MODE (value) == BLKmode)
	    total_bits = BITS_PER_WORD;
	  else
	    total_bits = GET_MODE_BITSIZE (GET_MODE (value));

	  /* Fetch successively less significant portions.  */
	  if (GET_CODE (value) == CONST_INT)
	    part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
			     >> (bitsize - bitsdone - thissize))
			    & (((HOST_WIDE_INT) 1 << thissize) - 1));
	  else
	    /* The args are chosen so that the last part includes the
	       lsb.  Give extract_bit_field the value it needs (with
	       endianness compensation) to fetch the piece we want.  */
	    part = extract_fixed_bit_field (word_mode, value, 0, thissize,
					    total_bits - bitsize + bitsdone,
					    NULL_RTX, 1);
	}
      else
	{
	  /* Fetch successively more significant portions.  */
	  if (GET_CODE (value) == CONST_INT)
	    part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
			     >> bitsdone)
			    & (((HOST_WIDE_INT) 1 << thissize) - 1));
	  else
	    part = extract_fixed_bit_field (word_mode, value, 0, thissize,
					    bitsdone, NULL_RTX, 1);
	}

      /* If OP0 is a register, then handle OFFSET here.

	 When handling multiword bitfields, extract_bit_field may pass
	 down a word_mode SUBREG of a larger REG for a bitfield that actually
	 crosses a word boundary.  Thus, for a SUBREG, we must find
	 the current word starting from the base register.  */
      if (GET_CODE (op0) == SUBREG)
	{
	  int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
	  word = operand_subword_force (SUBREG_REG (op0), word_offset,
					GET_MODE (SUBREG_REG (op0)));
	  offset = 0;
	}
      else if (REG_P (op0))
	{
	  word = operand_subword_force (op0, offset, GET_MODE (op0));
	  offset = 0;
	}
      else
	word = op0;

      /* OFFSET is in UNITs, and UNIT is in bits.
	 store_fixed_bit_field wants offset in bytes.  */
      store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT, thissize,
			     thispos, part);
      bitsdone += thissize;
    }
}
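
/* Worked example of the splitting loop above: storing an 8-bit field at
   bitpos 28 with unit == 32 takes two iterations of thissize == 4 each:
   bits 28..31 of the first word, then bits 0..3 of the second.
   (Illustrative numbers.)  */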
/* Generate code to extract a byte-field from STR_RTX
   containing BITSIZE bits, starting at BITNUM,
   and put it in TARGET if possible (if TARGET is nonzero).
   Regardless of TARGET, we return the rtx for where the value is placed.

   STR_RTX is the structure containing the byte (a REG or MEM).
   UNSIGNEDP is nonzero if this is an unsigned bit field.
   MODE is the natural mode of the field value once extracted.
   TMODE is the mode the caller would like the value to have;
   but the value may be returned with type MODE instead.

   TOTAL_SIZE is the size in bytes of the containing structure,
   or -1 if varying.

   If a TARGET is specified and we can store in it at no extra cost,
   we do so, and return TARGET.
   Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
   if they are equally easy.  */

rtx
extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
		   unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
		   enum machine_mode mode, enum machine_mode tmode)
{
  unsigned int unit
    = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
  unsigned HOST_WIDE_INT offset = bitnum / unit;
  unsigned HOST_WIDE_INT bitpos = bitnum % unit;
  rtx op0 = str_rtx;
  rtx spec_target = target;
  rtx spec_target_subreg = 0;
  enum machine_mode int_mode;
  enum machine_mode extv_mode = mode_for_extraction (EP_extv, 0);
  enum machine_mode extzv_mode = mode_for_extraction (EP_extzv, 0);
  enum machine_mode mode1;
  int byte_offset;

  if (tmode == VOIDmode)
    tmode = mode;

  while (GET_CODE (op0) == SUBREG)
    {
      bitpos += SUBREG_BYTE (op0) * BITS_PER_UNIT;
      if (bitpos > unit)
	{
	  offset += (bitpos / unit);
	  bitpos %= unit;
	}
      op0 = SUBREG_REG (op0);
    }

  if (REG_P (op0)
      && mode == GET_MODE (op0)
      && bitnum == 0
      && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
    {
      /* We're trying to extract a full register from itself.  */
      return op0;
    }

  /* Use vec_extract patterns for extracting parts of vectors whenever
     available.  */
  if (VECTOR_MODE_P (GET_MODE (op0))
      && !MEM_P (op0)
      && (vec_extract_optab->handlers[GET_MODE (op0)].insn_code
	  != CODE_FOR_nothing)
      && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
	  == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
    {
      enum machine_mode outermode = GET_MODE (op0);
      enum machine_mode innermode = GET_MODE_INNER (outermode);
      int icode = (int) vec_extract_optab->handlers[outermode].insn_code;
      unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
      rtx rtxpos = GEN_INT (pos);
      rtx src = op0;
      rtx dest = NULL, pat, seq;
      enum machine_mode mode0 = insn_data[icode].operand[0].mode;
      enum machine_mode mode1 = insn_data[icode].operand[1].mode;
      enum machine_mode mode2 = insn_data[icode].operand[2].mode;

      if (innermode == tmode || innermode == mode)
	dest = target;

      if (!dest)
	dest = gen_reg_rtx (innermode);

      start_sequence ();

      if (! (*insn_data[icode].operand[0].predicate) (dest, mode0))
	dest = copy_to_mode_reg (mode0, dest);

      if (! (*insn_data[icode].operand[1].predicate) (src, mode1))
	src = copy_to_mode_reg (mode1, src);

      if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
	rtxpos = copy_to_mode_reg (mode1, rtxpos);

      /* We could handle this, but we should always be called with a pseudo
	 for our targets and all insns should take them as outputs.  */
      if (! (*insn_data[icode].operand[0].predicate) (dest, mode0)
	  || ! (*insn_data[icode].operand[1].predicate) (src, mode1)
	  || ! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
	abort ();

      pat = GEN_FCN (icode) (dest, src, rtxpos);
      seq = get_insns ();
      end_sequence ();
      if (pat)
	{
	  emit_insn (seq);
	  emit_insn (pat);
	  return dest;
	}
    }
  /* Make sure we are playing with integral modes.  Pun with subregs
     if we aren't.  */
  {
    enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
    if (imode != GET_MODE (op0))
      {
	if (MEM_P (op0))
	  op0 = adjust_address (op0, imode, 0);
	else if (imode != BLKmode)
	  op0 = gen_lowpart (imode, op0);
	else
	  abort ();
      }
  }

  /* We may be accessing data outside the field, which means
     we can alias adjacent data.  */
  if (MEM_P (op0))
    {
      op0 = shallow_copy_rtx (op0);
      set_mem_alias_set (op0, 0);
      set_mem_expr (op0, 0);
    }

  /* Extraction of a full-word or multi-word value from a structure
     in a register or aligned memory can be done with just a SUBREG.
     A subword value in the least significant part of a register
     can also be extracted with a SUBREG.  For this, we need the
     byte offset of the value in op0.  */

  byte_offset = bitpos / BITS_PER_UNIT + offset * UNITS_PER_WORD;

  /* If OP0 is a register, BITPOS must count within a word.
     But as we have it, it counts within whatever size OP0 now has.
     On a bigendian machine, these are not the same, so convert.  */
  if (BYTES_BIG_ENDIAN
      && !MEM_P (op0)
      && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
    bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));

  /* ??? We currently assume TARGET is at least as big as BITSIZE.
     If that's wrong, the solution is to test for it and set TARGET to 0
     if needed.  */

  /* Only scalar integer modes can be converted via subregs.  There is an
     additional problem for FP modes here in that they can have a precision
     which is different from the size.  mode_for_size uses precision, but
     we want a mode based on the size, so we must avoid calling it for FP
     modes.  */
  mode1 = (SCALAR_INT_MODE_P (tmode)
	   ? mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0)
	   : mode);

  if (((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode)
	&& bitpos % BITS_PER_WORD == 0)
       || (mode1 != BLKmode
	   /* ??? The big endian test here is wrong.  This is correct
	      if the value is in a register, and if mode_for_size is not
	      the same mode as op0.  This causes us to get unnecessarily
	      inefficient code from the Thumb port when -mbig-endian.  */
	   && (BYTES_BIG_ENDIAN
	       ? bitpos + bitsize == BITS_PER_WORD
	       : bitpos == 0)))
      && ((!MEM_P (op0)
	   && TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (mode),
				     GET_MODE_BITSIZE (GET_MODE (op0)))
	   && GET_MODE_SIZE (mode1) != 0
	   && byte_offset % GET_MODE_SIZE (mode1) == 0)
	  || (MEM_P (op0)
	      && (! SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
		  || (offset * BITS_PER_UNIT % bitsize == 0
		      && MEM_ALIGN (op0) % bitsize == 0)))))
    {
      if (mode1 != GET_MODE (op0))
	{
	  if (GET_CODE (op0) == SUBREG)
	    {
	      if (GET_MODE (SUBREG_REG (op0)) == mode1
		  || GET_MODE_CLASS (mode1) == MODE_INT
		  || GET_MODE_CLASS (mode1) == MODE_PARTIAL_INT)
		op0 = SUBREG_REG (op0);
	      else
		/* Else we've got some float mode source being extracted into
		   a different float mode destination -- this combination of
		   subregs results in Severe Tire Damage.  */
		goto no_subreg_mode_swap;
	    }
	  if (REG_P (op0))
	    op0 = gen_rtx_SUBREG (mode1, op0, byte_offset);
	  else
	    op0 = adjust_address (op0, mode1, offset);
	}
      if (mode1 != mode)
	return convert_to_mode (tmode, op0, unsignedp);
      return op0;
    }
 no_subreg_mode_swap:
  /* Handle fields bigger than a word.  */

  if (bitsize > BITS_PER_WORD)
    {
      /* Here we transfer the words of the field
	 in the order least significant first.
	 This is because the most significant word is the one which may
	 be less than full.  */

      unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
      unsigned int i;

      if (target == 0 || !REG_P (target))
	target = gen_reg_rtx (mode);

      /* Indicate for flow that the entire target reg is being set.  */
      emit_insn (gen_rtx_CLOBBER (VOIDmode, target));

      for (i = 0; i < nwords; i++)
	{
	  /* If I is 0, use the low-order word in both field and target;
	     if I is 1, use the next to lowest word; and so on.  */
	  /* Word number in TARGET to use.  */
	  unsigned int wordnum
	    = (WORDS_BIG_ENDIAN
	       ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
	       : i);
	  /* Offset from start of field in OP0.  */
	  unsigned int bit_offset = (WORDS_BIG_ENDIAN
				     ? MAX (0, ((int) bitsize - ((int) i + 1)
						* (int) BITS_PER_WORD))
				     : (int) i * BITS_PER_WORD);
	  rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
	  rtx result_part
	    = extract_bit_field (op0, MIN (BITS_PER_WORD,
					   bitsize - i * BITS_PER_WORD),
				 bitnum + bit_offset, 1, target_part, mode,
				 word_mode);

	  if (target_part == 0)
	    abort ();

	  if (result_part != target_part)
	    emit_move_insn (target_part, result_part);
	}

      if (unsignedp)
	{
	  /* Unless we've filled TARGET, the upper regs in a multi-reg value
	     need to be zero'd out.  */
	  if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
	    {
	      unsigned int i, total_words;

	      total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
	      for (i = nwords; i < total_words; i++)
		emit_move_insn
		  (operand_subword (target,
				    WORDS_BIG_ENDIAN ? total_words - i - 1 : i,
				    1, VOIDmode),
		   const0_rtx);
	    }
	  return target;
	}

      /* Signed bit field: sign-extend with two arithmetic shifts.  */
      target = expand_shift (LSHIFT_EXPR, mode, target,
			     build_int_cst (NULL_TREE,
					    GET_MODE_BITSIZE (mode) - bitsize),
			     NULL_RTX, 0);
      return expand_shift (RSHIFT_EXPR, mode, target,
			   build_int_cst (NULL_TREE,
					  GET_MODE_BITSIZE (mode) - bitsize),
			   NULL_RTX, 0);
    }
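
  /* The two-shift sequence above is the standard sign-extension trick:
     for a W-bit word holding a B-bit field in its low bits,
     (x << (W - B)) >> (W - B) with an arithmetic right shift replicates
     the field's sign bit through the upper W - B bits.  E.g. with
     W == 32, B == 8 and x == 0x80, shifting left by 24 gives 0x80000000
     and shifting arithmetically right by 24 gives 0xFFFFFF80 == -128.
     (Illustrative numbers.)  */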
  /* From here on we know the desired field is smaller than a word.  */

  /* Check if there is a correspondingly-sized integer field, so we can
     safely extract it as one size of integer, if necessary; then
     truncate or extend to the size that is wanted; then use SUBREGs or
     convert_to_mode to get one of the modes we really wanted.  */

  int_mode = int_mode_for_mode (tmode);
  if (int_mode == BLKmode)
    int_mode = int_mode_for_mode (mode);
  if (int_mode == BLKmode)
    abort ();	/* Should probably push op0 out to memory and then
		   do a load.  */

  /* OFFSET is the number of words or bytes (UNIT says which)
     from STR_RTX to the first word or byte containing part of the field.  */

  if (!MEM_P (op0))
    {
      if (offset != 0
	  || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
	{
	  if (!REG_P (op0))
	    op0 = copy_to_reg (op0);
	  op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
				op0, (offset * UNITS_PER_WORD));
	}
      offset = 0;
    }

  /* Now OFFSET is nonzero only for memory operands.  */

  if (unsignedp)
    {
      if (HAVE_extzv
	  && (GET_MODE_BITSIZE (extzv_mode) >= bitsize)
	  && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
		&& (bitsize + bitpos > GET_MODE_BITSIZE (extzv_mode))))
	{
	  unsigned HOST_WIDE_INT xbitpos = bitpos, xoffset = offset;
	  rtx bitsize_rtx, bitpos_rtx;
	  rtx last = get_last_insn ();
	  rtx xop0 = op0;
	  rtx xtarget = target;
	  rtx xspec_target = spec_target;
	  rtx xspec_target_subreg = spec_target_subreg;
	  rtx pat;
	  enum machine_mode maxmode = mode_for_extraction (EP_extzv, 0);

	  if (MEM_P (xop0))
	    {
	      int save_volatile_ok = volatile_ok;
	      volatile_ok = 1;

	      /* Is the memory operand acceptable?  */
	      if (! ((*insn_data[(int) CODE_FOR_extzv].operand[1].predicate)
		     (xop0, GET_MODE (xop0))))
		{
		  /* No, load into a reg and extract from there.  */
		  enum machine_mode bestmode;

		  /* Get the mode to use for inserting into this field.  If
		     OP0 is BLKmode, get the smallest mode consistent with the
		     alignment.  If OP0 is a non-BLKmode object that is no
		     wider than MAXMODE, use its mode.  Otherwise, use the
		     smallest mode containing the field.  */

		  if (GET_MODE (xop0) == BLKmode
		      || (GET_MODE_SIZE (GET_MODE (op0))
			  > GET_MODE_SIZE (maxmode)))
		    bestmode = get_best_mode (bitsize, bitnum,
					      MEM_ALIGN (xop0), maxmode,
					      MEM_VOLATILE_P (xop0));
		  else
		    bestmode = GET_MODE (xop0);

		  if (bestmode == VOIDmode
		      || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (xop0))
			  && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (xop0)))
		    goto extzv_loses;

		  /* Compute offset as multiple of this unit,
		     counting in bytes.  */
		  unit = GET_MODE_BITSIZE (bestmode);
		  xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
		  xbitpos = bitnum % unit;
		  xop0 = adjust_address (xop0, bestmode, xoffset);

		  /* Fetch it to a register in that size.  */
		  xop0 = force_reg (bestmode, xop0);

		  /* XBITPOS counts within UNIT, which is what is expected.  */
		}
	      else
		/* Get ref to first byte containing part of the field.  */
		xop0 = adjust_address (xop0, byte_mode, xoffset);

	      volatile_ok = save_volatile_ok;
	    }

	  /* If op0 is a register, we need it in MAXMODE (which is usually
	     SImode) to make it acceptable to the format of extzv.  */
	  if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
	    goto extzv_loses;
	  if (REG_P (xop0) && GET_MODE (xop0) != maxmode)
	    xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);

	  /* On big-endian machines, we count bits from the most significant.
	     If the bit field insn does not, we must invert.  */
	  if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
	    xbitpos = unit - bitsize - xbitpos;

	  /* Now convert from counting within UNIT to counting in MAXMODE.  */
	  if (BITS_BIG_ENDIAN && !MEM_P (xop0))
	    xbitpos += GET_MODE_BITSIZE (maxmode) - unit;

	  unit = GET_MODE_BITSIZE (maxmode);

	  if (xtarget == 0
	      || (flag_force_mem && MEM_P (xtarget)))
	    xtarget = xspec_target = gen_reg_rtx (tmode);

	  if (GET_MODE (xtarget) != maxmode)
	    {
	      if (REG_P (xtarget))
		{
		  int wider = (GET_MODE_SIZE (maxmode)
			       > GET_MODE_SIZE (GET_MODE (xtarget)));
		  xtarget = gen_lowpart (maxmode, xtarget);
		  if (wider)
		    xspec_target_subreg = xtarget;
		}
	      else
		xtarget = gen_reg_rtx (maxmode);
	    }

	  /* If this machine's extzv insists on a register target,
	     make sure we have one.  */
	  if (! ((*insn_data[(int) CODE_FOR_extzv].operand[0].predicate)
		 (xtarget, maxmode)))
	    xtarget = gen_reg_rtx (maxmode);

	  bitsize_rtx = GEN_INT (bitsize);
	  bitpos_rtx = GEN_INT (xbitpos);

	  pat = gen_extzv (xtarget, xop0, bitsize_rtx, bitpos_rtx);
	  if (pat)
	    {
	      emit_insn (pat);
	      target = xtarget;
	      spec_target = xspec_target;
	      spec_target_subreg = xspec_target_subreg;
	    }
	  else
	    {
	      delete_insns_since (last);
	      target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
						bitpos, target, 1);
	    }
	}
      else
	extzv_loses:
	target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
					  bitpos, target, 1);
    }
  else
    {
      if (HAVE_extv
	  && (GET_MODE_BITSIZE (extv_mode) >= bitsize)
	  && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
		&& (bitsize + bitpos > GET_MODE_BITSIZE (extv_mode))))
	{
	  int xbitpos = bitpos, xoffset = offset;
	  rtx bitsize_rtx, bitpos_rtx;
	  rtx last = get_last_insn ();
	  rtx xop0 = op0, xtarget = target;
	  rtx xspec_target = spec_target;
	  rtx xspec_target_subreg = spec_target_subreg;
	  rtx pat;
	  enum machine_mode maxmode = mode_for_extraction (EP_extv, 0);

	  if (MEM_P (xop0))
	    {
	      /* Is the memory operand acceptable?  */
	      if (! ((*insn_data[(int) CODE_FOR_extv].operand[1].predicate)
		     (xop0, GET_MODE (xop0))))
		{
		  /* No, load into a reg and extract from there.  */
		  enum machine_mode bestmode;

		  /* Get the mode to use for inserting into this field.  If
		     OP0 is BLKmode, get the smallest mode consistent with the
		     alignment.  If OP0 is a non-BLKmode object that is no
		     wider than MAXMODE, use its mode.  Otherwise, use the
		     smallest mode containing the field.  */

		  if (GET_MODE (xop0) == BLKmode
		      || (GET_MODE_SIZE (GET_MODE (op0))
			  > GET_MODE_SIZE (maxmode)))
		    bestmode = get_best_mode (bitsize, bitnum,
					      MEM_ALIGN (xop0), maxmode,
					      MEM_VOLATILE_P (xop0));
		  else
		    bestmode = GET_MODE (xop0);

		  if (bestmode == VOIDmode
		      || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (xop0))
			  && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (xop0)))
		    goto extv_loses;

		  /* Compute offset as multiple of this unit,
		     counting in bytes.  */
		  unit = GET_MODE_BITSIZE (bestmode);
		  xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
		  xbitpos = bitnum % unit;
		  xop0 = adjust_address (xop0, bestmode, xoffset);

		  /* Fetch it to a register in that size.  */
		  xop0 = force_reg (bestmode, xop0);

		  /* XBITPOS counts within UNIT, which is what is expected.  */
		}
	      else
		/* Get ref to first byte containing part of the field.  */
		xop0 = adjust_address (xop0, byte_mode, xoffset);
	    }

	  /* If op0 is a register, we need it in MAXMODE (which is usually
	     SImode) to make it acceptable to the format of extv.  */
	  if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
	    goto extv_loses;
	  if (REG_P (xop0) && GET_MODE (xop0) != maxmode)
	    xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);

	  /* On big-endian machines, we count bits from the most significant.
	     If the bit field insn does not, we must invert.  */
	  if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
	    xbitpos = unit - bitsize - xbitpos;

	  /* XBITPOS counts within a size of UNIT.
	     Adjust to count within a size of MAXMODE.  */
	  if (BITS_BIG_ENDIAN && !MEM_P (xop0))
	    xbitpos += (GET_MODE_BITSIZE (maxmode) - unit);

	  unit = GET_MODE_BITSIZE (maxmode);

	  if (xtarget == 0
	      || (flag_force_mem && MEM_P (xtarget)))
	    xtarget = xspec_target = gen_reg_rtx (tmode);

	  if (GET_MODE (xtarget) != maxmode)
	    {
	      if (REG_P (xtarget))
		{
		  int wider = (GET_MODE_SIZE (maxmode)
			       > GET_MODE_SIZE (GET_MODE (xtarget)));
		  xtarget = gen_lowpart (maxmode, xtarget);
		  if (wider)
		    xspec_target_subreg = xtarget;
		}
	      else
		xtarget = gen_reg_rtx (maxmode);
	    }

	  /* If this machine's extv insists on a register target,
	     make sure we have one.  */
	  if (! ((*insn_data[(int) CODE_FOR_extv].operand[0].predicate)
		 (xtarget, maxmode)))
	    xtarget = gen_reg_rtx (maxmode);

	  bitsize_rtx = GEN_INT (bitsize);
	  bitpos_rtx = GEN_INT (xbitpos);

	  pat = gen_extv (xtarget, xop0, bitsize_rtx, bitpos_rtx);
	  if (pat)
	    {
	      emit_insn (pat);
	      target = xtarget;
	      spec_target = xspec_target;
	      spec_target_subreg = xspec_target_subreg;
	    }
	  else
	    {
	      delete_insns_since (last);
	      target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
						bitpos, target, 0);
	    }
	}
      else
	extv_loses:
	target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
					  bitpos, target, 0);
    }
  if (target == spec_target)
    return target;
  if (target == spec_target_subreg)
    return spec_target;
  if (GET_MODE (target) != tmode && GET_MODE (target) != mode)
    {
      /* If the target mode is floating-point, first convert to the
	 integer mode of that size and then access it as a floating-point
	 value via a SUBREG.  */
      if (GET_MODE_CLASS (tmode) != MODE_INT
	  && GET_MODE_CLASS (tmode) != MODE_PARTIAL_INT)
	{
	  target = convert_to_mode (mode_for_size (GET_MODE_BITSIZE (tmode),
						   MODE_INT, 0),
				    target, unsignedp);
	  return gen_lowpart (tmode, target);
	}
      else
	return convert_to_mode (tmode, target, unsignedp);
    }
  return target;
}
/* Extract a bit field using shifts and boolean operations.
   Returns an rtx to represent the value.
   OP0 addresses a register (word) or memory (byte).
   BITPOS says which bit within the word or byte the bit field starts in.
   OFFSET says how many bytes farther the bit field starts;
     it is 0 if OP0 is a register.
   BITSIZE says how many bits long the bit field is.
     (If OP0 is a register, it may be narrower than a full word,
      but BITPOS still counts within a full word,
      which is significant on bigendian machines.)

   UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
   If TARGET is nonzero, attempts to store the value there
   and return TARGET, but this is not guaranteed.
   If TARGET is not used, create a pseudo-reg of mode TMODE for the value.  */

static rtx
extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
			 unsigned HOST_WIDE_INT offset,
			 unsigned HOST_WIDE_INT bitsize,
			 unsigned HOST_WIDE_INT bitpos, rtx target,
			 int unsignedp)
{
  unsigned int total_bits = BITS_PER_WORD;
  enum machine_mode mode;

  if (GET_CODE (op0) == SUBREG || REG_P (op0))
    {
      /* Special treatment for a bit field split across two registers.  */
      if (bitsize + bitpos > BITS_PER_WORD)
	return extract_split_bit_field (op0, bitsize, bitpos, unsignedp);
    }
  else
    {
      /* Get the proper mode to use for this field.  We want a mode that
	 includes the entire field.  If such a mode would be larger than
	 a word, we won't be doing the extraction the normal way.  */

      mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
			    MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));

      if (mode == VOIDmode)
	/* The only way this should occur is if the field spans word
	   boundaries.  */
	return extract_split_bit_field (op0, bitsize,
					bitpos + offset * BITS_PER_UNIT,
					unsignedp);

      total_bits = GET_MODE_BITSIZE (mode);

      /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
	 be in the range 0 to total_bits-1, and put any excess bytes in
	 OFFSET.  */
      if (bitpos >= total_bits)
	{
	  offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
	  bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
		     * BITS_PER_UNIT);
	}

      /* Get ref to an aligned byte, halfword, or word containing the field.
	 Adjust BITPOS to be position within a word,
	 and OFFSET to be the offset of that word.
	 Then alter OP0 to refer to that word.  */
      bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
      offset -= (offset % (total_bits / BITS_PER_UNIT));
      op0 = adjust_address (op0, mode, offset);
    }

  mode = GET_MODE (op0);

  if (BYTES_BIG_ENDIAN)
    /* BITPOS is the distance between our msb and that of OP0.
       Convert it to the distance from the lsb.  */
    bitpos = total_bits - bitsize - bitpos;

  /* Now BITPOS is always the distance between the field's lsb and that of OP0.
     We have reduced the big-endian case to the little-endian case.  */

  if (unsignedp)
    {
      if (bitpos)
	{
	  /* If the field does not already start at the lsb,
	     shift it so it does.  */
	  tree amount = build_int_cst (NULL_TREE, bitpos);
	  /* Maybe propagate the target for the shift.  */
	  /* But not if we will return it--could confuse integrate.c.  */
	  rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
	  if (tmode != mode) subtarget = 0;
	  op0 = expand_shift (RSHIFT_EXPR, mode, op0, amount, subtarget, 1);
	}
      /* Convert the value to the desired mode.  */
      if (mode != tmode)
	op0 = convert_to_mode (tmode, op0, 1);

      /* Unless the msb of the field used to be the msb when we shifted,
	 mask out the upper bits.  */

      if (GET_MODE_BITSIZE (mode) != bitpos + bitsize)
	return expand_binop (GET_MODE (op0), and_optab, op0,
			     mask_rtx (GET_MODE (op0), 0, bitsize, 0),
			     target, 1, OPTAB_LIB_WIDEN);
      return op0;
    }
1761 /* To extract a signed bit-field, first shift its msb to the msb of the word,
1762 then arithmetic-shift its lsb to the lsb of the word. */
1763 op0 = force_reg (mode, op0);
1764 if (mode != tmode)
1765 target = 0;
1767 /* Find the narrowest integer mode that contains the field. */
1769 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1770 mode = GET_MODE_WIDER_MODE (mode))
1771 if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos)
1773 op0 = convert_to_mode (mode, op0, 0);
1774 break;
1777 if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos))
1779 tree amount
1780 = build_int_cst (NULL_TREE,
1781 GET_MODE_BITSIZE (mode) - (bitsize + bitpos));
1782 /* Maybe propagate the target for the shift. */
1783 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1784 op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1787 return expand_shift (RSHIFT_EXPR, mode, op0,
1788 build_int_cst (NULL_TREE,
1789 GET_MODE_BITSIZE (mode) - bitsize),
1790 target, 0);
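/* A minimal standalone sketch (not GCC code) of the two extraction
   identities used above, at a fixed 32-bit width with invented names;
   assumes bitpos + bitsize <= 32.  The right shift of a negative value
   is implementation-defined in ISO C but arithmetic on GCC's targets.  */
#include <stdint.h>

/* Unsigned field: shift the field down to the lsb, then mask.  */
static uint32_t
extract_unsigned_sketch (uint32_t word, unsigned bitpos, unsigned bitsize)
{
  uint32_t mask = bitsize < 32 ? ((uint32_t) 1 << bitsize) - 1 : ~(uint32_t) 0;
  return (word >> bitpos) & mask;
}

/* Signed field: shift the field's msb up to the word's msb, then
   arithmetic-shift its lsb back down to the word's lsb.
   E.g. extract_signed_sketch (0x00000F00, 8, 4) == -1.  */
static int32_t
extract_signed_sketch (uint32_t word, unsigned bitpos, unsigned bitsize)
{
  return (int32_t) (word << (32 - bitpos - bitsize)) >> (32 - bitsize);
}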
1793 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1794 of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1795 complement of that if COMPLEMENT. The mask is truncated if
1796 necessary to the width of mode MODE. The mask is zero-extended if
1797 BITSIZE+BITPOS is too small for MODE. */
1799 static rtx
1800 mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
1802 HOST_WIDE_INT masklow, maskhigh;
1804 if (bitsize == 0)
1805 masklow = 0;
1806 else if (bitpos < HOST_BITS_PER_WIDE_INT)
1807 masklow = (HOST_WIDE_INT) -1 << bitpos;
1808 else
1809 masklow = 0;
1811 if (bitpos + bitsize < HOST_BITS_PER_WIDE_INT)
1812 masklow &= ((unsigned HOST_WIDE_INT) -1
1813 >> (HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1815 if (bitpos <= HOST_BITS_PER_WIDE_INT)
1816 maskhigh = -1;
1817 else
1818 maskhigh = (HOST_WIDE_INT) -1 << (bitpos - HOST_BITS_PER_WIDE_INT);
1820 if (bitsize == 0)
1821 maskhigh = 0;
1822 else if (bitpos + bitsize > HOST_BITS_PER_WIDE_INT)
1823 maskhigh &= ((unsigned HOST_WIDE_INT) -1
1824 >> (2 * HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1825 else
1826 maskhigh = 0;
1828 if (complement)
1830 maskhigh = ~maskhigh;
1831 masklow = ~masklow;
1834 return immed_double_const (masklow, maskhigh, mode);
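/* A standalone sketch (not GCC code) of the same mask on a single
   64-bit word; mask_rtx above produces it split across two
   HOST_WIDE_INT halves instead.  E.g. mask_sketch (4, 8, 0) == 0xFF0.  */
#include <stdint.h>

static uint64_t
mask_sketch (int bitpos, int bitsize, int complement)   /* bitpos < 64 */
{
  uint64_t ones = bitsize >= 64 ? ~(uint64_t) 0
                                : ((uint64_t) 1 << bitsize) - 1;
  uint64_t mask = ones << bitpos;       /* excess bits drop off the top */
  return complement ? ~mask : mask;
}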
1837 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1838 VALUE truncated to BITSIZE bits and then shifted left BITPOS bits. */
1840 static rtx
1841 lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize)
1843 unsigned HOST_WIDE_INT v = INTVAL (value);
1844 HOST_WIDE_INT low, high;
1846 if (bitsize < HOST_BITS_PER_WIDE_INT)
1847 v &= ~((HOST_WIDE_INT) -1 << bitsize);
1849 if (bitpos < HOST_BITS_PER_WIDE_INT)
1851 low = v << bitpos;
1852 high = (bitpos > 0 ? (v >> (HOST_BITS_PER_WIDE_INT - bitpos)) : 0);
1854 else
1856 low = 0;
1857 high = v << (bitpos - HOST_BITS_PER_WIDE_INT);
1860 return immed_double_const (low, high, mode);
1863 /* Extract a bit field that is split across two words
1864 and return an RTX for the result.
1866 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1867 BITSIZE is the field width; BITPOS is the position of its first bit within the word.
1868 UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend. */
1870 static rtx
1871 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1872 unsigned HOST_WIDE_INT bitpos, int unsignedp)
1874 unsigned int unit;
1875 unsigned int bitsdone = 0;
1876 rtx result = NULL_RTX;
1877 int first = 1;
1879 /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
1880 much at a time. */
1881 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1882 unit = BITS_PER_WORD;
1883 else
1884 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1886 while (bitsdone < bitsize)
1888 unsigned HOST_WIDE_INT thissize;
1889 rtx part, word;
1890 unsigned HOST_WIDE_INT thispos;
1891 unsigned HOST_WIDE_INT offset;
1893 offset = (bitpos + bitsdone) / unit;
1894 thispos = (bitpos + bitsdone) % unit;
1896 /* THISSIZE must not overrun a word boundary. Otherwise,
1897 extract_fixed_bit_field will call us again, and we will mutually
1898 recurse forever. */
1899 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1900 thissize = MIN (thissize, unit - thispos);
1902 /* If OP0 is a register, then handle OFFSET here.
1904 When handling multiword bitfields, extract_bit_field may pass
1905 down a word_mode SUBREG of a larger REG for a bitfield that actually
1906 crosses a word boundary. Thus, for a SUBREG, we must find
1907 the current word starting from the base register. */
1908 if (GET_CODE (op0) == SUBREG)
1910 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1911 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1912 GET_MODE (SUBREG_REG (op0)));
1913 offset = 0;
1915 else if (REG_P (op0))
1917 word = operand_subword_force (op0, offset, GET_MODE (op0));
1918 offset = 0;
1920 else
1921 word = op0;
1923 /* Extract the parts in bit-counting order,
1924 whose meaning is determined by BYTES_PER_UNIT.
1925 OFFSET is in UNITs, and UNIT is in bits.
1926 extract_fixed_bit_field wants offset in bytes. */
1927 part = extract_fixed_bit_field (word_mode, word,
1928 offset * unit / BITS_PER_UNIT,
1929 thissize, thispos, 0, 1);
1930 bitsdone += thissize;
1932 /* Shift this part into place for the result. */
1933 if (BYTES_BIG_ENDIAN)
1935 if (bitsize != bitsdone)
1936 part = expand_shift (LSHIFT_EXPR, word_mode, part,
1937 build_int_cst (NULL_TREE, bitsize - bitsdone),
1938 0, 1);
1940 else
1942 if (bitsdone != thissize)
1943 part = expand_shift (LSHIFT_EXPR, word_mode, part,
1944 build_int_cst (NULL_TREE,
1945 bitsdone - thissize), 0, 1);
1948 if (first)
1949 result = part;
1950 else
1951 /* Combine the parts with bitwise or. This works
1952 because we extracted each part as an unsigned bit field. */
1953 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
1954 OPTAB_LIB_WIDEN);
1956 first = 0;
1959 /* Unsigned bit field: we are done. */
1960 if (unsignedp)
1961 return result;
1962 /* Signed bit field: sign-extend with two arithmetic shifts. */
1963 result = expand_shift (LSHIFT_EXPR, word_mode, result,
1964 build_int_cst (NULL_TREE, BITS_PER_WORD - bitsize),
1965 NULL_RTX, 0);
1966 return expand_shift (RSHIFT_EXPR, word_mode, result,
1967 build_int_cst (NULL_TREE, BITS_PER_WORD - bitsize),
1968 NULL_RTX, 0);
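/* A standalone sketch (not GCC code) of the combining step above for a
   field spanning two 32-bit words, little-endian bit numbering, invented
   names; assumes bitpos + bitsize > 32 and bitsize <= 32.  */
#include <stdint.h>

static int32_t
extract_split_sketch (uint32_t w0, uint32_t w1,
                      unsigned bitpos, unsigned bitsize, int unsignedp)
{
  unsigned lo_bits = 32 - bitpos;          /* part that lives in w0 */
  unsigned hi_bits = bitsize - lo_bits;    /* part that lives in w1 */
  uint32_t lo = w0 >> bitpos;              /* both parts fetched unsigned */
  uint32_t hi = w1 & (((uint32_t) 1 << hi_bits) - 1);
  uint32_t result = lo | (hi << lo_bits);  /* bitwise-or them into place */
  if (unsignedp)
    return (int32_t) result;
  /* Sign-extend with two shifts, as at the end of the function above.  */
  return (int32_t) (result << (32 - bitsize)) >> (32 - bitsize);
}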
1971 /* Add INC into TARGET. */
1973 void
1974 expand_inc (rtx target, rtx inc)
1976 rtx value = expand_binop (GET_MODE (target), add_optab,
1977 target, inc,
1978 target, 0, OPTAB_LIB_WIDEN);
1979 if (value != target)
1980 emit_move_insn (target, value);
1983 /* Subtract DEC from TARGET. */
1985 void
1986 expand_dec (rtx target, rtx dec)
1988 rtx value = expand_binop (GET_MODE (target), sub_optab,
1989 target, dec,
1990 target, 0, OPTAB_LIB_WIDEN);
1991 if (value != target)
1992 emit_move_insn (target, value);
1995 /* Output a shift instruction for expression code CODE,
1996 with SHIFTED being the rtx for the value to shift,
1997 and AMOUNT the tree for the amount to shift by.
1998 Store the result in the rtx TARGET, if that is convenient.
1999 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2000 Return the rtx for where the value is. */
2002 rtx
2003 expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2004 tree amount, rtx target, int unsignedp)
2006 rtx op1, temp = 0;
2007 int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2008 int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2009 int try;
2011 /* We previously detected shift counts computed by NEGATE_EXPR
2012 and shifted in the other direction, but that does not work
2013 on all machines. */
2015 op1 = expand_expr (amount, NULL_RTX, VOIDmode, 0);
2017 if (SHIFT_COUNT_TRUNCATED)
2019 if (GET_CODE (op1) == CONST_INT
2020 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2021 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
2022 op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2023 % GET_MODE_BITSIZE (mode));
2024 else if (GET_CODE (op1) == SUBREG
2025 && subreg_lowpart_p (op1))
2026 op1 = SUBREG_REG (op1);
2029 if (op1 == const0_rtx)
2030 return shifted;
2032 /* Check whether it's cheaper to implement a left shift by a constant
2033 bit count as a sequence of additions. */
2034 if (code == LSHIFT_EXPR
2035 && GET_CODE (op1) == CONST_INT
2036 && INTVAL (op1) > 0
2037 && INTVAL (op1) < GET_MODE_BITSIZE (mode)
2038 && shift_cost[mode][INTVAL (op1)] > INTVAL (op1) * add_cost[mode])
2040 int i;
2041 for (i = 0; i < INTVAL (op1); i++)
2043 temp = force_reg (mode, shifted);
2044 shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2045 unsignedp, OPTAB_LIB_WIDEN);
2047 return shifted;
2050 for (try = 0; temp == 0 && try < 3; try++)
2052 enum optab_methods methods;
2054 if (try == 0)
2055 methods = OPTAB_DIRECT;
2056 else if (try == 1)
2057 methods = OPTAB_WIDEN;
2058 else
2059 methods = OPTAB_LIB_WIDEN;
2061 if (rotate)
2063 /* Widening does not work for rotation. */
2064 if (methods == OPTAB_WIDEN)
2065 continue;
2066 else if (methods == OPTAB_LIB_WIDEN)
2068 /* If we have been unable to open-code this by a rotation,
2069 do it as the IOR of two shifts. I.e., to rotate A
2070 by N bits, compute (A << N) | ((unsigned) A >> (C - N))
2071 where C is the bitsize of A.
2073 It is theoretically possible that the target machine might
2074 not be able to perform either shift and hence we would
2075 be making two libcalls rather than just the one for the
2076 shift (similarly if IOR could not be done). We will allow
2077 this extremely unlikely lossage to avoid complicating the
2078 code below. */
2080 rtx subtarget = target == shifted ? 0 : target;
2081 rtx temp1;
2082 tree type = TREE_TYPE (amount);
2083 tree new_amount = make_tree (type, op1);
2084 tree other_amount
2085 = fold (build2 (MINUS_EXPR, type, convert
2086 (type, build_int_cst
2087 (NULL_TREE, GET_MODE_BITSIZE (mode))),
2088 amount));
2090 shifted = force_reg (mode, shifted);
2092 temp = expand_shift (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2093 mode, shifted, new_amount, subtarget, 1);
2094 temp1 = expand_shift (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2095 mode, shifted, other_amount, 0, 1);
2096 return expand_binop (mode, ior_optab, temp, temp1, target,
2097 unsignedp, methods);
2100 temp = expand_binop (mode,
2101 left ? rotl_optab : rotr_optab,
2102 shifted, op1, target, unsignedp, methods);
2104 /* If we don't have the rotate, but we are rotating by a constant
2105 that is in range, try a rotate in the opposite direction. */
2107 if (temp == 0 && GET_CODE (op1) == CONST_INT
2108 && INTVAL (op1) > 0
2109 && (unsigned int) INTVAL (op1) < GET_MODE_BITSIZE (mode))
2110 temp = expand_binop (mode,
2111 left ? rotr_optab : rotl_optab,
2112 shifted,
2113 GEN_INT (GET_MODE_BITSIZE (mode)
2114 - INTVAL (op1)),
2115 target, unsignedp, methods);
2117 else if (unsignedp)
2118 temp = expand_binop (mode,
2119 left ? ashl_optab : lshr_optab,
2120 shifted, op1, target, unsignedp, methods);
2122 /* Do arithmetic shifts.
2123 Also, if we are going to widen the operand, we can just as well
2124 use an arithmetic right-shift instead of a logical one. */
2125 if (temp == 0 && ! rotate
2126 && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2128 enum optab_methods methods1 = methods;
2130 /* If trying to widen a log shift to an arithmetic shift,
2131 don't accept an arithmetic shift of the same size. */
2132 if (unsignedp)
2133 methods1 = OPTAB_MUST_WIDEN;
2135 /* Arithmetic shift */
2137 temp = expand_binop (mode,
2138 left ? ashl_optab : ashr_optab,
2139 shifted, op1, target, unsignedp, methods1);
2142 /* We used to try extzv here for logical right shifts, but that was
2143 only useful for one machine, the VAX, and caused poor code
2144 generation there for lshrdi3, so the code was deleted and a
2145 define_expand for lshrsi3 was added to vax.md. */
2148 if (temp == 0)
2149 abort ();
2150 return temp;
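/* A standalone sketch (not GCC code) of the rotation fallback used
   above: rotate A left by N as (A << N) | ((unsigned) A >> (C - N)),
   with C = 32 here and 0 < N < C assumed.  */
#include <stdint.h>

static uint32_t
rotl_sketch (uint32_t a, unsigned n)     /* 0 < n < 32 */
{
  return (a << n) | (a >> (32 - n));
}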
2153 enum alg_code { alg_zero, alg_m, alg_shift,
2154 alg_add_t_m2, alg_sub_t_m2,
2155 alg_add_factor, alg_sub_factor,
2156 alg_add_t2_m, alg_sub_t2_m,
2157 alg_add, alg_subtract, alg_factor, alg_shiftop };
2159 /* This structure records a sequence of operations.
2160 `ops' is the number of operations recorded.
2161 `cost' is their total cost.
2162 The operations are stored in `op' and the corresponding
2163 logarithms of the integer coefficients in `log'.
2165 These are the operations:
2166 alg_zero total := 0;
2167 alg_m total := multiplicand;
2168 alg_shift total := total * coeff
2169 alg_add_t_m2 total := total + multiplicand * coeff;
2170 alg_sub_t_m2 total := total - multiplicand * coeff;
2171 alg_add_factor total := total * coeff + total;
2172 alg_sub_factor total := total * coeff - total;
2173 alg_add_t2_m total := total * coeff + multiplicand;
2174 alg_sub_t2_m total := total * coeff - multiplicand;
2176 The first operand must be either alg_zero or alg_m. */
2178 struct algorithm
2180 short cost;
2181 short ops;
2182 /* The sizes of the OP and LOG fields are not directly related to the
2183 word size, but the worst case arises when the multiplicand has few
2184 consecutive ones or zeros, i.e., looks like 10101010101...;
2185 in that case we generate shift-by-2, add, shift-by-2, add, ...,
2186 roughly wordsize operations in total. */
2187 enum alg_code op[MAX_BITS_PER_WORD];
2188 char log[MAX_BITS_PER_WORD];
2191 /* Indicates the type of fixup needed after a constant multiplication.
2192 BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2193 the result should be negated, and ADD_VARIANT means that the
2194 multiplicand should be added to the result. */
2195 enum mult_variant {basic_variant, negate_variant, add_variant};
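/* A standalone sketch (not GCC code) replaying a recorded sequence in
   the spirit of the table above; the enum and names are invented here,
   and alg_zero is implicit in the zero initializer.  Example: 10*x is
   alg_m, alg_add_t2_m (log 2), alg_shift (log 1), i.e. ((x<<2)+x)<<1.  */
#include <assert.h>

enum alg_code_sketch { SK_M, SK_SHIFT, SK_ADD_T_M2, SK_SUB_T_M2,
                       SK_ADD_T2_M, SK_SUB_T2_M,
                       SK_ADD_FACTOR, SK_SUB_FACTOR };

static unsigned int
run_alg_sketch (unsigned int x, const enum alg_code_sketch *op,
                const char *log, int nops)
{
  unsigned int total = 0;
  int i;
  for (i = 0; i < nops; i++)
    switch (op[i])
      {
      case SK_M:          total = x; break;
      case SK_SHIFT:      total <<= log[i]; break;
      case SK_ADD_T_M2:   total += x << log[i]; break;
      case SK_SUB_T_M2:   total -= x << log[i]; break;
      case SK_ADD_T2_M:   total = (total << log[i]) + x; break;
      case SK_SUB_T2_M:   total = (total << log[i]) - x; break;
      case SK_ADD_FACTOR: total = (total << log[i]) + total; break;
      case SK_SUB_FACTOR: total = (total << log[i]) - total; break;
      }
  return total;
}

static void
run_alg_sketch_check (void)
{
  static const enum alg_code_sketch op[] = { SK_M, SK_ADD_T2_M, SK_SHIFT };
  static const char log[] = { 0, 2, 1 };
  assert (run_alg_sketch (123, op, log, 3) == 1230);   /* 10 * 123 */
}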
2197 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2198 int, enum machine_mode mode);
2199 static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
2200 struct algorithm *, enum mult_variant *, int);
2201 static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
2202 const struct algorithm *, enum mult_variant);
2203 static unsigned HOST_WIDE_INT choose_multiplier (unsigned HOST_WIDE_INT, int,
2204 int, unsigned HOST_WIDE_INT *,
2205 int *, int *);
2206 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2207 static rtx extract_high_half (enum machine_mode, rtx);
2208 static rtx expand_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
2209 int, int);
2210 /* Compute and return the best algorithm for multiplying by T.
2211 The algorithm must cost less than COST_LIMIT.
2212 If retval.cost >= COST_LIMIT, no algorithm was found and all
2213 other fields of the returned struct are undefined.
2214 MODE is the machine mode of the multiplication. */
2216 static void
2217 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2218 int cost_limit, enum machine_mode mode)
2220 int m;
2221 struct algorithm *alg_in, *best_alg;
2222 int cost;
2223 unsigned HOST_WIDE_INT q;
2224 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
2226 /* Indicate that no algorithm is yet found. If no algorithm
2227 is found, this value will be returned and indicate failure. */
2228 alg_out->cost = cost_limit;
2230 if (cost_limit <= 0)
2231 return;
2233 /* Restrict the bits of "t" to the multiplication's mode. */
2234 t &= GET_MODE_MASK (mode);
2236 /* t == 1 can be done at zero cost. */
2237 if (t == 1)
2239 alg_out->ops = 1;
2240 alg_out->cost = 0;
2241 alg_out->op[0] = alg_m;
2242 return;
2245 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2246 fail now. */
2247 if (t == 0)
2249 if (zero_cost >= cost_limit)
2250 return;
2251 else
2253 alg_out->ops = 1;
2254 alg_out->cost = zero_cost;
2255 alg_out->op[0] = alg_zero;
2256 return;
2260 /* We'll need a couple of extra algorithm structures now. */
2262 alg_in = alloca (sizeof (struct algorithm));
2263 best_alg = alloca (sizeof (struct algorithm));
2265 /* If we have a group of zero bits at the low-order part of T, try
2266 multiplying by the remaining bits and then doing a shift. */
2268 if ((t & 1) == 0)
2270 m = floor_log2 (t & -t); /* m = number of low zero bits */
2271 if (m < maxm)
2273 q = t >> m;
2274 /* The function expand_shift will choose between a shift and
2275 a sequence of additions, so the observed cost is given as
2276 MIN (m * add_cost[mode], shift_cost[mode][m]). */
2277 cost = m * add_cost[mode];
2278 if (shift_cost[mode][m] < cost)
2279 cost = shift_cost[mode][m];
2280 synth_mult (alg_in, q, cost_limit - cost, mode);
2282 cost += alg_in->cost;
2283 if (cost < cost_limit)
2285 struct algorithm *x;
2286 x = alg_in, alg_in = best_alg, best_alg = x;
2287 best_alg->log[best_alg->ops] = m;
2288 best_alg->op[best_alg->ops] = alg_shift;
2289 cost_limit = cost;
2294 /* If we have an odd number, add or subtract one. */
2295 if ((t & 1) != 0)
2297 unsigned HOST_WIDE_INT w;
2299 for (w = 1; (w & t) != 0; w <<= 1)
2301 /* If T was -1, then W will be zero after the loop. This is another
2302 case where T ends with ...111. Handling it by multiplying by
2303 (T + 1) and subtracting 1 produces slightly better code and
2304 selects an algorithm much faster than treating it like the
2305 ...0111 case below. */
2306 if (w == 0
2307 || (w > 2
2308 /* Reject the case where t is 3.
2309 Thus we prefer addition in that case. */
2310 && t != 3))
2312 /* T ends with ...111. Multiply by (T + 1) and subtract 1. */
2314 cost = add_cost[mode];
2315 synth_mult (alg_in, t + 1, cost_limit - cost, mode);
2317 cost += alg_in->cost;
2318 if (cost < cost_limit)
2320 struct algorithm *x;
2321 x = alg_in, alg_in = best_alg, best_alg = x;
2322 best_alg->log[best_alg->ops] = 0;
2323 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2324 cost_limit = cost;
2327 else
2329 /* T ends with ...01 or ...011. Multiply by (T - 1) and add 1. */
2331 cost = add_cost[mode];
2332 synth_mult (alg_in, t - 1, cost_limit - cost, mode);
2334 cost += alg_in->cost;
2335 if (cost < cost_limit)
2337 struct algorithm *x;
2338 x = alg_in, alg_in = best_alg, best_alg = x;
2339 best_alg->log[best_alg->ops] = 0;
2340 best_alg->op[best_alg->ops] = alg_add_t_m2;
2341 cost_limit = cost;
2346 /* Look for factors of t of the form
2347 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2348 If we find such a factor, we can multiply by t using an algorithm that
2349 multiplies by q, shifts the result left by m, and adds or subtracts the unshifted value.
2351 We search for large factors first and loop down, even if large factors
2352 are less probable than small; if we find a large factor we will find a
2353 good sequence quickly, and therefore be able to prune (by decreasing
2354 COST_LIMIT) the search. */
2356 for (m = floor_log2 (t - 1); m >= 2; m--)
2358 unsigned HOST_WIDE_INT d;
2360 d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2361 if (t % d == 0 && t > d && m < maxm)
2363 cost = add_cost[mode] + shift_cost[mode][m];
2364 if (shiftadd_cost[mode][m] < cost)
2365 cost = shiftadd_cost[mode][m];
2366 synth_mult (alg_in, t / d, cost_limit - cost, mode);
2368 cost += alg_in->cost;
2369 if (cost < cost_limit)
2371 struct algorithm *x;
2372 x = alg_in, alg_in = best_alg, best_alg = x;
2373 best_alg->log[best_alg->ops] = m;
2374 best_alg->op[best_alg->ops] = alg_add_factor;
2375 cost_limit = cost;
2377 /* Other factors will have been taken care of in the recursion. */
2378 break;
2381 d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2382 if (t % d == 0 && t > d && m < maxm)
2384 cost = add_cost[mode] + shift_cost[mode][m];
2385 if (shiftsub_cost[mode][m] < cost)
2386 cost = shiftsub_cost[mode][m];
2387 synth_mult (alg_in, t / d, cost_limit - cost, mode);
2389 cost += alg_in->cost;
2390 if (cost < cost_limit)
2392 struct algorithm *x;
2393 x = alg_in, alg_in = best_alg, best_alg = x;
2394 best_alg->log[best_alg->ops] = m;
2395 best_alg->op[best_alg->ops] = alg_sub_factor;
2396 cost_limit = cost;
2398 break;
2402 /* Try shift-and-add (load effective address) instructions,
2403 i.e. do a*3, a*5, a*9. */
2404 if ((t & 1) != 0)
2406 q = t - 1;
2407 q = q & -q;
2408 m = exact_log2 (q);
2409 if (m >= 0 && m < maxm)
2411 cost = shiftadd_cost[mode][m];
2412 synth_mult (alg_in, (t - 1) >> m, cost_limit - cost, mode);
2414 cost += alg_in->cost;
2415 if (cost < cost_limit)
2417 struct algorithm *x;
2418 x = alg_in, alg_in = best_alg, best_alg = x;
2419 best_alg->log[best_alg->ops] = m;
2420 best_alg->op[best_alg->ops] = alg_add_t2_m;
2421 cost_limit = cost;
2425 q = t + 1;
2426 q = q & -q;
2427 m = exact_log2 (q);
2428 if (m >= 0 && m < maxm)
2430 cost = shiftsub_cost[mode][m];
2431 synth_mult (alg_in, (t + 1) >> m, cost_limit - cost, mode);
2433 cost += alg_in->cost;
2434 if (cost < cost_limit)
2436 struct algorithm *x;
2437 x = alg_in, alg_in = best_alg, best_alg = x;
2438 best_alg->log[best_alg->ops] = m;
2439 best_alg->op[best_alg->ops] = alg_sub_t2_m;
2440 cost_limit = cost;
2445 /* If cost_limit has not decreased since we stored it in alg_out->cost,
2446 we have not found any algorithm. */
2447 if (cost_limit == alg_out->cost)
2448 return;
2450 /* If the sequence is too long for `struct algorithm'
2451 to record, make this search fail. */
2452 if (best_alg->ops == MAX_BITS_PER_WORD)
2453 return;
2455 /* Copy the algorithm from temporary space to the space at alg_out.
2456 We avoid using structure assignment because the majority of
2457 best_alg is normally undefined, and this is a critical function. */
2458 alg_out->ops = best_alg->ops + 1;
2459 alg_out->cost = cost_limit;
2460 memcpy (alg_out->op, best_alg->op,
2461 alg_out->ops * sizeof *alg_out->op);
2462 memcpy (alg_out->log, best_alg->log,
2463 alg_out->ops * sizeof *alg_out->log);
2466 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2467 Try three variations:
2469 - a shift/add sequence based on VAL itself
2470 - a shift/add sequence based on -VAL, followed by a negation
2471 - a shift/add sequence based on VAL - 1, followed by an addition.
2473 Return true if the cheapest of these costs less than MULT_COST,
2474 describing the algorithm in *ALG and final fixup in *VARIANT. */
2476 static bool
2477 choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
2478 struct algorithm *alg, enum mult_variant *variant,
2479 int mult_cost)
2481 struct algorithm alg2;
2483 *variant = basic_variant;
2484 synth_mult (alg, val, mult_cost, mode);
2486 /* This works only if the negated value actually fits in an
2487 `unsigned int'. */
2488 if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
2490 synth_mult (&alg2, -val, MIN (alg->cost, mult_cost) - neg_cost[mode],
2491 mode);
2492 alg2.cost += neg_cost[mode];
2493 if (alg2.cost < alg->cost)
2494 *alg = alg2, *variant = negate_variant;
2497 /* This proves very useful for division-by-constant. */
2498 synth_mult (&alg2, val - 1, MIN (alg->cost, mult_cost) - add_cost[mode],
2499 mode);
2500 alg2.cost += add_cost[mode];
2501 if (alg2.cost < alg->cost)
2502 *alg = alg2, *variant = add_variant;
2504 return alg->cost < mult_cost;
2507 /* A subroutine of expand_mult, used for constant multiplications.
2508 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2509 convenient. Use the shift/add sequence described by ALG and apply
2510 the final fixup specified by VARIANT. */
2512 static rtx
2513 expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
2514 rtx target, const struct algorithm *alg,
2515 enum mult_variant variant)
2517 HOST_WIDE_INT val_so_far;
2518 rtx insn, accum, tem;
2519 int opno;
2520 enum machine_mode nmode;
2522 /* Avoid referencing memory over and over.
2523 For speed, but also for correctness when mem is volatile. */
2524 if (MEM_P (op0))
2525 op0 = force_reg (mode, op0);
2527 /* ACCUM starts out either as OP0 or as a zero, depending on
2528 the first operation. */
2530 if (alg->op[0] == alg_zero)
2532 accum = copy_to_mode_reg (mode, const0_rtx);
2533 val_so_far = 0;
2535 else if (alg->op[0] == alg_m)
2537 accum = copy_to_mode_reg (mode, op0);
2538 val_so_far = 1;
2540 else
2541 abort ();
2543 for (opno = 1; opno < alg->ops; opno++)
2545 int log = alg->log[opno];
2546 rtx shift_subtarget = optimize ? 0 : accum;
2547 rtx add_target
2548 = (opno == alg->ops - 1 && target != 0 && variant != add_variant
2549 && !optimize)
2550 ? target : 0;
2551 rtx accum_target = optimize ? 0 : accum;
2553 switch (alg->op[opno])
2555 case alg_shift:
2556 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2557 build_int_cst (NULL_TREE, log),
2558 NULL_RTX, 0);
2559 val_so_far <<= log;
2560 break;
2562 case alg_add_t_m2:
2563 tem = expand_shift (LSHIFT_EXPR, mode, op0,
2564 build_int_cst (NULL_TREE, log),
2565 NULL_RTX, 0);
2566 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2567 add_target ? add_target : accum_target);
2568 val_so_far += (HOST_WIDE_INT) 1 << log;
2569 break;
2571 case alg_sub_t_m2:
2572 tem = expand_shift (LSHIFT_EXPR, mode, op0,
2573 build_int_cst (NULL_TREE, log),
2574 NULL_RTX, 0);
2575 accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
2576 add_target ? add_target : accum_target);
2577 val_so_far -= (HOST_WIDE_INT) 1 << log;
2578 break;
2580 case alg_add_t2_m:
2581 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2582 build_int_cst (NULL_TREE, log),
2583 shift_subtarget,
2585 accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
2586 add_target ? add_target : accum_target);
2587 val_so_far = (val_so_far << log) + 1;
2588 break;
2590 case alg_sub_t2_m:
2591 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2592 build_int_cst (NULL_TREE, log),
2593 shift_subtarget, 0);
2594 accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
2595 add_target ? add_target : accum_target);
2596 val_so_far = (val_so_far << log) - 1;
2597 break;
2599 case alg_add_factor:
2600 tem = expand_shift (LSHIFT_EXPR, mode, accum,
2601 build_int_cst (NULL_TREE, log),
2602 NULL_RTX, 0);
2603 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2604 add_target ? add_target : accum_target);
2605 val_so_far += val_so_far << log;
2606 break;
2608 case alg_sub_factor:
2609 tem = expand_shift (LSHIFT_EXPR, mode, accum,
2610 build_int_cst (NULL_TREE, log),
2611 NULL_RTX, 0);
2612 accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
2613 (add_target
2614 ? add_target : (optimize ? 0 : tem)));
2615 val_so_far = (val_so_far << log) - val_so_far;
2616 break;
2618 default:
2619 abort ();
2622 /* Write a REG_EQUAL note on the last insn so that we can cse
2623 multiplication sequences. Note that if ACCUM is a SUBREG,
2624 we've set the inner register and must properly indicate
2625 that. */
2627 tem = op0, nmode = mode;
2628 if (GET_CODE (accum) == SUBREG)
2630 nmode = GET_MODE (SUBREG_REG (accum));
2631 tem = gen_lowpart (nmode, op0);
2634 insn = get_last_insn ();
2635 set_unique_reg_note (insn, REG_EQUAL,
2636 gen_rtx_MULT (nmode, tem, GEN_INT (val_so_far)));
2639 if (variant == negate_variant)
2641 val_so_far = -val_so_far;
2642 accum = expand_unop (mode, neg_optab, accum, target, 0);
2644 else if (variant == add_variant)
2646 val_so_far = val_so_far + 1;
2647 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
2650 /* Compare only the bits of val and val_so_far that are significant
2651 in the result mode, to avoid sign-/zero-extension confusion. */
2652 val &= GET_MODE_MASK (mode);
2653 val_so_far &= GET_MODE_MASK (mode);
2654 if (val != val_so_far)
2655 abort ();
2657 return accum;
2660 /* Perform a multiplication and return an rtx for the result.
2661 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
2662 TARGET is a suggestion for where to store the result (an rtx).
2664 We check specially for a constant integer as OP1.
2665 If you want this check for OP0 as well, then before calling
2666 you should swap the two operands if OP0 would be constant. */
2668 rtx
2669 expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
2670 int unsignedp)
2672 rtx const_op1 = op1;
2673 enum mult_variant variant;
2674 struct algorithm algorithm;
2676 /* synth_mult does an `unsigned int' multiply. As long as the mode is
2677 less than or equal in size to `unsigned int' this doesn't matter.
2678 If the mode is larger than `unsigned int', then synth_mult works only
2679 if the constant value exactly fits in an `unsigned int' without any
2680 truncation. This means that multiplying by negative values does
2681 not work; results are off by 2^32 on a 32-bit machine. */
2683 /* If we are multiplying in DImode, it may still be a win
2684 to try to work with shifts and adds. */
2685 if (GET_CODE (op1) == CONST_DOUBLE
2686 && GET_MODE_CLASS (GET_MODE (op1)) == MODE_INT
2687 && HOST_BITS_PER_INT >= BITS_PER_WORD
2688 && CONST_DOUBLE_HIGH (op1) == 0)
2689 const_op1 = GEN_INT (CONST_DOUBLE_LOW (op1));
2690 else if (HOST_BITS_PER_INT < GET_MODE_BITSIZE (mode)
2691 && GET_CODE (op1) == CONST_INT
2692 && INTVAL (op1) < 0)
2693 const_op1 = 0;
2695 /* We used to test optimize here, on the grounds that it's better to
2696 produce a smaller program when -O is not used.
2697 But this causes such a terrible slowdown sometimes
2698 that it seems better to use synth_mult always. */
2700 if (const_op1 && GET_CODE (const_op1) == CONST_INT
2701 && (unsignedp || !flag_trapv))
2703 int mult_cost = rtx_cost (gen_rtx_MULT (mode, op0, op1), SET);
2705 if (choose_mult_variant (mode, INTVAL (const_op1), &algorithm, &variant,
2706 mult_cost))
2707 return expand_mult_const (mode, op0, INTVAL (const_op1), target,
2708 &algorithm, variant);
2711 if (GET_CODE (op0) == CONST_DOUBLE)
2713 rtx temp = op0;
2714 op0 = op1;
2715 op1 = temp;
2718 /* Expand x*2.0 as x+x. */
2719 if (GET_CODE (op1) == CONST_DOUBLE
2720 && GET_MODE_CLASS (mode) == MODE_FLOAT)
2722 REAL_VALUE_TYPE d;
2723 REAL_VALUE_FROM_CONST_DOUBLE (d, op1);
2725 if (REAL_VALUES_EQUAL (d, dconst2))
2727 op0 = force_reg (GET_MODE (op0), op0);
2728 return expand_binop (mode, add_optab, op0, op0,
2729 target, unsignedp, OPTAB_LIB_WIDEN);
2733 /* This used to use umul_optab if unsigned, but for non-widening multiply
2734 there is no difference between signed and unsigned. */
2735 op0 = expand_binop (mode,
2736 ! unsignedp
2737 && flag_trapv && (GET_MODE_CLASS(mode) == MODE_INT)
2738 ? smulv_optab : smul_optab,
2739 op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
2740 if (op0 == 0)
2741 abort ();
2742 return op0;
2745 /* Return the smallest n such that 2**n >= X. */
2747 int
2748 ceil_log2 (unsigned HOST_WIDE_INT x)
2750 return floor_log2 (x - 1) + 1;
2753 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
2754 replace division by D, and put the least significant N bits of the result
2755 in *MULTIPLIER_PTR and return the most significant bit.
2757 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT); the
2758 needed precision is in PRECISION (should be <= N).
2760 PRECISION should be as small as possible so this function can choose
2761 the multiplier more freely.
2763 The rounded-up logarithm of D is placed in *lgup_ptr. A shift count that
2764 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
2766 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
2767 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
2769 static
2770 unsigned HOST_WIDE_INT
2771 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
2772 unsigned HOST_WIDE_INT *multiplier_ptr,
2773 int *post_shift_ptr, int *lgup_ptr)
2775 HOST_WIDE_INT mhigh_hi, mlow_hi;
2776 unsigned HOST_WIDE_INT mhigh_lo, mlow_lo;
2777 int lgup, post_shift;
2778 int pow, pow2;
2779 unsigned HOST_WIDE_INT nl, dummy1;
2780 HOST_WIDE_INT nh, dummy2;
2782 /* lgup = ceil(log2(divisor)); */
2783 lgup = ceil_log2 (d);
2785 if (lgup > n)
2786 abort ();
2788 pow = n + lgup;
2789 pow2 = n + lgup - precision;
2791 if (pow == 2 * HOST_BITS_PER_WIDE_INT)
2793 /* We could handle this with some effort, but this case is much better
2794 handled directly with a scc insn, so rely on caller using that. */
2795 abort ();
2798 /* mlow = 2^(N + lgup)/d */
2799 if (pow >= HOST_BITS_PER_WIDE_INT)
2801 nh = (HOST_WIDE_INT) 1 << (pow - HOST_BITS_PER_WIDE_INT);
2802 nl = 0;
2804 else
2806 nh = 0;
2807 nl = (unsigned HOST_WIDE_INT) 1 << pow;
2809 div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
2810 &mlow_lo, &mlow_hi, &dummy1, &dummy2);
2812 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
2813 if (pow2 >= HOST_BITS_PER_WIDE_INT)
2814 nh |= (HOST_WIDE_INT) 1 << (pow2 - HOST_BITS_PER_WIDE_INT);
2815 else
2816 nl |= (unsigned HOST_WIDE_INT) 1 << pow2;
2817 div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
2818 &mhigh_lo, &mhigh_hi, &dummy1, &dummy2);
2820 if (mhigh_hi && nh - d >= d)
2821 abort ();
2822 if (mhigh_hi > 1 || mlow_hi > 1)
2823 abort ();
2824 /* Assert that mlow < mhigh. */
2825 if (! (mlow_hi < mhigh_hi || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo)))
2826 abort ();
2828 /* If precision == N, then mlow, mhigh exceed 2^N
2829 (but they do not exceed 2^(N+1)). */
2831 /* Reduce to lowest terms. */
2832 for (post_shift = lgup; post_shift > 0; post_shift--)
2834 unsigned HOST_WIDE_INT ml_lo = (mlow_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mlow_lo >> 1);
2835 unsigned HOST_WIDE_INT mh_lo = (mhigh_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mhigh_lo >> 1);
2836 if (ml_lo >= mh_lo)
2837 break;
2839 mlow_hi = 0;
2840 mlow_lo = ml_lo;
2841 mhigh_hi = 0;
2842 mhigh_lo = mh_lo;
2845 *post_shift_ptr = post_shift;
2846 *lgup_ptr = lgup;
2847 if (n < HOST_BITS_PER_WIDE_INT)
2849 unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
2850 *multiplier_ptr = mhigh_lo & mask;
2851 return mhigh_lo >= mask;
2853 else
2855 *multiplier_ptr = mhigh_lo;
2856 return mhigh_hi;
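/* A standalone sketch (not GCC code) of the same derivation for N = 16,
   small enough that plain 64-bit host arithmetic replaces the
   double-word division above.  For d = 5 and precision == N it yields
   the classic m = 0xCCCD with post_shift = 2, and the multiplier fits
   in N bits (mh == 0); divisors whose multiplier overflows into bit N,
   such as 7, instead need the add-and-halve sequence that
   expand_divmod emits further below.  */
#include <stdint.h>
#include <assert.h>

static void
choose_multiplier_sketch_check (void)
{
  const uint32_t d = 5, N = 16;
  unsigned lgup = 0, post_shift;
  uint64_t mlow, mhigh;
  uint32_t x;

  while (((uint32_t) 1 << lgup) < d)
    lgup++;                                    /* ceil(log2(d)) == 3 */
  mlow  = ((uint64_t) 1 << (N + lgup)) / d;
  mhigh = (((uint64_t) 1 << (N + lgup)) + ((uint64_t) 1 << lgup)) / d;

  /* Reduce to lowest terms, as in the loop above.  */
  for (post_shift = lgup; post_shift > 0; post_shift--)
    {
      if ((mlow >> 1) >= (mhigh >> 1))
        break;
      mlow >>= 1, mhigh >>= 1;
    }

  assert (mhigh == 0xCCCD && post_shift == 2);
  for (x = 0; x < ((uint32_t) 1 << N); x++)
    assert (x / d == ((x * mhigh) >> N) >> post_shift);
}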
2860 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
2861 congruent to 1 (mod 2**N). */
2863 static unsigned HOST_WIDE_INT
2864 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
2866 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
2868 /* The algorithm notes that the choice y = x satisfies
2869 x*y == 1 mod 2^3, since x is assumed odd.
2870 Each iteration doubles the number of bits of significance in y. */
2872 unsigned HOST_WIDE_INT mask;
2873 unsigned HOST_WIDE_INT y = x;
2874 int nbit = 3;
2876 mask = (n == HOST_BITS_PER_WIDE_INT
2877 ? ~(unsigned HOST_WIDE_INT) 0
2878 : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
2880 while (nbit < n)
2882 y = y * (2 - x*y) & mask; /* Modulo 2^N */
2883 nbit *= 2;
2885 return y;
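/* A standalone sketch (not GCC code) of the same Newton iteration at a
   fixed 32-bit width: y = x is already correct to 3 bits for odd x, and
   each step doubles the number of correct low-order bits, so five steps
   are ample for 32.  E.g. invert_mod2_32_sketch (3) == 0xAAAAAAAB.  */
#include <stdint.h>

static uint32_t
invert_mod2_32_sketch (uint32_t x)       /* x must be odd */
{
  uint32_t y = x;
  int i;
  for (i = 0; i < 5; i++)
    y = y * (2 - x * y);                 /* all arithmetic is mod 2^32 */
  return y;
}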
2888 /* Emit code to adjust ADJ_OPERAND after a multiplication of the wrong
2889 signedness flavor of OP0 and OP1. ADJ_OPERAND is already the high half
2890 of the product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed
2891 product to become unsigned; if UNSIGNEDP is zero, adjust the unsigned
2892 product to become signed.
2894 The result is put in TARGET if that is convenient.
2896 MODE is the mode of operation. */
2898 rtx
2899 expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
2900 rtx op1, rtx target, int unsignedp)
2902 rtx tem;
2903 enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
2905 tem = expand_shift (RSHIFT_EXPR, mode, op0,
2906 build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode) - 1),
2907 NULL_RTX, 0);
2908 tem = expand_and (mode, tem, op1, NULL_RTX);
2909 adj_operand
2910 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
2911 adj_operand);
2913 tem = expand_shift (RSHIFT_EXPR, mode, op1,
2914 build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode) - 1),
2915 NULL_RTX, 0);
2916 tem = expand_and (mode, tem, op0, NULL_RTX);
2917 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
2918 target);
2920 return target;
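/* A standalone sketch (not GCC code) of the identity behind the
   adjustment above, at 32 bits:
     uhigh (x, y) == shigh (x, y) + (x < 0 ? y : 0) + (y < 0 ? x : 0)
   with everything taken mod 2^32.  The >> 32 of a negative 64-bit value
   is implementation-defined in ISO C but arithmetic on GCC's targets.  */
#include <stdint.h>
#include <assert.h>

static void
highpart_adjust_sketch_check (int32_t x, int32_t y)
{
  uint32_t uhigh = (uint32_t)
    (((uint64_t) (uint32_t) x * (uint32_t) y) >> 32);
  uint32_t shigh = (uint32_t) (((int64_t) x * y) >> 32);
  uint32_t adj = shigh + (x < 0 ? (uint32_t) y : 0)
                       + (y < 0 ? (uint32_t) x : 0);
  assert (uhigh == adj);
}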
2923 /* Subroutine of expand_mult_highpart. Return the MODE high part of OP. */
2925 static rtx
2926 extract_high_half (enum machine_mode mode, rtx op)
2928 enum machine_mode wider_mode;
2930 if (mode == word_mode)
2931 return gen_highpart (mode, op);
2933 wider_mode = GET_MODE_WIDER_MODE (mode);
2934 op = expand_shift (RSHIFT_EXPR, wider_mode, op,
2935 build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode)), 0, 1);
2936 return convert_modes (mode, wider_mode, op, 0);
2939 /* Like expand_mult_highpart, but only consider using a multiplication
2940 optab. OP1 is an rtx for the constant operand. */
2942 static rtx
2943 expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
2944 rtx target, int unsignedp, int max_cost)
2946 rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
2947 enum machine_mode wider_mode;
2948 optab moptab;
2949 rtx tem;
2950 int size;
2952 wider_mode = GET_MODE_WIDER_MODE (mode);
2953 size = GET_MODE_BITSIZE (mode);
2955 /* First, try using a multiplication insn that generates only the needed
2956 high part of the product, in the signedness flavor of UNSIGNEDP. */
2957 if (mul_highpart_cost[mode] < max_cost)
2959 moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
2960 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
2961 unsignedp, OPTAB_DIRECT);
2962 if (tem)
2963 return tem;
2966 /* Second, same as above, but use the signedness flavor opposite to
2967 UNSIGNEDP; we must then adjust the result after the multiplication. */
2968 if (size - 1 < BITS_PER_WORD
2969 && (mul_highpart_cost[mode] + 2 * shift_cost[mode][size-1]
2970 + 4 * add_cost[mode] < max_cost))
2972 moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
2973 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
2974 unsignedp, OPTAB_DIRECT);
2975 if (tem)
2976 /* We used the wrong signedness. Adjust the result. */
2977 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
2978 tem, unsignedp);
2981 /* Try widening multiplication. */
2982 moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
2983 if (moptab->handlers[wider_mode].insn_code != CODE_FOR_nothing
2984 && mul_widen_cost[wider_mode] < max_cost)
2986 tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
2987 unsignedp, OPTAB_WIDEN);
2988 if (tem)
2989 return extract_high_half (mode, tem);
2992 /* Try widening the mode and performing a non-widening multiplication. */
2993 moptab = smul_optab;
2994 if (smul_optab->handlers[wider_mode].insn_code != CODE_FOR_nothing
2995 && size - 1 < BITS_PER_WORD
2996 && mul_cost[wider_mode] + shift_cost[mode][size-1] < max_cost)
2998 tem = expand_binop (wider_mode, moptab, op0, op1, 0,
2999 unsignedp, OPTAB_WIDEN);
3000 if (tem)
3001 return extract_high_half (mode, tem);
3004 /* Try widening multiplication of opposite signedness, and adjust. */
3005 moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3006 if (moptab->handlers[wider_mode].insn_code != CODE_FOR_nothing
3007 && size - 1 < BITS_PER_WORD
3008 && (mul_widen_cost[wider_mode] + 2 * shift_cost[mode][size-1]
3009 + 4 * add_cost[mode] < max_cost))
3011 tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3012 NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3013 if (tem != 0)
3015 tem = extract_high_half (mode, tem);
3016 /* We used the wrong signedness. Adjust the result. */
3017 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3018 target, unsignedp);
3022 return 0;
3025 /* Emit code to multiply OP0 and CNST1, putting the high half of the result
3026 in TARGET if that is convenient, and return where the result is. If the
3027 operation cannot be performed, 0 is returned.
3029 MODE is the mode of operation and result.
3031 UNSIGNEDP nonzero means unsigned multiply.
3033 MAX_COST is the total allowed cost for the expanded RTL. */
3035 rtx
3036 expand_mult_highpart (enum machine_mode mode, rtx op0,
3037 unsigned HOST_WIDE_INT cnst1, rtx target,
3038 int unsignedp, int max_cost)
3040 enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3041 int extra_cost;
3042 bool sign_adjust = false;
3043 enum mult_variant variant;
3044 struct algorithm alg;
3045 rtx op1, tem;
3047 /* We can't support modes wider than HOST_BITS_PER_WIDE_INT. */
3048 if (GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT)
3049 abort ();
3051 op1 = gen_int_mode (cnst1, wider_mode);
3052 cnst1 &= GET_MODE_MASK (mode);
3054 /* We can't optimize modes wider than BITS_PER_WORD.
3055 ??? We might be able to perform double-word arithmetic if
3056 mode == word_mode, however all the cost calculations in
3057 synth_mult etc. assume single-word operations. */
3058 if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3059 return expand_mult_highpart_optab (mode, op0, op1, target,
3060 unsignedp, max_cost);
3062 extra_cost = shift_cost[mode][GET_MODE_BITSIZE (mode) - 1];
3064 /* Check whether we are trying to multiply by a negative constant. */
3065 if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3067 sign_adjust = true;
3068 extra_cost += add_cost[mode];
3071 /* See whether shift/add multiplication is cheap enough. */
3072 if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3073 max_cost - extra_cost))
3075 /* See whether the specialized multiplication optabs are
3076 cheaper than the shift/add version. */
3077 tem = expand_mult_highpart_optab (mode, op0, op1, target,
3078 unsignedp, alg.cost + extra_cost);
3079 if (tem)
3080 return tem;
3082 tem = convert_to_mode (wider_mode, op0, unsignedp);
3083 tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3084 tem = extract_high_half (mode, tem);
3086 /* Adjust result for signedness. */
3087 if (sign_adjust)
3088 tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3090 return tem;
3092 return expand_mult_highpart_optab (mode, op0, op1, target,
3093 unsignedp, max_cost);
3097 /* Expand signed modulus of OP0 by a power of two D in mode MODE. */
3099 static rtx
3100 expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3102 unsigned HOST_WIDE_INT mask;
3103 rtx result, temp, shift, label;
3104 int logd;
3106 logd = floor_log2 (d);
3107 result = gen_reg_rtx (mode);
3109 /* Avoid conditional branches when they're expensive. */
3110 if (BRANCH_COST >= 2
3111 && !optimize_size)
3113 rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3114 mode, 0, -1);
3115 if (signmask)
3117 signmask = force_reg (mode, signmask);
3118 mask = ((HOST_WIDE_INT) 1 << logd) - 1;
3119 shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3121 /* Use the rtx_cost of a LSHIFTRT instruction to determine
3122 which instruction sequence to use. If logical right shifts
3123 are expensive, use 2 XORs, 2 SUBs and an AND; otherwise
3124 use a LSHIFTRT, 1 ADD, 1 SUB and an AND. */
3126 temp = gen_rtx_LSHIFTRT (mode, result, shift);
3127 if (lshr_optab->handlers[mode].insn_code == CODE_FOR_nothing
3128 || rtx_cost (temp, SET) > COSTS_N_INSNS (2))
3130 temp = expand_binop (mode, xor_optab, op0, signmask,
3131 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3132 temp = expand_binop (mode, sub_optab, temp, signmask,
3133 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3134 temp = expand_binop (mode, and_optab, temp, GEN_INT (mask),
3135 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3136 temp = expand_binop (mode, xor_optab, temp, signmask,
3137 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3138 temp = expand_binop (mode, sub_optab, temp, signmask,
3139 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3141 else
3143 signmask = expand_binop (mode, lshr_optab, signmask, shift,
3144 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3145 signmask = force_reg (mode, signmask);
3147 temp = expand_binop (mode, add_optab, op0, signmask,
3148 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3149 temp = expand_binop (mode, and_optab, temp, GEN_INT (mask),
3150 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3151 temp = expand_binop (mode, sub_optab, temp, signmask,
3152 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3154 return temp;
3158 /* Mask contains the mode's signbit and the significant bits of the
3159 modulus. By including the signbit in the operation, many targets
3160 can avoid an explicit compare operation in the following comparison
3161 against zero. */
3163 mask = (HOST_WIDE_INT) -1 << (GET_MODE_BITSIZE (mode) - 1)
3164 | (((HOST_WIDE_INT) 1 << logd) - 1);
3166 temp = expand_binop (mode, and_optab, op0, GEN_INT (mask), result,
3167 1, OPTAB_LIB_WIDEN);
3168 if (temp != result)
3169 emit_move_insn (result, temp);
3171 label = gen_label_rtx ();
3172 do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3174 temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3175 0, OPTAB_LIB_WIDEN);
3176 mask = (HOST_WIDE_INT) -1 << logd;
3177 temp = expand_binop (mode, ior_optab, temp, GEN_INT (mask), result,
3178 1, OPTAB_LIB_WIDEN);
3179 temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3180 0, OPTAB_LIB_WIDEN);
3181 if (temp != result)
3182 emit_move_insn (result, temp);
3183 emit_label (label);
3184 return result;
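/* A standalone sketch (not GCC code) of the second, branch-free variant
   above at 32 bits: the sign mask comes from an arithmetic right shift
   (implementation-defined in ISO C, arithmetic on GCC's targets), its
   low logd bits are added on, masked, and subtracted back off.
   E.g. smod_pow2_sketch (-5, 2) == -1, matching C99's -5 % 4.  */
#include <stdint.h>

static int32_t
smod_pow2_sketch (int32_t x, int logd)          /* d = 1 << logd, logd >= 1 */
{
  uint32_t signmask = (uint32_t) (x >> 31);     /* 0 or 0xFFFFFFFF */
  uint32_t low = signmask >> (32 - logd);       /* 0 or d - 1 */
  uint32_t mask = ((uint32_t) 1 << logd) - 1;
  return (int32_t) ((((uint32_t) x + low) & mask) - low);
}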
3187 /* Expand signed division of OP0 by a power of two D in mode MODE.
3188 This routine is only called for positive values of D. */
3190 static rtx
3191 expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3193 rtx temp, label;
3194 tree shift;
3195 int logd;
3197 logd = floor_log2 (d);
3198 shift = build_int_cst (NULL_TREE, logd);
3200 if (d == 2 && BRANCH_COST >= 1)
3202 temp = gen_reg_rtx (mode);
3203 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3204 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3205 0, OPTAB_LIB_WIDEN);
3206 return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3209 #ifdef HAVE_conditional_move
3210 if (BRANCH_COST >= 2)
3212 rtx temp2;
3214 start_sequence ();
3215 temp2 = copy_to_mode_reg (mode, op0);
3216 temp = expand_binop (mode, add_optab, temp2, GEN_INT (d-1),
3217 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3218 temp = force_reg (mode, temp);
3220 /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */
3221 temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3222 mode, temp, temp2, mode, 0);
3223 if (temp2)
3225 rtx seq = get_insns ();
3226 end_sequence ();
3227 emit_insn (seq);
3228 return expand_shift (RSHIFT_EXPR, mode, temp2, shift, NULL_RTX, 0);
3230 end_sequence ();
3232 #endif
3234 if (BRANCH_COST >= 2)
3236 int ushift = GET_MODE_BITSIZE (mode) - logd;
3238 temp = gen_reg_rtx (mode);
3239 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3240 if (shift_cost[mode][ushift] > COSTS_N_INSNS (1))
3241 temp = expand_binop (mode, and_optab, temp, GEN_INT (d - 1),
3242 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3243 else
3244 temp = expand_shift (RSHIFT_EXPR, mode, temp,
3245 build_int_cst (NULL_TREE, ushift),
3246 NULL_RTX, 1);
3247 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3248 0, OPTAB_LIB_WIDEN);
3249 return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3252 label = gen_label_rtx ();
3253 temp = copy_to_mode_reg (mode, op0);
3254 do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3255 expand_inc (temp, GEN_INT (d - 1));
3256 emit_label (label);
3257 return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
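/* A standalone sketch (not GCC code) of the store-flag variant above at
   32 bits: bias negative operands by d - 1, then shift right
   arithmetically (implementation-defined in ISO C, arithmetic on GCC's
   targets).  E.g. sdiv_pow2_sketch (-7, 2) == -1, matching C99's -7 / 4.  */
#include <stdint.h>

static int32_t
sdiv_pow2_sketch (int32_t x, int logd)          /* d = 1 << logd, logd >= 1 */
{
  uint32_t bias = (uint32_t) (x >> 31) >> (32 - logd);  /* x < 0 ? d-1 : 0 */
  return (int32_t) ((uint32_t) x + bias) >> logd;
}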
3260 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3261 if that is convenient, and returning where the result is.
3262 You may request either the quotient or the remainder as the result;
3263 specify REM_FLAG nonzero to get the remainder.
3265 CODE is the expression code for which kind of division this is;
3266 it controls how rounding is done. MODE is the machine mode to use.
3267 UNSIGNEDP nonzero means do unsigned division. */
3269 /* ??? For CEIL_MOD_EXPR, we could compute an incorrect remainder with ANDI
3270 and then correct it by OR-ing in the missing high bits
3271 if the result of the ANDI is nonzero.
3272 For ROUND_MOD_EXPR, we could use ANDI and then sign-extend the result.
3273 This could optimize to a bfexts instruction.
3274 But C doesn't use these operations, so their optimizations are
3275 left for later. */
3276 /* ??? For modulo, we don't actually need the high part of the first product;
3277 the low part will do nicely. And for small divisors, the second multiply
3278 can also be a low-part only multiply or even be completely left out.
3279 E.g. to calculate the remainder of a division by 3 with a 32 bit
3280 multiply, multiply with 0x55555556 and extract the upper two bits;
3281 the result is exact for inputs up to 0x1fffffff.
3282 The input range can be reduced by using cross-sum rules.
3283 For odd divisors >= 3, the following table gives right shift counts
3284 so that if a number is shifted by an integer multiple of the given
3285 amount, the remainder stays the same:
3286 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3287 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3288 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3289 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3290 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3292 Cross-sum rules for even numbers can be derived by leaving as many bits
3293 to the right alone as the divisor has zeros to the right.
3294 E.g. if x is an unsigned 32 bit number:
3295 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28 */
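/* A standalone sketch (not GCC code) of the remainder-by-3 remark above:
   3 * 0x55555556 == 2^32 + 2, so the top two bits of the low 32-bit
   product x * 0x55555556 are exactly x mod 3 for x <= 0x1fffffff.
   E.g. mod3_sketch (7) == 1.  */
#include <stdint.h>

static unsigned
mod3_sketch (uint32_t x)                        /* x <= 0x1fffffff */
{
  return (x * 0x55555556u) >> 30;
}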
3298 #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
3300 rtx
3301 expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3302 rtx op0, rtx op1, rtx target, int unsignedp)
3304 enum machine_mode compute_mode;
3305 rtx tquotient;
3306 rtx quotient = 0, remainder = 0;
3307 rtx last;
3308 int size;
3309 rtx insn, set;
3310 optab optab1, optab2;
3311 int op1_is_constant, op1_is_pow2 = 0;
3312 int max_cost, extra_cost;
3313 static HOST_WIDE_INT last_div_const = 0;
3314 static HOST_WIDE_INT ext_op1;
3316 op1_is_constant = GET_CODE (op1) == CONST_INT;
3317 if (op1_is_constant)
3319 ext_op1 = INTVAL (op1);
3320 if (unsignedp)
3321 ext_op1 &= GET_MODE_MASK (mode);
3322 op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3323 || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3327 /* This is the structure of expand_divmod:
3329 First comes code to fix up the operands so we can perform the operations
3330 correctly and efficiently.
3332 Second comes a switch statement with code specific for each rounding mode.
3333 For some special operands this code emits all RTL for the desired
3334 operation, for other cases, it generates only a quotient and stores it in
3335 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
3336 to indicate that it has not done anything.
3338 Last comes code that finishes the operation. If QUOTIENT is set and
3339 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
3340 QUOTIENT is not set, it is computed using trunc rounding.
3342 We try to generate special code for division and remainder when OP1 is a
3343 constant. If |OP1| = 2**n we can use shifts and some other fast
3344 operations. For other values of OP1, we compute a carefully selected
3345 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3346 by m.
3348 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3349 half of the product. Different strategies for generating the product are
3350 implemented in expand_mult_highpart.
3352 If what we actually want is the remainder, we generate that by another
3353 by-constant multiplication and a subtraction. */
3355 /* We shouldn't be called with OP1 == const1_rtx, but some of the
3356 code below will malfunction if we are, so check here and handle
3357 the special case if so. */
3358 if (op1 == const1_rtx)
3359 return rem_flag ? const0_rtx : op0;
3361 /* When dividing by -1, we could get an overflow.
3362 negv_optab can handle overflows. */
3363 if (! unsignedp && op1 == constm1_rtx)
3365 if (rem_flag)
3366 return const0_rtx;
3367 return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT
3368 ? negv_optab : neg_optab, op0, target, 0);
3371 if (target
3372 /* Don't use the function value register as a target
3373 since we have to read it as well as write it,
3374 and function-inlining gets confused by this. */
3375 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3376 /* Don't clobber an operand while doing a multi-step calculation. */
3377 || ((rem_flag || op1_is_constant)
3378 && (reg_mentioned_p (target, op0)
3379 || (MEM_P (op0) && MEM_P (target))))
3380 || reg_mentioned_p (target, op1)
3381 || (MEM_P (op1) && MEM_P (target))))
3382 target = 0;
3384 /* Get the mode in which to perform this computation. Normally it will
3385 be MODE, but sometimes we can't do the desired operation in MODE.
3386 If so, pick a wider mode in which we can do the operation. Convert
3387 to that mode at the start to avoid repeated conversions.
3389 First see what operations we need. These depend on the expression
3390 we are evaluating. (We assume that divxx3 insns exist under the
3391 same conditions that modxx3 insns and that these insns don't normally
3392 fail. If these assumptions are not correct, we may generate less
3393 efficient code in some cases.)
3395 Then see if we find a mode in which we can open-code that operation
3396 (either a division, modulus, or shift). Finally, check for the smallest
3397 mode for which we can do the operation with a library call. */
3399 /* We might want to refine this now that we have division-by-constant
3400 optimization. Since expand_mult_highpart tries so many variants, it is
3401 not straightforward to generalize this. Maybe we should make an array
3402 of possible modes in init_expmed? Save this for GCC 2.7. */
3404 optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3405 ? (unsignedp ? lshr_optab : ashr_optab)
3406 : (unsignedp ? udiv_optab : sdiv_optab));
3407 optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3408 ? optab1
3409 : (unsignedp ? udivmod_optab : sdivmod_optab));
3411 for (compute_mode = mode; compute_mode != VOIDmode;
3412 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3413 if (optab1->handlers[compute_mode].insn_code != CODE_FOR_nothing
3414 || optab2->handlers[compute_mode].insn_code != CODE_FOR_nothing)
3415 break;
3417 if (compute_mode == VOIDmode)
3418 for (compute_mode = mode; compute_mode != VOIDmode;
3419 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3420 if (optab1->handlers[compute_mode].libfunc
3421 || optab2->handlers[compute_mode].libfunc)
3422 break;
3424 /* If we still couldn't find a mode, use MODE, but we'll probably abort
3425 in expand_binop. */
3426 if (compute_mode == VOIDmode)
3427 compute_mode = mode;
3429 if (target && GET_MODE (target) == compute_mode)
3430 tquotient = target;
3431 else
3432 tquotient = gen_reg_rtx (compute_mode);
3434 size = GET_MODE_BITSIZE (compute_mode);
3435 #if 0
3436 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3437 (mode), and thereby get better code when OP1 is a constant. Do that
3438 later. It will require going over all usages of SIZE below. */
3439 size = GET_MODE_BITSIZE (mode);
3440 #endif
3442 /* Only deduct something for a REM if the last divide done was
3443 for a different constant. Then set the constant of the last
3444 divide. */
3445 max_cost = div_cost[compute_mode]
3446 - (rem_flag && ! (last_div_const != 0 && op1_is_constant
3447 && INTVAL (op1) == last_div_const)
3448 ? mul_cost[compute_mode] + add_cost[compute_mode]
3449 : 0);
3451 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
3453 /* Now convert to the best mode to use. */
3454 if (compute_mode != mode)
3456 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
3457 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
3459 /* convert_modes may have placed op1 into a register, so we
3460 must recompute the following. */
3461 op1_is_constant = GET_CODE (op1) == CONST_INT;
3462 op1_is_pow2 = (op1_is_constant
3463 && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
3464 || (! unsignedp
3465 && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1)))))) ;
3468 /* If one of the operands is a volatile MEM, copy it into a register. */
3470 if (MEM_P (op0) && MEM_VOLATILE_P (op0))
3471 op0 = force_reg (compute_mode, op0);
3472 if (MEM_P (op1) && MEM_VOLATILE_P (op1))
3473 op1 = force_reg (compute_mode, op1);
3475 /* If we need the remainder or if OP1 is constant, we need to
3476 put OP0 in a register in case it has any queued subexpressions. */
3477 if (rem_flag || op1_is_constant)
3478 op0 = force_reg (compute_mode, op0);
3480 last = get_last_insn ();
3482 /* Promote floor rounding to trunc rounding for unsigned operations. */
3483 if (unsignedp)
3485 if (code == FLOOR_DIV_EXPR)
3486 code = TRUNC_DIV_EXPR;
3487 if (code == FLOOR_MOD_EXPR)
3488 code = TRUNC_MOD_EXPR;
3489 if (code == EXACT_DIV_EXPR && op1_is_pow2)
3490 code = TRUNC_DIV_EXPR;
3493 if (op1 != const0_rtx)
3494 switch (code)
3496 case TRUNC_MOD_EXPR:
3497 case TRUNC_DIV_EXPR:
3498 if (op1_is_constant)
3500 if (unsignedp)
3502 unsigned HOST_WIDE_INT mh, ml;
3503 int pre_shift, post_shift;
3504 int dummy;
3505 unsigned HOST_WIDE_INT d = (INTVAL (op1)
3506 & GET_MODE_MASK (compute_mode));
3508 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
3510 pre_shift = floor_log2 (d);
3511 if (rem_flag)
3513 remainder
3514 = expand_binop (compute_mode, and_optab, op0,
3515 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
3516 remainder, 1,
3517 OPTAB_LIB_WIDEN);
3518 if (remainder)
3519 return gen_lowpart (mode, remainder);
3521 quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3522 build_int_cst (NULL_TREE,
3523 pre_shift),
3524 tquotient, 1);
3526 else if (size <= HOST_BITS_PER_WIDE_INT)
3528 if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
3530 /* Most significant bit of divisor is set; emit an scc
3531 insn. */
3532 quotient = emit_store_flag (tquotient, GEU, op0, op1,
3533 compute_mode, 1, 1);
3534 if (quotient == 0)
3535 goto fail1;
3537 else
3539 /* Find a suitable multiplier and right shift count
3540 instead of multiplying with D. */
3542 mh = choose_multiplier (d, size, size,
3543 &ml, &post_shift, &dummy);
3545 /* If the suggested multiplier is more than SIZE bits,
3546 we can do better for even divisors, using an
3547 initial right shift. */
3548 if (mh != 0 && (d & 1) == 0)
3550 pre_shift = floor_log2 (d & -d);
3551 mh = choose_multiplier (d >> pre_shift, size,
3552 size - pre_shift,
3553 &ml, &post_shift, &dummy);
3554 if (mh)
3555 abort ();
3557 else
3558 pre_shift = 0;
3560 if (mh != 0)
3562 rtx t1, t2, t3, t4;
3564 if (post_shift - 1 >= BITS_PER_WORD)
3565 goto fail1;
3567 extra_cost
3568 = (shift_cost[compute_mode][post_shift - 1]
3569 + shift_cost[compute_mode][1]
3570 + 2 * add_cost[compute_mode]);
3571 t1 = expand_mult_highpart (compute_mode, op0, ml,
3572 NULL_RTX, 1,
3573 max_cost - extra_cost);
3574 if (t1 == 0)
3575 goto fail1;
3576 t2 = force_operand (gen_rtx_MINUS (compute_mode,
3577 op0, t1),
3578 NULL_RTX);
3579 t3 = expand_shift
3580 (RSHIFT_EXPR, compute_mode, t2,
3581 build_int_cst (NULL_TREE, 1),
3582 NULL_RTX, 1);
3583 t4 = force_operand (gen_rtx_PLUS (compute_mode,
3584 t1, t3),
3585 NULL_RTX);
3586 quotient = expand_shift
3587 (RSHIFT_EXPR, compute_mode, t4,
3588 build_int_cst (NULL_TREE, post_shift - 1),
3589 tquotient, 1);
3591 else
3593 rtx t1, t2;
3595 if (pre_shift >= BITS_PER_WORD
3596 || post_shift >= BITS_PER_WORD)
3597 goto fail1;
3599 t1 = expand_shift
3600 (RSHIFT_EXPR, compute_mode, op0,
3601 build_int_cst (NULL_TREE, pre_shift),
3602 NULL_RTX, 1);
3603 extra_cost
3604 = (shift_cost[compute_mode][pre_shift]
3605 + shift_cost[compute_mode][post_shift]);
3606 t2 = expand_mult_highpart (compute_mode, t1, ml,
3607 NULL_RTX, 1,
3608 max_cost - extra_cost);
3609 if (t2 == 0)
3610 goto fail1;
3611 quotient = expand_shift
3612 (RSHIFT_EXPR, compute_mode, t2,
3613 build_int_cst (NULL_TREE, post_shift),
3614 tquotient, 1);
3618 else /* Mode too wide to use the tricky code */
3619 break;
3621 insn = get_last_insn ();
3622 if (insn != last
3623 && (set = single_set (insn)) != 0
3624 && SET_DEST (set) == quotient)
3625 set_unique_reg_note (insn,
3626 REG_EQUAL,
3627 gen_rtx_UDIV (compute_mode, op0, op1));
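
/* Aside (editor's illustration, not part of expmed.c): what the
   unsigned constant-divisor paths above compute, for 32-bit
   operands.  Function names are invented; the multiplier 0x24924925
   and shifts for d == 7 follow the standard Granlund/Montgomery
   construction that choose_multiplier performs at compile time.  */
#if 0 /* illustration only */
static unsigned int
udiv_pow2 (unsigned int n, int k)      /* n / (1u << k) */
{
  return n >> k;
}

static unsigned int
umod_pow2 (unsigned int n, int k)      /* n % (1u << k) */
{
  return n & ((1u << k) - 1);
}

/* The mh != 0 path for d == 7: multiply-highpart plus the
   add-and-halve fixup, then a final right shift.  */
static unsigned int
udiv7 (unsigned int n)
{
  unsigned int t1 = (unsigned int)
    (((unsigned long long) n * 0x24924925u) >> 32);
  unsigned int t3 = (n - t1) >> 1;
  unsigned int t4 = t1 + t3;
  return t4 >> 2;                      /* post_shift - 1 */
}
#endif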
3629 else /* TRUNC_DIV, signed */
3631 unsigned HOST_WIDE_INT ml;
3632 int lgup, post_shift;
3633 HOST_WIDE_INT d = INTVAL (op1);
3634 unsigned HOST_WIDE_INT abs_d = d >= 0 ? d : -d;
3636 /* n rem d = n rem -d */
3637 if (rem_flag && d < 0)
3639 d = abs_d;
3640 op1 = gen_int_mode (abs_d, compute_mode);
3643 if (d == 1)
3644 quotient = op0;
3645 else if (d == -1)
3646 quotient = expand_unop (compute_mode, neg_optab, op0,
3647 tquotient, 0);
3648 else if (abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
3650 /* This case is not handled correctly below. */
3651 quotient = emit_store_flag (tquotient, EQ, op0, op1,
3652 compute_mode, 1, 1);
3653 if (quotient == 0)
3654 goto fail1;
3656 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
3657 && (rem_flag ? smod_pow2_cheap[compute_mode]
3658 : sdiv_pow2_cheap[compute_mode])
3659 /* We assume that the cheap metric is true if the
3660 optab has an expander for this mode. */
3661 && (((rem_flag ? smod_optab : sdiv_optab)
3662 ->handlers[compute_mode].insn_code
3663 != CODE_FOR_nothing)
3664 || (sdivmod_optab->handlers[compute_mode]
3665 .insn_code != CODE_FOR_nothing)))
3667 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
3669 if (rem_flag)
3671 remainder = expand_smod_pow2 (compute_mode, op0, d);
3672 if (remainder)
3673 return gen_lowpart (mode, remainder);
3675 quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
3677 /* We have computed OP0 / abs(OP1). If OP1 is negative,
3678 negate the quotient. */
3679 if (d < 0)
3681 insn = get_last_insn ();
3682 if (insn != last
3683 && (set = single_set (insn)) != 0
3684 && SET_DEST (set) == quotient
3685 && abs_d < ((unsigned HOST_WIDE_INT) 1
3686 << (HOST_BITS_PER_WIDE_INT - 1)))
3687 set_unique_reg_note (insn,
3688 REG_EQUAL,
3689 gen_rtx_DIV (compute_mode,
3690 op0,
3691 GEN_INT
3692 (trunc_int_for_mode
3693 (abs_d,
3694 compute_mode))));
3696 quotient = expand_unop (compute_mode, neg_optab,
3697 quotient, quotient, 0);
3700 else if (size <= HOST_BITS_PER_WIDE_INT)
3702 choose_multiplier (abs_d, size, size - 1,
3703 &ml, &post_shift, &lgup);
3704 if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
3706 rtx t1, t2, t3;
3708 if (post_shift >= BITS_PER_WORD
3709 || size - 1 >= BITS_PER_WORD)
3710 goto fail1;
3712 extra_cost = (shift_cost[compute_mode][post_shift]
3713 + shift_cost[compute_mode][size - 1]
3714 + add_cost[compute_mode]);
3715 t1 = expand_mult_highpart (compute_mode, op0, ml,
3716 NULL_RTX, 0,
3717 max_cost - extra_cost);
3718 if (t1 == 0)
3719 goto fail1;
3720 t2 = expand_shift
3721 (RSHIFT_EXPR, compute_mode, t1,
3722 build_int_cst (NULL_TREE, post_shift),
3723 NULL_RTX, 0);
3724 t3 = expand_shift
3725 (RSHIFT_EXPR, compute_mode, op0,
3726 build_int_cst (NULL_TREE, size - 1),
3727 NULL_RTX, 0);
3728 if (d < 0)
3729 quotient
3730 = force_operand (gen_rtx_MINUS (compute_mode,
3731 t3, t2),
3732 tquotient);
3733 else
3734 quotient
3735 = force_operand (gen_rtx_MINUS (compute_mode,
3736 t2, t3),
3737 tquotient);
3739 else
3741 rtx t1, t2, t3, t4;
3743 if (post_shift >= BITS_PER_WORD
3744 || size - 1 >= BITS_PER_WORD)
3745 goto fail1;
3747 ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
3748 extra_cost = (shift_cost[compute_mode][post_shift]
3749 + shift_cost[compute_mode][size - 1]
3750 + 2 * add_cost[compute_mode]);
3751 t1 = expand_mult_highpart (compute_mode, op0, ml,
3752 NULL_RTX, 0,
3753 max_cost - extra_cost);
3754 if (t1 == 0)
3755 goto fail1;
3756 t2 = force_operand (gen_rtx_PLUS (compute_mode,
3757 t1, op0),
3758 NULL_RTX);
3759 t3 = expand_shift
3760 (RSHIFT_EXPR, compute_mode, t2,
3761 build_int_cst (NULL_TREE, post_shift),
3762 NULL_RTX, 0);
3763 t4 = expand_shift
3764 (RSHIFT_EXPR, compute_mode, op0,
3765 build_int_cst (NULL_TREE, size - 1),
3766 NULL_RTX, 0);
3767 if (d < 0)
3768 quotient
3769 = force_operand (gen_rtx_MINUS (compute_mode,
3770 t4, t3),
3771 tquotient);
3772 else
3773 quotient
3774 = force_operand (gen_rtx_MINUS (compute_mode,
3775 t3, t4),
3776 tquotient);
3779 else /* Mode too wide to use the tricky code */
3780 break;
3782 insn = get_last_insn ();
3783 if (insn != last
3784 && (set = single_set (insn)) != 0
3785 && SET_DEST (set) == quotient)
3786 set_unique_reg_note (insn,
3787 REG_EQUAL,
3788 gen_rtx_DIV (compute_mode, op0, op1));
3790 break;
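
/* Aside (editor's illustration, not part of expmed.c): the signed
   power-of-two case in C, assuming 32-bit int and an arithmetic
   right shift of negative values (implementation-defined in C, but
   guaranteed by the RTL emitted above).  This is the branch-free
   shape expand_sdiv_pow2 typically produces.  */
#if 0 /* illustration only */
static int
sdiv_pow2 (int n, int k)               /* n / (1 << k), truncating */
{
  int bias = (n >> 31) & ((1 << k) - 1);   /* d - 1 when n < 0 */
  return (n + bias) >> k;
}
#endif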
3792 fail1:
3793 delete_insns_since (last);
3794 break;
3796 case FLOOR_DIV_EXPR:
3797 case FLOOR_MOD_EXPR:
3798 /* We will come here only for signed operations. */
3799 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
3801 unsigned HOST_WIDE_INT mh, ml;
3802 int pre_shift, lgup, post_shift;
3803 HOST_WIDE_INT d = INTVAL (op1);
3805 if (d > 0)
3807 /* We could just as easily deal with negative constants here,
3808 but it does not seem worth the trouble for GCC 2.6. */
3809 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
3811 pre_shift = floor_log2 (d);
3812 if (rem_flag)
3814 remainder = expand_binop (compute_mode, and_optab, op0,
3815 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
3816 remainder, 0, OPTAB_LIB_WIDEN);
3817 if (remainder)
3818 return gen_lowpart (mode, remainder);
3820 quotient = expand_shift
3821 (RSHIFT_EXPR, compute_mode, op0,
3822 build_int_cst (NULL_TREE, pre_shift),
3823 tquotient, 0);
3825 else
3827 rtx t1, t2, t3, t4;
3829 mh = choose_multiplier (d, size, size - 1,
3830 &ml, &post_shift, &lgup);
3831 if (mh)
3832 abort ();
3834 if (post_shift < BITS_PER_WORD
3835 && size - 1 < BITS_PER_WORD)
3837 t1 = expand_shift
3838 (RSHIFT_EXPR, compute_mode, op0,
3839 build_int_cst (NULL_TREE, size - 1),
3840 NULL_RTX, 0);
3841 t2 = expand_binop (compute_mode, xor_optab, op0, t1,
3842 NULL_RTX, 0, OPTAB_WIDEN);
3843 extra_cost = (shift_cost[compute_mode][post_shift]
3844 + shift_cost[compute_mode][size - 1]
3845 + 2 * add_cost[compute_mode]);
3846 t3 = expand_mult_highpart (compute_mode, t2, ml,
3847 NULL_RTX, 1,
3848 max_cost - extra_cost);
3849 if (t3 != 0)
3851 t4 = expand_shift
3852 (RSHIFT_EXPR, compute_mode, t3,
3853 build_int_cst (NULL_TREE, post_shift),
3854 NULL_RTX, 1);
3855 quotient = expand_binop (compute_mode, xor_optab,
3856 t4, t1, tquotient, 0,
3857 OPTAB_WIDEN);
3862 else
3864 rtx nsign, t1, t2, t3, t4;
3865 t1 = force_operand (gen_rtx_PLUS (compute_mode,
3866 op0, constm1_rtx), NULL_RTX);
3867 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
3868 0, OPTAB_WIDEN);
3869 nsign = expand_shift
3870 (RSHIFT_EXPR, compute_mode, t2,
3871 build_int_cst (NULL_TREE, size - 1),
3872 NULL_RTX, 0);
3873 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
3874 NULL_RTX);
3875 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
3876 NULL_RTX, 0);
3877 if (t4)
3879 rtx t5;
3880 t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
3881 NULL_RTX, 0);
3882 quotient = force_operand (gen_rtx_PLUS (compute_mode,
3883 t4, t5),
3884 tquotient);
3889 if (quotient != 0)
3890 break;
3891 delete_insns_since (last);
3893 /* Try using an instruction that produces both the quotient and
3894 remainder, using truncation. We can easily compensate the quotient
3895 or remainder to get floor rounding, once we have the remainder.
3896 Notice that we also compute the final remainder value here,
3897 and return the result right away. */
3898 if (target == 0 || GET_MODE (target) != compute_mode)
3899 target = gen_reg_rtx (compute_mode);
3901 if (rem_flag)
3903 remainder
3904 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
3905 quotient = gen_reg_rtx (compute_mode);
3907 else
3909 quotient
3910 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
3911 remainder = gen_reg_rtx (compute_mode);
3914 if (expand_twoval_binop (sdivmod_optab, op0, op1,
3915 quotient, remainder, 0))
3917 /* This could be computed with a branch-less sequence.
3918 Save that for later. */
3919 rtx tem;
3920 rtx label = gen_label_rtx ();
3921 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
3922 tem = expand_binop (compute_mode, xor_optab, op0, op1,
3923 NULL_RTX, 0, OPTAB_WIDEN);
3924 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
3925 expand_dec (quotient, const1_rtx);
3926 expand_inc (remainder, op1);
3927 emit_label (label);
3928 return gen_lowpart (mode, rem_flag ? remainder : quotient);
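
/* Aside (editor's illustration, not part of expmed.c): the floor
   compensation just emitted, restated in C.  Truncating division is
   corrected whenever the remainder is nonzero and the operands
   differ in sign, exactly the two tests jumped over above.  */
#if 0 /* illustration only */
static void
floor_divmod (int n, int d, int *q, int *r)
{
  *q = n / d;                          /* truncating divide */
  *r = n % d;
  if (*r != 0 && (n ^ d) < 0)          /* signs differ */
    {
      *q -= 1;
      *r += d;
    }
}
#endif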
3931 /* No luck with division elimination or divmod. Have to do it
3932 by conditionally adjusting op0 *and* the result. */
3934 rtx label1, label2, label3, label4, label5;
3935 rtx adjusted_op0;
3936 rtx tem;
3938 quotient = gen_reg_rtx (compute_mode);
3939 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
3940 label1 = gen_label_rtx ();
3941 label2 = gen_label_rtx ();
3942 label3 = gen_label_rtx ();
3943 label4 = gen_label_rtx ();
3944 label5 = gen_label_rtx ();
3945 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
3946 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
3947 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3948 quotient, 0, OPTAB_LIB_WIDEN);
3949 if (tem != quotient)
3950 emit_move_insn (quotient, tem);
3951 emit_jump_insn (gen_jump (label5));
3952 emit_barrier ();
3953 emit_label (label1);
3954 expand_inc (adjusted_op0, const1_rtx);
3955 emit_jump_insn (gen_jump (label4));
3956 emit_barrier ();
3957 emit_label (label2);
3958 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
3959 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3960 quotient, 0, OPTAB_LIB_WIDEN);
3961 if (tem != quotient)
3962 emit_move_insn (quotient, tem);
3963 emit_jump_insn (gen_jump (label5));
3964 emit_barrier ();
3965 emit_label (label3);
3966 expand_dec (adjusted_op0, const1_rtx);
3967 emit_label (label4);
3968 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3969 quotient, 0, OPTAB_LIB_WIDEN);
3970 if (tem != quotient)
3971 emit_move_insn (quotient, tem);
3972 expand_dec (quotient, const1_rtx);
3973 emit_label (label5);
3975 break;
3977 case CEIL_DIV_EXPR:
3978 case CEIL_MOD_EXPR:
3979 if (unsignedp)
3981 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
3983 rtx t1, t2, t3;
3984 unsigned HOST_WIDE_INT d = INTVAL (op1);
3985 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3986 build_int_cst (NULL_TREE, floor_log2 (d)),
3987 tquotient, 1);
3988 t2 = expand_binop (compute_mode, and_optab, op0,
3989 GEN_INT (d - 1),
3990 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3991 t3 = gen_reg_rtx (compute_mode);
3992 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
3993 compute_mode, 1, 1);
3994 if (t3 == 0)
3996 rtx lab;
3997 lab = gen_label_rtx ();
3998 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
3999 expand_inc (t1, const1_rtx);
4000 emit_label (lab);
4001 quotient = t1;
4003 else
4004 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4005 t1, t3),
4006 tquotient);
4007 break;
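
/* Aside (editor's illustration, not part of expmed.c): the unsigned
   power-of-two ceiling case above, for 32-bit operands: shift, then
   add one if any low-order bits were shifted out.  */
#if 0 /* illustration only */
static unsigned int
uceil_pow2 (unsigned int n, int k)     /* ceil (n / (1u << k)) */
{
  unsigned int t1 = n >> k;
  unsigned int t3 = (n & ((1u << k) - 1)) != 0;
  return t1 + t3;
}
#endif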
4010 /* Try using an instruction that produces both the quotient and
4011 remainder, using truncation. We can easily compensate the
4012 quotient or remainder to get ceiling rounding, once we have the
4013 remainder. Notice that we also compute the final remainder
4014 value here, and return the result right away. */
4015 if (target == 0 || GET_MODE (target) != compute_mode)
4016 target = gen_reg_rtx (compute_mode);
4018 if (rem_flag)
4020 remainder = (REG_P (target)
4021 ? target : gen_reg_rtx (compute_mode));
4022 quotient = gen_reg_rtx (compute_mode);
4024 else
4026 quotient = (REG_P (target)
4027 ? target : gen_reg_rtx (compute_mode));
4028 remainder = gen_reg_rtx (compute_mode);
4031 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4032 remainder, 1))
4034 /* This could be computed with a branch-less sequence.
4035 Save that for later. */
4036 rtx label = gen_label_rtx ();
4037 do_cmp_and_jump (remainder, const0_rtx, EQ,
4038 compute_mode, label);
4039 expand_inc (quotient, const1_rtx);
4040 expand_dec (remainder, op1);
4041 emit_label (label);
4042 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4045 /* No luck with division elimination or divmod. Have to do it
4046 by conditionally adjusting op0 *and* the result. */
4048 rtx label1, label2;
4049 rtx adjusted_op0, tem;
4051 quotient = gen_reg_rtx (compute_mode);
4052 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4053 label1 = gen_label_rtx ();
4054 label2 = gen_label_rtx ();
4055 do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4056 compute_mode, label1);
4057 emit_move_insn (quotient, const0_rtx);
4058 emit_jump_insn (gen_jump (label2));
4059 emit_barrier ();
4060 emit_label (label1);
4061 expand_dec (adjusted_op0, const1_rtx);
4062 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4063 quotient, 1, OPTAB_LIB_WIDEN);
4064 if (tem != quotient)
4065 emit_move_insn (quotient, tem);
4066 expand_inc (quotient, const1_rtx);
4067 emit_label (label2);
4070 else /* signed */
4072 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4073 && INTVAL (op1) >= 0)
4075 /* This is extremely similar to the code for the unsigned case
4076 above. For 2.7 we should merge these variants, but for
4077 2.6.1 I don't want to touch the code for unsigned since that
4078 gets used in C. The signed case will only be used by other
4079 languages (Ada). */
4081 rtx t1, t2, t3;
4082 unsigned HOST_WIDE_INT d = INTVAL (op1);
4083 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4084 build_int_cst (NULL_TREE, floor_log2 (d)),
4085 tquotient, 0);
4086 t2 = expand_binop (compute_mode, and_optab, op0,
4087 GEN_INT (d - 1),
4088 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4089 t3 = gen_reg_rtx (compute_mode);
4090 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4091 compute_mode, 1, 1);
4092 if (t3 == 0)
4094 rtx lab;
4095 lab = gen_label_rtx ();
4096 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4097 expand_inc (t1, const1_rtx);
4098 emit_label (lab);
4099 quotient = t1;
4101 else
4102 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4103 t1, t3),
4104 tquotient);
4105 break;
4108 /* Try using an instruction that produces both the quotient and
4109 remainder, using truncation. We can easily compensate the
4110 quotient or remainder to get ceiling rounding, once we have the
4111 remainder. Notice that we also compute the final remainder
4112 value here, and return the result right away. */
4113 if (target == 0 || GET_MODE (target) != compute_mode)
4114 target = gen_reg_rtx (compute_mode);
4115 if (rem_flag)
4117 remainder = (REG_P (target)
4118 ? target : gen_reg_rtx (compute_mode));
4119 quotient = gen_reg_rtx (compute_mode);
4121 else
4123 quotient = (REG_P (target)
4124 ? target : gen_reg_rtx (compute_mode));
4125 remainder = gen_reg_rtx (compute_mode);
4128 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4129 remainder, 0))
4131 /* This could be computed with a branch-less sequence.
4132 Save that for later. */
4133 rtx tem;
4134 rtx label = gen_label_rtx ();
4135 do_cmp_and_jump (remainder, const0_rtx, EQ,
4136 compute_mode, label);
4137 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4138 NULL_RTX, 0, OPTAB_WIDEN);
4139 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4140 expand_inc (quotient, const1_rtx);
4141 expand_dec (remainder, op1);
4142 emit_label (label);
4143 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4146 /* No luck with division elimination or divmod. Have to do it
4147 by conditionally adjusting op0 *and* the result. */
4149 rtx label1, label2, label3, label4, label5;
4150 rtx adjusted_op0;
4151 rtx tem;
4153 quotient = gen_reg_rtx (compute_mode);
4154 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4155 label1 = gen_label_rtx ();
4156 label2 = gen_label_rtx ();
4157 label3 = gen_label_rtx ();
4158 label4 = gen_label_rtx ();
4159 label5 = gen_label_rtx ();
4160 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4161 do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4162 compute_mode, label1);
4163 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4164 quotient, 0, OPTAB_LIB_WIDEN);
4165 if (tem != quotient)
4166 emit_move_insn (quotient, tem);
4167 emit_jump_insn (gen_jump (label5));
4168 emit_barrier ();
4169 emit_label (label1);
4170 expand_dec (adjusted_op0, const1_rtx);
4171 emit_jump_insn (gen_jump (label4));
4172 emit_barrier ();
4173 emit_label (label2);
4174 do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4175 compute_mode, label3);
4176 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4177 quotient, 0, OPTAB_LIB_WIDEN);
4178 if (tem != quotient)
4179 emit_move_insn (quotient, tem);
4180 emit_jump_insn (gen_jump (label5));
4181 emit_barrier ();
4182 emit_label (label3);
4183 expand_inc (adjusted_op0, const1_rtx);
4184 emit_label (label4);
4185 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4186 quotient, 0, OPTAB_LIB_WIDEN);
4187 if (tem != quotient)
4188 emit_move_insn (quotient, tem);
4189 expand_inc (quotient, const1_rtx);
4190 emit_label (label5);
4193 break;
4195 case EXACT_DIV_EXPR:
4196 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4198 HOST_WIDE_INT d = INTVAL (op1);
4199 unsigned HOST_WIDE_INT ml;
4200 int pre_shift;
4201 rtx t1;
4203 pre_shift = floor_log2 (d & -d);
4204 ml = invert_mod2n (d >> pre_shift, size);
4205 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4206 build_int_cst (NULL_TREE, pre_shift),
4207 NULL_RTX, unsignedp);
4208 quotient = expand_mult (compute_mode, t1,
4209 gen_int_mode (ml, compute_mode),
4210 NULL_RTX, 1);
4212 insn = get_last_insn ();
4213 set_unique_reg_note (insn,
4214 REG_EQUAL,
4215 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4216 compute_mode,
4217 op0, op1));
4219 break;
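
/* Aside (editor's illustration, not part of expmed.c): what
   invert_mod2n provides for the code above.  An odd divisor has a
   multiplicative inverse modulo 2^32, computable by Newton
   iteration; exact division then becomes one multiply.  Even
   divisors are first shifted right by pre_shift, as above.  */
#if 0 /* illustration only */
static unsigned int
invert_mod2_32 (unsigned int d)        /* D must be odd */
{
  unsigned int x = d;                  /* d*d == 1 mod 8: 3 bits right */
  x *= 2 - d * x;                      /* 6 bits */
  x *= 2 - d * x;                      /* 12 bits */
  x *= 2 - d * x;                      /* 24 bits */
  x *= 2 - d * x;                      /* 48 bits >= 32 */
  return x;
}

static unsigned int
exact_udiv (unsigned int n, unsigned int d)  /* N known divisible by D */
{
  return n * invert_mod2_32 (d);
}
#endif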
4221 case ROUND_DIV_EXPR:
4222 case ROUND_MOD_EXPR:
4223 if (unsignedp)
4225 rtx tem;
4226 rtx label;
4227 label = gen_label_rtx ();
4228 quotient = gen_reg_rtx (compute_mode);
4229 remainder = gen_reg_rtx (compute_mode);
4230 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4232 rtx tem;
4233 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4234 quotient, 1, OPTAB_LIB_WIDEN);
4235 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4236 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4237 remainder, 1, OPTAB_LIB_WIDEN);
4239 tem = plus_constant (op1, -1);
4240 tem = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4241 build_int_cst (NULL_TREE, 1),
4242 NULL_RTX, 1);
4243 do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4244 expand_inc (quotient, const1_rtx);
4245 expand_dec (remainder, op1);
4246 emit_label (label);
4248 else
4250 rtx abs_rem, abs_op1, tem, mask;
4251 rtx label;
4252 label = gen_label_rtx ();
4253 quotient = gen_reg_rtx (compute_mode);
4254 remainder = gen_reg_rtx (compute_mode);
4255 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4257 rtx tem;
4258 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4259 quotient, 0, OPTAB_LIB_WIDEN);
4260 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4261 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4262 remainder, 0, OPTAB_LIB_WIDEN);
4264 abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4265 abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4266 tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4267 build_int_cst (NULL_TREE, 1),
4268 NULL_RTX, 1);
4269 do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4270 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4271 NULL_RTX, 0, OPTAB_WIDEN);
4272 mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4273 build_int_cst (NULL_TREE, size - 1),
4274 NULL_RTX, 0);
4275 tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4276 NULL_RTX, 0, OPTAB_WIDEN);
4277 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4278 NULL_RTX, 0, OPTAB_WIDEN);
4279 expand_inc (quotient, tem);
4280 tem = expand_binop (compute_mode, xor_optab, mask, op1,
4281 NULL_RTX, 0, OPTAB_WIDEN);
4282 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4283 NULL_RTX, 0, OPTAB_WIDEN);
4284 expand_dec (remainder, tem);
4285 emit_label (label);
4287 return gen_lowpart (mode, rem_flag ? remainder : quotient);
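
/* Aside (editor's illustration, not part of expmed.c): the unsigned
   round-to-nearest compensation above.  The quotient is bumped when
   the remainder exceeds (d - 1) / 2, so exact halves round up; the
   remainder wraps modulo 2^32, just as the RTL decrement does.  */
#if 0 /* illustration only */
static void
uround_divmod (unsigned int n, unsigned int d,
               unsigned int *q, unsigned int *r)
{
  *q = n / d;
  *r = n % d;
  if (*r > (d - 1) / 2)
    {
      *q += 1;
      *r -= d;
    }
}
#endif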
4289 default:
4290 abort ();
4293 if (quotient == 0)
4295 if (target && GET_MODE (target) != compute_mode)
4296 target = 0;
4298 if (rem_flag)
4300 /* Try to produce the remainder without producing the quotient.
4301 If we seem to have a divmod pattern that does not require widening,
4302 don't try widening here. We should really have a WIDEN argument
4303 to expand_twoval_binop, since what we'd really like to do here is
4304 1) try a mod insn in compute_mode
4305 2) try a divmod insn in compute_mode
4306 3) try a div insn in compute_mode and multiply-subtract to get
4307 remainder
4308 4) try the same things with widening allowed. */
4309 remainder
4310 = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4311 op0, op1, target,
4312 unsignedp,
4313 ((optab2->handlers[compute_mode].insn_code
4314 != CODE_FOR_nothing)
4315 ? OPTAB_DIRECT : OPTAB_WIDEN));
4316 if (remainder == 0)
4318 /* No luck there. Can we do remainder and divide at once
4319 without a library call? */
4320 remainder = gen_reg_rtx (compute_mode);
4321 if (! expand_twoval_binop ((unsignedp
4322 ? udivmod_optab
4323 : sdivmod_optab),
4324 op0, op1,
4325 NULL_RTX, remainder, unsignedp))
4326 remainder = 0;
4329 if (remainder)
4330 return gen_lowpart (mode, remainder);
4333 /* Produce the quotient. Try a quotient insn, but not a library call.
4334 If we have a divmod in this mode, use it in preference to widening
4335 the div (for this test we assume it will not fail). Note that optab2
4336 is set to the one of the two optabs that the call below will use. */
4337 quotient
4338 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4339 op0, op1, rem_flag ? NULL_RTX : target,
4340 unsignedp,
4341 ((optab2->handlers[compute_mode].insn_code
4342 != CODE_FOR_nothing)
4343 ? OPTAB_DIRECT : OPTAB_WIDEN));
4345 if (quotient == 0)
4347 /* No luck there. Try a quotient-and-remainder insn,
4348 keeping the quotient alone. */
4349 quotient = gen_reg_rtx (compute_mode);
4350 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4351 op0, op1,
4352 quotient, NULL_RTX, unsignedp))
4354 quotient = 0;
4355 if (! rem_flag)
4356 /* Still no luck. If we are not computing the remainder,
4357 use a library call for the quotient. */
4358 quotient = sign_expand_binop (compute_mode,
4359 udiv_optab, sdiv_optab,
4360 op0, op1, target,
4361 unsignedp, OPTAB_LIB_WIDEN);
4366 if (rem_flag)
4368 if (target && GET_MODE (target) != compute_mode)
4369 target = 0;
4371 if (quotient == 0)
4373 /* No divide instruction either. Use library for remainder. */
4374 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4375 op0, op1, target,
4376 unsignedp, OPTAB_LIB_WIDEN);
4377 /* No remainder function. Try a quotient-and-remainder
4378 function, keeping the remainder. */
4379 if (!remainder)
4381 remainder = gen_reg_rtx (compute_mode);
4382 if (!expand_twoval_binop_libfunc
4383 (unsignedp ? udivmod_optab : sdivmod_optab,
4384 op0, op1,
4385 NULL_RTX, remainder,
4386 unsignedp ? UMOD : MOD))
4387 remainder = NULL_RTX;
4390 else
4392 /* We divided. Now finish doing X - Y * (X / Y). */
4393 remainder = expand_mult (compute_mode, quotient, op1,
4394 NULL_RTX, unsignedp);
4395 remainder = expand_binop (compute_mode, sub_optab, op0,
4396 remainder, target, unsignedp,
4397 OPTAB_LIB_WIDEN);
4401 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4404 /* Return a tree node with data type TYPE, describing the value of X.
4405 Usually this is a VAR_DECL, if there is no obvious better choice.
4406 X may be an expression; however, we only support those expressions
4407 generated by loop.c. */
4409 tree
4410 make_tree (tree type, rtx x)
4412 tree t;
4414 switch (GET_CODE (x))
4416 case CONST_INT:
4418 HOST_WIDE_INT hi = 0;
4420 if (INTVAL (x) < 0
4421 && !(TYPE_UNSIGNED (type)
4422 && (GET_MODE_BITSIZE (TYPE_MODE (type))
4423 < HOST_BITS_PER_WIDE_INT)))
4424 hi = -1;
4426 t = build_int_cst_wide (type, INTVAL (x), hi);
4428 return t;
4431 case CONST_DOUBLE:
4432 if (GET_MODE (x) == VOIDmode)
4433 t = build_int_cst_wide (type,
4434 CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
4435 else
4437 REAL_VALUE_TYPE d;
4439 REAL_VALUE_FROM_CONST_DOUBLE (d, x);
4440 t = build_real (type, d);
4443 return t;
4445 case CONST_VECTOR:
4447 int i, units;
4448 rtx elt;
4449 tree t = NULL_TREE;
4451 units = CONST_VECTOR_NUNITS (x);
4453 /* Build a tree with vector elements. */
4454 for (i = units - 1; i >= 0; --i)
4456 elt = CONST_VECTOR_ELT (x, i);
4457 t = tree_cons (NULL_TREE, make_tree (type, elt), t);
4460 return build_vector (type, t);
4463 case PLUS:
4464 return fold (build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4465 make_tree (type, XEXP (x, 1))));
4467 case MINUS:
4468 return fold (build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4469 make_tree (type, XEXP (x, 1))));
4471 case NEG:
4472 return fold (build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0))));
4474 case MULT:
4475 return fold (build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
4476 make_tree (type, XEXP (x, 1))));
4478 case ASHIFT:
4479 return fold (build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
4480 make_tree (type, XEXP (x, 1))));
4482 case LSHIFTRT:
4483 t = lang_hooks.types.unsigned_type (type);
4484 return fold (convert (type,
4485 build2 (RSHIFT_EXPR, t,
4486 make_tree (t, XEXP (x, 0)),
4487 make_tree (type, XEXP (x, 1)))));
4489 case ASHIFTRT:
4490 t = lang_hooks.types.signed_type (type);
4491 return fold (convert (type,
4492 build2 (RSHIFT_EXPR, t,
4493 make_tree (t, XEXP (x, 0)),
4494 make_tree (type, XEXP (x, 1)))));
4496 case DIV:
4497 if (TREE_CODE (type) != REAL_TYPE)
4498 t = lang_hooks.types.signed_type (type);
4499 else
4500 t = type;
4502 return fold (convert (type,
4503 build2 (TRUNC_DIV_EXPR, t,
4504 make_tree (t, XEXP (x, 0)),
4505 make_tree (t, XEXP (x, 1)))));
4506 case UDIV:
4507 t = lang_hooks.types.unsigned_type (type);
4508 return fold (convert (type,
4509 build2 (TRUNC_DIV_EXPR, t,
4510 make_tree (t, XEXP (x, 0)),
4511 make_tree (t, XEXP (x, 1)))));
4513 case SIGN_EXTEND:
4514 case ZERO_EXTEND:
4515 t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
4516 GET_CODE (x) == ZERO_EXTEND);
4517 return fold (convert (type, make_tree (t, XEXP (x, 0))));
4519 default:
4520 t = build_decl (VAR_DECL, NULL_TREE, type);
4522 /* If TYPE is a POINTER_TYPE, X might be Pmode with TYPE_MODE being
4523 ptr_mode. So convert. */
4524 if (POINTER_TYPE_P (type))
4525 x = convert_memory_address (TYPE_MODE (type), x);
4527 /* Note that we do *not* use SET_DECL_RTL here, because we do not
4528 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
4529 t->decl.rtl = x;
4531 return t;
4535 /* Check whether the multiplication X * MULT + ADD overflows.
4536 X, MULT and ADD must be CONST_*.
4537 MODE is the machine mode for the computation.
4538 X and MULT must have mode MODE (a VOIDmode constant defaults to
4539 MODE). ADD may have a different mode.
4540 UNSIGNEDP is nonzero to do unsigned multiplication. */
4542 bool
4543 const_mult_add_overflow_p (rtx x, rtx mult, rtx add,
4544 enum machine_mode mode, int unsignedp)
4546 tree type, mult_type, add_type, result;
4548 type = lang_hooks.types.type_for_mode (mode, unsignedp);
4550 /* In order to get a proper overflow indication from an unsigned
4551 type, we have to pretend that it's a sizetype. */
4552 mult_type = type;
4553 if (unsignedp)
4555 /* FIXME: It would be nice if we could step directly from this
4556 type to its sizetype equivalent. */
4557 mult_type = build_distinct_type_copy (type);
4558 TYPE_IS_SIZETYPE (mult_type) = 1;
4561 add_type = (GET_MODE (add) == VOIDmode ? mult_type
4562 : lang_hooks.types.type_for_mode (GET_MODE (add), unsignedp));
4564 result = fold (build2 (PLUS_EXPR, mult_type,
4565 fold (build2 (MULT_EXPR, mult_type,
4566 make_tree (mult_type, x),
4567 make_tree (mult_type, mult))),
4568 make_tree (add_type, add)));
4570 return TREE_CONSTANT_OVERFLOW (result);
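
/* Aside (editor's illustration, not part of expmed.c): outside the
   compiler, the same question -- does X * MULT + ADD overflow? --
   can be put to the checked-arithmetic builtins of modern GCC and
   Clang.  Shown only as a point of comparison with the
   fold-and-inspect approach above.  */
#if 0 /* illustration only */
static int
mult_add_overflows (long x, long mult, long add)
{
  long t, r;
  return __builtin_mul_overflow (x, mult, &t)
         || __builtin_add_overflow (t, add, &r);
}
#endif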
4573 /* Return an rtx representing the value of X * MULT + ADD.
4574 TARGET is a suggestion for where to store the result (an rtx).
4575 MODE is the machine mode for the computation.
4576 X and MULT must have mode MODE (a VOIDmode constant defaults to
4577 MODE). ADD may have a different mode.
4578 UNSIGNEDP is nonzero to do unsigned multiplication.
4579 This may emit insns. */
4581 rtx
4582 expand_mult_add (rtx x, rtx target, rtx mult, rtx add, enum machine_mode mode,
4583 int unsignedp)
4585 tree type = lang_hooks.types.type_for_mode (mode, unsignedp);
4586 tree add_type = (GET_MODE (add) == VOIDmode
4587 ? type: lang_hooks.types.type_for_mode (GET_MODE (add),
4588 unsignedp));
4589 tree result = fold (build2 (PLUS_EXPR, type,
4590 fold (build2 (MULT_EXPR, type,
4591 make_tree (type, x),
4592 make_tree (type, mult))),
4593 make_tree (add_type, add)));
4595 return expand_expr (result, target, VOIDmode, 0);
4598 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
4599 and returning TARGET.
4601 If TARGET is 0, a pseudo-register or constant is returned. */
4603 rtx
4604 expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
4606 rtx tem = 0;
4608 if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
4609 tem = simplify_binary_operation (AND, mode, op0, op1);
4610 if (tem == 0)
4611 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
4613 if (target == 0)
4614 target = tem;
4615 else if (tem != target)
4616 emit_move_insn (target, tem);
4617 return target;
4620 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
4621 and storing in TARGET. Normally return TARGET.
4622 Return 0 if that cannot be done.
4624 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
4625 it is VOIDmode, they cannot both be CONST_INT.
4627 UNSIGNEDP is for the case where we have to widen the operands
4628 to perform the operation. It says to use zero-extension.
4630 NORMALIZEP is 1 if we should convert the result to be either zero
4631 or one. NORMALIZEP is -1 if we should convert the result to be
4632 either zero or -1. If NORMALIZEP is zero, the result will be left
4633 "raw" out of the scc insn. */
4635 rtx
4636 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
4637 enum machine_mode mode, int unsignedp, int normalizep)
4639 rtx subtarget;
4640 enum insn_code icode;
4641 enum machine_mode compare_mode;
4642 enum machine_mode target_mode = GET_MODE (target);
4643 rtx tem;
4644 rtx last = get_last_insn ();
4645 rtx pattern, comparison;
4647 if (unsignedp)
4648 code = unsigned_condition (code);
4650 /* If one operand is constant, make it the second one. Only do this
4651 if the other operand is not constant as well. */
4653 if (swap_commutative_operands_p (op0, op1))
4655 tem = op0;
4656 op0 = op1;
4657 op1 = tem;
4658 code = swap_condition (code);
4661 if (mode == VOIDmode)
4662 mode = GET_MODE (op0);
4664 /* For some comparisons with 1 and -1, we can convert this to
4665 comparisons with zero. This will often produce more opportunities for
4666 store-flag insns. */
4668 switch (code)
4670 case LT:
4671 if (op1 == const1_rtx)
4672 op1 = const0_rtx, code = LE;
4673 break;
4674 case LE:
4675 if (op1 == constm1_rtx)
4676 op1 = const0_rtx, code = LT;
4677 break;
4678 case GE:
4679 if (op1 == const1_rtx)
4680 op1 = const0_rtx, code = GT;
4681 break;
4682 case GT:
4683 if (op1 == constm1_rtx)
4684 op1 = const0_rtx, code = GE;
4685 break;
4686 case GEU:
4687 if (op1 == const1_rtx)
4688 op1 = const0_rtx, code = NE;
4689 break;
4690 case LTU:
4691 if (op1 == const1_rtx)
4692 op1 = const0_rtx, code = EQ;
4693 break;
4694 default:
4695 break;
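
/* Aside (editor's illustration, not part of expmed.c): the rewrites
   above, as equivalences on plain C operands; comparisons against
   zero are likelier to match a store-flag pattern.

       a <   1  ==  a <= 0        a <= -1  ==  a <  0     (signed)
       a >=  1  ==  a >  0        a >  -1  ==  a >= 0     (signed)
       u >=  1  ==  u != 0        u <   1  ==  u == 0     (unsigned)  */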
4698 /* If we are comparing a double-word integer with zero or -1, we can
4699 convert the comparison into one involving a single word. */
4700 if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
4701 && GET_MODE_CLASS (mode) == MODE_INT
4702 && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
4704 if ((code == EQ || code == NE)
4705 && (op1 == const0_rtx || op1 == constm1_rtx))
4707 rtx op00, op01, op0both;
4709 /* Do a logical OR or AND of the two words and compare the result. */
4710 op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
4711 op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
4712 op0both = expand_binop (word_mode,
4713 op1 == const0_rtx ? ior_optab : and_optab,
4714 op00, op01, NULL_RTX, unsignedp, OPTAB_DIRECT);
4716 if (op0both != 0)
4717 return emit_store_flag (target, code, op0both, op1, word_mode,
4718 unsignedp, normalizep);
4720 else if ((code == LT || code == GE) && op1 == const0_rtx)
4722 rtx op0h;
4724 /* If testing the sign bit, we can just test the high word. */
4725 op0h = simplify_gen_subreg (word_mode, op0, mode,
4726 subreg_highpart_offset (word_mode, mode));
4727 return emit_store_flag (target, code, op0h, op1, word_mode,
4728 unsignedp, normalizep);
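
/* Aside (editor's illustration, not part of expmed.c): the
   double-word reductions above, for a 64-bit value held as two
   32-bit words.  */
#if 0 /* illustration only */
static int
dw_eq_zero (unsigned int lo, unsigned int hi)     /* x == 0  */
{
  return (lo | hi) == 0;
}

static int
dw_eq_minus_1 (unsigned int lo, unsigned int hi)  /* x == -1 */
{
  return (lo & hi) == 0xffffffffu;
}

static int
dw_negative (unsigned int lo, unsigned int hi)    /* x < 0   */
{
  (void) lo;                 /* the sign bit lives in the high word */
  return (int) hi < 0;
}
#endif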
4732 /* From now on, we won't change CODE, so set ICODE now. */
4733 icode = setcc_gen_code[(int) code];
4735 /* If this is A < 0 or A >= 0, we can do this by taking the one's
4736 complement of A (for GE) and shifting the sign bit to the low bit. */
4737 if (op1 == const0_rtx && (code == LT || code == GE)
4738 && GET_MODE_CLASS (mode) == MODE_INT
4739 && (normalizep || STORE_FLAG_VALUE == 1
4740 || (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
4741 && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
4742 == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))))
4744 subtarget = target;
4746 /* If the result is to be wider than OP0, it is best to convert it
4747 first. If it is to be narrower, it is *incorrect* to convert it
4748 first. */
4749 if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
4751 op0 = convert_modes (target_mode, mode, op0, 0);
4752 mode = target_mode;
4755 if (target_mode != mode)
4756 subtarget = 0;
4758 if (code == GE)
4759 op0 = expand_unop (mode, one_cmpl_optab, op0,
4760 ((STORE_FLAG_VALUE == 1 || normalizep)
4761 ? 0 : subtarget), 0);
4763 if (STORE_FLAG_VALUE == 1 || normalizep)
4764 /* If we are supposed to produce a 0/1 value, we want to do
4765 a logical shift from the sign bit to the low-order bit; for
4766 a -1/0 value, we do an arithmetic shift. */
4767 op0 = expand_shift (RSHIFT_EXPR, mode, op0,
4768 size_int (GET_MODE_BITSIZE (mode) - 1),
4769 subtarget, normalizep != -1);
4771 if (mode != target_mode)
4772 op0 = convert_modes (target_mode, mode, op0, 0);
4774 return op0;
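
/* Aside (editor's illustration, not part of expmed.c): the sign-bit
   extraction above for 32-bit int.  A logical shift yields 0/1; the
   code above uses an arithmetic shift instead when a 0/-1 value is
   wanted (normalizep == -1).  */
#if 0 /* illustration only */
static int
lt_zero (int a)                        /* a < 0, as 0/1 */
{
  return (unsigned int) a >> 31;
}

static int
ge_zero (int a)                        /* a >= 0, as 0/1 */
{
  return (unsigned int) ~a >> 31;      /* complement first, as for GE */
}
#endif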
4777 if (icode != CODE_FOR_nothing)
4779 insn_operand_predicate_fn pred;
4781 /* We think we may be able to do this with a scc insn. Emit the
4782 comparison and then the scc insn. */
4784 do_pending_stack_adjust ();
4785 last = get_last_insn ();
4787 comparison
4788 = compare_from_rtx (op0, op1, code, unsignedp, mode, NULL_RTX);
4789 if (CONSTANT_P (comparison))
4791 if (GET_CODE (comparison) == CONST_INT)
4793 if (comparison == const0_rtx)
4794 return const0_rtx;
4796 #ifdef FLOAT_STORE_FLAG_VALUE
4797 else if (GET_CODE (comparison) == CONST_DOUBLE)
4799 if (comparison == CONST0_RTX (GET_MODE (comparison)))
4800 return const0_rtx;
4802 #endif
4803 else
4804 abort ();
4805 if (normalizep == 1)
4806 return const1_rtx;
4807 if (normalizep == -1)
4808 return constm1_rtx;
4809 return const_true_rtx;
4812 /* The code of COMPARISON may not match CODE if compare_from_rtx
4813 decided to swap its operands and reverse the original code.
4815 We know that compare_from_rtx returns either a CONST_INT or
4816 a new comparison code, so it is safe to just extract the
4817 code from COMPARISON. */
4818 code = GET_CODE (comparison);
4820 /* Get a reference to the target in the proper mode for this insn. */
4821 compare_mode = insn_data[(int) icode].operand[0].mode;
4822 subtarget = target;
4823 pred = insn_data[(int) icode].operand[0].predicate;
4824 if (optimize || ! (*pred) (subtarget, compare_mode))
4825 subtarget = gen_reg_rtx (compare_mode);
4827 pattern = GEN_FCN (icode) (subtarget);
4828 if (pattern)
4830 emit_insn (pattern);
4832 /* If we are converting to a wider mode, first convert to
4833 TARGET_MODE, then normalize. This produces better combining
4834 opportunities on machines that have a SIGN_EXTRACT when we are
4835 testing a single bit. This mostly benefits the 68k.
4837 If STORE_FLAG_VALUE does not have the sign bit set when
4838 interpreted in COMPARE_MODE, we can do this conversion as
4839 unsigned, which is usually more efficient. */
4840 if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (compare_mode))
4842 convert_move (target, subtarget,
4843 (GET_MODE_BITSIZE (compare_mode)
4844 <= HOST_BITS_PER_WIDE_INT)
4845 && 0 == (STORE_FLAG_VALUE
4846 & ((HOST_WIDE_INT) 1
4847 << (GET_MODE_BITSIZE (compare_mode) -1))));
4848 op0 = target;
4849 compare_mode = target_mode;
4851 else
4852 op0 = subtarget;
4854 /* If we want to keep subexpressions around, don't reuse our
4855 last target. */
4857 if (optimize)
4858 subtarget = 0;
4860 /* Now normalize to the proper value in COMPARE_MODE. Sometimes
4861 we don't have to do anything. */
4862 if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
4864 /* STORE_FLAG_VALUE might be the most negative number, so write
4865 the comparison this way to avoid a compile-time warning. */
4866 else if (- normalizep == STORE_FLAG_VALUE)
4867 op0 = expand_unop (compare_mode, neg_optab, op0, subtarget, 0);
4869 /* We don't want to use STORE_FLAG_VALUE < 0 below since this
4870 makes it hard to use a value of just the sign bit due to
4871 ANSI integer constant typing rules. */
4872 else if (GET_MODE_BITSIZE (compare_mode) <= HOST_BITS_PER_WIDE_INT
4873 && (STORE_FLAG_VALUE
4874 & ((HOST_WIDE_INT) 1
4875 << (GET_MODE_BITSIZE (compare_mode) - 1))))
4876 op0 = expand_shift (RSHIFT_EXPR, compare_mode, op0,
4877 size_int (GET_MODE_BITSIZE (compare_mode) - 1),
4878 subtarget, normalizep == 1);
4879 else if (STORE_FLAG_VALUE & 1)
4881 op0 = expand_and (compare_mode, op0, const1_rtx, subtarget);
4882 if (normalizep == -1)
4883 op0 = expand_unop (compare_mode, neg_optab, op0, op0, 0);
4885 else
4886 abort ();
4888 /* If we were converting to a smaller mode, do the
4889 conversion now. */
4890 if (target_mode != compare_mode)
4892 convert_move (target, op0, 0);
4893 return target;
4895 else
4896 return op0;
4900 delete_insns_since (last);
4902 /* If optimizing, use different pseudo registers for each insn, instead
4903 of reusing the same pseudo. This leads to better CSE, but slows
4904 down the compiler, since there are more pseudos. */
4905 subtarget = (!optimize
4906 && (target_mode == mode)) ? target : NULL_RTX;
4908 /* If we reached here, we can't do this with a scc insn. However, there
4909 are some comparisons that can be done directly. For example, if
4910 this is an equality comparison of integers, we can try to exclusive-or
4911 (or subtract) the two operands and use a recursive call to try the
4912 comparison with zero. Don't do any of these cases if branches are
4913 very cheap. */
4915 if (BRANCH_COST > 0
4916 && GET_MODE_CLASS (mode) == MODE_INT && (code == EQ || code == NE)
4917 && op1 != const0_rtx)
4919 tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
4920 OPTAB_WIDEN);
4922 if (tem == 0)
4923 tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
4924 OPTAB_WIDEN);
4925 if (tem != 0)
4926 tem = emit_store_flag (target, code, tem, const0_rtx,
4927 mode, unsignedp, normalizep);
4928 if (tem == 0)
4929 delete_insns_since (last);
4930 return tem;
4933 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
4934 the constant zero. Reject all other comparisons at this point. Only
4935 do LE and GT if branches are expensive, since these sequences are
4936 themselves expensive on 2-operand machines. */
4938 if (BRANCH_COST == 0
4939 || GET_MODE_CLASS (mode) != MODE_INT || op1 != const0_rtx
4940 || (code != EQ && code != NE
4941 && (BRANCH_COST <= 1 || (code != LE && code != GT))))
4942 return 0;
4944 /* See what we need to return. We can only return a 1, -1, or the
4945 sign bit. */
4947 if (normalizep == 0)
4949 if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
4950 normalizep = STORE_FLAG_VALUE;
4952 else if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
4953 && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
4954 == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))
4956 else
4957 return 0;
4960 /* Try to put the result of the comparison in the sign bit. Assume we can't
4961 do the necessary operation below. */
4963 tem = 0;
4965 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
4966 the sign bit set. */
4968 if (code == LE)
4970 /* This is destructive, so SUBTARGET can't be OP0. */
4971 if (rtx_equal_p (subtarget, op0))
4972 subtarget = 0;
4974 tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
4975 OPTAB_WIDEN);
4976 if (tem)
4977 tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
4978 OPTAB_WIDEN);
4981 /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
4982 number of bits in the mode of OP0, minus one. */
4984 if (code == GT)
4986 if (rtx_equal_p (subtarget, op0))
4987 subtarget = 0;
4989 tem = expand_shift (RSHIFT_EXPR, mode, op0,
4990 size_int (GET_MODE_BITSIZE (mode) - 1),
4991 subtarget, 0);
4992 tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
4993 OPTAB_WIDEN);
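
/* Aside (editor's illustration, not part of expmed.c): both sign-bit
   constructions above, for 32-bit int with arithmetic right shift and
   wrapping arithmetic (as the RTL guarantees).  Each inner expression
   is negative exactly when the predicate holds, so the normalizing
   shift applied further down turns it into 0/1 or 0/-1.  */
#if 0 /* illustration only */
static int
le_zero (int a)                        /* a <= 0, as 0/1 */
{
  return (unsigned int) (a | (a - 1)) >> 31;
}

static int
gt_zero (int a)                        /* a > 0, as 0/1 */
{
  return (unsigned int) ((a >> 31) - a) >> 31;
}
#endif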
4996 if (code == EQ || code == NE)
4998 /* For EQ or NE, one way to do the comparison is to apply an operation
4999 that converts the operand into a positive number if it is nonzero
5000 or zero if it was originally zero. Then, for EQ, we subtract 1 and
5001 for NE we negate. This puts the result in the sign bit. Then we
5002 normalize with a shift, if needed.
5004 Two operations that can do the above actions are ABS and FFS, so try
5005 them. If that doesn't work, and MODE is smaller than a full word,
5006 we can use zero-extension to the wider mode (an unsigned conversion)
5007 as the operation. */
5009 /* Note that ABS doesn't yield a positive number for INT_MIN, but
5010 that is compensated by the subsequent overflow when subtracting
5011 one / negating. */
5013 if (abs_optab->handlers[mode].insn_code != CODE_FOR_nothing)
5014 tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5015 else if (ffs_optab->handlers[mode].insn_code != CODE_FOR_nothing)
5016 tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5017 else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5019 tem = convert_modes (word_mode, mode, op0, 1);
5020 mode = word_mode;
5023 if (tem != 0)
5025 if (code == EQ)
5026 tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5027 0, OPTAB_WIDEN);
5028 else
5029 tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5032 /* If we couldn't do it that way, for NE we can "or" the two's complement
5033 of the value with itself. For EQ, we take the one's complement of
5034 that "or", which is an extra insn, so we only handle EQ if branches
5035 are expensive. */
5037 if (tem == 0 && (code == NE || BRANCH_COST > 1))
5039 if (rtx_equal_p (subtarget, op0))
5040 subtarget = 0;
5042 tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5043 tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5044 OPTAB_WIDEN);
5046 if (tem && code == EQ)
5047 tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
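
/* Aside (editor's illustration, not part of expmed.c): the fallback
   above, for 32-bit int with wrapping negation (as the RTL
   guarantees).  a | -a is negative exactly when a is nonzero;
   complementing it first gives the EQ form, at the cost of the extra
   insn mentioned above.  */
#if 0 /* illustration only */
static int
ne_zero (int a)                        /* a != 0, as 0/1 */
{
  return (unsigned int) (a | -a) >> 31;
}

static int
eq_zero (int a)                        /* a == 0, as 0/1 */
{
  return (unsigned int) ~(a | -a) >> 31;
}
#endif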
5051 if (tem && normalizep)
5052 tem = expand_shift (RSHIFT_EXPR, mode, tem,
5053 size_int (GET_MODE_BITSIZE (mode) - 1),
5054 subtarget, normalizep == 1);
5056 if (tem)
5058 if (GET_MODE (tem) != target_mode)
5060 convert_move (target, tem, 0);
5061 tem = target;
5063 else if (!subtarget)
5065 emit_move_insn (target, tem);
5066 tem = target;
5069 else
5070 delete_insns_since (last);
5072 return tem;
5075 /* Like emit_store_flag, but always succeeds. */
5077 rtx
5078 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5079 enum machine_mode mode, int unsignedp, int normalizep)
5081 rtx tem, label;
5083 /* First see if emit_store_flag can do the job. */
5084 tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5085 if (tem != 0)
5086 return tem;
5088 if (normalizep == 0)
5089 normalizep = 1;
5091 /* If this failed, we have to do this with set/compare/jump/set code. */
5093 if (!REG_P (target)
5094 || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5095 target = gen_reg_rtx (GET_MODE (target));
5097 emit_move_insn (target, const1_rtx);
5098 label = gen_label_rtx ();
5099 do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5100 NULL_RTX, label);
5102 emit_move_insn (target, const0_rtx);
5103 emit_label (label);
5105 return target;
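
/* Aside (editor's illustration, not part of expmed.c): the
   set/compare/jump/set fallback above, as straight-line C.  */
#if 0 /* illustration only */
static int
store_flag_force (int a, int b)        /* a < b, as 0/1 */
{
  int t = 1;
  if (a < b)
    goto done;                         /* comparison true: keep the 1 */
  t = 0;
done:
  return t;
}
#endif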
5108 /* Perform possibly multi-word comparison and conditional jump to LABEL
5109 if ARG1 OP ARG2 is true, where ARG1 and ARG2 are of mode MODE.
5111 The algorithm is based on the code in expr.c:do_jump.
5113 Note that this does not perform a general comparison. Only variants
5114 generated within expmed.c are correctly handled, others abort (but could
5115 be handled if needed). */
5117 static void
5118 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
5119 rtx label)
5121 /* If this mode is an integer too wide to compare properly,
5122 compare word by word. Rely on cse to optimize constant cases. */
5124 if (GET_MODE_CLASS (mode) == MODE_INT
5125 && ! can_compare_p (op, mode, ccp_jump))
5127 rtx label2 = gen_label_rtx ();
5129 switch (op)
5131 case LTU:
5132 do_jump_by_parts_greater_rtx (mode, 1, arg2, arg1, label2, label);
5133 break;
5135 case LEU:
5136 do_jump_by_parts_greater_rtx (mode, 1, arg1, arg2, label, label2);
5137 break;
5139 case LT:
5140 do_jump_by_parts_greater_rtx (mode, 0, arg2, arg1, label2, label);
5141 break;
5143 case GT:
5144 do_jump_by_parts_greater_rtx (mode, 0, arg1, arg2, label2, label);
5145 break;
5147 case GE:
5148 do_jump_by_parts_greater_rtx (mode, 0, arg2, arg1, label, label2);
5149 break;
5151 /* do_jump_by_parts_equality_rtx compares with zero. Luckily
5152 those are the only equality operations we do. */
5153 case EQ:
5154 if (arg2 != const0_rtx || mode != GET_MODE (arg1))
5155 abort ();
5156 do_jump_by_parts_equality_rtx (arg1, label2, label);
5157 break;
5159 case NE:
5160 if (arg2 != const0_rtx || mode != GET_MODE (arg1))
5161 abort ();
5162 do_jump_by_parts_equality_rtx (arg1, label, label2);
5163 break;
5165 default:
5166 abort ();
5169 emit_label (label2);
5171 else
5172 emit_cmp_and_jump_insns (arg1, arg2, op, NULL_RTX, mode, 0, label);
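
/* Aside (editor's illustration, not part of expmed.c): the
   word-by-word ordering test behind do_jump_by_parts_greater_rtx,
   shown for a 64-bit unsigned value held as two 32-bit words.  */
#if 0 /* illustration only */
static int
dw_ltu (unsigned int lo1, unsigned int hi1,
        unsigned int lo2, unsigned int hi2)
{
  if (hi1 != hi2)
    return hi1 < hi2;                  /* high words decide */
  return lo1 < lo2;                    /* else the low words do */
}
#endif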