gcc/expmed.c
1 /* Medium-level subroutines: convert bit-field store and extract
2 and shifts, multiplies and divides to rtl instructions.
3 Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
4 1999, 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 2, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING. If not, write to the Free
20 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
21 02111-1307, USA. */
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "tm.h"
28 #include "toplev.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "tm_p.h"
32 #include "flags.h"
33 #include "insn-config.h"
34 #include "expr.h"
35 #include "optabs.h"
36 #include "real.h"
37 #include "recog.h"
38 #include "langhooks.h"
40 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
41 unsigned HOST_WIDE_INT,
42 unsigned HOST_WIDE_INT, rtx);
43 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
44 unsigned HOST_WIDE_INT, rtx);
45 static rtx extract_fixed_bit_field (enum machine_mode, rtx,
46 unsigned HOST_WIDE_INT,
47 unsigned HOST_WIDE_INT,
48 unsigned HOST_WIDE_INT, rtx, int);
49 static rtx mask_rtx (enum machine_mode, int, int, int);
50 static rtx lshift_value (enum machine_mode, rtx, int, int);
51 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
52 unsigned HOST_WIDE_INT, int);
53 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
54 static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
55 static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
57 /* Nonzero means divides or modulus operations are relatively cheap for
58 powers of two, so don't use branches; emit the operation instead.
59 Usually, this will mean that the MD file will emit non-branch
60 sequences. */
62 static bool sdiv_pow2_cheap[NUM_MACHINE_MODES];
63 static bool smod_pow2_cheap[NUM_MACHINE_MODES];
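/* For illustration: init_expmed sets these flags by cost comparison;
   sdiv_pow2_cheap[mode] becomes true when the cost of
   (div:MODE (reg) (const_int 32)) is at most 2 * add_cost[mode], and
   smod_pow2_cheap[mode] when the corresponding MOD costs at most
   4 * add_cost[mode].  */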
65 #ifndef SLOW_UNALIGNED_ACCESS
66 #define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
67 #endif
69 /* For compilers that support multiple targets with different word sizes,
70 MAX_BITS_PER_WORD contains the biggest value of BITS_PER_WORD. An example
71 is the H8/300(H) compiler. */
73 #ifndef MAX_BITS_PER_WORD
74 #define MAX_BITS_PER_WORD BITS_PER_WORD
75 #endif
77 /* Reduce conditional compilation elsewhere. */
78 #ifndef HAVE_insv
79 #define HAVE_insv 0
80 #define CODE_FOR_insv CODE_FOR_nothing
81 #define gen_insv(a,b,c,d) NULL_RTX
82 #endif
83 #ifndef HAVE_extv
84 #define HAVE_extv 0
85 #define CODE_FOR_extv CODE_FOR_nothing
86 #define gen_extv(a,b,c,d) NULL_RTX
87 #endif
88 #ifndef HAVE_extzv
89 #define HAVE_extzv 0
90 #define CODE_FOR_extzv CODE_FOR_nothing
91 #define gen_extzv(a,b,c,d) NULL_RTX
92 #endif
94 /* Cost of various pieces of RTL. Note that some of these are indexed by
95 shift count and some by mode. */
96 static int zero_cost;
97 static int add_cost[NUM_MACHINE_MODES];
98 static int neg_cost[NUM_MACHINE_MODES];
99 static int shift_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
100 static int shiftadd_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
101 static int shiftsub_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
102 static int mul_cost[NUM_MACHINE_MODES];
103 static int div_cost[NUM_MACHINE_MODES];
104 static int mul_widen_cost[NUM_MACHINE_MODES];
105 static int mul_highpart_cost[NUM_MACHINE_MODES];
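/* For illustration: init_expmed fills these tables by pricing a small
   synthetic RTL skeleton with rtx_cost.  Assuming a target with SImode,
   shift_cost[SImode][3] is the cost of
   (ashift:SI (reg:SI 10000) (const_int 3)), while shiftadd_cost[SImode][3]
   prices the equivalent (plus (mult (reg) (const_int 8)) (reg)) form.  */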
107 void
108 init_expmed (void)
110 struct
112 struct rtx_def reg; rtunion reg_fld[2];
113 struct rtx_def plus; rtunion plus_fld1;
114 struct rtx_def neg;
115 struct rtx_def udiv; rtunion udiv_fld1;
116 struct rtx_def mult; rtunion mult_fld1;
117 struct rtx_def div; rtunion div_fld1;
118 struct rtx_def mod; rtunion mod_fld1;
119 struct rtx_def zext;
120 struct rtx_def wide_mult; rtunion wide_mult_fld1;
121 struct rtx_def wide_lshr; rtunion wide_lshr_fld1;
122 struct rtx_def wide_trunc;
123 struct rtx_def shift; rtunion shift_fld1;
124 struct rtx_def shift_mult; rtunion shift_mult_fld1;
125 struct rtx_def shift_add; rtunion shift_add_fld1;
126 struct rtx_def shift_sub; rtunion shift_sub_fld1;
127 } all;
129 rtx pow2[MAX_BITS_PER_WORD];
130 rtx cint[MAX_BITS_PER_WORD];
131 int m, n;
132 enum machine_mode mode, wider_mode;
134 zero_cost = rtx_cost (const0_rtx, 0);
136 for (m = 1; m < MAX_BITS_PER_WORD; m++)
138 pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
139 cint[m] = GEN_INT (m);
142 memset (&all, 0, sizeof all);
144 PUT_CODE (&all.reg, REG);
145 REGNO (&all.reg) = 10000;
147 PUT_CODE (&all.plus, PLUS);
148 XEXP (&all.plus, 0) = &all.reg;
149 XEXP (&all.plus, 1) = &all.reg;
151 PUT_CODE (&all.neg, NEG);
152 XEXP (&all.neg, 0) = &all.reg;
154 PUT_CODE (&all.udiv, UDIV);
155 XEXP (&all.udiv, 0) = &all.reg;
156 XEXP (&all.udiv, 1) = &all.reg;
158 PUT_CODE (&all.mult, MULT);
159 XEXP (&all.mult, 0) = &all.reg;
160 XEXP (&all.mult, 1) = &all.reg;
162 PUT_CODE (&all.div, DIV);
163 XEXP (&all.div, 0) = &all.reg;
164 XEXP (&all.div, 1) = 32 < MAX_BITS_PER_WORD ? cint[32] : GEN_INT (32);
166 PUT_CODE (&all.mod, MOD);
167 XEXP (&all.mod, 0) = &all.reg;
168 XEXP (&all.mod, 1) = XEXP (&all.div, 1);
170 PUT_CODE (&all.zext, ZERO_EXTEND);
171 XEXP (&all.zext, 0) = &all.reg;
173 PUT_CODE (&all.wide_mult, MULT);
174 XEXP (&all.wide_mult, 0) = &all.zext;
175 XEXP (&all.wide_mult, 1) = &all.zext;
177 PUT_CODE (&all.wide_lshr, LSHIFTRT);
178 XEXP (&all.wide_lshr, 0) = &all.wide_mult;
180 PUT_CODE (&all.wide_trunc, TRUNCATE);
181 XEXP (&all.wide_trunc, 0) = &all.wide_lshr;
183 PUT_CODE (&all.shift, ASHIFT);
184 XEXP (&all.shift, 0) = &all.reg;
186 PUT_CODE (&all.shift_mult, MULT);
187 XEXP (&all.shift_mult, 0) = &all.reg;
189 PUT_CODE (&all.shift_add, PLUS);
190 XEXP (&all.shift_add, 0) = &all.shift_mult;
191 XEXP (&all.shift_add, 1) = &all.reg;
193 PUT_CODE (&all.shift_sub, MINUS);
194 XEXP (&all.shift_sub, 0) = &all.shift_mult;
195 XEXP (&all.shift_sub, 1) = &all.reg;
197 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
198 mode != VOIDmode;
199 mode = GET_MODE_WIDER_MODE (mode))
201 PUT_MODE (&all.reg, mode);
202 PUT_MODE (&all.plus, mode);
203 PUT_MODE (&all.neg, mode);
204 PUT_MODE (&all.udiv, mode);
205 PUT_MODE (&all.mult, mode);
206 PUT_MODE (&all.div, mode);
207 PUT_MODE (&all.mod, mode);
208 PUT_MODE (&all.wide_trunc, mode);
209 PUT_MODE (&all.shift, mode);
210 PUT_MODE (&all.shift_mult, mode);
211 PUT_MODE (&all.shift_add, mode);
212 PUT_MODE (&all.shift_sub, mode);
214 add_cost[mode] = rtx_cost (&all.plus, SET);
215 neg_cost[mode] = rtx_cost (&all.neg, SET);
216 div_cost[mode] = rtx_cost (&all.udiv, SET);
217 mul_cost[mode] = rtx_cost (&all.mult, SET);
219 sdiv_pow2_cheap[mode] = (rtx_cost (&all.div, SET) <= 2 * add_cost[mode]);
220 smod_pow2_cheap[mode] = (rtx_cost (&all.mod, SET) <= 4 * add_cost[mode]);
222 wider_mode = GET_MODE_WIDER_MODE (mode);
223 if (wider_mode != VOIDmode)
225 PUT_MODE (&all.zext, wider_mode);
226 PUT_MODE (&all.wide_mult, wider_mode);
227 PUT_MODE (&all.wide_lshr, wider_mode);
228 XEXP (&all.wide_lshr, 1) = GEN_INT (GET_MODE_BITSIZE (mode));
230 mul_widen_cost[wider_mode] = rtx_cost (&all.wide_mult, SET);
231 mul_highpart_cost[mode] = rtx_cost (&all.wide_trunc, SET);
234 shift_cost[mode][0] = 0;
235 shiftadd_cost[mode][0] = shiftsub_cost[mode][0] = add_cost[mode];
237 n = MIN (MAX_BITS_PER_WORD, GET_MODE_BITSIZE (mode));
238 for (m = 1; m < n; m++)
240 XEXP (&all.shift, 1) = cint[m];
241 XEXP (&all.shift_mult, 1) = pow2[m];
243 shift_cost[mode][m] = rtx_cost (&all.shift, SET);
244 shiftadd_cost[mode][m] = rtx_cost (&all.shift_add, SET);
245 shiftsub_cost[mode][m] = rtx_cost (&all.shift_sub, SET);
250 /* Return an rtx representing minus the value of X.
251 MODE is the intended mode of the result,
252 useful if X is a CONST_INT. */
254 rtx
255 negate_rtx (enum machine_mode mode, rtx x)
257 rtx result = simplify_unary_operation (NEG, mode, x, mode);
259 if (result == 0)
260 result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
262 return result;
265 /* Report on the availability of insv/extv/extzv and the desired mode
266 of each of their operands. Returns MAX_MACHINE_MODE if HAVE_foo
267 is false; else the mode of the specified operand. If OPNO is -1,
268 all the caller cares about is whether the insn is available. */
269 enum machine_mode
270 mode_for_extraction (enum extraction_pattern pattern, int opno)
272 const struct insn_data *data;
274 switch (pattern)
276 case EP_insv:
277 if (HAVE_insv)
279 data = &insn_data[CODE_FOR_insv];
280 break;
282 return MAX_MACHINE_MODE;
284 case EP_extv:
285 if (HAVE_extv)
287 data = &insn_data[CODE_FOR_extv];
288 break;
290 return MAX_MACHINE_MODE;
292 case EP_extzv:
293 if (HAVE_extzv)
295 data = &insn_data[CODE_FOR_extzv];
296 break;
298 return MAX_MACHINE_MODE;
300 default:
301 abort ();
304 if (opno == -1)
305 return VOIDmode;
307 /* Everyone who uses this function used to follow it with
308 if (result == VOIDmode) result = word_mode; */
309 if (data->operand[opno].mode == VOIDmode)
310 return word_mode;
311 return data->operand[opno].mode;
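/* For illustration: store_bit_field below calls
   mode_for_extraction (EP_insv, 3) to learn which mode the insv pattern
   wants for its value operand; if that operand is declared VOIDmode the
   answer is word_mode, and if the target has no insv at all the answer
   is MAX_MACHINE_MODE.  */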
315 /* Generate code to store value from rtx VALUE
316 into a bit-field within structure STR_RTX
317 containing BITSIZE bits starting at bit BITNUM.
318 FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
319 ALIGN is the alignment that STR_RTX is known to have.
320 TOTAL_SIZE is the size of the structure in bytes, or -1 if varying. */
322 /* ??? Note that there are two different ideas here for how
323 to determine the size to count bits within, for a register.
324 One is BITS_PER_WORD, and the other is the size of operand 3
325 of the insv pattern.
327 If operand 3 of the insv pattern is VOIDmode, then we will use BITS_PER_WORD
328 else, we use the mode of operand 3. */
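/* Worked example (assuming BITS_PER_WORD == 32 and BITS_PER_UNIT == 8):
   for a REG destination UNIT is 32, so BITNUM == 37 splits into
   OFFSET == 1 and BITPOS == 5; for a MEM destination UNIT is 8, so the
   same BITNUM gives OFFSET == 4 and BITPOS == 5.  */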
330 void
331 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
332 unsigned HOST_WIDE_INT bitnum, enum machine_mode fieldmode,
333 rtx value)
335 unsigned int unit
336 = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
337 unsigned HOST_WIDE_INT offset = bitnum / unit;
338 unsigned HOST_WIDE_INT bitpos = bitnum % unit;
339 rtx op0 = str_rtx;
340 int byte_offset;
342 enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);
344 while (GET_CODE (op0) == SUBREG)
346 /* The following line once was done only if WORDS_BIG_ENDIAN,
347 but I think that is a mistake. WORDS_BIG_ENDIAN is
348 meaningful at a much higher level; when structures are copied
349 between memory and regs, the higher-numbered regs
350 always get higher addresses. */
351 offset += (SUBREG_BYTE (op0) / UNITS_PER_WORD);
352 /* We used to adjust BITPOS here, but now we do the whole adjustment
353 right after the loop. */
354 op0 = SUBREG_REG (op0);
357 /* Use vec_set patterns for inserting parts of vectors whenever
358 available. */
359 if (VECTOR_MODE_P (GET_MODE (op0))
360 && !MEM_P (op0)
361 && (vec_set_optab->handlers[GET_MODE (op0)].insn_code
362 != CODE_FOR_nothing)
363 && fieldmode == GET_MODE_INNER (GET_MODE (op0))
364 && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
365 && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
367 enum machine_mode outermode = GET_MODE (op0);
368 enum machine_mode innermode = GET_MODE_INNER (outermode);
369 int icode = (int) vec_set_optab->handlers[outermode].insn_code;
370 int pos = bitnum / GET_MODE_BITSIZE (innermode);
371 rtx rtxpos = GEN_INT (pos);
372 rtx src = value;
373 rtx dest = op0;
374 rtx pat, seq;
375 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
376 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
377 enum machine_mode mode2 = insn_data[icode].operand[2].mode;
379 start_sequence ();
381 if (! (*insn_data[icode].operand[1].predicate) (src, mode1))
382 src = copy_to_mode_reg (mode1, src);
384 if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
385 rtxpos = copy_to_mode_reg (mode1, rtxpos);
387 /* We could handle this, but we should always be called with a pseudo
388 for our targets and all insns should take them as outputs. */
389 if (! (*insn_data[icode].operand[0].predicate) (dest, mode0)
390 || ! (*insn_data[icode].operand[1].predicate) (src, mode1)
391 || ! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
392 abort ();
393 pat = GEN_FCN (icode) (dest, src, rtxpos);
394 seq = get_insns ();
395 end_sequence ();
396 if (pat)
398 emit_insn (seq);
399 emit_insn (pat);
400 return dest;
404 if (flag_force_mem)
406 int old_generating_concat_p = generating_concat_p;
407 generating_concat_p = 0;
408 value = force_not_mem (value);
409 generating_concat_p = old_generating_concat_p;
412 /* If the target is a register, overwriting the entire object, or storing
413 a full-word or multi-word field can be done with just a SUBREG.
415 If the target is memory, storing any naturally aligned field can be
416 done with a simple store. For targets that support fast unaligned
417 memory, any naturally sized, unit aligned field can be done directly. */
419 byte_offset = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
420 + (offset * UNITS_PER_WORD);
422 if (bitpos == 0
423 && bitsize == GET_MODE_BITSIZE (fieldmode)
424 && (!MEM_P (op0)
425 ? ((GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD
426 || GET_MODE_SIZE (GET_MODE (op0)) == GET_MODE_SIZE (fieldmode))
427 && byte_offset % GET_MODE_SIZE (fieldmode) == 0)
428 : (! SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0))
429 || (offset * BITS_PER_UNIT % bitsize == 0
430 && MEM_ALIGN (op0) % GET_MODE_BITSIZE (fieldmode) == 0))))
432 if (GET_MODE (op0) != fieldmode)
434 if (GET_CODE (op0) == SUBREG)
436 if (GET_MODE (SUBREG_REG (op0)) == fieldmode
437 || GET_MODE_CLASS (fieldmode) == MODE_INT
438 || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT)
439 op0 = SUBREG_REG (op0);
440 else
441 /* Else we've got some float mode source being extracted into
442 a different float mode destination -- this combination of
443 subregs results in Severe Tire Damage. */
444 abort ();
446 if (REG_P (op0))
447 op0 = gen_rtx_SUBREG (fieldmode, op0, byte_offset);
448 else
449 op0 = adjust_address (op0, fieldmode, offset);
451 emit_move_insn (op0, value);
452 return value;
455 /* Make sure we are playing with integral modes. Pun with subregs
456 if we aren't. This must come after the entire register case above,
457 since that case is valid for any mode. The following cases are only
458 valid for integral modes. */
460 enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
461 if (imode != GET_MODE (op0))
463 if (MEM_P (op0))
464 op0 = adjust_address (op0, imode, 0);
465 else if (imode != BLKmode)
466 op0 = gen_lowpart (imode, op0);
467 else
468 abort ();
472 /* We may be accessing data outside the field, which means
473 we can alias adjacent data. */
474 if (MEM_P (op0))
476 op0 = shallow_copy_rtx (op0);
477 set_mem_alias_set (op0, 0);
478 set_mem_expr (op0, 0);
481 /* If OP0 is a register, BITPOS must count within a word.
482 But as we have it, it counts within whatever size OP0 now has.
483 On a bigendian machine, these are not the same, so convert. */
484 if (BYTES_BIG_ENDIAN
485 && !MEM_P (op0)
486 && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
487 bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
489 /* Storing an lsb-aligned field in a register
490 can be done with a movestrict instruction. */
492 if (!MEM_P (op0)
493 && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
494 && bitsize == GET_MODE_BITSIZE (fieldmode)
495 && (movstrict_optab->handlers[fieldmode].insn_code
496 != CODE_FOR_nothing))
498 int icode = movstrict_optab->handlers[fieldmode].insn_code;
500 /* Get appropriate low part of the value being stored. */
501 if (GET_CODE (value) == CONST_INT || REG_P (value))
502 value = gen_lowpart (fieldmode, value);
503 else if (!(GET_CODE (value) == SYMBOL_REF
504 || GET_CODE (value) == LABEL_REF
505 || GET_CODE (value) == CONST))
506 value = convert_to_mode (fieldmode, value, 0);
508 if (! (*insn_data[icode].operand[1].predicate) (value, fieldmode))
509 value = copy_to_mode_reg (fieldmode, value);
511 if (GET_CODE (op0) == SUBREG)
513 if (GET_MODE (SUBREG_REG (op0)) == fieldmode
514 || GET_MODE_CLASS (fieldmode) == MODE_INT
515 || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT)
516 op0 = SUBREG_REG (op0);
517 else
518 /* Else we've got some float mode source being extracted into
519 a different float mode destination -- this combination of
520 subregs results in Severe Tire Damage. */
521 abort ();
524 emit_insn (GEN_FCN (icode)
525 (gen_rtx_SUBREG (fieldmode, op0,
526 (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
527 + (offset * UNITS_PER_WORD)),
528 value));
530 return value;
533 /* Handle fields bigger than a word. */
535 if (bitsize > BITS_PER_WORD)
537 /* Here we transfer the words of the field
538 in the order least significant first.
539 This is because the most significant word is the one which may
540 be less than full.
541 However, only do that if the value is not BLKmode. */
543 unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
544 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
545 unsigned int i;
547 /* This is the mode we must force value to, so that there will be enough
548 subwords to extract. Note that fieldmode will often (always?) be
549 VOIDmode, because that is what store_field uses to indicate that this
550 is a bit field, but passing VOIDmode to operand_subword_force will
551 result in an abort. */
552 fieldmode = GET_MODE (value);
553 if (fieldmode == VOIDmode)
554 fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
556 for (i = 0; i < nwords; i++)
558 /* If I is 0, use the low-order word in both field and target;
559 if I is 1, use the next to lowest word; and so on. */
560 unsigned int wordnum = (backwards ? nwords - i - 1 : i);
561 unsigned int bit_offset = (backwards
562 ? MAX ((int) bitsize - ((int) i + 1)
563 * BITS_PER_WORD,
564 0)
565 : (int) i * BITS_PER_WORD);
567 store_bit_field (op0, MIN (BITS_PER_WORD,
568 bitsize - i * BITS_PER_WORD),
569 bitnum + bit_offset, word_mode,
570 operand_subword_force (value, wordnum, fieldmode));
572 return value;
575 /* From here on we can assume that the field to be stored in is
576 a full-word (whatever type that is), since it is shorter than a word. */
578 /* OFFSET is the number of words or bytes (UNIT says which)
579 from STR_RTX to the first word or byte containing part of the field. */
581 if (!MEM_P (op0))
583 if (offset != 0
584 || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
586 if (!REG_P (op0))
588 /* Since this is a destination (lvalue), we can't copy it to a
589 pseudo. We can trivially remove a SUBREG that does not
590 change the size of the operand. Such a SUBREG may have been
591 added above. Otherwise, abort. */
592 if (GET_CODE (op0) == SUBREG
593 && (GET_MODE_SIZE (GET_MODE (op0))
594 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)))))
595 op0 = SUBREG_REG (op0);
596 else
597 abort ();
599 op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
600 op0, (offset * UNITS_PER_WORD));
602 offset = 0;
605 /* If VALUE is a floating-point mode, access it as an integer of the
606 corresponding size. This can occur on a machine with 64 bit registers
607 that uses SFmode for float. This can also occur for unaligned float
608 structure fields. */
609 if (GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
610 && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
611 value = gen_lowpart ((GET_MODE (value) == VOIDmode
612 ? word_mode : int_mode_for_mode (GET_MODE (value))),
613 value);
615 /* Now OFFSET is nonzero only if OP0 is memory
616 and is therefore always measured in bytes. */
618 if (HAVE_insv
619 && GET_MODE (value) != BLKmode
620 && !(bitsize == 1 && GET_CODE (value) == CONST_INT)
621 /* Ensure insv's size is wide enough for this field. */
622 && (GET_MODE_BITSIZE (op_mode) >= bitsize)
623 && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
624 && (bitsize + bitpos > GET_MODE_BITSIZE (op_mode))))
626 int xbitpos = bitpos;
627 rtx value1;
628 rtx xop0 = op0;
629 rtx last = get_last_insn ();
630 rtx pat;
631 enum machine_mode maxmode = mode_for_extraction (EP_insv, 3);
632 int save_volatile_ok = volatile_ok;
634 volatile_ok = 1;
636 /* If this machine's insv can only insert into a register, copy OP0
637 into a register and save it back later. */
638 /* This used to check flag_force_mem, but that was a serious
639 de-optimization now that flag_force_mem is enabled by -O2. */
640 if (MEM_P (op0)
641 && ! ((*insn_data[(int) CODE_FOR_insv].operand[0].predicate)
642 (op0, VOIDmode)))
644 rtx tempreg;
645 enum machine_mode bestmode;
647 /* Get the mode to use for inserting into this field. If OP0 is
648 BLKmode, get the smallest mode consistent with the alignment. If
649 OP0 is a non-BLKmode object that is no wider than MAXMODE, use its
650 mode. Otherwise, use the smallest mode containing the field. */
652 if (GET_MODE (op0) == BLKmode
653 || GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (maxmode))
654 bestmode
655 = get_best_mode (bitsize, bitnum, MEM_ALIGN (op0), maxmode,
656 MEM_VOLATILE_P (op0));
657 else
658 bestmode = GET_MODE (op0);
660 if (bestmode == VOIDmode
661 || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
662 && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
663 goto insv_loses;
665 /* Adjust address to point to the containing unit of that mode.
666 Compute offset as multiple of this unit, counting in bytes. */
667 unit = GET_MODE_BITSIZE (bestmode);
668 offset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
669 bitpos = bitnum % unit;
670 op0 = adjust_address (op0, bestmode, offset);
672 /* Fetch that unit, store the bitfield in it, then store
673 the unit. */
674 tempreg = copy_to_reg (op0);
675 store_bit_field (tempreg, bitsize, bitpos, fieldmode, value);
676 emit_move_insn (op0, tempreg);
677 return value;
679 volatile_ok = save_volatile_ok;
681 /* Add OFFSET into OP0's address. */
682 if (MEM_P (xop0))
683 xop0 = adjust_address (xop0, byte_mode, offset);
685 /* If xop0 is a register, we need it in MAXMODE
686 to make it acceptable to the format of insv. */
687 if (GET_CODE (xop0) == SUBREG)
688 /* We can't just change the mode, because this might clobber op0,
689 and we will need the original value of op0 if insv fails. */
690 xop0 = gen_rtx_SUBREG (maxmode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
691 if (REG_P (xop0) && GET_MODE (xop0) != maxmode)
692 xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
694 /* On big-endian machines, we count bits from the most significant.
695 If the bit field insn does not, we must invert. */
697 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
698 xbitpos = unit - bitsize - xbitpos;
700 /* We have been counting XBITPOS within UNIT.
701 Count instead within the size of the register. */
702 if (BITS_BIG_ENDIAN && !MEM_P (xop0))
703 xbitpos += GET_MODE_BITSIZE (maxmode) - unit;
705 unit = GET_MODE_BITSIZE (maxmode);
707 /* Convert VALUE to maxmode (which insv insn wants) in VALUE1. */
708 value1 = value;
709 if (GET_MODE (value) != maxmode)
711 if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
713 /* Optimization: Don't bother really extending VALUE
714 if it has all the bits we will actually use. However,
715 if we must narrow it, be sure we do it correctly. */
717 if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (maxmode))
719 rtx tmp;
721 tmp = simplify_subreg (maxmode, value1, GET_MODE (value), 0);
722 if (! tmp)
723 tmp = simplify_gen_subreg (maxmode,
724 force_reg (GET_MODE (value),
725 value1),
726 GET_MODE (value), 0);
727 value1 = tmp;
729 else
730 value1 = gen_lowpart (maxmode, value1);
732 else if (GET_CODE (value) == CONST_INT)
733 value1 = gen_int_mode (INTVAL (value), maxmode);
734 else if (!CONSTANT_P (value))
735 /* Parse phase is supposed to make VALUE's data type
736 match that of the component reference, which is a type
737 at least as wide as the field; so VALUE should have
738 a mode that corresponds to that type. */
739 abort ();
742 /* If this machine's insv insists on a register,
743 get VALUE1 into a register. */
744 if (! ((*insn_data[(int) CODE_FOR_insv].operand[3].predicate)
745 (value1, maxmode)))
746 value1 = force_reg (maxmode, value1);
748 pat = gen_insv (xop0, GEN_INT (bitsize), GEN_INT (xbitpos), value1);
749 if (pat)
750 emit_insn (pat);
751 else
753 delete_insns_since (last);
754 store_fixed_bit_field (op0, offset, bitsize, bitpos, value);
757 else
758 insv_loses:
759 /* Insv is not available; store using shifts and boolean ops. */
760 store_fixed_bit_field (op0, offset, bitsize, bitpos, value);
761 return value;
764 /* Use shifts and boolean operations to store VALUE
765 into a bit field of width BITSIZE
766 in a memory location specified by OP0 except offset by OFFSET bytes.
767 (OFFSET must be 0 if OP0 is a register.)
768 The field starts at position BITPOS within the byte.
769 (If OP0 is a register, it may be a full word or a narrower mode,
770 but BITPOS still counts within a full word,
771 which is significant on bigendian machines.) */
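/* Worked example (assuming a little-endian 32-bit word, BITSIZE == 4,
   BITPOS == 8): the code below effectively computes
   op0 = (op0 & ~0x00000F00) | ((value & 0xF) << 8),
   using mask_rtx for both masks and lshift_value or expand_shift for the
   shifted value.  */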
773 static void
774 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT offset,
775 unsigned HOST_WIDE_INT bitsize,
776 unsigned HOST_WIDE_INT bitpos, rtx value)
778 enum machine_mode mode;
779 unsigned int total_bits = BITS_PER_WORD;
780 rtx subtarget, temp;
781 int all_zero = 0;
782 int all_one = 0;
784 /* There is a case not handled here:
785 a structure with a known alignment of just a halfword
786 and a field split across two aligned halfwords within the structure.
787 Or likewise a structure with a known alignment of just a byte
788 and a field split across two bytes.
789 Such cases are not supposed to be able to occur. */
791 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
793 if (offset != 0)
794 abort ();
795 /* Special treatment for a bit field split across two registers. */
796 if (bitsize + bitpos > BITS_PER_WORD)
798 store_split_bit_field (op0, bitsize, bitpos, value);
799 return;
802 else
804 /* Get the proper mode to use for this field. We want a mode that
805 includes the entire field. If such a mode would be larger than
806 a word, we won't be doing the extraction the normal way.
807 We don't want a mode bigger than the destination. */
809 mode = GET_MODE (op0);
810 if (GET_MODE_BITSIZE (mode) == 0
811 || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
812 mode = word_mode;
813 mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
814 MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
816 if (mode == VOIDmode)
818 /* The only way this should occur is if the field spans word
819 boundaries. */
820 store_split_bit_field (op0, bitsize, bitpos + offset * BITS_PER_UNIT,
821 value);
822 return;
825 total_bits = GET_MODE_BITSIZE (mode);
827 /* Make sure bitpos is valid for the chosen mode. Adjust BITPOS to
828 be in the range 0 to total_bits-1, and put any excess bytes in
829 OFFSET. */
830 if (bitpos >= total_bits)
832 offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
833 bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
834 * BITS_PER_UNIT);
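/* For example, with total_bits == 32 and BITS_PER_UNIT == 8, a BITPOS of
   37 adds 4 bytes to OFFSET and leaves BITPOS == 5.  */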
837 /* Get ref to an aligned byte, halfword, or word containing the field.
838 Adjust BITPOS to be position within a word,
839 and OFFSET to be the offset of that word.
840 Then alter OP0 to refer to that word. */
841 bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
842 offset -= (offset % (total_bits / BITS_PER_UNIT));
843 op0 = adjust_address (op0, mode, offset);
846 mode = GET_MODE (op0);
848 /* Now MODE is either some integral mode for a MEM as OP0,
849 or is a full-word for a REG as OP0. TOTAL_BITS corresponds.
850 The bit field is contained entirely within OP0.
851 BITPOS is the starting bit number within OP0.
852 (OP0's mode may actually be narrower than MODE.) */
854 if (BYTES_BIG_ENDIAN)
855 /* BITPOS is the distance between our msb
856 and that of the containing datum.
857 Convert it to the distance from the lsb. */
858 bitpos = total_bits - bitsize - bitpos;
860 /* Now BITPOS is always the distance between our lsb
861 and that of OP0. */
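/* For example, with total_bits == 32, BITSIZE == 4 and a big-endian
   BITPOS of 8 (counted from the msb), the adjusted BITPOS is
   32 - 4 - 8 == 20 bits up from the lsb.  */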
863 /* Shift VALUE left by BITPOS bits. If VALUE is not constant,
864 we must first convert its mode to MODE. */
866 if (GET_CODE (value) == CONST_INT)
868 HOST_WIDE_INT v = INTVAL (value);
870 if (bitsize < HOST_BITS_PER_WIDE_INT)
871 v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
873 if (v == 0)
874 all_zero = 1;
875 else if ((bitsize < HOST_BITS_PER_WIDE_INT
876 && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
877 || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
878 all_one = 1;
880 value = lshift_value (mode, value, bitpos, bitsize);
882 else
884 int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
885 && bitpos + bitsize != GET_MODE_BITSIZE (mode));
887 if (GET_MODE (value) != mode)
889 if ((REG_P (value) || GET_CODE (value) == SUBREG)
890 && GET_MODE_SIZE (mode) < GET_MODE_SIZE (GET_MODE (value)))
891 value = gen_lowpart (mode, value);
892 else
893 value = convert_to_mode (mode, value, 1);
896 if (must_and)
897 value = expand_binop (mode, and_optab, value,
898 mask_rtx (mode, 0, bitsize, 0),
899 NULL_RTX, 1, OPTAB_LIB_WIDEN);
900 if (bitpos > 0)
901 value = expand_shift (LSHIFT_EXPR, mode, value,
902 build_int_cst (NULL_TREE, bitpos), NULL_RTX, 1);
905 /* Now clear the chosen bits in OP0,
906 except that if VALUE is -1 we need not bother. */
908 subtarget = (REG_P (op0) || ! flag_force_mem) ? op0 : 0;
910 if (! all_one)
912 temp = expand_binop (mode, and_optab, op0,
913 mask_rtx (mode, bitpos, bitsize, 1),
914 subtarget, 1, OPTAB_LIB_WIDEN);
915 subtarget = temp;
917 else
918 temp = op0;
920 /* Now logical-or VALUE into OP0, unless it is zero. */
922 if (! all_zero)
923 temp = expand_binop (mode, ior_optab, temp, value,
924 subtarget, 1, OPTAB_LIB_WIDEN);
925 if (op0 != temp)
926 emit_move_insn (op0, temp);
929 /* Store a bit field that is split across multiple accessible memory objects.
931 OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
932 BITSIZE is the field width; BITPOS the position of its first bit
933 (within the word).
934 VALUE is the value to store.
936 This does not yet handle fields wider than BITS_PER_WORD. */
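/* Worked example (assuming BITS_PER_WORD == 32): storing a 12-bit field
   at BITPOS == 26 of a REG takes two passes through the loop below; the
   first stores thissize == 32 - 26 == 6 bits at thispos == 26 of word 0,
   the second stores the remaining 6 bits at thispos == 0 of word 1.  */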
938 static void
939 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
940 unsigned HOST_WIDE_INT bitpos, rtx value)
942 unsigned int unit;
943 unsigned int bitsdone = 0;
945 /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
946 much at a time. */
947 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
948 unit = BITS_PER_WORD;
949 else
950 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
952 /* If VALUE is a constant other than a CONST_INT, get it into a register in
953 WORD_MODE. If we can do this using gen_lowpart_common, do so. Note
954 that VALUE might be a floating-point constant. */
955 if (CONSTANT_P (value) && GET_CODE (value) != CONST_INT)
957 rtx word = gen_lowpart_common (word_mode, value);
959 if (word && (value != word))
960 value = word;
961 else
962 value = gen_lowpart_common (word_mode,
963 force_reg (GET_MODE (value) != VOIDmode
964 ? GET_MODE (value)
965 : word_mode, value));
968 while (bitsdone < bitsize)
970 unsigned HOST_WIDE_INT thissize;
971 rtx part, word;
972 unsigned HOST_WIDE_INT thispos;
973 unsigned HOST_WIDE_INT offset;
975 offset = (bitpos + bitsdone) / unit;
976 thispos = (bitpos + bitsdone) % unit;
978 /* THISSIZE must not overrun a word boundary. Otherwise,
979 store_fixed_bit_field will call us again, and we will mutually
980 recurse forever. */
981 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
982 thissize = MIN (thissize, unit - thispos);
984 if (BYTES_BIG_ENDIAN)
986 int total_bits;
988 /* We must do an endian conversion exactly the same way as it is
989 done in extract_bit_field, so that the two calls to
990 extract_fixed_bit_field will have comparable arguments. */
991 if (!MEM_P (value) || GET_MODE (value) == BLKmode)
992 total_bits = BITS_PER_WORD;
993 else
994 total_bits = GET_MODE_BITSIZE (GET_MODE (value));
996 /* Fetch successively less significant portions. */
997 if (GET_CODE (value) == CONST_INT)
998 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
999 >> (bitsize - bitsdone - thissize))
1000 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1001 else
1002 /* The args are chosen so that the last part includes the
1003 lsb. Give extract_bit_field the value it needs (with
1004 endianness compensation) to fetch the piece we want. */
1005 part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1006 total_bits - bitsize + bitsdone,
1007 NULL_RTX, 1);
1009 else
1011 /* Fetch successively more significant portions. */
1012 if (GET_CODE (value) == CONST_INT)
1013 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1014 >> bitsdone)
1015 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1016 else
1017 part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1018 bitsdone, NULL_RTX, 1);
1021 /* If OP0 is a register, then handle OFFSET here.
1023 When handling multiword bitfields, extract_bit_field may pass
1024 down a word_mode SUBREG of a larger REG for a bitfield that actually
1025 crosses a word boundary. Thus, for a SUBREG, we must find
1026 the current word starting from the base register. */
1027 if (GET_CODE (op0) == SUBREG)
1029 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1030 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1031 GET_MODE (SUBREG_REG (op0)));
1032 offset = 0;
1034 else if (REG_P (op0))
1036 word = operand_subword_force (op0, offset, GET_MODE (op0));
1037 offset = 0;
1039 else
1040 word = op0;
1042 /* OFFSET is in UNITs, and UNIT is in bits.
1043 store_fixed_bit_field wants offset in bytes. */
1044 store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT, thissize,
1045 thispos, part);
1046 bitsdone += thissize;
1050 /* Generate code to extract a byte-field from STR_RTX
1051 containing BITSIZE bits, starting at BITNUM,
1052 and put it in TARGET if possible (if TARGET is nonzero).
1053 Regardless of TARGET, we return the rtx for where the value is placed.
1055 STR_RTX is the structure containing the byte (a REG or MEM).
1056 UNSIGNEDP is nonzero if this is an unsigned bit field.
1057 MODE is the natural mode of the field value once extracted.
1058 TMODE is the mode the caller would like the value to have;
1059 but the value may be returned with type MODE instead.
1061 TOTAL_SIZE is the size in bytes of the containing structure,
1062 or -1 if varying.
1064 If a TARGET is specified and we can store in it at no extra cost,
1065 we do so, and return TARGET.
1066 Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1067 if they are equally easy. */
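/* Worked example (assuming a little-endian 32-bit target): extracting an
   unsigned 16-bit field starting at bit 8 of an SImode REG, with
   MODE == SImode and TMODE == HImode, produces the value
   (reg >> 8) & 0xffff, returned in TMODE or MODE as described above.  */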
1069 rtx
1070 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1071 unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1072 enum machine_mode mode, enum machine_mode tmode)
1074 unsigned int unit
1075 = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
1076 unsigned HOST_WIDE_INT offset = bitnum / unit;
1077 unsigned HOST_WIDE_INT bitpos = bitnum % unit;
1078 rtx op0 = str_rtx;
1079 rtx spec_target = target;
1080 rtx spec_target_subreg = 0;
1081 enum machine_mode int_mode;
1082 enum machine_mode extv_mode = mode_for_extraction (EP_extv, 0);
1083 enum machine_mode extzv_mode = mode_for_extraction (EP_extzv, 0);
1084 enum machine_mode mode1;
1085 int byte_offset;
1087 if (tmode == VOIDmode)
1088 tmode = mode;
1090 while (GET_CODE (op0) == SUBREG)
1092 bitpos += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1093 if (bitpos > unit)
1095 offset += (bitpos / unit);
1096 bitpos %= unit;
1098 op0 = SUBREG_REG (op0);
1101 if (REG_P (op0)
1102 && mode == GET_MODE (op0)
1103 && bitnum == 0
1104 && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1106 /* We're trying to extract a full register from itself. */
1107 return op0;
1110 /* Use vec_extract patterns for extracting parts of vectors whenever
1111 available. */
1112 if (VECTOR_MODE_P (GET_MODE (op0))
1113 && !MEM_P (op0)
1114 && (vec_extract_optab->handlers[GET_MODE (op0)].insn_code
1115 != CODE_FOR_nothing)
1116 && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1117 == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1119 enum machine_mode outermode = GET_MODE (op0);
1120 enum machine_mode innermode = GET_MODE_INNER (outermode);
1121 int icode = (int) vec_extract_optab->handlers[outermode].insn_code;
1122 unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1123 rtx rtxpos = GEN_INT (pos);
1124 rtx src = op0;
1125 rtx dest = NULL, pat, seq;
1126 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
1127 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
1128 enum machine_mode mode2 = insn_data[icode].operand[2].mode;
1130 if (innermode == tmode || innermode == mode)
1131 dest = target;
1133 if (!dest)
1134 dest = gen_reg_rtx (innermode);
1136 start_sequence ();
1138 if (! (*insn_data[icode].operand[0].predicate) (dest, mode0))
1139 dest = copy_to_mode_reg (mode0, dest);
1141 if (! (*insn_data[icode].operand[1].predicate) (src, mode1))
1142 src = copy_to_mode_reg (mode1, src);
1144 if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
1145 rtxpos = copy_to_mode_reg (mode1, rtxpos);
1147 /* We could handle this, but we should always be called with a pseudo
1148 for our targets and all insns should take them as outputs. */
1149 if (! (*insn_data[icode].operand[0].predicate) (dest, mode0)
1150 || ! (*insn_data[icode].operand[1].predicate) (src, mode1)
1151 || ! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
1152 abort ();
1154 pat = GEN_FCN (icode) (dest, src, rtxpos);
1155 seq = get_insns ();
1156 end_sequence ();
1157 if (pat)
1159 emit_insn (seq);
1160 emit_insn (pat);
1161 return dest;
1165 /* Make sure we are playing with integral modes. Pun with subregs
1166 if we aren't. */
1168 enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1169 if (imode != GET_MODE (op0))
1171 if (MEM_P (op0))
1172 op0 = adjust_address (op0, imode, 0);
1173 else if (imode != BLKmode)
1174 op0 = gen_lowpart (imode, op0);
1175 else
1176 abort ();
1180 /* We may be accessing data outside the field, which means
1181 we can alias adjacent data. */
1182 if (MEM_P (op0))
1184 op0 = shallow_copy_rtx (op0);
1185 set_mem_alias_set (op0, 0);
1186 set_mem_expr (op0, 0);
1189 /* Extraction of a full-word or multi-word value from a structure
1190 in a register or aligned memory can be done with just a SUBREG.
1191 A subword value in the least significant part of a register
1192 can also be extracted with a SUBREG. For this, we need the
1193 byte offset of the value in op0. */
1195 byte_offset = bitpos / BITS_PER_UNIT + offset * UNITS_PER_WORD;
1197 /* If OP0 is a register, BITPOS must count within a word.
1198 But as we have it, it counts within whatever size OP0 now has.
1199 On a bigendian machine, these are not the same, so convert. */
1200 if (BYTES_BIG_ENDIAN
1201 && !MEM_P (op0)
1202 && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
1203 bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1205 /* ??? We currently assume TARGET is at least as big as BITSIZE.
1206 If that's wrong, the solution is to test for it and set TARGET to 0
1207 if needed. */
1209 /* Only scalar integer modes can be converted via subregs. There is an
1210 additional problem for FP modes here in that they can have a precision
1211 which is different from the size. mode_for_size uses precision, but
1212 we want a mode based on the size, so we must avoid calling it for FP
1213 modes. */
1214 mode1 = (SCALAR_INT_MODE_P (tmode)
1215 ? mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0)
1216 : mode);
1218 if (((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode)
1219 && bitpos % BITS_PER_WORD == 0)
1220 || (mode1 != BLKmode
1221 /* ??? The big endian test here is wrong. This is correct
1222 if the value is in a register, and if mode_for_size is not
1223 the same mode as op0. This causes us to get unnecessarily
1224 inefficient code from the Thumb port when -mbig-endian. */
1225 && (BYTES_BIG_ENDIAN
1226 ? bitpos + bitsize == BITS_PER_WORD
1227 : bitpos == 0)))
1228 && ((!MEM_P (op0)
1229 && TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (mode),
1230 GET_MODE_BITSIZE (GET_MODE (op0)))
1231 && GET_MODE_SIZE (mode1) != 0
1232 && byte_offset % GET_MODE_SIZE (mode1) == 0)
1233 || (MEM_P (op0)
1234 && (! SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
1235 || (offset * BITS_PER_UNIT % bitsize == 0
1236 && MEM_ALIGN (op0) % bitsize == 0)))))
1238 if (mode1 != GET_MODE (op0))
1240 if (GET_CODE (op0) == SUBREG)
1242 if (GET_MODE (SUBREG_REG (op0)) == mode1
1243 || GET_MODE_CLASS (mode1) == MODE_INT
1244 || GET_MODE_CLASS (mode1) == MODE_PARTIAL_INT)
1245 op0 = SUBREG_REG (op0);
1246 else
1247 /* Else we've got some float mode source being extracted into
1248 a different float mode destination -- this combination of
1249 subregs results in Severe Tire Damage. */
1250 goto no_subreg_mode_swap;
1252 if (REG_P (op0))
1253 op0 = gen_rtx_SUBREG (mode1, op0, byte_offset);
1254 else
1255 op0 = adjust_address (op0, mode1, offset);
1257 if (mode1 != mode)
1258 return convert_to_mode (tmode, op0, unsignedp);
1259 return op0;
1261 no_subreg_mode_swap:
1263 /* Handle fields bigger than a word. */
1265 if (bitsize > BITS_PER_WORD)
1267 /* Here we transfer the words of the field
1268 in the order least significant first.
1269 This is because the most significant word is the one which may
1270 be less than full. */
1272 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1273 unsigned int i;
1275 if (target == 0 || !REG_P (target))
1276 target = gen_reg_rtx (mode);
1278 /* Indicate for flow that the entire target reg is being set. */
1279 emit_insn (gen_rtx_CLOBBER (VOIDmode, target));
1281 for (i = 0; i < nwords; i++)
1283 /* If I is 0, use the low-order word in both field and target;
1284 if I is 1, use the next to lowest word; and so on. */
1285 /* Word number in TARGET to use. */
1286 unsigned int wordnum
1287 = (WORDS_BIG_ENDIAN
1288 ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1289 : i);
1290 /* Offset from start of field in OP0. */
1291 unsigned int bit_offset = (WORDS_BIG_ENDIAN
1292 ? MAX (0, ((int) bitsize - ((int) i + 1)
1293 * (int) BITS_PER_WORD))
1294 : (int) i * BITS_PER_WORD);
1295 rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1296 rtx result_part
1297 = extract_bit_field (op0, MIN (BITS_PER_WORD,
1298 bitsize - i * BITS_PER_WORD),
1299 bitnum + bit_offset, 1, target_part, mode,
1300 word_mode);
1302 if (target_part == 0)
1303 abort ();
1305 if (result_part != target_part)
1306 emit_move_insn (target_part, result_part);
1309 if (unsignedp)
1311 /* Unless we've filled TARGET, the upper regs in a multi-reg value
1312 need to be zero'd out. */
1313 if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1315 unsigned int i, total_words;
1317 total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1318 for (i = nwords; i < total_words; i++)
1319 emit_move_insn
1320 (operand_subword (target,
1321 WORDS_BIG_ENDIAN ? total_words - i - 1 : i,
1322 1, VOIDmode),
1323 const0_rtx);
1325 return target;
1328 /* Signed bit field: sign-extend with two arithmetic shifts. */
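/* For example, assuming 32-bit words and a signed 40-bit field extracted
   into a DImode TARGET, this shifts left by 64 - 40 == 24 bits and then
   arithmetic-shifts right by 24 bits so the sign bit is replicated.  */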
1329 target = expand_shift (LSHIFT_EXPR, mode, target,
1330 build_int_cst (NULL_TREE,
1331 GET_MODE_BITSIZE (mode) - bitsize),
1332 NULL_RTX, 0);
1333 return expand_shift (RSHIFT_EXPR, mode, target,
1334 build_int_cst (NULL_TREE,
1335 GET_MODE_BITSIZE (mode) - bitsize),
1336 NULL_RTX, 0);
1339 /* From here on we know the desired field is smaller than a word. */
1341 /* Check if there is a correspondingly-sized integer field, so we can
1342 safely extract it as one size of integer, if necessary; then
1343 truncate or extend to the size that is wanted; then use SUBREGs or
1344 convert_to_mode to get one of the modes we really wanted. */
1346 int_mode = int_mode_for_mode (tmode);
1347 if (int_mode == BLKmode)
1348 int_mode = int_mode_for_mode (mode);
1349 if (int_mode == BLKmode)
1350 abort (); /* Should probably push op0 out to memory and then
1351 do a load. */
1353 /* OFFSET is the number of words or bytes (UNIT says which)
1354 from STR_RTX to the first word or byte containing part of the field. */
1356 if (!MEM_P (op0))
1358 if (offset != 0
1359 || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1361 if (!REG_P (op0))
1362 op0 = copy_to_reg (op0);
1363 op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
1364 op0, (offset * UNITS_PER_WORD));
1366 offset = 0;
1369 /* Now OFFSET is nonzero only for memory operands. */
1371 if (unsignedp)
1373 if (HAVE_extzv
1374 && (GET_MODE_BITSIZE (extzv_mode) >= bitsize)
1375 && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
1376 && (bitsize + bitpos > GET_MODE_BITSIZE (extzv_mode))))
1378 unsigned HOST_WIDE_INT xbitpos = bitpos, xoffset = offset;
1379 rtx bitsize_rtx, bitpos_rtx;
1380 rtx last = get_last_insn ();
1381 rtx xop0 = op0;
1382 rtx xtarget = target;
1383 rtx xspec_target = spec_target;
1384 rtx xspec_target_subreg = spec_target_subreg;
1385 rtx pat;
1386 enum machine_mode maxmode = mode_for_extraction (EP_extzv, 0);
1388 if (MEM_P (xop0))
1390 int save_volatile_ok = volatile_ok;
1391 volatile_ok = 1;
1393 /* Is the memory operand acceptable? */
1394 if (! ((*insn_data[(int) CODE_FOR_extzv].operand[1].predicate)
1395 (xop0, GET_MODE (xop0))))
1397 /* No, load into a reg and extract from there. */
1398 enum machine_mode bestmode;
1400 /* Get the mode to use for inserting into this field. If
1401 OP0 is BLKmode, get the smallest mode consistent with the
1402 alignment. If OP0 is a non-BLKmode object that is no
1403 wider than MAXMODE, use its mode. Otherwise, use the
1404 smallest mode containing the field. */
1406 if (GET_MODE (xop0) == BLKmode
1407 || (GET_MODE_SIZE (GET_MODE (op0))
1408 > GET_MODE_SIZE (maxmode)))
1409 bestmode = get_best_mode (bitsize, bitnum,
1410 MEM_ALIGN (xop0), maxmode,
1411 MEM_VOLATILE_P (xop0));
1412 else
1413 bestmode = GET_MODE (xop0);
1415 if (bestmode == VOIDmode
1416 || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (xop0))
1417 && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (xop0)))
1418 goto extzv_loses;
1420 /* Compute offset as multiple of this unit,
1421 counting in bytes. */
1422 unit = GET_MODE_BITSIZE (bestmode);
1423 xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1424 xbitpos = bitnum % unit;
1425 xop0 = adjust_address (xop0, bestmode, xoffset);
1427 /* Fetch it to a register in that size. */
1428 xop0 = force_reg (bestmode, xop0);
1430 /* XBITPOS counts within UNIT, which is what is expected. */
1432 else
1433 /* Get ref to first byte containing part of the field. */
1434 xop0 = adjust_address (xop0, byte_mode, xoffset);
1436 volatile_ok = save_volatile_ok;
1439 /* If op0 is a register, we need it in MAXMODE (which is usually
1440 SImode) to make it acceptable to the format of extzv. */
1441 if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
1442 goto extzv_loses;
1443 if (REG_P (xop0) && GET_MODE (xop0) != maxmode)
1444 xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
1446 /* On big-endian machines, we count bits from the most significant.
1447 If the bit field insn does not, we must invert. */
1448 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1449 xbitpos = unit - bitsize - xbitpos;
1451 /* Now convert from counting within UNIT to counting in MAXMODE. */
1452 if (BITS_BIG_ENDIAN && !MEM_P (xop0))
1453 xbitpos += GET_MODE_BITSIZE (maxmode) - unit;
1455 unit = GET_MODE_BITSIZE (maxmode);
1457 if (xtarget == 0
1458 || (flag_force_mem && MEM_P (xtarget)))
1459 xtarget = xspec_target = gen_reg_rtx (tmode);
1461 if (GET_MODE (xtarget) != maxmode)
1463 if (REG_P (xtarget))
1465 int wider = (GET_MODE_SIZE (maxmode)
1466 > GET_MODE_SIZE (GET_MODE (xtarget)));
1467 xtarget = gen_lowpart (maxmode, xtarget);
1468 if (wider)
1469 xspec_target_subreg = xtarget;
1471 else
1472 xtarget = gen_reg_rtx (maxmode);
1475 /* If this machine's extzv insists on a register target,
1476 make sure we have one. */
1477 if (! ((*insn_data[(int) CODE_FOR_extzv].operand[0].predicate)
1478 (xtarget, maxmode)))
1479 xtarget = gen_reg_rtx (maxmode);
1481 bitsize_rtx = GEN_INT (bitsize);
1482 bitpos_rtx = GEN_INT (xbitpos);
1484 pat = gen_extzv (xtarget, xop0, bitsize_rtx, bitpos_rtx);
1485 if (pat)
1487 emit_insn (pat);
1488 target = xtarget;
1489 spec_target = xspec_target;
1490 spec_target_subreg = xspec_target_subreg;
1492 else
1494 delete_insns_since (last);
1495 target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1496 bitpos, target, 1);
1499 else
1500 extzv_loses:
1501 target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1502 bitpos, target, 1);
1504 else
1506 if (HAVE_extv
1507 && (GET_MODE_BITSIZE (extv_mode) >= bitsize)
1508 && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
1509 && (bitsize + bitpos > GET_MODE_BITSIZE (extv_mode))))
1511 int xbitpos = bitpos, xoffset = offset;
1512 rtx bitsize_rtx, bitpos_rtx;
1513 rtx last = get_last_insn ();
1514 rtx xop0 = op0, xtarget = target;
1515 rtx xspec_target = spec_target;
1516 rtx xspec_target_subreg = spec_target_subreg;
1517 rtx pat;
1518 enum machine_mode maxmode = mode_for_extraction (EP_extv, 0);
1520 if (MEM_P (xop0))
1522 /* Is the memory operand acceptable? */
1523 if (! ((*insn_data[(int) CODE_FOR_extv].operand[1].predicate)
1524 (xop0, GET_MODE (xop0))))
1526 /* No, load into a reg and extract from there. */
1527 enum machine_mode bestmode;
1529 /* Get the mode to use for inserting into this field. If
1530 OP0 is BLKmode, get the smallest mode consistent with the
1531 alignment. If OP0 is a non-BLKmode object that is no
1532 wider than MAXMODE, use its mode. Otherwise, use the
1533 smallest mode containing the field. */
1535 if (GET_MODE (xop0) == BLKmode
1536 || (GET_MODE_SIZE (GET_MODE (op0))
1537 > GET_MODE_SIZE (maxmode)))
1538 bestmode = get_best_mode (bitsize, bitnum,
1539 MEM_ALIGN (xop0), maxmode,
1540 MEM_VOLATILE_P (xop0));
1541 else
1542 bestmode = GET_MODE (xop0);
1544 if (bestmode == VOIDmode
1545 || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (xop0))
1546 && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (xop0)))
1547 goto extv_loses;
1549 /* Compute offset as multiple of this unit,
1550 counting in bytes. */
1551 unit = GET_MODE_BITSIZE (bestmode);
1552 xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1553 xbitpos = bitnum % unit;
1554 xop0 = adjust_address (xop0, bestmode, xoffset);
1556 /* Fetch it to a register in that size. */
1557 xop0 = force_reg (bestmode, xop0);
1559 /* XBITPOS counts within UNIT, which is what is expected. */
1561 else
1562 /* Get ref to first byte containing part of the field. */
1563 xop0 = adjust_address (xop0, byte_mode, xoffset);
1566 /* If op0 is a register, we need it in MAXMODE (which is usually
1567 SImode) to make it acceptable to the format of extv. */
1568 if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
1569 goto extv_loses;
1570 if (REG_P (xop0) && GET_MODE (xop0) != maxmode)
1571 xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
1573 /* On big-endian machines, we count bits from the most significant.
1574 If the bit field insn does not, we must invert. */
1575 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1576 xbitpos = unit - bitsize - xbitpos;
1578 /* XBITPOS counts within a size of UNIT.
1579 Adjust to count within a size of MAXMODE. */
1580 if (BITS_BIG_ENDIAN && !MEM_P (xop0))
1581 xbitpos += (GET_MODE_BITSIZE (maxmode) - unit);
1583 unit = GET_MODE_BITSIZE (maxmode);
1585 if (xtarget == 0
1586 || (flag_force_mem && MEM_P (xtarget)))
1587 xtarget = xspec_target = gen_reg_rtx (tmode);
1589 if (GET_MODE (xtarget) != maxmode)
1591 if (REG_P (xtarget))
1593 int wider = (GET_MODE_SIZE (maxmode)
1594 > GET_MODE_SIZE (GET_MODE (xtarget)));
1595 xtarget = gen_lowpart (maxmode, xtarget);
1596 if (wider)
1597 xspec_target_subreg = xtarget;
1599 else
1600 xtarget = gen_reg_rtx (maxmode);
1603 /* If this machine's extv insists on a register target,
1604 make sure we have one. */
1605 if (! ((*insn_data[(int) CODE_FOR_extv].operand[0].predicate)
1606 (xtarget, maxmode)))
1607 xtarget = gen_reg_rtx (maxmode);
1609 bitsize_rtx = GEN_INT (bitsize);
1610 bitpos_rtx = GEN_INT (xbitpos);
1612 pat = gen_extv (xtarget, xop0, bitsize_rtx, bitpos_rtx);
1613 if (pat)
1615 emit_insn (pat);
1616 target = xtarget;
1617 spec_target = xspec_target;
1618 spec_target_subreg = xspec_target_subreg;
1620 else
1622 delete_insns_since (last);
1623 target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1624 bitpos, target, 0);
1627 else
1628 extv_loses:
1629 target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1630 bitpos, target, 0);
1632 if (target == spec_target)
1633 return target;
1634 if (target == spec_target_subreg)
1635 return spec_target;
1636 if (GET_MODE (target) != tmode && GET_MODE (target) != mode)
1638 /* If the target mode is floating-point, first convert to the
1639 integer mode of that size and then access it as a floating-point
1640 value via a SUBREG. */
1641 if (GET_MODE_CLASS (tmode) != MODE_INT
1642 && GET_MODE_CLASS (tmode) != MODE_PARTIAL_INT)
1644 target = convert_to_mode (mode_for_size (GET_MODE_BITSIZE (tmode),
1645 MODE_INT, 0),
1646 target, unsignedp);
1647 return gen_lowpart (tmode, target);
1649 else
1650 return convert_to_mode (tmode, target, unsignedp);
1652 return target;
1655 /* Extract a bit field using shifts and boolean operations.
1656 Returns an rtx to represent the value.
1657 OP0 addresses a register (word) or memory (byte).
1658 BITPOS says which bit within the word or byte the bit field starts in.
1659 OFFSET says how many bytes farther the bit field starts;
1660 it is 0 if OP0 is a register.
1661 BITSIZE says how many bits long the bit field is.
1662 (If OP0 is a register, it may be narrower than a full word,
1663 but BITPOS still counts within a full word,
1664 which is significant on bigendian machines.)
1666 UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1667 If TARGET is nonzero, attempts to store the value there
1668 and return TARGET, but this is not guaranteed.
1669 If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */
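/* Worked example (assuming a little-endian 32-bit word, BITPOS == 5,
   BITSIZE == 3): for UNSIGNEDP the code below computes (op0 >> 5) & 7;
   for a signed field it instead shifts the field's msb up to the word's
   msb and arithmetic-shifts back down so the sign is extended.  */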
1671 static rtx
1672 extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
1673 unsigned HOST_WIDE_INT offset,
1674 unsigned HOST_WIDE_INT bitsize,
1675 unsigned HOST_WIDE_INT bitpos, rtx target,
1676 int unsignedp)
1678 unsigned int total_bits = BITS_PER_WORD;
1679 enum machine_mode mode;
1681 if (GET_CODE (op0) == SUBREG || REG_P (op0))
1683 /* Special treatment for a bit field split across two registers. */
1684 if (bitsize + bitpos > BITS_PER_WORD)
1685 return extract_split_bit_field (op0, bitsize, bitpos, unsignedp);
1687 else
1689 /* Get the proper mode to use for this field. We want a mode that
1690 includes the entire field. If such a mode would be larger than
1691 a word, we won't be doing the extraction the normal way. */
1693 mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
1694 MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));
1696 if (mode == VOIDmode)
1697 /* The only way this should occur is if the field spans word
1698 boundaries. */
1699 return extract_split_bit_field (op0, bitsize,
1700 bitpos + offset * BITS_PER_UNIT,
1701 unsignedp);
1703 total_bits = GET_MODE_BITSIZE (mode);
1705 /* Make sure bitpos is valid for the chosen mode. Adjust BITPOS to
1706 be in the range 0 to total_bits-1, and put any excess bytes in
1707 OFFSET. */
1708 if (bitpos >= total_bits)
1710 offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
1711 bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
1712 * BITS_PER_UNIT);
1715 /* Get ref to an aligned byte, halfword, or word containing the field.
1716 Adjust BITPOS to be position within a word,
1717 and OFFSET to be the offset of that word.
1718 Then alter OP0 to refer to that word. */
1719 bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
1720 offset -= (offset % (total_bits / BITS_PER_UNIT));
1721 op0 = adjust_address (op0, mode, offset);
1724 mode = GET_MODE (op0);
1726 if (BYTES_BIG_ENDIAN)
1727 /* BITPOS is the distance between our msb and that of OP0.
1728 Convert it to the distance from the lsb. */
1729 bitpos = total_bits - bitsize - bitpos;
1731 /* Now BITPOS is always the distance between the field's lsb and that of OP0.
1732 We have reduced the big-endian case to the little-endian case. */
1734 if (unsignedp)
1736 if (bitpos)
1738 /* If the field does not already start at the lsb,
1739 shift it so it does. */
1740 tree amount = build_int_cst (NULL_TREE, bitpos);
1741 /* Maybe propagate the target for the shift. */
1742 /* But not if we will return it--could confuse integrate.c. */
1743 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1744 if (tmode != mode) subtarget = 0;
1745 op0 = expand_shift (RSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1747 /* Convert the value to the desired mode. */
1748 if (mode != tmode)
1749 op0 = convert_to_mode (tmode, op0, 1);
1751 /* Unless the msb of the field used to be the msb when we shifted,
1752 mask out the upper bits. */
1754 if (GET_MODE_BITSIZE (mode) != bitpos + bitsize)
1755 return expand_binop (GET_MODE (op0), and_optab, op0,
1756 mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1757 target, 1, OPTAB_LIB_WIDEN);
1758 return op0;
1761 /* To extract a signed bit-field, first shift its msb to the msb of the word,
1762 then arithmetic-shift its lsb to the lsb of the word. */
1763 op0 = force_reg (mode, op0);
1764 if (mode != tmode)
1765 target = 0;
1767 /* Find the narrowest integer mode that contains the field. */
1769 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1770 mode = GET_MODE_WIDER_MODE (mode))
1771 if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos)
1773 op0 = convert_to_mode (mode, op0, 0);
1774 break;
1777 if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos))
1779 tree amount
1780 = build_int_cst (NULL_TREE,
1781 GET_MODE_BITSIZE (mode) - (bitsize + bitpos));
1782 /* Maybe propagate the target for the shift. */
1783 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1784 op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1787 return expand_shift (RSHIFT_EXPR, mode, op0,
1788 build_int_cst (NULL_TREE,
1789 GET_MODE_BITSIZE (mode) - bitsize),
1790 target, 0);
1793 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1794 of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1795 complement of that if COMPLEMENT. The mask is truncated if
1796 necessary to the width of mode MODE. The mask is zero-extended if
1797 BITSIZE+BITPOS is too small for MODE. */
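/* As an illustration of the rule above (an assumed example, not part of
   the surrounding code): in SImode, BITPOS == 4 and BITSIZE == 8 give
   the mask 0x00000ff0, and with COMPLEMENT nonzero the value
   0xfffff00f instead.  */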
1799 static rtx
1800 mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
1802 HOST_WIDE_INT masklow, maskhigh;
1804 if (bitsize == 0)
1805 masklow = 0;
1806 else if (bitpos < HOST_BITS_PER_WIDE_INT)
1807 masklow = (HOST_WIDE_INT) -1 << bitpos;
1808 else
1809 masklow = 0;
1811 if (bitpos + bitsize < HOST_BITS_PER_WIDE_INT)
1812 masklow &= ((unsigned HOST_WIDE_INT) -1
1813 >> (HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1815 if (bitpos <= HOST_BITS_PER_WIDE_INT)
1816 maskhigh = -1;
1817 else
1818 maskhigh = (HOST_WIDE_INT) -1 << (bitpos - HOST_BITS_PER_WIDE_INT);
1820 if (bitsize == 0)
1821 maskhigh = 0;
1822 else if (bitpos + bitsize > HOST_BITS_PER_WIDE_INT)
1823 maskhigh &= ((unsigned HOST_WIDE_INT) -1
1824 >> (2 * HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1825 else
1826 maskhigh = 0;
1828 if (complement)
1830 maskhigh = ~maskhigh;
1831 masklow = ~masklow;
1834 return immed_double_const (masklow, maskhigh, mode);
1837 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1838 VALUE truncated to BITSIZE bits and then shifted left BITPOS bits. */
1840 static rtx
1841 lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize)
1843 unsigned HOST_WIDE_INT v = INTVAL (value);
1844 HOST_WIDE_INT low, high;
1846 if (bitsize < HOST_BITS_PER_WIDE_INT)
1847 v &= ~((HOST_WIDE_INT) -1 << bitsize);
1849 if (bitpos < HOST_BITS_PER_WIDE_INT)
1851 low = v << bitpos;
1852 high = (bitpos > 0 ? (v >> (HOST_BITS_PER_WIDE_INT - bitpos)) : 0);
1854 else
1856 low = 0;
1857 high = v << (bitpos - HOST_BITS_PER_WIDE_INT);
1860 return immed_double_const (low, high, mode);
1863 /* Extract a bit field that is split across two words
1864 and return an RTX for the result.
1866 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1867 BITSIZE is the field width; BITPOS, position of its first bit, in the word.
1868 UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend. */
1870 static rtx
1871 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1872 unsigned HOST_WIDE_INT bitpos, int unsignedp)
1874 unsigned int unit;
1875 unsigned int bitsdone = 0;
1876 rtx result = NULL_RTX;
1877 int first = 1;
1879 /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
1880 much at a time. */
1881 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1882 unit = BITS_PER_WORD;
1883 else
1884 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1886 while (bitsdone < bitsize)
1888 unsigned HOST_WIDE_INT thissize;
1889 rtx part, word;
1890 unsigned HOST_WIDE_INT thispos;
1891 unsigned HOST_WIDE_INT offset;
1893 offset = (bitpos + bitsdone) / unit;
1894 thispos = (bitpos + bitsdone) % unit;
1896 /* THISSIZE must not overrun a word boundary. Otherwise,
1897 extract_fixed_bit_field will call us again, and we will mutually
1898 recurse forever. */
1899 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1900 thissize = MIN (thissize, unit - thispos);
1902 /* If OP0 is a register, then handle OFFSET here.
1904 When handling multiword bitfields, extract_bit_field may pass
1905 down a word_mode SUBREG of a larger REG for a bitfield that actually
1906 crosses a word boundary. Thus, for a SUBREG, we must find
1907 the current word starting from the base register. */
1908 if (GET_CODE (op0) == SUBREG)
1910 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1911 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1912 GET_MODE (SUBREG_REG (op0)));
1913 offset = 0;
1915 else if (REG_P (op0))
1917 word = operand_subword_force (op0, offset, GET_MODE (op0));
1918 offset = 0;
1920 else
1921 word = op0;
1923 /* Extract the parts in bit-counting order,
1924 whose meaning is determined by BYTES_PER_UNIT.
1925 OFFSET is in UNITs, and UNIT is in bits.
1926 extract_fixed_bit_field wants offset in bytes. */
1927 part = extract_fixed_bit_field (word_mode, word,
1928 offset * unit / BITS_PER_UNIT,
1929 thissize, thispos, 0, 1);
1930 bitsdone += thissize;
1932 /* Shift this part into place for the result. */
1933 if (BYTES_BIG_ENDIAN)
1935 if (bitsize != bitsdone)
1936 part = expand_shift (LSHIFT_EXPR, word_mode, part,
1937 build_int_cst (NULL_TREE, bitsize - bitsdone),
1938 0, 1);
1940 else
1942 if (bitsdone != thissize)
1943 part = expand_shift (LSHIFT_EXPR, word_mode, part,
1944 build_int_cst (NULL_TREE,
1945 bitsdone - thissize), 0, 1);
1948 if (first)
1949 result = part;
1950 else
1951 /* Combine the parts with bitwise or. This works
1952 because we extracted each part as an unsigned bit field. */
1953 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
1954 OPTAB_LIB_WIDEN);
1956 first = 0;
1959 /* Unsigned bit field: we are done. */
1960 if (unsignedp)
1961 return result;
1962 /* Signed bit field: sign-extend with two arithmetic shifts. */
1963 result = expand_shift (LSHIFT_EXPR, word_mode, result,
1964 build_int_cst (NULL_TREE, BITS_PER_WORD - bitsize),
1965 NULL_RTX, 0);
1966 return expand_shift (RSHIFT_EXPR, word_mode, result,
1967 build_int_cst (NULL_TREE, BITS_PER_WORD - bitsize),
1968 NULL_RTX, 0);
1971 /* Add INC into TARGET. */
1973 void
1974 expand_inc (rtx target, rtx inc)
1976 rtx value = expand_binop (GET_MODE (target), add_optab,
1977 target, inc,
1978 target, 0, OPTAB_LIB_WIDEN);
1979 if (value != target)
1980 emit_move_insn (target, value);
1983 /* Subtract DEC from TARGET. */
1985 void
1986 expand_dec (rtx target, rtx dec)
1988 rtx value = expand_binop (GET_MODE (target), sub_optab,
1989 target, dec,
1990 target, 0, OPTAB_LIB_WIDEN);
1991 if (value != target)
1992 emit_move_insn (target, value);
1995 /* Output a shift instruction for expression code CODE,
1996 with SHIFTED being the rtx for the value to shift,
1997 and AMOUNT the tree for the amount to shift by.
1998 Store the result in the rtx TARGET, if that is convenient.
1999 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2000 Return the rtx for where the value is. */
2003 expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2004 tree amount, rtx target, int unsignedp)
2006 rtx op1, temp = 0;
2007 int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2008 int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2009 int try;
2011 /* We used to detect shift counts computed by NEGATE_EXPR
2012 and shift in the other direction, but that does not work
2013 on all machines. */
2015 op1 = expand_expr (amount, NULL_RTX, VOIDmode, 0);
2017 if (SHIFT_COUNT_TRUNCATED)
2019 if (GET_CODE (op1) == CONST_INT
2020 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2021 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
2022 op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2023 % GET_MODE_BITSIZE (mode));
2024 else if (GET_CODE (op1) == SUBREG
2025 && subreg_lowpart_p (op1))
2026 op1 = SUBREG_REG (op1);
2029 if (op1 == const0_rtx)
2030 return shifted;
2032 /* Check whether it's cheaper to implement a left shift by a constant
2033 bit count by a sequence of additions. */
2034 if (code == LSHIFT_EXPR
2035 && GET_CODE (op1) == CONST_INT
2036 && INTVAL (op1) > 0
2037 && INTVAL (op1) < GET_MODE_BITSIZE (mode)
2038 && shift_cost[mode][INTVAL (op1)] > INTVAL (op1) * add_cost[mode])
2040 int i;
2041 for (i = 0; i < INTVAL (op1); i++)
2043 temp = force_reg (mode, shifted);
2044 shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2045 unsignedp, OPTAB_LIB_WIDEN);
2047 return shifted;
2050 for (try = 0; temp == 0 && try < 3; try++)
2052 enum optab_methods methods;
2054 if (try == 0)
2055 methods = OPTAB_DIRECT;
2056 else if (try == 1)
2057 methods = OPTAB_WIDEN;
2058 else
2059 methods = OPTAB_LIB_WIDEN;
2061 if (rotate)
2063 /* Widening does not work for rotation. */
2064 if (methods == OPTAB_WIDEN)
2065 continue;
2066 else if (methods == OPTAB_LIB_WIDEN)
2068 /* If we have been unable to open-code this by a rotation,
2069 do it as the IOR of two shifts. I.e., to rotate A
2070 by N bits, compute (A << N) | ((unsigned) A >> (C - N))
2071 where C is the bitsize of A.
2073 It is theoretically possible that the target machine might
2074 not be able to perform either shift and hence we would
2075 be making two libcalls rather than just the one for the
2076 shift (similarly if IOR could not be done). We will allow
2077 this extremely unlikely lossage to avoid complicating the
2078 code below. */
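/* For example (illustrative only), rotating a 32-bit value A left by 8
   becomes (A << 8) | ((unsigned) A >> 24).  */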
2080 rtx subtarget = target == shifted ? 0 : target;
2081 rtx temp1;
2082 tree type = TREE_TYPE (amount);
2083 tree new_amount = make_tree (type, op1);
2084 tree other_amount
2085 = fold (build2 (MINUS_EXPR, type, convert
2086 (type, build_int_cst
2087 (NULL_TREE, GET_MODE_BITSIZE (mode))),
2088 amount));
2090 shifted = force_reg (mode, shifted);
2092 temp = expand_shift (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2093 mode, shifted, new_amount, subtarget, 1);
2094 temp1 = expand_shift (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2095 mode, shifted, other_amount, 0, 1);
2096 return expand_binop (mode, ior_optab, temp, temp1, target,
2097 unsignedp, methods);
2100 temp = expand_binop (mode,
2101 left ? rotl_optab : rotr_optab,
2102 shifted, op1, target, unsignedp, methods);
2104 /* If we don't have the rotate, but we are rotating by a constant
2105 that is in range, try a rotate in the opposite direction. */
2107 if (temp == 0 && GET_CODE (op1) == CONST_INT
2108 && INTVAL (op1) > 0
2109 && (unsigned int) INTVAL (op1) < GET_MODE_BITSIZE (mode))
2110 temp = expand_binop (mode,
2111 left ? rotr_optab : rotl_optab,
2112 shifted,
2113 GEN_INT (GET_MODE_BITSIZE (mode)
2114 - INTVAL (op1)),
2115 target, unsignedp, methods);
2117 else if (unsignedp)
2118 temp = expand_binop (mode,
2119 left ? ashl_optab : lshr_optab,
2120 shifted, op1, target, unsignedp, methods);
2122 /* Do arithmetic shifts.
2123 Also, if we are going to widen the operand, we can just as well
2124 use an arithmetic right-shift instead of a logical one. */
2125 if (temp == 0 && ! rotate
2126 && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2128 enum optab_methods methods1 = methods;
2130 /* If trying to widen a log shift to an arithmetic shift,
2131 don't accept an arithmetic shift of the same size. */
2132 if (unsignedp)
2133 methods1 = OPTAB_MUST_WIDEN;
2135 /* Arithmetic shift */
2137 temp = expand_binop (mode,
2138 left ? ashl_optab : ashr_optab,
2139 shifted, op1, target, unsignedp, methods1);
2142 /* We used to try extzv here for logical right shifts, but that was
2143 only useful for one machine, the VAX, and caused poor code
2144 generation there for lshrdi3, so the code was deleted and a
2145 define_expand for lshrsi3 was added to vax.md. */
2148 if (temp == 0)
2149 abort ();
2150 return temp;
2153 enum alg_code { alg_zero, alg_m, alg_shift,
2154 alg_add_t_m2, alg_sub_t_m2,
2155 alg_add_factor, alg_sub_factor,
2156 alg_add_t2_m, alg_sub_t2_m };
2158 /* This structure holds the "cost" of a multiply sequence. The
2159 "cost" field holds the total rtx_cost of every operator in the
2160 synthetic multiplication sequence, hence cost(a op b) is defined
2161 as rtx_cost(op) + cost(a) + cost(b), where cost(leaf) is zero.
2162 The "latency" field holds the minimum possible latency of the
2163 synthetic multiply, on a hypothetical infinitely parallel CPU.
2164 This is the critical path, or the maximum height, of the expression
2165 tree which is the sum of rtx_costs on the most expensive path from
2166 any leaf to the root. Hence latency(a op b) is defined as zero for
2167 leaves and rtx_cost(op) + max(latency(a), latency(b)) otherwise. */
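/* As a hypothetical example of the distinction: a sequence computing
   (a + b) + (c + d) has cost 3 * add but latency only 2 * add, since the
   two inner additions can execute in parallel.  */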
2169 struct mult_cost {
2170 short cost; /* Total rtx_cost of the multiplication sequence. */
2171 short latency; /* The latency of the multiplication sequence. */
2174 /* This macro is used to compare a pointer to a mult_cost against a
2175 single integer "rtx_cost" value. This is equivalent to the macro
2176 CHEAPER_MULT_COST(X,Z) where Z = {Y,Y}. */
2177 #define MULT_COST_LESS(X,Y) ((X)->cost < (Y) \
2178 || ((X)->cost == (Y) && (X)->latency < (Y)))
2180 /* This macro is used to compare two pointers to mult_costs against
2181 each other. The macro returns true if X is cheaper than Y.
2182 Currently, the cheaper of two mult_costs is the one with the
2183 lower "cost". If "cost"s are tied, the lower latency is cheaper. */
2184 #define CHEAPER_MULT_COST(X,Y) ((X)->cost < (Y)->cost \
2185 || ((X)->cost == (Y)->cost \
2186 && (X)->latency < (Y)->latency))
2188 /* This structure records a sequence of operations.
2189 `ops' is the number of operations recorded.
2190 `cost' is their total cost.
2191 The operations are stored in `op' and the corresponding
2192 logarithms of the integer coefficients in `log'.
2194 These are the operations:
2195 alg_zero total := 0;
2196 alg_m total := multiplicand;
2197 alg_shift total := total * coeff
2198 alg_add_t_m2 total := total + multiplicand * coeff;
2199 alg_sub_t_m2 total := total - multiplicand * coeff;
2200 alg_add_factor total := total * coeff + total;
2201 alg_sub_factor total := total * coeff - total;
2202 alg_add_t2_m total := total * coeff + multiplicand;
2203 alg_sub_t2_m total := total * coeff - multiplicand;
2205 The first operand must be either alg_zero or alg_m. */
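/* For illustration (a hypothetical entry, shown only to make the encoding
   concrete): multiplying by 10 might be recorded as three operations
   op[] = { alg_m, alg_add_t2_m, alg_shift } with log[] = { 0, 2, 1 }
   (the first log is unused), i.e. total := x; total := total * 4 + x;
   total := total * 2.  */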
2207 struct algorithm
2209 struct mult_cost cost;
2210 short ops;
2211 /* The sizes of the OP and LOG fields are not directly related to the
2212 word size, but the worst case arises when the multiplicand has few
2213 consecutive ones or zeros, i.e., a value like 10101010101...
2214 In that case we will generate shift-by-2, add, shift-by-2, add, ...,
2215 in total wordsize operations. */
2216 enum alg_code op[MAX_BITS_PER_WORD];
2217 char log[MAX_BITS_PER_WORD];
2220 /* Indicates the type of fixup needed after a constant multiplication.
2221 BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2222 the result should be negated, and ADD_VARIANT means that the
2223 multiplicand should be added to the result. */
2224 enum mult_variant {basic_variant, negate_variant, add_variant};
2226 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2227 const struct mult_cost *, enum machine_mode mode);
2228 static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
2229 struct algorithm *, enum mult_variant *, int);
2230 static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
2231 const struct algorithm *, enum mult_variant);
2232 static unsigned HOST_WIDE_INT choose_multiplier (unsigned HOST_WIDE_INT, int,
2233 int, unsigned HOST_WIDE_INT *,
2234 int *, int *);
2235 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2236 static rtx extract_high_half (enum machine_mode, rtx);
2237 static rtx expand_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
2238 int, int);
2239 /* Compute and return the best algorithm for multiplying by T.
2240 The algorithm must cost less than COST_LIMIT.
2241 If retval.cost >= COST_LIMIT, no algorithm was found and all
2242 other fields of the returned struct are undefined.
2243 MODE is the machine mode of the multiplication. */
2245 static void
2246 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2247 const struct mult_cost *cost_limit, enum machine_mode mode)
2249 int m;
2250 struct algorithm *alg_in, *best_alg;
2251 struct mult_cost best_cost;
2252 struct mult_cost new_limit;
2253 int op_cost, op_latency;
2254 unsigned HOST_WIDE_INT q;
2255 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
2257 /* Indicate that no algorithm is yet found. If no algorithm
2258 is found, this value will be returned and indicate failure. */
2259 alg_out->cost.cost = cost_limit->cost + 1;
2261 if (cost_limit->cost < 0
2262 || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2263 return;
2265 /* Restrict the bits of "t" to the multiplication's mode. */
2266 t &= GET_MODE_MASK (mode);
2268 /* t == 1 can be done in zero cost. */
2269 if (t == 1)
2271 alg_out->ops = 1;
2272 alg_out->cost.cost = 0;
2273 alg_out->cost.latency = 0;
2274 alg_out->op[0] = alg_m;
2275 return;
2278 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2279 fail now. */
2280 if (t == 0)
2282 if (MULT_COST_LESS (cost_limit, zero_cost))
2283 return;
2284 else
2286 alg_out->ops = 1;
2287 alg_out->cost.cost = zero_cost;
2288 alg_out->cost.latency = zero_cost;
2289 alg_out->op[0] = alg_zero;
2290 return;
2294 /* We'll be needing a couple extra algorithm structures now. */
2296 alg_in = alloca (sizeof (struct algorithm));
2297 best_alg = alloca (sizeof (struct algorithm));
2298 best_cost = *cost_limit;
2300 /* If we have a group of zero bits at the low-order part of T, try
2301 multiplying by the remaining bits and then doing a shift. */
2303 if ((t & 1) == 0)
2305 m = floor_log2 (t & -t); /* m = number of low zero bits */
2306 if (m < maxm)
2308 q = t >> m;
2309 /* The function expand_shift will choose between a shift and
2310 a sequence of additions, so the observed cost is given as
2311 MIN (m * add_cost[mode], shift_cost[mode][m]). */
2312 op_cost = m * add_cost[mode];
2313 if (shift_cost[mode][m] < op_cost)
2314 op_cost = shift_cost[mode][m];
2315 new_limit.cost = best_cost.cost - op_cost;
2316 new_limit.latency = best_cost.latency - op_cost;
2317 synth_mult (alg_in, q, &new_limit, mode);
2319 alg_in->cost.cost += op_cost;
2320 alg_in->cost.latency += op_cost;
2321 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2323 struct algorithm *x;
2324 best_cost = alg_in->cost;
2325 x = alg_in, alg_in = best_alg, best_alg = x;
2326 best_alg->log[best_alg->ops] = m;
2327 best_alg->op[best_alg->ops] = alg_shift;
2332 /* If we have an odd number, add or subtract one. */
2333 if ((t & 1) != 0)
2335 unsigned HOST_WIDE_INT w;
2337 for (w = 1; (w & t) != 0; w <<= 1)
2339 /* If T was -1, then W will be zero after the loop. This is another
2340 case where T ends with ...111. Handling this with (T + 1) and
2341 subtract 1 produces slightly better code and results in algorithm
2342 selection much faster than treating it like the ...0111 case
2343 below. */
2344 if (w == 0
2345 || (w > 2
2346 /* Reject the case where t is 3.
2347 Thus we prefer addition in that case. */
2348 && t != 3))
2350 /* T ends with ...111. Multiply by (T + 1) and subtract 1. */
2352 op_cost = add_cost[mode];
2353 new_limit.cost = best_cost.cost - op_cost;
2354 new_limit.latency = best_cost.latency - op_cost;
2355 synth_mult (alg_in, t + 1, &new_limit, mode);
2357 alg_in->cost.cost += op_cost;
2358 alg_in->cost.latency += op_cost;
2359 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2361 struct algorithm *x;
2362 best_cost = alg_in->cost;
2363 x = alg_in, alg_in = best_alg, best_alg = x;
2364 best_alg->log[best_alg->ops] = 0;
2365 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2368 else
2370 /* T ends with ...01 or ...011. Multiply by (T - 1) and add 1. */
2372 op_cost = add_cost[mode];
2373 new_limit.cost = best_cost.cost - op_cost;
2374 new_limit.latency = best_cost.latency - op_cost;
2375 synth_mult (alg_in, t - 1, &new_limit, mode);
2377 alg_in->cost.cost += op_cost;
2378 alg_in->cost.latency += op_cost;
2379 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2381 struct algorithm *x;
2382 best_cost = alg_in->cost;
2383 x = alg_in, alg_in = best_alg, best_alg = x;
2384 best_alg->log[best_alg->ops] = 0;
2385 best_alg->op[best_alg->ops] = alg_add_t_m2;
2390 /* Look for factors of t of the form
2391 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2392 If we find such a factor, we can multiply by t using an algorithm that
2393 multiplies by q, shift the result by m and add/subtract it to itself.
2395 We search for large factors first and loop down, even if large factors
2396 are less probable than small; if we find a large factor we will find a
2397 good sequence quickly, and therefore be able to prune (by decreasing
2398 COST_LIMIT) the search. */
2400 for (m = floor_log2 (t - 1); m >= 2; m--)
2402 unsigned HOST_WIDE_INT d;
2404 d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2405 if (t % d == 0 && t > d && m < maxm)
2407 /* If the target has a cheap shift-and-add instruction use
2408 that in preference to a shift insn followed by an add insn.
2409 Assume that the shift-and-add is "atomic" with a latency
2410 equal to its cost, otherwise assume that on superscalar
2411 hardware the shift may be executed concurrently with the
2412 earlier steps in the algorithm. */
2413 op_cost = add_cost[mode] + shift_cost[mode][m];
2414 if (shiftadd_cost[mode][m] < op_cost)
2416 op_cost = shiftadd_cost[mode][m];
2417 op_latency = op_cost;
2419 else
2420 op_latency = add_cost[mode];
2422 new_limit.cost = best_cost.cost - op_cost;
2423 new_limit.latency = best_cost.latency - op_latency;
2424 synth_mult (alg_in, t / d, &new_limit, mode);
2426 alg_in->cost.cost += op_cost;
2427 alg_in->cost.latency += op_latency;
2428 if (alg_in->cost.latency < op_cost)
2429 alg_in->cost.latency = op_cost;
2430 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2432 struct algorithm *x;
2433 best_cost = alg_in->cost;
2434 x = alg_in, alg_in = best_alg, best_alg = x;
2435 best_alg->log[best_alg->ops] = m;
2436 best_alg->op[best_alg->ops] = alg_add_factor;
2438 /* Other factors will have been taken care of in the recursion. */
2439 break;
2442 d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2443 if (t % d == 0 && t > d && m < maxm)
2445 /* If the target has a cheap shift-and-subtract insn use
2446 that in preference to a shift insn followed by a sub insn.
2447 Assume that the shift-and-sub is "atomic" with a latency
2448 equal to its cost, otherwise assume that on superscalar
2449 hardware the shift may be executed concurrently with the
2450 earlier steps in the algorithm. */
2451 op_cost = add_cost[mode] + shift_cost[mode][m];
2452 if (shiftsub_cost[mode][m] < op_cost)
2454 op_cost = shiftsub_cost[mode][m];
2455 op_latency = op_cost;
2457 else
2458 op_latency = add_cost[mode];
2460 new_limit.cost = best_cost.cost - op_cost;
2461 new_limit.latency = best_cost.latency - op_latency;
2462 synth_mult (alg_in, t / d, &new_limit, mode);
2464 alg_in->cost.cost += op_cost;
2465 alg_in->cost.latency += op_latency;
2466 if (alg_in->cost.latency < op_cost)
2467 alg_in->cost.latency = op_cost;
2468 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2470 struct algorithm *x;
2471 best_cost = alg_in->cost;
2472 x = alg_in, alg_in = best_alg, best_alg = x;
2473 best_alg->log[best_alg->ops] = m;
2474 best_alg->op[best_alg->ops] = alg_sub_factor;
2476 break;
2480 /* Try shift-and-add (load effective address) instructions,
2481 i.e. do a*3, a*5, a*9. */
2482 if ((t & 1) != 0)
2484 q = t - 1;
2485 q = q & -q;
2486 m = exact_log2 (q);
2487 if (m >= 0 && m < maxm)
2489 op_cost = shiftadd_cost[mode][m];
2490 new_limit.cost = best_cost.cost - op_cost;
2491 new_limit.latency = best_cost.latency - op_cost;
2492 synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2494 alg_in->cost.cost += op_cost;
2495 alg_in->cost.latency += op_cost;
2496 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2498 struct algorithm *x;
2499 best_cost = alg_in->cost;
2500 x = alg_in, alg_in = best_alg, best_alg = x;
2501 best_alg->log[best_alg->ops] = m;
2502 best_alg->op[best_alg->ops] = alg_add_t2_m;
2506 q = t + 1;
2507 q = q & -q;
2508 m = exact_log2 (q);
2509 if (m >= 0 && m < maxm)
2511 op_cost = shiftsub_cost[mode][m];
2512 new_limit.cost = best_cost.cost - op_cost;
2513 new_limit.latency = best_cost.latency - op_cost;
2514 synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2516 alg_in->cost.cost += op_cost;
2517 alg_in->cost.latency += op_cost;
2518 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2520 struct algorithm *x;
2521 best_cost = alg_in->cost;
2522 x = alg_in, alg_in = best_alg, best_alg = x;
2523 best_alg->log[best_alg->ops] = m;
2524 best_alg->op[best_alg->ops] = alg_sub_t2_m;
2529 /* If we are getting a too long sequence for `struct algorithm'
2530 to record, make this search fail. */
2531 if (best_alg->ops == MAX_BITS_PER_WORD)
2532 return;
2534 /* If best_cost has not decreased, we have not found any algorithm. */
2535 if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2536 return;
2538 /* Copy the algorithm from temporary space to the space at alg_out.
2539 We avoid using structure assignment because the majority of
2540 best_alg is normally undefined, and this is a critical function. */
2541 alg_out->ops = best_alg->ops + 1;
2542 alg_out->cost = best_cost;
2543 memcpy (alg_out->op, best_alg->op,
2544 alg_out->ops * sizeof *alg_out->op);
2545 memcpy (alg_out->log, best_alg->log,
2546 alg_out->ops * sizeof *alg_out->log);
2549 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2550 Try three variations:
2552 - a shift/add sequence based on VAL itself
2553 - a shift/add sequence based on -VAL, followed by a negation
2554 - a shift/add sequence based on VAL - 1, followed by an addition.
2556 Return true if the cheapest of these cost less than MULT_COST,
2557 describing the algorithm in *ALG and final fixup in *VARIANT. */
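/* For instance (a hypothetical case): for VAL == -7 the negate variant
   is usually preferable, synthesizing x * 7 as (x << 3) - x and then
   negating, rather than synthesizing the wide bit pattern of -7
   directly.  */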
2559 static bool
2560 choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
2561 struct algorithm *alg, enum mult_variant *variant,
2562 int mult_cost)
2564 struct algorithm alg2;
2565 struct mult_cost limit;
2566 int op_cost;
2568 *variant = basic_variant;
2569 limit.cost = mult_cost;
2570 limit.latency = mult_cost;
2571 synth_mult (alg, val, &limit, mode);
2573 /* This works only if the inverted value actually fits in an
2574 `unsigned int' */
2575 if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
2577 op_cost = neg_cost[mode];
2578 if (MULT_COST_LESS (&alg->cost, mult_cost))
2580 limit.cost = alg->cost.cost - op_cost;
2581 limit.latency = alg->cost.latency - op_cost;
2583 else
2585 limit.cost = mult_cost - op_cost;
2586 limit.latency = mult_cost - op_cost;
2589 synth_mult (&alg2, -val, &limit, mode);
2590 alg2.cost.cost += op_cost;
2591 alg2.cost.latency += op_cost;
2592 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2593 *alg = alg2, *variant = negate_variant;
2596 /* This proves very useful for division-by-constant. */
2597 op_cost = add_cost[mode];
2598 if (MULT_COST_LESS (&alg->cost, mult_cost))
2600 limit.cost = alg->cost.cost - op_cost;
2601 limit.latency = alg->cost.latency - op_cost;
2603 else
2605 limit.cost = mult_cost - op_cost;
2606 limit.latency = mult_cost - op_cost;
2609 synth_mult (&alg2, val - 1, &limit, mode);
2610 alg2.cost.cost += op_cost;
2611 alg2.cost.latency += op_cost;
2612 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2613 *alg = alg2, *variant = add_variant;
2615 return MULT_COST_LESS (&alg->cost, mult_cost);
2618 /* A subroutine of expand_mult, used for constant multiplications.
2619 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2620 convenient. Use the shift/add sequence described by ALG and apply
2621 the final fixup specified by VARIANT. */
2623 static rtx
2624 expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
2625 rtx target, const struct algorithm *alg,
2626 enum mult_variant variant)
2628 HOST_WIDE_INT val_so_far;
2629 rtx insn, accum, tem;
2630 int opno;
2631 enum machine_mode nmode;
2633 /* Avoid referencing memory over and over.
2634 For speed, but also for correctness when mem is volatile. */
2635 if (MEM_P (op0))
2636 op0 = force_reg (mode, op0);
2638 /* ACCUM starts out either as OP0 or as a zero, depending on
2639 the first operation. */
2641 if (alg->op[0] == alg_zero)
2643 accum = copy_to_mode_reg (mode, const0_rtx);
2644 val_so_far = 0;
2646 else if (alg->op[0] == alg_m)
2648 accum = copy_to_mode_reg (mode, op0);
2649 val_so_far = 1;
2651 else
2652 abort ();
2654 for (opno = 1; opno < alg->ops; opno++)
2656 int log = alg->log[opno];
2657 rtx shift_subtarget = optimize ? 0 : accum;
2658 rtx add_target
2659 = (opno == alg->ops - 1 && target != 0 && variant != add_variant
2660 && !optimize)
2661 ? target : 0;
2662 rtx accum_target = optimize ? 0 : accum;
2664 switch (alg->op[opno])
2666 case alg_shift:
2667 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2668 build_int_cst (NULL_TREE, log),
2669 NULL_RTX, 0);
2670 val_so_far <<= log;
2671 break;
2673 case alg_add_t_m2:
2674 tem = expand_shift (LSHIFT_EXPR, mode, op0,
2675 build_int_cst (NULL_TREE, log),
2676 NULL_RTX, 0);
2677 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2678 add_target ? add_target : accum_target);
2679 val_so_far += (HOST_WIDE_INT) 1 << log;
2680 break;
2682 case alg_sub_t_m2:
2683 tem = expand_shift (LSHIFT_EXPR, mode, op0,
2684 build_int_cst (NULL_TREE, log),
2685 NULL_RTX, 0);
2686 accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
2687 add_target ? add_target : accum_target);
2688 val_so_far -= (HOST_WIDE_INT) 1 << log;
2689 break;
2691 case alg_add_t2_m:
2692 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2693 build_int_cst (NULL_TREE, log),
2694 shift_subtarget,
2696 accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
2697 add_target ? add_target : accum_target);
2698 val_so_far = (val_so_far << log) + 1;
2699 break;
2701 case alg_sub_t2_m:
2702 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2703 build_int_cst (NULL_TREE, log),
2704 shift_subtarget, 0);
2705 accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
2706 add_target ? add_target : accum_target);
2707 val_so_far = (val_so_far << log) - 1;
2708 break;
2710 case alg_add_factor:
2711 tem = expand_shift (LSHIFT_EXPR, mode, accum,
2712 build_int_cst (NULL_TREE, log),
2713 NULL_RTX, 0);
2714 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2715 add_target ? add_target : accum_target);
2716 val_so_far += val_so_far << log;
2717 break;
2719 case alg_sub_factor:
2720 tem = expand_shift (LSHIFT_EXPR, mode, accum,
2721 build_int_cst (NULL_TREE, log),
2722 NULL_RTX, 0);
2723 accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
2724 (add_target
2725 ? add_target : (optimize ? 0 : tem)));
2726 val_so_far = (val_so_far << log) - val_so_far;
2727 break;
2729 default:
2730 abort ();
2733 /* Write a REG_EQUAL note on the last insn so that we can cse
2734 multiplication sequences. Note that if ACCUM is a SUBREG,
2735 we've set the inner register and must properly indicate
2736 that. */
2738 tem = op0, nmode = mode;
2739 if (GET_CODE (accum) == SUBREG)
2741 nmode = GET_MODE (SUBREG_REG (accum));
2742 tem = gen_lowpart (nmode, op0);
2745 insn = get_last_insn ();
2746 set_unique_reg_note (insn, REG_EQUAL,
2747 gen_rtx_MULT (nmode, tem, GEN_INT (val_so_far)));
2750 if (variant == negate_variant)
2752 val_so_far = -val_so_far;
2753 accum = expand_unop (mode, neg_optab, accum, target, 0);
2755 else if (variant == add_variant)
2757 val_so_far = val_so_far + 1;
2758 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
2761 /* Compare only the bits of val and val_so_far that are significant
2762 in the result mode, to avoid sign-/zero-extension confusion. */
2763 val &= GET_MODE_MASK (mode);
2764 val_so_far &= GET_MODE_MASK (mode);
2765 if (val != val_so_far)
2766 abort ();
2768 return accum;
2771 /* Perform a multiplication and return an rtx for the result.
2772 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
2773 TARGET is a suggestion for where to store the result (an rtx).
2775 We check specially for a constant integer as OP1.
2776 If you want this check for OP0 as well, then before calling
2777 you should swap the two operands if OP0 would be constant. */
2780 expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
2781 int unsignedp)
2783 rtx const_op1 = op1;
2784 enum mult_variant variant;
2785 struct algorithm algorithm;
2787 /* synth_mult does an `unsigned int' multiply. As long as the mode is
2788 less than or equal in size to `unsigned int' this doesn't matter.
2789 If the mode is larger than `unsigned int', then synth_mult works only
2790 if the constant value exactly fits in an `unsigned int' without any
2791 truncation. This means that multiplying by negative values does
2792 not work; results are off by 2^32 on a 32 bit machine. */
2794 /* If we are multiplying in DImode, it may still be a win
2795 to try to work with shifts and adds. */
2796 if (GET_CODE (op1) == CONST_DOUBLE
2797 && GET_MODE_CLASS (GET_MODE (op1)) == MODE_INT
2798 && HOST_BITS_PER_INT >= BITS_PER_WORD
2799 && CONST_DOUBLE_HIGH (op1) == 0)
2800 const_op1 = GEN_INT (CONST_DOUBLE_LOW (op1));
2801 else if (HOST_BITS_PER_INT < GET_MODE_BITSIZE (mode)
2802 && GET_CODE (op1) == CONST_INT
2803 && INTVAL (op1) < 0)
2804 const_op1 = 0;
2806 /* We used to test optimize here, on the grounds that it's better to
2807 produce a smaller program when -O is not used.
2808 But this causes such a terrible slowdown sometimes
2809 that it seems better to use synth_mult always. */
2811 if (const_op1 && GET_CODE (const_op1) == CONST_INT
2812 && (unsignedp || !flag_trapv))
2814 int mult_cost = rtx_cost (gen_rtx_MULT (mode, op0, op1), SET);
2816 if (choose_mult_variant (mode, INTVAL (const_op1), &algorithm, &variant,
2817 mult_cost))
2818 return expand_mult_const (mode, op0, INTVAL (const_op1), target,
2819 &algorithm, variant);
2822 if (GET_CODE (op0) == CONST_DOUBLE)
2824 rtx temp = op0;
2825 op0 = op1;
2826 op1 = temp;
2829 /* Expand x*2.0 as x+x. */
2830 if (GET_CODE (op1) == CONST_DOUBLE
2831 && GET_MODE_CLASS (mode) == MODE_FLOAT)
2833 REAL_VALUE_TYPE d;
2834 REAL_VALUE_FROM_CONST_DOUBLE (d, op1);
2836 if (REAL_VALUES_EQUAL (d, dconst2))
2838 op0 = force_reg (GET_MODE (op0), op0);
2839 return expand_binop (mode, add_optab, op0, op0,
2840 target, unsignedp, OPTAB_LIB_WIDEN);
2844 /* This used to use umul_optab if unsigned, but for non-widening multiply
2845 there is no difference between signed and unsigned. */
2846 op0 = expand_binop (mode,
2847 ! unsignedp
2848 && flag_trapv && (GET_MODE_CLASS(mode) == MODE_INT)
2849 ? smulv_optab : smul_optab,
2850 op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
2851 if (op0 == 0)
2852 abort ();
2853 return op0;
2856 /* Return the smallest n such that 2**n >= X. */
2859 ceil_log2 (unsigned HOST_WIDE_INT x)
2861 return floor_log2 (x - 1) + 1;
2864 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
2865 replace division by D, and put the least significant N bits of the result
2866 in *MULTIPLIER_PTR and return the most significant bit.
2868 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT); the
2869 needed precision is in PRECISION (should be <= N).
2871 PRECISION should be as small as possible so this function can choose
2872 a multiplier more freely.
2874 The rounded-up logarithm of D is placed in *lgup_ptr. A shift count that
2875 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
2877 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
2878 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
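/* A worked example (assuming 32-bit operands, not specific to any
   target): for D == 3 with N == PRECISION == 32, this chooses the
   multiplier 0xAAAAAAAB with *POST_SHIFT_PTR == 1, so that for unsigned
   32-bit x, x / 3 == ((x * 0xAAAAAAAB) >> 32) >> 1, i.e. the high half
   of the 64-bit product followed by the post-shift.  */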
2880 static
2881 unsigned HOST_WIDE_INT
2882 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
2883 unsigned HOST_WIDE_INT *multiplier_ptr,
2884 int *post_shift_ptr, int *lgup_ptr)
2886 HOST_WIDE_INT mhigh_hi, mlow_hi;
2887 unsigned HOST_WIDE_INT mhigh_lo, mlow_lo;
2888 int lgup, post_shift;
2889 int pow, pow2;
2890 unsigned HOST_WIDE_INT nl, dummy1;
2891 HOST_WIDE_INT nh, dummy2;
2893 /* lgup = ceil(log2(divisor)); */
2894 lgup = ceil_log2 (d);
2896 if (lgup > n)
2897 abort ();
2899 pow = n + lgup;
2900 pow2 = n + lgup - precision;
2902 if (pow == 2 * HOST_BITS_PER_WIDE_INT)
2904 /* We could handle this with some effort, but this case is much better
2905 handled directly with a scc insn, so rely on caller using that. */
2906 abort ();
2909 /* mlow = 2^(N + lgup)/d */
2910 if (pow >= HOST_BITS_PER_WIDE_INT)
2912 nh = (HOST_WIDE_INT) 1 << (pow - HOST_BITS_PER_WIDE_INT);
2913 nl = 0;
2915 else
2917 nh = 0;
2918 nl = (unsigned HOST_WIDE_INT) 1 << pow;
2920 div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
2921 &mlow_lo, &mlow_hi, &dummy1, &dummy2);
2923 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
2924 if (pow2 >= HOST_BITS_PER_WIDE_INT)
2925 nh |= (HOST_WIDE_INT) 1 << (pow2 - HOST_BITS_PER_WIDE_INT);
2926 else
2927 nl |= (unsigned HOST_WIDE_INT) 1 << pow2;
2928 div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
2929 &mhigh_lo, &mhigh_hi, &dummy1, &dummy2);
2931 if (mhigh_hi && nh - d >= d)
2932 abort ();
2933 if (mhigh_hi > 1 || mlow_hi > 1)
2934 abort ();
2935 /* Assert that mlow < mhigh. */
2936 if (! (mlow_hi < mhigh_hi || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo)))
2937 abort ();
2939 /* If precision == N, then mlow, mhigh exceed 2^N
2940 (but they do not exceed 2^(N+1)). */
2942 /* Reduce to lowest terms. */
2943 for (post_shift = lgup; post_shift > 0; post_shift--)
2945 unsigned HOST_WIDE_INT ml_lo = (mlow_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mlow_lo >> 1);
2946 unsigned HOST_WIDE_INT mh_lo = (mhigh_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mhigh_lo >> 1);
2947 if (ml_lo >= mh_lo)
2948 break;
2950 mlow_hi = 0;
2951 mlow_lo = ml_lo;
2952 mhigh_hi = 0;
2953 mhigh_lo = mh_lo;
2956 *post_shift_ptr = post_shift;
2957 *lgup_ptr = lgup;
2958 if (n < HOST_BITS_PER_WIDE_INT)
2960 unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
2961 *multiplier_ptr = mhigh_lo & mask;
2962 return mhigh_lo >= mask;
2964 else
2966 *multiplier_ptr = mhigh_lo;
2967 return mhigh_hi;
2971 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
2972 congruent to 1 (mod 2**N). */
2974 static unsigned HOST_WIDE_INT
2975 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
2977 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
2979 /* The algorithm notes that the choice y = x satisfies
2980 x*y == 1 mod 2^3, since x is assumed odd.
2981 Each iteration doubles the number of bits of significance in y. */
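/* For example (shown only as an illustration): for x == 3 and n == 8
   the iteration goes y = 3 -> 235 -> 171, and indeed 3 * 171 == 513
   == 2 * 256 + 1, so 171 (0xab) is the inverse of 3 mod 2**8.  */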
2983 unsigned HOST_WIDE_INT mask;
2984 unsigned HOST_WIDE_INT y = x;
2985 int nbit = 3;
2987 mask = (n == HOST_BITS_PER_WIDE_INT
2988 ? ~(unsigned HOST_WIDE_INT) 0
2989 : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
2991 while (nbit < n)
2993 y = y * (2 - x*y) & mask; /* Modulo 2^N */
2994 nbit *= 2;
2996 return y;
2999 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3000 flavor of OP0 and OP1. ADJ_OPERAND is already the high half of the
3001 product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed product
3002 to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3003 become signed.
3005 The result is put in TARGET if that is convenient.
3007 MODE is the mode of operation. */
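/* In other words (mode width N, all arithmetic mod 2**N):
   high_unsigned (a, b) == high_signed (a, b)
   + (a < 0 ? b : 0) + (b < 0 ? a : 0),
   and the signed high part is recovered with the corresponding
   subtractions; the code below forms each conditional term with an
   arithmetic shift by N - 1 and an AND.  */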
3010 expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
3011 rtx op1, rtx target, int unsignedp)
3013 rtx tem;
3014 enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3016 tem = expand_shift (RSHIFT_EXPR, mode, op0,
3017 build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode) - 1),
3018 NULL_RTX, 0);
3019 tem = expand_and (mode, tem, op1, NULL_RTX);
3020 adj_operand
3021 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3022 adj_operand);
3024 tem = expand_shift (RSHIFT_EXPR, mode, op1,
3025 build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode) - 1),
3026 NULL_RTX, 0);
3027 tem = expand_and (mode, tem, op0, NULL_RTX);
3028 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3029 target);
3031 return target;
3034 /* Subroutine of expand_mult_highpart. Return the MODE high part of OP. */
3036 static rtx
3037 extract_high_half (enum machine_mode mode, rtx op)
3039 enum machine_mode wider_mode;
3041 if (mode == word_mode)
3042 return gen_highpart (mode, op);
3044 wider_mode = GET_MODE_WIDER_MODE (mode);
3045 op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3046 build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode)), 0, 1);
3047 return convert_modes (mode, wider_mode, op, 0);
3050 /* Like expand_mult_highpart, but only consider using a multiplication
3051 optab. OP1 is an rtx for the constant operand. */
3053 static rtx
3054 expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
3055 rtx target, int unsignedp, int max_cost)
3057 rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3058 enum machine_mode wider_mode;
3059 optab moptab;
3060 rtx tem;
3061 int size;
3063 wider_mode = GET_MODE_WIDER_MODE (mode);
3064 size = GET_MODE_BITSIZE (mode);
3066 /* Firstly, try using a multiplication insn that only generates the needed
3067 high part of the product, and in the sign flavor of unsignedp. */
3068 if (mul_highpart_cost[mode] < max_cost)
3070 moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3071 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3072 unsignedp, OPTAB_DIRECT);
3073 if (tem)
3074 return tem;
3077 /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3078 Need to adjust the result after the multiplication. */
3079 if (size - 1 < BITS_PER_WORD
3080 && (mul_highpart_cost[mode] + 2 * shift_cost[mode][size-1]
3081 + 4 * add_cost[mode] < max_cost))
3083 moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3084 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3085 unsignedp, OPTAB_DIRECT);
3086 if (tem)
3087 /* We used the wrong signedness. Adjust the result. */
3088 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3089 tem, unsignedp);
3092 /* Try widening multiplication. */
3093 moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3094 if (moptab->handlers[wider_mode].insn_code != CODE_FOR_nothing
3095 && mul_widen_cost[wider_mode] < max_cost)
3097 tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3098 unsignedp, OPTAB_WIDEN);
3099 if (tem)
3100 return extract_high_half (mode, tem);
3103 /* Try widening the mode and perform a non-widening multiplication. */
3104 moptab = smul_optab;
3105 if (smul_optab->handlers[wider_mode].insn_code != CODE_FOR_nothing
3106 && size - 1 < BITS_PER_WORD
3107 && mul_cost[wider_mode] + shift_cost[mode][size-1] < max_cost)
3109 tem = expand_binop (wider_mode, moptab, op0, op1, 0,
3110 unsignedp, OPTAB_WIDEN);
3111 if (tem)
3112 return extract_high_half (mode, tem);
3115 /* Try widening multiplication of opposite signedness, and adjust. */
3116 moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3117 if (moptab->handlers[wider_mode].insn_code != CODE_FOR_nothing
3118 && size - 1 < BITS_PER_WORD
3119 && (mul_widen_cost[wider_mode] + 2 * shift_cost[mode][size-1]
3120 + 4 * add_cost[mode] < max_cost))
3122 tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3123 NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3124 if (tem != 0)
3126 tem = extract_high_half (mode, tem);
3127 /* We used the wrong signedness. Adjust the result. */
3128 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3129 target, unsignedp);
3133 return 0;
3136 /* Emit code to multiply OP0 and CNST1, putting the high half of the result
3137 in TARGET if that is convenient, and return where the result is. If the
3138 operation cannot be performed, 0 is returned.
3140 MODE is the mode of operation and result.
3142 UNSIGNEDP nonzero means unsigned multiply.
3144 MAX_COST is the total allowed cost for the expanded RTL. */
3147 expand_mult_highpart (enum machine_mode mode, rtx op0,
3148 unsigned HOST_WIDE_INT cnst1, rtx target,
3149 int unsignedp, int max_cost)
3151 enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3152 int extra_cost;
3153 bool sign_adjust = false;
3154 enum mult_variant variant;
3155 struct algorithm alg;
3156 rtx op1, tem;
3158 /* We can't support modes wider than HOST_BITS_PER_WIDE_INT. */
3159 if (GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT)
3160 abort ();
3162 op1 = gen_int_mode (cnst1, wider_mode);
3163 cnst1 &= GET_MODE_MASK (mode);
3165 /* We can't optimize modes wider than BITS_PER_WORD.
3166 ??? We might be able to perform double-word arithmetic if
3167 mode == word_mode, however all the cost calculations in
3168 synth_mult etc. assume single-word operations. */
3169 if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3170 return expand_mult_highpart_optab (mode, op0, op1, target,
3171 unsignedp, max_cost);
3173 extra_cost = shift_cost[mode][GET_MODE_BITSIZE (mode) - 1];
3175 /* Check whether we try to multiply by a negative constant. */
3176 if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3178 sign_adjust = true;
3179 extra_cost += add_cost[mode];
3182 /* See whether shift/add multiplication is cheap enough. */
3183 if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3184 max_cost - extra_cost))
3186 /* See whether the specialized multiplication optabs are
3187 cheaper than the shift/add version. */
3188 tem = expand_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3189 alg.cost.cost + extra_cost);
3190 if (tem)
3191 return tem;
3193 tem = convert_to_mode (wider_mode, op0, unsignedp);
3194 tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3195 tem = extract_high_half (mode, tem);
3197 /* Adjust result for signedness. */
3198 if (sign_adjust)
3199 tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3201 return tem;
3203 return expand_mult_highpart_optab (mode, op0, op1, target,
3204 unsignedp, max_cost);
3208 /* Expand signed modulus of OP0 by a power of two D in mode MODE. */
3210 static rtx
3211 expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3213 unsigned HOST_WIDE_INT mask;
3214 rtx result, temp, shift, label;
3215 int logd;
3217 logd = floor_log2 (d);
3218 result = gen_reg_rtx (mode);
3220 /* Avoid conditional branches when they're expensive. */
3221 if (BRANCH_COST >= 2
3222 && !optimize_size)
3224 rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3225 mode, 0, -1);
3226 if (signmask)
3228 signmask = force_reg (mode, signmask);
3229 mask = ((HOST_WIDE_INT) 1 << logd) - 1;
3230 shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3232 /* Use the rtx_cost of a LSHIFTRT instruction to determine
3233 which instruction sequence to use. If logical right shifts
3234 are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
3235 use a LSHIFTRT, 1 ADD, 1 SUB and an AND. */
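/* For illustration (hypothetical operands): with D == 8 (LOGD == 3) and
   a negative OP0, the shift-based variant computes ((OP0 + 7) & 7) - 7
   and the xor-based variant - ((-OP0) & 7); both equal OP0 % 8, and for
   non-negative OP0 the added or xored mask is 0.  */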
3237 temp = gen_rtx_LSHIFTRT (mode, result, shift);
3238 if (lshr_optab->handlers[mode].insn_code == CODE_FOR_nothing
3239 || rtx_cost (temp, SET) > COSTS_N_INSNS (2))
3241 temp = expand_binop (mode, xor_optab, op0, signmask,
3242 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3243 temp = expand_binop (mode, sub_optab, temp, signmask,
3244 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3245 temp = expand_binop (mode, and_optab, temp, GEN_INT (mask),
3246 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3247 temp = expand_binop (mode, xor_optab, temp, signmask,
3248 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3249 temp = expand_binop (mode, sub_optab, temp, signmask,
3250 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3252 else
3254 signmask = expand_binop (mode, lshr_optab, signmask, shift,
3255 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3256 signmask = force_reg (mode, signmask);
3258 temp = expand_binop (mode, add_optab, op0, signmask,
3259 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3260 temp = expand_binop (mode, and_optab, temp, GEN_INT (mask),
3261 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3262 temp = expand_binop (mode, sub_optab, temp, signmask,
3263 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3265 return temp;
3269 /* Mask contains the mode's signbit and the significant bits of the
3270 modulus. By including the signbit in the operation, many targets
3271 can avoid an explicit compare operation in the following comparison
3272 against zero. */
3274 mask = (HOST_WIDE_INT) -1 << (GET_MODE_BITSIZE (mode) - 1)
3275 | (((HOST_WIDE_INT) 1 << logd) - 1);
3277 temp = expand_binop (mode, and_optab, op0, GEN_INT (mask), result,
3278 1, OPTAB_LIB_WIDEN);
3279 if (temp != result)
3280 emit_move_insn (result, temp);
3282 label = gen_label_rtx ();
3283 do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3285 temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3286 0, OPTAB_LIB_WIDEN);
3287 mask = (HOST_WIDE_INT) -1 << logd;
3288 temp = expand_binop (mode, ior_optab, temp, GEN_INT (mask), result,
3289 1, OPTAB_LIB_WIDEN);
3290 temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3291 0, OPTAB_LIB_WIDEN);
3292 if (temp != result)
3293 emit_move_insn (result, temp);
3294 emit_label (label);
3295 return result;
3298 /* Expand signed division of OP0 by a power of two D in mode MODE.
3299 This routine is only called for positive values of D. */
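/* The general trick used below (illustrated for a 32-bit mode and
   D == 8): add D - 1 to the dividend only when it is negative, then do
   an arithmetic shift, e.g. x / 8 == (x + ((x >> 31) & 7)) >> 3, where
   both shifts are arithmetic so (x >> 31) is -1 or 0.  */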
3301 static rtx
3302 expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3304 rtx temp, label;
3305 tree shift;
3306 int logd;
3308 logd = floor_log2 (d);
3309 shift = build_int_cst (NULL_TREE, logd);
3311 if (d == 2 && BRANCH_COST >= 1)
3313 temp = gen_reg_rtx (mode);
3314 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3315 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3316 0, OPTAB_LIB_WIDEN);
3317 return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3320 #ifdef HAVE_conditional_move
3321 if (BRANCH_COST >= 2)
3323 rtx temp2;
3325 start_sequence ();
3326 temp2 = copy_to_mode_reg (mode, op0);
3327 temp = expand_binop (mode, add_optab, temp2, GEN_INT (d-1),
3328 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3329 temp = force_reg (mode, temp);
3331 /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */
3332 temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3333 mode, temp, temp2, mode, 0);
3334 if (temp2)
3336 rtx seq = get_insns ();
3337 end_sequence ();
3338 emit_insn (seq);
3339 return expand_shift (RSHIFT_EXPR, mode, temp2, shift, NULL_RTX, 0);
3341 end_sequence ();
3343 #endif
3345 if (BRANCH_COST >= 2)
3347 int ushift = GET_MODE_BITSIZE (mode) - logd;
3349 temp = gen_reg_rtx (mode);
3350 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3351 if (shift_cost[mode][ushift] > COSTS_N_INSNS (1))
3352 temp = expand_binop (mode, and_optab, temp, GEN_INT (d - 1),
3353 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3354 else
3355 temp = expand_shift (RSHIFT_EXPR, mode, temp,
3356 build_int_cst (NULL_TREE, ushift),
3357 NULL_RTX, 1);
3358 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3359 0, OPTAB_LIB_WIDEN);
3360 return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3363 label = gen_label_rtx ();
3364 temp = copy_to_mode_reg (mode, op0);
3365 do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3366 expand_inc (temp, GEN_INT (d - 1));
3367 emit_label (label);
3368 return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3371 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3372 if that is convenient, and returning where the result is.
3373 You may request either the quotient or the remainder as the result;
3374 specify REM_FLAG nonzero to get the remainder.
3376 CODE is the expression code for which kind of division this is;
3377 it controls how rounding is done. MODE is the machine mode to use.
3378 UNSIGNEDP nonzero means do unsigned division. */
3380 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3381 and then correct it by or'ing in missing high bits
3382 if result of ANDI is nonzero.
3383 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3384 This could optimize to a bfexts instruction.
3385 But C doesn't use these operations, so their optimizations are
3386 left for later. */
3387 /* ??? For modulo, we don't actually need the highpart of the first product,
3388 the low part will do nicely. And for small divisors, the second multiply
3389 can also be a low-part only multiply or even be completely left out.
3390 E.g. to calculate the remainder of a division by 3 with a 32 bit
3391 multiply, multiply with 0x55555556 and extract the upper two bits;
3392 the result is exact for inputs up to 0x1fffffff.
3393 The input range can be reduced by using cross-sum rules.
3394 For odd divisors >= 3, the following table gives right shift counts
3395 so that if a number is shifted by an integer multiple of the given
3396 amount, the remainder stays the same:
3397 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3398 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3399 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3400 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3401 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3403 Cross-sum rules for even numbers can be derived by leaving as many bits
3404 to the right alone as the divisor has zeros to the right.
3405 E.g. if x is an unsigned 32 bit number:
3406 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3409 #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
3412 expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3413 rtx op0, rtx op1, rtx target, int unsignedp)
3415 enum machine_mode compute_mode;
3416 rtx tquotient;
3417 rtx quotient = 0, remainder = 0;
3418 rtx last;
3419 int size;
3420 rtx insn, set;
3421 optab optab1, optab2;
3422 int op1_is_constant, op1_is_pow2 = 0;
3423 int max_cost, extra_cost;
3424 static HOST_WIDE_INT last_div_const = 0;
3425 static HOST_WIDE_INT ext_op1;
3427 op1_is_constant = GET_CODE (op1) == CONST_INT;
3428 if (op1_is_constant)
3430 ext_op1 = INTVAL (op1);
3431 if (unsignedp)
3432 ext_op1 &= GET_MODE_MASK (mode);
3433 op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3434 || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3438 This is the structure of expand_divmod:
3440 First comes code to fix up the operands so we can perform the operations
3441 correctly and efficiently.
3443 Second comes a switch statement with code specific for each rounding mode.
3444 For some special operands this code emits all RTL for the desired
3445 operation, for other cases, it generates only a quotient and stores it in
3446 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
3447 to indicate that it has not done anything.
3449 Last comes code that finishes the operation. If QUOTIENT is set and
3450 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
3451 QUOTIENT is not set, it is computed using trunc rounding.
3453 We try to generate special code for division and remainder when OP1 is a
3454 constant. If |OP1| = 2**n we can use shifts and some other fast
3455 operations. For other values of OP1, we compute a carefully selected
3456 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3457 by m.
3459 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3460 half of the product. Different strategies for generating the product are
3461 implemented in expand_mult_highpart.
3463 If what we actually want is the remainder, we generate that by another
3464 by-constant multiplication and a subtraction. */
3466 /* We shouldn't be called with OP1 == const1_rtx, but some of the
3467 code below will malfunction if we are, so check here and handle
3468 the special case if so. */
3469 if (op1 == const1_rtx)
3470 return rem_flag ? const0_rtx : op0;
3472 /* When dividing by -1, we could get an overflow.
3473 negv_optab can handle overflows. */
3474 if (! unsignedp && op1 == constm1_rtx)
3476 if (rem_flag)
3477 return const0_rtx;
3478 return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT
3479 ? negv_optab : neg_optab, op0, target, 0);
3482 if (target
3483 /* Don't use the function value register as a target
3484 since we have to read it as well as write it,
3485 and function-inlining gets confused by this. */
3486 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3487 /* Don't clobber an operand while doing a multi-step calculation. */
3488 || ((rem_flag || op1_is_constant)
3489 && (reg_mentioned_p (target, op0)
3490 || (MEM_P (op0) && MEM_P (target))))
3491 || reg_mentioned_p (target, op1)
3492 || (MEM_P (op1) && MEM_P (target))))
3493 target = 0;
3495 /* Get the mode in which to perform this computation. Normally it will
3496 be MODE, but sometimes we can't do the desired operation in MODE.
3497 If so, pick a wider mode in which we can do the operation. Convert
3498 to that mode at the start to avoid repeated conversions.
3500 First see what operations we need. These depend on the expression
3501 we are evaluating. (We assume that divxx3 insns exist under the
3502 same conditions as modxx3 insns, and that these insns don't normally
3503 fail. If these assumptions are not correct, we may generate less
3504 efficient code in some cases.)
3506 Then see if we find a mode in which we can open-code that operation
3507 (either a division, modulus, or shift). Finally, check for the smallest
3508 mode for which we can do the operation with a library call. */
3510 /* We might want to refine this now that we have division-by-constant
3511 optimization. Since expand_mult_highpart tries so many variants, it is
3512 not straightforward to generalize this. Maybe we should make an array
3513 of possible modes in init_expmed? Save this for GCC 2.7. */
3515 optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3516 ? (unsignedp ? lshr_optab : ashr_optab)
3517 : (unsignedp ? udiv_optab : sdiv_optab));
3518 optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3519 ? optab1
3520 : (unsignedp ? udivmod_optab : sdivmod_optab));
3522 for (compute_mode = mode; compute_mode != VOIDmode;
3523 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3524 if (optab1->handlers[compute_mode].insn_code != CODE_FOR_nothing
3525 || optab2->handlers[compute_mode].insn_code != CODE_FOR_nothing)
3526 break;
3528 if (compute_mode == VOIDmode)
3529 for (compute_mode = mode; compute_mode != VOIDmode;
3530 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3531 if (optab1->handlers[compute_mode].libfunc
3532 || optab2->handlers[compute_mode].libfunc)
3533 break;
3535 /* If we still couldn't find a mode, use MODE, but we'll probably abort
3536 in expand_binop. */
3537 if (compute_mode == VOIDmode)
3538 compute_mode = mode;
3540 if (target && GET_MODE (target) == compute_mode)
3541 tquotient = target;
3542 else
3543 tquotient = gen_reg_rtx (compute_mode);
3545 size = GET_MODE_BITSIZE (compute_mode);
3546 #if 0
3547 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3548 (mode), and thereby get better code when OP1 is a constant. Do that
3549 later. It will require going over all usages of SIZE below. */
3550 size = GET_MODE_BITSIZE (mode);
3551 #endif
3553 /* Only deduct something for a REM if the last divide done was
3554 for a different constant. Then set the constant of the last
3555 divide. */
3556 max_cost = div_cost[compute_mode]
3557 - (rem_flag && ! (last_div_const != 0 && op1_is_constant
3558 && INTVAL (op1) == last_div_const)
3559 ? mul_cost[compute_mode] + add_cost[compute_mode]
3560 : 0);
3562 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
3564 /* Now convert to the best mode to use. */
3565 if (compute_mode != mode)
3567 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
3568 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
3570 /* convert_modes may have placed op1 into a register, so we
3571 must recompute the following. */
3572 op1_is_constant = GET_CODE (op1) == CONST_INT;
3573 op1_is_pow2 = (op1_is_constant
3574 && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
3575 || (! unsignedp
3576 && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1))))));
3579 /* If one of the operands is a volatile MEM, copy it into a register. */
3581 if (MEM_P (op0) && MEM_VOLATILE_P (op0))
3582 op0 = force_reg (compute_mode, op0);
3583 if (MEM_P (op1) && MEM_VOLATILE_P (op1))
3584 op1 = force_reg (compute_mode, op1);
3586 /* If we need the remainder or if OP1 is constant, we need to
3587 put OP0 in a register in case it has any queued subexpressions. */
3588 if (rem_flag || op1_is_constant)
3589 op0 = force_reg (compute_mode, op0);
3591 last = get_last_insn ();
3593 /* Promote floor rounding to trunc rounding for unsigned operations. */
3594 if (unsignedp)
3596 if (code == FLOOR_DIV_EXPR)
3597 code = TRUNC_DIV_EXPR;
3598 if (code == FLOOR_MOD_EXPR)
3599 code = TRUNC_MOD_EXPR;
3600 if (code == EXACT_DIV_EXPR && op1_is_pow2)
3601 code = TRUNC_DIV_EXPR;
3604 if (op1 != const0_rtx)
3605 switch (code)
3607 case TRUNC_MOD_EXPR:
3608 case TRUNC_DIV_EXPR:
3609 if (op1_is_constant)
3611 if (unsignedp)
3613 unsigned HOST_WIDE_INT mh, ml;
3614 int pre_shift, post_shift;
3615 int dummy;
3616 unsigned HOST_WIDE_INT d = (INTVAL (op1)
3617 & GET_MODE_MASK (compute_mode));
3619 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
3621 pre_shift = floor_log2 (d);
3622 if (rem_flag)
3624 remainder
3625 = expand_binop (compute_mode, and_optab, op0,
3626 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
3627 remainder, 1,
3628 OPTAB_LIB_WIDEN);
3629 if (remainder)
3630 return gen_lowpart (mode, remainder);
3632 quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3633 build_int_cst (NULL_TREE,
3634 pre_shift),
3635 tquotient, 1);
3637 else if (size <= HOST_BITS_PER_WIDE_INT)
3639 if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
3641 /* Most significant bit of divisor is set; emit an scc
3642 insn. */
3643 quotient = emit_store_flag (tquotient, GEU, op0, op1,
3644 compute_mode, 1, 1);
3645 if (quotient == 0)
3646 goto fail1;
3648 else
3650 /* Find a suitable multiplier and right shift count
3651 instead of multiplying with D. */
3653 mh = choose_multiplier (d, size, size,
3654 &ml, &post_shift, &dummy);
3656 /* If the suggested multiplier is more than SIZE bits,
3657 we can do better for even divisors, using an
3658 initial right shift. */
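   /* For example, the multiplier for an unsigned 32-bit division by 14
      needs 33 bits, but since x / 14 == (x >> 1) / 7, shifting the
      dividend right by 1 first leaves only 31 significant dividend bits,
      and the divide-by-7 multiplier for that reduced range fits in a word
      (illustrative only).  */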
3659 if (mh != 0 && (d & 1) == 0)
3661 pre_shift = floor_log2 (d & -d);
3662 mh = choose_multiplier (d >> pre_shift, size,
3663 size - pre_shift,
3664 &ml, &post_shift, &dummy);
3665 if (mh)
3666 abort ();
3668 else
3669 pre_shift = 0;
3671 if (mh != 0)
3673 rtx t1, t2, t3, t4;
3675 if (post_shift - 1 >= BITS_PER_WORD)
3676 goto fail1;
3678 extra_cost
3679 = (shift_cost[compute_mode][post_shift - 1]
3680 + shift_cost[compute_mode][1]
3681 + 2 * add_cost[compute_mode]);
3682 t1 = expand_mult_highpart (compute_mode, op0, ml,
3683 NULL_RTX, 1,
3684 max_cost - extra_cost);
3685 if (t1 == 0)
3686 goto fail1;
3687 t2 = force_operand (gen_rtx_MINUS (compute_mode,
3688 op0, t1),
3689 NULL_RTX);
3690 t3 = expand_shift
3691 (RSHIFT_EXPR, compute_mode, t2,
3692 build_int_cst (NULL_TREE, 1),
3693 NULL_RTX, 1);
3694 t4 = force_operand (gen_rtx_PLUS (compute_mode,
3695 t1, t3),
3696 NULL_RTX);
3697 quotient = expand_shift
3698 (RSHIFT_EXPR, compute_mode, t4,
3699 build_int_cst (NULL_TREE, post_shift - 1),
3700 tquotient, 1);
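   /* Illustrative instance of the sequence above, not emitted verbatim:
      for unsigned 32-bit division by 7 the multiplier needs 33 bits,
      giving ml = 0x24924925 and post_shift = 3; with
      t1 = mulhi (x, 0x24924925) the quotient is
      (((x - t1) >> 1) + t1) >> 2, e.g. x = 100 yields t1 = 14
      and quotient 14 == 100 / 7.  */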
3702 else
3704 rtx t1, t2;
3706 if (pre_shift >= BITS_PER_WORD
3707 || post_shift >= BITS_PER_WORD)
3708 goto fail1;
3710 t1 = expand_shift
3711 (RSHIFT_EXPR, compute_mode, op0,
3712 build_int_cst (NULL_TREE, pre_shift),
3713 NULL_RTX, 1);
3714 extra_cost
3715 = (shift_cost[compute_mode][pre_shift]
3716 + shift_cost[compute_mode][post_shift]);
3717 t2 = expand_mult_highpart (compute_mode, t1, ml,
3718 NULL_RTX, 1,
3719 max_cost - extra_cost);
3720 if (t2 == 0)
3721 goto fail1;
3722 quotient = expand_shift
3723 (RSHIFT_EXPR, compute_mode, t2,
3724 build_int_cst (NULL_TREE, post_shift),
3725 tquotient, 1);
3729 else /* Mode is too wide to use the tricky code.  */
3730 break;
3732 insn = get_last_insn ();
3733 if (insn != last
3734 && (set = single_set (insn)) != 0
3735 && SET_DEST (set) == quotient)
3736 set_unique_reg_note (insn,
3737 REG_EQUAL,
3738 gen_rtx_UDIV (compute_mode, op0, op1));
3740 else /* TRUNC_DIV, signed */
3742 unsigned HOST_WIDE_INT ml;
3743 int lgup, post_shift;
3744 HOST_WIDE_INT d = INTVAL (op1);
3745 unsigned HOST_WIDE_INT abs_d = d >= 0 ? d : -d;
3747 /* n rem d = n rem -d */
3748 if (rem_flag && d < 0)
3750 d = abs_d;
3751 op1 = gen_int_mode (abs_d, compute_mode);
3754 if (d == 1)
3755 quotient = op0;
3756 else if (d == -1)
3757 quotient = expand_unop (compute_mode, neg_optab, op0,
3758 tquotient, 0);
3759 else if (abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
3761 /* This case is not handled correctly below. */
3762 quotient = emit_store_flag (tquotient, EQ, op0, op1,
3763 compute_mode, 1, 1);
3764 if (quotient == 0)
3765 goto fail1;
3767 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
3768 && (rem_flag ? smod_pow2_cheap[compute_mode]
3769 : sdiv_pow2_cheap[compute_mode])
3770 /* We assume that the cheap metric is true if the
3771 optab has an expander for this mode. */
3772 && (((rem_flag ? smod_optab : sdiv_optab)
3773 ->handlers[compute_mode].insn_code
3774 != CODE_FOR_nothing)
3775 || (sdivmod_optab->handlers[compute_mode]
3776 .insn_code != CODE_FOR_nothing)))
3778 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
3780 if (rem_flag)
3782 remainder = expand_smod_pow2 (compute_mode, op0, d);
3783 if (remainder)
3784 return gen_lowpart (mode, remainder);
3786 quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
3788 /* We have computed OP0 / abs(OP1). If OP1 is negative,
3789 negate the quotient. */
3790 if (d < 0)
3792 insn = get_last_insn ();
3793 if (insn != last
3794 && (set = single_set (insn)) != 0
3795 && SET_DEST (set) == quotient
3796 && abs_d < ((unsigned HOST_WIDE_INT) 1
3797 << (HOST_BITS_PER_WIDE_INT - 1)))
3798 set_unique_reg_note (insn,
3799 REG_EQUAL,
3800 gen_rtx_DIV (compute_mode,
3801 op0,
3802 GEN_INT
3803 (trunc_int_for_mode
3804 (abs_d,
3805 compute_mode))));
3807 quotient = expand_unop (compute_mode, neg_optab,
3808 quotient, quotient, 0);
3811 else if (size <= HOST_BITS_PER_WIDE_INT)
3813 choose_multiplier (abs_d, size, size - 1,
3814 &ml, &post_shift, &lgup);
3815 if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
3817 rtx t1, t2, t3;
3819 if (post_shift >= BITS_PER_WORD
3820 || size - 1 >= BITS_PER_WORD)
3821 goto fail1;
3823 extra_cost = (shift_cost[compute_mode][post_shift]
3824 + shift_cost[compute_mode][size - 1]
3825 + add_cost[compute_mode]);
3826 t1 = expand_mult_highpart (compute_mode, op0, ml,
3827 NULL_RTX, 0,
3828 max_cost - extra_cost);
3829 if (t1 == 0)
3830 goto fail1;
3831 t2 = expand_shift
3832 (RSHIFT_EXPR, compute_mode, t1,
3833 build_int_cst (NULL_TREE, post_shift),
3834 NULL_RTX, 0);
3835 t3 = expand_shift
3836 (RSHIFT_EXPR, compute_mode, op0,
3837 build_int_cst (NULL_TREE, size - 1),
3838 NULL_RTX, 0);
3839 if (d < 0)
3840 quotient
3841 = force_operand (gen_rtx_MINUS (compute_mode,
3842 t3, t2),
3843 tquotient);
3844 else
3845 quotient
3846 = force_operand (gen_rtx_MINUS (compute_mode,
3847 t2, t3),
3848 tquotient);
3850 else
3852 rtx t1, t2, t3, t4;
3854 if (post_shift >= BITS_PER_WORD
3855 || size - 1 >= BITS_PER_WORD)
3856 goto fail1;
3858 ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
3859 extra_cost = (shift_cost[compute_mode][post_shift]
3860 + shift_cost[compute_mode][size - 1]
3861 + 2 * add_cost[compute_mode]);
3862 t1 = expand_mult_highpart (compute_mode, op0, ml,
3863 NULL_RTX, 0,
3864 max_cost - extra_cost);
3865 if (t1 == 0)
3866 goto fail1;
3867 t2 = force_operand (gen_rtx_PLUS (compute_mode,
3868 t1, op0),
3869 NULL_RTX);
3870 t3 = expand_shift
3871 (RSHIFT_EXPR, compute_mode, t2,
3872 build_int_cst (NULL_TREE, post_shift),
3873 NULL_RTX, 0);
3874 t4 = expand_shift
3875 (RSHIFT_EXPR, compute_mode, op0,
3876 build_int_cst (NULL_TREE, size - 1),
3877 NULL_RTX, 0);
3878 if (d < 0)
3879 quotient
3880 = force_operand (gen_rtx_MINUS (compute_mode,
3881 t4, t3),
3882 tquotient);
3883 else
3884 quotient
3885 = force_operand (gen_rtx_MINUS (compute_mode,
3886 t3, t4),
3887 tquotient);
3890 else /* Mode is too wide to use the tricky code.  */
3891 break;
3893 insn = get_last_insn ();
3894 if (insn != last
3895 && (set = single_set (insn)) != 0
3896 && SET_DEST (set) == quotient)
3897 set_unique_reg_note (insn,
3898 REG_EQUAL,
3899 gen_rtx_DIV (compute_mode, op0, op1));
3901 break;
3903 fail1:
3904 delete_insns_since (last);
3905 break;
3907 case FLOOR_DIV_EXPR:
3908 case FLOOR_MOD_EXPR:
3909 /* We will come here only for signed operations. */
3910 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
3912 unsigned HOST_WIDE_INT mh, ml;
3913 int pre_shift, lgup, post_shift;
3914 HOST_WIDE_INT d = INTVAL (op1);
3916 if (d > 0)
3918 /* We could just as easily deal with negative constants here,
3919 but it does not seem worth the trouble for GCC 2.6. */
3920 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
3922 pre_shift = floor_log2 (d);
3923 if (rem_flag)
3925 remainder = expand_binop (compute_mode, and_optab, op0,
3926 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
3927 remainder, 0, OPTAB_LIB_WIDEN);
3928 if (remainder)
3929 return gen_lowpart (mode, remainder);
3931 quotient = expand_shift
3932 (RSHIFT_EXPR, compute_mode, op0,
3933 build_int_cst (NULL_TREE, pre_shift),
3934 tquotient, 0);
3936 else
3938 rtx t1, t2, t3, t4;
3940 mh = choose_multiplier (d, size, size - 1,
3941 &ml, &post_shift, &lgup);
3942 if (mh)
3943 abort ();
3945 if (post_shift < BITS_PER_WORD
3946 && size - 1 < BITS_PER_WORD)
3948 t1 = expand_shift
3949 (RSHIFT_EXPR, compute_mode, op0,
3950 build_int_cst (NULL_TREE, size - 1),
3951 NULL_RTX, 0);
3952 t2 = expand_binop (compute_mode, xor_optab, op0, t1,
3953 NULL_RTX, 0, OPTAB_WIDEN);
3954 extra_cost = (shift_cost[compute_mode][post_shift]
3955 + shift_cost[compute_mode][size - 1]
3956 + 2 * add_cost[compute_mode]);
3957 t3 = expand_mult_highpart (compute_mode, t2, ml,
3958 NULL_RTX, 1,
3959 max_cost - extra_cost);
3960 if (t3 != 0)
3962 t4 = expand_shift
3963 (RSHIFT_EXPR, compute_mode, t3,
3964 build_int_cst (NULL_TREE, post_shift),
3965 NULL_RTX, 1);
3966 quotient = expand_binop (compute_mode, xor_optab,
3967 t4, t1, tquotient, 0,
3968 OPTAB_WIDEN);
3973 else
3975 rtx nsign, t1, t2, t3, t4;
3976 t1 = force_operand (gen_rtx_PLUS (compute_mode,
3977 op0, constm1_rtx), NULL_RTX);
3978 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
3979 0, OPTAB_WIDEN);
3980 nsign = expand_shift
3981 (RSHIFT_EXPR, compute_mode, t2,
3982 build_int_cst (NULL_TREE, size - 1),
3983 NULL_RTX, 0);
3984 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
3985 NULL_RTX);
3986 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
3987 NULL_RTX, 0);
3988 if (t4)
3990 rtx t5;
3991 t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
3992 NULL_RTX, 0);
3993 quotient = force_operand (gen_rtx_PLUS (compute_mode,
3994 t4, t5),
3995 tquotient);
4000 if (quotient != 0)
4001 break;
4002 delete_insns_since (last);
4004 /* Try using an instruction that produces both the quotient and
4005 remainder, using truncation. We can easily compensate the quotient
4006 or remainder to get floor rounding, once we have the remainder.
4007 Notice that we also compute the final remainder value here,
4008 and return the result right away. */
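   /* For instance (illustrative): -7 / 3 truncates to quotient -2 with
      remainder -1; the remainder is nonzero and the operands have
      opposite signs, so floor rounding adjusts this to quotient -3 and
      remainder -1 + 3 == 2, i.e. -7 == 3 * -3 + 2.  */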
4009 if (target == 0 || GET_MODE (target) != compute_mode)
4010 target = gen_reg_rtx (compute_mode);
4012 if (rem_flag)
4014 remainder
4015 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4016 quotient = gen_reg_rtx (compute_mode);
4018 else
4020 quotient
4021 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4022 remainder = gen_reg_rtx (compute_mode);
4025 if (expand_twoval_binop (sdivmod_optab, op0, op1,
4026 quotient, remainder, 0))
4028 /* This could be computed with a branch-less sequence.
4029 Save that for later. */
4030 rtx tem;
4031 rtx label = gen_label_rtx ();
4032 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4033 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4034 NULL_RTX, 0, OPTAB_WIDEN);
4035 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4036 expand_dec (quotient, const1_rtx);
4037 expand_inc (remainder, op1);
4038 emit_label (label);
4039 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4042 /* No luck with division elimination or divmod. Have to do it
4043 by conditionally adjusting op0 *and* the result. */
4045 rtx label1, label2, label3, label4, label5;
4046 rtx adjusted_op0;
4047 rtx tem;
4049 quotient = gen_reg_rtx (compute_mode);
4050 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4051 label1 = gen_label_rtx ();
4052 label2 = gen_label_rtx ();
4053 label3 = gen_label_rtx ();
4054 label4 = gen_label_rtx ();
4055 label5 = gen_label_rtx ();
4056 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4057 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4058 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4059 quotient, 0, OPTAB_LIB_WIDEN);
4060 if (tem != quotient)
4061 emit_move_insn (quotient, tem);
4062 emit_jump_insn (gen_jump (label5));
4063 emit_barrier ();
4064 emit_label (label1);
4065 expand_inc (adjusted_op0, const1_rtx);
4066 emit_jump_insn (gen_jump (label4));
4067 emit_barrier ();
4068 emit_label (label2);
4069 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4070 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4071 quotient, 0, OPTAB_LIB_WIDEN);
4072 if (tem != quotient)
4073 emit_move_insn (quotient, tem);
4074 emit_jump_insn (gen_jump (label5));
4075 emit_barrier ();
4076 emit_label (label3);
4077 expand_dec (adjusted_op0, const1_rtx);
4078 emit_label (label4);
4079 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4080 quotient, 0, OPTAB_LIB_WIDEN);
4081 if (tem != quotient)
4082 emit_move_insn (quotient, tem);
4083 expand_dec (quotient, const1_rtx);
4084 emit_label (label5);
4086 break;
4088 case CEIL_DIV_EXPR:
4089 case CEIL_MOD_EXPR:
4090 if (unsignedp)
4092 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4094 rtx t1, t2, t3;
4095 unsigned HOST_WIDE_INT d = INTVAL (op1);
4096 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4097 build_int_cst (NULL_TREE, floor_log2 (d)),
4098 tquotient, 1);
4099 t2 = expand_binop (compute_mode, and_optab, op0,
4100 GEN_INT (d - 1),
4101 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4102 t3 = gen_reg_rtx (compute_mode);
4103 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4104 compute_mode, 1, 1);
4105 if (t3 == 0)
4107 rtx lab;
4108 lab = gen_label_rtx ();
4109 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4110 expand_inc (t1, const1_rtx);
4111 emit_label (lab);
4112 quotient = t1;
4114 else
4115 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4116 t1, t3),
4117 tquotient);
4118 break;
4121 /* Try using an instruction that produces both the quotient and
4122 remainder, using truncation. We can easily compensate the
4123 quotient or remainder to get ceiling rounding, once we have the
4124 remainder. Notice that we also compute the final remainder
4125 value here, and return the result right away. */
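   /* As a worked example: 7 / 3 truncates to quotient 2 with remainder 1;
      the remainder is nonzero, so the ceiling quotient is 2 + 1 == 3 and
      the CEIL_MOD remainder is 1 - 3 == -2, i.e. 7 == 3 * 3 - 2.  */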
4126 if (target == 0 || GET_MODE (target) != compute_mode)
4127 target = gen_reg_rtx (compute_mode);
4129 if (rem_flag)
4131 remainder = (REG_P (target)
4132 ? target : gen_reg_rtx (compute_mode));
4133 quotient = gen_reg_rtx (compute_mode);
4135 else
4137 quotient = (REG_P (target)
4138 ? target : gen_reg_rtx (compute_mode));
4139 remainder = gen_reg_rtx (compute_mode);
4142 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4143 remainder, 1))
4145 /* This could be computed with a branch-less sequence.
4146 Save that for later. */
4147 rtx label = gen_label_rtx ();
4148 do_cmp_and_jump (remainder, const0_rtx, EQ,
4149 compute_mode, label);
4150 expand_inc (quotient, const1_rtx);
4151 expand_dec (remainder, op1);
4152 emit_label (label);
4153 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4156 /* No luck with division elimination or divmod. Have to do it
4157 by conditionally adjusting op0 *and* the result. */
4159 rtx label1, label2;
4160 rtx adjusted_op0, tem;
4162 quotient = gen_reg_rtx (compute_mode);
4163 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4164 label1 = gen_label_rtx ();
4165 label2 = gen_label_rtx ();
4166 do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4167 compute_mode, label1);
4168 emit_move_insn (quotient, const0_rtx);
4169 emit_jump_insn (gen_jump (label2));
4170 emit_barrier ();
4171 emit_label (label1);
4172 expand_dec (adjusted_op0, const1_rtx);
4173 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4174 quotient, 1, OPTAB_LIB_WIDEN);
4175 if (tem != quotient)
4176 emit_move_insn (quotient, tem);
4177 expand_inc (quotient, const1_rtx);
4178 emit_label (label2);
4181 else /* signed */
4183 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4184 && INTVAL (op1) >= 0)
4186 /* This is extremely similar to the code for the unsigned case
4187 above. For 2.7 we should merge these variants, but for
4188 2.6.1 I don't want to touch the code for unsigned since that
4189 gets used in C. The signed case will only be used by other
4190 languages (Ada). */
4192 rtx t1, t2, t3;
4193 unsigned HOST_WIDE_INT d = INTVAL (op1);
4194 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4195 build_int_cst (NULL_TREE, floor_log2 (d)),
4196 tquotient, 0);
4197 t2 = expand_binop (compute_mode, and_optab, op0,
4198 GEN_INT (d - 1),
4199 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4200 t3 = gen_reg_rtx (compute_mode);
4201 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4202 compute_mode, 1, 1);
4203 if (t3 == 0)
4205 rtx lab;
4206 lab = gen_label_rtx ();
4207 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4208 expand_inc (t1, const1_rtx);
4209 emit_label (lab);
4210 quotient = t1;
4212 else
4213 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4214 t1, t3),
4215 tquotient);
4216 break;
4219 /* Try using an instruction that produces both the quotient and
4220 remainder, using truncation. We can easily compensate the
4221 quotient or remainder to get ceiling rounding, once we have the
4222 remainder. Notice that we also compute the final remainder
4223 value here, and return the result right away. */
4224 if (target == 0 || GET_MODE (target) != compute_mode)
4225 target = gen_reg_rtx (compute_mode);
4226 if (rem_flag)
4228 remainder = (REG_P (target)
4229 ? target : gen_reg_rtx (compute_mode));
4230 quotient = gen_reg_rtx (compute_mode);
4232 else
4234 quotient = (REG_P (target)
4235 ? target : gen_reg_rtx (compute_mode));
4236 remainder = gen_reg_rtx (compute_mode);
4239 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4240 remainder, 0))
4242 /* This could be computed with a branch-less sequence.
4243 Save that for later. */
4244 rtx tem;
4245 rtx label = gen_label_rtx ();
4246 do_cmp_and_jump (remainder, const0_rtx, EQ,
4247 compute_mode, label);
4248 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4249 NULL_RTX, 0, OPTAB_WIDEN);
4250 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4251 expand_inc (quotient, const1_rtx);
4252 expand_dec (remainder, op1);
4253 emit_label (label);
4254 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4257 /* No luck with division elimination or divmod. Have to do it
4258 by conditionally adjusting op0 *and* the result. */
4260 rtx label1, label2, label3, label4, label5;
4261 rtx adjusted_op0;
4262 rtx tem;
4264 quotient = gen_reg_rtx (compute_mode);
4265 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4266 label1 = gen_label_rtx ();
4267 label2 = gen_label_rtx ();
4268 label3 = gen_label_rtx ();
4269 label4 = gen_label_rtx ();
4270 label5 = gen_label_rtx ();
4271 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4272 do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4273 compute_mode, label1);
4274 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4275 quotient, 0, OPTAB_LIB_WIDEN);
4276 if (tem != quotient)
4277 emit_move_insn (quotient, tem);
4278 emit_jump_insn (gen_jump (label5));
4279 emit_barrier ();
4280 emit_label (label1);
4281 expand_dec (adjusted_op0, const1_rtx);
4282 emit_jump_insn (gen_jump (label4));
4283 emit_barrier ();
4284 emit_label (label2);
4285 do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4286 compute_mode, label3);
4287 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4288 quotient, 0, OPTAB_LIB_WIDEN);
4289 if (tem != quotient)
4290 emit_move_insn (quotient, tem);
4291 emit_jump_insn (gen_jump (label5));
4292 emit_barrier ();
4293 emit_label (label3);
4294 expand_inc (adjusted_op0, const1_rtx);
4295 emit_label (label4);
4296 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4297 quotient, 0, OPTAB_LIB_WIDEN);
4298 if (tem != quotient)
4299 emit_move_insn (quotient, tem);
4300 expand_inc (quotient, const1_rtx);
4301 emit_label (label5);
4304 break;
4306 case EXACT_DIV_EXPR:
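   /* An exact division by an odd constant is a single multiplication by
      the divisor's multiplicative inverse modulo 2**size; any factor of
      two in the divisor is first removed with a right shift.
      Illustrative 32-bit example: the inverse of 3 is 0xAAAAAAAB
      (3 * 0xAAAAAAAB == 1 mod 2**32), and 21 * 0xAAAAAAAB mod 2**32
      == 7 == 21 / 3.  */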
4307 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4309 HOST_WIDE_INT d = INTVAL (op1);
4310 unsigned HOST_WIDE_INT ml;
4311 int pre_shift;
4312 rtx t1;
4314 pre_shift = floor_log2 (d & -d);
4315 ml = invert_mod2n (d >> pre_shift, size);
4316 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4317 build_int_cst (NULL_TREE, pre_shift),
4318 NULL_RTX, unsignedp);
4319 quotient = expand_mult (compute_mode, t1,
4320 gen_int_mode (ml, compute_mode),
4321 NULL_RTX, 1);
4323 insn = get_last_insn ();
4324 set_unique_reg_note (insn,
4325 REG_EQUAL,
4326 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4327 compute_mode,
4328 op0, op1));
4330 break;
4332 case ROUND_DIV_EXPR:
4333 case ROUND_MOD_EXPR:
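   /* Rounding division is done below as truncating division followed by
      an adjustment when twice the remainder is at least the divisor, so
      ties round away from zero.  For example, 8 / 3 truncates to
      quotient 2 with remainder 2; 2 * 2 >= 3, so the rounded quotient is
      3 and the remainder becomes 2 - 3 == -1.  */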
4334 if (unsignedp)
4336 rtx tem;
4337 rtx label;
4338 label = gen_label_rtx ();
4339 quotient = gen_reg_rtx (compute_mode);
4340 remainder = gen_reg_rtx (compute_mode);
4341 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4343 rtx tem;
4344 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4345 quotient, 1, OPTAB_LIB_WIDEN);
4346 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4347 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4348 remainder, 1, OPTAB_LIB_WIDEN);
4350 tem = plus_constant (op1, -1);
4351 tem = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4352 build_int_cst (NULL_TREE, 1),
4353 NULL_RTX, 1);
4354 do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4355 expand_inc (quotient, const1_rtx);
4356 expand_dec (remainder, op1);
4357 emit_label (label);
4359 else
4361 rtx abs_rem, abs_op1, tem, mask;
4362 rtx label;
4363 label = gen_label_rtx ();
4364 quotient = gen_reg_rtx (compute_mode);
4365 remainder = gen_reg_rtx (compute_mode);
4366 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4368 rtx tem;
4369 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4370 quotient, 0, OPTAB_LIB_WIDEN);
4371 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4372 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4373 remainder, 0, OPTAB_LIB_WIDEN);
4375 abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4376 abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4377 tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4378 build_int_cst (NULL_TREE, 1),
4379 NULL_RTX, 1);
4380 do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4381 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4382 NULL_RTX, 0, OPTAB_WIDEN);
4383 mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4384 build_int_cst (NULL_TREE, size - 1),
4385 NULL_RTX, 0);
4386 tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4387 NULL_RTX, 0, OPTAB_WIDEN);
4388 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4389 NULL_RTX, 0, OPTAB_WIDEN);
4390 expand_inc (quotient, tem);
4391 tem = expand_binop (compute_mode, xor_optab, mask, op1,
4392 NULL_RTX, 0, OPTAB_WIDEN);
4393 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4394 NULL_RTX, 0, OPTAB_WIDEN);
4395 expand_dec (remainder, tem);
4396 emit_label (label);
4398 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4400 default:
4401 abort ();
4404 if (quotient == 0)
4406 if (target && GET_MODE (target) != compute_mode)
4407 target = 0;
4409 if (rem_flag)
4411 /* Try to produce the remainder without producing the quotient.
4412 If we seem to have a divmod pattern that does not require widening,
4413 don't try widening here. We should really have a WIDEN argument
4414 to expand_twoval_binop, since what we'd really like to do here is
4415 1) try a mod insn in compute_mode
4416 2) try a divmod insn in compute_mode
4417 3) try a div insn in compute_mode and multiply-subtract to get
4418 remainder
4419 4) try the same things with widening allowed. */
4420 remainder
4421 = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4422 op0, op1, target,
4423 unsignedp,
4424 ((optab2->handlers[compute_mode].insn_code
4425 != CODE_FOR_nothing)
4426 ? OPTAB_DIRECT : OPTAB_WIDEN));
4427 if (remainder == 0)
4429 /* No luck there. Can we do remainder and divide at once
4430 without a library call? */
4431 remainder = gen_reg_rtx (compute_mode);
4432 if (! expand_twoval_binop ((unsignedp
4433 ? udivmod_optab
4434 : sdivmod_optab),
4435 op0, op1,
4436 NULL_RTX, remainder, unsignedp))
4437 remainder = 0;
4440 if (remainder)
4441 return gen_lowpart (mode, remainder);
4444 /* Produce the quotient. Try a quotient insn, but not a library call.
4445 If we have a divmod in this mode, use it in preference to widening
4446 the div (for this test we assume it will not fail). Note that optab2
4447 is set to the one of the two optabs that the call below will use. */
4448 quotient
4449 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4450 op0, op1, rem_flag ? NULL_RTX : target,
4451 unsignedp,
4452 ((optab2->handlers[compute_mode].insn_code
4453 != CODE_FOR_nothing)
4454 ? OPTAB_DIRECT : OPTAB_WIDEN));
4456 if (quotient == 0)
4458 /* No luck there. Try a quotient-and-remainder insn,
4459 keeping the quotient alone. */
4460 quotient = gen_reg_rtx (compute_mode);
4461 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4462 op0, op1,
4463 quotient, NULL_RTX, unsignedp))
4465 quotient = 0;
4466 if (! rem_flag)
4467 /* Still no luck. If we are not computing the remainder,
4468 use a library call for the quotient. */
4469 quotient = sign_expand_binop (compute_mode,
4470 udiv_optab, sdiv_optab,
4471 op0, op1, target,
4472 unsignedp, OPTAB_LIB_WIDEN);
4477 if (rem_flag)
4479 if (target && GET_MODE (target) != compute_mode)
4480 target = 0;
4482 if (quotient == 0)
4484 /* No divide instruction either. Use library for remainder. */
4485 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4486 op0, op1, target,
4487 unsignedp, OPTAB_LIB_WIDEN);
4488 /* No remainder function. Try a quotient-and-remainder
4489 function, keeping the remainder. */
4490 if (!remainder)
4492 remainder = gen_reg_rtx (compute_mode);
4493 if (!expand_twoval_binop_libfunc
4494 (unsignedp ? udivmod_optab : sdivmod_optab,
4495 op0, op1,
4496 NULL_RTX, remainder,
4497 unsignedp ? UMOD : MOD))
4498 remainder = NULL_RTX;
4501 else
4503 /* We divided. Now finish doing X - Y * (X / Y). */
4504 remainder = expand_mult (compute_mode, quotient, op1,
4505 NULL_RTX, unsignedp);
4506 remainder = expand_binop (compute_mode, sub_optab, op0,
4507 remainder, target, unsignedp,
4508 OPTAB_LIB_WIDEN);
4512 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4515 /* Return a tree node with data type TYPE, describing the value of X.
4516 Usually this is a VAR_DECL, if there is no obvious better choice.
4517 X may be an expression; however, we only support those expressions
4518 generated by loop.c. */
4520 tree
4521 make_tree (tree type, rtx x)
4523 tree t;
4525 switch (GET_CODE (x))
4527 case CONST_INT:
4529 HOST_WIDE_INT hi = 0;
4531 if (INTVAL (x) < 0
4532 && !(TYPE_UNSIGNED (type)
4533 && (GET_MODE_BITSIZE (TYPE_MODE (type))
4534 < HOST_BITS_PER_WIDE_INT)))
4535 hi = -1;
4537 t = build_int_cst_wide (type, INTVAL (x), hi);
4539 return t;
4542 case CONST_DOUBLE:
4543 if (GET_MODE (x) == VOIDmode)
4544 t = build_int_cst_wide (type,
4545 CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
4546 else
4548 REAL_VALUE_TYPE d;
4550 REAL_VALUE_FROM_CONST_DOUBLE (d, x);
4551 t = build_real (type, d);
4554 return t;
4556 case CONST_VECTOR:
4558 int i, units;
4559 rtx elt;
4560 tree t = NULL_TREE;
4562 units = CONST_VECTOR_NUNITS (x);
4564 /* Build a tree with vector elements. */
4565 for (i = units - 1; i >= 0; --i)
4567 elt = CONST_VECTOR_ELT (x, i);
4568 t = tree_cons (NULL_TREE, make_tree (type, elt), t);
4571 return build_vector (type, t);
4574 case PLUS:
4575 return fold (build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4576 make_tree (type, XEXP (x, 1))));
4578 case MINUS:
4579 return fold (build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4580 make_tree (type, XEXP (x, 1))));
4582 case NEG:
4583 return fold (build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0))));
4585 case MULT:
4586 return fold (build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
4587 make_tree (type, XEXP (x, 1))));
4589 case ASHIFT:
4590 return fold (build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
4591 make_tree (type, XEXP (x, 1))));
4593 case LSHIFTRT:
4594 t = lang_hooks.types.unsigned_type (type);
4595 return fold (convert (type,
4596 build2 (RSHIFT_EXPR, t,
4597 make_tree (t, XEXP (x, 0)),
4598 make_tree (type, XEXP (x, 1)))));
4600 case ASHIFTRT:
4601 t = lang_hooks.types.signed_type (type);
4602 return fold (convert (type,
4603 build2 (RSHIFT_EXPR, t,
4604 make_tree (t, XEXP (x, 0)),
4605 make_tree (type, XEXP (x, 1)))));
4607 case DIV:
4608 if (TREE_CODE (type) != REAL_TYPE)
4609 t = lang_hooks.types.signed_type (type);
4610 else
4611 t = type;
4613 return fold (convert (type,
4614 build2 (TRUNC_DIV_EXPR, t,
4615 make_tree (t, XEXP (x, 0)),
4616 make_tree (t, XEXP (x, 1)))));
4617 case UDIV:
4618 t = lang_hooks.types.unsigned_type (type);
4619 return fold (convert (type,
4620 build2 (TRUNC_DIV_EXPR, t,
4621 make_tree (t, XEXP (x, 0)),
4622 make_tree (t, XEXP (x, 1)))));
4624 case SIGN_EXTEND:
4625 case ZERO_EXTEND:
4626 t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
4627 GET_CODE (x) == ZERO_EXTEND);
4628 return fold (convert (type, make_tree (t, XEXP (x, 0))));
4630 default:
4631 t = build_decl (VAR_DECL, NULL_TREE, type);
4633 /* If TYPE is a POINTER_TYPE, X might be Pmode with TYPE_MODE being
4634 ptr_mode. So convert. */
4635 if (POINTER_TYPE_P (type))
4636 x = convert_memory_address (TYPE_MODE (type), x);
4638 /* Note that we do *not* use SET_DECL_RTL here, because we do not
4639 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
4640 t->decl.rtl = x;
4642 return t;
4646 /* Check whether the multiplication X * MULT + ADD overflows.
4647 X, MULT and ADD must be CONST_*.
4648 MODE is the machine mode for the computation.
4649 X and MULT must have mode MODE. ADD may have a different mode.
4650 So can X (defaults to same as MODE).
4651 UNSIGNEDP is nonzero to do unsigned multiplication. */
4653 bool
4654 const_mult_add_overflow_p (rtx x, rtx mult, rtx add,
4655 enum machine_mode mode, int unsignedp)
4657 tree type, mult_type, add_type, result;
4659 type = lang_hooks.types.type_for_mode (mode, unsignedp);
4661 /* In order to get a proper overflow indication from an unsigned
4662 type, we have to pretend that it's a sizetype. */
4663 mult_type = type;
4664 if (unsignedp)
4666 /* FIXME: It would be nice if we could step directly from this
4667 type to its sizetype equivalent. */
4668 mult_type = build_distinct_type_copy (type);
4669 TYPE_IS_SIZETYPE (mult_type) = 1;
4672 add_type = (GET_MODE (add) == VOIDmode ? mult_type
4673 : lang_hooks.types.type_for_mode (GET_MODE (add), unsignedp));
4675 result = fold (build2 (PLUS_EXPR, mult_type,
4676 fold (build2 (MULT_EXPR, mult_type,
4677 make_tree (mult_type, x),
4678 make_tree (mult_type, mult))),
4679 make_tree (add_type, add)));
4681 return TREE_CONSTANT_OVERFLOW (result);
4684 /* Return an rtx representing the value of X * MULT + ADD.
4685 TARGET is a suggestion for where to store the result (an rtx).
4686 MODE is the machine mode for the computation.
4687 X and MULT must have mode MODE. ADD may have a different mode.
4688 So can X (defaults to same as MODE).
4689 UNSIGNEDP is nonzero to do unsigned multiplication.
4690 This may emit insns. */
4693 expand_mult_add (rtx x, rtx target, rtx mult, rtx add, enum machine_mode mode,
4694 int unsignedp)
4696 tree type = lang_hooks.types.type_for_mode (mode, unsignedp);
4697 tree add_type = (GET_MODE (add) == VOIDmode
4698 ? type : lang_hooks.types.type_for_mode (GET_MODE (add),
4699 unsignedp));
4700 tree result = fold (build2 (PLUS_EXPR, type,
4701 fold (build2 (MULT_EXPR, type,
4702 make_tree (type, x),
4703 make_tree (type, mult))),
4704 make_tree (add_type, add)));
4706 return expand_expr (result, target, VOIDmode, 0);
4709 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
4710 and returning TARGET.
4712 If TARGET is 0, a pseudo-register or constant is returned. */
4715 expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
4717 rtx tem = 0;
4719 if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
4720 tem = simplify_binary_operation (AND, mode, op0, op1);
4721 if (tem == 0)
4722 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
4724 if (target == 0)
4725 target = tem;
4726 else if (tem != target)
4727 emit_move_insn (target, tem);
4728 return target;
4731 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
4732 and storing in TARGET. Normally return TARGET.
4733 Return 0 if that cannot be done.
4735 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
4736 it is VOIDmode, they cannot both be CONST_INT.
4738 UNSIGNEDP is for the case where we have to widen the operands
4739 to perform the operation. It says to use zero-extension.
4741 NORMALIZEP is 1 if we should convert the result to be either zero
4742 or one. NORMALIZEP is -1 if we should convert the result to be
4743 either zero or -1. If NORMALIZEP is zero, the result will be left
4744 "raw" out of the scc insn. */
4747 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
4748 enum machine_mode mode, int unsignedp, int normalizep)
4750 rtx subtarget;
4751 enum insn_code icode;
4752 enum machine_mode compare_mode;
4753 enum machine_mode target_mode = GET_MODE (target);
4754 rtx tem;
4755 rtx last = get_last_insn ();
4756 rtx pattern, comparison;
4758 if (unsignedp)
4759 code = unsigned_condition (code);
4761 /* If one operand is constant, make it the second one. Only do this
4762 if the other operand is not constant as well. */
4764 if (swap_commutative_operands_p (op0, op1))
4766 tem = op0;
4767 op0 = op1;
4768 op1 = tem;
4769 code = swap_condition (code);
4772 if (mode == VOIDmode)
4773 mode = GET_MODE (op0);
4775 /* For some comparisons with 1 and -1, we can convert this to
4776 comparisons with zero. This will often produce more opportunities for
4777 store-flag insns. */
4779 switch (code)
4781 case LT:
4782 if (op1 == const1_rtx)
4783 op1 = const0_rtx, code = LE;
4784 break;
4785 case LE:
4786 if (op1 == constm1_rtx)
4787 op1 = const0_rtx, code = LT;
4788 break;
4789 case GE:
4790 if (op1 == const1_rtx)
4791 op1 = const0_rtx, code = GT;
4792 break;
4793 case GT:
4794 if (op1 == constm1_rtx)
4795 op1 = const0_rtx, code = GE;
4796 break;
4797 case GEU:
4798 if (op1 == const1_rtx)
4799 op1 = const0_rtx, code = NE;
4800 break;
4801 case LTU:
4802 if (op1 == const1_rtx)
4803 op1 = const0_rtx, code = EQ;
4804 break;
4805 default:
4806 break;
4809 /* If we are comparing a double-word integer with zero or -1, we can
4810 convert the comparison into one involving a single word. */
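  /* (For a double-word X this relies on X == 0 exactly when
     (low | high) == 0 and X == -1 exactly when (low & high) == -1.)  */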
4811 if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
4812 && GET_MODE_CLASS (mode) == MODE_INT
4813 && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
4815 if ((code == EQ || code == NE)
4816 && (op1 == const0_rtx || op1 == constm1_rtx))
4818 rtx op00, op01, op0both;
4820 /* Do a logical OR or AND of the two words and compare the result. */
4821 op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
4822 op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
4823 op0both = expand_binop (word_mode,
4824 op1 == const0_rtx ? ior_optab : and_optab,
4825 op00, op01, NULL_RTX, unsignedp, OPTAB_DIRECT);
4827 if (op0both != 0)
4828 return emit_store_flag (target, code, op0both, op1, word_mode,
4829 unsignedp, normalizep);
4831 else if ((code == LT || code == GE) && op1 == const0_rtx)
4833 rtx op0h;
4835 /* If testing the sign bit, can just test on high word. */
4836 op0h = simplify_gen_subreg (word_mode, op0, mode,
4837 subreg_highpart_offset (word_mode, mode));
4838 return emit_store_flag (target, code, op0h, op1, word_mode,
4839 unsignedp, normalizep);
4843 /* From now on, we won't change CODE, so set ICODE now. */
4844 icode = setcc_gen_code[(int) code];
4846 /* If this is A < 0 or A >= 0, we can do this by taking the one's
4847 complement of A (for GE) and shifting the sign bit to the low bit. */
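  /* E.g. (illustrative) for 32-bit A: A < 0 is just the logical shift
     (unsigned) A >> 31, and A >= 0 is (unsigned) ~A >> 31.  */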
4848 if (op1 == const0_rtx && (code == LT || code == GE)
4849 && GET_MODE_CLASS (mode) == MODE_INT
4850 && (normalizep || STORE_FLAG_VALUE == 1
4851 || (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
4852 && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
4853 == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))))
4855 subtarget = target;
4857 /* If the result is to be wider than OP0, it is best to convert it
4858 first. If it is to be narrower, it is *incorrect* to convert it
4859 first. */
4860 if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
4862 op0 = convert_modes (target_mode, mode, op0, 0);
4863 mode = target_mode;
4866 if (target_mode != mode)
4867 subtarget = 0;
4869 if (code == GE)
4870 op0 = expand_unop (mode, one_cmpl_optab, op0,
4871 ((STORE_FLAG_VALUE == 1 || normalizep)
4872 ? 0 : subtarget), 0);
4874 if (STORE_FLAG_VALUE == 1 || normalizep)
4875 /* If we are supposed to produce a 0/1 value, we want to do
4876 a logical shift from the sign bit to the low-order bit; for
4877 a -1/0 value, we do an arithmetic shift. */
4878 op0 = expand_shift (RSHIFT_EXPR, mode, op0,
4879 size_int (GET_MODE_BITSIZE (mode) - 1),
4880 subtarget, normalizep != -1);
4882 if (mode != target_mode)
4883 op0 = convert_modes (target_mode, mode, op0, 0);
4885 return op0;
4888 if (icode != CODE_FOR_nothing)
4890 insn_operand_predicate_fn pred;
4892 /* We think we may be able to do this with a scc insn. Emit the
4893 comparison and then the scc insn. */
4895 do_pending_stack_adjust ();
4896 last = get_last_insn ();
4898 comparison
4899 = compare_from_rtx (op0, op1, code, unsignedp, mode, NULL_RTX);
4900 if (CONSTANT_P (comparison))
4902 if (GET_CODE (comparison) == CONST_INT)
4904 if (comparison == const0_rtx)
4905 return const0_rtx;
4907 #ifdef FLOAT_STORE_FLAG_VALUE
4908 else if (GET_CODE (comparison) == CONST_DOUBLE)
4910 if (comparison == CONST0_RTX (GET_MODE (comparison)))
4911 return const0_rtx;
4913 #endif
4914 else
4915 abort ();
4916 if (normalizep == 1)
4917 return const1_rtx;
4918 if (normalizep == -1)
4919 return constm1_rtx;
4920 return const_true_rtx;
4923 /* The code of COMPARISON may not match CODE if compare_from_rtx
4924 decided to swap its operands and reverse the original code.
4926 We know that compare_from_rtx returns either a CONST_INT or
4927 a new comparison code, so it is safe to just extract the
4928 code from COMPARISON. */
4929 code = GET_CODE (comparison);
4931 /* Get a reference to the target in the proper mode for this insn. */
4932 compare_mode = insn_data[(int) icode].operand[0].mode;
4933 subtarget = target;
4934 pred = insn_data[(int) icode].operand[0].predicate;
4935 if (optimize || ! (*pred) (subtarget, compare_mode))
4936 subtarget = gen_reg_rtx (compare_mode);
4938 pattern = GEN_FCN (icode) (subtarget);
4939 if (pattern)
4941 emit_insn (pattern);
4943 /* If we are converting to a wider mode, first convert to
4944 TARGET_MODE, then normalize. This produces better combining
4945 opportunities on machines that have a SIGN_EXTRACT when we are
4946 testing a single bit. This mostly benefits the 68k.
4948 If STORE_FLAG_VALUE does not have the sign bit set when
4949 interpreted in COMPARE_MODE, we can do this conversion as
4950 unsigned, which is usually more efficient. */
4951 if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (compare_mode))
4953 convert_move (target, subtarget,
4954 (GET_MODE_BITSIZE (compare_mode)
4955 <= HOST_BITS_PER_WIDE_INT)
4956 && 0 == (STORE_FLAG_VALUE
4957 & ((HOST_WIDE_INT) 1
4958 << (GET_MODE_BITSIZE (compare_mode) -1))));
4959 op0 = target;
4960 compare_mode = target_mode;
4962 else
4963 op0 = subtarget;
4965 /* If we want to keep subexpressions around, don't reuse our
4966 last target. */
4968 if (optimize)
4969 subtarget = 0;
4971 /* Now normalize to the proper value in COMPARE_MODE. Sometimes
4972 we don't have to do anything. */
4973 if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
4975 /* STORE_FLAG_VALUE might be the most negative number, so write
4976 the comparison this way to avoid a compile-time warning. */
4977 else if (- normalizep == STORE_FLAG_VALUE)
4978 op0 = expand_unop (compare_mode, neg_optab, op0, subtarget, 0);
4980 /* We don't want to use STORE_FLAG_VALUE < 0 below since this
4981 makes it hard to use a value of just the sign bit due to
4982 ANSI integer constant typing rules. */
4983 else if (GET_MODE_BITSIZE (compare_mode) <= HOST_BITS_PER_WIDE_INT
4984 && (STORE_FLAG_VALUE
4985 & ((HOST_WIDE_INT) 1
4986 << (GET_MODE_BITSIZE (compare_mode) - 1))))
4987 op0 = expand_shift (RSHIFT_EXPR, compare_mode, op0,
4988 size_int (GET_MODE_BITSIZE (compare_mode) - 1),
4989 subtarget, normalizep == 1);
4990 else if (STORE_FLAG_VALUE & 1)
4992 op0 = expand_and (compare_mode, op0, const1_rtx, subtarget);
4993 if (normalizep == -1)
4994 op0 = expand_unop (compare_mode, neg_optab, op0, op0, 0);
4996 else
4997 abort ();
4999 /* If we were converting to a smaller mode, do the
5000 conversion now. */
5001 if (target_mode != compare_mode)
5003 convert_move (target, op0, 0);
5004 return target;
5006 else
5007 return op0;
5011 delete_insns_since (last);
5013 /* If optimizing, use different pseudo registers for each insn, instead
5014 of reusing the same pseudo. This leads to better CSE, but slows
5015 down the compiler, since there are more pseudos. */
5016 subtarget = (!optimize
5017 && (target_mode == mode)) ? target : NULL_RTX;
5019 /* If we reached here, we can't do this with a scc insn. However, there
5020 are some comparisons that can be done directly. For example, if
5021 this is an equality comparison of integers, we can try to exclusive-or
5022 (or subtract) the two operands and use a recursive call to try the
5023 comparison with zero. Don't do any of these cases if branches are
5024 very cheap. */
5026 if (BRANCH_COST > 0
5027 && GET_MODE_CLASS (mode) == MODE_INT && (code == EQ || code == NE)
5028 && op1 != const0_rtx)
5030 tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5031 OPTAB_WIDEN);
5033 if (tem == 0)
5034 tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5035 OPTAB_WIDEN);
5036 if (tem != 0)
5037 tem = emit_store_flag (target, code, tem, const0_rtx,
5038 mode, unsignedp, normalizep);
5039 if (tem == 0)
5040 delete_insns_since (last);
5041 return tem;
5044 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5045 the constant zero. Reject all other comparisons at this point. Only
5046 do LE and GT if branches are expensive since they are expensive on
5047 2-operand machines. */
5049 if (BRANCH_COST == 0
5050 || GET_MODE_CLASS (mode) != MODE_INT || op1 != const0_rtx
5051 || (code != EQ && code != NE
5052 && (BRANCH_COST <= 1 || (code != LE && code != GT))))
5053 return 0;
5055 /* See what we need to return. We can only return a 1, -1, or the
5056 sign bit. */
5058 if (normalizep == 0)
5060 if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5061 normalizep = STORE_FLAG_VALUE;
5063 else if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
5064 && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
5065 == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))
5067 else
5068 return 0;
5071 /* Try to put the result of the comparison in the sign bit. Assume we can't
5072 do the necessary operation below. */
5074 tem = 0;
5076 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
5077 the sign bit set. */
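  /* As 32-bit sanity checks: A = 0 gives 0 | -1 == -1 (sign set),
     A = -3 gives -3 | -4 == -3 (sign set), while A = 5 gives
     5 | 4 == 5 (sign clear).  */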
5079 if (code == LE)
5081 /* This is destructive, so SUBTARGET can't be OP0. */
5082 if (rtx_equal_p (subtarget, op0))
5083 subtarget = 0;
5085 tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5086 OPTAB_WIDEN);
5087 if (tem)
5088 tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5089 OPTAB_WIDEN);
5092 /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5093 number of bits in the mode of OP0, minus one. */
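  /* Sanity checks (32-bit, arithmetic shift): A = 5 gives 0 - 5 == -5
     (sign set), A = 0 gives 0 (sign clear), and A = -3 gives
     -1 - -3 == 2 (sign clear).  */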
5095 if (code == GT)
5097 if (rtx_equal_p (subtarget, op0))
5098 subtarget = 0;
5100 tem = expand_shift (RSHIFT_EXPR, mode, op0,
5101 size_int (GET_MODE_BITSIZE (mode) - 1),
5102 subtarget, 0);
5103 tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5104 OPTAB_WIDEN);
5107 if (code == EQ || code == NE)
5109 /* For EQ or NE, one way to do the comparison is to apply an operation
5110 that converts the operand into a positive number if it is nonzero
5111 or zero if it was originally zero. Then, for EQ, we subtract 1 and
5112 for NE we negate. This puts the result in the sign bit. Then we
5113 normalize with a shift, if needed.
5115 Two operations that can do the above actions are ABS and FFS, so try
5116 them. If that doesn't work, and MODE is smaller than a full word,
5117 we can use zero-extension to the wider mode (an unsigned conversion)
5118 as the operation. */
5120 /* Note that ABS doesn't yield a positive number for INT_MIN, but
5121 that is compensated by the subsequent overflow when subtracting
5122 one / negating. */
5124 if (abs_optab->handlers[mode].insn_code != CODE_FOR_nothing)
5125 tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5126 else if (ffs_optab->handlers[mode].insn_code != CODE_FOR_nothing)
5127 tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5128 else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5130 tem = convert_modes (word_mode, mode, op0, 1);
5131 mode = word_mode;
5134 if (tem != 0)
5136 if (code == EQ)
5137 tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5138 0, OPTAB_WIDEN);
5139 else
5140 tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5143 /* If we couldn't do it that way, for NE we can "or" the two's complement
5144 of the value with itself. For EQ, we take the one's complement of
5145 that "or", which is an extra insn, so we only handle EQ if branches
5146 are expensive. */
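  /* E.g. in 32 bits: for nonzero A, say A = 5, (-A) | A == -1 (sign set),
     while for A = 0 it is 0 (sign clear); one's complementing that gives
     the EQ test.  */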
5148 if (tem == 0 && (code == NE || BRANCH_COST > 1))
5150 if (rtx_equal_p (subtarget, op0))
5151 subtarget = 0;
5153 tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5154 tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5155 OPTAB_WIDEN);
5157 if (tem && code == EQ)
5158 tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5162 if (tem && normalizep)
5163 tem = expand_shift (RSHIFT_EXPR, mode, tem,
5164 size_int (GET_MODE_BITSIZE (mode) - 1),
5165 subtarget, normalizep == 1);
5167 if (tem)
5169 if (GET_MODE (tem) != target_mode)
5171 convert_move (target, tem, 0);
5172 tem = target;
5174 else if (!subtarget)
5176 emit_move_insn (target, tem);
5177 tem = target;
5180 else
5181 delete_insns_since (last);
5183 return tem;
5186 /* Like emit_store_flag, but always succeeds. */
5189 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5190 enum machine_mode mode, int unsignedp, int normalizep)
5192 rtx tem, label;
5194 /* First see if emit_store_flag can do the job. */
5195 tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5196 if (tem != 0)
5197 return tem;
5199 if (normalizep == 0)
5200 normalizep = 1;
5202 /* If this failed, we have to do this with set/compare/jump/set code. */
5204 if (!REG_P (target)
5205 || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5206 target = gen_reg_rtx (GET_MODE (target));
5208 emit_move_insn (target, const1_rtx);
5209 label = gen_label_rtx ();
5210 do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5211 NULL_RTX, label);
5213 emit_move_insn (target, const0_rtx);
5214 emit_label (label);
5216 return target;
5219 /* Perform possibly multi-word comparison and conditional jump to LABEL
5220 if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE
5222 The algorithm is based on the code in expr.c:do_jump.
5224 Note that this does not perform a general comparison. Only variants
5225 generated within expmed.c are correctly handled; others abort (but could
5226 be handled if needed). */
5228 static void
5229 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
5230 rtx label)
5232 /* If this mode is an integer too wide to compare properly,
5233 compare word by word. Rely on cse to optimize constant cases. */
5235 if (GET_MODE_CLASS (mode) == MODE_INT
5236 && ! can_compare_p (op, mode, ccp_jump))
5238 rtx label2 = gen_label_rtx ();
5240 switch (op)
5242 case LTU:
5243 do_jump_by_parts_greater_rtx (mode, 1, arg2, arg1, label2, label);
5244 break;
5246 case LEU:
5247 do_jump_by_parts_greater_rtx (mode, 1, arg1, arg2, label, label2);
5248 break;
5250 case LT:
5251 do_jump_by_parts_greater_rtx (mode, 0, arg2, arg1, label2, label);
5252 break;
5254 case GT:
5255 do_jump_by_parts_greater_rtx (mode, 0, arg1, arg2, label2, label);
5256 break;
5258 case GE:
5259 do_jump_by_parts_greater_rtx (mode, 0, arg2, arg1, label, label2);
5260 break;
5262 /* do_jump_by_parts_equality_rtx compares with zero. Luckily
5263 those are the only equality operations we do. */
5264 case EQ:
5265 if (arg2 != const0_rtx || mode != GET_MODE(arg1))
5266 abort ();
5267 do_jump_by_parts_equality_rtx (arg1, label2, label);
5268 break;
5270 case NE:
5271 if (arg2 != const0_rtx || mode != GET_MODE(arg1))
5272 abort ();
5273 do_jump_by_parts_equality_rtx (arg1, label, label2);
5274 break;
5276 default:
5277 abort ();
5280 emit_label (label2);
5282 else
5283 emit_cmp_and_jump_insns (arg1, arg2, op, NULL_RTX, mode, 0, label);