2011-04-29 Tobias Burnus <burnus@net-b.de>
[official-gcc.git] / gcc / expmed.c
blob 7e9204d444a6e603faaf8d078103a31eb714dcd7
1 /* Medium-level subroutines: convert bit-field store and extract
2 and shifts, multiplies and divides to rtl instructions.
3 Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
4 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
5 2011
6 Free Software Foundation, Inc.
8 This file is part of GCC.
10 GCC is free software; you can redistribute it and/or modify it under
11 the terms of the GNU General Public License as published by the Free
12 Software Foundation; either version 3, or (at your option) any later
13 version.
15 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
16 WARRANTY; without even the implied warranty of MERCHANTABILITY or
17 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 for more details.
20 You should have received a copy of the GNU General Public License
21 along with GCC; see the file COPYING3. If not see
22 <http://www.gnu.org/licenses/>. */
25 #include "config.h"
26 #include "system.h"
27 #include "coretypes.h"
28 #include "tm.h"
29 #include "diagnostic-core.h"
30 #include "rtl.h"
31 #include "tree.h"
32 #include "tm_p.h"
33 #include "flags.h"
34 #include "insn-config.h"
35 #include "expr.h"
36 #include "optabs.h"
37 #include "recog.h"
38 #include "langhooks.h"
39 #include "df.h"
40 #include "target.h"
41 #include "expmed.h"
43 struct target_expmed default_target_expmed;
44 #if SWITCHABLE_TARGET
45 struct target_expmed *this_target_expmed = &default_target_expmed;
46 #endif
48 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
49 unsigned HOST_WIDE_INT,
50 unsigned HOST_WIDE_INT, rtx);
51 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
52 unsigned HOST_WIDE_INT, rtx);
53 static rtx extract_fixed_bit_field (enum machine_mode, rtx,
54 unsigned HOST_WIDE_INT,
55 unsigned HOST_WIDE_INT,
56 unsigned HOST_WIDE_INT, rtx, int, bool);
57 static rtx mask_rtx (enum machine_mode, int, int, int);
58 static rtx lshift_value (enum machine_mode, rtx, int, int);
59 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
60 unsigned HOST_WIDE_INT, int);
61 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
62 static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
63 static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
65 /* Test whether a value is zero or a power of two. */
66 #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
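/* Worked example (an illustrative sketch, not part of the original file):
   a power of two has exactly one set bit, so x & (x - 1) clears it and
   yields zero; e.g. 8 & 7 == 0 and 0 & -1 == 0, while 6 & 5 == 4.  */
#if 0
static void
exact_power_of_2_examples (void)
{
  gcc_assert (EXACT_POWER_OF_2_OR_ZERO_P (0));
  gcc_assert (EXACT_POWER_OF_2_OR_ZERO_P (8));
  gcc_assert (! EXACT_POWER_OF_2_OR_ZERO_P (6));
}
#endif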
68 #ifndef SLOW_UNALIGNED_ACCESS
69 #define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
70 #endif
73 /* Reduce conditional compilation elsewhere. */
74 #ifndef HAVE_insv
75 #define HAVE_insv 0
76 #define CODE_FOR_insv CODE_FOR_nothing
77 #define gen_insv(a,b,c,d) NULL_RTX
78 #endif
79 #ifndef HAVE_extv
80 #define HAVE_extv 0
81 #define CODE_FOR_extv CODE_FOR_nothing
82 #define gen_extv(a,b,c,d) NULL_RTX
83 #endif
84 #ifndef HAVE_extzv
85 #define HAVE_extzv 0
86 #define CODE_FOR_extzv CODE_FOR_nothing
87 #define gen_extzv(a,b,c,d) NULL_RTX
88 #endif
90 void
91 init_expmed (void)
93 struct
95 struct rtx_def reg; rtunion reg_fld[2];
96 struct rtx_def plus; rtunion plus_fld1;
97 struct rtx_def neg;
98 struct rtx_def mult; rtunion mult_fld1;
99 struct rtx_def sdiv; rtunion sdiv_fld1;
100 struct rtx_def udiv; rtunion udiv_fld1;
101 struct rtx_def zext;
102 struct rtx_def sdiv_32; rtunion sdiv_32_fld1;
103 struct rtx_def smod_32; rtunion smod_32_fld1;
104 struct rtx_def wide_mult; rtunion wide_mult_fld1;
105 struct rtx_def wide_lshr; rtunion wide_lshr_fld1;
106 struct rtx_def wide_trunc;
107 struct rtx_def shift; rtunion shift_fld1;
108 struct rtx_def shift_mult; rtunion shift_mult_fld1;
109 struct rtx_def shift_add; rtunion shift_add_fld1;
110 struct rtx_def shift_sub0; rtunion shift_sub0_fld1;
111 struct rtx_def shift_sub1; rtunion shift_sub1_fld1;
112 } all;
114 rtx pow2[MAX_BITS_PER_WORD];
115 rtx cint[MAX_BITS_PER_WORD];
116 int m, n;
117 enum machine_mode mode, wider_mode;
118 int speed;
121 for (m = 1; m < MAX_BITS_PER_WORD; m++)
123 pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
124 cint[m] = GEN_INT (m);
126 memset (&all, 0, sizeof all);
128 PUT_CODE (&all.reg, REG);
129 /* Avoid using hard regs in ways which may be unsupported. */
130 SET_REGNO (&all.reg, LAST_VIRTUAL_REGISTER + 1);
132 PUT_CODE (&all.plus, PLUS);
133 XEXP (&all.plus, 0) = &all.reg;
134 XEXP (&all.plus, 1) = &all.reg;
136 PUT_CODE (&all.neg, NEG);
137 XEXP (&all.neg, 0) = &all.reg;
139 PUT_CODE (&all.mult, MULT);
140 XEXP (&all.mult, 0) = &all.reg;
141 XEXP (&all.mult, 1) = &all.reg;
143 PUT_CODE (&all.sdiv, DIV);
144 XEXP (&all.sdiv, 0) = &all.reg;
145 XEXP (&all.sdiv, 1) = &all.reg;
147 PUT_CODE (&all.udiv, UDIV);
148 XEXP (&all.udiv, 0) = &all.reg;
149 XEXP (&all.udiv, 1) = &all.reg;
151 PUT_CODE (&all.sdiv_32, DIV);
152 XEXP (&all.sdiv_32, 0) = &all.reg;
153 XEXP (&all.sdiv_32, 1) = 32 < MAX_BITS_PER_WORD ? cint[32] : GEN_INT (32);
155 PUT_CODE (&all.smod_32, MOD);
156 XEXP (&all.smod_32, 0) = &all.reg;
157 XEXP (&all.smod_32, 1) = XEXP (&all.sdiv_32, 1);
159 PUT_CODE (&all.zext, ZERO_EXTEND);
160 XEXP (&all.zext, 0) = &all.reg;
162 PUT_CODE (&all.wide_mult, MULT);
163 XEXP (&all.wide_mult, 0) = &all.zext;
164 XEXP (&all.wide_mult, 1) = &all.zext;
166 PUT_CODE (&all.wide_lshr, LSHIFTRT);
167 XEXP (&all.wide_lshr, 0) = &all.wide_mult;
169 PUT_CODE (&all.wide_trunc, TRUNCATE);
170 XEXP (&all.wide_trunc, 0) = &all.wide_lshr;
172 PUT_CODE (&all.shift, ASHIFT);
173 XEXP (&all.shift, 0) = &all.reg;
175 PUT_CODE (&all.shift_mult, MULT);
176 XEXP (&all.shift_mult, 0) = &all.reg;
178 PUT_CODE (&all.shift_add, PLUS);
179 XEXP (&all.shift_add, 0) = &all.shift_mult;
180 XEXP (&all.shift_add, 1) = &all.reg;
182 PUT_CODE (&all.shift_sub0, MINUS);
183 XEXP (&all.shift_sub0, 0) = &all.shift_mult;
184 XEXP (&all.shift_sub0, 1) = &all.reg;
186 PUT_CODE (&all.shift_sub1, MINUS);
187 XEXP (&all.shift_sub1, 0) = &all.reg;
188 XEXP (&all.shift_sub1, 1) = &all.shift_mult;
190 for (speed = 0; speed < 2; speed++)
192 crtl->maybe_hot_insn_p = speed;
193 zero_cost[speed] = rtx_cost (const0_rtx, SET, speed);
195 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
196 mode != VOIDmode;
197 mode = GET_MODE_WIDER_MODE (mode))
199 PUT_MODE (&all.reg, mode);
200 PUT_MODE (&all.plus, mode);
201 PUT_MODE (&all.neg, mode);
202 PUT_MODE (&all.mult, mode);
203 PUT_MODE (&all.sdiv, mode);
204 PUT_MODE (&all.udiv, mode);
205 PUT_MODE (&all.sdiv_32, mode);
206 PUT_MODE (&all.smod_32, mode);
207 PUT_MODE (&all.wide_trunc, mode);
208 PUT_MODE (&all.shift, mode);
209 PUT_MODE (&all.shift_mult, mode);
210 PUT_MODE (&all.shift_add, mode);
211 PUT_MODE (&all.shift_sub0, mode);
212 PUT_MODE (&all.shift_sub1, mode);
214 add_cost[speed][mode] = rtx_cost (&all.plus, SET, speed);
215 neg_cost[speed][mode] = rtx_cost (&all.neg, SET, speed);
216 mul_cost[speed][mode] = rtx_cost (&all.mult, SET, speed);
217 sdiv_cost[speed][mode] = rtx_cost (&all.sdiv, SET, speed);
218 udiv_cost[speed][mode] = rtx_cost (&all.udiv, SET, speed);
220 sdiv_pow2_cheap[speed][mode] = (rtx_cost (&all.sdiv_32, SET, speed)
221 <= 2 * add_cost[speed][mode]);
222 smod_pow2_cheap[speed][mode] = (rtx_cost (&all.smod_32, SET, speed)
223 <= 4 * add_cost[speed][mode]);
225 wider_mode = GET_MODE_WIDER_MODE (mode);
226 if (wider_mode != VOIDmode)
228 PUT_MODE (&all.zext, wider_mode);
229 PUT_MODE (&all.wide_mult, wider_mode);
230 PUT_MODE (&all.wide_lshr, wider_mode);
231 XEXP (&all.wide_lshr, 1) = GEN_INT (GET_MODE_BITSIZE (mode));
233 mul_widen_cost[speed][wider_mode]
234 = rtx_cost (&all.wide_mult, SET, speed);
235 mul_highpart_cost[speed][mode]
236 = rtx_cost (&all.wide_trunc, SET, speed);
239 shift_cost[speed][mode][0] = 0;
240 shiftadd_cost[speed][mode][0] = shiftsub0_cost[speed][mode][0]
241 = shiftsub1_cost[speed][mode][0] = add_cost[speed][mode];
243 n = MIN (MAX_BITS_PER_WORD, GET_MODE_BITSIZE (mode));
244 for (m = 1; m < n; m++)
246 XEXP (&all.shift, 1) = cint[m];
247 XEXP (&all.shift_mult, 1) = pow2[m];
249 shift_cost[speed][mode][m] = rtx_cost (&all.shift, SET, speed);
250 shiftadd_cost[speed][mode][m] = rtx_cost (&all.shift_add, SET, speed);
251 shiftsub0_cost[speed][mode][m] = rtx_cost (&all.shift_sub0, SET, speed);
252 shiftsub1_cost[speed][mode][m] = rtx_cost (&all.shift_sub1, SET, speed);
256 if (alg_hash_used_p)
257 memset (alg_hash, 0, sizeof (alg_hash));
258 else
259 alg_hash_used_p = true;
260 default_rtl_profile ();
263 /* Return an rtx representing minus the value of X.
264 MODE is the intended mode of the result,
265 useful if X is a CONST_INT. */
267 rtx
268 negate_rtx (enum machine_mode mode, rtx x)
270 rtx result = simplify_unary_operation (NEG, mode, x, mode);
272 if (result == 0)
273 result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
275 return result;
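/* Example use (a hypothetical sketch with made-up operands, not part of
   this file): negating a CONST_INT is folded immediately by
   simplify_unary_operation, while negating a pseudo register falls
   through to expand_unop and emits a NEG insn.  */
#if 0
static void
negate_rtx_examples (void)
{
  rtx c = negate_rtx (SImode, GEN_INT (5));          /* (const_int -5) */
  rtx r = negate_rtx (SImode, gen_reg_rtx (SImode)); /* emits a neg insn */
  (void) c;
  (void) r;
}
#endif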
278 /* Report on the availability of insv/extv/extzv and the desired mode
279 of each of their operands. Returns MAX_MACHINE_MODE if HAVE_foo
280 is false; else the mode of the specified operand. If OPNO is -1,
281 all the caller cares about is whether the insn is available. */
282 enum machine_mode
283 mode_for_extraction (enum extraction_pattern pattern, int opno)
285 const struct insn_data_d *data;
287 switch (pattern)
289 case EP_insv:
290 if (HAVE_insv)
292 data = &insn_data[CODE_FOR_insv];
293 break;
295 return MAX_MACHINE_MODE;
297 case EP_extv:
298 if (HAVE_extv)
300 data = &insn_data[CODE_FOR_extv];
301 break;
303 return MAX_MACHINE_MODE;
305 case EP_extzv:
306 if (HAVE_extzv)
308 data = &insn_data[CODE_FOR_extzv];
309 break;
311 return MAX_MACHINE_MODE;
313 default:
314 gcc_unreachable ();
317 if (opno == -1)
318 return VOIDmode;
320 /* Everyone who uses this function used to follow it with
321 if (result == VOIDmode) result = word_mode; */
322 if (data->operand[opno].mode == VOIDmode)
323 return word_mode;
324 return data->operand[opno].mode;
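/* Typical caller pattern (a minimal sketch; the operand index 3 matches
   the value operand of insv, as used by store_bit_field_1 below).  A
   result of MAX_MACHINE_MODE means the target provides no such pattern
   at all.  */
#if 0
  enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);
  if (op_mode != MAX_MACHINE_MODE)
    /* op_mode is the mode the insv pattern wants for its value operand,
       already defaulted to word_mode if the pattern left it VOIDmode.  */
    ;
#endif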
327 /* A subroutine of store_bit_field, with the same arguments. Return true
328 if the operation could be implemented.
330 If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
331 no other way of implementing the operation. If FALLBACK_P is false,
332 return false instead. */
334 static bool
335 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
336 unsigned HOST_WIDE_INT bitnum, enum machine_mode fieldmode,
337 rtx value, bool fallback_p)
339 unsigned int unit
340 = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
341 unsigned HOST_WIDE_INT offset, bitpos;
342 rtx op0 = str_rtx;
343 int byte_offset;
344 rtx orig_value;
346 enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);
348 while (GET_CODE (op0) == SUBREG)
350 /* The following line once was done only if WORDS_BIG_ENDIAN,
351 but I think that is a mistake. WORDS_BIG_ENDIAN is
352 meaningful at a much higher level; when structures are copied
353 between memory and regs, the higher-numbered regs
354 always get higher addresses. */
355 int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
356 int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
358 byte_offset = 0;
360 /* Paradoxical subregs need special handling on big endian machines. */
361 if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
363 int difference = inner_mode_size - outer_mode_size;
365 if (WORDS_BIG_ENDIAN)
366 byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
367 if (BYTES_BIG_ENDIAN)
368 byte_offset += difference % UNITS_PER_WORD;
370 else
371 byte_offset = SUBREG_BYTE (op0);
373 bitnum += byte_offset * BITS_PER_UNIT;
374 op0 = SUBREG_REG (op0);
377 /* No action is needed if the target is a register and if the field
378 lies completely outside that register. This can occur if the source
379 code contains an out-of-bounds access to a small array. */
380 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
381 return true;
383 /* Use vec_set patterns for inserting parts of vectors whenever
384 available. */
385 if (VECTOR_MODE_P (GET_MODE (op0))
386 && !MEM_P (op0)
387 && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
388 && fieldmode == GET_MODE_INNER (GET_MODE (op0))
389 && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
390 && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
392 struct expand_operand ops[3];
393 enum machine_mode outermode = GET_MODE (op0);
394 enum machine_mode innermode = GET_MODE_INNER (outermode);
395 enum insn_code icode = optab_handler (vec_set_optab, outermode);
396 int pos = bitnum / GET_MODE_BITSIZE (innermode);
398 create_fixed_operand (&ops[0], op0);
399 create_input_operand (&ops[1], value, innermode);
400 create_integer_operand (&ops[2], pos);
401 if (maybe_expand_insn (icode, 3, ops))
402 return true;
405 /* If the target is a register, overwriting the entire object, or storing
406 a full-word or multi-word field can be done with just a SUBREG.
408 If the target is memory, storing any naturally aligned field can be
409 done with a simple store. For targets that support fast unaligned
410 memory, any naturally sized, unit aligned field can be done directly. */
412 offset = bitnum / unit;
413 bitpos = bitnum % unit;
414 byte_offset = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
415 + (offset * UNITS_PER_WORD);
417 if (bitpos == 0
418 && bitsize == GET_MODE_BITSIZE (fieldmode)
419 && (!MEM_P (op0)
420 ? ((GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD
421 || GET_MODE_SIZE (GET_MODE (op0)) == GET_MODE_SIZE (fieldmode))
422 && ((GET_MODE (op0) == fieldmode && byte_offset == 0)
423 || validate_subreg (fieldmode, GET_MODE (op0), op0,
424 byte_offset)))
425 : (! SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0))
426 || (offset * BITS_PER_UNIT % bitsize == 0
427 && MEM_ALIGN (op0) % GET_MODE_BITSIZE (fieldmode) == 0))))
429 if (MEM_P (op0))
430 op0 = adjust_address (op0, fieldmode, offset);
431 else if (GET_MODE (op0) != fieldmode)
432 op0 = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
433 byte_offset);
434 emit_move_insn (op0, value);
435 return true;
438 /* Make sure we are playing with integral modes. Pun with subregs
439 if we aren't. This must come after the entire register case above,
440 since that case is valid for any mode. The following cases are only
441 valid for integral modes. */
443 enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
444 if (imode != GET_MODE (op0))
446 if (MEM_P (op0))
447 op0 = adjust_address (op0, imode, 0);
448 else
450 gcc_assert (imode != BLKmode);
451 op0 = gen_lowpart (imode, op0);
456 /* We may be accessing data outside the field, which means
457 we can alias adjacent data. */
458 if (MEM_P (op0))
460 op0 = shallow_copy_rtx (op0);
461 set_mem_alias_set (op0, 0);
462 set_mem_expr (op0, 0);
465 /* If OP0 is a register, BITPOS must count within a word.
466 But as we have it, it counts within whatever size OP0 now has.
467 On a bigendian machine, these are not the same, so convert. */
468 if (BYTES_BIG_ENDIAN
469 && !MEM_P (op0)
470 && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
471 bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
473 /* Storing an lsb-aligned field in a register
474 can be done with a movestrict instruction. */
476 if (!MEM_P (op0)
477 && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
478 && bitsize == GET_MODE_BITSIZE (fieldmode)
479 && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
481 struct expand_operand ops[2];
482 enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
483 rtx arg0 = op0;
484 unsigned HOST_WIDE_INT subreg_off;
486 if (GET_CODE (arg0) == SUBREG)
488 /* Else we've got some float mode source being extracted into
489 a different float mode destination -- this combination of
490 subregs results in Severe Tire Damage. */
491 gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
492 || GET_MODE_CLASS (fieldmode) == MODE_INT
493 || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
494 arg0 = SUBREG_REG (arg0);
497 subreg_off = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
498 + (offset * UNITS_PER_WORD);
499 if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
501 arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
503 create_fixed_operand (&ops[0], arg0);
504 /* Shrink the source operand to FIELDMODE. */
505 create_convert_operand_to (&ops[1], value, fieldmode, false);
506 if (maybe_expand_insn (icode, 2, ops))
507 return true;
511 /* Handle fields bigger than a word. */
513 if (bitsize > BITS_PER_WORD)
515 /* Here we transfer the words of the field
516 in the order least significant first.
517 This is because the most significant word is the one which may
518 be less than full.
519 However, only do that if the value is not BLKmode. */
521 unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
522 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
523 unsigned int i;
524 rtx last;
526 /* This is the mode we must force value to, so that there will be enough
527 subwords to extract. Note that fieldmode will often (always?) be
528 VOIDmode, because that is what store_field uses to indicate that this
529 is a bit field, but passing VOIDmode to operand_subword_force
530 is not allowed. */
531 fieldmode = GET_MODE (value);
532 if (fieldmode == VOIDmode)
533 fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
535 last = get_last_insn ();
536 for (i = 0; i < nwords; i++)
538 /* If I is 0, use the low-order word in both field and target;
539 if I is 1, use the next to lowest word; and so on. */
540 unsigned int wordnum = (backwards ? nwords - i - 1 : i);
541 unsigned int bit_offset = (backwards
542 ? MAX ((int) bitsize - ((int) i + 1)
543 * BITS_PER_WORD,
544 0)
545 : (int) i * BITS_PER_WORD);
546 rtx value_word = operand_subword_force (value, wordnum, fieldmode);
548 if (!store_bit_field_1 (op0, MIN (BITS_PER_WORD,
549 bitsize - i * BITS_PER_WORD),
550 bitnum + bit_offset, word_mode,
551 value_word, fallback_p))
553 delete_insns_since (last);
554 return false;
557 return true;
560 /* From here on we can assume that the field to be stored in is
561 a full-word (whatever type that is), since it is shorter than a word. */
563 /* OFFSET is the number of words or bytes (UNIT says which)
564 from STR_RTX to the first word or byte containing part of the field. */
566 if (!MEM_P (op0))
568 if (offset != 0
569 || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
571 if (!REG_P (op0))
573 /* Since this is a destination (lvalue), we can't copy
574 it to a pseudo. We can remove a SUBREG that does not
575 change the size of the operand. Such a SUBREG may
576 have been added above. */
577 gcc_assert (GET_CODE (op0) == SUBREG
578 && (GET_MODE_SIZE (GET_MODE (op0))
579 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)))));
580 op0 = SUBREG_REG (op0);
582 op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
583 op0, (offset * UNITS_PER_WORD));
585 offset = 0;
588 /* If VALUE has a floating-point or complex mode, access it as an
589 integer of the corresponding size. This can occur on a machine
590 with 64 bit registers that uses SFmode for float. It can also
591 occur for unaligned float or complex fields. */
592 orig_value = value;
593 if (GET_MODE (value) != VOIDmode
594 && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
595 && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
597 value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
598 emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
601 /* Now OFFSET is nonzero only if OP0 is memory
602 and is therefore always measured in bytes. */
604 if (HAVE_insv
605 && GET_MODE (value) != BLKmode
606 && bitsize > 0
607 && GET_MODE_BITSIZE (op_mode) >= bitsize
608 && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
609 && (bitsize + bitpos > GET_MODE_BITSIZE (op_mode))))
611 struct expand_operand ops[4];
612 int xbitpos = bitpos;
613 rtx value1;
614 rtx xop0 = op0;
615 rtx last = get_last_insn ();
616 bool copy_back = false;
618 /* Add OFFSET into OP0's address. */
619 if (MEM_P (xop0))
620 xop0 = adjust_address (xop0, byte_mode, offset);
622 /* If xop0 is a register, we need it in OP_MODE
623 to make it acceptable to the format of insv. */
624 if (GET_CODE (xop0) == SUBREG)
625 /* We can't just change the mode, because this might clobber op0,
626 and we will need the original value of op0 if insv fails. */
627 xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
628 if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
629 xop0 = gen_lowpart_SUBREG (op_mode, xop0);
631 /* If the destination is a paradoxical subreg such that we need a
632 truncate to the inner mode, perform the insertion on a temporary and
633 truncate the result to the original destination. Note that we can't
634 just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
635 X) 0)) is (reg:N X). */
636 if (GET_CODE (xop0) == SUBREG
637 && REG_P (SUBREG_REG (xop0))
638 && (!TRULY_NOOP_TRUNCATION
639 (GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (xop0))),
640 GET_MODE_BITSIZE (op_mode))))
642 rtx tem = gen_reg_rtx (op_mode);
643 emit_move_insn (tem, xop0);
644 xop0 = tem;
645 copy_back = true;
648 /* On big-endian machines, we count bits from the most significant.
649 If the bit field insn does not, we must invert. */
651 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
652 xbitpos = unit - bitsize - xbitpos;
654 /* We have been counting XBITPOS within UNIT.
655 Count instead within the size of the register. */
656 if (BITS_BIG_ENDIAN && !MEM_P (xop0))
657 xbitpos += GET_MODE_BITSIZE (op_mode) - unit;
659 unit = GET_MODE_BITSIZE (op_mode);
661 /* Convert VALUE to op_mode (which insv insn wants) in VALUE1. */
662 value1 = value;
663 if (GET_MODE (value) != op_mode)
665 if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
667 /* Optimization: Don't bother really extending VALUE
668 if it has all the bits we will actually use. However,
669 if we must narrow it, be sure we do it correctly. */
671 if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
673 rtx tmp;
675 tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
676 if (! tmp)
677 tmp = simplify_gen_subreg (op_mode,
678 force_reg (GET_MODE (value),
679 value1),
680 GET_MODE (value), 0);
681 value1 = tmp;
683 else
684 value1 = gen_lowpart (op_mode, value1);
686 else if (CONST_INT_P (value))
687 value1 = gen_int_mode (INTVAL (value), op_mode);
688 else
689 /* Parse phase is supposed to make VALUE's data type
690 match that of the component reference, which is a type
691 at least as wide as the field; so VALUE should have
692 a mode that corresponds to that type. */
693 gcc_assert (CONSTANT_P (value));
696 create_fixed_operand (&ops[0], xop0);
697 create_integer_operand (&ops[1], bitsize);
698 create_integer_operand (&ops[2], xbitpos);
699 create_input_operand (&ops[3], value1, op_mode);
700 if (maybe_expand_insn (CODE_FOR_insv, 4, ops))
702 if (copy_back)
703 convert_move (op0, xop0, true);
704 return true;
706 delete_insns_since (last);
709 /* If OP0 is a memory, try copying it to a register and seeing if a
710 cheap register alternative is available. */
711 if (HAVE_insv && MEM_P (op0))
713 enum machine_mode bestmode;
715 /* Get the mode to use for inserting into this field. If OP0 is
716 BLKmode, get the smallest mode consistent with the alignment. If
717 OP0 is a non-BLKmode object that is no wider than OP_MODE, use its
718 mode. Otherwise, use the smallest mode containing the field. */
720 if (GET_MODE (op0) == BLKmode
721 || (op_mode != MAX_MACHINE_MODE
722 && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (op_mode)))
723 bestmode = get_best_mode (bitsize, bitnum, MEM_ALIGN (op0),
724 (op_mode == MAX_MACHINE_MODE
725 ? VOIDmode : op_mode),
726 MEM_VOLATILE_P (op0));
727 else
728 bestmode = GET_MODE (op0);
730 if (bestmode != VOIDmode
731 && GET_MODE_SIZE (bestmode) >= GET_MODE_SIZE (fieldmode)
732 && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
733 && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
735 rtx last, tempreg, xop0;
736 unsigned HOST_WIDE_INT xoffset, xbitpos;
738 last = get_last_insn ();
740 /* Adjust address to point to the containing unit of
741 that mode. Compute the offset as a multiple of this unit,
742 counting in bytes. */
743 unit = GET_MODE_BITSIZE (bestmode);
744 xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
745 xbitpos = bitnum % unit;
746 xop0 = adjust_address (op0, bestmode, xoffset);
748 /* Fetch that unit, store the bitfield in it, then store
749 the unit. */
750 tempreg = copy_to_reg (xop0);
751 if (store_bit_field_1 (tempreg, bitsize, xbitpos,
752 fieldmode, orig_value, false))
754 emit_move_insn (xop0, tempreg);
755 return true;
757 delete_insns_since (last);
761 if (!fallback_p)
762 return false;
764 store_fixed_bit_field (op0, offset, bitsize, bitpos, value);
765 return true;
768 /* Generate code to store value from rtx VALUE
769 into a bit-field within structure STR_RTX
770 containing BITSIZE bits starting at bit BITNUM.
771 FIELDMODE is the machine-mode of the FIELD_DECL node for this field. */
773 void
774 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
775 unsigned HOST_WIDE_INT bitnum, enum machine_mode fieldmode,
776 rtx value)
778 if (!store_bit_field_1 (str_rtx, bitsize, bitnum, fieldmode, value, true))
779 gcc_unreachable ();
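/* Example caller (a hypothetical sketch; the destination, field position
   and value are made up for illustration): store the 3-bit value 6 into
   bits 5..7 of an SImode pseudo.  */
#if 0
static void
store_bit_field_example (void)
{
  rtx dest = gen_reg_rtx (SImode);
  emit_move_insn (dest, const0_rtx);
  store_bit_field (dest, 3, 5, QImode, GEN_INT (6));
}
#endif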
782 /* Use shifts and boolean operations to store VALUE
783 into a bit field of width BITSIZE
784 in a memory location specified by OP0 except offset by OFFSET bytes.
785 (OFFSET must be 0 if OP0 is a register.)
786 The field starts at position BITPOS within the byte.
787 (If OP0 is a register, it may be a full word or a narrower mode,
788 but BITPOS still counts within a full word,
789 which is significant on bigendian machines.) */
791 static void
792 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT offset,
793 unsigned HOST_WIDE_INT bitsize,
794 unsigned HOST_WIDE_INT bitpos, rtx value)
796 enum machine_mode mode;
797 unsigned int total_bits = BITS_PER_WORD;
798 rtx temp;
799 int all_zero = 0;
800 int all_one = 0;
802 /* There is a case not handled here:
803 a structure with a known alignment of just a halfword
804 and a field split across two aligned halfwords within the structure.
805 Or likewise a structure with a known alignment of just a byte
806 and a field split across two bytes.
807 Such cases are not supposed to be able to occur. */
809 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
811 gcc_assert (!offset);
812 /* Special treatment for a bit field split across two registers. */
813 if (bitsize + bitpos > BITS_PER_WORD)
815 store_split_bit_field (op0, bitsize, bitpos, value);
816 return;
819 else
821 /* Get the proper mode to use for this field. We want a mode that
822 includes the entire field. If such a mode would be larger than
823 a word, we won't be doing the extraction the normal way.
824 We don't want a mode bigger than the destination. */
826 mode = GET_MODE (op0);
827 if (GET_MODE_BITSIZE (mode) == 0
828 || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
829 mode = word_mode;
831 if (MEM_VOLATILE_P (op0)
832 && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
833 && flag_strict_volatile_bitfields > 0)
834 mode = GET_MODE (op0);
835 else
836 mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
837 MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
839 if (mode == VOIDmode)
841 /* The only way this should occur is if the field spans word
842 boundaries. */
843 store_split_bit_field (op0, bitsize, bitpos + offset * BITS_PER_UNIT,
844 value);
845 return;
848 total_bits = GET_MODE_BITSIZE (mode);
850 /* Make sure bitpos is valid for the chosen mode. Adjust BITPOS to
851 be in the range 0 to total_bits-1, and put any excess bytes in
852 OFFSET. */
853 if (bitpos >= total_bits)
855 offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
856 bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
857 * BITS_PER_UNIT);
860 /* Get ref to an aligned byte, halfword, or word containing the field.
861 Adjust BITPOS to be position within a word,
862 and OFFSET to be the offset of that word.
863 Then alter OP0 to refer to that word. */
864 bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
865 offset -= (offset % (total_bits / BITS_PER_UNIT));
866 op0 = adjust_address (op0, mode, offset);
869 mode = GET_MODE (op0);
871 /* Now MODE is either some integral mode for a MEM as OP0,
872 or is a full-word for a REG as OP0. TOTAL_BITS corresponds.
873 The bit field is contained entirely within OP0.
874 BITPOS is the starting bit number within OP0.
875 (OP0's mode may actually be narrower than MODE.) */
877 if (BYTES_BIG_ENDIAN)
878 /* BITPOS is the distance between our msb
879 and that of the containing datum.
880 Convert it to the distance from the lsb. */
881 bitpos = total_bits - bitsize - bitpos;
883 /* Now BITPOS is always the distance between our lsb
884 and that of OP0. */
886 /* Shift VALUE left by BITPOS bits. If VALUE is not constant,
887 we must first convert its mode to MODE. */
889 if (CONST_INT_P (value))
891 HOST_WIDE_INT v = INTVAL (value);
893 if (bitsize < HOST_BITS_PER_WIDE_INT)
894 v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
896 if (v == 0)
897 all_zero = 1;
898 else if ((bitsize < HOST_BITS_PER_WIDE_INT
899 && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
900 || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
901 all_one = 1;
903 value = lshift_value (mode, value, bitpos, bitsize);
905 else
907 int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
908 && bitpos + bitsize != GET_MODE_BITSIZE (mode));
910 if (GET_MODE (value) != mode)
911 value = convert_to_mode (mode, value, 1);
913 if (must_and)
914 value = expand_binop (mode, and_optab, value,
915 mask_rtx (mode, 0, bitsize, 0),
916 NULL_RTX, 1, OPTAB_LIB_WIDEN);
917 if (bitpos > 0)
918 value = expand_shift (LSHIFT_EXPR, mode, value,
919 bitpos, NULL_RTX, 1);
922 /* Now clear the chosen bits in OP0,
923 except that if VALUE is -1 we need not bother. */
924 /* We keep the intermediates in registers to allow CSE to combine
925 consecutive bitfield assignments. */
927 temp = force_reg (mode, op0);
929 if (! all_one)
931 temp = expand_binop (mode, and_optab, temp,
932 mask_rtx (mode, bitpos, bitsize, 1),
933 NULL_RTX, 1, OPTAB_LIB_WIDEN);
934 temp = force_reg (mode, temp);
937 /* Now logical-or VALUE into OP0, unless it is zero. */
939 if (! all_zero)
941 temp = expand_binop (mode, ior_optab, temp, value,
942 NULL_RTX, 1, OPTAB_LIB_WIDEN);
943 temp = force_reg (mode, temp);
946 if (op0 != temp)
948 op0 = copy_rtx (op0);
949 emit_move_insn (op0, temp);
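/* Worked example of the sequence above (illustrative only): storing
   VALUE = 5 into a 3-bit field at BITPOS = 4 of an 8-bit unit computes
     mask          = ((1 << 3) - 1) << 4   = 0x70
     cleared op0   = op0 & ~0x70           = op0 & 0x8f
     shifted value = (5 & 0x7) << 4        = 0x50
   and the final IOR gives (op0 & 0x8f) | 0x50, exactly the AND/IOR pair
   built by expand_binop above.  */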
953 /* Store a bit field that is split across multiple accessible memory objects.
955 OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
956 BITSIZE is the field width; BITPOS the position of its first bit
957 (within the word).
958 VALUE is the value to store.
960 This does not yet handle fields wider than BITS_PER_WORD. */
962 static void
963 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
964 unsigned HOST_WIDE_INT bitpos, rtx value)
966 unsigned int unit;
967 unsigned int bitsdone = 0;
969 /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
970 much at a time. */
971 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
972 unit = BITS_PER_WORD;
973 else
974 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
976 /* If VALUE is a constant other than a CONST_INT, get it into a register in
977 WORD_MODE. If we can do this using gen_lowpart_common, do so. Note
978 that VALUE might be a floating-point constant. */
979 if (CONSTANT_P (value) && !CONST_INT_P (value))
981 rtx word = gen_lowpart_common (word_mode, value);
983 if (word && (value != word))
984 value = word;
985 else
986 value = gen_lowpart_common (word_mode,
987 force_reg (GET_MODE (value) != VOIDmode
988 ? GET_MODE (value)
989 : word_mode, value));
992 while (bitsdone < bitsize)
994 unsigned HOST_WIDE_INT thissize;
995 rtx part, word;
996 unsigned HOST_WIDE_INT thispos;
997 unsigned HOST_WIDE_INT offset;
999 offset = (bitpos + bitsdone) / unit;
1000 thispos = (bitpos + bitsdone) % unit;
1002 /* THISSIZE must not overrun a word boundary. Otherwise,
1003 store_fixed_bit_field will call us again, and we will mutually
1004 recurse forever. */
1005 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1006 thissize = MIN (thissize, unit - thispos);
1008 if (BYTES_BIG_ENDIAN)
1010 int total_bits;
1012 /* We must do an endian conversion exactly the same way as it is
1013 done in extract_bit_field, so that the two calls to
1014 extract_fixed_bit_field will have comparable arguments. */
1015 if (!MEM_P (value) || GET_MODE (value) == BLKmode)
1016 total_bits = BITS_PER_WORD;
1017 else
1018 total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1020 /* Fetch successively less significant portions. */
1021 if (CONST_INT_P (value))
1022 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1023 >> (bitsize - bitsdone - thissize))
1024 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1025 else
1026 /* The args are chosen so that the last part includes the
1027 lsb. Give extract_bit_field the value it needs (with
1028 endianness compensation) to fetch the piece we want. */
1029 part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1030 total_bits - bitsize + bitsdone,
1031 NULL_RTX, 1, false);
1033 else
1035 /* Fetch successively more significant portions. */
1036 if (CONST_INT_P (value))
1037 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1038 >> bitsdone)
1039 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1040 else
1041 part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1042 bitsdone, NULL_RTX, 1, false);
1045 /* If OP0 is a register, then handle OFFSET here.
1047 When handling multiword bitfields, extract_bit_field may pass
1048 down a word_mode SUBREG of a larger REG for a bitfield that actually
1049 crosses a word boundary. Thus, for a SUBREG, we must find
1050 the current word starting from the base register. */
1051 if (GET_CODE (op0) == SUBREG)
1053 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1054 enum machine_mode sub_mode = GET_MODE (SUBREG_REG (op0));
1055 if (sub_mode != BLKmode && GET_MODE_SIZE (sub_mode) < UNITS_PER_WORD)
1056 word = word_offset ? const0_rtx : op0;
1057 else
1058 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1059 GET_MODE (SUBREG_REG (op0)));
1060 offset = 0;
1062 else if (REG_P (op0))
1064 enum machine_mode op0_mode = GET_MODE (op0);
1065 if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
1066 word = offset ? const0_rtx : op0;
1067 else
1068 word = operand_subword_force (op0, offset, GET_MODE (op0));
1069 offset = 0;
1071 else
1072 word = op0;
1074 /* OFFSET is in UNITs, and UNIT is in bits.
1075 store_fixed_bit_field wants offset in bytes. If WORD is const0_rtx,
1076 it is just an out-of-bounds access. Ignore it. */
1077 if (word != const0_rtx)
1078 store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT, thissize,
1079 thispos, part);
1080 bitsdone += thissize;
1084 /* A subroutine of extract_bit_field_1 that converts return value X
1085 to either MODE or TMODE. MODE, TMODE and UNSIGNEDP are arguments
1086 to extract_bit_field. */
1088 static rtx
1089 convert_extracted_bit_field (rtx x, enum machine_mode mode,
1090 enum machine_mode tmode, bool unsignedp)
1092 if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1093 return x;
1095 /* If the x mode is not a scalar integral, first convert to the
1096 integer mode of that size and then access it as a floating-point
1097 value via a SUBREG. */
1098 if (!SCALAR_INT_MODE_P (tmode))
1100 enum machine_mode smode;
1102 smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1103 x = convert_to_mode (smode, x, unsignedp);
1104 x = force_reg (smode, x);
1105 return gen_lowpart (tmode, x);
1108 return convert_to_mode (tmode, x, unsignedp);
1111 /* A subroutine of extract_bit_field, with the same arguments.
1112 If FALLBACK_P is true, fall back to extract_fixed_bit_field
1113 if we can find no other means of implementing the operation.
1114 If FALLBACK_P is false, return NULL instead. */
1116 static rtx
1117 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1118 unsigned HOST_WIDE_INT bitnum,
1119 int unsignedp, bool packedp, rtx target,
1120 enum machine_mode mode, enum machine_mode tmode,
1121 bool fallback_p)
1123 unsigned int unit
1124 = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
1125 unsigned HOST_WIDE_INT offset, bitpos;
1126 rtx op0 = str_rtx;
1127 enum machine_mode int_mode;
1128 enum machine_mode ext_mode;
1129 enum machine_mode mode1;
1130 enum insn_code icode;
1131 int byte_offset;
1133 if (tmode == VOIDmode)
1134 tmode = mode;
1136 while (GET_CODE (op0) == SUBREG)
1138 bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1139 op0 = SUBREG_REG (op0);
1142 /* If we have an out-of-bounds access to a register, just return an
1143 uninitialized register of the required mode. This can occur if the
1144 source code contains an out-of-bounds access to a small array. */
1145 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1146 return gen_reg_rtx (tmode);
1148 if (REG_P (op0)
1149 && mode == GET_MODE (op0)
1150 && bitnum == 0
1151 && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1153 /* We're trying to extract a full register from itself. */
1154 return op0;
1157 /* See if we can get a better vector mode before extracting. */
1158 if (VECTOR_MODE_P (GET_MODE (op0))
1159 && !MEM_P (op0)
1160 && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1162 enum machine_mode new_mode;
1164 if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1165 new_mode = MIN_MODE_VECTOR_FLOAT;
1166 else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1167 new_mode = MIN_MODE_VECTOR_FRACT;
1168 else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1169 new_mode = MIN_MODE_VECTOR_UFRACT;
1170 else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1171 new_mode = MIN_MODE_VECTOR_ACCUM;
1172 else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1173 new_mode = MIN_MODE_VECTOR_UACCUM;
1174 else
1175 new_mode = MIN_MODE_VECTOR_INT;
1177 for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1178 if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1179 && targetm.vector_mode_supported_p (new_mode))
1180 break;
1181 if (new_mode != VOIDmode)
1182 op0 = gen_lowpart (new_mode, op0);
1185 /* Use vec_extract patterns for extracting parts of vectors whenever
1186 available. */
1187 if (VECTOR_MODE_P (GET_MODE (op0))
1188 && !MEM_P (op0)
1189 && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing
1190 && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1191 == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1193 struct expand_operand ops[3];
1194 enum machine_mode outermode = GET_MODE (op0);
1195 enum machine_mode innermode = GET_MODE_INNER (outermode);
1196 enum insn_code icode = optab_handler (vec_extract_optab, outermode);
1197 unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1199 create_output_operand (&ops[0], target, innermode);
1200 create_input_operand (&ops[1], op0, outermode);
1201 create_integer_operand (&ops[2], pos);
1202 if (maybe_expand_insn (icode, 3, ops))
1204 target = ops[0].value;
1205 if (GET_MODE (target) != mode)
1206 return gen_lowpart (tmode, target);
1207 return target;
1211 /* Make sure we are playing with integral modes. Pun with subregs
1212 if we aren't. */
1214 enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1215 if (imode != GET_MODE (op0))
1217 if (MEM_P (op0))
1218 op0 = adjust_address (op0, imode, 0);
1219 else if (imode != BLKmode)
1221 op0 = gen_lowpart (imode, op0);
1223 /* If we got a SUBREG, force it into a register since we
1224 aren't going to be able to do another SUBREG on it. */
1225 if (GET_CODE (op0) == SUBREG)
1226 op0 = force_reg (imode, op0);
1228 else if (REG_P (op0))
1230 rtx reg, subreg;
1231 imode = smallest_mode_for_size (GET_MODE_BITSIZE (GET_MODE (op0)),
1232 MODE_INT);
1233 reg = gen_reg_rtx (imode);
1234 subreg = gen_lowpart_SUBREG (GET_MODE (op0), reg);
1235 emit_move_insn (subreg, op0);
1236 op0 = reg;
1237 bitnum += SUBREG_BYTE (subreg) * BITS_PER_UNIT;
1239 else
1241 rtx mem = assign_stack_temp (GET_MODE (op0),
1242 GET_MODE_SIZE (GET_MODE (op0)), 0);
1243 emit_move_insn (mem, op0);
1244 op0 = adjust_address (mem, BLKmode, 0);
1249 /* We may be accessing data outside the field, which means
1250 we can alias adjacent data. */
1251 if (MEM_P (op0))
1253 op0 = shallow_copy_rtx (op0);
1254 set_mem_alias_set (op0, 0);
1255 set_mem_expr (op0, 0);
1258 /* Extraction of a full-word or multi-word value from a structure
1259 in a register or aligned memory can be done with just a SUBREG.
1260 A subword value in the least significant part of a register
1261 can also be extracted with a SUBREG. For this, we need the
1262 byte offset of the value in op0. */
1264 bitpos = bitnum % unit;
1265 offset = bitnum / unit;
1266 byte_offset = bitpos / BITS_PER_UNIT + offset * UNITS_PER_WORD;
1268 /* If OP0 is a register, BITPOS must count within a word.
1269 But as we have it, it counts within whatever size OP0 now has.
1270 On a bigendian machine, these are not the same, so convert. */
1271 if (BYTES_BIG_ENDIAN
1272 && !MEM_P (op0)
1273 && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
1274 bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1276 /* ??? We currently assume TARGET is at least as big as BITSIZE.
1277 If that's wrong, the solution is to test for it and set TARGET to 0
1278 if needed. */
1280 /* Only scalar integer modes can be converted via subregs. There is an
1281 additional problem for FP modes here in that they can have a precision
1282 which is different from the size. mode_for_size uses precision, but
1283 we want a mode based on the size, so we must avoid calling it for FP
1284 modes. */
1285 mode1 = (SCALAR_INT_MODE_P (tmode)
1286 ? mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0)
1287 : mode);
1289 /* If the bitfield is volatile, we need to make sure the access
1290 remains on a type-aligned boundary. */
1291 if (GET_CODE (op0) == MEM
1292 && MEM_VOLATILE_P (op0)
1293 && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
1294 && flag_strict_volatile_bitfields > 0)
1295 goto no_subreg_mode_swap;
1297 if (((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode)
1298 && bitpos % BITS_PER_WORD == 0)
1299 || (mode1 != BLKmode
1300 /* ??? The big endian test here is wrong. This is correct
1301 if the value is in a register, and if mode_for_size is not
1302 the same mode as op0. This causes us to get unnecessarily
1303 inefficient code from the Thumb port when -mbig-endian. */
1304 && (BYTES_BIG_ENDIAN
1305 ? bitpos + bitsize == BITS_PER_WORD
1306 : bitpos == 0)))
1307 && ((!MEM_P (op0)
1308 && TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (mode1),
1309 GET_MODE_BITSIZE (GET_MODE (op0)))
1310 && GET_MODE_SIZE (mode1) != 0
1311 && byte_offset % GET_MODE_SIZE (mode1) == 0)
1312 || (MEM_P (op0)
1313 && (! SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
1314 || (offset * BITS_PER_UNIT % bitsize == 0
1315 && MEM_ALIGN (op0) % bitsize == 0)))))
1317 if (MEM_P (op0))
1318 op0 = adjust_address (op0, mode1, offset);
1319 else if (mode1 != GET_MODE (op0))
1321 rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1322 byte_offset);
1323 if (sub == NULL)
1324 goto no_subreg_mode_swap;
1325 op0 = sub;
1327 if (mode1 != mode)
1328 return convert_to_mode (tmode, op0, unsignedp);
1329 return op0;
1331 no_subreg_mode_swap:
1333 /* Handle fields bigger than a word. */
1335 if (bitsize > BITS_PER_WORD)
1337 /* Here we transfer the words of the field
1338 in the order least significant first.
1339 This is because the most significant word is the one which may
1340 be less than full. */
1342 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1343 unsigned int i;
1345 if (target == 0 || !REG_P (target))
1346 target = gen_reg_rtx (mode);
1348 /* Indicate for flow that the entire target reg is being set. */
1349 emit_clobber (target);
1351 for (i = 0; i < nwords; i++)
1353 /* If I is 0, use the low-order word in both field and target;
1354 if I is 1, use the next to lowest word; and so on. */
1355 /* Word number in TARGET to use. */
1356 unsigned int wordnum
1357 = (WORDS_BIG_ENDIAN
1358 ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1359 : i);
1360 /* Offset from start of field in OP0. */
1361 unsigned int bit_offset = (WORDS_BIG_ENDIAN
1362 ? MAX (0, ((int) bitsize - ((int) i + 1)
1363 * (int) BITS_PER_WORD))
1364 : (int) i * BITS_PER_WORD);
1365 rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1366 rtx result_part
1367 = extract_bit_field (op0, MIN (BITS_PER_WORD,
1368 bitsize - i * BITS_PER_WORD),
1369 bitnum + bit_offset, 1, false, target_part, mode,
1370 word_mode);
1372 gcc_assert (target_part);
1374 if (result_part != target_part)
1375 emit_move_insn (target_part, result_part);
1378 if (unsignedp)
1380 /* Unless we've filled TARGET, the upper regs in a multi-reg value
1381 need to be zero'd out. */
1382 if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1384 unsigned int i, total_words;
1386 total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1387 for (i = nwords; i < total_words; i++)
1388 emit_move_insn
1389 (operand_subword (target,
1390 WORDS_BIG_ENDIAN ? total_words - i - 1 : i,
1391 1, VOIDmode),
1392 const0_rtx);
1394 return target;
1397 /* Signed bit field: sign-extend with two arithmetic shifts. */
1398 target = expand_shift (LSHIFT_EXPR, mode, target,
1399 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1400 return expand_shift (RSHIFT_EXPR, mode, target,
1401 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1404 /* From here on we know the desired field is smaller than a word. */
1406 /* Check if there is a correspondingly-sized integer field, so we can
1407 safely extract it as one size of integer, if necessary; then
1408 truncate or extend to the size that is wanted; then use SUBREGs or
1409 convert_to_mode to get one of the modes we really wanted. */
1411 int_mode = int_mode_for_mode (tmode);
1412 if (int_mode == BLKmode)
1413 int_mode = int_mode_for_mode (mode);
1414 /* Should probably push op0 out to memory and then do a load. */
1415 gcc_assert (int_mode != BLKmode);
1417 /* OFFSET is the number of words or bytes (UNIT says which)
1418 from STR_RTX to the first word or byte containing part of the field. */
1419 if (!MEM_P (op0))
1421 if (offset != 0
1422 || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1424 if (!REG_P (op0))
1425 op0 = copy_to_reg (op0);
1426 op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
1427 op0, (offset * UNITS_PER_WORD));
1429 offset = 0;
1432 /* Now OFFSET is nonzero only for memory operands. */
1433 ext_mode = mode_for_extraction (unsignedp ? EP_extzv : EP_extv, 0);
1434 icode = unsignedp ? CODE_FOR_extzv : CODE_FOR_extv;
1435 if (ext_mode != MAX_MACHINE_MODE
1436 && bitsize > 0
1437 && GET_MODE_BITSIZE (ext_mode) >= bitsize
1438 /* If op0 is a register, we need it in EXT_MODE to make it
1439 acceptable to the format of ext(z)v. */
1440 && !(GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1441 && !((REG_P (op0) || GET_CODE (op0) == SUBREG)
1442 && (bitsize + bitpos > GET_MODE_BITSIZE (ext_mode))))
1444 struct expand_operand ops[4];
1445 unsigned HOST_WIDE_INT xbitpos = bitpos, xoffset = offset;
1446 rtx xop0 = op0;
1447 rtx xtarget = target;
1448 rtx xspec_target = target;
1449 rtx xspec_target_subreg = 0;
1451 /* If op0 is a register, we need it in EXT_MODE to make it
1452 acceptable to the format of ext(z)v. */
1453 if (REG_P (xop0) && GET_MODE (xop0) != ext_mode)
1454 xop0 = gen_lowpart_SUBREG (ext_mode, xop0);
1455 if (MEM_P (xop0))
1456 /* Get ref to first byte containing part of the field. */
1457 xop0 = adjust_address (xop0, byte_mode, xoffset);
1459 /* On big-endian machines, we count bits from the most significant.
1460 If the bit field insn does not, we must invert. */
1461 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1462 xbitpos = unit - bitsize - xbitpos;
1464 /* Now convert from counting within UNIT to counting in EXT_MODE. */
1465 if (BITS_BIG_ENDIAN && !MEM_P (xop0))
1466 xbitpos += GET_MODE_BITSIZE (ext_mode) - unit;
1468 unit = GET_MODE_BITSIZE (ext_mode);
1470 if (xtarget == 0)
1471 xtarget = xspec_target = gen_reg_rtx (tmode);
1473 if (GET_MODE (xtarget) != ext_mode)
1475 /* Don't use LHS paradoxical subreg if explicit truncation is needed
1476 between the mode of the extraction (word_mode) and the target
1477 mode. Instead, create a temporary and use convert_move to set
1478 the target. */
1479 if (REG_P (xtarget)
1480 && TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (GET_MODE (xtarget)),
1481 GET_MODE_BITSIZE (ext_mode)))
1483 xtarget = gen_lowpart (ext_mode, xtarget);
1484 if (GET_MODE_SIZE (ext_mode)
1485 > GET_MODE_SIZE (GET_MODE (xspec_target)))
1486 xspec_target_subreg = xtarget;
1488 else
1489 xtarget = gen_reg_rtx (ext_mode);
1492 create_output_operand (&ops[0], xtarget, ext_mode);
1493 create_fixed_operand (&ops[1], xop0);
1494 create_integer_operand (&ops[2], bitsize);
1495 create_integer_operand (&ops[3], xbitpos);
1496 if (maybe_expand_insn (unsignedp ? CODE_FOR_extzv : CODE_FOR_extv,
1497 4, ops))
1499 xtarget = ops[0].value;
1500 if (xtarget == xspec_target)
1501 return xtarget;
1502 if (xtarget == xspec_target_subreg)
1503 return xspec_target;
1504 return convert_extracted_bit_field (xtarget, mode, tmode, unsignedp);
1508 /* If OP0 is a memory, try copying it to a register and seeing if a
1509 cheap register alternative is available. */
1510 if (ext_mode != MAX_MACHINE_MODE && MEM_P (op0))
1512 enum machine_mode bestmode;
1514 /* Get the mode to use for inserting into this field. If
1515 OP0 is BLKmode, get the smallest mode consistent with the
1516 alignment. If OP0 is a non-BLKmode object that is no
1517 wider than EXT_MODE, use its mode. Otherwise, use the
1518 smallest mode containing the field. */
1520 if (GET_MODE (op0) == BLKmode
1521 || (ext_mode != MAX_MACHINE_MODE
1522 && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (ext_mode)))
1523 bestmode = get_best_mode (bitsize, bitnum, MEM_ALIGN (op0),
1524 (ext_mode == MAX_MACHINE_MODE
1525 ? VOIDmode : ext_mode),
1526 MEM_VOLATILE_P (op0));
1527 else
1528 bestmode = GET_MODE (op0);
1530 if (bestmode != VOIDmode
1531 && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
1532 && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
1534 unsigned HOST_WIDE_INT xoffset, xbitpos;
1536 /* Compute the offset as a multiple of this unit,
1537 counting in bytes. */
1538 unit = GET_MODE_BITSIZE (bestmode);
1539 xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1540 xbitpos = bitnum % unit;
1542 /* Make sure the register is big enough for the whole field. */
1543 if (xoffset * BITS_PER_UNIT + unit
1544 >= offset * BITS_PER_UNIT + bitsize)
1546 rtx last, result, xop0;
1548 last = get_last_insn ();
1550 /* Fetch it to a register in that size. */
1551 xop0 = adjust_address (op0, bestmode, xoffset);
1552 xop0 = force_reg (bestmode, xop0);
1553 result = extract_bit_field_1 (xop0, bitsize, xbitpos,
1554 unsignedp, packedp, target,
1555 mode, tmode, false);
1556 if (result)
1557 return result;
1559 delete_insns_since (last);
1564 if (!fallback_p)
1565 return NULL;
1567 target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1568 bitpos, target, unsignedp, packedp);
1569 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1572 /* Generate code to extract a byte-field from STR_RTX
1573 containing BITSIZE bits, starting at BITNUM,
1574 and put it in TARGET if possible (if TARGET is nonzero).
1575 Regardless of TARGET, we return the rtx for where the value is placed.
1577 STR_RTX is the structure containing the byte (a REG or MEM).
1578 UNSIGNEDP is nonzero if this is an unsigned bit field.
1579 PACKEDP is nonzero if the field has the packed attribute.
1580 MODE is the natural mode of the field value once extracted.
1581 TMODE is the mode the caller would like the value to have;
1582 but the value may be returned with type MODE instead.
1584 If a TARGET is specified and we can store in it at no extra cost,
1585 we do so, and return TARGET.
1586 Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1587 if they are equally easy. */
1589 rtx
1590 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1591 unsigned HOST_WIDE_INT bitnum, int unsignedp, bool packedp,
1592 rtx target, enum machine_mode mode, enum machine_mode tmode)
1594 return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp, packedp,
1595 target, mode, tmode, true);
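/* Example caller (a hypothetical sketch mirroring the store example
   earlier): extract bits 5..7 of SRC as an unsigned SImode value.  */
#if 0
static rtx
extract_bit_field_example (rtx src)
{
  return extract_bit_field (src, 3, 5, 1, false, NULL_RTX, SImode, SImode);
}
#endif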
1598 /* Extract a bit field using shifts and boolean operations.
1599 Returns an rtx to represent the value.
1600 OP0 addresses a register (word) or memory (byte).
1601 BITPOS says which bit within the word or byte the bit field starts in.
1602 OFFSET says how many bytes farther the bit field starts;
1603 it is 0 if OP0 is a register.
1604 BITSIZE says how many bits long the bit field is.
1605 (If OP0 is a register, it may be narrower than a full word,
1606 but BITPOS still counts within a full word,
1607 which is significant on bigendian machines.)
1609 UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1610 PACKEDP is true if the field has the packed attribute.
1612 If TARGET is nonzero, attempts to store the value there
1613 and return TARGET, but this is not guaranteed.
1614 If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */
1616 static rtx
1617 extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
1618 unsigned HOST_WIDE_INT offset,
1619 unsigned HOST_WIDE_INT bitsize,
1620 unsigned HOST_WIDE_INT bitpos, rtx target,
1621 int unsignedp, bool packedp)
1623 unsigned int total_bits = BITS_PER_WORD;
1624 enum machine_mode mode;
1626 if (GET_CODE (op0) == SUBREG || REG_P (op0))
1628 /* Special treatment for a bit field split across two registers. */
1629 if (bitsize + bitpos > BITS_PER_WORD)
1630 return extract_split_bit_field (op0, bitsize, bitpos, unsignedp);
1632 else
1634 /* Get the proper mode to use for this field. We want a mode that
1635 includes the entire field. If such a mode would be larger than
1636 a word, we won't be doing the extraction the normal way. */
1638 if (MEM_VOLATILE_P (op0)
1639 && flag_strict_volatile_bitfields > 0)
1641 if (GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
1642 mode = GET_MODE (op0);
1643 else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1644 mode = GET_MODE (target);
1645 else
1646 mode = tmode;
1648 else
1649 mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
1650 MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));
1652 if (mode == VOIDmode)
1653 /* The only way this should occur is if the field spans word
1654 boundaries. */
1655 return extract_split_bit_field (op0, bitsize,
1656 bitpos + offset * BITS_PER_UNIT,
1657 unsignedp);
1659 total_bits = GET_MODE_BITSIZE (mode);
1661 /* Make sure bitpos is valid for the chosen mode. Adjust BITPOS to
1662 be in the range 0 to total_bits-1, and put any excess bytes in
1663 OFFSET. */
1664 if (bitpos >= total_bits)
1666 offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
1667 bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
1668 * BITS_PER_UNIT);
1671 /* If we're accessing a volatile MEM, we can't do the next
1672 alignment step if it results in a multi-word access where we
1673 otherwise wouldn't have one. So, check for that case
1674 here. */
1675 if (MEM_P (op0)
1676 && MEM_VOLATILE_P (op0)
1677 && flag_strict_volatile_bitfields > 0
1678 && bitpos + bitsize <= total_bits
1679 && bitpos + bitsize + (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT > total_bits)
1681 if (STRICT_ALIGNMENT)
1683 static bool informed_about_misalignment = false;
1684 bool warned;
1686 if (packedp)
1688 if (bitsize == total_bits)
1689 warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1690 "multiple accesses to volatile structure member"
1691 " because of packed attribute");
1692 else
1693 warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1694 "multiple accesses to volatile structure bitfield"
1695 " because of packed attribute");
1697 return extract_split_bit_field (op0, bitsize,
1698 bitpos + offset * BITS_PER_UNIT,
1699 unsignedp);
1702 if (bitsize == total_bits)
1703 warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1704 "mis-aligned access used for structure member");
1705 else
1706 warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1707 "mis-aligned access used for structure bitfield");
1709 if (! informed_about_misalignment && warned)
1711 informed_about_misalignment = true;
1712 inform (input_location,
1713 "when a volatile object spans multiple type-sized locations,"
1714 " the compiler must choose between using a single mis-aligned access to"
1715 " preserve the volatility, or using multiple aligned accesses to avoid"
1716 " runtime faults; this code may fail at runtime if the hardware does"
1717 " not allow this access");
1721 else
1724 /* Get ref to an aligned byte, halfword, or word containing the field.
1725 Adjust BITPOS to be position within a word,
1726 and OFFSET to be the offset of that word.
1727 Then alter OP0 to refer to that word. */
1728 bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
1729 offset -= (offset % (total_bits / BITS_PER_UNIT));
1732 op0 = adjust_address (op0, mode, offset);
1735 mode = GET_MODE (op0);
1737 if (BYTES_BIG_ENDIAN)
1738 /* BITPOS is the distance between our msb and that of OP0.
1739 Convert it to the distance from the lsb. */
1740 bitpos = total_bits - bitsize - bitpos;
1742 /* Now BITPOS is always the distance between the field's lsb and that of OP0.
1743 We have reduced the big-endian case to the little-endian case. */
1745 if (unsignedp)
1747 if (bitpos)
1749 /* If the field does not already start at the lsb,
1750 shift it so it does. */
1751 /* Maybe propagate the target for the shift. */
1752 /* But not if we will return it--could confuse integrate.c. */
1753 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1754 if (tmode != mode) subtarget = 0;
1755 op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitpos, subtarget, 1);
1757 /* Convert the value to the desired mode. */
1758 if (mode != tmode)
1759 op0 = convert_to_mode (tmode, op0, 1);
1761 /* Unless the msb of the field used to be the msb when we shifted,
1762 mask out the upper bits. */
1764 if (GET_MODE_BITSIZE (mode) != bitpos + bitsize)
1765 return expand_binop (GET_MODE (op0), and_optab, op0,
1766 mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1767 target, 1, OPTAB_LIB_WIDEN);
1768 return op0;
1771 /* To extract a signed bit-field, first shift its msb to the msb of the word,
1772 then arithmetic-shift its lsb to the lsb of the word. */
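  /* For example, if the field's containing mode ends up as SImode and
     the field is 8 bits wide starting at bit 4, the result is
     ((v << 20) >> 24): the left shift moves the field's msb to bit 31
     and the arithmetic right shift brings its lsb back to bit 0 while
     copying the sign into the upper bits.  */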
1773 op0 = force_reg (mode, op0);
1774 if (mode != tmode)
1775 target = 0;
1777 /* Find the narrowest integer mode that contains the field. */
1779 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1780 mode = GET_MODE_WIDER_MODE (mode))
1781 if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos)
1783 op0 = convert_to_mode (mode, op0, 0);
1784 break;
1787 if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos))
1789 int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitpos);
1790 /* Maybe propagate the target for the shift. */
1791 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1792 op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1795 return expand_shift (RSHIFT_EXPR, mode, op0,
1796 GET_MODE_BITSIZE (mode) - bitsize, target, 0);
1799 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1800 of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1801 complement of that if COMPLEMENT. The mask is truncated if
1802 necessary to the width of mode MODE. The mask is zero-extended if
1803 BITSIZE+BITPOS is too small for MODE. */
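/* For example, mask_rtx (SImode, 4, 8, 0) yields the SImode constant
   0x00000ff0, and the complemented form mask_rtx (SImode, 4, 8, 1)
   yields 0xfffff00f.  */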
1805 static rtx
1806 mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
1808 double_int mask;
1810 mask = double_int_mask (bitsize);
1811 mask = double_int_lshift (mask, bitpos, HOST_BITS_PER_DOUBLE_INT, false);
1813 if (complement)
1814 mask = double_int_not (mask);
1816 return immed_double_int_const (mask, mode);
1819 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1820 VALUE truncated to BITSIZE bits and then shifted left BITPOS bits. */
1822 static rtx
1823 lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize)
1825 double_int val;
1827 val = double_int_zext (uhwi_to_double_int (INTVAL (value)), bitsize);
1828 val = double_int_lshift (val, bitpos, HOST_BITS_PER_DOUBLE_INT, false);
1830 return immed_double_int_const (val, mode);
1833 /* Extract a bit field that is split across two words
1834 and return an RTX for the result.
1836 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1837 BITSIZE is the field width; BITPOS, position of its first bit, in the word.
1838 UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend. */
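/* For example, with 32-bit words a 20-bit field starting at bit position
   26 is assembled from a 6-bit piece of one word and a 14-bit piece of
   the next; each piece is extracted as an unsigned field, shifted into
   place and combined with IOR below.  */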
1840 static rtx
1841 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1842 unsigned HOST_WIDE_INT bitpos, int unsignedp)
1844 unsigned int unit;
1845 unsigned int bitsdone = 0;
1846 rtx result = NULL_RTX;
1847 int first = 1;
1849 /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1850 much at a time. */
1851 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1852 unit = BITS_PER_WORD;
1853 else
1854 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1856 while (bitsdone < bitsize)
1858 unsigned HOST_WIDE_INT thissize;
1859 rtx part, word;
1860 unsigned HOST_WIDE_INT thispos;
1861 unsigned HOST_WIDE_INT offset;
1863 offset = (bitpos + bitsdone) / unit;
1864 thispos = (bitpos + bitsdone) % unit;
1866 /* THISSIZE must not overrun a word boundary. Otherwise,
1867 extract_fixed_bit_field will call us again, and we will mutually
1868 recurse forever. */
1869 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1870 thissize = MIN (thissize, unit - thispos);
1872 /* If OP0 is a register, then handle OFFSET here.
1874 When handling multiword bitfields, extract_bit_field may pass
1875 down a word_mode SUBREG of a larger REG for a bitfield that actually
1876 crosses a word boundary. Thus, for a SUBREG, we must find
1877 the current word starting from the base register. */
1878 if (GET_CODE (op0) == SUBREG)
1880 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1881 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1882 GET_MODE (SUBREG_REG (op0)));
1883 offset = 0;
1885 else if (REG_P (op0))
1887 word = operand_subword_force (op0, offset, GET_MODE (op0));
1888 offset = 0;
1890 else
1891 word = op0;
1893 /* Extract the parts in bit-counting order,
1894 whose meaning is determined by BYTES_PER_UNIT.
1895 OFFSET is in UNITs, and UNIT is in bits.
1896 extract_fixed_bit_field wants offset in bytes. */
1897 part = extract_fixed_bit_field (word_mode, word,
1898 offset * unit / BITS_PER_UNIT,
1899 thissize, thispos, 0, 1, false);
1900 bitsdone += thissize;
1902 /* Shift this part into place for the result. */
1903 if (BYTES_BIG_ENDIAN)
1905 if (bitsize != bitsdone)
1906 part = expand_shift (LSHIFT_EXPR, word_mode, part,
1907 bitsize - bitsdone, 0, 1);
1909 else
1911 if (bitsdone != thissize)
1912 part = expand_shift (LSHIFT_EXPR, word_mode, part,
1913 bitsdone - thissize, 0, 1);
1916 if (first)
1917 result = part;
1918 else
1919 /* Combine the parts with bitwise or. This works
1920 because we extracted each part as an unsigned bit field. */
1921 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
1922 OPTAB_LIB_WIDEN);
1924 first = 0;
1927 /* Unsigned bit field: we are done. */
1928 if (unsignedp)
1929 return result;
1930 /* Signed bit field: sign-extend with two arithmetic shifts. */
1931 result = expand_shift (LSHIFT_EXPR, word_mode, result,
1932 BITS_PER_WORD - bitsize, NULL_RTX, 0);
1933 return expand_shift (RSHIFT_EXPR, word_mode, result,
1934 BITS_PER_WORD - bitsize, NULL_RTX, 0);
1937 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
1938 the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than
1939 MODE, fill the upper bits with zeros. Fail if the layout of either
1940 mode is unknown (as for CC modes) or if the extraction would involve
1941 unprofitable mode punning. Return the value on success, otherwise
1942 return null.
1944 This is different from gen_lowpart* in these respects:
1946 - the returned value must always be considered an rvalue
1948 - when MODE is wider than SRC_MODE, the extraction involves
1949 a zero extension
1951 - when MODE is smaller than SRC_MODE, the extraction involves
1952 a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
1954 In other words, this routine performs a computation, whereas the
1955 gen_lowpart* routines are conceptually lvalue or rvalue subreg
1956 operations. */
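/* For example, reading the bit pattern of an SFmode value as SImode
   typically goes through int_mode_for_mode and convert_modes below;
   when MODE is wider than SRC_MODE the conversion zero-extends.  */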
1959 extract_low_bits (enum machine_mode mode, enum machine_mode src_mode, rtx src)
1961 enum machine_mode int_mode, src_int_mode;
1963 if (mode == src_mode)
1964 return src;
1966 if (CONSTANT_P (src))
1968 /* simplify_gen_subreg can't be used here, as if simplify_subreg
1969 fails, it will happily create (subreg (symbol_ref)) or similar
1970 invalid SUBREGs. */
1971 unsigned int byte = subreg_lowpart_offset (mode, src_mode);
1972 rtx ret = simplify_subreg (mode, src, src_mode, byte);
1973 if (ret)
1974 return ret;
1976 if (GET_MODE (src) == VOIDmode
1977 || !validate_subreg (mode, src_mode, src, byte))
1978 return NULL_RTX;
1980 src = force_reg (GET_MODE (src), src);
1981 return gen_rtx_SUBREG (mode, src, byte);
1984 if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
1985 return NULL_RTX;
1987 if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
1988 && MODES_TIEABLE_P (mode, src_mode))
1990 rtx x = gen_lowpart_common (mode, src);
1991 if (x)
1992 return x;
1995 src_int_mode = int_mode_for_mode (src_mode);
1996 int_mode = int_mode_for_mode (mode);
1997 if (src_int_mode == BLKmode || int_mode == BLKmode)
1998 return NULL_RTX;
2000 if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2001 return NULL_RTX;
2002 if (!MODES_TIEABLE_P (int_mode, mode))
2003 return NULL_RTX;
2005 src = gen_lowpart (src_int_mode, src);
2006 src = convert_modes (int_mode, src_int_mode, src, true);
2007 src = gen_lowpart (mode, src);
2008 return src;
2011 /* Add INC into TARGET. */
2013 void
2014 expand_inc (rtx target, rtx inc)
2016 rtx value = expand_binop (GET_MODE (target), add_optab,
2017 target, inc,
2018 target, 0, OPTAB_LIB_WIDEN);
2019 if (value != target)
2020 emit_move_insn (target, value);
2023 /* Subtract DEC from TARGET. */
2025 void
2026 expand_dec (rtx target, rtx dec)
2028 rtx value = expand_binop (GET_MODE (target), sub_optab,
2029 target, dec,
2030 target, 0, OPTAB_LIB_WIDEN);
2031 if (value != target)
2032 emit_move_insn (target, value);
2035 /* Output a shift instruction for expression code CODE,
2036 with SHIFTED being the rtx for the value to shift,
2037 and AMOUNT the tree for the amount to shift by.
2038 Store the result in the rtx TARGET, if that is convenient.
2039 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2040 Return the rtx for where the value is. */
2043 expand_variable_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2044 tree amount, rtx target, int unsignedp)
2046 rtx op1, temp = 0;
2047 int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2048 int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2049 optab lshift_optab = ashl_optab;
2050 optab rshift_arith_optab = ashr_optab;
2051 optab rshift_uns_optab = lshr_optab;
2052 optab lrotate_optab = rotl_optab;
2053 optab rrotate_optab = rotr_optab;
2054 enum machine_mode op1_mode;
2055 int attempt;
2056 bool speed = optimize_insn_for_speed_p ();
2058 op1 = expand_normal (amount);
2059 op1_mode = GET_MODE (op1);
2061 /* Determine whether the shift/rotate amount is a vector, or scalar. If the
2062 shift amount is a vector, use the vector/vector shift patterns. */
2063 if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2065 lshift_optab = vashl_optab;
2066 rshift_arith_optab = vashr_optab;
2067 rshift_uns_optab = vlshr_optab;
2068 lrotate_optab = vrotl_optab;
2069 rrotate_optab = vrotr_optab;
2072 /* Previously detected shift-counts computed by NEGATE_EXPR
2073 and shifted in the other direction; but that does not work
2074 on all machines. */
2076 if (SHIFT_COUNT_TRUNCATED)
2078 if (CONST_INT_P (op1)
2079 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2080 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
2081 op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2082 % GET_MODE_BITSIZE (mode));
2083 else if (GET_CODE (op1) == SUBREG
2084 && subreg_lowpart_p (op1)
2085 && INTEGRAL_MODE_P (GET_MODE (SUBREG_REG (op1))))
2086 op1 = SUBREG_REG (op1);
2089 if (op1 == const0_rtx)
2090 return shifted;
2092 /* Check whether it's cheaper to implement a left shift by a constant
2093 bit count by a sequence of additions. */
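  /* For example, x << 3 can be emitted as three successive additions
     (doubling the value each time) when 3 * add_cost is cheaper than
     shift_cost[3] for this mode.  */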
2094 if (code == LSHIFT_EXPR
2095 && CONST_INT_P (op1)
2096 && INTVAL (op1) > 0
2097 && INTVAL (op1) < GET_MODE_BITSIZE (mode)
2098 && INTVAL (op1) < MAX_BITS_PER_WORD
2099 && shift_cost[speed][mode][INTVAL (op1)] > INTVAL (op1) * add_cost[speed][mode]
2100 && shift_cost[speed][mode][INTVAL (op1)] != MAX_COST)
2102 int i;
2103 for (i = 0; i < INTVAL (op1); i++)
2105 temp = force_reg (mode, shifted);
2106 shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2107 unsignedp, OPTAB_LIB_WIDEN);
2109 return shifted;
2112 for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2114 enum optab_methods methods;
2116 if (attempt == 0)
2117 methods = OPTAB_DIRECT;
2118 else if (attempt == 1)
2119 methods = OPTAB_WIDEN;
2120 else
2121 methods = OPTAB_LIB_WIDEN;
2123 if (rotate)
2125 /* Widening does not work for rotation. */
2126 if (methods == OPTAB_WIDEN)
2127 continue;
2128 else if (methods == OPTAB_LIB_WIDEN)
2130 /* If we have been unable to open-code this by a rotation,
2131 do it as the IOR of two shifts. I.e., to rotate A
2132 by N bits, compute (A << N) | ((unsigned) A >> (C - N))
2133 where C is the bitsize of A.
2135 It is theoretically possible that the target machine might
2136 not be able to perform either shift and hence we would
2137 be making two libcalls rather than just the one for the
2138 shift (similarly if IOR could not be done). We will allow
2139 this extremely unlikely lossage to avoid complicating the
2140 code below. */
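	  /* For example, a 32-bit rotate left by 5 becomes
	     (a << 5) | ((unsigned) a >> 27).  */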
2142 rtx subtarget = target == shifted ? 0 : target;
2143 tree new_amount, other_amount;
2144 rtx temp1;
2145 tree type = TREE_TYPE (amount);
2146 if (GET_MODE (op1) != TYPE_MODE (type)
2147 && GET_MODE (op1) != VOIDmode)
2148 op1 = convert_to_mode (TYPE_MODE (type), op1, 1);
2149 new_amount = make_tree (type, op1);
2150 other_amount
2151 = fold_build2 (MINUS_EXPR, type,
2152 build_int_cst (type, GET_MODE_BITSIZE (mode)),
2153 new_amount);
2155 shifted = force_reg (mode, shifted);
2157 temp = expand_variable_shift (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2158 mode, shifted, new_amount, 0, 1);
2159 temp1 = expand_variable_shift (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2160 mode, shifted, other_amount,
2161 subtarget, 1);
2162 return expand_binop (mode, ior_optab, temp, temp1, target,
2163 unsignedp, methods);
2166 temp = expand_binop (mode,
2167 left ? lrotate_optab : rrotate_optab,
2168 shifted, op1, target, unsignedp, methods);
2170 else if (unsignedp)
2171 temp = expand_binop (mode,
2172 left ? lshift_optab : rshift_uns_optab,
2173 shifted, op1, target, unsignedp, methods);
2175 /* Do arithmetic shifts.
2176 Also, if we are going to widen the operand, we can just as well
2177 use an arithmetic right-shift instead of a logical one. */
2178 if (temp == 0 && ! rotate
2179 && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2181 enum optab_methods methods1 = methods;
2183 /* If trying to widen a log shift to an arithmetic shift,
2184 don't accept an arithmetic shift of the same size. */
2185 if (unsignedp)
2186 methods1 = OPTAB_MUST_WIDEN;
2188 /* Arithmetic shift */
2190 temp = expand_binop (mode,
2191 left ? lshift_optab : rshift_arith_optab,
2192 shifted, op1, target, unsignedp, methods1);
2195 /* We used to try extzv here for logical right shifts, but that was
2196 only useful for one machine, the VAX, and caused poor code
2197 generation there for lshrdi3, so the code was deleted and a
2198 define_expand for lshrsi3 was added to vax.md. */
2201 gcc_assert (temp);
2202 return temp;
2205 /* Output a shift instruction for expression code CODE,
2206 with SHIFTED being the rtx for the value to shift,
2207 and AMOUNT the amount to shift by.
2208 Store the result in the rtx TARGET, if that is convenient.
2209 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2210 Return the rtx for where the value is. */
2213 expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2214 int amount, rtx target, int unsignedp)
2216 /* ??? With re-writing expand_shift we could avoid going through a
2217 tree for the shift amount and directly do GEN_INT (amount). */
2218 return expand_variable_shift (code, mode, shifted,
2219 build_int_cst (integer_type_node, amount),
2220 target, unsignedp);
2223 /* Indicates the type of fixup needed after a constant multiplication.
2224 BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2225 the result should be negated, and ADD_VARIANT means that the
2226 multiplicand should be added to the result. */
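/* For example, a multiplication by 9 may be handled as the sequence for
   8 (a single shift) followed by adding the multiplicand (add_variant),
   and a multiplication by -5 as the sequence for 5 followed by a
   negation (negate_variant).  */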
2227 enum mult_variant {basic_variant, negate_variant, add_variant};
2229 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2230 const struct mult_cost *, enum machine_mode mode);
2231 static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
2232 struct algorithm *, enum mult_variant *, int);
2233 static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
2234 const struct algorithm *, enum mult_variant);
2235 static unsigned HOST_WIDE_INT choose_multiplier (unsigned HOST_WIDE_INT, int,
2236 int, rtx *, int *, int *);
2237 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2238 static rtx extract_high_half (enum machine_mode, rtx);
2239 static rtx expand_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
2240 static rtx expand_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
2241 int, int);
2242 /* Compute and return the best algorithm for multiplying by T.
2243 The algorithm must cost less than cost_limit.
2244 If retval.cost >= COST_LIMIT, no algorithm was found and all
2245 other fields of the returned struct are undefined.
2246 MODE is the machine mode of the multiplication. */
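/* For example, T = 45 factors as 5 * 9, both of the form 2^m + 1, so one
   possible algorithm is x*5 = (x << 2) + x followed by y*9 = (y << 3) + y,
   i.e. two shift-and-add steps; whether this beats a hardware multiply is
   decided by the cost tables.  */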
2248 static void
2249 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2250 const struct mult_cost *cost_limit, enum machine_mode mode)
2252 int m;
2253 struct algorithm *alg_in, *best_alg;
2254 struct mult_cost best_cost;
2255 struct mult_cost new_limit;
2256 int op_cost, op_latency;
2257 unsigned HOST_WIDE_INT orig_t = t;
2258 unsigned HOST_WIDE_INT q;
2259 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
2260 int hash_index;
2261 bool cache_hit = false;
2262 enum alg_code cache_alg = alg_zero;
2263 bool speed = optimize_insn_for_speed_p ();
2265 /* Indicate that no algorithm is yet found. If no algorithm
2266 is found, this value will be returned and indicate failure. */
2267 alg_out->cost.cost = cost_limit->cost + 1;
2268 alg_out->cost.latency = cost_limit->latency + 1;
2270 if (cost_limit->cost < 0
2271 || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2272 return;
2274 /* Restrict the bits of "t" to the multiplication's mode. */
2275 t &= GET_MODE_MASK (mode);
2277 /* t == 1 can be done in zero cost. */
2278 if (t == 1)
2280 alg_out->ops = 1;
2281 alg_out->cost.cost = 0;
2282 alg_out->cost.latency = 0;
2283 alg_out->op[0] = alg_m;
2284 return;
2287 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2288 fail now. */
2289 if (t == 0)
2291 if (MULT_COST_LESS (cost_limit, zero_cost[speed]))
2292 return;
2293 else
2295 alg_out->ops = 1;
2296 alg_out->cost.cost = zero_cost[speed];
2297 alg_out->cost.latency = zero_cost[speed];
2298 alg_out->op[0] = alg_zero;
2299 return;
2303 /* We'll be needing a couple extra algorithm structures now. */
2305 alg_in = XALLOCA (struct algorithm);
2306 best_alg = XALLOCA (struct algorithm);
2307 best_cost = *cost_limit;
2309 /* Compute the hash index. */
2310 hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2312 /* See if we already know what to do for T. */
2313 if (alg_hash[hash_index].t == t
2314 && alg_hash[hash_index].mode == mode
2316 && alg_hash[hash_index].speed == speed
2317 && alg_hash[hash_index].alg != alg_unknown)
2319 cache_alg = alg_hash[hash_index].alg;
2321 if (cache_alg == alg_impossible)
2323 /* The cache tells us that it's impossible to synthesize
2324 multiplication by T within alg_hash[hash_index].cost. */
2325 if (!CHEAPER_MULT_COST (&alg_hash[hash_index].cost, cost_limit))
2326 /* COST_LIMIT is at least as restrictive as the one
2327 recorded in the hash table, in which case we have no
2328 hope of synthesizing a multiplication. Just
2329 return. */
2330 return;
2332 /* If we get here, COST_LIMIT is less restrictive than the
2333 one recorded in the hash table, so we may be able to
2334 synthesize a multiplication. Proceed as if we didn't
2335 have the cache entry. */
2337 else
2339 if (CHEAPER_MULT_COST (cost_limit, &alg_hash[hash_index].cost))
2340 /* The cached algorithm shows that this multiplication
2341 requires more cost than COST_LIMIT. Just return. This
2342 way, we don't clobber this cache entry with
2343 alg_impossible but retain useful information. */
2344 return;
2346 cache_hit = true;
2348 switch (cache_alg)
2350 case alg_shift:
2351 goto do_alg_shift;
2353 case alg_add_t_m2:
2354 case alg_sub_t_m2:
2355 goto do_alg_addsub_t_m2;
2357 case alg_add_factor:
2358 case alg_sub_factor:
2359 goto do_alg_addsub_factor;
2361 case alg_add_t2_m:
2362 goto do_alg_add_t2_m;
2364 case alg_sub_t2_m:
2365 goto do_alg_sub_t2_m;
2367 default:
2368 gcc_unreachable ();
2373 /* If we have a group of zero bits at the low-order part of T, try
2374 multiplying by the remaining bits and then doing a shift. */
2376 if ((t & 1) == 0)
2378 do_alg_shift:
2379 m = floor_log2 (t & -t); /* m = number of low zero bits */
2380 if (m < maxm)
2382 q = t >> m;
2383 /* The function expand_shift will choose between a shift and
2384 a sequence of additions, so the observed cost is given as
2385 MIN (m * add_cost[speed][mode], shift_cost[speed][mode][m]). */
2386 op_cost = m * add_cost[speed][mode];
2387 if (shift_cost[speed][mode][m] < op_cost)
2388 op_cost = shift_cost[speed][mode][m];
2389 new_limit.cost = best_cost.cost - op_cost;
2390 new_limit.latency = best_cost.latency - op_cost;
2391 synth_mult (alg_in, q, &new_limit, mode);
2393 alg_in->cost.cost += op_cost;
2394 alg_in->cost.latency += op_cost;
2395 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2397 struct algorithm *x;
2398 best_cost = alg_in->cost;
2399 x = alg_in, alg_in = best_alg, best_alg = x;
2400 best_alg->log[best_alg->ops] = m;
2401 best_alg->op[best_alg->ops] = alg_shift;
2404 /* See if treating ORIG_T as a signed number yields a better
2405 sequence. Try this sequence only for a negative ORIG_T
2406 as it would be useless for a non-negative ORIG_T. */
2407 if ((HOST_WIDE_INT) orig_t < 0)
2409 /* Shift ORIG_T as follows because a right shift of a
2410 negative-valued signed type is implementation
2411 defined. */
2412 q = ~(~orig_t >> m);
2413 /* The function expand_shift will choose between a shift
2414 and a sequence of additions, so the observed cost is
2415 given as MIN (m * add_cost[speed][mode],
2416 shift_cost[speed][mode][m]). */
2417 op_cost = m * add_cost[speed][mode];
2418 if (shift_cost[speed][mode][m] < op_cost)
2419 op_cost = shift_cost[speed][mode][m];
2420 new_limit.cost = best_cost.cost - op_cost;
2421 new_limit.latency = best_cost.latency - op_cost;
2422 synth_mult (alg_in, q, &new_limit, mode);
2424 alg_in->cost.cost += op_cost;
2425 alg_in->cost.latency += op_cost;
2426 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2428 struct algorithm *x;
2429 best_cost = alg_in->cost;
2430 x = alg_in, alg_in = best_alg, best_alg = x;
2431 best_alg->log[best_alg->ops] = m;
2432 best_alg->op[best_alg->ops] = alg_shift;
2436 if (cache_hit)
2437 goto done;
2440 /* If we have an odd number, add or subtract one. */
2441 if ((t & 1) != 0)
2443 unsigned HOST_WIDE_INT w;
2445 do_alg_addsub_t_m2:
2446 for (w = 1; (w & t) != 0; w <<= 1)
2448 /* If T was -1, then W will be zero after the loop. This is another
2449 case where T ends with ...111. Handling this with (T + 1) and
2450 subtracting 1 produces slightly better code and results in much
2451 faster algorithm selection than treating it like the ...0111 case
2452 below. */
2453 if (w == 0
2454 || (w > 2
2455 /* Reject the case where t is 3.
2456 Thus we prefer addition in that case. */
2457 && t != 3))
2459 /* T ends with ...111. Multiply by (T + 1) and subtract 1. */
2461 op_cost = add_cost[speed][mode];
2462 new_limit.cost = best_cost.cost - op_cost;
2463 new_limit.latency = best_cost.latency - op_cost;
2464 synth_mult (alg_in, t + 1, &new_limit, mode);
2466 alg_in->cost.cost += op_cost;
2467 alg_in->cost.latency += op_cost;
2468 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2470 struct algorithm *x;
2471 best_cost = alg_in->cost;
2472 x = alg_in, alg_in = best_alg, best_alg = x;
2473 best_alg->log[best_alg->ops] = 0;
2474 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2477 else
2479 /* T ends with ...01 or ...011. Multiply by (T - 1) and add 1. */
2481 op_cost = add_cost[speed][mode];
2482 new_limit.cost = best_cost.cost - op_cost;
2483 new_limit.latency = best_cost.latency - op_cost;
2484 synth_mult (alg_in, t - 1, &new_limit, mode);
2486 alg_in->cost.cost += op_cost;
2487 alg_in->cost.latency += op_cost;
2488 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2490 struct algorithm *x;
2491 best_cost = alg_in->cost;
2492 x = alg_in, alg_in = best_alg, best_alg = x;
2493 best_alg->log[best_alg->ops] = 0;
2494 best_alg->op[best_alg->ops] = alg_add_t_m2;
2498 /* We may be able to calculate a * -7, a * -15, a * -31, etc
2499 quickly with a - a * n for some appropriate constant n. */
2500 m = exact_log2 (-orig_t + 1);
2501 if (m >= 0 && m < maxm)
2503 op_cost = shiftsub1_cost[speed][mode][m];
2504 new_limit.cost = best_cost.cost - op_cost;
2505 new_limit.latency = best_cost.latency - op_cost;
2506 synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m, &new_limit, mode);
2508 alg_in->cost.cost += op_cost;
2509 alg_in->cost.latency += op_cost;
2510 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2512 struct algorithm *x;
2513 best_cost = alg_in->cost;
2514 x = alg_in, alg_in = best_alg, best_alg = x;
2515 best_alg->log[best_alg->ops] = m;
2516 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2520 if (cache_hit)
2521 goto done;
2524 /* Look for factors of t of the form
2525 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2526 If we find such a factor, we can multiply by t using an algorithm that
2527 multiplies by q, shift the result by m and add/subtract it to itself.
2529 We search for large factors first and loop down, even if large factors
2530 are less probable than small; if we find a large factor we will find a
2531 good sequence quickly, and therefore be able to prune (by decreasing
2532 COST_LIMIT) the search. */
2534 do_alg_addsub_factor:
2535 for (m = floor_log2 (t - 1); m >= 2; m--)
2537 unsigned HOST_WIDE_INT d;
2539 d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2540 if (t % d == 0 && t > d && m < maxm
2541 && (!cache_hit || cache_alg == alg_add_factor))
2543 /* If the target has a cheap shift-and-add instruction use
2544 that in preference to a shift insn followed by an add insn.
2545 Assume that the shift-and-add is "atomic" with a latency
2546 equal to its cost, otherwise assume that on superscalar
2547 hardware the shift may be executed concurrently with the
2548 earlier steps in the algorithm. */
2549 op_cost = add_cost[speed][mode] + shift_cost[speed][mode][m];
2550 if (shiftadd_cost[speed][mode][m] < op_cost)
2552 op_cost = shiftadd_cost[speed][mode][m];
2553 op_latency = op_cost;
2555 else
2556 op_latency = add_cost[speed][mode];
2558 new_limit.cost = best_cost.cost - op_cost;
2559 new_limit.latency = best_cost.latency - op_latency;
2560 synth_mult (alg_in, t / d, &new_limit, mode);
2562 alg_in->cost.cost += op_cost;
2563 alg_in->cost.latency += op_latency;
2564 if (alg_in->cost.latency < op_cost)
2565 alg_in->cost.latency = op_cost;
2566 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2568 struct algorithm *x;
2569 best_cost = alg_in->cost;
2570 x = alg_in, alg_in = best_alg, best_alg = x;
2571 best_alg->log[best_alg->ops] = m;
2572 best_alg->op[best_alg->ops] = alg_add_factor;
2574 /* Other factors will have been taken care of in the recursion. */
2575 break;
2578 d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2579 if (t % d == 0 && t > d && m < maxm
2580 && (!cache_hit || cache_alg == alg_sub_factor))
2582 /* If the target has a cheap shift-and-subtract insn use
2583 that in preference to a shift insn followed by a sub insn.
2584 Assume that the shift-and-sub is "atomic" with a latency
2585 equal to its cost, otherwise assume that on superscalar
2586 hardware the shift may be executed concurrently with the
2587 earlier steps in the algorithm. */
2588 op_cost = add_cost[speed][mode] + shift_cost[speed][mode][m];
2589 if (shiftsub0_cost[speed][mode][m] < op_cost)
2591 op_cost = shiftsub0_cost[speed][mode][m];
2592 op_latency = op_cost;
2594 else
2595 op_latency = add_cost[speed][mode];
2597 new_limit.cost = best_cost.cost - op_cost;
2598 new_limit.latency = best_cost.latency - op_latency;
2599 synth_mult (alg_in, t / d, &new_limit, mode);
2601 alg_in->cost.cost += op_cost;
2602 alg_in->cost.latency += op_latency;
2603 if (alg_in->cost.latency < op_cost)
2604 alg_in->cost.latency = op_cost;
2605 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2607 struct algorithm *x;
2608 best_cost = alg_in->cost;
2609 x = alg_in, alg_in = best_alg, best_alg = x;
2610 best_alg->log[best_alg->ops] = m;
2611 best_alg->op[best_alg->ops] = alg_sub_factor;
2613 break;
2616 if (cache_hit)
2617 goto done;
2619 /* Try shift-and-add (load effective address) instructions,
2620 i.e. do a*3, a*5, a*9. */
2621 if ((t & 1) != 0)
2623 do_alg_add_t2_m:
2624 q = t - 1;
2625 q = q & -q;
2626 m = exact_log2 (q);
2627 if (m >= 0 && m < maxm)
2629 op_cost = shiftadd_cost[speed][mode][m];
2630 new_limit.cost = best_cost.cost - op_cost;
2631 new_limit.latency = best_cost.latency - op_cost;
2632 synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2634 alg_in->cost.cost += op_cost;
2635 alg_in->cost.latency += op_cost;
2636 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2638 struct algorithm *x;
2639 best_cost = alg_in->cost;
2640 x = alg_in, alg_in = best_alg, best_alg = x;
2641 best_alg->log[best_alg->ops] = m;
2642 best_alg->op[best_alg->ops] = alg_add_t2_m;
2645 if (cache_hit)
2646 goto done;
2648 do_alg_sub_t2_m:
2649 q = t + 1;
2650 q = q & -q;
2651 m = exact_log2 (q);
2652 if (m >= 0 && m < maxm)
2654 op_cost = shiftsub0_cost[speed][mode][m];
2655 new_limit.cost = best_cost.cost - op_cost;
2656 new_limit.latency = best_cost.latency - op_cost;
2657 synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2659 alg_in->cost.cost += op_cost;
2660 alg_in->cost.latency += op_cost;
2661 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2663 struct algorithm *x;
2664 best_cost = alg_in->cost;
2665 x = alg_in, alg_in = best_alg, best_alg = x;
2666 best_alg->log[best_alg->ops] = m;
2667 best_alg->op[best_alg->ops] = alg_sub_t2_m;
2670 if (cache_hit)
2671 goto done;
2674 done:
2675 /* If best_cost has not decreased, we have not found any algorithm. */
2676 if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2678 /* We failed to find an algorithm. Record alg_impossible for
2679 this case (that is, <T, MODE, COST_LIMIT>) so that next time
2680 we are asked to find an algorithm for T within the same or
2681 lower COST_LIMIT, we can immediately return to the
2682 caller. */
2683 alg_hash[hash_index].t = t;
2684 alg_hash[hash_index].mode = mode;
2685 alg_hash[hash_index].speed = speed;
2686 alg_hash[hash_index].alg = alg_impossible;
2687 alg_hash[hash_index].cost = *cost_limit;
2688 return;
2691 /* Cache the result. */
2692 if (!cache_hit)
2694 alg_hash[hash_index].t = t;
2695 alg_hash[hash_index].mode = mode;
2696 alg_hash[hash_index].speed = speed;
2697 alg_hash[hash_index].alg = best_alg->op[best_alg->ops];
2698 alg_hash[hash_index].cost.cost = best_cost.cost;
2699 alg_hash[hash_index].cost.latency = best_cost.latency;
2702 /* If we are getting a too long sequence for `struct algorithm'
2703 to record, make this search fail. */
2704 if (best_alg->ops == MAX_BITS_PER_WORD)
2705 return;
2707 /* Copy the algorithm from temporary space to the space at alg_out.
2708 We avoid using structure assignment because the majority of
2709 best_alg is normally undefined, and this is a critical function. */
2710 alg_out->ops = best_alg->ops + 1;
2711 alg_out->cost = best_cost;
2712 memcpy (alg_out->op, best_alg->op,
2713 alg_out->ops * sizeof *alg_out->op);
2714 memcpy (alg_out->log, best_alg->log,
2715 alg_out->ops * sizeof *alg_out->log);
2718 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2719 Try three variations:
2721 - a shift/add sequence based on VAL itself
2722 - a shift/add sequence based on -VAL, followed by a negation
2723 - a shift/add sequence based on VAL - 1, followed by an addition.
2725 Return true if the cheapest of these cost less than MULT_COST,
2726 describing the algorithm in *ALG and final fixup in *VARIANT. */
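/* For example, for VAL = -7 the negation variant lets us synthesize the
   multiplication as x*7 = (x << 3) - x followed by a negation, instead of
   synthesizing the (nearly all-ones) unsigned image of -7 directly.  */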
2728 static bool
2729 choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
2730 struct algorithm *alg, enum mult_variant *variant,
2731 int mult_cost)
2733 struct algorithm alg2;
2734 struct mult_cost limit;
2735 int op_cost;
2736 bool speed = optimize_insn_for_speed_p ();
2738 /* Fail quickly for impossible bounds. */
2739 if (mult_cost < 0)
2740 return false;
2742 /* Ensure that mult_cost provides a reasonable upper bound.
2743 Any constant multiplication can be performed with less
2744 than 2 * bits additions. */
2745 op_cost = 2 * GET_MODE_BITSIZE (mode) * add_cost[speed][mode];
2746 if (mult_cost > op_cost)
2747 mult_cost = op_cost;
2749 *variant = basic_variant;
2750 limit.cost = mult_cost;
2751 limit.latency = mult_cost;
2752 synth_mult (alg, val, &limit, mode);
2754 /* This works only if the inverted value actually fits in an
2755 `unsigned int' */
2756 if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
2758 op_cost = neg_cost[speed][mode];
2759 if (MULT_COST_LESS (&alg->cost, mult_cost))
2761 limit.cost = alg->cost.cost - op_cost;
2762 limit.latency = alg->cost.latency - op_cost;
2764 else
2766 limit.cost = mult_cost - op_cost;
2767 limit.latency = mult_cost - op_cost;
2770 synth_mult (&alg2, -val, &limit, mode);
2771 alg2.cost.cost += op_cost;
2772 alg2.cost.latency += op_cost;
2773 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2774 *alg = alg2, *variant = negate_variant;
2777 /* This proves very useful for division-by-constant. */
2778 op_cost = add_cost[speed][mode];
2779 if (MULT_COST_LESS (&alg->cost, mult_cost))
2781 limit.cost = alg->cost.cost - op_cost;
2782 limit.latency = alg->cost.latency - op_cost;
2784 else
2786 limit.cost = mult_cost - op_cost;
2787 limit.latency = mult_cost - op_cost;
2790 synth_mult (&alg2, val - 1, &limit, mode);
2791 alg2.cost.cost += op_cost;
2792 alg2.cost.latency += op_cost;
2793 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2794 *alg = alg2, *variant = add_variant;
2796 return MULT_COST_LESS (&alg->cost, mult_cost);
2799 /* A subroutine of expand_mult, used for constant multiplications.
2800 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2801 convenient. Use the shift/add sequence described by ALG and apply
2802 the final fixup specified by VARIANT. */
2804 static rtx
2805 expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
2806 rtx target, const struct algorithm *alg,
2807 enum mult_variant variant)
2809 HOST_WIDE_INT val_so_far;
2810 rtx insn, accum, tem;
2811 int opno;
2812 enum machine_mode nmode;
2814 /* Avoid referencing memory over and over and invalid sharing
2815 on SUBREGs. */
2816 op0 = force_reg (mode, op0);
2818 /* ACCUM starts out either as OP0 or as a zero, depending on
2819 the first operation. */
2821 if (alg->op[0] == alg_zero)
2823 accum = copy_to_mode_reg (mode, const0_rtx);
2824 val_so_far = 0;
2826 else if (alg->op[0] == alg_m)
2828 accum = copy_to_mode_reg (mode, op0);
2829 val_so_far = 1;
2831 else
2832 gcc_unreachable ();
2834 for (opno = 1; opno < alg->ops; opno++)
2836 int log = alg->log[opno];
2837 rtx shift_subtarget = optimize ? 0 : accum;
2838 rtx add_target
2839 = (opno == alg->ops - 1 && target != 0 && variant != add_variant
2840 && !optimize)
2841 ? target : 0;
2842 rtx accum_target = optimize ? 0 : accum;
2844 switch (alg->op[opno])
2846 case alg_shift:
2847 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2848 /* REG_EQUAL note will be attached to the following insn. */
2849 emit_move_insn (accum, tem);
2850 val_so_far <<= log;
2851 break;
2853 case alg_add_t_m2:
2854 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
2855 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2856 add_target ? add_target : accum_target);
2857 val_so_far += (HOST_WIDE_INT) 1 << log;
2858 break;
2860 case alg_sub_t_m2:
2861 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
2862 accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
2863 add_target ? add_target : accum_target);
2864 val_so_far -= (HOST_WIDE_INT) 1 << log;
2865 break;
2867 case alg_add_t2_m:
2868 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2869 log, shift_subtarget, 0);
2870 accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
2871 add_target ? add_target : accum_target);
2872 val_so_far = (val_so_far << log) + 1;
2873 break;
2875 case alg_sub_t2_m:
2876 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2877 log, shift_subtarget, 0);
2878 accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
2879 add_target ? add_target : accum_target);
2880 val_so_far = (val_so_far << log) - 1;
2881 break;
2883 case alg_add_factor:
2884 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2885 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2886 add_target ? add_target : accum_target);
2887 val_so_far += val_so_far << log;
2888 break;
2890 case alg_sub_factor:
2891 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2892 accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
2893 (add_target
2894 ? add_target : (optimize ? 0 : tem)));
2895 val_so_far = (val_so_far << log) - val_so_far;
2896 break;
2898 default:
2899 gcc_unreachable ();
2902 /* Write a REG_EQUAL note on the last insn so that we can cse
2903 multiplication sequences. Note that if ACCUM is a SUBREG,
2904 we've set the inner register and must properly indicate
2905 that. */
2907 tem = op0, nmode = mode;
2908 if (GET_CODE (accum) == SUBREG)
2910 nmode = GET_MODE (SUBREG_REG (accum));
2911 tem = gen_lowpart (nmode, op0);
2914 insn = get_last_insn ();
2915 set_unique_reg_note (insn, REG_EQUAL,
2916 gen_rtx_MULT (nmode, tem,
2917 GEN_INT (val_so_far)));
2920 if (variant == negate_variant)
2922 val_so_far = -val_so_far;
2923 accum = expand_unop (mode, neg_optab, accum, target, 0);
2925 else if (variant == add_variant)
2927 val_so_far = val_so_far + 1;
2928 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
2931 /* Compare only the bits of val and val_so_far that are significant
2932 in the result mode, to avoid sign-/zero-extension confusion. */
2933 val &= GET_MODE_MASK (mode);
2934 val_so_far &= GET_MODE_MASK (mode);
2935 gcc_assert (val == val_so_far);
2937 return accum;
2940 /* Perform a multiplication and return an rtx for the result.
2941 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
2942 TARGET is a suggestion for where to store the result (an rtx).
2944 We check specially for a constant integer as OP1.
2945 If you want this check for OP0 as well, then before calling
2946 you should swap the two operands if OP0 would be constant. */
2949 expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
2950 int unsignedp)
2952 enum mult_variant variant;
2953 struct algorithm algorithm;
2954 int max_cost;
2955 bool speed = optimize_insn_for_speed_p ();
2957 /* Handling const0_rtx here allows us to use zero as a rogue value for
2958 coeff below. */
2959 if (op1 == const0_rtx)
2960 return const0_rtx;
2961 if (op1 == const1_rtx)
2962 return op0;
2963 if (op1 == constm1_rtx)
2964 return expand_unop (mode,
2965 GET_MODE_CLASS (mode) == MODE_INT
2966 && !unsignedp && flag_trapv
2967 ? negv_optab : neg_optab,
2968 op0, target, 0);
2970 /* These are the operations that are potentially turned into a sequence
2971 of shifts and additions. */
2972 if (SCALAR_INT_MODE_P (mode)
2973 && (unsignedp || !flag_trapv))
2975 HOST_WIDE_INT coeff = 0;
2976 rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
2978 /* synth_mult does an `unsigned int' multiply. As long as the mode is
2979 less than or equal in size to `unsigned int' this doesn't matter.
2980 If the mode is larger than `unsigned int', then synth_mult works
2981 only if the constant value exactly fits in an `unsigned int' without
2982 any truncation. This means that multiplying by negative values does
2983 not work; results are off by 2^32 on a 32 bit machine. */
2985 if (CONST_INT_P (op1))
2987 /* Attempt to handle multiplication of DImode values by negative
2988 coefficients, by performing the multiplication by a positive
2989 multiplier and then inverting the result. */
2990 if (INTVAL (op1) < 0
2991 && GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT)
2993 /* It's safe to use -INTVAL (op1) even for INT_MIN, as the
2994 result is interpreted as an unsigned coefficient.
2995 Exclude cost of op0 from max_cost to match the cost
2996 calculation of the synth_mult. */
2997 max_cost = rtx_cost (gen_rtx_MULT (mode, fake_reg, op1), SET, speed)
2998 - neg_cost[speed][mode];
2999 if (max_cost > 0
3000 && choose_mult_variant (mode, -INTVAL (op1), &algorithm,
3001 &variant, max_cost))
3003 rtx temp = expand_mult_const (mode, op0, -INTVAL (op1),
3004 NULL_RTX, &algorithm,
3005 variant);
3006 return expand_unop (mode, neg_optab, temp, target, 0);
3009 else coeff = INTVAL (op1);
3011 else if (GET_CODE (op1) == CONST_DOUBLE)
3013 /* If we are multiplying in DImode, it may still be a win
3014 to try to work with shifts and adds. */
3015 if (CONST_DOUBLE_HIGH (op1) == 0
3016 && CONST_DOUBLE_LOW (op1) > 0)
3017 coeff = CONST_DOUBLE_LOW (op1);
3018 else if (CONST_DOUBLE_LOW (op1) == 0
3019 && EXACT_POWER_OF_2_OR_ZERO_P (CONST_DOUBLE_HIGH (op1)))
3021 int shift = floor_log2 (CONST_DOUBLE_HIGH (op1))
3022 + HOST_BITS_PER_WIDE_INT;
3023 return expand_shift (LSHIFT_EXPR, mode, op0,
3024 shift, target, unsignedp);
3028 /* We used to test optimize here, on the grounds that it's better to
3029 produce a smaller program when -O is not used. But this causes
3030 such a terrible slowdown sometimes that it seems better to always
3031 use synth_mult. */
3032 if (coeff != 0)
3034 /* Special case powers of two. */
3035 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3036 return expand_shift (LSHIFT_EXPR, mode, op0,
3037 floor_log2 (coeff), target, unsignedp);
3039 /* Exclude cost of op0 from max_cost to match the cost
3040 calculation of the synth_mult. */
3041 max_cost = rtx_cost (gen_rtx_MULT (mode, fake_reg, op1), SET, speed);
3042 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3043 max_cost))
3044 return expand_mult_const (mode, op0, coeff, target,
3045 &algorithm, variant);
3049 if (GET_CODE (op0) == CONST_DOUBLE)
3051 rtx temp = op0;
3052 op0 = op1;
3053 op1 = temp;
3056 /* Expand x*2.0 as x+x. */
3057 if (GET_CODE (op1) == CONST_DOUBLE
3058 && SCALAR_FLOAT_MODE_P (mode))
3060 REAL_VALUE_TYPE d;
3061 REAL_VALUE_FROM_CONST_DOUBLE (d, op1);
3063 if (REAL_VALUES_EQUAL (d, dconst2))
3065 op0 = force_reg (GET_MODE (op0), op0);
3066 return expand_binop (mode, add_optab, op0, op0,
3067 target, unsignedp, OPTAB_LIB_WIDEN);
3071 /* This used to use umul_optab if unsigned, but for non-widening multiply
3072 there is no difference between signed and unsigned. */
3073 op0 = expand_binop (mode,
3074 ! unsignedp
3075 && flag_trapv && (GET_MODE_CLASS(mode) == MODE_INT)
3076 ? smulv_optab : smul_optab,
3077 op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3078 gcc_assert (op0);
3079 return op0;
3082 /* Perform a widening multiplication and return an rtx for the result.
3083 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3084 TARGET is a suggestion for where to store the result (an rtx).
3085 THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3086 or smul_widen_optab.
3088 We check specially for a constant integer as OP1, comparing the
3089 cost of a widening multiply against the cost of a sequence of shifts
3090 and adds. */
3093 expand_widening_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3094 int unsignedp, optab this_optab)
3096 bool speed = optimize_insn_for_speed_p ();
3097 rtx cop1;
3099 if (CONST_INT_P (op1)
3100 && GET_MODE (op0) != VOIDmode
3101 && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3102 this_optab == umul_widen_optab))
3103 && CONST_INT_P (cop1)
3104 && (INTVAL (cop1) >= 0
3105 || GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT))
3107 HOST_WIDE_INT coeff = INTVAL (cop1);
3108 int max_cost;
3109 enum mult_variant variant;
3110 struct algorithm algorithm;
3112 /* Special case powers of two. */
3113 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3115 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3116 return expand_shift (LSHIFT_EXPR, mode, op0,
3117 floor_log2 (coeff), target, unsignedp);
3120 /* Exclude cost of op0 from max_cost to match the cost
3121 calculation of the synth_mult. */
3122 max_cost = mul_widen_cost[speed][mode];
3123 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3124 max_cost))
3126 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3127 return expand_mult_const (mode, op0, coeff, target,
3128 &algorithm, variant);
3131 return expand_binop (mode, this_optab, op0, op1, target,
3132 unsignedp, OPTAB_LIB_WIDEN);
3135 /* Return the smallest n such that 2**n >= X. */
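/* For example, ceil_log2 (8) == 3, ceil_log2 (9) == 4 and
   ceil_log2 (1) == 0 (floor_log2 (0) is defined to return -1).  */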
3138 ceil_log2 (unsigned HOST_WIDE_INT x)
3140 return floor_log2 (x - 1) + 1;
3143 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3144 replace division by D, and put the least significant N bits of the result
3145 in *MULTIPLIER_PTR and return the most significant bit.
3147 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3148 needed precision is in PRECISION (should be <= N).
3150 PRECISION should be as small as possible so this function can choose
3151 the multiplier more freely.
3153 The rounded-up logarithm of D is placed in *lgup_ptr. A shift count that
3154 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3156 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3157 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
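/* For example, for D = 5 with N = PRECISION = 32 this yields the
   multiplier 0xcccccccd with *POST_SHIFT_PTR = 2 and *LGUP_PTR = 3,
   so x / 5 == (x * 0xcccccccd) >> 34 for all 32-bit unsigned x.  */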
3159 static
3160 unsigned HOST_WIDE_INT
3161 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3162 rtx *multiplier_ptr, int *post_shift_ptr, int *lgup_ptr)
3164 HOST_WIDE_INT mhigh_hi, mlow_hi;
3165 unsigned HOST_WIDE_INT mhigh_lo, mlow_lo;
3166 int lgup, post_shift;
3167 int pow, pow2;
3168 unsigned HOST_WIDE_INT nl, dummy1;
3169 HOST_WIDE_INT nh, dummy2;
3171 /* lgup = ceil(log2(divisor)); */
3172 lgup = ceil_log2 (d);
3174 gcc_assert (lgup <= n);
3176 pow = n + lgup;
3177 pow2 = n + lgup - precision;
3179 /* We could handle this with some effort, but this case is much
3180 better handled directly with a scc insn, so rely on caller using
3181 that. */
3182 gcc_assert (pow != 2 * HOST_BITS_PER_WIDE_INT);
3184 /* mlow = 2^(N + lgup)/d */
3185 if (pow >= HOST_BITS_PER_WIDE_INT)
3187 nh = (HOST_WIDE_INT) 1 << (pow - HOST_BITS_PER_WIDE_INT);
3188 nl = 0;
3190 else
3192 nh = 0;
3193 nl = (unsigned HOST_WIDE_INT) 1 << pow;
3195 div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3196 &mlow_lo, &mlow_hi, &dummy1, &dummy2);
3198 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3199 if (pow2 >= HOST_BITS_PER_WIDE_INT)
3200 nh |= (HOST_WIDE_INT) 1 << (pow2 - HOST_BITS_PER_WIDE_INT);
3201 else
3202 nl |= (unsigned HOST_WIDE_INT) 1 << pow2;
3203 div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3204 &mhigh_lo, &mhigh_hi, &dummy1, &dummy2);
3206 gcc_assert (!mhigh_hi || nh - d < d);
3207 gcc_assert (mhigh_hi <= 1 && mlow_hi <= 1);
3208 /* Assert that mlow < mhigh. */
3209 gcc_assert (mlow_hi < mhigh_hi
3210 || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo));
3212 /* If precision == N, then mlow, mhigh exceed 2^N
3213 (but they do not exceed 2^(N+1)). */
3215 /* Reduce to lowest terms. */
3216 for (post_shift = lgup; post_shift > 0; post_shift--)
3218 unsigned HOST_WIDE_INT ml_lo = (mlow_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mlow_lo >> 1);
3219 unsigned HOST_WIDE_INT mh_lo = (mhigh_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mhigh_lo >> 1);
3220 if (ml_lo >= mh_lo)
3221 break;
3223 mlow_hi = 0;
3224 mlow_lo = ml_lo;
3225 mhigh_hi = 0;
3226 mhigh_lo = mh_lo;
3229 *post_shift_ptr = post_shift;
3230 *lgup_ptr = lgup;
3231 if (n < HOST_BITS_PER_WIDE_INT)
3233 unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3234 *multiplier_ptr = GEN_INT (mhigh_lo & mask);
3235 return mhigh_lo >= mask;
3237 else
3239 *multiplier_ptr = GEN_INT (mhigh_lo);
3240 return mhigh_hi;
3244 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3245 congruent to 1 (mod 2**N). */
3247 static unsigned HOST_WIDE_INT
3248 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3250 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
3252 /* The algorithm notes that the choice y = x satisfies
3253 x*y == 1 mod 2^3, since x is assumed odd.
3254 Each iteration doubles the number of bits of significance in y. */
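  /* For example, for x = 7 and n = 8 the iteration converges to
     y = 183: 7 * 183 == 1281 == 5 * 256 + 1, i.e. 1 (mod 2^8).  */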
3256 unsigned HOST_WIDE_INT mask;
3257 unsigned HOST_WIDE_INT y = x;
3258 int nbit = 3;
3260 mask = (n == HOST_BITS_PER_WIDE_INT
3261 ? ~(unsigned HOST_WIDE_INT) 0
3262 : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3264 while (nbit < n)
3266 y = y * (2 - x*y) & mask; /* Modulo 2^N */
3267 nbit *= 2;
3269 return y;
3272 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3273 flavor of OP0 and OP1. ADJ_OPERAND is already the high half of the
3274 product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed product
3275 to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3276 become signed.
3278 The result is put in TARGET if that is convenient.
3280 MODE is the mode of operation. */
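/* The adjustment relies on the identity, computed in MODE arithmetic:
   highpart_u (x, y) == highpart_s (x, y)
			+ (x < 0 ? y : 0) + (y < 0 ? x : 0);
   each correction term below is formed by AND-ing one operand with the
   sign mask of the other.  */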
3283 expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
3284 rtx op1, rtx target, int unsignedp)
3286 rtx tem;
3287 enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3289 tem = expand_shift (RSHIFT_EXPR, mode, op0,
3290 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3291 tem = expand_and (mode, tem, op1, NULL_RTX);
3292 adj_operand
3293 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3294 adj_operand);
3296 tem = expand_shift (RSHIFT_EXPR, mode, op1,
3297 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3298 tem = expand_and (mode, tem, op0, NULL_RTX);
3299 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3300 target);
3302 return target;
3305 /* Subroutine of expand_mult_highpart. Return the MODE high part of OP. */
3307 static rtx
3308 extract_high_half (enum machine_mode mode, rtx op)
3310 enum machine_mode wider_mode;
3312 if (mode == word_mode)
3313 return gen_highpart (mode, op);
3315 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3317 wider_mode = GET_MODE_WIDER_MODE (mode);
3318 op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3319 GET_MODE_BITSIZE (mode), 0, 1);
3320 return convert_modes (mode, wider_mode, op, 0);
3323 /* Like expand_mult_highpart, but only consider using a multiplication
3324 optab. OP1 is an rtx for the constant operand. */
3326 static rtx
3327 expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
3328 rtx target, int unsignedp, int max_cost)
3330 rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3331 enum machine_mode wider_mode;
3332 optab moptab;
3333 rtx tem;
3334 int size;
3335 bool speed = optimize_insn_for_speed_p ();
3337 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3339 wider_mode = GET_MODE_WIDER_MODE (mode);
3340 size = GET_MODE_BITSIZE (mode);
3342 /* Firstly, try using a multiplication insn that only generates the needed
3343 high part of the product, and in the sign flavor of unsignedp. */
3344 if (mul_highpart_cost[speed][mode] < max_cost)
3346 moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3347 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3348 unsignedp, OPTAB_DIRECT);
3349 if (tem)
3350 return tem;
3353 /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3354 Need to adjust the result after the multiplication. */
3355 if (size - 1 < BITS_PER_WORD
3356 && (mul_highpart_cost[speed][mode] + 2 * shift_cost[speed][mode][size-1]
3357 + 4 * add_cost[speed][mode] < max_cost))
3359 moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3360 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3361 unsignedp, OPTAB_DIRECT);
3362 if (tem)
3363 /* We used the wrong signedness. Adjust the result. */
3364 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3365 tem, unsignedp);
3368 /* Try widening multiplication. */
3369 moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3370 if (optab_handler (moptab, wider_mode) != CODE_FOR_nothing
3371 && mul_widen_cost[speed][wider_mode] < max_cost)
3373 tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3374 unsignedp, OPTAB_WIDEN);
3375 if (tem)
3376 return extract_high_half (mode, tem);
3379 /* Try widening the mode and perform a non-widening multiplication. */
3380 if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3381 && size - 1 < BITS_PER_WORD
3382 && mul_cost[speed][wider_mode] + shift_cost[speed][mode][size-1] < max_cost)
3384 rtx insns, wop0, wop1;
3386 /* We need to widen the operands, for example to ensure the
3387 constant multiplier is correctly sign or zero extended.
3388 Use a sequence to clean up any instructions emitted by
3389 the conversions if things don't work out. */
3390 start_sequence ();
3391 wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3392 wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3393 tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3394 unsignedp, OPTAB_WIDEN);
3395 insns = get_insns ();
3396 end_sequence ();
3398 if (tem)
3400 emit_insn (insns);
3401 return extract_high_half (mode, tem);
3405 /* Try widening multiplication of opposite signedness, and adjust. */
3406 moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3407 if (optab_handler (moptab, wider_mode) != CODE_FOR_nothing
3408 && size - 1 < BITS_PER_WORD
3409 && (mul_widen_cost[speed][wider_mode] + 2 * shift_cost[speed][mode][size-1]
3410 + 4 * add_cost[speed][mode] < max_cost))
3412 tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3413 NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3414 if (tem != 0)
3416 tem = extract_high_half (mode, tem);
3417 /* We used the wrong signedness. Adjust the result. */
3418 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3419 target, unsignedp);
3423 return 0;
3426 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3427 putting the high half of the result in TARGET if that is convenient,
3428 and return where the result is. If the operation can not be performed,
3429 0 is returned.
3431 MODE is the mode of operation and result.
3433 UNSIGNEDP nonzero means unsigned multiply.
3435 MAX_COST is the total allowed cost for the expanded RTL. */
3437 static rtx
3438 expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
3439 rtx target, int unsignedp, int max_cost)
3441 enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3442 unsigned HOST_WIDE_INT cnst1;
3443 int extra_cost;
3444 bool sign_adjust = false;
3445 enum mult_variant variant;
3446 struct algorithm alg;
3447 rtx tem;
3448 bool speed = optimize_insn_for_speed_p ();
3450 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3451 /* We can't support modes wider than HOST_BITS_PER_WIDE_INT. */
3452 gcc_assert (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT);
3454 cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3456 /* We can't optimize modes wider than BITS_PER_WORD.
3457 ??? We might be able to perform double-word arithmetic if
3458 mode == word_mode, however all the cost calculations in
3459 synth_mult etc. assume single-word operations. */
3460 if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3461 return expand_mult_highpart_optab (mode, op0, op1, target,
3462 unsignedp, max_cost);
3464 extra_cost = shift_cost[speed][mode][GET_MODE_BITSIZE (mode) - 1];
3466 /* Check whether we try to multiply by a negative constant. */
3467 if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3469 sign_adjust = true;
3470 extra_cost += add_cost[speed][mode];
3473 /* See whether shift/add multiplication is cheap enough. */
3474 if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3475 max_cost - extra_cost))
3477 /* See whether the specialized multiplication optabs are
3478 cheaper than the shift/add version. */
3479 tem = expand_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3480 alg.cost.cost + extra_cost);
3481 if (tem)
3482 return tem;
3484 tem = convert_to_mode (wider_mode, op0, unsignedp);
3485 tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3486 tem = extract_high_half (mode, tem);
3488 /* Adjust result for signedness. */
3489 if (sign_adjust)
3490 tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3492 return tem;
3494 return expand_mult_highpart_optab (mode, op0, op1, target,
3495 unsignedp, max_cost);
3499 /* Expand signed modulus of OP0 by a power of two D in mode MODE. */
3501 static rtx
3502 expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3504 unsigned HOST_WIDE_INT masklow, maskhigh;
3505 rtx result, temp, shift, label;
3506 int logd;
3508 logd = floor_log2 (d);
3509 result = gen_reg_rtx (mode);
3511 /* Avoid conditional branches when they're expensive. */
3512 if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3513 && optimize_insn_for_speed_p ())
3515 rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3516 mode, 0, -1);
3517 if (signmask)
3519 signmask = force_reg (mode, signmask);
3520 masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3521 shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3523 /* Use the rtx_cost of a LSHIFTRT instruction to determine
3524 which instruction sequence to use. If logical right shifts
3525 are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
3526 use a LSHIFTRT, 1 ADD, 1 SUB and an AND. */
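/* A sketch of why both sequences compute the truncated remainder:
   SIGNMASK is all ones when OP0 is negative and zero otherwise, and
   (X ^ SIGNMASK) - SIGNMASK negates X exactly when SIGNMASK is -1.
   The XOR/SUB sequence therefore masks |OP0| and negates the result
   back for negative OP0.  The second sequence shifts SIGNMASK down to
   the low LOGD bits, giving ((OP0 + (D - 1)) & MASKLOW) - (D - 1) for
   negative OP0; e.g. with D = 4, -7 yields ((-7 + 3) & 3) - 3 = -3.  */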
3528 temp = gen_rtx_LSHIFTRT (mode, result, shift);
3529 if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3530 || rtx_cost (temp, SET, optimize_insn_for_speed_p ()) > COSTS_N_INSNS (2))
3532 temp = expand_binop (mode, xor_optab, op0, signmask,
3533 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3534 temp = expand_binop (mode, sub_optab, temp, signmask,
3535 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3536 temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3537 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3538 temp = expand_binop (mode, xor_optab, temp, signmask,
3539 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3540 temp = expand_binop (mode, sub_optab, temp, signmask,
3541 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3543 else
3545 signmask = expand_binop (mode, lshr_optab, signmask, shift,
3546 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3547 signmask = force_reg (mode, signmask);
3549 temp = expand_binop (mode, add_optab, op0, signmask,
3550 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3551 temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3552 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3553 temp = expand_binop (mode, sub_optab, temp, signmask,
3554 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3556 return temp;
3560 /* Mask contains the mode's signbit and the significant bits of the
3561 modulus. By including the signbit in the operation, many targets
3562 can avoid an explicit compare operation in the following comparison
3563 against zero. */
3565 masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3566 if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
3568 masklow |= (HOST_WIDE_INT) -1 << (GET_MODE_BITSIZE (mode) - 1);
3569 maskhigh = -1;
3571 else
3572 maskhigh = (HOST_WIDE_INT) -1
3573 << (GET_MODE_BITSIZE (mode) - HOST_BITS_PER_WIDE_INT - 1);
3575 temp = expand_binop (mode, and_optab, op0,
3576 immed_double_const (masklow, maskhigh, mode),
3577 result, 1, OPTAB_LIB_WIDEN);
3578 if (temp != result)
3579 emit_move_insn (result, temp);
3581 label = gen_label_rtx ();
3582 do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3584 temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3585 0, OPTAB_LIB_WIDEN);
3586 masklow = (HOST_WIDE_INT) -1 << logd;
3587 maskhigh = -1;
3588 temp = expand_binop (mode, ior_optab, temp,
3589 immed_double_const (masklow, maskhigh, mode),
3590 result, 1, OPTAB_LIB_WIDEN);
3591 temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3592 0, OPTAB_LIB_WIDEN);
3593 if (temp != result)
3594 emit_move_insn (result, temp);
3595 emit_label (label);
3596 return result;
3599 /* Expand signed division of OP0 by a power of two D in mode MODE.
3600 This routine is only called for positive values of D. */
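/* All of the branch-free variants below rely on the identity (a sketch,
   assuming two's-complement arithmetic):

     op0 / 2**logd  ==  (op0 + (op0 < 0 ? 2**logd - 1 : 0)) >> logd

   with truncating division on the left and an arithmetic shift on the
   right; e.g. -7 / 4: (-7 + 3) >> 2 == -4 >> 2 == -1.  */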
3602 static rtx
3603 expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3605 rtx temp, label;
3606 int logd;
3608 logd = floor_log2 (d);
3610 if (d == 2
3611 && BRANCH_COST (optimize_insn_for_speed_p (),
3612 false) >= 1)
3614 temp = gen_reg_rtx (mode);
3615 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3616 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3617 0, OPTAB_LIB_WIDEN);
3618 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3621 #ifdef HAVE_conditional_move
3622 if (BRANCH_COST (optimize_insn_for_speed_p (), false)
3623 >= 2)
3625 rtx temp2;
3627 /* ??? emit_conditional_move forces a stack adjustment via
3628 compare_from_rtx so, if the sequence is discarded, it will
3629 be lost. Do it now instead. */
3630 do_pending_stack_adjust ();
3632 start_sequence ();
3633 temp2 = copy_to_mode_reg (mode, op0);
3634 temp = expand_binop (mode, add_optab, temp2, GEN_INT (d-1),
3635 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3636 temp = force_reg (mode, temp);
3638 /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */
3639 temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3640 mode, temp, temp2, mode, 0);
3641 if (temp2)
3643 rtx seq = get_insns ();
3644 end_sequence ();
3645 emit_insn (seq);
3646 return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
3648 end_sequence ();
3650 #endif
3652 if (BRANCH_COST (optimize_insn_for_speed_p (),
3653 false) >= 2)
3655 int ushift = GET_MODE_BITSIZE (mode) - logd;
3657 temp = gen_reg_rtx (mode);
3658 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3659 if (shift_cost[optimize_insn_for_speed_p ()][mode][ushift] > COSTS_N_INSNS (1))
3660 temp = expand_binop (mode, and_optab, temp, GEN_INT (d - 1),
3661 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3662 else
3663 temp = expand_shift (RSHIFT_EXPR, mode, temp,
3664 ushift, NULL_RTX, 1);
3665 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3666 0, OPTAB_LIB_WIDEN);
3667 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3670 label = gen_label_rtx ();
3671 temp = copy_to_mode_reg (mode, op0);
3672 do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3673 expand_inc (temp, GEN_INT (d - 1));
3674 emit_label (label);
3675 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3678 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3679 if that is convenient, and returning where the result is.
3680 You may request either the quotient or the remainder as the result;
3681 specify REM_FLAG nonzero to get the remainder.
3683 CODE is the expression code for which kind of division this is;
3684 it controls how rounding is done. MODE is the machine mode to use.
3685 UNSIGNEDP nonzero means do unsigned division. */
3687 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3688 and then correct it by or'ing in missing high bits
3689 if result of ANDI is nonzero.
3690 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3691 This could optimize to a bfexts instruction.
3692 But C doesn't use these operations, so their optimizations are
3693 left for later. */
3694 /* ??? For modulo, we don't actually need the highpart of the first product,
3695 the low part will do nicely. And for small divisors, the second multiply
3696 can also be a low-part only multiply or even be completely left out.
3697 E.g. to calculate the remainder of a division by 3 with a 32 bit
3698 multiply, multiply with 0x55555556 and extract the upper two bits;
3699 the result is exact for inputs up to 0x1fffffff.
3700 The input range can be reduced by using cross-sum rules.
3701 For odd divisors >= 3, the following table gives right shift counts
3702 so that if a number is shifted by an integer multiple of the given
3703 amount, the remainder stays the same:
3704 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3705 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3706 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3707 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3708 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3710 Cross-sum rules for even numbers can be derived by leaving as many bits
3711 to the right alone as the divisor has zeros to the right.
3712 E.g. if x is an unsigned 32 bit number:
3713 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
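/* As a concrete check of the remainder-by-3 trick above: for x = 7 the
   low 32 bits of 7 * 0x55555556 are 0x5555555A, whose top two bits are
   01, i.e. 7 mod 3 == 1; for x = 8 they are 0xAAAAAAB0 with top bits
   10, i.e. 8 mod 3 == 2.  */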
3717 expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3718 rtx op0, rtx op1, rtx target, int unsignedp)
3720 enum machine_mode compute_mode;
3721 rtx tquotient;
3722 rtx quotient = 0, remainder = 0;
3723 rtx last;
3724 int size;
3725 rtx insn, set;
3726 optab optab1, optab2;
3727 int op1_is_constant, op1_is_pow2 = 0;
3728 int max_cost, extra_cost;
3729 static HOST_WIDE_INT last_div_const = 0;
3730 static HOST_WIDE_INT ext_op1;
3731 bool speed = optimize_insn_for_speed_p ();
3733 op1_is_constant = CONST_INT_P (op1);
3734 if (op1_is_constant)
3736 ext_op1 = INTVAL (op1);
3737 if (unsignedp)
3738 ext_op1 &= GET_MODE_MASK (mode);
3739 op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3740 || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3744 This is the structure of expand_divmod:
3746 First comes code to fix up the operands so we can perform the operations
3747 correctly and efficiently.
3749 Second comes a switch statement with code specific for each rounding mode.
3750 For some special operands this code emits all RTL for the desired
3751 operation; for other cases, it generates only a quotient and stores it in
3752 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
3753 to indicate that it has not done anything.
3755 Last comes code that finishes the operation. If QUOTIENT is set and
3756 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
3757 QUOTIENT is not set, it is computed using trunc rounding.
3759 We try to generate special code for division and remainder when OP1 is a
3760 constant. If |OP1| = 2**n we can use shifts and some other fast
3761 operations. For other values of OP1, we compute a carefully selected
3762 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3763 by m.
3765 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3766 half of the product. Different strategies for generating the product are
3767 implemented in expand_mult_highpart.
3769 If what we actually want is the remainder, we generate that by another
3770 by-constant multiplication and a subtraction. */
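/* For instance (a sketch, independent of any particular target),
   unsigned 32-bit division by 5 becomes a multiply-high by 0xCCCCCCCD
   followed by a right shift by 2, because 0xCCCCCCCD == ceil (2**34 / 5):

     x / 5 == (x * 0xCCCCCCCD) >> 34   computed in 64-bit arithmetic.

   choose_multiplier below selects such multipliers and shift counts.  */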
3772 /* We shouldn't be called with OP1 == const1_rtx, but some of the
3773 code below will malfunction if we are, so check here and handle
3774 the special case if so. */
3775 if (op1 == const1_rtx)
3776 return rem_flag ? const0_rtx : op0;
3778 /* When dividing by -1, we could get an overflow.
3779 negv_optab can handle overflows. */
3780 if (! unsignedp && op1 == constm1_rtx)
3782 if (rem_flag)
3783 return const0_rtx;
3784 return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT
3785 ? negv_optab : neg_optab, op0, target, 0);
3788 if (target
3789 /* Don't use the function value register as a target
3790 since we have to read it as well as write it,
3791 and function-inlining gets confused by this. */
3792 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3793 /* Don't clobber an operand while doing a multi-step calculation. */
3794 || ((rem_flag || op1_is_constant)
3795 && (reg_mentioned_p (target, op0)
3796 || (MEM_P (op0) && MEM_P (target))))
3797 || reg_mentioned_p (target, op1)
3798 || (MEM_P (op1) && MEM_P (target))))
3799 target = 0;
3801 /* Get the mode in which to perform this computation. Normally it will
3802 be MODE, but sometimes we can't do the desired operation in MODE.
3803 If so, pick a wider mode in which we can do the operation. Convert
3804 to that mode at the start to avoid repeated conversions.
3806 First see what operations we need. These depend on the expression
3807 we are evaluating. (We assume that divxx3 insns exist under the
3808 same conditions as modxx3 insns do, and that these insns don't normally
3809 fail. If these assumptions are not correct, we may generate less
3810 efficient code in some cases.)
3812 Then see if we find a mode in which we can open-code that operation
3813 (either a division, modulus, or shift). Finally, check for the smallest
3814 mode for which we can do the operation with a library call. */
3816 /* We might want to refine this now that we have division-by-constant
3817 optimization. Since expand_mult_highpart tries so many variants, it is
3818 not straightforward to generalize this. Maybe we should make an array
3819 of possible modes in init_expmed? Save this for GCC 2.7. */
3821 optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3822 ? (unsignedp ? lshr_optab : ashr_optab)
3823 : (unsignedp ? udiv_optab : sdiv_optab));
3824 optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3825 ? optab1
3826 : (unsignedp ? udivmod_optab : sdivmod_optab));
3828 for (compute_mode = mode; compute_mode != VOIDmode;
3829 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3830 if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
3831 || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
3832 break;
3834 if (compute_mode == VOIDmode)
3835 for (compute_mode = mode; compute_mode != VOIDmode;
3836 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3837 if (optab_libfunc (optab1, compute_mode)
3838 || optab_libfunc (optab2, compute_mode))
3839 break;
3841 /* If we still couldn't find a mode, use MODE, but expand_binop will
3842 probably die. */
3843 if (compute_mode == VOIDmode)
3844 compute_mode = mode;
3846 if (target && GET_MODE (target) == compute_mode)
3847 tquotient = target;
3848 else
3849 tquotient = gen_reg_rtx (compute_mode);
3851 size = GET_MODE_BITSIZE (compute_mode);
3852 #if 0
3853 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3854 (mode), and thereby get better code when OP1 is a constant. Do that
3855 later. It will require going over all usages of SIZE below. */
3856 size = GET_MODE_BITSIZE (mode);
3857 #endif
3859 /* Only deduct something for a REM if the last divide done was
3860 for a different constant. Then set the constant of the last
3861 divide. */
3862 max_cost = unsignedp ? udiv_cost[speed][compute_mode] : sdiv_cost[speed][compute_mode];
3863 if (rem_flag && ! (last_div_const != 0 && op1_is_constant
3864 && INTVAL (op1) == last_div_const))
3865 max_cost -= mul_cost[speed][compute_mode] + add_cost[speed][compute_mode];
3867 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
3869 /* Now convert to the best mode to use. */
3870 if (compute_mode != mode)
3872 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
3873 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
3875 /* convert_modes may have placed op1 into a register, so we
3876 must recompute the following. */
3877 op1_is_constant = CONST_INT_P (op1);
3878 op1_is_pow2 = (op1_is_constant
3879 && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
3880 || (! unsignedp
3881 && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1))))));
3884 /* If one of the operands is a volatile MEM, copy it into a register. */
3886 if (MEM_P (op0) && MEM_VOLATILE_P (op0))
3887 op0 = force_reg (compute_mode, op0);
3888 if (MEM_P (op1) && MEM_VOLATILE_P (op1))
3889 op1 = force_reg (compute_mode, op1);
3891 /* If we need the remainder or if OP1 is constant, we need to
3892 put OP0 in a register in case it has any queued subexpressions. */
3893 if (rem_flag || op1_is_constant)
3894 op0 = force_reg (compute_mode, op0);
3896 last = get_last_insn ();
3898 /* Promote floor rounding to trunc rounding for unsigned operations. */
3899 if (unsignedp)
3901 if (code == FLOOR_DIV_EXPR)
3902 code = TRUNC_DIV_EXPR;
3903 if (code == FLOOR_MOD_EXPR)
3904 code = TRUNC_MOD_EXPR;
3905 if (code == EXACT_DIV_EXPR && op1_is_pow2)
3906 code = TRUNC_DIV_EXPR;
3909 if (op1 != const0_rtx)
3910 switch (code)
3912 case TRUNC_MOD_EXPR:
3913 case TRUNC_DIV_EXPR:
3914 if (op1_is_constant)
3916 if (unsignedp)
3918 unsigned HOST_WIDE_INT mh;
3919 int pre_shift, post_shift;
3920 int dummy;
3921 rtx ml;
3922 unsigned HOST_WIDE_INT d = (INTVAL (op1)
3923 & GET_MODE_MASK (compute_mode));
3925 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
3927 pre_shift = floor_log2 (d);
3928 if (rem_flag)
3930 remainder
3931 = expand_binop (compute_mode, and_optab, op0,
3932 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
3933 remainder, 1,
3934 OPTAB_LIB_WIDEN);
3935 if (remainder)
3936 return gen_lowpart (mode, remainder);
3938 quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3939 pre_shift, tquotient, 1);
3941 else if (size <= HOST_BITS_PER_WIDE_INT)
3943 if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
3945 /* Most significant bit of divisor is set; emit an scc
3946 insn. */
3947 quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
3948 compute_mode, 1, 1);
3950 else
3952 /* Find a suitable multiplier and right shift count
3953 instead of multiplying with D. */
3955 mh = choose_multiplier (d, size, size,
3956 &ml, &post_shift, &dummy);
3958 /* If the suggested multiplier is more than SIZE bits,
3959 we can do better for even divisors, using an
3960 initial right shift. */
3961 if (mh != 0 && (d & 1) == 0)
3963 pre_shift = floor_log2 (d & -d);
3964 mh = choose_multiplier (d >> pre_shift, size,
3965 size - pre_shift,
3966 &ml, &post_shift, &dummy);
3967 gcc_assert (!mh);
3969 else
3970 pre_shift = 0;
3972 if (mh != 0)
3974 rtx t1, t2, t3, t4;
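/* The required multiplier has SIZE + 1 bits here (MH is its high bit),
   so the sequence below applies that extra bit separately.  As a
   sketch, with T1 the high half of OP0 * ML it computes

     q = (t1 + ((op0 - t1) >> 1)) >> (post_shift - 1),

   which in effect evaluates (op0 * (2**size + ml)) >> (size + post_shift)
   without needing a multiplication wider than SIZE bits.  */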
3976 if (post_shift - 1 >= BITS_PER_WORD)
3977 goto fail1;
3979 extra_cost
3980 = (shift_cost[speed][compute_mode][post_shift - 1]
3981 + shift_cost[speed][compute_mode][1]
3982 + 2 * add_cost[speed][compute_mode]);
3983 t1 = expand_mult_highpart (compute_mode, op0, ml,
3984 NULL_RTX, 1,
3985 max_cost - extra_cost);
3986 if (t1 == 0)
3987 goto fail1;
3988 t2 = force_operand (gen_rtx_MINUS (compute_mode,
3989 op0, t1),
3990 NULL_RTX);
3991 t3 = expand_shift (RSHIFT_EXPR, compute_mode,
3992 t2, 1, NULL_RTX, 1);
3993 t4 = force_operand (gen_rtx_PLUS (compute_mode,
3994 t1, t3),
3995 NULL_RTX);
3996 quotient = expand_shift
3997 (RSHIFT_EXPR, compute_mode, t4,
3998 post_shift - 1, tquotient, 1);
4000 else
4002 rtx t1, t2;
4004 if (pre_shift >= BITS_PER_WORD
4005 || post_shift >= BITS_PER_WORD)
4006 goto fail1;
4008 t1 = expand_shift
4009 (RSHIFT_EXPR, compute_mode, op0,
4010 pre_shift, NULL_RTX, 1);
4011 extra_cost
4012 = (shift_cost[speed][compute_mode][pre_shift]
4013 + shift_cost[speed][compute_mode][post_shift]);
4014 t2 = expand_mult_highpart (compute_mode, t1, ml,
4015 NULL_RTX, 1,
4016 max_cost - extra_cost);
4017 if (t2 == 0)
4018 goto fail1;
4019 quotient = expand_shift
4020 (RSHIFT_EXPR, compute_mode, t2,
4021 post_shift, tquotient, 1);
4025 else /* Too wide mode to use tricky code */
4026 break;
4028 insn = get_last_insn ();
4029 if (insn != last
4030 && (set = single_set (insn)) != 0
4031 && SET_DEST (set) == quotient)
4032 set_unique_reg_note (insn,
4033 REG_EQUAL,
4034 gen_rtx_UDIV (compute_mode, op0, op1));
4036 else /* TRUNC_DIV, signed */
4038 unsigned HOST_WIDE_INT ml;
4039 int lgup, post_shift;
4040 rtx mlr;
4041 HOST_WIDE_INT d = INTVAL (op1);
4042 unsigned HOST_WIDE_INT abs_d;
4044 /* Since d might be INT_MIN, we have to cast to
4045 unsigned HOST_WIDE_INT before negating to avoid
4046 undefined signed overflow. */
4047 abs_d = (d >= 0
4048 ? (unsigned HOST_WIDE_INT) d
4049 : - (unsigned HOST_WIDE_INT) d);
4051 /* n rem d = n rem -d */
4052 if (rem_flag && d < 0)
4054 d = abs_d;
4055 op1 = gen_int_mode (abs_d, compute_mode);
4058 if (d == 1)
4059 quotient = op0;
4060 else if (d == -1)
4061 quotient = expand_unop (compute_mode, neg_optab, op0,
4062 tquotient, 0);
4063 else if (HOST_BITS_PER_WIDE_INT >= size
4064 && abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4066 /* This case is not handled correctly below. */
4067 quotient = emit_store_flag (tquotient, EQ, op0, op1,
4068 compute_mode, 1, 1);
4069 if (quotient == 0)
4070 goto fail1;
4072 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4073 && (rem_flag ? smod_pow2_cheap[speed][compute_mode]
4074 : sdiv_pow2_cheap[speed][compute_mode])
4075 /* We assume that the cheap metric is true if the
4076 optab has an expander for this mode. */
4077 && ((optab_handler ((rem_flag ? smod_optab
4078 : sdiv_optab),
4079 compute_mode)
4080 != CODE_FOR_nothing)
4081 || (optab_handler (sdivmod_optab,
4082 compute_mode)
4083 != CODE_FOR_nothing)))
4085 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4087 if (rem_flag)
4089 remainder = expand_smod_pow2 (compute_mode, op0, d);
4090 if (remainder)
4091 return gen_lowpart (mode, remainder);
4094 if (sdiv_pow2_cheap[speed][compute_mode]
4095 && ((optab_handler (sdiv_optab, compute_mode)
4096 != CODE_FOR_nothing)
4097 || (optab_handler (sdivmod_optab, compute_mode)
4098 != CODE_FOR_nothing)))
4099 quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4100 compute_mode, op0,
4101 gen_int_mode (abs_d,
4102 compute_mode),
4103 NULL_RTX, 0);
4104 else
4105 quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4107 /* We have computed OP0 / abs(OP1). If OP1 is negative,
4108 negate the quotient. */
4109 if (d < 0)
4111 insn = get_last_insn ();
4112 if (insn != last
4113 && (set = single_set (insn)) != 0
4114 && SET_DEST (set) == quotient
4115 && abs_d < ((unsigned HOST_WIDE_INT) 1
4116 << (HOST_BITS_PER_WIDE_INT - 1)))
4117 set_unique_reg_note (insn,
4118 REG_EQUAL,
4119 gen_rtx_DIV (compute_mode,
4120 op0,
4121 GEN_INT
4122 (trunc_int_for_mode
4123 (abs_d,
4124 compute_mode))));
4126 quotient = expand_unop (compute_mode, neg_optab,
4127 quotient, quotient, 0);
4130 else if (size <= HOST_BITS_PER_WIDE_INT)
4132 choose_multiplier (abs_d, size, size - 1,
4133 &mlr, &post_shift, &lgup);
4134 ml = (unsigned HOST_WIDE_INT) INTVAL (mlr);
4135 if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4137 rtx t1, t2, t3;
4139 if (post_shift >= BITS_PER_WORD
4140 || size - 1 >= BITS_PER_WORD)
4141 goto fail1;
4143 extra_cost = (shift_cost[speed][compute_mode][post_shift]
4144 + shift_cost[speed][compute_mode][size - 1]
4145 + add_cost[speed][compute_mode]);
4146 t1 = expand_mult_highpart (compute_mode, op0, mlr,
4147 NULL_RTX, 0,
4148 max_cost - extra_cost);
4149 if (t1 == 0)
4150 goto fail1;
4151 t2 = expand_shift
4152 (RSHIFT_EXPR, compute_mode, t1,
4153 post_shift, NULL_RTX, 0);
4154 t3 = expand_shift
4155 (RSHIFT_EXPR, compute_mode, op0,
4156 size - 1, NULL_RTX, 0);
4157 if (d < 0)
4158 quotient
4159 = force_operand (gen_rtx_MINUS (compute_mode,
4160 t3, t2),
4161 tquotient);
4162 else
4163 quotient
4164 = force_operand (gen_rtx_MINUS (compute_mode,
4165 t2, t3),
4166 tquotient);
4168 else
4170 rtx t1, t2, t3, t4;
4172 if (post_shift >= BITS_PER_WORD
4173 || size - 1 >= BITS_PER_WORD)
4174 goto fail1;
4176 ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4177 mlr = gen_int_mode (ml, compute_mode);
4178 extra_cost = (shift_cost[speed][compute_mode][post_shift]
4179 + shift_cost[speed][compute_mode][size - 1]
4180 + 2 * add_cost[speed][compute_mode]);
4181 t1 = expand_mult_highpart (compute_mode, op0, mlr,
4182 NULL_RTX, 0,
4183 max_cost - extra_cost);
4184 if (t1 == 0)
4185 goto fail1;
4186 t2 = force_operand (gen_rtx_PLUS (compute_mode,
4187 t1, op0),
4188 NULL_RTX);
4189 t3 = expand_shift
4190 (RSHIFT_EXPR, compute_mode, t2,
4191 post_shift, NULL_RTX, 0);
4192 t4 = expand_shift
4193 (RSHIFT_EXPR, compute_mode, op0,
4194 size - 1, NULL_RTX, 0);
4195 if (d < 0)
4196 quotient
4197 = force_operand (gen_rtx_MINUS (compute_mode,
4198 t4, t3),
4199 tquotient);
4200 else
4201 quotient
4202 = force_operand (gen_rtx_MINUS (compute_mode,
4203 t3, t4),
4204 tquotient);
4207 else /* Too wide mode to use tricky code */
4208 break;
4210 insn = get_last_insn ();
4211 if (insn != last
4212 && (set = single_set (insn)) != 0
4213 && SET_DEST (set) == quotient)
4214 set_unique_reg_note (insn,
4215 REG_EQUAL,
4216 gen_rtx_DIV (compute_mode, op0, op1));
4218 break;
4220 fail1:
4221 delete_insns_since (last);
4222 break;
4224 case FLOOR_DIV_EXPR:
4225 case FLOOR_MOD_EXPR:
4226 /* We will come here only for signed operations. */
4227 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4229 unsigned HOST_WIDE_INT mh;
4230 int pre_shift, lgup, post_shift;
4231 HOST_WIDE_INT d = INTVAL (op1);
4232 rtx ml;
4234 if (d > 0)
4236 /* We could just as easily deal with negative constants here,
4237 but it does not seem worth the trouble for GCC 2.6. */
4238 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4240 pre_shift = floor_log2 (d);
4241 if (rem_flag)
4243 remainder = expand_binop (compute_mode, and_optab, op0,
4244 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4245 remainder, 0, OPTAB_LIB_WIDEN);
4246 if (remainder)
4247 return gen_lowpart (mode, remainder);
4249 quotient = expand_shift
4250 (RSHIFT_EXPR, compute_mode, op0,
4251 pre_shift, tquotient, 0);
4253 else
4255 rtx t1, t2, t3, t4;
4257 mh = choose_multiplier (d, size, size - 1,
4258 &ml, &post_shift, &lgup);
4259 gcc_assert (!mh);
4261 if (post_shift < BITS_PER_WORD
4262 && size - 1 < BITS_PER_WORD)
4264 t1 = expand_shift
4265 (RSHIFT_EXPR, compute_mode, op0,
4266 size - 1, NULL_RTX, 0);
4267 t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4268 NULL_RTX, 0, OPTAB_WIDEN);
4269 extra_cost = (shift_cost[speed][compute_mode][post_shift]
4270 + shift_cost[speed][compute_mode][size - 1]
4271 + 2 * add_cost[speed][compute_mode]);
4272 t3 = expand_mult_highpart (compute_mode, t2, ml,
4273 NULL_RTX, 1,
4274 max_cost - extra_cost);
4275 if (t3 != 0)
4277 t4 = expand_shift
4278 (RSHIFT_EXPR, compute_mode, t3,
4279 post_shift, NULL_RTX, 1);
4280 quotient = expand_binop (compute_mode, xor_optab,
4281 t4, t1, tquotient, 0,
4282 OPTAB_WIDEN);
4287 else
4289 rtx nsign, t1, t2, t3, t4;
4290 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4291 op0, constm1_rtx), NULL_RTX);
4292 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4293 0, OPTAB_WIDEN);
4294 nsign = expand_shift
4295 (RSHIFT_EXPR, compute_mode, t2,
4296 size - 1, NULL_RTX, 0);
4297 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4298 NULL_RTX);
4299 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4300 NULL_RTX, 0);
4301 if (t4)
4303 rtx t5;
4304 t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4305 NULL_RTX, 0);
4306 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4307 t4, t5),
4308 tquotient);
4313 if (quotient != 0)
4314 break;
4315 delete_insns_since (last);
4317 /* Try using an instruction that produces both the quotient and
4318 remainder, using truncation. We can easily compensate the quotient
4319 or remainder to get floor rounding, once we have the remainder.
4320 Notice that we also compute the final remainder value here,
4321 and return the result right away. */
4322 if (target == 0 || GET_MODE (target) != compute_mode)
4323 target = gen_reg_rtx (compute_mode);
4325 if (rem_flag)
4327 remainder
4328 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4329 quotient = gen_reg_rtx (compute_mode);
4331 else
4333 quotient
4334 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4335 remainder = gen_reg_rtx (compute_mode);
4338 if (expand_twoval_binop (sdivmod_optab, op0, op1,
4339 quotient, remainder, 0))
4341 /* This could be computed with a branch-less sequence.
4342 Save that for later. */
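/* Floor rounding differs from truncation only when the remainder is
   nonzero and OP0 and OP1 have opposite signs; in that case the
   quotient is decremented and OP1 is added back into the remainder,
   e.g. -7 floor/ 2 == -4 with remainder 1.  */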
4343 rtx tem;
4344 rtx label = gen_label_rtx ();
4345 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4346 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4347 NULL_RTX, 0, OPTAB_WIDEN);
4348 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4349 expand_dec (quotient, const1_rtx);
4350 expand_inc (remainder, op1);
4351 emit_label (label);
4352 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4355 /* No luck with division elimination or divmod. Have to do it
4356 by conditionally adjusting op0 *and* the result. */
4358 rtx label1, label2, label3, label4, label5;
4359 rtx adjusted_op0;
4360 rtx tem;
4362 quotient = gen_reg_rtx (compute_mode);
4363 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4364 label1 = gen_label_rtx ();
4365 label2 = gen_label_rtx ();
4366 label3 = gen_label_rtx ();
4367 label4 = gen_label_rtx ();
4368 label5 = gen_label_rtx ();
4369 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4370 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4371 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4372 quotient, 0, OPTAB_LIB_WIDEN);
4373 if (tem != quotient)
4374 emit_move_insn (quotient, tem);
4375 emit_jump_insn (gen_jump (label5));
4376 emit_barrier ();
4377 emit_label (label1);
4378 expand_inc (adjusted_op0, const1_rtx);
4379 emit_jump_insn (gen_jump (label4));
4380 emit_barrier ();
4381 emit_label (label2);
4382 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4383 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4384 quotient, 0, OPTAB_LIB_WIDEN);
4385 if (tem != quotient)
4386 emit_move_insn (quotient, tem);
4387 emit_jump_insn (gen_jump (label5));
4388 emit_barrier ();
4389 emit_label (label3);
4390 expand_dec (adjusted_op0, const1_rtx);
4391 emit_label (label4);
4392 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4393 quotient, 0, OPTAB_LIB_WIDEN);
4394 if (tem != quotient)
4395 emit_move_insn (quotient, tem);
4396 expand_dec (quotient, const1_rtx);
4397 emit_label (label5);
4399 break;
4401 case CEIL_DIV_EXPR:
4402 case CEIL_MOD_EXPR:
4403 if (unsignedp)
4405 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4407 rtx t1, t2, t3;
4408 unsigned HOST_WIDE_INT d = INTVAL (op1);
4409 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4410 floor_log2 (d), tquotient, 1);
4411 t2 = expand_binop (compute_mode, and_optab, op0,
4412 GEN_INT (d - 1),
4413 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4414 t3 = gen_reg_rtx (compute_mode);
4415 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4416 compute_mode, 1, 1);
4417 if (t3 == 0)
4419 rtx lab;
4420 lab = gen_label_rtx ();
4421 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4422 expand_inc (t1, const1_rtx);
4423 emit_label (lab);
4424 quotient = t1;
4426 else
4427 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4428 t1, t3),
4429 tquotient);
4430 break;
4433 /* Try using an instruction that produces both the quotient and
4434 remainder, using truncation. We can easily compensate the
4435 quotient or remainder to get ceiling rounding, once we have the
4436 remainder. Notice that we also compute the final remainder
4437 value here, and return the result right away. */
4438 if (target == 0 || GET_MODE (target) != compute_mode)
4439 target = gen_reg_rtx (compute_mode);
4441 if (rem_flag)
4443 remainder = (REG_P (target)
4444 ? target : gen_reg_rtx (compute_mode));
4445 quotient = gen_reg_rtx (compute_mode);
4447 else
4449 quotient = (REG_P (target)
4450 ? target : gen_reg_rtx (compute_mode));
4451 remainder = gen_reg_rtx (compute_mode);
4454 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4455 remainder, 1))
4457 /* This could be computed with a branch-less sequence.
4458 Save that for later. */
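/* For ceiling rounding the fixup goes the other way: whenever the
   truncated remainder is nonzero, the quotient is incremented and OP1
   is subtracted from the remainder, e.g. 7 ceil/ 2 == 3 + 1 == 4.  */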
4459 rtx label = gen_label_rtx ();
4460 do_cmp_and_jump (remainder, const0_rtx, EQ,
4461 compute_mode, label);
4462 expand_inc (quotient, const1_rtx);
4463 expand_dec (remainder, op1);
4464 emit_label (label);
4465 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4468 /* No luck with division elimination or divmod. Have to do it
4469 by conditionally adjusting op0 *and* the result. */
4471 rtx label1, label2;
4472 rtx adjusted_op0, tem;
4474 quotient = gen_reg_rtx (compute_mode);
4475 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4476 label1 = gen_label_rtx ();
4477 label2 = gen_label_rtx ();
4478 do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4479 compute_mode, label1);
4480 emit_move_insn (quotient, const0_rtx);
4481 emit_jump_insn (gen_jump (label2));
4482 emit_barrier ();
4483 emit_label (label1);
4484 expand_dec (adjusted_op0, const1_rtx);
4485 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4486 quotient, 1, OPTAB_LIB_WIDEN);
4487 if (tem != quotient)
4488 emit_move_insn (quotient, tem);
4489 expand_inc (quotient, const1_rtx);
4490 emit_label (label2);
4493 else /* signed */
4495 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4496 && INTVAL (op1) >= 0)
4498 /* This is extremely similar to the code for the unsigned case
4499 above. For 2.7 we should merge these variants, but for
4500 2.6.1 I don't want to touch the code for unsigned since that
4501 gets used in C. The signed case will only be used by other
4502 languages (Ada). */
4504 rtx t1, t2, t3;
4505 unsigned HOST_WIDE_INT d = INTVAL (op1);
4506 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4507 floor_log2 (d), tquotient, 0);
4508 t2 = expand_binop (compute_mode, and_optab, op0,
4509 GEN_INT (d - 1),
4510 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4511 t3 = gen_reg_rtx (compute_mode);
4512 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4513 compute_mode, 1, 1);
4514 if (t3 == 0)
4516 rtx lab;
4517 lab = gen_label_rtx ();
4518 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4519 expand_inc (t1, const1_rtx);
4520 emit_label (lab);
4521 quotient = t1;
4523 else
4524 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4525 t1, t3),
4526 tquotient);
4527 break;
4530 /* Try using an instruction that produces both the quotient and
4531 remainder, using truncation. We can easily compensate the
4532 quotient or remainder to get ceiling rounding, once we have the
4533 remainder. Notice that we also compute the final remainder
4534 value here, and return the result right away. */
4535 if (target == 0 || GET_MODE (target) != compute_mode)
4536 target = gen_reg_rtx (compute_mode);
4537 if (rem_flag)
4539 remainder = (REG_P (target)
4540 ? target : gen_reg_rtx (compute_mode));
4541 quotient = gen_reg_rtx (compute_mode);
4543 else
4545 quotient = (REG_P (target)
4546 ? target : gen_reg_rtx (compute_mode));
4547 remainder = gen_reg_rtx (compute_mode);
4550 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4551 remainder, 0))
4553 /* This could be computed with a branch-less sequence.
4554 Save that for later. */
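/* The signed ceiling fixup additionally checks the signs: the quotient
   is only rounded up when the truncated remainder is nonzero and OP0
   and OP1 have the same sign, which the XOR test below detects.  */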
4555 rtx tem;
4556 rtx label = gen_label_rtx ();
4557 do_cmp_and_jump (remainder, const0_rtx, EQ,
4558 compute_mode, label);
4559 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4560 NULL_RTX, 0, OPTAB_WIDEN);
4561 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4562 expand_inc (quotient, const1_rtx);
4563 expand_dec (remainder, op1);
4564 emit_label (label);
4565 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4568 /* No luck with division elimination or divmod. Have to do it
4569 by conditionally adjusting op0 *and* the result. */
4571 rtx label1, label2, label3, label4, label5;
4572 rtx adjusted_op0;
4573 rtx tem;
4575 quotient = gen_reg_rtx (compute_mode);
4576 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4577 label1 = gen_label_rtx ();
4578 label2 = gen_label_rtx ();
4579 label3 = gen_label_rtx ();
4580 label4 = gen_label_rtx ();
4581 label5 = gen_label_rtx ();
4582 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4583 do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4584 compute_mode, label1);
4585 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4586 quotient, 0, OPTAB_LIB_WIDEN);
4587 if (tem != quotient)
4588 emit_move_insn (quotient, tem);
4589 emit_jump_insn (gen_jump (label5));
4590 emit_barrier ();
4591 emit_label (label1);
4592 expand_dec (adjusted_op0, const1_rtx);
4593 emit_jump_insn (gen_jump (label4));
4594 emit_barrier ();
4595 emit_label (label2);
4596 do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4597 compute_mode, label3);
4598 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4599 quotient, 0, OPTAB_LIB_WIDEN);
4600 if (tem != quotient)
4601 emit_move_insn (quotient, tem);
4602 emit_jump_insn (gen_jump (label5));
4603 emit_barrier ();
4604 emit_label (label3);
4605 expand_inc (adjusted_op0, const1_rtx);
4606 emit_label (label4);
4607 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4608 quotient, 0, OPTAB_LIB_WIDEN);
4609 if (tem != quotient)
4610 emit_move_insn (quotient, tem);
4611 expand_inc (quotient, const1_rtx);
4612 emit_label (label5);
4615 break;
4617 case EXACT_DIV_EXPR:
4618 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4620 HOST_WIDE_INT d = INTVAL (op1);
4621 unsigned HOST_WIDE_INT ml;
4622 int pre_shift;
4623 rtx t1;
4625 pre_shift = floor_log2 (d & -d);
4626 ml = invert_mod2n (d >> pre_shift, size);
4627 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4628 pre_shift, NULL_RTX, unsignedp);
4629 quotient = expand_mult (compute_mode, t1,
4630 gen_int_mode (ml, compute_mode),
4631 NULL_RTX, 1);
4633 insn = get_last_insn ();
4634 set_unique_reg_note (insn,
4635 REG_EQUAL,
4636 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4637 compute_mode,
4638 op0, op1));
4640 break;
4642 case ROUND_DIV_EXPR:
4643 case ROUND_MOD_EXPR:
4644 if (unsignedp)
4646 rtx tem;
4647 rtx label;
4648 label = gen_label_rtx ();
4649 quotient = gen_reg_rtx (compute_mode);
4650 remainder = gen_reg_rtx (compute_mode);
4651 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4653 rtx tem;
4654 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4655 quotient, 1, OPTAB_LIB_WIDEN);
4656 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4657 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4658 remainder, 1, OPTAB_LIB_WIDEN);
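/* Round-to-nearest: bump the quotient when the truncated remainder
   exceeds (OP1 - 1) / 2, i.e. when OP0 is closer to the next multiple
   of OP1; e.g. 7 round/ 3 == 2 but 8 round/ 3 == 3.  */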
4660 tem = plus_constant (op1, -1);
4661 tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1);
4662 do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4663 expand_inc (quotient, const1_rtx);
4664 expand_dec (remainder, op1);
4665 emit_label (label);
4667 else
4669 rtx abs_rem, abs_op1, tem, mask;
4670 rtx label;
4671 label = gen_label_rtx ();
4672 quotient = gen_reg_rtx (compute_mode);
4673 remainder = gen_reg_rtx (compute_mode);
4674 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4676 rtx tem;
4677 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4678 quotient, 0, OPTAB_LIB_WIDEN);
4679 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4680 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4681 remainder, 0, OPTAB_LIB_WIDEN);
4683 abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4684 abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4685 tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4686 1, NULL_RTX, 1);
4687 do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4688 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4689 NULL_RTX, 0, OPTAB_WIDEN);
4690 mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4691 size - 1, NULL_RTX, 0);
4692 tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4693 NULL_RTX, 0, OPTAB_WIDEN);
4694 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4695 NULL_RTX, 0, OPTAB_WIDEN);
4696 expand_inc (quotient, tem);
4697 tem = expand_binop (compute_mode, xor_optab, mask, op1,
4698 NULL_RTX, 0, OPTAB_WIDEN);
4699 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4700 NULL_RTX, 0, OPTAB_WIDEN);
4701 expand_dec (remainder, tem);
4702 emit_label (label);
4704 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4706 default:
4707 gcc_unreachable ();
4710 if (quotient == 0)
4712 if (target && GET_MODE (target) != compute_mode)
4713 target = 0;
4715 if (rem_flag)
4717 /* Try to produce the remainder without producing the quotient.
4718 If we seem to have a divmod pattern that does not require widening,
4719 don't try widening here. We should really have a WIDEN argument
4720 to expand_twoval_binop, since what we'd really like to do here is
4721 1) try a mod insn in compute_mode
4722 2) try a divmod insn in compute_mode
4723 3) try a div insn in compute_mode and multiply-subtract to get
4724 remainder
4725 4) try the same things with widening allowed. */
4726 remainder
4727 = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4728 op0, op1, target,
4729 unsignedp,
4730 ((optab_handler (optab2, compute_mode)
4731 != CODE_FOR_nothing)
4732 ? OPTAB_DIRECT : OPTAB_WIDEN));
4733 if (remainder == 0)
4735 /* No luck there. Can we do remainder and divide at once
4736 without a library call? */
4737 remainder = gen_reg_rtx (compute_mode);
4738 if (! expand_twoval_binop ((unsignedp
4739 ? udivmod_optab
4740 : sdivmod_optab),
4741 op0, op1,
4742 NULL_RTX, remainder, unsignedp))
4743 remainder = 0;
4746 if (remainder)
4747 return gen_lowpart (mode, remainder);
4750 /* Produce the quotient. Try a quotient insn, but not a library call.
4751 If we have a divmod in this mode, use it in preference to widening
4752 the div (for this test we assume it will not fail). Note that optab2
4753 is set to the one of the two optabs that the call below will use. */
4754 quotient
4755 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4756 op0, op1, rem_flag ? NULL_RTX : target,
4757 unsignedp,
4758 ((optab_handler (optab2, compute_mode)
4759 != CODE_FOR_nothing)
4760 ? OPTAB_DIRECT : OPTAB_WIDEN));
4762 if (quotient == 0)
4764 /* No luck there. Try a quotient-and-remainder insn,
4765 keeping the quotient alone. */
4766 quotient = gen_reg_rtx (compute_mode);
4767 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4768 op0, op1,
4769 quotient, NULL_RTX, unsignedp))
4771 quotient = 0;
4772 if (! rem_flag)
4773 /* Still no luck. If we are not computing the remainder,
4774 use a library call for the quotient. */
4775 quotient = sign_expand_binop (compute_mode,
4776 udiv_optab, sdiv_optab,
4777 op0, op1, target,
4778 unsignedp, OPTAB_LIB_WIDEN);
4783 if (rem_flag)
4785 if (target && GET_MODE (target) != compute_mode)
4786 target = 0;
4788 if (quotient == 0)
4790 /* No divide instruction either. Use library for remainder. */
4791 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4792 op0, op1, target,
4793 unsignedp, OPTAB_LIB_WIDEN);
4794 /* No remainder function. Try a quotient-and-remainder
4795 function, keeping the remainder. */
4796 if (!remainder)
4798 remainder = gen_reg_rtx (compute_mode);
4799 if (!expand_twoval_binop_libfunc
4800 (unsignedp ? udivmod_optab : sdivmod_optab,
4801 op0, op1,
4802 NULL_RTX, remainder,
4803 unsignedp ? UMOD : MOD))
4804 remainder = NULL_RTX;
4807 else
4809 /* We divided. Now finish doing X - Y * (X / Y). */
4810 remainder = expand_mult (compute_mode, quotient, op1,
4811 NULL_RTX, unsignedp);
4812 remainder = expand_binop (compute_mode, sub_optab, op0,
4813 remainder, target, unsignedp,
4814 OPTAB_LIB_WIDEN);
4818 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4821 /* Return a tree node with data type TYPE, describing the value of X.
4822 Usually this is a VAR_DECL, if there is no obvious better choice.
4823 X may be an expression, however we only support those expressions
4824 generated by loop.c. */
4826 tree
4827 make_tree (tree type, rtx x)
4829 tree t;
4831 switch (GET_CODE (x))
4833 case CONST_INT:
4835 HOST_WIDE_INT hi = 0;
4837 if (INTVAL (x) < 0
4838 && !(TYPE_UNSIGNED (type)
4839 && (GET_MODE_BITSIZE (TYPE_MODE (type))
4840 < HOST_BITS_PER_WIDE_INT)))
4841 hi = -1;
4843 t = build_int_cst_wide (type, INTVAL (x), hi);
4845 return t;
4848 case CONST_DOUBLE:
4849 if (GET_MODE (x) == VOIDmode)
4850 t = build_int_cst_wide (type,
4851 CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
4852 else
4854 REAL_VALUE_TYPE d;
4856 REAL_VALUE_FROM_CONST_DOUBLE (d, x);
4857 t = build_real (type, d);
4860 return t;
4862 case CONST_VECTOR:
4864 int units = CONST_VECTOR_NUNITS (x);
4865 tree itype = TREE_TYPE (type);
4866 tree t = NULL_TREE;
4867 int i;
4870 /* Build a tree with vector elements. */
4871 for (i = units - 1; i >= 0; --i)
4873 rtx elt = CONST_VECTOR_ELT (x, i);
4874 t = tree_cons (NULL_TREE, make_tree (itype, elt), t);
4877 return build_vector (type, t);
4880 case PLUS:
4881 return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4882 make_tree (type, XEXP (x, 1)));
4884 case MINUS:
4885 return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4886 make_tree (type, XEXP (x, 1)));
4888 case NEG:
4889 return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
4891 case MULT:
4892 return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
4893 make_tree (type, XEXP (x, 1)));
4895 case ASHIFT:
4896 return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
4897 make_tree (type, XEXP (x, 1)));
4899 case LSHIFTRT:
4900 t = unsigned_type_for (type);
4901 return fold_convert (type, build2 (RSHIFT_EXPR, t,
4902 make_tree (t, XEXP (x, 0)),
4903 make_tree (type, XEXP (x, 1))));
4905 case ASHIFTRT:
4906 t = signed_type_for (type);
4907 return fold_convert (type, build2 (RSHIFT_EXPR, t,
4908 make_tree (t, XEXP (x, 0)),
4909 make_tree (type, XEXP (x, 1))));
4911 case DIV:
4912 if (TREE_CODE (type) != REAL_TYPE)
4913 t = signed_type_for (type);
4914 else
4915 t = type;
4917 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
4918 make_tree (t, XEXP (x, 0)),
4919 make_tree (t, XEXP (x, 1))));
4920 case UDIV:
4921 t = unsigned_type_for (type);
4922 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
4923 make_tree (t, XEXP (x, 0)),
4924 make_tree (t, XEXP (x, 1))));
4926 case SIGN_EXTEND:
4927 case ZERO_EXTEND:
4928 t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
4929 GET_CODE (x) == ZERO_EXTEND);
4930 return fold_convert (type, make_tree (t, XEXP (x, 0)));
4932 case CONST:
4933 return make_tree (type, XEXP (x, 0));
4935 case SYMBOL_REF:
4936 t = SYMBOL_REF_DECL (x);
4937 if (t)
4938 return fold_convert (type, build_fold_addr_expr (t));
4939 /* else fall through. */
4941 default:
4942 t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
4944 /* If TYPE is a POINTER_TYPE, we might need to convert X from
4945 address mode to pointer mode. */
4946 if (POINTER_TYPE_P (type))
4947 x = convert_memory_address_addr_space
4948 (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
4950 /* Note that we do *not* use SET_DECL_RTL here, because we do not
4951 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
4952 t->decl_with_rtl.rtl = x;
4954 return t;
4958 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
4959 and returning TARGET.
4961 If TARGET is 0, a pseudo-register or constant is returned. */
4964 expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
4966 rtx tem = 0;
4968 if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
4969 tem = simplify_binary_operation (AND, mode, op0, op1);
4970 if (tem == 0)
4971 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
4973 if (target == 0)
4974 target = tem;
4975 else if (tem != target)
4976 emit_move_insn (target, tem);
4977 return target;
4980 /* Helper function for emit_store_flag. */
4981 static rtx
4982 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
4983 enum machine_mode mode, enum machine_mode compare_mode,
4984 int unsignedp, rtx x, rtx y, int normalizep,
4985 enum machine_mode target_mode)
4987 struct expand_operand ops[4];
4988 rtx op0, last, comparison, subtarget;
4989 enum machine_mode result_mode = insn_data[(int) icode].operand[0].mode;
4991 last = get_last_insn ();
4992 x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
4993 y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
4994 if (!x || !y)
4996 delete_insns_since (last);
4997 return NULL_RTX;
5000 if (target_mode == VOIDmode)
5001 target_mode = result_mode;
5002 if (!target)
5003 target = gen_reg_rtx (target_mode);
5005 comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5007 create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5008 create_fixed_operand (&ops[1], comparison);
5009 create_fixed_operand (&ops[2], x);
5010 create_fixed_operand (&ops[3], y);
5011 if (!maybe_expand_insn (icode, 4, ops))
5013 delete_insns_since (last);
5014 return NULL_RTX;
5016 subtarget = ops[0].value;
5018 /* If we are converting to a wider mode, first convert to
5019 TARGET_MODE, then normalize. This produces better combining
5020 opportunities on machines that have a SIGN_EXTRACT when we are
5021 testing a single bit. This mostly benefits the 68k.
5023 If STORE_FLAG_VALUE does not have the sign bit set when
5024 interpreted in MODE, we can do this conversion as unsigned, which
5025 is usually more efficient. */
5026 if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
5028 convert_move (target, subtarget,
5029 (GET_MODE_BITSIZE (result_mode) <= HOST_BITS_PER_WIDE_INT)
5030 && 0 == (STORE_FLAG_VALUE
5031 & ((HOST_WIDE_INT) 1
5032 << (GET_MODE_BITSIZE (result_mode) -1))));
5033 op0 = target;
5034 result_mode = target_mode;
5036 else
5037 op0 = subtarget;
5039 /* If we want to keep subexpressions around, don't reuse our last
5040 target. */
5041 if (optimize)
5042 subtarget = 0;
5044 /* Now normalize to the proper value in MODE. Sometimes we don't
5045 have to do anything. */
5046 if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5048 /* STORE_FLAG_VALUE might be the most negative number, so write
5049 the comparison this way to avoid a compile-time warning. */
5050 else if (- normalizep == STORE_FLAG_VALUE)
5051 op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5053 /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5054 it hard to use a value of just the sign bit due to ANSI integer
5055 constant typing rules. */
5056 else if (GET_MODE_BITSIZE (result_mode) <= HOST_BITS_PER_WIDE_INT
5057 && (STORE_FLAG_VALUE
5058 & ((HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (result_mode) - 1))))
5059 op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5060 GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5061 normalizep == 1);
5062 else
5064 gcc_assert (STORE_FLAG_VALUE & 1);
5066 op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5067 if (normalizep == -1)
5068 op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5071 /* If we were converting to a smaller mode, do the conversion now. */
5072 if (target_mode != result_mode)
5074 convert_move (target, op0, 0);
5075 return target;
5077 else
5078 return op0;
5082 /* A subroutine of emit_store_flag only including "tricks" that do not
5083 need a recursive call. These are kept separate to avoid infinite
5084 loops. */
5086 static rtx
5087 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5088 enum machine_mode mode, int unsignedp, int normalizep,
5089 enum machine_mode target_mode)
5091 rtx subtarget;
5092 enum insn_code icode;
5093 enum machine_mode compare_mode;
5094 enum mode_class mclass;
5095 enum rtx_code scode;
5096 rtx tem;
5098 if (unsignedp)
5099 code = unsigned_condition (code);
5100 scode = swap_condition (code);
5102 /* If one operand is constant, make it the second one. Only do this
5103 if the other operand is not constant as well. */
5105 if (swap_commutative_operands_p (op0, op1))
5107 tem = op0;
5108 op0 = op1;
5109 op1 = tem;
5110 code = swap_condition (code);
5113 if (mode == VOIDmode)
5114 mode = GET_MODE (op0);
5116 /* For some comparisons with 1 and -1, we can convert this to
5117 comparisons with zero. This will often produce more opportunities for
5118 store-flag insns. */
5120 switch (code)
5122 case LT:
5123 if (op1 == const1_rtx)
5124 op1 = const0_rtx, code = LE;
5125 break;
5126 case LE:
5127 if (op1 == constm1_rtx)
5128 op1 = const0_rtx, code = LT;
5129 break;
5130 case GE:
5131 if (op1 == const1_rtx)
5132 op1 = const0_rtx, code = GT;
5133 break;
5134 case GT:
5135 if (op1 == constm1_rtx)
5136 op1 = const0_rtx, code = GE;
5137 break;
5138 case GEU:
5139 if (op1 == const1_rtx)
5140 op1 = const0_rtx, code = NE;
5141 break;
5142 case LTU:
5143 if (op1 == const1_rtx)
5144 op1 = const0_rtx, code = EQ;
5145 break;
5146 default:
5147 break;
5150 /* If we are comparing a double-word integer with zero or -1, we can
5151 convert the comparison into one involving a single word. */
5152 if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5153 && GET_MODE_CLASS (mode) == MODE_INT
5154 && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5156 if ((code == EQ || code == NE)
5157 && (op1 == const0_rtx || op1 == constm1_rtx))
5159 rtx op00, op01;
5161 /* Do a logical OR or AND of the two words and compare the
5162 result. */
5163 op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5164 op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5165 tem = expand_binop (word_mode,
5166 op1 == const0_rtx ? ior_optab : and_optab,
5167 op00, op01, NULL_RTX, unsignedp,
5168 OPTAB_DIRECT);
5170 if (tem != 0)
5171 tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5172 unsignedp, normalizep);
5174 else if ((code == LT || code == GE) && op1 == const0_rtx)
5176 rtx op0h;
5178 /* If testing the sign bit, can just test on high word. */
5179 op0h = simplify_gen_subreg (word_mode, op0, mode,
5180 subreg_highpart_offset (word_mode,
5181 mode));
5182 tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5183 unsignedp, normalizep);
5185 else
5186 tem = NULL_RTX;
5188 if (tem)
5190 if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5191 return tem;
5192 if (!target)
5193 target = gen_reg_rtx (target_mode);
5195 convert_move (target, tem,
5196 0 == ((normalizep ? normalizep : STORE_FLAG_VALUE)
5197 & ((HOST_WIDE_INT) 1
5198 << (GET_MODE_BITSIZE (word_mode) -1))));
5199 return target;
5203 /* If this is A < 0 or A >= 0, we can do this by taking the ones
5204 complement of A (for GE) and shifting the sign bit to the low bit. */
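/* For instance, with a 32-bit MODE and a 0/1 result, A < 0 becomes a
   single logical shift, (unsigned) A >> 31, and A >= 0 additionally
   takes the one's complement of A before the same shift.  */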
5205 if (op1 == const0_rtx && (code == LT || code == GE)
5206 && GET_MODE_CLASS (mode) == MODE_INT
5207 && (normalizep || STORE_FLAG_VALUE == 1
5208 || (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
5209 && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
5210 == ((unsigned HOST_WIDE_INT) 1
5211 << (GET_MODE_BITSIZE (mode) - 1))))))
5213 subtarget = target;
5215 if (!target)
5216 target_mode = mode;
5218 /* If the result is to be wider than OP0, it is best to convert it
5219 first. If it is to be narrower, it is *incorrect* to convert it
5220 first. */
5221 else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5223 op0 = convert_modes (target_mode, mode, op0, 0);
5224 mode = target_mode;
5227 if (target_mode != mode)
5228 subtarget = 0;
5230 if (code == GE)
5231 op0 = expand_unop (mode, one_cmpl_optab, op0,
5232 ((STORE_FLAG_VALUE == 1 || normalizep)
5233 ? 0 : subtarget), 0);
5235 if (STORE_FLAG_VALUE == 1 || normalizep)
5236 /* If we are supposed to produce a 0/1 value, we want to do
5237 a logical shift from the sign bit to the low-order bit; for
5238 a -1/0 value, we do an arithmetic shift. */
5239 op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5240 GET_MODE_BITSIZE (mode) - 1,
5241 subtarget, normalizep != -1);
5243 if (mode != target_mode)
5244 op0 = convert_modes (target_mode, mode, op0, 0);
5246 return op0;
5249 mclass = GET_MODE_CLASS (mode);
5250 for (compare_mode = mode; compare_mode != VOIDmode;
5251 compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5253 enum machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5254 icode = optab_handler (cstore_optab, optab_mode);
5255 if (icode != CODE_FOR_nothing)
5257 do_pending_stack_adjust ();
5258 tem = emit_cstore (target, icode, code, mode, compare_mode,
5259 unsignedp, op0, op1, normalizep, target_mode);
5260 if (tem)
5261 return tem;
5263 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5265 tem = emit_cstore (target, icode, scode, mode, compare_mode,
5266 unsignedp, op1, op0, normalizep, target_mode);
5267 if (tem)
5268 return tem;
5270 break;
5274 return 0;
5277 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5278 and storing in TARGET. Normally return TARGET.
5279 Return 0 if that cannot be done.
5281 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
5282 it is VOIDmode, they cannot both be CONST_INT.
5284 UNSIGNEDP is for the case where we have to widen the operands
5285 to perform the operation. It says to use zero-extension.
5287 NORMALIZEP is 1 if we should convert the result to be either zero
5288 or one. NORMALIZEP is -1 if we should convert the result to be
5289 either zero or -1. If NORMALIZEP is zero, the result will be left
5290 "raw" out of the scc insn. */
5293 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5294 enum machine_mode mode, int unsignedp, int normalizep)
5296 enum machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5297 enum rtx_code rcode;
5298 rtx subtarget;
5299 rtx tem, last, trueval;
5301 tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5302 target_mode);
5303 if (tem)
5304 return tem;
5306 /* If we reached here, we can't do this with an scc insn; however, there
5307 are some comparisons that can be done in other ways. Don't do any
5308 of these cases if branches are very cheap. */
5309 if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5310 return 0;
5312 /* See what we need to return. We can only return 1, -1, or the
5313 sign bit. */
5315 if (normalizep == 0)
5317 if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5318 normalizep = STORE_FLAG_VALUE;
5320 else if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
5321 && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
5322 == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))
5324 else
5325 return 0;
5328 last = get_last_insn ();
5330 /* If optimizing, use different pseudo registers for each insn, instead
5331 of reusing the same pseudo. This leads to better CSE, but slows
5332 down the compiler, since there are more pseudos. */
5333 subtarget = (!optimize
5334 && (target_mode == mode)) ? target : NULL_RTX;
5335 trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5337 /* For floating-point comparisons, try the reverse comparison or try
5338 changing the "orderedness" of the comparison. */
5339 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5341 enum rtx_code first_code;
5342 bool and_them;
5344 rcode = reverse_condition_maybe_unordered (code);
5345 if (can_compare_p (rcode, mode, ccp_store_flag)
5346 && (code == ORDERED || code == UNORDERED
5347 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5348 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5350 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5351 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5353 /* For the reverse comparison, use either an addition or an XOR. */
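/* Either trick converts the reversed result into the desired one: adding
   NORMALIZEP maps STORE_FLAG_VALUE to 0 and 0 to NORMALIZEP, while XORing
   with TRUEVAL swaps TRUEVAL and 0.  */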
5354 if (want_add
5355 && rtx_cost (GEN_INT (normalizep), PLUS,
5356 optimize_insn_for_speed_p ()) == 0)
5358 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5359 STORE_FLAG_VALUE, target_mode);
5360 if (tem)
5361 return expand_binop (target_mode, add_optab, tem,
5362 GEN_INT (normalizep),
5363 target, 0, OPTAB_WIDEN);
5365 else if (!want_add
5366 && rtx_cost (trueval, XOR,
5367 optimize_insn_for_speed_p ()) == 0)
5369 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5370 normalizep, target_mode);
5371 if (tem)
5372 return expand_binop (target_mode, xor_optab, tem, trueval,
5373 target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
5377 delete_insns_since (last);
5379 /* Cannot split ORDERED and UNORDERED; only try the above trick. */
5380 if (code == ORDERED || code == UNORDERED)
5381 return 0;
5383 and_them = split_comparison (code, mode, &first_code, &code);
5385 /* If there are no NaNs, the first comparison should always fall through.
5386 Effectively change the comparison to the other one. */
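/* If the two parts must both hold, the first is the ORDERED test; if
   either part suffices, it is the UNORDERED test.  Without NaNs, ORDERED
   is always true and UNORDERED always false, so only the second
   comparison matters.  */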
5387 if (!HONOR_NANS (mode))
5389 gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5390 return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5391 target_mode);
5394 #ifdef HAVE_conditional_move
5395 /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5396 conditional move. */
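/* I.e. TARGET = (second comparison) ? TEM : 0 when both parts must hold,
   and TARGET = (second comparison) ? TRUEVAL : TEM when either part
   suffices.  */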
5397 tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5398 normalizep, target_mode);
5399 if (tem == 0)
5400 return 0;
5402 if (and_them)
5403 tem = emit_conditional_move (target, code, op0, op1, mode,
5404 tem, const0_rtx, GET_MODE (tem), 0);
5405 else
5406 tem = emit_conditional_move (target, code, op0, op1, mode,
5407 trueval, tem, GET_MODE (tem), 0);
5409 if (tem == 0)
5410 delete_insns_since (last);
5411 return tem;
5412 #else
5413 return 0;
5414 #endif
5417 /* The remaining tricks only apply to integer comparisons. */
5419 if (GET_MODE_CLASS (mode) != MODE_INT)
5420 return 0;
5422 /* If this is an equality comparison of integers, we can try to exclusive-or
5423 (or subtract) the two operands and use a recursive call to try the
5424 comparison with zero. Don't do any of these cases if branches are
5425 very cheap. */
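/* A == B exactly when A ^ B == 0 (or A - B == 0), so the EQ/NE test
   against zero on the combined value gives the same answer.  */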
5427 if ((code == EQ || code == NE) && op1 != const0_rtx)
5429 tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5430 OPTAB_WIDEN);
5432 if (tem == 0)
5433 tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5434 OPTAB_WIDEN);
5435 if (tem != 0)
5436 tem = emit_store_flag (target, code, tem, const0_rtx,
5437 mode, unsignedp, normalizep);
5438 if (tem != 0)
5439 return tem;
5441 delete_insns_since (last);
5444 /* For integer comparisons, try the reverse comparison. However, for
5445 small X, if we would have to extend it anyway, implementing "X != 0"
5446 as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0". */
5447 rcode = reverse_condition (code);
5448 if (can_compare_p (rcode, mode, ccp_store_flag)
5449 && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5450 && code == NE
5451 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5452 && op1 == const0_rtx))
5454 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5455 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5457 /* Again, for the reverse comparison, use either an addition or an XOR. */
5458 if (want_add
5459 && rtx_cost (GEN_INT (normalizep), PLUS,
5460 optimize_insn_for_speed_p ()) == 0)
5462 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5463 STORE_FLAG_VALUE, target_mode);
5464 if (tem != 0)
5465 tem = expand_binop (target_mode, add_optab, tem,
5466 GEN_INT (normalizep), target, 0, OPTAB_WIDEN);
5468 else if (!want_add
5469 && rtx_cost (trueval, XOR,
5470 optimize_insn_for_speed_p ()) == 0)
5472 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5473 normalizep, target_mode);
5474 if (tem != 0)
5475 tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5476 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5479 if (tem != 0)
5480 return tem;
5481 delete_insns_since (last);
5484 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5485 the constant zero. Reject all other comparisons at this point. Only
5486 do LE and GT if branches are expensive since they are expensive on
5487 2-operand machines. */
5489 if (op1 != const0_rtx
5490 || (code != EQ && code != NE
5491 && (BRANCH_COST (optimize_insn_for_speed_p (),
5492 false) <= 1 || (code != LE && code != GT))))
5493 return 0;
5495 /* Try to put the result of the comparison in the sign bit. Start by
5496 assuming we cannot do the necessary operation (TEM == 0). */
5498 tem = 0;
5500 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
5501 the sign bit set. */
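/* E.g. with 8-bit values: A = 5 gives 5 | 4 = 5 (sign bit clear), while
   A = 0 gives 0 | -1 = -1 and A = -4 gives 0xfc | 0xfb = 0xff = -1, both
   with the sign bit set, matching A <= 0.  */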
5503 if (code == LE)
5505 /* This is destructive, so SUBTARGET can't be OP0. */
5506 if (rtx_equal_p (subtarget, op0))
5507 subtarget = 0;
5509 tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5510 OPTAB_WIDEN);
5511 if (tem)
5512 tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5513 OPTAB_WIDEN);
5516 /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5517 number of bits in the mode of OP0, minus one. */
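/* With the arithmetic shift, the first term is 0 for A >= 0 and -1 for
   A < 0.  E.g. with 8-bit values: A = 5 gives 0 - 5 = -5 (sign bit set),
   A = 0 gives 0, and A = -4 gives -1 - (-4) = 3 (sign bit clear), so the
   sign bit of the result is set exactly when A > 0.  */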
5519 if (code == GT)
5521 if (rtx_equal_p (subtarget, op0))
5522 subtarget = 0;
5524 tem = expand_shift (RSHIFT_EXPR, mode, op0,
5525 GET_MODE_BITSIZE (mode) - 1,
5526 subtarget, 0);
5527 tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5528 OPTAB_WIDEN);
5531 if (code == EQ || code == NE)
5533 /* For EQ or NE, one way to do the comparison is to apply an operation
5534 that converts the operand into a positive number if it is nonzero
5535 or zero if it was originally zero. Then, for EQ, we subtract 1 and
5536 for NE we negate. This puts the result in the sign bit. Then we
5537 normalize with a shift, if needed.
5539 Two operations that can do the above actions are ABS and FFS, so try
5540 them. If that doesn't work, and MODE is smaller than a full word,
5541 we can use zero-extension to the wider mode (an unsigned conversion)
5542 as the operation. */
5544 /* Note that ABS doesn't yield a positive number for INT_MIN, but
5545 that is compensated by the subsequent overflow when subtracting
5546 one / negating. */
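/* In each case TEM ends up being 0 exactly when OP0 is 0 and (apart from
   the ABS (INT_MIN) case noted above) nonnegative otherwise, so TEM - 1 is
   negative only for OP0 == 0 (EQ) and -TEM is negative only for OP0 != 0
   (NE); the final shift then extracts that sign bit.  */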
5548 if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5549 tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5550 else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5551 tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5552 else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5554 tem = convert_modes (word_mode, mode, op0, 1);
5555 mode = word_mode;
5558 if (tem != 0)
5560 if (code == EQ)
5561 tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5562 0, OPTAB_WIDEN);
5563 else
5564 tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5567 /* If we couldn't do it that way, for NE we can "or" the two's complement
5568 of the value with itself. For EQ, we take the one's complement of
5569 that "or", which is an extra insn, so we only handle EQ if branches
5570 are expensive. */
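/* The sign bit of (-OP0 | OP0) is set exactly when OP0 is nonzero: e.g.
   with 8-bit values, 3 | -3 = 0xff and -6 | 6 = 0xfe both have it set,
   while 0 | 0 = 0 does not.  */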
5572 if (tem == 0
5573 && (code == NE
5574 || BRANCH_COST (optimize_insn_for_speed_p (),
5575 false) > 1))
5577 if (rtx_equal_p (subtarget, op0))
5578 subtarget = 0;
5580 tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5581 tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5582 OPTAB_WIDEN);
5584 if (tem && code == EQ)
5585 tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5589 if (tem && normalizep)
5590 tem = expand_shift (RSHIFT_EXPR, mode, tem,
5591 GET_MODE_BITSIZE (mode) - 1,
5592 subtarget, normalizep == 1);
5594 if (tem)
5596 if (!target)
5598 else if (GET_MODE (tem) != target_mode)
5600 convert_move (target, tem, 0);
5601 tem = target;
5603 else if (!subtarget)
5605 emit_move_insn (target, tem);
5606 tem = target;
5609 else
5610 delete_insns_since (last);
5612 return tem;
5615 /* Like emit_store_flag, but always succeeds. */
5618 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5619 enum machine_mode mode, int unsignedp, int normalizep)
5621 rtx tem, label;
5622 rtx trueval, falseval;
5624 /* First see if emit_store_flag can do the job. */
5625 tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5626 if (tem != 0)
5627 return tem;
5629 if (!target)
5630 target = gen_reg_rtx (word_mode);
5632 /* If this failed, we have to do this with set/compare/jump/set code.
5633 For foo != 0, if foo is in OP0 (the target), just replace it with TRUEVAL when nonzero. */
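/* I.e. emit "if (TARGET == 0) skip; TARGET = TRUEVAL;", leaving a zero
   TARGET alone and overwriting a nonzero one with TRUEVAL.  */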
5634 trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5635 if (code == NE
5636 && GET_MODE_CLASS (mode) == MODE_INT
5637 && REG_P (target)
5638 && op0 == target
5639 && op1 == const0_rtx)
5641 label = gen_label_rtx ();
5642 do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp,
5643 mode, NULL_RTX, NULL_RTX, label, -1);
5644 emit_move_insn (target, trueval);
5645 emit_label (label);
5646 return target;
5649 if (!REG_P (target)
5650 || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5651 target = gen_reg_rtx (GET_MODE (target));
5653 /* Jump in the right direction if the target cannot implement CODE
5654 but can jump on its reverse condition. */
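/* When we do branch on the reverse condition, TRUEVAL and FALSEVAL are
   swapped as well, so the value finally stored for the original comparison
   is unchanged.  */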
5655 falseval = const0_rtx;
5656 if (! can_compare_p (code, mode, ccp_jump)
5657 && (! FLOAT_MODE_P (mode)
5658 || code == ORDERED || code == UNORDERED
5659 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5660 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5662 enum rtx_code rcode;
5663 if (FLOAT_MODE_P (mode))
5664 rcode = reverse_condition_maybe_unordered (code);
5665 else
5666 rcode = reverse_condition (code);
5668 /* Canonicalize to UNORDERED for the libcall. */
5669 if (can_compare_p (rcode, mode, ccp_jump)
5670 || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5672 falseval = trueval;
5673 trueval = const0_rtx;
5674 code = rcode;
5678 emit_move_insn (target, trueval);
5679 label = gen_label_rtx ();
5680 do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5681 NULL_RTX, label, -1);
5683 emit_move_insn (target, falseval);
5684 emit_label (label);
5686 return target;
5689 /* Perform a possibly multi-word comparison and a conditional jump to LABEL
5690 if ARG1 OP ARG2 is true, where ARG1 and ARG2 are of mode MODE. This is
5691 now a thin wrapper around do_compare_rtx_and_jump. */
5693 static void
5694 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
5695 rtx label)
5697 int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5698 do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode,
5699 NULL_RTX, NULL_RTX, label, -1);