/* Medium-level subroutines: convert bit-field store and extract
   and shifts, multiplies and divides to rtl instructions.
   Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
   1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
   2011
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "diagnostic-core.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "flags.h"
#include "insn-config.h"
#include "expr.h"
#include "optabs.h"
#include "recog.h"
#include "langhooks.h"
#include "df.h"
#include "target.h"
#include "expmed.h"

struct target_expmed default_target_expmed;
#if SWITCHABLE_TARGET
struct target_expmed *this_target_expmed = &default_target_expmed;
#endif

static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
                                   unsigned HOST_WIDE_INT,
                                   unsigned HOST_WIDE_INT, rtx);
static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
                                   unsigned HOST_WIDE_INT, rtx);
static rtx extract_fixed_bit_field (enum machine_mode, rtx,
                                    unsigned HOST_WIDE_INT,
                                    unsigned HOST_WIDE_INT,
                                    unsigned HOST_WIDE_INT, rtx, int, bool);
static rtx mask_rtx (enum machine_mode, int, int, int);
static rtx lshift_value (enum machine_mode, rtx, int, int);
static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
                                    unsigned HOST_WIDE_INT, int);
static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
/* Test whether a value is zero or a power of two.  */
#define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
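/* For illustration: the test clears the lowest set bit, so it accepts
   zero and exact powers of two only; e.g. (8 & 7) == 0 holds, while
   (12 & 11) == 0 does not.  */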
#ifndef SLOW_UNALIGNED_ACCESS
#define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
#endif

/* Reduce conditional compilation elsewhere.  */
#ifndef HAVE_insv
#define HAVE_insv 0
#define CODE_FOR_insv CODE_FOR_nothing
#define gen_insv(a,b,c,d) NULL_RTX
#endif
#ifndef HAVE_extv
#define HAVE_extv 0
#define CODE_FOR_extv CODE_FOR_nothing
#define gen_extv(a,b,c,d) NULL_RTX
#endif
#ifndef HAVE_extzv
#define HAVE_extzv 0
#define CODE_FOR_extzv CODE_FOR_nothing
#define gen_extzv(a,b,c,d) NULL_RTX
#endif
void
init_expmed (void)
{
  struct
  {
    struct rtx_def reg;         rtunion reg_fld[2];
    struct rtx_def plus;        rtunion plus_fld1;
    struct rtx_def neg;
    struct rtx_def mult;        rtunion mult_fld1;
    struct rtx_def sdiv;        rtunion sdiv_fld1;
    struct rtx_def udiv;        rtunion udiv_fld1;
    struct rtx_def zext;
    struct rtx_def sdiv_32;     rtunion sdiv_32_fld1;
    struct rtx_def smod_32;     rtunion smod_32_fld1;
    struct rtx_def wide_mult;   rtunion wide_mult_fld1;
    struct rtx_def wide_lshr;   rtunion wide_lshr_fld1;
    struct rtx_def wide_trunc;
    struct rtx_def shift;       rtunion shift_fld1;
    struct rtx_def shift_mult;  rtunion shift_mult_fld1;
    struct rtx_def shift_add;   rtunion shift_add_fld1;
    struct rtx_def shift_sub0;  rtunion shift_sub0_fld1;
    struct rtx_def shift_sub1;  rtunion shift_sub1_fld1;
  } all;

  rtx pow2[MAX_BITS_PER_WORD];
  rtx cint[MAX_BITS_PER_WORD];
  int m, n;
  enum machine_mode mode, wider_mode;
  int speed;

  for (m = 1; m < MAX_BITS_PER_WORD; m++)
    {
      pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
      cint[m] = GEN_INT (m);
    }
  memset (&all, 0, sizeof all);

  PUT_CODE (&all.reg, REG);
  /* Avoid using hard regs in ways which may be unsupported.  */
  SET_REGNO (&all.reg, LAST_VIRTUAL_REGISTER + 1);

  PUT_CODE (&all.plus, PLUS);
  XEXP (&all.plus, 0) = &all.reg;
  XEXP (&all.plus, 1) = &all.reg;

  PUT_CODE (&all.neg, NEG);
  XEXP (&all.neg, 0) = &all.reg;

  PUT_CODE (&all.mult, MULT);
  XEXP (&all.mult, 0) = &all.reg;
  XEXP (&all.mult, 1) = &all.reg;

  PUT_CODE (&all.sdiv, DIV);
  XEXP (&all.sdiv, 0) = &all.reg;
  XEXP (&all.sdiv, 1) = &all.reg;

  PUT_CODE (&all.udiv, UDIV);
  XEXP (&all.udiv, 0) = &all.reg;
  XEXP (&all.udiv, 1) = &all.reg;

  PUT_CODE (&all.sdiv_32, DIV);
  XEXP (&all.sdiv_32, 0) = &all.reg;
  XEXP (&all.sdiv_32, 1) = 32 < MAX_BITS_PER_WORD ? cint[32] : GEN_INT (32);

  PUT_CODE (&all.smod_32, MOD);
  XEXP (&all.smod_32, 0) = &all.reg;
  XEXP (&all.smod_32, 1) = XEXP (&all.sdiv_32, 1);

  PUT_CODE (&all.zext, ZERO_EXTEND);
  XEXP (&all.zext, 0) = &all.reg;

  PUT_CODE (&all.wide_mult, MULT);
  XEXP (&all.wide_mult, 0) = &all.zext;
  XEXP (&all.wide_mult, 1) = &all.zext;

  PUT_CODE (&all.wide_lshr, LSHIFTRT);
  XEXP (&all.wide_lshr, 0) = &all.wide_mult;

  PUT_CODE (&all.wide_trunc, TRUNCATE);
  XEXP (&all.wide_trunc, 0) = &all.wide_lshr;

  PUT_CODE (&all.shift, ASHIFT);
  XEXP (&all.shift, 0) = &all.reg;

  PUT_CODE (&all.shift_mult, MULT);
  XEXP (&all.shift_mult, 0) = &all.reg;

  PUT_CODE (&all.shift_add, PLUS);
  XEXP (&all.shift_add, 0) = &all.shift_mult;
  XEXP (&all.shift_add, 1) = &all.reg;

  PUT_CODE (&all.shift_sub0, MINUS);
  XEXP (&all.shift_sub0, 0) = &all.shift_mult;
  XEXP (&all.shift_sub0, 1) = &all.reg;

  PUT_CODE (&all.shift_sub1, MINUS);
  XEXP (&all.shift_sub1, 0) = &all.reg;
  XEXP (&all.shift_sub1, 1) = &all.shift_mult;

  for (speed = 0; speed < 2; speed++)
    {
      crtl->maybe_hot_insn_p = speed;
      zero_cost[speed] = rtx_cost (const0_rtx, SET, speed);

      for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
           mode != VOIDmode;
           mode = GET_MODE_WIDER_MODE (mode))
        {
          PUT_MODE (&all.reg, mode);
          PUT_MODE (&all.plus, mode);
          PUT_MODE (&all.neg, mode);
          PUT_MODE (&all.mult, mode);
          PUT_MODE (&all.sdiv, mode);
          PUT_MODE (&all.udiv, mode);
          PUT_MODE (&all.sdiv_32, mode);
          PUT_MODE (&all.smod_32, mode);
          PUT_MODE (&all.wide_trunc, mode);
          PUT_MODE (&all.shift, mode);
          PUT_MODE (&all.shift_mult, mode);
          PUT_MODE (&all.shift_add, mode);
          PUT_MODE (&all.shift_sub0, mode);
          PUT_MODE (&all.shift_sub1, mode);

          add_cost[speed][mode] = rtx_cost (&all.plus, SET, speed);
          neg_cost[speed][mode] = rtx_cost (&all.neg, SET, speed);
          mul_cost[speed][mode] = rtx_cost (&all.mult, SET, speed);
          sdiv_cost[speed][mode] = rtx_cost (&all.sdiv, SET, speed);
          udiv_cost[speed][mode] = rtx_cost (&all.udiv, SET, speed);

          sdiv_pow2_cheap[speed][mode] = (rtx_cost (&all.sdiv_32, SET, speed)
                                          <= 2 * add_cost[speed][mode]);
          smod_pow2_cheap[speed][mode] = (rtx_cost (&all.smod_32, SET, speed)
                                          <= 4 * add_cost[speed][mode]);

          wider_mode = GET_MODE_WIDER_MODE (mode);
          if (wider_mode != VOIDmode)
            {
              PUT_MODE (&all.zext, wider_mode);
              PUT_MODE (&all.wide_mult, wider_mode);
              PUT_MODE (&all.wide_lshr, wider_mode);
              XEXP (&all.wide_lshr, 1) = GEN_INT (GET_MODE_BITSIZE (mode));

              mul_widen_cost[speed][wider_mode]
                = rtx_cost (&all.wide_mult, SET, speed);
              mul_highpart_cost[speed][mode]
                = rtx_cost (&all.wide_trunc, SET, speed);
            }

          shift_cost[speed][mode][0] = 0;
          shiftadd_cost[speed][mode][0] = shiftsub0_cost[speed][mode][0]
            = shiftsub1_cost[speed][mode][0] = add_cost[speed][mode];

          n = MIN (MAX_BITS_PER_WORD, GET_MODE_BITSIZE (mode));
          for (m = 1; m < n; m++)
            {
              XEXP (&all.shift, 1) = cint[m];
              XEXP (&all.shift_mult, 1) = pow2[m];

              shift_cost[speed][mode][m] = rtx_cost (&all.shift, SET, speed);
              shiftadd_cost[speed][mode][m] = rtx_cost (&all.shift_add, SET, speed);
              shiftsub0_cost[speed][mode][m] = rtx_cost (&all.shift_sub0, SET, speed);
              shiftsub1_cost[speed][mode][m] = rtx_cost (&all.shift_sub1, SET, speed);
            }
        }
    }
  if (alg_hash_used_p)
    memset (alg_hash, 0, sizeof (alg_hash));
  else
    alg_hash_used_p = true;
  default_rtl_profile ();
}
/* Return an rtx representing minus the value of X.
   MODE is the intended mode of the result,
   useful if X is a CONST_INT.  */

rtx
negate_rtx (enum machine_mode mode, rtx x)
{
  rtx result = simplify_unary_operation (NEG, mode, x, mode);

  if (result == 0)
    result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);

  return result;
}
/* Report on the availability of insv/extv/extzv and the desired mode
   of each of their operands.  Returns MAX_MACHINE_MODE if HAVE_foo
   is false; else the mode of the specified operand.  If OPNO is -1,
   all the caller cares about is whether the insn is available.  */

enum machine_mode
mode_for_extraction (enum extraction_pattern pattern, int opno)
{
  const struct insn_data_d *data;

  switch (pattern)
    {
    case EP_insv:
      if (HAVE_insv)
        {
          data = &insn_data[CODE_FOR_insv];
          break;
        }
      return MAX_MACHINE_MODE;

    case EP_extv:
      if (HAVE_extv)
        {
          data = &insn_data[CODE_FOR_extv];
          break;
        }
      return MAX_MACHINE_MODE;

    case EP_extzv:
      if (HAVE_extzv)
        {
          data = &insn_data[CODE_FOR_extzv];
          break;
        }
      return MAX_MACHINE_MODE;

    default:
      gcc_unreachable ();
    }

  if (opno == -1)
    return VOIDmode;

  /* Everyone who uses this function used to follow it with
     if (result == VOIDmode) result = word_mode; */
  if (data->operand[opno].mode == VOIDmode)
    return word_mode;
  return data->operand[opno].mode;
}
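/* Illustrative sketch (hypothetical caller, for exposition): a typical
   query asks for the mode of one operand of the pattern and treats
   MAX_MACHINE_MODE as "pattern not available", e.g.

     enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);
     if (op_mode == MAX_MACHINE_MODE)
       ... insv cannot be used on this target ...

   store_bit_field_1 below obtains its op_mode exactly this way;
   mode_for_extraction has already mapped a VOIDmode operand to
   word_mode, as the comment above notes.  */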
327 /* A subroutine of store_bit_field, with the same arguments. Return true
328 if the operation could be implemented.
330 If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
331 no other way of implementing the operation. If FALLBACK_P is false,
332 return false instead. */
334 static bool
335 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
336 unsigned HOST_WIDE_INT bitnum, enum machine_mode fieldmode,
337 rtx value, bool fallback_p)
339 unsigned int unit
340 = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
341 unsigned HOST_WIDE_INT offset, bitpos;
342 rtx op0 = str_rtx;
343 int byte_offset;
344 rtx orig_value;
346 enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);
348 while (GET_CODE (op0) == SUBREG)
350 /* The following line once was done only if WORDS_BIG_ENDIAN,
351 but I think that is a mistake. WORDS_BIG_ENDIAN is
352 meaningful at a much higher level; when structures are copied
353 between memory and regs, the higher-numbered regs
354 always get higher addresses. */
355 int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
356 int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
358 byte_offset = 0;
360 /* Paradoxical subregs need special handling on big endian machines. */
361 if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
363 int difference = inner_mode_size - outer_mode_size;
365 if (WORDS_BIG_ENDIAN)
366 byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
367 if (BYTES_BIG_ENDIAN)
368 byte_offset += difference % UNITS_PER_WORD;
370 else
371 byte_offset = SUBREG_BYTE (op0);
373 bitnum += byte_offset * BITS_PER_UNIT;
374 op0 = SUBREG_REG (op0);
377 /* No action is needed if the target is a register and if the field
378 lies completely outside that register. This can occur if the source
379 code contains an out-of-bounds access to a small array. */
380 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
381 return true;
383 /* Use vec_set patterns for inserting parts of vectors whenever
384 available. */
385 if (VECTOR_MODE_P (GET_MODE (op0))
386 && !MEM_P (op0)
387 && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
388 && fieldmode == GET_MODE_INNER (GET_MODE (op0))
389 && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
390 && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
392 struct expand_operand ops[3];
393 enum machine_mode outermode = GET_MODE (op0);
394 enum machine_mode innermode = GET_MODE_INNER (outermode);
395 enum insn_code icode = optab_handler (vec_set_optab, outermode);
396 int pos = bitnum / GET_MODE_BITSIZE (innermode);
398 create_fixed_operand (&ops[0], op0);
399 create_input_operand (&ops[1], value, innermode);
400 create_integer_operand (&ops[2], pos);
401 if (maybe_expand_insn (icode, 3, ops))
402 return true;
405 /* If the target is a register, overwriting the entire object, or storing
406 a full-word or multi-word field can be done with just a SUBREG.
408 If the target is memory, storing any naturally aligned field can be
409 done with a simple store. For targets that support fast unaligned
410 memory, any naturally sized, unit aligned field can be done directly. */
412 offset = bitnum / unit;
413 bitpos = bitnum % unit;
414 byte_offset = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
415 + (offset * UNITS_PER_WORD);
417 if (bitpos == 0
418 && bitsize == GET_MODE_BITSIZE (fieldmode)
419 && (!MEM_P (op0)
420 ? ((GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD
421 || GET_MODE_SIZE (GET_MODE (op0)) == GET_MODE_SIZE (fieldmode))
422 && byte_offset % GET_MODE_SIZE (fieldmode) == 0)
423 : (! SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0))
424 || (offset * BITS_PER_UNIT % bitsize == 0
425 && MEM_ALIGN (op0) % GET_MODE_BITSIZE (fieldmode) == 0))))
427 if (MEM_P (op0))
428 op0 = adjust_address (op0, fieldmode, offset);
429 else if (GET_MODE (op0) != fieldmode)
430 op0 = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
431 byte_offset);
432 emit_move_insn (op0, value);
433 return true;
436 /* Make sure we are playing with integral modes. Pun with subregs
437 if we aren't. This must come after the entire register case above,
438 since that case is valid for any mode. The following cases are only
439 valid for integral modes. */
441 enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
442 if (imode != GET_MODE (op0))
444 if (MEM_P (op0))
445 op0 = adjust_address (op0, imode, 0);
446 else
448 gcc_assert (imode != BLKmode);
449 op0 = gen_lowpart (imode, op0);
454 /* We may be accessing data outside the field, which means
455 we can alias adjacent data. */
456 if (MEM_P (op0))
458 op0 = shallow_copy_rtx (op0);
459 set_mem_alias_set (op0, 0);
460 set_mem_expr (op0, 0);
463 /* If OP0 is a register, BITPOS must count within a word.
464 But as we have it, it counts within whatever size OP0 now has.
465 On a bigendian machine, these are not the same, so convert. */
466 if (BYTES_BIG_ENDIAN
467 && !MEM_P (op0)
468 && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
469 bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
471 /* Storing an lsb-aligned field in a register
472 can be done with a movestrict instruction. */
474 if (!MEM_P (op0)
475 && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
476 && bitsize == GET_MODE_BITSIZE (fieldmode)
477 && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
479 struct expand_operand ops[2];
480 enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
481 rtx arg0 = op0;
483 if (GET_CODE (arg0) == SUBREG)
485 /* Else we've got some float mode source being extracted into
486 a different float mode destination -- this combination of
487 subregs results in Severe Tire Damage. */
488 gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
489 || GET_MODE_CLASS (fieldmode) == MODE_INT
490 || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
491 arg0 = SUBREG_REG (arg0);
494 arg0 = gen_rtx_SUBREG (fieldmode, arg0,
495 (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
496 + (offset * UNITS_PER_WORD));
498 create_fixed_operand (&ops[0], arg0);
499 /* Shrink the source operand to FIELDMODE. */
500 create_convert_operand_to (&ops[1], value, fieldmode, false);
501 if (maybe_expand_insn (icode, 2, ops))
502 return true;
505 /* Handle fields bigger than a word. */
507 if (bitsize > BITS_PER_WORD)
509 /* Here we transfer the words of the field
510 in the order least significant first.
511 This is because the most significant word is the one which may
512 be less than full.
513 However, only do that if the value is not BLKmode. */
515 unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
516 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
517 unsigned int i;
518 rtx last;
520 /* This is the mode we must force value to, so that there will be enough
521 subwords to extract. Note that fieldmode will often (always?) be
522 VOIDmode, because that is what store_field uses to indicate that this
523 is a bit field, but passing VOIDmode to operand_subword_force
524 is not allowed. */
525 fieldmode = GET_MODE (value);
526 if (fieldmode == VOIDmode)
527 fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
529 last = get_last_insn ();
530 for (i = 0; i < nwords; i++)
532 /* If I is 0, use the low-order word in both field and target;
533 if I is 1, use the next to lowest word; and so on. */
534 unsigned int wordnum = (backwards ? nwords - i - 1 : i);
          unsigned int bit_offset = (backwards
                                     ? MAX ((int) bitsize - ((int) i + 1)
                                            * BITS_PER_WORD,
                                            0)
                                     : (int) i * BITS_PER_WORD);
540 rtx value_word = operand_subword_force (value, wordnum, fieldmode);
542 if (!store_bit_field_1 (op0, MIN (BITS_PER_WORD,
543 bitsize - i * BITS_PER_WORD),
544 bitnum + bit_offset, word_mode,
545 value_word, fallback_p))
547 delete_insns_since (last);
548 return false;
551 return true;
  /* From here on we can assume that the field to be stored in is
     no wider than a word.  */
557 /* OFFSET is the number of words or bytes (UNIT says which)
558 from STR_RTX to the first word or byte containing part of the field. */
560 if (!MEM_P (op0))
562 if (offset != 0
563 || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
565 if (!REG_P (op0))
567 /* Since this is a destination (lvalue), we can't copy
568 it to a pseudo. We can remove a SUBREG that does not
569 change the size of the operand. Such a SUBREG may
570 have been added above. */
571 gcc_assert (GET_CODE (op0) == SUBREG
572 && (GET_MODE_SIZE (GET_MODE (op0))
573 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)))));
574 op0 = SUBREG_REG (op0);
576 op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
577 op0, (offset * UNITS_PER_WORD));
579 offset = 0;
582 /* If VALUE has a floating-point or complex mode, access it as an
583 integer of the corresponding size. This can occur on a machine
584 with 64 bit registers that uses SFmode for float. It can also
585 occur for unaligned float or complex fields. */
586 orig_value = value;
587 if (GET_MODE (value) != VOIDmode
588 && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
589 && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
591 value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
592 emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
595 /* Now OFFSET is nonzero only if OP0 is memory
596 and is therefore always measured in bytes. */
598 if (HAVE_insv
599 && GET_MODE (value) != BLKmode
600 && bitsize > 0
601 && GET_MODE_BITSIZE (op_mode) >= bitsize
602 && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
603 && (bitsize + bitpos > GET_MODE_BITSIZE (op_mode))))
605 struct expand_operand ops[4];
606 int xbitpos = bitpos;
607 rtx value1;
608 rtx xop0 = op0;
609 rtx last = get_last_insn ();
610 bool copy_back = false;
612 /* Add OFFSET into OP0's address. */
613 if (MEM_P (xop0))
614 xop0 = adjust_address (xop0, byte_mode, offset);
616 /* If xop0 is a register, we need it in OP_MODE
617 to make it acceptable to the format of insv. */
618 if (GET_CODE (xop0) == SUBREG)
619 /* We can't just change the mode, because this might clobber op0,
620 and we will need the original value of op0 if insv fails. */
621 xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
622 if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
623 xop0 = gen_lowpart_SUBREG (op_mode, xop0);
625 /* If the destination is a paradoxical subreg such that we need a
626 truncate to the inner mode, perform the insertion on a temporary and
627 truncate the result to the original destination. Note that we can't
628 just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
629 X) 0)) is (reg:N X). */
630 if (GET_CODE (xop0) == SUBREG
631 && REG_P (SUBREG_REG (xop0))
632 && (!TRULY_NOOP_TRUNCATION
633 (GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (xop0))),
634 GET_MODE_BITSIZE (op_mode))))
636 rtx tem = gen_reg_rtx (op_mode);
637 emit_move_insn (tem, xop0);
638 xop0 = tem;
639 copy_back = true;
642 /* On big-endian machines, we count bits from the most significant.
643 If the bit field insn does not, we must invert. */
645 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
646 xbitpos = unit - bitsize - xbitpos;
648 /* We have been counting XBITPOS within UNIT.
649 Count instead within the size of the register. */
650 if (BITS_BIG_ENDIAN && !MEM_P (xop0))
651 xbitpos += GET_MODE_BITSIZE (op_mode) - unit;
653 unit = GET_MODE_BITSIZE (op_mode);
655 /* Convert VALUE to op_mode (which insv insn wants) in VALUE1. */
656 value1 = value;
657 if (GET_MODE (value) != op_mode)
659 if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
661 /* Optimization: Don't bother really extending VALUE
662 if it has all the bits we will actually use. However,
663 if we must narrow it, be sure we do it correctly. */
665 if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
667 rtx tmp;
669 tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
670 if (! tmp)
671 tmp = simplify_gen_subreg (op_mode,
672 force_reg (GET_MODE (value),
673 value1),
674 GET_MODE (value), 0);
675 value1 = tmp;
677 else
678 value1 = gen_lowpart (op_mode, value1);
680 else if (CONST_INT_P (value))
681 value1 = gen_int_mode (INTVAL (value), op_mode);
682 else
683 /* Parse phase is supposed to make VALUE's data type
684 match that of the component reference, which is a type
685 at least as wide as the field; so VALUE should have
686 a mode that corresponds to that type. */
687 gcc_assert (CONSTANT_P (value));
690 create_fixed_operand (&ops[0], xop0);
691 create_integer_operand (&ops[1], bitsize);
692 create_integer_operand (&ops[2], xbitpos);
693 create_input_operand (&ops[3], value1, op_mode);
694 if (maybe_expand_insn (CODE_FOR_insv, 4, ops))
696 if (copy_back)
697 convert_move (op0, xop0, true);
698 return true;
700 delete_insns_since (last);
703 /* If OP0 is a memory, try copying it to a register and seeing if a
704 cheap register alternative is available. */
705 if (HAVE_insv && MEM_P (op0))
707 enum machine_mode bestmode;
709 /* Get the mode to use for inserting into this field. If OP0 is
710 BLKmode, get the smallest mode consistent with the alignment. If
711 OP0 is a non-BLKmode object that is no wider than OP_MODE, use its
712 mode. Otherwise, use the smallest mode containing the field. */
714 if (GET_MODE (op0) == BLKmode
715 || (op_mode != MAX_MACHINE_MODE
716 && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (op_mode)))
717 bestmode = get_best_mode (bitsize, bitnum, MEM_ALIGN (op0),
718 (op_mode == MAX_MACHINE_MODE
719 ? VOIDmode : op_mode),
720 MEM_VOLATILE_P (op0));
721 else
722 bestmode = GET_MODE (op0);
724 if (bestmode != VOIDmode
725 && GET_MODE_SIZE (bestmode) >= GET_MODE_SIZE (fieldmode)
726 && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
727 && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
729 rtx last, tempreg, xop0;
730 unsigned HOST_WIDE_INT xoffset, xbitpos;
732 last = get_last_insn ();
734 /* Adjust address to point to the containing unit of
735 that mode. Compute the offset as a multiple of this unit,
736 counting in bytes. */
737 unit = GET_MODE_BITSIZE (bestmode);
738 xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
739 xbitpos = bitnum % unit;
740 xop0 = adjust_address (op0, bestmode, xoffset);
742 /* Fetch that unit, store the bitfield in it, then store
743 the unit. */
744 tempreg = copy_to_reg (xop0);
745 if (store_bit_field_1 (tempreg, bitsize, xbitpos,
746 fieldmode, orig_value, false))
748 emit_move_insn (xop0, tempreg);
749 return true;
751 delete_insns_since (last);
755 if (!fallback_p)
756 return false;
758 store_fixed_bit_field (op0, offset, bitsize, bitpos, value);
759 return true;
/* Generate code to store value from rtx VALUE
   into a bit-field within structure STR_RTX
   containing BITSIZE bits starting at bit BITNUM.
   FIELDMODE is the machine-mode of the FIELD_DECL node for this field.  */

void
store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
                 unsigned HOST_WIDE_INT bitnum, enum machine_mode fieldmode,
                 rtx value)
{
  if (!store_bit_field_1 (str_rtx, bitsize, bitnum, fieldmode, value, true))
    gcc_unreachable ();
}
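/* Illustrative sketch (hypothetical usage, for exposition): storing the
   constant 5 into a 3-bit field that starts at bit 4 of a fresh SImode
   pseudo might look like

     rtx reg = gen_reg_rtx (SImode);
     store_bit_field (reg, 3, 4, SImode, GEN_INT (5));

   The call cannot fail: when no insv pattern or subreg shortcut applies,
   store_bit_field_1 falls back to store_fixed_bit_field.  */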
776 /* Use shifts and boolean operations to store VALUE
777 into a bit field of width BITSIZE
778 in a memory location specified by OP0 except offset by OFFSET bytes.
779 (OFFSET must be 0 if OP0 is a register.)
780 The field starts at position BITPOS within the byte.
781 (If OP0 is a register, it may be a full word or a narrower mode,
782 but BITPOS still counts within a full word,
783 which is significant on bigendian machines.) */
785 static void
786 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT offset,
787 unsigned HOST_WIDE_INT bitsize,
788 unsigned HOST_WIDE_INT bitpos, rtx value)
790 enum machine_mode mode;
791 unsigned int total_bits = BITS_PER_WORD;
792 rtx temp;
793 int all_zero = 0;
794 int all_one = 0;
796 /* There is a case not handled here:
797 a structure with a known alignment of just a halfword
798 and a field split across two aligned halfwords within the structure.
799 Or likewise a structure with a known alignment of just a byte
800 and a field split across two bytes.
801 Such cases are not supposed to be able to occur. */
803 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
805 gcc_assert (!offset);
806 /* Special treatment for a bit field split across two registers. */
807 if (bitsize + bitpos > BITS_PER_WORD)
809 store_split_bit_field (op0, bitsize, bitpos, value);
810 return;
813 else
815 /* Get the proper mode to use for this field. We want a mode that
816 includes the entire field. If such a mode would be larger than
817 a word, we won't be doing the extraction the normal way.
818 We don't want a mode bigger than the destination. */
820 mode = GET_MODE (op0);
821 if (GET_MODE_BITSIZE (mode) == 0
822 || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
823 mode = word_mode;
825 if (MEM_VOLATILE_P (op0)
826 && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
827 && flag_strict_volatile_bitfields > 0)
828 mode = GET_MODE (op0);
829 else
830 mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
831 MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
833 if (mode == VOIDmode)
835 /* The only way this should occur is if the field spans word
836 boundaries. */
837 store_split_bit_field (op0, bitsize, bitpos + offset * BITS_PER_UNIT,
838 value);
839 return;
842 total_bits = GET_MODE_BITSIZE (mode);
844 /* Make sure bitpos is valid for the chosen mode. Adjust BITPOS to
845 be in the range 0 to total_bits-1, and put any excess bytes in
846 OFFSET. */
847 if (bitpos >= total_bits)
849 offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
850 bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
851 * BITS_PER_UNIT);
854 /* Get ref to an aligned byte, halfword, or word containing the field.
855 Adjust BITPOS to be position within a word,
856 and OFFSET to be the offset of that word.
857 Then alter OP0 to refer to that word. */
858 bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
859 offset -= (offset % (total_bits / BITS_PER_UNIT));
860 op0 = adjust_address (op0, mode, offset);
863 mode = GET_MODE (op0);
865 /* Now MODE is either some integral mode for a MEM as OP0,
866 or is a full-word for a REG as OP0. TOTAL_BITS corresponds.
867 The bit field is contained entirely within OP0.
868 BITPOS is the starting bit number within OP0.
869 (OP0's mode may actually be narrower than MODE.) */
871 if (BYTES_BIG_ENDIAN)
872 /* BITPOS is the distance between our msb
873 and that of the containing datum.
874 Convert it to the distance from the lsb. */
875 bitpos = total_bits - bitsize - bitpos;
877 /* Now BITPOS is always the distance between our lsb
878 and that of OP0. */
880 /* Shift VALUE left by BITPOS bits. If VALUE is not constant,
881 we must first convert its mode to MODE. */
883 if (CONST_INT_P (value))
885 HOST_WIDE_INT v = INTVAL (value);
887 if (bitsize < HOST_BITS_PER_WIDE_INT)
888 v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
890 if (v == 0)
891 all_zero = 1;
892 else if ((bitsize < HOST_BITS_PER_WIDE_INT
893 && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
894 || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
895 all_one = 1;
897 value = lshift_value (mode, value, bitpos, bitsize);
899 else
901 int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
902 && bitpos + bitsize != GET_MODE_BITSIZE (mode));
904 if (GET_MODE (value) != mode)
905 value = convert_to_mode (mode, value, 1);
907 if (must_and)
908 value = expand_binop (mode, and_optab, value,
909 mask_rtx (mode, 0, bitsize, 0),
910 NULL_RTX, 1, OPTAB_LIB_WIDEN);
911 if (bitpos > 0)
912 value = expand_shift (LSHIFT_EXPR, mode, value,
913 build_int_cst (NULL_TREE, bitpos), NULL_RTX, 1);
916 /* Now clear the chosen bits in OP0,
917 except that if VALUE is -1 we need not bother. */
918 /* We keep the intermediates in registers to allow CSE to combine
919 consecutive bitfield assignments. */
921 temp = force_reg (mode, op0);
923 if (! all_one)
925 temp = expand_binop (mode, and_optab, temp,
926 mask_rtx (mode, bitpos, bitsize, 1),
927 NULL_RTX, 1, OPTAB_LIB_WIDEN);
928 temp = force_reg (mode, temp);
931 /* Now logical-or VALUE into OP0, unless it is zero. */
933 if (! all_zero)
935 temp = expand_binop (mode, ior_optab, temp, value,
936 NULL_RTX, 1, OPTAB_LIB_WIDEN);
937 temp = force_reg (mode, temp);
940 if (op0 != temp)
942 op0 = copy_rtx (op0);
943 emit_move_insn (op0, temp);
947 /* Store a bit field that is split across multiple accessible memory objects.
949 OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
950 BITSIZE is the field width; BITPOS the position of its first bit
951 (within the word).
952 VALUE is the value to store.
954 This does not yet handle fields wider than BITS_PER_WORD. */
956 static void
957 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
958 unsigned HOST_WIDE_INT bitpos, rtx value)
960 unsigned int unit;
961 unsigned int bitsdone = 0;
963 /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
964 much at a time. */
965 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
966 unit = BITS_PER_WORD;
967 else
968 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
970 /* If VALUE is a constant other than a CONST_INT, get it into a register in
971 WORD_MODE. If we can do this using gen_lowpart_common, do so. Note
972 that VALUE might be a floating-point constant. */
973 if (CONSTANT_P (value) && !CONST_INT_P (value))
975 rtx word = gen_lowpart_common (word_mode, value);
977 if (word && (value != word))
978 value = word;
979 else
980 value = gen_lowpart_common (word_mode,
981 force_reg (GET_MODE (value) != VOIDmode
982 ? GET_MODE (value)
983 : word_mode, value));
986 while (bitsdone < bitsize)
988 unsigned HOST_WIDE_INT thissize;
989 rtx part, word;
990 unsigned HOST_WIDE_INT thispos;
991 unsigned HOST_WIDE_INT offset;
993 offset = (bitpos + bitsdone) / unit;
994 thispos = (bitpos + bitsdone) % unit;
996 /* THISSIZE must not overrun a word boundary. Otherwise,
997 store_fixed_bit_field will call us again, and we will mutually
998 recurse forever. */
999 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1000 thissize = MIN (thissize, unit - thispos);
1002 if (BYTES_BIG_ENDIAN)
1004 int total_bits;
1006 /* We must do an endian conversion exactly the same way as it is
1007 done in extract_bit_field, so that the two calls to
1008 extract_fixed_bit_field will have comparable arguments. */
1009 if (!MEM_P (value) || GET_MODE (value) == BLKmode)
1010 total_bits = BITS_PER_WORD;
1011 else
1012 total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1014 /* Fetch successively less significant portions. */
1015 if (CONST_INT_P (value))
1016 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1017 >> (bitsize - bitsdone - thissize))
1018 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1019 else
1020 /* The args are chosen so that the last part includes the
1021 lsb. Give extract_bit_field the value it needs (with
1022 endianness compensation) to fetch the piece we want. */
1023 part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1024 total_bits - bitsize + bitsdone,
1025 NULL_RTX, 1, false);
1027 else
1029 /* Fetch successively more significant portions. */
1030 if (CONST_INT_P (value))
1031 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1032 >> bitsdone)
1033 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1034 else
1035 part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1036 bitsdone, NULL_RTX, 1, false);
1039 /* If OP0 is a register, then handle OFFSET here.
1041 When handling multiword bitfields, extract_bit_field may pass
1042 down a word_mode SUBREG of a larger REG for a bitfield that actually
1043 crosses a word boundary. Thus, for a SUBREG, we must find
1044 the current word starting from the base register. */
1045 if (GET_CODE (op0) == SUBREG)
1047 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1048 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1049 GET_MODE (SUBREG_REG (op0)));
1050 offset = 0;
1052 else if (REG_P (op0))
1054 word = operand_subword_force (op0, offset, GET_MODE (op0));
1055 offset = 0;
1057 else
1058 word = op0;
1060 /* OFFSET is in UNITs, and UNIT is in bits.
1061 store_fixed_bit_field wants offset in bytes. */
1062 store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT, thissize,
1063 thispos, part);
1064 bitsdone += thissize;
/* A subroutine of extract_bit_field_1 that converts return value X
   to either MODE or TMODE.  MODE, TMODE and UNSIGNEDP are arguments
   to extract_bit_field.  */

static rtx
convert_extracted_bit_field (rtx x, enum machine_mode mode,
                             enum machine_mode tmode, bool unsignedp)
{
  if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
    return x;

  /* If the x mode is not a scalar integral, first convert to the
     integer mode of that size and then access it as a floating-point
     value via a SUBREG.  */
  if (!SCALAR_INT_MODE_P (tmode))
    {
      enum machine_mode smode;

      smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
      x = convert_to_mode (smode, x, unsignedp);
      x = force_reg (smode, x);
      return gen_lowpart (tmode, x);
    }

  return convert_to_mode (tmode, x, unsignedp);
}
/* A subroutine of extract_bit_field, with the same arguments.
   If FALLBACK_P is true, fall back to extract_fixed_bit_field
   if we can find no other means of implementing the operation.
   If FALLBACK_P is false, return NULL instead.  */
1100 static rtx
1101 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1102 unsigned HOST_WIDE_INT bitnum,
1103 int unsignedp, bool packedp, rtx target,
1104 enum machine_mode mode, enum machine_mode tmode,
1105 bool fallback_p)
1107 unsigned int unit
1108 = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
1109 unsigned HOST_WIDE_INT offset, bitpos;
1110 rtx op0 = str_rtx;
1111 enum machine_mode int_mode;
1112 enum machine_mode ext_mode;
1113 enum machine_mode mode1;
1114 enum insn_code icode;
1115 int byte_offset;
1117 if (tmode == VOIDmode)
1118 tmode = mode;
1120 while (GET_CODE (op0) == SUBREG)
1122 bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1123 op0 = SUBREG_REG (op0);
1126 /* If we have an out-of-bounds access to a register, just return an
1127 uninitialized register of the required mode. This can occur if the
1128 source code contains an out-of-bounds access to a small array. */
1129 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1130 return gen_reg_rtx (tmode);
1132 if (REG_P (op0)
1133 && mode == GET_MODE (op0)
1134 && bitnum == 0
1135 && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1137 /* We're trying to extract a full register from itself. */
1138 return op0;
1141 /* See if we can get a better vector mode before extracting. */
1142 if (VECTOR_MODE_P (GET_MODE (op0))
1143 && !MEM_P (op0)
1144 && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1146 enum machine_mode new_mode;
1148 if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1149 new_mode = MIN_MODE_VECTOR_FLOAT;
1150 else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1151 new_mode = MIN_MODE_VECTOR_FRACT;
1152 else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1153 new_mode = MIN_MODE_VECTOR_UFRACT;
1154 else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1155 new_mode = MIN_MODE_VECTOR_ACCUM;
1156 else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1157 new_mode = MIN_MODE_VECTOR_UACCUM;
1158 else
1159 new_mode = MIN_MODE_VECTOR_INT;
1161 for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1162 if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1163 && targetm.vector_mode_supported_p (new_mode))
1164 break;
1165 if (new_mode != VOIDmode)
1166 op0 = gen_lowpart (new_mode, op0);
1169 /* Use vec_extract patterns for extracting parts of vectors whenever
1170 available. */
1171 if (VECTOR_MODE_P (GET_MODE (op0))
1172 && !MEM_P (op0)
1173 && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing
1174 && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1175 == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1177 struct expand_operand ops[3];
1178 enum machine_mode outermode = GET_MODE (op0);
1179 enum machine_mode innermode = GET_MODE_INNER (outermode);
1180 enum insn_code icode = optab_handler (vec_extract_optab, outermode);
1181 unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1183 create_output_operand (&ops[0], target, innermode);
1184 create_input_operand (&ops[1], op0, outermode);
1185 create_integer_operand (&ops[2], pos);
1186 if (maybe_expand_insn (icode, 3, ops))
1188 target = ops[0].value;
1189 if (GET_MODE (target) != mode)
1190 return gen_lowpart (tmode, target);
1191 return target;
1195 /* Make sure we are playing with integral modes. Pun with subregs
1196 if we aren't. */
1198 enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1199 if (imode != GET_MODE (op0))
1201 if (MEM_P (op0))
1202 op0 = adjust_address (op0, imode, 0);
1203 else if (imode != BLKmode)
1205 op0 = gen_lowpart (imode, op0);
1207 /* If we got a SUBREG, force it into a register since we
1208 aren't going to be able to do another SUBREG on it. */
1209 if (GET_CODE (op0) == SUBREG)
1210 op0 = force_reg (imode, op0);
1212 else if (REG_P (op0))
1214 rtx reg, subreg;
1215 imode = smallest_mode_for_size (GET_MODE_BITSIZE (GET_MODE (op0)),
1216 MODE_INT);
1217 reg = gen_reg_rtx (imode);
1218 subreg = gen_lowpart_SUBREG (GET_MODE (op0), reg);
1219 emit_move_insn (subreg, op0);
1220 op0 = reg;
1221 bitnum += SUBREG_BYTE (subreg) * BITS_PER_UNIT;
1223 else
1225 rtx mem = assign_stack_temp (GET_MODE (op0),
1226 GET_MODE_SIZE (GET_MODE (op0)), 0);
1227 emit_move_insn (mem, op0);
1228 op0 = adjust_address (mem, BLKmode, 0);
1233 /* We may be accessing data outside the field, which means
1234 we can alias adjacent data. */
1235 if (MEM_P (op0))
1237 op0 = shallow_copy_rtx (op0);
1238 set_mem_alias_set (op0, 0);
1239 set_mem_expr (op0, 0);
1242 /* Extraction of a full-word or multi-word value from a structure
1243 in a register or aligned memory can be done with just a SUBREG.
1244 A subword value in the least significant part of a register
1245 can also be extracted with a SUBREG. For this, we need the
1246 byte offset of the value in op0. */
1248 bitpos = bitnum % unit;
1249 offset = bitnum / unit;
1250 byte_offset = bitpos / BITS_PER_UNIT + offset * UNITS_PER_WORD;
1252 /* If OP0 is a register, BITPOS must count within a word.
1253 But as we have it, it counts within whatever size OP0 now has.
1254 On a bigendian machine, these are not the same, so convert. */
1255 if (BYTES_BIG_ENDIAN
1256 && !MEM_P (op0)
1257 && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
1258 bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1260 /* ??? We currently assume TARGET is at least as big as BITSIZE.
1261 If that's wrong, the solution is to test for it and set TARGET to 0
1262 if needed. */
1264 /* Only scalar integer modes can be converted via subregs. There is an
1265 additional problem for FP modes here in that they can have a precision
1266 which is different from the size. mode_for_size uses precision, but
1267 we want a mode based on the size, so we must avoid calling it for FP
1268 modes. */
1269 mode1 = (SCALAR_INT_MODE_P (tmode)
1270 ? mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0)
1271 : mode);
1273 /* If the bitfield is volatile, we need to make sure the access
1274 remains on a type-aligned boundary. */
1275 if (GET_CODE (op0) == MEM
1276 && MEM_VOLATILE_P (op0)
1277 && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
1278 && flag_strict_volatile_bitfields > 0)
1279 goto no_subreg_mode_swap;
1281 if (((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode)
1282 && bitpos % BITS_PER_WORD == 0)
1283 || (mode1 != BLKmode
1284 /* ??? The big endian test here is wrong. This is correct
1285 if the value is in a register, and if mode_for_size is not
1286 the same mode as op0. This causes us to get unnecessarily
1287 inefficient code from the Thumb port when -mbig-endian. */
1288 && (BYTES_BIG_ENDIAN
1289 ? bitpos + bitsize == BITS_PER_WORD
1290 : bitpos == 0)))
1291 && ((!MEM_P (op0)
1292 && TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (mode1),
1293 GET_MODE_BITSIZE (GET_MODE (op0)))
1294 && GET_MODE_SIZE (mode1) != 0
1295 && byte_offset % GET_MODE_SIZE (mode1) == 0)
1296 || (MEM_P (op0)
1297 && (! SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
1298 || (offset * BITS_PER_UNIT % bitsize == 0
1299 && MEM_ALIGN (op0) % bitsize == 0)))))
1301 if (MEM_P (op0))
1302 op0 = adjust_address (op0, mode1, offset);
1303 else if (mode1 != GET_MODE (op0))
1305 rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1306 byte_offset);
1307 if (sub == NULL)
1308 goto no_subreg_mode_swap;
1309 op0 = sub;
1311 if (mode1 != mode)
1312 return convert_to_mode (tmode, op0, unsignedp);
1313 return op0;
1315 no_subreg_mode_swap:
1317 /* Handle fields bigger than a word. */
1319 if (bitsize > BITS_PER_WORD)
1321 /* Here we transfer the words of the field
1322 in the order least significant first.
1323 This is because the most significant word is the one which may
1324 be less than full. */
1326 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1327 unsigned int i;
1329 if (target == 0 || !REG_P (target))
1330 target = gen_reg_rtx (mode);
1332 /* Indicate for flow that the entire target reg is being set. */
1333 emit_clobber (target);
1335 for (i = 0; i < nwords; i++)
1337 /* If I is 0, use the low-order word in both field and target;
1338 if I is 1, use the next to lowest word; and so on. */
1339 /* Word number in TARGET to use. */
1340 unsigned int wordnum
1341 = (WORDS_BIG_ENDIAN
1342 ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1343 : i);
1344 /* Offset from start of field in OP0. */
1345 unsigned int bit_offset = (WORDS_BIG_ENDIAN
1346 ? MAX (0, ((int) bitsize - ((int) i + 1)
1347 * (int) BITS_PER_WORD))
1348 : (int) i * BITS_PER_WORD);
1349 rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1350 rtx result_part
1351 = extract_bit_field (op0, MIN (BITS_PER_WORD,
1352 bitsize - i * BITS_PER_WORD),
1353 bitnum + bit_offset, 1, false, target_part, mode,
1354 word_mode);
1356 gcc_assert (target_part);
1358 if (result_part != target_part)
1359 emit_move_insn (target_part, result_part);
1362 if (unsignedp)
1364 /* Unless we've filled TARGET, the upper regs in a multi-reg value
1365 need to be zero'd out. */
1366 if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1368 unsigned int i, total_words;
1370 total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1371 for (i = nwords; i < total_words; i++)
1372 emit_move_insn
1373 (operand_subword (target,
1374 WORDS_BIG_ENDIAN ? total_words - i - 1 : i,
1375 1, VOIDmode),
1376 const0_rtx);
1378 return target;
1381 /* Signed bit field: sign-extend with two arithmetic shifts. */
1382 target = expand_shift (LSHIFT_EXPR, mode, target,
1383 build_int_cst (NULL_TREE,
1384 GET_MODE_BITSIZE (mode) - bitsize),
1385 NULL_RTX, 0);
1386 return expand_shift (RSHIFT_EXPR, mode, target,
1387 build_int_cst (NULL_TREE,
1388 GET_MODE_BITSIZE (mode) - bitsize),
1389 NULL_RTX, 0);
1392 /* From here on we know the desired field is smaller than a word. */
1394 /* Check if there is a correspondingly-sized integer field, so we can
1395 safely extract it as one size of integer, if necessary; then
1396 truncate or extend to the size that is wanted; then use SUBREGs or
1397 convert_to_mode to get one of the modes we really wanted. */
1399 int_mode = int_mode_for_mode (tmode);
1400 if (int_mode == BLKmode)
1401 int_mode = int_mode_for_mode (mode);
1402 /* Should probably push op0 out to memory and then do a load. */
1403 gcc_assert (int_mode != BLKmode);
1405 /* OFFSET is the number of words or bytes (UNIT says which)
1406 from STR_RTX to the first word or byte containing part of the field. */
1407 if (!MEM_P (op0))
1409 if (offset != 0
1410 || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1412 if (!REG_P (op0))
1413 op0 = copy_to_reg (op0);
1414 op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
1415 op0, (offset * UNITS_PER_WORD));
1417 offset = 0;
1420 /* Now OFFSET is nonzero only for memory operands. */
1421 ext_mode = mode_for_extraction (unsignedp ? EP_extzv : EP_extv, 0);
1422 icode = unsignedp ? CODE_FOR_extzv : CODE_FOR_extv;
1423 if (ext_mode != MAX_MACHINE_MODE
1424 && bitsize > 0
1425 && GET_MODE_BITSIZE (ext_mode) >= bitsize
1426 /* If op0 is a register, we need it in EXT_MODE to make it
1427 acceptable to the format of ext(z)v. */
1428 && !(GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1429 && !((REG_P (op0) || GET_CODE (op0) == SUBREG)
1430 && (bitsize + bitpos > GET_MODE_BITSIZE (ext_mode))))
1432 struct expand_operand ops[4];
1433 unsigned HOST_WIDE_INT xbitpos = bitpos, xoffset = offset;
1434 rtx xop0 = op0;
1435 rtx xtarget = target;
1436 rtx xspec_target = target;
1437 rtx xspec_target_subreg = 0;
1439 /* If op0 is a register, we need it in EXT_MODE to make it
1440 acceptable to the format of ext(z)v. */
1441 if (REG_P (xop0) && GET_MODE (xop0) != ext_mode)
1442 xop0 = gen_lowpart_SUBREG (ext_mode, xop0);
1443 if (MEM_P (xop0))
1444 /* Get ref to first byte containing part of the field. */
1445 xop0 = adjust_address (xop0, byte_mode, xoffset);
1447 /* On big-endian machines, we count bits from the most significant.
1448 If the bit field insn does not, we must invert. */
1449 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1450 xbitpos = unit - bitsize - xbitpos;
1452 /* Now convert from counting within UNIT to counting in EXT_MODE. */
1453 if (BITS_BIG_ENDIAN && !MEM_P (xop0))
1454 xbitpos += GET_MODE_BITSIZE (ext_mode) - unit;
1456 unit = GET_MODE_BITSIZE (ext_mode);
1458 if (xtarget == 0)
1459 xtarget = xspec_target = gen_reg_rtx (tmode);
1461 if (GET_MODE (xtarget) != ext_mode)
1463 /* Don't use LHS paradoxical subreg if explicit truncation is needed
1464 between the mode of the extraction (word_mode) and the target
1465 mode. Instead, create a temporary and use convert_move to set
1466 the target. */
1467 if (REG_P (xtarget)
1468 && TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (GET_MODE (xtarget)),
1469 GET_MODE_BITSIZE (ext_mode)))
1471 xtarget = gen_lowpart (ext_mode, xtarget);
1472 if (GET_MODE_SIZE (ext_mode)
1473 > GET_MODE_SIZE (GET_MODE (xspec_target)))
1474 xspec_target_subreg = xtarget;
1476 else
1477 xtarget = gen_reg_rtx (ext_mode);
1480 create_output_operand (&ops[0], xtarget, ext_mode);
1481 create_fixed_operand (&ops[1], xop0);
1482 create_integer_operand (&ops[2], bitsize);
1483 create_integer_operand (&ops[3], xbitpos);
1484 if (maybe_expand_insn (unsignedp ? CODE_FOR_extzv : CODE_FOR_extv,
1485 4, ops))
1487 xtarget = ops[0].value;
1488 if (xtarget == xspec_target)
1489 return xtarget;
1490 if (xtarget == xspec_target_subreg)
1491 return xspec_target;
1492 return convert_extracted_bit_field (xtarget, mode, tmode, unsignedp);
1496 /* If OP0 is a memory, try copying it to a register and seeing if a
1497 cheap register alternative is available. */
1498 if (ext_mode != MAX_MACHINE_MODE && MEM_P (op0))
1500 enum machine_mode bestmode;
1502 /* Get the mode to use for inserting into this field. If
1503 OP0 is BLKmode, get the smallest mode consistent with the
1504 alignment. If OP0 is a non-BLKmode object that is no
1505 wider than EXT_MODE, use its mode. Otherwise, use the
1506 smallest mode containing the field. */
1508 if (GET_MODE (op0) == BLKmode
1509 || (ext_mode != MAX_MACHINE_MODE
1510 && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (ext_mode)))
1511 bestmode = get_best_mode (bitsize, bitnum, MEM_ALIGN (op0),
1512 (ext_mode == MAX_MACHINE_MODE
1513 ? VOIDmode : ext_mode),
1514 MEM_VOLATILE_P (op0));
1515 else
1516 bestmode = GET_MODE (op0);
1518 if (bestmode != VOIDmode
1519 && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
1520 && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
1522 unsigned HOST_WIDE_INT xoffset, xbitpos;
1524 /* Compute the offset as a multiple of this unit,
1525 counting in bytes. */
1526 unit = GET_MODE_BITSIZE (bestmode);
1527 xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1528 xbitpos = bitnum % unit;
1530 /* Make sure the register is big enough for the whole field. */
1531 if (xoffset * BITS_PER_UNIT + unit
1532 >= offset * BITS_PER_UNIT + bitsize)
1534 rtx last, result, xop0;
1536 last = get_last_insn ();
1538 /* Fetch it to a register in that size. */
1539 xop0 = adjust_address (op0, bestmode, xoffset);
1540 xop0 = force_reg (bestmode, xop0);
1541 result = extract_bit_field_1 (xop0, bitsize, xbitpos,
1542 unsignedp, packedp, target,
1543 mode, tmode, false);
1544 if (result)
1545 return result;
1547 delete_insns_since (last);
1552 if (!fallback_p)
1553 return NULL;
1555 target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1556 bitpos, target, unsignedp, packedp);
1557 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
/* Generate code to extract a byte-field from STR_RTX
   containing BITSIZE bits, starting at BITNUM,
   and put it in TARGET if possible (if TARGET is nonzero).
   Regardless of TARGET, we return the rtx for where the value is placed.

   STR_RTX is the structure containing the byte (a REG or MEM).
   UNSIGNEDP is nonzero if this is an unsigned bit field.
   PACKEDP is nonzero if the field has the packed attribute.
   MODE is the natural mode of the field value once extracted.
   TMODE is the mode the caller would like the value to have;
   but the value may be returned with type MODE instead.

   If a TARGET is specified and we can store in it at no extra cost,
   we do so, and return TARGET.
   Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
   if they are equally easy.  */

rtx
extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
                   unsigned HOST_WIDE_INT bitnum, int unsignedp, bool packedp,
                   rtx target, enum machine_mode mode, enum machine_mode tmode)
{
  return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp, packedp,
                              target, mode, tmode, true);
}
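/* Illustrative sketch (hypothetical usage, for exposition): reading a
   3-bit field that starts at bit 4 of an SImode register REG back as a
   zero-extended SImode value might look like

     rtx val = extract_bit_field (reg, 3, 4, 1, false, NULL_RTX,
                                  SImode, SImode);

   Passing NULL_RTX as TARGET lets the expander pick a fresh pseudo for
   the result.  */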
/* Extract a bit field using shifts and boolean operations.
   Returns an rtx to represent the value.
   OP0 addresses a register (word) or memory (byte).
   BITPOS says which bit within the word or byte the bit field starts in.
   OFFSET says how many bytes farther the bit field starts;
     it is 0 if OP0 is a register.
   BITSIZE says how many bits long the bit field is.
   (If OP0 is a register, it may be narrower than a full word,
     but BITPOS still counts within a full word,
     which is significant on bigendian machines.)

   UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
   PACKEDP is true if the field has the packed attribute.

   If TARGET is nonzero, attempts to store the value there
   and return TARGET, but this is not guaranteed.
   If TARGET is not used, create a pseudo-reg of mode TMODE for the value.  */
1604 static rtx
1605 extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
1606 unsigned HOST_WIDE_INT offset,
1607 unsigned HOST_WIDE_INT bitsize,
1608 unsigned HOST_WIDE_INT bitpos, rtx target,
1609 int unsignedp, bool packedp)
1611 unsigned int total_bits = BITS_PER_WORD;
1612 enum machine_mode mode;
1614 if (GET_CODE (op0) == SUBREG || REG_P (op0))
1616 /* Special treatment for a bit field split across two registers. */
1617 if (bitsize + bitpos > BITS_PER_WORD)
1618 return extract_split_bit_field (op0, bitsize, bitpos, unsignedp);
1620 else
1622 /* Get the proper mode to use for this field. We want a mode that
1623 includes the entire field. If such a mode would be larger than
1624 a word, we won't be doing the extraction the normal way. */
1626 if (MEM_VOLATILE_P (op0)
1627 && flag_strict_volatile_bitfields > 0)
1629 if (GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
1630 mode = GET_MODE (op0);
1631 else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1632 mode = GET_MODE (target);
1633 else
1634 mode = tmode;
1636 else
1637 mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
1638 MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));
1640 if (mode == VOIDmode)
1641 /* The only way this should occur is if the field spans word
1642 boundaries. */
1643 return extract_split_bit_field (op0, bitsize,
1644 bitpos + offset * BITS_PER_UNIT,
1645 unsignedp);
1647 total_bits = GET_MODE_BITSIZE (mode);
1649 /* Make sure bitpos is valid for the chosen mode. Adjust BITPOS to
1650 be in the range 0 to total_bits-1, and put any excess bytes in
1651 OFFSET. */
1652 if (bitpos >= total_bits)
1654 offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
1655 bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
1656 * BITS_PER_UNIT);
1659 /* If we're accessing a volatile MEM, we can't do the next
1660 alignment step if it results in a multi-word access where we
1661 otherwise wouldn't have one. So, check for that case
1662 here. */
1663 if (MEM_P (op0)
1664 && MEM_VOLATILE_P (op0)
1665 && flag_strict_volatile_bitfields > 0
1666 && bitpos + bitsize <= total_bits
1667 && bitpos + bitsize + (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT > total_bits)
1669 if (STRICT_ALIGNMENT)
1671 static bool informed_about_misalignment = false;
1672 bool warned;
1674 if (packedp)
1676 if (bitsize == total_bits)
1677 warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1678 "multiple accesses to volatile structure member"
1679 " because of packed attribute");
1680 else
1681 warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1682 "multiple accesses to volatile structure bitfield"
1683 " because of packed attribute");
1685 return extract_split_bit_field (op0, bitsize,
1686 bitpos + offset * BITS_PER_UNIT,
1687 unsignedp);
1690 if (bitsize == total_bits)
1691 warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1692 "mis-aligned access used for structure member");
1693 else
1694 warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1695 "mis-aligned access used for structure bitfield");
1697 if (! informed_about_misalignment && warned)
1699 informed_about_misalignment = true;
1700 inform (input_location,
1701 "when a volatile object spans multiple type-sized locations,"
1702 " the compiler must choose between using a single mis-aligned access to"
1703 " preserve the volatility, or using multiple aligned accesses to avoid"
1704 " runtime faults; this code may fail at runtime if the hardware does"
1705 " not allow this access");
1709 else
1712 /* Get ref to an aligned byte, halfword, or word containing the field.
1713 Adjust BITPOS to be position within a word,
1714 and OFFSET to be the offset of that word.
1715 Then alter OP0 to refer to that word. */
1716 bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
1717 offset -= (offset % (total_bits / BITS_PER_UNIT));
1720 op0 = adjust_address (op0, mode, offset);
1723 mode = GET_MODE (op0);
1725 if (BYTES_BIG_ENDIAN)
1726 /* BITPOS is the distance between our msb and that of OP0.
1727 Convert it to the distance from the lsb. */
1728 bitpos = total_bits - bitsize - bitpos;
1730 /* Now BITPOS is always the distance between the field's lsb and that of OP0.
1731 We have reduced the big-endian case to the little-endian case. */
1733 if (unsignedp)
1735 if (bitpos)
1737 /* If the field does not already start at the lsb,
1738 shift it so it does. */
1739 tree amount = build_int_cst (NULL_TREE, bitpos);
1740 /* Maybe propagate the target for the shift. */
1741 /* But not if we will return it--could confuse integrate.c. */
1742 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1743 if (tmode != mode) subtarget = 0;
1744 op0 = expand_shift (RSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1746 /* Convert the value to the desired mode. */
1747 if (mode != tmode)
1748 op0 = convert_to_mode (tmode, op0, 1);
1750 /* Unless the msb of the field used to be the msb when we shifted,
1751 mask out the upper bits. */
1753 if (GET_MODE_BITSIZE (mode) != bitpos + bitsize)
1754 return expand_binop (GET_MODE (op0), and_optab, op0,
1755 mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1756 target, 1, OPTAB_LIB_WIDEN);
1757 return op0;
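/* For example, with BITSIZE == 5 and BITPOS == 3 in a 32-bit mode, the
   unsigned path above amounts to (OP0 >> 3) & 0x1f: a logical right shift
   that moves the field to the lsb, followed by an AND with
   mask_rtx (mode, 0, 5, 0) to clear the bits above it.  */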
1760 /* To extract a signed bit-field, first shift its msb to the msb of the word,
1761 then arithmetic-shift its lsb to the lsb of the word. */
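/* For example, a 5-bit signed field already at the lsb is narrowed to
   QImode below and extracted as (OP0 << 3) >> 3 with an arithmetic right
   shift, so the bit pattern 0b10110 (22) becomes -10.  */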
1762 op0 = force_reg (mode, op0);
1763 if (mode != tmode)
1764 target = 0;
1766 /* Find the narrowest integer mode that contains the field. */
1768 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1769 mode = GET_MODE_WIDER_MODE (mode))
1770 if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos)
1772 op0 = convert_to_mode (mode, op0, 0);
1773 break;
1776 if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos))
1778 tree amount
1779 = build_int_cst (NULL_TREE,
1780 GET_MODE_BITSIZE (mode) - (bitsize + bitpos));
1781 /* Maybe propagate the target for the shift. */
1782 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1783 op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1786 return expand_shift (RSHIFT_EXPR, mode, op0,
1787 build_int_cst (NULL_TREE,
1788 GET_MODE_BITSIZE (mode) - bitsize),
1789 target, 0);
1792 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1793 of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1794 complement of that if COMPLEMENT. The mask is truncated if
1795 necessary to the width of mode MODE. The mask is zero-extended if
1796 BITSIZE+BITPOS is too small for MODE. */
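/* For example, mask_rtx (SImode, 3, 5, 0) is 0x000000f8 (five ones followed
   by three zeros), and with COMPLEMENT nonzero it is 0xffffff07.  */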
1798 static rtx
1799 mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
1801 double_int mask;
1803 mask = double_int_mask (bitsize);
1804 mask = double_int_lshift (mask, bitpos, HOST_BITS_PER_DOUBLE_INT, false);
1806 if (complement)
1807 mask = double_int_not (mask);
1809 return immed_double_int_const (mask, mode);
1812 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1813 VALUE truncated to BITSIZE bits and then shifted left BITPOS bits. */
1815 static rtx
1816 lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize)
1818 double_int val;
1820 val = double_int_zext (uhwi_to_double_int (INTVAL (value)), bitsize);
1821 val = double_int_lshift (val, bitpos, HOST_BITS_PER_DOUBLE_INT, false);
1823 return immed_double_int_const (val, mode);
1826 /* Extract a bit field that is split across two words
1827 and return an RTX for the result.
1829 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1830 BITSIZE is the field width; BITPOS, position of its first bit, in the word.
1831 UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend. */
1833 static rtx
1834 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1835 unsigned HOST_WIDE_INT bitpos, int unsignedp)
1837 unsigned int unit;
1838 unsigned int bitsdone = 0;
1839 rtx result = NULL_RTX;
1840 int first = 1;
1842 /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
1843 much at a time. */
1844 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1845 unit = BITS_PER_WORD;
1846 else
1847 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1849 while (bitsdone < bitsize)
1851 unsigned HOST_WIDE_INT thissize;
1852 rtx part, word;
1853 unsigned HOST_WIDE_INT thispos;
1854 unsigned HOST_WIDE_INT offset;
1856 offset = (bitpos + bitsdone) / unit;
1857 thispos = (bitpos + bitsdone) % unit;
1859 /* THISSIZE must not overrun a word boundary. Otherwise,
1860 extract_fixed_bit_field will call us again, and we will mutually
1861 recurse forever. */
1862 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1863 thissize = MIN (thissize, unit - thispos);
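/* For example, with UNIT == 32, a 12-bit field starting at bit 26 is read
   as a 6-bit piece from the first word and a 6-bit piece from the next
   word; the pieces are then shifted into place and combined with IOR.  */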
1865 /* If OP0 is a register, then handle OFFSET here.
1867 When handling multiword bitfields, extract_bit_field may pass
1868 down a word_mode SUBREG of a larger REG for a bitfield that actually
1869 crosses a word boundary. Thus, for a SUBREG, we must find
1870 the current word starting from the base register. */
1871 if (GET_CODE (op0) == SUBREG)
1873 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1874 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1875 GET_MODE (SUBREG_REG (op0)));
1876 offset = 0;
1878 else if (REG_P (op0))
1880 word = operand_subword_force (op0, offset, GET_MODE (op0));
1881 offset = 0;
1883 else
1884 word = op0;
1886 /* Extract the parts in bit-counting order,
1887 whose meaning is determined by BYTES_PER_UNIT.
1888 OFFSET is in UNITs, and UNIT is in bits.
1889 extract_fixed_bit_field wants offset in bytes. */
1890 part = extract_fixed_bit_field (word_mode, word,
1891 offset * unit / BITS_PER_UNIT,
1892 thissize, thispos, 0, 1, false);
1893 bitsdone += thissize;
1895 /* Shift this part into place for the result. */
1896 if (BYTES_BIG_ENDIAN)
1898 if (bitsize != bitsdone)
1899 part = expand_shift (LSHIFT_EXPR, word_mode, part,
1900 build_int_cst (NULL_TREE, bitsize - bitsdone),
1901 0, 1);
1903 else
1905 if (bitsdone != thissize)
1906 part = expand_shift (LSHIFT_EXPR, word_mode, part,
1907 build_int_cst (NULL_TREE,
1908 bitsdone - thissize), 0, 1);
1911 if (first)
1912 result = part;
1913 else
1914 /* Combine the parts with bitwise or. This works
1915 because we extracted each part as an unsigned bit field. */
1916 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
1917 OPTAB_LIB_WIDEN);
1919 first = 0;
1922 /* Unsigned bit field: we are done. */
1923 if (unsignedp)
1924 return result;
1925 /* Signed bit field: sign-extend with two arithmetic shifts. */
1926 result = expand_shift (LSHIFT_EXPR, word_mode, result,
1927 build_int_cst (NULL_TREE, BITS_PER_WORD - bitsize),
1928 NULL_RTX, 0);
1929 return expand_shift (RSHIFT_EXPR, word_mode, result,
1930 build_int_cst (NULL_TREE, BITS_PER_WORD - bitsize),
1931 NULL_RTX, 0);
1934 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
1935 the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than
1936 MODE, fill the upper bits with zeros. Fail if the layout of either
1937 mode is unknown (as for CC modes) or if the extraction would involve
1938 unprofitable mode punning. Return the value on success, otherwise
1939 return null.
1941 This is different from gen_lowpart* in these respects:
1943 - the returned value must always be considered an rvalue
1945 - when MODE is wider than SRC_MODE, the extraction involves
1946 a zero extension
1948 - when MODE is smaller than SRC_MODE, the extraction involves
1949 a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
1951 In other words, this routine performs a computation, whereas the
1952 gen_lowpart* routines are conceptually lvalue or rvalue subreg
1953 operations. */
1955 rtx
1956 extract_low_bits (enum machine_mode mode, enum machine_mode src_mode, rtx src)
1958 enum machine_mode int_mode, src_int_mode;
1960 if (mode == src_mode)
1961 return src;
1963 if (CONSTANT_P (src))
1965 /* simplify_gen_subreg can't be used here, as if simplify_subreg
1966 fails, it will happily create (subreg (symbol_ref)) or similar
1967 invalid SUBREGs. */
1968 unsigned int byte = subreg_lowpart_offset (mode, src_mode);
1969 rtx ret = simplify_subreg (mode, src, src_mode, byte);
1970 if (ret)
1971 return ret;
1973 if (GET_MODE (src) == VOIDmode
1974 || !validate_subreg (mode, src_mode, src, byte))
1975 return NULL_RTX;
1977 src = force_reg (GET_MODE (src), src);
1978 return gen_rtx_SUBREG (mode, src, byte);
1981 if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
1982 return NULL_RTX;
1984 if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
1985 && MODES_TIEABLE_P (mode, src_mode))
1987 rtx x = gen_lowpart_common (mode, src);
1988 if (x)
1989 return x;
1992 src_int_mode = int_mode_for_mode (src_mode);
1993 int_mode = int_mode_for_mode (mode);
1994 if (src_int_mode == BLKmode || int_mode == BLKmode)
1995 return NULL_RTX;
1997 if (!MODES_TIEABLE_P (src_int_mode, src_mode))
1998 return NULL_RTX;
1999 if (!MODES_TIEABLE_P (int_mode, mode))
2000 return NULL_RTX;
2002 src = gen_lowpart (src_int_mode, src);
2003 src = convert_modes (int_mode, src_int_mode, src, true);
2004 src = gen_lowpart (mode, src);
2005 return src;
2008 /* Add INC into TARGET. */
2010 void
2011 expand_inc (rtx target, rtx inc)
2013 rtx value = expand_binop (GET_MODE (target), add_optab,
2014 target, inc,
2015 target, 0, OPTAB_LIB_WIDEN);
2016 if (value != target)
2017 emit_move_insn (target, value);
2020 /* Subtract DEC from TARGET. */
2022 void
2023 expand_dec (rtx target, rtx dec)
2025 rtx value = expand_binop (GET_MODE (target), sub_optab,
2026 target, dec,
2027 target, 0, OPTAB_LIB_WIDEN);
2028 if (value != target)
2029 emit_move_insn (target, value);
2032 /* Output a shift instruction for expression code CODE,
2033 with SHIFTED being the rtx for the value to shift,
2034 and AMOUNT the tree for the amount to shift by.
2035 Store the result in the rtx TARGET, if that is convenient.
2036 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2037 Return the rtx for where the value is. */
2039 rtx
2040 expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2041 tree amount, rtx target, int unsignedp)
2043 rtx op1, temp = 0;
2044 int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2045 int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2046 optab lshift_optab = ashl_optab;
2047 optab rshift_arith_optab = ashr_optab;
2048 optab rshift_uns_optab = lshr_optab;
2049 optab lrotate_optab = rotl_optab;
2050 optab rrotate_optab = rotr_optab;
2051 enum machine_mode op1_mode;
2052 int attempt;
2053 bool speed = optimize_insn_for_speed_p ();
2055 op1 = expand_normal (amount);
2056 op1_mode = GET_MODE (op1);
2058 /* Determine whether the shift/rotate amount is a vector, or scalar. If the
2059 shift amount is a vector, use the vector/vector shift patterns. */
2060 if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2062 lshift_optab = vashl_optab;
2063 rshift_arith_optab = vashr_optab;
2064 rshift_uns_optab = vlshr_optab;
2065 lrotate_optab = vrotl_optab;
2066 rrotate_optab = vrotr_optab;
2069 /* We used to detect shift counts computed by NEGATE_EXPR
2070 and shift in the other direction instead, but that does not work
2071 on all machines. */
2073 if (SHIFT_COUNT_TRUNCATED)
2075 if (CONST_INT_P (op1)
2076 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2077 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
2078 op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2079 % GET_MODE_BITSIZE (mode));
2080 else if (GET_CODE (op1) == SUBREG
2081 && subreg_lowpart_p (op1)
2082 && INTEGRAL_MODE_P (GET_MODE (SUBREG_REG (op1))))
2083 op1 = SUBREG_REG (op1);
2086 if (op1 == const0_rtx)
2087 return shifted;
2089 /* Check whether it's cheaper to implement a left shift by a constant
2090 bit count by a sequence of additions. */
2091 if (code == LSHIFT_EXPR
2092 && CONST_INT_P (op1)
2093 && INTVAL (op1) > 0
2094 && INTVAL (op1) < GET_MODE_BITSIZE (mode)
2095 && INTVAL (op1) < MAX_BITS_PER_WORD
2096 && shift_cost[speed][mode][INTVAL (op1)] > INTVAL (op1) * add_cost[speed][mode]
2097 && shift_cost[speed][mode][INTVAL (op1)] != MAX_COST)
2099 int i;
2100 for (i = 0; i < INTVAL (op1); i++)
2102 temp = force_reg (mode, shifted);
2103 shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2104 unsignedp, OPTAB_LIB_WIDEN);
2106 return shifted;
2109 for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2111 enum optab_methods methods;
2113 if (attempt == 0)
2114 methods = OPTAB_DIRECT;
2115 else if (attempt == 1)
2116 methods = OPTAB_WIDEN;
2117 else
2118 methods = OPTAB_LIB_WIDEN;
2120 if (rotate)
2122 /* Widening does not work for rotation. */
2123 if (methods == OPTAB_WIDEN)
2124 continue;
2125 else if (methods == OPTAB_LIB_WIDEN)
2127 /* If we have been unable to open-code this by a rotation,
2128 do it as the IOR of two shifts. I.e., to rotate A
2129 by N bits, compute (A << N) | ((unsigned) A >> (C - N))
2130 where C is the bitsize of A.
2132 It is theoretically possible that the target machine might
2133 not be able to perform either shift and hence we would
2134 be making two libcalls rather than just the one for the
2135 shift (similarly if IOR could not be done). We will allow
2136 this extremely unlikely lossage to avoid complicating the
2137 code below. */
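/* For example, rotating the QImode value 0xb4 left by 3 becomes
   (0xb4 << 3 | (unsigned) 0xb4 >> 5) & 0xff == 0xa5.  */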
2139 rtx subtarget = target == shifted ? 0 : target;
2140 tree new_amount, other_amount;
2141 rtx temp1;
2142 tree type = TREE_TYPE (amount);
2143 if (GET_MODE (op1) != TYPE_MODE (type)
2144 && GET_MODE (op1) != VOIDmode)
2145 op1 = convert_to_mode (TYPE_MODE (type), op1, 1);
2146 new_amount = make_tree (type, op1);
2147 other_amount
2148 = fold_build2 (MINUS_EXPR, type,
2149 build_int_cst (type, GET_MODE_BITSIZE (mode)),
2150 new_amount);
2152 shifted = force_reg (mode, shifted);
2154 temp = expand_shift (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2155 mode, shifted, new_amount, 0, 1);
2156 temp1 = expand_shift (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2157 mode, shifted, other_amount, subtarget, 1);
2158 return expand_binop (mode, ior_optab, temp, temp1, target,
2159 unsignedp, methods);
2162 temp = expand_binop (mode,
2163 left ? lrotate_optab : rrotate_optab,
2164 shifted, op1, target, unsignedp, methods);
2166 else if (unsignedp)
2167 temp = expand_binop (mode,
2168 left ? lshift_optab : rshift_uns_optab,
2169 shifted, op1, target, unsignedp, methods);
2171 /* Do arithmetic shifts.
2172 Also, if we are going to widen the operand, we can just as well
2173 use an arithmetic right-shift instead of a logical one. */
2174 if (temp == 0 && ! rotate
2175 && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2177 enum optab_methods methods1 = methods;
2179 /* If trying to widen a log shift to an arithmetic shift,
2180 don't accept an arithmetic shift of the same size. */
2181 if (unsignedp)
2182 methods1 = OPTAB_MUST_WIDEN;
2184 /* Arithmetic shift */
2186 temp = expand_binop (mode,
2187 left ? lshift_optab : rshift_arith_optab,
2188 shifted, op1, target, unsignedp, methods1);
2191 /* We used to try extzv here for logical right shifts, but that was
2192 only useful for one machine, the VAX, and caused poor code
2193 generation there for lshrdi3, so the code was deleted and a
2194 define_expand for lshrsi3 was added to vax.md. */
2197 gcc_assert (temp);
2198 return temp;
2201 /* Indicates the type of fixup needed after a constant multiplication.
2202 BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2203 the result should be negated, and ADD_VARIANT means that the
2204 multiplicand should be added to the result. */
2205 enum mult_variant {basic_variant, negate_variant, add_variant};
2207 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2208 const struct mult_cost *, enum machine_mode mode);
2209 static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
2210 struct algorithm *, enum mult_variant *, int);
2211 static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
2212 const struct algorithm *, enum mult_variant);
2213 static unsigned HOST_WIDE_INT choose_multiplier (unsigned HOST_WIDE_INT, int,
2214 int, rtx *, int *, int *);
2215 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2216 static rtx extract_high_half (enum machine_mode, rtx);
2217 static rtx expand_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
2218 static rtx expand_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
2219 int, int);
2220 /* Compute and return the best algorithm for multiplying by T.
2221 The algorithm must cost less than COST_LIMIT.
2222 If retval.cost >= COST_LIMIT, no algorithm was found and all
2223 other fields of the returned struct are undefined.
2224 MODE is the machine mode of the multiplication. */
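/* For example, multiplying by 10 can be synthesized as ((x << 2) + x) << 1
   and multiplying by 7 as (x << 3) - x; synth_mult searches for the
   cheapest such shift/add/subtract sequence whose cost stays below
   COST_LIMIT.  */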
2226 static void
2227 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2228 const struct mult_cost *cost_limit, enum machine_mode mode)
2230 int m;
2231 struct algorithm *alg_in, *best_alg;
2232 struct mult_cost best_cost;
2233 struct mult_cost new_limit;
2234 int op_cost, op_latency;
2235 unsigned HOST_WIDE_INT orig_t = t;
2236 unsigned HOST_WIDE_INT q;
2237 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
2238 int hash_index;
2239 bool cache_hit = false;
2240 enum alg_code cache_alg = alg_zero;
2241 bool speed = optimize_insn_for_speed_p ();
2243 /* Indicate that no algorithm is yet found. If no algorithm
2244 is found, this value will be returned and indicate failure. */
2245 alg_out->cost.cost = cost_limit->cost + 1;
2246 alg_out->cost.latency = cost_limit->latency + 1;
2248 if (cost_limit->cost < 0
2249 || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2250 return;
2252 /* Restrict the bits of "t" to the multiplication's mode. */
2253 t &= GET_MODE_MASK (mode);
2255 /* t == 1 can be done in zero cost. */
2256 if (t == 1)
2258 alg_out->ops = 1;
2259 alg_out->cost.cost = 0;
2260 alg_out->cost.latency = 0;
2261 alg_out->op[0] = alg_m;
2262 return;
2265 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2266 fail now. */
2267 if (t == 0)
2269 if (MULT_COST_LESS (cost_limit, zero_cost[speed]))
2270 return;
2271 else
2273 alg_out->ops = 1;
2274 alg_out->cost.cost = zero_cost[speed];
2275 alg_out->cost.latency = zero_cost[speed];
2276 alg_out->op[0] = alg_zero;
2277 return;
2281 /* We'll be needing a couple extra algorithm structures now. */
2283 alg_in = XALLOCA (struct algorithm);
2284 best_alg = XALLOCA (struct algorithm);
2285 best_cost = *cost_limit;
2287 /* Compute the hash index. */
2288 hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2290 /* See if we already know what to do for T. */
2291 if (alg_hash[hash_index].t == t
2292 && alg_hash[hash_index].mode == mode
2294 && alg_hash[hash_index].speed == speed
2295 && alg_hash[hash_index].alg != alg_unknown)
2297 cache_alg = alg_hash[hash_index].alg;
2299 if (cache_alg == alg_impossible)
2301 /* The cache tells us that it's impossible to synthesize
2302 multiplication by T within alg_hash[hash_index].cost. */
2303 if (!CHEAPER_MULT_COST (&alg_hash[hash_index].cost, cost_limit))
2304 /* COST_LIMIT is at least as restrictive as the one
2305 recorded in the hash table, in which case we have no
2306 hope of synthesizing a multiplication. Just
2307 return. */
2308 return;
2310 /* If we get here, COST_LIMIT is less restrictive than the
2311 one recorded in the hash table, so we may be able to
2312 synthesize a multiplication. Proceed as if we didn't
2313 have the cache entry. */
2315 else
2317 if (CHEAPER_MULT_COST (cost_limit, &alg_hash[hash_index].cost))
2318 /* The cached algorithm shows that this multiplication
2319 requires more cost than COST_LIMIT. Just return. This
2320 way, we don't clobber this cache entry with
2321 alg_impossible but retain useful information. */
2322 return;
2324 cache_hit = true;
2326 switch (cache_alg)
2328 case alg_shift:
2329 goto do_alg_shift;
2331 case alg_add_t_m2:
2332 case alg_sub_t_m2:
2333 goto do_alg_addsub_t_m2;
2335 case alg_add_factor:
2336 case alg_sub_factor:
2337 goto do_alg_addsub_factor;
2339 case alg_add_t2_m:
2340 goto do_alg_add_t2_m;
2342 case alg_sub_t2_m:
2343 goto do_alg_sub_t2_m;
2345 default:
2346 gcc_unreachable ();
2351 /* If we have a group of zero bits at the low-order part of T, try
2352 multiplying by the remaining bits and then doing a shift. */
2354 if ((t & 1) == 0)
2356 do_alg_shift:
2357 m = floor_log2 (t & -t); /* m = number of low zero bits */
2358 if (m < maxm)
2360 q = t >> m;
2361 /* The function expand_shift will choose between a shift and
2362 a sequence of additions, so the observed cost is given as
2363 MIN (m * add_cost[speed][mode], shift_cost[speed][mode][m]). */
2364 op_cost = m * add_cost[speed][mode];
2365 if (shift_cost[speed][mode][m] < op_cost)
2366 op_cost = shift_cost[speed][mode][m];
2367 new_limit.cost = best_cost.cost - op_cost;
2368 new_limit.latency = best_cost.latency - op_cost;
2369 synth_mult (alg_in, q, &new_limit, mode);
2371 alg_in->cost.cost += op_cost;
2372 alg_in->cost.latency += op_cost;
2373 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2375 struct algorithm *x;
2376 best_cost = alg_in->cost;
2377 x = alg_in, alg_in = best_alg, best_alg = x;
2378 best_alg->log[best_alg->ops] = m;
2379 best_alg->op[best_alg->ops] = alg_shift;
2382 /* See if treating ORIG_T as a signed number yields a better
2383 sequence. Try this sequence only for a negative ORIG_T
2384 as it would be useless for a non-negative ORIG_T. */
2385 if ((HOST_WIDE_INT) orig_t < 0)
2387 /* Shift ORIG_T as follows because a right shift of a
2388 negative-valued signed type is implementation
2389 defined. */
2390 q = ~(~orig_t >> m);
2391 /* The function expand_shift will choose between a shift
2392 and a sequence of additions, so the observed cost is
2393 given as MIN (m * add_cost[speed][mode],
2394 shift_cost[speed][mode][m]). */
2395 op_cost = m * add_cost[speed][mode];
2396 if (shift_cost[speed][mode][m] < op_cost)
2397 op_cost = shift_cost[speed][mode][m];
2398 new_limit.cost = best_cost.cost - op_cost;
2399 new_limit.latency = best_cost.latency - op_cost;
2400 synth_mult (alg_in, q, &new_limit, mode);
2402 alg_in->cost.cost += op_cost;
2403 alg_in->cost.latency += op_cost;
2404 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2406 struct algorithm *x;
2407 best_cost = alg_in->cost;
2408 x = alg_in, alg_in = best_alg, best_alg = x;
2409 best_alg->log[best_alg->ops] = m;
2410 best_alg->op[best_alg->ops] = alg_shift;
2414 if (cache_hit)
2415 goto done;
2418 /* If we have an odd number, add or subtract one. */
2419 if ((t & 1) != 0)
2421 unsigned HOST_WIDE_INT w;
2423 do_alg_addsub_t_m2:
2424 for (w = 1; (w & t) != 0; w <<= 1)
2426 /* If T was -1, then W will be zero after the loop. This is another
2427 case where T ends with ...111. Handling this by multiplying by (T + 1)
2428 and subtracting 1 produces slightly better code and selects an
2429 algorithm much faster than treating it like the ...0111 case
2430 below. */
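/* For example, T == 7 (binary ...111) is handled as (T + 1) == 8 followed
   by a subtract, giving x*7 = (x << 3) - x, whereas T == 5 (binary ...101)
   is handled as (T - 1) == 4 followed by an add, giving x*5 = (x << 2) + x.  */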
2431 if (w == 0
2432 || (w > 2
2433 /* Reject the case where t is 3.
2434 Thus we prefer addition in that case. */
2435 && t != 3))
2437 /* T ends with ...111. Multiply by (T + 1) and subtract 1. */
2439 op_cost = add_cost[speed][mode];
2440 new_limit.cost = best_cost.cost - op_cost;
2441 new_limit.latency = best_cost.latency - op_cost;
2442 synth_mult (alg_in, t + 1, &new_limit, mode);
2444 alg_in->cost.cost += op_cost;
2445 alg_in->cost.latency += op_cost;
2446 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2448 struct algorithm *x;
2449 best_cost = alg_in->cost;
2450 x = alg_in, alg_in = best_alg, best_alg = x;
2451 best_alg->log[best_alg->ops] = 0;
2452 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2455 else
2457 /* T ends with ...01 or ...011. Multiply by (T - 1) and add 1. */
2459 op_cost = add_cost[speed][mode];
2460 new_limit.cost = best_cost.cost - op_cost;
2461 new_limit.latency = best_cost.latency - op_cost;
2462 synth_mult (alg_in, t - 1, &new_limit, mode);
2464 alg_in->cost.cost += op_cost;
2465 alg_in->cost.latency += op_cost;
2466 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2468 struct algorithm *x;
2469 best_cost = alg_in->cost;
2470 x = alg_in, alg_in = best_alg, best_alg = x;
2471 best_alg->log[best_alg->ops] = 0;
2472 best_alg->op[best_alg->ops] = alg_add_t_m2;
2476 /* We may be able to calculate a * -7, a * -15, a * -31, etc
2477 quickly with a - a * n for some appropriate constant n. */
2478 m = exact_log2 (-orig_t + 1);
2479 if (m >= 0 && m < maxm)
2481 op_cost = shiftsub1_cost[speed][mode][m];
2482 new_limit.cost = best_cost.cost - op_cost;
2483 new_limit.latency = best_cost.latency - op_cost;
2484 synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m, &new_limit, mode);
2486 alg_in->cost.cost += op_cost;
2487 alg_in->cost.latency += op_cost;
2488 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2490 struct algorithm *x;
2491 best_cost = alg_in->cost;
2492 x = alg_in, alg_in = best_alg, best_alg = x;
2493 best_alg->log[best_alg->ops] = m;
2494 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2498 if (cache_hit)
2499 goto done;
2502 /* Look for factors of t of the form
2503 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2504 If we find such a factor, we can multiply by t using an algorithm that
2505 multiplies by q, shift the result by m and add/subtract it to itself.
2507 We search for large factors first and loop down, even if large factors
2508 are less probable than small; if we find a large factor we will find a
2509 good sequence quickly, and therefore be able to prune (by decreasing
2510 COST_LIMIT) the search. */
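/* For example, T == 45 factors as (2**4 - 1) * 3, so x*45 can be computed
   as y = (x << 1) + x followed by (y << 4) - y.  */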
2512 do_alg_addsub_factor:
2513 for (m = floor_log2 (t - 1); m >= 2; m--)
2515 unsigned HOST_WIDE_INT d;
2517 d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2518 if (t % d == 0 && t > d && m < maxm
2519 && (!cache_hit || cache_alg == alg_add_factor))
2521 /* If the target has a cheap shift-and-add instruction use
2522 that in preference to a shift insn followed by an add insn.
2523 Assume that the shift-and-add is "atomic" with a latency
2524 equal to its cost, otherwise assume that on superscalar
2525 hardware the shift may be executed concurrently with the
2526 earlier steps in the algorithm. */
2527 op_cost = add_cost[speed][mode] + shift_cost[speed][mode][m];
2528 if (shiftadd_cost[speed][mode][m] < op_cost)
2530 op_cost = shiftadd_cost[speed][mode][m];
2531 op_latency = op_cost;
2533 else
2534 op_latency = add_cost[speed][mode];
2536 new_limit.cost = best_cost.cost - op_cost;
2537 new_limit.latency = best_cost.latency - op_latency;
2538 synth_mult (alg_in, t / d, &new_limit, mode);
2540 alg_in->cost.cost += op_cost;
2541 alg_in->cost.latency += op_latency;
2542 if (alg_in->cost.latency < op_cost)
2543 alg_in->cost.latency = op_cost;
2544 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2546 struct algorithm *x;
2547 best_cost = alg_in->cost;
2548 x = alg_in, alg_in = best_alg, best_alg = x;
2549 best_alg->log[best_alg->ops] = m;
2550 best_alg->op[best_alg->ops] = alg_add_factor;
2552 /* Other factors will have been taken care of in the recursion. */
2553 break;
2556 d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2557 if (t % d == 0 && t > d && m < maxm
2558 && (!cache_hit || cache_alg == alg_sub_factor))
2560 /* If the target has a cheap shift-and-subtract insn use
2561 that in preference to a shift insn followed by a sub insn.
2562 Assume that the shift-and-sub is "atomic" with a latency
2563 equal to its cost, otherwise assume that on superscalar
2564 hardware the shift may be executed concurrently with the
2565 earlier steps in the algorithm. */
2566 op_cost = add_cost[speed][mode] + shift_cost[speed][mode][m];
2567 if (shiftsub0_cost[speed][mode][m] < op_cost)
2569 op_cost = shiftsub0_cost[speed][mode][m];
2570 op_latency = op_cost;
2572 else
2573 op_latency = add_cost[speed][mode];
2575 new_limit.cost = best_cost.cost - op_cost;
2576 new_limit.latency = best_cost.latency - op_latency;
2577 synth_mult (alg_in, t / d, &new_limit, mode);
2579 alg_in->cost.cost += op_cost;
2580 alg_in->cost.latency += op_latency;
2581 if (alg_in->cost.latency < op_cost)
2582 alg_in->cost.latency = op_cost;
2583 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2585 struct algorithm *x;
2586 best_cost = alg_in->cost;
2587 x = alg_in, alg_in = best_alg, best_alg = x;
2588 best_alg->log[best_alg->ops] = m;
2589 best_alg->op[best_alg->ops] = alg_sub_factor;
2591 break;
2594 if (cache_hit)
2595 goto done;
2597 /* Try shift-and-add (load effective address) instructions,
2598 i.e. do a*3, a*5, a*9. */
2599 if ((t & 1) != 0)
2601 do_alg_add_t2_m:
2602 q = t - 1;
2603 q = q & -q;
2604 m = exact_log2 (q);
2605 if (m >= 0 && m < maxm)
2607 op_cost = shiftadd_cost[speed][mode][m];
2608 new_limit.cost = best_cost.cost - op_cost;
2609 new_limit.latency = best_cost.latency - op_cost;
2610 synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2612 alg_in->cost.cost += op_cost;
2613 alg_in->cost.latency += op_cost;
2614 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2616 struct algorithm *x;
2617 best_cost = alg_in->cost;
2618 x = alg_in, alg_in = best_alg, best_alg = x;
2619 best_alg->log[best_alg->ops] = m;
2620 best_alg->op[best_alg->ops] = alg_add_t2_m;
2623 if (cache_hit)
2624 goto done;
2626 do_alg_sub_t2_m:
2627 q = t + 1;
2628 q = q & -q;
2629 m = exact_log2 (q);
2630 if (m >= 0 && m < maxm)
2632 op_cost = shiftsub0_cost[speed][mode][m];
2633 new_limit.cost = best_cost.cost - op_cost;
2634 new_limit.latency = best_cost.latency - op_cost;
2635 synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2637 alg_in->cost.cost += op_cost;
2638 alg_in->cost.latency += op_cost;
2639 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2641 struct algorithm *x;
2642 best_cost = alg_in->cost;
2643 x = alg_in, alg_in = best_alg, best_alg = x;
2644 best_alg->log[best_alg->ops] = m;
2645 best_alg->op[best_alg->ops] = alg_sub_t2_m;
2648 if (cache_hit)
2649 goto done;
2652 done:
2653 /* If best_cost has not decreased, we have not found any algorithm. */
2654 if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2656 /* We failed to find an algorithm. Record alg_impossible for
2657 this case (that is, <T, MODE, COST_LIMIT>) so that next time
2658 we are asked to find an algorithm for T within the same or
2659 lower COST_LIMIT, we can immediately return to the
2660 caller. */
2661 alg_hash[hash_index].t = t;
2662 alg_hash[hash_index].mode = mode;
2663 alg_hash[hash_index].speed = speed;
2664 alg_hash[hash_index].alg = alg_impossible;
2665 alg_hash[hash_index].cost = *cost_limit;
2666 return;
2669 /* Cache the result. */
2670 if (!cache_hit)
2672 alg_hash[hash_index].t = t;
2673 alg_hash[hash_index].mode = mode;
2674 alg_hash[hash_index].speed = speed;
2675 alg_hash[hash_index].alg = best_alg->op[best_alg->ops];
2676 alg_hash[hash_index].cost.cost = best_cost.cost;
2677 alg_hash[hash_index].cost.latency = best_cost.latency;
2680 /* If the sequence is too long for `struct algorithm'
2681 to record, make this search fail. */
2682 if (best_alg->ops == MAX_BITS_PER_WORD)
2683 return;
2685 /* Copy the algorithm from temporary space to the space at alg_out.
2686 We avoid using structure assignment because the majority of
2687 best_alg is normally undefined, and this is a critical function. */
2688 alg_out->ops = best_alg->ops + 1;
2689 alg_out->cost = best_cost;
2690 memcpy (alg_out->op, best_alg->op,
2691 alg_out->ops * sizeof *alg_out->op);
2692 memcpy (alg_out->log, best_alg->log,
2693 alg_out->ops * sizeof *alg_out->log);
2696 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2697 Try three variations:
2699 - a shift/add sequence based on VAL itself
2700 - a shift/add sequence based on -VAL, followed by a negation
2701 - a shift/add sequence based on VAL - 1, followed by an addition.
2703 Return true if the cheapest of these cost less than MULT_COST,
2704 describing the algorithm in *ALG and final fixup in *VARIANT. */
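/* For example, for VAL == -3 the basic variant would have to synthesize the
   full-width bit pattern of -3, whereas the negate variant synthesizes 3 as
   (x << 1) + x and then negates the result, which is usually cheaper.  */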
2706 static bool
2707 choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
2708 struct algorithm *alg, enum mult_variant *variant,
2709 int mult_cost)
2711 struct algorithm alg2;
2712 struct mult_cost limit;
2713 int op_cost;
2714 bool speed = optimize_insn_for_speed_p ();
2716 /* Fail quickly for impossible bounds. */
2717 if (mult_cost < 0)
2718 return false;
2720 /* Ensure that mult_cost provides a reasonable upper bound.
2721 Any constant multiplication can be performed with fewer
2722 than 2 * bits additions. */
2723 op_cost = 2 * GET_MODE_BITSIZE (mode) * add_cost[speed][mode];
2724 if (mult_cost > op_cost)
2725 mult_cost = op_cost;
2727 *variant = basic_variant;
2728 limit.cost = mult_cost;
2729 limit.latency = mult_cost;
2730 synth_mult (alg, val, &limit, mode);
2732 /* This works only if the inverted value actually fits in an
2733 `unsigned int'. */
2734 if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
2736 op_cost = neg_cost[speed][mode];
2737 if (MULT_COST_LESS (&alg->cost, mult_cost))
2739 limit.cost = alg->cost.cost - op_cost;
2740 limit.latency = alg->cost.latency - op_cost;
2742 else
2744 limit.cost = mult_cost - op_cost;
2745 limit.latency = mult_cost - op_cost;
2748 synth_mult (&alg2, -val, &limit, mode);
2749 alg2.cost.cost += op_cost;
2750 alg2.cost.latency += op_cost;
2751 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2752 *alg = alg2, *variant = negate_variant;
2755 /* This proves very useful for division-by-constant. */
2756 op_cost = add_cost[speed][mode];
2757 if (MULT_COST_LESS (&alg->cost, mult_cost))
2759 limit.cost = alg->cost.cost - op_cost;
2760 limit.latency = alg->cost.latency - op_cost;
2762 else
2764 limit.cost = mult_cost - op_cost;
2765 limit.latency = mult_cost - op_cost;
2768 synth_mult (&alg2, val - 1, &limit, mode);
2769 alg2.cost.cost += op_cost;
2770 alg2.cost.latency += op_cost;
2771 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2772 *alg = alg2, *variant = add_variant;
2774 return MULT_COST_LESS (&alg->cost, mult_cost);
2777 /* A subroutine of expand_mult, used for constant multiplications.
2778 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2779 convenient. Use the shift/add sequence described by ALG and apply
2780 the final fixup specified by VARIANT. */
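/* For example, if ALG describes op[0] == alg_m and op[1] == alg_add_t2_m
   with log 3, ACCUM starts as OP0, the loop computes
   ACCUM = (ACCUM << 3) + OP0, and VAL_SO_FAR ends up as (1 << 3) + 1 == 9,
   i.e. the sequence multiplies by 9.  */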
2782 static rtx
2783 expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
2784 rtx target, const struct algorithm *alg,
2785 enum mult_variant variant)
2787 HOST_WIDE_INT val_so_far;
2788 rtx insn, accum, tem;
2789 int opno;
2790 enum machine_mode nmode;
2792 /* Avoid referencing memory over and over and invalid sharing
2793 on SUBREGs. */
2794 op0 = force_reg (mode, op0);
2796 /* ACCUM starts out either as OP0 or as a zero, depending on
2797 the first operation. */
2799 if (alg->op[0] == alg_zero)
2801 accum = copy_to_mode_reg (mode, const0_rtx);
2802 val_so_far = 0;
2804 else if (alg->op[0] == alg_m)
2806 accum = copy_to_mode_reg (mode, op0);
2807 val_so_far = 1;
2809 else
2810 gcc_unreachable ();
2812 for (opno = 1; opno < alg->ops; opno++)
2814 int log = alg->log[opno];
2815 rtx shift_subtarget = optimize ? 0 : accum;
2816 rtx add_target
2817 = (opno == alg->ops - 1 && target != 0 && variant != add_variant
2818 && !optimize)
2819 ? target : 0;
2820 rtx accum_target = optimize ? 0 : accum;
2822 switch (alg->op[opno])
2824 case alg_shift:
2825 tem = expand_shift (LSHIFT_EXPR, mode, accum,
2826 build_int_cst (NULL_TREE, log),
2827 NULL_RTX, 0);
2828 /* REG_EQUAL note will be attached to the following insn. */
2829 emit_move_insn (accum, tem);
2830 val_so_far <<= log;
2831 break;
2833 case alg_add_t_m2:
2834 tem = expand_shift (LSHIFT_EXPR, mode, op0,
2835 build_int_cst (NULL_TREE, log),
2836 NULL_RTX, 0);
2837 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2838 add_target ? add_target : accum_target);
2839 val_so_far += (HOST_WIDE_INT) 1 << log;
2840 break;
2842 case alg_sub_t_m2:
2843 tem = expand_shift (LSHIFT_EXPR, mode, op0,
2844 build_int_cst (NULL_TREE, log),
2845 NULL_RTX, 0);
2846 accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
2847 add_target ? add_target : accum_target);
2848 val_so_far -= (HOST_WIDE_INT) 1 << log;
2849 break;
2851 case alg_add_t2_m:
2852 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2853 build_int_cst (NULL_TREE, log),
2854 shift_subtarget,
2855 0);
2856 accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
2857 add_target ? add_target : accum_target);
2858 val_so_far = (val_so_far << log) + 1;
2859 break;
2861 case alg_sub_t2_m:
2862 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2863 build_int_cst (NULL_TREE, log),
2864 shift_subtarget, 0);
2865 accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
2866 add_target ? add_target : accum_target);
2867 val_so_far = (val_so_far << log) - 1;
2868 break;
2870 case alg_add_factor:
2871 tem = expand_shift (LSHIFT_EXPR, mode, accum,
2872 build_int_cst (NULL_TREE, log),
2873 NULL_RTX, 0);
2874 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2875 add_target ? add_target : accum_target);
2876 val_so_far += val_so_far << log;
2877 break;
2879 case alg_sub_factor:
2880 tem = expand_shift (LSHIFT_EXPR, mode, accum,
2881 build_int_cst (NULL_TREE, log),
2882 NULL_RTX, 0);
2883 accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
2884 (add_target
2885 ? add_target : (optimize ? 0 : tem)));
2886 val_so_far = (val_so_far << log) - val_so_far;
2887 break;
2889 default:
2890 gcc_unreachable ();
2893 /* Write a REG_EQUAL note on the last insn so that we can cse
2894 multiplication sequences. Note that if ACCUM is a SUBREG,
2895 we've set the inner register and must properly indicate
2896 that. */
2898 tem = op0, nmode = mode;
2899 if (GET_CODE (accum) == SUBREG)
2901 nmode = GET_MODE (SUBREG_REG (accum));
2902 tem = gen_lowpart (nmode, op0);
2905 insn = get_last_insn ();
2906 set_unique_reg_note (insn, REG_EQUAL,
2907 gen_rtx_MULT (nmode, tem,
2908 GEN_INT (val_so_far)));
2911 if (variant == negate_variant)
2913 val_so_far = -val_so_far;
2914 accum = expand_unop (mode, neg_optab, accum, target, 0);
2916 else if (variant == add_variant)
2918 val_so_far = val_so_far + 1;
2919 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
2922 /* Compare only the bits of val and val_so_far that are significant
2923 in the result mode, to avoid sign-/zero-extension confusion. */
2924 val &= GET_MODE_MASK (mode);
2925 val_so_far &= GET_MODE_MASK (mode);
2926 gcc_assert (val == val_so_far);
2928 return accum;
2931 /* Perform a multiplication and return an rtx for the result.
2932 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
2933 TARGET is a suggestion for where to store the result (an rtx).
2935 We check specially for a constant integer as OP1.
2936 If you want this check for OP0 as well, then before calling
2937 you should swap the two operands if OP0 would be constant. */
2939 rtx
2940 expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
2941 int unsignedp)
2943 enum mult_variant variant;
2944 struct algorithm algorithm;
2945 int max_cost;
2946 bool speed = optimize_insn_for_speed_p ();
2948 /* Handling const0_rtx here allows us to use zero as a rogue value for
2949 coeff below. */
2950 if (op1 == const0_rtx)
2951 return const0_rtx;
2952 if (op1 == const1_rtx)
2953 return op0;
2954 if (op1 == constm1_rtx)
2955 return expand_unop (mode,
2956 GET_MODE_CLASS (mode) == MODE_INT
2957 && !unsignedp && flag_trapv
2958 ? negv_optab : neg_optab,
2959 op0, target, 0);
2961 /* These are the operations that are potentially turned into a sequence
2962 of shifts and additions. */
2963 if (SCALAR_INT_MODE_P (mode)
2964 && (unsignedp || !flag_trapv))
2966 HOST_WIDE_INT coeff = 0;
2967 rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
2969 /* synth_mult does an `unsigned int' multiply. As long as the mode is
2970 less than or equal in size to `unsigned int' this doesn't matter.
2971 If the mode is larger than `unsigned int', then synth_mult works
2972 only if the constant value exactly fits in an `unsigned int' without
2973 any truncation. This means that multiplying by negative values does
2974 not work; results are off by 2^32 on a 32-bit machine. */
2976 if (CONST_INT_P (op1))
2978 /* Attempt to handle multiplication of DImode values by negative
2979 coefficients, by performing the multiplication by a positive
2980 multiplier and then negating the result. */
2981 if (INTVAL (op1) < 0
2982 && GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT)
2984 /* It's safe to use -INTVAL (op1) even for INT_MIN, as the
2985 result is interpreted as an unsigned coefficient.
2986 Exclude cost of op0 from max_cost to match the cost
2987 calculation of the synth_mult. */
2988 max_cost = rtx_cost (gen_rtx_MULT (mode, fake_reg, op1), SET, speed)
2989 - neg_cost[speed][mode];
2990 if (max_cost > 0
2991 && choose_mult_variant (mode, -INTVAL (op1), &algorithm,
2992 &variant, max_cost))
2994 rtx temp = expand_mult_const (mode, op0, -INTVAL (op1),
2995 NULL_RTX, &algorithm,
2996 variant);
2997 return expand_unop (mode, neg_optab, temp, target, 0);
3000 else coeff = INTVAL (op1);
3002 else if (GET_CODE (op1) == CONST_DOUBLE)
3004 /* If we are multiplying in DImode, it may still be a win
3005 to try to work with shifts and adds. */
3006 if (CONST_DOUBLE_HIGH (op1) == 0
3007 && CONST_DOUBLE_LOW (op1) > 0)
3008 coeff = CONST_DOUBLE_LOW (op1);
3009 else if (CONST_DOUBLE_LOW (op1) == 0
3010 && EXACT_POWER_OF_2_OR_ZERO_P (CONST_DOUBLE_HIGH (op1)))
3012 int shift = floor_log2 (CONST_DOUBLE_HIGH (op1))
3013 + HOST_BITS_PER_WIDE_INT;
3014 return expand_shift (LSHIFT_EXPR, mode, op0,
3015 build_int_cst (NULL_TREE, shift),
3016 target, unsignedp);
3020 /* We used to test optimize here, on the grounds that it's better to
3021 produce a smaller program when -O is not used. But this causes
3022 such a terrible slowdown sometimes that it seems better to always
3023 use synth_mult. */
3024 if (coeff != 0)
3026 /* Special case powers of two. */
3027 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3028 return expand_shift (LSHIFT_EXPR, mode, op0,
3029 build_int_cst (NULL_TREE, floor_log2 (coeff)),
3030 target, unsignedp);
3032 /* Exclude cost of op0 from max_cost to match the cost
3033 calculation of the synth_mult. */
3034 max_cost = rtx_cost (gen_rtx_MULT (mode, fake_reg, op1), SET, speed);
3035 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3036 max_cost))
3037 return expand_mult_const (mode, op0, coeff, target,
3038 &algorithm, variant);
3042 if (GET_CODE (op0) == CONST_DOUBLE)
3044 rtx temp = op0;
3045 op0 = op1;
3046 op1 = temp;
3049 /* Expand x*2.0 as x+x. */
3050 if (GET_CODE (op1) == CONST_DOUBLE
3051 && SCALAR_FLOAT_MODE_P (mode))
3053 REAL_VALUE_TYPE d;
3054 REAL_VALUE_FROM_CONST_DOUBLE (d, op1);
3056 if (REAL_VALUES_EQUAL (d, dconst2))
3058 op0 = force_reg (GET_MODE (op0), op0);
3059 return expand_binop (mode, add_optab, op0, op0,
3060 target, unsignedp, OPTAB_LIB_WIDEN);
3064 /* This used to use umul_optab if unsigned, but for non-widening multiply
3065 there is no difference between signed and unsigned. */
3066 op0 = expand_binop (mode,
3067 ! unsignedp
3068 && flag_trapv && (GET_MODE_CLASS(mode) == MODE_INT)
3069 ? smulv_optab : smul_optab,
3070 op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3071 gcc_assert (op0);
3072 return op0;
3075 /* Perform a widening multiplication and return an rtx for the result.
3076 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3077 TARGET is a suggestion for where to store the result (an rtx).
3078 THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3079 or smul_widen_optab.
3081 We check specially for a constant integer as OP1, comparing the
3082 cost of a widening multiply against the cost of a sequence of shifts
3083 and adds. */
3085 rtx
3086 expand_widening_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3087 int unsignedp, optab this_optab)
3089 bool speed = optimize_insn_for_speed_p ();
3090 rtx cop1;
3092 if (CONST_INT_P (op1)
3093 && GET_MODE (op0) != VOIDmode
3094 && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3095 this_optab == umul_widen_optab))
3096 && CONST_INT_P (cop1)
3097 && (INTVAL (cop1) >= 0
3098 || GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT))
3100 HOST_WIDE_INT coeff = INTVAL (cop1);
3101 int max_cost;
3102 enum mult_variant variant;
3103 struct algorithm algorithm;
3105 /* Special case powers of two. */
3106 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3108 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3109 return expand_shift (LSHIFT_EXPR, mode, op0,
3110 build_int_cst (NULL_TREE, floor_log2 (coeff)),
3111 target, unsignedp);
3114 /* Exclude cost of op0 from max_cost to match the cost
3115 calculation of the synth_mult. */
3116 max_cost = mul_widen_cost[speed][mode];
3117 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3118 max_cost))
3120 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3121 return expand_mult_const (mode, op0, coeff, target,
3122 &algorithm, variant);
3125 return expand_binop (mode, this_optab, op0, op1, target,
3126 unsignedp, OPTAB_LIB_WIDEN);
3129 /* Return the smallest n such that 2**n >= X. */
3131 int
3132 ceil_log2 (unsigned HOST_WIDE_INT x)
3134 return floor_log2 (x - 1) + 1;
3137 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3138 replace division by D, and put the least significant N bits of the result
3139 in *MULTIPLIER_PTR and return the most significant bit.
3141 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3142 needed precision is in PRECISION (should be <= N).
3144 PRECISION should be as small as possible so this function can choose
3145 the multiplier more freely.
3147 The rounded-up logarithm of D is placed in *lgup_ptr. A shift count that
3148 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3150 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3151 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
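/* For example, with D == 7, N == 32 and PRECISION == 32: lgup == 3 and
   mhigh == floor ((2^35 + 2^3) / 7) == 0x124924925, which needs 33 bits;
   the low 32 bits 0x24924925 are stored in *MULTIPLIER_PTR, *POST_SHIFT_PTR
   is set to 3, and 1 is returned for the extra high bit of the multiplier.  */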
3153 static
3154 unsigned HOST_WIDE_INT
3155 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3156 rtx *multiplier_ptr, int *post_shift_ptr, int *lgup_ptr)
3158 HOST_WIDE_INT mhigh_hi, mlow_hi;
3159 unsigned HOST_WIDE_INT mhigh_lo, mlow_lo;
3160 int lgup, post_shift;
3161 int pow, pow2;
3162 unsigned HOST_WIDE_INT nl, dummy1;
3163 HOST_WIDE_INT nh, dummy2;
3165 /* lgup = ceil(log2(divisor)); */
3166 lgup = ceil_log2 (d);
3168 gcc_assert (lgup <= n);
3170 pow = n + lgup;
3171 pow2 = n + lgup - precision;
3173 /* We could handle this with some effort, but this case is much
3174 better handled directly with a scc insn, so rely on caller using
3175 that. */
3176 gcc_assert (pow != 2 * HOST_BITS_PER_WIDE_INT);
3178 /* mlow = 2^(N + lgup)/d */
3179 if (pow >= HOST_BITS_PER_WIDE_INT)
3181 nh = (HOST_WIDE_INT) 1 << (pow - HOST_BITS_PER_WIDE_INT);
3182 nl = 0;
3184 else
3186 nh = 0;
3187 nl = (unsigned HOST_WIDE_INT) 1 << pow;
3189 div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3190 &mlow_lo, &mlow_hi, &dummy1, &dummy2);
3192 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3193 if (pow2 >= HOST_BITS_PER_WIDE_INT)
3194 nh |= (HOST_WIDE_INT) 1 << (pow2 - HOST_BITS_PER_WIDE_INT);
3195 else
3196 nl |= (unsigned HOST_WIDE_INT) 1 << pow2;
3197 div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3198 &mhigh_lo, &mhigh_hi, &dummy1, &dummy2);
3200 gcc_assert (!mhigh_hi || nh - d < d);
3201 gcc_assert (mhigh_hi <= 1 && mlow_hi <= 1);
3202 /* Assert that mlow < mhigh. */
3203 gcc_assert (mlow_hi < mhigh_hi
3204 || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo));
3206 /* If precision == N, then mlow, mhigh exceed 2^N
3207 (but they do not exceed 2^(N+1)). */
3209 /* Reduce to lowest terms. */
3210 for (post_shift = lgup; post_shift > 0; post_shift--)
3212 unsigned HOST_WIDE_INT ml_lo = (mlow_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mlow_lo >> 1);
3213 unsigned HOST_WIDE_INT mh_lo = (mhigh_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mhigh_lo >> 1);
3214 if (ml_lo >= mh_lo)
3215 break;
3217 mlow_hi = 0;
3218 mlow_lo = ml_lo;
3219 mhigh_hi = 0;
3220 mhigh_lo = mh_lo;
3223 *post_shift_ptr = post_shift;
3224 *lgup_ptr = lgup;
3225 if (n < HOST_BITS_PER_WIDE_INT)
3227 unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3228 *multiplier_ptr = GEN_INT (mhigh_lo & mask);
3229 return mhigh_lo >= mask;
3231 else
3233 *multiplier_ptr = GEN_INT (mhigh_lo);
3234 return mhigh_hi;
3238 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3239 congruent to 1 (mod 2**N). */
3241 static unsigned HOST_WIDE_INT
3242 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3244 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
3246 /* The algorithm notes that the choice y = x satisfies
3247 x*y == 1 mod 2^3, since x is assumed odd.
3248 Each iteration doubles the number of bits of significance in y. */
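/* For example, for X == 3 and N == 8: Y starts at 3 (3*3 == 9 == 1 mod 8);
   the first step gives Y == 3*(2 - 9) mod 256 == 235 (3*235 == 1 mod 64),
   and the second gives Y == 235*(2 - 705) mod 256 == 171, with
   3*171 == 513 == 1 mod 256.  */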
3250 unsigned HOST_WIDE_INT mask;
3251 unsigned HOST_WIDE_INT y = x;
3252 int nbit = 3;
3254 mask = (n == HOST_BITS_PER_WIDE_INT
3255 ? ~(unsigned HOST_WIDE_INT) 0
3256 : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3258 while (nbit < n)
3260 y = y * (2 - x*y) & mask; /* Modulo 2^N */
3261 nbit *= 2;
3263 return y;
3266 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3267 flavor of OP0 and OP1. ADJ_OPERAND is already the high half of the
3268 product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed product
3269 to become unsigned; if UNSIGNEDP is zero, adjust the unsigned product to
3270 become signed.
3272 The result is put in TARGET if that is convenient.
3274 MODE is the mode of operation. */
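/* The adjustment below implements, for N-bit operands,
     signed_high (OP0, OP1) == unsigned_high (OP0, OP1)
                               - (OP0 < 0 ? OP1 : 0) - (OP1 < 0 ? OP0 : 0)
   (and the same identity with additions when UNSIGNEDP); each conditional
   term is formed as (X >> (N - 1)) & Y.  */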
3276 rtx
3277 expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
3278 rtx op1, rtx target, int unsignedp)
3280 rtx tem;
3281 enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3283 tem = expand_shift (RSHIFT_EXPR, mode, op0,
3284 build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode) - 1),
3285 NULL_RTX, 0);
3286 tem = expand_and (mode, tem, op1, NULL_RTX);
3287 adj_operand
3288 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3289 adj_operand);
3291 tem = expand_shift (RSHIFT_EXPR, mode, op1,
3292 build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode) - 1),
3293 NULL_RTX, 0);
3294 tem = expand_and (mode, tem, op0, NULL_RTX);
3295 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3296 target);
3298 return target;
3301 /* Subroutine of expand_mult_highpart. Return the MODE high part of OP. */
3303 static rtx
3304 extract_high_half (enum machine_mode mode, rtx op)
3306 enum machine_mode wider_mode;
3308 if (mode == word_mode)
3309 return gen_highpart (mode, op);
3311 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3313 wider_mode = GET_MODE_WIDER_MODE (mode);
3314 op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3315 build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode)), 0, 1);
3316 return convert_modes (mode, wider_mode, op, 0);
3319 /* Like expand_mult_highpart, but only consider using a multiplication
3320 optab. OP1 is an rtx for the constant operand. */
3322 static rtx
3323 expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
3324 rtx target, int unsignedp, int max_cost)
3326 rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3327 enum machine_mode wider_mode;
3328 optab moptab;
3329 rtx tem;
3330 int size;
3331 bool speed = optimize_insn_for_speed_p ();
3333 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3335 wider_mode = GET_MODE_WIDER_MODE (mode);
3336 size = GET_MODE_BITSIZE (mode);
3338 /* Firstly, try using a multiplication insn that only generates the needed
3339 high part of the product, and in the sign flavor of unsignedp. */
3340 if (mul_highpart_cost[speed][mode] < max_cost)
3342 moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3343 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3344 unsignedp, OPTAB_DIRECT);
3345 if (tem)
3346 return tem;
3349 /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3350 Need to adjust the result after the multiplication. */
3351 if (size - 1 < BITS_PER_WORD
3352 && (mul_highpart_cost[speed][mode] + 2 * shift_cost[speed][mode][size-1]
3353 + 4 * add_cost[speed][mode] < max_cost))
3355 moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3356 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3357 unsignedp, OPTAB_DIRECT);
3358 if (tem)
3359 /* We used the wrong signedness. Adjust the result. */
3360 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3361 tem, unsignedp);
3364 /* Try widening multiplication. */
3365 moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3366 if (optab_handler (moptab, wider_mode) != CODE_FOR_nothing
3367 && mul_widen_cost[speed][wider_mode] < max_cost)
3369 tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3370 unsignedp, OPTAB_WIDEN);
3371 if (tem)
3372 return extract_high_half (mode, tem);
3375 /* Try widening the mode and perform a non-widening multiplication. */
3376 if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3377 && size - 1 < BITS_PER_WORD
3378 && mul_cost[speed][wider_mode] + shift_cost[speed][mode][size-1] < max_cost)
3380 rtx insns, wop0, wop1;
3382 /* We need to widen the operands, for example to ensure the
3383 constant multiplier is correctly sign or zero extended.
3384 Use a sequence to clean up any instructions emitted by
3385 the conversions if things don't work out. */
3386 start_sequence ();
3387 wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3388 wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3389 tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3390 unsignedp, OPTAB_WIDEN);
3391 insns = get_insns ();
3392 end_sequence ();
3394 if (tem)
3396 emit_insn (insns);
3397 return extract_high_half (mode, tem);
3401 /* Try widening multiplication of opposite signedness, and adjust. */
3402 moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3403 if (optab_handler (moptab, wider_mode) != CODE_FOR_nothing
3404 && size - 1 < BITS_PER_WORD
3405 && (mul_widen_cost[speed][wider_mode] + 2 * shift_cost[speed][mode][size-1]
3406 + 4 * add_cost[speed][mode] < max_cost))
3408 tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3409 NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3410 if (tem != 0)
3412 tem = extract_high_half (mode, tem);
3413 /* We used the wrong signedness. Adjust the result. */
3414 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3415 target, unsignedp);
3419 return 0;
3422 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3423 putting the high half of the result in TARGET if that is convenient,
3424 and return where the result is. If the operation cannot be performed,
3425 0 is returned.
3427 MODE is the mode of operation and result.
3429 UNSIGNEDP nonzero means unsigned multiply.
3431 MAX_COST is the total allowed cost for the expanded RTL. */
3433 static rtx
3434 expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
3435 rtx target, int unsignedp, int max_cost)
3437 enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3438 unsigned HOST_WIDE_INT cnst1;
3439 int extra_cost;
3440 bool sign_adjust = false;
3441 enum mult_variant variant;
3442 struct algorithm alg;
3443 rtx tem;
3444 bool speed = optimize_insn_for_speed_p ();
3446 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3447 /* We can't support modes wider than HOST_BITS_PER_WIDE_INT. */
3448 gcc_assert (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT);
3450 cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3452 /* We can't optimize modes wider than BITS_PER_WORD.
3453 ??? We might be able to perform double-word arithmetic if
3454 mode == word_mode, however all the cost calculations in
3455 synth_mult etc. assume single-word operations. */
3456 if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3457 return expand_mult_highpart_optab (mode, op0, op1, target,
3458 unsignedp, max_cost);
3460 extra_cost = shift_cost[speed][mode][GET_MODE_BITSIZE (mode) - 1];
3462 /* Check whether we try to multiply by a negative constant. */
3463 if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3465 sign_adjust = true;
3466 extra_cost += add_cost[speed][mode];
3469 /* See whether shift/add multiplication is cheap enough. */
3470 if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3471 max_cost - extra_cost))
3473 /* See whether the specialized multiplication optabs are
3474 cheaper than the shift/add version. */
3475 tem = expand_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3476 alg.cost.cost + extra_cost);
3477 if (tem)
3478 return tem;
3480 tem = convert_to_mode (wider_mode, op0, unsignedp);
3481 tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3482 tem = extract_high_half (mode, tem);
3484 /* Adjust result for signedness. */
3485 if (sign_adjust)
3486 tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3488 return tem;
3490 return expand_mult_highpart_optab (mode, op0, op1, target,
3491 unsignedp, max_cost);
3495 /* Expand signed modulus of OP0 by a power of two D in mode MODE. */
3497 static rtx
3498 expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3500 unsigned HOST_WIDE_INT masklow, maskhigh;
3501 rtx result, temp, shift, label;
3502 int logd;
3504 logd = floor_log2 (d);
3505 result = gen_reg_rtx (mode);
3507 /* Avoid conditional branches when they're expensive. */
3508 if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3509 && optimize_insn_for_speed_p ())
3511 rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3512 mode, 0, -1);
3513 if (signmask)
3515 signmask = force_reg (mode, signmask);
3516 masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3517 shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3519 /* Use the rtx_cost of a LSHIFTRT instruction to determine
3520 which instruction sequence to use. If logical right shifts
3521 are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
3522 use a LSHIFTRT, 1 ADD, 1 SUB and an AND. */
3524 temp = gen_rtx_LSHIFTRT (mode, result, shift);
3525 if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3526 || rtx_cost (temp, SET, optimize_insn_for_speed_p ()) > COSTS_N_INSNS (2))
3528 temp = expand_binop (mode, xor_optab, op0, signmask,
3529 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3530 temp = expand_binop (mode, sub_optab, temp, signmask,
3531 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3532 temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3533 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3534 temp = expand_binop (mode, xor_optab, temp, signmask,
3535 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3536 temp = expand_binop (mode, sub_optab, temp, signmask,
3537 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3539 else
3541 signmask = expand_binop (mode, lshr_optab, signmask, shift,
3542 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3543 signmask = force_reg (mode, signmask);
3545 temp = expand_binop (mode, add_optab, op0, signmask,
3546 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3547 temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3548 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3549 temp = expand_binop (mode, sub_optab, temp, signmask,
3550 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3552 return temp;
3556 /* Mask contains the mode's signbit and the significant bits of the
3557 modulus. By including the signbit in the operation, many targets
3558 can avoid an explicit compare operation in the following comparison
3559 against zero. */
3561 masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3562 if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
3564 masklow |= (HOST_WIDE_INT) -1 << (GET_MODE_BITSIZE (mode) - 1);
3565 maskhigh = -1;
3567 else
3568 maskhigh = (HOST_WIDE_INT) -1
3569 << (GET_MODE_BITSIZE (mode) - HOST_BITS_PER_WIDE_INT - 1);
3571 temp = expand_binop (mode, and_optab, op0,
3572 immed_double_const (masklow, maskhigh, mode),
3573 result, 1, OPTAB_LIB_WIDEN);
3574 if (temp != result)
3575 emit_move_insn (result, temp);
3577 label = gen_label_rtx ();
3578 do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3580 temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3581 0, OPTAB_LIB_WIDEN);
3582 masklow = (HOST_WIDE_INT) -1 << logd;
3583 maskhigh = -1;
3584 temp = expand_binop (mode, ior_optab, temp,
3585 immed_double_const (masklow, maskhigh, mode),
3586 result, 1, OPTAB_LIB_WIDEN);
3587 temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3588 0, OPTAB_LIB_WIDEN);
3589 if (temp != result)
3590 emit_move_insn (result, temp);
3591 emit_label (label);
3592 return result;
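/* A minimal sketch of the branch-free sequence built above, for
   d == 8 (logd == 3); assumes a 32-bit int and two's complement:

     int
     smod8 (int x)
     {
       int signmask = -(x < 0);
       unsigned int t = (unsigned int) signmask >> (32 - 3);
       return (int) (((unsigned int) x + t) & 7) - (int) t;
     }

   signmask is 0 or -1, so t is 0 or d - 1 = 7; e.g. smod8 (-9) is
   ((0xfffffff7 + 7) & 7) - 7 = -1, matching C's truncating -9 % 8.  */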
3595 /* Expand signed division of OP0 by a power of two D in mode MODE.
3596 This routine is only called for positive values of D. */
3598 static rtx
3599 expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3601 rtx temp, label;
3602 tree shift;
3603 int logd;
3605 logd = floor_log2 (d);
3606 shift = build_int_cst (NULL_TREE, logd);
3608 if (d == 2
3609 && BRANCH_COST (optimize_insn_for_speed_p (),
3610 false) >= 1)
3612 temp = gen_reg_rtx (mode);
3613 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3614 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3615 0, OPTAB_LIB_WIDEN);
3616 return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3619 #ifdef HAVE_conditional_move
3620 if (BRANCH_COST (optimize_insn_for_speed_p (), false)
3621 >= 2)
3623 rtx temp2;
3625 /* ??? emit_conditional_move forces a stack adjustment via
3626 compare_from_rtx so, if the sequence is discarded, it will
3627 be lost. Do it now instead. */
3628 do_pending_stack_adjust ();
3630 start_sequence ();
3631 temp2 = copy_to_mode_reg (mode, op0);
3632 temp = expand_binop (mode, add_optab, temp2, GEN_INT (d-1),
3633 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3634 temp = force_reg (mode, temp);
3636 /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */
3637 temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3638 mode, temp, temp2, mode, 0);
3639 if (temp2)
3641 rtx seq = get_insns ();
3642 end_sequence ();
3643 emit_insn (seq);
3644 return expand_shift (RSHIFT_EXPR, mode, temp2, shift, NULL_RTX, 0);
3646 end_sequence ();
3648 #endif
3650 if (BRANCH_COST (optimize_insn_for_speed_p (),
3651 false) >= 2)
3653 int ushift = GET_MODE_BITSIZE (mode) - logd;
3655 temp = gen_reg_rtx (mode);
3656 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3657 if (shift_cost[optimize_insn_for_speed_p ()][mode][ushift] > COSTS_N_INSNS (1))
3658 temp = expand_binop (mode, and_optab, temp, GEN_INT (d - 1),
3659 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3660 else
3661 temp = expand_shift (RSHIFT_EXPR, mode, temp,
3662 build_int_cst (NULL_TREE, ushift),
3663 NULL_RTX, 1);
3664 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3665 0, OPTAB_LIB_WIDEN);
3666 return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3669 label = gen_label_rtx ();
3670 temp = copy_to_mode_reg (mode, op0);
3671 do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3672 expand_inc (temp, GEN_INT (d - 1));
3673 emit_label (label);
3674 return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
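/* A minimal sketch of the branch-free case above, for d == 8
   (logd == 3); assumes a 32-bit int and an arithmetic right shift of
   negative values:

     int
     sdiv8 (int x)
     {
       int bias = (x >> 31) & 7;
       return (x + bias) >> 3;
     }

   bias is d - 1 = 7 for negative x and 0 otherwise, so the arithmetic
   shift rounds toward zero rather than toward minus infinity; e.g.
   sdiv8 (-9) is (-9 + 7) >> 3 = -1, matching C's truncating -9 / 8.  */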
3677 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3678 if that is convenient, and returning where the result is.
3679 You may request either the quotient or the remainder as the result;
3680 specify REM_FLAG nonzero to get the remainder.
3682 CODE is the expression code for which kind of division this is;
3683 it controls how rounding is done. MODE is the machine mode to use.
3684 UNSIGNEDP nonzero means do unsigned division. */
3686 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3687 and then correct it by or'ing in missing high bits
3688 if result of ANDI is nonzero.
3689 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3690 This could optimize to a bfexts instruction.
3691 But C doesn't use these operations, so their optimizations are
3692 left for later. */
3693 /* ??? For modulo, we don't actually need the highpart of the first product,
3694 the low part will do nicely. And for small divisors, the second multiply
3695 can also be a low-part only multiply or even be completely left out.
3696 E.g. to calculate the remainder of a division by 3 with a 32 bit
3697 multiply, multiply with 0x55555556 and extract the upper two bits;
3698 the result is exact for inputs up to 0x1fffffff.
3699 The input range can be reduced by using cross-sum rules.
3700 For odd divisors >= 3, the following table gives right shift counts
3701 so that if a number is shifted by an integer multiple of the given
3702 amount, the remainder stays the same:
3703 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3704 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3705 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3706 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3707 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3709 Cross-sum rules for even numbers can be derived by leaving as many bits
3710 to the right alone as the divisor has zeros to the right.
3711 E.g. if x is an unsigned 32 bit number:
3712 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
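   As a concrete sketch of the multiply-by-0x55555556 remainder trick
   described above (assuming a 32-bit unsigned int and, as noted, only
   valid for x up to 0x1fffffff):
     unsigned int
     urem3 (unsigned int x)
     {
       return (x * 0x55555556U) >> 30;
     }
   e.g. 8 * 0x55555556 wraps to 0xaaaaaab0, whose top two bits give 2,
   which is 8 mod 3.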
3713 */
3715 rtx
3716 expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3717 rtx op0, rtx op1, rtx target, int unsignedp)
3719 enum machine_mode compute_mode;
3720 rtx tquotient;
3721 rtx quotient = 0, remainder = 0;
3722 rtx last;
3723 int size;
3724 rtx insn, set;
3725 optab optab1, optab2;
3726 int op1_is_constant, op1_is_pow2 = 0;
3727 int max_cost, extra_cost;
3728 static HOST_WIDE_INT last_div_const = 0;
3729 static HOST_WIDE_INT ext_op1;
3730 bool speed = optimize_insn_for_speed_p ();
3732 op1_is_constant = CONST_INT_P (op1);
3733 if (op1_is_constant)
3735 ext_op1 = INTVAL (op1);
3736 if (unsignedp)
3737 ext_op1 &= GET_MODE_MASK (mode);
3738 op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3739 || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3743 This is the structure of expand_divmod:
3745 First comes code to fix up the operands so we can perform the operations
3746 correctly and efficiently.
3748 Second comes a switch statement with code specific for each rounding mode.
3749 For some special operands this code emits all RTL for the desired
3750 operation, for other cases, it generates only a quotient and stores it in
3751 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
3752 to indicate that it has not done anything.
3754 Last comes code that finishes the operation. If QUOTIENT is set and
3755 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
3756 QUOTIENT is not set, it is computed using trunc rounding.
3758 We try to generate special code for division and remainder when OP1 is a
3759 constant. If |OP1| = 2**n we can use shifts and some other fast
3760 operations. For other values of OP1, we compute a carefully selected
3761 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3762 by m.
3764 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3765 half of the product. Different strategies for generating the product are
3766 implemented in expand_mult_highpart.
3768 If what we actually want is the remainder, we generate that by another
3769 by-constant multiplication and a subtraction. */
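/* As a minimal sketch of that strategy (illustrative C, not the RTL
   emitted here): a 32-bit unsigned division by the constant 10 becomes
   one widening multiply by a fixed-point approximation of 1/10 plus a
   shift of the high half, assuming a 64-bit unsigned long long:

     unsigned int
     udiv10 (unsigned int x)
     {
       return (unsigned int) (((unsigned long long) x * 0xCCCCCCCDULL) >> 35);
     }

   0xCCCCCCCD is ceil (2**35 / 10), so the truncated product recovers
   x / 10 exactly for every 32-bit x; the ">> 32" part of that shift is
   what expand_mult_highpart provides.  */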
3771 /* We shouldn't be called with OP1 == const1_rtx, but some of the
3772 code below will malfunction if we are, so check here and handle
3773 the special case if so. */
3774 if (op1 == const1_rtx)
3775 return rem_flag ? const0_rtx : op0;
3777 /* When dividing by -1, we could get an overflow.
3778 negv_optab can handle overflows. */
3779 if (! unsignedp && op1 == constm1_rtx)
3781 if (rem_flag)
3782 return const0_rtx;
3783 return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT
3784 ? negv_optab : neg_optab, op0, target, 0);
3787 if (target
3788 /* Don't use the function value register as a target
3789 since we have to read it as well as write it,
3790 and function-inlining gets confused by this. */
3791 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3792 /* Don't clobber an operand while doing a multi-step calculation. */
3793 || ((rem_flag || op1_is_constant)
3794 && (reg_mentioned_p (target, op0)
3795 || (MEM_P (op0) && MEM_P (target))))
3796 || reg_mentioned_p (target, op1)
3797 || (MEM_P (op1) && MEM_P (target))))
3798 target = 0;
3800 /* Get the mode in which to perform this computation. Normally it will
3801 be MODE, but sometimes we can't do the desired operation in MODE.
3802 If so, pick a wider mode in which we can do the operation. Convert
3803 to that mode at the start to avoid repeated conversions.
3805 First see what operations we need. These depend on the expression
3806 we are evaluating. (We assume that divxx3 insns exist under the
3807 same conditions that modxx3 insns and that these insns don't normally
3808 fail. If these assumptions are not correct, we may generate less
3809 efficient code in some cases.)
3811 Then see if we find a mode in which we can open-code that operation
3812 (either a division, modulus, or shift). Finally, check for the smallest
3813 mode for which we can do the operation with a library call. */
3815 /* We might want to refine this now that we have division-by-constant
3816 optimization. Since expand_mult_highpart tries so many variants, it is
3817 not straightforward to generalize this. Maybe we should make an array
3818 of possible modes in init_expmed? Save this for GCC 2.7. */
3820 optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3821 ? (unsignedp ? lshr_optab : ashr_optab)
3822 : (unsignedp ? udiv_optab : sdiv_optab));
3823 optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3824 ? optab1
3825 : (unsignedp ? udivmod_optab : sdivmod_optab));
3827 for (compute_mode = mode; compute_mode != VOIDmode;
3828 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3829 if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
3830 || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
3831 break;
3833 if (compute_mode == VOIDmode)
3834 for (compute_mode = mode; compute_mode != VOIDmode;
3835 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3836 if (optab_libfunc (optab1, compute_mode)
3837 || optab_libfunc (optab2, compute_mode))
3838 break;
3840 /* If we still couldn't find a mode, use MODE, but expand_binop will
3841 probably die. */
3842 if (compute_mode == VOIDmode)
3843 compute_mode = mode;
3845 if (target && GET_MODE (target) == compute_mode)
3846 tquotient = target;
3847 else
3848 tquotient = gen_reg_rtx (compute_mode);
3850 size = GET_MODE_BITSIZE (compute_mode);
3851 #if 0
3852 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3853 (mode), and thereby get better code when OP1 is a constant. Do that
3854 later. It will require going over all usages of SIZE below. */
3855 size = GET_MODE_BITSIZE (mode);
3856 #endif
3858 /* Only deduct something for a REM if the last divide done was
3859 for a different constant. Then set the constant of the last
3860 divide. */
3861 max_cost = unsignedp ? udiv_cost[speed][compute_mode] : sdiv_cost[speed][compute_mode];
3862 if (rem_flag && ! (last_div_const != 0 && op1_is_constant
3863 && INTVAL (op1) == last_div_const))
3864 max_cost -= mul_cost[speed][compute_mode] + add_cost[speed][compute_mode];
3866 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
3868 /* Now convert to the best mode to use. */
3869 if (compute_mode != mode)
3871 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
3872 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
3874 /* convert_modes may have placed op1 into a register, so we
3875 must recompute the following. */
3876 op1_is_constant = CONST_INT_P (op1);
3877 op1_is_pow2 = (op1_is_constant
3878 && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
3879 || (! unsignedp
3880 && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1)))))) ;
3883 /* If one of the operands is a volatile MEM, copy it into a register. */
3885 if (MEM_P (op0) && MEM_VOLATILE_P (op0))
3886 op0 = force_reg (compute_mode, op0);
3887 if (MEM_P (op1) && MEM_VOLATILE_P (op1))
3888 op1 = force_reg (compute_mode, op1);
3890 /* If we need the remainder or if OP1 is constant, we need to
3891 put OP0 in a register in case it has any queued subexpressions. */
3892 if (rem_flag || op1_is_constant)
3893 op0 = force_reg (compute_mode, op0);
3895 last = get_last_insn ();
3897 /* Promote floor rounding to trunc rounding for unsigned operations. */
3898 if (unsignedp)
3900 if (code == FLOOR_DIV_EXPR)
3901 code = TRUNC_DIV_EXPR;
3902 if (code == FLOOR_MOD_EXPR)
3903 code = TRUNC_MOD_EXPR;
3904 if (code == EXACT_DIV_EXPR && op1_is_pow2)
3905 code = TRUNC_DIV_EXPR;
3908 if (op1 != const0_rtx)
3909 switch (code)
3911 case TRUNC_MOD_EXPR:
3912 case TRUNC_DIV_EXPR:
3913 if (op1_is_constant)
3915 if (unsignedp)
3917 unsigned HOST_WIDE_INT mh;
3918 int pre_shift, post_shift;
3919 int dummy;
3920 rtx ml;
3921 unsigned HOST_WIDE_INT d = (INTVAL (op1)
3922 & GET_MODE_MASK (compute_mode));
3924 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
3926 pre_shift = floor_log2 (d);
3927 if (rem_flag)
3929 remainder
3930 = expand_binop (compute_mode, and_optab, op0,
3931 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
3932 remainder, 1,
3933 OPTAB_LIB_WIDEN);
3934 if (remainder)
3935 return gen_lowpart (mode, remainder);
3937 quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3938 build_int_cst (NULL_TREE,
3939 pre_shift),
3940 tquotient, 1);
3942 else if (size <= HOST_BITS_PER_WIDE_INT)
3944 if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
3946 /* Most significant bit of divisor is set; emit an scc
3947 insn. */
3948 quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
3949 compute_mode, 1, 1);
3951 else
3953 /* Find a suitable multiplier and right shift count
3954 instead of dividing by D. */
3956 mh = choose_multiplier (d, size, size,
3957 &ml, &post_shift, &dummy);
3959 /* If the suggested multiplier is more than SIZE bits,
3960 we can do better for even divisors, using an
3961 initial right shift. */
3962 if (mh != 0 && (d & 1) == 0)
3964 pre_shift = floor_log2 (d & -d);
3965 mh = choose_multiplier (d >> pre_shift, size,
3966 size - pre_shift,
3967 &ml, &post_shift, &dummy);
3968 gcc_assert (!mh);
3970 else
3971 pre_shift = 0;
3973 if (mh != 0)
3975 rtx t1, t2, t3, t4;
3977 if (post_shift - 1 >= BITS_PER_WORD)
3978 goto fail1;
3980 extra_cost
3981 = (shift_cost[speed][compute_mode][post_shift - 1]
3982 + shift_cost[speed][compute_mode][1]
3983 + 2 * add_cost[speed][compute_mode]);
3984 t1 = expand_mult_highpart (compute_mode, op0, ml,
3985 NULL_RTX, 1,
3986 max_cost - extra_cost);
3987 if (t1 == 0)
3988 goto fail1;
3989 t2 = force_operand (gen_rtx_MINUS (compute_mode,
3990 op0, t1),
3991 NULL_RTX);
3992 t3 = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3993 integer_one_node, NULL_RTX, 1);
3994 t4 = force_operand (gen_rtx_PLUS (compute_mode,
3995 t1, t3),
3996 NULL_RTX);
3997 quotient = expand_shift
3998 (RSHIFT_EXPR, compute_mode, t4,
3999 build_int_cst (NULL_TREE, post_shift - 1),
4000 tquotient, 1);
4002 else
4004 rtx t1, t2;
4006 if (pre_shift >= BITS_PER_WORD
4007 || post_shift >= BITS_PER_WORD)
4008 goto fail1;
4010 t1 = expand_shift
4011 (RSHIFT_EXPR, compute_mode, op0,
4012 build_int_cst (NULL_TREE, pre_shift),
4013 NULL_RTX, 1);
4014 extra_cost
4015 = (shift_cost[speed][compute_mode][pre_shift]
4016 + shift_cost[speed][compute_mode][post_shift]);
4017 t2 = expand_mult_highpart (compute_mode, t1, ml,
4018 NULL_RTX, 1,
4019 max_cost - extra_cost);
4020 if (t2 == 0)
4021 goto fail1;
4022 quotient = expand_shift
4023 (RSHIFT_EXPR, compute_mode, t2,
4024 build_int_cst (NULL_TREE, post_shift),
4025 tquotient, 1);
4029 else /* Too wide mode to use tricky code */
4030 break;
4032 insn = get_last_insn ();
4033 if (insn != last
4034 && (set = single_set (insn)) != 0
4035 && SET_DEST (set) == quotient)
4036 set_unique_reg_note (insn,
4037 REG_EQUAL,
4038 gen_rtx_UDIV (compute_mode, op0, op1));
4040 else /* TRUNC_DIV, signed */
4042 unsigned HOST_WIDE_INT ml;
4043 int lgup, post_shift;
4044 rtx mlr;
4045 HOST_WIDE_INT d = INTVAL (op1);
4046 unsigned HOST_WIDE_INT abs_d;
4048 /* Since d might be INT_MIN, we have to cast to
4049 unsigned HOST_WIDE_INT before negating to avoid
4050 undefined signed overflow. */
4051 abs_d = (d >= 0
4052 ? (unsigned HOST_WIDE_INT) d
4053 : - (unsigned HOST_WIDE_INT) d);
4055 /* n rem d = n rem -d */
4056 if (rem_flag && d < 0)
4058 d = abs_d;
4059 op1 = gen_int_mode (abs_d, compute_mode);
4062 if (d == 1)
4063 quotient = op0;
4064 else if (d == -1)
4065 quotient = expand_unop (compute_mode, neg_optab, op0,
4066 tquotient, 0);
4067 else if (HOST_BITS_PER_WIDE_INT >= size
4068 && abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4070 /* This case is not handled correctly below. */
4071 quotient = emit_store_flag (tquotient, EQ, op0, op1,
4072 compute_mode, 1, 1);
4073 if (quotient == 0)
4074 goto fail1;
4076 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4077 && (rem_flag ? smod_pow2_cheap[speed][compute_mode]
4078 : sdiv_pow2_cheap[speed][compute_mode])
4079 /* We assume that cheap metric is true if the
4080 optab has an expander for this mode. */
4081 && ((optab_handler ((rem_flag ? smod_optab
4082 : sdiv_optab),
4083 compute_mode)
4084 != CODE_FOR_nothing)
4085 || (optab_handler (sdivmod_optab,
4086 compute_mode)
4087 != CODE_FOR_nothing)))
4088 ;
4089 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4091 if (rem_flag)
4093 remainder = expand_smod_pow2 (compute_mode, op0, d);
4094 if (remainder)
4095 return gen_lowpart (mode, remainder);
4098 if (sdiv_pow2_cheap[speed][compute_mode]
4099 && ((optab_handler (sdiv_optab, compute_mode)
4100 != CODE_FOR_nothing)
4101 || (optab_handler (sdivmod_optab, compute_mode)
4102 != CODE_FOR_nothing)))
4103 quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4104 compute_mode, op0,
4105 gen_int_mode (abs_d,
4106 compute_mode),
4107 NULL_RTX, 0);
4108 else
4109 quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4111 /* We have computed OP0 / abs(OP1). If OP1 is negative,
4112 negate the quotient. */
4113 if (d < 0)
4115 insn = get_last_insn ();
4116 if (insn != last
4117 && (set = single_set (insn)) != 0
4118 && SET_DEST (set) == quotient
4119 && abs_d < ((unsigned HOST_WIDE_INT) 1
4120 << (HOST_BITS_PER_WIDE_INT - 1)))
4121 set_unique_reg_note (insn,
4122 REG_EQUAL,
4123 gen_rtx_DIV (compute_mode,
4124 op0,
4125 GEN_INT
4126 (trunc_int_for_mode
4127 (abs_d,
4128 compute_mode))));
4130 quotient = expand_unop (compute_mode, neg_optab,
4131 quotient, quotient, 0);
4134 else if (size <= HOST_BITS_PER_WIDE_INT)
4136 choose_multiplier (abs_d, size, size - 1,
4137 &mlr, &post_shift, &lgup);
4138 ml = (unsigned HOST_WIDE_INT) INTVAL (mlr);
4139 if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4141 rtx t1, t2, t3;
4143 if (post_shift >= BITS_PER_WORD
4144 || size - 1 >= BITS_PER_WORD)
4145 goto fail1;
4147 extra_cost = (shift_cost[speed][compute_mode][post_shift]
4148 + shift_cost[speed][compute_mode][size - 1]
4149 + add_cost[speed][compute_mode]);
4150 t1 = expand_mult_highpart (compute_mode, op0, mlr,
4151 NULL_RTX, 0,
4152 max_cost - extra_cost);
4153 if (t1 == 0)
4154 goto fail1;
4155 t2 = expand_shift
4156 (RSHIFT_EXPR, compute_mode, t1,
4157 build_int_cst (NULL_TREE, post_shift),
4158 NULL_RTX, 0);
4159 t3 = expand_shift
4160 (RSHIFT_EXPR, compute_mode, op0,
4161 build_int_cst (NULL_TREE, size - 1),
4162 NULL_RTX, 0);
4163 if (d < 0)
4164 quotient
4165 = force_operand (gen_rtx_MINUS (compute_mode,
4166 t3, t2),
4167 tquotient);
4168 else
4169 quotient
4170 = force_operand (gen_rtx_MINUS (compute_mode,
4171 t2, t3),
4172 tquotient);
4174 else
4176 rtx t1, t2, t3, t4;
4178 if (post_shift >= BITS_PER_WORD
4179 || size - 1 >= BITS_PER_WORD)
4180 goto fail1;
4182 ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4183 mlr = gen_int_mode (ml, compute_mode);
4184 extra_cost = (shift_cost[speed][compute_mode][post_shift]
4185 + shift_cost[speed][compute_mode][size - 1]
4186 + 2 * add_cost[speed][compute_mode]);
4187 t1 = expand_mult_highpart (compute_mode, op0, mlr,
4188 NULL_RTX, 0,
4189 max_cost - extra_cost);
4190 if (t1 == 0)
4191 goto fail1;
4192 t2 = force_operand (gen_rtx_PLUS (compute_mode,
4193 t1, op0),
4194 NULL_RTX);
4195 t3 = expand_shift
4196 (RSHIFT_EXPR, compute_mode, t2,
4197 build_int_cst (NULL_TREE, post_shift),
4198 NULL_RTX, 0);
4199 t4 = expand_shift
4200 (RSHIFT_EXPR, compute_mode, op0,
4201 build_int_cst (NULL_TREE, size - 1),
4202 NULL_RTX, 0);
4203 if (d < 0)
4204 quotient
4205 = force_operand (gen_rtx_MINUS (compute_mode,
4206 t4, t3),
4207 tquotient);
4208 else
4209 quotient
4210 = force_operand (gen_rtx_MINUS (compute_mode,
4211 t3, t4),
4212 tquotient);
4215 else /* Too wide mode to use tricky code */
4216 break;
4218 insn = get_last_insn ();
4219 if (insn != last
4220 && (set = single_set (insn)) != 0
4221 && SET_DEST (set) == quotient)
4222 set_unique_reg_note (insn,
4223 REG_EQUAL,
4224 gen_rtx_DIV (compute_mode, op0, op1));
4226 break;
4228 fail1:
4229 delete_insns_since (last);
4230 break;
4232 case FLOOR_DIV_EXPR:
4233 case FLOOR_MOD_EXPR:
4234 /* We will come here only for signed operations. */
4235 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4237 unsigned HOST_WIDE_INT mh;
4238 int pre_shift, lgup, post_shift;
4239 HOST_WIDE_INT d = INTVAL (op1);
4240 rtx ml;
4242 if (d > 0)
4244 /* We could just as easily deal with negative constants here,
4245 but it does not seem worth the trouble for GCC 2.6. */
4246 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4248 pre_shift = floor_log2 (d);
4249 if (rem_flag)
4251 remainder = expand_binop (compute_mode, and_optab, op0,
4252 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4253 remainder, 0, OPTAB_LIB_WIDEN);
4254 if (remainder)
4255 return gen_lowpart (mode, remainder);
4257 quotient = expand_shift
4258 (RSHIFT_EXPR, compute_mode, op0,
4259 build_int_cst (NULL_TREE, pre_shift),
4260 tquotient, 0);
4262 else
4264 rtx t1, t2, t3, t4;
4266 mh = choose_multiplier (d, size, size - 1,
4267 &ml, &post_shift, &lgup);
4268 gcc_assert (!mh);
4270 if (post_shift < BITS_PER_WORD
4271 && size - 1 < BITS_PER_WORD)
4273 t1 = expand_shift
4274 (RSHIFT_EXPR, compute_mode, op0,
4275 build_int_cst (NULL_TREE, size - 1),
4276 NULL_RTX, 0);
4277 t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4278 NULL_RTX, 0, OPTAB_WIDEN);
4279 extra_cost = (shift_cost[speed][compute_mode][post_shift]
4280 + shift_cost[speed][compute_mode][size - 1]
4281 + 2 * add_cost[speed][compute_mode]);
4282 t3 = expand_mult_highpart (compute_mode, t2, ml,
4283 NULL_RTX, 1,
4284 max_cost - extra_cost);
4285 if (t3 != 0)
4287 t4 = expand_shift
4288 (RSHIFT_EXPR, compute_mode, t3,
4289 build_int_cst (NULL_TREE, post_shift),
4290 NULL_RTX, 1);
4291 quotient = expand_binop (compute_mode, xor_optab,
4292 t4, t1, tquotient, 0,
4293 OPTAB_WIDEN);
4298 else
4300 rtx nsign, t1, t2, t3, t4;
4301 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4302 op0, constm1_rtx), NULL_RTX);
4303 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4304 0, OPTAB_WIDEN);
4305 nsign = expand_shift
4306 (RSHIFT_EXPR, compute_mode, t2,
4307 build_int_cst (NULL_TREE, size - 1),
4308 NULL_RTX, 0);
4309 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4310 NULL_RTX);
4311 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4312 NULL_RTX, 0);
4313 if (t4)
4315 rtx t5;
4316 t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4317 NULL_RTX, 0);
4318 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4319 t4, t5),
4320 tquotient);
4325 if (quotient != 0)
4326 break;
4327 delete_insns_since (last);
4329 /* Try using an instruction that produces both the quotient and
4330 remainder, using truncation. We can easily compensate the quotient
4331 or remainder to get floor rounding, once we have the remainder.
4332 Notice that we compute also the final remainder value here,
4333 and return the result right away. */
4334 if (target == 0 || GET_MODE (target) != compute_mode)
4335 target = gen_reg_rtx (compute_mode);
4337 if (rem_flag)
4339 remainder
4340 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4341 quotient = gen_reg_rtx (compute_mode);
4343 else
4345 quotient
4346 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4347 remainder = gen_reg_rtx (compute_mode);
4350 if (expand_twoval_binop (sdivmod_optab, op0, op1,
4351 quotient, remainder, 0))
4353 /* This could be computed with a branch-less sequence.
4354 Save that for later. */
4355 rtx tem;
4356 rtx label = gen_label_rtx ();
4357 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4358 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4359 NULL_RTX, 0, OPTAB_WIDEN);
4360 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4361 expand_dec (quotient, const1_rtx);
4362 expand_inc (remainder, op1);
4363 emit_label (label);
4364 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4367 /* No luck with division elimination or divmod. Have to do it
4368 by conditionally adjusting op0 *and* the result. */
4370 rtx label1, label2, label3, label4, label5;
4371 rtx adjusted_op0;
4372 rtx tem;
4374 quotient = gen_reg_rtx (compute_mode);
4375 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4376 label1 = gen_label_rtx ();
4377 label2 = gen_label_rtx ();
4378 label3 = gen_label_rtx ();
4379 label4 = gen_label_rtx ();
4380 label5 = gen_label_rtx ();
4381 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4382 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4383 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4384 quotient, 0, OPTAB_LIB_WIDEN);
4385 if (tem != quotient)
4386 emit_move_insn (quotient, tem);
4387 emit_jump_insn (gen_jump (label5));
4388 emit_barrier ();
4389 emit_label (label1);
4390 expand_inc (adjusted_op0, const1_rtx);
4391 emit_jump_insn (gen_jump (label4));
4392 emit_barrier ();
4393 emit_label (label2);
4394 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4395 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4396 quotient, 0, OPTAB_LIB_WIDEN);
4397 if (tem != quotient)
4398 emit_move_insn (quotient, tem);
4399 emit_jump_insn (gen_jump (label5));
4400 emit_barrier ();
4401 emit_label (label3);
4402 expand_dec (adjusted_op0, const1_rtx);
4403 emit_label (label4);
4404 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4405 quotient, 0, OPTAB_LIB_WIDEN);
4406 if (tem != quotient)
4407 emit_move_insn (quotient, tem);
4408 expand_dec (quotient, const1_rtx);
4409 emit_label (label5);
4411 break;
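/* A minimal sketch of the floor-rounding compensation used above, in
   terms of C's truncating division (signed operands, no overflow):

     int
     floor_div (int x, int d)
     {
       int q = x / d;
       int r = x % d;
       if (r != 0 && (x ^ d) < 0)
         q -= 1;
       return q;
     }

   i.e. when the remainder is nonzero and the signs differ, decrement
   the quotient and add d back to the remainder, which is exactly the
   expand_dec/expand_inc pair emitted after the divmod above.  */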
4413 case CEIL_DIV_EXPR:
4414 case CEIL_MOD_EXPR:
4415 if (unsignedp)
4417 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4419 rtx t1, t2, t3;
4420 unsigned HOST_WIDE_INT d = INTVAL (op1);
4421 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4422 build_int_cst (NULL_TREE, floor_log2 (d)),
4423 tquotient, 1);
4424 t2 = expand_binop (compute_mode, and_optab, op0,
4425 GEN_INT (d - 1),
4426 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4427 t3 = gen_reg_rtx (compute_mode);
4428 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4429 compute_mode, 1, 1);
4430 if (t3 == 0)
4432 rtx lab;
4433 lab = gen_label_rtx ();
4434 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4435 expand_inc (t1, const1_rtx);
4436 emit_label (lab);
4437 quotient = t1;
4439 else
4440 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4441 t1, t3),
4442 tquotient);
4443 break;
4446 /* Try using an instruction that produces both the quotient and
4447 remainder, using truncation. We can easily compensate the
4448 quotient or remainder to get ceiling rounding, once we have the
4449 remainder. Notice that we compute also the final remainder
4450 value here, and return the result right away. */
4451 if (target == 0 || GET_MODE (target) != compute_mode)
4452 target = gen_reg_rtx (compute_mode);
4454 if (rem_flag)
4456 remainder = (REG_P (target)
4457 ? target : gen_reg_rtx (compute_mode));
4458 quotient = gen_reg_rtx (compute_mode);
4460 else
4462 quotient = (REG_P (target)
4463 ? target : gen_reg_rtx (compute_mode));
4464 remainder = gen_reg_rtx (compute_mode);
4467 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4468 remainder, 1))
4470 /* This could be computed with a branch-less sequence.
4471 Save that for later. */
4472 rtx label = gen_label_rtx ();
4473 do_cmp_and_jump (remainder, const0_rtx, EQ,
4474 compute_mode, label);
4475 expand_inc (quotient, const1_rtx);
4476 expand_dec (remainder, op1);
4477 emit_label (label);
4478 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4481 /* No luck with division elimination or divmod. Have to do it
4482 by conditionally adjusting op0 *and* the result. */
4484 rtx label1, label2;
4485 rtx adjusted_op0, tem;
4487 quotient = gen_reg_rtx (compute_mode);
4488 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4489 label1 = gen_label_rtx ();
4490 label2 = gen_label_rtx ();
4491 do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4492 compute_mode, label1);
4493 emit_move_insn (quotient, const0_rtx);
4494 emit_jump_insn (gen_jump (label2));
4495 emit_barrier ();
4496 emit_label (label1);
4497 expand_dec (adjusted_op0, const1_rtx);
4498 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4499 quotient, 1, OPTAB_LIB_WIDEN);
4500 if (tem != quotient)
4501 emit_move_insn (quotient, tem);
4502 expand_inc (quotient, const1_rtx);
4503 emit_label (label2);
4506 else /* signed */
4508 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4509 && INTVAL (op1) >= 0)
4511 /* This is extremely similar to the code for the unsigned case
4512 above. For 2.7 we should merge these variants, but for
4513 2.6.1 I don't want to touch the code for unsigned since that
4514 gets used in C. The signed case will only be used by other
4515 languages (Ada). */
4517 rtx t1, t2, t3;
4518 unsigned HOST_WIDE_INT d = INTVAL (op1);
4519 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4520 build_int_cst (NULL_TREE, floor_log2 (d)),
4521 tquotient, 0);
4522 t2 = expand_binop (compute_mode, and_optab, op0,
4523 GEN_INT (d - 1),
4524 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4525 t3 = gen_reg_rtx (compute_mode);
4526 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4527 compute_mode, 1, 1);
4528 if (t3 == 0)
4530 rtx lab;
4531 lab = gen_label_rtx ();
4532 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4533 expand_inc (t1, const1_rtx);
4534 emit_label (lab);
4535 quotient = t1;
4537 else
4538 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4539 t1, t3),
4540 tquotient);
4541 break;
4544 /* Try using an instruction that produces both the quotient and
4545 remainder, using truncation. We can easily compensate the
4546 quotient or remainder to get ceiling rounding, once we have the
4547 remainder. Notice that we compute also the final remainder
4548 value here, and return the result right away. */
4549 if (target == 0 || GET_MODE (target) != compute_mode)
4550 target = gen_reg_rtx (compute_mode);
4551 if (rem_flag)
4553 remainder= (REG_P (target)
4554 ? target : gen_reg_rtx (compute_mode));
4555 quotient = gen_reg_rtx (compute_mode);
4557 else
4559 quotient = (REG_P (target)
4560 ? target : gen_reg_rtx (compute_mode));
4561 remainder = gen_reg_rtx (compute_mode);
4564 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4565 remainder, 0))
4567 /* This could be computed with a branch-less sequence.
4568 Save that for later. */
4569 rtx tem;
4570 rtx label = gen_label_rtx ();
4571 do_cmp_and_jump (remainder, const0_rtx, EQ,
4572 compute_mode, label);
4573 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4574 NULL_RTX, 0, OPTAB_WIDEN);
4575 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4576 expand_inc (quotient, const1_rtx);
4577 expand_dec (remainder, op1);
4578 emit_label (label);
4579 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4582 /* No luck with division elimination or divmod. Have to do it
4583 by conditionally adjusting op0 *and* the result. */
4585 rtx label1, label2, label3, label4, label5;
4586 rtx adjusted_op0;
4587 rtx tem;
4589 quotient = gen_reg_rtx (compute_mode);
4590 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4591 label1 = gen_label_rtx ();
4592 label2 = gen_label_rtx ();
4593 label3 = gen_label_rtx ();
4594 label4 = gen_label_rtx ();
4595 label5 = gen_label_rtx ();
4596 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4597 do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4598 compute_mode, label1);
4599 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4600 quotient, 0, OPTAB_LIB_WIDEN);
4601 if (tem != quotient)
4602 emit_move_insn (quotient, tem);
4603 emit_jump_insn (gen_jump (label5));
4604 emit_barrier ();
4605 emit_label (label1);
4606 expand_dec (adjusted_op0, const1_rtx);
4607 emit_jump_insn (gen_jump (label4));
4608 emit_barrier ();
4609 emit_label (label2);
4610 do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4611 compute_mode, label3);
4612 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4613 quotient, 0, OPTAB_LIB_WIDEN);
4614 if (tem != quotient)
4615 emit_move_insn (quotient, tem);
4616 emit_jump_insn (gen_jump (label5));
4617 emit_barrier ();
4618 emit_label (label3);
4619 expand_inc (adjusted_op0, const1_rtx);
4620 emit_label (label4);
4621 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4622 quotient, 0, OPTAB_LIB_WIDEN);
4623 if (tem != quotient)
4624 emit_move_insn (quotient, tem);
4625 expand_inc (quotient, const1_rtx);
4626 emit_label (label5);
4629 break;
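/* A minimal sketch of the ceiling-rounding compensation used above,
   again in terms of C's truncating division (no overflow assumed):

     int
     ceil_div (int x, int d)
     {
       int q = x / d;
       int r = x % d;
       if (r != 0 && (x ^ d) >= 0)
         q += 1;
       return q;
     }

   For unsigned operands the sign test disappears and the adjustment
   is simply q += (r != 0), matching the unsigned branch above.  */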
4631 case EXACT_DIV_EXPR:
4632 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4634 HOST_WIDE_INT d = INTVAL (op1);
4635 unsigned HOST_WIDE_INT ml;
4636 int pre_shift;
4637 rtx t1;
4639 pre_shift = floor_log2 (d & -d);
4640 ml = invert_mod2n (d >> pre_shift, size);
4641 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4642 build_int_cst (NULL_TREE, pre_shift),
4643 NULL_RTX, unsignedp);
4644 quotient = expand_mult (compute_mode, t1,
4645 gen_int_mode (ml, compute_mode),
4646 NULL_RTX, 1);
4648 insn = get_last_insn ();
4649 set_unique_reg_note (insn,
4650 REG_EQUAL,
4651 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4652 compute_mode,
4653 op0, op1));
4655 break;
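/* A minimal sketch of the exact-division expansion above: when the
   division is known to have no remainder, dividing by an odd constant
   reduces to multiplying by its inverse modulo 2**32 (shown for
   d == 5, whose inverse is 0xCCCCCCCD; assumes 32-bit unsigned int):

     unsigned int
     exact_div5 (unsigned int x)
     {
       return x * 0xCCCCCCCDU;
     }

   valid only when x is a multiple of 5.  For an even divisor the
   trailing zero bits are shifted out first (the pre_shift computed
   above) and the remaining odd factor is inverted.  */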
4657 case ROUND_DIV_EXPR:
4658 case ROUND_MOD_EXPR:
4659 if (unsignedp)
4661 rtx tem;
4662 rtx label;
4663 label = gen_label_rtx ();
4664 quotient = gen_reg_rtx (compute_mode);
4665 remainder = gen_reg_rtx (compute_mode);
4666 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4668 rtx tem;
4669 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4670 quotient, 1, OPTAB_LIB_WIDEN);
4671 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4672 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4673 remainder, 1, OPTAB_LIB_WIDEN);
4675 tem = plus_constant (op1, -1);
4676 tem = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4677 integer_one_node, NULL_RTX, 1);
4678 do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4679 expand_inc (quotient, const1_rtx);
4680 expand_dec (remainder, op1);
4681 emit_label (label);
4683 else
4685 rtx abs_rem, abs_op1, tem, mask;
4686 rtx label;
4687 label = gen_label_rtx ();
4688 quotient = gen_reg_rtx (compute_mode);
4689 remainder = gen_reg_rtx (compute_mode);
4690 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4692 rtx tem;
4693 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4694 quotient, 0, OPTAB_LIB_WIDEN);
4695 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4696 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4697 remainder, 0, OPTAB_LIB_WIDEN);
4699 abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4700 abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4701 tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4702 integer_one_node, NULL_RTX, 1);
4703 do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4704 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4705 NULL_RTX, 0, OPTAB_WIDEN);
4706 mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4707 build_int_cst (NULL_TREE, size - 1),
4708 NULL_RTX, 0);
4709 tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4710 NULL_RTX, 0, OPTAB_WIDEN);
4711 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4712 NULL_RTX, 0, OPTAB_WIDEN);
4713 expand_inc (quotient, tem);
4714 tem = expand_binop (compute_mode, xor_optab, mask, op1,
4715 NULL_RTX, 0, OPTAB_WIDEN);
4716 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4717 NULL_RTX, 0, OPTAB_WIDEN);
4718 expand_dec (remainder, tem);
4719 emit_label (label);
4721 return gen_lowpart (mode, rem_flag ? remainder : quotient);
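/* A minimal sketch of the round-to-nearest compensation used above,
   for the unsigned case, in terms of C's truncating division:

     unsigned int
     round_udiv (unsigned int x, unsigned int d)
     {
       unsigned int q = x / d;
       unsigned int r = x % d;
       if (r > (d - 1) / 2)
         q += 1;
       return q;
     }

   The signed branch above applies the same test to the absolute values
   of remainder and divisor and then adds or subtracts 1 according to
   the sign of op0 ^ op1.  */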
4723 default:
4724 gcc_unreachable ();
4727 if (quotient == 0)
4729 if (target && GET_MODE (target) != compute_mode)
4730 target = 0;
4732 if (rem_flag)
4734 /* Try to produce the remainder without producing the quotient.
4735 If we seem to have a divmod pattern that does not require widening,
4736 don't try widening here. We should really have a WIDEN argument
4737 to expand_twoval_binop, since what we'd really like to do here is
4738 1) try a mod insn in compute_mode
4739 2) try a divmod insn in compute_mode
4740 3) try a div insn in compute_mode and multiply-subtract to get
4741 remainder
4742 4) try the same things with widening allowed. */
4743 remainder
4744 = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4745 op0, op1, target,
4746 unsignedp,
4747 ((optab_handler (optab2, compute_mode)
4748 != CODE_FOR_nothing)
4749 ? OPTAB_DIRECT : OPTAB_WIDEN));
4750 if (remainder == 0)
4752 /* No luck there. Can we do remainder and divide at once
4753 without a library call? */
4754 remainder = gen_reg_rtx (compute_mode);
4755 if (! expand_twoval_binop ((unsignedp
4756 ? udivmod_optab
4757 : sdivmod_optab),
4758 op0, op1,
4759 NULL_RTX, remainder, unsignedp))
4760 remainder = 0;
4763 if (remainder)
4764 return gen_lowpart (mode, remainder);
4767 /* Produce the quotient. Try a quotient insn, but not a library call.
4768 If we have a divmod in this mode, use it in preference to widening
4769 the div (for this test we assume it will not fail). Note that optab2
4770 is set to the one of the two optabs that the call below will use. */
4771 quotient
4772 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4773 op0, op1, rem_flag ? NULL_RTX : target,
4774 unsignedp,
4775 ((optab_handler (optab2, compute_mode)
4776 != CODE_FOR_nothing)
4777 ? OPTAB_DIRECT : OPTAB_WIDEN));
4779 if (quotient == 0)
4781 /* No luck there. Try a quotient-and-remainder insn,
4782 keeping the quotient alone. */
4783 quotient = gen_reg_rtx (compute_mode);
4784 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4785 op0, op1,
4786 quotient, NULL_RTX, unsignedp))
4788 quotient = 0;
4789 if (! rem_flag)
4790 /* Still no luck. If we are not computing the remainder,
4791 use a library call for the quotient. */
4792 quotient = sign_expand_binop (compute_mode,
4793 udiv_optab, sdiv_optab,
4794 op0, op1, target,
4795 unsignedp, OPTAB_LIB_WIDEN);
4800 if (rem_flag)
4802 if (target && GET_MODE (target) != compute_mode)
4803 target = 0;
4805 if (quotient == 0)
4807 /* No divide instruction either. Use library for remainder. */
4808 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4809 op0, op1, target,
4810 unsignedp, OPTAB_LIB_WIDEN);
4811 /* No remainder function. Try a quotient-and-remainder
4812 function, keeping the remainder. */
4813 if (!remainder)
4815 remainder = gen_reg_rtx (compute_mode);
4816 if (!expand_twoval_binop_libfunc
4817 (unsignedp ? udivmod_optab : sdivmod_optab,
4818 op0, op1,
4819 NULL_RTX, remainder,
4820 unsignedp ? UMOD : MOD))
4821 remainder = NULL_RTX;
4824 else
4826 /* We divided. Now finish doing X - Y * (X / Y). */
4827 remainder = expand_mult (compute_mode, quotient, op1,
4828 NULL_RTX, unsignedp);
4829 remainder = expand_binop (compute_mode, sub_optab, op0,
4830 remainder, target, unsignedp,
4831 OPTAB_LIB_WIDEN);
4835 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4838 /* Return a tree node with data type TYPE, describing the value of X.
4839 Usually this is a VAR_DECL, if there is no obvious better choice.
4840 X may be an expression; however, we only support those expressions
4841 generated by loop.c. */
4843 tree
4844 make_tree (tree type, rtx x)
4846 tree t;
4848 switch (GET_CODE (x))
4850 case CONST_INT:
4852 HOST_WIDE_INT hi = 0;
4854 if (INTVAL (x) < 0
4855 && !(TYPE_UNSIGNED (type)
4856 && (GET_MODE_BITSIZE (TYPE_MODE (type))
4857 < HOST_BITS_PER_WIDE_INT)))
4858 hi = -1;
4860 t = build_int_cst_wide (type, INTVAL (x), hi);
4862 return t;
4865 case CONST_DOUBLE:
4866 if (GET_MODE (x) == VOIDmode)
4867 t = build_int_cst_wide (type,
4868 CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
4869 else
4871 REAL_VALUE_TYPE d;
4873 REAL_VALUE_FROM_CONST_DOUBLE (d, x);
4874 t = build_real (type, d);
4877 return t;
4879 case CONST_VECTOR:
4881 int units = CONST_VECTOR_NUNITS (x);
4882 tree itype = TREE_TYPE (type);
4883 tree t = NULL_TREE;
4884 int i;
4887 /* Build a tree with vector elements. */
4888 for (i = units - 1; i >= 0; --i)
4890 rtx elt = CONST_VECTOR_ELT (x, i);
4891 t = tree_cons (NULL_TREE, make_tree (itype, elt), t);
4894 return build_vector (type, t);
4897 case PLUS:
4898 return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4899 make_tree (type, XEXP (x, 1)));
4901 case MINUS:
4902 return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4903 make_tree (type, XEXP (x, 1)));
4905 case NEG:
4906 return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
4908 case MULT:
4909 return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
4910 make_tree (type, XEXP (x, 1)));
4912 case ASHIFT:
4913 return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
4914 make_tree (type, XEXP (x, 1)));
4916 case LSHIFTRT:
4917 t = unsigned_type_for (type);
4918 return fold_convert (type, build2 (RSHIFT_EXPR, t,
4919 make_tree (t, XEXP (x, 0)),
4920 make_tree (type, XEXP (x, 1))));
4922 case ASHIFTRT:
4923 t = signed_type_for (type);
4924 return fold_convert (type, build2 (RSHIFT_EXPR, t,
4925 make_tree (t, XEXP (x, 0)),
4926 make_tree (type, XEXP (x, 1))));
4928 case DIV:
4929 if (TREE_CODE (type) != REAL_TYPE)
4930 t = signed_type_for (type);
4931 else
4932 t = type;
4934 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
4935 make_tree (t, XEXP (x, 0)),
4936 make_tree (t, XEXP (x, 1))));
4937 case UDIV:
4938 t = unsigned_type_for (type);
4939 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
4940 make_tree (t, XEXP (x, 0)),
4941 make_tree (t, XEXP (x, 1))));
4943 case SIGN_EXTEND:
4944 case ZERO_EXTEND:
4945 t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
4946 GET_CODE (x) == ZERO_EXTEND);
4947 return fold_convert (type, make_tree (t, XEXP (x, 0)));
4949 case CONST:
4950 return make_tree (type, XEXP (x, 0));
4952 case SYMBOL_REF:
4953 t = SYMBOL_REF_DECL (x);
4954 if (t)
4955 return fold_convert (type, build_fold_addr_expr (t));
4956 /* else fall through. */
4958 default:
4959 t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
4961 /* If TYPE is a POINTER_TYPE, we might need to convert X from
4962 address mode to pointer mode. */
4963 if (POINTER_TYPE_P (type))
4964 x = convert_memory_address_addr_space
4965 (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
4967 /* Note that we do *not* use SET_DECL_RTL here, because we do not
4968 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
4969 t->decl_with_rtl.rtl = x;
4971 return t;
4975 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
4976 and returning TARGET.
4978 If TARGET is 0, a pseudo-register or constant is returned. */
4980 rtx
4981 expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
4983 rtx tem = 0;
4985 if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
4986 tem = simplify_binary_operation (AND, mode, op0, op1);
4987 if (tem == 0)
4988 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
4990 if (target == 0)
4991 target = tem;
4992 else if (tem != target)
4993 emit_move_insn (target, tem);
4994 return target;
4997 /* Helper function for emit_store_flag. */
4998 static rtx
4999 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5000 enum machine_mode mode, enum machine_mode compare_mode,
5001 int unsignedp, rtx x, rtx y, int normalizep,
5002 enum machine_mode target_mode)
5004 struct expand_operand ops[4];
5005 rtx op0, last, comparison, subtarget;
5006 enum machine_mode result_mode = insn_data[(int) icode].operand[0].mode;
5008 last = get_last_insn ();
5009 x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5010 y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5011 if (!x || !y)
5013 delete_insns_since (last);
5014 return NULL_RTX;
5017 if (target_mode == VOIDmode)
5018 target_mode = result_mode;
5019 if (!target)
5020 target = gen_reg_rtx (target_mode);
5022 comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5024 create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5025 create_fixed_operand (&ops[1], comparison);
5026 create_fixed_operand (&ops[2], x);
5027 create_fixed_operand (&ops[3], y);
5028 if (!maybe_expand_insn (icode, 4, ops))
5030 delete_insns_since (last);
5031 return NULL_RTX;
5033 subtarget = ops[0].value;
5035 /* If we are converting to a wider mode, first convert to
5036 TARGET_MODE, then normalize. This produces better combining
5037 opportunities on machines that have a SIGN_EXTRACT when we are
5038 testing a single bit. This mostly benefits the 68k.
5040 If STORE_FLAG_VALUE does not have the sign bit set when
5041 interpreted in MODE, we can do this conversion as unsigned, which
5042 is usually more efficient. */
5043 if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
5045 convert_move (target, subtarget,
5046 (GET_MODE_BITSIZE (result_mode) <= HOST_BITS_PER_WIDE_INT)
5047 && 0 == (STORE_FLAG_VALUE
5048 & ((HOST_WIDE_INT) 1
5049 << (GET_MODE_BITSIZE (result_mode) -1))));
5050 op0 = target;
5051 result_mode = target_mode;
5053 else
5054 op0 = subtarget;
5056 /* If we want to keep subexpressions around, don't reuse our last
5057 target. */
5058 if (optimize)
5059 subtarget = 0;
5061 /* Now normalize to the proper value in MODE. Sometimes we don't
5062 have to do anything. */
5063 if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5065 /* STORE_FLAG_VALUE might be the most negative number, so write
5066 the comparison this way to avoid a compile-time warning. */
5067 else if (- normalizep == STORE_FLAG_VALUE)
5068 op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5070 /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5071 it hard to use a value of just the sign bit due to ANSI integer
5072 constant typing rules. */
5073 else if (GET_MODE_BITSIZE (result_mode) <= HOST_BITS_PER_WIDE_INT
5074 && (STORE_FLAG_VALUE
5075 & ((HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (result_mode) - 1))))
5076 op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5077 size_int (GET_MODE_BITSIZE (result_mode) - 1), subtarget,
5078 normalizep == 1);
5079 else
5081 gcc_assert (STORE_FLAG_VALUE & 1);
5083 op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5084 if (normalizep == -1)
5085 op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5088 /* If we were converting to a smaller mode, do the conversion now. */
5089 if (target_mode != result_mode)
5091 convert_move (target, op0, 0);
5092 return target;
5094 else
5095 return op0;
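/* A minimal sketch of the normalization performed above, assuming a
   32-bit int: if the cstore pattern yields STORE_FLAG_VALUE == -1 and
   the caller asked for normalizep == 1, the raw 0/-1 result is simply
   negated,

     int
     normalize (int raw)
     {
       return -raw;
     }

   turning 0/-1 into 0/1; if STORE_FLAG_VALUE is just the sign bit, a
   logical (normalizep == 1) or arithmetic (normalizep == -1) right
   shift by 31 is used instead, and otherwise the result is masked
   with 1 and possibly negated.  */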
5099 /* A subroutine of emit_store_flag only including "tricks" that do not
5100 need a recursive call. These are kept separate to avoid infinite
5101 loops. */
5103 static rtx
5104 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5105 enum machine_mode mode, int unsignedp, int normalizep,
5106 enum machine_mode target_mode)
5108 rtx subtarget;
5109 enum insn_code icode;
5110 enum machine_mode compare_mode;
5111 enum mode_class mclass;
5112 enum rtx_code scode;
5113 rtx tem;
5115 if (unsignedp)
5116 code = unsigned_condition (code);
5117 scode = swap_condition (code);
5119 /* If one operand is constant, make it the second one. Only do this
5120 if the other operand is not constant as well. */
5122 if (swap_commutative_operands_p (op0, op1))
5124 tem = op0;
5125 op0 = op1;
5126 op1 = tem;
5127 code = swap_condition (code);
5130 if (mode == VOIDmode)
5131 mode = GET_MODE (op0);
5133 /* For some comparisons with 1 and -1, we can convert this to
5134 comparisons with zero. This will often produce more opportunities for
5135 store-flag insns. */
5137 switch (code)
5139 case LT:
5140 if (op1 == const1_rtx)
5141 op1 = const0_rtx, code = LE;
5142 break;
5143 case LE:
5144 if (op1 == constm1_rtx)
5145 op1 = const0_rtx, code = LT;
5146 break;
5147 case GE:
5148 if (op1 == const1_rtx)
5149 op1 = const0_rtx, code = GT;
5150 break;
5151 case GT:
5152 if (op1 == constm1_rtx)
5153 op1 = const0_rtx, code = GE;
5154 break;
5155 case GEU:
5156 if (op1 == const1_rtx)
5157 op1 = const0_rtx, code = NE;
5158 break;
5159 case LTU:
5160 if (op1 == const1_rtx)
5161 op1 = const0_rtx, code = EQ;
5162 break;
5163 default:
5164 break;
5167 /* If we are comparing a double-word integer with zero or -1, we can
5168 convert the comparison into one involving a single word. */
5169 if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5170 && GET_MODE_CLASS (mode) == MODE_INT
5171 && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5173 if ((code == EQ || code == NE)
5174 && (op1 == const0_rtx || op1 == constm1_rtx))
5176 rtx op00, op01;
5178 /* Do a logical OR or AND of the two words and compare the
5179 result. */
5180 op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5181 op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5182 tem = expand_binop (word_mode,
5183 op1 == const0_rtx ? ior_optab : and_optab,
5184 op00, op01, NULL_RTX, unsignedp,
5185 OPTAB_DIRECT);
5187 if (tem != 0)
5188 tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5189 unsignedp, normalizep);
5191 else if ((code == LT || code == GE) && op1 == const0_rtx)
5193 rtx op0h;
5195 /* If testing the sign bit, can just test on high word. */
5196 op0h = simplify_gen_subreg (word_mode, op0, mode,
5197 subreg_highpart_offset (word_mode,
5198 mode));
5199 tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5200 unsignedp, normalizep);
5202 else
5203 tem = NULL_RTX;
5205 if (tem)
5207 if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5208 return tem;
5209 if (!target)
5210 target = gen_reg_rtx (target_mode);
5212 convert_move (target, tem,
5213 0 == ((normalizep ? normalizep : STORE_FLAG_VALUE)
5214 & ((HOST_WIDE_INT) 1
5215 << (GET_MODE_BITSIZE (word_mode) -1))));
5216 return target;
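/* A minimal sketch of the double-word comparisons above, for a 64-bit
   value held as two 32-bit words (hi:lo):

     int
     dw_eq0 (unsigned int lo, unsigned int hi)
     {
       return (lo | hi) == 0;
     }

     int
     dw_lt0 (unsigned int lo, int hi)
     {
       (void) lo;
       return hi < 0;
     }

   Equality against -1 ANDs the two words instead of ORing them, and
   LT/GE against zero only needs the sign bit of the high word, as
   selected above.  */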
5220 /* If this is A < 0 or A >= 0, we can do this by taking the ones
5221 complement of A (for GE) and shifting the sign bit to the low bit. */
5222 if (op1 == const0_rtx && (code == LT || code == GE)
5223 && GET_MODE_CLASS (mode) == MODE_INT
5224 && (normalizep || STORE_FLAG_VALUE == 1
5225 || (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
5226 && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
5227 == ((unsigned HOST_WIDE_INT) 1
5228 << (GET_MODE_BITSIZE (mode) - 1))))))
5230 subtarget = target;
5232 if (!target)
5233 target_mode = mode;
5235 /* If the result is to be wider than OP0, it is best to convert it
5236 first. If it is to be narrower, it is *incorrect* to convert it
5237 first. */
5238 else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5240 op0 = convert_modes (target_mode, mode, op0, 0);
5241 mode = target_mode;
5244 if (target_mode != mode)
5245 subtarget = 0;
5247 if (code == GE)
5248 op0 = expand_unop (mode, one_cmpl_optab, op0,
5249 ((STORE_FLAG_VALUE == 1 || normalizep)
5250 ? 0 : subtarget), 0);
5252 if (STORE_FLAG_VALUE == 1 || normalizep)
5253 /* If we are supposed to produce a 0/1 value, we want to do
5254 a logical shift from the sign bit to the low-order bit; for
5255 a -1/0 value, we do an arithmetic shift. */
5256 op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5257 size_int (GET_MODE_BITSIZE (mode) - 1),
5258 subtarget, normalizep != -1);
5260 if (mode != target_mode)
5261 op0 = convert_modes (target_mode, mode, op0, 0);
5263 return op0;
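/* A minimal sketch of the sign-bit trick above, assuming a 32-bit int:

     int
     lt0 (int x)
     {
       return (int) ((unsigned int) x >> 31);
     }

     int
     ge0 (int x)
     {
       return (int) ((unsigned int) ~x >> 31);
     }

   both yield 0 or 1; when a 0/-1 value is wanted (normalizep == -1)
   the shift is done arithmetically instead, as selected just above.  */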
5266 mclass = GET_MODE_CLASS (mode);
5267 for (compare_mode = mode; compare_mode != VOIDmode;
5268 compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5270 enum machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5271 icode = optab_handler (cstore_optab, optab_mode);
5272 if (icode != CODE_FOR_nothing)
5274 do_pending_stack_adjust ();
5275 tem = emit_cstore (target, icode, code, mode, compare_mode,
5276 unsignedp, op0, op1, normalizep, target_mode);
5277 if (tem)
5278 return tem;
5280 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5282 tem = emit_cstore (target, icode, scode, mode, compare_mode,
5283 unsignedp, op1, op0, normalizep, target_mode);
5284 if (tem)
5285 return tem;
5287 break;
5291 return 0;
5294 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5295 and storing in TARGET. Normally return TARGET.
5296 Return 0 if that cannot be done.
5298 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
5299 it is VOIDmode, they cannot both be CONST_INT.
5301 UNSIGNEDP is for the case where we have to widen the operands
5302 to perform the operation. It says to use zero-extension.
5304 NORMALIZEP is 1 if we should convert the result to be either zero
5305 or one. NORMALIZEP is -1 if we should convert the result to be
5306 either zero or -1. If NORMALIZEP is zero, the result will be left
5307 "raw" out of the scc insn. */
5309 rtx
5310 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5311 enum machine_mode mode, int unsignedp, int normalizep)
5313 enum machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5314 enum rtx_code rcode;
5315 rtx subtarget;
5316 rtx tem, last, trueval;
5318 tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5319 target_mode);
5320 if (tem)
5321 return tem;
5323 /* If we reached here, we can't do this with a scc insn; however, there
5324 are some comparisons that can be done in other ways. Don't do any
5325 of these cases if branches are very cheap. */
5326 if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5327 return 0;
5329 /* See what we need to return. We can only return a 1, -1, or the
5330 sign bit. */
5332 if (normalizep == 0)
5334 if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5335 normalizep = STORE_FLAG_VALUE;
5337 else if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
5338 && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
5339 == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))
5341 else
5342 return 0;
5345 last = get_last_insn ();
5347 /* If optimizing, use different pseudo registers for each insn, instead
5348 of reusing the same pseudo. This leads to better CSE, but slows
5349 down the compiler, since there are more pseudos. */
5350 subtarget = (!optimize
5351 && (target_mode == mode)) ? target : NULL_RTX;
5352 trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5354 /* For floating-point comparisons, try the reverse comparison or try
5355 changing the "orderedness" of the comparison. */
5356 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5358 enum rtx_code first_code;
5359 bool and_them;
5361 rcode = reverse_condition_maybe_unordered (code);
5362 if (can_compare_p (rcode, mode, ccp_store_flag)
5363 && (code == ORDERED || code == UNORDERED
5364 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5365 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5367 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5368 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5370 /* For the reverse comparison, use either an addition or a XOR. */
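/* E.g. with STORE_FLAG_VALUE == 1 and normalizep == -1, the reversed
   comparison yields 1 when CODE is false and 0 when it is true; adding
   normalizep turns that into 0 and -1 respectively.  With normalizep == 1
   the reversed result is instead XORed with trueval (1), flipping 1/0
   into 0/1.  */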
5371 if (want_add
5372 && rtx_cost (GEN_INT (normalizep), PLUS,
5373 optimize_insn_for_speed_p ()) == 0)
5375 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5376 STORE_FLAG_VALUE, target_mode);
5377 if (tem)
5378 return expand_binop (target_mode, add_optab, tem,
5379 GEN_INT (normalizep),
5380 target, 0, OPTAB_WIDEN);
5382 else if (!want_add
5383 && rtx_cost (trueval, XOR,
5384 optimize_insn_for_speed_p ()) == 0)
5386 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5387 normalizep, target_mode);
5388 if (tem)
5389 return expand_binop (target_mode, xor_optab, tem, trueval,
5390 target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
5394 delete_insns_since (last);
5396 /* Cannot split ORDERED and UNORDERED, only try the above trick. */
5397 if (code == ORDERED || code == UNORDERED)
5398 return 0;
5400 and_them = split_comparison (code, mode, &first_code, &code);
5402 /* If there are no NaNs, the first comparison should always fall through.
5403 Effectively change the comparison to the other one. */
5404 if (!HONOR_NANS (mode))
5406 gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5407 return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5408 target_mode);
5411 #ifdef HAVE_conditional_move
5412 /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5413 conditional move. */
5414 tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5415 normalizep, target_mode);
5416 if (tem == 0)
5417 return 0;
5419 if (and_them)
5420 tem = emit_conditional_move (target, code, op0, op1, mode,
5421 tem, const0_rtx, GET_MODE (tem), 0);
5422 else
5423 tem = emit_conditional_move (target, code, op0, op1, mode,
5424 trueval, tem, GET_MODE (tem), 0);
5426 if (tem == 0)
5427 delete_insns_since (last);
5428 return tem;
5429 #else
5430 return 0;
5431 #endif
5434 /* The remaining tricks only apply to integer comparisons. */
5436 if (GET_MODE_CLASS (mode) != MODE_INT)
5437 return 0;
5439 /* If this is an equality comparison of integers, we can try to exclusive-or
5440 (or subtract) the two operands and use a recursive call to try the
5441 comparison with zero. Don't do any of these cases if branches are
5442 very cheap. */
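/* This relies on A == B being equivalent to (A ^ B) == 0 and to
   (A - B) == 0, so the original comparison becomes one against zero,
   for which the surrounding code knows more tricks.  */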
5444 if ((code == EQ || code == NE) && op1 != const0_rtx)
5446 tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5447 OPTAB_WIDEN);
5449 if (tem == 0)
5450 tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5451 OPTAB_WIDEN);
5452 if (tem != 0)
5453 tem = emit_store_flag (target, code, tem, const0_rtx,
5454 mode, unsignedp, normalizep);
5455 if (tem != 0)
5456 return tem;
5458 delete_insns_since (last);
5461 /* For integer comparisons, try the reverse comparison. However, for
5462 small X and if we'd have to extend anyway, implementing "X != 0"
5463 as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0". */
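/* With X zero-extended to a 32-bit word, X is non-negative, so -X is
   negative exactly when X != 0; the arithmetic shift by 31 then yields
   -1 for nonzero X and 0 for zero X, giving "X != 0" in -1/0 form with
   no extra inversion.  */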
5464 rcode = reverse_condition (code);
5465 if (can_compare_p (rcode, mode, ccp_store_flag)
5466 && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5467 && code == NE
5468 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5469 && op1 == const0_rtx))
5471 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5472 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5474 /* Again, for the reverse comparison, use either an addition or a XOR. */
5475 if (want_add
5476 && rtx_cost (GEN_INT (normalizep), PLUS,
5477 optimize_insn_for_speed_p ()) == 0)
5479 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5480 STORE_FLAG_VALUE, target_mode);
5481 if (tem != 0)
5482 tem = expand_binop (target_mode, add_optab, tem,
5483 GEN_INT (normalizep), target, 0, OPTAB_WIDEN);
5485 else if (!want_add
5486 && rtx_cost (trueval, XOR,
5487 optimize_insn_for_speed_p ()) == 0)
5489 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5490 normalizep, target_mode);
5491 if (tem != 0)
5492 tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5493 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5496 if (tem != 0)
5497 return tem;
5498 delete_insns_since (last);
5501 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5502 the constant zero. Reject all other comparisons at this point. Only
5503 do LE and GT if branches are expensive since they are expensive on
5504 2-operand machines. */
5506 if (op1 != const0_rtx
5507 || (code != EQ && code != NE
5508 && (BRANCH_COST (optimize_insn_for_speed_p (),
5509 false) <= 1 || (code != LE && code != GT))))
5510 return 0;
5512 /* Try to put the result of the comparison in the sign bit. Assume we can't
5513 do the necessary operation below. */
5515 tem = 0;
5517 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
5518 the sign bit set. */
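/* E.g. A = 0 gives 0 | -1 = -1 (sign bit set); A = 5 gives 5 | 4 = 5
   (sign bit clear); any negative A already has the sign bit set, and
   ORing cannot clear it.  */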
5520 if (code == LE)
5522 /* This is destructive, so SUBTARGET can't be OP0. */
5523 if (rtx_equal_p (subtarget, op0))
5524 subtarget = 0;
5526 tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5527 OPTAB_WIDEN);
5528 if (tem)
5529 tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5530 OPTAB_WIDEN);
5533 /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5534 number of bits in the mode of OP0, minus one. */
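/* E.g. for A = 5 the arithmetic shift gives 0 and 0 - 5 = -5 (sign bit
   set); for A = 0 the result is 0; for A = -3 the shift gives -1 and
   -1 - (-3) = 2 (sign bit clear).  So the sign bit is set exactly when
   A > 0.  */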
5536 if (code == GT)
5538 if (rtx_equal_p (subtarget, op0))
5539 subtarget = 0;
5541 tem = expand_shift (RSHIFT_EXPR, mode, op0,
5542 size_int (GET_MODE_BITSIZE (mode) - 1),
5543 subtarget, 0);
5544 tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5545 OPTAB_WIDEN);
5548 if (code == EQ || code == NE)
5550 /* For EQ or NE, one way to do the comparison is to apply an operation
5551 that converts the operand into a positive number if it is nonzero
5552 or zero if it was originally zero. Then, for EQ, we subtract 1 and
5553 for NE we negate. This puts the result in the sign bit. Then we
5554 normalize with a shift, if needed.
5556 Two operations that can do the above actions are ABS and FFS, so try
5557 them. If that doesn't work, and MODE is smaller than a full word,
5558 we can use zero-extension to the wider mode (an unsigned conversion)
5559 as the operation. */
5561 /* Note that ABS doesn't yield a positive number for INT_MIN, but
5562 that is compensated by the subsequent overflow when subtracting
5563 one / negating. */
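/* E.g. with ABS and A = -7, tem = 7: for NE, -7 has the sign bit set
   (A != 0 is true); for EQ, 7 - 1 = 6 leaves it clear.  With A = 0,
   tem = 0: negating gives 0 (NE false) and subtracting 1 gives -1
   (EQ true).  FFS works the same way, since it too is zero exactly
   when A is zero and positive otherwise.  */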
5565 if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5566 tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5567 else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5568 tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5569 else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5571 tem = convert_modes (word_mode, mode, op0, 1);
5572 mode = word_mode;
5575 if (tem != 0)
5577 if (code == EQ)
5578 tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5579 0, OPTAB_WIDEN);
5580 else
5581 tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5584 /* If we couldn't do it that way, for NE we can "or" the two's complement
5585 of the value with itself. For EQ, we take the one's complement of
5586 that "or", which is an extra insn, so we only handle EQ if branches
5587 are expensive. */
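/* For nonzero A at least one of A and -A is negative, so (-A) | A has
   the sign bit set, while 0 | 0 = 0 leaves it clear; complementing the
   result flips that for EQ.  */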
5589 if (tem == 0
5590 && (code == NE
5591 || BRANCH_COST (optimize_insn_for_speed_p (),
5592 false) > 1))
5594 if (rtx_equal_p (subtarget, op0))
5595 subtarget = 0;
5597 tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5598 tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5599 OPTAB_WIDEN);
5601 if (tem && code == EQ)
5602 tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5606 if (tem && normalizep)
5607 tem = expand_shift (RSHIFT_EXPR, mode, tem,
5608 size_int (GET_MODE_BITSIZE (mode) - 1),
5609 subtarget, normalizep == 1);
5611 if (tem)
5613 if (!target)
5615 else if (GET_MODE (tem) != target_mode)
5617 convert_move (target, tem, 0);
5618 tem = target;
5620 else if (!subtarget)
5622 emit_move_insn (target, tem);
5623 tem = target;
5626 else
5627 delete_insns_since (last);
5629 return tem;
5632 /* Like emit_store_flag, but always succeeds. */
5635 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5636 enum machine_mode mode, int unsignedp, int normalizep)
5638 rtx tem, label;
5639 rtx trueval, falseval;
5641 /* First see if emit_store_flag can do the job. */
5642 tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5643 if (tem != 0)
5644 return tem;
5646 if (!target)
5647 target = gen_reg_rtx (word_mode);
5649 /* If this failed, we have to do this with set/compare/jump/set code.
5650 For foo != 0, if foo is in OP0, just replace it with 1 if nonzero. */
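/* The sequence emitted below is effectively
       if (target == 0) goto label;
       target = trueval;
     label:
   so a zero OP0 is left alone and any nonzero value is overwritten with
   the normalized flag value.  */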
5651 trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5652 if (code == NE
5653 && GET_MODE_CLASS (mode) == MODE_INT
5654 && REG_P (target)
5655 && op0 == target
5656 && op1 == const0_rtx)
5658 label = gen_label_rtx ();
5659 do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp,
5660 mode, NULL_RTX, NULL_RTX, label, -1);
5661 emit_move_insn (target, trueval);
5662 emit_label (label);
5663 return target;
5666 if (!REG_P (target)
5667 || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5668 target = gen_reg_rtx (GET_MODE (target));
5670 /* Jump in the right direction if the target cannot implement CODE
5671 but can jump on its reverse condition. */
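/* The sequence at the end stores trueval, jumps over a store of falseval
   when the comparison holds, and falls through otherwise; when only the
   reverse condition can be jumped on, exchanging trueval and falseval
   below keeps the final value the same.  */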
5672 falseval = const0_rtx;
5673 if (! can_compare_p (code, mode, ccp_jump)
5674 && (! FLOAT_MODE_P (mode)
5675 || code == ORDERED || code == UNORDERED
5676 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5677 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5679 enum rtx_code rcode;
5680 if (FLOAT_MODE_P (mode))
5681 rcode = reverse_condition_maybe_unordered (code);
5682 else
5683 rcode = reverse_condition (code);
5685 /* Canonicalize to UNORDERED for the libcall. */
5686 if (can_compare_p (rcode, mode, ccp_jump)
5687 || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5689 falseval = trueval;
5690 trueval = const0_rtx;
5691 code = rcode;
5695 emit_move_insn (target, trueval);
5696 label = gen_label_rtx ();
5697 do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5698 NULL_RTX, label, -1);
5700 emit_move_insn (target, falseval);
5701 emit_label (label);
5703 return target;
5706 /* Perform possibly multi-word comparison and conditional jump to LABEL
5707 if ARG1 OP ARG2 is true, where ARG1 and ARG2 are of mode MODE. This is
5708 now a thin wrapper around do_compare_rtx_and_jump. */
5710 static void
5711 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
5712 rtx label)
5714 int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5715 do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode,
5716 NULL_RTX, NULL_RTX, label, -1);