PR rtl-optimization/53827
[official-gcc.git] / gcc / expmed.c
blobcec8d23da1a4a425a0dbc7c30c1e6bd46c375db8
1 /* Medium-level subroutines: convert bit-field store and extract
2 and shifts, multiplies and divides to rtl instructions.
3 Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
4 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
5 2011, 2012
6 Free Software Foundation, Inc.
8 This file is part of GCC.
10 GCC is free software; you can redistribute it and/or modify it under
11 the terms of the GNU General Public License as published by the Free
12 Software Foundation; either version 3, or (at your option) any later
13 version.
15 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
16 WARRANTY; without even the implied warranty of MERCHANTABILITY or
17 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 for more details.
20 You should have received a copy of the GNU General Public License
21 along with GCC; see the file COPYING3. If not see
22 <http://www.gnu.org/licenses/>. */
25 #include "config.h"
26 #include "system.h"
27 #include "coretypes.h"
28 #include "tm.h"
29 #include "diagnostic-core.h"
30 #include "rtl.h"
31 #include "tree.h"
32 #include "tm_p.h"
33 #include "flags.h"
34 #include "insn-config.h"
35 #include "expr.h"
36 #include "optabs.h"
37 #include "recog.h"
38 #include "langhooks.h"
39 #include "df.h"
40 #include "target.h"
41 #include "expmed.h"
43 struct target_expmed default_target_expmed;
44 #if SWITCHABLE_TARGET
45 struct target_expmed *this_target_expmed = &default_target_expmed;
46 #endif
48 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
49 unsigned HOST_WIDE_INT,
50 unsigned HOST_WIDE_INT,
51 unsigned HOST_WIDE_INT,
52 unsigned HOST_WIDE_INT,
53 rtx);
54 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
55 unsigned HOST_WIDE_INT,
56 unsigned HOST_WIDE_INT,
57 unsigned HOST_WIDE_INT,
58 rtx);
59 static rtx extract_fixed_bit_field (enum machine_mode, rtx,
60 unsigned HOST_WIDE_INT,
61 unsigned HOST_WIDE_INT,
62 unsigned HOST_WIDE_INT, rtx, int, bool);
63 static rtx mask_rtx (enum machine_mode, int, int, int);
64 static rtx lshift_value (enum machine_mode, rtx, int, int);
65 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
66 unsigned HOST_WIDE_INT, int);
67 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
68 static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
69 static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
71 /* Test whether a value is zero of a power of two. */
72 #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
74 #ifndef SLOW_UNALIGNED_ACCESS
75 #define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
76 #endif
79 /* Reduce conditional compilation elsewhere. */
80 #ifndef HAVE_insv
81 #define HAVE_insv 0
82 #define CODE_FOR_insv CODE_FOR_nothing
83 #define gen_insv(a,b,c,d) NULL_RTX
84 #endif
85 #ifndef HAVE_extv
86 #define HAVE_extv 0
87 #define CODE_FOR_extv CODE_FOR_nothing
88 #define gen_extv(a,b,c,d) NULL_RTX
89 #endif
90 #ifndef HAVE_extzv
91 #define HAVE_extzv 0
92 #define CODE_FOR_extzv CODE_FOR_nothing
93 #define gen_extzv(a,b,c,d) NULL_RTX
94 #endif
96 struct init_expmed_rtl
98 struct rtx_def reg; rtunion reg_fld[2];
99 struct rtx_def plus; rtunion plus_fld1;
100 struct rtx_def neg;
101 struct rtx_def mult; rtunion mult_fld1;
102 struct rtx_def sdiv; rtunion sdiv_fld1;
103 struct rtx_def udiv; rtunion udiv_fld1;
104 struct rtx_def zext;
105 struct rtx_def sdiv_32; rtunion sdiv_32_fld1;
106 struct rtx_def smod_32; rtunion smod_32_fld1;
107 struct rtx_def wide_mult; rtunion wide_mult_fld1;
108 struct rtx_def wide_lshr; rtunion wide_lshr_fld1;
109 struct rtx_def wide_trunc;
110 struct rtx_def shift; rtunion shift_fld1;
111 struct rtx_def shift_mult; rtunion shift_mult_fld1;
112 struct rtx_def shift_add; rtunion shift_add_fld1;
113 struct rtx_def shift_sub0; rtunion shift_sub0_fld1;
114 struct rtx_def shift_sub1; rtunion shift_sub1_fld1;
116 rtx pow2[MAX_BITS_PER_WORD];
117 rtx cint[MAX_BITS_PER_WORD];
120 static void
121 init_expmed_one_mode (struct init_expmed_rtl *all,
122 enum machine_mode mode, int speed)
124 int m, n, mode_bitsize;
126 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
128 PUT_MODE (&all->reg, mode);
129 PUT_MODE (&all->plus, mode);
130 PUT_MODE (&all->neg, mode);
131 PUT_MODE (&all->mult, mode);
132 PUT_MODE (&all->sdiv, mode);
133 PUT_MODE (&all->udiv, mode);
134 PUT_MODE (&all->sdiv_32, mode);
135 PUT_MODE (&all->smod_32, mode);
136 PUT_MODE (&all->wide_trunc, mode);
137 PUT_MODE (&all->shift, mode);
138 PUT_MODE (&all->shift_mult, mode);
139 PUT_MODE (&all->shift_add, mode);
140 PUT_MODE (&all->shift_sub0, mode);
141 PUT_MODE (&all->shift_sub1, mode);
143 add_cost[speed][mode] = set_src_cost (&all->plus, speed);
144 neg_cost[speed][mode] = set_src_cost (&all->neg, speed);
145 mul_cost[speed][mode] = set_src_cost (&all->mult, speed);
146 sdiv_cost[speed][mode] = set_src_cost (&all->sdiv, speed);
147 udiv_cost[speed][mode] = set_src_cost (&all->udiv, speed);
149 sdiv_pow2_cheap[speed][mode] = (set_src_cost (&all->sdiv_32, speed)
150 <= 2 * add_cost[speed][mode]);
151 smod_pow2_cheap[speed][mode] = (set_src_cost (&all->smod_32, speed)
152 <= 4 * add_cost[speed][mode]);
154 shift_cost[speed][mode][0] = 0;
155 shiftadd_cost[speed][mode][0] = shiftsub0_cost[speed][mode][0]
156 = shiftsub1_cost[speed][mode][0] = add_cost[speed][mode];
158 n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
159 for (m = 1; m < n; m++)
161 XEXP (&all->shift, 1) = all->cint[m];
162 XEXP (&all->shift_mult, 1) = all->pow2[m];
164 shift_cost[speed][mode][m] = set_src_cost (&all->shift, speed);
165 shiftadd_cost[speed][mode][m] = set_src_cost (&all->shift_add, speed);
166 shiftsub0_cost[speed][mode][m] = set_src_cost (&all->shift_sub0, speed);
167 shiftsub1_cost[speed][mode][m] = set_src_cost (&all->shift_sub1, speed);
170 if (SCALAR_INT_MODE_P (mode))
172 enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
174 if (wider_mode != VOIDmode)
176 PUT_MODE (&all->zext, wider_mode);
177 PUT_MODE (&all->wide_mult, wider_mode);
178 PUT_MODE (&all->wide_lshr, wider_mode);
179 XEXP (&all->wide_lshr, 1) = GEN_INT (mode_bitsize);
181 mul_widen_cost[speed][wider_mode]
182 = set_src_cost (&all->wide_mult, speed);
183 mul_highpart_cost[speed][mode]
184 = set_src_cost (&all->wide_trunc, speed);
189 void
190 init_expmed (void)
192 struct init_expmed_rtl all;
193 enum machine_mode mode;
194 int m, speed;
196 memset (&all, 0, sizeof all);
197 for (m = 1; m < MAX_BITS_PER_WORD; m++)
199 all.pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
200 all.cint[m] = GEN_INT (m);
203 PUT_CODE (&all.reg, REG);
204 /* Avoid using hard regs in ways which may be unsupported. */
205 SET_REGNO (&all.reg, LAST_VIRTUAL_REGISTER + 1);
207 PUT_CODE (&all.plus, PLUS);
208 XEXP (&all.plus, 0) = &all.reg;
209 XEXP (&all.plus, 1) = &all.reg;
211 PUT_CODE (&all.neg, NEG);
212 XEXP (&all.neg, 0) = &all.reg;
214 PUT_CODE (&all.mult, MULT);
215 XEXP (&all.mult, 0) = &all.reg;
216 XEXP (&all.mult, 1) = &all.reg;
218 PUT_CODE (&all.sdiv, DIV);
219 XEXP (&all.sdiv, 0) = &all.reg;
220 XEXP (&all.sdiv, 1) = &all.reg;
222 PUT_CODE (&all.udiv, UDIV);
223 XEXP (&all.udiv, 0) = &all.reg;
224 XEXP (&all.udiv, 1) = &all.reg;
226 PUT_CODE (&all.sdiv_32, DIV);
227 XEXP (&all.sdiv_32, 0) = &all.reg;
228 XEXP (&all.sdiv_32, 1) = 32 < MAX_BITS_PER_WORD ? all.cint[32] : GEN_INT (32);
230 PUT_CODE (&all.smod_32, MOD);
231 XEXP (&all.smod_32, 0) = &all.reg;
232 XEXP (&all.smod_32, 1) = XEXP (&all.sdiv_32, 1);
234 PUT_CODE (&all.zext, ZERO_EXTEND);
235 XEXP (&all.zext, 0) = &all.reg;
237 PUT_CODE (&all.wide_mult, MULT);
238 XEXP (&all.wide_mult, 0) = &all.zext;
239 XEXP (&all.wide_mult, 1) = &all.zext;
241 PUT_CODE (&all.wide_lshr, LSHIFTRT);
242 XEXP (&all.wide_lshr, 0) = &all.wide_mult;
244 PUT_CODE (&all.wide_trunc, TRUNCATE);
245 XEXP (&all.wide_trunc, 0) = &all.wide_lshr;
247 PUT_CODE (&all.shift, ASHIFT);
248 XEXP (&all.shift, 0) = &all.reg;
250 PUT_CODE (&all.shift_mult, MULT);
251 XEXP (&all.shift_mult, 0) = &all.reg;
253 PUT_CODE (&all.shift_add, PLUS);
254 XEXP (&all.shift_add, 0) = &all.shift_mult;
255 XEXP (&all.shift_add, 1) = &all.reg;
257 PUT_CODE (&all.shift_sub0, MINUS);
258 XEXP (&all.shift_sub0, 0) = &all.shift_mult;
259 XEXP (&all.shift_sub0, 1) = &all.reg;
261 PUT_CODE (&all.shift_sub1, MINUS);
262 XEXP (&all.shift_sub1, 0) = &all.reg;
263 XEXP (&all.shift_sub1, 1) = &all.shift_mult;
265 for (speed = 0; speed < 2; speed++)
267 crtl->maybe_hot_insn_p = speed;
268 zero_cost[speed] = set_src_cost (const0_rtx, speed);
270 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
271 mode != VOIDmode;
272 mode = GET_MODE_WIDER_MODE (mode))
273 init_expmed_one_mode (&all, mode, speed);
275 for (mode = GET_CLASS_NARROWEST_MODE (MODE_VECTOR_INT);
276 mode != VOIDmode;
277 mode = GET_MODE_WIDER_MODE (mode))
278 init_expmed_one_mode (&all, mode, speed);
281 if (alg_hash_used_p)
282 memset (alg_hash, 0, sizeof (alg_hash));
283 else
284 alg_hash_used_p = true;
285 default_rtl_profile ();
288 /* Return an rtx representing minus the value of X.
289 MODE is the intended mode of the result,
290 useful if X is a CONST_INT. */
293 negate_rtx (enum machine_mode mode, rtx x)
295 rtx result = simplify_unary_operation (NEG, mode, x, mode);
297 if (result == 0)
298 result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
300 return result;
303 /* Report on the availability of insv/extv/extzv and the desired mode
304 of each of their operands. Returns MAX_MACHINE_MODE if HAVE_foo
305 is false; else the mode of the specified operand. If OPNO is -1,
306 all the caller cares about is whether the insn is available. */
307 enum machine_mode
308 mode_for_extraction (enum extraction_pattern pattern, int opno)
310 const struct insn_data_d *data;
312 switch (pattern)
314 case EP_insv:
315 if (HAVE_insv)
317 data = &insn_data[CODE_FOR_insv];
318 break;
320 return MAX_MACHINE_MODE;
322 case EP_extv:
323 if (HAVE_extv)
325 data = &insn_data[CODE_FOR_extv];
326 break;
328 return MAX_MACHINE_MODE;
330 case EP_extzv:
331 if (HAVE_extzv)
333 data = &insn_data[CODE_FOR_extzv];
334 break;
336 return MAX_MACHINE_MODE;
338 default:
339 gcc_unreachable ();
342 if (opno == -1)
343 return VOIDmode;
345 /* Everyone who uses this function used to follow it with
346 if (result == VOIDmode) result = word_mode; */
347 if (data->operand[opno].mode == VOIDmode)
348 return word_mode;
349 return data->operand[opno].mode;
352 /* A subroutine of store_bit_field, with the same arguments. Return true
353 if the operation could be implemented.
355 If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
356 no other way of implementing the operation. If FALLBACK_P is false,
357 return false instead. */
359 static bool
360 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
361 unsigned HOST_WIDE_INT bitnum,
362 unsigned HOST_WIDE_INT bitregion_start,
363 unsigned HOST_WIDE_INT bitregion_end,
364 enum machine_mode fieldmode,
365 rtx value, bool fallback_p)
367 unsigned int unit
368 = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
369 unsigned HOST_WIDE_INT offset, bitpos;
370 rtx op0 = str_rtx;
371 int byte_offset;
372 rtx orig_value;
374 enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);
376 while (GET_CODE (op0) == SUBREG)
378 /* The following line once was done only if WORDS_BIG_ENDIAN,
379 but I think that is a mistake. WORDS_BIG_ENDIAN is
380 meaningful at a much higher level; when structures are copied
381 between memory and regs, the higher-numbered regs
382 always get higher addresses. */
383 int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
384 int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
386 byte_offset = 0;
388 /* Paradoxical subregs need special handling on big endian machines. */
389 if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
391 int difference = inner_mode_size - outer_mode_size;
393 if (WORDS_BIG_ENDIAN)
394 byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
395 if (BYTES_BIG_ENDIAN)
396 byte_offset += difference % UNITS_PER_WORD;
398 else
399 byte_offset = SUBREG_BYTE (op0);
401 bitnum += byte_offset * BITS_PER_UNIT;
402 op0 = SUBREG_REG (op0);
405 /* No action is needed if the target is a register and if the field
406 lies completely outside that register. This can occur if the source
407 code contains an out-of-bounds access to a small array. */
408 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
409 return true;
411 /* Use vec_set patterns for inserting parts of vectors whenever
412 available. */
413 if (VECTOR_MODE_P (GET_MODE (op0))
414 && !MEM_P (op0)
415 && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
416 && fieldmode == GET_MODE_INNER (GET_MODE (op0))
417 && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
418 && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
420 struct expand_operand ops[3];
421 enum machine_mode outermode = GET_MODE (op0);
422 enum machine_mode innermode = GET_MODE_INNER (outermode);
423 enum insn_code icode = optab_handler (vec_set_optab, outermode);
424 int pos = bitnum / GET_MODE_BITSIZE (innermode);
426 create_fixed_operand (&ops[0], op0);
427 create_input_operand (&ops[1], value, innermode);
428 create_integer_operand (&ops[2], pos);
429 if (maybe_expand_insn (icode, 3, ops))
430 return true;
433 /* If the target is a register, overwriting the entire object, or storing
434 a full-word or multi-word field can be done with just a SUBREG.
436 If the target is memory, storing any naturally aligned field can be
437 done with a simple store. For targets that support fast unaligned
438 memory, any naturally sized, unit aligned field can be done directly. */
440 offset = bitnum / unit;
441 bitpos = bitnum % unit;
442 byte_offset = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
443 + (offset * UNITS_PER_WORD);
445 if (bitpos == 0
446 && bitsize == GET_MODE_BITSIZE (fieldmode)
447 && (!MEM_P (op0)
448 ? ((GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD
449 || GET_MODE_SIZE (GET_MODE (op0)) == GET_MODE_SIZE (fieldmode))
450 && ((GET_MODE (op0) == fieldmode && byte_offset == 0)
451 || validate_subreg (fieldmode, GET_MODE (op0), op0,
452 byte_offset)))
453 : (! SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0))
454 || (offset * BITS_PER_UNIT % bitsize == 0
455 && MEM_ALIGN (op0) % GET_MODE_BITSIZE (fieldmode) == 0))))
457 if (MEM_P (op0))
458 op0 = adjust_address (op0, fieldmode, offset);
459 else if (GET_MODE (op0) != fieldmode)
460 op0 = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
461 byte_offset);
462 emit_move_insn (op0, value);
463 return true;
466 /* Make sure we are playing with integral modes. Pun with subregs
467 if we aren't. This must come after the entire register case above,
468 since that case is valid for any mode. The following cases are only
469 valid for integral modes. */
471 enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
472 if (imode != GET_MODE (op0))
474 if (MEM_P (op0))
475 op0 = adjust_address (op0, imode, 0);
476 else
478 gcc_assert (imode != BLKmode);
479 op0 = gen_lowpart (imode, op0);
484 /* We may be accessing data outside the field, which means
485 we can alias adjacent data. */
486 /* ?? not always for C++0x memory model ?? */
487 if (MEM_P (op0))
489 op0 = shallow_copy_rtx (op0);
490 set_mem_alias_set (op0, 0);
491 set_mem_expr (op0, 0);
494 /* If OP0 is a register, BITPOS must count within a word.
495 But as we have it, it counts within whatever size OP0 now has.
496 On a bigendian machine, these are not the same, so convert. */
497 if (BYTES_BIG_ENDIAN
498 && !MEM_P (op0)
499 && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
500 bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
502 /* Storing an lsb-aligned field in a register
503 can be done with a movestrict instruction. */
505 if (!MEM_P (op0)
506 && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
507 && bitsize == GET_MODE_BITSIZE (fieldmode)
508 && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
510 struct expand_operand ops[2];
511 enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
512 rtx arg0 = op0;
513 unsigned HOST_WIDE_INT subreg_off;
515 if (GET_CODE (arg0) == SUBREG)
517 /* Else we've got some float mode source being extracted into
518 a different float mode destination -- this combination of
519 subregs results in Severe Tire Damage. */
520 gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
521 || GET_MODE_CLASS (fieldmode) == MODE_INT
522 || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
523 arg0 = SUBREG_REG (arg0);
526 subreg_off = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
527 + (offset * UNITS_PER_WORD);
528 if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
530 arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
532 create_fixed_operand (&ops[0], arg0);
533 /* Shrink the source operand to FIELDMODE. */
534 create_convert_operand_to (&ops[1], value, fieldmode, false);
535 if (maybe_expand_insn (icode, 2, ops))
536 return true;
540 /* Handle fields bigger than a word. */
542 if (bitsize > BITS_PER_WORD)
544 /* Here we transfer the words of the field
545 in the order least significant first.
546 This is because the most significant word is the one which may
547 be less than full.
548 However, only do that if the value is not BLKmode. */
550 unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
551 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
552 unsigned int i;
553 rtx last;
555 /* This is the mode we must force value to, so that there will be enough
556 subwords to extract. Note that fieldmode will often (always?) be
557 VOIDmode, because that is what store_field uses to indicate that this
558 is a bit field, but passing VOIDmode to operand_subword_force
559 is not allowed. */
560 fieldmode = GET_MODE (value);
561 if (fieldmode == VOIDmode)
562 fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
564 last = get_last_insn ();
565 for (i = 0; i < nwords; i++)
567 /* If I is 0, use the low-order word in both field and target;
568 if I is 1, use the next to lowest word; and so on. */
569 unsigned int wordnum = (backwards
570 ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD
571 - i - 1
572 : i);
573 unsigned int bit_offset = (backwards
574 ? MAX ((int) bitsize - ((int) i + 1)
575 * BITS_PER_WORD,
577 : (int) i * BITS_PER_WORD);
578 rtx value_word = operand_subword_force (value, wordnum, fieldmode);
579 unsigned HOST_WIDE_INT new_bitsize =
580 MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
582 /* If the remaining chunk doesn't have full wordsize we have
583 to make sure that for big endian machines the higher order
584 bits are used. */
585 if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
586 value_word = simplify_expand_binop (word_mode, lshr_optab,
587 value_word,
588 GEN_INT (BITS_PER_WORD
589 - new_bitsize),
590 NULL_RTX, true,
591 OPTAB_LIB_WIDEN);
593 if (!store_bit_field_1 (op0, new_bitsize,
594 bitnum + bit_offset,
595 bitregion_start, bitregion_end,
596 word_mode,
597 value_word, fallback_p))
599 delete_insns_since (last);
600 return false;
603 return true;
606 /* From here on we can assume that the field to be stored in is
607 a full-word (whatever type that is), since it is shorter than a word. */
609 /* OFFSET is the number of words or bytes (UNIT says which)
610 from STR_RTX to the first word or byte containing part of the field. */
612 if (!MEM_P (op0))
614 if (offset != 0
615 || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
617 if (!REG_P (op0))
619 /* Since this is a destination (lvalue), we can't copy
620 it to a pseudo. We can remove a SUBREG that does not
621 change the size of the operand. Such a SUBREG may
622 have been added above. */
623 gcc_assert (GET_CODE (op0) == SUBREG
624 && (GET_MODE_SIZE (GET_MODE (op0))
625 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)))));
626 op0 = SUBREG_REG (op0);
628 op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
629 op0, (offset * UNITS_PER_WORD));
631 offset = 0;
634 /* If VALUE has a floating-point or complex mode, access it as an
635 integer of the corresponding size. This can occur on a machine
636 with 64 bit registers that uses SFmode for float. It can also
637 occur for unaligned float or complex fields. */
638 orig_value = value;
639 if (GET_MODE (value) != VOIDmode
640 && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
641 && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
643 value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
644 emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
647 /* Now OFFSET is nonzero only if OP0 is memory
648 and is therefore always measured in bytes. */
650 if (HAVE_insv
651 && GET_MODE (value) != BLKmode
652 && bitsize > 0
653 && GET_MODE_BITSIZE (op_mode) >= bitsize
654 /* Do not use insv for volatile bitfields when
655 -fstrict-volatile-bitfields is in effect. */
656 && !(MEM_P (op0) && MEM_VOLATILE_P (op0)
657 && flag_strict_volatile_bitfields > 0)
658 && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
659 && (bitsize + bitpos > GET_MODE_BITSIZE (op_mode)))
660 /* Do not use insv if the bit region is restricted and
661 op_mode integer at offset doesn't fit into the
662 restricted region. */
663 && !(MEM_P (op0) && bitregion_end
664 && bitnum - bitpos + GET_MODE_BITSIZE (op_mode)
665 > bitregion_end + 1))
667 struct expand_operand ops[4];
668 int xbitpos = bitpos;
669 rtx value1;
670 rtx xop0 = op0;
671 rtx last = get_last_insn ();
672 bool copy_back = false;
674 /* Add OFFSET into OP0's address. */
675 if (MEM_P (xop0))
676 xop0 = adjust_address (xop0, byte_mode, offset);
678 /* If xop0 is a register, we need it in OP_MODE
679 to make it acceptable to the format of insv. */
680 if (GET_CODE (xop0) == SUBREG)
681 /* We can't just change the mode, because this might clobber op0,
682 and we will need the original value of op0 if insv fails. */
683 xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
684 if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
685 xop0 = gen_lowpart_SUBREG (op_mode, xop0);
687 /* If the destination is a paradoxical subreg such that we need a
688 truncate to the inner mode, perform the insertion on a temporary and
689 truncate the result to the original destination. Note that we can't
690 just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
691 X) 0)) is (reg:N X). */
692 if (GET_CODE (xop0) == SUBREG
693 && REG_P (SUBREG_REG (xop0))
694 && (!TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
695 op_mode)))
697 rtx tem = gen_reg_rtx (op_mode);
698 emit_move_insn (tem, xop0);
699 xop0 = tem;
700 copy_back = true;
703 /* We have been counting XBITPOS within UNIT.
704 Count instead within the size of the register. */
705 if (BYTES_BIG_ENDIAN && !MEM_P (xop0))
706 xbitpos += GET_MODE_BITSIZE (op_mode) - unit;
708 unit = GET_MODE_BITSIZE (op_mode);
710 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
711 "backwards" from the size of the unit we are inserting into.
712 Otherwise, we count bits from the most significant on a
713 BYTES/BITS_BIG_ENDIAN machine. */
715 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
716 xbitpos = unit - bitsize - xbitpos;
718 /* Convert VALUE to op_mode (which insv insn wants) in VALUE1. */
719 value1 = value;
720 if (GET_MODE (value) != op_mode)
722 if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
724 /* Optimization: Don't bother really extending VALUE
725 if it has all the bits we will actually use. However,
726 if we must narrow it, be sure we do it correctly. */
728 if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
730 rtx tmp;
732 tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
733 if (! tmp)
734 tmp = simplify_gen_subreg (op_mode,
735 force_reg (GET_MODE (value),
736 value1),
737 GET_MODE (value), 0);
738 value1 = tmp;
740 else
741 value1 = gen_lowpart (op_mode, value1);
743 else if (CONST_INT_P (value))
744 value1 = gen_int_mode (INTVAL (value), op_mode);
745 else
746 /* Parse phase is supposed to make VALUE's data type
747 match that of the component reference, which is a type
748 at least as wide as the field; so VALUE should have
749 a mode that corresponds to that type. */
750 gcc_assert (CONSTANT_P (value));
753 create_fixed_operand (&ops[0], xop0);
754 create_integer_operand (&ops[1], bitsize);
755 create_integer_operand (&ops[2], xbitpos);
756 create_input_operand (&ops[3], value1, op_mode);
757 if (maybe_expand_insn (CODE_FOR_insv, 4, ops))
759 if (copy_back)
760 convert_move (op0, xop0, true);
761 return true;
763 delete_insns_since (last);
766 /* If OP0 is a memory, try copying it to a register and seeing if a
767 cheap register alternative is available. */
768 if (HAVE_insv && MEM_P (op0))
770 enum machine_mode bestmode;
771 unsigned HOST_WIDE_INT maxbits = MAX_FIXED_MODE_SIZE;
773 if (bitregion_end)
774 maxbits = bitregion_end - bitregion_start + 1;
776 /* Get the mode to use for inserting into this field. If OP0 is
777 BLKmode, get the smallest mode consistent with the alignment. If
778 OP0 is a non-BLKmode object that is no wider than OP_MODE, use its
779 mode. Otherwise, use the smallest mode containing the field. */
781 if (GET_MODE (op0) == BLKmode
782 || GET_MODE_BITSIZE (GET_MODE (op0)) > maxbits
783 || (op_mode != MAX_MACHINE_MODE
784 && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (op_mode)))
785 bestmode = get_best_mode (bitsize, bitnum,
786 bitregion_start, bitregion_end,
787 MEM_ALIGN (op0),
788 (op_mode == MAX_MACHINE_MODE
789 ? VOIDmode : op_mode),
790 MEM_VOLATILE_P (op0));
791 else
792 bestmode = GET_MODE (op0);
794 if (bestmode != VOIDmode
795 && GET_MODE_SIZE (bestmode) >= GET_MODE_SIZE (fieldmode)
796 && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
797 && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
799 rtx last, tempreg, xop0;
800 unsigned HOST_WIDE_INT xoffset, xbitpos;
802 last = get_last_insn ();
804 /* Adjust address to point to the containing unit of
805 that mode. Compute the offset as a multiple of this unit,
806 counting in bytes. */
807 unit = GET_MODE_BITSIZE (bestmode);
808 xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
809 xbitpos = bitnum % unit;
810 xop0 = adjust_address (op0, bestmode, xoffset);
812 /* Fetch that unit, store the bitfield in it, then store
813 the unit. */
814 tempreg = copy_to_reg (xop0);
815 if (store_bit_field_1 (tempreg, bitsize, xbitpos,
816 bitregion_start, bitregion_end,
817 fieldmode, orig_value, false))
819 emit_move_insn (xop0, tempreg);
820 return true;
822 delete_insns_since (last);
826 if (!fallback_p)
827 return false;
829 store_fixed_bit_field (op0, offset, bitsize, bitpos,
830 bitregion_start, bitregion_end, value);
831 return true;
834 /* Generate code to store value from rtx VALUE
835 into a bit-field within structure STR_RTX
836 containing BITSIZE bits starting at bit BITNUM.
838 BITREGION_START is bitpos of the first bitfield in this region.
839 BITREGION_END is the bitpos of the ending bitfield in this region.
840 These two fields are 0, if the C++ memory model does not apply,
841 or we are not interested in keeping track of bitfield regions.
843 FIELDMODE is the machine-mode of the FIELD_DECL node for this field. */
845 void
846 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
847 unsigned HOST_WIDE_INT bitnum,
848 unsigned HOST_WIDE_INT bitregion_start,
849 unsigned HOST_WIDE_INT bitregion_end,
850 enum machine_mode fieldmode,
851 rtx value)
853 /* Under the C++0x memory model, we must not touch bits outside the
854 bit region. Adjust the address to start at the beginning of the
855 bit region. */
856 if (MEM_P (str_rtx) && bitregion_start > 0)
858 enum machine_mode bestmode;
859 enum machine_mode op_mode;
860 unsigned HOST_WIDE_INT offset;
862 op_mode = mode_for_extraction (EP_insv, 3);
863 if (op_mode == MAX_MACHINE_MODE)
864 op_mode = VOIDmode;
866 gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);
868 offset = bitregion_start / BITS_PER_UNIT;
869 bitnum -= bitregion_start;
870 bitregion_end -= bitregion_start;
871 bitregion_start = 0;
872 bestmode = get_best_mode (bitsize, bitnum,
873 bitregion_start, bitregion_end,
874 MEM_ALIGN (str_rtx),
875 op_mode,
876 MEM_VOLATILE_P (str_rtx));
877 str_rtx = adjust_address (str_rtx, bestmode, offset);
880 if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
881 bitregion_start, bitregion_end,
882 fieldmode, value, true))
883 gcc_unreachable ();
886 /* Use shifts and boolean operations to store VALUE
887 into a bit field of width BITSIZE
888 in a memory location specified by OP0 except offset by OFFSET bytes.
889 (OFFSET must be 0 if OP0 is a register.)
890 The field starts at position BITPOS within the byte.
891 (If OP0 is a register, it may be a full word or a narrower mode,
892 but BITPOS still counts within a full word,
893 which is significant on bigendian machines.) */
895 static void
896 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT offset,
897 unsigned HOST_WIDE_INT bitsize,
898 unsigned HOST_WIDE_INT bitpos,
899 unsigned HOST_WIDE_INT bitregion_start,
900 unsigned HOST_WIDE_INT bitregion_end,
901 rtx value)
903 enum machine_mode mode;
904 unsigned int total_bits = BITS_PER_WORD;
905 rtx temp;
906 int all_zero = 0;
907 int all_one = 0;
909 /* There is a case not handled here:
910 a structure with a known alignment of just a halfword
911 and a field split across two aligned halfwords within the structure.
912 Or likewise a structure with a known alignment of just a byte
913 and a field split across two bytes.
914 Such cases are not supposed to be able to occur. */
916 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
918 gcc_assert (!offset);
919 /* Special treatment for a bit field split across two registers. */
920 if (bitsize + bitpos > BITS_PER_WORD)
922 store_split_bit_field (op0, bitsize, bitpos,
923 bitregion_start, bitregion_end,
924 value);
925 return;
928 else
930 unsigned HOST_WIDE_INT maxbits = MAX_FIXED_MODE_SIZE;
932 if (bitregion_end)
933 maxbits = bitregion_end - bitregion_start + 1;
935 /* Get the proper mode to use for this field. We want a mode that
936 includes the entire field. If such a mode would be larger than
937 a word, we won't be doing the extraction the normal way.
938 We don't want a mode bigger than the destination. */
940 mode = GET_MODE (op0);
941 if (GET_MODE_BITSIZE (mode) == 0
942 || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
943 mode = word_mode;
945 if (MEM_VOLATILE_P (op0)
946 && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
947 && GET_MODE_BITSIZE (GET_MODE (op0)) <= maxbits
948 && flag_strict_volatile_bitfields > 0)
949 mode = GET_MODE (op0);
950 else
951 mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
952 bitregion_start, bitregion_end,
953 MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
955 if (mode == VOIDmode)
957 /* The only way this should occur is if the field spans word
958 boundaries. */
959 store_split_bit_field (op0, bitsize, bitpos + offset * BITS_PER_UNIT,
960 bitregion_start, bitregion_end, value);
961 return;
964 total_bits = GET_MODE_BITSIZE (mode);
966 /* Make sure bitpos is valid for the chosen mode. Adjust BITPOS to
967 be in the range 0 to total_bits-1, and put any excess bytes in
968 OFFSET. */
969 if (bitpos >= total_bits)
971 offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
972 bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
973 * BITS_PER_UNIT);
976 /* Get ref to an aligned byte, halfword, or word containing the field.
977 Adjust BITPOS to be position within a word,
978 and OFFSET to be the offset of that word.
979 Then alter OP0 to refer to that word. */
980 bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
981 offset -= (offset % (total_bits / BITS_PER_UNIT));
982 op0 = adjust_address (op0, mode, offset);
985 mode = GET_MODE (op0);
987 /* Now MODE is either some integral mode for a MEM as OP0,
988 or is a full-word for a REG as OP0. TOTAL_BITS corresponds.
989 The bit field is contained entirely within OP0.
990 BITPOS is the starting bit number within OP0.
991 (OP0's mode may actually be narrower than MODE.) */
993 if (BYTES_BIG_ENDIAN)
994 /* BITPOS is the distance between our msb
995 and that of the containing datum.
996 Convert it to the distance from the lsb. */
997 bitpos = total_bits - bitsize - bitpos;
999 /* Now BITPOS is always the distance between our lsb
1000 and that of OP0. */
1002 /* Shift VALUE left by BITPOS bits. If VALUE is not constant,
1003 we must first convert its mode to MODE. */
1005 if (CONST_INT_P (value))
1007 HOST_WIDE_INT v = INTVAL (value);
1009 if (bitsize < HOST_BITS_PER_WIDE_INT)
1010 v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
1012 if (v == 0)
1013 all_zero = 1;
1014 else if ((bitsize < HOST_BITS_PER_WIDE_INT
1015 && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
1016 || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
1017 all_one = 1;
1019 value = lshift_value (mode, value, bitpos, bitsize);
1021 else
1023 int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
1024 && bitpos + bitsize != GET_MODE_BITSIZE (mode));
1026 if (GET_MODE (value) != mode)
1027 value = convert_to_mode (mode, value, 1);
1029 if (must_and)
1030 value = expand_binop (mode, and_optab, value,
1031 mask_rtx (mode, 0, bitsize, 0),
1032 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1033 if (bitpos > 0)
1034 value = expand_shift (LSHIFT_EXPR, mode, value,
1035 bitpos, NULL_RTX, 1);
1038 /* Now clear the chosen bits in OP0,
1039 except that if VALUE is -1 we need not bother. */
1040 /* We keep the intermediates in registers to allow CSE to combine
1041 consecutive bitfield assignments. */
1043 temp = force_reg (mode, op0);
1045 if (! all_one)
1047 temp = expand_binop (mode, and_optab, temp,
1048 mask_rtx (mode, bitpos, bitsize, 1),
1049 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1050 temp = force_reg (mode, temp);
1053 /* Now logical-or VALUE into OP0, unless it is zero. */
1055 if (! all_zero)
1057 temp = expand_binop (mode, ior_optab, temp, value,
1058 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1059 temp = force_reg (mode, temp);
1062 if (op0 != temp)
1064 op0 = copy_rtx (op0);
1065 emit_move_insn (op0, temp);
1069 /* Store a bit field that is split across multiple accessible memory objects.
1071 OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1072 BITSIZE is the field width; BITPOS the position of its first bit
1073 (within the word).
1074 VALUE is the value to store.
1076 This does not yet handle fields wider than BITS_PER_WORD. */
1078 static void
1079 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1080 unsigned HOST_WIDE_INT bitpos,
1081 unsigned HOST_WIDE_INT bitregion_start,
1082 unsigned HOST_WIDE_INT bitregion_end,
1083 rtx value)
1085 unsigned int unit;
1086 unsigned int bitsdone = 0;
1088 /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1089 much at a time. */
1090 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1091 unit = BITS_PER_WORD;
1092 else
1093 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1095 /* If VALUE is a constant other than a CONST_INT, get it into a register in
1096 WORD_MODE. If we can do this using gen_lowpart_common, do so. Note
1097 that VALUE might be a floating-point constant. */
1098 if (CONSTANT_P (value) && !CONST_INT_P (value))
1100 rtx word = gen_lowpart_common (word_mode, value);
1102 if (word && (value != word))
1103 value = word;
1104 else
1105 value = gen_lowpart_common (word_mode,
1106 force_reg (GET_MODE (value) != VOIDmode
1107 ? GET_MODE (value)
1108 : word_mode, value));
1111 while (bitsdone < bitsize)
1113 unsigned HOST_WIDE_INT thissize;
1114 rtx part, word;
1115 unsigned HOST_WIDE_INT thispos;
1116 unsigned HOST_WIDE_INT offset;
1118 offset = (bitpos + bitsdone) / unit;
1119 thispos = (bitpos + bitsdone) % unit;
1121 /* When region of bytes we can touch is restricted, decrease
1122 UNIT close to the end of the region as needed. */
1123 if (bitregion_end
1124 && unit > BITS_PER_UNIT
1125 && bitpos + bitsdone - thispos + unit > bitregion_end + 1)
1127 unit = unit / 2;
1128 continue;
1131 /* THISSIZE must not overrun a word boundary. Otherwise,
1132 store_fixed_bit_field will call us again, and we will mutually
1133 recurse forever. */
1134 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1135 thissize = MIN (thissize, unit - thispos);
1137 if (BYTES_BIG_ENDIAN)
1139 int total_bits;
1141 /* We must do an endian conversion exactly the same way as it is
1142 done in extract_bit_field, so that the two calls to
1143 extract_fixed_bit_field will have comparable arguments. */
1144 if (!MEM_P (value) || GET_MODE (value) == BLKmode)
1145 total_bits = BITS_PER_WORD;
1146 else
1147 total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1149 /* Fetch successively less significant portions. */
1150 if (CONST_INT_P (value))
1151 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1152 >> (bitsize - bitsdone - thissize))
1153 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1154 else
1155 /* The args are chosen so that the last part includes the
1156 lsb. Give extract_bit_field the value it needs (with
1157 endianness compensation) to fetch the piece we want. */
1158 part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1159 total_bits - bitsize + bitsdone,
1160 NULL_RTX, 1, false);
1162 else
1164 /* Fetch successively more significant portions. */
1165 if (CONST_INT_P (value))
1166 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1167 >> bitsdone)
1168 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1169 else
1170 part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1171 bitsdone, NULL_RTX, 1, false);
1174 /* If OP0 is a register, then handle OFFSET here.
1176 When handling multiword bitfields, extract_bit_field may pass
1177 down a word_mode SUBREG of a larger REG for a bitfield that actually
1178 crosses a word boundary. Thus, for a SUBREG, we must find
1179 the current word starting from the base register. */
1180 if (GET_CODE (op0) == SUBREG)
1182 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1183 enum machine_mode sub_mode = GET_MODE (SUBREG_REG (op0));
1184 if (sub_mode != BLKmode && GET_MODE_SIZE (sub_mode) < UNITS_PER_WORD)
1185 word = word_offset ? const0_rtx : op0;
1186 else
1187 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1188 GET_MODE (SUBREG_REG (op0)));
1189 offset = 0;
1191 else if (REG_P (op0))
1193 enum machine_mode op0_mode = GET_MODE (op0);
1194 if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
1195 word = offset ? const0_rtx : op0;
1196 else
1197 word = operand_subword_force (op0, offset, GET_MODE (op0));
1198 offset = 0;
1200 else
1201 word = op0;
1203 /* OFFSET is in UNITs, and UNIT is in bits.
1204 store_fixed_bit_field wants offset in bytes. If WORD is const0_rtx,
1205 it is just an out-of-bounds access. Ignore it. */
1206 if (word != const0_rtx)
1207 store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT, thissize,
1208 thispos, bitregion_start, bitregion_end, part);
1209 bitsdone += thissize;
1213 /* A subroutine of extract_bit_field_1 that converts return value X
1214 to either MODE or TMODE. MODE, TMODE and UNSIGNEDP are arguments
1215 to extract_bit_field. */
1217 static rtx
1218 convert_extracted_bit_field (rtx x, enum machine_mode mode,
1219 enum machine_mode tmode, bool unsignedp)
1221 if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1222 return x;
1224 /* If the x mode is not a scalar integral, first convert to the
1225 integer mode of that size and then access it as a floating-point
1226 value via a SUBREG. */
1227 if (!SCALAR_INT_MODE_P (tmode))
1229 enum machine_mode smode;
1231 smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1232 x = convert_to_mode (smode, x, unsignedp);
1233 x = force_reg (smode, x);
1234 return gen_lowpart (tmode, x);
1237 return convert_to_mode (tmode, x, unsignedp);
1240 /* A subroutine of extract_bit_field, with the same arguments.
1241 If FALLBACK_P is true, fall back to extract_fixed_bit_field
1242 if we can find no other means of implementing the operation.
1243 if FALLBACK_P is false, return NULL instead. */
1245 static rtx
1246 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1247 unsigned HOST_WIDE_INT bitnum,
1248 int unsignedp, bool packedp, rtx target,
1249 enum machine_mode mode, enum machine_mode tmode,
1250 bool fallback_p)
1252 unsigned int unit
1253 = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
1254 unsigned HOST_WIDE_INT offset, bitpos;
1255 rtx op0 = str_rtx;
1256 enum machine_mode int_mode;
1257 enum machine_mode ext_mode;
1258 enum machine_mode mode1;
1259 int byte_offset;
1261 if (tmode == VOIDmode)
1262 tmode = mode;
1264 while (GET_CODE (op0) == SUBREG)
1266 bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1267 op0 = SUBREG_REG (op0);
1270 /* If we have an out-of-bounds access to a register, just return an
1271 uninitialized register of the required mode. This can occur if the
1272 source code contains an out-of-bounds access to a small array. */
1273 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1274 return gen_reg_rtx (tmode);
1276 if (REG_P (op0)
1277 && mode == GET_MODE (op0)
1278 && bitnum == 0
1279 && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1281 /* We're trying to extract a full register from itself. */
1282 return op0;
1285 /* See if we can get a better vector mode before extracting. */
1286 if (VECTOR_MODE_P (GET_MODE (op0))
1287 && !MEM_P (op0)
1288 && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1290 enum machine_mode new_mode;
1292 if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1293 new_mode = MIN_MODE_VECTOR_FLOAT;
1294 else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1295 new_mode = MIN_MODE_VECTOR_FRACT;
1296 else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1297 new_mode = MIN_MODE_VECTOR_UFRACT;
1298 else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1299 new_mode = MIN_MODE_VECTOR_ACCUM;
1300 else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1301 new_mode = MIN_MODE_VECTOR_UACCUM;
1302 else
1303 new_mode = MIN_MODE_VECTOR_INT;
1305 for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1306 if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1307 && targetm.vector_mode_supported_p (new_mode))
1308 break;
1309 if (new_mode != VOIDmode)
1310 op0 = gen_lowpart (new_mode, op0);
1313 /* Use vec_extract patterns for extracting parts of vectors whenever
1314 available. */
1315 if (VECTOR_MODE_P (GET_MODE (op0))
1316 && !MEM_P (op0)
1317 && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing
1318 && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1319 == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1321 struct expand_operand ops[3];
1322 enum machine_mode outermode = GET_MODE (op0);
1323 enum machine_mode innermode = GET_MODE_INNER (outermode);
1324 enum insn_code icode = optab_handler (vec_extract_optab, outermode);
1325 unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1327 create_output_operand (&ops[0], target, innermode);
1328 create_input_operand (&ops[1], op0, outermode);
1329 create_integer_operand (&ops[2], pos);
1330 if (maybe_expand_insn (icode, 3, ops))
1332 target = ops[0].value;
1333 if (GET_MODE (target) != mode)
1334 return gen_lowpart (tmode, target);
1335 return target;
1339 /* Make sure we are playing with integral modes. Pun with subregs
1340 if we aren't. */
1342 enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1343 if (imode != GET_MODE (op0))
1345 if (MEM_P (op0))
1346 op0 = adjust_address (op0, imode, 0);
1347 else if (imode != BLKmode)
1349 op0 = gen_lowpart (imode, op0);
1351 /* If we got a SUBREG, force it into a register since we
1352 aren't going to be able to do another SUBREG on it. */
1353 if (GET_CODE (op0) == SUBREG)
1354 op0 = force_reg (imode, op0);
1356 else if (REG_P (op0))
1358 rtx reg, subreg;
1359 imode = smallest_mode_for_size (GET_MODE_BITSIZE (GET_MODE (op0)),
1360 MODE_INT);
1361 reg = gen_reg_rtx (imode);
1362 subreg = gen_lowpart_SUBREG (GET_MODE (op0), reg);
1363 emit_move_insn (subreg, op0);
1364 op0 = reg;
1365 bitnum += SUBREG_BYTE (subreg) * BITS_PER_UNIT;
1367 else
1369 rtx mem = assign_stack_temp (GET_MODE (op0),
1370 GET_MODE_SIZE (GET_MODE (op0)));
1371 emit_move_insn (mem, op0);
1372 op0 = adjust_address (mem, BLKmode, 0);
1377 /* We may be accessing data outside the field, which means
1378 we can alias adjacent data. */
1379 if (MEM_P (op0))
1381 op0 = shallow_copy_rtx (op0);
1382 set_mem_alias_set (op0, 0);
1383 set_mem_expr (op0, 0);
1386 /* Extraction of a full-word or multi-word value from a structure
1387 in a register or aligned memory can be done with just a SUBREG.
1388 A subword value in the least significant part of a register
1389 can also be extracted with a SUBREG. For this, we need the
1390 byte offset of the value in op0. */
1392 bitpos = bitnum % unit;
1393 offset = bitnum / unit;
1394 byte_offset = bitpos / BITS_PER_UNIT + offset * UNITS_PER_WORD;
1396 /* If OP0 is a register, BITPOS must count within a word.
1397 But as we have it, it counts within whatever size OP0 now has.
1398 On a bigendian machine, these are not the same, so convert. */
1399 if (BYTES_BIG_ENDIAN
1400 && !MEM_P (op0)
1401 && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
1402 bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1404 /* ??? We currently assume TARGET is at least as big as BITSIZE.
1405 If that's wrong, the solution is to test for it and set TARGET to 0
1406 if needed. */
1408 /* Only scalar integer modes can be converted via subregs. There is an
1409 additional problem for FP modes here in that they can have a precision
1410 which is different from the size. mode_for_size uses precision, but
1411 we want a mode based on the size, so we must avoid calling it for FP
1412 modes. */
1413 mode1 = (SCALAR_INT_MODE_P (tmode)
1414 ? mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0)
1415 : mode);
1417 /* If the bitfield is volatile, we need to make sure the access
1418 remains on a type-aligned boundary. */
1419 if (GET_CODE (op0) == MEM
1420 && MEM_VOLATILE_P (op0)
1421 && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
1422 && flag_strict_volatile_bitfields > 0)
1423 goto no_subreg_mode_swap;
1425 if (((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode)
1426 && bitpos % BITS_PER_WORD == 0)
1427 || (mode1 != BLKmode
1428 /* ??? The big endian test here is wrong. This is correct
1429 if the value is in a register, and if mode_for_size is not
1430 the same mode as op0. This causes us to get unnecessarily
1431 inefficient code from the Thumb port when -mbig-endian. */
1432 && (BYTES_BIG_ENDIAN
1433 ? bitpos + bitsize == BITS_PER_WORD
1434 : bitpos == 0)))
1435 && ((!MEM_P (op0)
1436 && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0))
1437 && GET_MODE_SIZE (mode1) != 0
1438 && byte_offset % GET_MODE_SIZE (mode1) == 0)
1439 || (MEM_P (op0)
1440 && (! SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
1441 || (offset * BITS_PER_UNIT % bitsize == 0
1442 && MEM_ALIGN (op0) % bitsize == 0)))))
1444 if (MEM_P (op0))
1445 op0 = adjust_address (op0, mode1, offset);
1446 else if (mode1 != GET_MODE (op0))
1448 rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1449 byte_offset);
1450 if (sub == NULL)
1451 goto no_subreg_mode_swap;
1452 op0 = sub;
1454 if (mode1 != mode)
1455 return convert_to_mode (tmode, op0, unsignedp);
1456 return op0;
1458 no_subreg_mode_swap:
1460 /* Handle fields bigger than a word. */
1462 if (bitsize > BITS_PER_WORD)
1464 /* Here we transfer the words of the field
1465 in the order least significant first.
1466 This is because the most significant word is the one which may
1467 be less than full. */
1469 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1470 unsigned int i;
1472 if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1473 target = gen_reg_rtx (mode);
1475 /* Indicate for flow that the entire target reg is being set. */
1476 emit_clobber (target);
1478 for (i = 0; i < nwords; i++)
1480 /* If I is 0, use the low-order word in both field and target;
1481 if I is 1, use the next to lowest word; and so on. */
1482 /* Word number in TARGET to use. */
1483 unsigned int wordnum
1484 = (WORDS_BIG_ENDIAN
1485 ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1486 : i);
1487 /* Offset from start of field in OP0. */
1488 unsigned int bit_offset = (WORDS_BIG_ENDIAN
1489 ? MAX (0, ((int) bitsize - ((int) i + 1)
1490 * (int) BITS_PER_WORD))
1491 : (int) i * BITS_PER_WORD);
1492 rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1493 rtx result_part
1494 = extract_bit_field (op0, MIN (BITS_PER_WORD,
1495 bitsize - i * BITS_PER_WORD),
1496 bitnum + bit_offset, 1, false, target_part, mode,
1497 word_mode);
1499 gcc_assert (target_part);
1501 if (result_part != target_part)
1502 emit_move_insn (target_part, result_part);
1505 if (unsignedp)
1507 /* Unless we've filled TARGET, the upper regs in a multi-reg value
1508 need to be zero'd out. */
1509 if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1511 unsigned int i, total_words;
1513 total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1514 for (i = nwords; i < total_words; i++)
1515 emit_move_insn
1516 (operand_subword (target,
1517 WORDS_BIG_ENDIAN ? total_words - i - 1 : i,
1518 1, VOIDmode),
1519 const0_rtx);
1521 return target;
1524 /* Signed bit field: sign-extend with two arithmetic shifts. */
1525 target = expand_shift (LSHIFT_EXPR, mode, target,
1526 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1527 return expand_shift (RSHIFT_EXPR, mode, target,
1528 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1531 /* From here on we know the desired field is smaller than a word. */
1533 /* Check if there is a correspondingly-sized integer field, so we can
1534 safely extract it as one size of integer, if necessary; then
1535 truncate or extend to the size that is wanted; then use SUBREGs or
1536 convert_to_mode to get one of the modes we really wanted. */
1538 int_mode = int_mode_for_mode (tmode);
1539 if (int_mode == BLKmode)
1540 int_mode = int_mode_for_mode (mode);
1541 /* Should probably push op0 out to memory and then do a load. */
1542 gcc_assert (int_mode != BLKmode);
1544 /* OFFSET is the number of words or bytes (UNIT says which)
1545 from STR_RTX to the first word or byte containing part of the field. */
1546 if (!MEM_P (op0))
1548 if (offset != 0
1549 || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1551 if (!REG_P (op0))
1552 op0 = copy_to_reg (op0);
1553 op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
1554 op0, (offset * UNITS_PER_WORD));
1556 offset = 0;
1559 /* Now OFFSET is nonzero only for memory operands. */
1560 ext_mode = mode_for_extraction (unsignedp ? EP_extzv : EP_extv, 0);
1561 if (ext_mode != MAX_MACHINE_MODE
1562 && bitsize > 0
1563 && GET_MODE_BITSIZE (ext_mode) >= bitsize
1564 /* Do not use extv/extzv for volatile bitfields when
1565 -fstrict-volatile-bitfields is in effect. */
1566 && !(MEM_P (op0) && MEM_VOLATILE_P (op0)
1567 && flag_strict_volatile_bitfields > 0)
1568 /* If op0 is a register, we need it in EXT_MODE to make it
1569 acceptable to the format of ext(z)v. */
1570 && !(GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1571 && !((REG_P (op0) || GET_CODE (op0) == SUBREG)
1572 && (bitsize + bitpos > GET_MODE_BITSIZE (ext_mode))))
1574 struct expand_operand ops[4];
1575 unsigned HOST_WIDE_INT xbitpos = bitpos, xoffset = offset;
1576 rtx xop0 = op0;
1577 rtx xtarget = target;
1578 rtx xspec_target = target;
1579 rtx xspec_target_subreg = 0;
1581 /* If op0 is a register, we need it in EXT_MODE to make it
1582 acceptable to the format of ext(z)v. */
1583 if (REG_P (xop0) && GET_MODE (xop0) != ext_mode)
1584 xop0 = gen_lowpart_SUBREG (ext_mode, xop0);
1585 if (MEM_P (xop0))
1586 /* Get ref to first byte containing part of the field. */
1587 xop0 = adjust_address (xop0, byte_mode, xoffset);
1589 /* Now convert from counting within UNIT to counting in EXT_MODE. */
1590 if (BYTES_BIG_ENDIAN && !MEM_P (xop0))
1591 xbitpos += GET_MODE_BITSIZE (ext_mode) - unit;
1593 unit = GET_MODE_BITSIZE (ext_mode);
1595 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1596 "backwards" from the size of the unit we are extracting from.
1597 Otherwise, we count bits from the most significant on a
1598 BYTES/BITS_BIG_ENDIAN machine. */
1600 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1601 xbitpos = unit - bitsize - xbitpos;
1603 if (xtarget == 0)
1604 xtarget = xspec_target = gen_reg_rtx (tmode);
1606 if (GET_MODE (xtarget) != ext_mode)
1608 /* Don't use LHS paradoxical subreg if explicit truncation is needed
1609 between the mode of the extraction (word_mode) and the target
1610 mode. Instead, create a temporary and use convert_move to set
1611 the target. */
1612 if (REG_P (xtarget)
1613 && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (xtarget), ext_mode))
1615 xtarget = gen_lowpart (ext_mode, xtarget);
1616 if (GET_MODE_PRECISION (ext_mode)
1617 > GET_MODE_PRECISION (GET_MODE (xspec_target)))
1618 xspec_target_subreg = xtarget;
1620 else
1621 xtarget = gen_reg_rtx (ext_mode);
1624 create_output_operand (&ops[0], xtarget, ext_mode);
1625 create_fixed_operand (&ops[1], xop0);
1626 create_integer_operand (&ops[2], bitsize);
1627 create_integer_operand (&ops[3], xbitpos);
1628 if (maybe_expand_insn (unsignedp ? CODE_FOR_extzv : CODE_FOR_extv,
1629 4, ops))
1631 xtarget = ops[0].value;
1632 if (xtarget == xspec_target)
1633 return xtarget;
1634 if (xtarget == xspec_target_subreg)
1635 return xspec_target;
1636 return convert_extracted_bit_field (xtarget, mode, tmode, unsignedp);
1640 /* If OP0 is a memory, try copying it to a register and seeing if a
1641 cheap register alternative is available. */
1642 if (ext_mode != MAX_MACHINE_MODE && MEM_P (op0))
1644 enum machine_mode bestmode;
1646 /* Get the mode to use for inserting into this field. If
1647 OP0 is BLKmode, get the smallest mode consistent with the
1648 alignment. If OP0 is a non-BLKmode object that is no
1649 wider than EXT_MODE, use its mode. Otherwise, use the
1650 smallest mode containing the field. */
1652 if (GET_MODE (op0) == BLKmode
1653 || (ext_mode != MAX_MACHINE_MODE
1654 && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (ext_mode)))
1655 bestmode = get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0),
1656 (ext_mode == MAX_MACHINE_MODE
1657 ? VOIDmode : ext_mode),
1658 MEM_VOLATILE_P (op0));
1659 else
1660 bestmode = GET_MODE (op0);
1662 if (bestmode != VOIDmode
1663 && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
1664 && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
1666 unsigned HOST_WIDE_INT xoffset, xbitpos;
1668 /* Compute the offset as a multiple of this unit,
1669 counting in bytes. */
1670 unit = GET_MODE_BITSIZE (bestmode);
1671 xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1672 xbitpos = bitnum % unit;
1674 /* Make sure the register is big enough for the whole field. */
1675 if (xoffset * BITS_PER_UNIT + unit
1676 >= offset * BITS_PER_UNIT + bitsize)
1678 rtx last, result, xop0;
1680 last = get_last_insn ();
1682 /* Fetch it to a register in that size. */
1683 xop0 = adjust_address (op0, bestmode, xoffset);
1684 xop0 = force_reg (bestmode, xop0);
1685 result = extract_bit_field_1 (xop0, bitsize, xbitpos,
1686 unsignedp, packedp, target,
1687 mode, tmode, false);
1688 if (result)
1689 return result;
1691 delete_insns_since (last);
1696 if (!fallback_p)
1697 return NULL;
1699 target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1700 bitpos, target, unsignedp, packedp);
1701 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1704 /* Generate code to extract a byte-field from STR_RTX
1705 containing BITSIZE bits, starting at BITNUM,
1706 and put it in TARGET if possible (if TARGET is nonzero).
1707 Regardless of TARGET, we return the rtx for where the value is placed.
1709 STR_RTX is the structure containing the byte (a REG or MEM).
1710 UNSIGNEDP is nonzero if this is an unsigned bit field.
1711 PACKEDP is nonzero if the field has the packed attribute.
1712 MODE is the natural mode of the field value once extracted.
1713 TMODE is the mode the caller would like the value to have;
1714 but the value may be returned with type MODE instead.
1716 If a TARGET is specified and we can store in it at no extra cost,
1717 we do so, and return TARGET.
1718 Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1719 if they are equally easy. */
1722 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1723 unsigned HOST_WIDE_INT bitnum, int unsignedp, bool packedp,
1724 rtx target, enum machine_mode mode, enum machine_mode tmode)
1726 return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp, packedp,
1727 target, mode, tmode, true);
1730 /* Extract a bit field using shifts and boolean operations
1731 Returns an rtx to represent the value.
1732 OP0 addresses a register (word) or memory (byte).
1733 BITPOS says which bit within the word or byte the bit field starts in.
1734 OFFSET says how many bytes farther the bit field starts;
1735 it is 0 if OP0 is a register.
1736 BITSIZE says how many bits long the bit field is.
1737 (If OP0 is a register, it may be narrower than a full word,
1738 but BITPOS still counts within a full word,
1739 which is significant on bigendian machines.)
1741 UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1742 PACKEDP is true if the field has the packed attribute.
1744 If TARGET is nonzero, attempts to store the value there
1745 and return TARGET, but this is not guaranteed.
1746 If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */
1748 static rtx
1749 extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
1750 unsigned HOST_WIDE_INT offset,
1751 unsigned HOST_WIDE_INT bitsize,
1752 unsigned HOST_WIDE_INT bitpos, rtx target,
1753 int unsignedp, bool packedp)
1755 unsigned int total_bits = BITS_PER_WORD;
1756 enum machine_mode mode;
1758 if (GET_CODE (op0) == SUBREG || REG_P (op0))
1760 /* Special treatment for a bit field split across two registers. */
1761 if (bitsize + bitpos > BITS_PER_WORD)
1762 return extract_split_bit_field (op0, bitsize, bitpos, unsignedp);
1764 else
1766 /* Get the proper mode to use for this field. We want a mode that
1767 includes the entire field. If such a mode would be larger than
1768 a word, we won't be doing the extraction the normal way. */
1770 if (MEM_VOLATILE_P (op0)
1771 && flag_strict_volatile_bitfields > 0)
1773 if (GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
1774 mode = GET_MODE (op0);
1775 else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1776 mode = GET_MODE (target);
1777 else
1778 mode = tmode;
1780 else
1781 mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT, 0, 0,
1782 MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));
1784 if (mode == VOIDmode)
1785 /* The only way this should occur is if the field spans word
1786 boundaries. */
1787 return extract_split_bit_field (op0, bitsize,
1788 bitpos + offset * BITS_PER_UNIT,
1789 unsignedp);
1791 total_bits = GET_MODE_BITSIZE (mode);
1793 /* Make sure bitpos is valid for the chosen mode. Adjust BITPOS to
1794 be in the range 0 to total_bits-1, and put any excess bytes in
1795 OFFSET. */
1796 if (bitpos >= total_bits)
1798 offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
1799 bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
1800 * BITS_PER_UNIT);
1803 /* If we're accessing a volatile MEM, we can't do the next
1804 alignment step if it results in a multi-word access where we
1805 otherwise wouldn't have one. So, check for that case
1806 here. */
1807 if (MEM_P (op0)
1808 && MEM_VOLATILE_P (op0)
1809 && flag_strict_volatile_bitfields > 0
1810 && bitpos + bitsize <= total_bits
1811 && bitpos + bitsize + (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT > total_bits)
1813 if (STRICT_ALIGNMENT)
1815 static bool informed_about_misalignment = false;
1816 bool warned;
1818 if (packedp)
1820 if (bitsize == total_bits)
1821 warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1822 "multiple accesses to volatile structure member"
1823 " because of packed attribute");
1824 else
1825 warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1826 "multiple accesses to volatile structure bitfield"
1827 " because of packed attribute");
1829 return extract_split_bit_field (op0, bitsize,
1830 bitpos + offset * BITS_PER_UNIT,
1831 unsignedp);
1834 if (bitsize == total_bits)
1835 warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1836 "mis-aligned access used for structure member");
1837 else
1838 warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1839 "mis-aligned access used for structure bitfield");
1841 if (! informed_about_misalignment && warned)
1843 informed_about_misalignment = true;
1844 inform (input_location,
1845 "when a volatile object spans multiple type-sized locations,"
1846 " the compiler must choose between using a single mis-aligned access to"
1847 " preserve the volatility, or using multiple aligned accesses to avoid"
1848 " runtime faults; this code may fail at runtime if the hardware does"
1849 " not allow this access");
1853 else
1856 /* Get ref to an aligned byte, halfword, or word containing the field.
1857 Adjust BITPOS to be position within a word,
1858 and OFFSET to be the offset of that word.
1859 Then alter OP0 to refer to that word. */
1860 bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
1861 offset -= (offset % (total_bits / BITS_PER_UNIT));
1864 op0 = adjust_address (op0, mode, offset);
1867 mode = GET_MODE (op0);
1869 if (BYTES_BIG_ENDIAN)
1870 /* BITPOS is the distance between our msb and that of OP0.
1871 Convert it to the distance from the lsb. */
1872 bitpos = total_bits - bitsize - bitpos;
1874 /* Now BITPOS is always the distance between the field's lsb and that of OP0.
1875 We have reduced the big-endian case to the little-endian case. */
1877 if (unsignedp)
1879 if (bitpos)
1881 /* If the field does not already start at the lsb,
1882 shift it so it does. */
1883 /* Maybe propagate the target for the shift. */
1884 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1885 if (tmode != mode)
1886 subtarget = 0;
1887 op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitpos, subtarget, 1);
1889 /* Convert the value to the desired mode. */
1890 if (mode != tmode)
1891 op0 = convert_to_mode (tmode, op0, 1);
1893 /* Unless the msb of the field used to be the msb when we shifted,
1894 mask out the upper bits. */
1896 if (GET_MODE_BITSIZE (mode) != bitpos + bitsize)
1897 return expand_binop (GET_MODE (op0), and_optab, op0,
1898 mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1899 target, 1, OPTAB_LIB_WIDEN);
1900 return op0;
1903 /* To extract a signed bit-field, first shift its msb to the msb of the word,
1904 then arithmetic-shift its lsb to the lsb of the word. */
1905 op0 = force_reg (mode, op0);
1907 /* Find the narrowest integer mode that contains the field. */
1909 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1910 mode = GET_MODE_WIDER_MODE (mode))
1911 if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos)
1913 op0 = convert_to_mode (mode, op0, 0);
1914 break;
1917 if (mode != tmode)
1918 target = 0;
1920 if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos))
1922 int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitpos);
1923 /* Maybe propagate the target for the shift. */
1924 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1925 op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1928 return expand_shift (RSHIFT_EXPR, mode, op0,
1929 GET_MODE_BITSIZE (mode) - bitsize, target, 0);
1932 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1933 of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1934 complement of that if COMPLEMENT. The mask is truncated if
1935 necessary to the width of mode MODE. The mask is zero-extended if
1936 BITSIZE+BITPOS is too small for MODE. */
1938 static rtx
1939 mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
1941 double_int mask;
1943 mask = double_int_mask (bitsize);
1944 mask = double_int_lshift (mask, bitpos, HOST_BITS_PER_DOUBLE_INT, false);
1946 if (complement)
1947 mask = double_int_not (mask);
1949 return immed_double_int_const (mask, mode);
1952 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1953 VALUE truncated to BITSIZE bits and then shifted left BITPOS bits. */
1955 static rtx
1956 lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize)
1958 double_int val;
1960 val = double_int_zext (uhwi_to_double_int (INTVAL (value)), bitsize);
1961 val = double_int_lshift (val, bitpos, HOST_BITS_PER_DOUBLE_INT, false);
1963 return immed_double_int_const (val, mode);
1966 /* Extract a bit field that is split across two words
1967 and return an RTX for the result.
1969 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1970 BITSIZE is the field width; BITPOS, position of its first bit, in the word.
1971 UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend. */
1973 static rtx
1974 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1975 unsigned HOST_WIDE_INT bitpos, int unsignedp)
1977 unsigned int unit;
1978 unsigned int bitsdone = 0;
1979 rtx result = NULL_RTX;
1980 int first = 1;
1982 /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1983 much at a time. */
1984 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1985 unit = BITS_PER_WORD;
1986 else
1987 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1989 while (bitsdone < bitsize)
1991 unsigned HOST_WIDE_INT thissize;
1992 rtx part, word;
1993 unsigned HOST_WIDE_INT thispos;
1994 unsigned HOST_WIDE_INT offset;
1996 offset = (bitpos + bitsdone) / unit;
1997 thispos = (bitpos + bitsdone) % unit;
1999 /* THISSIZE must not overrun a word boundary. Otherwise,
2000 extract_fixed_bit_field will call us again, and we will mutually
2001 recurse forever. */
2002 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
2003 thissize = MIN (thissize, unit - thispos);
2005 /* If OP0 is a register, then handle OFFSET here.
2007 When handling multiword bitfields, extract_bit_field may pass
2008 down a word_mode SUBREG of a larger REG for a bitfield that actually
2009 crosses a word boundary. Thus, for a SUBREG, we must find
2010 the current word starting from the base register. */
2011 if (GET_CODE (op0) == SUBREG)
2013 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
2014 word = operand_subword_force (SUBREG_REG (op0), word_offset,
2015 GET_MODE (SUBREG_REG (op0)));
2016 offset = 0;
2018 else if (REG_P (op0))
2020 word = operand_subword_force (op0, offset, GET_MODE (op0));
2021 offset = 0;
2023 else
2024 word = op0;
2026 /* Extract the parts in bit-counting order,
2027 whose meaning is determined by BYTES_PER_UNIT.
2028 OFFSET is in UNITs, and UNIT is in bits.
2029 extract_fixed_bit_field wants offset in bytes. */
2030 part = extract_fixed_bit_field (word_mode, word,
2031 offset * unit / BITS_PER_UNIT,
2032 thissize, thispos, 0, 1, false);
2033 bitsdone += thissize;
2035 /* Shift this part into place for the result. */
2036 if (BYTES_BIG_ENDIAN)
2038 if (bitsize != bitsdone)
2039 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2040 bitsize - bitsdone, 0, 1);
2042 else
2044 if (bitsdone != thissize)
2045 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2046 bitsdone - thissize, 0, 1);
2049 if (first)
2050 result = part;
2051 else
2052 /* Combine the parts with bitwise or. This works
2053 because we extracted each part as an unsigned bit field. */
2054 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2055 OPTAB_LIB_WIDEN);
2057 first = 0;
2060 /* Unsigned bit field: we are done. */
2061 if (unsignedp)
2062 return result;
2063 /* Signed bit field: sign-extend with two arithmetic shifts. */
2064 result = expand_shift (LSHIFT_EXPR, word_mode, result,
2065 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2066 return expand_shift (RSHIFT_EXPR, word_mode, result,
2067 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2070 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2071 the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than
2072 MODE, fill the upper bits with zeros. Fail if the layout of either
2073 mode is unknown (as for CC modes) or if the extraction would involve
2074 unprofitable mode punning. Return the value on success, otherwise
2075 return null.
2077 This is different from gen_lowpart* in these respects:
2079 - the returned value must always be considered an rvalue
2081 - when MODE is wider than SRC_MODE, the extraction involves
2082 a zero extension
2084 - when MODE is smaller than SRC_MODE, the extraction involves
2085 a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
2087 In other words, this routine performs a computation, whereas the
2088 gen_lowpart* routines are conceptually lvalue or rvalue subreg
2089 operations. */
2092 extract_low_bits (enum machine_mode mode, enum machine_mode src_mode, rtx src)
2094 enum machine_mode int_mode, src_int_mode;
2096 if (mode == src_mode)
2097 return src;
2099 if (CONSTANT_P (src))
2101 /* simplify_gen_subreg can't be used here, as if simplify_subreg
2102 fails, it will happily create (subreg (symbol_ref)) or similar
2103 invalid SUBREGs. */
2104 unsigned int byte = subreg_lowpart_offset (mode, src_mode);
2105 rtx ret = simplify_subreg (mode, src, src_mode, byte);
2106 if (ret)
2107 return ret;
2109 if (GET_MODE (src) == VOIDmode
2110 || !validate_subreg (mode, src_mode, src, byte))
2111 return NULL_RTX;
2113 src = force_reg (GET_MODE (src), src);
2114 return gen_rtx_SUBREG (mode, src, byte);
2117 if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2118 return NULL_RTX;
2120 if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2121 && MODES_TIEABLE_P (mode, src_mode))
2123 rtx x = gen_lowpart_common (mode, src);
2124 if (x)
2125 return x;
2128 src_int_mode = int_mode_for_mode (src_mode);
2129 int_mode = int_mode_for_mode (mode);
2130 if (src_int_mode == BLKmode || int_mode == BLKmode)
2131 return NULL_RTX;
2133 if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2134 return NULL_RTX;
2135 if (!MODES_TIEABLE_P (int_mode, mode))
2136 return NULL_RTX;
2138 src = gen_lowpart (src_int_mode, src);
2139 src = convert_modes (int_mode, src_int_mode, src, true);
2140 src = gen_lowpart (mode, src);
2141 return src;
2144 /* Add INC into TARGET. */
2146 void
2147 expand_inc (rtx target, rtx inc)
2149 rtx value = expand_binop (GET_MODE (target), add_optab,
2150 target, inc,
2151 target, 0, OPTAB_LIB_WIDEN);
2152 if (value != target)
2153 emit_move_insn (target, value);
2156 /* Subtract DEC from TARGET. */
2158 void
2159 expand_dec (rtx target, rtx dec)
2161 rtx value = expand_binop (GET_MODE (target), sub_optab,
2162 target, dec,
2163 target, 0, OPTAB_LIB_WIDEN);
2164 if (value != target)
2165 emit_move_insn (target, value);
2168 /* Output a shift instruction for expression code CODE,
2169 with SHIFTED being the rtx for the value to shift,
2170 and AMOUNT the rtx for the amount to shift by.
2171 Store the result in the rtx TARGET, if that is convenient.
2172 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2173 Return the rtx for where the value is. */
2175 static rtx
2176 expand_shift_1 (enum tree_code code, enum machine_mode mode, rtx shifted,
2177 rtx amount, rtx target, int unsignedp)
2179 rtx op1, temp = 0;
2180 int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2181 int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2182 optab lshift_optab = ashl_optab;
2183 optab rshift_arith_optab = ashr_optab;
2184 optab rshift_uns_optab = lshr_optab;
2185 optab lrotate_optab = rotl_optab;
2186 optab rrotate_optab = rotr_optab;
2187 enum machine_mode op1_mode;
2188 int attempt;
2189 bool speed = optimize_insn_for_speed_p ();
2191 op1 = amount;
2192 op1_mode = GET_MODE (op1);
2194 /* Determine whether the shift/rotate amount is a vector, or scalar. If the
2195 shift amount is a vector, use the vector/vector shift patterns. */
2196 if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2198 lshift_optab = vashl_optab;
2199 rshift_arith_optab = vashr_optab;
2200 rshift_uns_optab = vlshr_optab;
2201 lrotate_optab = vrotl_optab;
2202 rrotate_optab = vrotr_optab;
2205 /* Previously detected shift-counts computed by NEGATE_EXPR
2206 and shifted in the other direction; but that does not work
2207 on all machines. */
2209 if (SHIFT_COUNT_TRUNCATED)
2211 if (CONST_INT_P (op1)
2212 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2213 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
2214 op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2215 % GET_MODE_BITSIZE (mode));
2216 else if (GET_CODE (op1) == SUBREG
2217 && subreg_lowpart_p (op1)
2218 && INTEGRAL_MODE_P (GET_MODE (SUBREG_REG (op1))))
2219 op1 = SUBREG_REG (op1);
2222 if (op1 == const0_rtx)
2223 return shifted;
2225 /* Check whether its cheaper to implement a left shift by a constant
2226 bit count by a sequence of additions. */
2227 if (code == LSHIFT_EXPR
2228 && CONST_INT_P (op1)
2229 && INTVAL (op1) > 0
2230 && INTVAL (op1) < GET_MODE_PRECISION (mode)
2231 && INTVAL (op1) < MAX_BITS_PER_WORD
2232 && shift_cost[speed][mode][INTVAL (op1)] > INTVAL (op1) * add_cost[speed][mode]
2233 && shift_cost[speed][mode][INTVAL (op1)] != MAX_COST)
2235 int i;
2236 for (i = 0; i < INTVAL (op1); i++)
2238 temp = force_reg (mode, shifted);
2239 shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2240 unsignedp, OPTAB_LIB_WIDEN);
2242 return shifted;
2245 for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2247 enum optab_methods methods;
2249 if (attempt == 0)
2250 methods = OPTAB_DIRECT;
2251 else if (attempt == 1)
2252 methods = OPTAB_WIDEN;
2253 else
2254 methods = OPTAB_LIB_WIDEN;
2256 if (rotate)
2258 /* Widening does not work for rotation. */
2259 if (methods == OPTAB_WIDEN)
2260 continue;
2261 else if (methods == OPTAB_LIB_WIDEN)
2263 /* If we have been unable to open-code this by a rotation,
2264 do it as the IOR of two shifts. I.e., to rotate A
2265 by N bits, compute (A << N) | ((unsigned) A >> (C - N))
2266 where C is the bitsize of A.
2268 It is theoretically possible that the target machine might
2269 not be able to perform either shift and hence we would
2270 be making two libcalls rather than just the one for the
2271 shift (similarly if IOR could not be done). We will allow
2272 this extremely unlikely lossage to avoid complicating the
2273 code below. */
2275 rtx subtarget = target == shifted ? 0 : target;
2276 rtx new_amount, other_amount;
2277 rtx temp1;
2279 new_amount = op1;
2280 if (CONST_INT_P (op1))
2281 other_amount = GEN_INT (GET_MODE_BITSIZE (mode)
2282 - INTVAL (op1));
2283 else
2284 other_amount
2285 = simplify_gen_binary (MINUS, GET_MODE (op1),
2286 GEN_INT (GET_MODE_PRECISION (mode)),
2287 op1);
2289 shifted = force_reg (mode, shifted);
2291 temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2292 mode, shifted, new_amount, 0, 1);
2293 temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2294 mode, shifted, other_amount,
2295 subtarget, 1);
2296 return expand_binop (mode, ior_optab, temp, temp1, target,
2297 unsignedp, methods);
2300 temp = expand_binop (mode,
2301 left ? lrotate_optab : rrotate_optab,
2302 shifted, op1, target, unsignedp, methods);
2304 else if (unsignedp)
2305 temp = expand_binop (mode,
2306 left ? lshift_optab : rshift_uns_optab,
2307 shifted, op1, target, unsignedp, methods);
2309 /* Do arithmetic shifts.
2310 Also, if we are going to widen the operand, we can just as well
2311 use an arithmetic right-shift instead of a logical one. */
2312 if (temp == 0 && ! rotate
2313 && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2315 enum optab_methods methods1 = methods;
2317 /* If trying to widen a log shift to an arithmetic shift,
2318 don't accept an arithmetic shift of the same size. */
2319 if (unsignedp)
2320 methods1 = OPTAB_MUST_WIDEN;
2322 /* Arithmetic shift */
2324 temp = expand_binop (mode,
2325 left ? lshift_optab : rshift_arith_optab,
2326 shifted, op1, target, unsignedp, methods1);
2329 /* We used to try extzv here for logical right shifts, but that was
2330 only useful for one machine, the VAX, and caused poor code
2331 generation there for lshrdi3, so the code was deleted and a
2332 define_expand for lshrsi3 was added to vax.md. */
2335 gcc_assert (temp);
2336 return temp;
2339 /* Output a shift instruction for expression code CODE,
2340 with SHIFTED being the rtx for the value to shift,
2341 and AMOUNT the amount to shift by.
2342 Store the result in the rtx TARGET, if that is convenient.
2343 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2344 Return the rtx for where the value is. */
2347 expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2348 int amount, rtx target, int unsignedp)
2350 return expand_shift_1 (code, mode,
2351 shifted, GEN_INT (amount), target, unsignedp);
2354 /* Output a shift instruction for expression code CODE,
2355 with SHIFTED being the rtx for the value to shift,
2356 and AMOUNT the tree for the amount to shift by.
2357 Store the result in the rtx TARGET, if that is convenient.
2358 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2359 Return the rtx for where the value is. */
2362 expand_variable_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2363 tree amount, rtx target, int unsignedp)
2365 return expand_shift_1 (code, mode,
2366 shifted, expand_normal (amount), target, unsignedp);
2370 /* Indicates the type of fixup needed after a constant multiplication.
2371 BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2372 the result should be negated, and ADD_VARIANT means that the
2373 multiplicand should be added to the result. */
2374 enum mult_variant {basic_variant, negate_variant, add_variant};
2376 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2377 const struct mult_cost *, enum machine_mode mode);
2378 static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
2379 struct algorithm *, enum mult_variant *, int);
2380 static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
2381 const struct algorithm *, enum mult_variant);
2382 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2383 static rtx extract_high_half (enum machine_mode, rtx);
2384 static rtx expand_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
2385 static rtx expand_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
2386 int, int);
2387 /* Compute and return the best algorithm for multiplying by T.
2388 The algorithm must cost less than cost_limit
2389 If retval.cost >= COST_LIMIT, no algorithm was found and all
2390 other field of the returned struct are undefined.
2391 MODE is the machine mode of the multiplication. */
2393 static void
2394 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2395 const struct mult_cost *cost_limit, enum machine_mode mode)
2397 int m;
2398 struct algorithm *alg_in, *best_alg;
2399 struct mult_cost best_cost;
2400 struct mult_cost new_limit;
2401 int op_cost, op_latency;
2402 unsigned HOST_WIDE_INT orig_t = t;
2403 unsigned HOST_WIDE_INT q;
2404 int maxm, hash_index;
2405 bool cache_hit = false;
2406 enum alg_code cache_alg = alg_zero;
2407 bool speed = optimize_insn_for_speed_p ();
2408 enum machine_mode imode;
2410 /* Indicate that no algorithm is yet found. If no algorithm
2411 is found, this value will be returned and indicate failure. */
2412 alg_out->cost.cost = cost_limit->cost + 1;
2413 alg_out->cost.latency = cost_limit->latency + 1;
2415 if (cost_limit->cost < 0
2416 || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2417 return;
2419 /* Be prepared for vector modes. */
2420 imode = GET_MODE_INNER (mode);
2421 if (imode == VOIDmode)
2422 imode = mode;
2424 maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2426 /* Restrict the bits of "t" to the multiplication's mode. */
2427 t &= GET_MODE_MASK (imode);
2429 /* t == 1 can be done in zero cost. */
2430 if (t == 1)
2432 alg_out->ops = 1;
2433 alg_out->cost.cost = 0;
2434 alg_out->cost.latency = 0;
2435 alg_out->op[0] = alg_m;
2436 return;
2439 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2440 fail now. */
2441 if (t == 0)
2443 if (MULT_COST_LESS (cost_limit, zero_cost[speed]))
2444 return;
2445 else
2447 alg_out->ops = 1;
2448 alg_out->cost.cost = zero_cost[speed];
2449 alg_out->cost.latency = zero_cost[speed];
2450 alg_out->op[0] = alg_zero;
2451 return;
2455 /* We'll be needing a couple extra algorithm structures now. */
2457 alg_in = XALLOCA (struct algorithm);
2458 best_alg = XALLOCA (struct algorithm);
2459 best_cost = *cost_limit;
2461 /* Compute the hash index. */
2462 hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2464 /* See if we already know what to do for T. */
2465 if (alg_hash[hash_index].t == t
2466 && alg_hash[hash_index].mode == mode
2467 && alg_hash[hash_index].mode == mode
2468 && alg_hash[hash_index].speed == speed
2469 && alg_hash[hash_index].alg != alg_unknown)
2471 cache_alg = alg_hash[hash_index].alg;
2473 if (cache_alg == alg_impossible)
2475 /* The cache tells us that it's impossible to synthesize
2476 multiplication by T within alg_hash[hash_index].cost. */
2477 if (!CHEAPER_MULT_COST (&alg_hash[hash_index].cost, cost_limit))
2478 /* COST_LIMIT is at least as restrictive as the one
2479 recorded in the hash table, in which case we have no
2480 hope of synthesizing a multiplication. Just
2481 return. */
2482 return;
2484 /* If we get here, COST_LIMIT is less restrictive than the
2485 one recorded in the hash table, so we may be able to
2486 synthesize a multiplication. Proceed as if we didn't
2487 have the cache entry. */
2489 else
2491 if (CHEAPER_MULT_COST (cost_limit, &alg_hash[hash_index].cost))
2492 /* The cached algorithm shows that this multiplication
2493 requires more cost than COST_LIMIT. Just return. This
2494 way, we don't clobber this cache entry with
2495 alg_impossible but retain useful information. */
2496 return;
2498 cache_hit = true;
2500 switch (cache_alg)
2502 case alg_shift:
2503 goto do_alg_shift;
2505 case alg_add_t_m2:
2506 case alg_sub_t_m2:
2507 goto do_alg_addsub_t_m2;
2509 case alg_add_factor:
2510 case alg_sub_factor:
2511 goto do_alg_addsub_factor;
2513 case alg_add_t2_m:
2514 goto do_alg_add_t2_m;
2516 case alg_sub_t2_m:
2517 goto do_alg_sub_t2_m;
2519 default:
2520 gcc_unreachable ();
2525 /* If we have a group of zero bits at the low-order part of T, try
2526 multiplying by the remaining bits and then doing a shift. */
2528 if ((t & 1) == 0)
2530 do_alg_shift:
2531 m = floor_log2 (t & -t); /* m = number of low zero bits */
2532 if (m < maxm)
2534 q = t >> m;
2535 /* The function expand_shift will choose between a shift and
2536 a sequence of additions, so the observed cost is given as
2537 MIN (m * add_cost[speed][mode], shift_cost[speed][mode][m]). */
2538 op_cost = m * add_cost[speed][mode];
2539 if (shift_cost[speed][mode][m] < op_cost)
2540 op_cost = shift_cost[speed][mode][m];
2541 new_limit.cost = best_cost.cost - op_cost;
2542 new_limit.latency = best_cost.latency - op_cost;
2543 synth_mult (alg_in, q, &new_limit, mode);
2545 alg_in->cost.cost += op_cost;
2546 alg_in->cost.latency += op_cost;
2547 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2549 struct algorithm *x;
2550 best_cost = alg_in->cost;
2551 x = alg_in, alg_in = best_alg, best_alg = x;
2552 best_alg->log[best_alg->ops] = m;
2553 best_alg->op[best_alg->ops] = alg_shift;
2556 /* See if treating ORIG_T as a signed number yields a better
2557 sequence. Try this sequence only for a negative ORIG_T
2558 as it would be useless for a non-negative ORIG_T. */
2559 if ((HOST_WIDE_INT) orig_t < 0)
2561 /* Shift ORIG_T as follows because a right shift of a
2562 negative-valued signed type is implementation
2563 defined. */
2564 q = ~(~orig_t >> m);
2565 /* The function expand_shift will choose between a shift
2566 and a sequence of additions, so the observed cost is
2567 given as MIN (m * add_cost[speed][mode],
2568 shift_cost[speed][mode][m]). */
2569 op_cost = m * add_cost[speed][mode];
2570 if (shift_cost[speed][mode][m] < op_cost)
2571 op_cost = shift_cost[speed][mode][m];
2572 new_limit.cost = best_cost.cost - op_cost;
2573 new_limit.latency = best_cost.latency - op_cost;
2574 synth_mult (alg_in, q, &new_limit, mode);
2576 alg_in->cost.cost += op_cost;
2577 alg_in->cost.latency += op_cost;
2578 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2580 struct algorithm *x;
2581 best_cost = alg_in->cost;
2582 x = alg_in, alg_in = best_alg, best_alg = x;
2583 best_alg->log[best_alg->ops] = m;
2584 best_alg->op[best_alg->ops] = alg_shift;
2588 if (cache_hit)
2589 goto done;
2592 /* If we have an odd number, add or subtract one. */
2593 if ((t & 1) != 0)
2595 unsigned HOST_WIDE_INT w;
2597 do_alg_addsub_t_m2:
2598 for (w = 1; (w & t) != 0; w <<= 1)
2600 /* If T was -1, then W will be zero after the loop. This is another
2601 case where T ends with ...111. Handling this with (T + 1) and
2602 subtract 1 produces slightly better code and results in algorithm
2603 selection much faster than treating it like the ...0111 case
2604 below. */
2605 if (w == 0
2606 || (w > 2
2607 /* Reject the case where t is 3.
2608 Thus we prefer addition in that case. */
2609 && t != 3))
2611 /* T ends with ...111. Multiply by (T + 1) and subtract 1. */
2613 op_cost = add_cost[speed][mode];
2614 new_limit.cost = best_cost.cost - op_cost;
2615 new_limit.latency = best_cost.latency - op_cost;
2616 synth_mult (alg_in, t + 1, &new_limit, mode);
2618 alg_in->cost.cost += op_cost;
2619 alg_in->cost.latency += op_cost;
2620 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2622 struct algorithm *x;
2623 best_cost = alg_in->cost;
2624 x = alg_in, alg_in = best_alg, best_alg = x;
2625 best_alg->log[best_alg->ops] = 0;
2626 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2629 else
2631 /* T ends with ...01 or ...011. Multiply by (T - 1) and add 1. */
2633 op_cost = add_cost[speed][mode];
2634 new_limit.cost = best_cost.cost - op_cost;
2635 new_limit.latency = best_cost.latency - op_cost;
2636 synth_mult (alg_in, t - 1, &new_limit, mode);
2638 alg_in->cost.cost += op_cost;
2639 alg_in->cost.latency += op_cost;
2640 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2642 struct algorithm *x;
2643 best_cost = alg_in->cost;
2644 x = alg_in, alg_in = best_alg, best_alg = x;
2645 best_alg->log[best_alg->ops] = 0;
2646 best_alg->op[best_alg->ops] = alg_add_t_m2;
2650 /* We may be able to calculate a * -7, a * -15, a * -31, etc
2651 quickly with a - a * n for some appropriate constant n. */
2652 m = exact_log2 (-orig_t + 1);
2653 if (m >= 0 && m < maxm)
2655 op_cost = shiftsub1_cost[speed][mode][m];
2656 new_limit.cost = best_cost.cost - op_cost;
2657 new_limit.latency = best_cost.latency - op_cost;
2658 synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2659 &new_limit, mode);
2661 alg_in->cost.cost += op_cost;
2662 alg_in->cost.latency += op_cost;
2663 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2665 struct algorithm *x;
2666 best_cost = alg_in->cost;
2667 x = alg_in, alg_in = best_alg, best_alg = x;
2668 best_alg->log[best_alg->ops] = m;
2669 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2673 if (cache_hit)
2674 goto done;
2677 /* Look for factors of t of the form
2678 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2679 If we find such a factor, we can multiply by t using an algorithm that
2680 multiplies by q, shift the result by m and add/subtract it to itself.
2682 We search for large factors first and loop down, even if large factors
2683 are less probable than small; if we find a large factor we will find a
2684 good sequence quickly, and therefore be able to prune (by decreasing
2685 COST_LIMIT) the search. */
2687 do_alg_addsub_factor:
2688 for (m = floor_log2 (t - 1); m >= 2; m--)
2690 unsigned HOST_WIDE_INT d;
2692 d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2693 if (t % d == 0 && t > d && m < maxm
2694 && (!cache_hit || cache_alg == alg_add_factor))
2696 /* If the target has a cheap shift-and-add instruction use
2697 that in preference to a shift insn followed by an add insn.
2698 Assume that the shift-and-add is "atomic" with a latency
2699 equal to its cost, otherwise assume that on superscalar
2700 hardware the shift may be executed concurrently with the
2701 earlier steps in the algorithm. */
2702 op_cost = add_cost[speed][mode] + shift_cost[speed][mode][m];
2703 if (shiftadd_cost[speed][mode][m] < op_cost)
2705 op_cost = shiftadd_cost[speed][mode][m];
2706 op_latency = op_cost;
2708 else
2709 op_latency = add_cost[speed][mode];
2711 new_limit.cost = best_cost.cost - op_cost;
2712 new_limit.latency = best_cost.latency - op_latency;
2713 synth_mult (alg_in, t / d, &new_limit, mode);
2715 alg_in->cost.cost += op_cost;
2716 alg_in->cost.latency += op_latency;
2717 if (alg_in->cost.latency < op_cost)
2718 alg_in->cost.latency = op_cost;
2719 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2721 struct algorithm *x;
2722 best_cost = alg_in->cost;
2723 x = alg_in, alg_in = best_alg, best_alg = x;
2724 best_alg->log[best_alg->ops] = m;
2725 best_alg->op[best_alg->ops] = alg_add_factor;
2727 /* Other factors will have been taken care of in the recursion. */
2728 break;
2731 d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2732 if (t % d == 0 && t > d && m < maxm
2733 && (!cache_hit || cache_alg == alg_sub_factor))
2735 /* If the target has a cheap shift-and-subtract insn use
2736 that in preference to a shift insn followed by a sub insn.
2737 Assume that the shift-and-sub is "atomic" with a latency
2738 equal to it's cost, otherwise assume that on superscalar
2739 hardware the shift may be executed concurrently with the
2740 earlier steps in the algorithm. */
2741 op_cost = add_cost[speed][mode] + shift_cost[speed][mode][m];
2742 if (shiftsub0_cost[speed][mode][m] < op_cost)
2744 op_cost = shiftsub0_cost[speed][mode][m];
2745 op_latency = op_cost;
2747 else
2748 op_latency = add_cost[speed][mode];
2750 new_limit.cost = best_cost.cost - op_cost;
2751 new_limit.latency = best_cost.latency - op_latency;
2752 synth_mult (alg_in, t / d, &new_limit, mode);
2754 alg_in->cost.cost += op_cost;
2755 alg_in->cost.latency += op_latency;
2756 if (alg_in->cost.latency < op_cost)
2757 alg_in->cost.latency = op_cost;
2758 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2760 struct algorithm *x;
2761 best_cost = alg_in->cost;
2762 x = alg_in, alg_in = best_alg, best_alg = x;
2763 best_alg->log[best_alg->ops] = m;
2764 best_alg->op[best_alg->ops] = alg_sub_factor;
2766 break;
2769 if (cache_hit)
2770 goto done;
2772 /* Try shift-and-add (load effective address) instructions,
2773 i.e. do a*3, a*5, a*9. */
2774 if ((t & 1) != 0)
2776 do_alg_add_t2_m:
2777 q = t - 1;
2778 q = q & -q;
2779 m = exact_log2 (q);
2780 if (m >= 0 && m < maxm)
2782 op_cost = shiftadd_cost[speed][mode][m];
2783 new_limit.cost = best_cost.cost - op_cost;
2784 new_limit.latency = best_cost.latency - op_cost;
2785 synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2787 alg_in->cost.cost += op_cost;
2788 alg_in->cost.latency += op_cost;
2789 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2791 struct algorithm *x;
2792 best_cost = alg_in->cost;
2793 x = alg_in, alg_in = best_alg, best_alg = x;
2794 best_alg->log[best_alg->ops] = m;
2795 best_alg->op[best_alg->ops] = alg_add_t2_m;
2798 if (cache_hit)
2799 goto done;
2801 do_alg_sub_t2_m:
2802 q = t + 1;
2803 q = q & -q;
2804 m = exact_log2 (q);
2805 if (m >= 0 && m < maxm)
2807 op_cost = shiftsub0_cost[speed][mode][m];
2808 new_limit.cost = best_cost.cost - op_cost;
2809 new_limit.latency = best_cost.latency - op_cost;
2810 synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2812 alg_in->cost.cost += op_cost;
2813 alg_in->cost.latency += op_cost;
2814 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2816 struct algorithm *x;
2817 best_cost = alg_in->cost;
2818 x = alg_in, alg_in = best_alg, best_alg = x;
2819 best_alg->log[best_alg->ops] = m;
2820 best_alg->op[best_alg->ops] = alg_sub_t2_m;
2823 if (cache_hit)
2824 goto done;
2827 done:
2828 /* If best_cost has not decreased, we have not found any algorithm. */
2829 if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2831 /* We failed to find an algorithm. Record alg_impossible for
2832 this case (that is, <T, MODE, COST_LIMIT>) so that next time
2833 we are asked to find an algorithm for T within the same or
2834 lower COST_LIMIT, we can immediately return to the
2835 caller. */
2836 alg_hash[hash_index].t = t;
2837 alg_hash[hash_index].mode = mode;
2838 alg_hash[hash_index].speed = speed;
2839 alg_hash[hash_index].alg = alg_impossible;
2840 alg_hash[hash_index].cost = *cost_limit;
2841 return;
2844 /* Cache the result. */
2845 if (!cache_hit)
2847 alg_hash[hash_index].t = t;
2848 alg_hash[hash_index].mode = mode;
2849 alg_hash[hash_index].speed = speed;
2850 alg_hash[hash_index].alg = best_alg->op[best_alg->ops];
2851 alg_hash[hash_index].cost.cost = best_cost.cost;
2852 alg_hash[hash_index].cost.latency = best_cost.latency;
2855 /* If we are getting a too long sequence for `struct algorithm'
2856 to record, make this search fail. */
2857 if (best_alg->ops == MAX_BITS_PER_WORD)
2858 return;
2860 /* Copy the algorithm from temporary space to the space at alg_out.
2861 We avoid using structure assignment because the majority of
2862 best_alg is normally undefined, and this is a critical function. */
2863 alg_out->ops = best_alg->ops + 1;
2864 alg_out->cost = best_cost;
2865 memcpy (alg_out->op, best_alg->op,
2866 alg_out->ops * sizeof *alg_out->op);
2867 memcpy (alg_out->log, best_alg->log,
2868 alg_out->ops * sizeof *alg_out->log);
2871 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2872 Try three variations:
2874 - a shift/add sequence based on VAL itself
2875 - a shift/add sequence based on -VAL, followed by a negation
2876 - a shift/add sequence based on VAL - 1, followed by an addition.
2878 Return true if the cheapest of these cost less than MULT_COST,
2879 describing the algorithm in *ALG and final fixup in *VARIANT. */
2881 static bool
2882 choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
2883 struct algorithm *alg, enum mult_variant *variant,
2884 int mult_cost)
2886 struct algorithm alg2;
2887 struct mult_cost limit;
2888 int op_cost;
2889 bool speed = optimize_insn_for_speed_p ();
2891 /* Fail quickly for impossible bounds. */
2892 if (mult_cost < 0)
2893 return false;
2895 /* Ensure that mult_cost provides a reasonable upper bound.
2896 Any constant multiplication can be performed with less
2897 than 2 * bits additions. */
2898 op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost[speed][mode];
2899 if (mult_cost > op_cost)
2900 mult_cost = op_cost;
2902 *variant = basic_variant;
2903 limit.cost = mult_cost;
2904 limit.latency = mult_cost;
2905 synth_mult (alg, val, &limit, mode);
2907 /* This works only if the inverted value actually fits in an
2908 `unsigned int' */
2909 if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
2911 op_cost = neg_cost[speed][mode];
2912 if (MULT_COST_LESS (&alg->cost, mult_cost))
2914 limit.cost = alg->cost.cost - op_cost;
2915 limit.latency = alg->cost.latency - op_cost;
2917 else
2919 limit.cost = mult_cost - op_cost;
2920 limit.latency = mult_cost - op_cost;
2923 synth_mult (&alg2, -val, &limit, mode);
2924 alg2.cost.cost += op_cost;
2925 alg2.cost.latency += op_cost;
2926 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2927 *alg = alg2, *variant = negate_variant;
2930 /* This proves very useful for division-by-constant. */
2931 op_cost = add_cost[speed][mode];
2932 if (MULT_COST_LESS (&alg->cost, mult_cost))
2934 limit.cost = alg->cost.cost - op_cost;
2935 limit.latency = alg->cost.latency - op_cost;
2937 else
2939 limit.cost = mult_cost - op_cost;
2940 limit.latency = mult_cost - op_cost;
2943 synth_mult (&alg2, val - 1, &limit, mode);
2944 alg2.cost.cost += op_cost;
2945 alg2.cost.latency += op_cost;
2946 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2947 *alg = alg2, *variant = add_variant;
2949 return MULT_COST_LESS (&alg->cost, mult_cost);
2952 /* A subroutine of expand_mult, used for constant multiplications.
2953 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2954 convenient. Use the shift/add sequence described by ALG and apply
2955 the final fixup specified by VARIANT. */
2957 static rtx
2958 expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
2959 rtx target, const struct algorithm *alg,
2960 enum mult_variant variant)
2962 HOST_WIDE_INT val_so_far;
2963 rtx insn, accum, tem;
2964 int opno;
2965 enum machine_mode nmode;
2967 /* Avoid referencing memory over and over and invalid sharing
2968 on SUBREGs. */
2969 op0 = force_reg (mode, op0);
2971 /* ACCUM starts out either as OP0 or as a zero, depending on
2972 the first operation. */
2974 if (alg->op[0] == alg_zero)
2976 accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
2977 val_so_far = 0;
2979 else if (alg->op[0] == alg_m)
2981 accum = copy_to_mode_reg (mode, op0);
2982 val_so_far = 1;
2984 else
2985 gcc_unreachable ();
2987 for (opno = 1; opno < alg->ops; opno++)
2989 int log = alg->log[opno];
2990 rtx shift_subtarget = optimize ? 0 : accum;
2991 rtx add_target
2992 = (opno == alg->ops - 1 && target != 0 && variant != add_variant
2993 && !optimize)
2994 ? target : 0;
2995 rtx accum_target = optimize ? 0 : accum;
2996 rtx accum_inner;
2998 switch (alg->op[opno])
3000 case alg_shift:
3001 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3002 /* REG_EQUAL note will be attached to the following insn. */
3003 emit_move_insn (accum, tem);
3004 val_so_far <<= log;
3005 break;
3007 case alg_add_t_m2:
3008 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3009 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3010 add_target ? add_target : accum_target);
3011 val_so_far += (HOST_WIDE_INT) 1 << log;
3012 break;
3014 case alg_sub_t_m2:
3015 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3016 accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
3017 add_target ? add_target : accum_target);
3018 val_so_far -= (HOST_WIDE_INT) 1 << log;
3019 break;
3021 case alg_add_t2_m:
3022 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3023 log, shift_subtarget, 0);
3024 accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3025 add_target ? add_target : accum_target);
3026 val_so_far = (val_so_far << log) + 1;
3027 break;
3029 case alg_sub_t2_m:
3030 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3031 log, shift_subtarget, 0);
3032 accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3033 add_target ? add_target : accum_target);
3034 val_so_far = (val_so_far << log) - 1;
3035 break;
3037 case alg_add_factor:
3038 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3039 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3040 add_target ? add_target : accum_target);
3041 val_so_far += val_so_far << log;
3042 break;
3044 case alg_sub_factor:
3045 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3046 accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3047 (add_target
3048 ? add_target : (optimize ? 0 : tem)));
3049 val_so_far = (val_so_far << log) - val_so_far;
3050 break;
3052 default:
3053 gcc_unreachable ();
3056 if (SCALAR_INT_MODE_P (mode))
3058 /* Write a REG_EQUAL note on the last insn so that we can cse
3059 multiplication sequences. Note that if ACCUM is a SUBREG,
3060 we've set the inner register and must properly indicate that. */
3061 tem = op0, nmode = mode;
3062 accum_inner = accum;
3063 if (GET_CODE (accum) == SUBREG)
3065 accum_inner = SUBREG_REG (accum);
3066 nmode = GET_MODE (accum_inner);
3067 tem = gen_lowpart (nmode, op0);
3070 insn = get_last_insn ();
3071 set_dst_reg_note (insn, REG_EQUAL,
3072 gen_rtx_MULT (nmode, tem, GEN_INT (val_so_far)),
3073 accum_inner);
3077 if (variant == negate_variant)
3079 val_so_far = -val_so_far;
3080 accum = expand_unop (mode, neg_optab, accum, target, 0);
3082 else if (variant == add_variant)
3084 val_so_far = val_so_far + 1;
3085 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3088 /* Compare only the bits of val and val_so_far that are significant
3089 in the result mode, to avoid sign-/zero-extension confusion. */
3090 nmode = GET_MODE_INNER (mode);
3091 if (nmode == VOIDmode)
3092 nmode = mode;
3093 val &= GET_MODE_MASK (nmode);
3094 val_so_far &= GET_MODE_MASK (nmode);
3095 gcc_assert (val == val_so_far);
3097 return accum;
3100 /* Perform a multiplication and return an rtx for the result.
3101 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3102 TARGET is a suggestion for where to store the result (an rtx).
3104 We check specially for a constant integer as OP1.
3105 If you want this check for OP0 as well, then before calling
3106 you should swap the two operands if OP0 would be constant. */
3109 expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3110 int unsignedp)
3112 enum mult_variant variant;
3113 struct algorithm algorithm;
3114 rtx scalar_op1;
3115 int max_cost;
3116 bool speed = optimize_insn_for_speed_p ();
3117 bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3119 if (CONSTANT_P (op0))
3121 rtx temp = op0;
3122 op0 = op1;
3123 op1 = temp;
3126 /* For vectors, there are several simplifications that can be made if
3127 all elements of the vector constant are identical. */
3128 scalar_op1 = op1;
3129 if (GET_CODE (op1) == CONST_VECTOR)
3131 int i, n = CONST_VECTOR_NUNITS (op1);
3132 scalar_op1 = CONST_VECTOR_ELT (op1, 0);
3133 for (i = 1; i < n; ++i)
3134 if (!rtx_equal_p (scalar_op1, CONST_VECTOR_ELT (op1, i)))
3135 goto skip_scalar;
3138 if (INTEGRAL_MODE_P (mode))
3140 rtx fake_reg;
3141 HOST_WIDE_INT coeff = 0;
3142 bool is_neg = false;
3143 int mode_bitsize;
3145 if (op1 == CONST0_RTX (mode))
3146 return op1;
3147 if (op1 == CONST1_RTX (mode))
3148 return op0;
3149 if (op1 == CONSTM1_RTX (mode))
3150 return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3151 op0, target, 0);
3153 if (do_trapv)
3154 goto skip_synth;
3156 /* These are the operations that are potentially turned into
3157 a sequence of shifts and additions. */
3158 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3160 /* synth_mult does an `unsigned int' multiply. As long as the mode is
3161 less than or equal in size to `unsigned int' this doesn't matter.
3162 If the mode is larger than `unsigned int', then synth_mult works
3163 only if the constant value exactly fits in an `unsigned int' without
3164 any truncation. This means that multiplying by negative values does
3165 not work; results are off by 2^32 on a 32 bit machine. */
3167 if (CONST_INT_P (scalar_op1))
3169 coeff = INTVAL (scalar_op1);
3170 is_neg = coeff < 0;
3172 else if (CONST_DOUBLE_P (scalar_op1))
3174 /* If we are multiplying in DImode, it may still be a win
3175 to try to work with shifts and adds. */
3176 if (CONST_DOUBLE_HIGH (scalar_op1) == 0
3177 && CONST_DOUBLE_LOW (scalar_op1) > 0)
3179 coeff = CONST_DOUBLE_LOW (scalar_op1);
3180 is_neg = false;
3182 else if (CONST_DOUBLE_LOW (scalar_op1) == 0)
3184 coeff = CONST_DOUBLE_HIGH (scalar_op1);
3185 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3187 int shift = floor_log2 (coeff) + HOST_BITS_PER_WIDE_INT;
3188 if (shift < HOST_BITS_PER_DOUBLE_INT - 1
3189 || mode_bitsize <= HOST_BITS_PER_DOUBLE_INT)
3190 return expand_shift (LSHIFT_EXPR, mode, op0,
3191 shift, target, unsignedp);
3193 goto skip_synth;
3196 else
3197 goto skip_synth;
3199 /* We used to test optimize here, on the grounds that it's better to
3200 produce a smaller program when -O is not used. But this causes
3201 such a terrible slowdown sometimes that it seems better to always
3202 use synth_mult. */
3204 /* Special case powers of two. */
3205 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3206 return expand_shift (LSHIFT_EXPR, mode, op0,
3207 floor_log2 (coeff), target, unsignedp);
3209 fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3211 /* Attempt to handle multiplication of DImode values by negative
3212 coefficients, by performing the multiplication by a positive
3213 multiplier and then inverting the result. */
3214 /* ??? How is this not slightly redundant with the neg variant? */
3215 if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3217 /* Its safe to use -coeff even for INT_MIN, as the
3218 result is interpreted as an unsigned coefficient.
3219 Exclude cost of op0 from max_cost to match the cost
3220 calculation of the synth_mult. */
3221 max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed)
3222 - neg_cost[speed][mode]);
3223 if (max_cost > 0
3224 && choose_mult_variant (mode, -coeff, &algorithm,
3225 &variant, max_cost))
3227 rtx temp = expand_mult_const (mode, op0, -coeff, NULL_RTX,
3228 &algorithm, variant);
3229 return expand_unop (mode, neg_optab, temp, target, 0);
3233 /* Exclude cost of op0 from max_cost to match the cost
3234 calculation of the synth_mult. */
3235 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed);
3236 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3237 return expand_mult_const (mode, op0, coeff, target,
3238 &algorithm, variant);
3240 skip_synth:
3242 /* Expand x*2.0 as x+x. */
3243 if (GET_CODE (scalar_op1) == CONST_DOUBLE && FLOAT_MODE_P (mode))
3245 REAL_VALUE_TYPE d;
3246 REAL_VALUE_FROM_CONST_DOUBLE (d, scalar_op1);
3248 if (REAL_VALUES_EQUAL (d, dconst2))
3250 op0 = force_reg (GET_MODE (op0), op0);
3251 return expand_binop (mode, add_optab, op0, op0,
3252 target, unsignedp, OPTAB_LIB_WIDEN);
3255 skip_scalar:
3257 /* This used to use umul_optab if unsigned, but for non-widening multiply
3258 there is no difference between signed and unsigned. */
3259 op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3260 op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3261 gcc_assert (op0);
3262 return op0;
3265 /* Perform a widening multiplication and return an rtx for the result.
3266 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3267 TARGET is a suggestion for where to store the result (an rtx).
3268 THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3269 or smul_widen_optab.
3271 We check specially for a constant integer as OP1, comparing the
3272 cost of a widening multiply against the cost of a sequence of shifts
3273 and adds. */
3276 expand_widening_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3277 int unsignedp, optab this_optab)
3279 bool speed = optimize_insn_for_speed_p ();
3280 rtx cop1;
3282 if (CONST_INT_P (op1)
3283 && GET_MODE (op0) != VOIDmode
3284 && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3285 this_optab == umul_widen_optab))
3286 && CONST_INT_P (cop1)
3287 && (INTVAL (cop1) >= 0
3288 || HWI_COMPUTABLE_MODE_P (mode)))
3290 HOST_WIDE_INT coeff = INTVAL (cop1);
3291 int max_cost;
3292 enum mult_variant variant;
3293 struct algorithm algorithm;
3295 /* Special case powers of two. */
3296 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3298 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3299 return expand_shift (LSHIFT_EXPR, mode, op0,
3300 floor_log2 (coeff), target, unsignedp);
3303 /* Exclude cost of op0 from max_cost to match the cost
3304 calculation of the synth_mult. */
3305 max_cost = mul_widen_cost[speed][mode];
3306 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3307 max_cost))
3309 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3310 return expand_mult_const (mode, op0, coeff, target,
3311 &algorithm, variant);
3314 return expand_binop (mode, this_optab, op0, op1, target,
3315 unsignedp, OPTAB_LIB_WIDEN);
3318 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3319 replace division by D, and put the least significant N bits of the result
3320 in *MULTIPLIER_PTR and return the most significant bit.
3322 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3323 needed precision is in PRECISION (should be <= N).
3325 PRECISION should be as small as possible so this function can choose
3326 multiplier more freely.
3328 The rounded-up logarithm of D is placed in *lgup_ptr. A shift count that
3329 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3331 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3332 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
3334 unsigned HOST_WIDE_INT
3335 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3336 unsigned HOST_WIDE_INT *multiplier_ptr,
3337 int *post_shift_ptr, int *lgup_ptr)
3339 HOST_WIDE_INT mhigh_hi, mlow_hi;
3340 unsigned HOST_WIDE_INT mhigh_lo, mlow_lo;
3341 int lgup, post_shift;
3342 int pow, pow2;
3343 unsigned HOST_WIDE_INT nl, dummy1;
3344 HOST_WIDE_INT nh, dummy2;
3346 /* lgup = ceil(log2(divisor)); */
3347 lgup = ceil_log2 (d);
3349 gcc_assert (lgup <= n);
3351 pow = n + lgup;
3352 pow2 = n + lgup - precision;
3354 /* We could handle this with some effort, but this case is much
3355 better handled directly with a scc insn, so rely on caller using
3356 that. */
3357 gcc_assert (pow != HOST_BITS_PER_DOUBLE_INT);
3359 /* mlow = 2^(N + lgup)/d */
3360 if (pow >= HOST_BITS_PER_WIDE_INT)
3362 nh = (HOST_WIDE_INT) 1 << (pow - HOST_BITS_PER_WIDE_INT);
3363 nl = 0;
3365 else
3367 nh = 0;
3368 nl = (unsigned HOST_WIDE_INT) 1 << pow;
3370 div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3371 &mlow_lo, &mlow_hi, &dummy1, &dummy2);
3373 /* mhigh = (2^(N + lgup) + 2^N + lgup - precision)/d */
3374 if (pow2 >= HOST_BITS_PER_WIDE_INT)
3375 nh |= (HOST_WIDE_INT) 1 << (pow2 - HOST_BITS_PER_WIDE_INT);
3376 else
3377 nl |= (unsigned HOST_WIDE_INT) 1 << pow2;
3378 div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3379 &mhigh_lo, &mhigh_hi, &dummy1, &dummy2);
3381 gcc_assert (!mhigh_hi || nh - d < d);
3382 gcc_assert (mhigh_hi <= 1 && mlow_hi <= 1);
3383 /* Assert that mlow < mhigh. */
3384 gcc_assert (mlow_hi < mhigh_hi
3385 || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo));
3387 /* If precision == N, then mlow, mhigh exceed 2^N
3388 (but they do not exceed 2^(N+1)). */
3390 /* Reduce to lowest terms. */
3391 for (post_shift = lgup; post_shift > 0; post_shift--)
3393 unsigned HOST_WIDE_INT ml_lo = (mlow_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mlow_lo >> 1);
3394 unsigned HOST_WIDE_INT mh_lo = (mhigh_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mhigh_lo >> 1);
3395 if (ml_lo >= mh_lo)
3396 break;
3398 mlow_hi = 0;
3399 mlow_lo = ml_lo;
3400 mhigh_hi = 0;
3401 mhigh_lo = mh_lo;
3404 *post_shift_ptr = post_shift;
3405 *lgup_ptr = lgup;
3406 if (n < HOST_BITS_PER_WIDE_INT)
3408 unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3409 *multiplier_ptr = mhigh_lo & mask;
3410 return mhigh_lo >= mask;
3412 else
3414 *multiplier_ptr = mhigh_lo;
3415 return mhigh_hi;
3419 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3420 congruent to 1 (mod 2**N). */
3422 static unsigned HOST_WIDE_INT
3423 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3425 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
3427 /* The algorithm notes that the choice y = x satisfies
3428 x*y == 1 mod 2^3, since x is assumed odd.
3429 Each iteration doubles the number of bits of significance in y. */
3431 unsigned HOST_WIDE_INT mask;
3432 unsigned HOST_WIDE_INT y = x;
3433 int nbit = 3;
3435 mask = (n == HOST_BITS_PER_WIDE_INT
3436 ? ~(unsigned HOST_WIDE_INT) 0
3437 : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3439 while (nbit < n)
3441 y = y * (2 - x*y) & mask; /* Modulo 2^N */
3442 nbit *= 2;
3444 return y;
3447 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3448 flavor of OP0 and OP1. ADJ_OPERAND is already the high half of the
3449 product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed product
3450 to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3451 become signed.
3453 The result is put in TARGET if that is convenient.
3455 MODE is the mode of operation. */
3458 expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
3459 rtx op1, rtx target, int unsignedp)
3461 rtx tem;
3462 enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3464 tem = expand_shift (RSHIFT_EXPR, mode, op0,
3465 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3466 tem = expand_and (mode, tem, op1, NULL_RTX);
3467 adj_operand
3468 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3469 adj_operand);
3471 tem = expand_shift (RSHIFT_EXPR, mode, op1,
3472 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3473 tem = expand_and (mode, tem, op0, NULL_RTX);
3474 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3475 target);
3477 return target;
3480 /* Subroutine of expand_mult_highpart. Return the MODE high part of OP. */
3482 static rtx
3483 extract_high_half (enum machine_mode mode, rtx op)
3485 enum machine_mode wider_mode;
3487 if (mode == word_mode)
3488 return gen_highpart (mode, op);
3490 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3492 wider_mode = GET_MODE_WIDER_MODE (mode);
3493 op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3494 GET_MODE_BITSIZE (mode), 0, 1);
3495 return convert_modes (mode, wider_mode, op, 0);
3498 /* Like expand_mult_highpart, but only consider using a multiplication
3499 optab. OP1 is an rtx for the constant operand. */
3501 static rtx
3502 expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
3503 rtx target, int unsignedp, int max_cost)
3505 rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3506 enum machine_mode wider_mode;
3507 optab moptab;
3508 rtx tem;
3509 int size;
3510 bool speed = optimize_insn_for_speed_p ();
3512 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3514 wider_mode = GET_MODE_WIDER_MODE (mode);
3515 size = GET_MODE_BITSIZE (mode);
3517 /* Firstly, try using a multiplication insn that only generates the needed
3518 high part of the product, and in the sign flavor of unsignedp. */
3519 if (mul_highpart_cost[speed][mode] < max_cost)
3521 moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3522 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3523 unsignedp, OPTAB_DIRECT);
3524 if (tem)
3525 return tem;
3528 /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3529 Need to adjust the result after the multiplication. */
3530 if (size - 1 < BITS_PER_WORD
3531 && (mul_highpart_cost[speed][mode] + 2 * shift_cost[speed][mode][size-1]
3532 + 4 * add_cost[speed][mode] < max_cost))
3534 moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3535 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3536 unsignedp, OPTAB_DIRECT);
3537 if (tem)
3538 /* We used the wrong signedness. Adjust the result. */
3539 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3540 tem, unsignedp);
3543 /* Try widening multiplication. */
3544 moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3545 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3546 && mul_widen_cost[speed][wider_mode] < max_cost)
3548 tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3549 unsignedp, OPTAB_WIDEN);
3550 if (tem)
3551 return extract_high_half (mode, tem);
3554 /* Try widening the mode and perform a non-widening multiplication. */
3555 if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3556 && size - 1 < BITS_PER_WORD
3557 && mul_cost[speed][wider_mode] + shift_cost[speed][mode][size-1] < max_cost)
3559 rtx insns, wop0, wop1;
3561 /* We need to widen the operands, for example to ensure the
3562 constant multiplier is correctly sign or zero extended.
3563 Use a sequence to clean-up any instructions emitted by
3564 the conversions if things don't work out. */
3565 start_sequence ();
3566 wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3567 wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3568 tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3569 unsignedp, OPTAB_WIDEN);
3570 insns = get_insns ();
3571 end_sequence ();
3573 if (tem)
3575 emit_insn (insns);
3576 return extract_high_half (mode, tem);
3580 /* Try widening multiplication of opposite signedness, and adjust. */
3581 moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3582 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3583 && size - 1 < BITS_PER_WORD
3584 && (mul_widen_cost[speed][wider_mode] + 2 * shift_cost[speed][mode][size-1]
3585 + 4 * add_cost[speed][mode] < max_cost))
3587 tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3588 NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3589 if (tem != 0)
3591 tem = extract_high_half (mode, tem);
3592 /* We used the wrong signedness. Adjust the result. */
3593 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3594 target, unsignedp);
3598 return 0;
3601 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3602 putting the high half of the result in TARGET if that is convenient,
3603 and return where the result is. If the operation can not be performed,
3604 0 is returned.
3606 MODE is the mode of operation and result.
3608 UNSIGNEDP nonzero means unsigned multiply.
3610 MAX_COST is the total allowed cost for the expanded RTL. */
3612 static rtx
3613 expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
3614 rtx target, int unsignedp, int max_cost)
3616 enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3617 unsigned HOST_WIDE_INT cnst1;
3618 int extra_cost;
3619 bool sign_adjust = false;
3620 enum mult_variant variant;
3621 struct algorithm alg;
3622 rtx tem;
3623 bool speed = optimize_insn_for_speed_p ();
3625 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3626 /* We can't support modes wider than HOST_BITS_PER_INT. */
3627 gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3629 cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3631 /* We can't optimize modes wider than BITS_PER_WORD.
3632 ??? We might be able to perform double-word arithmetic if
3633 mode == word_mode, however all the cost calculations in
3634 synth_mult etc. assume single-word operations. */
3635 if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3636 return expand_mult_highpart_optab (mode, op0, op1, target,
3637 unsignedp, max_cost);
3639 extra_cost = shift_cost[speed][mode][GET_MODE_BITSIZE (mode) - 1];
3641 /* Check whether we try to multiply by a negative constant. */
3642 if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3644 sign_adjust = true;
3645 extra_cost += add_cost[speed][mode];
3648 /* See whether shift/add multiplication is cheap enough. */
3649 if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3650 max_cost - extra_cost))
3652 /* See whether the specialized multiplication optabs are
3653 cheaper than the shift/add version. */
3654 tem = expand_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3655 alg.cost.cost + extra_cost);
3656 if (tem)
3657 return tem;
3659 tem = convert_to_mode (wider_mode, op0, unsignedp);
3660 tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3661 tem = extract_high_half (mode, tem);
3663 /* Adjust result for signedness. */
3664 if (sign_adjust)
3665 tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3667 return tem;
3669 return expand_mult_highpart_optab (mode, op0, op1, target,
3670 unsignedp, max_cost);
3674 /* Expand signed modulus of OP0 by a power of two D in mode MODE. */
3676 static rtx
3677 expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3679 unsigned HOST_WIDE_INT masklow, maskhigh;
3680 rtx result, temp, shift, label;
3681 int logd;
3683 logd = floor_log2 (d);
3684 result = gen_reg_rtx (mode);
3686 /* Avoid conditional branches when they're expensive. */
3687 if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3688 && optimize_insn_for_speed_p ())
3690 rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3691 mode, 0, -1);
3692 if (signmask)
3694 signmask = force_reg (mode, signmask);
3695 masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3696 shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3698 /* Use the rtx_cost of a LSHIFTRT instruction to determine
3699 which instruction sequence to use. If logical right shifts
3700 are expensive the use 2 XORs, 2 SUBs and an AND, otherwise
3701 use a LSHIFTRT, 1 ADD, 1 SUB and an AND. */
3703 temp = gen_rtx_LSHIFTRT (mode, result, shift);
3704 if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3705 || (set_src_cost (temp, optimize_insn_for_speed_p ())
3706 > COSTS_N_INSNS (2)))
3708 temp = expand_binop (mode, xor_optab, op0, signmask,
3709 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3710 temp = expand_binop (mode, sub_optab, temp, signmask,
3711 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3712 temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3713 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3714 temp = expand_binop (mode, xor_optab, temp, signmask,
3715 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3716 temp = expand_binop (mode, sub_optab, temp, signmask,
3717 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3719 else
3721 signmask = expand_binop (mode, lshr_optab, signmask, shift,
3722 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3723 signmask = force_reg (mode, signmask);
3725 temp = expand_binop (mode, add_optab, op0, signmask,
3726 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3727 temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3728 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3729 temp = expand_binop (mode, sub_optab, temp, signmask,
3730 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3732 return temp;
3736 /* Mask contains the mode's signbit and the significant bits of the
3737 modulus. By including the signbit in the operation, many targets
3738 can avoid an explicit compare operation in the following comparison
3739 against zero. */
3741 masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3742 if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
3744 masklow |= (HOST_WIDE_INT) -1 << (GET_MODE_BITSIZE (mode) - 1);
3745 maskhigh = -1;
3747 else
3748 maskhigh = (HOST_WIDE_INT) -1
3749 << (GET_MODE_BITSIZE (mode) - HOST_BITS_PER_WIDE_INT - 1);
3751 temp = expand_binop (mode, and_optab, op0,
3752 immed_double_const (masklow, maskhigh, mode),
3753 result, 1, OPTAB_LIB_WIDEN);
3754 if (temp != result)
3755 emit_move_insn (result, temp);
3757 label = gen_label_rtx ();
3758 do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3760 temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3761 0, OPTAB_LIB_WIDEN);
3762 masklow = (HOST_WIDE_INT) -1 << logd;
3763 maskhigh = -1;
3764 temp = expand_binop (mode, ior_optab, temp,
3765 immed_double_const (masklow, maskhigh, mode),
3766 result, 1, OPTAB_LIB_WIDEN);
3767 temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3768 0, OPTAB_LIB_WIDEN);
3769 if (temp != result)
3770 emit_move_insn (result, temp);
3771 emit_label (label);
3772 return result;
3775 /* Expand signed division of OP0 by a power of two D in mode MODE.
3776 This routine is only called for positive values of D. */
3778 static rtx
3779 expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3781 rtx temp, label;
3782 int logd;
3784 logd = floor_log2 (d);
3786 if (d == 2
3787 && BRANCH_COST (optimize_insn_for_speed_p (),
3788 false) >= 1)
3790 temp = gen_reg_rtx (mode);
3791 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3792 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3793 0, OPTAB_LIB_WIDEN);
3794 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3797 #ifdef HAVE_conditional_move
3798 if (BRANCH_COST (optimize_insn_for_speed_p (), false)
3799 >= 2)
3801 rtx temp2;
3803 /* ??? emit_conditional_move forces a stack adjustment via
3804 compare_from_rtx so, if the sequence is discarded, it will
3805 be lost. Do it now instead. */
3806 do_pending_stack_adjust ();
3808 start_sequence ();
3809 temp2 = copy_to_mode_reg (mode, op0);
3810 temp = expand_binop (mode, add_optab, temp2, GEN_INT (d-1),
3811 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3812 temp = force_reg (mode, temp);
3814 /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */
3815 temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3816 mode, temp, temp2, mode, 0);
3817 if (temp2)
3819 rtx seq = get_insns ();
3820 end_sequence ();
3821 emit_insn (seq);
3822 return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
3824 end_sequence ();
3826 #endif
3828 if (BRANCH_COST (optimize_insn_for_speed_p (),
3829 false) >= 2)
3831 int ushift = GET_MODE_BITSIZE (mode) - logd;
3833 temp = gen_reg_rtx (mode);
3834 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3835 if (shift_cost[optimize_insn_for_speed_p ()][mode][ushift] > COSTS_N_INSNS (1))
3836 temp = expand_binop (mode, and_optab, temp, GEN_INT (d - 1),
3837 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3838 else
3839 temp = expand_shift (RSHIFT_EXPR, mode, temp,
3840 ushift, NULL_RTX, 1);
3841 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3842 0, OPTAB_LIB_WIDEN);
3843 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3846 label = gen_label_rtx ();
3847 temp = copy_to_mode_reg (mode, op0);
3848 do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3849 expand_inc (temp, GEN_INT (d - 1));
3850 emit_label (label);
3851 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3854 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3855 if that is convenient, and returning where the result is.
3856 You may request either the quotient or the remainder as the result;
3857 specify REM_FLAG nonzero to get the remainder.
3859 CODE is the expression code for which kind of division this is;
3860 it controls how rounding is done. MODE is the machine mode to use.
3861 UNSIGNEDP nonzero means do unsigned division. */
3863 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3864 and then correct it by or'ing in missing high bits
3865 if result of ANDI is nonzero.
3866 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3867 This could optimize to a bfexts instruction.
3868 But C doesn't use these operations, so their optimizations are
3869 left for later. */
3870 /* ??? For modulo, we don't actually need the highpart of the first product,
3871 the low part will do nicely. And for small divisors, the second multiply
3872 can also be a low-part only multiply or even be completely left out.
3873 E.g. to calculate the remainder of a division by 3 with a 32 bit
3874 multiply, multiply with 0x55555556 and extract the upper two bits;
3875 the result is exact for inputs up to 0x1fffffff.
3876 The input range can be reduced by using cross-sum rules.
3877 For odd divisors >= 3, the following table gives right shift counts
3878 so that if a number is shifted by an integer multiple of the given
3879 amount, the remainder stays the same:
3880 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3881 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3882 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3883 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3884 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3886 Cross-sum rules for even numbers can be derived by leaving as many bits
3887 to the right alone as the divisor has zeros to the right.
3888 E.g. if x is an unsigned 32 bit number:
3889 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3893 expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3894 rtx op0, rtx op1, rtx target, int unsignedp)
3896 enum machine_mode compute_mode;
3897 rtx tquotient;
3898 rtx quotient = 0, remainder = 0;
3899 rtx last;
3900 int size;
3901 rtx insn;
3902 optab optab1, optab2;
3903 int op1_is_constant, op1_is_pow2 = 0;
3904 int max_cost, extra_cost;
3905 static HOST_WIDE_INT last_div_const = 0;
3906 static HOST_WIDE_INT ext_op1;
3907 bool speed = optimize_insn_for_speed_p ();
3909 op1_is_constant = CONST_INT_P (op1);
3910 if (op1_is_constant)
3912 ext_op1 = INTVAL (op1);
3913 if (unsignedp)
3914 ext_op1 &= GET_MODE_MASK (mode);
3915 op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3916 || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3920 This is the structure of expand_divmod:
3922 First comes code to fix up the operands so we can perform the operations
3923 correctly and efficiently.
3925 Second comes a switch statement with code specific for each rounding mode.
3926 For some special operands this code emits all RTL for the desired
3927 operation, for other cases, it generates only a quotient and stores it in
3928 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
3929 to indicate that it has not done anything.
3931 Last comes code that finishes the operation. If QUOTIENT is set and
3932 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
3933 QUOTIENT is not set, it is computed using trunc rounding.
3935 We try to generate special code for division and remainder when OP1 is a
3936 constant. If |OP1| = 2**n we can use shifts and some other fast
3937 operations. For other values of OP1, we compute a carefully selected
3938 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3939 by m.
3941 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3942 half of the product. Different strategies for generating the product are
3943 implemented in expand_mult_highpart.
3945 If what we actually want is the remainder, we generate that by another
3946 by-constant multiplication and a subtraction. */
3948 /* We shouldn't be called with OP1 == const1_rtx, but some of the
3949 code below will malfunction if we are, so check here and handle
3950 the special case if so. */
3951 if (op1 == const1_rtx)
3952 return rem_flag ? const0_rtx : op0;
3954 /* When dividing by -1, we could get an overflow.
3955 negv_optab can handle overflows. */
3956 if (! unsignedp && op1 == constm1_rtx)
3958 if (rem_flag)
3959 return const0_rtx;
3960 return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT
3961 ? negv_optab : neg_optab, op0, target, 0);
3964 if (target
3965 /* Don't use the function value register as a target
3966 since we have to read it as well as write it,
3967 and function-inlining gets confused by this. */
3968 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3969 /* Don't clobber an operand while doing a multi-step calculation. */
3970 || ((rem_flag || op1_is_constant)
3971 && (reg_mentioned_p (target, op0)
3972 || (MEM_P (op0) && MEM_P (target))))
3973 || reg_mentioned_p (target, op1)
3974 || (MEM_P (op1) && MEM_P (target))))
3975 target = 0;
3977 /* Get the mode in which to perform this computation. Normally it will
3978 be MODE, but sometimes we can't do the desired operation in MODE.
3979 If so, pick a wider mode in which we can do the operation. Convert
3980 to that mode at the start to avoid repeated conversions.
3982 First see what operations we need. These depend on the expression
3983 we are evaluating. (We assume that divxx3 insns exist under the
3984 same conditions that modxx3 insns and that these insns don't normally
3985 fail. If these assumptions are not correct, we may generate less
3986 efficient code in some cases.)
3988 Then see if we find a mode in which we can open-code that operation
3989 (either a division, modulus, or shift). Finally, check for the smallest
3990 mode for which we can do the operation with a library call. */
3992 /* We might want to refine this now that we have division-by-constant
3993 optimization. Since expand_mult_highpart tries so many variants, it is
3994 not straightforward to generalize this. Maybe we should make an array
3995 of possible modes in init_expmed? Save this for GCC 2.7. */
3997 optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3998 ? (unsignedp ? lshr_optab : ashr_optab)
3999 : (unsignedp ? udiv_optab : sdiv_optab));
4000 optab2 = ((op1_is_pow2 && op1 != const0_rtx)
4001 ? optab1
4002 : (unsignedp ? udivmod_optab : sdivmod_optab));
4004 for (compute_mode = mode; compute_mode != VOIDmode;
4005 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
4006 if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
4007 || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
4008 break;
4010 if (compute_mode == VOIDmode)
4011 for (compute_mode = mode; compute_mode != VOIDmode;
4012 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
4013 if (optab_libfunc (optab1, compute_mode)
4014 || optab_libfunc (optab2, compute_mode))
4015 break;
4017 /* If we still couldn't find a mode, use MODE, but expand_binop will
4018 probably die. */
4019 if (compute_mode == VOIDmode)
4020 compute_mode = mode;
4022 if (target && GET_MODE (target) == compute_mode)
4023 tquotient = target;
4024 else
4025 tquotient = gen_reg_rtx (compute_mode);
4027 size = GET_MODE_BITSIZE (compute_mode);
4028 #if 0
4029 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
4030 (mode), and thereby get better code when OP1 is a constant. Do that
4031 later. It will require going over all usages of SIZE below. */
4032 size = GET_MODE_BITSIZE (mode);
4033 #endif
4035 /* Only deduct something for a REM if the last divide done was
4036 for a different constant. Then set the constant of the last
4037 divide. */
4038 max_cost = unsignedp ? udiv_cost[speed][compute_mode] : sdiv_cost[speed][compute_mode];
4039 if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4040 && INTVAL (op1) == last_div_const))
4041 max_cost -= mul_cost[speed][compute_mode] + add_cost[speed][compute_mode];
4043 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4045 /* Now convert to the best mode to use. */
4046 if (compute_mode != mode)
4048 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4049 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4051 /* convert_modes may have placed op1 into a register, so we
4052 must recompute the following. */
4053 op1_is_constant = CONST_INT_P (op1);
4054 op1_is_pow2 = (op1_is_constant
4055 && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4056 || (! unsignedp
4057 && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1)))))) ;
4060 /* If one of the operands is a volatile MEM, copy it into a register. */
4062 if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4063 op0 = force_reg (compute_mode, op0);
4064 if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4065 op1 = force_reg (compute_mode, op1);
4067 /* If we need the remainder or if OP1 is constant, we need to
4068 put OP0 in a register in case it has any queued subexpressions. */
4069 if (rem_flag || op1_is_constant)
4070 op0 = force_reg (compute_mode, op0);
4072 last = get_last_insn ();
4074 /* Promote floor rounding to trunc rounding for unsigned operations. */
4075 if (unsignedp)
4077 if (code == FLOOR_DIV_EXPR)
4078 code = TRUNC_DIV_EXPR;
4079 if (code == FLOOR_MOD_EXPR)
4080 code = TRUNC_MOD_EXPR;
4081 if (code == EXACT_DIV_EXPR && op1_is_pow2)
4082 code = TRUNC_DIV_EXPR;
4085 if (op1 != const0_rtx)
4086 switch (code)
4088 case TRUNC_MOD_EXPR:
4089 case TRUNC_DIV_EXPR:
4090 if (op1_is_constant)
4092 if (unsignedp)
4094 unsigned HOST_WIDE_INT mh, ml;
4095 int pre_shift, post_shift;
4096 int dummy;
4097 unsigned HOST_WIDE_INT d = (INTVAL (op1)
4098 & GET_MODE_MASK (compute_mode));
4100 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4102 pre_shift = floor_log2 (d);
4103 if (rem_flag)
4105 remainder
4106 = expand_binop (compute_mode, and_optab, op0,
4107 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4108 remainder, 1,
4109 OPTAB_LIB_WIDEN);
4110 if (remainder)
4111 return gen_lowpart (mode, remainder);
4113 quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4114 pre_shift, tquotient, 1);
4116 else if (size <= HOST_BITS_PER_WIDE_INT)
4118 if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
4120 /* Most significant bit of divisor is set; emit an scc
4121 insn. */
4122 quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4123 compute_mode, 1, 1);
4125 else
4127 /* Find a suitable multiplier and right shift count
4128 instead of multiplying with D. */
4130 mh = choose_multiplier (d, size, size,
4131 &ml, &post_shift, &dummy);
4133 /* If the suggested multiplier is more than SIZE bits,
4134 we can do better for even divisors, using an
4135 initial right shift. */
4136 if (mh != 0 && (d & 1) == 0)
4138 pre_shift = floor_log2 (d & -d);
4139 mh = choose_multiplier (d >> pre_shift, size,
4140 size - pre_shift,
4141 &ml, &post_shift, &dummy);
4142 gcc_assert (!mh);
4144 else
4145 pre_shift = 0;
4147 if (mh != 0)
4149 rtx t1, t2, t3, t4;
4151 if (post_shift - 1 >= BITS_PER_WORD)
4152 goto fail1;
4154 extra_cost
4155 = (shift_cost[speed][compute_mode][post_shift - 1]
4156 + shift_cost[speed][compute_mode][1]
4157 + 2 * add_cost[speed][compute_mode]);
4158 t1 = expand_mult_highpart (compute_mode, op0,
4159 GEN_INT (ml),
4160 NULL_RTX, 1,
4161 max_cost - extra_cost);
4162 if (t1 == 0)
4163 goto fail1;
4164 t2 = force_operand (gen_rtx_MINUS (compute_mode,
4165 op0, t1),
4166 NULL_RTX);
4167 t3 = expand_shift (RSHIFT_EXPR, compute_mode,
4168 t2, 1, NULL_RTX, 1);
4169 t4 = force_operand (gen_rtx_PLUS (compute_mode,
4170 t1, t3),
4171 NULL_RTX);
4172 quotient = expand_shift
4173 (RSHIFT_EXPR, compute_mode, t4,
4174 post_shift - 1, tquotient, 1);
4176 else
4178 rtx t1, t2;
4180 if (pre_shift >= BITS_PER_WORD
4181 || post_shift >= BITS_PER_WORD)
4182 goto fail1;
4184 t1 = expand_shift
4185 (RSHIFT_EXPR, compute_mode, op0,
4186 pre_shift, NULL_RTX, 1);
4187 extra_cost
4188 = (shift_cost[speed][compute_mode][pre_shift]
4189 + shift_cost[speed][compute_mode][post_shift]);
4190 t2 = expand_mult_highpart (compute_mode, t1,
4191 GEN_INT (ml),
4192 NULL_RTX, 1,
4193 max_cost - extra_cost);
4194 if (t2 == 0)
4195 goto fail1;
4196 quotient = expand_shift
4197 (RSHIFT_EXPR, compute_mode, t2,
4198 post_shift, tquotient, 1);
4202 else /* Too wide mode to use tricky code */
4203 break;
4205 insn = get_last_insn ();
4206 if (insn != last)
4207 set_dst_reg_note (insn, REG_EQUAL,
4208 gen_rtx_UDIV (compute_mode, op0, op1),
4209 quotient);
4211 else /* TRUNC_DIV, signed */
4213 unsigned HOST_WIDE_INT ml;
4214 int lgup, post_shift;
4215 rtx mlr;
4216 HOST_WIDE_INT d = INTVAL (op1);
4217 unsigned HOST_WIDE_INT abs_d;
4219 /* Since d might be INT_MIN, we have to cast to
4220 unsigned HOST_WIDE_INT before negating to avoid
4221 undefined signed overflow. */
4222 abs_d = (d >= 0
4223 ? (unsigned HOST_WIDE_INT) d
4224 : - (unsigned HOST_WIDE_INT) d);
4226 /* n rem d = n rem -d */
4227 if (rem_flag && d < 0)
4229 d = abs_d;
4230 op1 = gen_int_mode (abs_d, compute_mode);
4233 if (d == 1)
4234 quotient = op0;
4235 else if (d == -1)
4236 quotient = expand_unop (compute_mode, neg_optab, op0,
4237 tquotient, 0);
4238 else if (HOST_BITS_PER_WIDE_INT >= size
4239 && abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4241 /* This case is not handled correctly below. */
4242 quotient = emit_store_flag (tquotient, EQ, op0, op1,
4243 compute_mode, 1, 1);
4244 if (quotient == 0)
4245 goto fail1;
4247 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4248 && (rem_flag ? smod_pow2_cheap[speed][compute_mode]
4249 : sdiv_pow2_cheap[speed][compute_mode])
4250 /* We assume that cheap metric is true if the
4251 optab has an expander for this mode. */
4252 && ((optab_handler ((rem_flag ? smod_optab
4253 : sdiv_optab),
4254 compute_mode)
4255 != CODE_FOR_nothing)
4256 || (optab_handler (sdivmod_optab,
4257 compute_mode)
4258 != CODE_FOR_nothing)))
4260 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4262 if (rem_flag)
4264 remainder = expand_smod_pow2 (compute_mode, op0, d);
4265 if (remainder)
4266 return gen_lowpart (mode, remainder);
4269 if (sdiv_pow2_cheap[speed][compute_mode]
4270 && ((optab_handler (sdiv_optab, compute_mode)
4271 != CODE_FOR_nothing)
4272 || (optab_handler (sdivmod_optab, compute_mode)
4273 != CODE_FOR_nothing)))
4274 quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4275 compute_mode, op0,
4276 gen_int_mode (abs_d,
4277 compute_mode),
4278 NULL_RTX, 0);
4279 else
4280 quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4282 /* We have computed OP0 / abs(OP1). If OP1 is negative,
4283 negate the quotient. */
4284 if (d < 0)
4286 insn = get_last_insn ();
4287 if (insn != last
4288 && abs_d < ((unsigned HOST_WIDE_INT) 1
4289 << (HOST_BITS_PER_WIDE_INT - 1)))
4290 set_dst_reg_note (insn, REG_EQUAL,
4291 gen_rtx_DIV (compute_mode, op0,
4292 gen_int_mode
4293 (abs_d,
4294 compute_mode)),
4295 quotient);
4297 quotient = expand_unop (compute_mode, neg_optab,
4298 quotient, quotient, 0);
4301 else if (size <= HOST_BITS_PER_WIDE_INT)
4303 choose_multiplier (abs_d, size, size - 1,
4304 &ml, &post_shift, &lgup);
4305 if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4307 rtx t1, t2, t3;
4309 if (post_shift >= BITS_PER_WORD
4310 || size - 1 >= BITS_PER_WORD)
4311 goto fail1;
4313 extra_cost = (shift_cost[speed][compute_mode][post_shift]
4314 + shift_cost[speed][compute_mode][size - 1]
4315 + add_cost[speed][compute_mode]);
4316 t1 = expand_mult_highpart (compute_mode, op0,
4317 GEN_INT (ml), NULL_RTX, 0,
4318 max_cost - extra_cost);
4319 if (t1 == 0)
4320 goto fail1;
4321 t2 = expand_shift
4322 (RSHIFT_EXPR, compute_mode, t1,
4323 post_shift, NULL_RTX, 0);
4324 t3 = expand_shift
4325 (RSHIFT_EXPR, compute_mode, op0,
4326 size - 1, NULL_RTX, 0);
4327 if (d < 0)
4328 quotient
4329 = force_operand (gen_rtx_MINUS (compute_mode,
4330 t3, t2),
4331 tquotient);
4332 else
4333 quotient
4334 = force_operand (gen_rtx_MINUS (compute_mode,
4335 t2, t3),
4336 tquotient);
4338 else
4340 rtx t1, t2, t3, t4;
4342 if (post_shift >= BITS_PER_WORD
4343 || size - 1 >= BITS_PER_WORD)
4344 goto fail1;
4346 ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4347 mlr = gen_int_mode (ml, compute_mode);
4348 extra_cost = (shift_cost[speed][compute_mode][post_shift]
4349 + shift_cost[speed][compute_mode][size - 1]
4350 + 2 * add_cost[speed][compute_mode]);
4351 t1 = expand_mult_highpart (compute_mode, op0, mlr,
4352 NULL_RTX, 0,
4353 max_cost - extra_cost);
4354 if (t1 == 0)
4355 goto fail1;
4356 t2 = force_operand (gen_rtx_PLUS (compute_mode,
4357 t1, op0),
4358 NULL_RTX);
4359 t3 = expand_shift
4360 (RSHIFT_EXPR, compute_mode, t2,
4361 post_shift, NULL_RTX, 0);
4362 t4 = expand_shift
4363 (RSHIFT_EXPR, compute_mode, op0,
4364 size - 1, NULL_RTX, 0);
4365 if (d < 0)
4366 quotient
4367 = force_operand (gen_rtx_MINUS (compute_mode,
4368 t4, t3),
4369 tquotient);
4370 else
4371 quotient
4372 = force_operand (gen_rtx_MINUS (compute_mode,
4373 t3, t4),
4374 tquotient);
4377 else /* Too wide mode to use tricky code */
4378 break;
4380 insn = get_last_insn ();
4381 if (insn != last)
4382 set_dst_reg_note (insn, REG_EQUAL,
4383 gen_rtx_DIV (compute_mode, op0, op1),
4384 quotient);
4386 break;
4388 fail1:
4389 delete_insns_since (last);
4390 break;
4392 case FLOOR_DIV_EXPR:
4393 case FLOOR_MOD_EXPR:
4394 /* We will come here only for signed operations. */
4395 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4397 unsigned HOST_WIDE_INT mh, ml;
4398 int pre_shift, lgup, post_shift;
4399 HOST_WIDE_INT d = INTVAL (op1);
4401 if (d > 0)
4403 /* We could just as easily deal with negative constants here,
4404 but it does not seem worth the trouble for GCC 2.6. */
4405 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4407 pre_shift = floor_log2 (d);
4408 if (rem_flag)
4410 remainder = expand_binop (compute_mode, and_optab, op0,
4411 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4412 remainder, 0, OPTAB_LIB_WIDEN);
4413 if (remainder)
4414 return gen_lowpart (mode, remainder);
4416 quotient = expand_shift
4417 (RSHIFT_EXPR, compute_mode, op0,
4418 pre_shift, tquotient, 0);
4420 else
4422 rtx t1, t2, t3, t4;
4424 mh = choose_multiplier (d, size, size - 1,
4425 &ml, &post_shift, &lgup);
4426 gcc_assert (!mh);
4428 if (post_shift < BITS_PER_WORD
4429 && size - 1 < BITS_PER_WORD)
4431 t1 = expand_shift
4432 (RSHIFT_EXPR, compute_mode, op0,
4433 size - 1, NULL_RTX, 0);
4434 t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4435 NULL_RTX, 0, OPTAB_WIDEN);
4436 extra_cost = (shift_cost[speed][compute_mode][post_shift]
4437 + shift_cost[speed][compute_mode][size - 1]
4438 + 2 * add_cost[speed][compute_mode]);
4439 t3 = expand_mult_highpart (compute_mode, t2,
4440 GEN_INT (ml), NULL_RTX, 1,
4441 max_cost - extra_cost);
4442 if (t3 != 0)
4444 t4 = expand_shift
4445 (RSHIFT_EXPR, compute_mode, t3,
4446 post_shift, NULL_RTX, 1);
4447 quotient = expand_binop (compute_mode, xor_optab,
4448 t4, t1, tquotient, 0,
4449 OPTAB_WIDEN);
4454 else
4456 rtx nsign, t1, t2, t3, t4;
4457 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4458 op0, constm1_rtx), NULL_RTX);
4459 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4460 0, OPTAB_WIDEN);
4461 nsign = expand_shift
4462 (RSHIFT_EXPR, compute_mode, t2,
4463 size - 1, NULL_RTX, 0);
4464 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4465 NULL_RTX);
4466 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4467 NULL_RTX, 0);
4468 if (t4)
4470 rtx t5;
4471 t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4472 NULL_RTX, 0);
4473 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4474 t4, t5),
4475 tquotient);
4480 if (quotient != 0)
4481 break;
4482 delete_insns_since (last);
4484 /* Try using an instruction that produces both the quotient and
4485 remainder, using truncation. We can easily compensate the quotient
4486 or remainder to get floor rounding, once we have the remainder.
4487 Notice that we compute also the final remainder value here,
4488 and return the result right away. */
4489 if (target == 0 || GET_MODE (target) != compute_mode)
4490 target = gen_reg_rtx (compute_mode);
4492 if (rem_flag)
4494 remainder
4495 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4496 quotient = gen_reg_rtx (compute_mode);
4498 else
4500 quotient
4501 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4502 remainder = gen_reg_rtx (compute_mode);
4505 if (expand_twoval_binop (sdivmod_optab, op0, op1,
4506 quotient, remainder, 0))
4508 /* This could be computed with a branch-less sequence.
4509 Save that for later. */
4510 rtx tem;
4511 rtx label = gen_label_rtx ();
4512 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4513 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4514 NULL_RTX, 0, OPTAB_WIDEN);
4515 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4516 expand_dec (quotient, const1_rtx);
4517 expand_inc (remainder, op1);
4518 emit_label (label);
4519 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4522 /* No luck with division elimination or divmod. Have to do it
4523 by conditionally adjusting op0 *and* the result. */
4525 rtx label1, label2, label3, label4, label5;
4526 rtx adjusted_op0;
4527 rtx tem;
4529 quotient = gen_reg_rtx (compute_mode);
4530 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4531 label1 = gen_label_rtx ();
4532 label2 = gen_label_rtx ();
4533 label3 = gen_label_rtx ();
4534 label4 = gen_label_rtx ();
4535 label5 = gen_label_rtx ();
4536 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4537 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4538 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4539 quotient, 0, OPTAB_LIB_WIDEN);
4540 if (tem != quotient)
4541 emit_move_insn (quotient, tem);
4542 emit_jump_insn (gen_jump (label5));
4543 emit_barrier ();
4544 emit_label (label1);
4545 expand_inc (adjusted_op0, const1_rtx);
4546 emit_jump_insn (gen_jump (label4));
4547 emit_barrier ();
4548 emit_label (label2);
4549 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4550 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4551 quotient, 0, OPTAB_LIB_WIDEN);
4552 if (tem != quotient)
4553 emit_move_insn (quotient, tem);
4554 emit_jump_insn (gen_jump (label5));
4555 emit_barrier ();
4556 emit_label (label3);
4557 expand_dec (adjusted_op0, const1_rtx);
4558 emit_label (label4);
4559 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4560 quotient, 0, OPTAB_LIB_WIDEN);
4561 if (tem != quotient)
4562 emit_move_insn (quotient, tem);
4563 expand_dec (quotient, const1_rtx);
4564 emit_label (label5);
4566 break;
4568 case CEIL_DIV_EXPR:
4569 case CEIL_MOD_EXPR:
4570 if (unsignedp)
4572 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4574 rtx t1, t2, t3;
4575 unsigned HOST_WIDE_INT d = INTVAL (op1);
4576 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4577 floor_log2 (d), tquotient, 1);
4578 t2 = expand_binop (compute_mode, and_optab, op0,
4579 GEN_INT (d - 1),
4580 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4581 t3 = gen_reg_rtx (compute_mode);
4582 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4583 compute_mode, 1, 1);
4584 if (t3 == 0)
4586 rtx lab;
4587 lab = gen_label_rtx ();
4588 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4589 expand_inc (t1, const1_rtx);
4590 emit_label (lab);
4591 quotient = t1;
4593 else
4594 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4595 t1, t3),
4596 tquotient);
4597 break;
4600 /* Try using an instruction that produces both the quotient and
4601 remainder, using truncation. We can easily compensate the
4602 quotient or remainder to get ceiling rounding, once we have the
4603 remainder. Notice that we compute also the final remainder
4604 value here, and return the result right away. */
4605 if (target == 0 || GET_MODE (target) != compute_mode)
4606 target = gen_reg_rtx (compute_mode);
4608 if (rem_flag)
4610 remainder = (REG_P (target)
4611 ? target : gen_reg_rtx (compute_mode));
4612 quotient = gen_reg_rtx (compute_mode);
4614 else
4616 quotient = (REG_P (target)
4617 ? target : gen_reg_rtx (compute_mode));
4618 remainder = gen_reg_rtx (compute_mode);
4621 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4622 remainder, 1))
4624 /* This could be computed with a branch-less sequence.
4625 Save that for later. */
4626 rtx label = gen_label_rtx ();
4627 do_cmp_and_jump (remainder, const0_rtx, EQ,
4628 compute_mode, label);
4629 expand_inc (quotient, const1_rtx);
4630 expand_dec (remainder, op1);
4631 emit_label (label);
4632 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4635 /* No luck with division elimination or divmod. Have to do it
4636 by conditionally adjusting op0 *and* the result. */
4638 rtx label1, label2;
4639 rtx adjusted_op0, tem;
4641 quotient = gen_reg_rtx (compute_mode);
4642 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4643 label1 = gen_label_rtx ();
4644 label2 = gen_label_rtx ();
4645 do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4646 compute_mode, label1);
4647 emit_move_insn (quotient, const0_rtx);
4648 emit_jump_insn (gen_jump (label2));
4649 emit_barrier ();
4650 emit_label (label1);
4651 expand_dec (adjusted_op0, const1_rtx);
4652 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4653 quotient, 1, OPTAB_LIB_WIDEN);
4654 if (tem != quotient)
4655 emit_move_insn (quotient, tem);
4656 expand_inc (quotient, const1_rtx);
4657 emit_label (label2);
4660 else /* signed */
4662 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4663 && INTVAL (op1) >= 0)
4665 /* This is extremely similar to the code for the unsigned case
4666 above. For 2.7 we should merge these variants, but for
4667 2.6.1 I don't want to touch the code for unsigned since that
4668 get used in C. The signed case will only be used by other
4669 languages (Ada). */
4671 rtx t1, t2, t3;
4672 unsigned HOST_WIDE_INT d = INTVAL (op1);
4673 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4674 floor_log2 (d), tquotient, 0);
4675 t2 = expand_binop (compute_mode, and_optab, op0,
4676 GEN_INT (d - 1),
4677 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4678 t3 = gen_reg_rtx (compute_mode);
4679 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4680 compute_mode, 1, 1);
4681 if (t3 == 0)
4683 rtx lab;
4684 lab = gen_label_rtx ();
4685 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4686 expand_inc (t1, const1_rtx);
4687 emit_label (lab);
4688 quotient = t1;
4690 else
4691 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4692 t1, t3),
4693 tquotient);
4694 break;
4697 /* Try using an instruction that produces both the quotient and
4698 remainder, using truncation. We can easily compensate the
4699 quotient or remainder to get ceiling rounding, once we have the
4700 remainder. Notice that we compute also the final remainder
4701 value here, and return the result right away. */
4702 if (target == 0 || GET_MODE (target) != compute_mode)
4703 target = gen_reg_rtx (compute_mode);
4704 if (rem_flag)
4706 remainder= (REG_P (target)
4707 ? target : gen_reg_rtx (compute_mode));
4708 quotient = gen_reg_rtx (compute_mode);
4710 else
4712 quotient = (REG_P (target)
4713 ? target : gen_reg_rtx (compute_mode));
4714 remainder = gen_reg_rtx (compute_mode);
4717 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4718 remainder, 0))
4720 /* This could be computed with a branch-less sequence.
4721 Save that for later. */
4722 rtx tem;
4723 rtx label = gen_label_rtx ();
4724 do_cmp_and_jump (remainder, const0_rtx, EQ,
4725 compute_mode, label);
4726 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4727 NULL_RTX, 0, OPTAB_WIDEN);
4728 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4729 expand_inc (quotient, const1_rtx);
4730 expand_dec (remainder, op1);
4731 emit_label (label);
4732 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4735 /* No luck with division elimination or divmod. Have to do it
4736 by conditionally adjusting op0 *and* the result. */
4738 rtx label1, label2, label3, label4, label5;
4739 rtx adjusted_op0;
4740 rtx tem;
4742 quotient = gen_reg_rtx (compute_mode);
4743 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4744 label1 = gen_label_rtx ();
4745 label2 = gen_label_rtx ();
4746 label3 = gen_label_rtx ();
4747 label4 = gen_label_rtx ();
4748 label5 = gen_label_rtx ();
4749 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4750 do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4751 compute_mode, label1);
4752 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4753 quotient, 0, OPTAB_LIB_WIDEN);
4754 if (tem != quotient)
4755 emit_move_insn (quotient, tem);
4756 emit_jump_insn (gen_jump (label5));
4757 emit_barrier ();
4758 emit_label (label1);
4759 expand_dec (adjusted_op0, const1_rtx);
4760 emit_jump_insn (gen_jump (label4));
4761 emit_barrier ();
4762 emit_label (label2);
4763 do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4764 compute_mode, label3);
4765 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4766 quotient, 0, OPTAB_LIB_WIDEN);
4767 if (tem != quotient)
4768 emit_move_insn (quotient, tem);
4769 emit_jump_insn (gen_jump (label5));
4770 emit_barrier ();
4771 emit_label (label3);
4772 expand_inc (adjusted_op0, const1_rtx);
4773 emit_label (label4);
4774 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4775 quotient, 0, OPTAB_LIB_WIDEN);
4776 if (tem != quotient)
4777 emit_move_insn (quotient, tem);
4778 expand_inc (quotient, const1_rtx);
4779 emit_label (label5);
4782 break;
4784 case EXACT_DIV_EXPR:
4785 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4787 HOST_WIDE_INT d = INTVAL (op1);
4788 unsigned HOST_WIDE_INT ml;
4789 int pre_shift;
4790 rtx t1;
4792 pre_shift = floor_log2 (d & -d);
4793 ml = invert_mod2n (d >> pre_shift, size);
4794 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4795 pre_shift, NULL_RTX, unsignedp);
4796 quotient = expand_mult (compute_mode, t1,
4797 gen_int_mode (ml, compute_mode),
4798 NULL_RTX, 1);
4800 insn = get_last_insn ();
4801 set_dst_reg_note (insn, REG_EQUAL,
4802 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4803 compute_mode, op0, op1),
4804 quotient);
4806 break;
4808 case ROUND_DIV_EXPR:
4809 case ROUND_MOD_EXPR:
4810 if (unsignedp)
4812 rtx tem;
4813 rtx label;
4814 label = gen_label_rtx ();
4815 quotient = gen_reg_rtx (compute_mode);
4816 remainder = gen_reg_rtx (compute_mode);
4817 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4819 rtx tem;
4820 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4821 quotient, 1, OPTAB_LIB_WIDEN);
4822 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4823 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4824 remainder, 1, OPTAB_LIB_WIDEN);
4826 tem = plus_constant (compute_mode, op1, -1);
4827 tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1);
4828 do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4829 expand_inc (quotient, const1_rtx);
4830 expand_dec (remainder, op1);
4831 emit_label (label);
4833 else
4835 rtx abs_rem, abs_op1, tem, mask;
4836 rtx label;
4837 label = gen_label_rtx ();
4838 quotient = gen_reg_rtx (compute_mode);
4839 remainder = gen_reg_rtx (compute_mode);
4840 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4842 rtx tem;
4843 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4844 quotient, 0, OPTAB_LIB_WIDEN);
4845 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4846 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4847 remainder, 0, OPTAB_LIB_WIDEN);
4849 abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4850 abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4851 tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4852 1, NULL_RTX, 1);
4853 do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4854 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4855 NULL_RTX, 0, OPTAB_WIDEN);
4856 mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4857 size - 1, NULL_RTX, 0);
4858 tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4859 NULL_RTX, 0, OPTAB_WIDEN);
4860 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4861 NULL_RTX, 0, OPTAB_WIDEN);
4862 expand_inc (quotient, tem);
4863 tem = expand_binop (compute_mode, xor_optab, mask, op1,
4864 NULL_RTX, 0, OPTAB_WIDEN);
4865 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4866 NULL_RTX, 0, OPTAB_WIDEN);
4867 expand_dec (remainder, tem);
4868 emit_label (label);
4870 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4872 default:
4873 gcc_unreachable ();
4876 if (quotient == 0)
4878 if (target && GET_MODE (target) != compute_mode)
4879 target = 0;
4881 if (rem_flag)
4883 /* Try to produce the remainder without producing the quotient.
4884 If we seem to have a divmod pattern that does not require widening,
4885 don't try widening here. We should really have a WIDEN argument
4886 to expand_twoval_binop, since what we'd really like to do here is
4887 1) try a mod insn in compute_mode
4888 2) try a divmod insn in compute_mode
4889 3) try a div insn in compute_mode and multiply-subtract to get
4890 remainder
4891 4) try the same things with widening allowed. */
4892 remainder
4893 = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4894 op0, op1, target,
4895 unsignedp,
4896 ((optab_handler (optab2, compute_mode)
4897 != CODE_FOR_nothing)
4898 ? OPTAB_DIRECT : OPTAB_WIDEN));
4899 if (remainder == 0)
4901 /* No luck there. Can we do remainder and divide at once
4902 without a library call? */
4903 remainder = gen_reg_rtx (compute_mode);
4904 if (! expand_twoval_binop ((unsignedp
4905 ? udivmod_optab
4906 : sdivmod_optab),
4907 op0, op1,
4908 NULL_RTX, remainder, unsignedp))
4909 remainder = 0;
4912 if (remainder)
4913 return gen_lowpart (mode, remainder);
4916 /* Produce the quotient. Try a quotient insn, but not a library call.
4917 If we have a divmod in this mode, use it in preference to widening
4918 the div (for this test we assume it will not fail). Note that optab2
4919 is set to the one of the two optabs that the call below will use. */
4920 quotient
4921 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4922 op0, op1, rem_flag ? NULL_RTX : target,
4923 unsignedp,
4924 ((optab_handler (optab2, compute_mode)
4925 != CODE_FOR_nothing)
4926 ? OPTAB_DIRECT : OPTAB_WIDEN));
4928 if (quotient == 0)
4930 /* No luck there. Try a quotient-and-remainder insn,
4931 keeping the quotient alone. */
4932 quotient = gen_reg_rtx (compute_mode);
4933 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4934 op0, op1,
4935 quotient, NULL_RTX, unsignedp))
4937 quotient = 0;
4938 if (! rem_flag)
4939 /* Still no luck. If we are not computing the remainder,
4940 use a library call for the quotient. */
4941 quotient = sign_expand_binop (compute_mode,
4942 udiv_optab, sdiv_optab,
4943 op0, op1, target,
4944 unsignedp, OPTAB_LIB_WIDEN);
4949 if (rem_flag)
4951 if (target && GET_MODE (target) != compute_mode)
4952 target = 0;
4954 if (quotient == 0)
4956 /* No divide instruction either. Use library for remainder. */
4957 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4958 op0, op1, target,
4959 unsignedp, OPTAB_LIB_WIDEN);
4960 /* No remainder function. Try a quotient-and-remainder
4961 function, keeping the remainder. */
4962 if (!remainder)
4964 remainder = gen_reg_rtx (compute_mode);
4965 if (!expand_twoval_binop_libfunc
4966 (unsignedp ? udivmod_optab : sdivmod_optab,
4967 op0, op1,
4968 NULL_RTX, remainder,
4969 unsignedp ? UMOD : MOD))
4970 remainder = NULL_RTX;
4973 else
4975 /* We divided. Now finish doing X - Y * (X / Y). */
4976 remainder = expand_mult (compute_mode, quotient, op1,
4977 NULL_RTX, unsignedp);
4978 remainder = expand_binop (compute_mode, sub_optab, op0,
4979 remainder, target, unsignedp,
4980 OPTAB_LIB_WIDEN);
4984 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4987 /* Return a tree node with data type TYPE, describing the value of X.
4988 Usually this is an VAR_DECL, if there is no obvious better choice.
4989 X may be an expression, however we only support those expressions
4990 generated by loop.c. */
4992 tree
4993 make_tree (tree type, rtx x)
4995 tree t;
4997 switch (GET_CODE (x))
4999 case CONST_INT:
5001 HOST_WIDE_INT hi = 0;
5003 if (INTVAL (x) < 0
5004 && !(TYPE_UNSIGNED (type)
5005 && (GET_MODE_BITSIZE (TYPE_MODE (type))
5006 < HOST_BITS_PER_WIDE_INT)))
5007 hi = -1;
5009 t = build_int_cst_wide (type, INTVAL (x), hi);
5011 return t;
5014 case CONST_DOUBLE:
5015 if (GET_MODE (x) == VOIDmode)
5016 t = build_int_cst_wide (type,
5017 CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
5018 else
5020 REAL_VALUE_TYPE d;
5022 REAL_VALUE_FROM_CONST_DOUBLE (d, x);
5023 t = build_real (type, d);
5026 return t;
5028 case CONST_VECTOR:
5030 int units = CONST_VECTOR_NUNITS (x);
5031 tree itype = TREE_TYPE (type);
5032 tree *elts;
5033 int i;
5035 /* Build a tree with vector elements. */
5036 elts = XALLOCAVEC (tree, units);
5037 for (i = units - 1; i >= 0; --i)
5039 rtx elt = CONST_VECTOR_ELT (x, i);
5040 elts[i] = make_tree (itype, elt);
5043 return build_vector (type, elts);
5046 case PLUS:
5047 return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5048 make_tree (type, XEXP (x, 1)));
5050 case MINUS:
5051 return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5052 make_tree (type, XEXP (x, 1)));
5054 case NEG:
5055 return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5057 case MULT:
5058 return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5059 make_tree (type, XEXP (x, 1)));
5061 case ASHIFT:
5062 return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5063 make_tree (type, XEXP (x, 1)));
5065 case LSHIFTRT:
5066 t = unsigned_type_for (type);
5067 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5068 make_tree (t, XEXP (x, 0)),
5069 make_tree (type, XEXP (x, 1))));
5071 case ASHIFTRT:
5072 t = signed_type_for (type);
5073 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5074 make_tree (t, XEXP (x, 0)),
5075 make_tree (type, XEXP (x, 1))));
5077 case DIV:
5078 if (TREE_CODE (type) != REAL_TYPE)
5079 t = signed_type_for (type);
5080 else
5081 t = type;
5083 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5084 make_tree (t, XEXP (x, 0)),
5085 make_tree (t, XEXP (x, 1))));
5086 case UDIV:
5087 t = unsigned_type_for (type);
5088 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5089 make_tree (t, XEXP (x, 0)),
5090 make_tree (t, XEXP (x, 1))));
5092 case SIGN_EXTEND:
5093 case ZERO_EXTEND:
5094 t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5095 GET_CODE (x) == ZERO_EXTEND);
5096 return fold_convert (type, make_tree (t, XEXP (x, 0)));
5098 case CONST:
5099 return make_tree (type, XEXP (x, 0));
5101 case SYMBOL_REF:
5102 t = SYMBOL_REF_DECL (x);
5103 if (t)
5104 return fold_convert (type, build_fold_addr_expr (t));
5105 /* else fall through. */
5107 default:
5108 t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5110 /* If TYPE is a POINTER_TYPE, we might need to convert X from
5111 address mode to pointer mode. */
5112 if (POINTER_TYPE_P (type))
5113 x = convert_memory_address_addr_space
5114 (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5116 /* Note that we do *not* use SET_DECL_RTL here, because we do not
5117 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
5118 t->decl_with_rtl.rtl = x;
5120 return t;
5124 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5125 and returning TARGET.
5127 If TARGET is 0, a pseudo-register or constant is returned. */
5130 expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
5132 rtx tem = 0;
5134 if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5135 tem = simplify_binary_operation (AND, mode, op0, op1);
5136 if (tem == 0)
5137 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5139 if (target == 0)
5140 target = tem;
5141 else if (tem != target)
5142 emit_move_insn (target, tem);
5143 return target;
5146 /* Helper function for emit_store_flag. */
5147 static rtx
5148 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5149 enum machine_mode mode, enum machine_mode compare_mode,
5150 int unsignedp, rtx x, rtx y, int normalizep,
5151 enum machine_mode target_mode)
5153 struct expand_operand ops[4];
5154 rtx op0, last, comparison, subtarget;
5155 enum machine_mode result_mode = insn_data[(int) icode].operand[0].mode;
5157 last = get_last_insn ();
5158 x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5159 y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5160 if (!x || !y)
5162 delete_insns_since (last);
5163 return NULL_RTX;
5166 if (target_mode == VOIDmode)
5167 target_mode = result_mode;
5168 if (!target)
5169 target = gen_reg_rtx (target_mode);
5171 comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5173 create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5174 create_fixed_operand (&ops[1], comparison);
5175 create_fixed_operand (&ops[2], x);
5176 create_fixed_operand (&ops[3], y);
5177 if (!maybe_expand_insn (icode, 4, ops))
5179 delete_insns_since (last);
5180 return NULL_RTX;
5182 subtarget = ops[0].value;
5184 /* If we are converting to a wider mode, first convert to
5185 TARGET_MODE, then normalize. This produces better combining
5186 opportunities on machines that have a SIGN_EXTRACT when we are
5187 testing a single bit. This mostly benefits the 68k.
5189 If STORE_FLAG_VALUE does not have the sign bit set when
5190 interpreted in MODE, we can do this conversion as unsigned, which
5191 is usually more efficient. */
5192 if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
5194 convert_move (target, subtarget,
5195 val_signbit_known_clear_p (result_mode,
5196 STORE_FLAG_VALUE));
5197 op0 = target;
5198 result_mode = target_mode;
5200 else
5201 op0 = subtarget;
5203 /* If we want to keep subexpressions around, don't reuse our last
5204 target. */
5205 if (optimize)
5206 subtarget = 0;
5208 /* Now normalize to the proper value in MODE. Sometimes we don't
5209 have to do anything. */
5210 if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5212 /* STORE_FLAG_VALUE might be the most negative number, so write
5213 the comparison this way to avoid a compiler-time warning. */
5214 else if (- normalizep == STORE_FLAG_VALUE)
5215 op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5217 /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5218 it hard to use a value of just the sign bit due to ANSI integer
5219 constant typing rules. */
5220 else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5221 op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5222 GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5223 normalizep == 1);
5224 else
5226 gcc_assert (STORE_FLAG_VALUE & 1);
5228 op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5229 if (normalizep == -1)
5230 op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5233 /* If we were converting to a smaller mode, do the conversion now. */
5234 if (target_mode != result_mode)
5236 convert_move (target, op0, 0);
5237 return target;
5239 else
5240 return op0;
5244 /* A subroutine of emit_store_flag only including "tricks" that do not
5245 need a recursive call. These are kept separate to avoid infinite
5246 loops. */
5248 static rtx
5249 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5250 enum machine_mode mode, int unsignedp, int normalizep,
5251 enum machine_mode target_mode)
5253 rtx subtarget;
5254 enum insn_code icode;
5255 enum machine_mode compare_mode;
5256 enum mode_class mclass;
5257 enum rtx_code scode;
5258 rtx tem;
5260 if (unsignedp)
5261 code = unsigned_condition (code);
5262 scode = swap_condition (code);
5264 /* If one operand is constant, make it the second one. Only do this
5265 if the other operand is not constant as well. */
5267 if (swap_commutative_operands_p (op0, op1))
5269 tem = op0;
5270 op0 = op1;
5271 op1 = tem;
5272 code = swap_condition (code);
5275 if (mode == VOIDmode)
5276 mode = GET_MODE (op0);
5278 /* For some comparisons with 1 and -1, we can convert this to
5279 comparisons with zero. This will often produce more opportunities for
5280 store-flag insns. */
5282 switch (code)
5284 case LT:
5285 if (op1 == const1_rtx)
5286 op1 = const0_rtx, code = LE;
5287 break;
5288 case LE:
5289 if (op1 == constm1_rtx)
5290 op1 = const0_rtx, code = LT;
5291 break;
5292 case GE:
5293 if (op1 == const1_rtx)
5294 op1 = const0_rtx, code = GT;
5295 break;
5296 case GT:
5297 if (op1 == constm1_rtx)
5298 op1 = const0_rtx, code = GE;
5299 break;
5300 case GEU:
5301 if (op1 == const1_rtx)
5302 op1 = const0_rtx, code = NE;
5303 break;
5304 case LTU:
5305 if (op1 == const1_rtx)
5306 op1 = const0_rtx, code = EQ;
5307 break;
5308 default:
5309 break;
5312 /* If we are comparing a double-word integer with zero or -1, we can
5313 convert the comparison into one involving a single word. */
5314 if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5315 && GET_MODE_CLASS (mode) == MODE_INT
5316 && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5318 if ((code == EQ || code == NE)
5319 && (op1 == const0_rtx || op1 == constm1_rtx))
5321 rtx op00, op01;
5323 /* Do a logical OR or AND of the two words and compare the
5324 result. */
5325 op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5326 op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5327 tem = expand_binop (word_mode,
5328 op1 == const0_rtx ? ior_optab : and_optab,
5329 op00, op01, NULL_RTX, unsignedp,
5330 OPTAB_DIRECT);
5332 if (tem != 0)
5333 tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5334 unsignedp, normalizep);
5336 else if ((code == LT || code == GE) && op1 == const0_rtx)
5338 rtx op0h;
5340 /* If testing the sign bit, can just test on high word. */
5341 op0h = simplify_gen_subreg (word_mode, op0, mode,
5342 subreg_highpart_offset (word_mode,
5343 mode));
5344 tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5345 unsignedp, normalizep);
5347 else
5348 tem = NULL_RTX;
5350 if (tem)
5352 if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5353 return tem;
5354 if (!target)
5355 target = gen_reg_rtx (target_mode);
5357 convert_move (target, tem,
5358 !val_signbit_known_set_p (word_mode,
5359 (normalizep ? normalizep
5360 : STORE_FLAG_VALUE)));
5361 return target;
5365 /* If this is A < 0 or A >= 0, we can do this by taking the ones
5366 complement of A (for GE) and shifting the sign bit to the low bit. */
5367 if (op1 == const0_rtx && (code == LT || code == GE)
5368 && GET_MODE_CLASS (mode) == MODE_INT
5369 && (normalizep || STORE_FLAG_VALUE == 1
5370 || val_signbit_p (mode, STORE_FLAG_VALUE)))
5372 subtarget = target;
5374 if (!target)
5375 target_mode = mode;
5377 /* If the result is to be wider than OP0, it is best to convert it
5378 first. If it is to be narrower, it is *incorrect* to convert it
5379 first. */
5380 else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5382 op0 = convert_modes (target_mode, mode, op0, 0);
5383 mode = target_mode;
5386 if (target_mode != mode)
5387 subtarget = 0;
5389 if (code == GE)
5390 op0 = expand_unop (mode, one_cmpl_optab, op0,
5391 ((STORE_FLAG_VALUE == 1 || normalizep)
5392 ? 0 : subtarget), 0);
5394 if (STORE_FLAG_VALUE == 1 || normalizep)
5395 /* If we are supposed to produce a 0/1 value, we want to do
5396 a logical shift from the sign bit to the low-order bit; for
5397 a -1/0 value, we do an arithmetic shift. */
5398 op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5399 GET_MODE_BITSIZE (mode) - 1,
5400 subtarget, normalizep != -1);
5402 if (mode != target_mode)
5403 op0 = convert_modes (target_mode, mode, op0, 0);
5405 return op0;
5408 mclass = GET_MODE_CLASS (mode);
5409 for (compare_mode = mode; compare_mode != VOIDmode;
5410 compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5412 enum machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5413 icode = optab_handler (cstore_optab, optab_mode);
5414 if (icode != CODE_FOR_nothing)
5416 do_pending_stack_adjust ();
5417 tem = emit_cstore (target, icode, code, mode, compare_mode,
5418 unsignedp, op0, op1, normalizep, target_mode);
5419 if (tem)
5420 return tem;
5422 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5424 tem = emit_cstore (target, icode, scode, mode, compare_mode,
5425 unsignedp, op1, op0, normalizep, target_mode);
5426 if (tem)
5427 return tem;
5429 break;
5433 return 0;
5436 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5437 and storing in TARGET. Normally return TARGET.
5438 Return 0 if that cannot be done.
5440 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
5441 it is VOIDmode, they cannot both be CONST_INT.
5443 UNSIGNEDP is for the case where we have to widen the operands
5444 to perform the operation. It says to use zero-extension.
5446 NORMALIZEP is 1 if we should convert the result to be either zero
5447 or one. Normalize is -1 if we should convert the result to be
5448 either zero or -1. If NORMALIZEP is zero, the result will be left
5449 "raw" out of the scc insn. */
5452 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5453 enum machine_mode mode, int unsignedp, int normalizep)
5455 enum machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5456 enum rtx_code rcode;
5457 rtx subtarget;
5458 rtx tem, last, trueval;
5460 tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5461 target_mode);
5462 if (tem)
5463 return tem;
5465 /* If we reached here, we can't do this with a scc insn, however there
5466 are some comparisons that can be done in other ways. Don't do any
5467 of these cases if branches are very cheap. */
5468 if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5469 return 0;
5471 /* See what we need to return. We can only return a 1, -1, or the
5472 sign bit. */
5474 if (normalizep == 0)
5476 if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5477 normalizep = STORE_FLAG_VALUE;
5479 else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5481 else
5482 return 0;
5485 last = get_last_insn ();
5487 /* If optimizing, use different pseudo registers for each insn, instead
5488 of reusing the same pseudo. This leads to better CSE, but slows
5489 down the compiler, since there are more pseudos */
5490 subtarget = (!optimize
5491 && (target_mode == mode)) ? target : NULL_RTX;
5492 trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5494 /* For floating-point comparisons, try the reverse comparison or try
5495 changing the "orderedness" of the comparison. */
5496 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5498 enum rtx_code first_code;
5499 bool and_them;
5501 rcode = reverse_condition_maybe_unordered (code);
5502 if (can_compare_p (rcode, mode, ccp_store_flag)
5503 && (code == ORDERED || code == UNORDERED
5504 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5505 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5507 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5508 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5510 /* For the reverse comparison, use either an addition or a XOR. */
5511 if (want_add
5512 && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5513 optimize_insn_for_speed_p ()) == 0)
5515 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5516 STORE_FLAG_VALUE, target_mode);
5517 if (tem)
5518 return expand_binop (target_mode, add_optab, tem,
5519 GEN_INT (normalizep),
5520 target, 0, OPTAB_WIDEN);
5522 else if (!want_add
5523 && rtx_cost (trueval, XOR, 1,
5524 optimize_insn_for_speed_p ()) == 0)
5526 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5527 normalizep, target_mode);
5528 if (tem)
5529 return expand_binop (target_mode, xor_optab, tem, trueval,
5530 target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
5534 delete_insns_since (last);
5536 /* Cannot split ORDERED and UNORDERED, only try the above trick. */
5537 if (code == ORDERED || code == UNORDERED)
5538 return 0;
5540 and_them = split_comparison (code, mode, &first_code, &code);
5542 /* If there are no NaNs, the first comparison should always fall through.
5543 Effectively change the comparison to the other one. */
5544 if (!HONOR_NANS (mode))
5546 gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5547 return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5548 target_mode);
5551 #ifdef HAVE_conditional_move
5552 /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5553 conditional move. */
5554 tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5555 normalizep, target_mode);
5556 if (tem == 0)
5557 return 0;
5559 if (and_them)
5560 tem = emit_conditional_move (target, code, op0, op1, mode,
5561 tem, const0_rtx, GET_MODE (tem), 0);
5562 else
5563 tem = emit_conditional_move (target, code, op0, op1, mode,
5564 trueval, tem, GET_MODE (tem), 0);
5566 if (tem == 0)
5567 delete_insns_since (last);
5568 return tem;
5569 #else
5570 return 0;
5571 #endif
5574 /* The remaining tricks only apply to integer comparisons. */
5576 if (GET_MODE_CLASS (mode) != MODE_INT)
5577 return 0;
5579 /* If this is an equality comparison of integers, we can try to exclusive-or
5580 (or subtract) the two operands and use a recursive call to try the
5581 comparison with zero. Don't do any of these cases if branches are
5582 very cheap. */
5584 if ((code == EQ || code == NE) && op1 != const0_rtx)
5586 tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5587 OPTAB_WIDEN);
5589 if (tem == 0)
5590 tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5591 OPTAB_WIDEN);
5592 if (tem != 0)
5593 tem = emit_store_flag (target, code, tem, const0_rtx,
5594 mode, unsignedp, normalizep);
5595 if (tem != 0)
5596 return tem;
5598 delete_insns_since (last);
5601 /* For integer comparisons, try the reverse comparison. However, for
5602 small X and if we'd have anyway to extend, implementing "X != 0"
5603 as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0". */
5604 rcode = reverse_condition (code);
5605 if (can_compare_p (rcode, mode, ccp_store_flag)
5606 && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5607 && code == NE
5608 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5609 && op1 == const0_rtx))
5611 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5612 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5614 /* Again, for the reverse comparison, use either an addition or a XOR. */
5615 if (want_add
5616 && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5617 optimize_insn_for_speed_p ()) == 0)
5619 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5620 STORE_FLAG_VALUE, target_mode);
5621 if (tem != 0)
5622 tem = expand_binop (target_mode, add_optab, tem,
5623 GEN_INT (normalizep), target, 0, OPTAB_WIDEN);
5625 else if (!want_add
5626 && rtx_cost (trueval, XOR, 1,
5627 optimize_insn_for_speed_p ()) == 0)
5629 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5630 normalizep, target_mode);
5631 if (tem != 0)
5632 tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5633 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5636 if (tem != 0)
5637 return tem;
5638 delete_insns_since (last);
5641 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5642 the constant zero. Reject all other comparisons at this point. Only
5643 do LE and GT if branches are expensive since they are expensive on
5644 2-operand machines. */
5646 if (op1 != const0_rtx
5647 || (code != EQ && code != NE
5648 && (BRANCH_COST (optimize_insn_for_speed_p (),
5649 false) <= 1 || (code != LE && code != GT))))
5650 return 0;
5652 /* Try to put the result of the comparison in the sign bit. Assume we can't
5653 do the necessary operation below. */
5655 tem = 0;
5657 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
5658 the sign bit set. */
5660 if (code == LE)
5662 /* This is destructive, so SUBTARGET can't be OP0. */
5663 if (rtx_equal_p (subtarget, op0))
5664 subtarget = 0;
5666 tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5667 OPTAB_WIDEN);
5668 if (tem)
5669 tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5670 OPTAB_WIDEN);
5673 /* To see if A > 0, compute (((signed) A) << BITS) - A, where BITS is the
5674 number of bits in the mode of OP0, minus one. */
5676 if (code == GT)
5678 if (rtx_equal_p (subtarget, op0))
5679 subtarget = 0;
5681 tem = expand_shift (RSHIFT_EXPR, mode, op0,
5682 GET_MODE_BITSIZE (mode) - 1,
5683 subtarget, 0);
5684 tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5685 OPTAB_WIDEN);
5688 if (code == EQ || code == NE)
5690 /* For EQ or NE, one way to do the comparison is to apply an operation
5691 that converts the operand into a positive number if it is nonzero
5692 or zero if it was originally zero. Then, for EQ, we subtract 1 and
5693 for NE we negate. This puts the result in the sign bit. Then we
5694 normalize with a shift, if needed.
5696 Two operations that can do the above actions are ABS and FFS, so try
5697 them. If that doesn't work, and MODE is smaller than a full word,
5698 we can use zero-extension to the wider mode (an unsigned conversion)
5699 as the operation. */
5701 /* Note that ABS doesn't yield a positive number for INT_MIN, but
5702 that is compensated by the subsequent overflow when subtracting
5703 one / negating. */
5705 if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5706 tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5707 else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5708 tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5709 else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5711 tem = convert_modes (word_mode, mode, op0, 1);
5712 mode = word_mode;
5715 if (tem != 0)
5717 if (code == EQ)
5718 tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5719 0, OPTAB_WIDEN);
5720 else
5721 tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5724 /* If we couldn't do it that way, for NE we can "or" the two's complement
5725 of the value with itself. For EQ, we take the one's complement of
5726 that "or", which is an extra insn, so we only handle EQ if branches
5727 are expensive. */
5729 if (tem == 0
5730 && (code == NE
5731 || BRANCH_COST (optimize_insn_for_speed_p (),
5732 false) > 1))
5734 if (rtx_equal_p (subtarget, op0))
5735 subtarget = 0;
5737 tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5738 tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5739 OPTAB_WIDEN);
5741 if (tem && code == EQ)
5742 tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5746 if (tem && normalizep)
5747 tem = expand_shift (RSHIFT_EXPR, mode, tem,
5748 GET_MODE_BITSIZE (mode) - 1,
5749 subtarget, normalizep == 1);
5751 if (tem)
5753 if (!target)
5755 else if (GET_MODE (tem) != target_mode)
5757 convert_move (target, tem, 0);
5758 tem = target;
5760 else if (!subtarget)
5762 emit_move_insn (target, tem);
5763 tem = target;
5766 else
5767 delete_insns_since (last);
5769 return tem;
5772 /* Like emit_store_flag, but always succeeds. */
5775 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5776 enum machine_mode mode, int unsignedp, int normalizep)
5778 rtx tem, label;
5779 rtx trueval, falseval;
5781 /* First see if emit_store_flag can do the job. */
5782 tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5783 if (tem != 0)
5784 return tem;
5786 if (!target)
5787 target = gen_reg_rtx (word_mode);
5789 /* If this failed, we have to do this with set/compare/jump/set code.
5790 For foo != 0, if foo is in OP0, just replace it with 1 if nonzero. */
5791 trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5792 if (code == NE
5793 && GET_MODE_CLASS (mode) == MODE_INT
5794 && REG_P (target)
5795 && op0 == target
5796 && op1 == const0_rtx)
5798 label = gen_label_rtx ();
5799 do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp,
5800 mode, NULL_RTX, NULL_RTX, label, -1);
5801 emit_move_insn (target, trueval);
5802 emit_label (label);
5803 return target;
5806 if (!REG_P (target)
5807 || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5808 target = gen_reg_rtx (GET_MODE (target));
5810 /* Jump in the right direction if the target cannot implement CODE
5811 but can jump on its reverse condition. */
5812 falseval = const0_rtx;
5813 if (! can_compare_p (code, mode, ccp_jump)
5814 && (! FLOAT_MODE_P (mode)
5815 || code == ORDERED || code == UNORDERED
5816 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5817 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5819 enum rtx_code rcode;
5820 if (FLOAT_MODE_P (mode))
5821 rcode = reverse_condition_maybe_unordered (code);
5822 else
5823 rcode = reverse_condition (code);
5825 /* Canonicalize to UNORDERED for the libcall. */
5826 if (can_compare_p (rcode, mode, ccp_jump)
5827 || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5829 falseval = trueval;
5830 trueval = const0_rtx;
5831 code = rcode;
5835 emit_move_insn (target, trueval);
5836 label = gen_label_rtx ();
5837 do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5838 NULL_RTX, label, -1);
5840 emit_move_insn (target, falseval);
5841 emit_label (label);
5843 return target;
5846 /* Perform possibly multi-word comparison and conditional jump to LABEL
5847 if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE. This is
5848 now a thin wrapper around do_compare_rtx_and_jump. */
5850 static void
5851 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
5852 rtx label)
5854 int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5855 do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode,
5856 NULL_RTX, NULL_RTX, label, -1);