[official-gcc.git] / gcc / expmed.c
1 /* Medium-level subroutines: convert bit-field store and extract
2 and shifts, multiplies and divides to rtl instructions.
3 Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
4 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
5 2011, 2012
6 Free Software Foundation, Inc.
8 This file is part of GCC.
10 GCC is free software; you can redistribute it and/or modify it under
11 the terms of the GNU General Public License as published by the Free
12 Software Foundation; either version 3, or (at your option) any later
13 version.
15 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
16 WARRANTY; without even the implied warranty of MERCHANTABILITY or
17 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 for more details.
20 You should have received a copy of the GNU General Public License
21 along with GCC; see the file COPYING3. If not see
22 <http://www.gnu.org/licenses/>. */
25 #include "config.h"
26 #include "system.h"
27 #include "coretypes.h"
28 #include "tm.h"
29 #include "diagnostic-core.h"
30 #include "rtl.h"
31 #include "tree.h"
32 #include "tm_p.h"
33 #include "flags.h"
34 #include "insn-config.h"
35 #include "expr.h"
36 #include "optabs.h"
37 #include "recog.h"
38 #include "langhooks.h"
39 #include "df.h"
40 #include "target.h"
41 #include "expmed.h"
43 struct target_expmed default_target_expmed;
44 #if SWITCHABLE_TARGET
45 struct target_expmed *this_target_expmed = &default_target_expmed;
46 #endif
48 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
49 unsigned HOST_WIDE_INT,
50 unsigned HOST_WIDE_INT,
51 unsigned HOST_WIDE_INT,
52 unsigned HOST_WIDE_INT,
53 rtx);
54 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
55 unsigned HOST_WIDE_INT,
56 unsigned HOST_WIDE_INT,
57 unsigned HOST_WIDE_INT,
58 rtx);
59 static rtx extract_fixed_bit_field (enum machine_mode, rtx,
60 unsigned HOST_WIDE_INT,
61 unsigned HOST_WIDE_INT,
62 unsigned HOST_WIDE_INT, rtx, int, bool);
63 static rtx mask_rtx (enum machine_mode, int, int, int);
64 static rtx lshift_value (enum machine_mode, rtx, int, int);
65 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
66 unsigned HOST_WIDE_INT, int);
67 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
68 static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
69 static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
71 /* Test whether a value is zero or a power of two.  */
72 #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
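/* Editorial note, not part of the original source: the macro above is the
   classic x & (x - 1) test.  Clearing the lowest set bit of a power of two
   leaves zero, and for x == 0 the expression is 0 & all-ones == 0, so both
   cases satisfy the predicate.  A couple of worked values:
     EXACT_POWER_OF_2_OR_ZERO_P (8)   ->  (8 & 7)   == 0  ->  true
     EXACT_POWER_OF_2_OR_ZERO_P (0)   ->  (0 & ~0)  == 0  ->  true
     EXACT_POWER_OF_2_OR_ZERO_P (12)  ->  (12 & 11) == 8  ->  false  */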
74 #ifndef SLOW_UNALIGNED_ACCESS
75 #define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
76 #endif
79 /* Reduce conditional compilation elsewhere. */
80 #ifndef HAVE_insv
81 #define HAVE_insv 0
82 #define CODE_FOR_insv CODE_FOR_nothing
83 #define gen_insv(a,b,c,d) NULL_RTX
84 #endif
85 #ifndef HAVE_extv
86 #define HAVE_extv 0
87 #define CODE_FOR_extv CODE_FOR_nothing
88 #define gen_extv(a,b,c,d) NULL_RTX
89 #endif
90 #ifndef HAVE_extzv
91 #define HAVE_extzv 0
92 #define CODE_FOR_extzv CODE_FOR_nothing
93 #define gen_extzv(a,b,c,d) NULL_RTX
94 #endif
96 struct init_expmed_rtl
98 struct rtx_def reg; rtunion reg_fld[2];
99 struct rtx_def plus; rtunion plus_fld1;
100 struct rtx_def neg;
101 struct rtx_def mult; rtunion mult_fld1;
102 struct rtx_def sdiv; rtunion sdiv_fld1;
103 struct rtx_def udiv; rtunion udiv_fld1;
104 struct rtx_def sdiv_32; rtunion sdiv_32_fld1;
105 struct rtx_def smod_32; rtunion smod_32_fld1;
106 struct rtx_def wide_mult; rtunion wide_mult_fld1;
107 struct rtx_def wide_lshr; rtunion wide_lshr_fld1;
108 struct rtx_def wide_trunc;
109 struct rtx_def shift; rtunion shift_fld1;
110 struct rtx_def shift_mult; rtunion shift_mult_fld1;
111 struct rtx_def shift_add; rtunion shift_add_fld1;
112 struct rtx_def shift_sub0; rtunion shift_sub0_fld1;
113 struct rtx_def shift_sub1; rtunion shift_sub1_fld1;
114 struct rtx_def zext;
115 struct rtx_def trunc;
117 rtx pow2[MAX_BITS_PER_WORD];
118 rtx cint[MAX_BITS_PER_WORD];
121 static void
122 init_expmed_one_conv (struct init_expmed_rtl *all, enum machine_mode to_mode,
123 enum machine_mode from_mode, bool speed)
125 int to_size, from_size;
126 rtx which;
128 /* We're given no information about the true size of a partial integer,
129 only the size of the "full" integer it requires for storage. For
130 comparison purposes here, reduce the bit size by one in that case. */
131 to_size = (GET_MODE_BITSIZE (to_mode)
132 - (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT));
133 from_size = (GET_MODE_BITSIZE (from_mode)
134 - (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT));
136 /* Assume cost of zero-extend and sign-extend is the same. */
137 which = (to_size < from_size ? &all->trunc : &all->zext);
139 PUT_MODE (&all->reg, from_mode);
140 set_convert_cost (to_mode, from_mode, speed, set_src_cost (which, speed));
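/* Editorial example, not part of the original source: for a QImode ->
   SImode conversion, to_size (32) is not smaller than from_size (8), so
   the cost recorded is that of the prebuilt ZERO_EXTEND skeleton applied
   to a QImode register; for SImode -> QImode the TRUNCATE skeleton is
   costed instead.  The MODE_PARTIAL_INT adjustment above only nudges this
   comparison by one bit, since the true precision of a partial-integer
   mode is not known here.  */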
143 static void
144 init_expmed_one_mode (struct init_expmed_rtl *all,
145 enum machine_mode mode, int speed)
147 int m, n, mode_bitsize;
148 enum machine_mode mode_from;
150 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
152 PUT_MODE (&all->reg, mode);
153 PUT_MODE (&all->plus, mode);
154 PUT_MODE (&all->neg, mode);
155 PUT_MODE (&all->mult, mode);
156 PUT_MODE (&all->sdiv, mode);
157 PUT_MODE (&all->udiv, mode);
158 PUT_MODE (&all->sdiv_32, mode);
159 PUT_MODE (&all->smod_32, mode);
160 PUT_MODE (&all->wide_trunc, mode);
161 PUT_MODE (&all->shift, mode);
162 PUT_MODE (&all->shift_mult, mode);
163 PUT_MODE (&all->shift_add, mode);
164 PUT_MODE (&all->shift_sub0, mode);
165 PUT_MODE (&all->shift_sub1, mode);
166 PUT_MODE (&all->zext, mode);
167 PUT_MODE (&all->trunc, mode);
169 set_add_cost (speed, mode, set_src_cost (&all->plus, speed));
170 set_neg_cost (speed, mode, set_src_cost (&all->neg, speed));
171 set_mul_cost (speed, mode, set_src_cost (&all->mult, speed));
172 set_sdiv_cost (speed, mode, set_src_cost (&all->sdiv, speed));
173 set_udiv_cost (speed, mode, set_src_cost (&all->udiv, speed));
175 set_sdiv_pow2_cheap (speed, mode, (set_src_cost (&all->sdiv_32, speed)
176 <= 2 * add_cost (speed, mode)));
177 set_smod_pow2_cheap (speed, mode, (set_src_cost (&all->smod_32, speed)
178 <= 4 * add_cost (speed, mode)));
180 set_shift_cost (speed, mode, 0, 0);
182 int cost = add_cost (speed, mode);
183 set_shiftadd_cost (speed, mode, 0, cost);
184 set_shiftsub0_cost (speed, mode, 0, cost);
185 set_shiftsub1_cost (speed, mode, 0, cost);
188 n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
189 for (m = 1; m < n; m++)
191 XEXP (&all->shift, 1) = all->cint[m];
192 XEXP (&all->shift_mult, 1) = all->pow2[m];
194 set_shift_cost (speed, mode, m, set_src_cost (&all->shift, speed));
195 set_shiftadd_cost (speed, mode, m, set_src_cost (&all->shift_add, speed));
196 set_shiftsub0_cost (speed, mode, m, set_src_cost (&all->shift_sub0, speed));
197 set_shiftsub1_cost (speed, mode, m, set_src_cost (&all->shift_sub1, speed));
200 if (SCALAR_INT_MODE_P (mode))
202 for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
203 mode_from = (enum machine_mode)(mode_from + 1))
204 init_expmed_one_conv (all, mode, mode_from, speed);
206 if (GET_MODE_CLASS (mode) == MODE_INT)
208 enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
209 if (wider_mode != VOIDmode)
211 PUT_MODE (&all->zext, wider_mode);
212 PUT_MODE (&all->wide_mult, wider_mode);
213 PUT_MODE (&all->wide_lshr, wider_mode);
214 XEXP (&all->wide_lshr, 1) = GEN_INT (mode_bitsize);
216 set_mul_widen_cost (speed, wider_mode,
217 set_src_cost (&all->wide_mult, speed));
218 set_mul_highpart_cost (speed, mode,
219 set_src_cost (&all->wide_trunc, speed));
224 void
225 init_expmed (void)
227 struct init_expmed_rtl all;
228 enum machine_mode mode;
229 int m, speed;
231 memset (&all, 0, sizeof all);
232 for (m = 1; m < MAX_BITS_PER_WORD; m++)
234 all.pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
235 all.cint[m] = GEN_INT (m);
238 PUT_CODE (&all.reg, REG);
239 /* Avoid using hard regs in ways which may be unsupported. */
240 SET_REGNO (&all.reg, LAST_VIRTUAL_REGISTER + 1);
242 PUT_CODE (&all.plus, PLUS);
243 XEXP (&all.plus, 0) = &all.reg;
244 XEXP (&all.plus, 1) = &all.reg;
246 PUT_CODE (&all.neg, NEG);
247 XEXP (&all.neg, 0) = &all.reg;
249 PUT_CODE (&all.mult, MULT);
250 XEXP (&all.mult, 0) = &all.reg;
251 XEXP (&all.mult, 1) = &all.reg;
253 PUT_CODE (&all.sdiv, DIV);
254 XEXP (&all.sdiv, 0) = &all.reg;
255 XEXP (&all.sdiv, 1) = &all.reg;
257 PUT_CODE (&all.udiv, UDIV);
258 XEXP (&all.udiv, 0) = &all.reg;
259 XEXP (&all.udiv, 1) = &all.reg;
261 PUT_CODE (&all.sdiv_32, DIV);
262 XEXP (&all.sdiv_32, 0) = &all.reg;
263 XEXP (&all.sdiv_32, 1) = 32 < MAX_BITS_PER_WORD ? all.cint[32] : GEN_INT (32);
265 PUT_CODE (&all.smod_32, MOD);
266 XEXP (&all.smod_32, 0) = &all.reg;
267 XEXP (&all.smod_32, 1) = XEXP (&all.sdiv_32, 1);
269 PUT_CODE (&all.zext, ZERO_EXTEND);
270 XEXP (&all.zext, 0) = &all.reg;
272 PUT_CODE (&all.wide_mult, MULT);
273 XEXP (&all.wide_mult, 0) = &all.zext;
274 XEXP (&all.wide_mult, 1) = &all.zext;
276 PUT_CODE (&all.wide_lshr, LSHIFTRT);
277 XEXP (&all.wide_lshr, 0) = &all.wide_mult;
279 PUT_CODE (&all.wide_trunc, TRUNCATE);
280 XEXP (&all.wide_trunc, 0) = &all.wide_lshr;
282 PUT_CODE (&all.shift, ASHIFT);
283 XEXP (&all.shift, 0) = &all.reg;
285 PUT_CODE (&all.shift_mult, MULT);
286 XEXP (&all.shift_mult, 0) = &all.reg;
288 PUT_CODE (&all.shift_add, PLUS);
289 XEXP (&all.shift_add, 0) = &all.shift_mult;
290 XEXP (&all.shift_add, 1) = &all.reg;
292 PUT_CODE (&all.shift_sub0, MINUS);
293 XEXP (&all.shift_sub0, 0) = &all.shift_mult;
294 XEXP (&all.shift_sub0, 1) = &all.reg;
296 PUT_CODE (&all.shift_sub1, MINUS);
297 XEXP (&all.shift_sub1, 0) = &all.reg;
298 XEXP (&all.shift_sub1, 1) = &all.shift_mult;
300 PUT_CODE (&all.trunc, TRUNCATE);
301 XEXP (&all.trunc, 0) = &all.reg;
303 for (speed = 0; speed < 2; speed++)
305 crtl->maybe_hot_insn_p = speed;
306 set_zero_cost (speed, set_src_cost (const0_rtx, speed));
308 for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
309 mode = (enum machine_mode)(mode + 1))
310 init_expmed_one_mode (&all, mode, speed);
312 if (MIN_MODE_PARTIAL_INT != VOIDmode)
313 for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
314 mode = (enum machine_mode)(mode + 1))
315 init_expmed_one_mode (&all, mode, speed);
317 if (MIN_MODE_VECTOR_INT != VOIDmode)
318 for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
319 mode = (enum machine_mode)(mode + 1))
320 init_expmed_one_mode (&all, mode, speed);
323 if (alg_hash_used_p ())
325 struct alg_hash_entry *p = alg_hash_entry_ptr (0);
326 memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
328 else
329 set_alg_hash_used_p (true);
330 default_rtl_profile ();
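/* Editorial sketch, not part of the original source: the tables filled in
   by init_expmed let later expansion code compare strategies without
   re-costing RTL each time.  For example, a multiply by 1 << m might be
   turned into a shift roughly as follows (shift_cost and mul_cost are the
   accessors corresponding to the set_shift_cost / set_mul_cost calls
   above, declared in expmed.h):

     bool speed = optimize_insn_for_speed_p ();
     if (shift_cost (speed, SImode, m) < mul_cost (speed, SImode))
       ...expand as an ASHIFT instead of a MULT...

   The real decision logic lives in expand_mult / synth_mult later in this
   file; this is only an illustration of how the cached costs are used.  */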
333 /* Return an rtx representing minus the value of X.
334 MODE is the intended mode of the result,
335 useful if X is a CONST_INT. */
338 negate_rtx (enum machine_mode mode, rtx x)
340 rtx result = simplify_unary_operation (NEG, mode, x, mode);
342 if (result == 0)
343 result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
345 return result;
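/* Editorial example, not part of the original source: for a constant
   operand the simplifier folds the negation immediately, e.g.

     rtx r = negate_rtx (SImode, GEN_INT (5));   // (const_int -5)

   whereas for a non-constant operand simplify_unary_operation returns
   NULL and expand_unop emits the NEG (or an equivalent subtraction from
   zero) and returns the result register.  */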
348 /* Report on the availability of insv/extv/extzv and the desired mode
349 of each of their operands. Returns MAX_MACHINE_MODE if HAVE_foo
350 is false; else the mode of the specified operand. If OPNO is -1,
351 all the caller cares about is whether the insn is available. */
352 enum machine_mode
353 mode_for_extraction (enum extraction_pattern pattern, int opno)
355 const struct insn_data_d *data;
357 switch (pattern)
359 case EP_insv:
360 if (HAVE_insv)
362 data = &insn_data[CODE_FOR_insv];
363 break;
365 return MAX_MACHINE_MODE;
367 case EP_extv:
368 if (HAVE_extv)
370 data = &insn_data[CODE_FOR_extv];
371 break;
373 return MAX_MACHINE_MODE;
375 case EP_extzv:
376 if (HAVE_extzv)
378 data = &insn_data[CODE_FOR_extzv];
379 break;
381 return MAX_MACHINE_MODE;
383 default:
384 gcc_unreachable ();
387 if (opno == -1)
388 return VOIDmode;
390 /* Everyone who uses this function used to follow it with
391 if (result == VOIDmode) result = word_mode; */
392 if (data->operand[opno].mode == VOIDmode)
393 return word_mode;
394 return data->operand[opno].mode;
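/* Editorial note, not part of the original source: a typical query is the
   one made by store_bit_field_1 below,

     enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);

   which asks for the mode the insv pattern wants for its source operand.
   MAX_MACHINE_MODE as the result means the target has no such pattern;
   OPNO == -1 merely tests for the pattern's existence.  */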
397 /* A subroutine of store_bit_field, with the same arguments. Return true
398 if the operation could be implemented.
400 If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
401 no other way of implementing the operation. If FALLBACK_P is false,
402 return false instead. */
404 static bool
405 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
406 unsigned HOST_WIDE_INT bitnum,
407 unsigned HOST_WIDE_INT bitregion_start,
408 unsigned HOST_WIDE_INT bitregion_end,
409 enum machine_mode fieldmode,
410 rtx value, bool fallback_p)
412 unsigned int unit
413 = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
414 unsigned HOST_WIDE_INT offset, bitpos;
415 rtx op0 = str_rtx;
416 int byte_offset;
417 rtx orig_value;
419 enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);
421 while (GET_CODE (op0) == SUBREG)
423 /* The following line once was done only if WORDS_BIG_ENDIAN,
424 but I think that is a mistake. WORDS_BIG_ENDIAN is
425 meaningful at a much higher level; when structures are copied
426 between memory and regs, the higher-numbered regs
427 always get higher addresses. */
428 int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
429 int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
431 byte_offset = 0;
433 /* Paradoxical subregs need special handling on big endian machines. */
434 if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
436 int difference = inner_mode_size - outer_mode_size;
438 if (WORDS_BIG_ENDIAN)
439 byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
440 if (BYTES_BIG_ENDIAN)
441 byte_offset += difference % UNITS_PER_WORD;
443 else
444 byte_offset = SUBREG_BYTE (op0);
446 bitnum += byte_offset * BITS_PER_UNIT;
447 op0 = SUBREG_REG (op0);
450 /* No action is needed if the target is a register and if the field
451 lies completely outside that register. This can occur if the source
452 code contains an out-of-bounds access to a small array. */
453 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
454 return true;
456 /* Use vec_set patterns for inserting parts of vectors whenever
457 available. */
458 if (VECTOR_MODE_P (GET_MODE (op0))
459 && !MEM_P (op0)
460 && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
461 && fieldmode == GET_MODE_INNER (GET_MODE (op0))
462 && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
463 && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
465 struct expand_operand ops[3];
466 enum machine_mode outermode = GET_MODE (op0);
467 enum machine_mode innermode = GET_MODE_INNER (outermode);
468 enum insn_code icode = optab_handler (vec_set_optab, outermode);
469 int pos = bitnum / GET_MODE_BITSIZE (innermode);
471 create_fixed_operand (&ops[0], op0);
472 create_input_operand (&ops[1], value, innermode);
473 create_integer_operand (&ops[2], pos);
474 if (maybe_expand_insn (icode, 3, ops))
475 return true;
478 /* If the target is a register, overwriting the entire object, or storing
479 a full-word or multi-word field can be done with just a SUBREG.
481 If the target is memory, storing any naturally aligned field can be
482 done with a simple store. For targets that support fast unaligned
483 memory, any naturally sized, unit aligned field can be done directly. */
485 offset = bitnum / unit;
486 bitpos = bitnum % unit;
487 byte_offset = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
488 + (offset * UNITS_PER_WORD);
490 if (bitpos == 0
491 && bitsize == GET_MODE_BITSIZE (fieldmode)
492 && (!MEM_P (op0)
493 ? ((GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD
494 || GET_MODE_SIZE (GET_MODE (op0)) == GET_MODE_SIZE (fieldmode))
495 && ((GET_MODE (op0) == fieldmode && byte_offset == 0)
496 || validate_subreg (fieldmode, GET_MODE (op0), op0,
497 byte_offset)))
498 : (! SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0))
499 || (offset * BITS_PER_UNIT % bitsize == 0
500 && MEM_ALIGN (op0) % GET_MODE_BITSIZE (fieldmode) == 0))))
502 if (MEM_P (op0))
503 op0 = adjust_bitfield_address (op0, fieldmode, offset);
504 else if (GET_MODE (op0) != fieldmode)
505 op0 = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
506 byte_offset);
507 emit_move_insn (op0, value);
508 return true;
511 /* Make sure we are playing with integral modes. Pun with subregs
512 if we aren't. This must come after the entire register case above,
513 since that case is valid for any mode. The following cases are only
514 valid for integral modes. */
516 enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
517 if (imode != GET_MODE (op0))
519 if (MEM_P (op0))
520 op0 = adjust_bitfield_address (op0, imode, 0);
521 else
523 gcc_assert (imode != BLKmode);
524 op0 = gen_lowpart (imode, op0);
529 /* If OP0 is a register, BITPOS must count within a word.
530 But as we have it, it counts within whatever size OP0 now has.
531 On a bigendian machine, these are not the same, so convert. */
532 if (BYTES_BIG_ENDIAN
533 && !MEM_P (op0)
534 && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
535 bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
537 /* Storing an lsb-aligned field in a register
538 can be done with a movestrict instruction. */
540 if (!MEM_P (op0)
541 && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
542 && bitsize == GET_MODE_BITSIZE (fieldmode)
543 && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
545 struct expand_operand ops[2];
546 enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
547 rtx arg0 = op0;
548 unsigned HOST_WIDE_INT subreg_off;
550 if (GET_CODE (arg0) == SUBREG)
552 /* Else we've got some float mode source being extracted into
553 a different float mode destination -- this combination of
554 subregs results in Severe Tire Damage. */
555 gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
556 || GET_MODE_CLASS (fieldmode) == MODE_INT
557 || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
558 arg0 = SUBREG_REG (arg0);
561 subreg_off = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
562 + (offset * UNITS_PER_WORD);
563 if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
565 arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
567 create_fixed_operand (&ops[0], arg0);
568 /* Shrink the source operand to FIELDMODE. */
569 create_convert_operand_to (&ops[1], value, fieldmode, false);
570 if (maybe_expand_insn (icode, 2, ops))
571 return true;
575 /* Handle fields bigger than a word. */
577 if (bitsize > BITS_PER_WORD)
579 /* Here we transfer the words of the field
580 in the order least significant first.
581 This is because the most significant word is the one which may
582 be less than full.
583 However, only do that if the value is not BLKmode. */
585 unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
586 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
587 unsigned int i;
588 rtx last;
590 /* This is the mode we must force value to, so that there will be enough
591 subwords to extract. Note that fieldmode will often (always?) be
592 VOIDmode, because that is what store_field uses to indicate that this
593 is a bit field, but passing VOIDmode to operand_subword_force
594 is not allowed. */
595 fieldmode = GET_MODE (value);
596 if (fieldmode == VOIDmode)
597 fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
599 last = get_last_insn ();
600 for (i = 0; i < nwords; i++)
602 /* If I is 0, use the low-order word in both field and target;
603 if I is 1, use the next to lowest word; and so on. */
604 unsigned int wordnum = (backwards
605 ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD
606 - i - 1
607 : i);
608 unsigned int bit_offset = (backwards
609 ? MAX ((int) bitsize - ((int) i + 1)
610 * BITS_PER_WORD,
612 : (int) i * BITS_PER_WORD);
613 rtx value_word = operand_subword_force (value, wordnum, fieldmode);
614 unsigned HOST_WIDE_INT new_bitsize =
615 MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
617 /* If the remaining chunk doesn't have full wordsize we have
618 to make sure that for big endian machines the higher order
619 bits are used. */
620 if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
621 value_word = simplify_expand_binop (word_mode, lshr_optab,
622 value_word,
623 GEN_INT (BITS_PER_WORD
624 - new_bitsize),
625 NULL_RTX, true,
626 OPTAB_LIB_WIDEN);
628 if (!store_bit_field_1 (op0, new_bitsize,
629 bitnum + bit_offset,
630 bitregion_start, bitregion_end,
631 word_mode,
632 value_word, fallback_p))
634 delete_insns_since (last);
635 return false;
638 return true;
641 /* From here on we can assume that the field to be stored in fits
642 within a single word, since it is narrower than a word.  */
644 /* OFFSET is the number of words or bytes (UNIT says which)
645 from STR_RTX to the first word or byte containing part of the field. */
647 if (!MEM_P (op0))
649 if (offset != 0
650 || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
652 if (!REG_P (op0))
654 /* Since this is a destination (lvalue), we can't copy
655 it to a pseudo. We can remove a SUBREG that does not
656 change the size of the operand. Such a SUBREG may
657 have been added above. */
658 gcc_assert (GET_CODE (op0) == SUBREG
659 && (GET_MODE_SIZE (GET_MODE (op0))
660 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)))));
661 op0 = SUBREG_REG (op0);
663 op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
664 op0, (offset * UNITS_PER_WORD));
666 offset = 0;
669 /* If VALUE has a floating-point or complex mode, access it as an
670 integer of the corresponding size. This can occur on a machine
671 with 64 bit registers that uses SFmode for float. It can also
672 occur for unaligned float or complex fields. */
673 orig_value = value;
674 if (GET_MODE (value) != VOIDmode
675 && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
676 && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
678 value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
679 emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
682 /* Now OFFSET is nonzero only if OP0 is memory
683 and is therefore always measured in bytes. */
685 if (HAVE_insv
686 && GET_MODE (value) != BLKmode
687 && bitsize > 0
688 && GET_MODE_BITSIZE (op_mode) >= bitsize
689 /* Do not use insv for volatile bitfields when
690 -fstrict-volatile-bitfields is in effect. */
691 && !(MEM_P (op0) && MEM_VOLATILE_P (op0)
692 && flag_strict_volatile_bitfields > 0)
693 && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
694 && (bitsize + bitpos > GET_MODE_BITSIZE (op_mode)))
695 /* Do not use insv if the bit region is restricted and
696 op_mode integer at offset doesn't fit into the
697 restricted region. */
698 && !(MEM_P (op0) && bitregion_end
699 && bitnum - bitpos + GET_MODE_BITSIZE (op_mode)
700 > bitregion_end + 1))
702 struct expand_operand ops[4];
703 int xbitpos = bitpos;
704 rtx value1;
705 rtx xop0 = op0;
706 rtx last = get_last_insn ();
707 bool copy_back = false;
709 /* Add OFFSET into OP0's address. */
710 if (MEM_P (xop0))
711 xop0 = adjust_bitfield_address (xop0, byte_mode, offset);
713 /* If xop0 is a register, we need it in OP_MODE
714 to make it acceptable to the format of insv. */
715 if (GET_CODE (xop0) == SUBREG)
716 /* We can't just change the mode, because this might clobber op0,
717 and we will need the original value of op0 if insv fails. */
718 xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
719 if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
720 xop0 = gen_lowpart_SUBREG (op_mode, xop0);
722 /* If the destination is a paradoxical subreg such that we need a
723 truncate to the inner mode, perform the insertion on a temporary and
724 truncate the result to the original destination. Note that we can't
725 just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
726 X) 0)) is (reg:N X). */
727 if (GET_CODE (xop0) == SUBREG
728 && REG_P (SUBREG_REG (xop0))
729 && (!TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
730 op_mode)))
732 rtx tem = gen_reg_rtx (op_mode);
733 emit_move_insn (tem, xop0);
734 xop0 = tem;
735 copy_back = true;
738 /* We have been counting XBITPOS within UNIT.
739 Count instead within the size of the register. */
740 if (BYTES_BIG_ENDIAN && !MEM_P (xop0))
741 xbitpos += GET_MODE_BITSIZE (op_mode) - unit;
743 unit = GET_MODE_BITSIZE (op_mode);
745 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
746 "backwards" from the size of the unit we are inserting into.
747 Otherwise, we count bits from the most significant on a
748 BYTES/BITS_BIG_ENDIAN machine. */
750 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
751 xbitpos = unit - bitsize - xbitpos;
753 /* Convert VALUE to op_mode (which insv insn wants) in VALUE1. */
754 value1 = value;
755 if (GET_MODE (value) != op_mode)
757 if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
759 /* Optimization: Don't bother really extending VALUE
760 if it has all the bits we will actually use. However,
761 if we must narrow it, be sure we do it correctly. */
763 if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
765 rtx tmp;
767 tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
768 if (! tmp)
769 tmp = simplify_gen_subreg (op_mode,
770 force_reg (GET_MODE (value),
771 value1),
772 GET_MODE (value), 0);
773 value1 = tmp;
775 else
776 value1 = gen_lowpart (op_mode, value1);
778 else if (CONST_INT_P (value))
779 value1 = gen_int_mode (INTVAL (value), op_mode);
780 else
781 /* Parse phase is supposed to make VALUE's data type
782 match that of the component reference, which is a type
783 at least as wide as the field; so VALUE should have
784 a mode that corresponds to that type. */
785 gcc_assert (CONSTANT_P (value));
788 create_fixed_operand (&ops[0], xop0);
789 create_integer_operand (&ops[1], bitsize);
790 create_integer_operand (&ops[2], xbitpos);
791 create_input_operand (&ops[3], value1, op_mode);
792 if (maybe_expand_insn (CODE_FOR_insv, 4, ops))
794 if (copy_back)
795 convert_move (op0, xop0, true);
796 return true;
798 delete_insns_since (last);
801 /* If OP0 is a memory, try copying it to a register and seeing if a
802 cheap register alternative is available. */
803 if (HAVE_insv && MEM_P (op0))
805 enum machine_mode bestmode;
806 unsigned HOST_WIDE_INT maxbits = MAX_FIXED_MODE_SIZE;
808 if (bitregion_end)
809 maxbits = bitregion_end - bitregion_start + 1;
811 /* Get the mode to use for inserting into this field. If OP0 is
812 BLKmode, get the smallest mode consistent with the alignment. If
813 OP0 is a non-BLKmode object that is no wider than OP_MODE, use its
814 mode. Otherwise, use the smallest mode containing the field. */
816 if (GET_MODE (op0) == BLKmode
817 || GET_MODE_BITSIZE (GET_MODE (op0)) > maxbits
818 || (op_mode != MAX_MACHINE_MODE
819 && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (op_mode)))
820 bestmode = get_best_mode (bitsize, bitnum,
821 bitregion_start, bitregion_end,
822 MEM_ALIGN (op0),
823 (op_mode == MAX_MACHINE_MODE
824 ? VOIDmode : op_mode),
825 MEM_VOLATILE_P (op0));
826 else
827 bestmode = GET_MODE (op0);
829 if (bestmode != VOIDmode
830 && GET_MODE_SIZE (bestmode) >= GET_MODE_SIZE (fieldmode)
831 && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
832 && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
834 rtx last, tempreg, xop0;
835 unsigned HOST_WIDE_INT xoffset, xbitpos;
837 last = get_last_insn ();
839 /* Adjust address to point to the containing unit of
840 that mode. Compute the offset as a multiple of this unit,
841 counting in bytes. */
842 unit = GET_MODE_BITSIZE (bestmode);
843 xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
844 xbitpos = bitnum % unit;
845 xop0 = adjust_bitfield_address (op0, bestmode, xoffset);
847 /* Fetch that unit, store the bitfield in it, then store
848 the unit. */
849 tempreg = copy_to_reg (xop0);
850 if (store_bit_field_1 (tempreg, bitsize, xbitpos,
851 bitregion_start, bitregion_end,
852 fieldmode, orig_value, false))
854 emit_move_insn (xop0, tempreg);
855 return true;
857 delete_insns_since (last);
861 if (!fallback_p)
862 return false;
864 store_fixed_bit_field (op0, offset, bitsize, bitpos,
865 bitregion_start, bitregion_end, value);
866 return true;
869 /* Generate code to store value from rtx VALUE
870 into a bit-field within structure STR_RTX
871 containing BITSIZE bits starting at bit BITNUM.
873 BITREGION_START is bitpos of the first bitfield in this region.
874 BITREGION_END is the bitpos of the ending bitfield in this region.
875 These two fields are 0, if the C++ memory model does not apply,
876 or we are not interested in keeping track of bitfield regions.
878 FIELDMODE is the machine-mode of the FIELD_DECL node for this field. */
880 void
881 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
882 unsigned HOST_WIDE_INT bitnum,
883 unsigned HOST_WIDE_INT bitregion_start,
884 unsigned HOST_WIDE_INT bitregion_end,
885 enum machine_mode fieldmode,
886 rtx value)
888 /* Under the C++0x memory model, we must not touch bits outside the
889 bit region. Adjust the address to start at the beginning of the
890 bit region. */
891 if (MEM_P (str_rtx) && bitregion_start > 0)
893 enum machine_mode bestmode;
894 enum machine_mode op_mode;
895 unsigned HOST_WIDE_INT offset;
897 op_mode = mode_for_extraction (EP_insv, 3);
898 if (op_mode == MAX_MACHINE_MODE)
899 op_mode = VOIDmode;
901 gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);
903 offset = bitregion_start / BITS_PER_UNIT;
904 bitnum -= bitregion_start;
905 bitregion_end -= bitregion_start;
906 bitregion_start = 0;
907 bestmode = get_best_mode (bitsize, bitnum,
908 bitregion_start, bitregion_end,
909 MEM_ALIGN (str_rtx),
910 op_mode,
911 MEM_VOLATILE_P (str_rtx));
912 str_rtx = adjust_address (str_rtx, bestmode, offset);
915 if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
916 bitregion_start, bitregion_end,
917 fieldmode, value, true))
918 gcc_unreachable ();
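/* Editorial example, not part of the original source: given a structure
   such as

     struct s { unsigned a : 3; unsigned b : 9; unsigned c : 4; } *p;
     p->b = v;

   the expander would typically reach store_bit_field with bitsize == 9
   and bitnum == 3, and, when the C++11 memory-model rules are in effect,
   with bitregion_start / bitregion_end delimiting the 16 bits occupied by
   a..c so that no generated store touches bytes outside that region.  The
   exact numbers depend on the target's bit-field layout; this is only an
   illustration of the interface.  */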
921 /* Use shifts and boolean operations to store VALUE
922 into a bit field of width BITSIZE
923 in a memory location specified by OP0 except offset by OFFSET bytes.
924 (OFFSET must be 0 if OP0 is a register.)
925 The field starts at position BITPOS within the byte.
926 (If OP0 is a register, it may be a full word or a narrower mode,
927 but BITPOS still counts within a full word,
928 which is significant on bigendian machines.) */
930 static void
931 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT offset,
932 unsigned HOST_WIDE_INT bitsize,
933 unsigned HOST_WIDE_INT bitpos,
934 unsigned HOST_WIDE_INT bitregion_start,
935 unsigned HOST_WIDE_INT bitregion_end,
936 rtx value)
938 enum machine_mode mode;
939 unsigned int total_bits = BITS_PER_WORD;
940 rtx temp;
941 int all_zero = 0;
942 int all_one = 0;
944 /* There is a case not handled here:
945 a structure with a known alignment of just a halfword
946 and a field split across two aligned halfwords within the structure.
947 Or likewise a structure with a known alignment of just a byte
948 and a field split across two bytes.
949 Such cases are not supposed to be able to occur. */
951 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
953 gcc_assert (!offset);
954 /* Special treatment for a bit field split across two registers. */
955 if (bitsize + bitpos > BITS_PER_WORD)
957 store_split_bit_field (op0, bitsize, bitpos,
958 bitregion_start, bitregion_end,
959 value);
960 return;
963 else
965 unsigned HOST_WIDE_INT maxbits = MAX_FIXED_MODE_SIZE;
967 if (bitregion_end)
968 maxbits = bitregion_end - bitregion_start + 1;
970 /* Get the proper mode to use for this field. We want a mode that
971 includes the entire field. If such a mode would be larger than
972 a word, we won't be doing the extraction the normal way.
973 We don't want a mode bigger than the destination. */
975 mode = GET_MODE (op0);
976 if (GET_MODE_BITSIZE (mode) == 0
977 || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
978 mode = word_mode;
980 if (MEM_VOLATILE_P (op0)
981 && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
982 && GET_MODE_BITSIZE (GET_MODE (op0)) <= maxbits
983 && flag_strict_volatile_bitfields > 0)
984 mode = GET_MODE (op0);
985 else
986 mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
987 bitregion_start, bitregion_end,
988 MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
990 if (mode == VOIDmode)
992 /* The only way this should occur is if the field spans word
993 boundaries. */
994 store_split_bit_field (op0, bitsize, bitpos + offset * BITS_PER_UNIT,
995 bitregion_start, bitregion_end, value);
996 return;
999 total_bits = GET_MODE_BITSIZE (mode);
1001 /* Make sure bitpos is valid for the chosen mode. Adjust BITPOS to
1002 be in the range 0 to total_bits-1, and put any excess bytes in
1003 OFFSET. */
1004 if (bitpos >= total_bits)
1006 offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
1007 bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
1008 * BITS_PER_UNIT);
1011 /* Get ref to an aligned byte, halfword, or word containing the field.
1012 Adjust BITPOS to be position within a word,
1013 and OFFSET to be the offset of that word.
1014 Then alter OP0 to refer to that word. */
1015 bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
1016 offset -= (offset % (total_bits / BITS_PER_UNIT));
1017 op0 = adjust_bitfield_address (op0, mode, offset);
1020 mode = GET_MODE (op0);
1022 /* Now MODE is either some integral mode for a MEM as OP0,
1023 or is a full-word for a REG as OP0. TOTAL_BITS corresponds.
1024 The bit field is contained entirely within OP0.
1025 BITPOS is the starting bit number within OP0.
1026 (OP0's mode may actually be narrower than MODE.) */
1028 if (BYTES_BIG_ENDIAN)
1029 /* BITPOS is the distance between our msb
1030 and that of the containing datum.
1031 Convert it to the distance from the lsb. */
1032 bitpos = total_bits - bitsize - bitpos;
1034 /* Now BITPOS is always the distance between our lsb
1035 and that of OP0. */
1037 /* Shift VALUE left by BITPOS bits. If VALUE is not constant,
1038 we must first convert its mode to MODE. */
1040 if (CONST_INT_P (value))
1042 HOST_WIDE_INT v = INTVAL (value);
1044 if (bitsize < HOST_BITS_PER_WIDE_INT)
1045 v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
1047 if (v == 0)
1048 all_zero = 1;
1049 else if ((bitsize < HOST_BITS_PER_WIDE_INT
1050 && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
1051 || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
1052 all_one = 1;
1054 value = lshift_value (mode, value, bitpos, bitsize);
1056 else
1058 int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
1059 && bitpos + bitsize != GET_MODE_BITSIZE (mode));
1061 if (GET_MODE (value) != mode)
1062 value = convert_to_mode (mode, value, 1);
1064 if (must_and)
1065 value = expand_binop (mode, and_optab, value,
1066 mask_rtx (mode, 0, bitsize, 0),
1067 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1068 if (bitpos > 0)
1069 value = expand_shift (LSHIFT_EXPR, mode, value,
1070 bitpos, NULL_RTX, 1);
1073 /* Now clear the chosen bits in OP0,
1074 except that if VALUE is -1 we need not bother. */
1075 /* We keep the intermediates in registers to allow CSE to combine
1076 consecutive bitfield assignments. */
1078 temp = force_reg (mode, op0);
1080 if (! all_one)
1082 temp = expand_binop (mode, and_optab, temp,
1083 mask_rtx (mode, bitpos, bitsize, 1),
1084 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1085 temp = force_reg (mode, temp);
1088 /* Now logical-or VALUE into OP0, unless it is zero. */
1090 if (! all_zero)
1092 temp = expand_binop (mode, ior_optab, temp, value,
1093 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1094 temp = force_reg (mode, temp);
1097 if (op0 != temp)
1099 op0 = copy_rtx (op0);
1100 emit_move_insn (op0, temp);
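/* Editorial sketch, not part of the original source: ignoring endianness
   and the CONST_INT fast paths, the RTL built above amounts to the usual
   read-modify-write sequence, in plain C terms (assuming
   bitsize < HOST_BITS_PER_WIDE_INT):

     unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << bitsize) - 1;
     word = (word & ~(mask << bitpos))       // clear the field (skipped if all_one)
            | ((value & mask) << bitpos);    // insert new bits (skipped if all_zero)

   where WORD stands for the memory word or register selected above.  */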
1104 /* Store a bit field that is split across multiple accessible memory objects.
1106 OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1107 BITSIZE is the field width; BITPOS the position of its first bit
1108 (within the word).
1109 VALUE is the value to store.
1111 This does not yet handle fields wider than BITS_PER_WORD. */
1113 static void
1114 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1115 unsigned HOST_WIDE_INT bitpos,
1116 unsigned HOST_WIDE_INT bitregion_start,
1117 unsigned HOST_WIDE_INT bitregion_end,
1118 rtx value)
1120 unsigned int unit;
1121 unsigned int bitsdone = 0;
1123 /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1124 much at a time. */
1125 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1126 unit = BITS_PER_WORD;
1127 else
1128 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1130 /* If VALUE is a constant other than a CONST_INT, get it into a register in
1131 WORD_MODE. If we can do this using gen_lowpart_common, do so. Note
1132 that VALUE might be a floating-point constant. */
1133 if (CONSTANT_P (value) && !CONST_INT_P (value))
1135 rtx word = gen_lowpart_common (word_mode, value);
1137 if (word && (value != word))
1138 value = word;
1139 else
1140 value = gen_lowpart_common (word_mode,
1141 force_reg (GET_MODE (value) != VOIDmode
1142 ? GET_MODE (value)
1143 : word_mode, value));
1146 while (bitsdone < bitsize)
1148 unsigned HOST_WIDE_INT thissize;
1149 rtx part, word;
1150 unsigned HOST_WIDE_INT thispos;
1151 unsigned HOST_WIDE_INT offset;
1153 offset = (bitpos + bitsdone) / unit;
1154 thispos = (bitpos + bitsdone) % unit;
1156 /* When the region of bytes we can touch is restricted, decrease
1157 UNIT close to the end of the region as needed. */
1158 if (bitregion_end
1159 && unit > BITS_PER_UNIT
1160 && bitpos + bitsdone - thispos + unit > bitregion_end + 1)
1162 unit = unit / 2;
1163 continue;
1166 /* THISSIZE must not overrun a word boundary. Otherwise,
1167 store_fixed_bit_field will call us again, and we will mutually
1168 recurse forever. */
1169 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1170 thissize = MIN (thissize, unit - thispos);
1172 if (BYTES_BIG_ENDIAN)
1174 int total_bits;
1176 /* We must do an endian conversion exactly the same way as it is
1177 done in extract_bit_field, so that the two calls to
1178 extract_fixed_bit_field will have comparable arguments. */
1179 if (!MEM_P (value) || GET_MODE (value) == BLKmode)
1180 total_bits = BITS_PER_WORD;
1181 else
1182 total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1184 /* Fetch successively less significant portions. */
1185 if (CONST_INT_P (value))
1186 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1187 >> (bitsize - bitsdone - thissize))
1188 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1189 else
1190 /* The args are chosen so that the last part includes the
1191 lsb. Give extract_bit_field the value it needs (with
1192 endianness compensation) to fetch the piece we want. */
1193 part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1194 total_bits - bitsize + bitsdone,
1195 NULL_RTX, 1, false);
1197 else
1199 /* Fetch successively more significant portions. */
1200 if (CONST_INT_P (value))
1201 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1202 >> bitsdone)
1203 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1204 else
1205 part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1206 bitsdone, NULL_RTX, 1, false);
1209 /* If OP0 is a register, then handle OFFSET here.
1211 When handling multiword bitfields, extract_bit_field may pass
1212 down a word_mode SUBREG of a larger REG for a bitfield that actually
1213 crosses a word boundary. Thus, for a SUBREG, we must find
1214 the current word starting from the base register. */
1215 if (GET_CODE (op0) == SUBREG)
1217 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1218 enum machine_mode sub_mode = GET_MODE (SUBREG_REG (op0));
1219 if (sub_mode != BLKmode && GET_MODE_SIZE (sub_mode) < UNITS_PER_WORD)
1220 word = word_offset ? const0_rtx : op0;
1221 else
1222 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1223 GET_MODE (SUBREG_REG (op0)));
1224 offset = 0;
1226 else if (REG_P (op0))
1228 enum machine_mode op0_mode = GET_MODE (op0);
1229 if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
1230 word = offset ? const0_rtx : op0;
1231 else
1232 word = operand_subword_force (op0, offset, GET_MODE (op0));
1233 offset = 0;
1235 else
1236 word = op0;
1238 /* OFFSET is in UNITs, and UNIT is in bits.
1239 store_fixed_bit_field wants offset in bytes. If WORD is const0_rtx,
1240 it is just an out-of-bounds access. Ignore it. */
1241 if (word != const0_rtx)
1242 store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT, thissize,
1243 thispos, bitregion_start, bitregion_end, part);
1244 bitsdone += thissize;
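/* Editorial example, not part of the original source: storing a 20-bit
   field that starts at bit 28 of a MEM whose alignment limits UNIT to 32
   takes two iterations of the loop above: first thissize == 4 bits at
   thispos == 28 within the first 32-bit unit, then the remaining 16 bits
   at position 0 of the next unit, each piece handed to
   store_fixed_bit_field with the matching slice of VALUE.  */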
1248 /* A subroutine of extract_bit_field_1 that converts return value X
1249 to either MODE or TMODE. MODE, TMODE and UNSIGNEDP are arguments
1250 to extract_bit_field. */
1252 static rtx
1253 convert_extracted_bit_field (rtx x, enum machine_mode mode,
1254 enum machine_mode tmode, bool unsignedp)
1256 if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1257 return x;
1259 /* If the x mode is not a scalar integral, first convert to the
1260 integer mode of that size and then access it as a floating-point
1261 value via a SUBREG. */
1262 if (!SCALAR_INT_MODE_P (tmode))
1264 enum machine_mode smode;
1266 smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1267 x = convert_to_mode (smode, x, unsignedp);
1268 x = force_reg (smode, x);
1269 return gen_lowpart (tmode, x);
1272 return convert_to_mode (tmode, x, unsignedp);
1275 /* A subroutine of extract_bit_field, with the same arguments.
1276 If FALLBACK_P is true, fall back to extract_fixed_bit_field
1277 if we can find no other means of implementing the operation.
1278 If FALLBACK_P is false, return NULL instead. */
1280 static rtx
1281 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1282 unsigned HOST_WIDE_INT bitnum,
1283 int unsignedp, bool packedp, rtx target,
1284 enum machine_mode mode, enum machine_mode tmode,
1285 bool fallback_p)
1287 unsigned int unit
1288 = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
1289 unsigned HOST_WIDE_INT offset, bitpos;
1290 rtx op0 = str_rtx;
1291 enum machine_mode int_mode;
1292 enum machine_mode ext_mode;
1293 enum machine_mode mode1;
1294 int byte_offset;
1296 if (tmode == VOIDmode)
1297 tmode = mode;
1299 while (GET_CODE (op0) == SUBREG)
1301 bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1302 op0 = SUBREG_REG (op0);
1305 /* If we have an out-of-bounds access to a register, just return an
1306 uninitialized register of the required mode. This can occur if the
1307 source code contains an out-of-bounds access to a small array. */
1308 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1309 return gen_reg_rtx (tmode);
1311 if (REG_P (op0)
1312 && mode == GET_MODE (op0)
1313 && bitnum == 0
1314 && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1316 /* We're trying to extract a full register from itself. */
1317 return op0;
1320 /* See if we can get a better vector mode before extracting. */
1321 if (VECTOR_MODE_P (GET_MODE (op0))
1322 && !MEM_P (op0)
1323 && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1325 enum machine_mode new_mode;
1327 if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1328 new_mode = MIN_MODE_VECTOR_FLOAT;
1329 else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1330 new_mode = MIN_MODE_VECTOR_FRACT;
1331 else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1332 new_mode = MIN_MODE_VECTOR_UFRACT;
1333 else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1334 new_mode = MIN_MODE_VECTOR_ACCUM;
1335 else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1336 new_mode = MIN_MODE_VECTOR_UACCUM;
1337 else
1338 new_mode = MIN_MODE_VECTOR_INT;
1340 for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1341 if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1342 && targetm.vector_mode_supported_p (new_mode))
1343 break;
1344 if (new_mode != VOIDmode)
1345 op0 = gen_lowpart (new_mode, op0);
1348 /* Use vec_extract patterns for extracting parts of vectors whenever
1349 available. */
1350 if (VECTOR_MODE_P (GET_MODE (op0))
1351 && !MEM_P (op0)
1352 && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing
1353 && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1354 == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1356 struct expand_operand ops[3];
1357 enum machine_mode outermode = GET_MODE (op0);
1358 enum machine_mode innermode = GET_MODE_INNER (outermode);
1359 enum insn_code icode = optab_handler (vec_extract_optab, outermode);
1360 unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1362 create_output_operand (&ops[0], target, innermode);
1363 create_input_operand (&ops[1], op0, outermode);
1364 create_integer_operand (&ops[2], pos);
1365 if (maybe_expand_insn (icode, 3, ops))
1367 target = ops[0].value;
1368 if (GET_MODE (target) != mode)
1369 return gen_lowpart (tmode, target);
1370 return target;
1374 /* Make sure we are playing with integral modes. Pun with subregs
1375 if we aren't. */
1377 enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1378 if (imode != GET_MODE (op0))
1380 if (MEM_P (op0))
1381 op0 = adjust_bitfield_address (op0, imode, 0);
1382 else if (imode != BLKmode)
1384 op0 = gen_lowpart (imode, op0);
1386 /* If we got a SUBREG, force it into a register since we
1387 aren't going to be able to do another SUBREG on it. */
1388 if (GET_CODE (op0) == SUBREG)
1389 op0 = force_reg (imode, op0);
1391 else if (REG_P (op0))
1393 rtx reg, subreg;
1394 imode = smallest_mode_for_size (GET_MODE_BITSIZE (GET_MODE (op0)),
1395 MODE_INT);
1396 reg = gen_reg_rtx (imode);
1397 subreg = gen_lowpart_SUBREG (GET_MODE (op0), reg);
1398 emit_move_insn (subreg, op0);
1399 op0 = reg;
1400 bitnum += SUBREG_BYTE (subreg) * BITS_PER_UNIT;
1402 else
1404 rtx mem = assign_stack_temp (GET_MODE (op0),
1405 GET_MODE_SIZE (GET_MODE (op0)));
1406 emit_move_insn (mem, op0);
1407 op0 = adjust_bitfield_address (mem, BLKmode, 0);
1412 /* Extraction of a full-word or multi-word value from a structure
1413 in a register or aligned memory can be done with just a SUBREG.
1414 A subword value in the least significant part of a register
1415 can also be extracted with a SUBREG. For this, we need the
1416 byte offset of the value in op0. */
1418 bitpos = bitnum % unit;
1419 offset = bitnum / unit;
1420 byte_offset = bitpos / BITS_PER_UNIT + offset * UNITS_PER_WORD;
1422 /* If OP0 is a register, BITPOS must count within a word.
1423 But as we have it, it counts within whatever size OP0 now has.
1424 On a bigendian machine, these are not the same, so convert. */
1425 if (BYTES_BIG_ENDIAN
1426 && !MEM_P (op0)
1427 && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
1428 bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1430 /* ??? We currently assume TARGET is at least as big as BITSIZE.
1431 If that's wrong, the solution is to test for it and set TARGET to 0
1432 if needed. */
1434 /* Only scalar integer modes can be converted via subregs. There is an
1435 additional problem for FP modes here in that they can have a precision
1436 which is different from the size. mode_for_size uses precision, but
1437 we want a mode based on the size, so we must avoid calling it for FP
1438 modes. */
1439 mode1 = (SCALAR_INT_MODE_P (tmode)
1440 ? mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0)
1441 : mode);
1443 /* If the bitfield is volatile, we need to make sure the access
1444 remains on a type-aligned boundary. */
1445 if (GET_CODE (op0) == MEM
1446 && MEM_VOLATILE_P (op0)
1447 && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
1448 && flag_strict_volatile_bitfields > 0)
1449 goto no_subreg_mode_swap;
1451 if (((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode)
1452 && bitpos % BITS_PER_WORD == 0)
1453 || (mode1 != BLKmode
1454 /* ??? The big endian test here is wrong. This is correct
1455 if the value is in a register, and if mode_for_size is not
1456 the same mode as op0. This causes us to get unnecessarily
1457 inefficient code from the Thumb port when -mbig-endian. */
1458 && (BYTES_BIG_ENDIAN
1459 ? bitpos + bitsize == BITS_PER_WORD
1460 : bitpos == 0)))
1461 && ((!MEM_P (op0)
1462 && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0))
1463 && GET_MODE_SIZE (mode1) != 0
1464 && byte_offset % GET_MODE_SIZE (mode1) == 0)
1465 || (MEM_P (op0)
1466 && (! SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
1467 || (offset * BITS_PER_UNIT % bitsize == 0
1468 && MEM_ALIGN (op0) % bitsize == 0)))))
1470 if (MEM_P (op0))
1471 op0 = adjust_bitfield_address (op0, mode1, offset);
1472 else if (mode1 != GET_MODE (op0))
1474 rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1475 byte_offset);
1476 if (sub == NULL)
1477 goto no_subreg_mode_swap;
1478 op0 = sub;
1480 if (mode1 != mode)
1481 return convert_to_mode (tmode, op0, unsignedp);
1482 return op0;
1484 no_subreg_mode_swap:
1486 /* Handle fields bigger than a word. */
1488 if (bitsize > BITS_PER_WORD)
1490 /* Here we transfer the words of the field
1491 in the order least significant first.
1492 This is because the most significant word is the one which may
1493 be less than full. */
1495 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1496 unsigned int i;
1497 rtx last;
1499 if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1500 target = gen_reg_rtx (mode);
1502 /* Indicate for flow that the entire target reg is being set. */
1503 emit_clobber (target);
1505 last = get_last_insn ();
1506 for (i = 0; i < nwords; i++)
1508 /* If I is 0, use the low-order word in both field and target;
1509 if I is 1, use the next to lowest word; and so on. */
1510 /* Word number in TARGET to use. */
1511 unsigned int wordnum
1512 = (WORDS_BIG_ENDIAN
1513 ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1514 : i);
1515 /* Offset from start of field in OP0. */
1516 unsigned int bit_offset = (WORDS_BIG_ENDIAN
1517 ? MAX (0, ((int) bitsize - ((int) i + 1)
1518 * (int) BITS_PER_WORD))
1519 : (int) i * BITS_PER_WORD);
1520 rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1521 rtx result_part
1522 = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1523 bitsize - i * BITS_PER_WORD),
1524 bitnum + bit_offset, 1, false, target_part,
1525 mode, word_mode, fallback_p);
1527 gcc_assert (target_part);
1528 if (!result_part)
1530 delete_insns_since (last);
1531 return NULL;
1534 if (result_part != target_part)
1535 emit_move_insn (target_part, result_part);
1538 if (unsignedp)
1540 /* Unless we've filled TARGET, the upper regs in a multi-reg value
1541 need to be zero'd out. */
1542 if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1544 unsigned int i, total_words;
1546 total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1547 for (i = nwords; i < total_words; i++)
1548 emit_move_insn
1549 (operand_subword (target,
1550 WORDS_BIG_ENDIAN ? total_words - i - 1 : i,
1551 1, VOIDmode),
1552 const0_rtx);
1554 return target;
1557 /* Signed bit field: sign-extend with two arithmetic shifts. */
1558 target = expand_shift (LSHIFT_EXPR, mode, target,
1559 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1560 return expand_shift (RSHIFT_EXPR, mode, target,
1561 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1564 /* From here on we know the desired field is smaller than a word. */
1566 /* Check if there is a correspondingly-sized integer field, so we can
1567 safely extract it as one size of integer, if necessary; then
1568 truncate or extend to the size that is wanted; then use SUBREGs or
1569 convert_to_mode to get one of the modes we really wanted. */
1571 int_mode = int_mode_for_mode (tmode);
1572 if (int_mode == BLKmode)
1573 int_mode = int_mode_for_mode (mode);
1574 /* Should probably push op0 out to memory and then do a load. */
1575 gcc_assert (int_mode != BLKmode);
1577 /* OFFSET is the number of words or bytes (UNIT says which)
1578 from STR_RTX to the first word or byte containing part of the field. */
1579 if (!MEM_P (op0))
1581 if (offset != 0
1582 || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1584 if (!REG_P (op0))
1585 op0 = copy_to_reg (op0);
1586 op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
1587 op0, (offset * UNITS_PER_WORD));
1589 offset = 0;
1592 /* Now OFFSET is nonzero only for memory operands. */
1593 ext_mode = mode_for_extraction (unsignedp ? EP_extzv : EP_extv, 0);
1594 if (ext_mode != MAX_MACHINE_MODE
1595 && bitsize > 0
1596 && GET_MODE_BITSIZE (ext_mode) >= bitsize
1597 /* Do not use extv/extzv for volatile bitfields when
1598 -fstrict-volatile-bitfields is in effect. */
1599 && !(MEM_P (op0) && MEM_VOLATILE_P (op0)
1600 && flag_strict_volatile_bitfields > 0)
1601 /* If op0 is a register, we need it in EXT_MODE to make it
1602 acceptable to the format of ext(z)v. */
1603 && !(GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1604 && !((REG_P (op0) || GET_CODE (op0) == SUBREG)
1605 && (bitsize + bitpos > GET_MODE_BITSIZE (ext_mode))))
1607 struct expand_operand ops[4];
1608 unsigned HOST_WIDE_INT xbitpos = bitpos, xoffset = offset;
1609 rtx xop0 = op0;
1610 rtx xtarget = target;
1611 rtx xspec_target = target;
1612 rtx xspec_target_subreg = 0;
1614 /* If op0 is a register, we need it in EXT_MODE to make it
1615 acceptable to the format of ext(z)v. */
1616 if (REG_P (xop0) && GET_MODE (xop0) != ext_mode)
1617 xop0 = gen_lowpart_SUBREG (ext_mode, xop0);
1618 if (MEM_P (xop0))
1619 /* Get ref to first byte containing part of the field. */
1620 xop0 = adjust_bitfield_address (xop0, byte_mode, xoffset);
1622 /* Now convert from counting within UNIT to counting in EXT_MODE. */
1623 if (BYTES_BIG_ENDIAN && !MEM_P (xop0))
1624 xbitpos += GET_MODE_BITSIZE (ext_mode) - unit;
1626 unit = GET_MODE_BITSIZE (ext_mode);
1628 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1629 "backwards" from the size of the unit we are extracting from.
1630 Otherwise, we count bits from the most significant on a
1631 BYTES/BITS_BIG_ENDIAN machine. */
1633 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1634 xbitpos = unit - bitsize - xbitpos;
1636 if (xtarget == 0)
1637 xtarget = xspec_target = gen_reg_rtx (tmode);
1639 if (GET_MODE (xtarget) != ext_mode)
1641 /* Don't use LHS paradoxical subreg if explicit truncation is needed
1642 between the mode of the extraction (word_mode) and the target
1643 mode. Instead, create a temporary and use convert_move to set
1644 the target. */
1645 if (REG_P (xtarget)
1646 && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (xtarget), ext_mode))
1648 xtarget = gen_lowpart (ext_mode, xtarget);
1649 if (GET_MODE_PRECISION (ext_mode)
1650 > GET_MODE_PRECISION (GET_MODE (xspec_target)))
1651 xspec_target_subreg = xtarget;
1653 else
1654 xtarget = gen_reg_rtx (ext_mode);
1657 create_output_operand (&ops[0], xtarget, ext_mode);
1658 create_fixed_operand (&ops[1], xop0);
1659 create_integer_operand (&ops[2], bitsize);
1660 create_integer_operand (&ops[3], xbitpos);
1661 if (maybe_expand_insn (unsignedp ? CODE_FOR_extzv : CODE_FOR_extv,
1662 4, ops))
1664 xtarget = ops[0].value;
1665 if (xtarget == xspec_target)
1666 return xtarget;
1667 if (xtarget == xspec_target_subreg)
1668 return xspec_target;
1669 return convert_extracted_bit_field (xtarget, mode, tmode, unsignedp);
1673 /* If OP0 is a memory, try copying it to a register and seeing if a
1674 cheap register alternative is available. */
1675 if (ext_mode != MAX_MACHINE_MODE && MEM_P (op0))
1677 enum machine_mode bestmode;
1679 /* Get the mode to use for inserting into this field. If
1680 OP0 is BLKmode, get the smallest mode consistent with the
1681 alignment. If OP0 is a non-BLKmode object that is no
1682 wider than EXT_MODE, use its mode. Otherwise, use the
1683 smallest mode containing the field. */
1685 if (GET_MODE (op0) == BLKmode
1686 || (ext_mode != MAX_MACHINE_MODE
1687 && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (ext_mode)))
1688 bestmode = get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0),
1689 (ext_mode == MAX_MACHINE_MODE
1690 ? VOIDmode : ext_mode),
1691 MEM_VOLATILE_P (op0));
1692 else
1693 bestmode = GET_MODE (op0);
1695 if (bestmode != VOIDmode
1696 && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
1697 && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
1699 unsigned HOST_WIDE_INT xoffset, xbitpos;
1701 /* Compute the offset as a multiple of this unit,
1702 counting in bytes. */
1703 unit = GET_MODE_BITSIZE (bestmode);
1704 xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1705 xbitpos = bitnum % unit;
1707 /* Make sure the register is big enough for the whole field. */
1708 if (xoffset * BITS_PER_UNIT + unit
1709 >= offset * BITS_PER_UNIT + bitsize)
1711 rtx last, result, xop0;
1713 last = get_last_insn ();
1715 /* Fetch it to a register in that size. */
1716 xop0 = adjust_bitfield_address (op0, bestmode, xoffset);
1717 xop0 = force_reg (bestmode, xop0);
1718 result = extract_bit_field_1 (xop0, bitsize, xbitpos,
1719 unsignedp, packedp, target,
1720 mode, tmode, false);
1721 if (result)
1722 return result;
1724 delete_insns_since (last);
1729 if (!fallback_p)
1730 return NULL;
1732 target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1733 bitpos, target, unsignedp, packedp);
1734 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1737 /* Generate code to extract a bit-field from STR_RTX
1738 containing BITSIZE bits, starting at BITNUM,
1739 and put it in TARGET if possible (if TARGET is nonzero).
1740 Regardless of TARGET, we return the rtx for where the value is placed.
1742 STR_RTX is the structure containing the byte (a REG or MEM).
1743 UNSIGNEDP is nonzero if this is an unsigned bit field.
1744 PACKEDP is nonzero if the field has the packed attribute.
1745 MODE is the natural mode of the field value once extracted.
1746 TMODE is the mode the caller would like the value to have;
1747 but the value may be returned with type MODE instead.
1749 If a TARGET is specified and we can store in it at no extra cost,
1750 we do so, and return TARGET.
1751 Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1752 if they are equally easy. */
1755 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1756 unsigned HOST_WIDE_INT bitnum, int unsignedp, bool packedp,
1757 rtx target, enum machine_mode mode, enum machine_mode tmode)
1759 return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp, packedp,
1760 target, mode, tmode, true);
1763 /* Extract a bit field using shifts and boolean operations.
1764 Returns an rtx to represent the value.
1765 OP0 addresses a register (word) or memory (byte).
1766 BITPOS says which bit within the word or byte the bit field starts in.
1767 OFFSET says how many bytes farther the bit field starts;
1768 it is 0 if OP0 is a register.
1769 BITSIZE says how many bits long the bit field is.
1770 (If OP0 is a register, it may be narrower than a full word,
1771 but BITPOS still counts within a full word,
1772 which is significant on big-endian machines.)
1774 UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1775 PACKEDP is true if the field has the packed attribute.
1777 If TARGET is nonzero, attempts to store the value there
1778 and return TARGET, but this is not guaranteed.
1779 If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */
1781 static rtx
1782 extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
1783 unsigned HOST_WIDE_INT offset,
1784 unsigned HOST_WIDE_INT bitsize,
1785 unsigned HOST_WIDE_INT bitpos, rtx target,
1786 int unsignedp, bool packedp)
1788 unsigned int total_bits = BITS_PER_WORD;
1789 enum machine_mode mode;
1791 if (GET_CODE (op0) == SUBREG || REG_P (op0))
1793 /* Special treatment for a bit field split across two registers. */
1794 if (bitsize + bitpos > BITS_PER_WORD)
1795 return extract_split_bit_field (op0, bitsize, bitpos, unsignedp);
1797 else
1799 /* Get the proper mode to use for this field. We want a mode that
1800 includes the entire field. If such a mode would be larger than
1801 a word, we won't be doing the extraction the normal way. */
1803 if (MEM_VOLATILE_P (op0)
1804 && flag_strict_volatile_bitfields > 0)
1806 if (GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
1807 mode = GET_MODE (op0);
1808 else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1809 mode = GET_MODE (target);
1810 else
1811 mode = tmode;
1813 else
1814 mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT, 0, 0,
1815 MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));
1817 if (mode == VOIDmode)
1818 /* The only way this should occur is if the field spans word
1819 boundaries. */
1820 return extract_split_bit_field (op0, bitsize,
1821 bitpos + offset * BITS_PER_UNIT,
1822 unsignedp);
1824 total_bits = GET_MODE_BITSIZE (mode);
1826 /* Make sure bitpos is valid for the chosen mode. Adjust BITPOS to
1827 be in the range 0 to total_bits-1, and put any excess bytes in
1828 OFFSET. */
1829 if (bitpos >= total_bits)
1831 offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
1832 bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
1833 * BITS_PER_UNIT);
1836 /* If we're accessing a volatile MEM, we can't do the next
1837 alignment step if it results in a multi-word access where we
1838 otherwise wouldn't have one. So, check for that case
1839 here. */
1840 if (MEM_P (op0)
1841 && MEM_VOLATILE_P (op0)
1842 && flag_strict_volatile_bitfields > 0
1843 && bitpos + bitsize <= total_bits
1844 && bitpos + bitsize + (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT > total_bits)
1846 if (STRICT_ALIGNMENT)
1848 static bool informed_about_misalignment = false;
1849 bool warned;
1851 if (packedp)
1853 if (bitsize == total_bits)
1854 warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1855 "multiple accesses to volatile structure member"
1856 " because of packed attribute");
1857 else
1858 warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1859 "multiple accesses to volatile structure bitfield"
1860 " because of packed attribute");
1862 return extract_split_bit_field (op0, bitsize,
1863 bitpos + offset * BITS_PER_UNIT,
1864 unsignedp);
1867 if (bitsize == total_bits)
1868 warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1869 "mis-aligned access used for structure member");
1870 else
1871 warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1872 "mis-aligned access used for structure bitfield");
1874 if (! informed_about_misalignment && warned)
1876 informed_about_misalignment = true;
1877 inform (input_location,
1878 "when a volatile object spans multiple type-sized locations,"
1879 " the compiler must choose between using a single mis-aligned access to"
1880 " preserve the volatility, or using multiple aligned accesses to avoid"
1881 " runtime faults; this code may fail at runtime if the hardware does"
1882 " not allow this access");
1886 else
1889 /* Get ref to an aligned byte, halfword, or word containing the field.
1890 Adjust BITPOS to be position within a word,
1891 and OFFSET to be the offset of that word.
1892 Then alter OP0 to refer to that word. */
1893 bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
1894 offset -= (offset % (total_bits / BITS_PER_UNIT));
1897 op0 = adjust_bitfield_address (op0, mode, offset);
1900 mode = GET_MODE (op0);
1902 if (BYTES_BIG_ENDIAN)
1903 /* BITPOS is the distance between our msb and that of OP0.
1904 Convert it to the distance from the lsb. */
1905 bitpos = total_bits - bitsize - bitpos;
1907 /* Now BITPOS is always the distance between the field's lsb and that of OP0.
1908 We have reduced the big-endian case to the little-endian case. */
1910 if (unsignedp)
1912 if (bitpos)
1914 /* If the field does not already start at the lsb,
1915 shift it so it does. */
1916 /* Maybe propagate the target for the shift. */
1917 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1918 if (tmode != mode)
1919 subtarget = 0;
1920 op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitpos, subtarget, 1);
1922 /* Convert the value to the desired mode. */
1923 if (mode != tmode)
1924 op0 = convert_to_mode (tmode, op0, 1);
1926 /* Unless the msb of the field used to be the msb when we shifted,
1927 mask out the upper bits. */
1929 if (GET_MODE_BITSIZE (mode) != bitpos + bitsize)
1930 return expand_binop (GET_MODE (op0), and_optab, op0,
1931 mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1932 target, 1, OPTAB_LIB_WIDEN);
1933 return op0;
1936 /* To extract a signed bit-field, first shift its msb to the msb of the word,
1937 then arithmetic-shift its lsb to the lsb of the word. */
1938 op0 = force_reg (mode, op0);
1940 /* Find the narrowest integer mode that contains the field. */
1942 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1943 mode = GET_MODE_WIDER_MODE (mode))
1944 if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos)
1946 op0 = convert_to_mode (mode, op0, 0);
1947 break;
1950 if (mode != tmode)
1951 target = 0;
1953 if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos))
1955 int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitpos);
1956 /* Maybe propagate the target for the shift. */
1957 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1958 op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1961 return expand_shift (RSHIFT_EXPR, mode, op0,
1962 GET_MODE_BITSIZE (mode) - bitsize, target, 0);
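/* Editorial sketch, not part of GCC: the shift-and-mask strategy used by
   extract_fixed_bit_field, written out for a plain 32-bit word.  The
   unsigned path shifts the field down to the lsb and masks off the upper
   bits (the mask corresponds to mask_rtx (mode, 0, bitsize, 0)); the
   signed path pushes the field's msb up to the word's msb and then
   arithmetic-shifts back down.  Assumes 32-bit int and an arithmetic
   right shift of signed values, as on typical targets.  */

static unsigned int
sketch_extract_unsigned (unsigned int word, int bitpos, int bitsize)
{
  unsigned int mask = (bitsize < 32
                       ? ((unsigned int) 1 << bitsize) - 1
                       : ~(unsigned int) 0);
  return (word >> bitpos) & mask;
}

static int
sketch_extract_signed (unsigned int word, int bitpos, int bitsize)
{
  int shifted = (int) (word << (32 - bitpos - bitsize));
  return shifted >> (32 - bitsize);
}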
1965 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1966 of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1967 complement of that if COMPLEMENT. The mask is truncated if
1968 necessary to the width of mode MODE. The mask is zero-extended if
1969 BITSIZE+BITPOS is too small for MODE. */
1971 static rtx
1972 mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
1974 double_int mask;
1976 mask = double_int::mask (bitsize);
1977 mask = mask.llshift (bitpos, HOST_BITS_PER_DOUBLE_INT);
1979 if (complement)
1980 mask = ~mask;
1982 return immed_double_int_const (mask, mode);
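/* Editorial sketch, not part of GCC: the value mask_rtx encodes, computed
   with a host integer instead of a double_int.  For example, bitsize 5 and
   bitpos 3 give 0xf8; with COMPLEMENT set the result is ~0xf8.  */

static unsigned long long
sketch_mask (int bitpos, int bitsize, int complement)
{
  unsigned long long mask = (bitsize < 64 ? (1ULL << bitsize) - 1 : ~0ULL);
  mask <<= bitpos;
  return complement ? ~mask : mask;
}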
1985 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1986 VALUE truncated to BITSIZE bits and then shifted left BITPOS bits. */
1988 static rtx
1989 lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize)
1991 double_int val;
1993 val = double_int::from_uhwi (INTVAL (value)).zext (bitsize);
1994 val = val.llshift (bitpos, HOST_BITS_PER_DOUBLE_INT);
1996 return immed_double_int_const (val, mode);
1999 /* Extract a bit field that is split across two words
2000 and return an RTX for the result.
2002 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
2003 BITSIZE is the field width; BITPOS, position of its first bit, in the word.
2004 UNSIGNEDP is 1 if we should zero-extend the contents; else sign-extend. */
2006 static rtx
2007 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
2008 unsigned HOST_WIDE_INT bitpos, int unsignedp)
2010 unsigned int unit;
2011 unsigned int bitsdone = 0;
2012 rtx result = NULL_RTX;
2013 int first = 1;
2015 /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
2016 much at a time. */
2017 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
2018 unit = BITS_PER_WORD;
2019 else
2020 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
2022 while (bitsdone < bitsize)
2024 unsigned HOST_WIDE_INT thissize;
2025 rtx part, word;
2026 unsigned HOST_WIDE_INT thispos;
2027 unsigned HOST_WIDE_INT offset;
2029 offset = (bitpos + bitsdone) / unit;
2030 thispos = (bitpos + bitsdone) % unit;
2032 /* THISSIZE must not overrun a word boundary. Otherwise,
2033 extract_fixed_bit_field will call us again, and we will mutually
2034 recurse forever. */
2035 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
2036 thissize = MIN (thissize, unit - thispos);
2038 /* If OP0 is a register, then handle OFFSET here.
2040 When handling multiword bitfields, extract_bit_field may pass
2041 down a word_mode SUBREG of a larger REG for a bitfield that actually
2042 crosses a word boundary. Thus, for a SUBREG, we must find
2043 the current word starting from the base register. */
2044 if (GET_CODE (op0) == SUBREG)
2046 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
2047 word = operand_subword_force (SUBREG_REG (op0), word_offset,
2048 GET_MODE (SUBREG_REG (op0)));
2049 offset = 0;
2051 else if (REG_P (op0))
2053 word = operand_subword_force (op0, offset, GET_MODE (op0));
2054 offset = 0;
2056 else
2057 word = op0;
2059 /* Extract the parts in bit-counting order,
2060 whose meaning is determined by BYTES_BIG_ENDIAN.
2061 OFFSET is in UNITs, and UNIT is in bits.
2062 extract_fixed_bit_field wants offset in bytes. */
2063 part = extract_fixed_bit_field (word_mode, word,
2064 offset * unit / BITS_PER_UNIT,
2065 thissize, thispos, 0, 1, false);
2066 bitsdone += thissize;
2068 /* Shift this part into place for the result. */
2069 if (BYTES_BIG_ENDIAN)
2071 if (bitsize != bitsdone)
2072 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2073 bitsize - bitsdone, 0, 1);
2075 else
2077 if (bitsdone != thissize)
2078 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2079 bitsdone - thissize, 0, 1);
2082 if (first)
2083 result = part;
2084 else
2085 /* Combine the parts with bitwise or. This works
2086 because we extracted each part as an unsigned bit field. */
2087 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2088 OPTAB_LIB_WIDEN);
2090 first = 0;
2093 /* Unsigned bit field: we are done. */
2094 if (unsignedp)
2095 return result;
2096 /* Signed bit field: sign-extend with two arithmetic shifts. */
2097 result = expand_shift (LSHIFT_EXPR, word_mode, result,
2098 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2099 return expand_shift (RSHIFT_EXPR, word_mode, result,
2100 BITS_PER_WORD - bitsize, NULL_RTX, 0);
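/* Editorial sketch, not part of GCC: extracting an unsigned field that
   straddles two 32-bit words, little-endian bit numbering.  The low part
   comes from WORD0, the high part from WORD1, and the two are combined
   with IOR exactly as the loop above does.  Assumes 0 < bitpos < 32,
   bitsize <= 32 and bitpos + bitsize > 32.  */

static unsigned int
sketch_extract_split (unsigned int word0, unsigned int word1,
                      int bitpos, int bitsize)
{
  int lo_bits = 32 - bitpos;              /* bits taken from word0 */
  int hi_bits = bitsize - lo_bits;        /* bits taken from word1 */
  unsigned int lo = word0 >> bitpos;
  unsigned int hi = word1 & (((unsigned int) 1 << hi_bits) - 1);
  return lo | (hi << lo_bits);
}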
2103 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2104 the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than
2105 MODE, fill the upper bits with zeros. Fail if the layout of either
2106 mode is unknown (as for CC modes) or if the extraction would involve
2107 unprofitable mode punning. Return the value on success, otherwise
2108 return null.
2110 This is different from gen_lowpart* in these respects:
2112 - the returned value must always be considered an rvalue
2114 - when MODE is wider than SRC_MODE, the extraction involves
2115 a zero extension
2117 - when MODE is smaller than SRC_MODE, the extraction involves
2118 a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
2120 In other words, this routine performs a computation, whereas the
2121 gen_lowpart* routines are conceptually lvalue or rvalue subreg
2122 operations. */
2125 extract_low_bits (enum machine_mode mode, enum machine_mode src_mode, rtx src)
2127 enum machine_mode int_mode, src_int_mode;
2129 if (mode == src_mode)
2130 return src;
2132 if (CONSTANT_P (src))
2134 /* simplify_gen_subreg can't be used here, as if simplify_subreg
2135 fails, it will happily create (subreg (symbol_ref)) or similar
2136 invalid SUBREGs. */
2137 unsigned int byte = subreg_lowpart_offset (mode, src_mode);
2138 rtx ret = simplify_subreg (mode, src, src_mode, byte);
2139 if (ret)
2140 return ret;
2142 if (GET_MODE (src) == VOIDmode
2143 || !validate_subreg (mode, src_mode, src, byte))
2144 return NULL_RTX;
2146 src = force_reg (GET_MODE (src), src);
2147 return gen_rtx_SUBREG (mode, src, byte);
2150 if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2151 return NULL_RTX;
2153 if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2154 && MODES_TIEABLE_P (mode, src_mode))
2156 rtx x = gen_lowpart_common (mode, src);
2157 if (x)
2158 return x;
2161 src_int_mode = int_mode_for_mode (src_mode);
2162 int_mode = int_mode_for_mode (mode);
2163 if (src_int_mode == BLKmode || int_mode == BLKmode)
2164 return NULL_RTX;
2166 if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2167 return NULL_RTX;
2168 if (!MODES_TIEABLE_P (int_mode, mode))
2169 return NULL_RTX;
2171 src = gen_lowpart (src_int_mode, src);
2172 src = convert_modes (int_mode, src_int_mode, src, true);
2173 src = gen_lowpart (mode, src);
2174 return src;
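/* Editorial sketch, not part of GCC: the kind of bit-pattern
   reinterpretation extract_low_bits performs when MODE and SRC_MODE belong
   to different classes, modelled at the C level with a union.  Only the
   bits are copied; no numeric conversion takes place.  Assumes 'float' and
   'unsigned int' are both 32 bits wide.  */

static unsigned int
sketch_float_bits (float f)
{
  union { float f; unsigned int u; } u;
  u.f = f;
  return u.u;        /* same 32-bit pattern, now an integer rvalue */
}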
2177 /* Add INC into TARGET. */
2179 void
2180 expand_inc (rtx target, rtx inc)
2182 rtx value = expand_binop (GET_MODE (target), add_optab,
2183 target, inc,
2184 target, 0, OPTAB_LIB_WIDEN);
2185 if (value != target)
2186 emit_move_insn (target, value);
2189 /* Subtract DEC from TARGET. */
2191 void
2192 expand_dec (rtx target, rtx dec)
2194 rtx value = expand_binop (GET_MODE (target), sub_optab,
2195 target, dec,
2196 target, 0, OPTAB_LIB_WIDEN);
2197 if (value != target)
2198 emit_move_insn (target, value);
2201 /* Output a shift instruction for expression code CODE,
2202 with SHIFTED being the rtx for the value to shift,
2203 and AMOUNT the rtx for the amount to shift by.
2204 Store the result in the rtx TARGET, if that is convenient.
2205 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2206 Return the rtx for where the value is. */
2208 static rtx
2209 expand_shift_1 (enum tree_code code, enum machine_mode mode, rtx shifted,
2210 rtx amount, rtx target, int unsignedp)
2212 rtx op1, temp = 0;
2213 int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2214 int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2215 optab lshift_optab = ashl_optab;
2216 optab rshift_arith_optab = ashr_optab;
2217 optab rshift_uns_optab = lshr_optab;
2218 optab lrotate_optab = rotl_optab;
2219 optab rrotate_optab = rotr_optab;
2220 enum machine_mode op1_mode;
2221 int attempt;
2222 bool speed = optimize_insn_for_speed_p ();
2224 op1 = amount;
2225 op1_mode = GET_MODE (op1);
2227 /* Determine whether the shift/rotate amount is a vector, or scalar. If the
2228 shift amount is a vector, use the vector/vector shift patterns. */
2229 if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2231 lshift_optab = vashl_optab;
2232 rshift_arith_optab = vashr_optab;
2233 rshift_uns_optab = vlshr_optab;
2234 lrotate_optab = vrotl_optab;
2235 rrotate_optab = vrotr_optab;
2238 /* Previously, this code detected shift counts computed by NEGATE_EXPR
2239 and shifted in the other direction, but that does not work
2240 on all machines. */
2242 if (SHIFT_COUNT_TRUNCATED)
2244 if (CONST_INT_P (op1)
2245 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2246 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
2247 op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2248 % GET_MODE_BITSIZE (mode));
2249 else if (GET_CODE (op1) == SUBREG
2250 && subreg_lowpart_p (op1)
2251 && INTEGRAL_MODE_P (GET_MODE (SUBREG_REG (op1))))
2252 op1 = SUBREG_REG (op1);
2255 if (op1 == const0_rtx)
2256 return shifted;
2258 /* Check whether it's cheaper to implement a left shift by a constant
2259 bit count by a sequence of additions. */
2260 if (code == LSHIFT_EXPR
2261 && CONST_INT_P (op1)
2262 && INTVAL (op1) > 0
2263 && INTVAL (op1) < GET_MODE_PRECISION (mode)
2264 && INTVAL (op1) < MAX_BITS_PER_WORD
2265 && (shift_cost (speed, mode, INTVAL (op1))
2266 > INTVAL (op1) * add_cost (speed, mode))
2267 && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2269 int i;
2270 for (i = 0; i < INTVAL (op1); i++)
2272 temp = force_reg (mode, shifted);
2273 shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2274 unsignedp, OPTAB_LIB_WIDEN);
2276 return shifted;
2279 for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2281 enum optab_methods methods;
2283 if (attempt == 0)
2284 methods = OPTAB_DIRECT;
2285 else if (attempt == 1)
2286 methods = OPTAB_WIDEN;
2287 else
2288 methods = OPTAB_LIB_WIDEN;
2290 if (rotate)
2292 /* Widening does not work for rotation. */
2293 if (methods == OPTAB_WIDEN)
2294 continue;
2295 else if (methods == OPTAB_LIB_WIDEN)
2297 /* If we have been unable to open-code this by a rotation,
2298 do it as the IOR of two shifts. I.e., to rotate A
2299 by N bits, compute (A << N) | ((unsigned) A >> (C - N))
2300 where C is the bitsize of A.
2302 It is theoretically possible that the target machine might
2303 not be able to perform either shift and hence we would
2304 be making two libcalls rather than just the one for the
2305 shift (similarly if IOR could not be done). We will allow
2306 this extremely unlikely lossage to avoid complicating the
2307 code below. */
2309 rtx subtarget = target == shifted ? 0 : target;
2310 rtx new_amount, other_amount;
2311 rtx temp1;
2313 new_amount = op1;
2314 if (CONST_INT_P (op1))
2315 other_amount = GEN_INT (GET_MODE_BITSIZE (mode)
2316 - INTVAL (op1));
2317 else
2318 other_amount
2319 = simplify_gen_binary (MINUS, GET_MODE (op1),
2320 GEN_INT (GET_MODE_PRECISION (mode)),
2321 op1);
2323 shifted = force_reg (mode, shifted);
2325 temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2326 mode, shifted, new_amount, 0, 1);
2327 temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2328 mode, shifted, other_amount,
2329 subtarget, 1);
2330 return expand_binop (mode, ior_optab, temp, temp1, target,
2331 unsignedp, methods);
2334 temp = expand_binop (mode,
2335 left ? lrotate_optab : rrotate_optab,
2336 shifted, op1, target, unsignedp, methods);
2338 else if (unsignedp)
2339 temp = expand_binop (mode,
2340 left ? lshift_optab : rshift_uns_optab,
2341 shifted, op1, target, unsignedp, methods);
2343 /* Do arithmetic shifts.
2344 Also, if we are going to widen the operand, we can just as well
2345 use an arithmetic right-shift instead of a logical one. */
2346 if (temp == 0 && ! rotate
2347 && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2349 enum optab_methods methods1 = methods;
2351 /* If trying to widen a log shift to an arithmetic shift,
2352 don't accept an arithmetic shift of the same size. */
2353 if (unsignedp)
2354 methods1 = OPTAB_MUST_WIDEN;
2356 /* Arithmetic shift */
2358 temp = expand_binop (mode,
2359 left ? lshift_optab : rshift_arith_optab,
2360 shifted, op1, target, unsignedp, methods1);
2363 /* We used to try extzv here for logical right shifts, but that was
2364 only useful for one machine, the VAX, and caused poor code
2365 generation there for lshrdi3, so the code was deleted and a
2366 define_expand for lshrsi3 was added to vax.md. */
2369 gcc_assert (temp);
2370 return temp;
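/* Editorial sketch, not part of GCC: the IOR-of-two-shifts fallback that
   expand_shift_1 uses for rotates when no rotate pattern can be
   open-coded, i.e. (A << N) | ((unsigned) A >> (C - N)) where C is the
   bitsize of A.  Written for a 32-bit value; assumes 0 < n < 32.  */

static unsigned int
sketch_rotate_left (unsigned int a, int n)
{
  return (a << n) | (a >> (32 - n));
}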
2373 /* Output a shift instruction for expression code CODE,
2374 with SHIFTED being the rtx for the value to shift,
2375 and AMOUNT the amount to shift by.
2376 Store the result in the rtx TARGET, if that is convenient.
2377 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2378 Return the rtx for where the value is. */
2381 expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2382 int amount, rtx target, int unsignedp)
2384 return expand_shift_1 (code, mode,
2385 shifted, GEN_INT (amount), target, unsignedp);
2388 /* Output a shift instruction for expression code CODE,
2389 with SHIFTED being the rtx for the value to shift,
2390 and AMOUNT the tree for the amount to shift by.
2391 Store the result in the rtx TARGET, if that is convenient.
2392 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2393 Return the rtx for where the value is. */
2396 expand_variable_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2397 tree amount, rtx target, int unsignedp)
2399 return expand_shift_1 (code, mode,
2400 shifted, expand_normal (amount), target, unsignedp);
2404 /* Indicates the type of fixup needed after a constant multiplication.
2405 BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2406 the result should be negated, and ADD_VARIANT means that the
2407 multiplicand should be added to the result. */
2408 enum mult_variant {basic_variant, negate_variant, add_variant};
2410 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2411 const struct mult_cost *, enum machine_mode mode);
2412 static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
2413 struct algorithm *, enum mult_variant *, int);
2414 static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
2415 const struct algorithm *, enum mult_variant);
2416 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2417 static rtx extract_high_half (enum machine_mode, rtx);
2418 static rtx expmed_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
2419 static rtx expmed_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
2420 int, int);
2421 /* Compute and return the best algorithm for multiplying by T.
2422 The algorithm must cost less than cost_limit.
2423 If retval.cost >= COST_LIMIT, no algorithm was found and all
2424 other fields of the returned struct are undefined.
2425 MODE is the machine mode of the multiplication. */
2427 static void
2428 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2429 const struct mult_cost *cost_limit, enum machine_mode mode)
2431 int m;
2432 struct algorithm *alg_in, *best_alg;
2433 struct mult_cost best_cost;
2434 struct mult_cost new_limit;
2435 int op_cost, op_latency;
2436 unsigned HOST_WIDE_INT orig_t = t;
2437 unsigned HOST_WIDE_INT q;
2438 int maxm, hash_index;
2439 bool cache_hit = false;
2440 enum alg_code cache_alg = alg_zero;
2441 bool speed = optimize_insn_for_speed_p ();
2442 enum machine_mode imode;
2443 struct alg_hash_entry *entry_ptr;
2445 /* Indicate that no algorithm is yet found. If no algorithm
2446 is found, this value will be returned to indicate failure. */
2447 alg_out->cost.cost = cost_limit->cost + 1;
2448 alg_out->cost.latency = cost_limit->latency + 1;
2450 if (cost_limit->cost < 0
2451 || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2452 return;
2454 /* Be prepared for vector modes. */
2455 imode = GET_MODE_INNER (mode);
2456 if (imode == VOIDmode)
2457 imode = mode;
2459 maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2461 /* Restrict the bits of "t" to the multiplication's mode. */
2462 t &= GET_MODE_MASK (imode);
2464 /* t == 1 can be done in zero cost. */
2465 if (t == 1)
2467 alg_out->ops = 1;
2468 alg_out->cost.cost = 0;
2469 alg_out->cost.latency = 0;
2470 alg_out->op[0] = alg_m;
2471 return;
2474 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2475 fail now. */
2476 if (t == 0)
2478 if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2479 return;
2480 else
2482 alg_out->ops = 1;
2483 alg_out->cost.cost = zero_cost (speed);
2484 alg_out->cost.latency = zero_cost (speed);
2485 alg_out->op[0] = alg_zero;
2486 return;
2490 /* We'll be needing a couple extra algorithm structures now. */
2492 alg_in = XALLOCA (struct algorithm);
2493 best_alg = XALLOCA (struct algorithm);
2494 best_cost = *cost_limit;
2496 /* Compute the hash index. */
2497 hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2499 /* See if we already know what to do for T. */
2500 entry_ptr = alg_hash_entry_ptr (hash_index);
2501 if (entry_ptr->t == t
2502 && entry_ptr->mode == mode
2504 && entry_ptr->speed == speed
2505 && entry_ptr->alg != alg_unknown)
2507 cache_alg = entry_ptr->alg;
2509 if (cache_alg == alg_impossible)
2511 /* The cache tells us that it's impossible to synthesize
2512 multiplication by T within entry_ptr->cost. */
2513 if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2514 /* COST_LIMIT is at least as restrictive as the one
2515 recorded in the hash table, in which case we have no
2516 hope of synthesizing a multiplication. Just
2517 return. */
2518 return;
2520 /* If we get here, COST_LIMIT is less restrictive than the
2521 one recorded in the hash table, so we may be able to
2522 synthesize a multiplication. Proceed as if we didn't
2523 have the cache entry. */
2525 else
2527 if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2528 /* The cached algorithm shows that this multiplication
2529 requires more cost than COST_LIMIT. Just return. This
2530 way, we don't clobber this cache entry with
2531 alg_impossible but retain useful information. */
2532 return;
2534 cache_hit = true;
2536 switch (cache_alg)
2538 case alg_shift:
2539 goto do_alg_shift;
2541 case alg_add_t_m2:
2542 case alg_sub_t_m2:
2543 goto do_alg_addsub_t_m2;
2545 case alg_add_factor:
2546 case alg_sub_factor:
2547 goto do_alg_addsub_factor;
2549 case alg_add_t2_m:
2550 goto do_alg_add_t2_m;
2552 case alg_sub_t2_m:
2553 goto do_alg_sub_t2_m;
2555 default:
2556 gcc_unreachable ();
2561 /* If we have a group of zero bits at the low-order part of T, try
2562 multiplying by the remaining bits and then doing a shift. */
2564 if ((t & 1) == 0)
2566 do_alg_shift:
2567 m = floor_log2 (t & -t); /* m = number of low zero bits */
2568 if (m < maxm)
2570 q = t >> m;
2571 /* The function expand_shift will choose between a shift and
2572 a sequence of additions, so the observed cost is given as
2573 MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)). */
2574 op_cost = m * add_cost (speed, mode);
2575 if (shift_cost (speed, mode, m) < op_cost)
2576 op_cost = shift_cost (speed, mode, m);
2577 new_limit.cost = best_cost.cost - op_cost;
2578 new_limit.latency = best_cost.latency - op_cost;
2579 synth_mult (alg_in, q, &new_limit, mode);
2581 alg_in->cost.cost += op_cost;
2582 alg_in->cost.latency += op_cost;
2583 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2585 struct algorithm *x;
2586 best_cost = alg_in->cost;
2587 x = alg_in, alg_in = best_alg, best_alg = x;
2588 best_alg->log[best_alg->ops] = m;
2589 best_alg->op[best_alg->ops] = alg_shift;
2592 /* See if treating ORIG_T as a signed number yields a better
2593 sequence. Try this sequence only for a negative ORIG_T
2594 as it would be useless for a non-negative ORIG_T. */
2595 if ((HOST_WIDE_INT) orig_t < 0)
2597 /* Shift ORIG_T as follows because a right shift of a
2598 negative-valued signed type is implementation
2599 defined. */
2600 q = ~(~orig_t >> m);
2601 /* The function expand_shift will choose between a shift
2602 and a sequence of additions, so the observed cost is
2603 given as MIN (m * add_cost(speed, mode),
2604 shift_cost(speed, mode, m)). */
2605 op_cost = m * add_cost (speed, mode);
2606 if (shift_cost (speed, mode, m) < op_cost)
2607 op_cost = shift_cost (speed, mode, m);
2608 new_limit.cost = best_cost.cost - op_cost;
2609 new_limit.latency = best_cost.latency - op_cost;
2610 synth_mult (alg_in, q, &new_limit, mode);
2612 alg_in->cost.cost += op_cost;
2613 alg_in->cost.latency += op_cost;
2614 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2616 struct algorithm *x;
2617 best_cost = alg_in->cost;
2618 x = alg_in, alg_in = best_alg, best_alg = x;
2619 best_alg->log[best_alg->ops] = m;
2620 best_alg->op[best_alg->ops] = alg_shift;
2624 if (cache_hit)
2625 goto done;
2628 /* If we have an odd number, add or subtract one. */
2629 if ((t & 1) != 0)
2631 unsigned HOST_WIDE_INT w;
2633 do_alg_addsub_t_m2:
2634 for (w = 1; (w & t) != 0; w <<= 1)
2636 /* If T was -1, then W will be zero after the loop. This is another
2637 case where T ends with ...111. Handling this with (T + 1) and
2638 subtracting 1 produces slightly better code and makes algorithm
2639 selection much faster than treating it like the ...0111 case
2640 below. */
2641 if (w == 0
2642 || (w > 2
2643 /* Reject the case where t is 3.
2644 Thus we prefer addition in that case. */
2645 && t != 3))
2647 /* T ends with ...111. Multiply by (T + 1) and subtract 1. */
2649 op_cost = add_cost (speed, mode);
2650 new_limit.cost = best_cost.cost - op_cost;
2651 new_limit.latency = best_cost.latency - op_cost;
2652 synth_mult (alg_in, t + 1, &new_limit, mode);
2654 alg_in->cost.cost += op_cost;
2655 alg_in->cost.latency += op_cost;
2656 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2658 struct algorithm *x;
2659 best_cost = alg_in->cost;
2660 x = alg_in, alg_in = best_alg, best_alg = x;
2661 best_alg->log[best_alg->ops] = 0;
2662 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2665 else
2667 /* T ends with ...01 or ...011. Multiply by (T - 1) and add 1. */
2669 op_cost = add_cost (speed, mode);
2670 new_limit.cost = best_cost.cost - op_cost;
2671 new_limit.latency = best_cost.latency - op_cost;
2672 synth_mult (alg_in, t - 1, &new_limit, mode);
2674 alg_in->cost.cost += op_cost;
2675 alg_in->cost.latency += op_cost;
2676 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2678 struct algorithm *x;
2679 best_cost = alg_in->cost;
2680 x = alg_in, alg_in = best_alg, best_alg = x;
2681 best_alg->log[best_alg->ops] = 0;
2682 best_alg->op[best_alg->ops] = alg_add_t_m2;
2686 /* We may be able to calculate a * -7, a * -15, a * -31, etc.
2687 quickly with a - a * n for some appropriate constant n. */
2688 m = exact_log2 (-orig_t + 1);
2689 if (m >= 0 && m < maxm)
2691 op_cost = shiftsub1_cost (speed, mode, m);
2692 new_limit.cost = best_cost.cost - op_cost;
2693 new_limit.latency = best_cost.latency - op_cost;
2694 synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2695 &new_limit, mode);
2697 alg_in->cost.cost += op_cost;
2698 alg_in->cost.latency += op_cost;
2699 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2701 struct algorithm *x;
2702 best_cost = alg_in->cost;
2703 x = alg_in, alg_in = best_alg, best_alg = x;
2704 best_alg->log[best_alg->ops] = m;
2705 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2709 if (cache_hit)
2710 goto done;
2713 /* Look for factors of t of the form
2714 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2715 If we find such a factor, we can multiply by t using an algorithm that
2716 multiplies by q, shift the result by m and add/subtract it to itself.
2718 We search for large factors first and loop down, even if large factors
2719 are less probable than small; if we find a large factor we will find a
2720 good sequence quickly, and therefore be able to prune (by decreasing
2721 COST_LIMIT) the search. */
2723 do_alg_addsub_factor:
2724 for (m = floor_log2 (t - 1); m >= 2; m--)
2726 unsigned HOST_WIDE_INT d;
2728 d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2729 if (t % d == 0 && t > d && m < maxm
2730 && (!cache_hit || cache_alg == alg_add_factor))
2732 /* If the target has a cheap shift-and-add instruction use
2733 that in preference to a shift insn followed by an add insn.
2734 Assume that the shift-and-add is "atomic" with a latency
2735 equal to its cost, otherwise assume that on superscalar
2736 hardware the shift may be executed concurrently with the
2737 earlier steps in the algorithm. */
2738 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2739 if (shiftadd_cost (speed, mode, m) < op_cost)
2741 op_cost = shiftadd_cost (speed, mode, m);
2742 op_latency = op_cost;
2744 else
2745 op_latency = add_cost (speed, mode);
2747 new_limit.cost = best_cost.cost - op_cost;
2748 new_limit.latency = best_cost.latency - op_latency;
2749 synth_mult (alg_in, t / d, &new_limit, mode);
2751 alg_in->cost.cost += op_cost;
2752 alg_in->cost.latency += op_latency;
2753 if (alg_in->cost.latency < op_cost)
2754 alg_in->cost.latency = op_cost;
2755 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2757 struct algorithm *x;
2758 best_cost = alg_in->cost;
2759 x = alg_in, alg_in = best_alg, best_alg = x;
2760 best_alg->log[best_alg->ops] = m;
2761 best_alg->op[best_alg->ops] = alg_add_factor;
2763 /* Other factors will have been taken care of in the recursion. */
2764 break;
2767 d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2768 if (t % d == 0 && t > d && m < maxm
2769 && (!cache_hit || cache_alg == alg_sub_factor))
2771 /* If the target has a cheap shift-and-subtract insn use
2772 that in preference to a shift insn followed by a sub insn.
2773 Assume that the shift-and-sub is "atomic" with a latency
2774 equal to its cost, otherwise assume that on superscalar
2775 hardware the shift may be executed concurrently with the
2776 earlier steps in the algorithm. */
2777 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2778 if (shiftsub0_cost (speed, mode, m) < op_cost)
2780 op_cost = shiftsub0_cost (speed, mode, m);
2781 op_latency = op_cost;
2783 else
2784 op_latency = add_cost (speed, mode);
2786 new_limit.cost = best_cost.cost - op_cost;
2787 new_limit.latency = best_cost.latency - op_latency;
2788 synth_mult (alg_in, t / d, &new_limit, mode);
2790 alg_in->cost.cost += op_cost;
2791 alg_in->cost.latency += op_latency;
2792 if (alg_in->cost.latency < op_cost)
2793 alg_in->cost.latency = op_cost;
2794 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2796 struct algorithm *x;
2797 best_cost = alg_in->cost;
2798 x = alg_in, alg_in = best_alg, best_alg = x;
2799 best_alg->log[best_alg->ops] = m;
2800 best_alg->op[best_alg->ops] = alg_sub_factor;
2802 break;
2805 if (cache_hit)
2806 goto done;
2808 /* Try shift-and-add (load effective address) instructions,
2809 i.e. do a*3, a*5, a*9. */
2810 if ((t & 1) != 0)
2812 do_alg_add_t2_m:
2813 q = t - 1;
2814 q = q & -q;
2815 m = exact_log2 (q);
2816 if (m >= 0 && m < maxm)
2818 op_cost = shiftadd_cost (speed, mode, m);
2819 new_limit.cost = best_cost.cost - op_cost;
2820 new_limit.latency = best_cost.latency - op_cost;
2821 synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2823 alg_in->cost.cost += op_cost;
2824 alg_in->cost.latency += op_cost;
2825 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2827 struct algorithm *x;
2828 best_cost = alg_in->cost;
2829 x = alg_in, alg_in = best_alg, best_alg = x;
2830 best_alg->log[best_alg->ops] = m;
2831 best_alg->op[best_alg->ops] = alg_add_t2_m;
2834 if (cache_hit)
2835 goto done;
2837 do_alg_sub_t2_m:
2838 q = t + 1;
2839 q = q & -q;
2840 m = exact_log2 (q);
2841 if (m >= 0 && m < maxm)
2843 op_cost = shiftsub0_cost (speed, mode, m);
2844 new_limit.cost = best_cost.cost - op_cost;
2845 new_limit.latency = best_cost.latency - op_cost;
2846 synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2848 alg_in->cost.cost += op_cost;
2849 alg_in->cost.latency += op_cost;
2850 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2852 struct algorithm *x;
2853 best_cost = alg_in->cost;
2854 x = alg_in, alg_in = best_alg, best_alg = x;
2855 best_alg->log[best_alg->ops] = m;
2856 best_alg->op[best_alg->ops] = alg_sub_t2_m;
2859 if (cache_hit)
2860 goto done;
2863 done:
2864 /* If best_cost has not decreased, we have not found any algorithm. */
2865 if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2867 /* We failed to find an algorithm. Record alg_impossible for
2868 this case (that is, <T, MODE, COST_LIMIT>) so that next time
2869 we are asked to find an algorithm for T within the same or
2870 lower COST_LIMIT, we can immediately return to the
2871 caller. */
2872 entry_ptr->t = t;
2873 entry_ptr->mode = mode;
2874 entry_ptr->speed = speed;
2875 entry_ptr->alg = alg_impossible;
2876 entry_ptr->cost = *cost_limit;
2877 return;
2880 /* Cache the result. */
2881 if (!cache_hit)
2883 entry_ptr->t = t;
2884 entry_ptr->mode = mode;
2885 entry_ptr->speed = speed;
2886 entry_ptr->alg = best_alg->op[best_alg->ops];
2887 entry_ptr->cost.cost = best_cost.cost;
2888 entry_ptr->cost.latency = best_cost.latency;
2891 /* If we are getting too long a sequence for `struct algorithm'
2892 to record, make this search fail. */
2893 if (best_alg->ops == MAX_BITS_PER_WORD)
2894 return;
2896 /* Copy the algorithm from temporary space to the space at alg_out.
2897 We avoid using structure assignment because the majority of
2898 best_alg is normally undefined, and this is a critical function. */
2899 alg_out->ops = best_alg->ops + 1;
2900 alg_out->cost = best_cost;
2901 memcpy (alg_out->op, best_alg->op,
2902 alg_out->ops * sizeof *alg_out->op);
2903 memcpy (alg_out->log, best_alg->log,
2904 alg_out->ops * sizeof *alg_out->log);
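/* Editorial sketch, not part of GCC: the kind of straight-line sequence
   synth_mult searches for.  45 factors as 9 * 5, so multiplication by 45
   can be done with two shift-and-add steps (alg_add_factor twice):
   9*x = (x << 3) + x, then 45*x = (9*x << 2) + 9*x.  Whether this beats a
   hardware multiply depends on the target's add, shift and multiply
   costs, which is exactly what the cost limits above track.  */

static unsigned int
sketch_mult_by_45 (unsigned int x)
{
  unsigned int t = (x << 3) + x;        /* t = 9 * x */
  return (t << 2) + t;                  /* 45 * x    */
}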
2907 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2908 Try three variations:
2910 - a shift/add sequence based on VAL itself
2911 - a shift/add sequence based on -VAL, followed by a negation
2912 - a shift/add sequence based on VAL - 1, followed by an addition.
2914 Return true if the cheapest of these costs less than MULT_COST,
2915 describing the algorithm in *ALG and final fixup in *VARIANT. */
2917 static bool
2918 choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
2919 struct algorithm *alg, enum mult_variant *variant,
2920 int mult_cost)
2922 struct algorithm alg2;
2923 struct mult_cost limit;
2924 int op_cost;
2925 bool speed = optimize_insn_for_speed_p ();
2927 /* Fail quickly for impossible bounds. */
2928 if (mult_cost < 0)
2929 return false;
2931 /* Ensure that mult_cost provides a reasonable upper bound.
2932 Any constant multiplication can be performed with less
2933 than 2 * bits additions. */
2934 op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
2935 if (mult_cost > op_cost)
2936 mult_cost = op_cost;
2938 *variant = basic_variant;
2939 limit.cost = mult_cost;
2940 limit.latency = mult_cost;
2941 synth_mult (alg, val, &limit, mode);
2943 /* This works only if the inverted value actually fits in an
2944 `unsigned int'. */
2945 if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
2947 op_cost = neg_cost(speed, mode);
2948 if (MULT_COST_LESS (&alg->cost, mult_cost))
2950 limit.cost = alg->cost.cost - op_cost;
2951 limit.latency = alg->cost.latency - op_cost;
2953 else
2955 limit.cost = mult_cost - op_cost;
2956 limit.latency = mult_cost - op_cost;
2959 synth_mult (&alg2, -val, &limit, mode);
2960 alg2.cost.cost += op_cost;
2961 alg2.cost.latency += op_cost;
2962 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2963 *alg = alg2, *variant = negate_variant;
2966 /* This proves very useful for division-by-constant. */
2967 op_cost = add_cost (speed, mode);
2968 if (MULT_COST_LESS (&alg->cost, mult_cost))
2970 limit.cost = alg->cost.cost - op_cost;
2971 limit.latency = alg->cost.latency - op_cost;
2973 else
2975 limit.cost = mult_cost - op_cost;
2976 limit.latency = mult_cost - op_cost;
2979 synth_mult (&alg2, val - 1, &limit, mode);
2980 alg2.cost.cost += op_cost;
2981 alg2.cost.latency += op_cost;
2982 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2983 *alg = alg2, *variant = add_variant;
2985 return MULT_COST_LESS (&alg->cost, mult_cost);
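/* Editorial sketch, not part of GCC: what the three variants above mean in
   practice.  Multiplication by -3 can be synthesized from the sequence for
   3 followed by a negation (negate_variant), and multiplication by 7 can
   be built from the sequence for 6 followed by one extra addition of the
   multiplicand (add_variant); choose_mult_variant keeps whichever recipe
   is cheapest on the target.  Results are modulo 2^32.  */

static unsigned int
sketch_mult_by_minus_3 (unsigned int x)
{
  return -((x << 1) + x);               /* negate_variant: -(3*x) */
}

static unsigned int
sketch_mult_by_7_add_variant (unsigned int x)
{
  unsigned int t = (x << 2) + (x << 1); /* 6*x */
  return t + x;                         /* add_variant: 6*x + x */
}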
2988 /* A subroutine of expand_mult, used for constant multiplications.
2989 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2990 convenient. Use the shift/add sequence described by ALG and apply
2991 the final fixup specified by VARIANT. */
2993 static rtx
2994 expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
2995 rtx target, const struct algorithm *alg,
2996 enum mult_variant variant)
2998 HOST_WIDE_INT val_so_far;
2999 rtx insn, accum, tem;
3000 int opno;
3001 enum machine_mode nmode;
3003 /* Avoid referencing memory over and over and invalid sharing
3004 on SUBREGs. */
3005 op0 = force_reg (mode, op0);
3007 /* ACCUM starts out either as OP0 or as a zero, depending on
3008 the first operation. */
3010 if (alg->op[0] == alg_zero)
3012 accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
3013 val_so_far = 0;
3015 else if (alg->op[0] == alg_m)
3017 accum = copy_to_mode_reg (mode, op0);
3018 val_so_far = 1;
3020 else
3021 gcc_unreachable ();
3023 for (opno = 1; opno < alg->ops; opno++)
3025 int log = alg->log[opno];
3026 rtx shift_subtarget = optimize ? 0 : accum;
3027 rtx add_target
3028 = (opno == alg->ops - 1 && target != 0 && variant != add_variant
3029 && !optimize)
3030 ? target : 0;
3031 rtx accum_target = optimize ? 0 : accum;
3032 rtx accum_inner;
3034 switch (alg->op[opno])
3036 case alg_shift:
3037 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3038 /* REG_EQUAL note will be attached to the following insn. */
3039 emit_move_insn (accum, tem);
3040 val_so_far <<= log;
3041 break;
3043 case alg_add_t_m2:
3044 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3045 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3046 add_target ? add_target : accum_target);
3047 val_so_far += (HOST_WIDE_INT) 1 << log;
3048 break;
3050 case alg_sub_t_m2:
3051 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3052 accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
3053 add_target ? add_target : accum_target);
3054 val_so_far -= (HOST_WIDE_INT) 1 << log;
3055 break;
3057 case alg_add_t2_m:
3058 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3059 log, shift_subtarget, 0);
3060 accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3061 add_target ? add_target : accum_target);
3062 val_so_far = (val_so_far << log) + 1;
3063 break;
3065 case alg_sub_t2_m:
3066 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3067 log, shift_subtarget, 0);
3068 accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3069 add_target ? add_target : accum_target);
3070 val_so_far = (val_so_far << log) - 1;
3071 break;
3073 case alg_add_factor:
3074 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3075 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3076 add_target ? add_target : accum_target);
3077 val_so_far += val_so_far << log;
3078 break;
3080 case alg_sub_factor:
3081 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3082 accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3083 (add_target
3084 ? add_target : (optimize ? 0 : tem)));
3085 val_so_far = (val_so_far << log) - val_so_far;
3086 break;
3088 default:
3089 gcc_unreachable ();
3092 if (SCALAR_INT_MODE_P (mode))
3094 /* Write a REG_EQUAL note on the last insn so that we can cse
3095 multiplication sequences. Note that if ACCUM is a SUBREG,
3096 we've set the inner register and must properly indicate that. */
3097 tem = op0, nmode = mode;
3098 accum_inner = accum;
3099 if (GET_CODE (accum) == SUBREG)
3101 accum_inner = SUBREG_REG (accum);
3102 nmode = GET_MODE (accum_inner);
3103 tem = gen_lowpart (nmode, op0);
3106 insn = get_last_insn ();
3107 set_dst_reg_note (insn, REG_EQUAL,
3108 gen_rtx_MULT (nmode, tem, GEN_INT (val_so_far)),
3109 accum_inner);
3113 if (variant == negate_variant)
3115 val_so_far = -val_so_far;
3116 accum = expand_unop (mode, neg_optab, accum, target, 0);
3118 else if (variant == add_variant)
3120 val_so_far = val_so_far + 1;
3121 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3124 /* Compare only the bits of val and val_so_far that are significant
3125 in the result mode, to avoid sign-/zero-extension confusion. */
3126 nmode = GET_MODE_INNER (mode);
3127 if (nmode == VOIDmode)
3128 nmode = mode;
3129 val &= GET_MODE_MASK (nmode);
3130 val_so_far &= GET_MODE_MASK (nmode);
3131 gcc_assert (val == val_so_far);
3133 return accum;
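/* Editorial sketch, not part of GCC: the accumulator bookkeeping above,
   traced for val = 10 with the plausible recipe
   { alg_m, alg_add_t2_m (log 2), alg_shift (log 1) }.  VAL_SO_FAR tracks
   the constant accumulated so far, which is what the final gcc_assert
   checks against VAL.  */

static unsigned int
sketch_expand_mult_const_10 (unsigned int x)
{
  unsigned int accum = x;       /* alg_m:         val_so_far = 1  */
  accum = (accum << 2) + x;     /* alg_add_t2_m:  val_so_far = 5  */
  accum <<= 1;                  /* alg_shift:     val_so_far = 10 */
  return accum;                 /* accum == 10 * x                */
}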
3136 /* Perform a multiplication and return an rtx for the result.
3137 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3138 TARGET is a suggestion for where to store the result (an rtx).
3140 We check specially for a constant integer as OP1.
3141 If you want this check for OP0 as well, then before calling
3142 you should swap the two operands if OP0 would be constant. */
3145 expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3146 int unsignedp)
3148 enum mult_variant variant;
3149 struct algorithm algorithm;
3150 rtx scalar_op1;
3151 int max_cost;
3152 bool speed = optimize_insn_for_speed_p ();
3153 bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3155 if (CONSTANT_P (op0))
3157 rtx temp = op0;
3158 op0 = op1;
3159 op1 = temp;
3162 /* For vectors, there are several simplifications that can be made if
3163 all elements of the vector constant are identical. */
3164 scalar_op1 = op1;
3165 if (GET_CODE (op1) == CONST_VECTOR)
3167 int i, n = CONST_VECTOR_NUNITS (op1);
3168 scalar_op1 = CONST_VECTOR_ELT (op1, 0);
3169 for (i = 1; i < n; ++i)
3170 if (!rtx_equal_p (scalar_op1, CONST_VECTOR_ELT (op1, i)))
3171 goto skip_scalar;
3174 if (INTEGRAL_MODE_P (mode))
3176 rtx fake_reg;
3177 HOST_WIDE_INT coeff;
3178 bool is_neg;
3179 int mode_bitsize;
3181 if (op1 == CONST0_RTX (mode))
3182 return op1;
3183 if (op1 == CONST1_RTX (mode))
3184 return op0;
3185 if (op1 == CONSTM1_RTX (mode))
3186 return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3187 op0, target, 0);
3189 if (do_trapv)
3190 goto skip_synth;
3192 /* These are the operations that are potentially turned into
3193 a sequence of shifts and additions. */
3194 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3196 /* synth_mult does an `unsigned int' multiply. As long as the mode is
3197 less than or equal in size to `unsigned int' this doesn't matter.
3198 If the mode is larger than `unsigned int', then synth_mult works
3199 only if the constant value exactly fits in an `unsigned int' without
3200 any truncation. This means that multiplying by negative values does
3201 not work; results are off by 2^32 on a 32-bit machine. */
3203 if (CONST_INT_P (scalar_op1))
3205 coeff = INTVAL (scalar_op1);
3206 is_neg = coeff < 0;
3208 else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3210 /* If we are multiplying in DImode, it may still be a win
3211 to try to work with shifts and adds. */
3212 if (CONST_DOUBLE_HIGH (scalar_op1) == 0
3213 && CONST_DOUBLE_LOW (scalar_op1) > 0)
3215 coeff = CONST_DOUBLE_LOW (scalar_op1);
3216 is_neg = false;
3218 else if (CONST_DOUBLE_LOW (scalar_op1) == 0)
3220 coeff = CONST_DOUBLE_HIGH (scalar_op1);
3221 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3223 int shift = floor_log2 (coeff) + HOST_BITS_PER_WIDE_INT;
3224 if (shift < HOST_BITS_PER_DOUBLE_INT - 1
3225 || mode_bitsize <= HOST_BITS_PER_DOUBLE_INT)
3226 return expand_shift (LSHIFT_EXPR, mode, op0,
3227 shift, target, unsignedp);
3229 goto skip_synth;
3231 else
3232 goto skip_synth;
3234 else
3235 goto skip_synth;
3237 /* We used to test optimize here, on the grounds that it's better to
3238 produce a smaller program when -O is not used. But this causes
3239 such a terrible slowdown sometimes that it seems better to always
3240 use synth_mult. */
3242 /* Special case powers of two. */
3243 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3244 return expand_shift (LSHIFT_EXPR, mode, op0,
3245 floor_log2 (coeff), target, unsignedp);
3247 fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3249 /* Attempt to handle multiplication of DImode values by negative
3250 coefficients, by performing the multiplication by a positive
3251 multiplier and then negating the result. */
3252 if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3254 /* It's safe to use -coeff even for INT_MIN, as the
3255 result is interpreted as an unsigned coefficient.
3256 Exclude cost of op0 from max_cost to match the cost
3257 calculation of the synth_mult. */
3258 max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed)
3259 - neg_cost(speed, mode));
3260 if (max_cost > 0
3261 && choose_mult_variant (mode, -coeff, &algorithm,
3262 &variant, max_cost))
3264 rtx temp = expand_mult_const (mode, op0, -coeff, NULL_RTX,
3265 &algorithm, variant);
3266 return expand_unop (mode, neg_optab, temp, target, 0);
3268 goto skip_synth;
3271 /* Exclude cost of op0 from max_cost to match the cost
3272 calculation of the synth_mult. */
3273 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed);
3274 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3275 return expand_mult_const (mode, op0, coeff, target,
3276 &algorithm, variant);
3278 skip_synth:
3280 /* Expand x*2.0 as x+x. */
3281 if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1))
3283 REAL_VALUE_TYPE d;
3284 REAL_VALUE_FROM_CONST_DOUBLE (d, scalar_op1);
3286 if (REAL_VALUES_EQUAL (d, dconst2))
3288 op0 = force_reg (GET_MODE (op0), op0);
3289 return expand_binop (mode, add_optab, op0, op0,
3290 target, unsignedp, OPTAB_LIB_WIDEN);
3293 skip_scalar:
3295 /* This used to use umul_optab if unsigned, but for non-widening multiply
3296 there is no difference between signed and unsigned. */
3297 op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3298 op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3299 gcc_assert (op0);
3300 return op0;
3303 /* Return a cost estimate for multiplying a register by the given
3304 COEFFicient in the given MODE and SPEED. */
3307 mult_by_coeff_cost (HOST_WIDE_INT coeff, enum machine_mode mode, bool speed)
3309 int max_cost;
3310 struct algorithm algorithm;
3311 enum mult_variant variant;
3313 rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3314 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg), speed);
3315 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3316 return algorithm.cost.cost;
3317 else
3318 return max_cost;
3321 /* Perform a widening multiplication and return an rtx for the result.
3322 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3323 TARGET is a suggestion for where to store the result (an rtx).
3324 THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3325 or smul_widen_optab.
3327 We check specially for a constant integer as OP1, comparing the
3328 cost of a widening multiply against the cost of a sequence of shifts
3329 and adds. */
3332 expand_widening_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3333 int unsignedp, optab this_optab)
3335 bool speed = optimize_insn_for_speed_p ();
3336 rtx cop1;
3338 if (CONST_INT_P (op1)
3339 && GET_MODE (op0) != VOIDmode
3340 && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3341 this_optab == umul_widen_optab))
3342 && CONST_INT_P (cop1)
3343 && (INTVAL (cop1) >= 0
3344 || HWI_COMPUTABLE_MODE_P (mode)))
3346 HOST_WIDE_INT coeff = INTVAL (cop1);
3347 int max_cost;
3348 enum mult_variant variant;
3349 struct algorithm algorithm;
3351 /* Special case powers of two. */
3352 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3354 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3355 return expand_shift (LSHIFT_EXPR, mode, op0,
3356 floor_log2 (coeff), target, unsignedp);
3359 /* Exclude cost of op0 from max_cost to match the cost
3360 calculation of the synth_mult. */
3361 max_cost = mul_widen_cost (speed, mode);
3362 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3363 max_cost))
3365 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3366 return expand_mult_const (mode, op0, coeff, target,
3367 &algorithm, variant);
3370 return expand_binop (mode, this_optab, op0, op1, target,
3371 unsignedp, OPTAB_LIB_WIDEN);
3374 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3375 replace division by D, and put the least significant N bits of the result
3376 in *MULTIPLIER_PTR and return the most significant bit.
3378 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3379 needed precision is in PRECISION (should be <= N).
3381 PRECISION should be as small as possible so this function can choose
3382 the multiplier more freely.
3384 The rounded-up logarithm of D is placed in *lgup_ptr. A shift count that
3385 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3387 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3388 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
3390 unsigned HOST_WIDE_INT
3391 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3392 unsigned HOST_WIDE_INT *multiplier_ptr,
3393 int *post_shift_ptr, int *lgup_ptr)
3395 double_int mhigh, mlow;
3396 int lgup, post_shift;
3397 int pow, pow2;
3399 /* lgup = ceil(log2(divisor)); */
3400 lgup = ceil_log2 (d);
3402 gcc_assert (lgup <= n);
3404 pow = n + lgup;
3405 pow2 = n + lgup - precision;
3407 /* We could handle this with some effort, but this case is much
3408 better handled directly with a scc insn, so rely on caller using
3409 that. */
3410 gcc_assert (pow != HOST_BITS_PER_DOUBLE_INT);
3412 /* mlow = 2^(N + lgup)/d */
3413 double_int val = double_int_zero.set_bit (pow);
3414 mlow = val.div (double_int::from_uhwi (d), true, TRUNC_DIV_EXPR);
3416 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3417 val |= double_int_zero.set_bit (pow2);
3418 mhigh = val.div (double_int::from_uhwi (d), true, TRUNC_DIV_EXPR);
3420 gcc_assert (!mhigh.high || val.high - d < d);
3421 gcc_assert (mhigh.high <= 1 && mlow.high <= 1);
3422 /* Assert that mlow < mhigh. */
3423 gcc_assert (mlow.ult (mhigh));
3425 /* If precision == N, then mlow, mhigh exceed 2^N
3426 (but they do not exceed 2^(N+1)). */
3428 /* Reduce to lowest terms. */
3429 for (post_shift = lgup; post_shift > 0; post_shift--)
3431 int shft = HOST_BITS_PER_WIDE_INT - 1;
3432 unsigned HOST_WIDE_INT ml_lo = (mlow.high << shft) | (mlow.low >> 1);
3433 unsigned HOST_WIDE_INT mh_lo = (mhigh.high << shft) | (mhigh.low >> 1);
3434 if (ml_lo >= mh_lo)
3435 break;
3437 mlow = double_int::from_uhwi (ml_lo);
3438 mhigh = double_int::from_uhwi (mh_lo);
3441 *post_shift_ptr = post_shift;
3442 *lgup_ptr = lgup;
3443 if (n < HOST_BITS_PER_WIDE_INT)
3445 unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3446 *multiplier_ptr = mhigh.low & mask;
3447 return mhigh.low >= mask;
3449 else
3451 *multiplier_ptr = mhigh.low;
3452 return mhigh.high;
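/* Editorial sketch, not part of GCC: how such a multiplier is used.  For
   N = 32 and d = 3 the well-known choice is the multiplier 0xAAAAAAAB
   with a post-shift of 1, so x / 3 becomes the high half of a widening
   multiply shifted right once more, i.e. a 33-bit shift of the 64-bit
   product.  Assumes 32-bit unsigned int and 64-bit unsigned long long.  */

static unsigned int
sketch_udiv_by_3 (unsigned int x)
{
  unsigned long long prod = (unsigned long long) x * 0xAAAAAAABULL;
  return (unsigned int) (prod >> 33);   /* == x / 3 for all 32-bit x */
}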
3456 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3457 congruent to 1 (mod 2**N). */
3459 static unsigned HOST_WIDE_INT
3460 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3462 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
3464 /* The algorithm notes that the choice y = x satisfies
3465 x*y == 1 mod 2^3, since x is assumed odd.
3466 Each iteration doubles the number of bits of significance in y. */
3468 unsigned HOST_WIDE_INT mask;
3469 unsigned HOST_WIDE_INT y = x;
3470 int nbit = 3;
3472 mask = (n == HOST_BITS_PER_WIDE_INT
3473 ? ~(unsigned HOST_WIDE_INT) 0
3474 : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3476 while (nbit < n)
3478 y = y * (2 - x*y) & mask; /* Modulo 2^N */
3479 nbit *= 2;
3481 return y;
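/* Editorial sketch, not part of GCC: the same Newton iteration for a full
   32-bit word.  Each step doubles the number of correct low-order bits of
   the inverse; for example the inverse of 3 is 0xAAAAAAAB, since
   3 * 0xAAAAAAAB == 2^33 + 1 == 1 (mod 2^32).  Assumes 32-bit unsigned
   int and odd X.  */

static unsigned int
sketch_invert_mod_2_32 (unsigned int x)
{
  unsigned int y = x;                   /* correct to 3 bits for odd x */
  int bits;
  for (bits = 3; bits < 32; bits *= 2)
    y = y * (2 - x * y);                /* doubles the correct bits */
  return y;
}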
3484 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3485 flavor of OP0 and OP1. ADJ_OPERAND is already the high half of the
3486 product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed product
3487 to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3488 become signed.
3490 The result is put in TARGET if that is convenient.
3492 MODE is the mode of operation. */
3494 rtx
3495 expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
3496 rtx op1, rtx target, int unsignedp)
3498 rtx tem;
3499 enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3501 tem = expand_shift (RSHIFT_EXPR, mode, op0,
3502 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3503 tem = expand_and (mode, tem, op1, NULL_RTX);
3504 adj_operand
3505 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3506 adj_operand);
3508 tem = expand_shift (RSHIFT_EXPR, mode, op1,
3509 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3510 tem = expand_and (mode, tem, op0, NULL_RTX);
3511 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3512 target);
3514 return target;
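/* Illustrative sketch, not part of the original file: the identity behind
   the adjustment above, written out for assumed 32-bit operands.  In N-bit
   arithmetic,
     high_u (x, y) == high_s (x, y) + (x < 0 ? y : 0) + (y < 0 ? x : 0)
   and the arithmetic shifts by N-1 produce the all-ones masks that select
   the corrective addends.  */
#include <stdint.h>

static uint32_t
umulhi_from_smulhi (int32_t x, int32_t y)
{
  uint32_t high_s = (uint32_t) (((int64_t) x * y) >> 32);
  uint32_t fix_x = (uint32_t) (x >> 31) & (uint32_t) y;   /* y if x < 0 */
  uint32_t fix_y = (uint32_t) (y >> 31) & (uint32_t) x;   /* x if y < 0 */
  return high_s + fix_x + fix_y;
}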
3517 /* Subroutine of expmed_mult_highpart. Return the MODE high part of OP. */
3519 static rtx
3520 extract_high_half (enum machine_mode mode, rtx op)
3522 enum machine_mode wider_mode;
3524 if (mode == word_mode)
3525 return gen_highpart (mode, op);
3527 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3529 wider_mode = GET_MODE_WIDER_MODE (mode);
3530 op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3531 GET_MODE_BITSIZE (mode), 0, 1);
3532 return convert_modes (mode, wider_mode, op, 0);
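/* Illustrative sketch, not part of the original file: the combination of a
   widening multiply and extract_high_half that the routines below fall back
   on, for an assumed 32-bit mode on a host with a 64-bit type.  */
#include <stdint.h>

static uint32_t
umulhi32 (uint32_t x, uint32_t c)
{
  return (uint32_t) (((uint64_t) x * c) >> 32);   /* upper half of product */
}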
3535 /* Like expmed_mult_highpart, but only consider using a multiplication
3536 optab. OP1 is an rtx for the constant operand. */
3538 static rtx
3539 expmed_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
3540 rtx target, int unsignedp, int max_cost)
3542 rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3543 enum machine_mode wider_mode;
3544 optab moptab;
3545 rtx tem;
3546 int size;
3547 bool speed = optimize_insn_for_speed_p ();
3549 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3551 wider_mode = GET_MODE_WIDER_MODE (mode);
3552 size = GET_MODE_BITSIZE (mode);
3554 /* Firstly, try using a multiplication insn that only generates the needed
3555 high part of the product, and in the sign flavor of unsignedp. */
3556 if (mul_highpart_cost (speed, mode) < max_cost)
3558 moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3559 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3560 unsignedp, OPTAB_DIRECT);
3561 if (tem)
3562 return tem;
3565 /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3566 Need to adjust the result after the multiplication. */
3567 if (size - 1 < BITS_PER_WORD
3568 && (mul_highpart_cost (speed, mode)
3569 + 2 * shift_cost (speed, mode, size-1)
3570 + 4 * add_cost (speed, mode) < max_cost))
3572 moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3573 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3574 unsignedp, OPTAB_DIRECT);
3575 if (tem)
3576 /* We used the wrong signedness. Adjust the result. */
3577 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3578 tem, unsignedp);
3581 /* Try widening multiplication. */
3582 moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3583 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3584 && mul_widen_cost (speed, wider_mode) < max_cost)
3586 tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3587 unsignedp, OPTAB_WIDEN);
3588 if (tem)
3589 return extract_high_half (mode, tem);
3592 /* Try widening the mode and perform a non-widening multiplication. */
3593 if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3594 && size - 1 < BITS_PER_WORD
3595 && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3596 < max_cost))
3598 rtx insns, wop0, wop1;
3600 /* We need to widen the operands, for example to ensure the
3601 constant multiplier is correctly sign or zero extended.
3602 Use a sequence to clean-up any instructions emitted by
3603 the conversions if things don't work out. */
3604 start_sequence ();
3605 wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3606 wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3607 tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3608 unsignedp, OPTAB_WIDEN);
3609 insns = get_insns ();
3610 end_sequence ();
3612 if (tem)
3614 emit_insn (insns);
3615 return extract_high_half (mode, tem);
3619 /* Try widening multiplication of opposite signedness, and adjust. */
3620 moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3621 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3622 && size - 1 < BITS_PER_WORD
3623 && (mul_widen_cost (speed, wider_mode)
3624 + 2 * shift_cost (speed, mode, size-1)
3625 + 4 * add_cost (speed, mode) < max_cost))
3627 tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3628 NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3629 if (tem != 0)
3631 tem = extract_high_half (mode, tem);
3632 /* We used the wrong signedness. Adjust the result. */
3633 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3634 target, unsignedp);
3638 return 0;
3641 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3642 putting the high half of the result in TARGET if that is convenient,
3643 and return where the result is. If the operation cannot be performed,
3644 0 is returned.
3646 MODE is the mode of operation and result.
3648 UNSIGNEDP nonzero means unsigned multiply.
3650 MAX_COST is the total allowed cost for the expanded RTL. */
3652 static rtx
3653 expmed_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
3654 rtx target, int unsignedp, int max_cost)
3656 enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3657 unsigned HOST_WIDE_INT cnst1;
3658 int extra_cost;
3659 bool sign_adjust = false;
3660 enum mult_variant variant;
3661 struct algorithm alg;
3662 rtx tem;
3663 bool speed = optimize_insn_for_speed_p ();
3665 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3666 /* We can't support modes wider than HOST_BITS_PER_WIDE_INT. */
3667 gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3669 cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3671 /* We can't optimize modes wider than BITS_PER_WORD.
3672 ??? We might be able to perform double-word arithmetic if
3673 mode == word_mode, however all the cost calculations in
3674 synth_mult etc. assume single-word operations. */
3675 if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3676 return expmed_mult_highpart_optab (mode, op0, op1, target,
3677 unsignedp, max_cost);
3679 extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3681 /* Check whether we try to multiply by a negative constant. */
3682 if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3684 sign_adjust = true;
3685 extra_cost += add_cost (speed, mode);
3688 /* See whether shift/add multiplication is cheap enough. */
3689 if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3690 max_cost - extra_cost))
3692 /* See whether the specialized multiplication optabs are
3693 cheaper than the shift/add version. */
3694 tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3695 alg.cost.cost + extra_cost);
3696 if (tem)
3697 return tem;
3699 tem = convert_to_mode (wider_mode, op0, unsignedp);
3700 tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3701 tem = extract_high_half (mode, tem);
3703 /* Adjust result for signedness. */
3704 if (sign_adjust)
3705 tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3707 return tem;
3709 return expmed_mult_highpart_optab (mode, op0, op1, target,
3710 unsignedp, max_cost);
3714 /* Expand signed modulus of OP0 by a power of two D in mode MODE. */
3716 static rtx
3717 expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3719 unsigned HOST_WIDE_INT masklow, maskhigh;
3720 rtx result, temp, shift, label;
3721 int logd;
3723 logd = floor_log2 (d);
3724 result = gen_reg_rtx (mode);
3726 /* Avoid conditional branches when they're expensive. */
3727 if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3728 && optimize_insn_for_speed_p ())
3730 rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3731 mode, 0, -1);
3732 if (signmask)
3734 signmask = force_reg (mode, signmask);
3735 masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3736 shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3738 /* Use the rtx_cost of a LSHIFTRT instruction to determine
3739 which instruction sequence to use. If logical right shifts
3740 are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
3741 use a LSHIFTRT, 1 ADD, 1 SUB and an AND. */
3743 temp = gen_rtx_LSHIFTRT (mode, result, shift);
3744 if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3745 || (set_src_cost (temp, optimize_insn_for_speed_p ())
3746 > COSTS_N_INSNS (2)))
3748 temp = expand_binop (mode, xor_optab, op0, signmask,
3749 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3750 temp = expand_binop (mode, sub_optab, temp, signmask,
3751 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3752 temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3753 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3754 temp = expand_binop (mode, xor_optab, temp, signmask,
3755 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3756 temp = expand_binop (mode, sub_optab, temp, signmask,
3757 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3759 else
3761 signmask = expand_binop (mode, lshr_optab, signmask, shift,
3762 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3763 signmask = force_reg (mode, signmask);
3765 temp = expand_binop (mode, add_optab, op0, signmask,
3766 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3767 temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3768 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3769 temp = expand_binop (mode, sub_optab, temp, signmask,
3770 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3772 return temp;
3776 /* Mask contains the mode's signbit and the significant bits of the
3777 modulus. By including the signbit in the operation, many targets
3778 can avoid an explicit compare operation in the following comparison
3779 against zero. */
3781 masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3782 if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
3784 masklow |= (HOST_WIDE_INT) -1 << (GET_MODE_BITSIZE (mode) - 1);
3785 maskhigh = -1;
3787 else
3788 maskhigh = (HOST_WIDE_INT) -1
3789 << (GET_MODE_BITSIZE (mode) - HOST_BITS_PER_WIDE_INT - 1);
3791 temp = expand_binop (mode, and_optab, op0,
3792 immed_double_const (masklow, maskhigh, mode),
3793 result, 1, OPTAB_LIB_WIDEN);
3794 if (temp != result)
3795 emit_move_insn (result, temp);
3797 label = gen_label_rtx ();
3798 do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3800 temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3801 0, OPTAB_LIB_WIDEN);
3802 masklow = (HOST_WIDE_INT) -1 << logd;
3803 maskhigh = -1;
3804 temp = expand_binop (mode, ior_optab, temp,
3805 immed_double_const (masklow, maskhigh, mode),
3806 result, 1, OPTAB_LIB_WIDEN);
3807 temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3808 0, OPTAB_LIB_WIDEN);
3809 if (temp != result)
3810 emit_move_insn (result, temp);
3811 emit_label (label);
3812 return result;
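/* Illustrative sketch, not part of the original file: the cheaper of the two
   branch-free sequences described above, for x % 8 in an assumed 32-bit
   mode.  SIGNMASK is all ones for negative X; shifting it right by
   size - logd leaves the bias d - 1, which is added only for negative
   inputs so the result keeps C's round-toward-zero semantics.  */
#include <stdint.h>

static int32_t
smod8_branchless (int32_t x)
{
  uint32_t signmask = (uint32_t) (x >> 31);     /* 0 or 0xFFFFFFFF */
  uint32_t bias = signmask >> (32 - 3);         /* 0 or 7 */
  return (int32_t) ((((uint32_t) x + bias) & 7) - bias);
}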
3815 /* Expand signed division of OP0 by a power of two D in mode MODE.
3816 This routine is only called for positive values of D. */
3818 static rtx
3819 expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3821 rtx temp, label;
3822 int logd;
3824 logd = floor_log2 (d);
3826 if (d == 2
3827 && BRANCH_COST (optimize_insn_for_speed_p (),
3828 false) >= 1)
3830 temp = gen_reg_rtx (mode);
3831 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3832 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3833 0, OPTAB_LIB_WIDEN);
3834 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3837 #ifdef HAVE_conditional_move
3838 if (BRANCH_COST (optimize_insn_for_speed_p (), false)
3839 >= 2)
3841 rtx temp2;
3843 /* ??? emit_conditional_move forces a stack adjustment via
3844 compare_from_rtx, so if the sequence is discarded, it will
3845 be lost. Do it now instead. */
3846 do_pending_stack_adjust ();
3848 start_sequence ();
3849 temp2 = copy_to_mode_reg (mode, op0);
3850 temp = expand_binop (mode, add_optab, temp2, GEN_INT (d-1),
3851 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3852 temp = force_reg (mode, temp);
3854 /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */
3855 temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3856 mode, temp, temp2, mode, 0);
3857 if (temp2)
3859 rtx seq = get_insns ();
3860 end_sequence ();
3861 emit_insn (seq);
3862 return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
3864 end_sequence ();
3866 #endif
3868 if (BRANCH_COST (optimize_insn_for_speed_p (),
3869 false) >= 2)
3871 int ushift = GET_MODE_BITSIZE (mode) - logd;
3873 temp = gen_reg_rtx (mode);
3874 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3875 if (shift_cost (optimize_insn_for_speed_p (), mode, ushift)
3876 > COSTS_N_INSNS (1))
3877 temp = expand_binop (mode, and_optab, temp, GEN_INT (d - 1),
3878 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3879 else
3880 temp = expand_shift (RSHIFT_EXPR, mode, temp,
3881 ushift, NULL_RTX, 1);
3882 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3883 0, OPTAB_LIB_WIDEN);
3884 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3887 label = gen_label_rtx ();
3888 temp = copy_to_mode_reg (mode, op0);
3889 do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3890 expand_inc (temp, GEN_INT (d - 1));
3891 emit_label (label);
3892 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
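/* Illustrative sketch, not part of the original file: the branch-free form
   emitted above when BRANCH_COST allows it, for division by 8 in an assumed
   32-bit mode.  Adding d - 1 to negative dividends before the arithmetic
   shift turns the shift's floor rounding into the truncation that
   TRUNC_DIV_EXPR requires.  */
#include <stdint.h>

static int32_t
sdiv8_branchless (int32_t x)
{
  int32_t bias = (x >> 31) & 7;   /* 7 for negative x, 0 otherwise */
  return (x + bias) >> 3;         /* assumes arithmetic right shift */
}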
3895 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3896 if that is convenient, and returning where the result is.
3897 You may request either the quotient or the remainder as the result;
3898 specify REM_FLAG nonzero to get the remainder.
3900 CODE is the expression code for which kind of division this is;
3901 it controls how rounding is done. MODE is the machine mode to use.
3902 UNSIGNEDP nonzero means do unsigned division. */
3904 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3905 and then correct it by or'ing in missing high bits
3906 if result of ANDI is nonzero.
3907 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3908 This could optimize to a bfexts instruction.
3909 But C doesn't use these operations, so their optimizations are
3910 left for later. */
3911 /* ??? For modulo, we don't actually need the highpart of the first product,
3912 the low part will do nicely. And for small divisors, the second multiply
3913 can also be a low-part only multiply or even be completely left out.
3914 E.g. to calculate the remainder of a division by 3 with a 32 bit
3915 multiply, multiply with 0x55555556 and extract the upper two bits;
3916 the result is exact for inputs up to 0x1fffffff.
3917 The input range can be reduced by using cross-sum rules.
3918 For odd divisors >= 3, the following table gives right shift counts
3919 so that if a number is shifted by an integer multiple of the given
3920 amount, the remainder stays the same:
3921 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3922 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3923 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3924 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3925 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3927 Cross-sum rules for even numbers can be derived by leaving as many bits
3928 to the right alone as the divisor has zeros to the right.
3929 E.g. if x is an unsigned 32 bit number:
3930 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3931 */
3933 rtx
3934 expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3935 rtx op0, rtx op1, rtx target, int unsignedp)
3937 enum machine_mode compute_mode;
3938 rtx tquotient;
3939 rtx quotient = 0, remainder = 0;
3940 rtx last;
3941 int size;
3942 rtx insn;
3943 optab optab1, optab2;
3944 int op1_is_constant, op1_is_pow2 = 0;
3945 int max_cost, extra_cost;
3946 static HOST_WIDE_INT last_div_const = 0;
3947 static HOST_WIDE_INT ext_op1;
3948 bool speed = optimize_insn_for_speed_p ();
3950 op1_is_constant = CONST_INT_P (op1);
3951 if (op1_is_constant)
3953 ext_op1 = INTVAL (op1);
3954 if (unsignedp)
3955 ext_op1 &= GET_MODE_MASK (mode);
3956 op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3957 || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3960 /*
3961 This is the structure of expand_divmod:
3963 First comes code to fix up the operands so we can perform the operations
3964 correctly and efficiently.
3966 Second comes a switch statement with code specific for each rounding mode.
3967 For some special operands this code emits all RTL for the desired
3968 operation, for other cases, it generates only a quotient and stores it in
3969 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
3970 to indicate that it has not done anything.
3972 Last comes code that finishes the operation. If QUOTIENT is set and
3973 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
3974 QUOTIENT is not set, it is computed using trunc rounding.
3976 We try to generate special code for division and remainder when OP1 is a
3977 constant. If |OP1| = 2**n we can use shifts and some other fast
3978 operations. For other values of OP1, we compute a carefully selected
3979 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3980 by m.
3982 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3983 half of the product. Different strategies for generating the product are
3984 implemented in expmed_mult_highpart.
3986 If what we actually want is the remainder, we generate that by another
3987 by-constant multiplication and a subtraction. */
3989 /* We shouldn't be called with OP1 == const1_rtx, but some of the
3990 code below will malfunction if we are, so check here and handle
3991 the special case if so. */
3992 if (op1 == const1_rtx)
3993 return rem_flag ? const0_rtx : op0;
3995 /* When dividing by -1, we could get an overflow.
3996 negv_optab can handle overflows. */
3997 if (! unsignedp && op1 == constm1_rtx)
3999 if (rem_flag)
4000 return const0_rtx;
4001 return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT
4002 ? negv_optab : neg_optab, op0, target, 0);
4005 if (target
4006 /* Don't use the function value register as a target
4007 since we have to read it as well as write it,
4008 and function-inlining gets confused by this. */
4009 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
4010 /* Don't clobber an operand while doing a multi-step calculation. */
4011 || ((rem_flag || op1_is_constant)
4012 && (reg_mentioned_p (target, op0)
4013 || (MEM_P (op0) && MEM_P (target))))
4014 || reg_mentioned_p (target, op1)
4015 || (MEM_P (op1) && MEM_P (target))))
4016 target = 0;
4018 /* Get the mode in which to perform this computation. Normally it will
4019 be MODE, but sometimes we can't do the desired operation in MODE.
4020 If so, pick a wider mode in which we can do the operation. Convert
4021 to that mode at the start to avoid repeated conversions.
4023 First see what operations we need. These depend on the expression
4024 we are evaluating. (We assume that divxx3 insns exist under the
4025 same conditions as modxx3 insns and that these insns don't normally
4026 fail. If these assumptions are not correct, we may generate less
4027 efficient code in some cases.)
4029 Then see if we find a mode in which we can open-code that operation
4030 (either a division, modulus, or shift). Finally, check for the smallest
4031 mode for which we can do the operation with a library call. */
4033 /* We might want to refine this now that we have division-by-constant
4034 optimization. Since expmed_mult_highpart tries so many variants, it is
4035 not straightforward to generalize this. Maybe we should make an array
4036 of possible modes in init_expmed? Save this for GCC 2.7. */
4038 optab1 = ((op1_is_pow2 && op1 != const0_rtx)
4039 ? (unsignedp ? lshr_optab : ashr_optab)
4040 : (unsignedp ? udiv_optab : sdiv_optab));
4041 optab2 = ((op1_is_pow2 && op1 != const0_rtx)
4042 ? optab1
4043 : (unsignedp ? udivmod_optab : sdivmod_optab));
4045 for (compute_mode = mode; compute_mode != VOIDmode;
4046 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
4047 if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
4048 || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
4049 break;
4051 if (compute_mode == VOIDmode)
4052 for (compute_mode = mode; compute_mode != VOIDmode;
4053 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
4054 if (optab_libfunc (optab1, compute_mode)
4055 || optab_libfunc (optab2, compute_mode))
4056 break;
4058 /* If we still couldn't find a mode, use MODE, but expand_binop will
4059 probably die. */
4060 if (compute_mode == VOIDmode)
4061 compute_mode = mode;
4063 if (target && GET_MODE (target) == compute_mode)
4064 tquotient = target;
4065 else
4066 tquotient = gen_reg_rtx (compute_mode);
4068 size = GET_MODE_BITSIZE (compute_mode);
4069 #if 0
4070 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
4071 (mode), and thereby get better code when OP1 is a constant. Do that
4072 later. It will require going over all usages of SIZE below. */
4073 size = GET_MODE_BITSIZE (mode);
4074 #endif
4076 /* Only deduct something for a REM if the last divide done was
4077 for a different constant. Then set the constant of the last
4078 divide. */
4079 max_cost = (unsignedp
4080 ? udiv_cost (speed, compute_mode)
4081 : sdiv_cost (speed, compute_mode));
4082 if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4083 && INTVAL (op1) == last_div_const))
4084 max_cost -= (mul_cost (speed, compute_mode)
4085 + add_cost (speed, compute_mode));
4087 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4089 /* Now convert to the best mode to use. */
4090 if (compute_mode != mode)
4092 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4093 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4095 /* convert_modes may have placed op1 into a register, so we
4096 must recompute the following. */
4097 op1_is_constant = CONST_INT_P (op1);
4098 op1_is_pow2 = (op1_is_constant
4099 && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4100 || (! unsignedp
4101 && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1)))))) ;
4104 /* If one of the operands is a volatile MEM, copy it into a register. */
4106 if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4107 op0 = force_reg (compute_mode, op0);
4108 if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4109 op1 = force_reg (compute_mode, op1);
4111 /* If we need the remainder or if OP1 is constant, we need to
4112 put OP0 in a register in case it has any queued subexpressions. */
4113 if (rem_flag || op1_is_constant)
4114 op0 = force_reg (compute_mode, op0);
4116 last = get_last_insn ();
4118 /* Promote floor rounding to trunc rounding for unsigned operations. */
4119 if (unsignedp)
4121 if (code == FLOOR_DIV_EXPR)
4122 code = TRUNC_DIV_EXPR;
4123 if (code == FLOOR_MOD_EXPR)
4124 code = TRUNC_MOD_EXPR;
4125 if (code == EXACT_DIV_EXPR && op1_is_pow2)
4126 code = TRUNC_DIV_EXPR;
4129 if (op1 != const0_rtx)
4130 switch (code)
4132 case TRUNC_MOD_EXPR:
4133 case TRUNC_DIV_EXPR:
4134 if (op1_is_constant)
4136 if (unsignedp)
4138 unsigned HOST_WIDE_INT mh, ml;
4139 int pre_shift, post_shift;
4140 int dummy;
4141 unsigned HOST_WIDE_INT d = (INTVAL (op1)
4142 & GET_MODE_MASK (compute_mode));
4144 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4146 pre_shift = floor_log2 (d);
4147 if (rem_flag)
4149 remainder
4150 = expand_binop (compute_mode, and_optab, op0,
4151 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4152 remainder, 1,
4153 OPTAB_LIB_WIDEN);
4154 if (remainder)
4155 return gen_lowpart (mode, remainder);
4157 quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4158 pre_shift, tquotient, 1);
4160 else if (size <= HOST_BITS_PER_WIDE_INT)
4162 if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
4164 /* Most significant bit of divisor is set; emit an scc
4165 insn. */
4166 quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4167 compute_mode, 1, 1);
4169 else
4171 /* Find a suitable multiplier and right shift count
4172 instead of multiplying with D. */
4174 mh = choose_multiplier (d, size, size,
4175 &ml, &post_shift, &dummy);
4177 /* If the suggested multiplier is more than SIZE bits,
4178 we can do better for even divisors, using an
4179 initial right shift. */
4180 if (mh != 0 && (d & 1) == 0)
4182 pre_shift = floor_log2 (d & -d);
4183 mh = choose_multiplier (d >> pre_shift, size,
4184 size - pre_shift,
4185 &ml, &post_shift, &dummy);
4186 gcc_assert (!mh);
4188 else
4189 pre_shift = 0;
4191 if (mh != 0)
4193 rtx t1, t2, t3, t4;
4195 if (post_shift - 1 >= BITS_PER_WORD)
4196 goto fail1;
4198 extra_cost
4199 = (shift_cost (speed, compute_mode, post_shift - 1)
4200 + shift_cost (speed, compute_mode, 1)
4201 + 2 * add_cost (speed, compute_mode));
4202 t1 = expmed_mult_highpart (compute_mode, op0,
4203 GEN_INT (ml),
4204 NULL_RTX, 1,
4205 max_cost - extra_cost);
4206 if (t1 == 0)
4207 goto fail1;
4208 t2 = force_operand (gen_rtx_MINUS (compute_mode,
4209 op0, t1),
4210 NULL_RTX);
4211 t3 = expand_shift (RSHIFT_EXPR, compute_mode,
4212 t2, 1, NULL_RTX, 1);
4213 t4 = force_operand (gen_rtx_PLUS (compute_mode,
4214 t1, t3),
4215 NULL_RTX);
4216 quotient = expand_shift
4217 (RSHIFT_EXPR, compute_mode, t4,
4218 post_shift - 1, tquotient, 1);
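                    /* Illustrative worked case (added comment, values assumed
                       from choose_multiplier): for d == 7 in a 32-bit mode,
                       ml == 0x24924925 and post_shift == 3, so the code just
                       emitted computes
                         t1 = high32 (op0 * 0x24924925)
                         t3 = (op0 - t1) >> 1
                         quotient = (t1 + t3) >> 2
                       where the subtract/shift/add pair recovers the 33rd
                       multiplier bit that does not fit in the constant.  */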
4220 else
4222 rtx t1, t2;
4224 if (pre_shift >= BITS_PER_WORD
4225 || post_shift >= BITS_PER_WORD)
4226 goto fail1;
4228 t1 = expand_shift
4229 (RSHIFT_EXPR, compute_mode, op0,
4230 pre_shift, NULL_RTX, 1);
4231 extra_cost
4232 = (shift_cost (speed, compute_mode, pre_shift)
4233 + shift_cost (speed, compute_mode, post_shift));
4234 t2 = expmed_mult_highpart (compute_mode, t1,
4235 GEN_INT (ml),
4236 NULL_RTX, 1,
4237 max_cost - extra_cost);
4238 if (t2 == 0)
4239 goto fail1;
4240 quotient = expand_shift
4241 (RSHIFT_EXPR, compute_mode, t2,
4242 post_shift, tquotient, 1);
4246 else /* Too wide mode to use tricky code */
4247 break;
4249 insn = get_last_insn ();
4250 if (insn != last)
4251 set_dst_reg_note (insn, REG_EQUAL,
4252 gen_rtx_UDIV (compute_mode, op0, op1),
4253 quotient);
4255 else /* TRUNC_DIV, signed */
4257 unsigned HOST_WIDE_INT ml;
4258 int lgup, post_shift;
4259 rtx mlr;
4260 HOST_WIDE_INT d = INTVAL (op1);
4261 unsigned HOST_WIDE_INT abs_d;
4263 /* Since d might be INT_MIN, we have to cast to
4264 unsigned HOST_WIDE_INT before negating to avoid
4265 undefined signed overflow. */
4266 abs_d = (d >= 0
4267 ? (unsigned HOST_WIDE_INT) d
4268 : - (unsigned HOST_WIDE_INT) d);
4270 /* n rem d = n rem -d */
4271 if (rem_flag && d < 0)
4273 d = abs_d;
4274 op1 = gen_int_mode (abs_d, compute_mode);
4277 if (d == 1)
4278 quotient = op0;
4279 else if (d == -1)
4280 quotient = expand_unop (compute_mode, neg_optab, op0,
4281 tquotient, 0);
4282 else if (HOST_BITS_PER_WIDE_INT >= size
4283 && abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4285 /* This case is not handled correctly below. */
4286 quotient = emit_store_flag (tquotient, EQ, op0, op1,
4287 compute_mode, 1, 1);
4288 if (quotient == 0)
4289 goto fail1;
4291 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4292 && (rem_flag
4293 ? smod_pow2_cheap (speed, compute_mode)
4294 : sdiv_pow2_cheap (speed, compute_mode))
4295 /* We assume that the cheap metric is true if the
4296 optab has an expander for this mode. */
4297 && ((optab_handler ((rem_flag ? smod_optab
4298 : sdiv_optab),
4299 compute_mode)
4300 != CODE_FOR_nothing)
4301 || (optab_handler (sdivmod_optab,
4302 compute_mode)
4303 != CODE_FOR_nothing)))
4305 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4307 if (rem_flag)
4309 remainder = expand_smod_pow2 (compute_mode, op0, d);
4310 if (remainder)
4311 return gen_lowpart (mode, remainder);
4314 if (sdiv_pow2_cheap (speed, compute_mode)
4315 && ((optab_handler (sdiv_optab, compute_mode)
4316 != CODE_FOR_nothing)
4317 || (optab_handler (sdivmod_optab, compute_mode)
4318 != CODE_FOR_nothing)))
4319 quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4320 compute_mode, op0,
4321 gen_int_mode (abs_d,
4322 compute_mode),
4323 NULL_RTX, 0);
4324 else
4325 quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4327 /* We have computed OP0 / abs(OP1). If OP1 is negative,
4328 negate the quotient. */
4329 if (d < 0)
4331 insn = get_last_insn ();
4332 if (insn != last
4333 && abs_d < ((unsigned HOST_WIDE_INT) 1
4334 << (HOST_BITS_PER_WIDE_INT - 1)))
4335 set_dst_reg_note (insn, REG_EQUAL,
4336 gen_rtx_DIV (compute_mode, op0,
4337 gen_int_mode
4338 (abs_d,
4339 compute_mode)),
4340 quotient);
4342 quotient = expand_unop (compute_mode, neg_optab,
4343 quotient, quotient, 0);
4346 else if (size <= HOST_BITS_PER_WIDE_INT)
4348 choose_multiplier (abs_d, size, size - 1,
4349 &ml, &post_shift, &lgup);
4350 if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4352 rtx t1, t2, t3;
4354 if (post_shift >= BITS_PER_WORD
4355 || size - 1 >= BITS_PER_WORD)
4356 goto fail1;
4358 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4359 + shift_cost (speed, compute_mode, size - 1)
4360 + add_cost (speed, compute_mode));
4361 t1 = expmed_mult_highpart (compute_mode, op0,
4362 GEN_INT (ml), NULL_RTX, 0,
4363 max_cost - extra_cost);
4364 if (t1 == 0)
4365 goto fail1;
4366 t2 = expand_shift
4367 (RSHIFT_EXPR, compute_mode, t1,
4368 post_shift, NULL_RTX, 0);
4369 t3 = expand_shift
4370 (RSHIFT_EXPR, compute_mode, op0,
4371 size - 1, NULL_RTX, 0);
4372 if (d < 0)
4373 quotient
4374 = force_operand (gen_rtx_MINUS (compute_mode,
4375 t3, t2),
4376 tquotient);
4377 else
4378 quotient
4379 = force_operand (gen_rtx_MINUS (compute_mode,
4380 t2, t3),
4381 tquotient);
4383 else
4385 rtx t1, t2, t3, t4;
4387 if (post_shift >= BITS_PER_WORD
4388 || size - 1 >= BITS_PER_WORD)
4389 goto fail1;
4391 ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4392 mlr = gen_int_mode (ml, compute_mode);
4393 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4394 + shift_cost (speed, compute_mode, size - 1)
4395 + 2 * add_cost (speed, compute_mode));
4396 t1 = expmed_mult_highpart (compute_mode, op0, mlr,
4397 NULL_RTX, 0,
4398 max_cost - extra_cost);
4399 if (t1 == 0)
4400 goto fail1;
4401 t2 = force_operand (gen_rtx_PLUS (compute_mode,
4402 t1, op0),
4403 NULL_RTX);
4404 t3 = expand_shift
4405 (RSHIFT_EXPR, compute_mode, t2,
4406 post_shift, NULL_RTX, 0);
4407 t4 = expand_shift
4408 (RSHIFT_EXPR, compute_mode, op0,
4409 size - 1, NULL_RTX, 0);
4410 if (d < 0)
4411 quotient
4412 = force_operand (gen_rtx_MINUS (compute_mode,
4413 t4, t3),
4414 tquotient);
4415 else
4416 quotient
4417 = force_operand (gen_rtx_MINUS (compute_mode,
4418 t3, t4),
4419 tquotient);
4422 else /* Too wide mode to use tricky code */
4423 break;
4425 insn = get_last_insn ();
4426 if (insn != last)
4427 set_dst_reg_note (insn, REG_EQUAL,
4428 gen_rtx_DIV (compute_mode, op0, op1),
4429 quotient);
4431 break;
4433 fail1:
4434 delete_insns_since (last);
4435 break;
4437 case FLOOR_DIV_EXPR:
4438 case FLOOR_MOD_EXPR:
4439 /* We will come here only for signed operations. */
4440 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4442 unsigned HOST_WIDE_INT mh, ml;
4443 int pre_shift, lgup, post_shift;
4444 HOST_WIDE_INT d = INTVAL (op1);
4446 if (d > 0)
4448 /* We could just as easily deal with negative constants here,
4449 but it does not seem worth the trouble for GCC 2.6. */
4450 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4452 pre_shift = floor_log2 (d);
4453 if (rem_flag)
4455 remainder = expand_binop (compute_mode, and_optab, op0,
4456 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4457 remainder, 0, OPTAB_LIB_WIDEN);
4458 if (remainder)
4459 return gen_lowpart (mode, remainder);
4461 quotient = expand_shift
4462 (RSHIFT_EXPR, compute_mode, op0,
4463 pre_shift, tquotient, 0);
4465 else
4467 rtx t1, t2, t3, t4;
4469 mh = choose_multiplier (d, size, size - 1,
4470 &ml, &post_shift, &lgup);
4471 gcc_assert (!mh);
4473 if (post_shift < BITS_PER_WORD
4474 && size - 1 < BITS_PER_WORD)
4476 t1 = expand_shift
4477 (RSHIFT_EXPR, compute_mode, op0,
4478 size - 1, NULL_RTX, 0);
4479 t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4480 NULL_RTX, 0, OPTAB_WIDEN);
4481 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4482 + shift_cost (speed, compute_mode, size - 1)
4483 + 2 * add_cost (speed, compute_mode));
4484 t3 = expmed_mult_highpart (compute_mode, t2,
4485 GEN_INT (ml), NULL_RTX, 1,
4486 max_cost - extra_cost);
4487 if (t3 != 0)
4489 t4 = expand_shift
4490 (RSHIFT_EXPR, compute_mode, t3,
4491 post_shift, NULL_RTX, 1);
4492 quotient = expand_binop (compute_mode, xor_optab,
4493 t4, t1, tquotient, 0,
4494 OPTAB_WIDEN);
4499 else
4501 rtx nsign, t1, t2, t3, t4;
4502 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4503 op0, constm1_rtx), NULL_RTX);
4504 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4505 0, OPTAB_WIDEN);
4506 nsign = expand_shift
4507 (RSHIFT_EXPR, compute_mode, t2,
4508 size - 1, NULL_RTX, 0);
4509 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4510 NULL_RTX);
4511 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4512 NULL_RTX, 0);
4513 if (t4)
4515 rtx t5;
4516 t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4517 NULL_RTX, 0);
4518 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4519 t4, t5),
4520 tquotient);
4525 if (quotient != 0)
4526 break;
4527 delete_insns_since (last);
4529 /* Try using an instruction that produces both the quotient and
4530 remainder, using truncation. We can easily compensate the quotient
4531 or remainder to get floor rounding, once we have the remainder.
4532 Notice that we compute also the final remainder value here,
4533 and return the result right away. */
4534 if (target == 0 || GET_MODE (target) != compute_mode)
4535 target = gen_reg_rtx (compute_mode);
4537 if (rem_flag)
4539 remainder
4540 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4541 quotient = gen_reg_rtx (compute_mode);
4543 else
4545 quotient
4546 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4547 remainder = gen_reg_rtx (compute_mode);
4550 if (expand_twoval_binop (sdivmod_optab, op0, op1,
4551 quotient, remainder, 0))
4553 /* This could be computed with a branch-less sequence.
4554 Save that for later. */
4555 rtx tem;
4556 rtx label = gen_label_rtx ();
4557 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4558 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4559 NULL_RTX, 0, OPTAB_WIDEN);
4560 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4561 expand_dec (quotient, const1_rtx);
4562 expand_inc (remainder, op1);
4563 emit_label (label);
4564 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4567 /* No luck with division elimination or divmod. Have to do it
4568 by conditionally adjusting op0 *and* the result. */
4570 rtx label1, label2, label3, label4, label5;
4571 rtx adjusted_op0;
4572 rtx tem;
4574 quotient = gen_reg_rtx (compute_mode);
4575 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4576 label1 = gen_label_rtx ();
4577 label2 = gen_label_rtx ();
4578 label3 = gen_label_rtx ();
4579 label4 = gen_label_rtx ();
4580 label5 = gen_label_rtx ();
4581 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4582 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4583 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4584 quotient, 0, OPTAB_LIB_WIDEN);
4585 if (tem != quotient)
4586 emit_move_insn (quotient, tem);
4587 emit_jump_insn (gen_jump (label5));
4588 emit_barrier ();
4589 emit_label (label1);
4590 expand_inc (adjusted_op0, const1_rtx);
4591 emit_jump_insn (gen_jump (label4));
4592 emit_barrier ();
4593 emit_label (label2);
4594 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4595 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4596 quotient, 0, OPTAB_LIB_WIDEN);
4597 if (tem != quotient)
4598 emit_move_insn (quotient, tem);
4599 emit_jump_insn (gen_jump (label5));
4600 emit_barrier ();
4601 emit_label (label3);
4602 expand_dec (adjusted_op0, const1_rtx);
4603 emit_label (label4);
4604 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4605 quotient, 0, OPTAB_LIB_WIDEN);
4606 if (tem != quotient)
4607 emit_move_insn (quotient, tem);
4608 expand_dec (quotient, const1_rtx);
4609 emit_label (label5);
4611 break;
4613 case CEIL_DIV_EXPR:
4614 case CEIL_MOD_EXPR:
4615 if (unsignedp)
4617 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4619 rtx t1, t2, t3;
4620 unsigned HOST_WIDE_INT d = INTVAL (op1);
4621 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4622 floor_log2 (d), tquotient, 1);
4623 t2 = expand_binop (compute_mode, and_optab, op0,
4624 GEN_INT (d - 1),
4625 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4626 t3 = gen_reg_rtx (compute_mode);
4627 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4628 compute_mode, 1, 1);
4629 if (t3 == 0)
4631 rtx lab;
4632 lab = gen_label_rtx ();
4633 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4634 expand_inc (t1, const1_rtx);
4635 emit_label (lab);
4636 quotient = t1;
4638 else
4639 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4640 t1, t3),
4641 tquotient);
4642 break;
4645 /* Try using an instruction that produces both the quotient and
4646 remainder, using truncation. We can easily compensate the
4647 quotient or remainder to get ceiling rounding, once we have the
4648 remainder. Notice that we compute also the final remainder
4649 value here, and return the result right away. */
4650 if (target == 0 || GET_MODE (target) != compute_mode)
4651 target = gen_reg_rtx (compute_mode);
4653 if (rem_flag)
4655 remainder = (REG_P (target)
4656 ? target : gen_reg_rtx (compute_mode));
4657 quotient = gen_reg_rtx (compute_mode);
4659 else
4661 quotient = (REG_P (target)
4662 ? target : gen_reg_rtx (compute_mode));
4663 remainder = gen_reg_rtx (compute_mode);
4666 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4667 remainder, 1))
4669 /* This could be computed with a branch-less sequence.
4670 Save that for later. */
4671 rtx label = gen_label_rtx ();
4672 do_cmp_and_jump (remainder, const0_rtx, EQ,
4673 compute_mode, label);
4674 expand_inc (quotient, const1_rtx);
4675 expand_dec (remainder, op1);
4676 emit_label (label);
4677 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4680 /* No luck with division elimination or divmod. Have to do it
4681 by conditionally adjusting op0 *and* the result. */
4683 rtx label1, label2;
4684 rtx adjusted_op0, tem;
4686 quotient = gen_reg_rtx (compute_mode);
4687 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4688 label1 = gen_label_rtx ();
4689 label2 = gen_label_rtx ();
4690 do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4691 compute_mode, label1);
4692 emit_move_insn (quotient, const0_rtx);
4693 emit_jump_insn (gen_jump (label2));
4694 emit_barrier ();
4695 emit_label (label1);
4696 expand_dec (adjusted_op0, const1_rtx);
4697 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4698 quotient, 1, OPTAB_LIB_WIDEN);
4699 if (tem != quotient)
4700 emit_move_insn (quotient, tem);
4701 expand_inc (quotient, const1_rtx);
4702 emit_label (label2);
4705 else /* signed */
4707 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4708 && INTVAL (op1) >= 0)
4710 /* This is extremely similar to the code for the unsigned case
4711 above. For 2.7 we should merge these variants, but for
4712 2.6.1 I don't want to touch the code for unsigned since that
4713 gets used in C. The signed case will only be used by other
4714 languages (Ada). */
4716 rtx t1, t2, t3;
4717 unsigned HOST_WIDE_INT d = INTVAL (op1);
4718 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4719 floor_log2 (d), tquotient, 0);
4720 t2 = expand_binop (compute_mode, and_optab, op0,
4721 GEN_INT (d - 1),
4722 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4723 t3 = gen_reg_rtx (compute_mode);
4724 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4725 compute_mode, 1, 1);
4726 if (t3 == 0)
4728 rtx lab;
4729 lab = gen_label_rtx ();
4730 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4731 expand_inc (t1, const1_rtx);
4732 emit_label (lab);
4733 quotient = t1;
4735 else
4736 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4737 t1, t3),
4738 tquotient);
4739 break;
4742 /* Try using an instruction that produces both the quotient and
4743 remainder, using truncation. We can easily compensate the
4744 quotient or remainder to get ceiling rounding, once we have the
4745 remainder. Notice that we compute also the final remainder
4746 value here, and return the result right away. */
4747 if (target == 0 || GET_MODE (target) != compute_mode)
4748 target = gen_reg_rtx (compute_mode);
4749 if (rem_flag)
4751 remainder = (REG_P (target)
4752 ? target : gen_reg_rtx (compute_mode));
4753 quotient = gen_reg_rtx (compute_mode);
4755 else
4757 quotient = (REG_P (target)
4758 ? target : gen_reg_rtx (compute_mode));
4759 remainder = gen_reg_rtx (compute_mode);
4762 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4763 remainder, 0))
4765 /* This could be computed with a branch-less sequence.
4766 Save that for later. */
4767 rtx tem;
4768 rtx label = gen_label_rtx ();
4769 do_cmp_and_jump (remainder, const0_rtx, EQ,
4770 compute_mode, label);
4771 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4772 NULL_RTX, 0, OPTAB_WIDEN);
4773 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4774 expand_inc (quotient, const1_rtx);
4775 expand_dec (remainder, op1);
4776 emit_label (label);
4777 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4780 /* No luck with division elimination or divmod. Have to do it
4781 by conditionally adjusting op0 *and* the result. */
4783 rtx label1, label2, label3, label4, label5;
4784 rtx adjusted_op0;
4785 rtx tem;
4787 quotient = gen_reg_rtx (compute_mode);
4788 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4789 label1 = gen_label_rtx ();
4790 label2 = gen_label_rtx ();
4791 label3 = gen_label_rtx ();
4792 label4 = gen_label_rtx ();
4793 label5 = gen_label_rtx ();
4794 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4795 do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4796 compute_mode, label1);
4797 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4798 quotient, 0, OPTAB_LIB_WIDEN);
4799 if (tem != quotient)
4800 emit_move_insn (quotient, tem);
4801 emit_jump_insn (gen_jump (label5));
4802 emit_barrier ();
4803 emit_label (label1);
4804 expand_dec (adjusted_op0, const1_rtx);
4805 emit_jump_insn (gen_jump (label4));
4806 emit_barrier ();
4807 emit_label (label2);
4808 do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4809 compute_mode, label3);
4810 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4811 quotient, 0, OPTAB_LIB_WIDEN);
4812 if (tem != quotient)
4813 emit_move_insn (quotient, tem);
4814 emit_jump_insn (gen_jump (label5));
4815 emit_barrier ();
4816 emit_label (label3);
4817 expand_inc (adjusted_op0, const1_rtx);
4818 emit_label (label4);
4819 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4820 quotient, 0, OPTAB_LIB_WIDEN);
4821 if (tem != quotient)
4822 emit_move_insn (quotient, tem);
4823 expand_inc (quotient, const1_rtx);
4824 emit_label (label5);
4827 break;
4829 case EXACT_DIV_EXPR:
4830 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4832 HOST_WIDE_INT d = INTVAL (op1);
4833 unsigned HOST_WIDE_INT ml;
4834 int pre_shift;
4835 rtx t1;
4837 pre_shift = floor_log2 (d & -d);
4838 ml = invert_mod2n (d >> pre_shift, size);
4839 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4840 pre_shift, NULL_RTX, unsignedp);
4841 quotient = expand_mult (compute_mode, t1,
4842 gen_int_mode (ml, compute_mode),
4843 NULL_RTX, 1);
4845 insn = get_last_insn ();
4846 set_dst_reg_note (insn, REG_EQUAL,
4847 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4848 compute_mode, op0, op1),
4849 quotient);
4851 break;
4853 case ROUND_DIV_EXPR:
4854 case ROUND_MOD_EXPR:
4855 if (unsignedp)
4857 rtx tem;
4858 rtx label;
4859 label = gen_label_rtx ();
4860 quotient = gen_reg_rtx (compute_mode);
4861 remainder = gen_reg_rtx (compute_mode);
4862 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4864 rtx tem;
4865 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4866 quotient, 1, OPTAB_LIB_WIDEN);
4867 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4868 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4869 remainder, 1, OPTAB_LIB_WIDEN);
4871 tem = plus_constant (compute_mode, op1, -1);
4872 tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1);
4873 do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4874 expand_inc (quotient, const1_rtx);
4875 expand_dec (remainder, op1);
4876 emit_label (label);
4878 else
4880 rtx abs_rem, abs_op1, tem, mask;
4881 rtx label;
4882 label = gen_label_rtx ();
4883 quotient = gen_reg_rtx (compute_mode);
4884 remainder = gen_reg_rtx (compute_mode);
4885 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4887 rtx tem;
4888 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4889 quotient, 0, OPTAB_LIB_WIDEN);
4890 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4891 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4892 remainder, 0, OPTAB_LIB_WIDEN);
4894 abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4895 abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4896 tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4897 1, NULL_RTX, 1);
4898 do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4899 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4900 NULL_RTX, 0, OPTAB_WIDEN);
4901 mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4902 size - 1, NULL_RTX, 0);
4903 tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4904 NULL_RTX, 0, OPTAB_WIDEN);
4905 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4906 NULL_RTX, 0, OPTAB_WIDEN);
4907 expand_inc (quotient, tem);
4908 tem = expand_binop (compute_mode, xor_optab, mask, op1,
4909 NULL_RTX, 0, OPTAB_WIDEN);
4910 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4911 NULL_RTX, 0, OPTAB_WIDEN);
4912 expand_dec (remainder, tem);
4913 emit_label (label);
4915 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4917 default:
4918 gcc_unreachable ();
4921 if (quotient == 0)
4923 if (target && GET_MODE (target) != compute_mode)
4924 target = 0;
4926 if (rem_flag)
4928 /* Try to produce the remainder without producing the quotient.
4929 If we seem to have a divmod pattern that does not require widening,
4930 don't try widening here. We should really have a WIDEN argument
4931 to expand_twoval_binop, since what we'd really like to do here is
4932 1) try a mod insn in compute_mode
4933 2) try a divmod insn in compute_mode
4934 3) try a div insn in compute_mode and multiply-subtract to get
4935 remainder
4936 4) try the same things with widening allowed. */
4937 remainder
4938 = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4939 op0, op1, target,
4940 unsignedp,
4941 ((optab_handler (optab2, compute_mode)
4942 != CODE_FOR_nothing)
4943 ? OPTAB_DIRECT : OPTAB_WIDEN));
4944 if (remainder == 0)
4946 /* No luck there. Can we do remainder and divide at once
4947 without a library call? */
4948 remainder = gen_reg_rtx (compute_mode);
4949 if (! expand_twoval_binop ((unsignedp
4950 ? udivmod_optab
4951 : sdivmod_optab),
4952 op0, op1,
4953 NULL_RTX, remainder, unsignedp))
4954 remainder = 0;
4957 if (remainder)
4958 return gen_lowpart (mode, remainder);
4961 /* Produce the quotient. Try a quotient insn, but not a library call.
4962 If we have a divmod in this mode, use it in preference to widening
4963 the div (for this test we assume it will not fail). Note that optab2
4964 is set to the one of the two optabs that the call below will use. */
4965 quotient
4966 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4967 op0, op1, rem_flag ? NULL_RTX : target,
4968 unsignedp,
4969 ((optab_handler (optab2, compute_mode)
4970 != CODE_FOR_nothing)
4971 ? OPTAB_DIRECT : OPTAB_WIDEN));
4973 if (quotient == 0)
4975 /* No luck there. Try a quotient-and-remainder insn,
4976 keeping the quotient alone. */
4977 quotient = gen_reg_rtx (compute_mode);
4978 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4979 op0, op1,
4980 quotient, NULL_RTX, unsignedp))
4982 quotient = 0;
4983 if (! rem_flag)
4984 /* Still no luck. If we are not computing the remainder,
4985 use a library call for the quotient. */
4986 quotient = sign_expand_binop (compute_mode,
4987 udiv_optab, sdiv_optab,
4988 op0, op1, target,
4989 unsignedp, OPTAB_LIB_WIDEN);
4994 if (rem_flag)
4996 if (target && GET_MODE (target) != compute_mode)
4997 target = 0;
4999 if (quotient == 0)
5001 /* No divide instruction either. Use library for remainder. */
5002 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5003 op0, op1, target,
5004 unsignedp, OPTAB_LIB_WIDEN);
5005 /* No remainder function. Try a quotient-and-remainder
5006 function, keeping the remainder. */
5007 if (!remainder)
5009 remainder = gen_reg_rtx (compute_mode);
5010 if (!expand_twoval_binop_libfunc
5011 (unsignedp ? udivmod_optab : sdivmod_optab,
5012 op0, op1,
5013 NULL_RTX, remainder,
5014 unsignedp ? UMOD : MOD))
5015 remainder = NULL_RTX;
5018 else
5020 /* We divided. Now finish doing X - Y * (X / Y). */
5021 remainder = expand_mult (compute_mode, quotient, op1,
5022 NULL_RTX, unsignedp);
5023 remainder = expand_binop (compute_mode, sub_optab, op0,
5024 remainder, target, unsignedp,
5025 OPTAB_LIB_WIDEN);
5029 return gen_lowpart (mode, rem_flag ? remainder : quotient);
5032 /* Return a tree node with data type TYPE, describing the value of X.
5033 Usually this is a VAR_DECL, if there is no obvious better choice.
5034 X may be an expression, however we only support those expressions
5035 generated by loop.c. */
5037 tree
5038 make_tree (tree type, rtx x)
5040 tree t;
5042 switch (GET_CODE (x))
5044 case CONST_INT:
5046 HOST_WIDE_INT hi = 0;
5048 if (INTVAL (x) < 0
5049 && !(TYPE_UNSIGNED (type)
5050 && (GET_MODE_BITSIZE (TYPE_MODE (type))
5051 < HOST_BITS_PER_WIDE_INT)))
5052 hi = -1;
5054 t = build_int_cst_wide (type, INTVAL (x), hi);
5056 return t;
5059 case CONST_DOUBLE:
5060 if (GET_MODE (x) == VOIDmode)
5061 t = build_int_cst_wide (type,
5062 CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
5063 else
5065 REAL_VALUE_TYPE d;
5067 REAL_VALUE_FROM_CONST_DOUBLE (d, x);
5068 t = build_real (type, d);
5071 return t;
5073 case CONST_VECTOR:
5075 int units = CONST_VECTOR_NUNITS (x);
5076 tree itype = TREE_TYPE (type);
5077 tree *elts;
5078 int i;
5080 /* Build a tree with vector elements. */
5081 elts = XALLOCAVEC (tree, units);
5082 for (i = units - 1; i >= 0; --i)
5084 rtx elt = CONST_VECTOR_ELT (x, i);
5085 elts[i] = make_tree (itype, elt);
5088 return build_vector (type, elts);
5091 case PLUS:
5092 return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5093 make_tree (type, XEXP (x, 1)));
5095 case MINUS:
5096 return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5097 make_tree (type, XEXP (x, 1)));
5099 case NEG:
5100 return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5102 case MULT:
5103 return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5104 make_tree (type, XEXP (x, 1)));
5106 case ASHIFT:
5107 return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5108 make_tree (type, XEXP (x, 1)));
5110 case LSHIFTRT:
5111 t = unsigned_type_for (type);
5112 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5113 make_tree (t, XEXP (x, 0)),
5114 make_tree (type, XEXP (x, 1))));
5116 case ASHIFTRT:
5117 t = signed_type_for (type);
5118 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5119 make_tree (t, XEXP (x, 0)),
5120 make_tree (type, XEXP (x, 1))));
5122 case DIV:
5123 if (TREE_CODE (type) != REAL_TYPE)
5124 t = signed_type_for (type);
5125 else
5126 t = type;
5128 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5129 make_tree (t, XEXP (x, 0)),
5130 make_tree (t, XEXP (x, 1))));
5131 case UDIV:
5132 t = unsigned_type_for (type);
5133 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5134 make_tree (t, XEXP (x, 0)),
5135 make_tree (t, XEXP (x, 1))));
5137 case SIGN_EXTEND:
5138 case ZERO_EXTEND:
5139 t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5140 GET_CODE (x) == ZERO_EXTEND);
5141 return fold_convert (type, make_tree (t, XEXP (x, 0)));
5143 case CONST:
5144 return make_tree (type, XEXP (x, 0));
5146 case SYMBOL_REF:
5147 t = SYMBOL_REF_DECL (x);
5148 if (t)
5149 return fold_convert (type, build_fold_addr_expr (t));
5150 /* else fall through. */
5152 default:
5153 t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5155 /* If TYPE is a POINTER_TYPE, we might need to convert X from
5156 address mode to pointer mode. */
5157 if (POINTER_TYPE_P (type))
5158 x = convert_memory_address_addr_space
5159 (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5161 /* Note that we do *not* use SET_DECL_RTL here, because we do not
5162 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
5163 t->decl_with_rtl.rtl = x;
5165 return t;
5169 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5170 and returning TARGET.
5172 If TARGET is 0, a pseudo-register or constant is returned. */
5174 rtx
5175 expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
5177 rtx tem = 0;
5179 if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5180 tem = simplify_binary_operation (AND, mode, op0, op1);
5181 if (tem == 0)
5182 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5184 if (target == 0)
5185 target = tem;
5186 else if (tem != target)
5187 emit_move_insn (target, tem);
5188 return target;
5191 /* Helper function for emit_store_flag. */
5192 static rtx
5193 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5194 enum machine_mode mode, enum machine_mode compare_mode,
5195 int unsignedp, rtx x, rtx y, int normalizep,
5196 enum machine_mode target_mode)
5198 struct expand_operand ops[4];
5199 rtx op0, last, comparison, subtarget;
5200 enum machine_mode result_mode = insn_data[(int) icode].operand[0].mode;
5202 last = get_last_insn ();
5203 x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5204 y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5205 if (!x || !y)
5207 delete_insns_since (last);
5208 return NULL_RTX;
5211 if (target_mode == VOIDmode)
5212 target_mode = result_mode;
5213 if (!target)
5214 target = gen_reg_rtx (target_mode);
5216 comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5218 create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5219 create_fixed_operand (&ops[1], comparison);
5220 create_fixed_operand (&ops[2], x);
5221 create_fixed_operand (&ops[3], y);
5222 if (!maybe_expand_insn (icode, 4, ops))
5224 delete_insns_since (last);
5225 return NULL_RTX;
5227 subtarget = ops[0].value;
5229 /* If we are converting to a wider mode, first convert to
5230 TARGET_MODE, then normalize. This produces better combining
5231 opportunities on machines that have a SIGN_EXTRACT when we are
5232 testing a single bit. This mostly benefits the 68k.
5234 If STORE_FLAG_VALUE does not have the sign bit set when
5235 interpreted in MODE, we can do this conversion as unsigned, which
5236 is usually more efficient. */
5237 if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
5239 convert_move (target, subtarget,
5240 val_signbit_known_clear_p (result_mode,
5241 STORE_FLAG_VALUE));
5242 op0 = target;
5243 result_mode = target_mode;
5245 else
5246 op0 = subtarget;
5248 /* If we want to keep subexpressions around, don't reuse our last
5249 target. */
5250 if (optimize)
5251 subtarget = 0;
5253 /* Now normalize to the proper value in MODE. Sometimes we don't
5254 have to do anything. */
5255 if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5257 /* STORE_FLAG_VALUE might be the most negative number, so write
5258 the comparison this way to avoid a compile-time warning. */
5259 else if (- normalizep == STORE_FLAG_VALUE)
5260 op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5262 /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5263 it hard to use a value of just the sign bit due to ANSI integer
5264 constant typing rules. */
5265 else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5266 op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5267 GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5268 normalizep == 1);
5269 else
5271 gcc_assert (STORE_FLAG_VALUE & 1);
5273 op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5274 if (normalizep == -1)
5275 op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5278 /* If we were converting to a smaller mode, do the conversion now. */
5279 if (target_mode != result_mode)
5281 convert_move (target, op0, 0);
5282 return target;
5284 else
5285 return op0;
5289 /* A subroutine of emit_store_flag only including "tricks" that do not
5290 need a recursive call. These are kept separate to avoid infinite
5291 loops. */
5293 static rtx
5294 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5295 enum machine_mode mode, int unsignedp, int normalizep,
5296 enum machine_mode target_mode)
5298 rtx subtarget;
5299 enum insn_code icode;
5300 enum machine_mode compare_mode;
5301 enum mode_class mclass;
5302 enum rtx_code scode;
5303 rtx tem;
5305 if (unsignedp)
5306 code = unsigned_condition (code);
5307 scode = swap_condition (code);
5309 /* If one operand is constant, make it the second one. Only do this
5310 if the other operand is not constant as well. */
5312 if (swap_commutative_operands_p (op0, op1))
5314 tem = op0;
5315 op0 = op1;
5316 op1 = tem;
5317 code = swap_condition (code);
5320 if (mode == VOIDmode)
5321 mode = GET_MODE (op0);
5323 /* For some comparisons with 1 and -1, we can convert them to
5324 comparisons with zero. This will often produce more opportunities for
5325 store-flag insns. */
5327 switch (code)
5329 case LT:
5330 if (op1 == const1_rtx)
5331 op1 = const0_rtx, code = LE;
5332 break;
5333 case LE:
5334 if (op1 == constm1_rtx)
5335 op1 = const0_rtx, code = LT;
5336 break;
5337 case GE:
5338 if (op1 == const1_rtx)
5339 op1 = const0_rtx, code = GT;
5340 break;
5341 case GT:
5342 if (op1 == constm1_rtx)
5343 op1 = const0_rtx, code = GE;
5344 break;
5345 case GEU:
5346 if (op1 == const1_rtx)
5347 op1 = const0_rtx, code = NE;
5348 break;
5349 case LTU:
5350 if (op1 == const1_rtx)
5351 op1 = const0_rtx, code = EQ;
5352 break;
5353 default:
5354 break;
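/* Editor's note: an illustrative summary, not part of this file, of the
   integer identities the switch above relies on:

     X <  1   (LT)   is  X <= 0  (LE)      X >= 1   (GE)   is  X >  0  (GT)
     X <= -1  (LE)   is  X <  0  (LT)      X >  -1  (GT)   is  X >= 0  (GE)
     X >= 1   (GEU)  is  X != 0  (NE)      X <  1   (LTU)  is  X == 0  (EQ)  */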
5357 /* If we are comparing a double-word integer with zero or -1, we can
5358 convert the comparison into one involving a single word. */
5359 if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5360 && GET_MODE_CLASS (mode) == MODE_INT
5361 && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5363 if ((code == EQ || code == NE)
5364 && (op1 == const0_rtx || op1 == constm1_rtx))
5366 rtx op00, op01;
5368 /* Do a logical OR or AND of the two words and compare the
5369 result. */
5370 op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5371 op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5372 tem = expand_binop (word_mode,
5373 op1 == const0_rtx ? ior_optab : and_optab,
5374 op00, op01, NULL_RTX, unsignedp,
5375 OPTAB_DIRECT);
5377 if (tem != 0)
5378 tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5379 unsignedp, normalizep);
5381 else if ((code == LT || code == GE) && op1 == const0_rtx)
5383 rtx op0h;
5385 /* If testing the sign bit, we can just test the high word. */
5386 op0h = simplify_gen_subreg (word_mode, op0, mode,
5387 subreg_highpart_offset (word_mode,
5388 mode));
5389 tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5390 unsignedp, normalizep);
5392 else
5393 tem = NULL_RTX;
5395 if (tem)
5397 if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5398 return tem;
5399 if (!target)
5400 target = gen_reg_rtx (target_mode);
5402 convert_move (target, tem,
5403 !val_signbit_known_set_p (word_mode,
5404 (normalizep ? normalizep
5405 : STORE_FLAG_VALUE)));
5406 return target;
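/* Editor's note: an illustrative sketch, not part of this file, of the
   word-wise reduction above, written for a 64-bit value split into two
   words on a machine where int is 32 bits and long long is 64 bits.
   (A sign test, LT or GE against zero, needs only the high word.)  */
static int
example_double_word_test (unsigned long long x, int against_minus_one)
{
  unsigned int lo = (unsigned int) x;
  unsigned int hi = (unsigned int) (x >> 32);

  if (against_minus_one)
    /* x == -1 exactly when every bit of both words is set.  */
    return (lo & hi) == 0xffffffffu;
  /* x == 0 exactly when both words are zero.  */
  return (lo | hi) == 0;
}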
5410 /* If this is A < 0 or A >= 0, we can do this by taking the ones
5411 complement of A (for GE) and shifting the sign bit to the low bit. */
5412 if (op1 == const0_rtx && (code == LT || code == GE)
5413 && GET_MODE_CLASS (mode) == MODE_INT
5414 && (normalizep || STORE_FLAG_VALUE == 1
5415 || val_signbit_p (mode, STORE_FLAG_VALUE)))
5417 subtarget = target;
5419 if (!target)
5420 target_mode = mode;
5422 /* If the result is to be wider than OP0, it is best to convert it
5423 first. If it is to be narrower, it is *incorrect* to convert it
5424 first. */
5425 else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5427 op0 = convert_modes (target_mode, mode, op0, 0);
5428 mode = target_mode;
5431 if (target_mode != mode)
5432 subtarget = 0;
5434 if (code == GE)
5435 op0 = expand_unop (mode, one_cmpl_optab, op0,
5436 ((STORE_FLAG_VALUE == 1 || normalizep)
5437 ? 0 : subtarget), 0);
5439 if (STORE_FLAG_VALUE == 1 || normalizep)
5440 /* If we are supposed to produce a 0/1 value, we want to do
5441 a logical shift from the sign bit to the low-order bit; for
5442 a -1/0 value, we do an arithmetic shift. */
5443 op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5444 GET_MODE_BITSIZE (mode) - 1,
5445 subtarget, normalizep != -1);
5447 if (mode != target_mode)
5448 op0 = convert_modes (target_mode, mode, op0, 0);
5450 return op0;
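/* Editor's note: an illustrative sketch, not part of this file, of the
   sign-bit trick above, assuming a 32-bit int and GCC's arithmetic right
   shift of signed values.  For A >= 0 the operand is complemented first,
   so its sign bit becomes set exactly when A is non-negative.  */
static int
example_less_than_zero (int a, int normalizep)
{
  if (normalizep == -1)
    return a >> 31;				/* arithmetic shift: 0 or -1 */
  return (int) ((unsigned int) a >> 31);	/* logical shift: 0 or 1 */
}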
5453 mclass = GET_MODE_CLASS (mode);
5454 for (compare_mode = mode; compare_mode != VOIDmode;
5455 compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5457 enum machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5458 icode = optab_handler (cstore_optab, optab_mode);
5459 if (icode != CODE_FOR_nothing)
5461 do_pending_stack_adjust ();
5462 tem = emit_cstore (target, icode, code, mode, compare_mode,
5463 unsignedp, op0, op1, normalizep, target_mode);
5464 if (tem)
5465 return tem;
5467 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5469 tem = emit_cstore (target, icode, scode, mode, compare_mode,
5470 unsignedp, op1, op0, normalizep, target_mode);
5471 if (tem)
5472 return tem;
5474 break;
5478 return 0;
5481 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5482 and store the result in TARGET. Normally return TARGET.
5483 Return 0 if that cannot be done.
5485 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
5486 it is VOIDmode, they cannot both be CONST_INT.
5488 UNSIGNEDP is for the case where we have to widen the operands
5489 to perform the operation. It says to use zero-extension.
5491 NORMALIZEP is 1 if we should convert the result to be either zero
5492 or one. NORMALIZEP is -1 if we should convert the result to be
5493 either zero or -1. If NORMALIZEP is zero, the result will be left
5494 "raw" out of the scc insn. */
5496 rtx
5497 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5498 enum machine_mode mode, int unsignedp, int normalizep)
5500 enum machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5501 enum rtx_code rcode;
5502 rtx subtarget;
5503 rtx tem, last, trueval;
5505 tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5506 target_mode);
5507 if (tem)
5508 return tem;
5510 /* If we reached here, we can't do this with an scc insn; however, there
5511 are some comparisons that can be done in other ways. Don't do any
5512 of these cases if branches are very cheap. */
5513 if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5514 return 0;
5516 /* See what we need to return. We can only return a 1, -1, or the
5517 sign bit. */
5519 if (normalizep == 0)
5521 if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5522 normalizep = STORE_FLAG_VALUE;
5524 else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5526 else
5527 return 0;
5530 last = get_last_insn ();
5532 /* If optimizing, use different pseudo registers for each insn, instead
5533 of reusing the same pseudo. This leads to better CSE, but slows
5534 down the compiler, since there are more pseudos. */
5535 subtarget = (!optimize
5536 && (target_mode == mode)) ? target : NULL_RTX;
5537 trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5539 /* For floating-point comparisons, try the reverse comparison or try
5540 changing the "orderedness" of the comparison. */
5541 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5543 enum rtx_code first_code;
5544 bool and_them;
5546 rcode = reverse_condition_maybe_unordered (code);
5547 if (can_compare_p (rcode, mode, ccp_store_flag)
5548 && (code == ORDERED || code == UNORDERED
5549 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5550 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5552 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5553 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5555 /* For the reverse comparison, use either an addition or an XOR. */
5556 if (want_add
5557 && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5558 optimize_insn_for_speed_p ()) == 0)
5560 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5561 STORE_FLAG_VALUE, target_mode);
5562 if (tem)
5563 return expand_binop (target_mode, add_optab, tem,
5564 GEN_INT (normalizep),
5565 target, 0, OPTAB_WIDEN);
5567 else if (!want_add
5568 && rtx_cost (trueval, XOR, 1,
5569 optimize_insn_for_speed_p ()) == 0)
5571 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5572 normalizep, target_mode);
5573 if (tem)
5574 return expand_binop (target_mode, xor_optab, tem, trueval,
5575 target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
5579 delete_insns_since (last);
5581 /* ORDERED and UNORDERED cannot be split; only the trick above applies. */
5582 if (code == ORDERED || code == UNORDERED)
5583 return 0;
5585 and_them = split_comparison (code, mode, &first_code, &code);
5587 /* If there are no NaNs, the first comparison should always fall through.
5588 Effectively change the comparison to the other one. */
5589 if (!HONOR_NANS (mode))
5591 gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5592 return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5593 target_mode);
5596 #ifdef HAVE_conditional_move
5597 /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5598 conditional move. */
5599 tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5600 normalizep, target_mode);
5601 if (tem == 0)
5602 return 0;
5604 if (and_them)
5605 tem = emit_conditional_move (target, code, op0, op1, mode,
5606 tem, const0_rtx, GET_MODE (tem), 0);
5607 else
5608 tem = emit_conditional_move (target, code, op0, op1, mode,
5609 trueval, tem, GET_MODE (tem), 0);
5611 if (tem == 0)
5612 delete_insns_since (last);
5613 return tem;
5614 #else
5615 return 0;
5616 #endif
5619 /* The remaining tricks only apply to integer comparisons. */
5621 if (GET_MODE_CLASS (mode) != MODE_INT)
5622 return 0;
5624 /* If this is an equality comparison of integers, we can try to exclusive-or
5625 (or subtract) the two operands and use a recursive call to try the
5626 comparison with zero. Don't do any of these cases if branches are
5627 very cheap. */
5629 if ((code == EQ || code == NE) && op1 != const0_rtx)
5631 tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5632 OPTAB_WIDEN);
5634 if (tem == 0)
5635 tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5636 OPTAB_WIDEN);
5637 if (tem != 0)
5638 tem = emit_store_flag (target, code, tem, const0_rtx,
5639 mode, unsignedp, normalizep);
5640 if (tem != 0)
5641 return tem;
5643 delete_insns_since (last);
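/* Editor's note: an illustrative identity, not part of this file, behind
   the recursive call above: X == Y exactly when (X ^ Y) == 0, and likewise
   when (X - Y) == 0, so an equality test against a nonzero operand can be
   reduced to a store-flag against zero.  */
static int
example_eq_via_xor (unsigned int x, unsigned int y)
{
  return (x ^ y) == 0;		/* same truth value as x == y */
}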
5646 /* For integer comparisons, try the reverse comparison. However, for
5647 small X, if we would have to extend it anyway, implementing "X != 0"
5648 as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0". */
5649 rcode = reverse_condition (code);
5650 if (can_compare_p (rcode, mode, ccp_store_flag)
5651 && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5652 && code == NE
5653 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5654 && op1 == const0_rtx))
5656 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5657 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5659 /* Again, for the reverse comparison, use either an addition or an XOR. */
5660 if (want_add
5661 && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5662 optimize_insn_for_speed_p ()) == 0)
5664 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5665 STORE_FLAG_VALUE, target_mode);
5666 if (tem != 0)
5667 tem = expand_binop (target_mode, add_optab, tem,
5668 GEN_INT (normalizep), target, 0, OPTAB_WIDEN);
5670 else if (!want_add
5671 && rtx_cost (trueval, XOR, 1,
5672 optimize_insn_for_speed_p ()) == 0)
5674 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5675 normalizep, target_mode);
5676 if (tem != 0)
5677 tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5678 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5681 if (tem != 0)
5682 return tem;
5683 delete_insns_since (last);
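/* Editor's note: an illustrative sketch, not part of this file, of why an
   addition or an XOR suffices to recover the requested comparison from its
   reverse (the same fixup is used in the floating-point case earlier).  */
static int
example_reverse_via_add (int rev)
{
  /* REV is the reversed test as 0/1; adding NORMALIZEP (-1) maps
     1 -> 0 and 0 -> -1, i.e. the original test as 0/-1.  */
  return rev + (-1);
}

static int
example_reverse_via_xor (int rev, int trueval)
{
  /* REV is the reversed test as 0/TRUEVAL; XOR with TRUEVAL swaps the two
     values, giving the original test as 0/TRUEVAL.  */
  return rev ^ trueval;
}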
5686 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5687 the constant zero. Reject all other comparisons at this point. Only
5688 do LE and GT if branches are expensive since they are expensive on
5689 2-operand machines. */
5691 if (op1 != const0_rtx
5692 || (code != EQ && code != NE
5693 && (BRANCH_COST (optimize_insn_for_speed_p (),
5694 false) <= 1 || (code != LE && code != GT))))
5695 return 0;
5697 /* Try to put the result of the comparison in the sign bit. Assume we can't
5698 do the necessary operation below. */
5700 tem = 0;
5702 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
5703 the sign bit set. */
5705 if (code == LE)
5707 /* This is destructive, so SUBTARGET can't be OP0. */
5708 if (rtx_equal_p (subtarget, op0))
5709 subtarget = 0;
5711 tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5712 OPTAB_WIDEN);
5713 if (tem)
5714 tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5715 OPTAB_WIDEN);
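/* Editor's note: an illustrative sketch, not part of this file, assuming a
   32-bit int with wrapping subtraction.  For A > 0 both A and A - 1 are
   non-negative; for A == 0 the term A - 1 is -1; for A < 0 the term A is
   itself negative.  So the sign bit of (A | (A - 1)) is set exactly when
   A <= 0.  */
static int
example_le_zero (int a)
{
  return (unsigned int) (a | (a - 1)) >> 31;	/* 1 iff a <= 0 */
}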
5718 /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5719 number of bits in the mode of OP0, minus one. */
5721 if (code == GT)
5723 if (rtx_equal_p (subtarget, op0))
5724 subtarget = 0;
5726 tem = expand_shift (RSHIFT_EXPR, mode, op0,
5727 GET_MODE_BITSIZE (mode) - 1,
5728 subtarget, 0);
5729 tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5730 OPTAB_WIDEN);
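/* Editor's note: an illustrative sketch, not part of this file, assuming a
   32-bit int with arithmetic right shift.  (A >> 31) is 0 for A >= 0 and
   -1 for A < 0, so (A >> 31) - A is negative exactly when A > 0.  */
static int
example_gt_zero (int a)
{
  return (unsigned int) ((a >> 31) - a) >> 31;	/* 1 iff a > 0 */
}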
5733 if (code == EQ || code == NE)
5735 /* For EQ or NE, one way to do the comparison is to apply an operation
5736 that converts the operand into a positive number if it is nonzero
5737 or zero if it was originally zero. Then, for EQ, we subtract 1 and
5738 for NE we negate. This puts the result in the sign bit. Then we
5739 normalize with a shift, if needed.
5741 Two operations that can do the above actions are ABS and FFS, so try
5742 them. If that doesn't work, and MODE is smaller than a full word,
5743 we can use zero-extension to the wider mode (an unsigned conversion)
5744 as the operation. */
5746 /* Note that ABS doesn't yield a positive number for INT_MIN, but
5747 that is compensated by the subsequent overflow when subtracting
5748 one / negating. */
5750 if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5751 tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5752 else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5753 tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5754 else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5756 tem = convert_modes (word_mode, mode, op0, 1);
5757 mode = word_mode;
5760 if (tem != 0)
5762 if (code == EQ)
5763 tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5764 0, OPTAB_WIDEN);
5765 else
5766 tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5769 /* If we couldn't do it that way, for NE we can "or" the two's complement
5770 of the value with itself. For EQ, we take the one's complement of
5771 that "or", which is an extra insn, so we only handle EQ if branches
5772 are expensive. */
5774 if (tem == 0
5775 && (code == NE
5776 || BRANCH_COST (optimize_insn_for_speed_p (),
5777 false) > 1))
5779 if (rtx_equal_p (subtarget, op0))
5780 subtarget = 0;
5782 tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5783 tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5784 OPTAB_WIDEN);
5786 if (tem && code == EQ)
5787 tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
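/* Editor's note: an illustrative sketch, not part of this file, assuming a
   32-bit int with wrapping negation.  For nonzero A at least one of A and
   -A is negative, while for A == 0 both are zero, so the sign bit of
   (-A | A) is set exactly when A != 0; complementing first gives EQ.  */
static int
example_ne_zero (int a)
{
  return (unsigned int) (-a | a) >> 31;		/* 1 iff a != 0 */
}

static int
example_eq_zero (int a)
{
  return (unsigned int) ~(-a | a) >> 31;	/* 1 iff a == 0 */
}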
5791 if (tem && normalizep)
5792 tem = expand_shift (RSHIFT_EXPR, mode, tem,
5793 GET_MODE_BITSIZE (mode) - 1,
5794 subtarget, normalizep == 1);
5796 if (tem)
5798 if (!target)
5800 else if (GET_MODE (tem) != target_mode)
5802 convert_move (target, tem, 0);
5803 tem = target;
5805 else if (!subtarget)
5807 emit_move_insn (target, tem);
5808 tem = target;
5811 else
5812 delete_insns_since (last);
5814 return tem;
5817 /* Like emit_store_flag, but always succeeds. */
5819 rtx
5820 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5821 enum machine_mode mode, int unsignedp, int normalizep)
5823 rtx tem, label;
5824 rtx trueval, falseval;
5826 /* First see if emit_store_flag can do the job. */
5827 tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5828 if (tem != 0)
5829 return tem;
5831 if (!target)
5832 target = gen_reg_rtx (word_mode);
5834 /* If this failed, we have to do this with set/compare/jump/set code.
5835 For foo != 0, if foo is in OP0, just replace it with 1 if nonzero. */
5836 trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5837 if (code == NE
5838 && GET_MODE_CLASS (mode) == MODE_INT
5839 && REG_P (target)
5840 && op0 == target
5841 && op1 == const0_rtx)
5843 label = gen_label_rtx ();
5844 do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp,
5845 mode, NULL_RTX, NULL_RTX, label, -1);
5846 emit_move_insn (target, trueval);
5847 emit_label (label);
5848 return target;
5851 if (!REG_P (target)
5852 || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5853 target = gen_reg_rtx (GET_MODE (target));
5855 /* Jump in the right direction if the target cannot implement CODE
5856 but can jump on its reverse condition. */
5857 falseval = const0_rtx;
5858 if (! can_compare_p (code, mode, ccp_jump)
5859 && (! FLOAT_MODE_P (mode)
5860 || code == ORDERED || code == UNORDERED
5861 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5862 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5864 enum rtx_code rcode;
5865 if (FLOAT_MODE_P (mode))
5866 rcode = reverse_condition_maybe_unordered (code);
5867 else
5868 rcode = reverse_condition (code);
5870 /* Canonicalize to UNORDERED for the libcall. */
5871 if (can_compare_p (rcode, mode, ccp_jump)
5872 || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5874 falseval = trueval;
5875 trueval = const0_rtx;
5876 code = rcode;
5880 emit_move_insn (target, trueval);
5881 label = gen_label_rtx ();
5882 do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5883 NULL_RTX, label, -1);
5885 emit_move_insn (target, falseval);
5886 emit_label (label);
5888 return target;
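/* Editor's note: an illustrative sketch, not part of this file, of the
   set/compare/jump/set fallback used above, written as the equivalent C;
   "<" stands in for whichever comparison lacks a usable scc pattern, and
   1/0 stand in for TRUEVAL/FALSEVAL.  */
static int
example_store_flag_force (int op0, int op1)
{
  int target = 1;		/* emit_move_insn (target, trueval) */
  if (!(op0 < op1))		/* do_compare_rtx_and_jump past the next store */
    target = 0;			/* emit_move_insn (target, falseval) */
  return target;
}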
5891 /* Perform a possibly multi-word comparison and conditional jump to LABEL
5892 if ARG1 OP ARG2 is true, where ARG1 and ARG2 are of mode MODE. This is
5893 now a thin wrapper around do_compare_rtx_and_jump. */
5895 static void
5896 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
5897 rtx label)
5899 int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5900 do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode,
5901 NULL_RTX, NULL_RTX, label, -1);