official-gcc.git: gcc/expmed.c
1 /* Medium-level subroutines: convert bit-field store and extract
2 and shifts, multiplies and divides to rtl instructions.
3 Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
4 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
5 2011, 2012
6 Free Software Foundation, Inc.
8 This file is part of GCC.
10 GCC is free software; you can redistribute it and/or modify it under
11 the terms of the GNU General Public License as published by the Free
12 Software Foundation; either version 3, or (at your option) any later
13 version.
15 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
16 WARRANTY; without even the implied warranty of MERCHANTABILITY or
17 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 for more details.
20 You should have received a copy of the GNU General Public License
21 along with GCC; see the file COPYING3. If not see
22 <http://www.gnu.org/licenses/>. */
25 #include "config.h"
26 #include "system.h"
27 #include "coretypes.h"
28 #include "tm.h"
29 #include "diagnostic-core.h"
30 #include "rtl.h"
31 #include "tree.h"
32 #include "tm_p.h"
33 #include "flags.h"
34 #include "insn-config.h"
35 #include "expr.h"
36 #include "optabs.h"
37 #include "recog.h"
38 #include "langhooks.h"
39 #include "df.h"
40 #include "target.h"
41 #include "expmed.h"
43 struct target_expmed default_target_expmed;
44 #if SWITCHABLE_TARGET
45 struct target_expmed *this_target_expmed = &default_target_expmed;
46 #endif
48 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
49 unsigned HOST_WIDE_INT,
50 unsigned HOST_WIDE_INT,
51 unsigned HOST_WIDE_INT,
52 unsigned HOST_WIDE_INT,
53 rtx);
54 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
55 unsigned HOST_WIDE_INT,
56 unsigned HOST_WIDE_INT,
57 unsigned HOST_WIDE_INT,
58 rtx);
59 static rtx extract_fixed_bit_field (enum machine_mode, rtx,
60 unsigned HOST_WIDE_INT,
61 unsigned HOST_WIDE_INT,
62 unsigned HOST_WIDE_INT, rtx, int, bool);
63 static rtx mask_rtx (enum machine_mode, int, int, int);
64 static rtx lshift_value (enum machine_mode, rtx, int, int);
65 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
66 unsigned HOST_WIDE_INT, int);
67 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
68 static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
69 static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
71 /* Test whether a value is zero or a power of two. */
72 #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
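/* Illustrative cases: x & (x - 1) clears the lowest set bit, so
   EXACT_POWER_OF_2_OR_ZERO_P (0) and EXACT_POWER_OF_2_OR_ZERO_P (8) are
   true, while EXACT_POWER_OF_2_OR_ZERO_P (12) is false because
   12 & 11 == 8. */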
74 #ifndef SLOW_UNALIGNED_ACCESS
75 #define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
76 #endif
79 /* Reduce conditional compilation elsewhere. */
80 #ifndef HAVE_insv
81 #define HAVE_insv 0
82 #define CODE_FOR_insv CODE_FOR_nothing
83 #define gen_insv(a,b,c,d) NULL_RTX
84 #endif
85 #ifndef HAVE_extv
86 #define HAVE_extv 0
87 #define CODE_FOR_extv CODE_FOR_nothing
88 #define gen_extv(a,b,c,d) NULL_RTX
89 #endif
90 #ifndef HAVE_extzv
91 #define HAVE_extzv 0
92 #define CODE_FOR_extzv CODE_FOR_nothing
93 #define gen_extzv(a,b,c,d) NULL_RTX
94 #endif
96 struct init_expmed_rtl
98 struct rtx_def reg; rtunion reg_fld[2];
99 struct rtx_def plus; rtunion plus_fld1;
100 struct rtx_def neg;
101 struct rtx_def mult; rtunion mult_fld1;
102 struct rtx_def sdiv; rtunion sdiv_fld1;
103 struct rtx_def udiv; rtunion udiv_fld1;
104 struct rtx_def sdiv_32; rtunion sdiv_32_fld1;
105 struct rtx_def smod_32; rtunion smod_32_fld1;
106 struct rtx_def wide_mult; rtunion wide_mult_fld1;
107 struct rtx_def wide_lshr; rtunion wide_lshr_fld1;
108 struct rtx_def wide_trunc;
109 struct rtx_def shift; rtunion shift_fld1;
110 struct rtx_def shift_mult; rtunion shift_mult_fld1;
111 struct rtx_def shift_add; rtunion shift_add_fld1;
112 struct rtx_def shift_sub0; rtunion shift_sub0_fld1;
113 struct rtx_def shift_sub1; rtunion shift_sub1_fld1;
114 struct rtx_def zext;
115 struct rtx_def trunc;
117 rtx pow2[MAX_BITS_PER_WORD];
118 rtx cint[MAX_BITS_PER_WORD];
121 static void
122 init_expmed_one_conv (struct init_expmed_rtl *all, enum machine_mode to_mode,
123 enum machine_mode from_mode, bool speed)
125 int to_size, from_size;
126 rtx which;
128 /* We're given no information about the true size of a partial integer,
129 only the size of the "full" integer it requires for storage. For
130 comparison purposes here, reduce the bit size by one in that case. */
131 to_size = (GET_MODE_BITSIZE (to_mode)
132 - (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT));
133 from_size = (GET_MODE_BITSIZE (from_mode)
134 - (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT));
136 /* Assume cost of zero-extend and sign-extend is the same. */
137 which = (to_size < from_size ? &all->trunc : &all->zext);
139 PUT_MODE (&all->reg, from_mode);
140 set_convert_cost (to_mode, from_mode, speed, set_src_cost (which, speed));
143 static void
144 init_expmed_one_mode (struct init_expmed_rtl *all,
145 enum machine_mode mode, int speed)
147 int m, n, mode_bitsize;
148 enum machine_mode mode_from;
150 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
152 PUT_MODE (&all->reg, mode);
153 PUT_MODE (&all->plus, mode);
154 PUT_MODE (&all->neg, mode);
155 PUT_MODE (&all->mult, mode);
156 PUT_MODE (&all->sdiv, mode);
157 PUT_MODE (&all->udiv, mode);
158 PUT_MODE (&all->sdiv_32, mode);
159 PUT_MODE (&all->smod_32, mode);
160 PUT_MODE (&all->wide_trunc, mode);
161 PUT_MODE (&all->shift, mode);
162 PUT_MODE (&all->shift_mult, mode);
163 PUT_MODE (&all->shift_add, mode);
164 PUT_MODE (&all->shift_sub0, mode);
165 PUT_MODE (&all->shift_sub1, mode);
166 PUT_MODE (&all->zext, mode);
167 PUT_MODE (&all->trunc, mode);
169 set_add_cost (speed, mode, set_src_cost (&all->plus, speed));
170 set_neg_cost (speed, mode, set_src_cost (&all->neg, speed));
171 set_mul_cost (speed, mode, set_src_cost (&all->mult, speed));
172 set_sdiv_cost (speed, mode, set_src_cost (&all->sdiv, speed));
173 set_udiv_cost (speed, mode, set_src_cost (&all->udiv, speed));
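/* A signed division (resp. modulus) by a power of two is treated as cheap
   when a generic division (resp. modulus) by 32 costs no more than two
   (resp. four) additions. */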
175 set_sdiv_pow2_cheap (speed, mode, (set_src_cost (&all->sdiv_32, speed)
176 <= 2 * add_cost (speed, mode)));
177 set_smod_pow2_cheap (speed, mode, (set_src_cost (&all->smod_32, speed)
178 <= 4 * add_cost (speed, mode)));
180 set_shift_cost (speed, mode, 0, 0);
182 int cost = add_cost (speed, mode);
183 set_shiftadd_cost (speed, mode, 0, cost);
184 set_shiftsub0_cost (speed, mode, 0, cost);
185 set_shiftsub1_cost (speed, mode, 0, cost);
188 n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
189 for (m = 1; m < n; m++)
191 XEXP (&all->shift, 1) = all->cint[m];
192 XEXP (&all->shift_mult, 1) = all->pow2[m];
194 set_shift_cost (speed, mode, m, set_src_cost (&all->shift, speed));
195 set_shiftadd_cost (speed, mode, m, set_src_cost (&all->shift_add, speed));
196 set_shiftsub0_cost (speed, mode, m, set_src_cost (&all->shift_sub0, speed));
197 set_shiftsub1_cost (speed, mode, m, set_src_cost (&all->shift_sub1, speed));
200 if (SCALAR_INT_MODE_P (mode))
202 for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
203 mode_from = (enum machine_mode)(mode_from + 1))
204 init_expmed_one_conv (all, mode, mode_from, speed);
206 if (GET_MODE_CLASS (mode) == MODE_INT)
208 enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
209 if (wider_mode != VOIDmode)
211 PUT_MODE (&all->zext, wider_mode);
212 PUT_MODE (&all->wide_mult, wider_mode);
213 PUT_MODE (&all->wide_lshr, wider_mode);
214 XEXP (&all->wide_lshr, 1) = GEN_INT (mode_bitsize);
216 set_mul_widen_cost (speed, wider_mode,
217 set_src_cost (&all->wide_mult, speed));
218 set_mul_highpart_cost (speed, mode,
219 set_src_cost (&all->wide_trunc, speed));
224 void
225 init_expmed (void)
227 struct init_expmed_rtl all;
228 enum machine_mode mode;
229 int m, speed;
231 memset (&all, 0, sizeof all);
232 for (m = 1; m < MAX_BITS_PER_WORD; m++)
234 all.pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
235 all.cint[m] = GEN_INT (m);
238 PUT_CODE (&all.reg, REG);
239 /* Avoid using hard regs in ways which may be unsupported. */
240 SET_REGNO (&all.reg, LAST_VIRTUAL_REGISTER + 1);
242 PUT_CODE (&all.plus, PLUS);
243 XEXP (&all.plus, 0) = &all.reg;
244 XEXP (&all.plus, 1) = &all.reg;
246 PUT_CODE (&all.neg, NEG);
247 XEXP (&all.neg, 0) = &all.reg;
249 PUT_CODE (&all.mult, MULT);
250 XEXP (&all.mult, 0) = &all.reg;
251 XEXP (&all.mult, 1) = &all.reg;
253 PUT_CODE (&all.sdiv, DIV);
254 XEXP (&all.sdiv, 0) = &all.reg;
255 XEXP (&all.sdiv, 1) = &all.reg;
257 PUT_CODE (&all.udiv, UDIV);
258 XEXP (&all.udiv, 0) = &all.reg;
259 XEXP (&all.udiv, 1) = &all.reg;
261 PUT_CODE (&all.sdiv_32, DIV);
262 XEXP (&all.sdiv_32, 0) = &all.reg;
263 XEXP (&all.sdiv_32, 1) = 32 < MAX_BITS_PER_WORD ? all.cint[32] : GEN_INT (32);
265 PUT_CODE (&all.smod_32, MOD);
266 XEXP (&all.smod_32, 0) = &all.reg;
267 XEXP (&all.smod_32, 1) = XEXP (&all.sdiv_32, 1);
269 PUT_CODE (&all.zext, ZERO_EXTEND);
270 XEXP (&all.zext, 0) = &all.reg;
272 PUT_CODE (&all.wide_mult, MULT);
273 XEXP (&all.wide_mult, 0) = &all.zext;
274 XEXP (&all.wide_mult, 1) = &all.zext;
276 PUT_CODE (&all.wide_lshr, LSHIFTRT);
277 XEXP (&all.wide_lshr, 0) = &all.wide_mult;
279 PUT_CODE (&all.wide_trunc, TRUNCATE);
280 XEXP (&all.wide_trunc, 0) = &all.wide_lshr;
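/* Taken together, wide_trunc describes
   (truncate (lshiftrt (mult (zero_extend reg) (zero_extend reg)) bitsize)),
   the pattern whose cost init_expmed_one_mode records as the
   highpart-multiply cost for each mode. */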
282 PUT_CODE (&all.shift, ASHIFT);
283 XEXP (&all.shift, 0) = &all.reg;
285 PUT_CODE (&all.shift_mult, MULT);
286 XEXP (&all.shift_mult, 0) = &all.reg;
288 PUT_CODE (&all.shift_add, PLUS);
289 XEXP (&all.shift_add, 0) = &all.shift_mult;
290 XEXP (&all.shift_add, 1) = &all.reg;
292 PUT_CODE (&all.shift_sub0, MINUS);
293 XEXP (&all.shift_sub0, 0) = &all.shift_mult;
294 XEXP (&all.shift_sub0, 1) = &all.reg;
296 PUT_CODE (&all.shift_sub1, MINUS);
297 XEXP (&all.shift_sub1, 0) = &all.reg;
298 XEXP (&all.shift_sub1, 1) = &all.shift_mult;
300 PUT_CODE (&all.trunc, TRUNCATE);
301 XEXP (&all.trunc, 0) = &all.reg;
303 for (speed = 0; speed < 2; speed++)
305 crtl->maybe_hot_insn_p = speed;
306 set_zero_cost (speed, set_src_cost (const0_rtx, speed));
308 for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
309 mode = (enum machine_mode)(mode + 1))
310 init_expmed_one_mode (&all, mode, speed);
312 if (MIN_MODE_PARTIAL_INT != VOIDmode)
313 for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
314 mode = (enum machine_mode)(mode + 1))
315 init_expmed_one_mode (&all, mode, speed);
317 if (MIN_MODE_VECTOR_INT != VOIDmode)
318 for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
319 mode = (enum machine_mode)(mode + 1))
320 init_expmed_one_mode (&all, mode, speed);
323 if (alg_hash_used_p ())
325 struct alg_hash_entry *p = alg_hash_entry_ptr (0);
326 memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
328 else
329 set_alg_hash_used_p (true);
330 default_rtl_profile ();
333 /* Return an rtx representing minus the value of X.
334 MODE is the intended mode of the result,
335 useful if X is a CONST_INT. */
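/* For instance, negate_rtx (SImode, const1_rtx) should simplify directly
   to constm1_rtx without emitting any insns. */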
338 negate_rtx (enum machine_mode mode, rtx x)
340 rtx result = simplify_unary_operation (NEG, mode, x, mode);
342 if (result == 0)
343 result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
345 return result;
348 /* Report on the availability of insv/extv/extzv and the desired mode
349 of each of their operands. Returns MAX_MACHINE_MODE if HAVE_foo
350 is false; else the mode of the specified operand. If OPNO is -1,
351 all the caller cares about is whether the insn is available. */
352 enum machine_mode
353 mode_for_extraction (enum extraction_pattern pattern, int opno)
355 const struct insn_data_d *data;
357 switch (pattern)
359 case EP_insv:
360 if (HAVE_insv)
362 data = &insn_data[CODE_FOR_insv];
363 break;
365 return MAX_MACHINE_MODE;
367 case EP_extv:
368 if (HAVE_extv)
370 data = &insn_data[CODE_FOR_extv];
371 break;
373 return MAX_MACHINE_MODE;
375 case EP_extzv:
376 if (HAVE_extzv)
378 data = &insn_data[CODE_FOR_extzv];
379 break;
381 return MAX_MACHINE_MODE;
383 default:
384 gcc_unreachable ();
387 if (opno == -1)
388 return VOIDmode;
390 /* Everyone who uses this function used to follow it with
391 if (result == VOIDmode) result = word_mode; */
392 if (data->operand[opno].mode == VOIDmode)
393 return word_mode;
394 return data->operand[opno].mode;
397 /* A subroutine of store_bit_field, with the same arguments. Return true
398 if the operation could be implemented.
400 If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
401 no other way of implementing the operation. If FALLBACK_P is false,
402 return false instead. */
404 static bool
405 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
406 unsigned HOST_WIDE_INT bitnum,
407 unsigned HOST_WIDE_INT bitregion_start,
408 unsigned HOST_WIDE_INT bitregion_end,
409 enum machine_mode fieldmode,
410 rtx value, bool fallback_p)
412 unsigned int unit
413 = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
414 unsigned HOST_WIDE_INT offset, bitpos;
415 rtx op0 = str_rtx;
416 int byte_offset;
417 rtx orig_value;
419 enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);
421 while (GET_CODE (op0) == SUBREG)
423 /* The following line once was done only if WORDS_BIG_ENDIAN,
424 but I think that is a mistake. WORDS_BIG_ENDIAN is
425 meaningful at a much higher level; when structures are copied
426 between memory and regs, the higher-numbered regs
427 always get higher addresses. */
428 int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
429 int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
431 byte_offset = 0;
433 /* Paradoxical subregs need special handling on big endian machines. */
434 if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
436 int difference = inner_mode_size - outer_mode_size;
438 if (WORDS_BIG_ENDIAN)
439 byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
440 if (BYTES_BIG_ENDIAN)
441 byte_offset += difference % UNITS_PER_WORD;
443 else
444 byte_offset = SUBREG_BYTE (op0);
446 bitnum += byte_offset * BITS_PER_UNIT;
447 op0 = SUBREG_REG (op0);
450 /* No action is needed if the target is a register and if the field
451 lies completely outside that register. This can occur if the source
452 code contains an out-of-bounds access to a small array. */
453 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
454 return true;
456 /* Use vec_set patterns for inserting parts of vectors whenever
457 available. */
458 if (VECTOR_MODE_P (GET_MODE (op0))
459 && !MEM_P (op0)
460 && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
461 && fieldmode == GET_MODE_INNER (GET_MODE (op0))
462 && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
463 && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
465 struct expand_operand ops[3];
466 enum machine_mode outermode = GET_MODE (op0);
467 enum machine_mode innermode = GET_MODE_INNER (outermode);
468 enum insn_code icode = optab_handler (vec_set_optab, outermode);
469 int pos = bitnum / GET_MODE_BITSIZE (innermode);
471 create_fixed_operand (&ops[0], op0);
472 create_input_operand (&ops[1], value, innermode);
473 create_integer_operand (&ops[2], pos);
474 if (maybe_expand_insn (icode, 3, ops))
475 return true;
478 /* If the target is a register, overwriting the entire object, or storing
479 a full-word or multi-word field can be done with just a SUBREG.
481 If the target is memory, storing any naturally aligned field can be
482 done with a simple store. For targets that support fast unaligned
483 memory, any naturally sized, unit aligned field can be done directly. */
485 offset = bitnum / unit;
486 bitpos = bitnum % unit;
487 byte_offset = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
488 + (offset * UNITS_PER_WORD);
490 if (bitpos == 0
491 && bitsize == GET_MODE_BITSIZE (fieldmode)
492 && (!MEM_P (op0)
493 ? ((GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD
494 || GET_MODE_SIZE (GET_MODE (op0)) == GET_MODE_SIZE (fieldmode))
495 && ((GET_MODE (op0) == fieldmode && byte_offset == 0)
496 || validate_subreg (fieldmode, GET_MODE (op0), op0,
497 byte_offset)))
498 : (! SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0))
499 || (offset * BITS_PER_UNIT % bitsize == 0
500 && MEM_ALIGN (op0) % GET_MODE_BITSIZE (fieldmode) == 0))))
502 if (MEM_P (op0))
503 op0 = adjust_address (op0, fieldmode, offset);
504 else if (GET_MODE (op0) != fieldmode)
505 op0 = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
506 byte_offset);
507 emit_move_insn (op0, value);
508 return true;
511 /* Make sure we are playing with integral modes. Pun with subregs
512 if we aren't. This must come after the entire register case above,
513 since that case is valid for any mode. The following cases are only
514 valid for integral modes. */
516 enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
517 if (imode != GET_MODE (op0))
519 if (MEM_P (op0))
520 op0 = adjust_address (op0, imode, 0);
521 else
523 gcc_assert (imode != BLKmode);
524 op0 = gen_lowpart (imode, op0);
529 /* We may be accessing data outside the field, which means
530 we can alias adjacent data. */
531 /* ?? not always for C++0x memory model ?? */
532 if (MEM_P (op0))
534 op0 = shallow_copy_rtx (op0);
535 set_mem_alias_set (op0, 0);
536 set_mem_expr (op0, 0);
539 /* If OP0 is a register, BITPOS must count within a word.
540 But as we have it, it counts within whatever size OP0 now has.
541 On a bigendian machine, these are not the same, so convert. */
542 if (BYTES_BIG_ENDIAN
543 && !MEM_P (op0)
544 && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
545 bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
547 /* Storing an lsb-aligned field in a register
548 can be done with a movestrict instruction. */
550 if (!MEM_P (op0)
551 && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
552 && bitsize == GET_MODE_BITSIZE (fieldmode)
553 && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
555 struct expand_operand ops[2];
556 enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
557 rtx arg0 = op0;
558 unsigned HOST_WIDE_INT subreg_off;
560 if (GET_CODE (arg0) == SUBREG)
562 /* Else we've got some float mode source being extracted into
563 a different float mode destination -- this combination of
564 subregs results in Severe Tire Damage. */
565 gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
566 || GET_MODE_CLASS (fieldmode) == MODE_INT
567 || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
568 arg0 = SUBREG_REG (arg0);
571 subreg_off = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
572 + (offset * UNITS_PER_WORD);
573 if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
575 arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
577 create_fixed_operand (&ops[0], arg0);
578 /* Shrink the source operand to FIELDMODE. */
579 create_convert_operand_to (&ops[1], value, fieldmode, false);
580 if (maybe_expand_insn (icode, 2, ops))
581 return true;
585 /* Handle fields bigger than a word. */
587 if (bitsize > BITS_PER_WORD)
589 /* Here we transfer the words of the field
590 in the order least significant first.
591 This is because the most significant word is the one which may
592 be less than full.
593 However, only do that if the value is not BLKmode. */
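/* For example (32-bit words, !WORDS_BIG_ENDIAN), a 72-bit field is stored
   by three recursive calls below: two full 32-bit words, least significant
   first, then the remaining 8 bits of the most significant word. */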
595 unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
596 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
597 unsigned int i;
598 rtx last;
600 /* This is the mode we must force value to, so that there will be enough
601 subwords to extract. Note that fieldmode will often (always?) be
602 VOIDmode, because that is what store_field uses to indicate that this
603 is a bit field, but passing VOIDmode to operand_subword_force
604 is not allowed. */
605 fieldmode = GET_MODE (value);
606 if (fieldmode == VOIDmode)
607 fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
609 last = get_last_insn ();
610 for (i = 0; i < nwords; i++)
612 /* If I is 0, use the low-order word in both field and target;
613 if I is 1, use the next to lowest word; and so on. */
614 unsigned int wordnum = (backwards
615 ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD
616 - i - 1
617 : i);
618 unsigned int bit_offset = (backwards
619 ? MAX ((int) bitsize - ((int) i + 1)
620 * BITS_PER_WORD,
622 : (int) i * BITS_PER_WORD);
623 rtx value_word = operand_subword_force (value, wordnum, fieldmode);
624 unsigned HOST_WIDE_INT new_bitsize =
625 MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
627 /* If the remaining chunk doesn't have full wordsize we have
628 to make sure that for big endian machines the higher order
629 bits are used. */
630 if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
631 value_word = simplify_expand_binop (word_mode, lshr_optab,
632 value_word,
633 GEN_INT (BITS_PER_WORD
634 - new_bitsize),
635 NULL_RTX, true,
636 OPTAB_LIB_WIDEN);
638 if (!store_bit_field_1 (op0, new_bitsize,
639 bitnum + bit_offset,
640 bitregion_start, bitregion_end,
641 word_mode,
642 value_word, fallback_p))
644 delete_insns_since (last);
645 return false;
648 return true;
651 /* From here on we can assume that the field to be stored
652 spans at most a single word, since wider fields were handled above. */
654 /* OFFSET is the number of words or bytes (UNIT says which)
655 from STR_RTX to the first word or byte containing part of the field. */
657 if (!MEM_P (op0))
659 if (offset != 0
660 || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
662 if (!REG_P (op0))
664 /* Since this is a destination (lvalue), we can't copy
665 it to a pseudo. We can remove a SUBREG that does not
666 change the size of the operand. Such a SUBREG may
667 have been added above. */
668 gcc_assert (GET_CODE (op0) == SUBREG
669 && (GET_MODE_SIZE (GET_MODE (op0))
670 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)))));
671 op0 = SUBREG_REG (op0);
673 op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
674 op0, (offset * UNITS_PER_WORD));
676 offset = 0;
679 /* If VALUE has a floating-point or complex mode, access it as an
680 integer of the corresponding size. This can occur on a machine
681 with 64 bit registers that uses SFmode for float. It can also
682 occur for unaligned float or complex fields. */
683 orig_value = value;
684 if (GET_MODE (value) != VOIDmode
685 && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
686 && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
688 value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
689 emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
692 /* Now OFFSET is nonzero only if OP0 is memory
693 and is therefore always measured in bytes. */
695 if (HAVE_insv
696 && GET_MODE (value) != BLKmode
697 && bitsize > 0
698 && GET_MODE_BITSIZE (op_mode) >= bitsize
699 /* Do not use insv for volatile bitfields when
700 -fstrict-volatile-bitfields is in effect. */
701 && !(MEM_P (op0) && MEM_VOLATILE_P (op0)
702 && flag_strict_volatile_bitfields > 0)
703 && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
704 && (bitsize + bitpos > GET_MODE_BITSIZE (op_mode)))
705 /* Do not use insv if the bit region is restricted and
706 op_mode integer at offset doesn't fit into the
707 restricted region. */
708 && !(MEM_P (op0) && bitregion_end
709 && bitnum - bitpos + GET_MODE_BITSIZE (op_mode)
710 > bitregion_end + 1))
712 struct expand_operand ops[4];
713 int xbitpos = bitpos;
714 rtx value1;
715 rtx xop0 = op0;
716 rtx last = get_last_insn ();
717 bool copy_back = false;
719 /* Add OFFSET into OP0's address. */
720 if (MEM_P (xop0))
721 xop0 = adjust_address (xop0, byte_mode, offset);
723 /* If xop0 is a register, we need it in OP_MODE
724 to make it acceptable to the format of insv. */
725 if (GET_CODE (xop0) == SUBREG)
726 /* We can't just change the mode, because this might clobber op0,
727 and we will need the original value of op0 if insv fails. */
728 xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
729 if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
730 xop0 = gen_lowpart_SUBREG (op_mode, xop0);
732 /* If the destination is a paradoxical subreg such that we need a
733 truncate to the inner mode, perform the insertion on a temporary and
734 truncate the result to the original destination. Note that we can't
735 just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
736 X) 0)) is (reg:N X). */
737 if (GET_CODE (xop0) == SUBREG
738 && REG_P (SUBREG_REG (xop0))
739 && (!TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
740 op_mode)))
742 rtx tem = gen_reg_rtx (op_mode);
743 emit_move_insn (tem, xop0);
744 xop0 = tem;
745 copy_back = true;
748 /* We have been counting XBITPOS within UNIT.
749 Count instead within the size of the register. */
750 if (BYTES_BIG_ENDIAN && !MEM_P (xop0))
751 xbitpos += GET_MODE_BITSIZE (op_mode) - unit;
753 unit = GET_MODE_BITSIZE (op_mode);
755 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
756 "backwards" from the size of the unit we are inserting into.
757 Otherwise, we count bits from the most significant on a
758 BYTES/BITS_BIG_ENDIAN machine. */
760 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
761 xbitpos = unit - bitsize - xbitpos;
763 /* Convert VALUE to op_mode (which insv insn wants) in VALUE1. */
764 value1 = value;
765 if (GET_MODE (value) != op_mode)
767 if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
769 /* Optimization: Don't bother really extending VALUE
770 if it has all the bits we will actually use. However,
771 if we must narrow it, be sure we do it correctly. */
773 if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
775 rtx tmp;
777 tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
778 if (! tmp)
779 tmp = simplify_gen_subreg (op_mode,
780 force_reg (GET_MODE (value),
781 value1),
782 GET_MODE (value), 0);
783 value1 = tmp;
785 else
786 value1 = gen_lowpart (op_mode, value1);
788 else if (CONST_INT_P (value))
789 value1 = gen_int_mode (INTVAL (value), op_mode);
790 else
791 /* Parse phase is supposed to make VALUE's data type
792 match that of the component reference, which is a type
793 at least as wide as the field; so VALUE should have
794 a mode that corresponds to that type. */
795 gcc_assert (CONSTANT_P (value));
798 create_fixed_operand (&ops[0], xop0);
799 create_integer_operand (&ops[1], bitsize);
800 create_integer_operand (&ops[2], xbitpos);
801 create_input_operand (&ops[3], value1, op_mode);
802 if (maybe_expand_insn (CODE_FOR_insv, 4, ops))
804 if (copy_back)
805 convert_move (op0, xop0, true);
806 return true;
808 delete_insns_since (last);
811 /* If OP0 is a memory, try copying it to a register and seeing if a
812 cheap register alternative is available. */
813 if (HAVE_insv && MEM_P (op0))
815 enum machine_mode bestmode;
816 unsigned HOST_WIDE_INT maxbits = MAX_FIXED_MODE_SIZE;
818 if (bitregion_end)
819 maxbits = bitregion_end - bitregion_start + 1;
821 /* Get the mode to use for inserting into this field. If OP0 is
822 BLKmode, get the smallest mode consistent with the alignment. If
823 OP0 is a non-BLKmode object that is no wider than OP_MODE, use its
824 mode. Otherwise, use the smallest mode containing the field. */
826 if (GET_MODE (op0) == BLKmode
827 || GET_MODE_BITSIZE (GET_MODE (op0)) > maxbits
828 || (op_mode != MAX_MACHINE_MODE
829 && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (op_mode)))
830 bestmode = get_best_mode (bitsize, bitnum,
831 bitregion_start, bitregion_end,
832 MEM_ALIGN (op0),
833 (op_mode == MAX_MACHINE_MODE
834 ? VOIDmode : op_mode),
835 MEM_VOLATILE_P (op0));
836 else
837 bestmode = GET_MODE (op0);
839 if (bestmode != VOIDmode
840 && GET_MODE_SIZE (bestmode) >= GET_MODE_SIZE (fieldmode)
841 && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
842 && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
844 rtx last, tempreg, xop0;
845 unsigned HOST_WIDE_INT xoffset, xbitpos;
847 last = get_last_insn ();
849 /* Adjust address to point to the containing unit of
850 that mode. Compute the offset as a multiple of this unit,
851 counting in bytes. */
852 unit = GET_MODE_BITSIZE (bestmode);
853 xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
854 xbitpos = bitnum % unit;
855 xop0 = adjust_address (op0, bestmode, xoffset);
857 /* Fetch that unit, store the bitfield in it, then store
858 the unit. */
859 tempreg = copy_to_reg (xop0);
860 if (store_bit_field_1 (tempreg, bitsize, xbitpos,
861 bitregion_start, bitregion_end,
862 fieldmode, orig_value, false))
864 emit_move_insn (xop0, tempreg);
865 return true;
867 delete_insns_since (last);
871 if (!fallback_p)
872 return false;
874 store_fixed_bit_field (op0, offset, bitsize, bitpos,
875 bitregion_start, bitregion_end, value);
876 return true;
879 /* Generate code to store value from rtx VALUE
880 into a bit-field within structure STR_RTX
881 containing BITSIZE bits starting at bit BITNUM.
883 BITREGION_START is the bit position of the first bitfield in this region.
884 BITREGION_END is the bit position of the ending bitfield in this region.
885 These two fields are 0, if the C++ memory model does not apply,
886 or we are not interested in keeping track of bitfield regions.
888 FIELDMODE is the machine-mode of the FIELD_DECL node for this field. */
890 void
891 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
892 unsigned HOST_WIDE_INT bitnum,
893 unsigned HOST_WIDE_INT bitregion_start,
894 unsigned HOST_WIDE_INT bitregion_end,
895 enum machine_mode fieldmode,
896 rtx value)
898 /* Under the C++0x memory model, we must not touch bits outside the
899 bit region. Adjust the address to start at the beginning of the
900 bit region. */
901 if (MEM_P (str_rtx) && bitregion_start > 0)
903 enum machine_mode bestmode;
904 enum machine_mode op_mode;
905 unsigned HOST_WIDE_INT offset;
907 op_mode = mode_for_extraction (EP_insv, 3);
908 if (op_mode == MAX_MACHINE_MODE)
909 op_mode = VOIDmode;
911 gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);
913 offset = bitregion_start / BITS_PER_UNIT;
914 bitnum -= bitregion_start;
915 bitregion_end -= bitregion_start;
916 bitregion_start = 0;
917 bestmode = get_best_mode (bitsize, bitnum,
918 bitregion_start, bitregion_end,
919 MEM_ALIGN (str_rtx),
920 op_mode,
921 MEM_VOLATILE_P (str_rtx));
922 str_rtx = adjust_address (str_rtx, bestmode, offset);
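/* For example, with 8-bit units and BITREGION_START == 32, the address is
   advanced by 4 bytes and BITNUM and BITREGION_END are rebased so that the
   region now begins at bit 0. */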
925 if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
926 bitregion_start, bitregion_end,
927 fieldmode, value, true))
928 gcc_unreachable ();
931 /* Use shifts and boolean operations to store VALUE
932 into a bit field of width BITSIZE
933 in a memory location specified by OP0 except offset by OFFSET bytes.
934 (OFFSET must be 0 if OP0 is a register.)
935 The field starts at position BITPOS within the byte.
936 (If OP0 is a register, it may be a full word or a narrower mode,
937 but BITPOS still counts within a full word,
938 which is significant on bigendian machines.) */
940 static void
941 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT offset,
942 unsigned HOST_WIDE_INT bitsize,
943 unsigned HOST_WIDE_INT bitpos,
944 unsigned HOST_WIDE_INT bitregion_start,
945 unsigned HOST_WIDE_INT bitregion_end,
946 rtx value)
948 enum machine_mode mode;
949 unsigned int total_bits = BITS_PER_WORD;
950 rtx temp;
951 int all_zero = 0;
952 int all_one = 0;
954 /* There is a case not handled here:
955 a structure with a known alignment of just a halfword
956 and a field split across two aligned halfwords within the structure.
957 Or likewise a structure with a known alignment of just a byte
958 and a field split across two bytes.
959 Such cases are not supposed to be able to occur. */
961 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
963 gcc_assert (!offset);
964 /* Special treatment for a bit field split across two registers. */
965 if (bitsize + bitpos > BITS_PER_WORD)
967 store_split_bit_field (op0, bitsize, bitpos,
968 bitregion_start, bitregion_end,
969 value);
970 return;
973 else
975 unsigned HOST_WIDE_INT maxbits = MAX_FIXED_MODE_SIZE;
977 if (bitregion_end)
978 maxbits = bitregion_end - bitregion_start + 1;
980 /* Get the proper mode to use for this field. We want a mode that
981 includes the entire field. If such a mode would be larger than
982 a word, we won't be doing the store the normal way.
983 We don't want a mode bigger than the destination. */
985 mode = GET_MODE (op0);
986 if (GET_MODE_BITSIZE (mode) == 0
987 || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
988 mode = word_mode;
990 if (MEM_VOLATILE_P (op0)
991 && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
992 && GET_MODE_BITSIZE (GET_MODE (op0)) <= maxbits
993 && flag_strict_volatile_bitfields > 0)
994 mode = GET_MODE (op0);
995 else
996 mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
997 bitregion_start, bitregion_end,
998 MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
1000 if (mode == VOIDmode)
1002 /* The only way this should occur is if the field spans word
1003 boundaries. */
1004 store_split_bit_field (op0, bitsize, bitpos + offset * BITS_PER_UNIT,
1005 bitregion_start, bitregion_end, value);
1006 return;
1009 total_bits = GET_MODE_BITSIZE (mode);
1011 /* Make sure bitpos is valid for the chosen mode. Adjust BITPOS to
1012 be in the range 0 to total_bits-1, and put any excess bytes in
1013 OFFSET. */
1014 if (bitpos >= total_bits)
1016 offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
1017 bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
1018 * BITS_PER_UNIT);
1021 /* Get ref to an aligned byte, halfword, or word containing the field.
1022 Adjust BITPOS to be position within a word,
1023 and OFFSET to be the offset of that word.
1024 Then alter OP0 to refer to that word. */
1025 bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
1026 offset -= (offset % (total_bits / BITS_PER_UNIT));
1027 op0 = adjust_address (op0, mode, offset);
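/* For example, with a 32-bit MODE, OFFSET == 5 and BITPOS == 3 become
   OFFSET == 4 and BITPOS == 11, i.e. the same bit measured from the
   containing unit's boundary. */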
1030 mode = GET_MODE (op0);
1032 /* Now MODE is either some integral mode for a MEM as OP0,
1033 or is a full-word for a REG as OP0. TOTAL_BITS corresponds.
1034 The bit field is contained entirely within OP0.
1035 BITPOS is the starting bit number within OP0.
1036 (OP0's mode may actually be narrower than MODE.) */
1038 if (BYTES_BIG_ENDIAN)
1039 /* BITPOS is the distance between our msb
1040 and that of the containing datum.
1041 Convert it to the distance from the lsb. */
1042 bitpos = total_bits - bitsize - bitpos;
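/* E.g. in a 32-bit unit, an 8-bit field whose msb-relative BITPOS was 4
   gets an lsb-relative BITPOS of 32 - 8 - 4 == 20. */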
1044 /* Now BITPOS is always the distance between our lsb
1045 and that of OP0. */
1047 /* Shift VALUE left by BITPOS bits. If VALUE is not constant,
1048 we must first convert its mode to MODE. */
1050 if (CONST_INT_P (value))
1052 HOST_WIDE_INT v = INTVAL (value);
1054 if (bitsize < HOST_BITS_PER_WIDE_INT)
1055 v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
1057 if (v == 0)
1058 all_zero = 1;
1059 else if ((bitsize < HOST_BITS_PER_WIDE_INT
1060 && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
1061 || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
1062 all_one = 1;
1064 value = lshift_value (mode, value, bitpos, bitsize);
1066 else
1068 int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
1069 && bitpos + bitsize != GET_MODE_BITSIZE (mode));
1071 if (GET_MODE (value) != mode)
1072 value = convert_to_mode (mode, value, 1);
1074 if (must_and)
1075 value = expand_binop (mode, and_optab, value,
1076 mask_rtx (mode, 0, bitsize, 0),
1077 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1078 if (bitpos > 0)
1079 value = expand_shift (LSHIFT_EXPR, mode, value,
1080 bitpos, NULL_RTX, 1);
1083 /* Now clear the chosen bits in OP0,
1084 except that if VALUE is -1 we need not bother. */
1085 /* We keep the intermediates in registers to allow CSE to combine
1086 consecutive bitfield assignments. */
1088 temp = force_reg (mode, op0);
1090 if (! all_one)
1092 temp = expand_binop (mode, and_optab, temp,
1093 mask_rtx (mode, bitpos, bitsize, 1),
1094 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1095 temp = force_reg (mode, temp);
1098 /* Now logical-or VALUE into OP0, unless it is zero. */
1100 if (! all_zero)
1102 temp = expand_binop (mode, ior_optab, temp, value,
1103 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1104 temp = force_reg (mode, temp);
1107 if (op0 != temp)
1109 op0 = copy_rtx (op0);
1110 emit_move_insn (op0, temp);
1114 /* Store a bit field that is split across multiple accessible memory objects.
1116 OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1117 BITSIZE is the field width; BITPOS the position of its first bit
1118 (within the word).
1119 VALUE is the value to store.
1121 This does not yet handle fields wider than BITS_PER_WORD. */
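/* As an illustration (word-aligned MEM, 32-bit words): a 20-bit field
   starting at bit 28 is written as a 4-bit piece at position 28 of the
   first word followed by a 16-bit piece at position 0 of the next word. */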
1123 static void
1124 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1125 unsigned HOST_WIDE_INT bitpos,
1126 unsigned HOST_WIDE_INT bitregion_start,
1127 unsigned HOST_WIDE_INT bitregion_end,
1128 rtx value)
1130 unsigned int unit;
1131 unsigned int bitsdone = 0;
1133 /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
1134 much at a time. */
1135 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1136 unit = BITS_PER_WORD;
1137 else
1138 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1140 /* If VALUE is a constant other than a CONST_INT, get it into a register in
1141 WORD_MODE. If we can do this using gen_lowpart_common, do so. Note
1142 that VALUE might be a floating-point constant. */
1143 if (CONSTANT_P (value) && !CONST_INT_P (value))
1145 rtx word = gen_lowpart_common (word_mode, value);
1147 if (word && (value != word))
1148 value = word;
1149 else
1150 value = gen_lowpart_common (word_mode,
1151 force_reg (GET_MODE (value) != VOIDmode
1152 ? GET_MODE (value)
1153 : word_mode, value));
1156 while (bitsdone < bitsize)
1158 unsigned HOST_WIDE_INT thissize;
1159 rtx part, word;
1160 unsigned HOST_WIDE_INT thispos;
1161 unsigned HOST_WIDE_INT offset;
1163 offset = (bitpos + bitsdone) / unit;
1164 thispos = (bitpos + bitsdone) % unit;
1166 /* When region of bytes we can touch is restricted, decrease
1167 UNIT close to the end of the region as needed. */
1168 if (bitregion_end
1169 && unit > BITS_PER_UNIT
1170 && bitpos + bitsdone - thispos + unit > bitregion_end + 1)
1172 unit = unit / 2;
1173 continue;
1176 /* THISSIZE must not overrun a word boundary. Otherwise,
1177 store_fixed_bit_field will call us again, and we will mutually
1178 recurse forever. */
1179 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1180 thissize = MIN (thissize, unit - thispos);
1182 if (BYTES_BIG_ENDIAN)
1184 int total_bits;
1186 /* We must do an endian conversion exactly the same way as it is
1187 done in extract_bit_field, so that the two calls to
1188 extract_fixed_bit_field will have comparable arguments. */
1189 if (!MEM_P (value) || GET_MODE (value) == BLKmode)
1190 total_bits = BITS_PER_WORD;
1191 else
1192 total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1194 /* Fetch successively less significant portions. */
1195 if (CONST_INT_P (value))
1196 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1197 >> (bitsize - bitsdone - thissize))
1198 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1199 else
1200 /* The args are chosen so that the last part includes the
1201 lsb. Give extract_bit_field the value it needs (with
1202 endianness compensation) to fetch the piece we want. */
1203 part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1204 total_bits - bitsize + bitsdone,
1205 NULL_RTX, 1, false);
1207 else
1209 /* Fetch successively more significant portions. */
1210 if (CONST_INT_P (value))
1211 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1212 >> bitsdone)
1213 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1214 else
1215 part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1216 bitsdone, NULL_RTX, 1, false);
1219 /* If OP0 is a register, then handle OFFSET here.
1221 When handling multiword bitfields, extract_bit_field may pass
1222 down a word_mode SUBREG of a larger REG for a bitfield that actually
1223 crosses a word boundary. Thus, for a SUBREG, we must find
1224 the current word starting from the base register. */
1225 if (GET_CODE (op0) == SUBREG)
1227 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1228 enum machine_mode sub_mode = GET_MODE (SUBREG_REG (op0));
1229 if (sub_mode != BLKmode && GET_MODE_SIZE (sub_mode) < UNITS_PER_WORD)
1230 word = word_offset ? const0_rtx : op0;
1231 else
1232 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1233 GET_MODE (SUBREG_REG (op0)));
1234 offset = 0;
1236 else if (REG_P (op0))
1238 enum machine_mode op0_mode = GET_MODE (op0);
1239 if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
1240 word = offset ? const0_rtx : op0;
1241 else
1242 word = operand_subword_force (op0, offset, GET_MODE (op0));
1243 offset = 0;
1245 else
1246 word = op0;
1248 /* OFFSET is in UNITs, and UNIT is in bits.
1249 store_fixed_bit_field wants offset in bytes. If WORD is const0_rtx,
1250 it is just an out-of-bounds access. Ignore it. */
1251 if (word != const0_rtx)
1252 store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT, thissize,
1253 thispos, bitregion_start, bitregion_end, part);
1254 bitsdone += thissize;
1258 /* A subroutine of extract_bit_field_1 that converts return value X
1259 to either MODE or TMODE. MODE, TMODE and UNSIGNEDP are arguments
1260 to extract_bit_field. */
1262 static rtx
1263 convert_extracted_bit_field (rtx x, enum machine_mode mode,
1264 enum machine_mode tmode, bool unsignedp)
1266 if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1267 return x;
1269 /* If TMODE is not a scalar integer mode, first convert X to an
1270 integer mode of the same size and then access it as TMODE
1271 via a SUBREG. */
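/* E.g. on a target where SFmode is 32 bits wide, an extraction whose
   caller wants SFmode is converted to SImode and then reinterpreted with
   gen_lowpart. */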
1272 if (!SCALAR_INT_MODE_P (tmode))
1274 enum machine_mode smode;
1276 smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1277 x = convert_to_mode (smode, x, unsignedp);
1278 x = force_reg (smode, x);
1279 return gen_lowpart (tmode, x);
1282 return convert_to_mode (tmode, x, unsignedp);
1285 /* A subroutine of extract_bit_field, with the same arguments.
1286 If FALLBACK_P is true, fall back to extract_fixed_bit_field
1287 if we can find no other means of implementing the operation.
1288 If FALLBACK_P is false, return NULL instead. */
1290 static rtx
1291 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1292 unsigned HOST_WIDE_INT bitnum,
1293 int unsignedp, bool packedp, rtx target,
1294 enum machine_mode mode, enum machine_mode tmode,
1295 bool fallback_p)
1297 unsigned int unit
1298 = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
1299 unsigned HOST_WIDE_INT offset, bitpos;
1300 rtx op0 = str_rtx;
1301 enum machine_mode int_mode;
1302 enum machine_mode ext_mode;
1303 enum machine_mode mode1;
1304 int byte_offset;
1306 if (tmode == VOIDmode)
1307 tmode = mode;
1309 while (GET_CODE (op0) == SUBREG)
1311 bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1312 op0 = SUBREG_REG (op0);
1315 /* If we have an out-of-bounds access to a register, just return an
1316 uninitialized register of the required mode. This can occur if the
1317 source code contains an out-of-bounds access to a small array. */
1318 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1319 return gen_reg_rtx (tmode);
1321 if (REG_P (op0)
1322 && mode == GET_MODE (op0)
1323 && bitnum == 0
1324 && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1326 /* We're trying to extract a full register from itself. */
1327 return op0;
1330 /* See if we can get a better vector mode before extracting. */
1331 if (VECTOR_MODE_P (GET_MODE (op0))
1332 && !MEM_P (op0)
1333 && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1335 enum machine_mode new_mode;
1337 if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1338 new_mode = MIN_MODE_VECTOR_FLOAT;
1339 else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1340 new_mode = MIN_MODE_VECTOR_FRACT;
1341 else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1342 new_mode = MIN_MODE_VECTOR_UFRACT;
1343 else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1344 new_mode = MIN_MODE_VECTOR_ACCUM;
1345 else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1346 new_mode = MIN_MODE_VECTOR_UACCUM;
1347 else
1348 new_mode = MIN_MODE_VECTOR_INT;
1350 for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1351 if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1352 && targetm.vector_mode_supported_p (new_mode))
1353 break;
1354 if (new_mode != VOIDmode)
1355 op0 = gen_lowpart (new_mode, op0);
1358 /* Use vec_extract patterns for extracting parts of vectors whenever
1359 available. */
1360 if (VECTOR_MODE_P (GET_MODE (op0))
1361 && !MEM_P (op0)
1362 && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing
1363 && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1364 == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1366 struct expand_operand ops[3];
1367 enum machine_mode outermode = GET_MODE (op0);
1368 enum machine_mode innermode = GET_MODE_INNER (outermode);
1369 enum insn_code icode = optab_handler (vec_extract_optab, outermode);
1370 unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1372 create_output_operand (&ops[0], target, innermode);
1373 create_input_operand (&ops[1], op0, outermode);
1374 create_integer_operand (&ops[2], pos);
1375 if (maybe_expand_insn (icode, 3, ops))
1377 target = ops[0].value;
1378 if (GET_MODE (target) != mode)
1379 return gen_lowpart (tmode, target);
1380 return target;
1384 /* Make sure we are playing with integral modes. Pun with subregs
1385 if we aren't. */
1387 enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1388 if (imode != GET_MODE (op0))
1390 if (MEM_P (op0))
1391 op0 = adjust_address (op0, imode, 0);
1392 else if (imode != BLKmode)
1394 op0 = gen_lowpart (imode, op0);
1396 /* If we got a SUBREG, force it into a register since we
1397 aren't going to be able to do another SUBREG on it. */
1398 if (GET_CODE (op0) == SUBREG)
1399 op0 = force_reg (imode, op0);
1401 else if (REG_P (op0))
1403 rtx reg, subreg;
1404 imode = smallest_mode_for_size (GET_MODE_BITSIZE (GET_MODE (op0)),
1405 MODE_INT);
1406 reg = gen_reg_rtx (imode);
1407 subreg = gen_lowpart_SUBREG (GET_MODE (op0), reg);
1408 emit_move_insn (subreg, op0);
1409 op0 = reg;
1410 bitnum += SUBREG_BYTE (subreg) * BITS_PER_UNIT;
1412 else
1414 rtx mem = assign_stack_temp (GET_MODE (op0),
1415 GET_MODE_SIZE (GET_MODE (op0)));
1416 emit_move_insn (mem, op0);
1417 op0 = adjust_address (mem, BLKmode, 0);
1422 /* We may be accessing data outside the field, which means
1423 we can alias adjacent data. */
1424 if (MEM_P (op0))
1426 op0 = shallow_copy_rtx (op0);
1427 set_mem_alias_set (op0, 0);
1428 set_mem_expr (op0, 0);
1431 /* Extraction of a full-word or multi-word value from a structure
1432 in a register or aligned memory can be done with just a SUBREG.
1433 A subword value in the least significant part of a register
1434 can also be extracted with a SUBREG. For this, we need the
1435 byte offset of the value in op0. */
1437 bitpos = bitnum % unit;
1438 offset = bitnum / unit;
1439 byte_offset = bitpos / BITS_PER_UNIT + offset * UNITS_PER_WORD;
1441 /* If OP0 is a register, BITPOS must count within a word.
1442 But as we have it, it counts within whatever size OP0 now has.
1443 On a bigendian machine, these are not the same, so convert. */
1444 if (BYTES_BIG_ENDIAN
1445 && !MEM_P (op0)
1446 && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
1447 bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1449 /* ??? We currently assume TARGET is at least as big as BITSIZE.
1450 If that's wrong, the solution is to test for it and set TARGET to 0
1451 if needed. */
1453 /* Only scalar integer modes can be converted via subregs. There is an
1454 additional problem for FP modes here in that they can have a precision
1455 which is different from the size. mode_for_size uses precision, but
1456 we want a mode based on the size, so we must avoid calling it for FP
1457 modes. */
1458 mode1 = (SCALAR_INT_MODE_P (tmode)
1459 ? mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0)
1460 : mode);
1462 /* If the bitfield is volatile, we need to make sure the access
1463 remains on a type-aligned boundary. */
1464 if (GET_CODE (op0) == MEM
1465 && MEM_VOLATILE_P (op0)
1466 && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
1467 && flag_strict_volatile_bitfields > 0)
1468 goto no_subreg_mode_swap;
1470 if (((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode)
1471 && bitpos % BITS_PER_WORD == 0)
1472 || (mode1 != BLKmode
1473 /* ??? The big endian test here is wrong. This is correct
1474 if the value is in a register, and if mode_for_size is not
1475 the same mode as op0. This causes us to get unnecessarily
1476 inefficient code from the Thumb port when -mbig-endian. */
1477 && (BYTES_BIG_ENDIAN
1478 ? bitpos + bitsize == BITS_PER_WORD
1479 : bitpos == 0)))
1480 && ((!MEM_P (op0)
1481 && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0))
1482 && GET_MODE_SIZE (mode1) != 0
1483 && byte_offset % GET_MODE_SIZE (mode1) == 0)
1484 || (MEM_P (op0)
1485 && (! SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
1486 || (offset * BITS_PER_UNIT % bitsize == 0
1487 && MEM_ALIGN (op0) % bitsize == 0)))))
1489 if (MEM_P (op0))
1490 op0 = adjust_address (op0, mode1, offset);
1491 else if (mode1 != GET_MODE (op0))
1493 rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1494 byte_offset);
1495 if (sub == NULL)
1496 goto no_subreg_mode_swap;
1497 op0 = sub;
1499 if (mode1 != mode)
1500 return convert_to_mode (tmode, op0, unsignedp);
1501 return op0;
1503 no_subreg_mode_swap:
1505 /* Handle fields bigger than a word. */
1507 if (bitsize > BITS_PER_WORD)
1509 /* Here we transfer the words of the field
1510 in the order least significant first.
1511 This is because the most significant word is the one which may
1512 be less than full. */
1514 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1515 unsigned int i;
1517 if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1518 target = gen_reg_rtx (mode);
1520 /* Indicate for flow that the entire target reg is being set. */
1521 emit_clobber (target);
1523 for (i = 0; i < nwords; i++)
1525 /* If I is 0, use the low-order word in both field and target;
1526 if I is 1, use the next to lowest word; and so on. */
1527 /* Word number in TARGET to use. */
1528 unsigned int wordnum
1529 = (WORDS_BIG_ENDIAN
1530 ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1531 : i);
1532 /* Offset from start of field in OP0. */
1533 unsigned int bit_offset = (WORDS_BIG_ENDIAN
1534 ? MAX (0, ((int) bitsize - ((int) i + 1)
1535 * (int) BITS_PER_WORD))
1536 : (int) i * BITS_PER_WORD);
1537 rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1538 rtx result_part
1539 = extract_bit_field (op0, MIN (BITS_PER_WORD,
1540 bitsize - i * BITS_PER_WORD),
1541 bitnum + bit_offset, 1, false, target_part, mode,
1542 word_mode);
1544 gcc_assert (target_part);
1546 if (result_part != target_part)
1547 emit_move_insn (target_part, result_part);
1550 if (unsignedp)
1552 /* Unless we've filled TARGET, the upper regs in a multi-reg value
1553 need to be zero'd out. */
1554 if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1556 unsigned int i, total_words;
1558 total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1559 for (i = nwords; i < total_words; i++)
1560 emit_move_insn
1561 (operand_subword (target,
1562 WORDS_BIG_ENDIAN ? total_words - i - 1 : i,
1563 1, VOIDmode),
1564 const0_rtx);
1566 return target;
1569 /* Signed bit field: sign-extend with two arithmetic shifts. */
1570 target = expand_shift (LSHIFT_EXPR, mode, target,
1571 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1572 return expand_shift (RSHIFT_EXPR, mode, target,
1573 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1576 /* From here on we know the desired field is smaller than a word. */
1578 /* Check if there is a correspondingly-sized integer field, so we can
1579 safely extract it as one size of integer, if necessary; then
1580 truncate or extend to the size that is wanted; then use SUBREGs or
1581 convert_to_mode to get one of the modes we really wanted. */
1583 int_mode = int_mode_for_mode (tmode);
1584 if (int_mode == BLKmode)
1585 int_mode = int_mode_for_mode (mode);
1586 /* Should probably push op0 out to memory and then do a load. */
1587 gcc_assert (int_mode != BLKmode);
1589 /* OFFSET is the number of words or bytes (UNIT says which)
1590 from STR_RTX to the first word or byte containing part of the field. */
1591 if (!MEM_P (op0))
1593 if (offset != 0
1594 || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1596 if (!REG_P (op0))
1597 op0 = copy_to_reg (op0);
1598 op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
1599 op0, (offset * UNITS_PER_WORD));
1601 offset = 0;
1604 /* Now OFFSET is nonzero only for memory operands. */
1605 ext_mode = mode_for_extraction (unsignedp ? EP_extzv : EP_extv, 0);
1606 if (ext_mode != MAX_MACHINE_MODE
1607 && bitsize > 0
1608 && GET_MODE_BITSIZE (ext_mode) >= bitsize
1609 /* Do not use extv/extzv for volatile bitfields when
1610 -fstrict-volatile-bitfields is in effect. */
1611 && !(MEM_P (op0) && MEM_VOLATILE_P (op0)
1612 && flag_strict_volatile_bitfields > 0)
1613 /* If op0 is a register, we need it in EXT_MODE to make it
1614 acceptable to the format of ext(z)v. */
1615 && !(GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1616 && !((REG_P (op0) || GET_CODE (op0) == SUBREG)
1617 && (bitsize + bitpos > GET_MODE_BITSIZE (ext_mode))))
1619 struct expand_operand ops[4];
1620 unsigned HOST_WIDE_INT xbitpos = bitpos, xoffset = offset;
1621 rtx xop0 = op0;
1622 rtx xtarget = target;
1623 rtx xspec_target = target;
1624 rtx xspec_target_subreg = 0;
1626 /* If op0 is a register, we need it in EXT_MODE to make it
1627 acceptable to the format of ext(z)v. */
1628 if (REG_P (xop0) && GET_MODE (xop0) != ext_mode)
1629 xop0 = gen_lowpart_SUBREG (ext_mode, xop0);
1630 if (MEM_P (xop0))
1631 /* Get ref to first byte containing part of the field. */
1632 xop0 = adjust_address (xop0, byte_mode, xoffset);
1634 /* Now convert from counting within UNIT to counting in EXT_MODE. */
1635 if (BYTES_BIG_ENDIAN && !MEM_P (xop0))
1636 xbitpos += GET_MODE_BITSIZE (ext_mode) - unit;
1638 unit = GET_MODE_BITSIZE (ext_mode);
1640 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1641 "backwards" from the size of the unit we are extracting from.
1642 Otherwise, we count bits from the most significant on a
1643 BYTES/BITS_BIG_ENDIAN machine. */
1645 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1646 xbitpos = unit - bitsize - xbitpos;
1648 if (xtarget == 0)
1649 xtarget = xspec_target = gen_reg_rtx (tmode);
1651 if (GET_MODE (xtarget) != ext_mode)
1653 /* Don't use LHS paradoxical subreg if explicit truncation is needed
1654 between the mode of the extraction (EXT_MODE) and the target
1655 mode. Instead, create a temporary and use convert_move to set
1656 the target. */
1657 if (REG_P (xtarget)
1658 && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (xtarget), ext_mode))
1660 xtarget = gen_lowpart (ext_mode, xtarget);
1661 if (GET_MODE_PRECISION (ext_mode)
1662 > GET_MODE_PRECISION (GET_MODE (xspec_target)))
1663 xspec_target_subreg = xtarget;
1665 else
1666 xtarget = gen_reg_rtx (ext_mode);
1669 create_output_operand (&ops[0], xtarget, ext_mode);
1670 create_fixed_operand (&ops[1], xop0);
1671 create_integer_operand (&ops[2], bitsize);
1672 create_integer_operand (&ops[3], xbitpos);
1673 if (maybe_expand_insn (unsignedp ? CODE_FOR_extzv : CODE_FOR_extv,
1674 4, ops))
1676 xtarget = ops[0].value;
1677 if (xtarget == xspec_target)
1678 return xtarget;
1679 if (xtarget == xspec_target_subreg)
1680 return xspec_target;
1681 return convert_extracted_bit_field (xtarget, mode, tmode, unsignedp);
1685 /* If OP0 is a memory, try copying it to a register and seeing if a
1686 cheap register alternative is available. */
1687 if (ext_mode != MAX_MACHINE_MODE && MEM_P (op0))
1689 enum machine_mode bestmode;
1691 /* Get the mode to use for inserting into this field. If
1692 OP0 is BLKmode, get the smallest mode consistent with the
1693 alignment. If OP0 is a non-BLKmode object that is no
1694 wider than EXT_MODE, use its mode. Otherwise, use the
1695 smallest mode containing the field. */
1697 if (GET_MODE (op0) == BLKmode
1698 || (ext_mode != MAX_MACHINE_MODE
1699 && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (ext_mode)))
1700 bestmode = get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0),
1701 (ext_mode == MAX_MACHINE_MODE
1702 ? VOIDmode : ext_mode),
1703 MEM_VOLATILE_P (op0));
1704 else
1705 bestmode = GET_MODE (op0);
1707 if (bestmode != VOIDmode
1708 && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
1709 && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
1711 unsigned HOST_WIDE_INT xoffset, xbitpos;
1713 /* Compute the offset as a multiple of this unit,
1714 counting in bytes. */
1715 unit = GET_MODE_BITSIZE (bestmode);
1716 xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1717 xbitpos = bitnum % unit;
1719 /* Make sure the register is big enough for the whole field. */
1720 if (xoffset * BITS_PER_UNIT + unit
1721 >= offset * BITS_PER_UNIT + bitsize)
1723 rtx last, result, xop0;
1725 last = get_last_insn ();
1727 /* Fetch it to a register in that size. */
1728 xop0 = adjust_address (op0, bestmode, xoffset);
1729 xop0 = force_reg (bestmode, xop0);
1730 result = extract_bit_field_1 (xop0, bitsize, xbitpos,
1731 unsignedp, packedp, target,
1732 mode, tmode, false);
1733 if (result)
1734 return result;
1736 delete_insns_since (last);
1741 if (!fallback_p)
1742 return NULL;
1744 target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1745 bitpos, target, unsignedp, packedp);
1746 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1749 /* Generate code to extract a bit-field from STR_RTX
1750 containing BITSIZE bits, starting at BITNUM,
1751 and put it in TARGET if possible (if TARGET is nonzero).
1752 Regardless of TARGET, we return the rtx for where the value is placed.
1754 STR_RTX is the structure containing the byte (a REG or MEM).
1755 UNSIGNEDP is nonzero if this is an unsigned bit field.
1756 PACKEDP is nonzero if the field has the packed attribute.
1757 MODE is the natural mode of the field value once extracted.
1758 TMODE is the mode the caller would like the value to have;
1759 but the value may be returned with type MODE instead.
1761 If a TARGET is specified and we can store in it at no extra cost,
1762 we do so, and return TARGET.
1763 Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1764 if they are equally easy. */
1767 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1768 unsigned HOST_WIDE_INT bitnum, int unsignedp, bool packedp,
1769 rtx target, enum machine_mode mode, enum machine_mode tmode)
1771 return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp, packedp,
1772 target, mode, tmode, true);
1775 /* Extract a bit field using shifts and boolean operations.
1776 Returns an rtx to represent the value.
1777 OP0 addresses a register (word) or memory (byte).
1778 BITPOS says which bit within the word or byte the bit field starts in.
1779 OFFSET says how many bytes farther the bit field starts;
1780 it is 0 if OP0 is a register.
1781 BITSIZE says how many bits long the bit field is.
1782 (If OP0 is a register, it may be narrower than a full word,
1783 but BITPOS still counts within a full word,
1784 which is significant on bigendian machines.)
1786 UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1787 PACKEDP is true if the field has the packed attribute.
1789 If TARGET is nonzero, attempts to store the value there
1790 and return TARGET, but this is not guaranteed.
1791 If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */
1793 static rtx
1794 extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
1795 unsigned HOST_WIDE_INT offset,
1796 unsigned HOST_WIDE_INT bitsize,
1797 unsigned HOST_WIDE_INT bitpos, rtx target,
1798 int unsignedp, bool packedp)
1800 unsigned int total_bits = BITS_PER_WORD;
1801 enum machine_mode mode;
1803 if (GET_CODE (op0) == SUBREG || REG_P (op0))
1805 /* Special treatment for a bit field split across two registers. */
1806 if (bitsize + bitpos > BITS_PER_WORD)
1807 return extract_split_bit_field (op0, bitsize, bitpos, unsignedp);
1809 else
1811 /* Get the proper mode to use for this field. We want a mode that
1812 includes the entire field. If such a mode would be larger than
1813 a word, we won't be doing the extraction the normal way. */
1815 if (MEM_VOLATILE_P (op0)
1816 && flag_strict_volatile_bitfields > 0)
1818 if (GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
1819 mode = GET_MODE (op0);
1820 else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1821 mode = GET_MODE (target);
1822 else
1823 mode = tmode;
1825 else
1826 mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT, 0, 0,
1827 MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));
1829 if (mode == VOIDmode)
1830 /* The only way this should occur is if the field spans word
1831 boundaries. */
1832 return extract_split_bit_field (op0, bitsize,
1833 bitpos + offset * BITS_PER_UNIT,
1834 unsignedp);
1836 total_bits = GET_MODE_BITSIZE (mode);
1838 /* Make sure bitpos is valid for the chosen mode. Adjust BITPOS to
1839 be in the range 0 to total_bits-1, and put any excess bytes in
1840 OFFSET. */
1841 if (bitpos >= total_bits)
1843 offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
1844 bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
1845 * BITS_PER_UNIT);
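/* For example, with total_bits == 32 and BITS_PER_UNIT == 8, a BITPOS of 70
   adds 8 bytes (two 32-bit units) to OFFSET and leaves BITPOS == 6, so the
   access is made relative to the unit that actually contains the field. */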
1848 /* If we're accessing a volatile MEM, we can't do the next
1849 alignment step if it results in a multi-word access where we
1850 otherwise wouldn't have one. So, check for that case
1851 here. */
1852 if (MEM_P (op0)
1853 && MEM_VOLATILE_P (op0)
1854 && flag_strict_volatile_bitfields > 0
1855 && bitpos + bitsize <= total_bits
1856 && bitpos + bitsize + (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT > total_bits)
1858 if (STRICT_ALIGNMENT)
1860 static bool informed_about_misalignment = false;
1861 bool warned;
1863 if (packedp)
1865 if (bitsize == total_bits)
1866 warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1867 "multiple accesses to volatile structure member"
1868 " because of packed attribute");
1869 else
1870 warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1871 "multiple accesses to volatile structure bitfield"
1872 " because of packed attribute");
1874 return extract_split_bit_field (op0, bitsize,
1875 bitpos + offset * BITS_PER_UNIT,
1876 unsignedp);
1879 if (bitsize == total_bits)
1880 warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1881 "mis-aligned access used for structure member");
1882 else
1883 warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1884 "mis-aligned access used for structure bitfield");
1886 if (! informed_about_misalignment && warned)
1888 informed_about_misalignment = true;
1889 inform (input_location,
1890 "when a volatile object spans multiple type-sized locations,"
1891 " the compiler must choose between using a single mis-aligned access to"
1892 " preserve the volatility, or using multiple aligned accesses to avoid"
1893 " runtime faults; this code may fail at runtime if the hardware does"
1894 " not allow this access");
1898 else
1901 /* Get ref to an aligned byte, halfword, or word containing the field.
1902 Adjust BITPOS to be position within a word,
1903 and OFFSET to be the offset of that word.
1904 Then alter OP0 to refer to that word. */
1905 bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
1906 offset -= (offset % (total_bits / BITS_PER_UNIT));
1909 op0 = adjust_address (op0, mode, offset);
1912 mode = GET_MODE (op0);
1914 if (BYTES_BIG_ENDIAN)
1915 /* BITPOS is the distance between our msb and that of OP0.
1916 Convert it to the distance from the lsb. */
1917 bitpos = total_bits - bitsize - bitpos;
1919 /* Now BITPOS is always the distance between the field's lsb and that of OP0.
1920 We have reduced the big-endian case to the little-endian case. */
1922 if (unsignedp)
1924 if (bitpos)
1926 /* If the field does not already start at the lsb,
1927 shift it so it does. */
1928 /* Maybe propagate the target for the shift. */
1929 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1930 if (tmode != mode)
1931 subtarget = 0;
1932 op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitpos, subtarget, 1);
1934 /* Convert the value to the desired mode. */
1935 if (mode != tmode)
1936 op0 = convert_to_mode (tmode, op0, 1);
1938 /* Unless the msb of the field used to be the msb when we shifted,
1939 mask out the upper bits. */
1941 if (GET_MODE_BITSIZE (mode) != bitpos + bitsize)
1942 return expand_binop (GET_MODE (op0), and_optab, op0,
1943 mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1944 target, 1, OPTAB_LIB_WIDEN);
1945 return op0;
1948 /* To extract a signed bit-field, first shift its msb to the msb of the word,
1949 then arithmetic-shift its lsb to the lsb of the word. */
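/* For example, a signed 5-bit field at BITPOS 20 needs SImode, the
   narrowest integer mode holding bits 0..24; it is shifted left by
   32 - (5 + 20) = 7, placing the field's msb in bit 31, and then shifted
   right arithmetically by 32 - 5 = 27, which moves the field down to
   bit 0 while replicating its sign bit above it. */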
1950 op0 = force_reg (mode, op0);
1952 /* Find the narrowest integer mode that contains the field. */
1954 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1955 mode = GET_MODE_WIDER_MODE (mode))
1956 if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos)
1958 op0 = convert_to_mode (mode, op0, 0);
1959 break;
1962 if (mode != tmode)
1963 target = 0;
1965 if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos))
1967 int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitpos);
1968 /* Maybe propagate the target for the shift. */
1969 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1970 op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1973 return expand_shift (RSHIFT_EXPR, mode, op0,
1974 GET_MODE_BITSIZE (mode) - bitsize, target, 0);
1977 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1978 of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1979 complement of that if COMPLEMENT. The mask is truncated if
1980 necessary to the width of mode MODE. The mask is zero-extended if
1981 BITSIZE+BITPOS is too small for MODE. */
1983 static rtx
1984 mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
1986 double_int mask;
1988 mask = double_int_mask (bitsize);
1989 mask = double_int_lshift (mask, bitpos, HOST_BITS_PER_DOUBLE_INT, false);
1991 if (complement)
1992 mask = double_int_not (mask);
1994 return immed_double_int_const (mask, mode);
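/* As an illustration, mask_rtx (SImode, 3, 5, 0) yields (const_int 0xf8),
   five one bits above three zero bits, and with COMPLEMENT nonzero the
   same call yields 0xffffff07, the inverse within the 32 bits of SImode. */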
1997 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1998 VALUE truncated to BITSIZE bits and then shifted left BITPOS bits. */
2000 static rtx
2001 lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize)
2003 double_int val;
2005 val = double_int_zext (uhwi_to_double_int (INTVAL (value)), bitsize);
2006 val = double_int_lshift (val, bitpos, HOST_BITS_PER_DOUBLE_INT, false);
2008 return immed_double_int_const (val, mode);
2011 /* Extract a bit field that is split across two words
2012 and return an RTX for the result.
2014 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
2015 BITSIZE is the field width; BITPOS, position of its first bit, in the word.
2016 UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend. */
2018 static rtx
2019 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
2020 unsigned HOST_WIDE_INT bitpos, int unsignedp)
2022 unsigned int unit;
2023 unsigned int bitsdone = 0;
2024 rtx result = NULL_RTX;
2025 int first = 1;
2027 /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
2028 much at a time. */
2029 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
2030 unit = BITS_PER_WORD;
2031 else
2032 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
2034 while (bitsdone < bitsize)
2036 unsigned HOST_WIDE_INT thissize;
2037 rtx part, word;
2038 unsigned HOST_WIDE_INT thispos;
2039 unsigned HOST_WIDE_INT offset;
2041 offset = (bitpos + bitsdone) / unit;
2042 thispos = (bitpos + bitsdone) % unit;
2044 /* THISSIZE must not overrun a word boundary. Otherwise,
2045 extract_fixed_bit_field will call us again, and we will mutually
2046 recurse forever. */
2047 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
2048 thissize = MIN (thissize, unit - thispos);
2050 /* If OP0 is a register, then handle OFFSET here.
2052 When handling multiword bitfields, extract_bit_field may pass
2053 down a word_mode SUBREG of a larger REG for a bitfield that actually
2054 crosses a word boundary. Thus, for a SUBREG, we must find
2055 the current word starting from the base register. */
2056 if (GET_CODE (op0) == SUBREG)
2058 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
2059 word = operand_subword_force (SUBREG_REG (op0), word_offset,
2060 GET_MODE (SUBREG_REG (op0)));
2061 offset = 0;
2063 else if (REG_P (op0))
2065 word = operand_subword_force (op0, offset, GET_MODE (op0));
2066 offset = 0;
2068 else
2069 word = op0;
2071 /* Extract the parts in bit-counting order,
2072 whose meaning is determined by BYTES_PER_UNIT.
2073 OFFSET is in UNITs, and UNIT is in bits.
2074 extract_fixed_bit_field wants offset in bytes. */
2075 part = extract_fixed_bit_field (word_mode, word,
2076 offset * unit / BITS_PER_UNIT,
2077 thissize, thispos, 0, 1, false);
2078 bitsdone += thissize;
2080 /* Shift this part into place for the result. */
2081 if (BYTES_BIG_ENDIAN)
2083 if (bitsize != bitsdone)
2084 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2085 bitsize - bitsdone, 0, 1);
2087 else
2089 if (bitsdone != thissize)
2090 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2091 bitsdone - thissize, 0, 1);
2094 if (first)
2095 result = part;
2096 else
2097 /* Combine the parts with bitwise or. This works
2098 because we extracted each part as an unsigned bit field. */
2099 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2100 OPTAB_LIB_WIDEN);
2102 first = 0;
2105 /* Unsigned bit field: we are done. */
2106 if (unsignedp)
2107 return result;
2108 /* Signed bit field: sign-extend with two arithmetic shifts. */
2109 result = expand_shift (LSHIFT_EXPR, word_mode, result,
2110 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2111 return expand_shift (RSHIFT_EXPR, word_mode, result,
2112 BITS_PER_WORD - bitsize, NULL_RTX, 0);
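/* For example, on a 32-bit little-endian target a 12-bit field starting at
   bit 28 is assembled from two pieces: 4 bits from the first word, kept in
   place, and 8 bits from the second word, shifted left by 4 before being
   IORed in.  If the field is signed, the combined value is then shifted
   left by 32 - 12 = 20 and arithmetically back right by 20 to propagate
   the sign bit. */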
2115 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2116 the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than
2117 MODE, fill the upper bits with zeros. Fail if the layout of either
2118 mode is unknown (as for CC modes) or if the extraction would involve
2119 unprofitable mode punning. Return the value on success, otherwise
2120 return null.
2122 This is different from gen_lowpart* in these respects:
2124 - the returned value must always be considered an rvalue
2126 - when MODE is wider than SRC_MODE, the extraction involves
2127 a zero extension
2129 - when MODE is smaller than SRC_MODE, the extraction involves
2130 a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
2132 In other words, this routine performs a computation, whereas the
2133 gen_lowpart* routines are conceptually lvalue or rvalue subreg
2134 operations. */
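/* For example, reading the SImode low bits of an SFmode value produces the
   raw IEEE bit pattern as an integer rvalue, typically by punning through
   the corresponding integer mode, whereas reading the QImode low bits of
   an SImode value is an ordinary truncation. */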
2137 extract_low_bits (enum machine_mode mode, enum machine_mode src_mode, rtx src)
2139 enum machine_mode int_mode, src_int_mode;
2141 if (mode == src_mode)
2142 return src;
2144 if (CONSTANT_P (src))
2146 /* simplify_gen_subreg can't be used here, as if simplify_subreg
2147 fails, it will happily create (subreg (symbol_ref)) or similar
2148 invalid SUBREGs. */
2149 unsigned int byte = subreg_lowpart_offset (mode, src_mode);
2150 rtx ret = simplify_subreg (mode, src, src_mode, byte);
2151 if (ret)
2152 return ret;
2154 if (GET_MODE (src) == VOIDmode
2155 || !validate_subreg (mode, src_mode, src, byte))
2156 return NULL_RTX;
2158 src = force_reg (GET_MODE (src), src);
2159 return gen_rtx_SUBREG (mode, src, byte);
2162 if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2163 return NULL_RTX;
2165 if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2166 && MODES_TIEABLE_P (mode, src_mode))
2168 rtx x = gen_lowpart_common (mode, src);
2169 if (x)
2170 return x;
2173 src_int_mode = int_mode_for_mode (src_mode);
2174 int_mode = int_mode_for_mode (mode);
2175 if (src_int_mode == BLKmode || int_mode == BLKmode)
2176 return NULL_RTX;
2178 if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2179 return NULL_RTX;
2180 if (!MODES_TIEABLE_P (int_mode, mode))
2181 return NULL_RTX;
2183 src = gen_lowpart (src_int_mode, src);
2184 src = convert_modes (int_mode, src_int_mode, src, true);
2185 src = gen_lowpart (mode, src);
2186 return src;
2189 /* Add INC into TARGET. */
2191 void
2192 expand_inc (rtx target, rtx inc)
2194 rtx value = expand_binop (GET_MODE (target), add_optab,
2195 target, inc,
2196 target, 0, OPTAB_LIB_WIDEN);
2197 if (value != target)
2198 emit_move_insn (target, value);
2201 /* Subtract DEC from TARGET. */
2203 void
2204 expand_dec (rtx target, rtx dec)
2206 rtx value = expand_binop (GET_MODE (target), sub_optab,
2207 target, dec,
2208 target, 0, OPTAB_LIB_WIDEN);
2209 if (value != target)
2210 emit_move_insn (target, value);
2213 /* Output a shift instruction for expression code CODE,
2214 with SHIFTED being the rtx for the value to shift,
2215 and AMOUNT the rtx for the amount to shift by.
2216 Store the result in the rtx TARGET, if that is convenient.
2217 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2218 Return the rtx for where the value is. */
2220 static rtx
2221 expand_shift_1 (enum tree_code code, enum machine_mode mode, rtx shifted,
2222 rtx amount, rtx target, int unsignedp)
2224 rtx op1, temp = 0;
2225 int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2226 int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2227 optab lshift_optab = ashl_optab;
2228 optab rshift_arith_optab = ashr_optab;
2229 optab rshift_uns_optab = lshr_optab;
2230 optab lrotate_optab = rotl_optab;
2231 optab rrotate_optab = rotr_optab;
2232 enum machine_mode op1_mode;
2233 int attempt;
2234 bool speed = optimize_insn_for_speed_p ();
2236 op1 = amount;
2237 op1_mode = GET_MODE (op1);
2239 /* Determine whether the shift/rotate amount is a vector, or scalar. If the
2240 shift amount is a vector, use the vector/vector shift patterns. */
2241 if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2243 lshift_optab = vashl_optab;
2244 rshift_arith_optab = vashr_optab;
2245 rshift_uns_optab = vlshr_optab;
2246 lrotate_optab = vrotl_optab;
2247 rrotate_optab = vrotr_optab;
2250 /* We used to detect shift counts computed by NEGATE_EXPR and shift
2251 them in the other direction; but that does not work
2252 on all machines. */
2254 if (SHIFT_COUNT_TRUNCATED)
2256 if (CONST_INT_P (op1)
2257 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2258 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
2259 op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2260 % GET_MODE_BITSIZE (mode));
2261 else if (GET_CODE (op1) == SUBREG
2262 && subreg_lowpart_p (op1)
2263 && INTEGRAL_MODE_P (GET_MODE (SUBREG_REG (op1))))
2264 op1 = SUBREG_REG (op1);
2267 if (op1 == const0_rtx)
2268 return shifted;
2270 /* Check whether it's cheaper to implement a left shift by a constant
2271 bit count by a sequence of additions. */
2272 if (code == LSHIFT_EXPR
2273 && CONST_INT_P (op1)
2274 && INTVAL (op1) > 0
2275 && INTVAL (op1) < GET_MODE_PRECISION (mode)
2276 && INTVAL (op1) < MAX_BITS_PER_WORD
2277 && (shift_cost (speed, mode, INTVAL (op1))
2278 > INTVAL (op1) * add_cost (speed, mode))
2279 && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2281 int i;
2282 for (i = 0; i < INTVAL (op1); i++)
2284 temp = force_reg (mode, shifted);
2285 shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2286 unsignedp, OPTAB_LIB_WIDEN);
2288 return shifted;
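/* For instance, a left shift by 2 becomes two self-additions when those are
   cheaper than the shift: t = x + x; t = t + t; which computes x << 2. */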
2291 for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2293 enum optab_methods methods;
2295 if (attempt == 0)
2296 methods = OPTAB_DIRECT;
2297 else if (attempt == 1)
2298 methods = OPTAB_WIDEN;
2299 else
2300 methods = OPTAB_LIB_WIDEN;
2302 if (rotate)
2304 /* Widening does not work for rotation. */
2305 if (methods == OPTAB_WIDEN)
2306 continue;
2307 else if (methods == OPTAB_LIB_WIDEN)
2309 /* If we have been unable to open-code this by a rotation,
2310 do it as the IOR of two shifts. I.e., to rotate A
2311 by N bits, compute (A << N) | ((unsigned) A >> (C - N))
2312 where C is the bitsize of A.
2314 It is theoretically possible that the target machine might
2315 not be able to perform either shift and hence we would
2316 be making two libcalls rather than just the one for the
2317 shift (similarly if IOR could not be done). We will allow
2318 this extremely unlikely lossage to avoid complicating the
2319 code below. */
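/* For example, rotating the QImode value 0xb4 left by 3 becomes
   ((0xb4 << 3) | ((unsigned) 0xb4 >> (8 - 3))) & 0xff == 0xa0 | 0x05 == 0xa5. */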
2321 rtx subtarget = target == shifted ? 0 : target;
2322 rtx new_amount, other_amount;
2323 rtx temp1;
2325 new_amount = op1;
2326 if (CONST_INT_P (op1))
2327 other_amount = GEN_INT (GET_MODE_BITSIZE (mode)
2328 - INTVAL (op1));
2329 else
2330 other_amount
2331 = simplify_gen_binary (MINUS, GET_MODE (op1),
2332 GEN_INT (GET_MODE_PRECISION (mode)),
2333 op1);
2335 shifted = force_reg (mode, shifted);
2337 temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2338 mode, shifted, new_amount, 0, 1);
2339 temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2340 mode, shifted, other_amount,
2341 subtarget, 1);
2342 return expand_binop (mode, ior_optab, temp, temp1, target,
2343 unsignedp, methods);
2346 temp = expand_binop (mode,
2347 left ? lrotate_optab : rrotate_optab,
2348 shifted, op1, target, unsignedp, methods);
2350 else if (unsignedp)
2351 temp = expand_binop (mode,
2352 left ? lshift_optab : rshift_uns_optab,
2353 shifted, op1, target, unsignedp, methods);
2355 /* Do arithmetic shifts.
2356 Also, if we are going to widen the operand, we can just as well
2357 use an arithmetic right-shift instead of a logical one. */
2358 if (temp == 0 && ! rotate
2359 && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2361 enum optab_methods methods1 = methods;
2363 /* If trying to widen a log shift to an arithmetic shift,
2364 don't accept an arithmetic shift of the same size. */
2365 if (unsignedp)
2366 methods1 = OPTAB_MUST_WIDEN;
2368 /* Arithmetic shift */
2370 temp = expand_binop (mode,
2371 left ? lshift_optab : rshift_arith_optab,
2372 shifted, op1, target, unsignedp, methods1);
2375 /* We used to try extzv here for logical right shifts, but that was
2376 only useful for one machine, the VAX, and caused poor code
2377 generation there for lshrdi3, so the code was deleted and a
2378 define_expand for lshrsi3 was added to vax.md. */
2381 gcc_assert (temp);
2382 return temp;
2385 /* Output a shift instruction for expression code CODE,
2386 with SHIFTED being the rtx for the value to shift,
2387 and AMOUNT the amount to shift by.
2388 Store the result in the rtx TARGET, if that is convenient.
2389 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2390 Return the rtx for where the value is. */
2393 expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2394 int amount, rtx target, int unsignedp)
2396 return expand_shift_1 (code, mode,
2397 shifted, GEN_INT (amount), target, unsignedp);
2400 /* Output a shift instruction for expression code CODE,
2401 with SHIFTED being the rtx for the value to shift,
2402 and AMOUNT the tree for the amount to shift by.
2403 Store the result in the rtx TARGET, if that is convenient.
2404 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2405 Return the rtx for where the value is. */
2408 expand_variable_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2409 tree amount, rtx target, int unsignedp)
2411 return expand_shift_1 (code, mode,
2412 shifted, expand_normal (amount), target, unsignedp);
2416 /* Indicates the type of fixup needed after a constant multiplication.
2417 BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2418 the result should be negated, and ADD_VARIANT means that the
2419 multiplicand should be added to the result. */
2420 enum mult_variant {basic_variant, negate_variant, add_variant};
2422 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2423 const struct mult_cost *, enum machine_mode mode);
2424 static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
2425 struct algorithm *, enum mult_variant *, int);
2426 static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
2427 const struct algorithm *, enum mult_variant);
2428 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2429 static rtx extract_high_half (enum machine_mode, rtx);
2430 static rtx expmed_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
2431 static rtx expmed_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
2432 int, int);
2433 /* Compute and return the best algorithm for multiplying by T.
2434 The algorithm must cost less than COST_LIMIT.
2435 If retval.cost >= COST_LIMIT, no algorithm was found and all
2436 other fields of the returned struct are undefined.
2437 MODE is the machine mode of the multiplication. */
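/* For example, for T == 45 this search can discover the factorization
   45 == 5 * 9 and return the two-step sequence
   y = (x << 2) + x;  result = (y << 3) + y;
   replacing the multiplication by a pair of shift-and-add operations. */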
2439 static void
2440 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2441 const struct mult_cost *cost_limit, enum machine_mode mode)
2443 int m;
2444 struct algorithm *alg_in, *best_alg;
2445 struct mult_cost best_cost;
2446 struct mult_cost new_limit;
2447 int op_cost, op_latency;
2448 unsigned HOST_WIDE_INT orig_t = t;
2449 unsigned HOST_WIDE_INT q;
2450 int maxm, hash_index;
2451 bool cache_hit = false;
2452 enum alg_code cache_alg = alg_zero;
2453 bool speed = optimize_insn_for_speed_p ();
2454 enum machine_mode imode;
2455 struct alg_hash_entry *entry_ptr;
2457 /* Indicate that no algorithm is yet found. If no algorithm
2458 is found, this value will be returned and indicate failure. */
2459 alg_out->cost.cost = cost_limit->cost + 1;
2460 alg_out->cost.latency = cost_limit->latency + 1;
2462 if (cost_limit->cost < 0
2463 || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2464 return;
2466 /* Be prepared for vector modes. */
2467 imode = GET_MODE_INNER (mode);
2468 if (imode == VOIDmode)
2469 imode = mode;
2471 maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2473 /* Restrict the bits of "t" to the multiplication's mode. */
2474 t &= GET_MODE_MASK (imode);
2476 /* t == 1 can be done in zero cost. */
2477 if (t == 1)
2479 alg_out->ops = 1;
2480 alg_out->cost.cost = 0;
2481 alg_out->cost.latency = 0;
2482 alg_out->op[0] = alg_m;
2483 return;
2486 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2487 fail now. */
2488 if (t == 0)
2490 if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2491 return;
2492 else
2494 alg_out->ops = 1;
2495 alg_out->cost.cost = zero_cost (speed);
2496 alg_out->cost.latency = zero_cost (speed);
2497 alg_out->op[0] = alg_zero;
2498 return;
2502 /* We'll be needing a couple extra algorithm structures now. */
2504 alg_in = XALLOCA (struct algorithm);
2505 best_alg = XALLOCA (struct algorithm);
2506 best_cost = *cost_limit;
2508 /* Compute the hash index. */
2509 hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2511 /* See if we already know what to do for T. */
2512 entry_ptr = alg_hash_entry_ptr (hash_index);
2513 if (entry_ptr->t == t
2514 && entry_ptr->mode == mode
2516 && entry_ptr->speed == speed
2517 && entry_ptr->alg != alg_unknown)
2519 cache_alg = entry_ptr->alg;
2521 if (cache_alg == alg_impossible)
2523 /* The cache tells us that it's impossible to synthesize
2524 multiplication by T within entry_ptr->cost. */
2525 if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2526 /* COST_LIMIT is at least as restrictive as the one
2527 recorded in the hash table, in which case we have no
2528 hope of synthesizing a multiplication. Just
2529 return. */
2530 return;
2532 /* If we get here, COST_LIMIT is less restrictive than the
2533 one recorded in the hash table, so we may be able to
2534 synthesize a multiplication. Proceed as if we didn't
2535 have the cache entry. */
2537 else
2539 if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2540 /* The cached algorithm shows that this multiplication
2541 requires more cost than COST_LIMIT. Just return. This
2542 way, we don't clobber this cache entry with
2543 alg_impossible but retain useful information. */
2544 return;
2546 cache_hit = true;
2548 switch (cache_alg)
2550 case alg_shift:
2551 goto do_alg_shift;
2553 case alg_add_t_m2:
2554 case alg_sub_t_m2:
2555 goto do_alg_addsub_t_m2;
2557 case alg_add_factor:
2558 case alg_sub_factor:
2559 goto do_alg_addsub_factor;
2561 case alg_add_t2_m:
2562 goto do_alg_add_t2_m;
2564 case alg_sub_t2_m:
2565 goto do_alg_sub_t2_m;
2567 default:
2568 gcc_unreachable ();
2573 /* If we have a group of zero bits at the low-order part of T, try
2574 multiplying by the remaining bits and then doing a shift. */
2576 if ((t & 1) == 0)
2578 do_alg_shift:
2579 m = floor_log2 (t & -t); /* m = number of low zero bits */
2580 if (m < maxm)
2582 q = t >> m;
2583 /* The function expand_shift will choose between a shift and
2584 a sequence of additions, so the observed cost is given as
2585 MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)). */
2586 op_cost = m * add_cost (speed, mode);
2587 if (shift_cost (speed, mode, m) < op_cost)
2588 op_cost = shift_cost (speed, mode, m);
2589 new_limit.cost = best_cost.cost - op_cost;
2590 new_limit.latency = best_cost.latency - op_cost;
2591 synth_mult (alg_in, q, &new_limit, mode);
2593 alg_in->cost.cost += op_cost;
2594 alg_in->cost.latency += op_cost;
2595 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2597 struct algorithm *x;
2598 best_cost = alg_in->cost;
2599 x = alg_in, alg_in = best_alg, best_alg = x;
2600 best_alg->log[best_alg->ops] = m;
2601 best_alg->op[best_alg->ops] = alg_shift;
2604 /* See if treating ORIG_T as a signed number yields a better
2605 sequence. Try this sequence only for a negative ORIG_T
2606 as it would be useless for a non-negative ORIG_T. */
2607 if ((HOST_WIDE_INT) orig_t < 0)
2609 /* Shift ORIG_T as follows because a right shift of a
2610 negative-valued signed type is implementation
2611 defined. */
2612 q = ~(~orig_t >> m);
2613 /* The function expand_shift will choose between a shift
2614 and a sequence of additions, so the observed cost is
2615 given as MIN (m * add_cost(speed, mode),
2616 shift_cost(speed, mode, m)). */
2617 op_cost = m * add_cost (speed, mode);
2618 if (shift_cost (speed, mode, m) < op_cost)
2619 op_cost = shift_cost (speed, mode, m);
2620 new_limit.cost = best_cost.cost - op_cost;
2621 new_limit.latency = best_cost.latency - op_cost;
2622 synth_mult (alg_in, q, &new_limit, mode);
2624 alg_in->cost.cost += op_cost;
2625 alg_in->cost.latency += op_cost;
2626 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2628 struct algorithm *x;
2629 best_cost = alg_in->cost;
2630 x = alg_in, alg_in = best_alg, best_alg = x;
2631 best_alg->log[best_alg->ops] = m;
2632 best_alg->op[best_alg->ops] = alg_shift;
2636 if (cache_hit)
2637 goto done;
2640 /* If we have an odd number, add or subtract one. */
2641 if ((t & 1) != 0)
2643 unsigned HOST_WIDE_INT w;
2645 do_alg_addsub_t_m2:
2646 for (w = 1; (w & t) != 0; w <<= 1)
2648 /* If T was -1, then W will be zero after the loop. This is another
2649 case where T ends with ...111. Handling this by multiplying by (T + 1)
2650 and subtracting 1 produces slightly better code and makes algorithm
2651 selection much faster than treating it like the ...0111 case
2652 below. */
2653 if (w == 0
2654 || (w > 2
2655 /* Reject the case where t is 3.
2656 Thus we prefer addition in that case. */
2657 && t != 3))
2659 /* T ends with ...111. Multiply by (T + 1) and subtract 1. */
2661 op_cost = add_cost (speed, mode);
2662 new_limit.cost = best_cost.cost - op_cost;
2663 new_limit.latency = best_cost.latency - op_cost;
2664 synth_mult (alg_in, t + 1, &new_limit, mode);
2666 alg_in->cost.cost += op_cost;
2667 alg_in->cost.latency += op_cost;
2668 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2670 struct algorithm *x;
2671 best_cost = alg_in->cost;
2672 x = alg_in, alg_in = best_alg, best_alg = x;
2673 best_alg->log[best_alg->ops] = 0;
2674 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2677 else
2679 /* T ends with ...01 or ...011. Multiply by (T - 1) and add 1. */
2681 op_cost = add_cost (speed, mode);
2682 new_limit.cost = best_cost.cost - op_cost;
2683 new_limit.latency = best_cost.latency - op_cost;
2684 synth_mult (alg_in, t - 1, &new_limit, mode);
2686 alg_in->cost.cost += op_cost;
2687 alg_in->cost.latency += op_cost;
2688 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2690 struct algorithm *x;
2691 best_cost = alg_in->cost;
2692 x = alg_in, alg_in = best_alg, best_alg = x;
2693 best_alg->log[best_alg->ops] = 0;
2694 best_alg->op[best_alg->ops] = alg_add_t_m2;
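/* For example, T == 7 ends in ...111 and is handled as x * 8 - x, while
   T == 5 ends in ...01 and is handled as x * 4 + x; either way the
   recursive call only has to synthesize a power of two. */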
2698 /* We may be able to calculate a * -7, a * -15, a * -31, etc.
2699 quickly with a - a * n for some appropriate constant n. */
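/* For instance, a * -7 can be computed as a - (a << 3), because
   -(-7) + 1 == 8 is a power of two; the whole multiplication collapses
   into a single shift-and-subtract. */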
2700 m = exact_log2 (-orig_t + 1);
2701 if (m >= 0 && m < maxm)
2703 op_cost = shiftsub1_cost (speed, mode, m);
2704 new_limit.cost = best_cost.cost - op_cost;
2705 new_limit.latency = best_cost.latency - op_cost;
2706 synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2707 &new_limit, mode);
2709 alg_in->cost.cost += op_cost;
2710 alg_in->cost.latency += op_cost;
2711 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2713 struct algorithm *x;
2714 best_cost = alg_in->cost;
2715 x = alg_in, alg_in = best_alg, best_alg = x;
2716 best_alg->log[best_alg->ops] = m;
2717 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2721 if (cache_hit)
2722 goto done;
2725 /* Look for factors of t of the form
2726 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2727 If we find such a factor, we can multiply by t using an algorithm that
2728 multiplies by q, shift the result by m and add/subtract it to itself.
2730 We search for large factors first and loop down, even if large factors
2731 are less probable than small; if we find a large factor we will find a
2732 good sequence quickly, and therefore be able to prune (by decreasing
2733 COST_LIMIT) the search. */
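/* For example, T == 21 factors as 3 * 7 with 7 == 2**3 - 1, so after
   synthesizing y = x * 3 the result is completed as (y << 3) - y. */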
2735 do_alg_addsub_factor:
2736 for (m = floor_log2 (t - 1); m >= 2; m--)
2738 unsigned HOST_WIDE_INT d;
2740 d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2741 if (t % d == 0 && t > d && m < maxm
2742 && (!cache_hit || cache_alg == alg_add_factor))
2744 /* If the target has a cheap shift-and-add instruction use
2745 that in preference to a shift insn followed by an add insn.
2746 Assume that the shift-and-add is "atomic" with a latency
2747 equal to its cost, otherwise assume that on superscalar
2748 hardware the shift may be executed concurrently with the
2749 earlier steps in the algorithm. */
2750 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2751 if (shiftadd_cost (speed, mode, m) < op_cost)
2753 op_cost = shiftadd_cost (speed, mode, m);
2754 op_latency = op_cost;
2756 else
2757 op_latency = add_cost (speed, mode);
2759 new_limit.cost = best_cost.cost - op_cost;
2760 new_limit.latency = best_cost.latency - op_latency;
2761 synth_mult (alg_in, t / d, &new_limit, mode);
2763 alg_in->cost.cost += op_cost;
2764 alg_in->cost.latency += op_latency;
2765 if (alg_in->cost.latency < op_cost)
2766 alg_in->cost.latency = op_cost;
2767 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2769 struct algorithm *x;
2770 best_cost = alg_in->cost;
2771 x = alg_in, alg_in = best_alg, best_alg = x;
2772 best_alg->log[best_alg->ops] = m;
2773 best_alg->op[best_alg->ops] = alg_add_factor;
2775 /* Other factors will have been taken care of in the recursion. */
2776 break;
2779 d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2780 if (t % d == 0 && t > d && m < maxm
2781 && (!cache_hit || cache_alg == alg_sub_factor))
2783 /* If the target has a cheap shift-and-subtract insn use
2784 that in preference to a shift insn followed by a sub insn.
2785 Assume that the shift-and-sub is "atomic" with a latency
2786 equal to its cost, otherwise assume that on superscalar
2787 hardware the shift may be executed concurrently with the
2788 earlier steps in the algorithm. */
2789 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2790 if (shiftsub0_cost (speed, mode, m) < op_cost)
2792 op_cost = shiftsub0_cost (speed, mode, m);
2793 op_latency = op_cost;
2795 else
2796 op_latency = add_cost (speed, mode);
2798 new_limit.cost = best_cost.cost - op_cost;
2799 new_limit.latency = best_cost.latency - op_latency;
2800 synth_mult (alg_in, t / d, &new_limit, mode);
2802 alg_in->cost.cost += op_cost;
2803 alg_in->cost.latency += op_latency;
2804 if (alg_in->cost.latency < op_cost)
2805 alg_in->cost.latency = op_cost;
2806 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2808 struct algorithm *x;
2809 best_cost = alg_in->cost;
2810 x = alg_in, alg_in = best_alg, best_alg = x;
2811 best_alg->log[best_alg->ops] = m;
2812 best_alg->op[best_alg->ops] = alg_sub_factor;
2814 break;
2817 if (cache_hit)
2818 goto done;
2820 /* Try shift-and-add (load effective address) instructions,
2821 i.e. do a*3, a*5, a*9. */
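/* For example, T == 9 gives q == 8 and m == 3, the recursive call reduces
   to multiplying by 1, and x * 9 is emitted as the single shift-and-add
   (x << 3) + x. */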
2822 if ((t & 1) != 0)
2824 do_alg_add_t2_m:
2825 q = t - 1;
2826 q = q & -q;
2827 m = exact_log2 (q);
2828 if (m >= 0 && m < maxm)
2830 op_cost = shiftadd_cost (speed, mode, m);
2831 new_limit.cost = best_cost.cost - op_cost;
2832 new_limit.latency = best_cost.latency - op_cost;
2833 synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2835 alg_in->cost.cost += op_cost;
2836 alg_in->cost.latency += op_cost;
2837 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2839 struct algorithm *x;
2840 best_cost = alg_in->cost;
2841 x = alg_in, alg_in = best_alg, best_alg = x;
2842 best_alg->log[best_alg->ops] = m;
2843 best_alg->op[best_alg->ops] = alg_add_t2_m;
2846 if (cache_hit)
2847 goto done;
2849 do_alg_sub_t2_m:
2850 q = t + 1;
2851 q = q & -q;
2852 m = exact_log2 (q);
2853 if (m >= 0 && m < maxm)
2855 op_cost = shiftsub0_cost (speed, mode, m);
2856 new_limit.cost = best_cost.cost - op_cost;
2857 new_limit.latency = best_cost.latency - op_cost;
2858 synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2860 alg_in->cost.cost += op_cost;
2861 alg_in->cost.latency += op_cost;
2862 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2864 struct algorithm *x;
2865 best_cost = alg_in->cost;
2866 x = alg_in, alg_in = best_alg, best_alg = x;
2867 best_alg->log[best_alg->ops] = m;
2868 best_alg->op[best_alg->ops] = alg_sub_t2_m;
2871 if (cache_hit)
2872 goto done;
2875 done:
2876 /* If best_cost has not decreased, we have not found any algorithm. */
2877 if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2879 /* We failed to find an algorithm. Record alg_impossible for
2880 this case (that is, <T, MODE, COST_LIMIT>) so that next time
2881 we are asked to find an algorithm for T within the same or
2882 lower COST_LIMIT, we can immediately return to the
2883 caller. */
2884 entry_ptr->t = t;
2885 entry_ptr->mode = mode;
2886 entry_ptr->speed = speed;
2887 entry_ptr->alg = alg_impossible;
2888 entry_ptr->cost = *cost_limit;
2889 return;
2892 /* Cache the result. */
2893 if (!cache_hit)
2895 entry_ptr->t = t;
2896 entry_ptr->mode = mode;
2897 entry_ptr->speed = speed;
2898 entry_ptr->alg = best_alg->op[best_alg->ops];
2899 entry_ptr->cost.cost = best_cost.cost;
2900 entry_ptr->cost.latency = best_cost.latency;
2903 /* If we are getting a sequence that is too long for `struct algorithm'
2904 to record, make this search fail. */
2905 if (best_alg->ops == MAX_BITS_PER_WORD)
2906 return;
2908 /* Copy the algorithm from temporary space to the space at alg_out.
2909 We avoid using structure assignment because the majority of
2910 best_alg is normally undefined, and this is a critical function. */
2911 alg_out->ops = best_alg->ops + 1;
2912 alg_out->cost = best_cost;
2913 memcpy (alg_out->op, best_alg->op,
2914 alg_out->ops * sizeof *alg_out->op);
2915 memcpy (alg_out->log, best_alg->log,
2916 alg_out->ops * sizeof *alg_out->log);
2919 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2920 Try three variations:
2922 - a shift/add sequence based on VAL itself
2923 - a shift/add sequence based on -VAL, followed by a negation
2924 - a shift/add sequence based on VAL - 1, followed by an addition.
2926 Return true if the cheapest of these cost less than MULT_COST,
2927 describing the algorithm in *ALG and final fixup in *VARIANT. */
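/* For example, VAL == -5 is usually cheapest as the negate variant:
   synthesize x * 5 as (x << 2) + x and negate the result, rather than
   working with the bit pattern of -5 directly.  The VAL - 1 variant
   likewise pays off for the multipliers that arise in division-by-constant
   sequences, where x * VAL is formed as x * (VAL - 1) + x. */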
2929 static bool
2930 choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
2931 struct algorithm *alg, enum mult_variant *variant,
2932 int mult_cost)
2934 struct algorithm alg2;
2935 struct mult_cost limit;
2936 int op_cost;
2937 bool speed = optimize_insn_for_speed_p ();
2939 /* Fail quickly for impossible bounds. */
2940 if (mult_cost < 0)
2941 return false;
2943 /* Ensure that mult_cost provides a reasonable upper bound.
2944 Any constant multiplication can be performed with less
2945 than 2 * bits additions. */
2946 op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
2947 if (mult_cost > op_cost)
2948 mult_cost = op_cost;
2950 *variant = basic_variant;
2951 limit.cost = mult_cost;
2952 limit.latency = mult_cost;
2953 synth_mult (alg, val, &limit, mode);
2955 /* This works only if the inverted value actually fits in an
2956 `unsigned int' */
2957 if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
2959 op_cost = neg_cost(speed, mode);
2960 if (MULT_COST_LESS (&alg->cost, mult_cost))
2962 limit.cost = alg->cost.cost - op_cost;
2963 limit.latency = alg->cost.latency - op_cost;
2965 else
2967 limit.cost = mult_cost - op_cost;
2968 limit.latency = mult_cost - op_cost;
2971 synth_mult (&alg2, -val, &limit, mode);
2972 alg2.cost.cost += op_cost;
2973 alg2.cost.latency += op_cost;
2974 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2975 *alg = alg2, *variant = negate_variant;
2978 /* This proves very useful for division-by-constant. */
2979 op_cost = add_cost (speed, mode);
2980 if (MULT_COST_LESS (&alg->cost, mult_cost))
2982 limit.cost = alg->cost.cost - op_cost;
2983 limit.latency = alg->cost.latency - op_cost;
2985 else
2987 limit.cost = mult_cost - op_cost;
2988 limit.latency = mult_cost - op_cost;
2991 synth_mult (&alg2, val - 1, &limit, mode);
2992 alg2.cost.cost += op_cost;
2993 alg2.cost.latency += op_cost;
2994 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2995 *alg = alg2, *variant = add_variant;
2997 return MULT_COST_LESS (&alg->cost, mult_cost);
3000 /* A subroutine of expand_mult, used for constant multiplications.
3001 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
3002 convenient. Use the shift/add sequence described by ALG and apply
3003 the final fixup specified by VARIANT. */
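/* As an illustration, multiplying by 10 with the algorithm
   { alg_m, alg_add_t2_m (log 2), alg_shift (log 1) } expands as
   accum = x;  accum = (accum << 2) + x;  accum = accum << 1;
   while val_so_far tracks 1, 5, 10 and finally matches VAL. */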
3005 static rtx
3006 expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
3007 rtx target, const struct algorithm *alg,
3008 enum mult_variant variant)
3010 HOST_WIDE_INT val_so_far;
3011 rtx insn, accum, tem;
3012 int opno;
3013 enum machine_mode nmode;
3015 /* Avoid referencing memory over and over and invalid sharing
3016 on SUBREGs. */
3017 op0 = force_reg (mode, op0);
3019 /* ACCUM starts out either as OP0 or as a zero, depending on
3020 the first operation. */
3022 if (alg->op[0] == alg_zero)
3024 accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
3025 val_so_far = 0;
3027 else if (alg->op[0] == alg_m)
3029 accum = copy_to_mode_reg (mode, op0);
3030 val_so_far = 1;
3032 else
3033 gcc_unreachable ();
3035 for (opno = 1; opno < alg->ops; opno++)
3037 int log = alg->log[opno];
3038 rtx shift_subtarget = optimize ? 0 : accum;
3039 rtx add_target
3040 = (opno == alg->ops - 1 && target != 0 && variant != add_variant
3041 && !optimize)
3042 ? target : 0;
3043 rtx accum_target = optimize ? 0 : accum;
3044 rtx accum_inner;
3046 switch (alg->op[opno])
3048 case alg_shift:
3049 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3050 /* REG_EQUAL note will be attached to the following insn. */
3051 emit_move_insn (accum, tem);
3052 val_so_far <<= log;
3053 break;
3055 case alg_add_t_m2:
3056 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3057 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3058 add_target ? add_target : accum_target);
3059 val_so_far += (HOST_WIDE_INT) 1 << log;
3060 break;
3062 case alg_sub_t_m2:
3063 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3064 accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
3065 add_target ? add_target : accum_target);
3066 val_so_far -= (HOST_WIDE_INT) 1 << log;
3067 break;
3069 case alg_add_t2_m:
3070 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3071 log, shift_subtarget, 0);
3072 accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3073 add_target ? add_target : accum_target);
3074 val_so_far = (val_so_far << log) + 1;
3075 break;
3077 case alg_sub_t2_m:
3078 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3079 log, shift_subtarget, 0);
3080 accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3081 add_target ? add_target : accum_target);
3082 val_so_far = (val_so_far << log) - 1;
3083 break;
3085 case alg_add_factor:
3086 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3087 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3088 add_target ? add_target : accum_target);
3089 val_so_far += val_so_far << log;
3090 break;
3092 case alg_sub_factor:
3093 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3094 accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3095 (add_target
3096 ? add_target : (optimize ? 0 : tem)));
3097 val_so_far = (val_so_far << log) - val_so_far;
3098 break;
3100 default:
3101 gcc_unreachable ();
3104 if (SCALAR_INT_MODE_P (mode))
3106 /* Write a REG_EQUAL note on the last insn so that we can cse
3107 multiplication sequences. Note that if ACCUM is a SUBREG,
3108 we've set the inner register and must properly indicate that. */
3109 tem = op0, nmode = mode;
3110 accum_inner = accum;
3111 if (GET_CODE (accum) == SUBREG)
3113 accum_inner = SUBREG_REG (accum);
3114 nmode = GET_MODE (accum_inner);
3115 tem = gen_lowpart (nmode, op0);
3118 insn = get_last_insn ();
3119 set_dst_reg_note (insn, REG_EQUAL,
3120 gen_rtx_MULT (nmode, tem, GEN_INT (val_so_far)),
3121 accum_inner);
3125 if (variant == negate_variant)
3127 val_so_far = -val_so_far;
3128 accum = expand_unop (mode, neg_optab, accum, target, 0);
3130 else if (variant == add_variant)
3132 val_so_far = val_so_far + 1;
3133 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3136 /* Compare only the bits of val and val_so_far that are significant
3137 in the result mode, to avoid sign-/zero-extension confusion. */
3138 nmode = GET_MODE_INNER (mode);
3139 if (nmode == VOIDmode)
3140 nmode = mode;
3141 val &= GET_MODE_MASK (nmode);
3142 val_so_far &= GET_MODE_MASK (nmode);
3143 gcc_assert (val == val_so_far);
3145 return accum;
3148 /* Perform a multiplication and return an rtx for the result.
3149 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3150 TARGET is a suggestion for where to store the result (an rtx).
3152 We check specially for a constant integer as OP1.
3153 If you want this check for OP0 as well, then before calling
3154 you should swap the two operands if OP0 would be constant. */
3157 expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3158 int unsignedp)
3160 enum mult_variant variant;
3161 struct algorithm algorithm;
3162 rtx scalar_op1;
3163 int max_cost;
3164 bool speed = optimize_insn_for_speed_p ();
3165 bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3167 if (CONSTANT_P (op0))
3169 rtx temp = op0;
3170 op0 = op1;
3171 op1 = temp;
3174 /* For vectors, there are several simplifications that can be made if
3175 all elements of the vector constant are identical. */
3176 scalar_op1 = op1;
3177 if (GET_CODE (op1) == CONST_VECTOR)
3179 int i, n = CONST_VECTOR_NUNITS (op1);
3180 scalar_op1 = CONST_VECTOR_ELT (op1, 0);
3181 for (i = 1; i < n; ++i)
3182 if (!rtx_equal_p (scalar_op1, CONST_VECTOR_ELT (op1, i)))
3183 goto skip_scalar;
3186 if (INTEGRAL_MODE_P (mode))
3188 rtx fake_reg;
3189 HOST_WIDE_INT coeff;
3190 bool is_neg;
3191 int mode_bitsize;
3193 if (op1 == CONST0_RTX (mode))
3194 return op1;
3195 if (op1 == CONST1_RTX (mode))
3196 return op0;
3197 if (op1 == CONSTM1_RTX (mode))
3198 return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3199 op0, target, 0);
3201 if (do_trapv)
3202 goto skip_synth;
3204 /* These are the operations that are potentially turned into
3205 a sequence of shifts and additions. */
3206 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3208 /* synth_mult does an `unsigned int' multiply. As long as the mode is
3209 less than or equal in size to `unsigned int' this doesn't matter.
3210 If the mode is larger than `unsigned int', then synth_mult works
3211 only if the constant value exactly fits in an `unsigned int' without
3212 any truncation. This means that multiplying by negative values does
3213 not work; results are off by 2^32 on a 32 bit machine. */
3215 if (CONST_INT_P (scalar_op1))
3217 coeff = INTVAL (scalar_op1);
3218 is_neg = coeff < 0;
3220 else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3222 /* If we are multiplying in DImode, it may still be a win
3223 to try to work with shifts and adds. */
3224 if (CONST_DOUBLE_HIGH (scalar_op1) == 0
3225 && CONST_DOUBLE_LOW (scalar_op1) > 0)
3227 coeff = CONST_DOUBLE_LOW (scalar_op1);
3228 is_neg = false;
3230 else if (CONST_DOUBLE_LOW (scalar_op1) == 0)
3232 coeff = CONST_DOUBLE_HIGH (scalar_op1);
3233 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3235 int shift = floor_log2 (coeff) + HOST_BITS_PER_WIDE_INT;
3236 if (shift < HOST_BITS_PER_DOUBLE_INT - 1
3237 || mode_bitsize <= HOST_BITS_PER_DOUBLE_INT)
3238 return expand_shift (LSHIFT_EXPR, mode, op0,
3239 shift, target, unsignedp);
3241 goto skip_synth;
3243 else
3244 goto skip_synth;
3246 else
3247 goto skip_synth;
3249 /* We used to test optimize here, on the grounds that it's better to
3250 produce a smaller program when -O is not used. But this causes
3251 such a terrible slowdown sometimes that it seems better to always
3252 use synth_mult. */
3254 /* Special case powers of two. */
3255 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3256 return expand_shift (LSHIFT_EXPR, mode, op0,
3257 floor_log2 (coeff), target, unsignedp);
3259 fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3261 /* Attempt to handle multiplication of DImode values by negative
3262 coefficients, by performing the multiplication by a positive
3263 multiplier and then inverting the result. */
3264 /* ??? How is this not slightly redundant with the neg variant? */
3265 if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3267 /* It's safe to use -coeff even for INT_MIN, as the
3268 result is interpreted as an unsigned coefficient.
3269 Exclude cost of op0 from max_cost to match the cost
3270 calculation of the synth_mult. */
3271 max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed)
3272 - neg_cost(speed, mode));
3273 if (max_cost > 0
3274 && choose_mult_variant (mode, -coeff, &algorithm,
3275 &variant, max_cost))
3277 rtx temp = expand_mult_const (mode, op0, -coeff, NULL_RTX,
3278 &algorithm, variant);
3279 return expand_unop (mode, neg_optab, temp, target, 0);
3283 /* Exclude cost of op0 from max_cost to match the cost
3284 calculation of the synth_mult. */
3285 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed);
3286 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3287 return expand_mult_const (mode, op0, coeff, target,
3288 &algorithm, variant);
3290 skip_synth:
3292 /* Expand x*2.0 as x+x. */
3293 if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1))
3295 REAL_VALUE_TYPE d;
3296 REAL_VALUE_FROM_CONST_DOUBLE (d, scalar_op1);
3298 if (REAL_VALUES_EQUAL (d, dconst2))
3300 op0 = force_reg (GET_MODE (op0), op0);
3301 return expand_binop (mode, add_optab, op0, op0,
3302 target, unsignedp, OPTAB_LIB_WIDEN);
3305 skip_scalar:
3307 /* This used to use umul_optab if unsigned, but for non-widening multiply
3308 there is no difference between signed and unsigned. */
3309 op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3310 op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3311 gcc_assert (op0);
3312 return op0;
3315 /* Return a cost estimate for multiplying a register by the given
3316 COEFFicient in the given MODE and SPEED. */
3319 mult_by_coeff_cost (HOST_WIDE_INT coeff, enum machine_mode mode, bool speed)
3321 int max_cost;
3322 struct algorithm algorithm;
3323 enum mult_variant variant;
3325 rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3326 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg), speed);
3327 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3328 return algorithm.cost.cost;
3329 else
3330 return max_cost;
3333 /* Perform a widening multiplication and return an rtx for the result.
3334 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3335 TARGET is a suggestion for where to store the result (an rtx).
3336 THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3337 or smul_widen_optab.
3339 We check specially for a constant integer as OP1, comparing the
3340 cost of a widening multiply against the cost of a sequence of shifts
3341 and adds. */
3344 expand_widening_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3345 int unsignedp, optab this_optab)
3347 bool speed = optimize_insn_for_speed_p ();
3348 rtx cop1;
3350 if (CONST_INT_P (op1)
3351 && GET_MODE (op0) != VOIDmode
3352 && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3353 this_optab == umul_widen_optab))
3354 && CONST_INT_P (cop1)
3355 && (INTVAL (cop1) >= 0
3356 || HWI_COMPUTABLE_MODE_P (mode)))
3358 HOST_WIDE_INT coeff = INTVAL (cop1);
3359 int max_cost;
3360 enum mult_variant variant;
3361 struct algorithm algorithm;
3363 /* Special case powers of two. */
3364 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3366 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3367 return expand_shift (LSHIFT_EXPR, mode, op0,
3368 floor_log2 (coeff), target, unsignedp);
3371 /* Exclude cost of op0 from max_cost to match the cost
3372 calculation of the synth_mult. */
3373 max_cost = mul_widen_cost (speed, mode);
3374 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3375 max_cost))
3377 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3378 return expand_mult_const (mode, op0, coeff, target,
3379 &algorithm, variant);
3382 return expand_binop (mode, this_optab, op0, op1, target,
3383 unsignedp, OPTAB_LIB_WIDEN);
3386 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3387 replace division by D, and put the least significant N bits of the result
3388 in *MULTIPLIER_PTR and return the most significant bit.
3390 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3391 needed precision is in PRECISION (should be <= N).
3393 PRECISION should be as small as possible so this function can choose
3394 multiplier more freely.
3396 The rounded-up logarithm of D is placed in *LGUP_PTR. A shift count that
3397 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3399 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3400 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
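/* For example, for D == 7 with N == PRECISION == 32 this computes
   lgup == 3, *POST_SHIFT_PTR == 3 and the 33-bit multiplier 0x124924925:
   the low 32 bits 0x24924925 go into *MULTIPLIER_PTR and the returned
   value 1 supplies the extra high bit, the familiar "magic number" used
   to replace unsigned division by 7. */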
3402 unsigned HOST_WIDE_INT
3403 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3404 unsigned HOST_WIDE_INT *multiplier_ptr,
3405 int *post_shift_ptr, int *lgup_ptr)
3407 HOST_WIDE_INT mhigh_hi, mlow_hi;
3408 unsigned HOST_WIDE_INT mhigh_lo, mlow_lo;
3409 int lgup, post_shift;
3410 int pow, pow2;
3411 unsigned HOST_WIDE_INT nl, dummy1;
3412 HOST_WIDE_INT nh, dummy2;
3414 /* lgup = ceil(log2(divisor)); */
3415 lgup = ceil_log2 (d);
3417 gcc_assert (lgup <= n);
3419 pow = n + lgup;
3420 pow2 = n + lgup - precision;
3422 /* We could handle this with some effort, but this case is much
3423 better handled directly with a scc insn, so rely on caller using
3424 that. */
3425 gcc_assert (pow != HOST_BITS_PER_DOUBLE_INT);
3427 /* mlow = 2^(N + lgup)/d */
3428 if (pow >= HOST_BITS_PER_WIDE_INT)
3430 nh = (HOST_WIDE_INT) 1 << (pow - HOST_BITS_PER_WIDE_INT);
3431 nl = 0;
3433 else
3435 nh = 0;
3436 nl = (unsigned HOST_WIDE_INT) 1 << pow;
3438 div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3439 &mlow_lo, &mlow_hi, &dummy1, &dummy2);
3441 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3442 if (pow2 >= HOST_BITS_PER_WIDE_INT)
3443 nh |= (HOST_WIDE_INT) 1 << (pow2 - HOST_BITS_PER_WIDE_INT);
3444 else
3445 nl |= (unsigned HOST_WIDE_INT) 1 << pow2;
3446 div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3447 &mhigh_lo, &mhigh_hi, &dummy1, &dummy2);
3449 gcc_assert (!mhigh_hi || nh - d < d);
3450 gcc_assert (mhigh_hi <= 1 && mlow_hi <= 1);
3451 /* Assert that mlow < mhigh. */
3452 gcc_assert (mlow_hi < mhigh_hi
3453 || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo));
3455 /* If precision == N, then mlow, mhigh exceed 2^N
3456 (but they do not exceed 2^(N+1)). */
3458 /* Reduce to lowest terms. */
3459 for (post_shift = lgup; post_shift > 0; post_shift--)
3461 unsigned HOST_WIDE_INT ml_lo = (mlow_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mlow_lo >> 1);
3462 unsigned HOST_WIDE_INT mh_lo = (mhigh_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mhigh_lo >> 1);
3463 if (ml_lo >= mh_lo)
3464 break;
3466 mlow_hi = 0;
3467 mlow_lo = ml_lo;
3468 mhigh_hi = 0;
3469 mhigh_lo = mh_lo;
3472 *post_shift_ptr = post_shift;
3473 *lgup_ptr = lgup;
3474 if (n < HOST_BITS_PER_WIDE_INT)
3476 unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3477 *multiplier_ptr = mhigh_lo & mask;
3478 return mhigh_lo >= mask;
3480 else
3482 *multiplier_ptr = mhigh_lo;
3483 return mhigh_hi;
3487 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3488 congruent to 1 (mod 2**N). */
3490 static unsigned HOST_WIDE_INT
3491 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3493 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
3495 /* The algorithm notes that the choice y = x satisfies
3496 x*y == 1 mod 2^3, since x is assumed odd.
3497 Each iteration doubles the number of bits of significance in y. */
3499 unsigned HOST_WIDE_INT mask;
3500 unsigned HOST_WIDE_INT y = x;
3501 int nbit = 3;
3503 mask = (n == HOST_BITS_PER_WIDE_INT
3504 ? ~(unsigned HOST_WIDE_INT) 0
3505 : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3507 while (nbit < n)
3509 y = y * (2 - x*y) & mask; /* Modulo 2^N */
3510 nbit *= 2;
3512 return y;
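#if 0
/* Illustrative sketch, not built: the same Newton/Hensel iteration as the
   routine above, written against a host 64-bit type instead of
   HOST_WIDE_INT so it can be checked in isolation.  Starting from y = x
   (already correct modulo 2^3 for odd x), each step doubles the number of
   correct low bits.  */

#include <assert.h>
#include <stdint.h>

static uint64_t
invert_mod2n_sketch (uint64_t x, int n)
{
  uint64_t mask = (n == 64 ? ~(uint64_t) 0 : ((uint64_t) 1 << n) - 1);
  uint64_t y = x;
  int nbit = 3;

  while (nbit < n)
    {
      y = y * (2 - x * y) & mask;    /* Modulo 2^N.  */
      nbit *= 2;
    }
  return y;
}

int
main (void)
{
  uint64_t x;
  for (x = 1; x < 200001; x += 2)
    assert (((x * invert_mod2n_sketch (x, 32)) & 0xffffffffu) == 1);
  return 0;
}
#endif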
3515 /* Emit code to adjust ADJ_OPERAND after a multiplication of OP0 and OP1
3516 that was done with the wrong signedness.  ADJ_OPERAND is already the high
3517 half of the product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed
3518 product to become unsigned; if UNSIGNEDP is zero, adjust the unsigned
3519 product to become signed.
3521 The result is put in TARGET if that is convenient.
3523 MODE is the mode of operation. */
3526 expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
3527 rtx op1, rtx target, int unsignedp)
3529 rtx tem;
3530 enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3532 tem = expand_shift (RSHIFT_EXPR, mode, op0,
3533 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3534 tem = expand_and (mode, tem, op1, NULL_RTX);
3535 adj_operand
3536 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3537 adj_operand);
3539 tem = expand_shift (RSHIFT_EXPR, mode, op1,
3540 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3541 tem = expand_and (mode, tem, op0, NULL_RTX);
3542 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3543 target);
3545 return target;
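#if 0
/* Illustrative sketch, not built: the identity behind the adjustment above,
   checked exhaustively for an 8-bit mode.  Modulo 2^N, the unsigned high
   half of an N x N -> 2N product equals the signed high half plus OP1 when
   OP0 is negative plus OP0 when OP1 is negative, which is what the
   shift/and/add sequence above computes.  */

#include <assert.h>
#include <stdint.h>

int
main (void)
{
  unsigned int a, b;

  for (a = 0; a < 256; a++)
    for (b = 0; b < 256; b++)
      {
        int sprod = (int) (int8_t) a * (int8_t) b;
        uint8_t uhigh = (uint8_t) ((a * b) >> 8);
        uint8_t shigh = (uint8_t) ((uint16_t) sprod >> 8);
        uint8_t adjusted = (uint8_t) (shigh
                                      + ((int8_t) a < 0 ? b : 0)
                                      + ((int8_t) b < 0 ? a : 0));
        assert (adjusted == uhigh);
      }
  return 0;
}
#endif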
3548 /* Subroutine of expmed_mult_highpart. Return the MODE high part of OP. */
3550 static rtx
3551 extract_high_half (enum machine_mode mode, rtx op)
3553 enum machine_mode wider_mode;
3555 if (mode == word_mode)
3556 return gen_highpart (mode, op);
3558 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3560 wider_mode = GET_MODE_WIDER_MODE (mode);
3561 op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3562 GET_MODE_BITSIZE (mode), 0, 1);
3563 return convert_modes (mode, wider_mode, op, 0);
3566 /* Like expmed_mult_highpart, but only consider using a multiplication
3567 optab. OP1 is an rtx for the constant operand. */
3569 static rtx
3570 expmed_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
3571 rtx target, int unsignedp, int max_cost)
3573 rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3574 enum machine_mode wider_mode;
3575 optab moptab;
3576 rtx tem;
3577 int size;
3578 bool speed = optimize_insn_for_speed_p ();
3580 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3582 wider_mode = GET_MODE_WIDER_MODE (mode);
3583 size = GET_MODE_BITSIZE (mode);
3585 /* Firstly, try using a multiplication insn that only generates the needed
3586 high part of the product, with the signedness given by UNSIGNEDP. */
3587 if (mul_highpart_cost (speed, mode) < max_cost)
3589 moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3590 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3591 unsignedp, OPTAB_DIRECT);
3592 if (tem)
3593 return tem;
3596 /* Secondly, same as above, but use the signedness opposite to UNSIGNEDP.
3597 Need to adjust the result after the multiplication. */
3598 if (size - 1 < BITS_PER_WORD
3599 && (mul_highpart_cost (speed, mode)
3600 + 2 * shift_cost (speed, mode, size-1)
3601 + 4 * add_cost (speed, mode) < max_cost))
3603 moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3604 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3605 unsignedp, OPTAB_DIRECT);
3606 if (tem)
3607 /* We used the wrong signedness. Adjust the result. */
3608 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3609 tem, unsignedp);
3612 /* Try widening multiplication. */
3613 moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3614 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3615 && mul_widen_cost (speed, wider_mode) < max_cost)
3617 tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3618 unsignedp, OPTAB_WIDEN);
3619 if (tem)
3620 return extract_high_half (mode, tem);
3623 /* Try widening the mode and perform a non-widening multiplication. */
3624 if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3625 && size - 1 < BITS_PER_WORD
3626 && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3627 < max_cost))
3629 rtx insns, wop0, wop1;
3631 /* We need to widen the operands, for example to ensure the
3632 constant multiplier is correctly sign or zero extended.
3633 Use a sequence to clean up any instructions emitted by
3634 the conversions if things don't work out. */
3635 start_sequence ();
3636 wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3637 wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3638 tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3639 unsignedp, OPTAB_WIDEN);
3640 insns = get_insns ();
3641 end_sequence ();
3643 if (tem)
3645 emit_insn (insns);
3646 return extract_high_half (mode, tem);
3650 /* Try widening multiplication of opposite signedness, and adjust. */
3651 moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3652 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3653 && size - 1 < BITS_PER_WORD
3654 && (mul_widen_cost (speed, wider_mode)
3655 + 2 * shift_cost (speed, mode, size-1)
3656 + 4 * add_cost (speed, mode) < max_cost))
3658 tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3659 NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3660 if (tem != 0)
3662 tem = extract_high_half (mode, tem);
3663 /* We used the wrong signedness. Adjust the result. */
3664 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3665 target, unsignedp);
3669 return 0;
3672 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3673 putting the high half of the result in TARGET if that is convenient,
3674 and return where the result is. If the operation cannot be performed,
3675 0 is returned.
3677 MODE is the mode of operation and result.
3679 UNSIGNEDP nonzero means unsigned multiply.
3681 MAX_COST is the total allowed cost for the expanded RTL. */
3683 static rtx
3684 expmed_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
3685 rtx target, int unsignedp, int max_cost)
3687 enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3688 unsigned HOST_WIDE_INT cnst1;
3689 int extra_cost;
3690 bool sign_adjust = false;
3691 enum mult_variant variant;
3692 struct algorithm alg;
3693 rtx tem;
3694 bool speed = optimize_insn_for_speed_p ();
3696 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3697 /* We can't support modes wider than HOST_BITS_PER_WIDE_INT. */
3698 gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3700 cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3702 /* We can't optimize modes wider than BITS_PER_WORD.
3703 ??? We might be able to perform double-word arithmetic if
3704 mode == word_mode, however all the cost calculations in
3705 synth_mult etc. assume single-word operations. */
3706 if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3707 return expmed_mult_highpart_optab (mode, op0, op1, target,
3708 unsignedp, max_cost);
3710 extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3712 /* Check whether we try to multiply by a negative constant. */
3713 if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3715 sign_adjust = true;
3716 extra_cost += add_cost (speed, mode);
3719 /* See whether shift/add multiplication is cheap enough. */
3720 if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3721 max_cost - extra_cost))
3723 /* See whether the specialized multiplication optabs are
3724 cheaper than the shift/add version. */
3725 tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3726 alg.cost.cost + extra_cost);
3727 if (tem)
3728 return tem;
3730 tem = convert_to_mode (wider_mode, op0, unsignedp);
3731 tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3732 tem = extract_high_half (mode, tem);
3734 /* Adjust result for signedness. */
3735 if (sign_adjust)
3736 tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3738 return tem;
3740 return expmed_mult_highpart_optab (mode, op0, op1, target,
3741 unsignedp, max_cost);
3745 /* Expand signed modulus of OP0 by a power of two D in mode MODE. */
3747 static rtx
3748 expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3750 unsigned HOST_WIDE_INT masklow, maskhigh;
3751 rtx result, temp, shift, label;
3752 int logd;
3754 logd = floor_log2 (d);
3755 result = gen_reg_rtx (mode);
3757 /* Avoid conditional branches when they're expensive. */
3758 if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3759 && optimize_insn_for_speed_p ())
3761 rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3762 mode, 0, -1);
3763 if (signmask)
3765 signmask = force_reg (mode, signmask);
3766 masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3767 shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3769 /* Use the rtx_cost of a LSHIFTRT instruction to determine
3770 which instruction sequence to use. If logical right shifts
3771 are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
3772 use a LSHIFTRT, 1 ADD, 1 SUB and an AND. */
3774 temp = gen_rtx_LSHIFTRT (mode, result, shift);
3775 if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3776 || (set_src_cost (temp, optimize_insn_for_speed_p ())
3777 > COSTS_N_INSNS (2)))
3779 temp = expand_binop (mode, xor_optab, op0, signmask,
3780 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3781 temp = expand_binop (mode, sub_optab, temp, signmask,
3782 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3783 temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3784 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3785 temp = expand_binop (mode, xor_optab, temp, signmask,
3786 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3787 temp = expand_binop (mode, sub_optab, temp, signmask,
3788 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3790 else
3792 signmask = expand_binop (mode, lshr_optab, signmask, shift,
3793 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3794 signmask = force_reg (mode, signmask);
3796 temp = expand_binop (mode, add_optab, op0, signmask,
3797 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3798 temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3799 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3800 temp = expand_binop (mode, sub_optab, temp, signmask,
3801 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3803 return temp;
3807 /* Mask contains the mode's signbit and the significant bits of the
3808 modulus. By including the signbit in the operation, many targets
3809 can avoid an explicit compare operation in the following comparison
3810 against zero. */
3812 masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3813 if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
3815 masklow |= (HOST_WIDE_INT) -1 << (GET_MODE_BITSIZE (mode) - 1);
3816 maskhigh = -1;
3818 else
3819 maskhigh = (HOST_WIDE_INT) -1
3820 << (GET_MODE_BITSIZE (mode) - HOST_BITS_PER_WIDE_INT - 1);
3822 temp = expand_binop (mode, and_optab, op0,
3823 immed_double_const (masklow, maskhigh, mode),
3824 result, 1, OPTAB_LIB_WIDEN);
3825 if (temp != result)
3826 emit_move_insn (result, temp);
3828 label = gen_label_rtx ();
3829 do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3831 temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3832 0, OPTAB_LIB_WIDEN);
3833 masklow = (HOST_WIDE_INT) -1 << logd;
3834 maskhigh = -1;
3835 temp = expand_binop (mode, ior_optab, temp,
3836 immed_double_const (masklow, maskhigh, mode),
3837 result, 1, OPTAB_LIB_WIDEN);
3838 temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3839 0, OPTAB_LIB_WIDEN);
3840 if (temp != result)
3841 emit_move_insn (result, temp);
3842 emit_label (label);
3843 return result;
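#if 0
/* Illustrative sketch, not built: the branch-free signed modulus by a power
   of two that the code above emits when logical shifts are cheap,
   instantiated by hand for D == 8 in a 32-bit mode.  SIGNMASK is the -1/0
   value produced by emit_store_flag (..., LT, op0, const0_rtx, ..., -1),
   shifted so that it holds D - 1 for negative operands and 0 otherwise.
   The helper name is made up for the example.  */

#include <assert.h>
#include <stdint.h>

static int32_t
smod8_branchless (int32_t x)
{
  uint32_t signmask = (x < 0 ? ~(uint32_t) 0 : 0);
  uint32_t masklow = 8 - 1;

  signmask >>= 32 - 3;             /* LSHIFTRT by bitsize - logd.  */
  return (int32_t) (((uint32_t) x + signmask) & masklow) - (int32_t) signmask;
}

int
main (void)
{
  int32_t x;
  for (x = -100000; x <= 100000; x++)
    assert (smod8_branchless (x) == x % 8);
  return 0;
}
#endif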
3846 /* Expand signed division of OP0 by a power of two D in mode MODE.
3847 This routine is only called for positive values of D. */
3849 static rtx
3850 expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3852 rtx temp, label;
3853 int logd;
3855 logd = floor_log2 (d);
3857 if (d == 2
3858 && BRANCH_COST (optimize_insn_for_speed_p (),
3859 false) >= 1)
3861 temp = gen_reg_rtx (mode);
3862 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3863 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3864 0, OPTAB_LIB_WIDEN);
3865 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3868 #ifdef HAVE_conditional_move
3869 if (BRANCH_COST (optimize_insn_for_speed_p (), false)
3870 >= 2)
3872 rtx temp2;
3874 /* ??? emit_conditional_move forces a stack adjustment via
3875 compare_from_rtx so, if the sequence is discarded, it will
3876 be lost. Do it now instead. */
3877 do_pending_stack_adjust ();
3879 start_sequence ();
3880 temp2 = copy_to_mode_reg (mode, op0);
3881 temp = expand_binop (mode, add_optab, temp2, GEN_INT (d-1),
3882 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3883 temp = force_reg (mode, temp);
3885 /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */
3886 temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3887 mode, temp, temp2, mode, 0);
3888 if (temp2)
3890 rtx seq = get_insns ();
3891 end_sequence ();
3892 emit_insn (seq);
3893 return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
3895 end_sequence ();
3897 #endif
3899 if (BRANCH_COST (optimize_insn_for_speed_p (),
3900 false) >= 2)
3902 int ushift = GET_MODE_BITSIZE (mode) - logd;
3904 temp = gen_reg_rtx (mode);
3905 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3906 if (shift_cost (optimize_insn_for_speed_p (), mode, ushift)
3907 > COSTS_N_INSNS (1))
3908 temp = expand_binop (mode, and_optab, temp, GEN_INT (d - 1),
3909 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3910 else
3911 temp = expand_shift (RSHIFT_EXPR, mode, temp,
3912 ushift, NULL_RTX, 1);
3913 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3914 0, OPTAB_LIB_WIDEN);
3915 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3918 label = gen_label_rtx ();
3919 temp = copy_to_mode_reg (mode, op0);
3920 do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3921 expand_inc (temp, GEN_INT (d - 1));
3922 emit_label (label);
3923 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
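#if 0
/* Illustrative sketch, not built: the branch-free signed division by a power
   of two built above when branches are expensive, instantiated by hand for
   D == 16 in a 32-bit mode.  A negative dividend is biased by D - 1 before
   the arithmetic right shift so the shift rounds toward zero rather than
   toward minus infinity.  The sketch assumes the host's >> of a negative int
   is an arithmetic shift, which is what the expanded RTL (ASHIFTRT)
   guarantees in the real code; the helper name is made up for the example.  */

#include <assert.h>
#include <stdint.h>

static int32_t
sdiv16_branchless (int32_t x)
{
  int32_t is_neg = (x < 0 ? -1 : 0);    /* emit_store_flag (..., LT, ...).  */
  int32_t bias = is_neg & (16 - 1);     /* or: (uint32_t) is_neg >> (32 - 4).  */

  return (x + bias) >> 4;
}

int
main (void)
{
  int32_t x;
  for (x = -100000; x <= 100000; x++)
    assert (sdiv16_branchless (x) == x / 16);
  return 0;
}
#endif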
3926 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3927 if that is convenient, and returning where the result is.
3928 You may request either the quotient or the remainder as the result;
3929 specify REM_FLAG nonzero to get the remainder.
3931 CODE is the expression code for which kind of division this is;
3932 it controls how rounding is done. MODE is the machine mode to use.
3933 UNSIGNEDP nonzero means do unsigned division. */
3935 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3936 and then correct it by or'ing in missing high bits
3937 if result of ANDI is nonzero.
3938 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3939 This could optimize to a bfexts instruction.
3940 But C doesn't use these operations, so their optimizations are
3941 left for later. */
3942 /* ??? For modulo, we don't actually need the highpart of the first product,
3943 the low part will do nicely. And for small divisors, the second multiply
3944 can also be a low-part only multiply or even be completely left out.
3945 E.g. to calculate the remainder of a division by 3 with a 32 bit
3946 multiply, multiply with 0x55555556 and extract the upper two bits;
3947 the result is exact for inputs up to 0x1fffffff.
3948 The input range can be reduced by using cross-sum rules.
3949 For odd divisors >= 3, the following table gives right shift counts
3950 so that if a number is shifted by an integer multiple of the given
3951 amount, the remainder stays the same:
3952 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3953 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3954 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3955 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3956 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3958 Cross-sum rules for even numbers can be derived by leaving as many bits
3959 to the right alone as the divisor has zeros to the right.
3960 E.g. if x is an unsigned 32 bit number:
3961 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
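#if 0
/* Illustrative sketch, not built: the remainder-by-3 trick mentioned in the
   comment above.  0x55555556 is ceil (2^32 / 3), so the low 32 bits of
   x * 0x55555556 are approximately (x mod 3) * 2^32 / 3, and the remainder
   can be read off the top two bits of that low half.  As the comment says,
   this is only exact for inputs up to 0x1fffffff.  */

#include <assert.h>
#include <stdint.h>

static uint32_t
rem3_by_multiply (uint32_t x)
{
  return (uint32_t) (x * 0x55555556u) >> 30;
}

int
main (void)
{
  uint32_t x;
  for (x = 0; x <= 0x1fffffff; x += 97)
    assert (rem3_by_multiply (x) == x % 3);
  assert (rem3_by_multiply (0x1fffffff) == 0x1fffffff % 3);
  return 0;
}
#endif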
3965 expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3966 rtx op0, rtx op1, rtx target, int unsignedp)
3968 enum machine_mode compute_mode;
3969 rtx tquotient;
3970 rtx quotient = 0, remainder = 0;
3971 rtx last;
3972 int size;
3973 rtx insn;
3974 optab optab1, optab2;
3975 int op1_is_constant, op1_is_pow2 = 0;
3976 int max_cost, extra_cost;
3977 static HOST_WIDE_INT last_div_const = 0;
3978 static HOST_WIDE_INT ext_op1;
3979 bool speed = optimize_insn_for_speed_p ();
3981 op1_is_constant = CONST_INT_P (op1);
3982 if (op1_is_constant)
3984 ext_op1 = INTVAL (op1);
3985 if (unsignedp)
3986 ext_op1 &= GET_MODE_MASK (mode);
3987 op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3988 || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3992 This is the structure of expand_divmod:
3994 First comes code to fix up the operands so we can perform the operations
3995 correctly and efficiently.
3997 Second comes a switch statement with code specific for each rounding mode.
3998 For some special operands this code emits all RTL for the desired
3999 operation, for other cases, it generates only a quotient and stores it in
4000 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
4001 to indicate that it has not done anything.
4003 Last comes code that finishes the operation. If QUOTIENT is set and
4004 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
4005 QUOTIENT is not set, it is computed using trunc rounding.
4007 We try to generate special code for division and remainder when OP1 is a
4008 constant. If |OP1| = 2**n we can use shifts and some other fast
4009 operations. For other values of OP1, we compute a carefully selected
4010 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
4011 by m.
4013 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
4014 half of the product. Different strategies for generating the product are
4015 implemented in expmed_mult_highpart.
4017 If what we actually want is the remainder, we generate that by another
4018 by-constant multiplication and a subtraction. */
4020 /* We shouldn't be called with OP1 == const1_rtx, but some of the
4021 code below will malfunction if we are, so check here and handle
4022 the special case if so. */
4023 if (op1 == const1_rtx)
4024 return rem_flag ? const0_rtx : op0;
4026 /* When dividing by -1, we could get an overflow.
4027 negv_optab can handle overflows. */
4028 if (! unsignedp && op1 == constm1_rtx)
4030 if (rem_flag)
4031 return const0_rtx;
4032 return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT
4033 ? negv_optab : neg_optab, op0, target, 0);
4036 if (target
4037 /* Don't use the function value register as a target
4038 since we have to read it as well as write it,
4039 and function-inlining gets confused by this. */
4040 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
4041 /* Don't clobber an operand while doing a multi-step calculation. */
4042 || ((rem_flag || op1_is_constant)
4043 && (reg_mentioned_p (target, op0)
4044 || (MEM_P (op0) && MEM_P (target))))
4045 || reg_mentioned_p (target, op1)
4046 || (MEM_P (op1) && MEM_P (target))))
4047 target = 0;
4049 /* Get the mode in which to perform this computation. Normally it will
4050 be MODE, but sometimes we can't do the desired operation in MODE.
4051 If so, pick a wider mode in which we can do the operation. Convert
4052 to that mode at the start to avoid repeated conversions.
4054 First see what operations we need. These depend on the expression
4055 we are evaluating. (We assume that divxx3 insns exist under the
4056 same conditions as modxx3 insns do, and that these insns don't normally
4057 fail. If these assumptions are not correct, we may generate less
4058 efficient code in some cases.)
4060 Then see if we find a mode in which we can open-code that operation
4061 (either a division, modulus, or shift). Finally, check for the smallest
4062 mode for which we can do the operation with a library call. */
4064 /* We might want to refine this now that we have division-by-constant
4065 optimization. Since expmed_mult_highpart tries so many variants, it is
4066 not straightforward to generalize this. Maybe we should make an array
4067 of possible modes in init_expmed? Save this for GCC 2.7. */
4069 optab1 = ((op1_is_pow2 && op1 != const0_rtx)
4070 ? (unsignedp ? lshr_optab : ashr_optab)
4071 : (unsignedp ? udiv_optab : sdiv_optab));
4072 optab2 = ((op1_is_pow2 && op1 != const0_rtx)
4073 ? optab1
4074 : (unsignedp ? udivmod_optab : sdivmod_optab));
4076 for (compute_mode = mode; compute_mode != VOIDmode;
4077 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
4078 if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
4079 || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
4080 break;
4082 if (compute_mode == VOIDmode)
4083 for (compute_mode = mode; compute_mode != VOIDmode;
4084 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
4085 if (optab_libfunc (optab1, compute_mode)
4086 || optab_libfunc (optab2, compute_mode))
4087 break;
4089 /* If we still couldn't find a mode, use MODE, but expand_binop will
4090 probably die. */
4091 if (compute_mode == VOIDmode)
4092 compute_mode = mode;
4094 if (target && GET_MODE (target) == compute_mode)
4095 tquotient = target;
4096 else
4097 tquotient = gen_reg_rtx (compute_mode);
4099 size = GET_MODE_BITSIZE (compute_mode);
4100 #if 0
4101 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
4102 (mode), and thereby get better code when OP1 is a constant. Do that
4103 later. It will require going over all usages of SIZE below. */
4104 size = GET_MODE_BITSIZE (mode);
4105 #endif
4107 /* Only deduct something for a REM if the last divide done was
4108 for a different constant. Then set the constant of the last
4109 divide. */
4110 max_cost = (unsignedp
4111 ? udiv_cost (speed, compute_mode)
4112 : sdiv_cost (speed, compute_mode));
4113 if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4114 && INTVAL (op1) == last_div_const))
4115 max_cost -= (mul_cost (speed, compute_mode)
4116 + add_cost (speed, compute_mode));
4118 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4120 /* Now convert to the best mode to use. */
4121 if (compute_mode != mode)
4123 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4124 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4126 /* convert_modes may have placed op1 into a register, so we
4127 must recompute the following. */
4128 op1_is_constant = CONST_INT_P (op1);
4129 op1_is_pow2 = (op1_is_constant
4130 && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4131 || (! unsignedp
4132 && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1)))))) ;
4135 /* If one of the operands is a volatile MEM, copy it into a register. */
4137 if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4138 op0 = force_reg (compute_mode, op0);
4139 if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4140 op1 = force_reg (compute_mode, op1);
4142 /* If we need the remainder or if OP1 is constant, we need to
4143 put OP0 in a register in case it has any queued subexpressions. */
4144 if (rem_flag || op1_is_constant)
4145 op0 = force_reg (compute_mode, op0);
4147 last = get_last_insn ();
4149 /* Promote floor rounding to trunc rounding for unsigned operations. */
4150 if (unsignedp)
4152 if (code == FLOOR_DIV_EXPR)
4153 code = TRUNC_DIV_EXPR;
4154 if (code == FLOOR_MOD_EXPR)
4155 code = TRUNC_MOD_EXPR;
4156 if (code == EXACT_DIV_EXPR && op1_is_pow2)
4157 code = TRUNC_DIV_EXPR;
4160 if (op1 != const0_rtx)
4161 switch (code)
4163 case TRUNC_MOD_EXPR:
4164 case TRUNC_DIV_EXPR:
4165 if (op1_is_constant)
4167 if (unsignedp)
4169 unsigned HOST_WIDE_INT mh, ml;
4170 int pre_shift, post_shift;
4171 int dummy;
4172 unsigned HOST_WIDE_INT d = (INTVAL (op1)
4173 & GET_MODE_MASK (compute_mode));
4175 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4177 pre_shift = floor_log2 (d);
4178 if (rem_flag)
4180 remainder
4181 = expand_binop (compute_mode, and_optab, op0,
4182 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4183 remainder, 1,
4184 OPTAB_LIB_WIDEN);
4185 if (remainder)
4186 return gen_lowpart (mode, remainder);
4188 quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4189 pre_shift, tquotient, 1);
4191 else if (size <= HOST_BITS_PER_WIDE_INT)
4193 if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
4195 /* Most significant bit of divisor is set; emit an scc
4196 insn. */
4197 quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4198 compute_mode, 1, 1);
4200 else
4202 /* Find a suitable multiplier and right shift count
4203 instead of multiplying with D. */
4205 mh = choose_multiplier (d, size, size,
4206 &ml, &post_shift, &dummy);
4208 /* If the suggested multiplier is more than SIZE bits,
4209 we can do better for even divisors, using an
4210 initial right shift. */
4211 if (mh != 0 && (d & 1) == 0)
4213 pre_shift = floor_log2 (d & -d);
4214 mh = choose_multiplier (d >> pre_shift, size,
4215 size - pre_shift,
4216 &ml, &post_shift, &dummy);
4217 gcc_assert (!mh);
4219 else
4220 pre_shift = 0;
4222 if (mh != 0)
4224 rtx t1, t2, t3, t4;
4226 if (post_shift - 1 >= BITS_PER_WORD)
4227 goto fail1;
4229 extra_cost
4230 = (shift_cost (speed, compute_mode, post_shift - 1)
4231 + shift_cost (speed, compute_mode, 1)
4232 + 2 * add_cost (speed, compute_mode));
4233 t1 = expmed_mult_highpart (compute_mode, op0,
4234 GEN_INT (ml),
4235 NULL_RTX, 1,
4236 max_cost - extra_cost);
4237 if (t1 == 0)
4238 goto fail1;
4239 t2 = force_operand (gen_rtx_MINUS (compute_mode,
4240 op0, t1),
4241 NULL_RTX);
4242 t3 = expand_shift (RSHIFT_EXPR, compute_mode,
4243 t2, 1, NULL_RTX, 1);
4244 t4 = force_operand (gen_rtx_PLUS (compute_mode,
4245 t1, t3),
4246 NULL_RTX);
4247 quotient = expand_shift
4248 (RSHIFT_EXPR, compute_mode, t4,
4249 post_shift - 1, tquotient, 1);
4251 else
4253 rtx t1, t2;
4255 if (pre_shift >= BITS_PER_WORD
4256 || post_shift >= BITS_PER_WORD)
4257 goto fail1;
4259 t1 = expand_shift
4260 (RSHIFT_EXPR, compute_mode, op0,
4261 pre_shift, NULL_RTX, 1);
4262 extra_cost
4263 = (shift_cost (speed, compute_mode, pre_shift)
4264 + shift_cost (speed, compute_mode, post_shift));
4265 t2 = expmed_mult_highpart (compute_mode, t1,
4266 GEN_INT (ml),
4267 NULL_RTX, 1,
4268 max_cost - extra_cost);
4269 if (t2 == 0)
4270 goto fail1;
4271 quotient = expand_shift
4272 (RSHIFT_EXPR, compute_mode, t2,
4273 post_shift, tquotient, 1);
4277 else /* Too wide mode to use tricky code */
4278 break;
4280 insn = get_last_insn ();
4281 if (insn != last)
4282 set_dst_reg_note (insn, REG_EQUAL,
4283 gen_rtx_UDIV (compute_mode, op0, op1),
4284 quotient);
4286 else /* TRUNC_DIV, signed */
4288 unsigned HOST_WIDE_INT ml;
4289 int lgup, post_shift;
4290 rtx mlr;
4291 HOST_WIDE_INT d = INTVAL (op1);
4292 unsigned HOST_WIDE_INT abs_d;
4294 /* Since d might be INT_MIN, we have to cast to
4295 unsigned HOST_WIDE_INT before negating to avoid
4296 undefined signed overflow. */
4297 abs_d = (d >= 0
4298 ? (unsigned HOST_WIDE_INT) d
4299 : - (unsigned HOST_WIDE_INT) d);
4301 /* n rem d = n rem -d */
4302 if (rem_flag && d < 0)
4304 d = abs_d;
4305 op1 = gen_int_mode (abs_d, compute_mode);
4308 if (d == 1)
4309 quotient = op0;
4310 else if (d == -1)
4311 quotient = expand_unop (compute_mode, neg_optab, op0,
4312 tquotient, 0);
4313 else if (HOST_BITS_PER_WIDE_INT >= size
4314 && abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4316 /* This case is not handled correctly below. */
4317 quotient = emit_store_flag (tquotient, EQ, op0, op1,
4318 compute_mode, 1, 1);
4319 if (quotient == 0)
4320 goto fail1;
4322 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4323 && (rem_flag
4324 ? smod_pow2_cheap (speed, compute_mode)
4325 : sdiv_pow2_cheap (speed, compute_mode))
4326 /* We assume that cheap metric is true if the
4327 optab has an expander for this mode. */
4328 && ((optab_handler ((rem_flag ? smod_optab
4329 : sdiv_optab),
4330 compute_mode)
4331 != CODE_FOR_nothing)
4332 || (optab_handler (sdivmod_optab,
4333 compute_mode)
4334 != CODE_FOR_nothing)))
4336 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4338 if (rem_flag)
4340 remainder = expand_smod_pow2 (compute_mode, op0, d);
4341 if (remainder)
4342 return gen_lowpart (mode, remainder);
4345 if (sdiv_pow2_cheap (speed, compute_mode)
4346 && ((optab_handler (sdiv_optab, compute_mode)
4347 != CODE_FOR_nothing)
4348 || (optab_handler (sdivmod_optab, compute_mode)
4349 != CODE_FOR_nothing)))
4350 quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4351 compute_mode, op0,
4352 gen_int_mode (abs_d,
4353 compute_mode),
4354 NULL_RTX, 0);
4355 else
4356 quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4358 /* We have computed OP0 / abs(OP1). If OP1 is negative,
4359 negate the quotient. */
4360 if (d < 0)
4362 insn = get_last_insn ();
4363 if (insn != last
4364 && abs_d < ((unsigned HOST_WIDE_INT) 1
4365 << (HOST_BITS_PER_WIDE_INT - 1)))
4366 set_dst_reg_note (insn, REG_EQUAL,
4367 gen_rtx_DIV (compute_mode, op0,
4368 gen_int_mode
4369 (abs_d,
4370 compute_mode)),
4371 quotient);
4373 quotient = expand_unop (compute_mode, neg_optab,
4374 quotient, quotient, 0);
4377 else if (size <= HOST_BITS_PER_WIDE_INT)
4379 choose_multiplier (abs_d, size, size - 1,
4380 &ml, &post_shift, &lgup);
4381 if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4383 rtx t1, t2, t3;
4385 if (post_shift >= BITS_PER_WORD
4386 || size - 1 >= BITS_PER_WORD)
4387 goto fail1;
4389 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4390 + shift_cost (speed, compute_mode, size - 1)
4391 + add_cost (speed, compute_mode));
4392 t1 = expmed_mult_highpart (compute_mode, op0,
4393 GEN_INT (ml), NULL_RTX, 0,
4394 max_cost - extra_cost);
4395 if (t1 == 0)
4396 goto fail1;
4397 t2 = expand_shift
4398 (RSHIFT_EXPR, compute_mode, t1,
4399 post_shift, NULL_RTX, 0);
4400 t3 = expand_shift
4401 (RSHIFT_EXPR, compute_mode, op0,
4402 size - 1, NULL_RTX, 0);
4403 if (d < 0)
4404 quotient
4405 = force_operand (gen_rtx_MINUS (compute_mode,
4406 t3, t2),
4407 tquotient);
4408 else
4409 quotient
4410 = force_operand (gen_rtx_MINUS (compute_mode,
4411 t2, t3),
4412 tquotient);
4414 else
4416 rtx t1, t2, t3, t4;
4418 if (post_shift >= BITS_PER_WORD
4419 || size - 1 >= BITS_PER_WORD)
4420 goto fail1;
4422 ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4423 mlr = gen_int_mode (ml, compute_mode);
4424 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4425 + shift_cost (speed, compute_mode, size - 1)
4426 + 2 * add_cost (speed, compute_mode));
4427 t1 = expmed_mult_highpart (compute_mode, op0, mlr,
4428 NULL_RTX, 0,
4429 max_cost - extra_cost);
4430 if (t1 == 0)
4431 goto fail1;
4432 t2 = force_operand (gen_rtx_PLUS (compute_mode,
4433 t1, op0),
4434 NULL_RTX);
4435 t3 = expand_shift
4436 (RSHIFT_EXPR, compute_mode, t2,
4437 post_shift, NULL_RTX, 0);
4438 t4 = expand_shift
4439 (RSHIFT_EXPR, compute_mode, op0,
4440 size - 1, NULL_RTX, 0);
4441 if (d < 0)
4442 quotient
4443 = force_operand (gen_rtx_MINUS (compute_mode,
4444 t4, t3),
4445 tquotient);
4446 else
4447 quotient
4448 = force_operand (gen_rtx_MINUS (compute_mode,
4449 t3, t4),
4450 tquotient);
4453 else /* Too wide mode to use tricky code */
4454 break;
4456 insn = get_last_insn ();
4457 if (insn != last)
4458 set_dst_reg_note (insn, REG_EQUAL,
4459 gen_rtx_DIV (compute_mode, op0, op1),
4460 quotient);
4462 break;
4464 fail1:
4465 delete_insns_since (last);
4466 break;
4468 case FLOOR_DIV_EXPR:
4469 case FLOOR_MOD_EXPR:
4470 /* We will come here only for signed operations. */
4471 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4473 unsigned HOST_WIDE_INT mh, ml;
4474 int pre_shift, lgup, post_shift;
4475 HOST_WIDE_INT d = INTVAL (op1);
4477 if (d > 0)
4479 /* We could just as easily deal with negative constants here,
4480 but it does not seem worth the trouble for GCC 2.6. */
4481 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4483 pre_shift = floor_log2 (d);
4484 if (rem_flag)
4486 remainder = expand_binop (compute_mode, and_optab, op0,
4487 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4488 remainder, 0, OPTAB_LIB_WIDEN);
4489 if (remainder)
4490 return gen_lowpart (mode, remainder);
4492 quotient = expand_shift
4493 (RSHIFT_EXPR, compute_mode, op0,
4494 pre_shift, tquotient, 0);
4496 else
4498 rtx t1, t2, t3, t4;
4500 mh = choose_multiplier (d, size, size - 1,
4501 &ml, &post_shift, &lgup);
4502 gcc_assert (!mh);
4504 if (post_shift < BITS_PER_WORD
4505 && size - 1 < BITS_PER_WORD)
4507 t1 = expand_shift
4508 (RSHIFT_EXPR, compute_mode, op0,
4509 size - 1, NULL_RTX, 0);
4510 t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4511 NULL_RTX, 0, OPTAB_WIDEN);
4512 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4513 + shift_cost (speed, compute_mode, size - 1)
4514 + 2 * add_cost (speed, compute_mode));
4515 t3 = expmed_mult_highpart (compute_mode, t2,
4516 GEN_INT (ml), NULL_RTX, 1,
4517 max_cost - extra_cost);
4518 if (t3 != 0)
4520 t4 = expand_shift
4521 (RSHIFT_EXPR, compute_mode, t3,
4522 post_shift, NULL_RTX, 1);
4523 quotient = expand_binop (compute_mode, xor_optab,
4524 t4, t1, tquotient, 0,
4525 OPTAB_WIDEN);
4530 else
4532 rtx nsign, t1, t2, t3, t4;
4533 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4534 op0, constm1_rtx), NULL_RTX);
4535 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4536 0, OPTAB_WIDEN);
4537 nsign = expand_shift
4538 (RSHIFT_EXPR, compute_mode, t2,
4539 size - 1, NULL_RTX, 0);
4540 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4541 NULL_RTX);
4542 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4543 NULL_RTX, 0);
4544 if (t4)
4546 rtx t5;
4547 t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4548 NULL_RTX, 0);
4549 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4550 t4, t5),
4551 tquotient);
4556 if (quotient != 0)
4557 break;
4558 delete_insns_since (last);
4560 /* Try using an instruction that produces both the quotient and
4561 remainder, using truncation. We can easily compensate the quotient
4562 or remainder to get floor rounding, once we have the remainder.
4563 Notice that we also compute the final remainder value here,
4564 and return the result right away. */
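/* For instance, with OP1 == 2 and OP0 == -7 the truncating divmod gives
   quotient -3 and remainder -1; the remainder is nonzero and OP0 and OP1
   have opposite signs, so the code below decrements the quotient to -4 and
   adds OP1 to the remainder, giving the floor results -4 and 1.  */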
4565 if (target == 0 || GET_MODE (target) != compute_mode)
4566 target = gen_reg_rtx (compute_mode);
4568 if (rem_flag)
4570 remainder
4571 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4572 quotient = gen_reg_rtx (compute_mode);
4574 else
4576 quotient
4577 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4578 remainder = gen_reg_rtx (compute_mode);
4581 if (expand_twoval_binop (sdivmod_optab, op0, op1,
4582 quotient, remainder, 0))
4584 /* This could be computed with a branch-less sequence.
4585 Save that for later. */
4586 rtx tem;
4587 rtx label = gen_label_rtx ();
4588 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4589 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4590 NULL_RTX, 0, OPTAB_WIDEN);
4591 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4592 expand_dec (quotient, const1_rtx);
4593 expand_inc (remainder, op1);
4594 emit_label (label);
4595 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4598 /* No luck with division elimination or divmod. Have to do it
4599 by conditionally adjusting op0 *and* the result. */
4601 rtx label1, label2, label3, label4, label5;
4602 rtx adjusted_op0;
4603 rtx tem;
4605 quotient = gen_reg_rtx (compute_mode);
4606 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4607 label1 = gen_label_rtx ();
4608 label2 = gen_label_rtx ();
4609 label3 = gen_label_rtx ();
4610 label4 = gen_label_rtx ();
4611 label5 = gen_label_rtx ();
4612 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4613 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4614 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4615 quotient, 0, OPTAB_LIB_WIDEN);
4616 if (tem != quotient)
4617 emit_move_insn (quotient, tem);
4618 emit_jump_insn (gen_jump (label5));
4619 emit_barrier ();
4620 emit_label (label1);
4621 expand_inc (adjusted_op0, const1_rtx);
4622 emit_jump_insn (gen_jump (label4));
4623 emit_barrier ();
4624 emit_label (label2);
4625 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4626 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4627 quotient, 0, OPTAB_LIB_WIDEN);
4628 if (tem != quotient)
4629 emit_move_insn (quotient, tem);
4630 emit_jump_insn (gen_jump (label5));
4631 emit_barrier ();
4632 emit_label (label3);
4633 expand_dec (adjusted_op0, const1_rtx);
4634 emit_label (label4);
4635 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4636 quotient, 0, OPTAB_LIB_WIDEN);
4637 if (tem != quotient)
4638 emit_move_insn (quotient, tem);
4639 expand_dec (quotient, const1_rtx);
4640 emit_label (label5);
4642 break;
4644 case CEIL_DIV_EXPR:
4645 case CEIL_MOD_EXPR:
4646 if (unsignedp)
4648 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4650 rtx t1, t2, t3;
4651 unsigned HOST_WIDE_INT d = INTVAL (op1);
4652 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4653 floor_log2 (d), tquotient, 1);
4654 t2 = expand_binop (compute_mode, and_optab, op0,
4655 GEN_INT (d - 1),
4656 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4657 t3 = gen_reg_rtx (compute_mode);
4658 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4659 compute_mode, 1, 1);
4660 if (t3 == 0)
4662 rtx lab;
4663 lab = gen_label_rtx ();
4664 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4665 expand_inc (t1, const1_rtx);
4666 emit_label (lab);
4667 quotient = t1;
4669 else
4670 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4671 t1, t3),
4672 tquotient);
4673 break;
4676 /* Try using an instruction that produces both the quotient and
4677 remainder, using truncation. We can easily compensate the
4678 quotient or remainder to get ceiling rounding, once we have the
4679 remainder. Notice that we also compute the final remainder
4680 value here, and return the result right away. */
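/* For instance, with OP1 == 3 and OP0 == 7 the truncating divmod gives
   quotient 2 and remainder 1; the remainder is nonzero, so the code below
   increments the quotient to 3 and subtracts OP1 from the remainder,
   giving the ceiling results 3 and -2 modulo the mode (7 == 3 * 3 - 2).  */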
4681 if (target == 0 || GET_MODE (target) != compute_mode)
4682 target = gen_reg_rtx (compute_mode);
4684 if (rem_flag)
4686 remainder = (REG_P (target)
4687 ? target : gen_reg_rtx (compute_mode));
4688 quotient = gen_reg_rtx (compute_mode);
4690 else
4692 quotient = (REG_P (target)
4693 ? target : gen_reg_rtx (compute_mode));
4694 remainder = gen_reg_rtx (compute_mode);
4697 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4698 remainder, 1))
4700 /* This could be computed with a branch-less sequence.
4701 Save that for later. */
4702 rtx label = gen_label_rtx ();
4703 do_cmp_and_jump (remainder, const0_rtx, EQ,
4704 compute_mode, label);
4705 expand_inc (quotient, const1_rtx);
4706 expand_dec (remainder, op1);
4707 emit_label (label);
4708 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4711 /* No luck with division elimination or divmod. Have to do it
4712 by conditionally adjusting op0 *and* the result. */
4714 rtx label1, label2;
4715 rtx adjusted_op0, tem;
4717 quotient = gen_reg_rtx (compute_mode);
4718 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4719 label1 = gen_label_rtx ();
4720 label2 = gen_label_rtx ();
4721 do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4722 compute_mode, label1);
4723 emit_move_insn (quotient, const0_rtx);
4724 emit_jump_insn (gen_jump (label2));
4725 emit_barrier ();
4726 emit_label (label1);
4727 expand_dec (adjusted_op0, const1_rtx);
4728 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4729 quotient, 1, OPTAB_LIB_WIDEN);
4730 if (tem != quotient)
4731 emit_move_insn (quotient, tem);
4732 expand_inc (quotient, const1_rtx);
4733 emit_label (label2);
4736 else /* signed */
4738 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4739 && INTVAL (op1) >= 0)
4741 /* This is extremely similar to the code for the unsigned case
4742 above. For 2.7 we should merge these variants, but for
4743 2.6.1 I don't want to touch the code for unsigned since that
4744 gets used in C. The signed case will only be used by other
4745 languages (Ada). */
4747 rtx t1, t2, t3;
4748 unsigned HOST_WIDE_INT d = INTVAL (op1);
4749 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4750 floor_log2 (d), tquotient, 0);
4751 t2 = expand_binop (compute_mode, and_optab, op0,
4752 GEN_INT (d - 1),
4753 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4754 t3 = gen_reg_rtx (compute_mode);
4755 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4756 compute_mode, 1, 1);
4757 if (t3 == 0)
4759 rtx lab;
4760 lab = gen_label_rtx ();
4761 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4762 expand_inc (t1, const1_rtx);
4763 emit_label (lab);
4764 quotient = t1;
4766 else
4767 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4768 t1, t3),
4769 tquotient);
4770 break;
4773 /* Try using an instruction that produces both the quotient and
4774 remainder, using truncation. We can easily compensate the
4775 quotient or remainder to get ceiling rounding, once we have the
4776 remainder. Notice that we also compute the final remainder
4777 value here, and return the result right away. */
4778 if (target == 0 || GET_MODE (target) != compute_mode)
4779 target = gen_reg_rtx (compute_mode);
4780 if (rem_flag)
4782 remainder = (REG_P (target)
4783 ? target : gen_reg_rtx (compute_mode));
4784 quotient = gen_reg_rtx (compute_mode);
4786 else
4788 quotient = (REG_P (target)
4789 ? target : gen_reg_rtx (compute_mode));
4790 remainder = gen_reg_rtx (compute_mode);
4793 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4794 remainder, 0))
4796 /* This could be computed with a branch-less sequence.
4797 Save that for later. */
4798 rtx tem;
4799 rtx label = gen_label_rtx ();
4800 do_cmp_and_jump (remainder, const0_rtx, EQ,
4801 compute_mode, label);
4802 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4803 NULL_RTX, 0, OPTAB_WIDEN);
4804 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4805 expand_inc (quotient, const1_rtx);
4806 expand_dec (remainder, op1);
4807 emit_label (label);
4808 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4811 /* No luck with division elimination or divmod. Have to do it
4812 by conditionally adjusting op0 *and* the result. */
4814 rtx label1, label2, label3, label4, label5;
4815 rtx adjusted_op0;
4816 rtx tem;
4818 quotient = gen_reg_rtx (compute_mode);
4819 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4820 label1 = gen_label_rtx ();
4821 label2 = gen_label_rtx ();
4822 label3 = gen_label_rtx ();
4823 label4 = gen_label_rtx ();
4824 label5 = gen_label_rtx ();
4825 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4826 do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4827 compute_mode, label1);
4828 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4829 quotient, 0, OPTAB_LIB_WIDEN);
4830 if (tem != quotient)
4831 emit_move_insn (quotient, tem);
4832 emit_jump_insn (gen_jump (label5));
4833 emit_barrier ();
4834 emit_label (label1);
4835 expand_dec (adjusted_op0, const1_rtx);
4836 emit_jump_insn (gen_jump (label4));
4837 emit_barrier ();
4838 emit_label (label2);
4839 do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4840 compute_mode, label3);
4841 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4842 quotient, 0, OPTAB_LIB_WIDEN);
4843 if (tem != quotient)
4844 emit_move_insn (quotient, tem);
4845 emit_jump_insn (gen_jump (label5));
4846 emit_barrier ();
4847 emit_label (label3);
4848 expand_inc (adjusted_op0, const1_rtx);
4849 emit_label (label4);
4850 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4851 quotient, 0, OPTAB_LIB_WIDEN);
4852 if (tem != quotient)
4853 emit_move_insn (quotient, tem);
4854 expand_inc (quotient, const1_rtx);
4855 emit_label (label5);
4858 break;
4860 case EXACT_DIV_EXPR:
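/* For an exact division we can shift out the divisor's trailing zero bits
   and then multiply by the multiplicative inverse of its odd part modulo
   2^SIZE; e.g. for OP1 == 12 this shifts right by 2 and multiplies by
   invert_mod2n (3, size), which is exact whenever OP1 divides OP0.  */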
4861 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4863 HOST_WIDE_INT d = INTVAL (op1);
4864 unsigned HOST_WIDE_INT ml;
4865 int pre_shift;
4866 rtx t1;
4868 pre_shift = floor_log2 (d & -d);
4869 ml = invert_mod2n (d >> pre_shift, size);
4870 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4871 pre_shift, NULL_RTX, unsignedp);
4872 quotient = expand_mult (compute_mode, t1,
4873 gen_int_mode (ml, compute_mode),
4874 NULL_RTX, 1);
4876 insn = get_last_insn ();
4877 set_dst_reg_note (insn, REG_EQUAL,
4878 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4879 compute_mode, op0, op1),
4880 quotient);
4882 break;
4884 case ROUND_DIV_EXPR:
4885 case ROUND_MOD_EXPR:
4886 if (unsignedp)
4888 rtx tem;
4889 rtx label;
4890 label = gen_label_rtx ();
4891 quotient = gen_reg_rtx (compute_mode);
4892 remainder = gen_reg_rtx (compute_mode);
4893 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4895 rtx tem;
4896 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4897 quotient, 1, OPTAB_LIB_WIDEN);
4898 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4899 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4900 remainder, 1, OPTAB_LIB_WIDEN);
4902 tem = plus_constant (compute_mode, op1, -1);
4903 tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1);
4904 do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4905 expand_inc (quotient, const1_rtx);
4906 expand_dec (remainder, op1);
4907 emit_label (label);
4909 else
4911 rtx abs_rem, abs_op1, tem, mask;
4912 rtx label;
4913 label = gen_label_rtx ();
4914 quotient = gen_reg_rtx (compute_mode);
4915 remainder = gen_reg_rtx (compute_mode);
4916 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4918 rtx tem;
4919 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4920 quotient, 0, OPTAB_LIB_WIDEN);
4921 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4922 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4923 remainder, 0, OPTAB_LIB_WIDEN);
4925 abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4926 abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4927 tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4928 1, NULL_RTX, 1);
4929 do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4930 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4931 NULL_RTX, 0, OPTAB_WIDEN);
4932 mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4933 size - 1, NULL_RTX, 0);
4934 tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4935 NULL_RTX, 0, OPTAB_WIDEN);
4936 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4937 NULL_RTX, 0, OPTAB_WIDEN);
4938 expand_inc (quotient, tem);
4939 tem = expand_binop (compute_mode, xor_optab, mask, op1,
4940 NULL_RTX, 0, OPTAB_WIDEN);
4941 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4942 NULL_RTX, 0, OPTAB_WIDEN);
4943 expand_dec (remainder, tem);
4944 emit_label (label);
4946 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4948 default:
4949 gcc_unreachable ();
4952 if (quotient == 0)
4954 if (target && GET_MODE (target) != compute_mode)
4955 target = 0;
4957 if (rem_flag)
4959 /* Try to produce the remainder without producing the quotient.
4960 If we seem to have a divmod pattern that does not require widening,
4961 don't try widening here. We should really have a WIDEN argument
4962 to expand_twoval_binop, since what we'd really like to do here is
4963 1) try a mod insn in compute_mode
4964 2) try a divmod insn in compute_mode
4965 3) try a div insn in compute_mode and multiply-subtract to get
4966 remainder
4967 4) try the same things with widening allowed. */
4968 remainder
4969 = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4970 op0, op1, target,
4971 unsignedp,
4972 ((optab_handler (optab2, compute_mode)
4973 != CODE_FOR_nothing)
4974 ? OPTAB_DIRECT : OPTAB_WIDEN));
4975 if (remainder == 0)
4977 /* No luck there. Can we do remainder and divide at once
4978 without a library call? */
4979 remainder = gen_reg_rtx (compute_mode);
4980 if (! expand_twoval_binop ((unsignedp
4981 ? udivmod_optab
4982 : sdivmod_optab),
4983 op0, op1,
4984 NULL_RTX, remainder, unsignedp))
4985 remainder = 0;
4988 if (remainder)
4989 return gen_lowpart (mode, remainder);
4992 /* Produce the quotient. Try a quotient insn, but not a library call.
4993 If we have a divmod in this mode, use it in preference to widening
4994 the div (for this test we assume it will not fail). Note that optab2
4995 is set to the one of the two optabs that the call below will use. */
4996 quotient
4997 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4998 op0, op1, rem_flag ? NULL_RTX : target,
4999 unsignedp,
5000 ((optab_handler (optab2, compute_mode)
5001 != CODE_FOR_nothing)
5002 ? OPTAB_DIRECT : OPTAB_WIDEN));
5004 if (quotient == 0)
5006 /* No luck there. Try a quotient-and-remainder insn,
5007 keeping the quotient alone. */
5008 quotient = gen_reg_rtx (compute_mode);
5009 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
5010 op0, op1,
5011 quotient, NULL_RTX, unsignedp))
5013 quotient = 0;
5014 if (! rem_flag)
5015 /* Still no luck. If we are not computing the remainder,
5016 use a library call for the quotient. */
5017 quotient = sign_expand_binop (compute_mode,
5018 udiv_optab, sdiv_optab,
5019 op0, op1, target,
5020 unsignedp, OPTAB_LIB_WIDEN);
5025 if (rem_flag)
5027 if (target && GET_MODE (target) != compute_mode)
5028 target = 0;
5030 if (quotient == 0)
5032 /* No divide instruction either. Use library for remainder. */
5033 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5034 op0, op1, target,
5035 unsignedp, OPTAB_LIB_WIDEN);
5036 /* No remainder function. Try a quotient-and-remainder
5037 function, keeping the remainder. */
5038 if (!remainder)
5040 remainder = gen_reg_rtx (compute_mode);
5041 if (!expand_twoval_binop_libfunc
5042 (unsignedp ? udivmod_optab : sdivmod_optab,
5043 op0, op1,
5044 NULL_RTX, remainder,
5045 unsignedp ? UMOD : MOD))
5046 remainder = NULL_RTX;
5049 else
5051 /* We divided. Now finish doing X - Y * (X / Y). */
5052 remainder = expand_mult (compute_mode, quotient, op1,
5053 NULL_RTX, unsignedp);
5054 remainder = expand_binop (compute_mode, sub_optab, op0,
5055 remainder, target, unsignedp,
5056 OPTAB_LIB_WIDEN);
5060 return gen_lowpart (mode, rem_flag ? remainder : quotient);
5063 /* Return a tree node with data type TYPE, describing the value of X.
5064 Usually this is a VAR_DECL, if there is no obvious better choice.
5065 X may be an expression; however, we only support those expressions
5066 generated by loop.c. */
5068 tree
5069 make_tree (tree type, rtx x)
5071 tree t;
5073 switch (GET_CODE (x))
5075 case CONST_INT:
5077 HOST_WIDE_INT hi = 0;
5079 if (INTVAL (x) < 0
5080 && !(TYPE_UNSIGNED (type)
5081 && (GET_MODE_BITSIZE (TYPE_MODE (type))
5082 < HOST_BITS_PER_WIDE_INT)))
5083 hi = -1;
5085 t = build_int_cst_wide (type, INTVAL (x), hi);
5087 return t;
5090 case CONST_DOUBLE:
5091 if (GET_MODE (x) == VOIDmode)
5092 t = build_int_cst_wide (type,
5093 CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
5094 else
5096 REAL_VALUE_TYPE d;
5098 REAL_VALUE_FROM_CONST_DOUBLE (d, x);
5099 t = build_real (type, d);
5102 return t;
5104 case CONST_VECTOR:
5106 int units = CONST_VECTOR_NUNITS (x);
5107 tree itype = TREE_TYPE (type);
5108 tree *elts;
5109 int i;
5111 /* Build a tree with vector elements. */
5112 elts = XALLOCAVEC (tree, units);
5113 for (i = units - 1; i >= 0; --i)
5115 rtx elt = CONST_VECTOR_ELT (x, i);
5116 elts[i] = make_tree (itype, elt);
5119 return build_vector (type, elts);
5122 case PLUS:
5123 return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5124 make_tree (type, XEXP (x, 1)));
5126 case MINUS:
5127 return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5128 make_tree (type, XEXP (x, 1)));
5130 case NEG:
5131 return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5133 case MULT:
5134 return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5135 make_tree (type, XEXP (x, 1)));
5137 case ASHIFT:
5138 return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5139 make_tree (type, XEXP (x, 1)));
5141 case LSHIFTRT:
5142 t = unsigned_type_for (type);
5143 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5144 make_tree (t, XEXP (x, 0)),
5145 make_tree (type, XEXP (x, 1))));
5147 case ASHIFTRT:
5148 t = signed_type_for (type);
5149 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5150 make_tree (t, XEXP (x, 0)),
5151 make_tree (type, XEXP (x, 1))));
5153 case DIV:
5154 if (TREE_CODE (type) != REAL_TYPE)
5155 t = signed_type_for (type);
5156 else
5157 t = type;
5159 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5160 make_tree (t, XEXP (x, 0)),
5161 make_tree (t, XEXP (x, 1))));
5162 case UDIV:
5163 t = unsigned_type_for (type);
5164 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5165 make_tree (t, XEXP (x, 0)),
5166 make_tree (t, XEXP (x, 1))));
5168 case SIGN_EXTEND:
5169 case ZERO_EXTEND:
5170 t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5171 GET_CODE (x) == ZERO_EXTEND);
5172 return fold_convert (type, make_tree (t, XEXP (x, 0)));
5174 case CONST:
5175 return make_tree (type, XEXP (x, 0));
5177 case SYMBOL_REF:
5178 t = SYMBOL_REF_DECL (x);
5179 if (t)
5180 return fold_convert (type, build_fold_addr_expr (t));
5181 /* else fall through. */
5183 default:
5184 t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5186 /* If TYPE is a POINTER_TYPE, we might need to convert X from
5187 address mode to pointer mode. */
5188 if (POINTER_TYPE_P (type))
5189 x = convert_memory_address_addr_space
5190 (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5192 /* Note that we do *not* use SET_DECL_RTL here, because we do not
5193 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
5194 t->decl_with_rtl.rtl = x;
5196 return t;
5200 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5201 and returning TARGET.
5203 If TARGET is 0, a pseudo-register or constant is returned. */
5206 expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
5208 rtx tem = 0;
5210 if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5211 tem = simplify_binary_operation (AND, mode, op0, op1);
5212 if (tem == 0)
5213 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5215 if (target == 0)
5216 target = tem;
5217 else if (tem != target)
5218 emit_move_insn (target, tem);
5219 return target;
5222 /* Helper function for emit_store_flag. */
5223 static rtx
5224 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5225 enum machine_mode mode, enum machine_mode compare_mode,
5226 int unsignedp, rtx x, rtx y, int normalizep,
5227 enum machine_mode target_mode)
5229 struct expand_operand ops[4];
5230 rtx op0, last, comparison, subtarget;
5231 enum machine_mode result_mode = insn_data[(int) icode].operand[0].mode;
5233 last = get_last_insn ();
5234 x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5235 y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5236 if (!x || !y)
5238 delete_insns_since (last);
5239 return NULL_RTX;
5242 if (target_mode == VOIDmode)
5243 target_mode = result_mode;
5244 if (!target)
5245 target = gen_reg_rtx (target_mode);
5247 comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5249 create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5250 create_fixed_operand (&ops[1], comparison);
5251 create_fixed_operand (&ops[2], x);
5252 create_fixed_operand (&ops[3], y);
5253 if (!maybe_expand_insn (icode, 4, ops))
5255 delete_insns_since (last);
5256 return NULL_RTX;
5258 subtarget = ops[0].value;
5260 /* If we are converting to a wider mode, first convert to
5261 TARGET_MODE, then normalize. This produces better combining
5262 opportunities on machines that have a SIGN_EXTRACT when we are
5263 testing a single bit. This mostly benefits the 68k.
5265 If STORE_FLAG_VALUE does not have the sign bit set when
5266 interpreted in MODE, we can do this conversion as unsigned, which
5267 is usually more efficient. */
5268 if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
5270 convert_move (target, subtarget,
5271 val_signbit_known_clear_p (result_mode,
5272 STORE_FLAG_VALUE));
5273 op0 = target;
5274 result_mode = target_mode;
5276 else
5277 op0 = subtarget;
5279 /* If we want to keep subexpressions around, don't reuse our last
5280 target. */
5281 if (optimize)
5282 subtarget = 0;
5284 /* Now normalize to the proper value in MODE. Sometimes we don't
5285 have to do anything. */
5286 if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5288 /* STORE_FLAG_VALUE might be the most negative number, so write
5289 the comparison this way to avoid a compile-time warning. */
5290 else if (- normalizep == STORE_FLAG_VALUE)
5291 op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5293 /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5294 it hard to use a value of just the sign bit due to ANSI integer
5295 constant typing rules. */
5296 else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5297 op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5298 GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5299 normalizep == 1);
5300 else
5302 gcc_assert (STORE_FLAG_VALUE & 1);
5304 op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5305 if (normalizep == -1)
5306 op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5309 /* If we were converting to a smaller mode, do the conversion now. */
5310 if (target_mode != result_mode)
5312 convert_move (target, op0, 0);
5313 return target;
5315 else
5316 return op0;
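/* A plain-C sketch (illustration only, not part of the original source) of
   what the NORMALIZEP handling above amounts to, assuming a hypothetical
   target whose scc pattern leaves 0x80000000 ("just the sign bit", one
   possible STORE_FLAG_VALUE) in the result register when the comparison is
   true and 0 when it is false.  */

static int
normalize_scc_sketch (unsigned int raw, int normalizep)
{
  if (normalizep == 1)
    return (int) (raw >> 31);    /* want 0/1: logical shift of the sign bit */
  if (normalizep == -1)
    return -(int) (raw >> 31);   /* want 0/-1: the arithmetic-shift form    */
  return (int) raw;              /* NORMALIZEP == 0: leave the value "raw"
                                    (the conversion is implementation-defined
                                    here; good enough for illustration).    */
}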
5320 /* A subroutine of emit_store_flag only including "tricks" that do not
5321 need a recursive call. These are kept separate to avoid infinite
5322 loops. */
5324 static rtx
5325 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5326 enum machine_mode mode, int unsignedp, int normalizep,
5327 enum machine_mode target_mode)
5329 rtx subtarget;
5330 enum insn_code icode;
5331 enum machine_mode compare_mode;
5332 enum mode_class mclass;
5333 enum rtx_code scode;
5334 rtx tem;
5336 if (unsignedp)
5337 code = unsigned_condition (code);
5338 scode = swap_condition (code);
5340 /* If one operand is constant, make it the second one. Only do this
5341 if the other operand is not constant as well. */
5343 if (swap_commutative_operands_p (op0, op1))
5345 tem = op0;
5346 op0 = op1;
5347 op1 = tem;
5348 code = swap_condition (code);
5351 if (mode == VOIDmode)
5352 mode = GET_MODE (op0);
5354 /* For some comparisons with 1 and -1, we can convert this to
5355 comparisons with zero. This will often produce more opportunities for
5356 store-flag insns. */
5358 switch (code)
5360 case LT:
5361 if (op1 == const1_rtx)
5362 op1 = const0_rtx, code = LE;
5363 break;
5364 case LE:
5365 if (op1 == constm1_rtx)
5366 op1 = const0_rtx, code = LT;
5367 break;
5368 case GE:
5369 if (op1 == const1_rtx)
5370 op1 = const0_rtx, code = GT;
5371 break;
5372 case GT:
5373 if (op1 == constm1_rtx)
5374 op1 = const0_rtx, code = GE;
5375 break;
5376 case GEU:
5377 if (op1 == const1_rtx)
5378 op1 = const0_rtx, code = NE;
5379 break;
5380 case LTU:
5381 if (op1 == const1_rtx)
5382 op1 = const0_rtx, code = EQ;
5383 break;
5384 default:
5385 break;
5388 /* If we are comparing a double-word integer with zero or -1, we can
5389 convert the comparison into one involving a single word. */
5390 if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5391 && GET_MODE_CLASS (mode) == MODE_INT
5392 && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5394 if ((code == EQ || code == NE)
5395 && (op1 == const0_rtx || op1 == constm1_rtx))
5397 rtx op00, op01;
5399 /* Do a logical OR or AND of the two words and compare the
5400 result. */
5401 op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5402 op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5403 tem = expand_binop (word_mode,
5404 op1 == const0_rtx ? ior_optab : and_optab,
5405 op00, op01, NULL_RTX, unsignedp,
5406 OPTAB_DIRECT);
5408 if (tem != 0)
5409 tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5410 unsignedp, normalizep);
5412 else if ((code == LT || code == GE) && op1 == const0_rtx)
5414 rtx op0h;
5416 /* If testing the sign bit, can just test on high word. */
5417 op0h = simplify_gen_subreg (word_mode, op0, mode,
5418 subreg_highpart_offset (word_mode,
5419 mode));
5420 tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5421 unsignedp, normalizep);
5423 else
5424 tem = NULL_RTX;
5426 if (tem)
5428 if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5429 return tem;
5430 if (!target)
5431 target = gen_reg_rtx (target_mode);
5433 convert_move (target, tem,
5434 !val_signbit_known_set_p (word_mode,
5435 (normalizep ? normalizep
5436 : STORE_FLAG_VALUE)));
5437 return target;
5441 /* If this is A < 0 or A >= 0, we can do this by taking the ones
5442 complement of A (for GE) and shifting the sign bit to the low bit. */
5443 if (op1 == const0_rtx && (code == LT || code == GE)
5444 && GET_MODE_CLASS (mode) == MODE_INT
5445 && (normalizep || STORE_FLAG_VALUE == 1
5446 || val_signbit_p (mode, STORE_FLAG_VALUE)))
5448 subtarget = target;
5450 if (!target)
5451 target_mode = mode;
5453 /* If the result is to be wider than OP0, it is best to convert it
5454 first. If it is to be narrower, it is *incorrect* to convert it
5455 first. */
5456 else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5458 op0 = convert_modes (target_mode, mode, op0, 0);
5459 mode = target_mode;
5462 if (target_mode != mode)
5463 subtarget = 0;
5465 if (code == GE)
5466 op0 = expand_unop (mode, one_cmpl_optab, op0,
5467 ((STORE_FLAG_VALUE == 1 || normalizep)
5468 ? 0 : subtarget), 0);
5470 if (STORE_FLAG_VALUE == 1 || normalizep)
5471 /* If we are supposed to produce a 0/1 value, we want to do
5472 a logical shift from the sign bit to the low-order bit; for
5473 a -1/0 value, we do an arithmetic shift. */
5474 op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5475 GET_MODE_BITSIZE (mode) - 1,
5476 subtarget, normalizep != -1);
5478 if (mode != target_mode)
5479 op0 = convert_modes (target_mode, mode, op0, 0);
5481 return op0;
5484 mclass = GET_MODE_CLASS (mode);
5485 for (compare_mode = mode; compare_mode != VOIDmode;
5486 compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5488 enum machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5489 icode = optab_handler (cstore_optab, optab_mode);
5490 if (icode != CODE_FOR_nothing)
5492 do_pending_stack_adjust ();
5493 tem = emit_cstore (target, icode, code, mode, compare_mode,
5494 unsignedp, op0, op1, normalizep, target_mode);
5495 if (tem)
5496 return tem;
5498 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5500 tem = emit_cstore (target, icode, scode, mode, compare_mode,
5501 unsignedp, op1, op0, normalizep, target_mode);
5502 if (tem)
5503 return tem;
5505 break;
5509 return 0;
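/* A plain-C restatement (illustration only; assumes 32-bit two's-complement
   "int" and "unsigned int") of the constant canonicalizations in the switch
   near the top of emit_store_flag_1: each comparison against 1 or -1 has the
   same truth value as the corresponding comparison against zero, and
   comparisons against zero are what store-flag patterns handle best.  */

static int
canon_equivalences_sketch (int a, unsigned int u)
{
  int ok = 1;
  ok &= (a <   1) == (a <= 0);   /* LT   1  ->  LE  0 */
  ok &= (a <= -1) == (a <  0);   /* LE  -1  ->  LT  0 */
  ok &= (a >=  1) == (a >  0);   /* GE   1  ->  GT  0 */
  ok &= (a >  -1) == (a >= 0);   /* GT  -1  ->  GE  0 */
  ok &= (u >= 1u) == (u != 0);   /* GEU  1  ->  NE  0 */
  ok &= (u <  1u) == (u == 0);   /* LTU  1  ->  EQ  0 */
  return ok;                     /* always 1 */
}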
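/* A plain-C sketch (illustration only; "unsigned int" stands in for the
   32-bit word of a 32-bit-word target) of the double-word reductions used
   above when a 2*BITS_PER_WORD operand is compared with 0 or -1, with the
   operand already split into LO and HI words.  */

static int
dword_eq_zero_sketch (unsigned int lo, unsigned int hi)
{
  return (lo | hi) == 0;              /* EQ/NE with  0: OR the two words   */
}

static int
dword_eq_minus_one_sketch (unsigned int lo, unsigned int hi)
{
  return (lo & hi) == 0xffffffffu;    /* EQ/NE with -1: AND the two words  */
}

static int
dword_lt_zero_sketch (unsigned int lo, unsigned int hi)
{
  (void) lo;                          /* LT/GE with  0: only the sign bit  */
  return (hi >> 31) != 0;             /* of the high word matters          */
}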
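/* A plain-C sketch (illustration only; 32-bit two's-complement assumed) of
   the "A < 0 / A >= 0" case above: the answer is A's sign bit, complemented
   for GE, moved to wherever NORMALIZEP wants it by a final shift.  */

static int
lt_zero_as_0_1_sketch (int a)
{
  return (int) (((unsigned int) a) >> 31);     /* logical shift: 0 or 1     */
}

static int
ge_zero_as_0_1_sketch (int a)
{
  return (int) (((unsigned int) ~a) >> 31);    /* one's complement first    */
}

static int
lt_zero_as_0_m1_sketch (int a)
{
  return -(int) (((unsigned int) a) >> 31);    /* the arithmetic-shift form */
}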
5512 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5513 and storing in TARGET. Normally return TARGET.
5514 Return 0 if that cannot be done.
5516 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
5517 it is VOIDmode, they cannot both be CONST_INT.
5519 UNSIGNEDP is for the case where we have to widen the operands
5520 to perform the operation. It says to use zero-extension.
5522 NORMALIZEP is 1 if we should convert the result to be either zero
5523 or one. NORMALIZEP is -1 if we should convert the result to be
5524 either zero or -1. If NORMALIZEP is zero, the result will be left
5525 "raw" out of the scc insn. */
5527 rtx
5528 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5529 enum machine_mode mode, int unsignedp, int normalizep)
5531 enum machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5532 enum rtx_code rcode;
5533 rtx subtarget;
5534 rtx tem, last, trueval;
5536 tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5537 target_mode);
5538 if (tem)
5539 return tem;
5541 /* If we reached here, we can't do this with a scc insn, however there
5542 are some comparisons that can be done in other ways. Don't do any
5543 of these cases if branches are very cheap. */
5544 if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5545 return 0;
5547 /* See what we need to return. We can only return a 1, -1, or the
5548 sign bit. */
5550 if (normalizep == 0)
5552 if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5553 normalizep = STORE_FLAG_VALUE;
5555 else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5556 ;
5557 else
5558 return 0;
5561 last = get_last_insn ();
5563 /* If optimizing, use different pseudo registers for each insn, instead
5564 of reusing the same pseudo. This leads to better CSE, but slows
5565 down the compiler, since there are more pseudos. */
5566 subtarget = (!optimize
5567 && (target_mode == mode)) ? target : NULL_RTX;
5568 trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5570 /* For floating-point comparisons, try the reverse comparison or try
5571 changing the "orderedness" of the comparison. */
5572 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5574 enum rtx_code first_code;
5575 bool and_them;
5577 rcode = reverse_condition_maybe_unordered (code);
5578 if (can_compare_p (rcode, mode, ccp_store_flag)
5579 && (code == ORDERED || code == UNORDERED
5580 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5581 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5583 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5584 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5586 /* For the reverse comparison, use either an addition or an XOR. */
5587 if (want_add
5588 && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5589 optimize_insn_for_speed_p ()) == 0)
5591 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5592 STORE_FLAG_VALUE, target_mode);
5593 if (tem)
5594 return expand_binop (target_mode, add_optab, tem,
5595 GEN_INT (normalizep),
5596 target, 0, OPTAB_WIDEN);
5598 else if (!want_add
5599 && rtx_cost (trueval, XOR, 1,
5600 optimize_insn_for_speed_p ()) == 0)
5602 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5603 normalizep, target_mode);
5604 if (tem)
5605 return expand_binop (target_mode, xor_optab, tem, trueval,
5606 target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
5610 delete_insns_since (last);
5612 /* Cannot split ORDERED and UNORDERED, only try the above trick. */
5613 if (code == ORDERED || code == UNORDERED)
5614 return 0;
5616 and_them = split_comparison (code, mode, &first_code, &code);
5618 /* If there are no NaNs, the first comparison should always fall through.
5619 Effectively change the comparison to the other one. */
5620 if (!HONOR_NANS (mode))
5622 gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5623 return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5624 target_mode);
5627 #ifdef HAVE_conditional_move
5628 /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5629 conditional move. */
5630 tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5631 normalizep, target_mode);
5632 if (tem == 0)
5633 return 0;
5635 if (and_them)
5636 tem = emit_conditional_move (target, code, op0, op1, mode,
5637 tem, const0_rtx, GET_MODE (tem), 0);
5638 else
5639 tem = emit_conditional_move (target, code, op0, op1, mode,
5640 trueval, tem, GET_MODE (tem), 0);
5642 if (tem == 0)
5643 delete_insns_since (last);
5644 return tem;
5645 #else
5646 return 0;
5647 #endif
5650 /* The remaining tricks only apply to integer comparisons. */
5652 if (GET_MODE_CLASS (mode) != MODE_INT)
5653 return 0;
5655 /* If this is an equality comparison of integers, we can try to exclusive-or
5656 (or subtract) the two operands and use a recursive call to try the
5657 comparison with zero. Don't do any of these cases if branches are
5658 very cheap. */
5660 if ((code == EQ || code == NE) && op1 != const0_rtx)
5662 tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5663 OPTAB_WIDEN);
5665 if (tem == 0)
5666 tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5667 OPTAB_WIDEN);
5668 if (tem != 0)
5669 tem = emit_store_flag (target, code, tem, const0_rtx,
5670 mode, unsignedp, normalizep);
5671 if (tem != 0)
5672 return tem;
5674 delete_insns_since (last);
5677 /* For integer comparisons, try the reverse comparison. However, for
5678 small X and if we'd have to extend anyway, implementing "X != 0"
5679 as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0". */
5680 rcode = reverse_condition (code);
5681 if (can_compare_p (rcode, mode, ccp_store_flag)
5682 && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5683 && code == NE
5684 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5685 && op1 == const0_rtx))
5687 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5688 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5690 /* Again, for the reverse comparison, use either an addition or an XOR. */
5691 if (want_add
5692 && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5693 optimize_insn_for_speed_p ()) == 0)
5695 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5696 STORE_FLAG_VALUE, target_mode);
5697 if (tem != 0)
5698 tem = expand_binop (target_mode, add_optab, tem,
5699 GEN_INT (normalizep), target, 0, OPTAB_WIDEN);
5701 else if (!want_add
5702 && rtx_cost (trueval, XOR, 1,
5703 optimize_insn_for_speed_p ()) == 0)
5705 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5706 normalizep, target_mode);
5707 if (tem != 0)
5708 tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5709 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5712 if (tem != 0)
5713 return tem;
5714 delete_insns_since (last);
5717 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5718 the constant zero. Reject all other comparisons at this point. Only
5719 do LE and GT if branches are expensive since they are expensive on
5720 2-operand machines. */
5722 if (op1 != const0_rtx
5723 || (code != EQ && code != NE
5724 && (BRANCH_COST (optimize_insn_for_speed_p (),
5725 false) <= 1 || (code != LE && code != GT))))
5726 return 0;
5728 /* Try to put the result of the comparison in the sign bit. Assume we can't
5729 do the necessary operation below. */
5731 tem = 0;
5733 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
5734 the sign bit set. */
5736 if (code == LE)
5738 /* This is destructive, so SUBTARGET can't be OP0. */
5739 if (rtx_equal_p (subtarget, op0))
5740 subtarget = 0;
5742 tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5743 OPTAB_WIDEN);
5744 if (tem)
5745 tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5746 OPTAB_WIDEN);
5749 /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5750 number of bits in the mode of OP0, minus one. */
5752 if (code == GT)
5754 if (rtx_equal_p (subtarget, op0))
5755 subtarget = 0;
5757 tem = expand_shift (RSHIFT_EXPR, mode, op0,
5758 GET_MODE_BITSIZE (mode) - 1,
5759 subtarget, 0);
5760 tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5761 OPTAB_WIDEN);
5764 if (code == EQ || code == NE)
5766 /* For EQ or NE, one way to do the comparison is to apply an operation
5767 that converts the operand into a positive number if it is nonzero
5768 or zero if it was originally zero. Then, for EQ, we subtract 1 and
5769 for NE we negate. This puts the result in the sign bit. Then we
5770 normalize with a shift, if needed.
5772 Two operations that can do the above actions are ABS and FFS, so try
5773 them. If that doesn't work, and MODE is smaller than a full word,
5774 we can use zero-extension to the wider mode (an unsigned conversion)
5775 as the operation. */
5777 /* Note that ABS doesn't yield a positive number for INT_MIN, but
5778 that is compensated by the subsequent overflow when subtracting
5779 one / negating. */
5781 if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5782 tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5783 else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5784 tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5785 else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5787 tem = convert_modes (word_mode, mode, op0, 1);
5788 mode = word_mode;
5791 if (tem != 0)
5793 if (code == EQ)
5794 tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5795 0, OPTAB_WIDEN);
5796 else
5797 tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5800 /* If we couldn't do it that way, for NE we can "or" the two's complement
5801 of the value with itself. For EQ, we take the one's complement of
5802 that "or", which is an extra insn, so we only handle EQ if branches
5803 are expensive. */
5805 if (tem == 0
5806 && (code == NE
5807 || BRANCH_COST (optimize_insn_for_speed_p (),
5808 false) > 1))
5810 if (rtx_equal_p (subtarget, op0))
5811 subtarget = 0;
5813 tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5814 tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5815 OPTAB_WIDEN);
5817 if (tem && code == EQ)
5818 tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5822 if (tem && normalizep)
5823 tem = expand_shift (RSHIFT_EXPR, mode, tem,
5824 GET_MODE_BITSIZE (mode) - 1,
5825 subtarget, normalizep == 1);
5827 if (tem)
5829 if (!target)
5830 ;
5831 else if (GET_MODE (tem) != target_mode)
5833 convert_move (target, tem, 0);
5834 tem = target;
5836 else if (!subtarget)
5838 emit_move_insn (target, tem);
5839 tem = target;
5842 else
5843 delete_insns_since (last);
5845 return tem;
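/* A plain-C sketch (illustration only) of the "reverse comparison" fix-ups
   used twice above, for the common case STORE_FLAG_VALUE == 1: instead of
   the requested comparison, the reverse one is emitted and the result is
   repaired with a PLUS or an XOR, both of which are usually free.  */

static int
lt_as_0_m1_sketch (int a, int b)        /* want NORMALIZEP == -1 */
{
  return (a >= b) + (-1);               /* reverse scc, then add -1:
                                           a <  b :  0 + -1 == -1
                                           a >= b :  1 + -1 ==  0 */
}

static int
lt_as_0_1_sketch (int a, int b)         /* want NORMALIZEP == 1 */
{
  return (a >= b) ^ 1;                  /* reverse scc, then flip bit 0 */
}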
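/* A plain-C sketch (illustration only) of the equality reduction above: an
   EQ or NE against an arbitrary operand is turned into an EQ or NE against
   zero by combining the operands with XOR (or subtraction), after which the
   zero-comparison tricks apply.  */

static int
eq_via_xor_sketch (unsigned int a, unsigned int b)
{
  return (a ^ b) == 0;                  /* same truth value as a == b */
}

static int
ne_via_sub_sketch (unsigned int a, unsigned int b)
{
  return (a - b) != 0;                  /* same truth value as a != b */
}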
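/* Plain-C sketches (illustration only; 32-bit two's-complement assumed, with
   unsigned arithmetic so A == INT_MIN stays well defined) of the sign-bit
   tricks above.  Each helper puts the answer in the sign bit and then
   extracts it, which is what the final shift by GET_MODE_BITSIZE (mode) - 1
   does in the RTL.  */

static int
le_zero_sketch (int a)                  /* A <= 0:  sign of A | (A - 1)     */
{
  unsigned int u = (unsigned int) a;
  return (int) ((u | (u - 1u)) >> 31);
}

static int
gt_zero_sketch (int a)                  /* A > 0:  sign of (A >> 31) - A    */
{
  unsigned int u = (unsigned int) a;
  unsigned int smear = a < 0 ? ~0u : 0u;        /* the arithmetic shift     */
  return (int) ((smear - u) >> 31);
}

static int
ne_zero_sketch (int a)                  /* A != 0:  sign of -A | A          */
{
  unsigned int u = (unsigned int) a;
  return (int) (((0u - u) | u) >> 31);
}

static int
eq_zero_sketch (int a)                  /* A == 0:  complement of the above */
{
  unsigned int u = (unsigned int) a;
  return (int) ((~((0u - u) | u)) >> 31);
}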
5848 /* Like emit_store_flag, but always succeeds. */
5850 rtx
5851 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5852 enum machine_mode mode, int unsignedp, int normalizep)
5854 rtx tem, label;
5855 rtx trueval, falseval;
5857 /* First see if emit_store_flag can do the job. */
5858 tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5859 if (tem != 0)
5860 return tem;
5862 if (!target)
5863 target = gen_reg_rtx (word_mode);
5865 /* If this failed, we have to do this with set/compare/jump/set code.
5866 For foo != 0, if foo is in OP0, just replace it with 1 if nonzero. */
5867 trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5868 if (code == NE
5869 && GET_MODE_CLASS (mode) == MODE_INT
5870 && REG_P (target)
5871 && op0 == target
5872 && op1 == const0_rtx)
5874 label = gen_label_rtx ();
5875 do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp,
5876 mode, NULL_RTX, NULL_RTX, label, -1);
5877 emit_move_insn (target, trueval);
5878 emit_label (label);
5879 return target;
5882 if (!REG_P (target)
5883 || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5884 target = gen_reg_rtx (GET_MODE (target));
5886 /* Jump in the right direction if the target cannot implement CODE
5887 but can jump on its reverse condition. */
5888 falseval = const0_rtx;
5889 if (! can_compare_p (code, mode, ccp_jump)
5890 && (! FLOAT_MODE_P (mode)
5891 || code == ORDERED || code == UNORDERED
5892 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5893 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5895 enum rtx_code rcode;
5896 if (FLOAT_MODE_P (mode))
5897 rcode = reverse_condition_maybe_unordered (code);
5898 else
5899 rcode = reverse_condition (code);
5901 /* Canonicalize to UNORDERED for the libcall. */
5902 if (can_compare_p (rcode, mode, ccp_jump)
5903 || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5905 falseval = trueval;
5906 trueval = const0_rtx;
5907 code = rcode;
5911 emit_move_insn (target, trueval);
5912 label = gen_label_rtx ();
5913 do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5914 NULL_RTX, label, -1);
5916 emit_move_insn (target, falseval);
5917 emit_label (label);
5919 return target;
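/* A plain-C sketch (illustration only) of the set/compare/jump/set fallback
   used above when no scc sequence exists, here for "flag = (a < b)" with
   trueval == 1 and falseval == 0; the goto mirrors the conditional jump
   emitted by do_compare_rtx_and_jump.  */

static int
store_flag_by_branch_sketch (int a, int b)
{
  int flag = 1;         /* set: assume the comparison holds             */
  if (a < b)
    goto done;          /* compare/jump: skip the correction if it does */
  flag = 0;             /* set: the comparison was false                */
 done:
  return flag;
}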
5922 /* Perform possibly multi-word comparison and conditional jump to LABEL
5923 if ARG1 OP ARG2 is true, where ARG1 and ARG2 are of mode MODE. This is
5924 now a thin wrapper around do_compare_rtx_and_jump. */
5926 static void
5927 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
5928 rtx label)
5930 int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5931 do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode,
5932 NULL_RTX, NULL_RTX, label, -1);