1 /* Medium-level subroutines: convert bit-field store and extract
2 and shifts, multiplies and divides to rtl instructions.
3 Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
4 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
5 2011
6 Free Software Foundation, Inc.
8 This file is part of GCC.
10 GCC is free software; you can redistribute it and/or modify it under
11 the terms of the GNU General Public License as published by the Free
12 Software Foundation; either version 3, or (at your option) any later
13 version.
15 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
16 WARRANTY; without even the implied warranty of MERCHANTABILITY or
17 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 for more details.
20 You should have received a copy of the GNU General Public License
21 along with GCC; see the file COPYING3. If not see
22 <http://www.gnu.org/licenses/>. */
25 #include "config.h"
26 #include "system.h"
27 #include "coretypes.h"
28 #include "tm.h"
29 #include "diagnostic-core.h"
30 #include "rtl.h"
31 #include "tree.h"
32 #include "tm_p.h"
33 #include "flags.h"
34 #include "insn-config.h"
35 #include "expr.h"
36 #include "optabs.h"
37 #include "recog.h"
38 #include "langhooks.h"
39 #include "df.h"
40 #include "target.h"
41 #include "expmed.h"
43 struct target_expmed default_target_expmed;
44 #if SWITCHABLE_TARGET
45 struct target_expmed *this_target_expmed = &default_target_expmed;
46 #endif
48 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
49 unsigned HOST_WIDE_INT,
50 unsigned HOST_WIDE_INT,
51 unsigned HOST_WIDE_INT,
52 unsigned HOST_WIDE_INT,
53 rtx);
54 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
55 unsigned HOST_WIDE_INT,
56 unsigned HOST_WIDE_INT,
57 unsigned HOST_WIDE_INT,
58 rtx);
59 static rtx extract_fixed_bit_field (enum machine_mode, rtx,
60 unsigned HOST_WIDE_INT,
61 unsigned HOST_WIDE_INT,
62 unsigned HOST_WIDE_INT, rtx, int, bool);
63 static rtx mask_rtx (enum machine_mode, int, int, int);
64 static rtx lshift_value (enum machine_mode, rtx, int, int);
65 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
66 unsigned HOST_WIDE_INT, int);
67 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
68 static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
69 static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
71 /* Test whether a value is zero or a power of two. */
72 #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
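/* Editor's illustrative sketch (not part of the original file): a power of
   two has exactly one set bit, so X & (X - 1) clears it and yields zero;
   zero trivially yields zero; any value with two or more set bits keeps at
   least one bit set after the AND.  */
#if 0
static void
exact_power_of_2_or_zero_p_examples (void)
{
  int a = EXACT_POWER_OF_2_OR_ZERO_P (0);    /* 0 & -1 == 0   -> true  */
  int b = EXACT_POWER_OF_2_OR_ZERO_P (64);   /* 64 & 63 == 0  -> true  */
  int c = EXACT_POWER_OF_2_OR_ZERO_P (6);    /* 6 & 5 == 4    -> false */
  (void) a; (void) b; (void) c;
}
#endif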
74 #ifndef SLOW_UNALIGNED_ACCESS
75 #define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
76 #endif
79 /* Reduce conditional compilation elsewhere. */
80 #ifndef HAVE_insv
81 #define HAVE_insv 0
82 #define CODE_FOR_insv CODE_FOR_nothing
83 #define gen_insv(a,b,c,d) NULL_RTX
84 #endif
85 #ifndef HAVE_extv
86 #define HAVE_extv 0
87 #define CODE_FOR_extv CODE_FOR_nothing
88 #define gen_extv(a,b,c,d) NULL_RTX
89 #endif
90 #ifndef HAVE_extzv
91 #define HAVE_extzv 0
92 #define CODE_FOR_extzv CODE_FOR_nothing
93 #define gen_extzv(a,b,c,d) NULL_RTX
94 #endif
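/* Editor's note (illustrative, not from the original file): the stubs above
   let callers test HAVE_insv at run time instead of wrapping every use in
   #ifdef; when the pattern is absent the branch is dead and the dummy
   gen_insv call is optimized away.  The caller below is hypothetical.  */
#if 0
static void
hypothetical_insv_caller (rtx dest, rtx value)
{
  if (HAVE_insv)
    emit_insn (gen_insv (dest, GEN_INT (8), GEN_INT (0), value));
  /* else fall back to the shift-and-mask path.  */
}
#endif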
96 void
97 init_expmed (void)
99 struct
101 struct rtx_def reg; rtunion reg_fld[2];
102 struct rtx_def plus; rtunion plus_fld1;
103 struct rtx_def neg;
104 struct rtx_def mult; rtunion mult_fld1;
105 struct rtx_def sdiv; rtunion sdiv_fld1;
106 struct rtx_def udiv; rtunion udiv_fld1;
107 struct rtx_def zext;
108 struct rtx_def sdiv_32; rtunion sdiv_32_fld1;
109 struct rtx_def smod_32; rtunion smod_32_fld1;
110 struct rtx_def wide_mult; rtunion wide_mult_fld1;
111 struct rtx_def wide_lshr; rtunion wide_lshr_fld1;
112 struct rtx_def wide_trunc;
113 struct rtx_def shift; rtunion shift_fld1;
114 struct rtx_def shift_mult; rtunion shift_mult_fld1;
115 struct rtx_def shift_add; rtunion shift_add_fld1;
116 struct rtx_def shift_sub0; rtunion shift_sub0_fld1;
117 struct rtx_def shift_sub1; rtunion shift_sub1_fld1;
118 } all;
120 rtx pow2[MAX_BITS_PER_WORD];
121 rtx cint[MAX_BITS_PER_WORD];
122 int m, n;
123 enum machine_mode mode, wider_mode;
124 int speed;
127 for (m = 1; m < MAX_BITS_PER_WORD; m++)
129 pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
130 cint[m] = GEN_INT (m);
132 memset (&all, 0, sizeof all);
134 PUT_CODE (&all.reg, REG);
135 /* Avoid using hard regs in ways which may be unsupported. */
136 SET_REGNO (&all.reg, LAST_VIRTUAL_REGISTER + 1);
138 PUT_CODE (&all.plus, PLUS);
139 XEXP (&all.plus, 0) = &all.reg;
140 XEXP (&all.plus, 1) = &all.reg;
142 PUT_CODE (&all.neg, NEG);
143 XEXP (&all.neg, 0) = &all.reg;
145 PUT_CODE (&all.mult, MULT);
146 XEXP (&all.mult, 0) = &all.reg;
147 XEXP (&all.mult, 1) = &all.reg;
149 PUT_CODE (&all.sdiv, DIV);
150 XEXP (&all.sdiv, 0) = &all.reg;
151 XEXP (&all.sdiv, 1) = &all.reg;
153 PUT_CODE (&all.udiv, UDIV);
154 XEXP (&all.udiv, 0) = &all.reg;
155 XEXP (&all.udiv, 1) = &all.reg;
157 PUT_CODE (&all.sdiv_32, DIV);
158 XEXP (&all.sdiv_32, 0) = &all.reg;
159 XEXP (&all.sdiv_32, 1) = 32 < MAX_BITS_PER_WORD ? cint[32] : GEN_INT (32);
161 PUT_CODE (&all.smod_32, MOD);
162 XEXP (&all.smod_32, 0) = &all.reg;
163 XEXP (&all.smod_32, 1) = XEXP (&all.sdiv_32, 1);
165 PUT_CODE (&all.zext, ZERO_EXTEND);
166 XEXP (&all.zext, 0) = &all.reg;
168 PUT_CODE (&all.wide_mult, MULT);
169 XEXP (&all.wide_mult, 0) = &all.zext;
170 XEXP (&all.wide_mult, 1) = &all.zext;
172 PUT_CODE (&all.wide_lshr, LSHIFTRT);
173 XEXP (&all.wide_lshr, 0) = &all.wide_mult;
175 PUT_CODE (&all.wide_trunc, TRUNCATE);
176 XEXP (&all.wide_trunc, 0) = &all.wide_lshr;
178 PUT_CODE (&all.shift, ASHIFT);
179 XEXP (&all.shift, 0) = &all.reg;
181 PUT_CODE (&all.shift_mult, MULT);
182 XEXP (&all.shift_mult, 0) = &all.reg;
184 PUT_CODE (&all.shift_add, PLUS);
185 XEXP (&all.shift_add, 0) = &all.shift_mult;
186 XEXP (&all.shift_add, 1) = &all.reg;
188 PUT_CODE (&all.shift_sub0, MINUS);
189 XEXP (&all.shift_sub0, 0) = &all.shift_mult;
190 XEXP (&all.shift_sub0, 1) = &all.reg;
192 PUT_CODE (&all.shift_sub1, MINUS);
193 XEXP (&all.shift_sub1, 0) = &all.reg;
194 XEXP (&all.shift_sub1, 1) = &all.shift_mult;
196 for (speed = 0; speed < 2; speed++)
198 crtl->maybe_hot_insn_p = speed;
199 zero_cost[speed] = set_src_cost (const0_rtx, speed);
201 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
202 mode != VOIDmode;
203 mode = GET_MODE_WIDER_MODE (mode))
205 PUT_MODE (&all.reg, mode);
206 PUT_MODE (&all.plus, mode);
207 PUT_MODE (&all.neg, mode);
208 PUT_MODE (&all.mult, mode);
209 PUT_MODE (&all.sdiv, mode);
210 PUT_MODE (&all.udiv, mode);
211 PUT_MODE (&all.sdiv_32, mode);
212 PUT_MODE (&all.smod_32, mode);
213 PUT_MODE (&all.wide_trunc, mode);
214 PUT_MODE (&all.shift, mode);
215 PUT_MODE (&all.shift_mult, mode);
216 PUT_MODE (&all.shift_add, mode);
217 PUT_MODE (&all.shift_sub0, mode);
218 PUT_MODE (&all.shift_sub1, mode);
220 add_cost[speed][mode] = set_src_cost (&all.plus, speed);
221 neg_cost[speed][mode] = set_src_cost (&all.neg, speed);
222 mul_cost[speed][mode] = set_src_cost (&all.mult, speed);
223 sdiv_cost[speed][mode] = set_src_cost (&all.sdiv, speed);
224 udiv_cost[speed][mode] = set_src_cost (&all.udiv, speed);
226 sdiv_pow2_cheap[speed][mode] = (set_src_cost (&all.sdiv_32, speed)
227 <= 2 * add_cost[speed][mode]);
228 smod_pow2_cheap[speed][mode] = (set_src_cost (&all.smod_32, speed)
229 <= 4 * add_cost[speed][mode]);
231 wider_mode = GET_MODE_WIDER_MODE (mode);
232 if (wider_mode != VOIDmode)
234 PUT_MODE (&all.zext, wider_mode);
235 PUT_MODE (&all.wide_mult, wider_mode);
236 PUT_MODE (&all.wide_lshr, wider_mode);
237 XEXP (&all.wide_lshr, 1) = GEN_INT (GET_MODE_BITSIZE (mode));
239 mul_widen_cost[speed][wider_mode]
240 = set_src_cost (&all.wide_mult, speed);
241 mul_highpart_cost[speed][mode]
242 = set_src_cost (&all.wide_trunc, speed);
245 shift_cost[speed][mode][0] = 0;
246 shiftadd_cost[speed][mode][0] = shiftsub0_cost[speed][mode][0]
247 = shiftsub1_cost[speed][mode][0] = add_cost[speed][mode];
249 n = MIN (MAX_BITS_PER_WORD, GET_MODE_BITSIZE (mode));
250 for (m = 1; m < n; m++)
252 XEXP (&all.shift, 1) = cint[m];
253 XEXP (&all.shift_mult, 1) = pow2[m];
255 shift_cost[speed][mode][m] = set_src_cost (&all.shift, speed);
256 shiftadd_cost[speed][mode][m] = set_src_cost (&all.shift_add,
257 speed);
258 shiftsub0_cost[speed][mode][m] = set_src_cost (&all.shift_sub0,
259 speed);
260 shiftsub1_cost[speed][mode][m] = set_src_cost (&all.shift_sub1,
261 speed);
265 if (alg_hash_used_p)
266 memset (alg_hash, 0, sizeof (alg_hash));
267 else
268 alg_hash_used_p = true;
269 default_rtl_profile ();
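/* Editor's illustrative sketch (not part of the original file): the cost
   tables filled in above are later consulted when choosing between
   equivalent expansions.  The helper name below is hypothetical; it shows
   the typical comparison of a shift against a general multiplication.  */
#if 0
static bool
hypothetical_prefer_shift_p (enum machine_mode mode, int log2_factor, int speed)
{
  return shift_cost[speed][mode][log2_factor] <= mul_cost[speed][mode];
}
#endif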
272 /* Return an rtx representing minus the value of X.
273 MODE is the intended mode of the result,
274 useful if X is a CONST_INT. */
276 rtx
277 negate_rtx (enum machine_mode mode, rtx x)
279 rtx result = simplify_unary_operation (NEG, mode, x, mode);
281 if (result == 0)
282 result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
284 return result;
287 /* Report on the availability of insv/extv/extzv and the desired mode
288 of each of their operands. Returns MAX_MACHINE_MODE if HAVE_foo
289 is false; else the mode of the specified operand. If OPNO is -1,
290 all the caller cares about is whether the insn is available. */
291 enum machine_mode
292 mode_for_extraction (enum extraction_pattern pattern, int opno)
294 const struct insn_data_d *data;
296 switch (pattern)
298 case EP_insv:
299 if (HAVE_insv)
301 data = &insn_data[CODE_FOR_insv];
302 break;
304 return MAX_MACHINE_MODE;
306 case EP_extv:
307 if (HAVE_extv)
309 data = &insn_data[CODE_FOR_extv];
310 break;
312 return MAX_MACHINE_MODE;
314 case EP_extzv:
315 if (HAVE_extzv)
317 data = &insn_data[CODE_FOR_extzv];
318 break;
320 return MAX_MACHINE_MODE;
322 default:
323 gcc_unreachable ();
326 if (opno == -1)
327 return VOIDmode;
329 /* Everyone who uses this function used to follow it with
330 if (result == VOIDmode) result = word_mode; */
331 if (data->operand[opno].mode == VOIDmode)
332 return word_mode;
333 return data->operand[opno].mode;
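/* Editor's illustrative sketch (not part of the original file): a typical
   use of mode_for_extraction, mirroring the insv callers later in this
   file.  Operand 3 of insv is the inserted value; MAX_MACHINE_MODE means
   the pattern does not exist on this target.  */
#if 0
static bool
hypothetical_insv_available_p (void)
{
  return mode_for_extraction (EP_insv, 3) != MAX_MACHINE_MODE;
}
#endif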
336 /* A subroutine of store_bit_field, with the same arguments. Return true
337 if the operation could be implemented.
339 If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
340 no other way of implementing the operation. If FALLBACK_P is false,
341 return false instead. */
343 static bool
344 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
345 unsigned HOST_WIDE_INT bitnum,
346 unsigned HOST_WIDE_INT bitregion_start,
347 unsigned HOST_WIDE_INT bitregion_end,
348 enum machine_mode fieldmode,
349 rtx value, bool fallback_p)
351 unsigned int unit
352 = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
353 unsigned HOST_WIDE_INT offset, bitpos;
354 rtx op0 = str_rtx;
355 int byte_offset;
356 rtx orig_value;
358 enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);
360 while (GET_CODE (op0) == SUBREG)
362 /* The following line once was done only if WORDS_BIG_ENDIAN,
363 but I think that is a mistake. WORDS_BIG_ENDIAN is
364 meaningful at a much higher level; when structures are copied
365 between memory and regs, the higher-numbered regs
366 always get higher addresses. */
367 int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
368 int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
370 byte_offset = 0;
372 /* Paradoxical subregs need special handling on big endian machines. */
373 if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
375 int difference = inner_mode_size - outer_mode_size;
377 if (WORDS_BIG_ENDIAN)
378 byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
379 if (BYTES_BIG_ENDIAN)
380 byte_offset += difference % UNITS_PER_WORD;
382 else
383 byte_offset = SUBREG_BYTE (op0);
385 bitnum += byte_offset * BITS_PER_UNIT;
386 op0 = SUBREG_REG (op0);
389 /* No action is needed if the target is a register and if the field
390 lies completely outside that register. This can occur if the source
391 code contains an out-of-bounds access to a small array. */
392 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
393 return true;
395 /* Use vec_set patterns for inserting parts of vectors whenever
396 available. */
397 if (VECTOR_MODE_P (GET_MODE (op0))
398 && !MEM_P (op0)
399 && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
400 && fieldmode == GET_MODE_INNER (GET_MODE (op0))
401 && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
402 && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
404 struct expand_operand ops[3];
405 enum machine_mode outermode = GET_MODE (op0);
406 enum machine_mode innermode = GET_MODE_INNER (outermode);
407 enum insn_code icode = optab_handler (vec_set_optab, outermode);
408 int pos = bitnum / GET_MODE_BITSIZE (innermode);
410 create_fixed_operand (&ops[0], op0);
411 create_input_operand (&ops[1], value, innermode);
412 create_integer_operand (&ops[2], pos);
413 if (maybe_expand_insn (icode, 3, ops))
414 return true;
417 /* If the target is a register, overwriting the entire object, or storing
418 a full-word or multi-word field can be done with just a SUBREG.
420 If the target is memory, storing any naturally aligned field can be
421 done with a simple store. For targets that support fast unaligned
422 memory, any naturally sized, unit aligned field can be done directly. */
424 offset = bitnum / unit;
425 bitpos = bitnum % unit;
426 byte_offset = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
427 + (offset * UNITS_PER_WORD);
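/* Editor's worked example (illustrative, not part of the original file):
   for a REG destination UNIT is BITS_PER_WORD.  Assuming 32-bit words and
   8-bit bytes, a field starting at bit 37 gives OFFSET = 1 (second word),
   BITPOS = 5, and BYTE_OFFSET = (37 % 32) / 8 + 1 * 4 = 4 bytes.  */
#if 0
static void
hypothetical_offset_example (void)
{
  unsigned int unit = 32, bits_per_unit = 8, units_per_word = 4;
  unsigned int bitnum = 37;
  unsigned int offset = bitnum / unit;                          /* 1 */
  unsigned int bitpos = bitnum % unit;                          /* 5 */
  unsigned int byte_off = (bitnum % unit) / bits_per_unit
                          + offset * units_per_word;            /* 4 */
  (void) bitpos; (void) byte_off;
}
#endif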
429 if (bitpos == 0
430 && bitsize == GET_MODE_BITSIZE (fieldmode)
431 && (!MEM_P (op0)
432 ? ((GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD
433 || GET_MODE_SIZE (GET_MODE (op0)) == GET_MODE_SIZE (fieldmode))
434 && ((GET_MODE (op0) == fieldmode && byte_offset == 0)
435 || validate_subreg (fieldmode, GET_MODE (op0), op0,
436 byte_offset)))
437 : (! SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0))
438 || (offset * BITS_PER_UNIT % bitsize == 0
439 && MEM_ALIGN (op0) % GET_MODE_BITSIZE (fieldmode) == 0))))
441 if (MEM_P (op0))
442 op0 = adjust_address (op0, fieldmode, offset);
443 else if (GET_MODE (op0) != fieldmode)
444 op0 = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
445 byte_offset);
446 emit_move_insn (op0, value);
447 return true;
450 /* Make sure we are playing with integral modes. Pun with subregs
451 if we aren't. This must come after the entire register case above,
452 since that case is valid for any mode. The following cases are only
453 valid for integral modes. */
455 enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
456 if (imode != GET_MODE (op0))
458 if (MEM_P (op0))
459 op0 = adjust_address (op0, imode, 0);
460 else
462 gcc_assert (imode != BLKmode);
463 op0 = gen_lowpart (imode, op0);
468 /* We may be accessing data outside the field, which means
469 we can alias adjacent data. */
470 /* ?? not always for C++0x memory model ?? */
471 if (MEM_P (op0))
473 op0 = shallow_copy_rtx (op0);
474 set_mem_alias_set (op0, 0);
475 set_mem_expr (op0, 0);
478 /* If OP0 is a register, BITPOS must count within a word.
479 But as we have it, it counts within whatever size OP0 now has.
480 On a bigendian machine, these are not the same, so convert. */
481 if (BYTES_BIG_ENDIAN
482 && !MEM_P (op0)
483 && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
484 bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
486 /* Storing an lsb-aligned field in a register
487 can be done with a movestrict instruction. */
489 if (!MEM_P (op0)
490 && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
491 && bitsize == GET_MODE_BITSIZE (fieldmode)
492 && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
494 struct expand_operand ops[2];
495 enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
496 rtx arg0 = op0;
497 unsigned HOST_WIDE_INT subreg_off;
499 if (GET_CODE (arg0) == SUBREG)
501 /* Else we've got some float mode source being extracted into
502 a different float mode destination -- this combination of
503 subregs results in Severe Tire Damage. */
504 gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
505 || GET_MODE_CLASS (fieldmode) == MODE_INT
506 || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
507 arg0 = SUBREG_REG (arg0);
510 subreg_off = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
511 + (offset * UNITS_PER_WORD);
512 if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
514 arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
516 create_fixed_operand (&ops[0], arg0);
517 /* Shrink the source operand to FIELDMODE. */
518 create_convert_operand_to (&ops[1], value, fieldmode, false);
519 if (maybe_expand_insn (icode, 2, ops))
520 return true;
524 /* Handle fields bigger than a word. */
526 if (bitsize > BITS_PER_WORD)
528 /* Here we transfer the words of the field
529 in the order least significant first.
530 This is because the most significant word is the one which may
531 be less than full.
532 However, only do that if the value is not BLKmode. */
534 unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
535 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
536 unsigned int i;
537 rtx last;
539 /* This is the mode we must force value to, so that there will be enough
540 subwords to extract. Note that fieldmode will often (always?) be
541 VOIDmode, because that is what store_field uses to indicate that this
542 is a bit field, but passing VOIDmode to operand_subword_force
543 is not allowed. */
544 fieldmode = GET_MODE (value);
545 if (fieldmode == VOIDmode)
546 fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
548 last = get_last_insn ();
549 for (i = 0; i < nwords; i++)
551 /* If I is 0, use the low-order word in both field and target;
552 if I is 1, use the next to lowest word; and so on. */
553 unsigned int wordnum = (backwards ? nwords - i - 1 : i);
554 unsigned int bit_offset = (backwards
555 ? MAX ((int) bitsize - ((int) i + 1)
556 * BITS_PER_WORD,
557 0)
558 : (int) i * BITS_PER_WORD);
559 rtx value_word = operand_subword_force (value, wordnum, fieldmode);
561 if (!store_bit_field_1 (op0, MIN (BITS_PER_WORD,
562 bitsize - i * BITS_PER_WORD),
563 bitnum + bit_offset,
564 bitregion_start, bitregion_end,
565 word_mode,
566 value_word, fallback_p))
568 delete_insns_since (last);
569 return false;
572 return true;
575 /* From here on we can assume that the field being stored in fits within
576 a single word, since fields wider than a word were handled above. */
578 /* OFFSET is the number of words or bytes (UNIT says which)
579 from STR_RTX to the first word or byte containing part of the field. */
581 if (!MEM_P (op0))
583 if (offset != 0
584 || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
586 if (!REG_P (op0))
588 /* Since this is a destination (lvalue), we can't copy
589 it to a pseudo. We can remove a SUBREG that does not
590 change the size of the operand. Such a SUBREG may
591 have been added above. */
592 gcc_assert (GET_CODE (op0) == SUBREG
593 && (GET_MODE_SIZE (GET_MODE (op0))
594 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)))));
595 op0 = SUBREG_REG (op0);
597 op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
598 op0, (offset * UNITS_PER_WORD));
600 offset = 0;
603 /* If VALUE has a floating-point or complex mode, access it as an
604 integer of the corresponding size. This can occur on a machine
605 with 64 bit registers that uses SFmode for float. It can also
606 occur for unaligned float or complex fields. */
607 orig_value = value;
608 if (GET_MODE (value) != VOIDmode
609 && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
610 && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
612 value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
613 emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
616 /* Now OFFSET is nonzero only if OP0 is memory
617 and is therefore always measured in bytes. */
619 if (HAVE_insv
620 && GET_MODE (value) != BLKmode
621 && bitsize > 0
622 && GET_MODE_BITSIZE (op_mode) >= bitsize
623 /* Do not use insv for volatile bitfields when
624 -fstrict-volatile-bitfields is in effect. */
625 && !(MEM_P (op0) && MEM_VOLATILE_P (op0)
626 && flag_strict_volatile_bitfields > 0)
627 && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
628 && (bitsize + bitpos > GET_MODE_BITSIZE (op_mode))))
630 struct expand_operand ops[4];
631 int xbitpos = bitpos;
632 rtx value1;
633 rtx xop0 = op0;
634 rtx last = get_last_insn ();
635 bool copy_back = false;
637 /* Add OFFSET into OP0's address. */
638 if (MEM_P (xop0))
639 xop0 = adjust_address (xop0, byte_mode, offset);
641 /* If xop0 is a register, we need it in OP_MODE
642 to make it acceptable to the format of insv. */
643 if (GET_CODE (xop0) == SUBREG)
644 /* We can't just change the mode, because this might clobber op0,
645 and we will need the original value of op0 if insv fails. */
646 xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
647 if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
648 xop0 = gen_lowpart_SUBREG (op_mode, xop0);
650 /* If the destination is a paradoxical subreg such that we need a
651 truncate to the inner mode, perform the insertion on a temporary and
652 truncate the result to the original destination. Note that we can't
653 just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
654 X) 0)) is (reg:N X). */
655 if (GET_CODE (xop0) == SUBREG
656 && REG_P (SUBREG_REG (xop0))
657 && (!TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
658 op_mode)))
660 rtx tem = gen_reg_rtx (op_mode);
661 emit_move_insn (tem, xop0);
662 xop0 = tem;
663 copy_back = true;
666 /* We have been counting XBITPOS within UNIT.
667 Count instead within the size of the register. */
668 if (BYTES_BIG_ENDIAN && !MEM_P (xop0))
669 xbitpos += GET_MODE_BITSIZE (op_mode) - unit;
671 unit = GET_MODE_BITSIZE (op_mode);
673 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
674 "backwards" from the size of the unit we are inserting into.
675 Otherwise, we count bits from the most significant on a
676 BYTES/BITS_BIG_ENDIAN machine. */
678 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
679 xbitpos = unit - bitsize - xbitpos;
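/* Editor's worked example (illustrative, not part of the original file):
   with UNIT = 32, BITSIZE = 8 and XBITPOS = 5 counted from one end of the
   word, the same field counted from the opposite end starts at
   32 - 8 - 5 = 19.  */
#if 0
static void
hypothetical_endian_flip_example (void)
{
  unsigned int unit = 32, bitsize = 8, xbitpos = 5;
  xbitpos = unit - bitsize - xbitpos;   /* 19 */
  (void) xbitpos;
}
#endif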
681 /* Convert VALUE to op_mode (which insv insn wants) in VALUE1. */
682 value1 = value;
683 if (GET_MODE (value) != op_mode)
685 if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
687 /* Optimization: Don't bother really extending VALUE
688 if it has all the bits we will actually use. However,
689 if we must narrow it, be sure we do it correctly. */
691 if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
693 rtx tmp;
695 tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
696 if (! tmp)
697 tmp = simplify_gen_subreg (op_mode,
698 force_reg (GET_MODE (value),
699 value1),
700 GET_MODE (value), 0);
701 value1 = tmp;
703 else
704 value1 = gen_lowpart (op_mode, value1);
706 else if (CONST_INT_P (value))
707 value1 = gen_int_mode (INTVAL (value), op_mode);
708 else
709 /* Parse phase is supposed to make VALUE's data type
710 match that of the component reference, which is a type
711 at least as wide as the field; so VALUE should have
712 a mode that corresponds to that type. */
713 gcc_assert (CONSTANT_P (value));
716 create_fixed_operand (&ops[0], xop0);
717 create_integer_operand (&ops[1], bitsize);
718 create_integer_operand (&ops[2], xbitpos);
719 create_input_operand (&ops[3], value1, op_mode);
720 if (maybe_expand_insn (CODE_FOR_insv, 4, ops))
722 if (copy_back)
723 convert_move (op0, xop0, true);
724 return true;
726 delete_insns_since (last);
729 /* If OP0 is a memory, try copying it to a register and seeing if a
730 cheap register alternative is available. */
731 if (HAVE_insv && MEM_P (op0))
733 enum machine_mode bestmode;
734 unsigned HOST_WIDE_INT maxbits = MAX_FIXED_MODE_SIZE;
736 if (bitregion_end)
737 maxbits = bitregion_end - bitregion_start + 1;
739 /* Get the mode to use for inserting into this field. If OP0 is
740 BLKmode, get the smallest mode consistent with the alignment. If
741 OP0 is a non-BLKmode object that is no wider than OP_MODE, use its
742 mode. Otherwise, use the smallest mode containing the field. */
744 if (GET_MODE (op0) == BLKmode
745 || GET_MODE_BITSIZE (GET_MODE (op0)) > maxbits
746 || (op_mode != MAX_MACHINE_MODE
747 && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (op_mode)))
748 bestmode = get_best_mode (bitsize, bitnum,
749 bitregion_start, bitregion_end,
750 MEM_ALIGN (op0),
751 (op_mode == MAX_MACHINE_MODE
752 ? VOIDmode : op_mode),
753 MEM_VOLATILE_P (op0));
754 else
755 bestmode = GET_MODE (op0);
757 if (bestmode != VOIDmode
758 && GET_MODE_SIZE (bestmode) >= GET_MODE_SIZE (fieldmode)
759 && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
760 && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
762 rtx last, tempreg, xop0;
763 unsigned HOST_WIDE_INT xoffset, xbitpos;
765 last = get_last_insn ();
767 /* Adjust address to point to the containing unit of
768 that mode. Compute the offset as a multiple of this unit,
769 counting in bytes. */
770 unit = GET_MODE_BITSIZE (bestmode);
771 xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
772 xbitpos = bitnum % unit;
773 xop0 = adjust_address (op0, bestmode, xoffset);
775 /* Fetch that unit, store the bitfield in it, then store
776 the unit. */
777 tempreg = copy_to_reg (xop0);
778 if (store_bit_field_1 (tempreg, bitsize, xbitpos,
779 bitregion_start, bitregion_end,
780 fieldmode, orig_value, false))
782 emit_move_insn (xop0, tempreg);
783 return true;
785 delete_insns_since (last);
789 if (!fallback_p)
790 return false;
792 store_fixed_bit_field (op0, offset, bitsize, bitpos,
793 bitregion_start, bitregion_end, value);
794 return true;
797 /* Generate code to store value from rtx VALUE
798 into a bit-field within structure STR_RTX
799 containing BITSIZE bits starting at bit BITNUM.
801 BITREGION_START is the bitpos of the first bitfield in this region.
802 BITREGION_END is the bitpos of the ending bitfield in this region.
803 These two fields are 0 if the C++ memory model does not apply,
804 or if we are not interested in keeping track of bitfield regions.
806 FIELDMODE is the machine-mode of the FIELD_DECL node for this field. */
808 void
809 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
810 unsigned HOST_WIDE_INT bitnum,
811 unsigned HOST_WIDE_INT bitregion_start,
812 unsigned HOST_WIDE_INT bitregion_end,
813 enum machine_mode fieldmode,
814 rtx value)
816 /* Under the C++0x memory model, we must not touch bits outside the
817 bit region. Adjust the address to start at the beginning of the
818 bit region. */
819 if (MEM_P (str_rtx)
820 && bitregion_start > 0)
822 enum machine_mode bestmode;
823 enum machine_mode op_mode;
824 unsigned HOST_WIDE_INT offset;
826 op_mode = mode_for_extraction (EP_insv, 3);
827 if (op_mode == MAX_MACHINE_MODE)
828 op_mode = VOIDmode;
830 offset = bitregion_start / BITS_PER_UNIT;
831 bitnum -= bitregion_start;
832 bitregion_end -= bitregion_start;
833 bitregion_start = 0;
834 bestmode = get_best_mode (bitsize, bitnum,
835 bitregion_start, bitregion_end,
836 MEM_ALIGN (str_rtx),
837 op_mode,
838 MEM_VOLATILE_P (str_rtx));
839 str_rtx = adjust_address (str_rtx, bestmode, offset);
842 if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
843 bitregion_start, bitregion_end,
844 fieldmode, value, true))
845 gcc_unreachable ();
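/* Editor's illustrative sketch (not part of the original file): a minimal
   call storing an 8-bit field that starts at bit 3 of a pseudo register,
   with no C++ memory-model bit region (both region arguments 0).  */
#if 0
static void
hypothetical_store_byte_field (rtx pseudo, rtx value)
{
  store_bit_field (pseudo, 8, 3, 0, 0, QImode, value);
}
#endif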
848 /* Use shifts and boolean operations to store VALUE
849 into a bit field of width BITSIZE
850 in a memory location specified by OP0 except offset by OFFSET bytes.
851 (OFFSET must be 0 if OP0 is a register.)
852 The field starts at position BITPOS within the byte.
853 (If OP0 is a register, it may be a full word or a narrower mode,
854 but BITPOS still counts within a full word,
855 which is significant on bigendian machines.) */
857 static void
858 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT offset,
859 unsigned HOST_WIDE_INT bitsize,
860 unsigned HOST_WIDE_INT bitpos,
861 unsigned HOST_WIDE_INT bitregion_start,
862 unsigned HOST_WIDE_INT bitregion_end,
863 rtx value)
865 enum machine_mode mode;
866 unsigned int total_bits = BITS_PER_WORD;
867 rtx temp;
868 int all_zero = 0;
869 int all_one = 0;
871 /* There is a case not handled here:
872 a structure with a known alignment of just a halfword
873 and a field split across two aligned halfwords within the structure.
874 Or likewise a structure with a known alignment of just a byte
875 and a field split across two bytes.
876 Such cases are not supposed to be able to occur. */
878 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
880 gcc_assert (!offset);
881 /* Special treatment for a bit field split across two registers. */
882 if (bitsize + bitpos > BITS_PER_WORD)
884 store_split_bit_field (op0, bitsize, bitpos,
885 bitregion_start, bitregion_end,
886 value);
887 return;
890 else
892 unsigned HOST_WIDE_INT maxbits = MAX_FIXED_MODE_SIZE;
894 if (bitregion_end)
895 maxbits = bitregion_end - bitregion_start + 1;
897 /* Get the proper mode to use for this field. We want a mode that
898 includes the entire field. If such a mode would be larger than
899 a word, we won't be doing the extraction the normal way.
900 We don't want a mode bigger than the destination. */
902 mode = GET_MODE (op0);
903 if (GET_MODE_BITSIZE (mode) == 0
904 || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
905 mode = word_mode;
907 if (MEM_VOLATILE_P (op0)
908 && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
909 && GET_MODE_BITSIZE (GET_MODE (op0)) <= maxbits
910 && flag_strict_volatile_bitfields > 0)
911 mode = GET_MODE (op0);
912 else
913 mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
914 bitregion_start, bitregion_end,
915 MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
917 if (mode == VOIDmode)
919 /* The only way this should occur is if the field spans word
920 boundaries. */
921 store_split_bit_field (op0, bitsize, bitpos + offset * BITS_PER_UNIT,
922 bitregion_start, bitregion_end, value);
923 return;
926 total_bits = GET_MODE_BITSIZE (mode);
928 /* Make sure bitpos is valid for the chosen mode. Adjust BITPOS to
929 be in the range 0 to total_bits-1, and put any excess bytes in
930 OFFSET. */
931 if (bitpos >= total_bits)
933 offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
934 bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
935 * BITS_PER_UNIT);
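/* Editor's worked example (illustrative, not part of the original file):
   with TOTAL_BITS = 32 and an incoming BITPOS of 70, two full 32-bit units
   (8 bytes) move into OFFSET and BITPOS becomes 70 - 64 = 6.  */
#if 0
static void
hypothetical_bitpos_normalize_example (void)
{
  unsigned int total_bits = 32, bits_per_unit = 8;
  unsigned int bitpos = 70, offset = 0;
  offset += (bitpos / total_bits) * (total_bits / bits_per_unit);                   /* 8 */
  bitpos -= (bitpos / total_bits) * (total_bits / bits_per_unit) * bits_per_unit;   /* 6 */
  (void) offset; (void) bitpos;
}
#endif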
938 /* Get ref to an aligned byte, halfword, or word containing the field.
939 Adjust BITPOS to be position within a word,
940 and OFFSET to be the offset of that word.
941 Then alter OP0 to refer to that word. */
942 bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
943 offset -= (offset % (total_bits / BITS_PER_UNIT));
944 op0 = adjust_address (op0, mode, offset);
947 mode = GET_MODE (op0);
949 /* Now MODE is either some integral mode for a MEM as OP0,
950 or is a full-word for a REG as OP0. TOTAL_BITS corresponds.
951 The bit field is contained entirely within OP0.
952 BITPOS is the starting bit number within OP0.
953 (OP0's mode may actually be narrower than MODE.) */
955 if (BYTES_BIG_ENDIAN)
956 /* BITPOS is the distance between our msb
957 and that of the containing datum.
958 Convert it to the distance from the lsb. */
959 bitpos = total_bits - bitsize - bitpos;
961 /* Now BITPOS is always the distance between our lsb
962 and that of OP0. */
964 /* Shift VALUE left by BITPOS bits. If VALUE is not constant,
965 we must first convert its mode to MODE. */
967 if (CONST_INT_P (value))
969 HOST_WIDE_INT v = INTVAL (value);
971 if (bitsize < HOST_BITS_PER_WIDE_INT)
972 v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
974 if (v == 0)
975 all_zero = 1;
976 else if ((bitsize < HOST_BITS_PER_WIDE_INT
977 && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
978 || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
979 all_one = 1;
981 value = lshift_value (mode, value, bitpos, bitsize);
983 else
985 int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
986 && bitpos + bitsize != GET_MODE_BITSIZE (mode));
988 if (GET_MODE (value) != mode)
989 value = convert_to_mode (mode, value, 1);
991 if (must_and)
992 value = expand_binop (mode, and_optab, value,
993 mask_rtx (mode, 0, bitsize, 0),
994 NULL_RTX, 1, OPTAB_LIB_WIDEN);
995 if (bitpos > 0)
996 value = expand_shift (LSHIFT_EXPR, mode, value,
997 bitpos, NULL_RTX, 1);
1000 /* Now clear the chosen bits in OP0,
1001 except that if VALUE is -1 we need not bother. */
1002 /* We keep the intermediates in registers to allow CSE to combine
1003 consecutive bitfield assignments. */
1005 temp = force_reg (mode, op0);
1007 if (! all_one)
1009 temp = expand_binop (mode, and_optab, temp,
1010 mask_rtx (mode, bitpos, bitsize, 1),
1011 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1012 temp = force_reg (mode, temp);
1015 /* Now logical-or VALUE into OP0, unless it is zero. */
1017 if (! all_zero)
1019 temp = expand_binop (mode, ior_optab, temp, value,
1020 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1021 temp = force_reg (mode, temp);
1024 if (op0 != temp)
1026 op0 = copy_rtx (op0);
1027 emit_move_insn (op0, temp);
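/* Editor's illustrative sketch (not part of the original file): the RTL
   emitted above amounts to the classic read-modify-write sequence below,
   shown for a 32-bit word with BITPOS counted from the lsb.  */
#if 0
static unsigned int
hypothetical_store_field_in_word (unsigned int word, unsigned int value,
                                  unsigned int bitpos, unsigned int bitsize)
{
  unsigned int mask = bitsize < 32 ? (1u << bitsize) - 1 : ~0u;
  word &= ~(mask << bitpos);            /* clear the destination bits */
  word |= (value & mask) << bitpos;     /* OR in the shifted value */
  return word;
}
#endif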
1031 /* Store a bit field that is split across multiple accessible memory objects.
1033 OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1034 BITSIZE is the field width; BITPOS the position of its first bit
1035 (within the word).
1036 VALUE is the value to store.
1038 This does not yet handle fields wider than BITS_PER_WORD. */
1040 static void
1041 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1042 unsigned HOST_WIDE_INT bitpos,
1043 unsigned HOST_WIDE_INT bitregion_start,
1044 unsigned HOST_WIDE_INT bitregion_end,
1045 rtx value)
1047 unsigned int unit;
1048 unsigned int bitsdone = 0;
1050 /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1051 much at a time. */
1052 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1053 unit = BITS_PER_WORD;
1054 else
1055 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1057 /* If VALUE is a constant other than a CONST_INT, get it into a register in
1058 WORD_MODE. If we can do this using gen_lowpart_common, do so. Note
1059 that VALUE might be a floating-point constant. */
1060 if (CONSTANT_P (value) && !CONST_INT_P (value))
1062 rtx word = gen_lowpart_common (word_mode, value);
1064 if (word && (value != word))
1065 value = word;
1066 else
1067 value = gen_lowpart_common (word_mode,
1068 force_reg (GET_MODE (value) != VOIDmode
1069 ? GET_MODE (value)
1070 : word_mode, value));
1073 while (bitsdone < bitsize)
1075 unsigned HOST_WIDE_INT thissize;
1076 rtx part, word;
1077 unsigned HOST_WIDE_INT thispos;
1078 unsigned HOST_WIDE_INT offset;
1080 offset = (bitpos + bitsdone) / unit;
1081 thispos = (bitpos + bitsdone) % unit;
1083 /* THISSIZE must not overrun a word boundary. Otherwise,
1084 store_fixed_bit_field will call us again, and we will mutually
1085 recurse forever. */
1086 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1087 thissize = MIN (thissize, unit - thispos);
1089 if (BYTES_BIG_ENDIAN)
1091 int total_bits;
1093 /* We must do an endian conversion exactly the same way as it is
1094 done in extract_bit_field, so that the two calls to
1095 extract_fixed_bit_field will have comparable arguments. */
1096 if (!MEM_P (value) || GET_MODE (value) == BLKmode)
1097 total_bits = BITS_PER_WORD;
1098 else
1099 total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1101 /* Fetch successively less significant portions. */
1102 if (CONST_INT_P (value))
1103 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1104 >> (bitsize - bitsdone - thissize))
1105 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1106 else
1107 /* The args are chosen so that the last part includes the
1108 lsb. Give extract_bit_field the value it needs (with
1109 endianness compensation) to fetch the piece we want. */
1110 part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1111 total_bits - bitsize + bitsdone,
1112 NULL_RTX, 1, false);
1114 else
1116 /* Fetch successively more significant portions. */
1117 if (CONST_INT_P (value))
1118 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1119 >> bitsdone)
1120 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1121 else
1122 part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1123 bitsdone, NULL_RTX, 1, false);
1126 /* If OP0 is a register, then handle OFFSET here.
1128 When handling multiword bitfields, extract_bit_field may pass
1129 down a word_mode SUBREG of a larger REG for a bitfield that actually
1130 crosses a word boundary. Thus, for a SUBREG, we must find
1131 the current word starting from the base register. */
1132 if (GET_CODE (op0) == SUBREG)
1134 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1135 enum machine_mode sub_mode = GET_MODE (SUBREG_REG (op0));
1136 if (sub_mode != BLKmode && GET_MODE_SIZE (sub_mode) < UNITS_PER_WORD)
1137 word = word_offset ? const0_rtx : op0;
1138 else
1139 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1140 GET_MODE (SUBREG_REG (op0)));
1141 offset = 0;
1143 else if (REG_P (op0))
1145 enum machine_mode op0_mode = GET_MODE (op0);
1146 if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
1147 word = offset ? const0_rtx : op0;
1148 else
1149 word = operand_subword_force (op0, offset, GET_MODE (op0));
1150 offset = 0;
1152 else
1153 word = op0;
1155 /* OFFSET is in UNITs, and UNIT is in bits.
1156 store_fixed_bit_field wants offset in bytes. If WORD is const0_rtx,
1157 it is just an out-of-bounds access. Ignore it. */
1158 if (word != const0_rtx)
1159 store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT, thissize,
1160 thispos, bitregion_start, bitregion_end, part);
1161 bitsdone += thissize;
1165 /* A subroutine of extract_bit_field_1 that converts return value X
1166 to either MODE or TMODE. MODE, TMODE and UNSIGNEDP are arguments
1167 to extract_bit_field. */
1169 static rtx
1170 convert_extracted_bit_field (rtx x, enum machine_mode mode,
1171 enum machine_mode tmode, bool unsignedp)
1173 if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1174 return x;
1176 /* If the x mode is not a scalar integral, first convert to the
1177 integer mode of that size and then access it as a floating-point
1178 value via a SUBREG. */
1179 if (!SCALAR_INT_MODE_P (tmode))
1181 enum machine_mode smode;
1183 smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1184 x = convert_to_mode (smode, x, unsignedp);
1185 x = force_reg (smode, x);
1186 return gen_lowpart (tmode, x);
1189 return convert_to_mode (tmode, x, unsignedp);
1192 /* A subroutine of extract_bit_field, with the same arguments.
1193 If FALLBACK_P is true, fall back to extract_fixed_bit_field
1194 if we can find no other means of implementing the operation.
1195 If FALLBACK_P is false, return NULL instead. */
1197 static rtx
1198 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1199 unsigned HOST_WIDE_INT bitnum,
1200 int unsignedp, bool packedp, rtx target,
1201 enum machine_mode mode, enum machine_mode tmode,
1202 bool fallback_p)
1204 unsigned int unit
1205 = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
1206 unsigned HOST_WIDE_INT offset, bitpos;
1207 rtx op0 = str_rtx;
1208 enum machine_mode int_mode;
1209 enum machine_mode ext_mode;
1210 enum machine_mode mode1;
1211 int byte_offset;
1213 if (tmode == VOIDmode)
1214 tmode = mode;
1216 while (GET_CODE (op0) == SUBREG)
1218 bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1219 op0 = SUBREG_REG (op0);
1222 /* If we have an out-of-bounds access to a register, just return an
1223 uninitialized register of the required mode. This can occur if the
1224 source code contains an out-of-bounds access to a small array. */
1225 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1226 return gen_reg_rtx (tmode);
1228 if (REG_P (op0)
1229 && mode == GET_MODE (op0)
1230 && bitnum == 0
1231 && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1233 /* We're trying to extract a full register from itself. */
1234 return op0;
1237 /* See if we can get a better vector mode before extracting. */
1238 if (VECTOR_MODE_P (GET_MODE (op0))
1239 && !MEM_P (op0)
1240 && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1242 enum machine_mode new_mode;
1244 if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1245 new_mode = MIN_MODE_VECTOR_FLOAT;
1246 else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1247 new_mode = MIN_MODE_VECTOR_FRACT;
1248 else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1249 new_mode = MIN_MODE_VECTOR_UFRACT;
1250 else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1251 new_mode = MIN_MODE_VECTOR_ACCUM;
1252 else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1253 new_mode = MIN_MODE_VECTOR_UACCUM;
1254 else
1255 new_mode = MIN_MODE_VECTOR_INT;
1257 for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1258 if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1259 && targetm.vector_mode_supported_p (new_mode))
1260 break;
1261 if (new_mode != VOIDmode)
1262 op0 = gen_lowpart (new_mode, op0);
1265 /* Use vec_extract patterns for extracting parts of vectors whenever
1266 available. */
1267 if (VECTOR_MODE_P (GET_MODE (op0))
1268 && !MEM_P (op0)
1269 && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing
1270 && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1271 == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1273 struct expand_operand ops[3];
1274 enum machine_mode outermode = GET_MODE (op0);
1275 enum machine_mode innermode = GET_MODE_INNER (outermode);
1276 enum insn_code icode = optab_handler (vec_extract_optab, outermode);
1277 unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1279 create_output_operand (&ops[0], target, innermode);
1280 create_input_operand (&ops[1], op0, outermode);
1281 create_integer_operand (&ops[2], pos);
1282 if (maybe_expand_insn (icode, 3, ops))
1284 target = ops[0].value;
1285 if (GET_MODE (target) != mode)
1286 return gen_lowpart (tmode, target);
1287 return target;
1291 /* Make sure we are playing with integral modes. Pun with subregs
1292 if we aren't. */
1294 enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1295 if (imode != GET_MODE (op0))
1297 if (MEM_P (op0))
1298 op0 = adjust_address (op0, imode, 0);
1299 else if (imode != BLKmode)
1301 op0 = gen_lowpart (imode, op0);
1303 /* If we got a SUBREG, force it into a register since we
1304 aren't going to be able to do another SUBREG on it. */
1305 if (GET_CODE (op0) == SUBREG)
1306 op0 = force_reg (imode, op0);
1308 else if (REG_P (op0))
1310 rtx reg, subreg;
1311 imode = smallest_mode_for_size (GET_MODE_BITSIZE (GET_MODE (op0)),
1312 MODE_INT);
1313 reg = gen_reg_rtx (imode);
1314 subreg = gen_lowpart_SUBREG (GET_MODE (op0), reg);
1315 emit_move_insn (subreg, op0);
1316 op0 = reg;
1317 bitnum += SUBREG_BYTE (subreg) * BITS_PER_UNIT;
1319 else
1321 rtx mem = assign_stack_temp (GET_MODE (op0),
1322 GET_MODE_SIZE (GET_MODE (op0)), 0);
1323 emit_move_insn (mem, op0);
1324 op0 = adjust_address (mem, BLKmode, 0);
1329 /* We may be accessing data outside the field, which means
1330 we can alias adjacent data. */
1331 if (MEM_P (op0))
1333 op0 = shallow_copy_rtx (op0);
1334 set_mem_alias_set (op0, 0);
1335 set_mem_expr (op0, 0);
1338 /* Extraction of a full-word or multi-word value from a structure
1339 in a register or aligned memory can be done with just a SUBREG.
1340 A subword value in the least significant part of a register
1341 can also be extracted with a SUBREG. For this, we need the
1342 byte offset of the value in op0. */
1344 bitpos = bitnum % unit;
1345 offset = bitnum / unit;
1346 byte_offset = bitpos / BITS_PER_UNIT + offset * UNITS_PER_WORD;
1348 /* If OP0 is a register, BITPOS must count within a word.
1349 But as we have it, it counts within whatever size OP0 now has.
1350 On a bigendian machine, these are not the same, so convert. */
1351 if (BYTES_BIG_ENDIAN
1352 && !MEM_P (op0)
1353 && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
1354 bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1356 /* ??? We currently assume TARGET is at least as big as BITSIZE.
1357 If that's wrong, the solution is to test for it and set TARGET to 0
1358 if needed. */
1360 /* Only scalar integer modes can be converted via subregs. There is an
1361 additional problem for FP modes here in that they can have a precision
1362 which is different from the size. mode_for_size uses precision, but
1363 we want a mode based on the size, so we must avoid calling it for FP
1364 modes. */
1365 mode1 = (SCALAR_INT_MODE_P (tmode)
1366 ? mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0)
1367 : mode);
1369 /* If the bitfield is volatile, we need to make sure the access
1370 remains on a type-aligned boundary. */
1371 if (GET_CODE (op0) == MEM
1372 && MEM_VOLATILE_P (op0)
1373 && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
1374 && flag_strict_volatile_bitfields > 0)
1375 goto no_subreg_mode_swap;
1377 if (((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode)
1378 && bitpos % BITS_PER_WORD == 0)
1379 || (mode1 != BLKmode
1380 /* ??? The big endian test here is wrong. This is correct
1381 if the value is in a register, and if mode_for_size is not
1382 the same mode as op0. This causes us to get unnecessarily
1383 inefficient code from the Thumb port when -mbig-endian. */
1384 && (BYTES_BIG_ENDIAN
1385 ? bitpos + bitsize == BITS_PER_WORD
1386 : bitpos == 0)))
1387 && ((!MEM_P (op0)
1388 && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0))
1389 && GET_MODE_SIZE (mode1) != 0
1390 && byte_offset % GET_MODE_SIZE (mode1) == 0)
1391 || (MEM_P (op0)
1392 && (! SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
1393 || (offset * BITS_PER_UNIT % bitsize == 0
1394 && MEM_ALIGN (op0) % bitsize == 0)))))
1396 if (MEM_P (op0))
1397 op0 = adjust_address (op0, mode1, offset);
1398 else if (mode1 != GET_MODE (op0))
1400 rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1401 byte_offset);
1402 if (sub == NULL)
1403 goto no_subreg_mode_swap;
1404 op0 = sub;
1406 if (mode1 != mode)
1407 return convert_to_mode (tmode, op0, unsignedp);
1408 return op0;
1410 no_subreg_mode_swap:
1412 /* Handle fields bigger than a word. */
1414 if (bitsize > BITS_PER_WORD)
1416 /* Here we transfer the words of the field
1417 in the order least significant first.
1418 This is because the most significant word is the one which may
1419 be less than full. */
1421 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1422 unsigned int i;
1424 if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1425 target = gen_reg_rtx (mode);
1427 /* Indicate for flow that the entire target reg is being set. */
1428 emit_clobber (target);
1430 for (i = 0; i < nwords; i++)
1432 /* If I is 0, use the low-order word in both field and target;
1433 if I is 1, use the next to lowest word; and so on. */
1434 /* Word number in TARGET to use. */
1435 unsigned int wordnum
1436 = (WORDS_BIG_ENDIAN
1437 ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1438 : i);
1439 /* Offset from start of field in OP0. */
1440 unsigned int bit_offset = (WORDS_BIG_ENDIAN
1441 ? MAX (0, ((int) bitsize - ((int) i + 1)
1442 * (int) BITS_PER_WORD))
1443 : (int) i * BITS_PER_WORD);
1444 rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1445 rtx result_part
1446 = extract_bit_field (op0, MIN (BITS_PER_WORD,
1447 bitsize - i * BITS_PER_WORD),
1448 bitnum + bit_offset, 1, false, target_part, mode,
1449 word_mode);
1451 gcc_assert (target_part);
1453 if (result_part != target_part)
1454 emit_move_insn (target_part, result_part);
1457 if (unsignedp)
1459 /* Unless we've filled TARGET, the upper regs in a multi-reg value
1460 need to be zero'd out. */
1461 if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1463 unsigned int i, total_words;
1465 total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1466 for (i = nwords; i < total_words; i++)
1467 emit_move_insn
1468 (operand_subword (target,
1469 WORDS_BIG_ENDIAN ? total_words - i - 1 : i,
1470 1, VOIDmode),
1471 const0_rtx);
1473 return target;
1476 /* Signed bit field: sign-extend with two arithmetic shifts. */
1477 target = expand_shift (LSHIFT_EXPR, mode, target,
1478 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1479 return expand_shift (RSHIFT_EXPR, mode, target,
1480 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
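/* Editor's illustrative sketch (not part of the original file): the two
   shifts above correspond to the usual C idiom for sign-extending an
   N-bit field already sitting in the low bits of a word; it assumes an
   arithmetic right shift on signed values.  */
#if 0
static int
hypothetical_sign_extend_field (unsigned int field, unsigned int bitsize)
{
  int shift = 32 - (int) bitsize;
  return ((int) (field << shift)) >> shift;
}
#endif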
1483 /* From here on we know the desired field is smaller than a word. */
1485 /* Check if there is a correspondingly-sized integer field, so we can
1486 safely extract it as one size of integer, if necessary; then
1487 truncate or extend to the size that is wanted; then use SUBREGs or
1488 convert_to_mode to get one of the modes we really wanted. */
1490 int_mode = int_mode_for_mode (tmode);
1491 if (int_mode == BLKmode)
1492 int_mode = int_mode_for_mode (mode);
1493 /* Should probably push op0 out to memory and then do a load. */
1494 gcc_assert (int_mode != BLKmode);
1496 /* OFFSET is the number of words or bytes (UNIT says which)
1497 from STR_RTX to the first word or byte containing part of the field. */
1498 if (!MEM_P (op0))
1500 if (offset != 0
1501 || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1503 if (!REG_P (op0))
1504 op0 = copy_to_reg (op0);
1505 op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
1506 op0, (offset * UNITS_PER_WORD));
1508 offset = 0;
1511 /* Now OFFSET is nonzero only for memory operands. */
1512 ext_mode = mode_for_extraction (unsignedp ? EP_extzv : EP_extv, 0);
1513 if (ext_mode != MAX_MACHINE_MODE
1514 && bitsize > 0
1515 && GET_MODE_BITSIZE (ext_mode) >= bitsize
1516 /* Do not use extv/extzv for volatile bitfields when
1517 -fstrict-volatile-bitfields is in effect. */
1518 && !(MEM_P (op0) && MEM_VOLATILE_P (op0)
1519 && flag_strict_volatile_bitfields > 0)
1520 /* If op0 is a register, we need it in EXT_MODE to make it
1521 acceptable to the format of ext(z)v. */
1522 && !(GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1523 && !((REG_P (op0) || GET_CODE (op0) == SUBREG)
1524 && (bitsize + bitpos > GET_MODE_BITSIZE (ext_mode))))
1526 struct expand_operand ops[4];
1527 unsigned HOST_WIDE_INT xbitpos = bitpos, xoffset = offset;
1528 rtx xop0 = op0;
1529 rtx xtarget = target;
1530 rtx xspec_target = target;
1531 rtx xspec_target_subreg = 0;
1533 /* If op0 is a register, we need it in EXT_MODE to make it
1534 acceptable to the format of ext(z)v. */
1535 if (REG_P (xop0) && GET_MODE (xop0) != ext_mode)
1536 xop0 = gen_lowpart_SUBREG (ext_mode, xop0);
1537 if (MEM_P (xop0))
1538 /* Get ref to first byte containing part of the field. */
1539 xop0 = adjust_address (xop0, byte_mode, xoffset);
1541 /* Now convert from counting within UNIT to counting in EXT_MODE. */
1542 if (BYTES_BIG_ENDIAN && !MEM_P (xop0))
1543 xbitpos += GET_MODE_BITSIZE (ext_mode) - unit;
1545 unit = GET_MODE_BITSIZE (ext_mode);
1547 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1548 "backwards" from the size of the unit we are extracting from.
1549 Otherwise, we count bits from the most significant on a
1550 BYTES/BITS_BIG_ENDIAN machine. */
1552 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1553 xbitpos = unit - bitsize - xbitpos;
1555 if (xtarget == 0)
1556 xtarget = xspec_target = gen_reg_rtx (tmode);
1558 if (GET_MODE (xtarget) != ext_mode)
1560 /* Don't use LHS paradoxical subreg if explicit truncation is needed
1561 between the mode of the extraction (word_mode) and the target
1562 mode. Instead, create a temporary and use convert_move to set
1563 the target. */
1564 if (REG_P (xtarget)
1565 && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (xtarget), ext_mode))
1567 xtarget = gen_lowpart (ext_mode, xtarget);
1568 if (GET_MODE_PRECISION (ext_mode)
1569 > GET_MODE_PRECISION (GET_MODE (xspec_target)))
1570 xspec_target_subreg = xtarget;
1572 else
1573 xtarget = gen_reg_rtx (ext_mode);
1576 create_output_operand (&ops[0], xtarget, ext_mode);
1577 create_fixed_operand (&ops[1], xop0);
1578 create_integer_operand (&ops[2], bitsize);
1579 create_integer_operand (&ops[3], xbitpos);
1580 if (maybe_expand_insn (unsignedp ? CODE_FOR_extzv : CODE_FOR_extv,
1581 4, ops))
1583 xtarget = ops[0].value;
1584 if (xtarget == xspec_target)
1585 return xtarget;
1586 if (xtarget == xspec_target_subreg)
1587 return xspec_target;
1588 return convert_extracted_bit_field (xtarget, mode, tmode, unsignedp);
1592 /* If OP0 is a memory, try copying it to a register and seeing if a
1593 cheap register alternative is available. */
1594 if (ext_mode != MAX_MACHINE_MODE && MEM_P (op0))
1596 enum machine_mode bestmode;
1598 /* Get the mode to use for inserting into this field. If
1599 OP0 is BLKmode, get the smallest mode consistent with the
1600 alignment. If OP0 is a non-BLKmode object that is no
1601 wider than EXT_MODE, use its mode. Otherwise, use the
1602 smallest mode containing the field. */
1604 if (GET_MODE (op0) == BLKmode
1605 || (ext_mode != MAX_MACHINE_MODE
1606 && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (ext_mode)))
1607 bestmode = get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0),
1608 (ext_mode == MAX_MACHINE_MODE
1609 ? VOIDmode : ext_mode),
1610 MEM_VOLATILE_P (op0));
1611 else
1612 bestmode = GET_MODE (op0);
1614 if (bestmode != VOIDmode
1615 && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
1616 && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
1618 unsigned HOST_WIDE_INT xoffset, xbitpos;
1620 /* Compute the offset as a multiple of this unit,
1621 counting in bytes. */
1622 unit = GET_MODE_BITSIZE (bestmode);
1623 xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1624 xbitpos = bitnum % unit;
1626 /* Make sure the register is big enough for the whole field. */
1627 if (xoffset * BITS_PER_UNIT + unit
1628 >= offset * BITS_PER_UNIT + bitsize)
1630 rtx last, result, xop0;
1632 last = get_last_insn ();
1634 /* Fetch it to a register in that size. */
1635 xop0 = adjust_address (op0, bestmode, xoffset);
1636 xop0 = force_reg (bestmode, xop0);
1637 result = extract_bit_field_1 (xop0, bitsize, xbitpos,
1638 unsignedp, packedp, target,
1639 mode, tmode, false);
1640 if (result)
1641 return result;
1643 delete_insns_since (last);
1648 if (!fallback_p)
1649 return NULL;
1651 target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1652 bitpos, target, unsignedp, packedp);
1653 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1656 /* Generate code to extract a byte-field from STR_RTX
1657 containing BITSIZE bits, starting at BITNUM,
1658 and put it in TARGET if possible (if TARGET is nonzero).
1659 Regardless of TARGET, we return the rtx for where the value is placed.
1661 STR_RTX is the structure containing the byte (a REG or MEM).
1662 UNSIGNEDP is nonzero if this is an unsigned bit field.
1663 PACKEDP is nonzero if the field has the packed attribute.
1664 MODE is the natural mode of the field value once extracted.
1665 TMODE is the mode the caller would like the value to have;
1666 but the value may be returned with type MODE instead.
1668 If a TARGET is specified and we can store in it at no extra cost,
1669 we do so, and return TARGET.
1670 Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1671 if they are equally easy. */
1673 rtx
1674 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1675 unsigned HOST_WIDE_INT bitnum, int unsignedp, bool packedp,
1676 rtx target, enum machine_mode mode, enum machine_mode tmode)
1678 return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp, packedp,
1679 target, mode, tmode, true);
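/* Editor's illustrative sketch (not part of the original file): extracting
   an unsigned 8-bit field that starts at bit 3 of a pseudo register,
   asking for an SImode result (QImode is the field's natural mode).  */
#if 0
static rtx
hypothetical_extract_byte_field (rtx pseudo)
{
  return extract_bit_field (pseudo, 8, 3, 1, false, NULL_RTX,
                            QImode, SImode);
}
#endif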
1682 /* Extract a bit field using shifts and boolean operations.
1683 Returns an rtx to represent the value.
1684 OP0 addresses a register (word) or memory (byte).
1685 BITPOS says which bit within the word or byte the bit field starts in.
1686 OFFSET says how many bytes farther the bit field starts;
1687 it is 0 if OP0 is a register.
1688 BITSIZE says how many bits long the bit field is.
1689 (If OP0 is a register, it may be narrower than a full word,
1690 but BITPOS still counts within a full word,
1691 which is significant on bigendian machines.)
1693 UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1694 PACKEDP is true if the field has the packed attribute.
1696 If TARGET is nonzero, attempts to store the value there
1697 and return TARGET, but this is not guaranteed.
1698 If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */
1700 static rtx
1701 extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
1702 unsigned HOST_WIDE_INT offset,
1703 unsigned HOST_WIDE_INT bitsize,
1704 unsigned HOST_WIDE_INT bitpos, rtx target,
1705 int unsignedp, bool packedp)
1707 unsigned int total_bits = BITS_PER_WORD;
1708 enum machine_mode mode;
1710 if (GET_CODE (op0) == SUBREG || REG_P (op0))
1712 /* Special treatment for a bit field split across two registers. */
1713 if (bitsize + bitpos > BITS_PER_WORD)
1714 return extract_split_bit_field (op0, bitsize, bitpos, unsignedp);
1716 else
1718 /* Get the proper mode to use for this field. We want a mode that
1719 includes the entire field. If such a mode would be larger than
1720 a word, we won't be doing the extraction the normal way. */
1722 if (MEM_VOLATILE_P (op0)
1723 && flag_strict_volatile_bitfields > 0)
1725 if (GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
1726 mode = GET_MODE (op0);
1727 else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1728 mode = GET_MODE (target);
1729 else
1730 mode = tmode;
1732 else
1733 mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT, 0, 0,
1734 MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));
1736 if (mode == VOIDmode)
1737 /* The only way this should occur is if the field spans word
1738 boundaries. */
1739 return extract_split_bit_field (op0, bitsize,
1740 bitpos + offset * BITS_PER_UNIT,
1741 unsignedp);
1743 total_bits = GET_MODE_BITSIZE (mode);
1745 /* Make sure bitpos is valid for the chosen mode. Adjust BITPOS to
1746 be in the range 0 to total_bits-1, and put any excess bytes in
1747 OFFSET. */
1748 if (bitpos >= total_bits)
1750 offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
1751 bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
1752 * BITS_PER_UNIT);
1755 /* If we're accessing a volatile MEM, we can't do the next
1756 alignment step if it results in a multi-word access where we
1757 otherwise wouldn't have one. So, check for that case
1758 here. */
1759 if (MEM_P (op0)
1760 && MEM_VOLATILE_P (op0)
1761 && flag_strict_volatile_bitfields > 0
1762 && bitpos + bitsize <= total_bits
1763 && bitpos + bitsize + (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT > total_bits)
1765 if (STRICT_ALIGNMENT)
1767 static bool informed_about_misalignment = false;
1768 bool warned;
1770 if (packedp)
1772 if (bitsize == total_bits)
1773 warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1774 "multiple accesses to volatile structure member"
1775 " because of packed attribute");
1776 else
1777 warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1778 "multiple accesses to volatile structure bitfield"
1779 " because of packed attribute");
1781 return extract_split_bit_field (op0, bitsize,
1782 bitpos + offset * BITS_PER_UNIT,
1783 unsignedp);
1786 if (bitsize == total_bits)
1787 warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1788 "mis-aligned access used for structure member");
1789 else
1790 warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1791 "mis-aligned access used for structure bitfield");
1793 if (! informed_about_misalignment && warned)
1795 informed_about_misalignment = true;
1796 inform (input_location,
1797 "when a volatile object spans multiple type-sized locations,"
1798 " the compiler must choose between using a single mis-aligned access to"
1799 " preserve the volatility, or using multiple aligned accesses to avoid"
1800 " runtime faults; this code may fail at runtime if the hardware does"
1801 " not allow this access");
1805 else
1808 /* Get ref to an aligned byte, halfword, or word containing the field.
1809 Adjust BITPOS to be position within a word,
1810 and OFFSET to be the offset of that word.
1811 Then alter OP0 to refer to that word. */
1812 bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
1813 offset -= (offset % (total_bits / BITS_PER_UNIT));
1816 op0 = adjust_address (op0, mode, offset);
1819 mode = GET_MODE (op0);
1821 if (BYTES_BIG_ENDIAN)
1822 /* BITPOS is the distance between our msb and that of OP0.
1823 Convert it to the distance from the lsb. */
1824 bitpos = total_bits - bitsize - bitpos;
1826 /* Now BITPOS is always the distance between the field's lsb and that of OP0.
1827 We have reduced the big-endian case to the little-endian case. */
1829 if (unsignedp)
1831 if (bitpos)
1833 /* If the field does not already start at the lsb,
1834 shift it so it does. */
1835 /* Maybe propagate the target for the shift. */
1836 /* But not if we will return it--could confuse integrate.c. */
1837 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1838 if (tmode != mode) subtarget = 0;
1839 op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitpos, subtarget, 1);
1841 /* Convert the value to the desired mode. */
1842 if (mode != tmode)
1843 op0 = convert_to_mode (tmode, op0, 1);
1845 /* Unless the msb of the field used to be the msb when we shifted,
1846 mask out the upper bits. */
1848 if (GET_MODE_BITSIZE (mode) != bitpos + bitsize)
1849 return expand_binop (GET_MODE (op0), and_optab, op0,
1850 mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1851 target, 1, OPTAB_LIB_WIDEN);
1852 return op0;
1855 /* To extract a signed bit-field, first shift its msb to the msb of the word,
1856 then arithmetic-shift its lsb to the lsb of the word. */
1857 op0 = force_reg (mode, op0);
1859 /* Find the narrowest integer mode that contains the field. */
1861 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1862 mode = GET_MODE_WIDER_MODE (mode))
1863 if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos)
1865 op0 = convert_to_mode (mode, op0, 0);
1866 break;
1869 if (mode != tmode)
1870 target = 0;
1872 if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos))
1874 int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitpos);
1875 /* Maybe propagate the target for the shift. */
1876 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1877 op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1880 return expand_shift (RSHIFT_EXPR, mode, op0,
1881 GET_MODE_BITSIZE (mode) - bitsize, target, 0);
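/* Illustrative sketch only: the signed path above is the classic idiom of
   shifting left so the field's msb becomes the word's msb, then shifting
   right arithmetically.  In plain C, assuming a 32-bit mode and an
   implementation where >> of a negative value is an arithmetic shift and
   unsigned-to-signed conversion wraps (helper name is illustrative):

     static int32_t
     extract_signed (int32_t word, unsigned bitpos, unsigned bitsize)
     {
       return (int32_t) ((uint32_t) word << (32 - bitsize - bitpos))
              >> (32 - bitsize);
     }

   e.g. extract_signed (0x00000F00, 8, 4) yields -1, since the 4-bit field
   value 0xF sign-extends to all ones.  */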
1884 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1885 of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1886 complement of that if COMPLEMENT. The mask is truncated if
1887 necessary to the width of mode MODE. The mask is zero-extended if
1888 BITSIZE+BITPOS is too small for MODE. */
1890 static rtx
1891 mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
1893 double_int mask;
1895 mask = double_int_mask (bitsize);
1896 mask = double_int_lshift (mask, bitpos, HOST_BITS_PER_DOUBLE_INT, false);
1898 if (complement)
1899 mask = double_int_not (mask);
1901 return immed_double_int_const (mask, mode);
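/* Illustrative sketch only: for a 32-bit mode the mask built above is
   simply BITSIZE one bits shifted up by BITPOS, i.e.

     uint32_t mask = (bitsize >= 32 ? ~0u : (1u << bitsize) - 1) << bitpos;

   so mask_rtx (SImode, 8, 4, 0) describes 0x00000F00, and
   mask_rtx (SImode, 8, 4, 1) its complement 0xFFFFF0FF.  */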
1904 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1905 VALUE truncated to BITSIZE bits and then shifted left BITPOS bits. */
1907 static rtx
1908 lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize)
1910 double_int val;
1912 val = double_int_zext (uhwi_to_double_int (INTVAL (value)), bitsize);
1913 val = double_int_lshift (val, bitpos, HOST_BITS_PER_DOUBLE_INT, false);
1915 return immed_double_int_const (val, mode);
1918 /* Extract a bit field that is split across two words
1919 and return an RTX for the result.
1921 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1922 BITSIZE is the field width; BITPOS, position of its first bit, in the word.
1923 UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend. */
1925 static rtx
1926 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1927 unsigned HOST_WIDE_INT bitpos, int unsignedp)
1929 unsigned int unit;
1930 unsigned int bitsdone = 0;
1931 rtx result = NULL_RTX;
1932 int first = 1;
1934 /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1935 much at a time. */
1936 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1937 unit = BITS_PER_WORD;
1938 else
1939 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1941 while (bitsdone < bitsize)
1943 unsigned HOST_WIDE_INT thissize;
1944 rtx part, word;
1945 unsigned HOST_WIDE_INT thispos;
1946 unsigned HOST_WIDE_INT offset;
1948 offset = (bitpos + bitsdone) / unit;
1949 thispos = (bitpos + bitsdone) % unit;
1951 /* THISSIZE must not overrun a word boundary. Otherwise,
1952 extract_fixed_bit_field will call us again, and we will mutually
1953 recurse forever. */
1954 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1955 thissize = MIN (thissize, unit - thispos);
1957 /* If OP0 is a register, then handle OFFSET here.
1959 When handling multiword bitfields, extract_bit_field may pass
1960 down a word_mode SUBREG of a larger REG for a bitfield that actually
1961 crosses a word boundary. Thus, for a SUBREG, we must find
1962 the current word starting from the base register. */
1963 if (GET_CODE (op0) == SUBREG)
1965 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1966 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1967 GET_MODE (SUBREG_REG (op0)));
1968 offset = 0;
1970 else if (REG_P (op0))
1972 word = operand_subword_force (op0, offset, GET_MODE (op0));
1973 offset = 0;
1975 else
1976 word = op0;
1978 /* Extract the parts in bit-counting order,
1979 whose meaning is determined by BYTES_PER_UNIT.
1980 OFFSET is in UNITs, and UNIT is in bits.
1981 extract_fixed_bit_field wants offset in bytes. */
1982 part = extract_fixed_bit_field (word_mode, word,
1983 offset * unit / BITS_PER_UNIT,
1984 thissize, thispos, 0, 1, false);
1985 bitsdone += thissize;
1987 /* Shift this part into place for the result. */
1988 if (BYTES_BIG_ENDIAN)
1990 if (bitsize != bitsdone)
1991 part = expand_shift (LSHIFT_EXPR, word_mode, part,
1992 bitsize - bitsdone, 0, 1);
1994 else
1996 if (bitsdone != thissize)
1997 part = expand_shift (LSHIFT_EXPR, word_mode, part,
1998 bitsdone - thissize, 0, 1);
2001 if (first)
2002 result = part;
2003 else
2004 /* Combine the parts with bitwise or. This works
2005 because we extracted each part as an unsigned bit field. */
2006 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2007 OPTAB_LIB_WIDEN);
2009 first = 0;
2012 /* Unsigned bit field: we are done. */
2013 if (unsignedp)
2014 return result;
2015 /* Signed bit field: sign-extend with two arithmetic shifts. */
2016 result = expand_shift (LSHIFT_EXPR, word_mode, result,
2017 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2018 return expand_shift (RSHIFT_EXPR, word_mode, result,
2019 BITS_PER_WORD - bitsize, NULL_RTX, 0);
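/* Illustrative sketch only: with 32-bit words and !BYTES_BIG_ENDIAN, a
   16-bit field starting at bit 24 of word 0 is assembled from two parts
   exactly as the loop above does it (names are illustrative):

     uint32_t lo = (w0 >> 24) & 0xFFu;   // thissize = 8, thispos = 24
     uint32_t hi =  w1        & 0xFFu;   // thissize = 8, thispos = 0
     uint32_t field = (hi << 8) | lo;    // second part shifted by bitsdone - thissize

   and the signed case then sign-extends FIELD with the final pair of
   shifts.  */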
2022 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2023 the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than
2024 MODE, fill the upper bits with zeros. Fail if the layout of either
2025 mode is unknown (as for CC modes) or if the extraction would involve
2026 unprofitable mode punning. Return the value on success, otherwise
2027 return null.
2029 This is different from gen_lowpart* in these respects:
2031 - the returned value must always be considered an rvalue
2033 - when MODE is wider than SRC_MODE, the extraction involves
2034 a zero extension
2036 - when MODE is smaller than SRC_MODE, the extraction involves
2037 a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
2039 In other words, this routine performs a computation, whereas the
2040 gen_lowpart* routines are conceptually lvalue or rvalue subreg
2041 operations. */
2044 extract_low_bits (enum machine_mode mode, enum machine_mode src_mode, rtx src)
2046 enum machine_mode int_mode, src_int_mode;
2048 if (mode == src_mode)
2049 return src;
2051 if (CONSTANT_P (src))
2053 /* simplify_gen_subreg can't be used here, as if simplify_subreg
2054 fails, it will happily create (subreg (symbol_ref)) or similar
2055 invalid SUBREGs. */
2056 unsigned int byte = subreg_lowpart_offset (mode, src_mode);
2057 rtx ret = simplify_subreg (mode, src, src_mode, byte);
2058 if (ret)
2059 return ret;
2061 if (GET_MODE (src) == VOIDmode
2062 || !validate_subreg (mode, src_mode, src, byte))
2063 return NULL_RTX;
2065 src = force_reg (GET_MODE (src), src);
2066 return gen_rtx_SUBREG (mode, src, byte);
2069 if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2070 return NULL_RTX;
2072 if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2073 && MODES_TIEABLE_P (mode, src_mode))
2075 rtx x = gen_lowpart_common (mode, src);
2076 if (x)
2077 return x;
2080 src_int_mode = int_mode_for_mode (src_mode);
2081 int_mode = int_mode_for_mode (mode);
2082 if (src_int_mode == BLKmode || int_mode == BLKmode)
2083 return NULL_RTX;
2085 if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2086 return NULL_RTX;
2087 if (!MODES_TIEABLE_P (int_mode, mode))
2088 return NULL_RTX;
2090 src = gen_lowpart (src_int_mode, src);
2091 src = convert_modes (int_mode, src_int_mode, src, true);
2092 src = gen_lowpart (mode, src);
2093 return src;
2096 /* Add INC into TARGET. */
2098 void
2099 expand_inc (rtx target, rtx inc)
2101 rtx value = expand_binop (GET_MODE (target), add_optab,
2102 target, inc,
2103 target, 0, OPTAB_LIB_WIDEN);
2104 if (value != target)
2105 emit_move_insn (target, value);
2108 /* Subtract DEC from TARGET. */
2110 void
2111 expand_dec (rtx target, rtx dec)
2113 rtx value = expand_binop (GET_MODE (target), sub_optab,
2114 target, dec,
2115 target, 0, OPTAB_LIB_WIDEN);
2116 if (value != target)
2117 emit_move_insn (target, value);
2120 /* Output a shift instruction for expression code CODE,
2121 with SHIFTED being the rtx for the value to shift,
2122 and AMOUNT the rtx for the amount to shift by.
2123 Store the result in the rtx TARGET, if that is convenient.
2124 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2125 Return the rtx for where the value is. */
2127 static rtx
2128 expand_shift_1 (enum tree_code code, enum machine_mode mode, rtx shifted,
2129 rtx amount, rtx target, int unsignedp)
2131 rtx op1, temp = 0;
2132 int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2133 int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2134 optab lshift_optab = ashl_optab;
2135 optab rshift_arith_optab = ashr_optab;
2136 optab rshift_uns_optab = lshr_optab;
2137 optab lrotate_optab = rotl_optab;
2138 optab rrotate_optab = rotr_optab;
2139 enum machine_mode op1_mode;
2140 int attempt;
2141 bool speed = optimize_insn_for_speed_p ();
2143 op1 = amount;
2144 op1_mode = GET_MODE (op1);
2146 /* Determine whether the shift/rotate amount is a vector, or scalar. If the
2147 shift amount is a vector, use the vector/vector shift patterns. */
2148 if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2150 lshift_optab = vashl_optab;
2151 rshift_arith_optab = vashr_optab;
2152 rshift_uns_optab = vlshr_optab;
2153 lrotate_optab = vrotl_optab;
2154 rrotate_optab = vrotr_optab;
2157 /* Previously detected shift-counts computed by NEGATE_EXPR
2158 and shifted in the other direction; but that does not work
2159 on all machines. */
2161 if (SHIFT_COUNT_TRUNCATED)
2163 if (CONST_INT_P (op1)
2164 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2165 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
2166 op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2167 % GET_MODE_BITSIZE (mode));
2168 else if (GET_CODE (op1) == SUBREG
2169 && subreg_lowpart_p (op1)
2170 && INTEGRAL_MODE_P (GET_MODE (SUBREG_REG (op1))))
2171 op1 = SUBREG_REG (op1);
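/* For example, when SHIFT_COUNT_TRUNCATED holds, a 32-bit shift by the
   constant 37 is reduced here to a shift by 37 % 32 == 5, matching what
   the hardware would do with the untruncated count.  */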
2174 if (op1 == const0_rtx)
2175 return shifted;
2177 /* Check whether it's cheaper to implement a left shift by a constant
2178 bit count by a sequence of additions. */
2179 if (code == LSHIFT_EXPR
2180 && CONST_INT_P (op1)
2181 && INTVAL (op1) > 0
2182 && INTVAL (op1) < GET_MODE_PRECISION (mode)
2183 && INTVAL (op1) < MAX_BITS_PER_WORD
2184 && shift_cost[speed][mode][INTVAL (op1)] > INTVAL (op1) * add_cost[speed][mode]
2185 && shift_cost[speed][mode][INTVAL (op1)] != MAX_COST)
2187 int i;
2188 for (i = 0; i < INTVAL (op1); i++)
2190 temp = force_reg (mode, shifted);
2191 shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2192 unsignedp, OPTAB_LIB_WIDEN);
2194 return shifted;
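/* Illustrative sketch only: when the additions are cheaper, a constant
   left shift is expanded as repeated doubling, e.g. for INTVAL (op1) == 3:

     t = x + x;       // x << 1
     t = t + t;       // x << 2
     t = t + t;       // x << 3

   i.e. INTVAL (op1) additions in place of one shift insn.  */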
2197 for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2199 enum optab_methods methods;
2201 if (attempt == 0)
2202 methods = OPTAB_DIRECT;
2203 else if (attempt == 1)
2204 methods = OPTAB_WIDEN;
2205 else
2206 methods = OPTAB_LIB_WIDEN;
2208 if (rotate)
2210 /* Widening does not work for rotation. */
2211 if (methods == OPTAB_WIDEN)
2212 continue;
2213 else if (methods == OPTAB_LIB_WIDEN)
2215 /* If we have been unable to open-code this by a rotation,
2216 do it as the IOR of two shifts. I.e., to rotate A
2217 by N bits, compute (A << N) | ((unsigned) A >> (C - N))
2218 where C is the bitsize of A.
2220 It is theoretically possible that the target machine might
2221 not be able to perform either shift and hence we would
2222 be making two libcalls rather than just the one for the
2223 shift (similarly if IOR could not be done). We will allow
2224 this extremely unlikely lossage to avoid complicating the
2225 code below. */
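/* Illustrative sketch only: for a 32-bit mode this fallback computes the
   familiar rotate idiom, shown here for a left rotate by a nonzero N < 32
   (the helper name is illustrative):

     static uint32_t
     rotl32 (uint32_t a, unsigned n)
     {
       return (a << n) | (a >> (32 - n));
     }

   The code below forms the opposite shift count as the mode width minus N
   rather than relying on the count wrapping.  */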
2227 rtx subtarget = target == shifted ? 0 : target;
2228 rtx new_amount, other_amount;
2229 rtx temp1;
2231 new_amount = op1;
2232 if (CONST_INT_P (op1))
2233 other_amount = GEN_INT (GET_MODE_BITSIZE (mode)
2234 - INTVAL (op1));
2235 else
2236 other_amount
2237 = simplify_gen_binary (MINUS, GET_MODE (op1),
2238 GEN_INT (GET_MODE_PRECISION (mode)),
2239 op1);
2241 shifted = force_reg (mode, shifted);
2243 temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2244 mode, shifted, new_amount, 0, 1);
2245 temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2246 mode, shifted, other_amount,
2247 subtarget, 1);
2248 return expand_binop (mode, ior_optab, temp, temp1, target,
2249 unsignedp, methods);
2252 temp = expand_binop (mode,
2253 left ? lrotate_optab : rrotate_optab,
2254 shifted, op1, target, unsignedp, methods);
2256 else if (unsignedp)
2257 temp = expand_binop (mode,
2258 left ? lshift_optab : rshift_uns_optab,
2259 shifted, op1, target, unsignedp, methods);
2261 /* Do arithmetic shifts.
2262 Also, if we are going to widen the operand, we can just as well
2263 use an arithmetic right-shift instead of a logical one. */
2264 if (temp == 0 && ! rotate
2265 && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2267 enum optab_methods methods1 = methods;
2269 /* If trying to widen a log shift to an arithmetic shift,
2270 don't accept an arithmetic shift of the same size. */
2271 if (unsignedp)
2272 methods1 = OPTAB_MUST_WIDEN;
2274 /* Arithmetic shift */
2276 temp = expand_binop (mode,
2277 left ? lshift_optab : rshift_arith_optab,
2278 shifted, op1, target, unsignedp, methods1);
2281 /* We used to try extzv here for logical right shifts, but that was
2282 only useful for one machine, the VAX, and caused poor code
2283 generation there for lshrdi3, so the code was deleted and a
2284 define_expand for lshrsi3 was added to vax.md. */
2287 gcc_assert (temp);
2288 return temp;
2291 /* Output a shift instruction for expression code CODE,
2292 with SHIFTED being the rtx for the value to shift,
2293 and AMOUNT the amount to shift by.
2294 Store the result in the rtx TARGET, if that is convenient.
2295 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2296 Return the rtx for where the value is. */
2299 expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2300 int amount, rtx target, int unsignedp)
2302 return expand_shift_1 (code, mode,
2303 shifted, GEN_INT (amount), target, unsignedp);
2306 /* Output a shift instruction for expression code CODE,
2307 with SHIFTED being the rtx for the value to shift,
2308 and AMOUNT the tree for the amount to shift by.
2309 Store the result in the rtx TARGET, if that is convenient.
2310 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2311 Return the rtx for where the value is. */
2314 expand_variable_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2315 tree amount, rtx target, int unsignedp)
2317 return expand_shift_1 (code, mode,
2318 shifted, expand_normal (amount), target, unsignedp);
2322 /* Indicates the type of fixup needed after a constant multiplication.
2323 BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2324 the result should be negated, and ADD_VARIANT means that the
2325 multiplicand should be added to the result. */
2326 enum mult_variant {basic_variant, negate_variant, add_variant};
2328 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2329 const struct mult_cost *, enum machine_mode mode);
2330 static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
2331 struct algorithm *, enum mult_variant *, int);
2332 static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
2333 const struct algorithm *, enum mult_variant);
2334 static unsigned HOST_WIDE_INT choose_multiplier (unsigned HOST_WIDE_INT, int,
2335 int, rtx *, int *, int *);
2336 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2337 static rtx extract_high_half (enum machine_mode, rtx);
2338 static rtx expand_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
2339 static rtx expand_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
2340 int, int);
2341 /* Compute and return the best algorithm for multiplying by T.
2342 The algorithm must cost less than cost_limit.
2343 If retval.cost >= COST_LIMIT, no algorithm was found and all
2344 other fields of the returned struct are undefined.
2345 MODE is the machine mode of the multiplication. */
2347 static void
2348 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2349 const struct mult_cost *cost_limit, enum machine_mode mode)
2351 int m;
2352 struct algorithm *alg_in, *best_alg;
2353 struct mult_cost best_cost;
2354 struct mult_cost new_limit;
2355 int op_cost, op_latency;
2356 unsigned HOST_WIDE_INT orig_t = t;
2357 unsigned HOST_WIDE_INT q;
2358 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
2359 int hash_index;
2360 bool cache_hit = false;
2361 enum alg_code cache_alg = alg_zero;
2362 bool speed = optimize_insn_for_speed_p ();
2364 /* Indicate that no algorithm is yet found. If no algorithm
2365 is found, this value will be returned and indicate failure. */
2366 alg_out->cost.cost = cost_limit->cost + 1;
2367 alg_out->cost.latency = cost_limit->latency + 1;
2369 if (cost_limit->cost < 0
2370 || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2371 return;
2373 /* Restrict the bits of "t" to the multiplication's mode. */
2374 t &= GET_MODE_MASK (mode);
2376 /* t == 1 can be done in zero cost. */
2377 if (t == 1)
2379 alg_out->ops = 1;
2380 alg_out->cost.cost = 0;
2381 alg_out->cost.latency = 0;
2382 alg_out->op[0] = alg_m;
2383 return;
2386 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2387 fail now. */
2388 if (t == 0)
2390 if (MULT_COST_LESS (cost_limit, zero_cost[speed]))
2391 return;
2392 else
2394 alg_out->ops = 1;
2395 alg_out->cost.cost = zero_cost[speed];
2396 alg_out->cost.latency = zero_cost[speed];
2397 alg_out->op[0] = alg_zero;
2398 return;
2402 /* We'll be needing a couple extra algorithm structures now. */
2404 alg_in = XALLOCA (struct algorithm);
2405 best_alg = XALLOCA (struct algorithm);
2406 best_cost = *cost_limit;
2408 /* Compute the hash index. */
2409 hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2411 /* See if we already know what to do for T. */
2412 if (alg_hash[hash_index].t == t
2413 && alg_hash[hash_index].mode == mode
2415 && alg_hash[hash_index].speed == speed
2416 && alg_hash[hash_index].alg != alg_unknown)
2418 cache_alg = alg_hash[hash_index].alg;
2420 if (cache_alg == alg_impossible)
2422 /* The cache tells us that it's impossible to synthesize
2423 multiplication by T within alg_hash[hash_index].cost. */
2424 if (!CHEAPER_MULT_COST (&alg_hash[hash_index].cost, cost_limit))
2425 /* COST_LIMIT is at least as restrictive as the one
2426 recorded in the hash table, in which case we have no
2427 hope of synthesizing a multiplication. Just
2428 return. */
2429 return;
2431 /* If we get here, COST_LIMIT is less restrictive than the
2432 one recorded in the hash table, so we may be able to
2433 synthesize a multiplication. Proceed as if we didn't
2434 have the cache entry. */
2436 else
2438 if (CHEAPER_MULT_COST (cost_limit, &alg_hash[hash_index].cost))
2439 /* The cached algorithm shows that this multiplication
2440 requires more cost than COST_LIMIT. Just return. This
2441 way, we don't clobber this cache entry with
2442 alg_impossible but retain useful information. */
2443 return;
2445 cache_hit = true;
2447 switch (cache_alg)
2449 case alg_shift:
2450 goto do_alg_shift;
2452 case alg_add_t_m2:
2453 case alg_sub_t_m2:
2454 goto do_alg_addsub_t_m2;
2456 case alg_add_factor:
2457 case alg_sub_factor:
2458 goto do_alg_addsub_factor;
2460 case alg_add_t2_m:
2461 goto do_alg_add_t2_m;
2463 case alg_sub_t2_m:
2464 goto do_alg_sub_t2_m;
2466 default:
2467 gcc_unreachable ();
2472 /* If we have a group of zero bits at the low-order part of T, try
2473 multiplying by the remaining bits and then doing a shift. */
2475 if ((t & 1) == 0)
2477 do_alg_shift:
2478 m = floor_log2 (t & -t); /* m = number of low zero bits */
2479 if (m < maxm)
2481 q = t >> m;
2482 /* The function expand_shift will choose between a shift and
2483 a sequence of additions, so the observed cost is given as
2484 MIN (m * add_cost[speed][mode], shift_cost[speed][mode][m]). */
2485 op_cost = m * add_cost[speed][mode];
2486 if (shift_cost[speed][mode][m] < op_cost)
2487 op_cost = shift_cost[speed][mode][m];
2488 new_limit.cost = best_cost.cost - op_cost;
2489 new_limit.latency = best_cost.latency - op_cost;
2490 synth_mult (alg_in, q, &new_limit, mode);
2492 alg_in->cost.cost += op_cost;
2493 alg_in->cost.latency += op_cost;
2494 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2496 struct algorithm *x;
2497 best_cost = alg_in->cost;
2498 x = alg_in, alg_in = best_alg, best_alg = x;
2499 best_alg->log[best_alg->ops] = m;
2500 best_alg->op[best_alg->ops] = alg_shift;
2503 /* See if treating ORIG_T as a signed number yields a better
2504 sequence. Try this sequence only for a negative ORIG_T
2505 as it would be useless for a non-negative ORIG_T. */
2506 if ((HOST_WIDE_INT) orig_t < 0)
2508 /* Shift ORIG_T as follows because a right shift of a
2509 negative-valued signed type is implementation
2510 defined. */
2511 q = ~(~orig_t >> m);
2512 /* The function expand_shift will choose between a shift
2513 and a sequence of additions, so the observed cost is
2514 given as MIN (m * add_cost[speed][mode],
2515 shift_cost[speed][mode][m]). */
2516 op_cost = m * add_cost[speed][mode];
2517 if (shift_cost[speed][mode][m] < op_cost)
2518 op_cost = shift_cost[speed][mode][m];
2519 new_limit.cost = best_cost.cost - op_cost;
2520 new_limit.latency = best_cost.latency - op_cost;
2521 synth_mult (alg_in, q, &new_limit, mode);
2523 alg_in->cost.cost += op_cost;
2524 alg_in->cost.latency += op_cost;
2525 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2527 struct algorithm *x;
2528 best_cost = alg_in->cost;
2529 x = alg_in, alg_in = best_alg, best_alg = x;
2530 best_alg->log[best_alg->ops] = m;
2531 best_alg->op[best_alg->ops] = alg_shift;
2535 if (cache_hit)
2536 goto done;
2539 /* If we have an odd number, add or subtract one. */
2540 if ((t & 1) != 0)
2542 unsigned HOST_WIDE_INT w;
2544 do_alg_addsub_t_m2:
2545 for (w = 1; (w & t) != 0; w <<= 1)
2547 /* If T was -1, then W will be zero after the loop. This is another
2548 case where T ends with ...111. Handling this with (T + 1) and
2549 subtracting 1 produces slightly better code and results in much faster
2550 algorithm selection than treating it like the ...0111 case
2551 below. */
2552 if (w == 0
2553 || (w > 2
2554 /* Reject the case where t is 3.
2555 Thus we prefer addition in that case. */
2556 && t != 3))
2558 /* T ends with ...111. Multiply by (T + 1) and subtract 1. */
2560 op_cost = add_cost[speed][mode];
2561 new_limit.cost = best_cost.cost - op_cost;
2562 new_limit.latency = best_cost.latency - op_cost;
2563 synth_mult (alg_in, t + 1, &new_limit, mode);
2565 alg_in->cost.cost += op_cost;
2566 alg_in->cost.latency += op_cost;
2567 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2569 struct algorithm *x;
2570 best_cost = alg_in->cost;
2571 x = alg_in, alg_in = best_alg, best_alg = x;
2572 best_alg->log[best_alg->ops] = 0;
2573 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2576 else
2578 /* T ends with ...01 or ...011. Multiply by (T - 1) and add 1. */
2580 op_cost = add_cost[speed][mode];
2581 new_limit.cost = best_cost.cost - op_cost;
2582 new_limit.latency = best_cost.latency - op_cost;
2583 synth_mult (alg_in, t - 1, &new_limit, mode);
2585 alg_in->cost.cost += op_cost;
2586 alg_in->cost.latency += op_cost;
2587 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2589 struct algorithm *x;
2590 best_cost = alg_in->cost;
2591 x = alg_in, alg_in = best_alg, best_alg = x;
2592 best_alg->log[best_alg->ops] = 0;
2593 best_alg->op[best_alg->ops] = alg_add_t_m2;
2597 /* We may be able to calculate a * -7, a * -15, a * -31, etc.
2598 quickly with a - a * n for some appropriate constant n. */
2599 m = exact_log2 (-orig_t + 1);
2600 if (m >= 0 && m < maxm)
2602 op_cost = shiftsub1_cost[speed][mode][m];
2603 new_limit.cost = best_cost.cost - op_cost;
2604 new_limit.latency = best_cost.latency - op_cost;
2605 synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m, &new_limit, mode);
2607 alg_in->cost.cost += op_cost;
2608 alg_in->cost.latency += op_cost;
2609 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2611 struct algorithm *x;
2612 best_cost = alg_in->cost;
2613 x = alg_in, alg_in = best_alg, best_alg = x;
2614 best_alg->log[best_alg->ops] = m;
2615 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2619 if (cache_hit)
2620 goto done;
2623 /* Look for factors of t of the form
2624 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2625 If we find such a factor, we can multiply by t using an algorithm that
2626 multiplies by q, shift the result by m and add/subtract it to itself.
2628 We search for large factors first and loop down, even if large factors
2629 are less probable than small; if we find a large factor we will find a
2630 good sequence quickly, and therefore be able to prune (by decreasing
2631 COST_LIMIT) the search. */
2633 do_alg_addsub_factor:
2634 for (m = floor_log2 (t - 1); m >= 2; m--)
2636 unsigned HOST_WIDE_INT d;
2638 d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2639 if (t % d == 0 && t > d && m < maxm
2640 && (!cache_hit || cache_alg == alg_add_factor))
2642 /* If the target has a cheap shift-and-add instruction use
2643 that in preference to a shift insn followed by an add insn.
2644 Assume that the shift-and-add is "atomic" with a latency
2645 equal to its cost, otherwise assume that on superscalar
2646 hardware the shift may be executed concurrently with the
2647 earlier steps in the algorithm. */
2648 op_cost = add_cost[speed][mode] + shift_cost[speed][mode][m];
2649 if (shiftadd_cost[speed][mode][m] < op_cost)
2651 op_cost = shiftadd_cost[speed][mode][m];
2652 op_latency = op_cost;
2654 else
2655 op_latency = add_cost[speed][mode];
2657 new_limit.cost = best_cost.cost - op_cost;
2658 new_limit.latency = best_cost.latency - op_latency;
2659 synth_mult (alg_in, t / d, &new_limit, mode);
2661 alg_in->cost.cost += op_cost;
2662 alg_in->cost.latency += op_latency;
2663 if (alg_in->cost.latency < op_cost)
2664 alg_in->cost.latency = op_cost;
2665 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2667 struct algorithm *x;
2668 best_cost = alg_in->cost;
2669 x = alg_in, alg_in = best_alg, best_alg = x;
2670 best_alg->log[best_alg->ops] = m;
2671 best_alg->op[best_alg->ops] = alg_add_factor;
2673 /* Other factors will have been taken care of in the recursion. */
2674 break;
2677 d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2678 if (t % d == 0 && t > d && m < maxm
2679 && (!cache_hit || cache_alg == alg_sub_factor))
2681 /* If the target has a cheap shift-and-subtract insn use
2682 that in preference to a shift insn followed by a sub insn.
2683 Assume that the shift-and-sub is "atomic" with a latency
2684 equal to its cost, otherwise assume that on superscalar
2685 hardware the shift may be executed concurrently with the
2686 earlier steps in the algorithm. */
2687 op_cost = add_cost[speed][mode] + shift_cost[speed][mode][m];
2688 if (shiftsub0_cost[speed][mode][m] < op_cost)
2690 op_cost = shiftsub0_cost[speed][mode][m];
2691 op_latency = op_cost;
2693 else
2694 op_latency = add_cost[speed][mode];
2696 new_limit.cost = best_cost.cost - op_cost;
2697 new_limit.latency = best_cost.latency - op_latency;
2698 synth_mult (alg_in, t / d, &new_limit, mode);
2700 alg_in->cost.cost += op_cost;
2701 alg_in->cost.latency += op_latency;
2702 if (alg_in->cost.latency < op_cost)
2703 alg_in->cost.latency = op_cost;
2704 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2706 struct algorithm *x;
2707 best_cost = alg_in->cost;
2708 x = alg_in, alg_in = best_alg, best_alg = x;
2709 best_alg->log[best_alg->ops] = m;
2710 best_alg->op[best_alg->ops] = alg_sub_factor;
2712 break;
2715 if (cache_hit)
2716 goto done;
2718 /* Try shift-and-add (load effective address) instructions,
2719 i.e. do a*3, a*5, a*9. */
2720 if ((t & 1) != 0)
2722 do_alg_add_t2_m:
2723 q = t - 1;
2724 q = q & -q;
2725 m = exact_log2 (q);
2726 if (m >= 0 && m < maxm)
2728 op_cost = shiftadd_cost[speed][mode][m];
2729 new_limit.cost = best_cost.cost - op_cost;
2730 new_limit.latency = best_cost.latency - op_cost;
2731 synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2733 alg_in->cost.cost += op_cost;
2734 alg_in->cost.latency += op_cost;
2735 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2737 struct algorithm *x;
2738 best_cost = alg_in->cost;
2739 x = alg_in, alg_in = best_alg, best_alg = x;
2740 best_alg->log[best_alg->ops] = m;
2741 best_alg->op[best_alg->ops] = alg_add_t2_m;
2744 if (cache_hit)
2745 goto done;
2747 do_alg_sub_t2_m:
2748 q = t + 1;
2749 q = q & -q;
2750 m = exact_log2 (q);
2751 if (m >= 0 && m < maxm)
2753 op_cost = shiftsub0_cost[speed][mode][m];
2754 new_limit.cost = best_cost.cost - op_cost;
2755 new_limit.latency = best_cost.latency - op_cost;
2756 synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2758 alg_in->cost.cost += op_cost;
2759 alg_in->cost.latency += op_cost;
2760 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2762 struct algorithm *x;
2763 best_cost = alg_in->cost;
2764 x = alg_in, alg_in = best_alg, best_alg = x;
2765 best_alg->log[best_alg->ops] = m;
2766 best_alg->op[best_alg->ops] = alg_sub_t2_m;
2769 if (cache_hit)
2770 goto done;
2773 done:
2774 /* If best_cost has not decreased, we have not found any algorithm. */
2775 if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2777 /* We failed to find an algorithm. Record alg_impossible for
2778 this case (that is, <T, MODE, COST_LIMIT>) so that next time
2779 we are asked to find an algorithm for T within the same or
2780 lower COST_LIMIT, we can immediately return to the
2781 caller. */
2782 alg_hash[hash_index].t = t;
2783 alg_hash[hash_index].mode = mode;
2784 alg_hash[hash_index].speed = speed;
2785 alg_hash[hash_index].alg = alg_impossible;
2786 alg_hash[hash_index].cost = *cost_limit;
2787 return;
2790 /* Cache the result. */
2791 if (!cache_hit)
2793 alg_hash[hash_index].t = t;
2794 alg_hash[hash_index].mode = mode;
2795 alg_hash[hash_index].speed = speed;
2796 alg_hash[hash_index].alg = best_alg->op[best_alg->ops];
2797 alg_hash[hash_index].cost.cost = best_cost.cost;
2798 alg_hash[hash_index].cost.latency = best_cost.latency;
2801 /* If we are getting too long a sequence for `struct algorithm'
2802 to record, make this search fail. */
2803 if (best_alg->ops == MAX_BITS_PER_WORD)
2804 return;
2806 /* Copy the algorithm from temporary space to the space at alg_out.
2807 We avoid using structure assignment because the majority of
2808 best_alg is normally undefined, and this is a critical function. */
2809 alg_out->ops = best_alg->ops + 1;
2810 alg_out->cost = best_cost;
2811 memcpy (alg_out->op, best_alg->op,
2812 alg_out->ops * sizeof *alg_out->op);
2813 memcpy (alg_out->log, best_alg->log,
2814 alg_out->ops * sizeof *alg_out->log);
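/* Illustrative sketch only: a decomposition of the kind searched for
   above, written out for t = 45 (0b101101):

     t1 = (x << 2) + x;      // x * 5    (alg_add_t2_m, log = 2)
     t2 = (t1 << 3) + t1;    // x * 45   (alg_add_factor, log = 3, 45 = 5 * 9)

   Two shift-and-add steps replace the general multiplication whenever
   their combined cost beats the target's mul_cost for the mode.  */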
2817 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2818 Try three variations:
2820 - a shift/add sequence based on VAL itself
2821 - a shift/add sequence based on -VAL, followed by a negation
2822 - a shift/add sequence based on VAL - 1, followed by an addition.
2824 Return true if the cheapest of these cost less than MULT_COST,
2825 describing the algorithm in *ALG and final fixup in *VARIANT. */
2827 static bool
2828 choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
2829 struct algorithm *alg, enum mult_variant *variant,
2830 int mult_cost)
2832 struct algorithm alg2;
2833 struct mult_cost limit;
2834 int op_cost;
2835 bool speed = optimize_insn_for_speed_p ();
2837 /* Fail quickly for impossible bounds. */
2838 if (mult_cost < 0)
2839 return false;
2841 /* Ensure that mult_cost provides a reasonable upper bound.
2842 Any constant multiplication can be performed with less
2843 than 2 * bits additions. */
2844 op_cost = 2 * GET_MODE_BITSIZE (mode) * add_cost[speed][mode];
2845 if (mult_cost > op_cost)
2846 mult_cost = op_cost;
2848 *variant = basic_variant;
2849 limit.cost = mult_cost;
2850 limit.latency = mult_cost;
2851 synth_mult (alg, val, &limit, mode);
2853 /* This works only if the inverted value actually fits in an
2854 `unsigned int' */
2855 if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
2857 op_cost = neg_cost[speed][mode];
2858 if (MULT_COST_LESS (&alg->cost, mult_cost))
2860 limit.cost = alg->cost.cost - op_cost;
2861 limit.latency = alg->cost.latency - op_cost;
2863 else
2865 limit.cost = mult_cost - op_cost;
2866 limit.latency = mult_cost - op_cost;
2869 synth_mult (&alg2, -val, &limit, mode);
2870 alg2.cost.cost += op_cost;
2871 alg2.cost.latency += op_cost;
2872 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2873 *alg = alg2, *variant = negate_variant;
2876 /* This proves very useful for division-by-constant. */
2877 op_cost = add_cost[speed][mode];
2878 if (MULT_COST_LESS (&alg->cost, mult_cost))
2880 limit.cost = alg->cost.cost - op_cost;
2881 limit.latency = alg->cost.latency - op_cost;
2883 else
2885 limit.cost = mult_cost - op_cost;
2886 limit.latency = mult_cost - op_cost;
2889 synth_mult (&alg2, val - 1, &limit, mode);
2890 alg2.cost.cost += op_cost;
2891 alg2.cost.latency += op_cost;
2892 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2893 *alg = alg2, *variant = add_variant;
2895 return MULT_COST_LESS (&alg->cost, mult_cost);
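/* Illustrative sketch only, showing the two fixup variants tried above
   for a 32-bit mode (values are examples, not special cases in the code):

     val = 0xFFFFFFFD (-3):  synth_mult (3)      ->  x * 3 = (x << 1) + x,
                             then negate             (negate_variant)
     val = 0x80000001:       synth_mult (val - 1) ->  x << 31,
                             then add x              (add_variant)

   Whichever of the three candidate sequences costs out cheapest wins.  */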
2898 /* A subroutine of expand_mult, used for constant multiplications.
2899 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2900 convenient. Use the shift/add sequence described by ALG and apply
2901 the final fixup specified by VARIANT. */
2903 static rtx
2904 expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
2905 rtx target, const struct algorithm *alg,
2906 enum mult_variant variant)
2908 HOST_WIDE_INT val_so_far;
2909 rtx insn, accum, tem;
2910 int opno;
2911 enum machine_mode nmode;
2913 /* Avoid referencing memory over and over and invalid sharing
2914 on SUBREGs. */
2915 op0 = force_reg (mode, op0);
2917 /* ACCUM starts out either as OP0 or as a zero, depending on
2918 the first operation. */
2920 if (alg->op[0] == alg_zero)
2922 accum = copy_to_mode_reg (mode, const0_rtx);
2923 val_so_far = 0;
2925 else if (alg->op[0] == alg_m)
2927 accum = copy_to_mode_reg (mode, op0);
2928 val_so_far = 1;
2930 else
2931 gcc_unreachable ();
2933 for (opno = 1; opno < alg->ops; opno++)
2935 int log = alg->log[opno];
2936 rtx shift_subtarget = optimize ? 0 : accum;
2937 rtx add_target
2938 = (opno == alg->ops - 1 && target != 0 && variant != add_variant
2939 && !optimize)
2940 ? target : 0;
2941 rtx accum_target = optimize ? 0 : accum;
2943 switch (alg->op[opno])
2945 case alg_shift:
2946 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2947 /* REG_EQUAL note will be attached to the following insn. */
2948 emit_move_insn (accum, tem);
2949 val_so_far <<= log;
2950 break;
2952 case alg_add_t_m2:
2953 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
2954 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2955 add_target ? add_target : accum_target);
2956 val_so_far += (HOST_WIDE_INT) 1 << log;
2957 break;
2959 case alg_sub_t_m2:
2960 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
2961 accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
2962 add_target ? add_target : accum_target);
2963 val_so_far -= (HOST_WIDE_INT) 1 << log;
2964 break;
2966 case alg_add_t2_m:
2967 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2968 log, shift_subtarget, 0);
2969 accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
2970 add_target ? add_target : accum_target);
2971 val_so_far = (val_so_far << log) + 1;
2972 break;
2974 case alg_sub_t2_m:
2975 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2976 log, shift_subtarget, 0);
2977 accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
2978 add_target ? add_target : accum_target);
2979 val_so_far = (val_so_far << log) - 1;
2980 break;
2982 case alg_add_factor:
2983 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2984 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2985 add_target ? add_target : accum_target);
2986 val_so_far += val_so_far << log;
2987 break;
2989 case alg_sub_factor:
2990 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2991 accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
2992 (add_target
2993 ? add_target : (optimize ? 0 : tem)));
2994 val_so_far = (val_so_far << log) - val_so_far;
2995 break;
2997 default:
2998 gcc_unreachable ();
3001 /* Write a REG_EQUAL note on the last insn so that we can cse
3002 multiplication sequences. Note that if ACCUM is a SUBREG,
3003 we've set the inner register and must properly indicate
3004 that. */
3006 tem = op0, nmode = mode;
3007 if (GET_CODE (accum) == SUBREG)
3009 nmode = GET_MODE (SUBREG_REG (accum));
3010 tem = gen_lowpart (nmode, op0);
3013 insn = get_last_insn ();
3014 set_unique_reg_note (insn, REG_EQUAL,
3015 gen_rtx_MULT (nmode, tem,
3016 GEN_INT (val_so_far)));
3019 if (variant == negate_variant)
3021 val_so_far = -val_so_far;
3022 accum = expand_unop (mode, neg_optab, accum, target, 0);
3024 else if (variant == add_variant)
3026 val_so_far = val_so_far + 1;
3027 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3030 /* Compare only the bits of val and val_so_far that are significant
3031 in the result mode, to avoid sign-/zero-extension confusion. */
3032 val &= GET_MODE_MASK (mode);
3033 val_so_far &= GET_MODE_MASK (mode);
3034 gcc_assert (val == val_so_far);
3036 return accum;
3039 /* Perform a multiplication and return an rtx for the result.
3040 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3041 TARGET is a suggestion for where to store the result (an rtx).
3043 We check specially for a constant integer as OP1.
3044 If you want this check for OP0 as well, then before calling
3045 you should swap the two operands if OP0 would be constant. */
3048 expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3049 int unsignedp)
3051 enum mult_variant variant;
3052 struct algorithm algorithm;
3053 int max_cost;
3054 bool speed = optimize_insn_for_speed_p ();
3056 /* Handling const0_rtx here allows us to use zero as a rogue value for
3057 coeff below. */
3058 if (op1 == const0_rtx)
3059 return const0_rtx;
3060 if (op1 == const1_rtx)
3061 return op0;
3062 if (op1 == constm1_rtx)
3063 return expand_unop (mode,
3064 GET_MODE_CLASS (mode) == MODE_INT
3065 && !unsignedp && flag_trapv
3066 ? negv_optab : neg_optab,
3067 op0, target, 0);
3069 /* These are the operations that are potentially turned into a sequence
3070 of shifts and additions. */
3071 if (SCALAR_INT_MODE_P (mode)
3072 && (unsignedp || !flag_trapv))
3074 HOST_WIDE_INT coeff = 0;
3075 rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3077 /* synth_mult does an `unsigned int' multiply. As long as the mode is
3078 less than or equal in size to `unsigned int' this doesn't matter.
3079 If the mode is larger than `unsigned int', then synth_mult works
3080 only if the constant value exactly fits in an `unsigned int' without
3081 any truncation. This means that multiplying by negative values does
3082 not work; results are off by 2^32 on a 32 bit machine. */
3084 if (CONST_INT_P (op1))
3086 /* Attempt to handle multiplication of DImode values by negative
3087 coefficients, by performing the multiplication by a positive
3088 multiplier and then inverting the result. */
3089 if (INTVAL (op1) < 0
3090 && GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT)
3092 /* It's safe to use -INTVAL (op1) even for INT_MIN, as the
3093 result is interpreted as an unsigned coefficient.
3094 Exclude cost of op0 from max_cost to match the cost
3095 calculation of the synth_mult. */
3096 max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1),
3097 speed)
3098 - neg_cost[speed][mode]);
3099 if (max_cost > 0
3100 && choose_mult_variant (mode, -INTVAL (op1), &algorithm,
3101 &variant, max_cost))
3103 rtx temp = expand_mult_const (mode, op0, -INTVAL (op1),
3104 NULL_RTX, &algorithm,
3105 variant);
3106 return expand_unop (mode, neg_optab, temp, target, 0);
3109 else coeff = INTVAL (op1);
3111 else if (GET_CODE (op1) == CONST_DOUBLE)
3113 /* If we are multiplying in DImode, it may still be a win
3114 to try to work with shifts and adds. */
3115 if (CONST_DOUBLE_HIGH (op1) == 0
3116 && CONST_DOUBLE_LOW (op1) > 0)
3117 coeff = CONST_DOUBLE_LOW (op1);
3118 else if (CONST_DOUBLE_LOW (op1) == 0
3119 && EXACT_POWER_OF_2_OR_ZERO_P (CONST_DOUBLE_HIGH (op1)))
3121 int shift = floor_log2 (CONST_DOUBLE_HIGH (op1))
3122 + HOST_BITS_PER_WIDE_INT;
3123 return expand_shift (LSHIFT_EXPR, mode, op0,
3124 shift, target, unsignedp);
3128 /* We used to test optimize here, on the grounds that it's better to
3129 produce a smaller program when -O is not used. But this causes
3130 such a terrible slowdown sometimes that it seems better to always
3131 use synth_mult. */
3132 if (coeff != 0)
3134 /* Special case powers of two. */
3135 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3136 return expand_shift (LSHIFT_EXPR, mode, op0,
3137 floor_log2 (coeff), target, unsignedp);
3139 /* Exclude cost of op0 from max_cost to match the cost
3140 calculation of the synth_mult. */
3141 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed);
3142 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3143 max_cost))
3144 return expand_mult_const (mode, op0, coeff, target,
3145 &algorithm, variant);
3149 if (GET_CODE (op0) == CONST_DOUBLE)
3151 rtx temp = op0;
3152 op0 = op1;
3153 op1 = temp;
3156 /* Expand x*2.0 as x+x. */
3157 if (GET_CODE (op1) == CONST_DOUBLE
3158 && SCALAR_FLOAT_MODE_P (mode))
3160 REAL_VALUE_TYPE d;
3161 REAL_VALUE_FROM_CONST_DOUBLE (d, op1);
3163 if (REAL_VALUES_EQUAL (d, dconst2))
3165 op0 = force_reg (GET_MODE (op0), op0);
3166 return expand_binop (mode, add_optab, op0, op0,
3167 target, unsignedp, OPTAB_LIB_WIDEN);
3171 /* This used to use umul_optab if unsigned, but for non-widening multiply
3172 there is no difference between signed and unsigned. */
3173 op0 = expand_binop (mode,
3174 ! unsignedp
3175 && flag_trapv && (GET_MODE_CLASS(mode) == MODE_INT)
3176 ? smulv_optab : smul_optab,
3177 op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3178 gcc_assert (op0);
3179 return op0;
3182 /* Perform a widening multiplication and return an rtx for the result.
3183 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3184 TARGET is a suggestion for where to store the result (an rtx).
3185 THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3186 or smul_widen_optab.
3188 We check specially for a constant integer as OP1, comparing the
3189 cost of a widening multiply against the cost of a sequence of shifts
3190 and adds. */
3193 expand_widening_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3194 int unsignedp, optab this_optab)
3196 bool speed = optimize_insn_for_speed_p ();
3197 rtx cop1;
3199 if (CONST_INT_P (op1)
3200 && GET_MODE (op0) != VOIDmode
3201 && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3202 this_optab == umul_widen_optab))
3203 && CONST_INT_P (cop1)
3204 && (INTVAL (cop1) >= 0
3205 || HWI_COMPUTABLE_MODE_P (mode)))
3207 HOST_WIDE_INT coeff = INTVAL (cop1);
3208 int max_cost;
3209 enum mult_variant variant;
3210 struct algorithm algorithm;
3212 /* Special case powers of two. */
3213 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3215 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3216 return expand_shift (LSHIFT_EXPR, mode, op0,
3217 floor_log2 (coeff), target, unsignedp);
3220 /* Exclude cost of op0 from max_cost to match the cost
3221 calculation of the synth_mult. */
3222 max_cost = mul_widen_cost[speed][mode];
3223 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3224 max_cost))
3226 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3227 return expand_mult_const (mode, op0, coeff, target,
3228 &algorithm, variant);
3231 return expand_binop (mode, this_optab, op0, op1, target,
3232 unsignedp, OPTAB_LIB_WIDEN);
3235 /* Return the smallest n such that 2**n >= X. */
3238 ceil_log2 (unsigned HOST_WIDE_INT x)
3240 return floor_log2 (x - 1) + 1;
3243 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3244 replace division by D, and put the least significant N bits of the result
3245 in *MULTIPLIER_PTR and return the most significant bit.
3247 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3248 needed precision is in PRECISION (should be <= N).
3250 PRECISION should be as small as possible so this function can choose
3251 multiplier more freely.
3253 The rounded-up logarithm of D is placed in *lgup_ptr. A shift count that
3254 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3256 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3257 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
3259 static
3260 unsigned HOST_WIDE_INT
3261 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3262 rtx *multiplier_ptr, int *post_shift_ptr, int *lgup_ptr)
3264 HOST_WIDE_INT mhigh_hi, mlow_hi;
3265 unsigned HOST_WIDE_INT mhigh_lo, mlow_lo;
3266 int lgup, post_shift;
3267 int pow, pow2;
3268 unsigned HOST_WIDE_INT nl, dummy1;
3269 HOST_WIDE_INT nh, dummy2;
3271 /* lgup = ceil(log2(divisor)); */
3272 lgup = ceil_log2 (d);
3274 gcc_assert (lgup <= n);
3276 pow = n + lgup;
3277 pow2 = n + lgup - precision;
3279 /* We could handle this with some effort, but this case is much
3280 better handled directly with a scc insn, so rely on caller using
3281 that. */
3282 gcc_assert (pow != 2 * HOST_BITS_PER_WIDE_INT);
3284 /* mlow = 2^(N + lgup)/d */
3285 if (pow >= HOST_BITS_PER_WIDE_INT)
3287 nh = (HOST_WIDE_INT) 1 << (pow - HOST_BITS_PER_WIDE_INT);
3288 nl = 0;
3290 else
3292 nh = 0;
3293 nl = (unsigned HOST_WIDE_INT) 1 << pow;
3295 div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3296 &mlow_lo, &mlow_hi, &dummy1, &dummy2);
3298 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3299 if (pow2 >= HOST_BITS_PER_WIDE_INT)
3300 nh |= (HOST_WIDE_INT) 1 << (pow2 - HOST_BITS_PER_WIDE_INT);
3301 else
3302 nl |= (unsigned HOST_WIDE_INT) 1 << pow2;
3303 div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3304 &mhigh_lo, &mhigh_hi, &dummy1, &dummy2);
3306 gcc_assert (!mhigh_hi || nh - d < d);
3307 gcc_assert (mhigh_hi <= 1 && mlow_hi <= 1);
3308 /* Assert that mlow < mhigh. */
3309 gcc_assert (mlow_hi < mhigh_hi
3310 || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo));
3312 /* If precision == N, then mlow, mhigh exceed 2^N
3313 (but they do not exceed 2^(N+1)). */
3315 /* Reduce to lowest terms. */
3316 for (post_shift = lgup; post_shift > 0; post_shift--)
3318 unsigned HOST_WIDE_INT ml_lo = (mlow_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mlow_lo >> 1);
3319 unsigned HOST_WIDE_INT mh_lo = (mhigh_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mhigh_lo >> 1);
3320 if (ml_lo >= mh_lo)
3321 break;
3323 mlow_hi = 0;
3324 mlow_lo = ml_lo;
3325 mhigh_hi = 0;
3326 mhigh_lo = mh_lo;
3329 *post_shift_ptr = post_shift;
3330 *lgup_ptr = lgup;
3331 if (n < HOST_BITS_PER_WIDE_INT)
3333 unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3334 *multiplier_ptr = GEN_INT (mhigh_lo & mask);
3335 return mhigh_lo >= mask;
3337 else
3339 *multiplier_ptr = GEN_INT (mhigh_lo);
3340 return mhigh_hi;
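/* Illustrative sketch only: the classic use of the multiplier chosen above
   is unsigned division by a constant.  For d = 5 with n = precision = 32
   the routine yields the well-known pair (m = 0xCCCCCCCD, post_shift = 2),
   so that (helper name is illustrative)

     static uint32_t
     udiv5 (uint32_t x)
     {
       return (uint32_t) (((uint64_t) x * 0xCCCCCCCDu) >> 32) >> 2;
     }

   equals x / 5 for every 32-bit x.  The returned msb flags cases such as
   d = 7, where the N + 1 bit multiplier does not fit in N bits.  */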
3344 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3345 congruent to 1 (mod 2**N). */
3347 static unsigned HOST_WIDE_INT
3348 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3350 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
3352 /* The algorithm notes that the choice y = x satisfies
3353 x*y == 1 mod 2^3, since x is assumed odd.
3354 Each iteration doubles the number of bits of significance in y. */
3356 unsigned HOST_WIDE_INT mask;
3357 unsigned HOST_WIDE_INT y = x;
3358 int nbit = 3;
3360 mask = (n == HOST_BITS_PER_WIDE_INT
3361 ? ~(unsigned HOST_WIDE_INT) 0
3362 : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3364 while (nbit < n)
3366 y = y * (2 - x*y) & mask; /* Modulo 2^N */
3367 nbit *= 2;
3369 return y;
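/* Illustrative sketch only: each iteration above doubles the number of
   correct low-order bits, so for x = 3 and n = 32 it converges in a few
   steps to y = 0xAAAAAAAB, and indeed

     3 * 0xAAAAAAAB == 0x200000001 == 1 (mod 2**32).  */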
3372 /* Emit code to adjust ADJ_OPERAND after a multiplication of the wrong signedness
3373 flavor of OP0 and OP1. ADJ_OPERAND is already the high half of the
3374 product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed product
3375 to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3376 become signed.
3378 The result is put in TARGET if that is convenient.
3380 MODE is the mode of operation. */
3383 expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
3384 rtx op1, rtx target, int unsignedp)
3386 rtx tem;
3387 enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3389 tem = expand_shift (RSHIFT_EXPR, mode, op0,
3390 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3391 tem = expand_and (mode, tem, op1, NULL_RTX);
3392 adj_operand
3393 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3394 adj_operand);
3396 tem = expand_shift (RSHIFT_EXPR, mode, op1,
3397 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3398 tem = expand_and (mode, tem, op0, NULL_RTX);
3399 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3400 target);
3402 return target;
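/* Illustrative sketch only: the adjustment above implements the standard
   identity relating the signed and unsigned high halves of a product,
   shown here for 32-bit operands (helper name is illustrative):

     static int32_t
     mulhs_from_mulhu (uint32_t hu, int32_t a, int32_t b)
     {
       // subtract b where a is negative, and a where b is negative
       return (int32_t) (hu - (a < 0 ? (uint32_t) b : 0)
                            - (b < 0 ? (uint32_t) a : 0));
     }

   The RTL version builds each correction term with an arithmetic right
   shift by the mode size minus 1, followed by an AND.  */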
3405 /* Subroutine of expand_mult_highpart. Return the MODE high part of OP. */
3407 static rtx
3408 extract_high_half (enum machine_mode mode, rtx op)
3410 enum machine_mode wider_mode;
3412 if (mode == word_mode)
3413 return gen_highpart (mode, op);
3415 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3417 wider_mode = GET_MODE_WIDER_MODE (mode);
3418 op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3419 GET_MODE_BITSIZE (mode), 0, 1);
3420 return convert_modes (mode, wider_mode, op, 0);
3423 /* Like expand_mult_highpart, but only consider using a multiplication
3424 optab. OP1 is an rtx for the constant operand. */
3426 static rtx
3427 expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
3428 rtx target, int unsignedp, int max_cost)
3430 rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3431 enum machine_mode wider_mode;
3432 optab moptab;
3433 rtx tem;
3434 int size;
3435 bool speed = optimize_insn_for_speed_p ();
3437 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3439 wider_mode = GET_MODE_WIDER_MODE (mode);
3440 size = GET_MODE_BITSIZE (mode);
3442 /* Firstly, try using a multiplication insn that only generates the needed
3443 high part of the product, and in the sign flavor of unsignedp. */
3444 if (mul_highpart_cost[speed][mode] < max_cost)
3446 moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3447 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3448 unsignedp, OPTAB_DIRECT);
3449 if (tem)
3450 return tem;
3453 /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3454 Need to adjust the result after the multiplication. */
3455 if (size - 1 < BITS_PER_WORD
3456 && (mul_highpart_cost[speed][mode] + 2 * shift_cost[speed][mode][size-1]
3457 + 4 * add_cost[speed][mode] < max_cost))
3459 moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3460 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3461 unsignedp, OPTAB_DIRECT);
3462 if (tem)
3463 /* We used the wrong signedness. Adjust the result. */
3464 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3465 tem, unsignedp);
3468 /* Try widening multiplication. */
3469 moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3470 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3471 && mul_widen_cost[speed][wider_mode] < max_cost)
3473 tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3474 unsignedp, OPTAB_WIDEN);
3475 if (tem)
3476 return extract_high_half (mode, tem);
3479 /* Try widening the mode and perform a non-widening multiplication. */
3480 if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3481 && size - 1 < BITS_PER_WORD
3482 && mul_cost[speed][wider_mode] + shift_cost[speed][mode][size-1] < max_cost)
3484 rtx insns, wop0, wop1;
3486 /* We need to widen the operands, for example to ensure the
3487 constant multiplier is correctly sign or zero extended.
3488 Use a sequence to clean up any instructions emitted by
3489 the conversions if things don't work out. */
3490 start_sequence ();
3491 wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3492 wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3493 tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3494 unsignedp, OPTAB_WIDEN);
3495 insns = get_insns ();
3496 end_sequence ();
3498 if (tem)
3500 emit_insn (insns);
3501 return extract_high_half (mode, tem);
3505 /* Try widening multiplication of opposite signedness, and adjust. */
3506 moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3507 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3508 && size - 1 < BITS_PER_WORD
3509 && (mul_widen_cost[speed][wider_mode] + 2 * shift_cost[speed][mode][size-1]
3510 + 4 * add_cost[speed][mode] < max_cost))
3512 tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3513 NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3514 if (tem != 0)
3516 tem = extract_high_half (mode, tem);
3517 /* We used the wrong signedness. Adjust the result. */
3518 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3519 target, unsignedp);
3523 return 0;
3526 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3527 putting the high half of the result in TARGET if that is convenient,
3528 and return where the result is. If the operation cannot be performed,
3529 0 is returned.
3531 MODE is the mode of operation and result.
3533 UNSIGNEDP nonzero means unsigned multiply.
3535 MAX_COST is the total allowed cost for the expanded RTL. */
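/* For illustration (a sketch in plain C, not the RTL actually emitted;
   x and cnst1 stand for OP0 and the constant OP1): in SImode with
   UNSIGNEDP nonzero the value being computed is
     uint32_t high = (uint32_t) (((uint64_t) (uint32_t) x
                                  * (uint32_t) cnst1) >> 32);
   the signed variant sign-extends both operands to 64 bits first.  */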
3537 static rtx
3538 expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
3539 rtx target, int unsignedp, int max_cost)
3541 enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3542 unsigned HOST_WIDE_INT cnst1;
3543 int extra_cost;
3544 bool sign_adjust = false;
3545 enum mult_variant variant;
3546 struct algorithm alg;
3547 rtx tem;
3548 bool speed = optimize_insn_for_speed_p ();
3550 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3551 /* We can't support modes wider than HOST_BITS_PER_INT. */
3552 gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3554 cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3556 /* We can't optimize modes wider than BITS_PER_WORD.
3557 ??? We might be able to perform double-word arithmetic if
3558 mode == word_mode; however, all the cost calculations in
3559 synth_mult etc. assume single-word operations. */
3560 if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3561 return expand_mult_highpart_optab (mode, op0, op1, target,
3562 unsignedp, max_cost);
3564 extra_cost = shift_cost[speed][mode][GET_MODE_BITSIZE (mode) - 1];
3566 /* Check whether we are trying to multiply by a negative constant. */
3567 if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3569 sign_adjust = true;
3570 extra_cost += add_cost[speed][mode];
3573 /* See whether shift/add multiplication is cheap enough. */
3574 if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3575 max_cost - extra_cost))
3577 /* See whether the specialized multiplication optabs are
3578 cheaper than the shift/add version. */
3579 tem = expand_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3580 alg.cost.cost + extra_cost);
3581 if (tem)
3582 return tem;
3584 tem = convert_to_mode (wider_mode, op0, unsignedp);
3585 tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3586 tem = extract_high_half (mode, tem);
3588 /* Adjust result for signedness. */
3589 if (sign_adjust)
3590 tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3592 return tem;
3594 return expand_mult_highpart_optab (mode, op0, op1, target,
3595 unsignedp, max_cost);
3599 /* Expand signed modulus of OP0 by a power of two D in mode MODE. */
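/* For illustration (a sketch, assuming C's truncating division): the
   result must have the same sign as OP0, e.g. for d == 4,
     5 % 4 == 1,   -5 % 4 == -1,   -4 % 4 == 0,
   so a plain AND with (d - 1) is only correct for non-negative OP0;
   the code below adds a sign-dependent fix-up.  */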
3601 static rtx
3602 expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3604 unsigned HOST_WIDE_INT masklow, maskhigh;
3605 rtx result, temp, shift, label;
3606 int logd;
3608 logd = floor_log2 (d);
3609 result = gen_reg_rtx (mode);
3611 /* Avoid conditional branches when they're expensive. */
3612 if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3613 && optimize_insn_for_speed_p ())
3615 rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3616 mode, 0, -1);
3617 if (signmask)
3619 signmask = force_reg (mode, signmask);
3620 masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3621 shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3623 /* Use the rtx_cost of a LSHIFTRT instruction to determine
3624 which instruction sequence to use. If logical right shifts
3625 are expensive then use 2 XORs, 2 SUBs and an AND; otherwise
3626 use a LSHIFTRT, 1 ADD, 1 SUB and an AND. */
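/* Concretely (a sketch in C of the two alternatives below, with m
   standing for SIGNMASK, which is -1 for negative OP0 and 0 otherwise):
     XOR form:      r = ((((x ^ m) - m) & masklow) ^ m) - m;
     LSHIFTRT form: b = (unsigned) m >> (bits - logd);   /+ d-1 or 0 +/
                    r = ((x + b) & masklow) - b;
   (the inner comment markers are written as /+ +/ only to keep this
   block a single comment).  */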
3628 temp = gen_rtx_LSHIFTRT (mode, result, shift);
3629 if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3630 || (set_src_cost (temp, optimize_insn_for_speed_p ())
3631 > COSTS_N_INSNS (2)))
3633 temp = expand_binop (mode, xor_optab, op0, signmask,
3634 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3635 temp = expand_binop (mode, sub_optab, temp, signmask,
3636 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3637 temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3638 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3639 temp = expand_binop (mode, xor_optab, temp, signmask,
3640 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3641 temp = expand_binop (mode, sub_optab, temp, signmask,
3642 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3644 else
3646 signmask = expand_binop (mode, lshr_optab, signmask, shift,
3647 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3648 signmask = force_reg (mode, signmask);
3650 temp = expand_binop (mode, add_optab, op0, signmask,
3651 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3652 temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3653 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3654 temp = expand_binop (mode, sub_optab, temp, signmask,
3655 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3657 return temp;
3661 /* Mask contains the mode's signbit and the significant bits of the
3662 modulus. By including the signbit in the operation, many targets
3663 can avoid an explicit compare operation in the following comparison
3664 against zero. */
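/* For illustration (a sketch in C for a 32-bit int and d == 2**k of the
   branching fall-back emitted below):
     r = x & (0x80000000u | (d - 1));     if ((int) r < 0)
       r = ((r - 1) | ~(d - 1)) + 1;
   e.g. x == -5, d == 4: r starts as 0x80000003 and the fix-up
   produces -1, the truncating remainder.  */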
3666 masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3667 if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
3669 masklow |= (HOST_WIDE_INT) -1 << (GET_MODE_BITSIZE (mode) - 1);
3670 maskhigh = -1;
3672 else
3673 maskhigh = (HOST_WIDE_INT) -1
3674 << (GET_MODE_BITSIZE (mode) - HOST_BITS_PER_WIDE_INT - 1);
3676 temp = expand_binop (mode, and_optab, op0,
3677 immed_double_const (masklow, maskhigh, mode),
3678 result, 1, OPTAB_LIB_WIDEN);
3679 if (temp != result)
3680 emit_move_insn (result, temp);
3682 label = gen_label_rtx ();
3683 do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3685 temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3686 0, OPTAB_LIB_WIDEN);
3687 masklow = (HOST_WIDE_INT) -1 << logd;
3688 maskhigh = -1;
3689 temp = expand_binop (mode, ior_optab, temp,
3690 immed_double_const (masklow, maskhigh, mode),
3691 result, 1, OPTAB_LIB_WIDEN);
3692 temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3693 0, OPTAB_LIB_WIDEN);
3694 if (temp != result)
3695 emit_move_insn (result, temp);
3696 emit_label (label);
3697 return result;
3700 /* Expand signed division of OP0 by a power of two D in mode MODE.
3701 This routine is only called for positive values of D. */
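/* For illustration (a sketch in C of the branch-free form for a 32-bit
   int and d == 2**k; it relies on GCC's arithmetic right shift of
   negative values):
     unsigned bias = (unsigned) (x >> 31) >> (32 - k);  /+ d-1 if x < 0 +/
     return (x + (int) bias) >> k;
   e.g. -5 / 4 gives -1 rather than the -2 that a plain x >> 2 would
   give (the inner markers are written as /+ +/ to keep this one comment).  */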
3703 static rtx
3704 expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3706 rtx temp, label;
3707 int logd;
3709 logd = floor_log2 (d);
3711 if (d == 2
3712 && BRANCH_COST (optimize_insn_for_speed_p (),
3713 false) >= 1)
3715 temp = gen_reg_rtx (mode);
3716 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3717 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3718 0, OPTAB_LIB_WIDEN);
3719 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3722 #ifdef HAVE_conditional_move
3723 if (BRANCH_COST (optimize_insn_for_speed_p (), false)
3724 >= 2)
3726 rtx temp2;
3728 /* ??? emit_conditional_move forces a stack adjustment via
3729 compare_from_rtx so, if the sequence is discarded, it will
3730 be lost. Do it now instead. */
3731 do_pending_stack_adjust ();
3733 start_sequence ();
3734 temp2 = copy_to_mode_reg (mode, op0);
3735 temp = expand_binop (mode, add_optab, temp2, GEN_INT (d-1),
3736 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3737 temp = force_reg (mode, temp);
3739 /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */
3740 temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3741 mode, temp, temp2, mode, 0);
3742 if (temp2)
3744 rtx seq = get_insns ();
3745 end_sequence ();
3746 emit_insn (seq);
3747 return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
3749 end_sequence ();
3751 #endif
3753 if (BRANCH_COST (optimize_insn_for_speed_p (),
3754 false) >= 2)
3756 int ushift = GET_MODE_BITSIZE (mode) - logd;
3758 temp = gen_reg_rtx (mode);
3759 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3760 if (shift_cost[optimize_insn_for_speed_p ()][mode][ushift] > COSTS_N_INSNS (1))
3761 temp = expand_binop (mode, and_optab, temp, GEN_INT (d - 1),
3762 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3763 else
3764 temp = expand_shift (RSHIFT_EXPR, mode, temp,
3765 ushift, NULL_RTX, 1);
3766 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3767 0, OPTAB_LIB_WIDEN);
3768 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3771 label = gen_label_rtx ();
3772 temp = copy_to_mode_reg (mode, op0);
3773 do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3774 expand_inc (temp, GEN_INT (d - 1));
3775 emit_label (label);
3776 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3779 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3780 if that is convenient, and returning where the result is.
3781 You may request either the quotient or the remainder as the result;
3782 specify REM_FLAG nonzero to get the remainder.
3784 CODE is the expression code for which kind of division this is;
3785 it controls how rounding is done. MODE is the machine mode to use.
3786 UNSIGNEDP nonzero means do unsigned division. */
3788 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3789 and then correct it by or'ing in missing high bits
3790 if result of ANDI is nonzero.
3791 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3792 This could optimize to a bfexts instruction.
3793 But C doesn't use these operations, so their optimizations are
3794 left for later. */
3795 /* ??? For modulo, we don't actually need the highpart of the first product,
3796 the low part will do nicely. And for small divisors, the second multiply
3797 can also be a low-part only multiply or even be completely left out.
3798 E.g. to calculate the remainder of a division by 3 with a 32 bit
3799 multiply, multiply with 0x55555556 and extract the upper two bits;
3800 the result is exact for inputs up to 0x1fffffff.
3801 The input range can be reduced by using cross-sum rules.
3802 For odd divisors >= 3, the following table gives right shift counts
3803 so that if a number is shifted by an integer multiple of the given
3804 amount, the remainder stays the same:
3805 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3806 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3807 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3808 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3809 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3811 Cross-sum rules for even numbers can be derived by leaving as many bits
3812 to the right alone as the divisor has zeros to the right.
3813 E.g. if x is an unsigned 32 bit number:
3814 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28  */
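/* Worked example of the remainder-by-3 trick above (a sketch, 32-bit
   unsigned arithmetic): 0x55555556 is ceil (2**32 / 3), so the top two
   bits of the low half of the product hold the remainder:
     r = (uint32_t) (x * 0x55555556u) >> 30;
   e.g. x == 5: 5 * 0x55555556 == 0x1AAAAAAAE, whose low half is
   0xAAAAAAAE, and 0xAAAAAAAE >> 30 == 2 == 5 % 3.  The comment above
   notes this is exact for x <= 0x1fffffff.  */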
rtx
3818 expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3819 rtx op0, rtx op1, rtx target, int unsignedp)
3821 enum machine_mode compute_mode;
3822 rtx tquotient;
3823 rtx quotient = 0, remainder = 0;
3824 rtx last;
3825 int size;
3826 rtx insn, set;
3827 optab optab1, optab2;
3828 int op1_is_constant, op1_is_pow2 = 0;
3829 int max_cost, extra_cost;
3830 static HOST_WIDE_INT last_div_const = 0;
3831 static HOST_WIDE_INT ext_op1;
3832 bool speed = optimize_insn_for_speed_p ();
3834 op1_is_constant = CONST_INT_P (op1);
3835 if (op1_is_constant)
3837 ext_op1 = INTVAL (op1);
3838 if (unsignedp)
3839 ext_op1 &= GET_MODE_MASK (mode);
3840 op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3841 || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3845 This is the structure of expand_divmod:
3847 First comes code to fix up the operands so we can perform the operations
3848 correctly and efficiently.
3850 Second comes a switch statement with code specific for each rounding mode.
3851 For some special operands this code emits all RTL for the desired
3852 operation, for other cases, it generates only a quotient and stores it in
3853 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
3854 to indicate that it has not done anything.
3856 Last comes code that finishes the operation. If QUOTIENT is set and
3857 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
3858 QUOTIENT is not set, it is computed using trunc rounding.
3860 We try to generate special code for division and remainder when OP1 is a
3861 constant. If |OP1| = 2**n we can use shifts and some other fast
3862 operations. For other values of OP1, we compute a carefully selected
3863 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3864 by m.
3866 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3867 half of the product. Different strategies for generating the product are
3868 implemented in expand_mult_highpart.
3870 If what we actually want is the remainder, we generate that by another
3871 by-constant multiplication and a subtraction. */
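/* For illustration (a sketch of the unsigned multiply-high strategy used
   below; m and post_shift come from choose_multiplier, and umulh stands
   for the high half of the SIZE x SIZE -> 2*SIZE unsigned product):
     q = umulh (x, m) >> post_shift;
   or, when the round-up multiplier needs SIZE + 1 bits (the mh != 0 case),
     t = umulh (x, m);
     q = (t + ((x - t) >> 1)) >> (post_shift - 1);  */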
3873 /* We shouldn't be called with OP1 == const1_rtx, but some of the
3874 code below will malfunction if we are, so check here and handle
3875 the special case if so. */
3876 if (op1 == const1_rtx)
3877 return rem_flag ? const0_rtx : op0;
3879 /* When dividing by -1, we could get an overflow.
3880 negv_optab can handle overflows. */
3881 if (! unsignedp && op1 == constm1_rtx)
3883 if (rem_flag)
3884 return const0_rtx;
3885 return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT
3886 ? negv_optab : neg_optab, op0, target, 0);
3889 if (target
3890 /* Don't use the function value register as a target
3891 since we have to read it as well as write it,
3892 and function-inlining gets confused by this. */
3893 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3894 /* Don't clobber an operand while doing a multi-step calculation. */
3895 || ((rem_flag || op1_is_constant)
3896 && (reg_mentioned_p (target, op0)
3897 || (MEM_P (op0) && MEM_P (target))))
3898 || reg_mentioned_p (target, op1)
3899 || (MEM_P (op1) && MEM_P (target))))
3900 target = 0;
3902 /* Get the mode in which to perform this computation. Normally it will
3903 be MODE, but sometimes we can't do the desired operation in MODE.
3904 If so, pick a wider mode in which we can do the operation. Convert
3905 to that mode at the start to avoid repeated conversions.
3907 First see what operations we need. These depend on the expression
3908 we are evaluating. (We assume that divxx3 insns exist under the
3909 same conditions as modxx3 insns and that these insns don't normally
3910 fail. If these assumptions are not correct, we may generate less
3911 efficient code in some cases.)
3913 Then see if we find a mode in which we can open-code that operation
3914 (either a division, modulus, or shift). Finally, check for the smallest
3915 mode for which we can do the operation with a library call. */
3917 /* We might want to refine this now that we have division-by-constant
3918 optimization. Since expand_mult_highpart tries so many variants, it is
3919 not straightforward to generalize this. Maybe we should make an array
3920 of possible modes in init_expmed? Save this for GCC 2.7. */
3922 optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3923 ? (unsignedp ? lshr_optab : ashr_optab)
3924 : (unsignedp ? udiv_optab : sdiv_optab));
3925 optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3926 ? optab1
3927 : (unsignedp ? udivmod_optab : sdivmod_optab));
3929 for (compute_mode = mode; compute_mode != VOIDmode;
3930 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3931 if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
3932 || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
3933 break;
3935 if (compute_mode == VOIDmode)
3936 for (compute_mode = mode; compute_mode != VOIDmode;
3937 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3938 if (optab_libfunc (optab1, compute_mode)
3939 || optab_libfunc (optab2, compute_mode))
3940 break;
3942 /* If we still couldn't find a mode, use MODE, but expand_binop will
3943 probably die. */
3944 if (compute_mode == VOIDmode)
3945 compute_mode = mode;
3947 if (target && GET_MODE (target) == compute_mode)
3948 tquotient = target;
3949 else
3950 tquotient = gen_reg_rtx (compute_mode);
3952 size = GET_MODE_BITSIZE (compute_mode);
3953 #if 0
3954 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3955 (mode), and thereby get better code when OP1 is a constant. Do that
3956 later. It will require going over all usages of SIZE below. */
3957 size = GET_MODE_BITSIZE (mode);
3958 #endif
3960 /* Only deduct something for a REM if the last divide done was
3961 for a different constant. Then set the constant of the last
3962 divide. */
3963 max_cost = unsignedp ? udiv_cost[speed][compute_mode] : sdiv_cost[speed][compute_mode];
3964 if (rem_flag && ! (last_div_const != 0 && op1_is_constant
3965 && INTVAL (op1) == last_div_const))
3966 max_cost -= mul_cost[speed][compute_mode] + add_cost[speed][compute_mode];
3968 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
3970 /* Now convert to the best mode to use. */
3971 if (compute_mode != mode)
3973 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
3974 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
3976 /* convert_modes may have placed op1 into a register, so we
3977 must recompute the following. */
3978 op1_is_constant = CONST_INT_P (op1);
3979 op1_is_pow2 = (op1_is_constant
3980 && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
3981 || (! unsignedp
3982 && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1)))))) ;
3985 /* If one of the operands is a volatile MEM, copy it into a register. */
3987 if (MEM_P (op0) && MEM_VOLATILE_P (op0))
3988 op0 = force_reg (compute_mode, op0);
3989 if (MEM_P (op1) && MEM_VOLATILE_P (op1))
3990 op1 = force_reg (compute_mode, op1);
3992 /* If we need the remainder or if OP1 is constant, we need to
3993 put OP0 in a register in case it has any queued subexpressions. */
3994 if (rem_flag || op1_is_constant)
3995 op0 = force_reg (compute_mode, op0);
3997 last = get_last_insn ();
3999 /* Promote floor rounding to trunc rounding for unsigned operations. */
4000 if (unsignedp)
4002 if (code == FLOOR_DIV_EXPR)
4003 code = TRUNC_DIV_EXPR;
4004 if (code == FLOOR_MOD_EXPR)
4005 code = TRUNC_MOD_EXPR;
4006 if (code == EXACT_DIV_EXPR && op1_is_pow2)
4007 code = TRUNC_DIV_EXPR;
4010 if (op1 != const0_rtx)
4011 switch (code)
4013 case TRUNC_MOD_EXPR:
4014 case TRUNC_DIV_EXPR:
4015 if (op1_is_constant)
4017 if (unsignedp)
4019 unsigned HOST_WIDE_INT mh;
4020 int pre_shift, post_shift;
4021 int dummy;
4022 rtx ml;
4023 unsigned HOST_WIDE_INT d = (INTVAL (op1)
4024 & GET_MODE_MASK (compute_mode));
4026 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4028 pre_shift = floor_log2 (d);
4029 if (rem_flag)
4031 remainder
4032 = expand_binop (compute_mode, and_optab, op0,
4033 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4034 remainder, 1,
4035 OPTAB_LIB_WIDEN);
4036 if (remainder)
4037 return gen_lowpart (mode, remainder);
4039 quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4040 pre_shift, tquotient, 1);
4042 else if (size <= HOST_BITS_PER_WIDE_INT)
4044 if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
4046 /* Most significant bit of divisor is set; emit an scc
4047 insn. */
4048 quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4049 compute_mode, 1, 1);
4051 else
4053 /* Find a suitable multiplier and right shift count
4054 instead of dividing by D. */
4056 mh = choose_multiplier (d, size, size,
4057 &ml, &post_shift, &dummy);
4059 /* If the suggested multiplier is more than SIZE bits,
4060 we can do better for even divisors, using an
4061 initial right shift. */
4062 if (mh != 0 && (d & 1) == 0)
4064 pre_shift = floor_log2 (d & -d);
4065 mh = choose_multiplier (d >> pre_shift, size,
4066 size - pre_shift,
4067 &ml, &post_shift, &dummy);
4068 gcc_assert (!mh);
4070 else
4071 pre_shift = 0;
4073 if (mh != 0)
4075 rtx t1, t2, t3, t4;
4077 if (post_shift - 1 >= BITS_PER_WORD)
4078 goto fail1;
4080 extra_cost
4081 = (shift_cost[speed][compute_mode][post_shift - 1]
4082 + shift_cost[speed][compute_mode][1]
4083 + 2 * add_cost[speed][compute_mode]);
4084 t1 = expand_mult_highpart (compute_mode, op0, ml,
4085 NULL_RTX, 1,
4086 max_cost - extra_cost);
4087 if (t1 == 0)
4088 goto fail1;
4089 t2 = force_operand (gen_rtx_MINUS (compute_mode,
4090 op0, t1),
4091 NULL_RTX);
4092 t3 = expand_shift (RSHIFT_EXPR, compute_mode,
4093 t2, 1, NULL_RTX, 1);
4094 t4 = force_operand (gen_rtx_PLUS (compute_mode,
4095 t1, t3),
4096 NULL_RTX);
4097 quotient = expand_shift
4098 (RSHIFT_EXPR, compute_mode, t4,
4099 post_shift - 1, tquotient, 1);
4101 else
4103 rtx t1, t2;
4105 if (pre_shift >= BITS_PER_WORD
4106 || post_shift >= BITS_PER_WORD)
4107 goto fail1;
4109 t1 = expand_shift
4110 (RSHIFT_EXPR, compute_mode, op0,
4111 pre_shift, NULL_RTX, 1);
4112 extra_cost
4113 = (shift_cost[speed][compute_mode][pre_shift]
4114 + shift_cost[speed][compute_mode][post_shift]);
4115 t2 = expand_mult_highpart (compute_mode, t1, ml,
4116 NULL_RTX, 1,
4117 max_cost - extra_cost);
4118 if (t2 == 0)
4119 goto fail1;
4120 quotient = expand_shift
4121 (RSHIFT_EXPR, compute_mode, t2,
4122 post_shift, tquotient, 1);
4126 else /* Too wide mode to use tricky code */
4127 break;
4129 insn = get_last_insn ();
4130 if (insn != last
4131 && (set = single_set (insn)) != 0
4132 && SET_DEST (set) == quotient)
4133 set_unique_reg_note (insn,
4134 REG_EQUAL,
4135 gen_rtx_UDIV (compute_mode, op0, op1));
4137 else /* TRUNC_DIV, signed */
4139 unsigned HOST_WIDE_INT ml;
4140 int lgup, post_shift;
4141 rtx mlr;
4142 HOST_WIDE_INT d = INTVAL (op1);
4143 unsigned HOST_WIDE_INT abs_d;
4145 /* Since d might be INT_MIN, we have to cast to
4146 unsigned HOST_WIDE_INT before negating to avoid
4147 undefined signed overflow. */
4148 abs_d = (d >= 0
4149 ? (unsigned HOST_WIDE_INT) d
4150 : - (unsigned HOST_WIDE_INT) d);
4152 /* n rem d = n rem -d */
4153 if (rem_flag && d < 0)
4155 d = abs_d;
4156 op1 = gen_int_mode (abs_d, compute_mode);
4159 if (d == 1)
4160 quotient = op0;
4161 else if (d == -1)
4162 quotient = expand_unop (compute_mode, neg_optab, op0,
4163 tquotient, 0);
4164 else if (HOST_BITS_PER_WIDE_INT >= size
4165 && abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4167 /* This case is not handled correctly below. */
4168 quotient = emit_store_flag (tquotient, EQ, op0, op1,
4169 compute_mode, 1, 1);
4170 if (quotient == 0)
4171 goto fail1;
4173 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4174 && (rem_flag ? smod_pow2_cheap[speed][compute_mode]
4175 : sdiv_pow2_cheap[speed][compute_mode])
4176 /* We assume that the cheap metric is true if the
4177 optab has an expander for this mode. */
4178 && ((optab_handler ((rem_flag ? smod_optab
4179 : sdiv_optab),
4180 compute_mode)
4181 != CODE_FOR_nothing)
4182 || (optab_handler (sdivmod_optab,
4183 compute_mode)
4184 != CODE_FOR_nothing)))
4186 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4188 if (rem_flag)
4190 remainder = expand_smod_pow2 (compute_mode, op0, d);
4191 if (remainder)
4192 return gen_lowpart (mode, remainder);
4195 if (sdiv_pow2_cheap[speed][compute_mode]
4196 && ((optab_handler (sdiv_optab, compute_mode)
4197 != CODE_FOR_nothing)
4198 || (optab_handler (sdivmod_optab, compute_mode)
4199 != CODE_FOR_nothing)))
4200 quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4201 compute_mode, op0,
4202 gen_int_mode (abs_d,
4203 compute_mode),
4204 NULL_RTX, 0);
4205 else
4206 quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4208 /* We have computed OP0 / abs(OP1). If OP1 is negative,
4209 negate the quotient. */
4210 if (d < 0)
4212 insn = get_last_insn ();
4213 if (insn != last
4214 && (set = single_set (insn)) != 0
4215 && SET_DEST (set) == quotient
4216 && abs_d < ((unsigned HOST_WIDE_INT) 1
4217 << (HOST_BITS_PER_WIDE_INT - 1)))
4218 set_unique_reg_note (insn,
4219 REG_EQUAL,
4220 gen_rtx_DIV (compute_mode,
4221 op0,
4222 GEN_INT
4223 (trunc_int_for_mode
4224 (abs_d,
4225 compute_mode))));
4227 quotient = expand_unop (compute_mode, neg_optab,
4228 quotient, quotient, 0);
4231 else if (size <= HOST_BITS_PER_WIDE_INT)
4233 choose_multiplier (abs_d, size, size - 1,
4234 &mlr, &post_shift, &lgup);
4235 ml = (unsigned HOST_WIDE_INT) INTVAL (mlr);
4236 if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4238 rtx t1, t2, t3;
4240 if (post_shift >= BITS_PER_WORD
4241 || size - 1 >= BITS_PER_WORD)
4242 goto fail1;
4244 extra_cost = (shift_cost[speed][compute_mode][post_shift]
4245 + shift_cost[speed][compute_mode][size - 1]
4246 + add_cost[speed][compute_mode]);
4247 t1 = expand_mult_highpart (compute_mode, op0, mlr,
4248 NULL_RTX, 0,
4249 max_cost - extra_cost);
4250 if (t1 == 0)
4251 goto fail1;
4252 t2 = expand_shift
4253 (RSHIFT_EXPR, compute_mode, t1,
4254 post_shift, NULL_RTX, 0);
4255 t3 = expand_shift
4256 (RSHIFT_EXPR, compute_mode, op0,
4257 size - 1, NULL_RTX, 0);
4258 if (d < 0)
4259 quotient
4260 = force_operand (gen_rtx_MINUS (compute_mode,
4261 t3, t2),
4262 tquotient);
4263 else
4264 quotient
4265 = force_operand (gen_rtx_MINUS (compute_mode,
4266 t2, t3),
4267 tquotient);
4269 else
4271 rtx t1, t2, t3, t4;
4273 if (post_shift >= BITS_PER_WORD
4274 || size - 1 >= BITS_PER_WORD)
4275 goto fail1;
4277 ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4278 mlr = gen_int_mode (ml, compute_mode);
4279 extra_cost = (shift_cost[speed][compute_mode][post_shift]
4280 + shift_cost[speed][compute_mode][size - 1]
4281 + 2 * add_cost[speed][compute_mode]);
4282 t1 = expand_mult_highpart (compute_mode, op0, mlr,
4283 NULL_RTX, 0,
4284 max_cost - extra_cost);
4285 if (t1 == 0)
4286 goto fail1;
4287 t2 = force_operand (gen_rtx_PLUS (compute_mode,
4288 t1, op0),
4289 NULL_RTX);
4290 t3 = expand_shift
4291 (RSHIFT_EXPR, compute_mode, t2,
4292 post_shift, NULL_RTX, 0);
4293 t4 = expand_shift
4294 (RSHIFT_EXPR, compute_mode, op0,
4295 size - 1, NULL_RTX, 0);
4296 if (d < 0)
4297 quotient
4298 = force_operand (gen_rtx_MINUS (compute_mode,
4299 t4, t3),
4300 tquotient);
4301 else
4302 quotient
4303 = force_operand (gen_rtx_MINUS (compute_mode,
4304 t3, t4),
4305 tquotient);
4308 else /* Too wide mode to use tricky code */
4309 break;
4311 insn = get_last_insn ();
4312 if (insn != last
4313 && (set = single_set (insn)) != 0
4314 && SET_DEST (set) == quotient)
4315 set_unique_reg_note (insn,
4316 REG_EQUAL,
4317 gen_rtx_DIV (compute_mode, op0, op1));
4319 break;
4321 fail1:
4322 delete_insns_since (last);
4323 break;
4325 case FLOOR_DIV_EXPR:
4326 case FLOOR_MOD_EXPR:
4327 /* We will come here only for signed operations. */
4328 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4330 unsigned HOST_WIDE_INT mh;
4331 int pre_shift, lgup, post_shift;
4332 HOST_WIDE_INT d = INTVAL (op1);
4333 rtx ml;
4335 if (d > 0)
4337 /* We could just as easily deal with negative constants here,
4338 but it does not seem worth the trouble for GCC 2.6. */
4339 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4341 pre_shift = floor_log2 (d);
4342 if (rem_flag)
4344 remainder = expand_binop (compute_mode, and_optab, op0,
4345 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4346 remainder, 0, OPTAB_LIB_WIDEN);
4347 if (remainder)
4348 return gen_lowpart (mode, remainder);
4350 quotient = expand_shift
4351 (RSHIFT_EXPR, compute_mode, op0,
4352 pre_shift, tquotient, 0);
4354 else
4356 rtx t1, t2, t3, t4;
4358 mh = choose_multiplier (d, size, size - 1,
4359 &ml, &post_shift, &lgup);
4360 gcc_assert (!mh);
4362 if (post_shift < BITS_PER_WORD
4363 && size - 1 < BITS_PER_WORD)
4365 t1 = expand_shift
4366 (RSHIFT_EXPR, compute_mode, op0,
4367 size - 1, NULL_RTX, 0);
4368 t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4369 NULL_RTX, 0, OPTAB_WIDEN);
4370 extra_cost = (shift_cost[speed][compute_mode][post_shift]
4371 + shift_cost[speed][compute_mode][size - 1]
4372 + 2 * add_cost[speed][compute_mode]);
4373 t3 = expand_mult_highpart (compute_mode, t2, ml,
4374 NULL_RTX, 1,
4375 max_cost - extra_cost);
4376 if (t3 != 0)
4378 t4 = expand_shift
4379 (RSHIFT_EXPR, compute_mode, t3,
4380 post_shift, NULL_RTX, 1);
4381 quotient = expand_binop (compute_mode, xor_optab,
4382 t4, t1, tquotient, 0,
4383 OPTAB_WIDEN);
4388 else
4390 rtx nsign, t1, t2, t3, t4;
4391 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4392 op0, constm1_rtx), NULL_RTX);
4393 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4394 0, OPTAB_WIDEN);
4395 nsign = expand_shift
4396 (RSHIFT_EXPR, compute_mode, t2,
4397 size - 1, NULL_RTX, 0);
4398 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4399 NULL_RTX);
4400 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4401 NULL_RTX, 0);
4402 if (t4)
4404 rtx t5;
4405 t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4406 NULL_RTX, 0);
4407 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4408 t4, t5),
4409 tquotient);
4414 if (quotient != 0)
4415 break;
4416 delete_insns_since (last);
4418 /* Try using an instruction that produces both the quotient and
4419 remainder, using truncation. We can easily compensate the quotient
4420 or remainder to get floor rounding, once we have the remainder.
4421 Notice that we also compute the final remainder value here,
4422 and return the result right away. */
4423 if (target == 0 || GET_MODE (target) != compute_mode)
4424 target = gen_reg_rtx (compute_mode);
4426 if (rem_flag)
4428 remainder
4429 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4430 quotient = gen_reg_rtx (compute_mode);
4432 else
4434 quotient
4435 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4436 remainder = gen_reg_rtx (compute_mode);
4439 if (expand_twoval_binop (sdivmod_optab, op0, op1,
4440 quotient, remainder, 0))
4442 /* This could be computed with a branch-less sequence.
4443 Save that for later. */
4444 rtx tem;
4445 rtx label = gen_label_rtx ();
4446 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4447 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4448 NULL_RTX, 0, OPTAB_WIDEN);
4449 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4450 expand_dec (quotient, const1_rtx);
4451 expand_inc (remainder, op1);
4452 emit_label (label);
4453 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4456 /* No luck with division elimination or divmod. Have to do it
4457 by conditionally adjusting op0 *and* the result. */
4459 rtx label1, label2, label3, label4, label5;
4460 rtx adjusted_op0;
4461 rtx tem;
4463 quotient = gen_reg_rtx (compute_mode);
4464 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4465 label1 = gen_label_rtx ();
4466 label2 = gen_label_rtx ();
4467 label3 = gen_label_rtx ();
4468 label4 = gen_label_rtx ();
4469 label5 = gen_label_rtx ();
4470 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4471 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4472 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4473 quotient, 0, OPTAB_LIB_WIDEN);
4474 if (tem != quotient)
4475 emit_move_insn (quotient, tem);
4476 emit_jump_insn (gen_jump (label5));
4477 emit_barrier ();
4478 emit_label (label1);
4479 expand_inc (adjusted_op0, const1_rtx);
4480 emit_jump_insn (gen_jump (label4));
4481 emit_barrier ();
4482 emit_label (label2);
4483 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4484 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4485 quotient, 0, OPTAB_LIB_WIDEN);
4486 if (tem != quotient)
4487 emit_move_insn (quotient, tem);
4488 emit_jump_insn (gen_jump (label5));
4489 emit_barrier ();
4490 emit_label (label3);
4491 expand_dec (adjusted_op0, const1_rtx);
4492 emit_label (label4);
4493 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4494 quotient, 0, OPTAB_LIB_WIDEN);
4495 if (tem != quotient)
4496 emit_move_insn (quotient, tem);
4497 expand_dec (quotient, const1_rtx);
4498 emit_label (label5);
4500 break;
4502 case CEIL_DIV_EXPR:
4503 case CEIL_MOD_EXPR:
4504 if (unsignedp)
4506 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4508 rtx t1, t2, t3;
4509 unsigned HOST_WIDE_INT d = INTVAL (op1);
4510 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4511 floor_log2 (d), tquotient, 1);
4512 t2 = expand_binop (compute_mode, and_optab, op0,
4513 GEN_INT (d - 1),
4514 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4515 t3 = gen_reg_rtx (compute_mode);
4516 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4517 compute_mode, 1, 1);
4518 if (t3 == 0)
4520 rtx lab;
4521 lab = gen_label_rtx ();
4522 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4523 expand_inc (t1, const1_rtx);
4524 emit_label (lab);
4525 quotient = t1;
4527 else
4528 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4529 t1, t3),
4530 tquotient);
4531 break;
4534 /* Try using an instruction that produces both the quotient and
4535 remainder, using truncation. We can easily compensate the
4536 quotient or remainder to get ceiling rounding, once we have the
4537 remainder. Notice that we also compute the final remainder
4538 value here, and return the result right away. */
4539 if (target == 0 || GET_MODE (target) != compute_mode)
4540 target = gen_reg_rtx (compute_mode);
4542 if (rem_flag)
4544 remainder = (REG_P (target)
4545 ? target : gen_reg_rtx (compute_mode));
4546 quotient = gen_reg_rtx (compute_mode);
4548 else
4550 quotient = (REG_P (target)
4551 ? target : gen_reg_rtx (compute_mode));
4552 remainder = gen_reg_rtx (compute_mode);
4555 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4556 remainder, 1))
4558 /* This could be computed with a branch-less sequence.
4559 Save that for later. */
4560 rtx label = gen_label_rtx ();
4561 do_cmp_and_jump (remainder, const0_rtx, EQ,
4562 compute_mode, label);
4563 expand_inc (quotient, const1_rtx);
4564 expand_dec (remainder, op1);
4565 emit_label (label);
4566 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4569 /* No luck with division elimination or divmod. Have to do it
4570 by conditionally adjusting op0 *and* the result. */
4572 rtx label1, label2;
4573 rtx adjusted_op0, tem;
4575 quotient = gen_reg_rtx (compute_mode);
4576 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4577 label1 = gen_label_rtx ();
4578 label2 = gen_label_rtx ();
4579 do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4580 compute_mode, label1);
4581 emit_move_insn (quotient, const0_rtx);
4582 emit_jump_insn (gen_jump (label2));
4583 emit_barrier ();
4584 emit_label (label1);
4585 expand_dec (adjusted_op0, const1_rtx);
4586 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4587 quotient, 1, OPTAB_LIB_WIDEN);
4588 if (tem != quotient)
4589 emit_move_insn (quotient, tem);
4590 expand_inc (quotient, const1_rtx);
4591 emit_label (label2);
4594 else /* signed */
4596 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4597 && INTVAL (op1) >= 0)
4599 /* This is extremely similar to the code for the unsigned case
4600 above. For 2.7 we should merge these variants, but for
4601 2.6.1 I don't want to touch the code for unsigned since that
4602 gets used in C. The signed case will only be used by other
4603 languages (Ada). */
4605 rtx t1, t2, t3;
4606 unsigned HOST_WIDE_INT d = INTVAL (op1);
4607 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4608 floor_log2 (d), tquotient, 0);
4609 t2 = expand_binop (compute_mode, and_optab, op0,
4610 GEN_INT (d - 1),
4611 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4612 t3 = gen_reg_rtx (compute_mode);
4613 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4614 compute_mode, 1, 1);
4615 if (t3 == 0)
4617 rtx lab;
4618 lab = gen_label_rtx ();
4619 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4620 expand_inc (t1, const1_rtx);
4621 emit_label (lab);
4622 quotient = t1;
4624 else
4625 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4626 t1, t3),
4627 tquotient);
4628 break;
4631 /* Try using an instruction that produces both the quotient and
4632 remainder, using truncation. We can easily compensate the
4633 quotient or remainder to get ceiling rounding, once we have the
4634 remainder. Notice that we also compute the final remainder
4635 value here, and return the result right away. */
4636 if (target == 0 || GET_MODE (target) != compute_mode)
4637 target = gen_reg_rtx (compute_mode);
4638 if (rem_flag)
4640 remainder = (REG_P (target)
4641 ? target : gen_reg_rtx (compute_mode));
4642 quotient = gen_reg_rtx (compute_mode);
4644 else
4646 quotient = (REG_P (target)
4647 ? target : gen_reg_rtx (compute_mode));
4648 remainder = gen_reg_rtx (compute_mode);
4651 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4652 remainder, 0))
4654 /* This could be computed with a branch-less sequence.
4655 Save that for later. */
4656 rtx tem;
4657 rtx label = gen_label_rtx ();
4658 do_cmp_and_jump (remainder, const0_rtx, EQ,
4659 compute_mode, label);
4660 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4661 NULL_RTX, 0, OPTAB_WIDEN);
4662 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4663 expand_inc (quotient, const1_rtx);
4664 expand_dec (remainder, op1);
4665 emit_label (label);
4666 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4669 /* No luck with division elimination or divmod. Have to do it
4670 by conditionally adjusting op0 *and* the result. */
4672 rtx label1, label2, label3, label4, label5;
4673 rtx adjusted_op0;
4674 rtx tem;
4676 quotient = gen_reg_rtx (compute_mode);
4677 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4678 label1 = gen_label_rtx ();
4679 label2 = gen_label_rtx ();
4680 label3 = gen_label_rtx ();
4681 label4 = gen_label_rtx ();
4682 label5 = gen_label_rtx ();
4683 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4684 do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4685 compute_mode, label1);
4686 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4687 quotient, 0, OPTAB_LIB_WIDEN);
4688 if (tem != quotient)
4689 emit_move_insn (quotient, tem);
4690 emit_jump_insn (gen_jump (label5));
4691 emit_barrier ();
4692 emit_label (label1);
4693 expand_dec (adjusted_op0, const1_rtx);
4694 emit_jump_insn (gen_jump (label4));
4695 emit_barrier ();
4696 emit_label (label2);
4697 do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4698 compute_mode, label3);
4699 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4700 quotient, 0, OPTAB_LIB_WIDEN);
4701 if (tem != quotient)
4702 emit_move_insn (quotient, tem);
4703 emit_jump_insn (gen_jump (label5));
4704 emit_barrier ();
4705 emit_label (label3);
4706 expand_inc (adjusted_op0, const1_rtx);
4707 emit_label (label4);
4708 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4709 quotient, 0, OPTAB_LIB_WIDEN);
4710 if (tem != quotient)
4711 emit_move_insn (quotient, tem);
4712 expand_inc (quotient, const1_rtx);
4713 emit_label (label5);
4716 break;
4718 case EXACT_DIV_EXPR:
4719 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4721 HOST_WIDE_INT d = INTVAL (op1);
4722 unsigned HOST_WIDE_INT ml;
4723 int pre_shift;
4724 rtx t1;
4726 pre_shift = floor_log2 (d & -d);
4727 ml = invert_mod2n (d >> pre_shift, size);
4728 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4729 pre_shift, NULL_RTX, unsignedp);
4730 quotient = expand_mult (compute_mode, t1,
4731 gen_int_mode (ml, compute_mode),
4732 NULL_RTX, 1);
4734 insn = get_last_insn ();
4735 set_unique_reg_note (insn,
4736 REG_EQUAL,
4737 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4738 compute_mode,
4739 op0, op1));
4741 break;
4743 case ROUND_DIV_EXPR:
4744 case ROUND_MOD_EXPR:
4745 if (unsignedp)
4747 rtx tem;
4748 rtx label;
4749 label = gen_label_rtx ();
4750 quotient = gen_reg_rtx (compute_mode);
4751 remainder = gen_reg_rtx (compute_mode);
4752 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4754 rtx tem;
4755 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4756 quotient, 1, OPTAB_LIB_WIDEN);
4757 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4758 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4759 remainder, 1, OPTAB_LIB_WIDEN);
4761 tem = plus_constant (op1, -1);
4762 tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1);
4763 do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4764 expand_inc (quotient, const1_rtx);
4765 expand_dec (remainder, op1);
4766 emit_label (label);
4768 else
4770 rtx abs_rem, abs_op1, tem, mask;
4771 rtx label;
4772 label = gen_label_rtx ();
4773 quotient = gen_reg_rtx (compute_mode);
4774 remainder = gen_reg_rtx (compute_mode);
4775 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4777 rtx tem;
4778 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4779 quotient, 0, OPTAB_LIB_WIDEN);
4780 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4781 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4782 remainder, 0, OPTAB_LIB_WIDEN);
4784 abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4785 abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4786 tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4787 1, NULL_RTX, 1);
4788 do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4789 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4790 NULL_RTX, 0, OPTAB_WIDEN);
4791 mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4792 size - 1, NULL_RTX, 0);
4793 tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4794 NULL_RTX, 0, OPTAB_WIDEN);
4795 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4796 NULL_RTX, 0, OPTAB_WIDEN);
4797 expand_inc (quotient, tem);
4798 tem = expand_binop (compute_mode, xor_optab, mask, op1,
4799 NULL_RTX, 0, OPTAB_WIDEN);
4800 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4801 NULL_RTX, 0, OPTAB_WIDEN);
4802 expand_dec (remainder, tem);
4803 emit_label (label);
4805 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4807 default:
4808 gcc_unreachable ();
4811 if (quotient == 0)
4813 if (target && GET_MODE (target) != compute_mode)
4814 target = 0;
4816 if (rem_flag)
4818 /* Try to produce the remainder without producing the quotient.
4819 If we seem to have a divmod pattern that does not require widening,
4820 don't try widening here. We should really have a WIDEN argument
4821 to expand_twoval_binop, since what we'd really like to do here is
4822 1) try a mod insn in compute_mode
4823 2) try a divmod insn in compute_mode
4824 3) try a div insn in compute_mode and multiply-subtract to get
4825 remainder
4826 4) try the same things with widening allowed. */
4827 remainder
4828 = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4829 op0, op1, target,
4830 unsignedp,
4831 ((optab_handler (optab2, compute_mode)
4832 != CODE_FOR_nothing)
4833 ? OPTAB_DIRECT : OPTAB_WIDEN));
4834 if (remainder == 0)
4836 /* No luck there. Can we do remainder and divide at once
4837 without a library call? */
4838 remainder = gen_reg_rtx (compute_mode);
4839 if (! expand_twoval_binop ((unsignedp
4840 ? udivmod_optab
4841 : sdivmod_optab),
4842 op0, op1,
4843 NULL_RTX, remainder, unsignedp))
4844 remainder = 0;
4847 if (remainder)
4848 return gen_lowpart (mode, remainder);
4851 /* Produce the quotient. Try a quotient insn, but not a library call.
4852 If we have a divmod in this mode, use it in preference to widening
4853 the div (for this test we assume it will not fail). Note that optab2
4854 is set to the one of the two optabs that the call below will use. */
4855 quotient
4856 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4857 op0, op1, rem_flag ? NULL_RTX : target,
4858 unsignedp,
4859 ((optab_handler (optab2, compute_mode)
4860 != CODE_FOR_nothing)
4861 ? OPTAB_DIRECT : OPTAB_WIDEN));
4863 if (quotient == 0)
4865 /* No luck there. Try a quotient-and-remainder insn,
4866 keeping the quotient alone. */
4867 quotient = gen_reg_rtx (compute_mode);
4868 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4869 op0, op1,
4870 quotient, NULL_RTX, unsignedp))
4872 quotient = 0;
4873 if (! rem_flag)
4874 /* Still no luck. If we are not computing the remainder,
4875 use a library call for the quotient. */
4876 quotient = sign_expand_binop (compute_mode,
4877 udiv_optab, sdiv_optab,
4878 op0, op1, target,
4879 unsignedp, OPTAB_LIB_WIDEN);
4884 if (rem_flag)
4886 if (target && GET_MODE (target) != compute_mode)
4887 target = 0;
4889 if (quotient == 0)
4891 /* No divide instruction either. Use library for remainder. */
4892 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4893 op0, op1, target,
4894 unsignedp, OPTAB_LIB_WIDEN);
4895 /* No remainder function. Try a quotient-and-remainder
4896 function, keeping the remainder. */
4897 if (!remainder)
4899 remainder = gen_reg_rtx (compute_mode);
4900 if (!expand_twoval_binop_libfunc
4901 (unsignedp ? udivmod_optab : sdivmod_optab,
4902 op0, op1,
4903 NULL_RTX, remainder,
4904 unsignedp ? UMOD : MOD))
4905 remainder = NULL_RTX;
4908 else
4910 /* We divided. Now finish doing X - Y * (X / Y). */
4911 remainder = expand_mult (compute_mode, quotient, op1,
4912 NULL_RTX, unsignedp);
4913 remainder = expand_binop (compute_mode, sub_optab, op0,
4914 remainder, target, unsignedp,
4915 OPTAB_LIB_WIDEN);
4919 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4922 /* Return a tree node with data type TYPE, describing the value of X.
4923 Usually this is a VAR_DECL, if there is no obvious better choice.
4924 X may be an expression; however, we only support those expressions
4925 generated by loop.c. */
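/* For illustration (a sketch of typical uses; the rtxes shown are built
   with the usual constructors):
     make_tree (integer_type_node, GEN_INT (4))
       builds the INTEGER_CST 4, and
     make_tree (integer_type_node, gen_rtx_PLUS (SImode, x, GEN_INT (4)))
       builds the PLUS_EXPR of make_tree (type, x) and 4, per the
       switch below.  */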
4927 tree
4928 make_tree (tree type, rtx x)
4930 tree t;
4932 switch (GET_CODE (x))
4934 case CONST_INT:
4936 HOST_WIDE_INT hi = 0;
4938 if (INTVAL (x) < 0
4939 && !(TYPE_UNSIGNED (type)
4940 && (GET_MODE_BITSIZE (TYPE_MODE (type))
4941 < HOST_BITS_PER_WIDE_INT)))
4942 hi = -1;
4944 t = build_int_cst_wide (type, INTVAL (x), hi);
4946 return t;
4949 case CONST_DOUBLE:
4950 if (GET_MODE (x) == VOIDmode)
4951 t = build_int_cst_wide (type,
4952 CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
4953 else
4955 REAL_VALUE_TYPE d;
4957 REAL_VALUE_FROM_CONST_DOUBLE (d, x);
4958 t = build_real (type, d);
4961 return t;
4963 case CONST_VECTOR:
4965 int units = CONST_VECTOR_NUNITS (x);
4966 tree itype = TREE_TYPE (type);
4967 tree t = NULL_TREE;
4968 int i;
4971 /* Build a tree with vector elements. */
4972 for (i = units - 1; i >= 0; --i)
4974 rtx elt = CONST_VECTOR_ELT (x, i);
4975 t = tree_cons (NULL_TREE, make_tree (itype, elt), t);
4978 return build_vector (type, t);
4981 case PLUS:
4982 return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4983 make_tree (type, XEXP (x, 1)));
4985 case MINUS:
4986 return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4987 make_tree (type, XEXP (x, 1)));
4989 case NEG:
4990 return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
4992 case MULT:
4993 return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
4994 make_tree (type, XEXP (x, 1)));
4996 case ASHIFT:
4997 return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
4998 make_tree (type, XEXP (x, 1)));
5000 case LSHIFTRT:
5001 t = unsigned_type_for (type);
5002 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5003 make_tree (t, XEXP (x, 0)),
5004 make_tree (type, XEXP (x, 1))));
5006 case ASHIFTRT:
5007 t = signed_type_for (type);
5008 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5009 make_tree (t, XEXP (x, 0)),
5010 make_tree (type, XEXP (x, 1))));
5012 case DIV:
5013 if (TREE_CODE (type) != REAL_TYPE)
5014 t = signed_type_for (type);
5015 else
5016 t = type;
5018 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5019 make_tree (t, XEXP (x, 0)),
5020 make_tree (t, XEXP (x, 1))));
5021 case UDIV:
5022 t = unsigned_type_for (type);
5023 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5024 make_tree (t, XEXP (x, 0)),
5025 make_tree (t, XEXP (x, 1))));
5027 case SIGN_EXTEND:
5028 case ZERO_EXTEND:
5029 t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5030 GET_CODE (x) == ZERO_EXTEND);
5031 return fold_convert (type, make_tree (t, XEXP (x, 0)));
5033 case CONST:
5034 return make_tree (type, XEXP (x, 0));
5036 case SYMBOL_REF:
5037 t = SYMBOL_REF_DECL (x);
5038 if (t)
5039 return fold_convert (type, build_fold_addr_expr (t));
5040 /* else fall through. */
5042 default:
5043 t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5045 /* If TYPE is a POINTER_TYPE, we might need to convert X from
5046 address mode to pointer mode. */
5047 if (POINTER_TYPE_P (type))
5048 x = convert_memory_address_addr_space
5049 (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5051 /* Note that we do *not* use SET_DECL_RTL here, because we do not
5052 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
5053 t->decl_with_rtl.rtl = x;
5055 return t;
5059 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5060 and returning TARGET.
5062 If TARGET is 0, a pseudo-register or constant is returned. */
rtx
5065 expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
5067 rtx tem = 0;
5069 if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5070 tem = simplify_binary_operation (AND, mode, op0, op1);
5071 if (tem == 0)
5072 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5074 if (target == 0)
5075 target = tem;
5076 else if (tem != target)
5077 emit_move_insn (target, tem);
5078 return target;
5081 /* Helper function for emit_store_flag. */
5082 static rtx
5083 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5084 enum machine_mode mode, enum machine_mode compare_mode,
5085 int unsignedp, rtx x, rtx y, int normalizep,
5086 enum machine_mode target_mode)
5088 struct expand_operand ops[4];
5089 rtx op0, last, comparison, subtarget;
5090 enum machine_mode result_mode = insn_data[(int) icode].operand[0].mode;
5092 last = get_last_insn ();
5093 x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5094 y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5095 if (!x || !y)
5097 delete_insns_since (last);
5098 return NULL_RTX;
5101 if (target_mode == VOIDmode)
5102 target_mode = result_mode;
5103 if (!target)
5104 target = gen_reg_rtx (target_mode);
5106 comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5108 create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5109 create_fixed_operand (&ops[1], comparison);
5110 create_fixed_operand (&ops[2], x);
5111 create_fixed_operand (&ops[3], y);
5112 if (!maybe_expand_insn (icode, 4, ops))
5114 delete_insns_since (last);
5115 return NULL_RTX;
5117 subtarget = ops[0].value;
5119 /* If we are converting to a wider mode, first convert to
5120 TARGET_MODE, then normalize. This produces better combining
5121 opportunities on machines that have a SIGN_EXTRACT when we are
5122 testing a single bit. This mostly benefits the 68k.
5124 If STORE_FLAG_VALUE does not have the sign bit set when
5125 interpreted in MODE, we can do this conversion as unsigned, which
5126 is usually more efficient. */
5127 if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
5129 convert_move (target, subtarget,
5130 val_signbit_known_clear_p (result_mode,
5131 STORE_FLAG_VALUE));
5132 op0 = target;
5133 result_mode = target_mode;
5135 else
5136 op0 = subtarget;
5138 /* If we want to keep subexpressions around, don't reuse our last
5139 target. */
5140 if (optimize)
5141 subtarget = 0;
5143 /* Now normalize to the proper value in MODE. Sometimes we don't
5144 have to do anything. */
5145 if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5147 /* STORE_FLAG_VALUE might be the most negative number, so write
5148 the comparison this way to avoid a compile-time warning. */
5149 else if (- normalizep == STORE_FLAG_VALUE)
5150 op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5152 /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5153 it hard to use a value of just the sign bit due to ANSI integer
5154 constant typing rules. */
5155 else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5156 op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5157 GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5158 normalizep == 1);
5159 else
5161 gcc_assert (STORE_FLAG_VALUE & 1);
5163 op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5164 if (normalizep == -1)
5165 op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5168 /* If we were converting to a smaller mode, do the conversion now. */
5169 if (target_mode != result_mode)
5171 convert_move (target, op0, 0);
5172 return target;
5174 else
5175 return op0;
5179 /* A subroutine of emit_store_flag only including "tricks" that do not
5180 need a recursive call. These are kept separate to avoid infinite
5181 loops. */
5183 static rtx
5184 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5185 enum machine_mode mode, int unsignedp, int normalizep,
5186 enum machine_mode target_mode)
5188 rtx subtarget;
5189 enum insn_code icode;
5190 enum machine_mode compare_mode;
5191 enum mode_class mclass;
5192 enum rtx_code scode;
5193 rtx tem;
5195 if (unsignedp)
5196 code = unsigned_condition (code);
5197 scode = swap_condition (code);
5199 /* If one operand is constant, make it the second one. Only do this
5200 if the other operand is not constant as well. */
5202 if (swap_commutative_operands_p (op0, op1))
5204 tem = op0;
5205 op0 = op1;
5206 op1 = tem;
5207 code = swap_condition (code);
5210 if (mode == VOIDmode)
5211 mode = GET_MODE (op0);
5213 /* For some comparisons with 1 and -1, we can convert this to
5214 comparisons with zero. This will often produce more opportunities for
5215 store-flag insns. */
5217 switch (code)
5219 case LT:
5220 if (op1 == const1_rtx)
5221 op1 = const0_rtx, code = LE;
5222 break;
5223 case LE:
5224 if (op1 == constm1_rtx)
5225 op1 = const0_rtx, code = LT;
5226 break;
5227 case GE:
5228 if (op1 == const1_rtx)
5229 op1 = const0_rtx, code = GT;
5230 break;
5231 case GT:
5232 if (op1 == constm1_rtx)
5233 op1 = const0_rtx, code = GE;
5234 break;
5235 case GEU:
5236 if (op1 == const1_rtx)
5237 op1 = const0_rtx, code = NE;
5238 break;
5239 case LTU:
5240 if (op1 == const1_rtx)
5241 op1 = const0_rtx, code = EQ;
5242 break;
5243 default:
5244 break;
5247 /* If we are comparing a double-word integer with zero or -1, we can
5248 convert the comparison into one involving a single word. */
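/* For illustration (a sketch, with the double-word value split into
   words HI and LO):
     (x == 0)   becomes  ((LO | HI) == 0)
     (x == -1)  becomes  ((LO & HI) == -1)
     (x < 0)    becomes  (HI < 0), since only the high word carries
                         the sign bit.  */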
5249 if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5250 && GET_MODE_CLASS (mode) == MODE_INT
5251 && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5253 if ((code == EQ || code == NE)
5254 && (op1 == const0_rtx || op1 == constm1_rtx))
5256 rtx op00, op01;
5258 /* Do a logical OR or AND of the two words and compare the
5259 result. */
5260 op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5261 op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5262 tem = expand_binop (word_mode,
5263 op1 == const0_rtx ? ior_optab : and_optab,
5264 op00, op01, NULL_RTX, unsignedp,
5265 OPTAB_DIRECT);
5267 if (tem != 0)
5268 tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5269 unsignedp, normalizep);
5271 else if ((code == LT || code == GE) && op1 == const0_rtx)
5273 rtx op0h;
5275 /* If testing the sign bit, we can just test the high word. */
5276 op0h = simplify_gen_subreg (word_mode, op0, mode,
5277 subreg_highpart_offset (word_mode,
5278 mode));
5279 tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5280 unsignedp, normalizep);
5282 else
5283 tem = NULL_RTX;
5285 if (tem)
5287 if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5288 return tem;
5289 if (!target)
5290 target = gen_reg_rtx (target_mode);
5292 convert_move (target, tem,
5293 !val_signbit_known_set_p (word_mode,
5294 (normalizep ? normalizep
5295 : STORE_FLAG_VALUE)));
5296 return target;
5300 /* If this is A < 0 or A >= 0, we can do this by taking the ones
5301 complement of A (for GE) and shifting the sign bit to the low bit. */
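/* For illustration (a sketch for a 32-bit A):
     (A < 0)  as a 0/1 value is  (unsigned) A >> 31;
     (A >= 0) as a 0/1 value is  (unsigned) ~A >> 31;
   and the -1/0 forms use an arithmetic shift instead of a logical one.  */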
5302 if (op1 == const0_rtx && (code == LT || code == GE)
5303 && GET_MODE_CLASS (mode) == MODE_INT
5304 && (normalizep || STORE_FLAG_VALUE == 1
5305 || val_signbit_p (mode, STORE_FLAG_VALUE)))
5307 subtarget = target;
5309 if (!target)
5310 target_mode = mode;
5312 /* If the result is to be wider than OP0, it is best to convert it
5313 first. If it is to be narrower, it is *incorrect* to convert it
5314 first. */
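/* (Added note.)  E.g. if OP0 is SImode and TARGET_MODE is QImode, truncating
   first and then shifting would move bit 7 into the low bit instead of the
   real sign bit, bit 31; so the shift is done in MODE and the result is
   narrowed only afterwards.  */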
5315 else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5317 op0 = convert_modes (target_mode, mode, op0, 0);
5318 mode = target_mode;
5321 if (target_mode != mode)
5322 subtarget = 0;
5324 if (code == GE)
5325 op0 = expand_unop (mode, one_cmpl_optab, op0,
5326 ((STORE_FLAG_VALUE == 1 || normalizep)
5327 ? 0 : subtarget), 0);
5329 if (STORE_FLAG_VALUE == 1 || normalizep)
5330 /* If we are supposed to produce a 0/1 value, we want to do
5331 a logical shift from the sign bit to the low-order bit; for
5332 a -1/0 value, we do an arithmetic shift. */
5333 op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5334 GET_MODE_BITSIZE (mode) - 1,
5335 subtarget, normalizep != -1);
5337 if (mode != target_mode)
5338 op0 = convert_modes (target_mode, mode, op0, 0);
5340 return op0;
5343 mclass = GET_MODE_CLASS (mode);
5344 for (compare_mode = mode; compare_mode != VOIDmode;
5345 compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5347 enum machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5348 icode = optab_handler (cstore_optab, optab_mode);
5349 if (icode != CODE_FOR_nothing)
5351 do_pending_stack_adjust ();
5352 tem = emit_cstore (target, icode, code, mode, compare_mode,
5353 unsignedp, op0, op1, normalizep, target_mode);
5354 if (tem)
5355 return tem;
5357 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5359 tem = emit_cstore (target, icode, scode, mode, compare_mode,
5360 unsignedp, op1, op0, normalizep, target_mode);
5361 if (tem)
5362 return tem;
5364 break;
5368 return 0;
5371 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5372 and storing in TARGET. Normally return TARGET.
5373 Return 0 if that cannot be done.
5375 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
5376 it is VOIDmode, they cannot both be CONST_INT.
5378 UNSIGNEDP is for the case where we have to widen the operands
5379 to perform the operation. It says to use zero-extension.
5381 NORMALIZEP is 1 if we should convert the result to be either zero
5382 or one. NORMALIZEP is -1 if we should convert the result to be
5383 either zero or -1. If NORMALIZEP is zero, the result will be left
5384 "raw" out of the scc insn. */
5386 rtx
5387 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5388 enum machine_mode mode, int unsignedp, int normalizep)
5390 enum machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5391 enum rtx_code rcode;
5392 rtx subtarget;
5393 rtx tem, last, trueval;
5395 tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5396 target_mode);
5397 if (tem)
5398 return tem;
5400 /* If we reached here, we can't do this with a scc insn, however there
5401 are some comparisons that can be done in other ways. Don't do any
5402 of these cases if branches are very cheap. */
5403 if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5404 return 0;
5406 /* See what we need to return. We can only return a 1, -1, or the
5407 sign bit. */
5409 if (normalizep == 0)
5411 if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5412 normalizep = STORE_FLAG_VALUE;
5414 else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5415 ;
5416 else
5417 return 0;
5420 last = get_last_insn ();
5422 /* If optimizing, use different pseudo registers for each insn, instead
5423 of reusing the same pseudo. This leads to better CSE, but slows
5424 down the compiler, since there are more pseudos.  */
5425 subtarget = (!optimize
5426 && (target_mode == mode)) ? target : NULL_RTX;
5427 trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5429 /* For floating-point comparisons, try the reverse comparison or try
5430 changing the "orderedness" of the comparison. */
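/* Illustration (added): if the machine cannot produce a store-flag result
   for ORDERED directly but can for UNORDERED, the code below computes the
   UNORDERED result and converts it into the ORDERED one with an addition
   or an XOR; if that is not possible either, the comparison is split into
   an ordered/unordered test combined with an ordinary comparison further
   down.  */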
5431 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5433 enum rtx_code first_code;
5434 bool and_them;
5436 rcode = reverse_condition_maybe_unordered (code);
5437 if (can_compare_p (rcode, mode, ccp_store_flag)
5438 && (code == ORDERED || code == UNORDERED
5439 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5440 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5442 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5443 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5445 /* For the reverse comparison, use either an addition or a XOR. */
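/* Worked example (added): with STORE_FLAG_VALUE == 1 and NORMALIZEP == -1,
   the reversed comparison yields 0 or 1, and adding NORMALIZEP (-1) maps
   that to -1 when the original comparison is true and to 0 when it is
   false.  In the XOR case the reversed result is normalized to 0/TRUEVAL
   and the XOR with TRUEVAL flips it back to the sense of the original
   comparison.  */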
5446 if (want_add
5447 && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5448 optimize_insn_for_speed_p ()) == 0)
5450 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5451 STORE_FLAG_VALUE, target_mode);
5452 if (tem)
5453 return expand_binop (target_mode, add_optab, tem,
5454 GEN_INT (normalizep),
5455 target, 0, OPTAB_WIDEN);
5457 else if (!want_add
5458 && rtx_cost (trueval, XOR, 1,
5459 optimize_insn_for_speed_p ()) == 0)
5461 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5462 normalizep, target_mode);
5463 if (tem)
5464 return expand_binop (target_mode, xor_optab, tem, trueval,
5465 target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
5469 delete_insns_since (last);
5471 /* Cannot split ORDERED and UNORDERED, only try the above trick. */
5472 if (code == ORDERED || code == UNORDERED)
5473 return 0;
5475 and_them = split_comparison (code, mode, &first_code, &code);
5477 /* If there are no NaNs, the first comparison should always fall through.
5478 Effectively change the comparison to the other one. */
5479 if (!HONOR_NANS (mode))
5481 gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5482 return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5483 target_mode);
5486 #ifdef HAVE_conditional_move
5487 /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5488 conditional move. */
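/* Sketch (added): e.g. LT is split into an ORDERED test and a second
   comparison that must both hold; TEM receives the ORDERED result and the
   conditional move below selects TEM when the second comparison holds and
   0 otherwise.  In the OR case (AND_THEM false) the move selects TRUEVAL
   when the second comparison holds and TEM otherwise.  */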
5489 tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5490 normalizep, target_mode);
5491 if (tem == 0)
5492 return 0;
5494 if (and_them)
5495 tem = emit_conditional_move (target, code, op0, op1, mode,
5496 tem, const0_rtx, GET_MODE (tem), 0);
5497 else
5498 tem = emit_conditional_move (target, code, op0, op1, mode,
5499 trueval, tem, GET_MODE (tem), 0);
5501 if (tem == 0)
5502 delete_insns_since (last);
5503 return tem;
5504 #else
5505 return 0;
5506 #endif
5509 /* The remaining tricks only apply to integer comparisons. */
5511 if (GET_MODE_CLASS (mode) != MODE_INT)
5512 return 0;
5514 /* If this is an equality comparison of integers, we can try to exclusive-or
5515 (or subtract) the two operands and use a recursive call to try the
5516 comparison with zero. Don't do any of these cases if branches are
5517 very cheap. */
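/* Illustration (added): "a == b" holds exactly when (a ^ b) == 0, and
   likewise when (a - b) == 0 in modular arithmetic, so either result can
   be fed back to emit_store_flag with OP1 == const0_rtx, where more of
   the tricks below become available.  */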
5519 if ((code == EQ || code == NE) && op1 != const0_rtx)
5521 tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5522 OPTAB_WIDEN);
5524 if (tem == 0)
5525 tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5526 OPTAB_WIDEN);
5527 if (tem != 0)
5528 tem = emit_store_flag (target, code, tem, const0_rtx,
5529 mode, unsignedp, normalizep);
5530 if (tem != 0)
5531 return tem;
5533 delete_insns_since (last);
5536 /* For integer comparisons, try the reverse comparison. However, for
5537 small X, if we would have to extend it anyway, implementing "X != 0"
5538 as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0". */
5539 rcode = reverse_condition (code);
5540 if (can_compare_p (rcode, mode, ccp_store_flag)
5541 && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5542 && code == NE
5543 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5544 && op1 == const0_rtx))
5546 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5547 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5549 /* Again, for the reverse comparison, use either an addition or a XOR. */
5550 if (want_add
5551 && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5552 optimize_insn_for_speed_p ()) == 0)
5554 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5555 STORE_FLAG_VALUE, target_mode);
5556 if (tem != 0)
5557 tem = expand_binop (target_mode, add_optab, tem,
5558 GEN_INT (normalizep), target, 0, OPTAB_WIDEN);
5560 else if (!want_add
5561 && rtx_cost (trueval, XOR, 1,
5562 optimize_insn_for_speed_p ()) == 0)
5564 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5565 normalizep, target_mode);
5566 if (tem != 0)
5567 tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5568 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5571 if (tem != 0)
5572 return tem;
5573 delete_insns_since (last);
5576 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5577 the constant zero. Reject all other comparisons at this point. Only
5578 do LE and GT if branches are expensive, since the sequences they need are
5579 expensive on 2-operand machines. */
5581 if (op1 != const0_rtx
5582 || (code != EQ && code != NE
5583 && (BRANCH_COST (optimize_insn_for_speed_p (),
5584 false) <= 1 || (code != LE && code != GT))))
5585 return 0;
5587 /* Try to put the result of the comparison in the sign bit. Assume we can't
5588 do the necessary operation below. */
5590 tem = 0;
5592 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
5593 the sign bit set. */
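/* Worked example (added), in 8-bit arithmetic:
     A =  3:  3 | 2       = 0x03, sign bit clear -> A <= 0 is false
     A =  0:  0 | 0xff    = 0xff, sign bit set   -> A <= 0 is true
     A = -5:  0xfb | 0xfa = 0xfb, sign bit set   -> A <= 0 is true  */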
5595 if (code == LE)
5597 /* This is destructive, so SUBTARGET can't be OP0. */
5598 if (rtx_equal_p (subtarget, op0))
5599 subtarget = 0;
5601 tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5602 OPTAB_WIDEN);
5603 if (tem)
5604 tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5605 OPTAB_WIDEN);
5608 /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5609 number of bits in the mode of OP0, minus one. */
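/* Worked example (added), for 32-bit A and an arithmetic shift by 31:
     A >  0:  (A >> 31) - A = 0 - A, which is negative   -> sign bit set
     A == 0:  0 - 0 = 0                                   -> sign bit clear
     A <  0:  (-1) - A, which is >= 0 for any negative A  -> sign bit clear  */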
5611 if (code == GT)
5613 if (rtx_equal_p (subtarget, op0))
5614 subtarget = 0;
5616 tem = expand_shift (RSHIFT_EXPR, mode, op0,
5617 GET_MODE_BITSIZE (mode) - 1,
5618 subtarget, 0);
5619 tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5620 OPTAB_WIDEN);
5623 if (code == EQ || code == NE)
5625 /* For EQ or NE, one way to do the comparison is to apply an operation
5626 that converts the operand into a positive number if it is nonzero
5627 or zero if it was originally zero. Then, for EQ, we subtract 1 and
5628 for NE we negate. This puts the result in the sign bit. Then we
5629 normalize with a shift, if needed.
5631 Two operations that can do the above actions are ABS and FFS, so try
5632 them. If that doesn't work, and MODE is smaller than a full word,
5633 we can use zero-extension to the wider mode (an unsigned conversion)
5634 as the operation. */
5636 /* Note that ABS doesn't yield a positive number for INT_MIN, but
5637 that is compensated by the subsequent overflow when subtracting
5638 one / negating. */
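/* Worked example (added): with A = 5, ABS gives 5; for EQ, 5 - 1 = 4
   (sign clear, i.e. false), for NE, -5 (sign set, i.e. true).  With A = 0,
   ABS gives 0; for EQ, 0 - 1 = -1 (sign set, true), for NE, -0 = 0 (sign
   clear, false).  For A = INT_MIN, ABS yields INT_MIN again, but
   INT_MIN - 1 wraps to INT_MAX (sign clear) and -INT_MIN wraps back to
   INT_MIN (sign set), which are still the desired answers.  */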
5640 if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5641 tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5642 else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5643 tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5644 else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5646 tem = convert_modes (word_mode, mode, op0, 1);
5647 mode = word_mode;
5650 if (tem != 0)
5652 if (code == EQ)
5653 tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5654 0, OPTAB_WIDEN);
5655 else
5656 tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5659 /* If we couldn't do it that way, for NE we can "or" the two's complement
5660 of the value with itself. For EQ, we take the one's complement of
5661 that "or", which is an extra insn, so we only handle EQ if branches
5662 are expensive. */
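/* Worked example (added), in 8-bit arithmetic:
     A = 3:  (-3) | 3 = 0xfd | 0x03 = 0xff, sign bit set   -> A != 0
     A = 0:    0  | 0 = 0,                  sign bit clear -> A == 0
   For EQ the extra one's complement turns sign-set into sign-clear and
   vice versa.  */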
5664 if (tem == 0
5665 && (code == NE
5666 || BRANCH_COST (optimize_insn_for_speed_p (),
5667 false) > 1))
5669 if (rtx_equal_p (subtarget, op0))
5670 subtarget = 0;
5672 tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5673 tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5674 OPTAB_WIDEN);
5676 if (tem && code == EQ)
5677 tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5681 if (tem && normalizep)
5682 tem = expand_shift (RSHIFT_EXPR, mode, tem,
5683 GET_MODE_BITSIZE (mode) - 1,
5684 subtarget, normalizep == 1);
5686 if (tem)
5688 if (!target)
5689 ;
5690 else if (GET_MODE (tem) != target_mode)
5692 convert_move (target, tem, 0);
5693 tem = target;
5695 else if (!subtarget)
5697 emit_move_insn (target, tem);
5698 tem = target;
5701 else
5702 delete_insns_since (last);
5704 return tem;
5707 /* Like emit_store_flag, but always succeeds. */
5709 rtx
5710 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5711 enum machine_mode mode, int unsignedp, int normalizep)
5713 rtx tem, label;
5714 rtx trueval, falseval;
5716 /* First see if emit_store_flag can do the job. */
5717 tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5718 if (tem != 0)
5719 return tem;
5721 if (!target)
5722 target = gen_reg_rtx (word_mode);
5724 /* If this failed, we have to do this with set/compare/jump/set code.
5725 For foo != 0, if OP0 is the same register as TARGET, just overwrite it with the true value when it is nonzero (when it is zero it already holds the false value). */
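/* Sketch of the general fallback emitted further below (added comment):

     TARGET = TRUEVAL;
     if (OP0 <CODE> OP1) goto label;
     TARGET = FALSEVAL;
   label:

   possibly with CODE reversed and TRUEVAL/FALSEVAL swapped first.  */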
5726 trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5727 if (code == NE
5728 && GET_MODE_CLASS (mode) == MODE_INT
5729 && REG_P (target)
5730 && op0 == target
5731 && op1 == const0_rtx)
5733 label = gen_label_rtx ();
5734 do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp,
5735 mode, NULL_RTX, NULL_RTX, label, -1);
5736 emit_move_insn (target, trueval);
5737 emit_label (label);
5738 return target;
5741 if (!REG_P (target)
5742 || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5743 target = gen_reg_rtx (GET_MODE (target));
5745 /* Jump in the right direction if the target cannot implement CODE
5746 but can jump on its reverse condition. */
5747 falseval = const0_rtx;
5748 if (! can_compare_p (code, mode, ccp_jump)
5749 && (! FLOAT_MODE_P (mode)
5750 || code == ORDERED || code == UNORDERED
5751 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5752 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5754 enum rtx_code rcode;
5755 if (FLOAT_MODE_P (mode))
5756 rcode = reverse_condition_maybe_unordered (code);
5757 else
5758 rcode = reverse_condition (code);
5760 /* Canonicalize to UNORDERED for the libcall. */
5761 if (can_compare_p (rcode, mode, ccp_jump)
5762 || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5764 falseval = trueval;
5765 trueval = const0_rtx;
5766 code = rcode;
5770 emit_move_insn (target, trueval);
5771 label = gen_label_rtx ();
5772 do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5773 NULL_RTX, label, -1);
5775 emit_move_insn (target, falseval);
5776 emit_label (label);
5778 return target;
5781 /* Perform possibly multi-word comparison and conditional jump to LABEL
5782 if ARG1 OP ARG2 is true, where ARG1 and ARG2 are of mode MODE. This is
5783 now a thin wrapper around do_compare_rtx_and_jump. */
5785 static void
5786 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
5787 rtx label)
5789 int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5790 do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode,
5791 NULL_RTX, NULL_RTX, label, -1);