[official-gcc.git] / gcc / expmed.c
1 /* Medium-level subroutines: convert bit-field store and extract
2 and shifts, multiplies and divides to rtl instructions.
3 Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
4 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
5 2011
6 Free Software Foundation, Inc.
8 This file is part of GCC.
10 GCC is free software; you can redistribute it and/or modify it under
11 the terms of the GNU General Public License as published by the Free
12 Software Foundation; either version 3, or (at your option) any later
13 version.
15 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
16 WARRANTY; without even the implied warranty of MERCHANTABILITY or
17 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 for more details.
20 You should have received a copy of the GNU General Public License
21 along with GCC; see the file COPYING3. If not see
22 <http://www.gnu.org/licenses/>. */
25 #include "config.h"
26 #include "system.h"
27 #include "coretypes.h"
28 #include "tm.h"
29 #include "diagnostic-core.h"
30 #include "rtl.h"
31 #include "tree.h"
32 #include "tm_p.h"
33 #include "flags.h"
34 #include "insn-config.h"
35 #include "expr.h"
36 #include "optabs.h"
37 #include "recog.h"
38 #include "langhooks.h"
39 #include "df.h"
40 #include "target.h"
41 #include "expmed.h"
43 struct target_expmed default_target_expmed;
44 #if SWITCHABLE_TARGET
45 struct target_expmed *this_target_expmed = &default_target_expmed;
46 #endif
48 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
49 unsigned HOST_WIDE_INT,
50 unsigned HOST_WIDE_INT,
51 unsigned HOST_WIDE_INT,
52 unsigned HOST_WIDE_INT,
53 rtx);
54 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
55 unsigned HOST_WIDE_INT,
56 unsigned HOST_WIDE_INT,
57 unsigned HOST_WIDE_INT,
58 rtx);
59 static rtx extract_fixed_bit_field (enum machine_mode, rtx,
60 unsigned HOST_WIDE_INT,
61 unsigned HOST_WIDE_INT,
62 unsigned HOST_WIDE_INT, rtx, int, bool);
63 static rtx mask_rtx (enum machine_mode, int, int, int);
64 static rtx lshift_value (enum machine_mode, rtx, int, int);
65 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
66 unsigned HOST_WIDE_INT, int);
67 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
68 static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
69 static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
 71 /* Test whether a value is zero or a power of two.  */
72 #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
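/* Illustrative note: x & (x - 1) clears the lowest set bit of x, so the
   result is zero exactly when x has at most one bit set; e.g. 8 & 7 == 0,
   while 6 & 5 == 4 != 0.  */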
74 #ifndef SLOW_UNALIGNED_ACCESS
75 #define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
76 #endif
79 /* Reduce conditional compilation elsewhere. */
80 #ifndef HAVE_insv
81 #define HAVE_insv 0
82 #define CODE_FOR_insv CODE_FOR_nothing
83 #define gen_insv(a,b,c,d) NULL_RTX
84 #endif
85 #ifndef HAVE_extv
86 #define HAVE_extv 0
87 #define CODE_FOR_extv CODE_FOR_nothing
88 #define gen_extv(a,b,c,d) NULL_RTX
89 #endif
90 #ifndef HAVE_extzv
91 #define HAVE_extzv 0
92 #define CODE_FOR_extzv CODE_FOR_nothing
93 #define gen_extzv(a,b,c,d) NULL_RTX
94 #endif
96 void
97 init_expmed (void)
99 struct
101 struct rtx_def reg; rtunion reg_fld[2];
102 struct rtx_def plus; rtunion plus_fld1;
103 struct rtx_def neg;
104 struct rtx_def mult; rtunion mult_fld1;
105 struct rtx_def sdiv; rtunion sdiv_fld1;
106 struct rtx_def udiv; rtunion udiv_fld1;
107 struct rtx_def zext;
108 struct rtx_def sdiv_32; rtunion sdiv_32_fld1;
109 struct rtx_def smod_32; rtunion smod_32_fld1;
110 struct rtx_def wide_mult; rtunion wide_mult_fld1;
111 struct rtx_def wide_lshr; rtunion wide_lshr_fld1;
112 struct rtx_def wide_trunc;
113 struct rtx_def shift; rtunion shift_fld1;
114 struct rtx_def shift_mult; rtunion shift_mult_fld1;
115 struct rtx_def shift_add; rtunion shift_add_fld1;
116 struct rtx_def shift_sub0; rtunion shift_sub0_fld1;
117 struct rtx_def shift_sub1; rtunion shift_sub1_fld1;
118 } all;
120 rtx pow2[MAX_BITS_PER_WORD];
121 rtx cint[MAX_BITS_PER_WORD];
122 int m, n;
123 enum machine_mode mode, wider_mode;
124 int speed;
127 for (m = 1; m < MAX_BITS_PER_WORD; m++)
129 pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
130 cint[m] = GEN_INT (m);
132 memset (&all, 0, sizeof all);
134 PUT_CODE (&all.reg, REG);
135 /* Avoid using hard regs in ways which may be unsupported. */
136 SET_REGNO (&all.reg, LAST_VIRTUAL_REGISTER + 1);
138 PUT_CODE (&all.plus, PLUS);
139 XEXP (&all.plus, 0) = &all.reg;
140 XEXP (&all.plus, 1) = &all.reg;
142 PUT_CODE (&all.neg, NEG);
143 XEXP (&all.neg, 0) = &all.reg;
145 PUT_CODE (&all.mult, MULT);
146 XEXP (&all.mult, 0) = &all.reg;
147 XEXP (&all.mult, 1) = &all.reg;
149 PUT_CODE (&all.sdiv, DIV);
150 XEXP (&all.sdiv, 0) = &all.reg;
151 XEXP (&all.sdiv, 1) = &all.reg;
153 PUT_CODE (&all.udiv, UDIV);
154 XEXP (&all.udiv, 0) = &all.reg;
155 XEXP (&all.udiv, 1) = &all.reg;
157 PUT_CODE (&all.sdiv_32, DIV);
158 XEXP (&all.sdiv_32, 0) = &all.reg;
159 XEXP (&all.sdiv_32, 1) = 32 < MAX_BITS_PER_WORD ? cint[32] : GEN_INT (32);
161 PUT_CODE (&all.smod_32, MOD);
162 XEXP (&all.smod_32, 0) = &all.reg;
163 XEXP (&all.smod_32, 1) = XEXP (&all.sdiv_32, 1);
165 PUT_CODE (&all.zext, ZERO_EXTEND);
166 XEXP (&all.zext, 0) = &all.reg;
168 PUT_CODE (&all.wide_mult, MULT);
169 XEXP (&all.wide_mult, 0) = &all.zext;
170 XEXP (&all.wide_mult, 1) = &all.zext;
172 PUT_CODE (&all.wide_lshr, LSHIFTRT);
173 XEXP (&all.wide_lshr, 0) = &all.wide_mult;
175 PUT_CODE (&all.wide_trunc, TRUNCATE);
176 XEXP (&all.wide_trunc, 0) = &all.wide_lshr;
178 PUT_CODE (&all.shift, ASHIFT);
179 XEXP (&all.shift, 0) = &all.reg;
181 PUT_CODE (&all.shift_mult, MULT);
182 XEXP (&all.shift_mult, 0) = &all.reg;
184 PUT_CODE (&all.shift_add, PLUS);
185 XEXP (&all.shift_add, 0) = &all.shift_mult;
186 XEXP (&all.shift_add, 1) = &all.reg;
188 PUT_CODE (&all.shift_sub0, MINUS);
189 XEXP (&all.shift_sub0, 0) = &all.shift_mult;
190 XEXP (&all.shift_sub0, 1) = &all.reg;
192 PUT_CODE (&all.shift_sub1, MINUS);
193 XEXP (&all.shift_sub1, 0) = &all.reg;
194 XEXP (&all.shift_sub1, 1) = &all.shift_mult;
196 for (speed = 0; speed < 2; speed++)
198 crtl->maybe_hot_insn_p = speed;
199 zero_cost[speed] = set_src_cost (const0_rtx, speed);
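/* Illustrative note: the two passes fill the cost tables once for size
   optimization (speed == 0) and once for speed optimization (speed == 1);
   crtl->maybe_hot_insn_p is set to match so that cost hooks which consult
   the current RTL profile agree with the SPEED argument given to
   set_src_cost, and default_rtl_profile () at the end restores the normal
   state.  */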
201 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
202 mode != VOIDmode;
203 mode = GET_MODE_WIDER_MODE (mode))
205 PUT_MODE (&all.reg, mode);
206 PUT_MODE (&all.plus, mode);
207 PUT_MODE (&all.neg, mode);
208 PUT_MODE (&all.mult, mode);
209 PUT_MODE (&all.sdiv, mode);
210 PUT_MODE (&all.udiv, mode);
211 PUT_MODE (&all.sdiv_32, mode);
212 PUT_MODE (&all.smod_32, mode);
213 PUT_MODE (&all.wide_trunc, mode);
214 PUT_MODE (&all.shift, mode);
215 PUT_MODE (&all.shift_mult, mode);
216 PUT_MODE (&all.shift_add, mode);
217 PUT_MODE (&all.shift_sub0, mode);
218 PUT_MODE (&all.shift_sub1, mode);
220 add_cost[speed][mode] = set_src_cost (&all.plus, speed);
221 neg_cost[speed][mode] = set_src_cost (&all.neg, speed);
222 mul_cost[speed][mode] = set_src_cost (&all.mult, speed);
223 sdiv_cost[speed][mode] = set_src_cost (&all.sdiv, speed);
224 udiv_cost[speed][mode] = set_src_cost (&all.udiv, speed);
226 sdiv_pow2_cheap[speed][mode] = (set_src_cost (&all.sdiv_32, speed)
227 <= 2 * add_cost[speed][mode]);
228 smod_pow2_cheap[speed][mode] = (set_src_cost (&all.smod_32, speed)
229 <= 4 * add_cost[speed][mode]);
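/* Illustrative note: a signed divide by a power of two is recorded as cheap
   for this mode when a generic 32-bit DIV costs no more than two additions,
   and likewise a MOD when it costs no more than four; the division expanders
   later in this file consult these flags when choosing between a real divide
   and an open-coded shift-and-mask sequence.  */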
231 wider_mode = GET_MODE_WIDER_MODE (mode);
232 if (wider_mode != VOIDmode)
234 PUT_MODE (&all.zext, wider_mode);
235 PUT_MODE (&all.wide_mult, wider_mode);
236 PUT_MODE (&all.wide_lshr, wider_mode);
237 XEXP (&all.wide_lshr, 1) = GEN_INT (GET_MODE_BITSIZE (mode));
239 mul_widen_cost[speed][wider_mode]
240 = set_src_cost (&all.wide_mult, speed);
241 mul_highpart_cost[speed][mode]
242 = set_src_cost (&all.wide_trunc, speed);
245 shift_cost[speed][mode][0] = 0;
246 shiftadd_cost[speed][mode][0] = shiftsub0_cost[speed][mode][0]
247 = shiftsub1_cost[speed][mode][0] = add_cost[speed][mode];
249 n = MIN (MAX_BITS_PER_WORD, GET_MODE_BITSIZE (mode));
250 for (m = 1; m < n; m++)
252 XEXP (&all.shift, 1) = cint[m];
253 XEXP (&all.shift_mult, 1) = pow2[m];
255 shift_cost[speed][mode][m] = set_src_cost (&all.shift, speed);
256 shiftadd_cost[speed][mode][m] = set_src_cost (&all.shift_add,
257 speed);
258 shiftsub0_cost[speed][mode][m] = set_src_cost (&all.shift_sub0,
259 speed);
260 shiftsub1_cost[speed][mode][m] = set_src_cost (&all.shift_sub1,
261 speed);
265 if (alg_hash_used_p)
266 memset (alg_hash, 0, sizeof (alg_hash));
267 else
268 alg_hash_used_p = true;
269 default_rtl_profile ();
272 /* Return an rtx representing minus the value of X.
273 MODE is the intended mode of the result,
274 useful if X is a CONST_INT. */
 276 rtx
 277 negate_rtx (enum machine_mode mode, rtx x)
279 rtx result = simplify_unary_operation (NEG, mode, x, mode);
281 if (result == 0)
282 result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
284 return result;
287 /* Report on the availability of insv/extv/extzv and the desired mode
288 of each of their operands. Returns MAX_MACHINE_MODE if HAVE_foo
289 is false; else the mode of the specified operand. If OPNO is -1,
290 all the caller cares about is whether the insn is available. */
291 enum machine_mode
292 mode_for_extraction (enum extraction_pattern pattern, int opno)
294 const struct insn_data_d *data;
296 switch (pattern)
298 case EP_insv:
299 if (HAVE_insv)
301 data = &insn_data[CODE_FOR_insv];
302 break;
304 return MAX_MACHINE_MODE;
306 case EP_extv:
307 if (HAVE_extv)
309 data = &insn_data[CODE_FOR_extv];
310 break;
312 return MAX_MACHINE_MODE;
314 case EP_extzv:
315 if (HAVE_extzv)
317 data = &insn_data[CODE_FOR_extzv];
318 break;
320 return MAX_MACHINE_MODE;
322 default:
323 gcc_unreachable ();
326 if (opno == -1)
327 return VOIDmode;
329 /* Everyone who uses this function used to follow it with
330 if (result == VOIDmode) result = word_mode; */
331 if (data->operand[opno].mode == VOIDmode)
332 return word_mode;
333 return data->operand[opno].mode;
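/* For example, store_bit_field_1 below calls mode_for_extraction (EP_insv, 3)
   to find the mode the insv pattern wants for its source-value operand.  */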
336 /* A subroutine of store_bit_field, with the same arguments. Return true
337 if the operation could be implemented.
339 If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
340 no other way of implementing the operation. If FALLBACK_P is false,
341 return false instead. */
343 static bool
344 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
345 unsigned HOST_WIDE_INT bitnum,
346 unsigned HOST_WIDE_INT bitregion_start,
347 unsigned HOST_WIDE_INT bitregion_end,
348 enum machine_mode fieldmode,
349 rtx value, bool fallback_p)
351 unsigned int unit
352 = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
353 unsigned HOST_WIDE_INT offset, bitpos;
354 rtx op0 = str_rtx;
355 int byte_offset;
356 rtx orig_value;
358 enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);
360 while (GET_CODE (op0) == SUBREG)
362 /* The following line once was done only if WORDS_BIG_ENDIAN,
363 but I think that is a mistake. WORDS_BIG_ENDIAN is
364 meaningful at a much higher level; when structures are copied
365 between memory and regs, the higher-numbered regs
366 always get higher addresses. */
367 int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
368 int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
370 byte_offset = 0;
372 /* Paradoxical subregs need special handling on big endian machines. */
373 if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
375 int difference = inner_mode_size - outer_mode_size;
377 if (WORDS_BIG_ENDIAN)
378 byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
379 if (BYTES_BIG_ENDIAN)
380 byte_offset += difference % UNITS_PER_WORD;
382 else
383 byte_offset = SUBREG_BYTE (op0);
385 bitnum += byte_offset * BITS_PER_UNIT;
386 op0 = SUBREG_REG (op0);
389 /* No action is needed if the target is a register and if the field
390 lies completely outside that register. This can occur if the source
391 code contains an out-of-bounds access to a small array. */
392 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
393 return true;
395 /* Use vec_set patterns for inserting parts of vectors whenever
396 available. */
397 if (VECTOR_MODE_P (GET_MODE (op0))
398 && !MEM_P (op0)
399 && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
400 && fieldmode == GET_MODE_INNER (GET_MODE (op0))
401 && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
402 && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
404 struct expand_operand ops[3];
405 enum machine_mode outermode = GET_MODE (op0);
406 enum machine_mode innermode = GET_MODE_INNER (outermode);
407 enum insn_code icode = optab_handler (vec_set_optab, outermode);
408 int pos = bitnum / GET_MODE_BITSIZE (innermode);
410 create_fixed_operand (&ops[0], op0);
411 create_input_operand (&ops[1], value, innermode);
412 create_integer_operand (&ops[2], pos);
413 if (maybe_expand_insn (icode, 3, ops))
414 return true;
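/* For illustration: if OP0 is a V2DImode register and the target provides a
   vec_set pattern for that mode, storing a DImode value at BITNUM 64 becomes
   a vector-element insert with POS = 64 / 64 = 1, i.e. the second lane.  */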
417 /* If the target is a register, overwriting the entire object, or storing
418 a full-word or multi-word field can be done with just a SUBREG.
420 If the target is memory, storing any naturally aligned field can be
421 done with a simple store. For targets that support fast unaligned
422 memory, any naturally sized, unit aligned field can be done directly. */
424 offset = bitnum / unit;
425 bitpos = bitnum % unit;
426 byte_offset = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
427 + (offset * UNITS_PER_WORD);
429 if (bitpos == 0
430 && bitsize == GET_MODE_BITSIZE (fieldmode)
431 && (!MEM_P (op0)
432 ? ((GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD
433 || GET_MODE_SIZE (GET_MODE (op0)) == GET_MODE_SIZE (fieldmode))
434 && ((GET_MODE (op0) == fieldmode && byte_offset == 0)
435 || validate_subreg (fieldmode, GET_MODE (op0), op0,
436 byte_offset)))
437 : (! SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0))
438 || (offset * BITS_PER_UNIT % bitsize == 0
439 && MEM_ALIGN (op0) % GET_MODE_BITSIZE (fieldmode) == 0))))
441 if (MEM_P (op0))
442 op0 = adjust_address (op0, fieldmode, offset);
443 else if (GET_MODE (op0) != fieldmode)
444 op0 = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
445 byte_offset);
446 emit_move_insn (op0, value);
447 return true;
450 /* Make sure we are playing with integral modes. Pun with subregs
451 if we aren't. This must come after the entire register case above,
452 since that case is valid for any mode. The following cases are only
453 valid for integral modes. */
455 enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
456 if (imode != GET_MODE (op0))
458 if (MEM_P (op0))
459 op0 = adjust_address (op0, imode, 0);
460 else
462 gcc_assert (imode != BLKmode);
463 op0 = gen_lowpart (imode, op0);
468 /* We may be accessing data outside the field, which means
469 we can alias adjacent data. */
470 /* ?? not always for C++0x memory model ?? */
471 if (MEM_P (op0))
473 op0 = shallow_copy_rtx (op0);
474 set_mem_alias_set (op0, 0);
475 set_mem_expr (op0, 0);
478 /* If OP0 is a register, BITPOS must count within a word.
479 But as we have it, it counts within whatever size OP0 now has.
480 On a bigendian machine, these are not the same, so convert. */
481 if (BYTES_BIG_ENDIAN
482 && !MEM_P (op0)
483 && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
484 bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
486 /* Storing an lsb-aligned field in a register
487 can be done with a movestrict instruction. */
489 if (!MEM_P (op0)
490 && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
491 && bitsize == GET_MODE_BITSIZE (fieldmode)
492 && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
494 struct expand_operand ops[2];
495 enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
496 rtx arg0 = op0;
497 unsigned HOST_WIDE_INT subreg_off;
499 if (GET_CODE (arg0) == SUBREG)
501 /* Else we've got some float mode source being extracted into
502 a different float mode destination -- this combination of
503 subregs results in Severe Tire Damage. */
504 gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
505 || GET_MODE_CLASS (fieldmode) == MODE_INT
506 || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
507 arg0 = SUBREG_REG (arg0);
510 subreg_off = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
511 + (offset * UNITS_PER_WORD);
512 if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
514 arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
516 create_fixed_operand (&ops[0], arg0);
517 /* Shrink the source operand to FIELDMODE. */
518 create_convert_operand_to (&ops[1], value, fieldmode, false);
519 if (maybe_expand_insn (icode, 2, ops))
520 return true;
524 /* Handle fields bigger than a word. */
526 if (bitsize > BITS_PER_WORD)
528 /* Here we transfer the words of the field
529 in the order least significant first.
530 This is because the most significant word is the one which may
531 be less than full.
532 However, only do that if the value is not BLKmode. */
534 unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
535 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
536 unsigned int i;
537 rtx last;
539 /* This is the mode we must force value to, so that there will be enough
540 subwords to extract. Note that fieldmode will often (always?) be
541 VOIDmode, because that is what store_field uses to indicate that this
542 is a bit field, but passing VOIDmode to operand_subword_force
543 is not allowed. */
544 fieldmode = GET_MODE (value);
545 if (fieldmode == VOIDmode)
546 fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
548 last = get_last_insn ();
549 for (i = 0; i < nwords; i++)
551 /* If I is 0, use the low-order word in both field and target;
552 if I is 1, use the next to lowest word; and so on. */
553 unsigned int wordnum = (backwards ? nwords - i - 1 : i);
554 unsigned int bit_offset = (backwards
555 ? MAX ((int) bitsize - ((int) i + 1)
556 * BITS_PER_WORD,
 557 0)
 558 : (int) i * BITS_PER_WORD);
559 rtx value_word = operand_subword_force (value, wordnum, fieldmode);
561 if (!store_bit_field_1 (op0, MIN (BITS_PER_WORD,
562 bitsize - i * BITS_PER_WORD),
563 bitnum + bit_offset,
564 bitregion_start, bitregion_end,
565 word_mode,
566 value_word, fallback_p))
568 delete_insns_since (last);
569 return false;
572 return true;
 575 /* From here on we can assume that the field to be stored in fits within
 576 a single word, since fields wider than a word were handled above.  */
578 /* OFFSET is the number of words or bytes (UNIT says which)
579 from STR_RTX to the first word or byte containing part of the field. */
581 if (!MEM_P (op0))
583 if (offset != 0
584 || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
586 if (!REG_P (op0))
588 /* Since this is a destination (lvalue), we can't copy
589 it to a pseudo. We can remove a SUBREG that does not
590 change the size of the operand. Such a SUBREG may
591 have been added above. */
592 gcc_assert (GET_CODE (op0) == SUBREG
593 && (GET_MODE_SIZE (GET_MODE (op0))
594 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)))));
595 op0 = SUBREG_REG (op0);
597 op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
598 op0, (offset * UNITS_PER_WORD));
600 offset = 0;
603 /* If VALUE has a floating-point or complex mode, access it as an
604 integer of the corresponding size. This can occur on a machine
605 with 64 bit registers that uses SFmode for float. It can also
606 occur for unaligned float or complex fields. */
607 orig_value = value;
608 if (GET_MODE (value) != VOIDmode
609 && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
610 && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
612 value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
613 emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
616 /* Now OFFSET is nonzero only if OP0 is memory
617 and is therefore always measured in bytes. */
619 if (HAVE_insv
620 && GET_MODE (value) != BLKmode
621 && bitsize > 0
622 && GET_MODE_BITSIZE (op_mode) >= bitsize
623 && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
624 && (bitsize + bitpos > GET_MODE_BITSIZE (op_mode))))
626 struct expand_operand ops[4];
627 int xbitpos = bitpos;
628 rtx value1;
629 rtx xop0 = op0;
630 rtx last = get_last_insn ();
631 bool copy_back = false;
633 /* Add OFFSET into OP0's address. */
634 if (MEM_P (xop0))
635 xop0 = adjust_address (xop0, byte_mode, offset);
637 /* If xop0 is a register, we need it in OP_MODE
638 to make it acceptable to the format of insv. */
639 if (GET_CODE (xop0) == SUBREG)
640 /* We can't just change the mode, because this might clobber op0,
641 and we will need the original value of op0 if insv fails. */
642 xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
643 if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
644 xop0 = gen_lowpart_SUBREG (op_mode, xop0);
646 /* If the destination is a paradoxical subreg such that we need a
647 truncate to the inner mode, perform the insertion on a temporary and
648 truncate the result to the original destination. Note that we can't
649 just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
650 X) 0)) is (reg:N X). */
651 if (GET_CODE (xop0) == SUBREG
652 && REG_P (SUBREG_REG (xop0))
653 && (!TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
654 op_mode)))
656 rtx tem = gen_reg_rtx (op_mode);
657 emit_move_insn (tem, xop0);
658 xop0 = tem;
659 copy_back = true;
662 /* On big-endian machines, we count bits from the most significant.
663 If the bit field insn does not, we must invert. */
665 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
666 xbitpos = unit - bitsize - xbitpos;
668 /* We have been counting XBITPOS within UNIT.
669 Count instead within the size of the register. */
670 if (BITS_BIG_ENDIAN && !MEM_P (xop0))
671 xbitpos += GET_MODE_BITSIZE (op_mode) - unit;
673 unit = GET_MODE_BITSIZE (op_mode);
675 /* Convert VALUE to op_mode (which insv insn wants) in VALUE1. */
676 value1 = value;
677 if (GET_MODE (value) != op_mode)
679 if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
681 /* Optimization: Don't bother really extending VALUE
682 if it has all the bits we will actually use. However,
683 if we must narrow it, be sure we do it correctly. */
685 if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
687 rtx tmp;
689 tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
690 if (! tmp)
691 tmp = simplify_gen_subreg (op_mode,
692 force_reg (GET_MODE (value),
693 value1),
694 GET_MODE (value), 0);
695 value1 = tmp;
697 else
698 value1 = gen_lowpart (op_mode, value1);
700 else if (CONST_INT_P (value))
701 value1 = gen_int_mode (INTVAL (value), op_mode);
702 else
703 /* Parse phase is supposed to make VALUE's data type
704 match that of the component reference, which is a type
705 at least as wide as the field; so VALUE should have
706 a mode that corresponds to that type. */
707 gcc_assert (CONSTANT_P (value));
710 create_fixed_operand (&ops[0], xop0);
711 create_integer_operand (&ops[1], bitsize);
712 create_integer_operand (&ops[2], xbitpos);
713 create_input_operand (&ops[3], value1, op_mode);
714 if (maybe_expand_insn (CODE_FOR_insv, 4, ops))
716 if (copy_back)
717 convert_move (op0, xop0, true);
718 return true;
720 delete_insns_since (last);
723 /* If OP0 is a memory, try copying it to a register and seeing if a
724 cheap register alternative is available. */
725 if (HAVE_insv && MEM_P (op0))
727 enum machine_mode bestmode;
728 unsigned HOST_WIDE_INT maxbits = MAX_FIXED_MODE_SIZE;
730 if (bitregion_end)
731 maxbits = bitregion_end - bitregion_start + 1;
733 /* Get the mode to use for inserting into this field. If OP0 is
734 BLKmode, get the smallest mode consistent with the alignment. If
735 OP0 is a non-BLKmode object that is no wider than OP_MODE, use its
736 mode. Otherwise, use the smallest mode containing the field. */
738 if (GET_MODE (op0) == BLKmode
739 || GET_MODE_BITSIZE (GET_MODE (op0)) > maxbits
740 || (op_mode != MAX_MACHINE_MODE
741 && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (op_mode)))
742 bestmode = get_best_mode (bitsize, bitnum,
743 bitregion_start, bitregion_end,
744 MEM_ALIGN (op0),
745 (op_mode == MAX_MACHINE_MODE
746 ? VOIDmode : op_mode),
747 MEM_VOLATILE_P (op0));
748 else
749 bestmode = GET_MODE (op0);
751 if (bestmode != VOIDmode
752 && GET_MODE_SIZE (bestmode) >= GET_MODE_SIZE (fieldmode)
753 && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
754 && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
756 rtx last, tempreg, xop0;
757 unsigned HOST_WIDE_INT xoffset, xbitpos;
759 last = get_last_insn ();
761 /* Adjust address to point to the containing unit of
762 that mode. Compute the offset as a multiple of this unit,
763 counting in bytes. */
764 unit = GET_MODE_BITSIZE (bestmode);
765 xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
766 xbitpos = bitnum % unit;
767 xop0 = adjust_address (op0, bestmode, xoffset);
769 /* Fetch that unit, store the bitfield in it, then store
770 the unit. */
771 tempreg = copy_to_reg (xop0);
772 if (store_bit_field_1 (tempreg, bitsize, xbitpos,
773 bitregion_start, bitregion_end,
774 fieldmode, orig_value, false))
776 emit_move_insn (xop0, tempreg);
777 return true;
779 delete_insns_since (last);
783 if (!fallback_p)
784 return false;
786 store_fixed_bit_field (op0, offset, bitsize, bitpos,
787 bitregion_start, bitregion_end, value);
788 return true;
791 /* Generate code to store value from rtx VALUE
792 into a bit-field within structure STR_RTX
793 containing BITSIZE bits starting at bit BITNUM.
795 BITREGION_START is bitpos of the first bitfield in this region.
796 BITREGION_END is the bitpos of the ending bitfield in this region.
 797 These two fields are 0 if the C++ memory model does not apply,
798 or we are not interested in keeping track of bitfield regions.
800 FIELDMODE is the machine-mode of the FIELD_DECL node for this field. */
802 void
803 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
804 unsigned HOST_WIDE_INT bitnum,
805 unsigned HOST_WIDE_INT bitregion_start,
806 unsigned HOST_WIDE_INT bitregion_end,
807 enum machine_mode fieldmode,
808 rtx value)
810 /* Under the C++0x memory model, we must not touch bits outside the
811 bit region. Adjust the address to start at the beginning of the
812 bit region. */
813 if (MEM_P (str_rtx)
814 && bitregion_start > 0)
816 enum machine_mode bestmode;
817 enum machine_mode op_mode;
818 unsigned HOST_WIDE_INT offset;
820 op_mode = mode_for_extraction (EP_insv, 3);
821 if (op_mode == MAX_MACHINE_MODE)
822 op_mode = VOIDmode;
824 offset = bitregion_start / BITS_PER_UNIT;
825 bitnum -= bitregion_start;
826 bitregion_end -= bitregion_start;
827 bitregion_start = 0;
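/* For example, with BITREGION_START == 32 and BITNUM == 37, OFFSET becomes 4
   bytes and BITNUM becomes 5, so the access is re-based at the start of the
   bit region; get_best_mode below then picks a mode that stays inside the
   re-based region.  */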
828 bestmode = get_best_mode (bitsize, bitnum,
829 bitregion_start, bitregion_end,
830 MEM_ALIGN (str_rtx),
831 op_mode,
832 MEM_VOLATILE_P (str_rtx));
833 str_rtx = adjust_address (str_rtx, bestmode, offset);
836 if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
837 bitregion_start, bitregion_end,
838 fieldmode, value, true))
839 gcc_unreachable ();
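/* A hypothetical example call (not from this file): storing the constant 6
   into a 3-bit field that starts at bit 5 of a byte-aligned MEM would be
   store_bit_field (mem, 3, 5, 0, 0, QImode, GEN_INT (6)), where the zero
   region bounds mean no C++ memory-model bit region is being tracked.  */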
842 /* Use shifts and boolean operations to store VALUE
843 into a bit field of width BITSIZE
844 in a memory location specified by OP0 except offset by OFFSET bytes.
845 (OFFSET must be 0 if OP0 is a register.)
846 The field starts at position BITPOS within the byte.
847 (If OP0 is a register, it may be a full word or a narrower mode,
848 but BITPOS still counts within a full word,
849 which is significant on bigendian machines.) */
851 static void
852 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT offset,
853 unsigned HOST_WIDE_INT bitsize,
854 unsigned HOST_WIDE_INT bitpos,
855 unsigned HOST_WIDE_INT bitregion_start,
856 unsigned HOST_WIDE_INT bitregion_end,
857 rtx value)
859 enum machine_mode mode;
860 unsigned int total_bits = BITS_PER_WORD;
861 rtx temp;
862 int all_zero = 0;
863 int all_one = 0;
865 /* There is a case not handled here:
866 a structure with a known alignment of just a halfword
867 and a field split across two aligned halfwords within the structure.
868 Or likewise a structure with a known alignment of just a byte
869 and a field split across two bytes.
870 Such cases are not supposed to be able to occur. */
872 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
874 gcc_assert (!offset);
875 /* Special treatment for a bit field split across two registers. */
876 if (bitsize + bitpos > BITS_PER_WORD)
878 store_split_bit_field (op0, bitsize, bitpos,
879 bitregion_start, bitregion_end,
880 value);
881 return;
884 else
886 unsigned HOST_WIDE_INT maxbits = MAX_FIXED_MODE_SIZE;
888 if (bitregion_end)
889 maxbits = bitregion_end - bitregion_start + 1;
891 /* Get the proper mode to use for this field. We want a mode that
892 includes the entire field. If such a mode would be larger than
893 a word, we won't be doing the extraction the normal way.
894 We don't want a mode bigger than the destination. */
896 mode = GET_MODE (op0);
897 if (GET_MODE_BITSIZE (mode) == 0
898 || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
899 mode = word_mode;
901 if (MEM_VOLATILE_P (op0)
902 && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
903 && GET_MODE_BITSIZE (GET_MODE (op0)) <= maxbits
904 && flag_strict_volatile_bitfields > 0)
905 mode = GET_MODE (op0);
906 else
907 mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
908 bitregion_start, bitregion_end,
909 MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
911 if (mode == VOIDmode)
913 /* The only way this should occur is if the field spans word
914 boundaries. */
915 store_split_bit_field (op0, bitsize, bitpos + offset * BITS_PER_UNIT,
916 bitregion_start, bitregion_end, value);
917 return;
920 total_bits = GET_MODE_BITSIZE (mode);
922 /* Make sure bitpos is valid for the chosen mode. Adjust BITPOS to
923 be in the range 0 to total_bits-1, and put any excess bytes in
924 OFFSET. */
925 if (bitpos >= total_bits)
927 offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
928 bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
929 * BITS_PER_UNIT);
932 /* Get ref to an aligned byte, halfword, or word containing the field.
933 Adjust BITPOS to be position within a word,
934 and OFFSET to be the offset of that word.
935 Then alter OP0 to refer to that word. */
936 bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
937 offset -= (offset % (total_bits / BITS_PER_UNIT));
938 op0 = adjust_address (op0, mode, offset);
941 mode = GET_MODE (op0);
943 /* Now MODE is either some integral mode for a MEM as OP0,
944 or is a full-word for a REG as OP0. TOTAL_BITS corresponds.
945 The bit field is contained entirely within OP0.
946 BITPOS is the starting bit number within OP0.
947 (OP0's mode may actually be narrower than MODE.) */
949 if (BYTES_BIG_ENDIAN)
950 /* BITPOS is the distance between our msb
951 and that of the containing datum.
952 Convert it to the distance from the lsb. */
953 bitpos = total_bits - bitsize - bitpos;
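/* E.g. with total_bits == 32, bitsize == 8 and a big-endian bit position of
   4, the lsb-relative position becomes 32 - 8 - 4 = 20.  */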
955 /* Now BITPOS is always the distance between our lsb
956 and that of OP0. */
958 /* Shift VALUE left by BITPOS bits. If VALUE is not constant,
959 we must first convert its mode to MODE. */
961 if (CONST_INT_P (value))
963 HOST_WIDE_INT v = INTVAL (value);
965 if (bitsize < HOST_BITS_PER_WIDE_INT)
966 v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
968 if (v == 0)
969 all_zero = 1;
970 else if ((bitsize < HOST_BITS_PER_WIDE_INT
971 && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
972 || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
973 all_one = 1;
975 value = lshift_value (mode, value, bitpos, bitsize);
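/* So, for instance, storing the constant 0 sets ALL_ZERO and the IOR below
   is skipped, while storing 7 into a 3-bit field sets ALL_ONE and the masking
   AND that clears the field is skipped.  */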
977 else
979 int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
980 && bitpos + bitsize != GET_MODE_BITSIZE (mode));
982 if (GET_MODE (value) != mode)
983 value = convert_to_mode (mode, value, 1);
985 if (must_and)
986 value = expand_binop (mode, and_optab, value,
987 mask_rtx (mode, 0, bitsize, 0),
988 NULL_RTX, 1, OPTAB_LIB_WIDEN);
989 if (bitpos > 0)
990 value = expand_shift (LSHIFT_EXPR, mode, value,
991 bitpos, NULL_RTX, 1);
994 /* Now clear the chosen bits in OP0,
995 except that if VALUE is -1 we need not bother. */
996 /* We keep the intermediates in registers to allow CSE to combine
997 consecutive bitfield assignments. */
999 temp = force_reg (mode, op0);
1001 if (! all_one)
1003 temp = expand_binop (mode, and_optab, temp,
1004 mask_rtx (mode, bitpos, bitsize, 1),
1005 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1006 temp = force_reg (mode, temp);
1009 /* Now logical-or VALUE into OP0, unless it is zero. */
1011 if (! all_zero)
1013 temp = expand_binop (mode, ior_optab, temp, value,
1014 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1015 temp = force_reg (mode, temp);
1018 if (op0 != temp)
1020 op0 = copy_rtx (op0);
1021 emit_move_insn (op0, temp);
1025 /* Store a bit field that is split across multiple accessible memory objects.
1027 OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1028 BITSIZE is the field width; BITPOS the position of its first bit
1029 (within the word).
1030 VALUE is the value to store.
1032 This does not yet handle fields wider than BITS_PER_WORD. */
1034 static void
1035 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1036 unsigned HOST_WIDE_INT bitpos,
1037 unsigned HOST_WIDE_INT bitregion_start,
1038 unsigned HOST_WIDE_INT bitregion_end,
1039 rtx value)
1041 unsigned int unit;
1042 unsigned int bitsdone = 0;
 1044 /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
1045 much at a time. */
1046 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1047 unit = BITS_PER_WORD;
1048 else
1049 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
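/* E.g. for a MEM known to be only byte aligned, UNIT is 8 and the loop below
   stores at most one byte of the field per iteration; for a REG or SUBREG it
   is always a full word.  */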
1051 /* If VALUE is a constant other than a CONST_INT, get it into a register in
1052 WORD_MODE. If we can do this using gen_lowpart_common, do so. Note
1053 that VALUE might be a floating-point constant. */
1054 if (CONSTANT_P (value) && !CONST_INT_P (value))
1056 rtx word = gen_lowpart_common (word_mode, value);
1058 if (word && (value != word))
1059 value = word;
1060 else
1061 value = gen_lowpart_common (word_mode,
1062 force_reg (GET_MODE (value) != VOIDmode
1063 ? GET_MODE (value)
1064 : word_mode, value));
1067 while (bitsdone < bitsize)
1069 unsigned HOST_WIDE_INT thissize;
1070 rtx part, word;
1071 unsigned HOST_WIDE_INT thispos;
1072 unsigned HOST_WIDE_INT offset;
1074 offset = (bitpos + bitsdone) / unit;
1075 thispos = (bitpos + bitsdone) % unit;
1077 /* THISSIZE must not overrun a word boundary. Otherwise,
1078 store_fixed_bit_field will call us again, and we will mutually
1079 recurse forever. */
1080 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1081 thissize = MIN (thissize, unit - thispos);
1083 if (BYTES_BIG_ENDIAN)
1085 int total_bits;
1087 /* We must do an endian conversion exactly the same way as it is
1088 done in extract_bit_field, so that the two calls to
1089 extract_fixed_bit_field will have comparable arguments. */
1090 if (!MEM_P (value) || GET_MODE (value) == BLKmode)
1091 total_bits = BITS_PER_WORD;
1092 else
1093 total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1095 /* Fetch successively less significant portions. */
1096 if (CONST_INT_P (value))
1097 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1098 >> (bitsize - bitsdone - thissize))
1099 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1100 else
1101 /* The args are chosen so that the last part includes the
1102 lsb. Give extract_bit_field the value it needs (with
1103 endianness compensation) to fetch the piece we want. */
1104 part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1105 total_bits - bitsize + bitsdone,
1106 NULL_RTX, 1, false);
1108 else
1110 /* Fetch successively more significant portions. */
1111 if (CONST_INT_P (value))
1112 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1113 >> bitsdone)
1114 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1115 else
1116 part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1117 bitsdone, NULL_RTX, 1, false);
1120 /* If OP0 is a register, then handle OFFSET here.
1122 When handling multiword bitfields, extract_bit_field may pass
1123 down a word_mode SUBREG of a larger REG for a bitfield that actually
1124 crosses a word boundary. Thus, for a SUBREG, we must find
1125 the current word starting from the base register. */
1126 if (GET_CODE (op0) == SUBREG)
1128 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1129 enum machine_mode sub_mode = GET_MODE (SUBREG_REG (op0));
1130 if (sub_mode != BLKmode && GET_MODE_SIZE (sub_mode) < UNITS_PER_WORD)
1131 word = word_offset ? const0_rtx : op0;
1132 else
1133 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1134 GET_MODE (SUBREG_REG (op0)));
1135 offset = 0;
1137 else if (REG_P (op0))
1139 enum machine_mode op0_mode = GET_MODE (op0);
1140 if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
1141 word = offset ? const0_rtx : op0;
1142 else
1143 word = operand_subword_force (op0, offset, GET_MODE (op0));
1144 offset = 0;
1146 else
1147 word = op0;
1149 /* OFFSET is in UNITs, and UNIT is in bits.
1150 store_fixed_bit_field wants offset in bytes. If WORD is const0_rtx,
1151 it is just an out-of-bounds access. Ignore it. */
1152 if (word != const0_rtx)
1153 store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT, thissize,
1154 thispos, bitregion_start, bitregion_end, part);
1155 bitsdone += thissize;
1159 /* A subroutine of extract_bit_field_1 that converts return value X
1160 to either MODE or TMODE. MODE, TMODE and UNSIGNEDP are arguments
1161 to extract_bit_field. */
1163 static rtx
1164 convert_extracted_bit_field (rtx x, enum machine_mode mode,
1165 enum machine_mode tmode, bool unsignedp)
1167 if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1168 return x;
1170 /* If the x mode is not a scalar integral, first convert to the
1171 integer mode of that size and then access it as a floating-point
1172 value via a SUBREG. */
1173 if (!SCALAR_INT_MODE_P (tmode))
1175 enum machine_mode smode;
1177 smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1178 x = convert_to_mode (smode, x, unsignedp);
1179 x = force_reg (smode, x);
1180 return gen_lowpart (tmode, x);
1183 return convert_to_mode (tmode, x, unsignedp);
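/* For example, when TMODE is SFmode the extracted bits are first converted to
   the integer mode of the same size (SImode on a target with 32-bit floats)
   and then re-interpreted as SFmode via gen_lowpart.  */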
1186 /* A subroutine of extract_bit_field, with the same arguments.
1187 If FALLBACK_P is true, fall back to extract_fixed_bit_field
1188 if we can find no other means of implementing the operation.
 1189 If FALLBACK_P is false, return NULL instead.  */
1191 static rtx
1192 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1193 unsigned HOST_WIDE_INT bitnum,
1194 int unsignedp, bool packedp, rtx target,
1195 enum machine_mode mode, enum machine_mode tmode,
1196 bool fallback_p)
1198 unsigned int unit
1199 = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
1200 unsigned HOST_WIDE_INT offset, bitpos;
1201 rtx op0 = str_rtx;
1202 enum machine_mode int_mode;
1203 enum machine_mode ext_mode;
1204 enum machine_mode mode1;
1205 int byte_offset;
1207 if (tmode == VOIDmode)
1208 tmode = mode;
1210 while (GET_CODE (op0) == SUBREG)
1212 bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1213 op0 = SUBREG_REG (op0);
1216 /* If we have an out-of-bounds access to a register, just return an
1217 uninitialized register of the required mode. This can occur if the
1218 source code contains an out-of-bounds access to a small array. */
1219 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1220 return gen_reg_rtx (tmode);
1222 if (REG_P (op0)
1223 && mode == GET_MODE (op0)
1224 && bitnum == 0
1225 && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1227 /* We're trying to extract a full register from itself. */
1228 return op0;
1231 /* See if we can get a better vector mode before extracting. */
1232 if (VECTOR_MODE_P (GET_MODE (op0))
1233 && !MEM_P (op0)
1234 && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1236 enum machine_mode new_mode;
1238 if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1239 new_mode = MIN_MODE_VECTOR_FLOAT;
1240 else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1241 new_mode = MIN_MODE_VECTOR_FRACT;
1242 else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1243 new_mode = MIN_MODE_VECTOR_UFRACT;
1244 else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1245 new_mode = MIN_MODE_VECTOR_ACCUM;
1246 else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1247 new_mode = MIN_MODE_VECTOR_UACCUM;
1248 else
1249 new_mode = MIN_MODE_VECTOR_INT;
1251 for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1252 if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1253 && targetm.vector_mode_supported_p (new_mode))
1254 break;
1255 if (new_mode != VOIDmode)
1256 op0 = gen_lowpart (new_mode, op0);
1259 /* Use vec_extract patterns for extracting parts of vectors whenever
1260 available. */
1261 if (VECTOR_MODE_P (GET_MODE (op0))
1262 && !MEM_P (op0)
1263 && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing
1264 && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1265 == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1267 struct expand_operand ops[3];
1268 enum machine_mode outermode = GET_MODE (op0);
1269 enum machine_mode innermode = GET_MODE_INNER (outermode);
1270 enum insn_code icode = optab_handler (vec_extract_optab, outermode);
1271 unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1273 create_output_operand (&ops[0], target, innermode);
1274 create_input_operand (&ops[1], op0, outermode);
1275 create_integer_operand (&ops[2], pos);
1276 if (maybe_expand_insn (icode, 3, ops))
1278 target = ops[0].value;
1279 if (GET_MODE (target) != mode)
1280 return gen_lowpart (tmode, target);
1281 return target;
1285 /* Make sure we are playing with integral modes. Pun with subregs
1286 if we aren't. */
1288 enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1289 if (imode != GET_MODE (op0))
1291 if (MEM_P (op0))
1292 op0 = adjust_address (op0, imode, 0);
1293 else if (imode != BLKmode)
1295 op0 = gen_lowpart (imode, op0);
1297 /* If we got a SUBREG, force it into a register since we
1298 aren't going to be able to do another SUBREG on it. */
1299 if (GET_CODE (op0) == SUBREG)
1300 op0 = force_reg (imode, op0);
1302 else if (REG_P (op0))
1304 rtx reg, subreg;
1305 imode = smallest_mode_for_size (GET_MODE_BITSIZE (GET_MODE (op0)),
1306 MODE_INT);
1307 reg = gen_reg_rtx (imode);
1308 subreg = gen_lowpart_SUBREG (GET_MODE (op0), reg);
1309 emit_move_insn (subreg, op0);
1310 op0 = reg;
1311 bitnum += SUBREG_BYTE (subreg) * BITS_PER_UNIT;
1313 else
1315 rtx mem = assign_stack_temp (GET_MODE (op0),
1316 GET_MODE_SIZE (GET_MODE (op0)), 0);
1317 emit_move_insn (mem, op0);
1318 op0 = adjust_address (mem, BLKmode, 0);
1323 /* We may be accessing data outside the field, which means
1324 we can alias adjacent data. */
1325 if (MEM_P (op0))
1327 op0 = shallow_copy_rtx (op0);
1328 set_mem_alias_set (op0, 0);
1329 set_mem_expr (op0, 0);
1332 /* Extraction of a full-word or multi-word value from a structure
1333 in a register or aligned memory can be done with just a SUBREG.
1334 A subword value in the least significant part of a register
1335 can also be extracted with a SUBREG. For this, we need the
1336 byte offset of the value in op0. */
1338 bitpos = bitnum % unit;
1339 offset = bitnum / unit;
1340 byte_offset = bitpos / BITS_PER_UNIT + offset * UNITS_PER_WORD;
1342 /* If OP0 is a register, BITPOS must count within a word.
1343 But as we have it, it counts within whatever size OP0 now has.
1344 On a bigendian machine, these are not the same, so convert. */
1345 if (BYTES_BIG_ENDIAN
1346 && !MEM_P (op0)
1347 && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
1348 bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1350 /* ??? We currently assume TARGET is at least as big as BITSIZE.
1351 If that's wrong, the solution is to test for it and set TARGET to 0
1352 if needed. */
1354 /* Only scalar integer modes can be converted via subregs. There is an
1355 additional problem for FP modes here in that they can have a precision
1356 which is different from the size. mode_for_size uses precision, but
1357 we want a mode based on the size, so we must avoid calling it for FP
1358 modes. */
1359 mode1 = (SCALAR_INT_MODE_P (tmode)
1360 ? mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0)
1361 : mode);
1363 /* If the bitfield is volatile, we need to make sure the access
1364 remains on a type-aligned boundary. */
1365 if (GET_CODE (op0) == MEM
1366 && MEM_VOLATILE_P (op0)
1367 && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
1368 && flag_strict_volatile_bitfields > 0)
1369 goto no_subreg_mode_swap;
1371 if (((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode)
1372 && bitpos % BITS_PER_WORD == 0)
1373 || (mode1 != BLKmode
1374 /* ??? The big endian test here is wrong. This is correct
1375 if the value is in a register, and if mode_for_size is not
1376 the same mode as op0. This causes us to get unnecessarily
1377 inefficient code from the Thumb port when -mbig-endian. */
1378 && (BYTES_BIG_ENDIAN
1379 ? bitpos + bitsize == BITS_PER_WORD
1380 : bitpos == 0)))
1381 && ((!MEM_P (op0)
1382 && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0))
1383 && GET_MODE_SIZE (mode1) != 0
1384 && byte_offset % GET_MODE_SIZE (mode1) == 0)
1385 || (MEM_P (op0)
1386 && (! SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
1387 || (offset * BITS_PER_UNIT % bitsize == 0
1388 && MEM_ALIGN (op0) % bitsize == 0)))))
1390 if (MEM_P (op0))
1391 op0 = adjust_address (op0, mode1, offset);
1392 else if (mode1 != GET_MODE (op0))
1394 rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1395 byte_offset);
1396 if (sub == NULL)
1397 goto no_subreg_mode_swap;
1398 op0 = sub;
1400 if (mode1 != mode)
1401 return convert_to_mode (tmode, op0, unsignedp);
1402 return op0;
1404 no_subreg_mode_swap:
1406 /* Handle fields bigger than a word. */
1408 if (bitsize > BITS_PER_WORD)
1410 /* Here we transfer the words of the field
1411 in the order least significant first.
1412 This is because the most significant word is the one which may
1413 be less than full. */
1415 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1416 unsigned int i;
1418 if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1419 target = gen_reg_rtx (mode);
1421 /* Indicate for flow that the entire target reg is being set. */
1422 emit_clobber (target);
1424 for (i = 0; i < nwords; i++)
1426 /* If I is 0, use the low-order word in both field and target;
1427 if I is 1, use the next to lowest word; and so on. */
1428 /* Word number in TARGET to use. */
1429 unsigned int wordnum
1430 = (WORDS_BIG_ENDIAN
1431 ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1432 : i);
1433 /* Offset from start of field in OP0. */
1434 unsigned int bit_offset = (WORDS_BIG_ENDIAN
1435 ? MAX (0, ((int) bitsize - ((int) i + 1)
1436 * (int) BITS_PER_WORD))
1437 : (int) i * BITS_PER_WORD);
1438 rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1439 rtx result_part
1440 = extract_bit_field (op0, MIN (BITS_PER_WORD,
1441 bitsize - i * BITS_PER_WORD),
1442 bitnum + bit_offset, 1, false, target_part, mode,
1443 word_mode);
1445 gcc_assert (target_part);
1447 if (result_part != target_part)
1448 emit_move_insn (target_part, result_part);
1451 if (unsignedp)
1453 /* Unless we've filled TARGET, the upper regs in a multi-reg value
1454 need to be zero'd out. */
1455 if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1457 unsigned int i, total_words;
1459 total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1460 for (i = nwords; i < total_words; i++)
1461 emit_move_insn
1462 (operand_subword (target,
1463 WORDS_BIG_ENDIAN ? total_words - i - 1 : i,
1464 1, VOIDmode),
1465 const0_rtx);
1467 return target;
1470 /* Signed bit field: sign-extend with two arithmetic shifts. */
1471 target = expand_shift (LSHIFT_EXPR, mode, target,
1472 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1473 return expand_shift (RSHIFT_EXPR, mode, target,
1474 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1477 /* From here on we know the desired field is smaller than a word. */
1479 /* Check if there is a correspondingly-sized integer field, so we can
1480 safely extract it as one size of integer, if necessary; then
1481 truncate or extend to the size that is wanted; then use SUBREGs or
1482 convert_to_mode to get one of the modes we really wanted. */
1484 int_mode = int_mode_for_mode (tmode);
1485 if (int_mode == BLKmode)
1486 int_mode = int_mode_for_mode (mode);
1487 /* Should probably push op0 out to memory and then do a load. */
1488 gcc_assert (int_mode != BLKmode);
1490 /* OFFSET is the number of words or bytes (UNIT says which)
1491 from STR_RTX to the first word or byte containing part of the field. */
1492 if (!MEM_P (op0))
1494 if (offset != 0
1495 || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1497 if (!REG_P (op0))
1498 op0 = copy_to_reg (op0);
1499 op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
1500 op0, (offset * UNITS_PER_WORD));
1502 offset = 0;
1505 /* Now OFFSET is nonzero only for memory operands. */
1506 ext_mode = mode_for_extraction (unsignedp ? EP_extzv : EP_extv, 0);
1507 if (ext_mode != MAX_MACHINE_MODE
1508 && bitsize > 0
1509 && GET_MODE_BITSIZE (ext_mode) >= bitsize
1510 /* If op0 is a register, we need it in EXT_MODE to make it
1511 acceptable to the format of ext(z)v. */
1512 && !(GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1513 && !((REG_P (op0) || GET_CODE (op0) == SUBREG)
1514 && (bitsize + bitpos > GET_MODE_BITSIZE (ext_mode))))
1516 struct expand_operand ops[4];
1517 unsigned HOST_WIDE_INT xbitpos = bitpos, xoffset = offset;
1518 rtx xop0 = op0;
1519 rtx xtarget = target;
1520 rtx xspec_target = target;
1521 rtx xspec_target_subreg = 0;
1523 /* If op0 is a register, we need it in EXT_MODE to make it
1524 acceptable to the format of ext(z)v. */
1525 if (REG_P (xop0) && GET_MODE (xop0) != ext_mode)
1526 xop0 = gen_lowpart_SUBREG (ext_mode, xop0);
1527 if (MEM_P (xop0))
1528 /* Get ref to first byte containing part of the field. */
1529 xop0 = adjust_address (xop0, byte_mode, xoffset);
1531 /* On big-endian machines, we count bits from the most significant.
1532 If the bit field insn does not, we must invert. */
1533 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1534 xbitpos = unit - bitsize - xbitpos;
1536 /* Now convert from counting within UNIT to counting in EXT_MODE. */
1537 if (BITS_BIG_ENDIAN && !MEM_P (xop0))
1538 xbitpos += GET_MODE_BITSIZE (ext_mode) - unit;
1540 unit = GET_MODE_BITSIZE (ext_mode);
1542 if (xtarget == 0)
1543 xtarget = xspec_target = gen_reg_rtx (tmode);
1545 if (GET_MODE (xtarget) != ext_mode)
1547 /* Don't use LHS paradoxical subreg if explicit truncation is needed
1548 between the mode of the extraction (word_mode) and the target
1549 mode. Instead, create a temporary and use convert_move to set
1550 the target. */
1551 if (REG_P (xtarget)
1552 && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (xtarget), ext_mode))
1554 xtarget = gen_lowpart (ext_mode, xtarget);
1555 if (GET_MODE_PRECISION (ext_mode)
1556 > GET_MODE_PRECISION (GET_MODE (xspec_target)))
1557 xspec_target_subreg = xtarget;
1559 else
1560 xtarget = gen_reg_rtx (ext_mode);
1563 create_output_operand (&ops[0], xtarget, ext_mode);
1564 create_fixed_operand (&ops[1], xop0);
1565 create_integer_operand (&ops[2], bitsize);
1566 create_integer_operand (&ops[3], xbitpos);
1567 if (maybe_expand_insn (unsignedp ? CODE_FOR_extzv : CODE_FOR_extv,
1568 4, ops))
1570 xtarget = ops[0].value;
1571 if (xtarget == xspec_target)
1572 return xtarget;
1573 if (xtarget == xspec_target_subreg)
1574 return xspec_target;
1575 return convert_extracted_bit_field (xtarget, mode, tmode, unsignedp);
1579 /* If OP0 is a memory, try copying it to a register and seeing if a
1580 cheap register alternative is available. */
1581 if (ext_mode != MAX_MACHINE_MODE && MEM_P (op0))
1583 enum machine_mode bestmode;
1585 /* Get the mode to use for inserting into this field. If
1586 OP0 is BLKmode, get the smallest mode consistent with the
1587 alignment. If OP0 is a non-BLKmode object that is no
1588 wider than EXT_MODE, use its mode. Otherwise, use the
1589 smallest mode containing the field. */
1591 if (GET_MODE (op0) == BLKmode
1592 || (ext_mode != MAX_MACHINE_MODE
1593 && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (ext_mode)))
1594 bestmode = get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0),
1595 (ext_mode == MAX_MACHINE_MODE
1596 ? VOIDmode : ext_mode),
1597 MEM_VOLATILE_P (op0));
1598 else
1599 bestmode = GET_MODE (op0);
1601 if (bestmode != VOIDmode
1602 && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
1603 && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
1605 unsigned HOST_WIDE_INT xoffset, xbitpos;
1607 /* Compute the offset as a multiple of this unit,
1608 counting in bytes. */
1609 unit = GET_MODE_BITSIZE (bestmode);
1610 xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1611 xbitpos = bitnum % unit;
1613 /* Make sure the register is big enough for the whole field. */
1614 if (xoffset * BITS_PER_UNIT + unit
1615 >= offset * BITS_PER_UNIT + bitsize)
1617 rtx last, result, xop0;
1619 last = get_last_insn ();
1621 /* Fetch it to a register in that size. */
1622 xop0 = adjust_address (op0, bestmode, xoffset);
1623 xop0 = force_reg (bestmode, xop0);
1624 result = extract_bit_field_1 (xop0, bitsize, xbitpos,
1625 unsignedp, packedp, target,
1626 mode, tmode, false);
1627 if (result)
1628 return result;
1630 delete_insns_since (last);
1635 if (!fallback_p)
1636 return NULL;
1638 target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1639 bitpos, target, unsignedp, packedp);
1640 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1643 /* Generate code to extract a byte-field from STR_RTX
1644 containing BITSIZE bits, starting at BITNUM,
1645 and put it in TARGET if possible (if TARGET is nonzero).
1646 Regardless of TARGET, we return the rtx for where the value is placed.
1648 STR_RTX is the structure containing the byte (a REG or MEM).
1649 UNSIGNEDP is nonzero if this is an unsigned bit field.
1650 PACKEDP is nonzero if the field has the packed attribute.
1651 MODE is the natural mode of the field value once extracted.
1652 TMODE is the mode the caller would like the value to have;
1653 but the value may be returned with type MODE instead.
1655 If a TARGET is specified and we can store in it at no extra cost,
1656 we do so, and return TARGET.
1657 Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1658 if they are equally easy. */
 1660 rtx
 1661 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1662 unsigned HOST_WIDE_INT bitnum, int unsignedp, bool packedp,
1663 rtx target, enum machine_mode mode, enum machine_mode tmode)
1665 return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp, packedp,
1666 target, mode, tmode, true);
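/* A hypothetical example call: extract_bit_field (mem, 8, 3, 1, false,
   NULL_RTX, SImode, SImode) would return an SImode rtx holding the
   zero-extended 8-bit field that starts at bit 3 of MEM.  */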
 1669 /* Extract a bit field using shifts and boolean operations.
1670 Returns an rtx to represent the value.
1671 OP0 addresses a register (word) or memory (byte).
1672 BITPOS says which bit within the word or byte the bit field starts in.
1673 OFFSET says how many bytes farther the bit field starts;
1674 it is 0 if OP0 is a register.
1675 BITSIZE says how many bits long the bit field is.
1676 (If OP0 is a register, it may be narrower than a full word,
1677 but BITPOS still counts within a full word,
1678 which is significant on bigendian machines.)
1680 UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1681 PACKEDP is true if the field has the packed attribute.
1683 If TARGET is nonzero, attempts to store the value there
1684 and return TARGET, but this is not guaranteed.
1685 If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */
1687 static rtx
1688 extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
1689 unsigned HOST_WIDE_INT offset,
1690 unsigned HOST_WIDE_INT bitsize,
1691 unsigned HOST_WIDE_INT bitpos, rtx target,
1692 int unsignedp, bool packedp)
1694 unsigned int total_bits = BITS_PER_WORD;
1695 enum machine_mode mode;
1697 if (GET_CODE (op0) == SUBREG || REG_P (op0))
1699 /* Special treatment for a bit field split across two registers. */
1700 if (bitsize + bitpos > BITS_PER_WORD)
1701 return extract_split_bit_field (op0, bitsize, bitpos, unsignedp);
1703 else
1705 /* Get the proper mode to use for this field. We want a mode that
1706 includes the entire field. If such a mode would be larger than
1707 a word, we won't be doing the extraction the normal way. */
1709 if (MEM_VOLATILE_P (op0)
1710 && flag_strict_volatile_bitfields > 0)
1712 if (GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
1713 mode = GET_MODE (op0);
1714 else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1715 mode = GET_MODE (target);
1716 else
1717 mode = tmode;
1719 else
1720 mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT, 0, 0,
1721 MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));
1723 if (mode == VOIDmode)
1724 /* The only way this should occur is if the field spans word
1725 boundaries. */
1726 return extract_split_bit_field (op0, bitsize,
1727 bitpos + offset * BITS_PER_UNIT,
1728 unsignedp);
1730 total_bits = GET_MODE_BITSIZE (mode);
1732 /* Make sure bitpos is valid for the chosen mode. Adjust BITPOS to
1733 be in the range 0 to total_bits-1, and put any excess bytes in
1734 OFFSET. */
1735 if (bitpos >= total_bits)
1737 offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
1738 bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
1739 * BITS_PER_UNIT);
1742 /* If we're accessing a volatile MEM, we can't do the next
1743 alignment step if it results in a multi-word access where we
1744 otherwise wouldn't have one. So, check for that case
1745 here. */
1746 if (MEM_P (op0)
1747 && MEM_VOLATILE_P (op0)
1748 && flag_strict_volatile_bitfields > 0
1749 && bitpos + bitsize <= total_bits
1750 && bitpos + bitsize + (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT > total_bits)
1752 if (STRICT_ALIGNMENT)
1754 static bool informed_about_misalignment = false;
1755 bool warned;
1757 if (packedp)
1759 if (bitsize == total_bits)
1760 warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1761 "multiple accesses to volatile structure member"
1762 " because of packed attribute");
1763 else
1764 warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1765 "multiple accesses to volatile structure bitfield"
1766 " because of packed attribute");
1768 return extract_split_bit_field (op0, bitsize,
1769 bitpos + offset * BITS_PER_UNIT,
1770 unsignedp);
1773 if (bitsize == total_bits)
1774 warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1775 "mis-aligned access used for structure member");
1776 else
1777 warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1778 "mis-aligned access used for structure bitfield");
1780 if (! informed_about_misalignment && warned)
1782 informed_about_misalignment = true;
1783 inform (input_location,
1784 "when a volatile object spans multiple type-sized locations,"
1785 " the compiler must choose between using a single mis-aligned access to"
1786 " preserve the volatility, or using multiple aligned accesses to avoid"
1787 " runtime faults; this code may fail at runtime if the hardware does"
1788 " not allow this access");
1792 else
1795 /* Get ref to an aligned byte, halfword, or word containing the field.
1796 Adjust BITPOS to be position within a word,
1797 and OFFSET to be the offset of that word.
1798 Then alter OP0 to refer to that word. */
1799 bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
1800 offset -= (offset % (total_bits / BITS_PER_UNIT));
1803 op0 = adjust_address (op0, mode, offset);
1806 mode = GET_MODE (op0);
1808 if (BYTES_BIG_ENDIAN)
1809 /* BITPOS is the distance between our msb and that of OP0.
1810 Convert it to the distance from the lsb. */
1811 bitpos = total_bits - bitsize - bitpos;
1813 /* Now BITPOS is always the distance between the field's lsb and that of OP0.
1814 We have reduced the big-endian case to the little-endian case. */
1816 if (unsignedp)
1818 if (bitpos)
1820 /* If the field does not already start at the lsb,
1821 shift it so it does. */
1822 /* Maybe propagate the target for the shift. */
1823 /* But not if we will return it--could confuse integrate.c. */
1824 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1825 if (tmode != mode) subtarget = 0;
1826 op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitpos, subtarget, 1);
1828 /* Convert the value to the desired mode. */
1829 if (mode != tmode)
1830 op0 = convert_to_mode (tmode, op0, 1);
1832 /* Unless the msb of the field used to be the msb when we shifted,
1833 mask out the upper bits. */
1835 if (GET_MODE_BITSIZE (mode) != bitpos + bitsize)
1836 return expand_binop (GET_MODE (op0), and_optab, op0,
1837 mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1838 target, 1, OPTAB_LIB_WIDEN);
1839 return op0;
1842 /* To extract a signed bit-field, first shift its msb to the msb of the word,
1843 then arithmetic-shift its lsb to the lsb of the word. */
1844 op0 = force_reg (mode, op0);
1846 /* Find the narrowest integer mode that contains the field. */
1848 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1849 mode = GET_MODE_WIDER_MODE (mode))
1850 if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos)
1852 op0 = convert_to_mode (mode, op0, 0);
1853 break;
1856 if (mode != tmode)
1857 target = 0;
1859 if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos))
1861 int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitpos);
1862 /* Maybe propagate the target for the shift. */
1863 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1864 op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1867 return expand_shift (RSHIFT_EXPR, mode, op0,
1868 GET_MODE_BITSIZE (mode) - bitsize, target, 0);
1871 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1872 of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1873 complement of that if COMPLEMENT. The mask is truncated if
1874 necessary to the width of mode MODE. The mask is zero-extended if
1875 BITSIZE+BITPOS is too small for MODE. */
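/* For instance, assuming a 32-bit SImode, mask_rtx (SImode, 4, 8, 0)
   yields the constant 0xff0 (eight ones shifted left by four), and
   mask_rtx (SImode, 4, 8, 1) yields its SImode complement
   0xfffff00f.  */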
1877 static rtx
1878 mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
1880 double_int mask;
1882 mask = double_int_mask (bitsize);
1883 mask = double_int_lshift (mask, bitpos, HOST_BITS_PER_DOUBLE_INT, false);
1885 if (complement)
1886 mask = double_int_not (mask);
1888 return immed_double_int_const (mask, mode);
1891 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1892 VALUE truncated to BITSIZE bits and then shifted left BITPOS bits. */
1894 static rtx
1895 lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize)
1897 double_int val;
1899 val = double_int_zext (uhwi_to_double_int (INTVAL (value)), bitsize);
1900 val = double_int_lshift (val, bitpos, HOST_BITS_PER_DOUBLE_INT, false);
1902 return immed_double_int_const (val, mode);
1905 /* Extract a bit field that is split across two words
1906 and return an RTX for the result.
1908 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1909 BITSIZE is the field width; BITPOS, position of its first bit, in the word.
1910 UNSIGNEDP is 1 if we should zero-extend the contents; else sign-extend. */
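/* As an illustration (little-endian, 32-bit words): a 10-bit field at
   BITPOS 28 is assembled from 4 bits taken from the top of the first
   word and 6 bits taken from the bottom of the second, roughly as

     result = part0 | (part1 << 4);

   the exact shift counts come from the loop below.  */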
1912 static rtx
1913 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1914 unsigned HOST_WIDE_INT bitpos, int unsignedp)
1916 unsigned int unit;
1917 unsigned int bitsdone = 0;
1918 rtx result = NULL_RTX;
1919 int first = 1;
1921 /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1922 much at a time. */
1923 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1924 unit = BITS_PER_WORD;
1925 else
1926 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1928 while (bitsdone < bitsize)
1930 unsigned HOST_WIDE_INT thissize;
1931 rtx part, word;
1932 unsigned HOST_WIDE_INT thispos;
1933 unsigned HOST_WIDE_INT offset;
1935 offset = (bitpos + bitsdone) / unit;
1936 thispos = (bitpos + bitsdone) % unit;
1938 /* THISSIZE must not overrun a word boundary. Otherwise,
1939 extract_fixed_bit_field will call us again, and we will mutually
1940 recurse forever. */
1941 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1942 thissize = MIN (thissize, unit - thispos);
1944 /* If OP0 is a register, then handle OFFSET here.
1946 When handling multiword bitfields, extract_bit_field may pass
1947 down a word_mode SUBREG of a larger REG for a bitfield that actually
1948 crosses a word boundary. Thus, for a SUBREG, we must find
1949 the current word starting from the base register. */
1950 if (GET_CODE (op0) == SUBREG)
1952 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1953 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1954 GET_MODE (SUBREG_REG (op0)));
1955 offset = 0;
1957 else if (REG_P (op0))
1959 word = operand_subword_force (op0, offset, GET_MODE (op0));
1960 offset = 0;
1962 else
1963 word = op0;
1965 /* Extract the parts in bit-counting order,
1966 whose meaning is determined by BYTES_PER_UNIT.
1967 OFFSET is in UNITs, and UNIT is in bits.
1968 extract_fixed_bit_field wants offset in bytes. */
1969 part = extract_fixed_bit_field (word_mode, word,
1970 offset * unit / BITS_PER_UNIT,
1971 thissize, thispos, 0, 1, false);
1972 bitsdone += thissize;
1974 /* Shift this part into place for the result. */
1975 if (BYTES_BIG_ENDIAN)
1977 if (bitsize != bitsdone)
1978 part = expand_shift (LSHIFT_EXPR, word_mode, part,
1979 bitsize - bitsdone, 0, 1);
1981 else
1983 if (bitsdone != thissize)
1984 part = expand_shift (LSHIFT_EXPR, word_mode, part,
1985 bitsdone - thissize, 0, 1);
1988 if (first)
1989 result = part;
1990 else
1991 /* Combine the parts with bitwise or. This works
1992 because we extracted each part as an unsigned bit field. */
1993 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
1994 OPTAB_LIB_WIDEN);
1996 first = 0;
1999 /* Unsigned bit field: we are done. */
2000 if (unsignedp)
2001 return result;
2002 /* Signed bit field: sign-extend with two arithmetic shifts. */
2003 result = expand_shift (LSHIFT_EXPR, word_mode, result,
2004 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2005 return expand_shift (RSHIFT_EXPR, word_mode, result,
2006 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2009 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2010 the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than
2011 MODE, fill the upper bits with zeros. Fail if the layout of either
2012 mode is unknown (as for CC modes) or if the extraction would involve
2013 unprofitable mode punning. Return the value on success, otherwise
2014 return null.
2016 This is different from gen_lowpart* in these respects:
2018 - the returned value must always be considered an rvalue
2020 - when MODE is wider than SRC_MODE, the extraction involves
2021 a zero extension
2023 - when MODE is smaller than SRC_MODE, the extraction involves
2024 a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
2026 In other words, this routine performs a computation, whereas the
2027 gen_lowpart* routines are conceptually lvalue or rvalue subreg
2028 operations. */
2031 extract_low_bits (enum machine_mode mode, enum machine_mode src_mode, rtx src)
2033 enum machine_mode int_mode, src_int_mode;
2035 if (mode == src_mode)
2036 return src;
2038 if (CONSTANT_P (src))
2040 /* simplify_gen_subreg can't be used here, as if simplify_subreg
2041 fails, it will happily create (subreg (symbol_ref)) or similar
2042 invalid SUBREGs. */
2043 unsigned int byte = subreg_lowpart_offset (mode, src_mode);
2044 rtx ret = simplify_subreg (mode, src, src_mode, byte);
2045 if (ret)
2046 return ret;
2048 if (GET_MODE (src) == VOIDmode
2049 || !validate_subreg (mode, src_mode, src, byte))
2050 return NULL_RTX;
2052 src = force_reg (GET_MODE (src), src);
2053 return gen_rtx_SUBREG (mode, src, byte);
2056 if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2057 return NULL_RTX;
2059 if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2060 && MODES_TIEABLE_P (mode, src_mode))
2062 rtx x = gen_lowpart_common (mode, src);
2063 if (x)
2064 return x;
2067 src_int_mode = int_mode_for_mode (src_mode);
2068 int_mode = int_mode_for_mode (mode);
2069 if (src_int_mode == BLKmode || int_mode == BLKmode)
2070 return NULL_RTX;
2072 if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2073 return NULL_RTX;
2074 if (!MODES_TIEABLE_P (int_mode, mode))
2075 return NULL_RTX;
2077 src = gen_lowpart (src_int_mode, src);
2078 src = convert_modes (int_mode, src_int_mode, src, true);
2079 src = gen_lowpart (mode, src);
2080 return src;
2083 /* Add INC into TARGET. */
2085 void
2086 expand_inc (rtx target, rtx inc)
2088 rtx value = expand_binop (GET_MODE (target), add_optab,
2089 target, inc,
2090 target, 0, OPTAB_LIB_WIDEN);
2091 if (value != target)
2092 emit_move_insn (target, value);
2095 /* Subtract DEC from TARGET. */
2097 void
2098 expand_dec (rtx target, rtx dec)
2100 rtx value = expand_binop (GET_MODE (target), sub_optab,
2101 target, dec,
2102 target, 0, OPTAB_LIB_WIDEN);
2103 if (value != target)
2104 emit_move_insn (target, value);
2107 /* Output a shift instruction for expression code CODE,
2108 with SHIFTED being the rtx for the value to shift,
2109 and AMOUNT the rtx for the amount to shift by.
2110 Store the result in the rtx TARGET, if that is convenient.
2111 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2112 Return the rtx for where the value is. */
2114 static rtx
2115 expand_shift_1 (enum tree_code code, enum machine_mode mode, rtx shifted,
2116 rtx amount, rtx target, int unsignedp)
2118 rtx op1, temp = 0;
2119 int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2120 int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2121 optab lshift_optab = ashl_optab;
2122 optab rshift_arith_optab = ashr_optab;
2123 optab rshift_uns_optab = lshr_optab;
2124 optab lrotate_optab = rotl_optab;
2125 optab rrotate_optab = rotr_optab;
2126 enum machine_mode op1_mode;
2127 int attempt;
2128 bool speed = optimize_insn_for_speed_p ();
2130 op1 = amount;
2131 op1_mode = GET_MODE (op1);
2133 /* Determine whether the shift/rotate amount is a vector, or scalar. If the
2134 shift amount is a vector, use the vector/vector shift patterns. */
2135 if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2137 lshift_optab = vashl_optab;
2138 rshift_arith_optab = vashr_optab;
2139 rshift_uns_optab = vlshr_optab;
2140 lrotate_optab = vrotl_optab;
2141 rrotate_optab = vrotr_optab;
2144 /* Previously detected shift-counts computed by NEGATE_EXPR
2145 and shifted in the other direction; but that does not work
2146 on all machines. */
2148 if (SHIFT_COUNT_TRUNCATED)
2150 if (CONST_INT_P (op1)
2151 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2152 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
2153 op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2154 % GET_MODE_BITSIZE (mode));
2155 else if (GET_CODE (op1) == SUBREG
2156 && subreg_lowpart_p (op1)
2157 && INTEGRAL_MODE_P (GET_MODE (SUBREG_REG (op1))))
2158 op1 = SUBREG_REG (op1);
2161 if (op1 == const0_rtx)
2162 return shifted;
2164 /* Check whether it's cheaper to implement a left shift by a constant
2165 bit count by a sequence of additions. */
2166 if (code == LSHIFT_EXPR
2167 && CONST_INT_P (op1)
2168 && INTVAL (op1) > 0
2169 && INTVAL (op1) < GET_MODE_PRECISION (mode)
2170 && INTVAL (op1) < MAX_BITS_PER_WORD
2171 && shift_cost[speed][mode][INTVAL (op1)] > INTVAL (op1) * add_cost[speed][mode]
2172 && shift_cost[speed][mode][INTVAL (op1)] != MAX_COST)
2174 int i;
2175 for (i = 0; i < INTVAL (op1); i++)
2177 temp = force_reg (mode, shifted);
2178 shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2179 unsignedp, OPTAB_LIB_WIDEN);
2181 return shifted;
2184 for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2186 enum optab_methods methods;
2188 if (attempt == 0)
2189 methods = OPTAB_DIRECT;
2190 else if (attempt == 1)
2191 methods = OPTAB_WIDEN;
2192 else
2193 methods = OPTAB_LIB_WIDEN;
2195 if (rotate)
2197 /* Widening does not work for rotation. */
2198 if (methods == OPTAB_WIDEN)
2199 continue;
2200 else if (methods == OPTAB_LIB_WIDEN)
2202 /* If we have been unable to open-code this by a rotation,
2203 do it as the IOR of two shifts. I.e., to rotate A
2204 by N bits, compute (A << N) | ((unsigned) A >> (C - N))
2205 where C is the bitsize of A.
2207 It is theoretically possible that the target machine might
2208 not be able to perform either shift and hence we would
2209 be making two libcalls rather than just the one for the
2210 shift (similarly if IOR could not be done). We will allow
2211 this extremely unlikely lossage to avoid complicating the
2212 code below. */
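/* For example, rotating a 32-bit value A left by 3 becomes
   (A << 3) | ((unsigned) A >> 29).  */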
2214 rtx subtarget = target == shifted ? 0 : target;
2215 rtx new_amount, other_amount;
2216 rtx temp1;
2218 new_amount = op1;
2219 if (CONST_INT_P (op1))
2220 other_amount = GEN_INT (GET_MODE_BITSIZE (mode)
2221 - INTVAL (op1));
2222 else
2223 other_amount
2224 = simplify_gen_binary (MINUS, GET_MODE (op1),
2225 GEN_INT (GET_MODE_PRECISION (mode)),
2226 op1);
2228 shifted = force_reg (mode, shifted);
2230 temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2231 mode, shifted, new_amount, 0, 1);
2232 temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2233 mode, shifted, other_amount,
2234 subtarget, 1);
2235 return expand_binop (mode, ior_optab, temp, temp1, target,
2236 unsignedp, methods);
2239 temp = expand_binop (mode,
2240 left ? lrotate_optab : rrotate_optab,
2241 shifted, op1, target, unsignedp, methods);
2243 else if (unsignedp)
2244 temp = expand_binop (mode,
2245 left ? lshift_optab : rshift_uns_optab,
2246 shifted, op1, target, unsignedp, methods);
2248 /* Do arithmetic shifts.
2249 Also, if we are going to widen the operand, we can just as well
2250 use an arithmetic right-shift instead of a logical one. */
2251 if (temp == 0 && ! rotate
2252 && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2254 enum optab_methods methods1 = methods;
2256 /* If trying to widen a log shift to an arithmetic shift,
2257 don't accept an arithmetic shift of the same size. */
2258 if (unsignedp)
2259 methods1 = OPTAB_MUST_WIDEN;
2261 /* Arithmetic shift */
2263 temp = expand_binop (mode,
2264 left ? lshift_optab : rshift_arith_optab,
2265 shifted, op1, target, unsignedp, methods1);
2268 /* We used to try extzv here for logical right shifts, but that was
2269 only useful for one machine, the VAX, and caused poor code
2270 generation there for lshrdi3, so the code was deleted and a
2271 define_expand for lshrsi3 was added to vax.md. */
2274 gcc_assert (temp);
2275 return temp;
2278 /* Output a shift instruction for expression code CODE,
2279 with SHIFTED being the rtx for the value to shift,
2280 and AMOUNT the amount to shift by.
2281 Store the result in the rtx TARGET, if that is convenient.
2282 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2283 Return the rtx for where the value is. */
2286 expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2287 int amount, rtx target, int unsignedp)
2289 return expand_shift_1 (code, mode,
2290 shifted, GEN_INT (amount), target, unsignedp);
2293 /* Output a shift instruction for expression code CODE,
2294 with SHIFTED being the rtx for the value to shift,
2295 and AMOUNT the tree for the amount to shift by.
2296 Store the result in the rtx TARGET, if that is convenient.
2297 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2298 Return the rtx for where the value is. */
2301 expand_variable_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2302 tree amount, rtx target, int unsignedp)
2304 return expand_shift_1 (code, mode,
2305 shifted, expand_normal (amount), target, unsignedp);
2309 /* Indicates the type of fixup needed after a constant multiplication.
2310 BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2311 the result should be negated, and ADD_VARIANT means that the
2312 multiplicand should be added to the result. */
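/* For example, multiplying by 9 may use add_variant: synthesize the
   multiplication by 8 (a single shift) and then add the multiplicand
   back in.  Multiplying by -3 may use negate_variant: synthesize the
   multiplication by 3 and negate the result.  These are illustrative
   decompositions only; the actual choice is made by
   choose_mult_variant below.  */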
2313 enum mult_variant {basic_variant, negate_variant, add_variant};
2315 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2316 const struct mult_cost *, enum machine_mode mode);
2317 static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
2318 struct algorithm *, enum mult_variant *, int);
2319 static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
2320 const struct algorithm *, enum mult_variant);
2321 static unsigned HOST_WIDE_INT choose_multiplier (unsigned HOST_WIDE_INT, int,
2322 int, rtx *, int *, int *);
2323 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2324 static rtx extract_high_half (enum machine_mode, rtx);
2325 static rtx expand_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
2326 static rtx expand_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
2327 int, int);
2328 /* Compute and return the best algorithm for multiplying by T.
2329 The algorithm must cost less than cost_limit.
2330 If retval.cost >= COST_LIMIT, no algorithm was found and all
2331 other fields of the returned struct are undefined.
2332 MODE is the machine mode of the multiplication. */
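/* As a rough illustration, T == 45 can be synthesized without a multiply
   insn by two shift-and-add steps, e.g.

     x5  = (x << 2) + x;           45 == 9 * 5
     x45 = (x5 << 3) + x5;

   synth_mult searches for the cheapest such decomposition that fits
   within COST_LIMIT.  */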
2334 static void
2335 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2336 const struct mult_cost *cost_limit, enum machine_mode mode)
2338 int m;
2339 struct algorithm *alg_in, *best_alg;
2340 struct mult_cost best_cost;
2341 struct mult_cost new_limit;
2342 int op_cost, op_latency;
2343 unsigned HOST_WIDE_INT orig_t = t;
2344 unsigned HOST_WIDE_INT q;
2345 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
2346 int hash_index;
2347 bool cache_hit = false;
2348 enum alg_code cache_alg = alg_zero;
2349 bool speed = optimize_insn_for_speed_p ();
2351 /* Indicate that no algorithm is yet found. If no algorithm
2352 is found, this value will be returned and indicate failure. */
2353 alg_out->cost.cost = cost_limit->cost + 1;
2354 alg_out->cost.latency = cost_limit->latency + 1;
2356 if (cost_limit->cost < 0
2357 || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2358 return;
2360 /* Restrict the bits of "t" to the multiplication's mode. */
2361 t &= GET_MODE_MASK (mode);
2363 /* t == 1 can be done in zero cost. */
2364 if (t == 1)
2366 alg_out->ops = 1;
2367 alg_out->cost.cost = 0;
2368 alg_out->cost.latency = 0;
2369 alg_out->op[0] = alg_m;
2370 return;
2373 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2374 fail now. */
2375 if (t == 0)
2377 if (MULT_COST_LESS (cost_limit, zero_cost[speed]))
2378 return;
2379 else
2381 alg_out->ops = 1;
2382 alg_out->cost.cost = zero_cost[speed];
2383 alg_out->cost.latency = zero_cost[speed];
2384 alg_out->op[0] = alg_zero;
2385 return;
2389 /* We'll be needing a couple extra algorithm structures now. */
2391 alg_in = XALLOCA (struct algorithm);
2392 best_alg = XALLOCA (struct algorithm);
2393 best_cost = *cost_limit;
2395 /* Compute the hash index. */
2396 hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2398 /* See if we already know what to do for T. */
2399 if (alg_hash[hash_index].t == t
2400 && alg_hash[hash_index].mode == mode
2402 && alg_hash[hash_index].speed == speed
2403 && alg_hash[hash_index].alg != alg_unknown)
2405 cache_alg = alg_hash[hash_index].alg;
2407 if (cache_alg == alg_impossible)
2409 /* The cache tells us that it's impossible to synthesize
2410 multiplication by T within alg_hash[hash_index].cost. */
2411 if (!CHEAPER_MULT_COST (&alg_hash[hash_index].cost, cost_limit))
2412 /* COST_LIMIT is at least as restrictive as the one
2413 recorded in the hash table, in which case we have no
2414 hope of synthesizing a multiplication. Just
2415 return. */
2416 return;
2418 /* If we get here, COST_LIMIT is less restrictive than the
2419 one recorded in the hash table, so we may be able to
2420 synthesize a multiplication. Proceed as if we didn't
2421 have the cache entry. */
2423 else
2425 if (CHEAPER_MULT_COST (cost_limit, &alg_hash[hash_index].cost))
2426 /* The cached algorithm shows that this multiplication
2427 requires more cost than COST_LIMIT. Just return. This
2428 way, we don't clobber this cache entry with
2429 alg_impossible but retain useful information. */
2430 return;
2432 cache_hit = true;
2434 switch (cache_alg)
2436 case alg_shift:
2437 goto do_alg_shift;
2439 case alg_add_t_m2:
2440 case alg_sub_t_m2:
2441 goto do_alg_addsub_t_m2;
2443 case alg_add_factor:
2444 case alg_sub_factor:
2445 goto do_alg_addsub_factor;
2447 case alg_add_t2_m:
2448 goto do_alg_add_t2_m;
2450 case alg_sub_t2_m:
2451 goto do_alg_sub_t2_m;
2453 default:
2454 gcc_unreachable ();
2459 /* If we have a group of zero bits at the low-order part of T, try
2460 multiplying by the remaining bits and then doing a shift. */
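/* For example, T == 20 (binary 10100) has two low zero bits, so it can
   be handled as a multiplication by 5 followed by a left shift of 2.  */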
2462 if ((t & 1) == 0)
2464 do_alg_shift:
2465 m = floor_log2 (t & -t); /* m = number of low zero bits */
2466 if (m < maxm)
2468 q = t >> m;
2469 /* The function expand_shift will choose between a shift and
2470 a sequence of additions, so the observed cost is given as
2471 MIN (m * add_cost[speed][mode], shift_cost[speed][mode][m]). */
2472 op_cost = m * add_cost[speed][mode];
2473 if (shift_cost[speed][mode][m] < op_cost)
2474 op_cost = shift_cost[speed][mode][m];
2475 new_limit.cost = best_cost.cost - op_cost;
2476 new_limit.latency = best_cost.latency - op_cost;
2477 synth_mult (alg_in, q, &new_limit, mode);
2479 alg_in->cost.cost += op_cost;
2480 alg_in->cost.latency += op_cost;
2481 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2483 struct algorithm *x;
2484 best_cost = alg_in->cost;
2485 x = alg_in, alg_in = best_alg, best_alg = x;
2486 best_alg->log[best_alg->ops] = m;
2487 best_alg->op[best_alg->ops] = alg_shift;
2490 /* See if treating ORIG_T as a signed number yields a better
2491 sequence. Try this sequence only for a negative ORIG_T
2492 as it would be useless for a non-negative ORIG_T. */
2493 if ((HOST_WIDE_INT) orig_t < 0)
2495 /* Shift ORIG_T as follows because a right shift of a
2496 negative-valued signed type is implementation
2497 defined. */
2498 q = ~(~orig_t >> m);
2499 /* The function expand_shift will choose between a shift
2500 and a sequence of additions, so the observed cost is
2501 given as MIN (m * add_cost[speed][mode],
2502 shift_cost[speed][mode][m]). */
2503 op_cost = m * add_cost[speed][mode];
2504 if (shift_cost[speed][mode][m] < op_cost)
2505 op_cost = shift_cost[speed][mode][m];
2506 new_limit.cost = best_cost.cost - op_cost;
2507 new_limit.latency = best_cost.latency - op_cost;
2508 synth_mult (alg_in, q, &new_limit, mode);
2510 alg_in->cost.cost += op_cost;
2511 alg_in->cost.latency += op_cost;
2512 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2514 struct algorithm *x;
2515 best_cost = alg_in->cost;
2516 x = alg_in, alg_in = best_alg, best_alg = x;
2517 best_alg->log[best_alg->ops] = m;
2518 best_alg->op[best_alg->ops] = alg_shift;
2522 if (cache_hit)
2523 goto done;
2526 /* If we have an odd number, add or subtract one. */
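/* For example, T == 7 (binary 111) can be handled as (T + 1) == 8, i.e.
   a shift by 3 followed by subtracting the multiplicand once, while
   T == 5 (binary 101) can be handled as (T - 1) == 4 followed by one
   addition.  */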
2527 if ((t & 1) != 0)
2529 unsigned HOST_WIDE_INT w;
2531 do_alg_addsub_t_m2:
2532 for (w = 1; (w & t) != 0; w <<= 1)
2534 /* If T was -1, then W will be zero after the loop. This is another
2535 case where T ends with ...111. Handling this with (T + 1) and
2536 subtracting 1 produces slightly better code and makes algorithm
2537 selection much faster than treating it like the ...0111 case
2538 below. */
2539 if (w == 0
2540 || (w > 2
2541 /* Reject the case where t is 3.
2542 Thus we prefer addition in that case. */
2543 && t != 3))
2545 /* T ends with ...111. Multiply by (T + 1) and subtract 1. */
2547 op_cost = add_cost[speed][mode];
2548 new_limit.cost = best_cost.cost - op_cost;
2549 new_limit.latency = best_cost.latency - op_cost;
2550 synth_mult (alg_in, t + 1, &new_limit, mode);
2552 alg_in->cost.cost += op_cost;
2553 alg_in->cost.latency += op_cost;
2554 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2556 struct algorithm *x;
2557 best_cost = alg_in->cost;
2558 x = alg_in, alg_in = best_alg, best_alg = x;
2559 best_alg->log[best_alg->ops] = 0;
2560 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2563 else
2565 /* T ends with ...01 or ...011. Multiply by (T - 1) and add 1. */
2567 op_cost = add_cost[speed][mode];
2568 new_limit.cost = best_cost.cost - op_cost;
2569 new_limit.latency = best_cost.latency - op_cost;
2570 synth_mult (alg_in, t - 1, &new_limit, mode);
2572 alg_in->cost.cost += op_cost;
2573 alg_in->cost.latency += op_cost;
2574 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2576 struct algorithm *x;
2577 best_cost = alg_in->cost;
2578 x = alg_in, alg_in = best_alg, best_alg = x;
2579 best_alg->log[best_alg->ops] = 0;
2580 best_alg->op[best_alg->ops] = alg_add_t_m2;
2584 /* We may be able to calculate a * -7, a * -15, a * -31, etc.
2585 quickly with a - a * n for some appropriate constant n. */
2586 m = exact_log2 (-orig_t + 1);
2587 if (m >= 0 && m < maxm)
2589 op_cost = shiftsub1_cost[speed][mode][m];
2590 new_limit.cost = best_cost.cost - op_cost;
2591 new_limit.latency = best_cost.latency - op_cost;
2592 synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m, &new_limit, mode);
2594 alg_in->cost.cost += op_cost;
2595 alg_in->cost.latency += op_cost;
2596 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2598 struct algorithm *x;
2599 best_cost = alg_in->cost;
2600 x = alg_in, alg_in = best_alg, best_alg = x;
2601 best_alg->log[best_alg->ops] = m;
2602 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2606 if (cache_hit)
2607 goto done;
2610 /* Look for factors of t of the form
2611 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2612 If we find such a factor, we can multiply by t using an algorithm that
2613 multiplies by q, shift the result by m and add/subtract it to itself.
2615 We search for large factors first and loop down, even if large factors
2616 are less probable than small ones; if we find a large factor we will find a
2617 good sequence quickly, and therefore be able to prune (by decreasing
2618 COST_LIMIT) the search. */
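/* For example, T == 30 has the factor 15 == 2**4 - 1: first synthesize
   q = x * 2, then compute (q << 4) - q, i.e. one alg_sub_factor step on
   top of the recursive result.  */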
2620 do_alg_addsub_factor:
2621 for (m = floor_log2 (t - 1); m >= 2; m--)
2623 unsigned HOST_WIDE_INT d;
2625 d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2626 if (t % d == 0 && t > d && m < maxm
2627 && (!cache_hit || cache_alg == alg_add_factor))
2629 /* If the target has a cheap shift-and-add instruction use
2630 that in preference to a shift insn followed by an add insn.
2631 Assume that the shift-and-add is "atomic" with a latency
2632 equal to its cost, otherwise assume that on superscalar
2633 hardware the shift may be executed concurrently with the
2634 earlier steps in the algorithm. */
2635 op_cost = add_cost[speed][mode] + shift_cost[speed][mode][m];
2636 if (shiftadd_cost[speed][mode][m] < op_cost)
2638 op_cost = shiftadd_cost[speed][mode][m];
2639 op_latency = op_cost;
2641 else
2642 op_latency = add_cost[speed][mode];
2644 new_limit.cost = best_cost.cost - op_cost;
2645 new_limit.latency = best_cost.latency - op_latency;
2646 synth_mult (alg_in, t / d, &new_limit, mode);
2648 alg_in->cost.cost += op_cost;
2649 alg_in->cost.latency += op_latency;
2650 if (alg_in->cost.latency < op_cost)
2651 alg_in->cost.latency = op_cost;
2652 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2654 struct algorithm *x;
2655 best_cost = alg_in->cost;
2656 x = alg_in, alg_in = best_alg, best_alg = x;
2657 best_alg->log[best_alg->ops] = m;
2658 best_alg->op[best_alg->ops] = alg_add_factor;
2660 /* Other factors will have been taken care of in the recursion. */
2661 break;
2664 d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2665 if (t % d == 0 && t > d && m < maxm
2666 && (!cache_hit || cache_alg == alg_sub_factor))
2668 /* If the target has a cheap shift-and-subtract insn use
2669 that in preference to a shift insn followed by a sub insn.
2670 Assume that the shift-and-sub is "atomic" with a latency
2671 equal to its cost, otherwise assume that on superscalar
2672 hardware the shift may be executed concurrently with the
2673 earlier steps in the algorithm. */
2674 op_cost = add_cost[speed][mode] + shift_cost[speed][mode][m];
2675 if (shiftsub0_cost[speed][mode][m] < op_cost)
2677 op_cost = shiftsub0_cost[speed][mode][m];
2678 op_latency = op_cost;
2680 else
2681 op_latency = add_cost[speed][mode];
2683 new_limit.cost = best_cost.cost - op_cost;
2684 new_limit.latency = best_cost.latency - op_latency;
2685 synth_mult (alg_in, t / d, &new_limit, mode);
2687 alg_in->cost.cost += op_cost;
2688 alg_in->cost.latency += op_latency;
2689 if (alg_in->cost.latency < op_cost)
2690 alg_in->cost.latency = op_cost;
2691 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2693 struct algorithm *x;
2694 best_cost = alg_in->cost;
2695 x = alg_in, alg_in = best_alg, best_alg = x;
2696 best_alg->log[best_alg->ops] = m;
2697 best_alg->op[best_alg->ops] = alg_sub_factor;
2699 break;
2702 if (cache_hit)
2703 goto done;
2705 /* Try shift-and-add (load effective address) instructions,
2706 i.e. do a*3, a*5, a*9. */
2707 if ((t & 1) != 0)
2709 do_alg_add_t2_m:
2710 q = t - 1;
2711 q = q & -q;
2712 m = exact_log2 (q);
2713 if (m >= 0 && m < maxm)
2715 op_cost = shiftadd_cost[speed][mode][m];
2716 new_limit.cost = best_cost.cost - op_cost;
2717 new_limit.latency = best_cost.latency - op_cost;
2718 synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2720 alg_in->cost.cost += op_cost;
2721 alg_in->cost.latency += op_cost;
2722 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2724 struct algorithm *x;
2725 best_cost = alg_in->cost;
2726 x = alg_in, alg_in = best_alg, best_alg = x;
2727 best_alg->log[best_alg->ops] = m;
2728 best_alg->op[best_alg->ops] = alg_add_t2_m;
2731 if (cache_hit)
2732 goto done;
2734 do_alg_sub_t2_m:
2735 q = t + 1;
2736 q = q & -q;
2737 m = exact_log2 (q);
2738 if (m >= 0 && m < maxm)
2740 op_cost = shiftsub0_cost[speed][mode][m];
2741 new_limit.cost = best_cost.cost - op_cost;
2742 new_limit.latency = best_cost.latency - op_cost;
2743 synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2745 alg_in->cost.cost += op_cost;
2746 alg_in->cost.latency += op_cost;
2747 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2749 struct algorithm *x;
2750 best_cost = alg_in->cost;
2751 x = alg_in, alg_in = best_alg, best_alg = x;
2752 best_alg->log[best_alg->ops] = m;
2753 best_alg->op[best_alg->ops] = alg_sub_t2_m;
2756 if (cache_hit)
2757 goto done;
2760 done:
2761 /* If best_cost has not decreased, we have not found any algorithm. */
2762 if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2764 /* We failed to find an algorithm. Record alg_impossible for
2765 this case (that is, <T, MODE, COST_LIMIT>) so that next time
2766 we are asked to find an algorithm for T within the same or
2767 lower COST_LIMIT, we can immediately return to the
2768 caller. */
2769 alg_hash[hash_index].t = t;
2770 alg_hash[hash_index].mode = mode;
2771 alg_hash[hash_index].speed = speed;
2772 alg_hash[hash_index].alg = alg_impossible;
2773 alg_hash[hash_index].cost = *cost_limit;
2774 return;
2777 /* Cache the result. */
2778 if (!cache_hit)
2780 alg_hash[hash_index].t = t;
2781 alg_hash[hash_index].mode = mode;
2782 alg_hash[hash_index].speed = speed;
2783 alg_hash[hash_index].alg = best_alg->op[best_alg->ops];
2784 alg_hash[hash_index].cost.cost = best_cost.cost;
2785 alg_hash[hash_index].cost.latency = best_cost.latency;
2788 /* If we are getting too long a sequence for `struct algorithm'
2789 to record, make this search fail. */
2790 if (best_alg->ops == MAX_BITS_PER_WORD)
2791 return;
2793 /* Copy the algorithm from temporary space to the space at alg_out.
2794 We avoid using structure assignment because the majority of
2795 best_alg is normally undefined, and this is a critical function. */
2796 alg_out->ops = best_alg->ops + 1;
2797 alg_out->cost = best_cost;
2798 memcpy (alg_out->op, best_alg->op,
2799 alg_out->ops * sizeof *alg_out->op);
2800 memcpy (alg_out->log, best_alg->log,
2801 alg_out->ops * sizeof *alg_out->log);
2804 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2805 Try three variations:
2807 - a shift/add sequence based on VAL itself
2808 - a shift/add sequence based on -VAL, followed by a negation
2809 - a shift/add sequence based on VAL - 1, followed by an addition.
2811 Return true if the cheapest of these costs less than MULT_COST,
2812 describing the algorithm in *ALG and final fixup in *VARIANT. */
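/* For example, for VAL == 7 the VAL - 1 variant synthesizes x * 6 as
   ((x << 1) + x) << 1 and then adds x once more, while for a negative
   VAL such as -5 it may be cheaper to synthesize x * 5 and negate the
   result.  These are only illustrative decompositions; the winner
   depends on the cost tables.  */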
2814 static bool
2815 choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
2816 struct algorithm *alg, enum mult_variant *variant,
2817 int mult_cost)
2819 struct algorithm alg2;
2820 struct mult_cost limit;
2821 int op_cost;
2822 bool speed = optimize_insn_for_speed_p ();
2824 /* Fail quickly for impossible bounds. */
2825 if (mult_cost < 0)
2826 return false;
2828 /* Ensure that mult_cost provides a reasonable upper bound.
2829 Any constant multiplication can be performed with less
2830 than 2 * bits additions. */
2831 op_cost = 2 * GET_MODE_BITSIZE (mode) * add_cost[speed][mode];
2832 if (mult_cost > op_cost)
2833 mult_cost = op_cost;
2835 *variant = basic_variant;
2836 limit.cost = mult_cost;
2837 limit.latency = mult_cost;
2838 synth_mult (alg, val, &limit, mode);
2840 /* This works only if the inverted value actually fits in an
2841 `unsigned int' */
2842 if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
2844 op_cost = neg_cost[speed][mode];
2845 if (MULT_COST_LESS (&alg->cost, mult_cost))
2847 limit.cost = alg->cost.cost - op_cost;
2848 limit.latency = alg->cost.latency - op_cost;
2850 else
2852 limit.cost = mult_cost - op_cost;
2853 limit.latency = mult_cost - op_cost;
2856 synth_mult (&alg2, -val, &limit, mode);
2857 alg2.cost.cost += op_cost;
2858 alg2.cost.latency += op_cost;
2859 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2860 *alg = alg2, *variant = negate_variant;
2863 /* This proves very useful for division-by-constant. */
2864 op_cost = add_cost[speed][mode];
2865 if (MULT_COST_LESS (&alg->cost, mult_cost))
2867 limit.cost = alg->cost.cost - op_cost;
2868 limit.latency = alg->cost.latency - op_cost;
2870 else
2872 limit.cost = mult_cost - op_cost;
2873 limit.latency = mult_cost - op_cost;
2876 synth_mult (&alg2, val - 1, &limit, mode);
2877 alg2.cost.cost += op_cost;
2878 alg2.cost.latency += op_cost;
2879 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2880 *alg = alg2, *variant = add_variant;
2882 return MULT_COST_LESS (&alg->cost, mult_cost);
2885 /* A subroutine of expand_mult, used for constant multiplications.
2886 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2887 convenient. Use the shift/add sequence described by ALG and apply
2888 the final fixup specified by VARIANT. */
2890 static rtx
2891 expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
2892 rtx target, const struct algorithm *alg,
2893 enum mult_variant variant)
2895 HOST_WIDE_INT val_so_far;
2896 rtx insn, accum, tem;
2897 int opno;
2898 enum machine_mode nmode;
2900 /* Avoid referencing memory over and over and invalid sharing
2901 on SUBREGs. */
2902 op0 = force_reg (mode, op0);
2904 /* ACCUM starts out either as OP0 or as a zero, depending on
2905 the first operation. */
2907 if (alg->op[0] == alg_zero)
2909 accum = copy_to_mode_reg (mode, const0_rtx);
2910 val_so_far = 0;
2912 else if (alg->op[0] == alg_m)
2914 accum = copy_to_mode_reg (mode, op0);
2915 val_so_far = 1;
2917 else
2918 gcc_unreachable ();
2920 for (opno = 1; opno < alg->ops; opno++)
2922 int log = alg->log[opno];
2923 rtx shift_subtarget = optimize ? 0 : accum;
2924 rtx add_target
2925 = (opno == alg->ops - 1 && target != 0 && variant != add_variant
2926 && !optimize)
2927 ? target : 0;
2928 rtx accum_target = optimize ? 0 : accum;
2930 switch (alg->op[opno])
2932 case alg_shift:
2933 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2934 /* REG_EQUAL note will be attached to the following insn. */
2935 emit_move_insn (accum, tem);
2936 val_so_far <<= log;
2937 break;
2939 case alg_add_t_m2:
2940 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
2941 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2942 add_target ? add_target : accum_target);
2943 val_so_far += (HOST_WIDE_INT) 1 << log;
2944 break;
2946 case alg_sub_t_m2:
2947 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
2948 accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
2949 add_target ? add_target : accum_target);
2950 val_so_far -= (HOST_WIDE_INT) 1 << log;
2951 break;
2953 case alg_add_t2_m:
2954 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2955 log, shift_subtarget, 0);
2956 accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
2957 add_target ? add_target : accum_target);
2958 val_so_far = (val_so_far << log) + 1;
2959 break;
2961 case alg_sub_t2_m:
2962 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2963 log, shift_subtarget, 0);
2964 accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
2965 add_target ? add_target : accum_target);
2966 val_so_far = (val_so_far << log) - 1;
2967 break;
2969 case alg_add_factor:
2970 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2971 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2972 add_target ? add_target : accum_target);
2973 val_so_far += val_so_far << log;
2974 break;
2976 case alg_sub_factor:
2977 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2978 accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
2979 (add_target
2980 ? add_target : (optimize ? 0 : tem)));
2981 val_so_far = (val_so_far << log) - val_so_far;
2982 break;
2984 default:
2985 gcc_unreachable ();
2988 /* Write a REG_EQUAL note on the last insn so that we can cse
2989 multiplication sequences. Note that if ACCUM is a SUBREG,
2990 we've set the inner register and must properly indicate
2991 that. */
2993 tem = op0, nmode = mode;
2994 if (GET_CODE (accum) == SUBREG)
2996 nmode = GET_MODE (SUBREG_REG (accum));
2997 tem = gen_lowpart (nmode, op0);
3000 insn = get_last_insn ();
3001 set_unique_reg_note (insn, REG_EQUAL,
3002 gen_rtx_MULT (nmode, tem,
3003 GEN_INT (val_so_far)));
3006 if (variant == negate_variant)
3008 val_so_far = -val_so_far;
3009 accum = expand_unop (mode, neg_optab, accum, target, 0);
3011 else if (variant == add_variant)
3013 val_so_far = val_so_far + 1;
3014 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3017 /* Compare only the bits of val and val_so_far that are significant
3018 in the result mode, to avoid sign-/zero-extension confusion. */
3019 val &= GET_MODE_MASK (mode);
3020 val_so_far &= GET_MODE_MASK (mode);
3021 gcc_assert (val == val_so_far);
3023 return accum;
3026 /* Perform a multiplication and return an rtx for the result.
3027 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3028 TARGET is a suggestion for where to store the result (an rtx).
3030 We check specially for a constant integer as OP1.
3031 If you want this check for OP0 as well, then before calling
3032 you should swap the two operands if OP0 would be constant. */
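/* A minimal usage sketch with hypothetical operands: multiplying OP0 by
   the constant 10 in SImode,

     rtx r = expand_mult (SImode, op0, GEN_INT (10), NULL_RTX, 0);

   will normally be emitted as a shift/add sequence such as
   ((op0 << 2) + op0) << 1 rather than a mult insn, whenever that is
   cheaper.  */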
3035 expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3036 int unsignedp)
3038 enum mult_variant variant;
3039 struct algorithm algorithm;
3040 int max_cost;
3041 bool speed = optimize_insn_for_speed_p ();
3043 /* Handling const0_rtx here allows us to use zero as a rogue value for
3044 coeff below. */
3045 if (op1 == const0_rtx)
3046 return const0_rtx;
3047 if (op1 == const1_rtx)
3048 return op0;
3049 if (op1 == constm1_rtx)
3050 return expand_unop (mode,
3051 GET_MODE_CLASS (mode) == MODE_INT
3052 && !unsignedp && flag_trapv
3053 ? negv_optab : neg_optab,
3054 op0, target, 0);
3056 /* These are the operations that are potentially turned into a sequence
3057 of shifts and additions. */
3058 if (SCALAR_INT_MODE_P (mode)
3059 && (unsignedp || !flag_trapv))
3061 HOST_WIDE_INT coeff = 0;
3062 rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3064 /* synth_mult does an `unsigned int' multiply. As long as the mode is
3065 less than or equal in size to `unsigned int' this doesn't matter.
3066 If the mode is larger than `unsigned int', then synth_mult works
3067 only if the constant value exactly fits in an `unsigned int' without
3068 any truncation. This means that multiplying by negative values does
3069 not work; results are off by 2^32 on a 32 bit machine. */
3071 if (CONST_INT_P (op1))
3073 /* Attempt to handle multiplication of DImode values by negative
3074 coefficients, by performing the multiplication by a positive
3075 multiplier and then inverting the result. */
3076 if (INTVAL (op1) < 0
3077 && GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT)
3079 /* It's safe to use -INTVAL (op1) even for INT_MIN, as the
3080 result is interpreted as an unsigned coefficient.
3081 Exclude cost of op0 from max_cost to match the cost
3082 calculation of the synth_mult. */
3083 max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1),
3084 speed)
3085 - neg_cost[speed][mode]);
3086 if (max_cost > 0
3087 && choose_mult_variant (mode, -INTVAL (op1), &algorithm,
3088 &variant, max_cost))
3090 rtx temp = expand_mult_const (mode, op0, -INTVAL (op1),
3091 NULL_RTX, &algorithm,
3092 variant);
3093 return expand_unop (mode, neg_optab, temp, target, 0);
3096 else coeff = INTVAL (op1);
3098 else if (GET_CODE (op1) == CONST_DOUBLE)
3100 /* If we are multiplying in DImode, it may still be a win
3101 to try to work with shifts and adds. */
3102 if (CONST_DOUBLE_HIGH (op1) == 0
3103 && CONST_DOUBLE_LOW (op1) > 0)
3104 coeff = CONST_DOUBLE_LOW (op1);
3105 else if (CONST_DOUBLE_LOW (op1) == 0
3106 && EXACT_POWER_OF_2_OR_ZERO_P (CONST_DOUBLE_HIGH (op1)))
3108 int shift = floor_log2 (CONST_DOUBLE_HIGH (op1))
3109 + HOST_BITS_PER_WIDE_INT;
3110 return expand_shift (LSHIFT_EXPR, mode, op0,
3111 shift, target, unsignedp);
3115 /* We used to test optimize here, on the grounds that it's better to
3116 produce a smaller program when -O is not used. But this causes
3117 such a terrible slowdown sometimes that it seems better to always
3118 use synth_mult. */
3119 if (coeff != 0)
3121 /* Special case powers of two. */
3122 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3123 return expand_shift (LSHIFT_EXPR, mode, op0,
3124 floor_log2 (coeff), target, unsignedp);
3126 /* Exclude cost of op0 from max_cost to match the cost
3127 calculation of the synth_mult. */
3128 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed);
3129 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3130 max_cost))
3131 return expand_mult_const (mode, op0, coeff, target,
3132 &algorithm, variant);
3136 if (GET_CODE (op0) == CONST_DOUBLE)
3138 rtx temp = op0;
3139 op0 = op1;
3140 op1 = temp;
3143 /* Expand x*2.0 as x+x. */
3144 if (GET_CODE (op1) == CONST_DOUBLE
3145 && SCALAR_FLOAT_MODE_P (mode))
3147 REAL_VALUE_TYPE d;
3148 REAL_VALUE_FROM_CONST_DOUBLE (d, op1);
3150 if (REAL_VALUES_EQUAL (d, dconst2))
3152 op0 = force_reg (GET_MODE (op0), op0);
3153 return expand_binop (mode, add_optab, op0, op0,
3154 target, unsignedp, OPTAB_LIB_WIDEN);
3158 /* This used to use umul_optab if unsigned, but for non-widening multiply
3159 there is no difference between signed and unsigned. */
3160 op0 = expand_binop (mode,
3161 ! unsignedp
3162 && flag_trapv && (GET_MODE_CLASS(mode) == MODE_INT)
3163 ? smulv_optab : smul_optab,
3164 op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3165 gcc_assert (op0);
3166 return op0;
3169 /* Perform a widening multiplication and return an rtx for the result.
3170 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3171 TARGET is a suggestion for where to store the result (an rtx).
3172 THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3173 or smul_widen_optab.
3175 We check specially for a constant integer as OP1, comparing the
3176 cost of a widening multiply against the cost of a sequence of shifts
3177 and adds. */
3180 expand_widening_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3181 int unsignedp, optab this_optab)
3183 bool speed = optimize_insn_for_speed_p ();
3184 rtx cop1;
3186 if (CONST_INT_P (op1)
3187 && GET_MODE (op0) != VOIDmode
3188 && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3189 this_optab == umul_widen_optab))
3190 && CONST_INT_P (cop1)
3191 && (INTVAL (cop1) >= 0
3192 || HWI_COMPUTABLE_MODE_P (mode)))
3194 HOST_WIDE_INT coeff = INTVAL (cop1);
3195 int max_cost;
3196 enum mult_variant variant;
3197 struct algorithm algorithm;
3199 /* Special case powers of two. */
3200 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3202 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3203 return expand_shift (LSHIFT_EXPR, mode, op0,
3204 floor_log2 (coeff), target, unsignedp);
3207 /* Exclude cost of op0 from max_cost to match the cost
3208 calculation of the synth_mult. */
3209 max_cost = mul_widen_cost[speed][mode];
3210 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3211 max_cost))
3213 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3214 return expand_mult_const (mode, op0, coeff, target,
3215 &algorithm, variant);
3218 return expand_binop (mode, this_optab, op0, op1, target,
3219 unsignedp, OPTAB_LIB_WIDEN);
3222 /* Return the smallest n such that 2**n >= X. */
3225 ceil_log2 (unsigned HOST_WIDE_INT x)
3227 return floor_log2 (x - 1) + 1;
3230 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3231 replace division by D, and put the least significant N bits of the result
3232 in *MULTIPLIER_PTR and return the most significant bit.
3234 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3235 needed precision is in PRECISION (should be <= N).
3237 PRECISION should be as small as possible so this function can choose
3238 the multiplier more freely.
3240 The rounded-up logarithm of D is placed in *lgup_ptr. A shift count that
3241 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3243 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3244 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
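/* A worked example, assuming N == 32: for D == 5 this chooses the
   multiplier 0xcccccccd with a post shift of 2, so that for every
   32-bit unsigned X

     X / 5 == (X * 0xcccccccd) >> 34     (computed in wider arithmetic)

   i.e. take the high half of the widening multiply and then shift it
   right by *POST_SHIFT_PTR.  */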
3246 static
3247 unsigned HOST_WIDE_INT
3248 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3249 rtx *multiplier_ptr, int *post_shift_ptr, int *lgup_ptr)
3251 HOST_WIDE_INT mhigh_hi, mlow_hi;
3252 unsigned HOST_WIDE_INT mhigh_lo, mlow_lo;
3253 int lgup, post_shift;
3254 int pow, pow2;
3255 unsigned HOST_WIDE_INT nl, dummy1;
3256 HOST_WIDE_INT nh, dummy2;
3258 /* lgup = ceil(log2(divisor)); */
3259 lgup = ceil_log2 (d);
3261 gcc_assert (lgup <= n);
3263 pow = n + lgup;
3264 pow2 = n + lgup - precision;
3266 /* We could handle this with some effort, but this case is much
3267 better handled directly with a scc insn, so rely on caller using
3268 that. */
3269 gcc_assert (pow != 2 * HOST_BITS_PER_WIDE_INT);
3271 /* mlow = 2^(N + lgup)/d */
3272 if (pow >= HOST_BITS_PER_WIDE_INT)
3274 nh = (HOST_WIDE_INT) 1 << (pow - HOST_BITS_PER_WIDE_INT);
3275 nl = 0;
3277 else
3279 nh = 0;
3280 nl = (unsigned HOST_WIDE_INT) 1 << pow;
3282 div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3283 &mlow_lo, &mlow_hi, &dummy1, &dummy2);
3285 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3286 if (pow2 >= HOST_BITS_PER_WIDE_INT)
3287 nh |= (HOST_WIDE_INT) 1 << (pow2 - HOST_BITS_PER_WIDE_INT);
3288 else
3289 nl |= (unsigned HOST_WIDE_INT) 1 << pow2;
3290 div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3291 &mhigh_lo, &mhigh_hi, &dummy1, &dummy2);
3293 gcc_assert (!mhigh_hi || nh - d < d);
3294 gcc_assert (mhigh_hi <= 1 && mlow_hi <= 1);
3295 /* Assert that mlow < mhigh. */
3296 gcc_assert (mlow_hi < mhigh_hi
3297 || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo));
3299 /* If precision == N, then mlow, mhigh exceed 2^N
3300 (but they do not exceed 2^(N+1)). */
3302 /* Reduce to lowest terms. */
3303 for (post_shift = lgup; post_shift > 0; post_shift--)
3305 unsigned HOST_WIDE_INT ml_lo = (mlow_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mlow_lo >> 1);
3306 unsigned HOST_WIDE_INT mh_lo = (mhigh_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mhigh_lo >> 1);
3307 if (ml_lo >= mh_lo)
3308 break;
3310 mlow_hi = 0;
3311 mlow_lo = ml_lo;
3312 mhigh_hi = 0;
3313 mhigh_lo = mh_lo;
3316 *post_shift_ptr = post_shift;
3317 *lgup_ptr = lgup;
3318 if (n < HOST_BITS_PER_WIDE_INT)
3320 unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3321 *multiplier_ptr = GEN_INT (mhigh_lo & mask);
3322 return mhigh_lo >= mask;
3324 else
3326 *multiplier_ptr = GEN_INT (mhigh_lo);
3327 return mhigh_hi;
3331 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3332 congruent to 1 (mod 2**N). */
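/* For example, invert_mod2n (3, 8) returns 171, since 3 * 171 == 513 ==
   2 * 256 + 1, i.e. 1 (mod 2**8).  X must be odd for the inverse to
   exist.  */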
3334 static unsigned HOST_WIDE_INT
3335 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3337 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
3339 /* The algorithm notes that the choice y = x satisfies
3340 x*y == 1 mod 2^3, since x is assumed odd.
3341 Each iteration doubles the number of bits of significance in y. */
3343 unsigned HOST_WIDE_INT mask;
3344 unsigned HOST_WIDE_INT y = x;
3345 int nbit = 3;
3347 mask = (n == HOST_BITS_PER_WIDE_INT
3348 ? ~(unsigned HOST_WIDE_INT) 0
3349 : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3351 while (nbit < n)
3353 y = y * (2 - x*y) & mask; /* Modulo 2^N */
3354 nbit *= 2;
3356 return y;
3359 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3360 flavor of OP0 and OP1. ADJ_OPERAND is already the high half of the
3361 product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed product
3362 to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3363 become signed.
3365 The result is put in TARGET if that is convenient.
3367 MODE is the mode of operation. */
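/* Concretely, for N-bit operands the two flavors of high part differ by
   the terms added or subtracted below (modulo the word size):

     high_unsigned == high_signed + (OP0 < 0 ? OP1 : 0)
                                  + (OP1 < 0 ? OP0 : 0)

   so with UNSIGNEDP nonzero the terms are added, otherwise they are
   subtracted.  */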
3370 expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
3371 rtx op1, rtx target, int unsignedp)
3373 rtx tem;
3374 enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3376 tem = expand_shift (RSHIFT_EXPR, mode, op0,
3377 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3378 tem = expand_and (mode, tem, op1, NULL_RTX);
3379 adj_operand
3380 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3381 adj_operand);
3383 tem = expand_shift (RSHIFT_EXPR, mode, op1,
3384 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3385 tem = expand_and (mode, tem, op0, NULL_RTX);
3386 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3387 target);
3389 return target;
3392 /* Subroutine of expand_mult_highpart. Return the MODE high part of OP. */
3394 static rtx
3395 extract_high_half (enum machine_mode mode, rtx op)
3397 enum machine_mode wider_mode;
3399 if (mode == word_mode)
3400 return gen_highpart (mode, op);
3402 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3404 wider_mode = GET_MODE_WIDER_MODE (mode);
3405 op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3406 GET_MODE_BITSIZE (mode), 0, 1);
3407 return convert_modes (mode, wider_mode, op, 0);
3410 /* Like expand_mult_highpart, but only consider using a multiplication
3411 optab. OP1 is an rtx for the constant operand. */
3413 static rtx
3414 expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
3415 rtx target, int unsignedp, int max_cost)
3417 rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3418 enum machine_mode wider_mode;
3419 optab moptab;
3420 rtx tem;
3421 int size;
3422 bool speed = optimize_insn_for_speed_p ();
3424 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3426 wider_mode = GET_MODE_WIDER_MODE (mode);
3427 size = GET_MODE_BITSIZE (mode);
3429 /* Firstly, try using a multiplication insn that only generates the needed
3430 high part of the product, and in the sign flavor of unsignedp. */
3431 if (mul_highpart_cost[speed][mode] < max_cost)
3433 moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3434 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3435 unsignedp, OPTAB_DIRECT);
3436 if (tem)
3437 return tem;
3440 /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3441 Need to adjust the result after the multiplication. */
3442 if (size - 1 < BITS_PER_WORD
3443 && (mul_highpart_cost[speed][mode] + 2 * shift_cost[speed][mode][size-1]
3444 + 4 * add_cost[speed][mode] < max_cost))
3446 moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3447 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3448 unsignedp, OPTAB_DIRECT);
3449 if (tem)
3450 /* We used the wrong signedness. Adjust the result. */
3451 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3452 tem, unsignedp);
3455 /* Try widening multiplication. */
3456 moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3457 if (optab_handler (moptab, wider_mode) != CODE_FOR_nothing
3458 && mul_widen_cost[speed][wider_mode] < max_cost)
3460 tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3461 unsignedp, OPTAB_WIDEN);
3462 if (tem)
3463 return extract_high_half (mode, tem);
3466 /* Try widening the mode and perform a non-widening multiplication. */
3467 if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3468 && size - 1 < BITS_PER_WORD
3469 && mul_cost[speed][wider_mode] + shift_cost[speed][mode][size-1] < max_cost)
3471 rtx insns, wop0, wop1;
3473 /* We need to widen the operands, for example to ensure the
3474 constant multiplier is correctly sign or zero extended.
3475 Use a sequence to clean-up any instructions emitted by
3476 the conversions if things don't work out. */
3477 start_sequence ();
3478 wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3479 wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3480 tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3481 unsignedp, OPTAB_WIDEN);
3482 insns = get_insns ();
3483 end_sequence ();
3485 if (tem)
3487 emit_insn (insns);
3488 return extract_high_half (mode, tem);
3492 /* Try widening multiplication of opposite signedness, and adjust. */
3493 moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3494 if (optab_handler (moptab, wider_mode) != CODE_FOR_nothing
3495 && size - 1 < BITS_PER_WORD
3496 && (mul_widen_cost[speed][wider_mode] + 2 * shift_cost[speed][mode][size-1]
3497 + 4 * add_cost[speed][mode] < max_cost))
3499 tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3500 NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3501 if (tem != 0)
3503 tem = extract_high_half (mode, tem);
3504 /* We used the wrong signedness. Adjust the result. */
3505 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3506 target, unsignedp);
3510 return 0;
3513 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3514 putting the high half of the result in TARGET if that is convenient,
3515 and return where the result is. If the operation cannot be performed,
3516 0 is returned.
3518 MODE is the mode of operation and result.
3520 UNSIGNEDP nonzero means unsigned multiply.
3522 MAX_COST is the total allowed cost for the expanded RTL. */
3524 static rtx
3525 expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
3526 rtx target, int unsignedp, int max_cost)
3528 enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3529 unsigned HOST_WIDE_INT cnst1;
3530 int extra_cost;
3531 bool sign_adjust = false;
3532 enum mult_variant variant;
3533 struct algorithm alg;
3534 rtx tem;
3535 bool speed = optimize_insn_for_speed_p ();
3537 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3538 /* We can't support modes wider than HOST_BITS_PER_WIDE_INT. */
3539 gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3541 cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3543 /* We can't optimize modes wider than BITS_PER_WORD.
3544 ??? We might be able to perform double-word arithmetic if
3545 mode == word_mode; however, all the cost calculations in
3546 synth_mult etc. assume single-word operations. */
3547 if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3548 return expand_mult_highpart_optab (mode, op0, op1, target,
3549 unsignedp, max_cost);
3551 extra_cost = shift_cost[speed][mode][GET_MODE_BITSIZE (mode) - 1];
3553 /* Check whether we try to multiply by a negative constant. */
3554 if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3556 sign_adjust = true;
3557 extra_cost += add_cost[speed][mode];
3560 /* See whether shift/add multiplication is cheap enough. */
3561 if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3562 max_cost - extra_cost))
3564 /* See whether the specialized multiplication optabs are
3565 cheaper than the shift/add version. */
3566 tem = expand_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3567 alg.cost.cost + extra_cost);
3568 if (tem)
3569 return tem;
3571 tem = convert_to_mode (wider_mode, op0, unsignedp);
3572 tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3573 tem = extract_high_half (mode, tem);
3575 /* Adjust result for signedness. */
3576 if (sign_adjust)
3577 tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3579 return tem;
3581 return expand_mult_highpart_optab (mode, op0, op1, target,
3582 unsignedp, max_cost);
3586 /* Expand signed modulus of OP0 by a power of two D in mode MODE. */
3588 static rtx
3589 expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3591 unsigned HOST_WIDE_INT masklow, maskhigh;
3592 rtx result, temp, shift, label;
3593 int logd;
3595 logd = floor_log2 (d);
3596 result = gen_reg_rtx (mode);
3598 /* Avoid conditional branches when they're expensive. */
3599 if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3600 && optimize_insn_for_speed_p ())
3602 rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3603 mode, 0, -1);
3604 if (signmask)
3606 signmask = force_reg (mode, signmask);
3607 masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3608 shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3610 /* Use the rtx_cost of a LSHIFTRT instruction to determine
3611 which instruction sequence to use. If logical right shifts
3612 are expensive then use 2 XORs, 2 SUBs and an AND; otherwise
3613 use a LSHIFTRT, 1 ADD, 1 SUB and an AND. */
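/* An illustrative C-level sketch of the two variants (not from the original
   source; assume a 32-bit two's complement int x, d == 1 << logd, and
   signmask == -(x < 0), i.e. 0 or -1):

     XOR/SUB form:   r = ((x ^ signmask) - signmask) & (d - 1);
                     r = (r ^ signmask) - signmask;

     LSHIFTRT form:  bias = (unsigned) signmask >> (32 - logd);   (0 or d-1)
                     r = ((x + bias) & (d - 1)) - bias;

   Both compute x % d with the sign of x, without a branch.  */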
3615 temp = gen_rtx_LSHIFTRT (mode, result, shift);
3616 if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3617 || (set_src_cost (temp, optimize_insn_for_speed_p ())
3618 > COSTS_N_INSNS (2)))
3620 temp = expand_binop (mode, xor_optab, op0, signmask,
3621 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3622 temp = expand_binop (mode, sub_optab, temp, signmask,
3623 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3624 temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3625 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3626 temp = expand_binop (mode, xor_optab, temp, signmask,
3627 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3628 temp = expand_binop (mode, sub_optab, temp, signmask,
3629 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3631 else
3633 signmask = expand_binop (mode, lshr_optab, signmask, shift,
3634 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3635 signmask = force_reg (mode, signmask);
3637 temp = expand_binop (mode, add_optab, op0, signmask,
3638 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3639 temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3640 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3641 temp = expand_binop (mode, sub_optab, temp, signmask,
3642 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3644 return temp;
3648 /* Mask contains the mode's signbit and the significant bits of the
3649 modulus. By including the signbit in the operation, many targets
3650 can avoid an explicit compare operation in the following comparison
3651 against zero. */
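/* For example (an added illustration): for SImode and D == 8 the constant
   below is 0x80000007, so the AND keeps the sign bit together with the
   three remainder bits, and its result is negative exactly when OP0 is.  */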
3653 masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3654 if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
3656 masklow |= (HOST_WIDE_INT) -1 << (GET_MODE_BITSIZE (mode) - 1);
3657 maskhigh = -1;
3659 else
3660 maskhigh = (HOST_WIDE_INT) -1
3661 << (GET_MODE_BITSIZE (mode) - HOST_BITS_PER_WIDE_INT - 1);
3663 temp = expand_binop (mode, and_optab, op0,
3664 immed_double_const (masklow, maskhigh, mode),
3665 result, 1, OPTAB_LIB_WIDEN);
3666 if (temp != result)
3667 emit_move_insn (result, temp);
3669 label = gen_label_rtx ();
3670 do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3672 temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3673 0, OPTAB_LIB_WIDEN);
3674 masklow = (HOST_WIDE_INT) -1 << logd;
3675 maskhigh = -1;
3676 temp = expand_binop (mode, ior_optab, temp,
3677 immed_double_const (masklow, maskhigh, mode),
3678 result, 1, OPTAB_LIB_WIDEN);
3679 temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3680 0, OPTAB_LIB_WIDEN);
3681 if (temp != result)
3682 emit_move_insn (result, temp);
3683 emit_label (label);
3684 return result;
3687 /* Expand signed division of OP0 by a power of two D in mode MODE.
3688 This routine is only called for positive values of D. */
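/* A rough C-level sketch of the branch-free variants emitted below (an
   illustration only, assuming a 32-bit two's complement int x):

     d == 2:      q = (x + (int) ((unsigned) x >> 31)) >> 1;
     general d:   q = (x + ((x >> 31) & (d - 1))) >> logd;

   i.e. add d - 1 to negative dividends before the arithmetic shift so that
   the shift rounds toward zero rather than toward minus infinity.  */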
3690 static rtx
3691 expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3693 rtx temp, label;
3694 int logd;
3696 logd = floor_log2 (d);
3698 if (d == 2
3699 && BRANCH_COST (optimize_insn_for_speed_p (),
3700 false) >= 1)
3702 temp = gen_reg_rtx (mode);
3703 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3704 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3705 0, OPTAB_LIB_WIDEN);
3706 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3709 #ifdef HAVE_conditional_move
3710 if (BRANCH_COST (optimize_insn_for_speed_p (), false)
3711 >= 2)
3713 rtx temp2;
3715 /* ??? emit_conditional_move forces a stack adjustment via
3716 compare_from_rtx so, if the sequence is discarded, it will
3717 be lost. Do it now instead. */
3718 do_pending_stack_adjust ();
3720 start_sequence ();
3721 temp2 = copy_to_mode_reg (mode, op0);
3722 temp = expand_binop (mode, add_optab, temp2, GEN_INT (d-1),
3723 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3724 temp = force_reg (mode, temp);
3726 /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */
3727 temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3728 mode, temp, temp2, mode, 0);
3729 if (temp2)
3731 rtx seq = get_insns ();
3732 end_sequence ();
3733 emit_insn (seq);
3734 return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
3736 end_sequence ();
3738 #endif
3740 if (BRANCH_COST (optimize_insn_for_speed_p (),
3741 false) >= 2)
3743 int ushift = GET_MODE_BITSIZE (mode) - logd;
3745 temp = gen_reg_rtx (mode);
3746 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3747 if (shift_cost[optimize_insn_for_speed_p ()][mode][ushift] > COSTS_N_INSNS (1))
3748 temp = expand_binop (mode, and_optab, temp, GEN_INT (d - 1),
3749 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3750 else
3751 temp = expand_shift (RSHIFT_EXPR, mode, temp,
3752 ushift, NULL_RTX, 1);
3753 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3754 0, OPTAB_LIB_WIDEN);
3755 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3758 label = gen_label_rtx ();
3759 temp = copy_to_mode_reg (mode, op0);
3760 do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3761 expand_inc (temp, GEN_INT (d - 1));
3762 emit_label (label);
3763 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3766 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3767 if that is convenient, and returning where the result is.
3768 You may request either the quotient or the remainder as the result;
3769 specify REM_FLAG nonzero to get the remainder.
3771 CODE is the expression code for which kind of division this is;
3772 it controls how rounding is done. MODE is the machine mode to use.
3773 UNSIGNEDP nonzero means do unsigned division. */
3775 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3776 and then correct it by or'ing in missing high bits
3777 if result of ANDI is nonzero.
3778 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3779 This could optimize to a bfexts instruction.
3780 But C doesn't use these operations, so their optimizations are
3781 left for later. */
3782 /* ??? For modulo, we don't actually need the highpart of the first product,
3783 the low part will do nicely. And for small divisors, the second multiply
3784 can also be a low-part only multiply or even be completely left out.
3785 E.g. to calculate the remainder of a division by 3 with a 32 bit
3786 multiply, multiply with 0x55555556 and extract the upper two bits;
3787 the result is exact for inputs up to 0x1fffffff.
3788 The input range can be reduced by using cross-sum rules.
3789 For odd divisors >= 3, the following table gives right shift counts
3790 so that if a number is shifted by an integer multiple of the given
3791 amount, the remainder stays the same:
3792 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3793 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3794 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3795 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3796 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3798 Cross-sum rules for even numbers can be derived by leaving as many bits
3799 to the right alone as the divisor has zeros to the right.
3800 E.g. if x is an unsigned 32 bit number:
3801 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
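   As a worked instance of the division-by-3 remainder trick (an added
   illustration, not part of the original comment): for x == 100,
   100 * 0x55555556 == 0x2155555598; the low 32 bits are 0x55555598, whose
   top two bits are 01, i.e. 1 == 100 % 3.  */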
3805 expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3806 rtx op0, rtx op1, rtx target, int unsignedp)
3808 enum machine_mode compute_mode;
3809 rtx tquotient;
3810 rtx quotient = 0, remainder = 0;
3811 rtx last;
3812 int size;
3813 rtx insn, set;
3814 optab optab1, optab2;
3815 int op1_is_constant, op1_is_pow2 = 0;
3816 int max_cost, extra_cost;
3817 static HOST_WIDE_INT last_div_const = 0;
3818 static HOST_WIDE_INT ext_op1;
3819 bool speed = optimize_insn_for_speed_p ();
3821 op1_is_constant = CONST_INT_P (op1);
3822 if (op1_is_constant)
3824 ext_op1 = INTVAL (op1);
3825 if (unsignedp)
3826 ext_op1 &= GET_MODE_MASK (mode);
3827 op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3828 || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3832 This is the structure of expand_divmod:
3834 First comes code to fix up the operands so we can perform the operations
3835 correctly and efficiently.
3837 Second comes a switch statement with code specific for each rounding mode.
3838 For some special operands this code emits all RTL for the desired
3839 operation, for other cases, it generates only a quotient and stores it in
3840 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
3841 to indicate that it has not done anything.
3843 Last comes code that finishes the operation. If QUOTIENT is set and
3844 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
3845 QUOTIENT is not set, it is computed using trunc rounding.
3847 We try to generate special code for division and remainder when OP1 is a
3848 constant. If |OP1| = 2**n we can use shifts and some other fast
3849 operations. For other values of OP1, we compute a carefully selected
3850 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3851 by m.
3853 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3854 half of the product. Different strategies for generating the product are
3855 implemented in expand_mult_highpart.
3857 If what we actually want is the remainder, we generate that by another
3858 by-constant multiplication and a subtraction. */
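/* A concrete illustration (not from the original comments) of the 32-bit
   unsigned case for OP1 == 7: the chosen multiplier is the 33-bit constant
   0x124924925, so the generated sequence is roughly

     t = (x * 0x24924925ULL) >> 32;        high half of the product
     q = (t + ((x - t) >> 1)) >> 2;        == x / 7

   which is the mh != 0 path of the TRUNC_DIV_EXPR code below.  */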
3860 /* We shouldn't be called with OP1 == const1_rtx, but some of the
3861 code below will malfunction if we are, so check here and handle
3862 the special case if so. */
3863 if (op1 == const1_rtx)
3864 return rem_flag ? const0_rtx : op0;
3866 /* When dividing by -1, we could get an overflow.
3867 negv_optab can handle overflows. */
3868 if (! unsignedp && op1 == constm1_rtx)
3870 if (rem_flag)
3871 return const0_rtx;
3872 return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT
3873 ? negv_optab : neg_optab, op0, target, 0);
3876 if (target
3877 /* Don't use the function value register as a target
3878 since we have to read it as well as write it,
3879 and function-inlining gets confused by this. */
3880 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3881 /* Don't clobber an operand while doing a multi-step calculation. */
3882 || ((rem_flag || op1_is_constant)
3883 && (reg_mentioned_p (target, op0)
3884 || (MEM_P (op0) && MEM_P (target))))
3885 || reg_mentioned_p (target, op1)
3886 || (MEM_P (op1) && MEM_P (target))))
3887 target = 0;
3889 /* Get the mode in which to perform this computation. Normally it will
3890 be MODE, but sometimes we can't do the desired operation in MODE.
3891 If so, pick a wider mode in which we can do the operation. Convert
3892 to that mode at the start to avoid repeated conversions.
3894 First see what operations we need. These depend on the expression
3895 we are evaluating. (We assume that divxx3 insns exist under the
3896 same conditions as modxx3 insns do, and that these insns don't normally
3897 fail. If these assumptions are not correct, we may generate less
3898 efficient code in some cases.)
3900 Then see if we find a mode in which we can open-code that operation
3901 (either a division, modulus, or shift). Finally, check for the smallest
3902 mode for which we can do the operation with a library call. */
3904 /* We might want to refine this now that we have division-by-constant
3905 optimization. Since expand_mult_highpart tries so many variants, it is
3906 not straightforward to generalize this. Maybe we should make an array
3907 of possible modes in init_expmed? Save this for GCC 2.7. */
3909 optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3910 ? (unsignedp ? lshr_optab : ashr_optab)
3911 : (unsignedp ? udiv_optab : sdiv_optab));
3912 optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3913 ? optab1
3914 : (unsignedp ? udivmod_optab : sdivmod_optab));
3916 for (compute_mode = mode; compute_mode != VOIDmode;
3917 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3918 if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
3919 || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
3920 break;
3922 if (compute_mode == VOIDmode)
3923 for (compute_mode = mode; compute_mode != VOIDmode;
3924 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3925 if (optab_libfunc (optab1, compute_mode)
3926 || optab_libfunc (optab2, compute_mode))
3927 break;
3929 /* If we still couldn't find a mode, use MODE, but expand_binop will
3930 probably die. */
3931 if (compute_mode == VOIDmode)
3932 compute_mode = mode;
3934 if (target && GET_MODE (target) == compute_mode)
3935 tquotient = target;
3936 else
3937 tquotient = gen_reg_rtx (compute_mode);
3939 size = GET_MODE_BITSIZE (compute_mode);
3940 #if 0
3941 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3942 (mode), and thereby get better code when OP1 is a constant. Do that
3943 later. It will require going over all usages of SIZE below. */
3944 size = GET_MODE_BITSIZE (mode);
3945 #endif
3947 /* Only deduct something for a REM if the last divide done was
3948 for a different constant. Then set the constant of the last
3949 divide. */
3950 max_cost = unsignedp ? udiv_cost[speed][compute_mode] : sdiv_cost[speed][compute_mode];
3951 if (rem_flag && ! (last_div_const != 0 && op1_is_constant
3952 && INTVAL (op1) == last_div_const))
3953 max_cost -= mul_cost[speed][compute_mode] + add_cost[speed][compute_mode];
3955 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
3957 /* Now convert to the best mode to use. */
3958 if (compute_mode != mode)
3960 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
3961 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
3963 /* convert_modes may have placed op1 into a register, so we
3964 must recompute the following. */
3965 op1_is_constant = CONST_INT_P (op1);
3966 op1_is_pow2 = (op1_is_constant
3967 && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
3968 || (! unsignedp
3969 && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1)))))) ;
3972 /* If one of the operands is a volatile MEM, copy it into a register. */
3974 if (MEM_P (op0) && MEM_VOLATILE_P (op0))
3975 op0 = force_reg (compute_mode, op0);
3976 if (MEM_P (op1) && MEM_VOLATILE_P (op1))
3977 op1 = force_reg (compute_mode, op1);
3979 /* If we need the remainder or if OP1 is constant, we need to
3980 put OP0 in a register in case it has any queued subexpressions. */
3981 if (rem_flag || op1_is_constant)
3982 op0 = force_reg (compute_mode, op0);
3984 last = get_last_insn ();
3986 /* Promote floor rounding to trunc rounding for unsigned operations. */
3987 if (unsignedp)
3989 if (code == FLOOR_DIV_EXPR)
3990 code = TRUNC_DIV_EXPR;
3991 if (code == FLOOR_MOD_EXPR)
3992 code = TRUNC_MOD_EXPR;
3993 if (code == EXACT_DIV_EXPR && op1_is_pow2)
3994 code = TRUNC_DIV_EXPR;
3997 if (op1 != const0_rtx)
3998 switch (code)
4000 case TRUNC_MOD_EXPR:
4001 case TRUNC_DIV_EXPR:
4002 if (op1_is_constant)
4004 if (unsignedp)
4006 unsigned HOST_WIDE_INT mh;
4007 int pre_shift, post_shift;
4008 int dummy;
4009 rtx ml;
4010 unsigned HOST_WIDE_INT d = (INTVAL (op1)
4011 & GET_MODE_MASK (compute_mode));
4013 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4015 pre_shift = floor_log2 (d);
4016 if (rem_flag)
4018 remainder
4019 = expand_binop (compute_mode, and_optab, op0,
4020 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4021 remainder, 1,
4022 OPTAB_LIB_WIDEN);
4023 if (remainder)
4024 return gen_lowpart (mode, remainder);
4026 quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4027 pre_shift, tquotient, 1);
4029 else if (size <= HOST_BITS_PER_WIDE_INT)
4031 if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
4033 /* Most significant bit of divisor is set; emit an scc
4034 insn. */
4035 quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4036 compute_mode, 1, 1);
4038 else
4040 /* Find a suitable multiplier and right shift count
4041 instead of multiplying with D. */
4043 mh = choose_multiplier (d, size, size,
4044 &ml, &post_shift, &dummy);
4046 /* If the suggested multiplier is more than SIZE bits,
4047 we can do better for even divisors, using an
4048 initial right shift. */
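/* E.g. (an added illustration): for d == 14 the 32-bit multiplier would
   need 33 bits, but after a pre-shift of 1 the divisor becomes 7 and a
   32-bit multiplier (0x92492493) suffices, avoiding the extra add/shift
   fixup.  */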
4049 if (mh != 0 && (d & 1) == 0)
4051 pre_shift = floor_log2 (d & -d);
4052 mh = choose_multiplier (d >> pre_shift, size,
4053 size - pre_shift,
4054 &ml, &post_shift, &dummy);
4055 gcc_assert (!mh);
4057 else
4058 pre_shift = 0;
4060 if (mh != 0)
4062 rtx t1, t2, t3, t4;
4064 if (post_shift - 1 >= BITS_PER_WORD)
4065 goto fail1;
4067 extra_cost
4068 = (shift_cost[speed][compute_mode][post_shift - 1]
4069 + shift_cost[speed][compute_mode][1]
4070 + 2 * add_cost[speed][compute_mode]);
4071 t1 = expand_mult_highpart (compute_mode, op0, ml,
4072 NULL_RTX, 1,
4073 max_cost - extra_cost);
4074 if (t1 == 0)
4075 goto fail1;
4076 t2 = force_operand (gen_rtx_MINUS (compute_mode,
4077 op0, t1),
4078 NULL_RTX);
4079 t3 = expand_shift (RSHIFT_EXPR, compute_mode,
4080 t2, 1, NULL_RTX, 1);
4081 t4 = force_operand (gen_rtx_PLUS (compute_mode,
4082 t1, t3),
4083 NULL_RTX);
4084 quotient = expand_shift
4085 (RSHIFT_EXPR, compute_mode, t4,
4086 post_shift - 1, tquotient, 1);
4088 else
4090 rtx t1, t2;
4092 if (pre_shift >= BITS_PER_WORD
4093 || post_shift >= BITS_PER_WORD)
4094 goto fail1;
4096 t1 = expand_shift
4097 (RSHIFT_EXPR, compute_mode, op0,
4098 pre_shift, NULL_RTX, 1);
4099 extra_cost
4100 = (shift_cost[speed][compute_mode][pre_shift]
4101 + shift_cost[speed][compute_mode][post_shift]);
4102 t2 = expand_mult_highpart (compute_mode, t1, ml,
4103 NULL_RTX, 1,
4104 max_cost - extra_cost);
4105 if (t2 == 0)
4106 goto fail1;
4107 quotient = expand_shift
4108 (RSHIFT_EXPR, compute_mode, t2,
4109 post_shift, tquotient, 1);
4113 else /* Too wide mode to use tricky code */
4114 break;
4116 insn = get_last_insn ();
4117 if (insn != last
4118 && (set = single_set (insn)) != 0
4119 && SET_DEST (set) == quotient)
4120 set_unique_reg_note (insn,
4121 REG_EQUAL,
4122 gen_rtx_UDIV (compute_mode, op0, op1));
4124 else /* TRUNC_DIV, signed */
4126 unsigned HOST_WIDE_INT ml;
4127 int lgup, post_shift;
4128 rtx mlr;
4129 HOST_WIDE_INT d = INTVAL (op1);
4130 unsigned HOST_WIDE_INT abs_d;
4132 /* Since d might be INT_MIN, we have to cast to
4133 unsigned HOST_WIDE_INT before negating to avoid
4134 undefined signed overflow. */
4135 abs_d = (d >= 0
4136 ? (unsigned HOST_WIDE_INT) d
4137 : - (unsigned HOST_WIDE_INT) d);
4139 /* n rem d = n rem -d */
4140 if (rem_flag && d < 0)
4142 d = abs_d;
4143 op1 = gen_int_mode (abs_d, compute_mode);
4146 if (d == 1)
4147 quotient = op0;
4148 else if (d == -1)
4149 quotient = expand_unop (compute_mode, neg_optab, op0,
4150 tquotient, 0);
4151 else if (HOST_BITS_PER_WIDE_INT >= size
4152 && abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4154 /* This case is not handled correctly below. */
4155 quotient = emit_store_flag (tquotient, EQ, op0, op1,
4156 compute_mode, 1, 1);
4157 if (quotient == 0)
4158 goto fail1;
4160 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4161 && (rem_flag ? smod_pow2_cheap[speed][compute_mode]
4162 : sdiv_pow2_cheap[speed][compute_mode])
4163 /* We assume that the cheap metric is true if the
4164 optab has an expander for this mode. */
4165 && ((optab_handler ((rem_flag ? smod_optab
4166 : sdiv_optab),
4167 compute_mode)
4168 != CODE_FOR_nothing)
4169 || (optab_handler (sdivmod_optab,
4170 compute_mode)
4171 != CODE_FOR_nothing)))
4173 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4175 if (rem_flag)
4177 remainder = expand_smod_pow2 (compute_mode, op0, d);
4178 if (remainder)
4179 return gen_lowpart (mode, remainder);
4182 if (sdiv_pow2_cheap[speed][compute_mode]
4183 && ((optab_handler (sdiv_optab, compute_mode)
4184 != CODE_FOR_nothing)
4185 || (optab_handler (sdivmod_optab, compute_mode)
4186 != CODE_FOR_nothing)))
4187 quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4188 compute_mode, op0,
4189 gen_int_mode (abs_d,
4190 compute_mode),
4191 NULL_RTX, 0);
4192 else
4193 quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4195 /* We have computed OP0 / abs(OP1). If OP1 is negative,
4196 negate the quotient. */
4197 if (d < 0)
4199 insn = get_last_insn ();
4200 if (insn != last
4201 && (set = single_set (insn)) != 0
4202 && SET_DEST (set) == quotient
4203 && abs_d < ((unsigned HOST_WIDE_INT) 1
4204 << (HOST_BITS_PER_WIDE_INT - 1)))
4205 set_unique_reg_note (insn,
4206 REG_EQUAL,
4207 gen_rtx_DIV (compute_mode,
4208 op0,
4209 GEN_INT
4210 (trunc_int_for_mode
4211 (abs_d,
4212 compute_mode))));
4214 quotient = expand_unop (compute_mode, neg_optab,
4215 quotient, quotient, 0);
4218 else if (size <= HOST_BITS_PER_WIDE_INT)
4220 choose_multiplier (abs_d, size, size - 1,
4221 &mlr, &post_shift, &lgup);
4222 ml = (unsigned HOST_WIDE_INT) INTVAL (mlr);
4223 if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4225 rtx t1, t2, t3;
4227 if (post_shift >= BITS_PER_WORD
4228 || size - 1 >= BITS_PER_WORD)
4229 goto fail1;
4231 extra_cost = (shift_cost[speed][compute_mode][post_shift]
4232 + shift_cost[speed][compute_mode][size - 1]
4233 + add_cost[speed][compute_mode]);
4234 t1 = expand_mult_highpart (compute_mode, op0, mlr,
4235 NULL_RTX, 0,
4236 max_cost - extra_cost);
4237 if (t1 == 0)
4238 goto fail1;
4239 t2 = expand_shift
4240 (RSHIFT_EXPR, compute_mode, t1,
4241 post_shift, NULL_RTX, 0);
4242 t3 = expand_shift
4243 (RSHIFT_EXPR, compute_mode, op0,
4244 size - 1, NULL_RTX, 0);
4245 if (d < 0)
4246 quotient
4247 = force_operand (gen_rtx_MINUS (compute_mode,
4248 t3, t2),
4249 tquotient);
4250 else
4251 quotient
4252 = force_operand (gen_rtx_MINUS (compute_mode,
4253 t2, t3),
4254 tquotient);
4256 else
4258 rtx t1, t2, t3, t4;
4260 if (post_shift >= BITS_PER_WORD
4261 || size - 1 >= BITS_PER_WORD)
4262 goto fail1;
4264 ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4265 mlr = gen_int_mode (ml, compute_mode);
4266 extra_cost = (shift_cost[speed][compute_mode][post_shift]
4267 + shift_cost[speed][compute_mode][size - 1]
4268 + 2 * add_cost[speed][compute_mode]);
4269 t1 = expand_mult_highpart (compute_mode, op0, mlr,
4270 NULL_RTX, 0,
4271 max_cost - extra_cost);
4272 if (t1 == 0)
4273 goto fail1;
4274 t2 = force_operand (gen_rtx_PLUS (compute_mode,
4275 t1, op0),
4276 NULL_RTX);
4277 t3 = expand_shift
4278 (RSHIFT_EXPR, compute_mode, t2,
4279 post_shift, NULL_RTX, 0);
4280 t4 = expand_shift
4281 (RSHIFT_EXPR, compute_mode, op0,
4282 size - 1, NULL_RTX, 0);
4283 if (d < 0)
4284 quotient
4285 = force_operand (gen_rtx_MINUS (compute_mode,
4286 t4, t3),
4287 tquotient);
4288 else
4289 quotient
4290 = force_operand (gen_rtx_MINUS (compute_mode,
4291 t3, t4),
4292 tquotient);
4295 else /* Too wide mode to use tricky code */
4296 break;
4298 insn = get_last_insn ();
4299 if (insn != last
4300 && (set = single_set (insn)) != 0
4301 && SET_DEST (set) == quotient)
4302 set_unique_reg_note (insn,
4303 REG_EQUAL,
4304 gen_rtx_DIV (compute_mode, op0, op1));
4306 break;
4308 fail1:
4309 delete_insns_since (last);
4310 break;
4312 case FLOOR_DIV_EXPR:
4313 case FLOOR_MOD_EXPR:
4314 /* We will come here only for signed operations. */
4315 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4317 unsigned HOST_WIDE_INT mh;
4318 int pre_shift, lgup, post_shift;
4319 HOST_WIDE_INT d = INTVAL (op1);
4320 rtx ml;
4322 if (d > 0)
4324 /* We could just as easily deal with negative constants here,
4325 but it does not seem worth the trouble for GCC 2.6. */
4326 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4328 pre_shift = floor_log2 (d);
4329 if (rem_flag)
4331 remainder = expand_binop (compute_mode, and_optab, op0,
4332 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4333 remainder, 0, OPTAB_LIB_WIDEN);
4334 if (remainder)
4335 return gen_lowpart (mode, remainder);
4337 quotient = expand_shift
4338 (RSHIFT_EXPR, compute_mode, op0,
4339 pre_shift, tquotient, 0);
4341 else
4343 rtx t1, t2, t3, t4;
4345 mh = choose_multiplier (d, size, size - 1,
4346 &ml, &post_shift, &lgup);
4347 gcc_assert (!mh);
4349 if (post_shift < BITS_PER_WORD
4350 && size - 1 < BITS_PER_WORD)
4352 t1 = expand_shift
4353 (RSHIFT_EXPR, compute_mode, op0,
4354 size - 1, NULL_RTX, 0);
4355 t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4356 NULL_RTX, 0, OPTAB_WIDEN);
4357 extra_cost = (shift_cost[speed][compute_mode][post_shift]
4358 + shift_cost[speed][compute_mode][size - 1]
4359 + 2 * add_cost[speed][compute_mode]);
4360 t3 = expand_mult_highpart (compute_mode, t2, ml,
4361 NULL_RTX, 1,
4362 max_cost - extra_cost);
4363 if (t3 != 0)
4365 t4 = expand_shift
4366 (RSHIFT_EXPR, compute_mode, t3,
4367 post_shift, NULL_RTX, 1);
4368 quotient = expand_binop (compute_mode, xor_optab,
4369 t4, t1, tquotient, 0,
4370 OPTAB_WIDEN);
4375 else
4377 rtx nsign, t1, t2, t3, t4;
4378 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4379 op0, constm1_rtx), NULL_RTX);
4380 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4381 0, OPTAB_WIDEN);
4382 nsign = expand_shift
4383 (RSHIFT_EXPR, compute_mode, t2,
4384 size - 1, NULL_RTX, 0);
4385 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4386 NULL_RTX);
4387 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4388 NULL_RTX, 0);
4389 if (t4)
4391 rtx t5;
4392 t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4393 NULL_RTX, 0);
4394 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4395 t4, t5),
4396 tquotient);
4401 if (quotient != 0)
4402 break;
4403 delete_insns_since (last);
4405 /* Try using an instruction that produces both the quotient and
4406 remainder, using truncation. We can easily compensate the quotient
4407 or remainder to get floor rounding, once we have the remainder.
4408 Notice that we also compute the final remainder value here,
4409 and return the result right away. */
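/* (A sketch of the compensation, added for illustration: given a truncating
   quotient q and remainder r of OP0 by OP1, floor rounding is

     if (r != 0 && (OP0 ^ OP1) < 0)    operands of opposite sign
       q -= 1, r += OP1;

   which is what the branches emitted below implement.)  */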
4410 if (target == 0 || GET_MODE (target) != compute_mode)
4411 target = gen_reg_rtx (compute_mode);
4413 if (rem_flag)
4415 remainder
4416 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4417 quotient = gen_reg_rtx (compute_mode);
4419 else
4421 quotient
4422 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4423 remainder = gen_reg_rtx (compute_mode);
4426 if (expand_twoval_binop (sdivmod_optab, op0, op1,
4427 quotient, remainder, 0))
4429 /* This could be computed with a branch-less sequence.
4430 Save that for later. */
4431 rtx tem;
4432 rtx label = gen_label_rtx ();
4433 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4434 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4435 NULL_RTX, 0, OPTAB_WIDEN);
4436 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4437 expand_dec (quotient, const1_rtx);
4438 expand_inc (remainder, op1);
4439 emit_label (label);
4440 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4443 /* No luck with division elimination or divmod. Have to do it
4444 by conditionally adjusting op0 *and* the result. */
4446 rtx label1, label2, label3, label4, label5;
4447 rtx adjusted_op0;
4448 rtx tem;
4450 quotient = gen_reg_rtx (compute_mode);
4451 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4452 label1 = gen_label_rtx ();
4453 label2 = gen_label_rtx ();
4454 label3 = gen_label_rtx ();
4455 label4 = gen_label_rtx ();
4456 label5 = gen_label_rtx ();
4457 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4458 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4459 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4460 quotient, 0, OPTAB_LIB_WIDEN);
4461 if (tem != quotient)
4462 emit_move_insn (quotient, tem);
4463 emit_jump_insn (gen_jump (label5));
4464 emit_barrier ();
4465 emit_label (label1);
4466 expand_inc (adjusted_op0, const1_rtx);
4467 emit_jump_insn (gen_jump (label4));
4468 emit_barrier ();
4469 emit_label (label2);
4470 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4471 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4472 quotient, 0, OPTAB_LIB_WIDEN);
4473 if (tem != quotient)
4474 emit_move_insn (quotient, tem);
4475 emit_jump_insn (gen_jump (label5));
4476 emit_barrier ();
4477 emit_label (label3);
4478 expand_dec (adjusted_op0, const1_rtx);
4479 emit_label (label4);
4480 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4481 quotient, 0, OPTAB_LIB_WIDEN);
4482 if (tem != quotient)
4483 emit_move_insn (quotient, tem);
4484 expand_dec (quotient, const1_rtx);
4485 emit_label (label5);
4487 break;
4489 case CEIL_DIV_EXPR:
4490 case CEIL_MOD_EXPR:
4491 if (unsignedp)
4493 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4495 rtx t1, t2, t3;
4496 unsigned HOST_WIDE_INT d = INTVAL (op1);
4497 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4498 floor_log2 (d), tquotient, 1);
4499 t2 = expand_binop (compute_mode, and_optab, op0,
4500 GEN_INT (d - 1),
4501 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4502 t3 = gen_reg_rtx (compute_mode);
4503 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4504 compute_mode, 1, 1);
4505 if (t3 == 0)
4507 rtx lab;
4508 lab = gen_label_rtx ();
4509 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4510 expand_inc (t1, const1_rtx);
4511 emit_label (lab);
4512 quotient = t1;
4514 else
4515 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4516 t1, t3),
4517 tquotient);
4518 break;
4521 /* Try using an instruction that produces both the quotient and
4522 remainder, using truncation. We can easily compensate the
4523 quotient or remainder to get ceiling rounding, once we have the
4524 remainder. Notice that we also compute the final remainder
4525 value here, and return the result right away. */
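/* (A sketch of the compensation, added for illustration: in the unsigned
   case, given a truncating quotient q and remainder r of OP0 by OP1,

     if (r != 0)
       q += 1, r -= OP1;

   yields the ceiling quotient and matching remainder, which is what the
   branch emitted below implements.)  */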
4526 if (target == 0 || GET_MODE (target) != compute_mode)
4527 target = gen_reg_rtx (compute_mode);
4529 if (rem_flag)
4531 remainder = (REG_P (target)
4532 ? target : gen_reg_rtx (compute_mode));
4533 quotient = gen_reg_rtx (compute_mode);
4535 else
4537 quotient = (REG_P (target)
4538 ? target : gen_reg_rtx (compute_mode));
4539 remainder = gen_reg_rtx (compute_mode);
4542 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4543 remainder, 1))
4545 /* This could be computed with a branch-less sequence.
4546 Save that for later. */
4547 rtx label = gen_label_rtx ();
4548 do_cmp_and_jump (remainder, const0_rtx, EQ,
4549 compute_mode, label);
4550 expand_inc (quotient, const1_rtx);
4551 expand_dec (remainder, op1);
4552 emit_label (label);
4553 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4556 /* No luck with division elimination or divmod. Have to do it
4557 by conditionally adjusting op0 *and* the result. */
4559 rtx label1, label2;
4560 rtx adjusted_op0, tem;
4562 quotient = gen_reg_rtx (compute_mode);
4563 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4564 label1 = gen_label_rtx ();
4565 label2 = gen_label_rtx ();
4566 do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4567 compute_mode, label1);
4568 emit_move_insn (quotient, const0_rtx);
4569 emit_jump_insn (gen_jump (label2));
4570 emit_barrier ();
4571 emit_label (label1);
4572 expand_dec (adjusted_op0, const1_rtx);
4573 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4574 quotient, 1, OPTAB_LIB_WIDEN);
4575 if (tem != quotient)
4576 emit_move_insn (quotient, tem);
4577 expand_inc (quotient, const1_rtx);
4578 emit_label (label2);
4581 else /* signed */
4583 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4584 && INTVAL (op1) >= 0)
4586 /* This is extremely similar to the code for the unsigned case
4587 above. For 2.7 we should merge these variants, but for
4588 2.6.1 I don't want to touch the code for unsigned since that
4589 gets used in C. The signed case will only be used by other
4590 languages (Ada). */
4592 rtx t1, t2, t3;
4593 unsigned HOST_WIDE_INT d = INTVAL (op1);
4594 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4595 floor_log2 (d), tquotient, 0);
4596 t2 = expand_binop (compute_mode, and_optab, op0,
4597 GEN_INT (d - 1),
4598 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4599 t3 = gen_reg_rtx (compute_mode);
4600 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4601 compute_mode, 1, 1);
4602 if (t3 == 0)
4604 rtx lab;
4605 lab = gen_label_rtx ();
4606 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4607 expand_inc (t1, const1_rtx);
4608 emit_label (lab);
4609 quotient = t1;
4611 else
4612 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4613 t1, t3),
4614 tquotient);
4615 break;
4618 /* Try using an instruction that produces both the quotient and
4619 remainder, using truncation. We can easily compensate the
4620 quotient or remainder to get ceiling rounding, once we have the
4621 remainder. Notice that we also compute the final remainder
4622 value here, and return the result right away. */
4623 if (target == 0 || GET_MODE (target) != compute_mode)
4624 target = gen_reg_rtx (compute_mode);
4625 if (rem_flag)
4627 remainder = (REG_P (target)
4628 ? target : gen_reg_rtx (compute_mode));
4629 quotient = gen_reg_rtx (compute_mode);
4631 else
4633 quotient = (REG_P (target)
4634 ? target : gen_reg_rtx (compute_mode));
4635 remainder = gen_reg_rtx (compute_mode);
4638 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4639 remainder, 0))
4641 /* This could be computed with a branch-less sequence.
4642 Save that for later. */
4643 rtx tem;
4644 rtx label = gen_label_rtx ();
4645 do_cmp_and_jump (remainder, const0_rtx, EQ,
4646 compute_mode, label);
4647 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4648 NULL_RTX, 0, OPTAB_WIDEN);
4649 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4650 expand_inc (quotient, const1_rtx);
4651 expand_dec (remainder, op1);
4652 emit_label (label);
4653 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4656 /* No luck with division elimination or divmod. Have to do it
4657 by conditionally adjusting op0 *and* the result. */
4659 rtx label1, label2, label3, label4, label5;
4660 rtx adjusted_op0;
4661 rtx tem;
4663 quotient = gen_reg_rtx (compute_mode);
4664 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4665 label1 = gen_label_rtx ();
4666 label2 = gen_label_rtx ();
4667 label3 = gen_label_rtx ();
4668 label4 = gen_label_rtx ();
4669 label5 = gen_label_rtx ();
4670 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4671 do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4672 compute_mode, label1);
4673 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4674 quotient, 0, OPTAB_LIB_WIDEN);
4675 if (tem != quotient)
4676 emit_move_insn (quotient, tem);
4677 emit_jump_insn (gen_jump (label5));
4678 emit_barrier ();
4679 emit_label (label1);
4680 expand_dec (adjusted_op0, const1_rtx);
4681 emit_jump_insn (gen_jump (label4));
4682 emit_barrier ();
4683 emit_label (label2);
4684 do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4685 compute_mode, label3);
4686 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4687 quotient, 0, OPTAB_LIB_WIDEN);
4688 if (tem != quotient)
4689 emit_move_insn (quotient, tem);
4690 emit_jump_insn (gen_jump (label5));
4691 emit_barrier ();
4692 emit_label (label3);
4693 expand_inc (adjusted_op0, const1_rtx);
4694 emit_label (label4);
4695 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4696 quotient, 0, OPTAB_LIB_WIDEN);
4697 if (tem != quotient)
4698 emit_move_insn (quotient, tem);
4699 expand_inc (quotient, const1_rtx);
4700 emit_label (label5);
4703 break;
4705 case EXACT_DIV_EXPR:
4706 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4708 HOST_WIDE_INT d = INTVAL (op1);
4709 unsigned HOST_WIDE_INT ml;
4710 int pre_shift;
4711 rtx t1;
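/* Exact division: OP0 is known to be a multiple of D, so shift out D's
   factors of two and multiply by the multiplicative inverse of the odd
   part modulo 2**size.  For instance (an added illustration), for SImode
   and D == 7 the inverse is 0xB6DB6DB7, and 21 * 0xB6DB6DB7 == 3 modulo
   2**32.  */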
4713 pre_shift = floor_log2 (d & -d);
4714 ml = invert_mod2n (d >> pre_shift, size);
4715 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4716 pre_shift, NULL_RTX, unsignedp);
4717 quotient = expand_mult (compute_mode, t1,
4718 gen_int_mode (ml, compute_mode),
4719 NULL_RTX, 1);
4721 insn = get_last_insn ();
4722 set_unique_reg_note (insn,
4723 REG_EQUAL,
4724 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4725 compute_mode,
4726 op0, op1));
4728 break;
4730 case ROUND_DIV_EXPR:
4731 case ROUND_MOD_EXPR:
4732 if (unsignedp)
4734 rtx tem;
4735 rtx label;
4736 label = gen_label_rtx ();
4737 quotient = gen_reg_rtx (compute_mode);
4738 remainder = gen_reg_rtx (compute_mode);
4739 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4741 rtx tem;
4742 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4743 quotient, 1, OPTAB_LIB_WIDEN);
4744 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4745 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4746 remainder, 1, OPTAB_LIB_WIDEN);
4748 tem = plus_constant (op1, -1);
4749 tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1);
4750 do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4751 expand_inc (quotient, const1_rtx);
4752 expand_dec (remainder, op1);
4753 emit_label (label);
4755 else
4757 rtx abs_rem, abs_op1, tem, mask;
4758 rtx label;
4759 label = gen_label_rtx ();
4760 quotient = gen_reg_rtx (compute_mode);
4761 remainder = gen_reg_rtx (compute_mode);
4762 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4764 rtx tem;
4765 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4766 quotient, 0, OPTAB_LIB_WIDEN);
4767 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4768 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4769 remainder, 0, OPTAB_LIB_WIDEN);
4771 abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4772 abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4773 tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4774 1, NULL_RTX, 1);
4775 do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4776 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4777 NULL_RTX, 0, OPTAB_WIDEN);
4778 mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4779 size - 1, NULL_RTX, 0);
4780 tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4781 NULL_RTX, 0, OPTAB_WIDEN);
4782 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4783 NULL_RTX, 0, OPTAB_WIDEN);
4784 expand_inc (quotient, tem);
4785 tem = expand_binop (compute_mode, xor_optab, mask, op1,
4786 NULL_RTX, 0, OPTAB_WIDEN);
4787 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4788 NULL_RTX, 0, OPTAB_WIDEN);
4789 expand_dec (remainder, tem);
4790 emit_label (label);
4792 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4794 default:
4795 gcc_unreachable ();
4798 if (quotient == 0)
4800 if (target && GET_MODE (target) != compute_mode)
4801 target = 0;
4803 if (rem_flag)
4805 /* Try to produce the remainder without producing the quotient.
4806 If we seem to have a divmod pattern that does not require widening,
4807 don't try widening here. We should really have a WIDEN argument
4808 to expand_twoval_binop, since what we'd really like to do here is
4809 1) try a mod insn in compute_mode
4810 2) try a divmod insn in compute_mode
4811 3) try a div insn in compute_mode and multiply-subtract to get
4812 remainder
4813 4) try the same things with widening allowed. */
4814 remainder
4815 = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4816 op0, op1, target,
4817 unsignedp,
4818 ((optab_handler (optab2, compute_mode)
4819 != CODE_FOR_nothing)
4820 ? OPTAB_DIRECT : OPTAB_WIDEN));
4821 if (remainder == 0)
4823 /* No luck there. Can we do remainder and divide at once
4824 without a library call? */
4825 remainder = gen_reg_rtx (compute_mode);
4826 if (! expand_twoval_binop ((unsignedp
4827 ? udivmod_optab
4828 : sdivmod_optab),
4829 op0, op1,
4830 NULL_RTX, remainder, unsignedp))
4831 remainder = 0;
4834 if (remainder)
4835 return gen_lowpart (mode, remainder);
4838 /* Produce the quotient. Try a quotient insn, but not a library call.
4839 If we have a divmod in this mode, use it in preference to widening
4840 the div (for this test we assume it will not fail). Note that optab2
4841 is set to the one of the two optabs that the call below will use. */
4842 quotient
4843 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4844 op0, op1, rem_flag ? NULL_RTX : target,
4845 unsignedp,
4846 ((optab_handler (optab2, compute_mode)
4847 != CODE_FOR_nothing)
4848 ? OPTAB_DIRECT : OPTAB_WIDEN));
4850 if (quotient == 0)
4852 /* No luck there. Try a quotient-and-remainder insn,
4853 keeping the quotient alone. */
4854 quotient = gen_reg_rtx (compute_mode);
4855 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4856 op0, op1,
4857 quotient, NULL_RTX, unsignedp))
4859 quotient = 0;
4860 if (! rem_flag)
4861 /* Still no luck. If we are not computing the remainder,
4862 use a library call for the quotient. */
4863 quotient = sign_expand_binop (compute_mode,
4864 udiv_optab, sdiv_optab,
4865 op0, op1, target,
4866 unsignedp, OPTAB_LIB_WIDEN);
4871 if (rem_flag)
4873 if (target && GET_MODE (target) != compute_mode)
4874 target = 0;
4876 if (quotient == 0)
4878 /* No divide instruction either. Use library for remainder. */
4879 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4880 op0, op1, target,
4881 unsignedp, OPTAB_LIB_WIDEN);
4882 /* No remainder function. Try a quotient-and-remainder
4883 function, keeping the remainder. */
4884 if (!remainder)
4886 remainder = gen_reg_rtx (compute_mode);
4887 if (!expand_twoval_binop_libfunc
4888 (unsignedp ? udivmod_optab : sdivmod_optab,
4889 op0, op1,
4890 NULL_RTX, remainder,
4891 unsignedp ? UMOD : MOD))
4892 remainder = NULL_RTX;
4895 else
4897 /* We divided. Now finish doing X - Y * (X / Y). */
4898 remainder = expand_mult (compute_mode, quotient, op1,
4899 NULL_RTX, unsignedp);
4900 remainder = expand_binop (compute_mode, sub_optab, op0,
4901 remainder, target, unsignedp,
4902 OPTAB_LIB_WIDEN);
4906 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4909 /* Return a tree node with data type TYPE, describing the value of X.
4910 Usually this is a VAR_DECL, if there is no obvious better choice.
4911 X may be an expression; however, we only support those expressions
4912 generated by loop.c. */
4914 tree
4915 make_tree (tree type, rtx x)
4917 tree t;
4919 switch (GET_CODE (x))
4921 case CONST_INT:
4923 HOST_WIDE_INT hi = 0;
4925 if (INTVAL (x) < 0
4926 && !(TYPE_UNSIGNED (type)
4927 && (GET_MODE_BITSIZE (TYPE_MODE (type))
4928 < HOST_BITS_PER_WIDE_INT)))
4929 hi = -1;
4931 t = build_int_cst_wide (type, INTVAL (x), hi);
4933 return t;
4936 case CONST_DOUBLE:
4937 if (GET_MODE (x) == VOIDmode)
4938 t = build_int_cst_wide (type,
4939 CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
4940 else
4942 REAL_VALUE_TYPE d;
4944 REAL_VALUE_FROM_CONST_DOUBLE (d, x);
4945 t = build_real (type, d);
4948 return t;
4950 case CONST_VECTOR:
4952 int units = CONST_VECTOR_NUNITS (x);
4953 tree itype = TREE_TYPE (type);
4954 tree t = NULL_TREE;
4955 int i;
4958 /* Build a tree with vector elements. */
4959 for (i = units - 1; i >= 0; --i)
4961 rtx elt = CONST_VECTOR_ELT (x, i);
4962 t = tree_cons (NULL_TREE, make_tree (itype, elt), t);
4965 return build_vector (type, t);
4968 case PLUS:
4969 return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4970 make_tree (type, XEXP (x, 1)));
4972 case MINUS:
4973 return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4974 make_tree (type, XEXP (x, 1)));
4976 case NEG:
4977 return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
4979 case MULT:
4980 return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
4981 make_tree (type, XEXP (x, 1)));
4983 case ASHIFT:
4984 return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
4985 make_tree (type, XEXP (x, 1)));
4987 case LSHIFTRT:
4988 t = unsigned_type_for (type);
4989 return fold_convert (type, build2 (RSHIFT_EXPR, t,
4990 make_tree (t, XEXP (x, 0)),
4991 make_tree (type, XEXP (x, 1))));
4993 case ASHIFTRT:
4994 t = signed_type_for (type);
4995 return fold_convert (type, build2 (RSHIFT_EXPR, t,
4996 make_tree (t, XEXP (x, 0)),
4997 make_tree (type, XEXP (x, 1))));
4999 case DIV:
5000 if (TREE_CODE (type) != REAL_TYPE)
5001 t = signed_type_for (type);
5002 else
5003 t = type;
5005 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5006 make_tree (t, XEXP (x, 0)),
5007 make_tree (t, XEXP (x, 1))));
5008 case UDIV:
5009 t = unsigned_type_for (type);
5010 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5011 make_tree (t, XEXP (x, 0)),
5012 make_tree (t, XEXP (x, 1))));
5014 case SIGN_EXTEND:
5015 case ZERO_EXTEND:
5016 t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5017 GET_CODE (x) == ZERO_EXTEND);
5018 return fold_convert (type, make_tree (t, XEXP (x, 0)));
5020 case CONST:
5021 return make_tree (type, XEXP (x, 0));
5023 case SYMBOL_REF:
5024 t = SYMBOL_REF_DECL (x);
5025 if (t)
5026 return fold_convert (type, build_fold_addr_expr (t));
5027 /* else fall through. */
5029 default:
5030 t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5032 /* If TYPE is a POINTER_TYPE, we might need to convert X from
5033 address mode to pointer mode. */
5034 if (POINTER_TYPE_P (type))
5035 x = convert_memory_address_addr_space
5036 (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5038 /* Note that we do *not* use SET_DECL_RTL here, because we do not
5039 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
5040 t->decl_with_rtl.rtl = x;
5042 return t;
5046 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5047 and returning TARGET.
5049 If TARGET is 0, a pseudo-register or constant is returned. */
5052 expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
5054 rtx tem = 0;
5056 if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5057 tem = simplify_binary_operation (AND, mode, op0, op1);
5058 if (tem == 0)
5059 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5061 if (target == 0)
5062 target = tem;
5063 else if (tem != target)
5064 emit_move_insn (target, tem);
5065 return target;
5068 /* Helper function for emit_store_flag. */
5069 static rtx
5070 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5071 enum machine_mode mode, enum machine_mode compare_mode,
5072 int unsignedp, rtx x, rtx y, int normalizep,
5073 enum machine_mode target_mode)
5075 struct expand_operand ops[4];
5076 rtx op0, last, comparison, subtarget;
5077 enum machine_mode result_mode = insn_data[(int) icode].operand[0].mode;
5079 last = get_last_insn ();
5080 x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5081 y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5082 if (!x || !y)
5084 delete_insns_since (last);
5085 return NULL_RTX;
5088 if (target_mode == VOIDmode)
5089 target_mode = result_mode;
5090 if (!target)
5091 target = gen_reg_rtx (target_mode);
5093 comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5095 create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5096 create_fixed_operand (&ops[1], comparison);
5097 create_fixed_operand (&ops[2], x);
5098 create_fixed_operand (&ops[3], y);
5099 if (!maybe_expand_insn (icode, 4, ops))
5101 delete_insns_since (last);
5102 return NULL_RTX;
5104 subtarget = ops[0].value;
5106 /* If we are converting to a wider mode, first convert to
5107 TARGET_MODE, then normalize. This produces better combining
5108 opportunities on machines that have a SIGN_EXTRACT when we are
5109 testing a single bit. This mostly benefits the 68k.
5111 If STORE_FLAG_VALUE does not have the sign bit set when
5112 interpreted in MODE, we can do this conversion as unsigned, which
5113 is usually more efficient. */
5114 if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
5116 convert_move (target, subtarget,
5117 val_signbit_known_clear_p (result_mode,
5118 STORE_FLAG_VALUE));
5119 op0 = target;
5120 result_mode = target_mode;
5122 else
5123 op0 = subtarget;
5125 /* If we want to keep subexpressions around, don't reuse our last
5126 target. */
5127 if (optimize)
5128 subtarget = 0;
5130 /* Now normalize to the proper value in MODE. Sometimes we don't
5131 have to do anything. */
5132 if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5134 /* STORE_FLAG_VALUE might be the most negative number, so write
5135 the comparison this way to avoid a compile-time warning. */
5136 else if (- normalizep == STORE_FLAG_VALUE)
5137 op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5139 /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5140 it hard to use a value of just the sign bit due to ANSI integer
5141 constant typing rules. */
5142 else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5143 op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5144 GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5145 normalizep == 1);
5146 else
5148 gcc_assert (STORE_FLAG_VALUE & 1);
5150 op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5151 if (normalizep == -1)
5152 op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5155 /* If we were converting to a smaller mode, do the conversion now. */
5156 if (target_mode != result_mode)
5158 convert_move (target, op0, 0);
5159 return target;
5161 else
5162 return op0;
5166 /* A subroutine of emit_store_flag only including "tricks" that do not
5167 need a recursive call. These are kept separate to avoid infinite
5168 loops. */
5170 static rtx
5171 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5172 enum machine_mode mode, int unsignedp, int normalizep,
5173 enum machine_mode target_mode)
5175 rtx subtarget;
5176 enum insn_code icode;
5177 enum machine_mode compare_mode;
5178 enum mode_class mclass;
5179 enum rtx_code scode;
5180 rtx tem;
5182 if (unsignedp)
5183 code = unsigned_condition (code);
5184 scode = swap_condition (code);
5186 /* If one operand is constant, make it the second one. Only do this
5187 if the other operand is not constant as well. */
5189 if (swap_commutative_operands_p (op0, op1))
5191 tem = op0;
5192 op0 = op1;
5193 op1 = tem;
5194 code = swap_condition (code);
5197 if (mode == VOIDmode)
5198 mode = GET_MODE (op0);
5200 /* For some comparisons with 1 and -1, we can convert this to
5201 comparisons with zero. This will often produce more opportunities for
5202 store-flag insns. */
5204 switch (code)
5206 case LT:
5207 if (op1 == const1_rtx)
5208 op1 = const0_rtx, code = LE;
5209 break;
5210 case LE:
5211 if (op1 == constm1_rtx)
5212 op1 = const0_rtx, code = LT;
5213 break;
5214 case GE:
5215 if (op1 == const1_rtx)
5216 op1 = const0_rtx, code = GT;
5217 break;
5218 case GT:
5219 if (op1 == constm1_rtx)
5220 op1 = const0_rtx, code = GE;
5221 break;
5222 case GEU:
5223 if (op1 == const1_rtx)
5224 op1 = const0_rtx, code = NE;
5225 break;
5226 case LTU:
5227 if (op1 == const1_rtx)
5228 op1 = const0_rtx, code = EQ;
5229 break;
5230 default:
5231 break;
5234 /* If we are comparing a double-word integer with zero or -1, we can
5235 convert the comparison into one involving a single word. */
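/* For instance (an added illustration), on a 32-bit target a DImode test
   "x == 0" becomes "(xlow | xhigh) == 0", "x == -1" becomes
   "(xlow & xhigh) == -1", and "x < 0" or "x >= 0" only needs to look at
   the sign bit of the high word.  */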
5236 if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5237 && GET_MODE_CLASS (mode) == MODE_INT
5238 && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5240 if ((code == EQ || code == NE)
5241 && (op1 == const0_rtx || op1 == constm1_rtx))
5243 rtx op00, op01;
5245 /* Do a logical OR or AND of the two words and compare the
5246 result. */
5247 op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5248 op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5249 tem = expand_binop (word_mode,
5250 op1 == const0_rtx ? ior_optab : and_optab,
5251 op00, op01, NULL_RTX, unsignedp,
5252 OPTAB_DIRECT);
5254 if (tem != 0)
5255 tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5256 unsignedp, normalizep);
5258 else if ((code == LT || code == GE) && op1 == const0_rtx)
5260 rtx op0h;
5262 /* If testing the sign bit, can just test on high word. */
5263 op0h = simplify_gen_subreg (word_mode, op0, mode,
5264 subreg_highpart_offset (word_mode,
5265 mode));
5266 tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5267 unsignedp, normalizep);
5269 else
5270 tem = NULL_RTX;
5272 if (tem)
5274 if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5275 return tem;
5276 if (!target)
5277 target = gen_reg_rtx (target_mode);
5279 convert_move (target, tem,
5280 !val_signbit_known_set_p (word_mode,
5281 (normalizep ? normalizep
5282 : STORE_FLAG_VALUE)));
5283 return target;
5287 /* If this is A < 0 or A >= 0, we can do this by taking the ones
5288 complement of A (for GE) and shifting the sign bit to the low bit. */
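/* E.g. (an added illustration) for a 32-bit x:

     x < 0   is  (unsigned) x >> 31    (or x >> 31 for a -1/0 result),
     x >= 0  is  (unsigned) ~x >> 31   (likewise with an arithmetic shift),

   which is the one's complement (for GE) plus sign-bit shift used below.  */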
5289 if (op1 == const0_rtx && (code == LT || code == GE)
5290 && GET_MODE_CLASS (mode) == MODE_INT
5291 && (normalizep || STORE_FLAG_VALUE == 1
5292 || val_signbit_p (mode, STORE_FLAG_VALUE)))
5294 subtarget = target;
5296 if (!target)
5297 target_mode = mode;
5299 /* If the result is to be wider than OP0, it is best to convert it
5300 first. If it is to be narrower, it is *incorrect* to convert it
5301 first. */
5302 else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5304 op0 = convert_modes (target_mode, mode, op0, 0);
5305 mode = target_mode;
5308 if (target_mode != mode)
5309 subtarget = 0;
5311 if (code == GE)
5312 op0 = expand_unop (mode, one_cmpl_optab, op0,
5313 ((STORE_FLAG_VALUE == 1 || normalizep)
5314 ? 0 : subtarget), 0);
5316 if (STORE_FLAG_VALUE == 1 || normalizep)
5317 /* If we are supposed to produce a 0/1 value, we want to do
5318 a logical shift from the sign bit to the low-order bit; for
5319 a -1/0 value, we do an arithmetic shift. */
5320 op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5321 GET_MODE_BITSIZE (mode) - 1,
5322 subtarget, normalizep != -1);
5324 if (mode != target_mode)
5325 op0 = convert_modes (target_mode, mode, op0, 0);
5327 return op0;
5330 mclass = GET_MODE_CLASS (mode);
5331 for (compare_mode = mode; compare_mode != VOIDmode;
5332 compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5334 enum machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5335 icode = optab_handler (cstore_optab, optab_mode);
5336 if (icode != CODE_FOR_nothing)
5338 do_pending_stack_adjust ();
5339 tem = emit_cstore (target, icode, code, mode, compare_mode,
5340 unsignedp, op0, op1, normalizep, target_mode);
5341 if (tem)
5342 return tem;
5344 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5346 tem = emit_cstore (target, icode, scode, mode, compare_mode,
5347 unsignedp, op1, op0, normalizep, target_mode);
5348 if (tem)
5349 return tem;
5351 break;
5355 return 0;
5358 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1,
5359 storing the result in TARGET. Normally return TARGET.
5360 Return 0 if that cannot be done.
5362 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
5363 it is VOIDmode, they cannot both be CONST_INT.
5365 UNSIGNEDP is for the case where we have to widen the operands
5366 to perform the operation. It says to use zero-extension.
5368 NORMALIZEP is 1 if we should convert the result to be either zero
5369 or one. NORMALIZEP is -1 if we should convert the result to be
5370 either zero or -1. If NORMALIZEP is zero, the result will be left
5371 "raw" out of the scc insn. */
5373 rtx
5374 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5375 enum machine_mode mode, int unsignedp, int normalizep)
5377 enum machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5378 enum rtx_code rcode;
5379 rtx subtarget;
5380 rtx tem, last, trueval;
5382 tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5383 target_mode);
5384 if (tem)
5385 return tem;
5387 /* If we reached here, we can't do this with a scc insn; however, there
5388 are some comparisons that can be done in other ways. Don't do any
5389 of these cases if branches are very cheap. */
5390 if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5391 return 0;
5393 /* See what we need to return. We can only return a 1, -1, or the
5394 sign bit. */
5396 if (normalizep == 0)
5398 if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5399 normalizep = STORE_FLAG_VALUE;
5401 else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5402 ;
5403 else
5404 return 0;
5407 last = get_last_insn ();
5409 /* If optimizing, use different pseudo registers for each insn, instead
5410 of reusing the same pseudo. This leads to better CSE, but slows
5411 down the compiler, since there are more pseudos. */
5412 subtarget = (!optimize
5413 && (target_mode == mode)) ? target : NULL_RTX;
5414 trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5416 /* For floating-point comparisons, try the reverse comparison or try
5417 changing the "orderedness" of the comparison. */
5418 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5420 enum rtx_code first_code;
5421 bool and_them;
5423 rcode = reverse_condition_maybe_unordered (code);
5424 if (can_compare_p (rcode, mode, ccp_store_flag)
5425 && (code == ORDERED || code == UNORDERED
5426 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5427 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5429 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5430 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5432 /* For the reverse comparison, use either an addition or a XOR. */
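/* The reverse comparison yields the opposite truth value, so a cheap
   fix-up recovers the original one: e.g. with STORE_FLAG_VALUE == 1
   and normalizep == -1, the reversed result is 1/0 and adding -1
   turns it into the desired 0/-1; the XOR variant flips a 0/TRUEVAL
   result to TRUEVAL/0 instead.  */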
5433 if (want_add
5434 && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5435 optimize_insn_for_speed_p ()) == 0)
5437 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5438 STORE_FLAG_VALUE, target_mode);
5439 if (tem)
5440 return expand_binop (target_mode, add_optab, tem,
5441 GEN_INT (normalizep),
5442 target, 0, OPTAB_WIDEN);
5444 else if (!want_add
5445 && rtx_cost (trueval, XOR, 1,
5446 optimize_insn_for_speed_p ()) == 0)
5448 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5449 normalizep, target_mode);
5450 if (tem)
5451 return expand_binop (target_mode, xor_optab, tem, trueval,
5452 target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
5456 delete_insns_since (last);
5458 /* Cannot split ORDERED and UNORDERED, only try the above trick. */
5459 if (code == ORDERED || code == UNORDERED)
5460 return 0;
5462 and_them = split_comparison (code, mode, &first_code, &code);
5464 /* If there are no NaNs, the first comparison should always fall through.
5465 Effectively change the comparison to the other one. */
5466 if (!HONOR_NANS (mode))
5468 gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5469 return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5470 target_mode);
5473 #ifdef HAVE_conditional_move
5474 /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5475 conditional move. */
5476 tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5477 normalizep, target_mode);
5478 if (tem == 0)
5479 return 0;
5481 if (and_them)
5482 tem = emit_conditional_move (target, code, op0, op1, mode,
5483 tem, const0_rtx, GET_MODE (tem), 0);
5484 else
5485 tem = emit_conditional_move (target, code, op0, op1, mode,
5486 trueval, tem, GET_MODE (tem), 0);
5488 if (tem == 0)
5489 delete_insns_since (last);
5490 return tem;
5491 #else
5492 return 0;
5493 #endif
5496 /* The remaining tricks only apply to integer comparisons. */
5498 if (GET_MODE_CLASS (mode) != MODE_INT)
5499 return 0;
5501 /* If this is an equality comparison of integers, we can try to exclusive-or
5502 (or subtract) the two operands and use a recursive call to try the
5503 comparison with zero. Don't do any of these cases if branches are
5504 very cheap. */
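/* E.g. "a == b" is tried as "(a ^ b) == 0" (or "(a - b) == 0" if no
   XOR pattern exists); the recursive emit_store_flag call may then
   use one of the zero-comparison tricks below.  */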
5506 if ((code == EQ || code == NE) && op1 != const0_rtx)
5508 tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5509 OPTAB_WIDEN);
5511 if (tem == 0)
5512 tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5513 OPTAB_WIDEN);
5514 if (tem != 0)
5515 tem = emit_store_flag (target, code, tem, const0_rtx,
5516 mode, unsignedp, normalizep);
5517 if (tem != 0)
5518 return tem;
5520 delete_insns_since (last);
5523 /* For integer comparisons, try the reverse comparison. However, for
5524 small X, where we would have to extend anyway, implementing "X != 0"
5525 as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0". */
5526 rcode = reverse_condition (code);
5527 if (can_compare_p (rcode, mode, ccp_store_flag)
5528 && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5529 && code == NE
5530 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5531 && op1 == const0_rtx))
5533 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5534 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5536 /* Again, for the reverse comparison, use either an addition or a XOR. */
5537 if (want_add
5538 && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5539 optimize_insn_for_speed_p ()) == 0)
5541 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5542 STORE_FLAG_VALUE, target_mode);
5543 if (tem != 0)
5544 tem = expand_binop (target_mode, add_optab, tem,
5545 GEN_INT (normalizep), target, 0, OPTAB_WIDEN);
5547 else if (!want_add
5548 && rtx_cost (trueval, XOR, 1,
5549 optimize_insn_for_speed_p ()) == 0)
5551 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5552 normalizep, target_mode);
5553 if (tem != 0)
5554 tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5555 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5558 if (tem != 0)
5559 return tem;
5560 delete_insns_since (last);
5563 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5564 the constant zero. Reject all other comparisons at this point. Only
5565 do LE and GT if branches are expensive, since the sequences they need
5566 are expensive on 2-operand machines. */
5568 if (op1 != const0_rtx
5569 || (code != EQ && code != NE
5570 && (BRANCH_COST (optimize_insn_for_speed_p (),
5571 false) <= 1 || (code != LE && code != GT))))
5572 return 0;
5574 /* Try to put the result of the comparison in the sign bit. Assume we can't
5575 do the necessary operation below. */
5577 tem = 0;
5579 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
5580 the sign bit set. */
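/* Worked example, assuming a 32-bit int: A = 0 gives 0 | -1 = -1
   (sign bit set); A = -4 gives -4 | -5 = -1 (set); A = 6 gives
   6 | 5 = 7 (clear).  */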
5582 if (code == LE)
5584 /* This is destructive, so SUBTARGET can't be OP0. */
5585 if (rtx_equal_p (subtarget, op0))
5586 subtarget = 0;
5588 tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5589 OPTAB_WIDEN);
5590 if (tem)
5591 tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5592 OPTAB_WIDEN);
5595 /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5596 number of bits in the mode of OP0, minus one. */
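/* Worked example, 32-bit int, BITS = 31: A = 6 gives (6 >> 31) - 6 = -6
   (sign bit set, so A > 0); A = 0 gives 0 - 0 = 0 (clear); A = -3 gives
   -1 - (-3) = 2 (clear).  */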
5598 if (code == GT)
5600 if (rtx_equal_p (subtarget, op0))
5601 subtarget = 0;
5603 tem = expand_shift (RSHIFT_EXPR, mode, op0,
5604 GET_MODE_BITSIZE (mode) - 1,
5605 subtarget, 0);
5606 tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5607 OPTAB_WIDEN);
5610 if (code == EQ || code == NE)
5612 /* For EQ or NE, one way to do the comparison is to apply an operation
5613 that converts the operand into a positive number if it is nonzero
5614 or zero if it was originally zero. Then, for EQ, we subtract 1 and
5615 for NE we negate. This puts the result in the sign bit. Then we
5616 normalize with a shift, if needed.
5618 Two operations that can do the above actions are ABS and FFS, so try
5619 them. If that doesn't work, and MODE is smaller than a full word,
5620 we can use zero-extension to the wider mode (an unsigned conversion)
5621 as the operation. */
5623 /* Note that ABS doesn't yield a positive number for INT_MIN, but
5624 that is compensated by the subsequent overflow when subtracting
5625 one / negating. */
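/* E.g. for NE via abs: -abs(x) is negative exactly when x != 0 (for
   x == INT_MIN both operations wrap, but the sign bit still ends up
   set), so the sign bit is the NE result; for EQ, abs(x) - 1 has the
   sign bit set only for x == 0.  */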
5627 if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5628 tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5629 else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5630 tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5631 else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5633 tem = convert_modes (word_mode, mode, op0, 1);
5634 mode = word_mode;
5637 if (tem != 0)
5639 if (code == EQ)
5640 tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5641 0, OPTAB_WIDEN);
5642 else
5643 tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5646 /* If we couldn't do it that way, for NE we can "or" the two's complement
5647 of the value with itself. For EQ, we take the one's complement of
5648 that "or", which is an extra insn, so we only handle EQ if branches
5649 are expensive. */
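/* I.e. (-x | x) has the sign bit set exactly when x != 0; taking the
   one's complement of that result instead gives a sign bit that is
   set exactly when x == 0.  */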
5651 if (tem == 0
5652 && (code == NE
5653 || BRANCH_COST (optimize_insn_for_speed_p (),
5654 false) > 1))
5656 if (rtx_equal_p (subtarget, op0))
5657 subtarget = 0;
5659 tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5660 tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5661 OPTAB_WIDEN);
5663 if (tem && code == EQ)
5664 tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5668 if (tem && normalizep)
5669 tem = expand_shift (RSHIFT_EXPR, mode, tem,
5670 GET_MODE_BITSIZE (mode) - 1,
5671 subtarget, normalizep == 1);
5673 if (tem)
5675 if (!target)
5676 ;
5677 else if (GET_MODE (tem) != target_mode)
5679 convert_move (target, tem, 0);
5680 tem = target;
5682 else if (!subtarget)
5684 emit_move_insn (target, tem);
5685 tem = target;
5688 else
5689 delete_insns_since (last);
5691 return tem;
5694 /* Like emit_store_flag, but always succeeds. */
5696 rtx
5697 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5698 enum machine_mode mode, int unsignedp, int normalizep)
5700 rtx tem, label;
5701 rtx trueval, falseval;
5703 /* First see if emit_store_flag can do the job. */
5704 tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5705 if (tem != 0)
5706 return tem;
5708 if (!target)
5709 target = gen_reg_rtx (word_mode);
5711 /* If this failed, we have to do this with set/compare/jump/set code. For
5712 foo != 0, if foo is already in TARGET, just replace it with TRUEVAL when nonzero. */
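/* Shape of the fallback emitted below (illustrative only):
     for "target = (target != 0)":   if (target == 0) goto L;
                                     target = TRUEVAL;
                                   L:
     in general:                     target = TRUEVAL;
                                     if (OP0 <CODE> OP1) goto L;
                                     target = FALSEVAL;
                                   L:                              */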
5713 trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5714 if (code == NE
5715 && GET_MODE_CLASS (mode) == MODE_INT
5716 && REG_P (target)
5717 && op0 == target
5718 && op1 == const0_rtx)
5720 label = gen_label_rtx ();
5721 do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp,
5722 mode, NULL_RTX, NULL_RTX, label, -1);
5723 emit_move_insn (target, trueval);
5724 emit_label (label);
5725 return target;
5728 if (!REG_P (target)
5729 || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5730 target = gen_reg_rtx (GET_MODE (target));
5732 /* Jump in the right direction if the target cannot implement CODE
5733 but can jump on its reverse condition. */
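/* E.g. if the backend cannot branch on UNLT but can branch on GE (its
   maybe-unordered reverse), swap TRUEVAL and FALSEVAL and branch on GE
   instead; the stored values then still match the original UNLT
   condition.  */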
5734 falseval = const0_rtx;
5735 if (! can_compare_p (code, mode, ccp_jump)
5736 && (! FLOAT_MODE_P (mode)
5737 || code == ORDERED || code == UNORDERED
5738 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5739 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5741 enum rtx_code rcode;
5742 if (FLOAT_MODE_P (mode))
5743 rcode = reverse_condition_maybe_unordered (code);
5744 else
5745 rcode = reverse_condition (code);
5747 /* Canonicalize to UNORDERED for the libcall. */
5748 if (can_compare_p (rcode, mode, ccp_jump)
5749 || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5751 falseval = trueval;
5752 trueval = const0_rtx;
5753 code = rcode;
5757 emit_move_insn (target, trueval);
5758 label = gen_label_rtx ();
5759 do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5760 NULL_RTX, label, -1);
5762 emit_move_insn (target, falseval);
5763 emit_label (label);
5765 return target;
5768 /* Perform a possibly multi-word comparison and conditional jump to LABEL
5769 if ARG1 OP ARG2 is true, where ARG1 and ARG2 are of mode MODE. This is
5770 now a thin wrapper around do_compare_rtx_and_jump. */
5772 static void
5773 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
5774 rtx label)
5776 int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5777 do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode,
5778 NULL_RTX, NULL_RTX, label, -1);