/* Medium-level subroutines: convert bit-field store and extract
   and shifts, multiplies and divides to rtl instructions.
   Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
   1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "diagnostic-core.h"
#include "insn-config.h"
#include "langhooks.h"
struct target_expmed default_target_expmed;
struct target_expmed *this_target_expmed = &default_target_expmed;
static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
                                   unsigned HOST_WIDE_INT,
                                   unsigned HOST_WIDE_INT,
                                   unsigned HOST_WIDE_INT,
                                   unsigned HOST_WIDE_INT,
                                   rtx);
static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
                                   unsigned HOST_WIDE_INT,
                                   unsigned HOST_WIDE_INT,
                                   unsigned HOST_WIDE_INT,
                                   rtx);
static rtx extract_fixed_bit_field (enum machine_mode, rtx,
                                    unsigned HOST_WIDE_INT,
                                    unsigned HOST_WIDE_INT,
                                    unsigned HOST_WIDE_INT, rtx, int, bool);
static rtx mask_rtx (enum machine_mode, int, int, int);
static rtx lshift_value (enum machine_mode, rtx, int, int);
static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
                                    unsigned HOST_WIDE_INT, int);
static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
/* Test whether a value is zero or a power of two.  */
#define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
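/* For instance, EXACT_POWER_OF_2_OR_ZERO_P (8) and
   EXACT_POWER_OF_2_OR_ZERO_P (0) are true, while
   EXACT_POWER_OF_2_OR_ZERO_P (12) is not: clearing the lowest set bit
   of a power of two (or of zero) always leaves zero.  */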
#ifndef SLOW_UNALIGNED_ACCESS
#define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
#endif

/* Reduce conditional compilation elsewhere.  */
#ifndef HAVE_insv
#define HAVE_insv 0
#define CODE_FOR_insv CODE_FOR_nothing
#define gen_insv(a,b,c,d) NULL_RTX
#endif
#ifndef HAVE_extv
#define HAVE_extv 0
#define CODE_FOR_extv CODE_FOR_nothing
#define gen_extv(a,b,c,d) NULL_RTX
#endif
#ifndef HAVE_extzv
#define HAVE_extzv 0
#define CODE_FOR_extzv CODE_FOR_nothing
#define gen_extzv(a,b,c,d) NULL_RTX
#endif
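/* With the fallback definitions above, code of the form
   "if (HAVE_insv) ... gen_insv (...)" still compiles on targets that
   lack the pattern; the guarded branch is simply never taken.  */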
struct init_expmed_rtl
{
  struct rtx_def reg;         rtunion reg_fld[2];
  struct rtx_def plus;        rtunion plus_fld1;
  struct rtx_def neg;
  struct rtx_def mult;        rtunion mult_fld1;
  struct rtx_def sdiv;        rtunion sdiv_fld1;
  struct rtx_def udiv;        rtunion udiv_fld1;
  struct rtx_def sdiv_32;     rtunion sdiv_32_fld1;
  struct rtx_def smod_32;     rtunion smod_32_fld1;
  struct rtx_def wide_mult;   rtunion wide_mult_fld1;
  struct rtx_def wide_lshr;   rtunion wide_lshr_fld1;
  struct rtx_def wide_trunc;
  struct rtx_def shift;       rtunion shift_fld1;
  struct rtx_def shift_mult;  rtunion shift_mult_fld1;
  struct rtx_def shift_add;   rtunion shift_add_fld1;
  struct rtx_def shift_sub0;  rtunion shift_sub0_fld1;
  struct rtx_def shift_sub1;  rtunion shift_sub1_fld1;
  struct rtx_def zext;
  struct rtx_def trunc;

  rtx pow2[MAX_BITS_PER_WORD];
  rtx cint[MAX_BITS_PER_WORD];
};
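/* These are statically allocated dummy rtxes; init_expmed wires their
   operands up with PUT_CODE/PUT_MODE/XEXP and feeds them to the rtx cost
   routines, so no real rtl needs to be allocated for each queried
   operation.  */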
static void
init_expmed_one_conv (struct init_expmed_rtl *all, enum machine_mode to_mode,
                      enum machine_mode from_mode, bool speed)
{
  int to_size, from_size;
  rtx which;

  /* We're given no information about the true size of a partial integer,
     only the size of the "full" integer it requires for storage.  For
     comparison purposes here, reduce the bit size by one in that case.  */
  to_size = (GET_MODE_BITSIZE (to_mode)
             - (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT));
  from_size = (GET_MODE_BITSIZE (from_mode)
               - (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT));

  /* Assume cost of zero-extend and sign-extend is the same.  */
  which = (to_size < from_size ? &all->trunc : &all->zext);

  PUT_MODE (&all->reg, from_mode);
  set_convert_cost (to_mode, from_mode, speed, set_src_cost (which, speed));
}
static void
init_expmed_one_mode (struct init_expmed_rtl *all,
                      enum machine_mode mode, int speed)
{
  int m, n, mode_bitsize;
  enum machine_mode mode_from;

  mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);

  PUT_MODE (&all->reg, mode);
  PUT_MODE (&all->plus, mode);
  PUT_MODE (&all->neg, mode);
  PUT_MODE (&all->mult, mode);
  PUT_MODE (&all->sdiv, mode);
  PUT_MODE (&all->udiv, mode);
  PUT_MODE (&all->sdiv_32, mode);
  PUT_MODE (&all->smod_32, mode);
  PUT_MODE (&all->wide_trunc, mode);
  PUT_MODE (&all->shift, mode);
  PUT_MODE (&all->shift_mult, mode);
  PUT_MODE (&all->shift_add, mode);
  PUT_MODE (&all->shift_sub0, mode);
  PUT_MODE (&all->shift_sub1, mode);
  PUT_MODE (&all->zext, mode);
  PUT_MODE (&all->trunc, mode);

  set_add_cost (speed, mode, set_src_cost (&all->plus, speed));
  set_neg_cost (speed, mode, set_src_cost (&all->neg, speed));
  set_mul_cost (speed, mode, set_src_cost (&all->mult, speed));
  set_sdiv_cost (speed, mode, set_src_cost (&all->sdiv, speed));
  set_udiv_cost (speed, mode, set_src_cost (&all->udiv, speed));

  set_sdiv_pow2_cheap (speed, mode, (set_src_cost (&all->sdiv_32, speed)
                                     <= 2 * add_cost (speed, mode)));
  set_smod_pow2_cheap (speed, mode, (set_src_cost (&all->smod_32, speed)
                                     <= 4 * add_cost (speed, mode)));

  set_shift_cost (speed, mode, 0, 0);
  {
    int cost = add_cost (speed, mode);
    set_shiftadd_cost (speed, mode, 0, cost);
    set_shiftsub0_cost (speed, mode, 0, cost);
    set_shiftsub1_cost (speed, mode, 0, cost);
  }

  n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
  for (m = 1; m < n; m++)
    {
      XEXP (&all->shift, 1) = all->cint[m];
      XEXP (&all->shift_mult, 1) = all->pow2[m];

      set_shift_cost (speed, mode, m, set_src_cost (&all->shift, speed));
      set_shiftadd_cost (speed, mode, m, set_src_cost (&all->shift_add, speed));
      set_shiftsub0_cost (speed, mode, m, set_src_cost (&all->shift_sub0, speed));
      set_shiftsub1_cost (speed, mode, m, set_src_cost (&all->shift_sub1, speed));
    }

  if (SCALAR_INT_MODE_P (mode))
    {
      for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
           mode_from = (enum machine_mode)(mode_from + 1))
        init_expmed_one_conv (all, mode, mode_from, speed);
    }
  if (GET_MODE_CLASS (mode) == MODE_INT)
    {
      enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
      if (wider_mode != VOIDmode)
        {
          PUT_MODE (&all->zext, wider_mode);
          PUT_MODE (&all->wide_mult, wider_mode);
          PUT_MODE (&all->wide_lshr, wider_mode);
          XEXP (&all->wide_lshr, 1) = GEN_INT (mode_bitsize);

          set_mul_widen_cost (speed, wider_mode,
                              set_src_cost (&all->wide_mult, speed));
          set_mul_highpart_cost (speed, mode,
                                 set_src_cost (&all->wide_trunc, speed));
        }
    }
}
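/* Note that the highpart-multiply cost is taken from the wide_trunc
   template, i.e. (truncate ((zext (reg) * zext (reg)) >> bitsize)),
   which is roughly the shape used when a highpart multiply is
   synthesized from a widening multiply.  */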
void
init_expmed (void)
{
  struct init_expmed_rtl all;
  enum machine_mode mode;
  int m, speed;

  memset (&all, 0, sizeof all);
  for (m = 1; m < MAX_BITS_PER_WORD; m++)
    {
      all.pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
      all.cint[m] = GEN_INT (m);
    }

  PUT_CODE (&all.reg, REG);
  /* Avoid using hard regs in ways which may be unsupported.  */
  SET_REGNO (&all.reg, LAST_VIRTUAL_REGISTER + 1);

  PUT_CODE (&all.plus, PLUS);
  XEXP (&all.plus, 0) = &all.reg;
  XEXP (&all.plus, 1) = &all.reg;

  PUT_CODE (&all.neg, NEG);
  XEXP (&all.neg, 0) = &all.reg;

  PUT_CODE (&all.mult, MULT);
  XEXP (&all.mult, 0) = &all.reg;
  XEXP (&all.mult, 1) = &all.reg;

  PUT_CODE (&all.sdiv, DIV);
  XEXP (&all.sdiv, 0) = &all.reg;
  XEXP (&all.sdiv, 1) = &all.reg;

  PUT_CODE (&all.udiv, UDIV);
  XEXP (&all.udiv, 0) = &all.reg;
  XEXP (&all.udiv, 1) = &all.reg;

  PUT_CODE (&all.sdiv_32, DIV);
  XEXP (&all.sdiv_32, 0) = &all.reg;
  XEXP (&all.sdiv_32, 1) = 32 < MAX_BITS_PER_WORD ? all.cint[32] : GEN_INT (32);

  PUT_CODE (&all.smod_32, MOD);
  XEXP (&all.smod_32, 0) = &all.reg;
  XEXP (&all.smod_32, 1) = XEXP (&all.sdiv_32, 1);

  PUT_CODE (&all.zext, ZERO_EXTEND);
  XEXP (&all.zext, 0) = &all.reg;

  PUT_CODE (&all.wide_mult, MULT);
  XEXP (&all.wide_mult, 0) = &all.zext;
  XEXP (&all.wide_mult, 1) = &all.zext;

  PUT_CODE (&all.wide_lshr, LSHIFTRT);
  XEXP (&all.wide_lshr, 0) = &all.wide_mult;

  PUT_CODE (&all.wide_trunc, TRUNCATE);
  XEXP (&all.wide_trunc, 0) = &all.wide_lshr;

  PUT_CODE (&all.shift, ASHIFT);
  XEXP (&all.shift, 0) = &all.reg;

  PUT_CODE (&all.shift_mult, MULT);
  XEXP (&all.shift_mult, 0) = &all.reg;

  PUT_CODE (&all.shift_add, PLUS);
  XEXP (&all.shift_add, 0) = &all.shift_mult;
  XEXP (&all.shift_add, 1) = &all.reg;

  PUT_CODE (&all.shift_sub0, MINUS);
  XEXP (&all.shift_sub0, 0) = &all.shift_mult;
  XEXP (&all.shift_sub0, 1) = &all.reg;

  PUT_CODE (&all.shift_sub1, MINUS);
  XEXP (&all.shift_sub1, 0) = &all.reg;
  XEXP (&all.shift_sub1, 1) = &all.shift_mult;

  PUT_CODE (&all.trunc, TRUNCATE);
  XEXP (&all.trunc, 0) = &all.reg;

  for (speed = 0; speed < 2; speed++)
    {
      crtl->maybe_hot_insn_p = speed;
      set_zero_cost (speed, set_src_cost (const0_rtx, speed));

      for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
           mode = (enum machine_mode)(mode + 1))
        init_expmed_one_mode (&all, mode, speed);

      if (MIN_MODE_PARTIAL_INT != VOIDmode)
        for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
             mode = (enum machine_mode)(mode + 1))
          init_expmed_one_mode (&all, mode, speed);

      if (MIN_MODE_VECTOR_INT != VOIDmode)
        for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
             mode = (enum machine_mode)(mode + 1))
          init_expmed_one_mode (&all, mode, speed);
    }

  if (alg_hash_used_p ())
    {
      struct alg_hash_entry *p = alg_hash_entry_ptr (0);
      memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
    }
  else
    set_alg_hash_used_p (true);
  default_rtl_profile ();
}
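/* Every cost above is recorded twice: once with SPEED == 0 (optimizing
   for size) and once with SPEED == 1 (optimizing for speed);
   crtl->maybe_hot_insn_p is set to match so that any target cost hooks
   that consult the insn profile agree with the SPEED argument.  */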
/* Return an rtx representing minus the value of X.
   MODE is the intended mode of the result,
   useful if X is a CONST_INT.  */

rtx
negate_rtx (enum machine_mode mode, rtx x)
{
  rtx result = simplify_unary_operation (NEG, mode, x, mode);

  if (result == 0)
    result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);

  return result;
}
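/* A CONST_INT operand is folded immediately by the
   simplify_unary_operation call (e.g. negate_rtx (SImode, const1_rtx)
   yields constm1_rtx); other operands go through the expand_unop path
   and emit a real negation.  */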
/* Report on the availability of insv/extv/extzv and the desired mode
   of each of their operands.  Returns MAX_MACHINE_MODE if HAVE_foo
   is false; else the mode of the specified operand.  If OPNO is -1,
   all the caller cares about is whether the insn is available.  */

enum machine_mode
mode_for_extraction (enum extraction_pattern pattern, int opno)
{
  const struct insn_data_d *data;

  switch (pattern)
    {
    case EP_insv:
      if (HAVE_insv)
        {
          data = &insn_data[CODE_FOR_insv];
          break;
        }
      return MAX_MACHINE_MODE;

    case EP_extv:
      if (HAVE_extv)
        {
          data = &insn_data[CODE_FOR_extv];
          break;
        }
      return MAX_MACHINE_MODE;

    case EP_extzv:
      if (HAVE_extzv)
        {
          data = &insn_data[CODE_FOR_extzv];
          break;
        }
      return MAX_MACHINE_MODE;

    default:
      gcc_unreachable ();
    }

  if (opno == -1)
    return VOIDmode;

  /* Everyone who uses this function used to follow it with
     if (result == VOIDmode) result = word_mode; */
  if (data->operand[opno].mode == VOIDmode)
    return word_mode;
  return data->operand[opno].mode;
}
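/* Callers compare the result against MAX_MACHINE_MODE to find out
   whether the insv/extv/extzv pattern exists at all; see the op_mode and
   ext_mode checks in store_bit_field_1 and extract_bit_field_1 below.  */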
/* A subroutine of store_bit_field, with the same arguments.  Return true
   if the operation could be implemented.

   If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
   no other way of implementing the operation.  If FALLBACK_P is false,
   return false instead.  */

static bool
store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
                   unsigned HOST_WIDE_INT bitnum,
                   unsigned HOST_WIDE_INT bitregion_start,
                   unsigned HOST_WIDE_INT bitregion_end,
                   enum machine_mode fieldmode,
                   rtx value, bool fallback_p)
{
  unsigned int unit
    = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
  unsigned HOST_WIDE_INT offset, bitpos;
419 enum machine_mode op_mode
= mode_for_extraction (EP_insv
, 3);
421 while (GET_CODE (op0
) == SUBREG
)
423 /* The following line once was done only if WORDS_BIG_ENDIAN,
424 but I think that is a mistake. WORDS_BIG_ENDIAN is
425 meaningful at a much higher level; when structures are copied
426 between memory and regs, the higher-numbered regs
427 always get higher addresses. */
428 int inner_mode_size
= GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0
)));
429 int outer_mode_size
= GET_MODE_SIZE (GET_MODE (op0
));
433 /* Paradoxical subregs need special handling on big endian machines. */
434 if (SUBREG_BYTE (op0
) == 0 && inner_mode_size
< outer_mode_size
)
436 int difference
= inner_mode_size
- outer_mode_size
;
438 if (WORDS_BIG_ENDIAN
)
439 byte_offset
+= (difference
/ UNITS_PER_WORD
) * UNITS_PER_WORD
;
440 if (BYTES_BIG_ENDIAN
)
441 byte_offset
+= difference
% UNITS_PER_WORD
;
444 byte_offset
= SUBREG_BYTE (op0
);
446 bitnum
+= byte_offset
* BITS_PER_UNIT
;
447 op0
= SUBREG_REG (op0
);
450 /* No action is needed if the target is a register and if the field
451 lies completely outside that register. This can occur if the source
452 code contains an out-of-bounds access to a small array. */
453 if (REG_P (op0
) && bitnum
>= GET_MODE_BITSIZE (GET_MODE (op0
)))
456 /* Use vec_set patterns for inserting parts of vectors whenever
458 if (VECTOR_MODE_P (GET_MODE (op0
))
460 && optab_handler (vec_set_optab
, GET_MODE (op0
)) != CODE_FOR_nothing
461 && fieldmode
== GET_MODE_INNER (GET_MODE (op0
))
462 && bitsize
== GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0
)))
463 && !(bitnum
% GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0
)))))
465 struct expand_operand ops
[3];
466 enum machine_mode outermode
= GET_MODE (op0
);
467 enum machine_mode innermode
= GET_MODE_INNER (outermode
);
468 enum insn_code icode
= optab_handler (vec_set_optab
, outermode
);
469 int pos
= bitnum
/ GET_MODE_BITSIZE (innermode
);
471 create_fixed_operand (&ops
[0], op0
);
472 create_input_operand (&ops
[1], value
, innermode
);
473 create_integer_operand (&ops
[2], pos
);
474 if (maybe_expand_insn (icode
, 3, ops
))
478 /* If the target is a register, overwriting the entire object, or storing
479 a full-word or multi-word field can be done with just a SUBREG.
481 If the target is memory, storing any naturally aligned field can be
482 done with a simple store. For targets that support fast unaligned
483 memory, any naturally sized, unit aligned field can be done directly. */
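/* For example, a word-sized field stored at a word-aligned BITNUM needs
   no masking: the code below simply rewrites OP0 as a SUBREG (register
   case) or an adjusted MEM (memory case) and emits a plain move.  */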
485 offset
= bitnum
/ unit
;
486 bitpos
= bitnum
% unit
;
487 byte_offset
= (bitnum
% BITS_PER_WORD
) / BITS_PER_UNIT
488 + (offset
* UNITS_PER_WORD
);
491 && bitsize
== GET_MODE_BITSIZE (fieldmode
)
493 ? ((GET_MODE_SIZE (fieldmode
) >= UNITS_PER_WORD
494 || GET_MODE_SIZE (GET_MODE (op0
)) == GET_MODE_SIZE (fieldmode
))
495 && ((GET_MODE (op0
) == fieldmode
&& byte_offset
== 0)
496 || validate_subreg (fieldmode
, GET_MODE (op0
), op0
,
498 : (! SLOW_UNALIGNED_ACCESS (fieldmode
, MEM_ALIGN (op0
))
499 || (offset
* BITS_PER_UNIT
% bitsize
== 0
500 && MEM_ALIGN (op0
) % GET_MODE_BITSIZE (fieldmode
) == 0))))
503 op0
= adjust_address (op0
, fieldmode
, offset
);
504 else if (GET_MODE (op0
) != fieldmode
)
505 op0
= simplify_gen_subreg (fieldmode
, op0
, GET_MODE (op0
),
507 emit_move_insn (op0
, value
);
511 /* Make sure we are playing with integral modes. Pun with subregs
512 if we aren't. This must come after the entire register case above,
513 since that case is valid for any mode. The following cases are only
514 valid for integral modes. */
516 enum machine_mode imode
= int_mode_for_mode (GET_MODE (op0
));
517 if (imode
!= GET_MODE (op0
))
520 op0
= adjust_address (op0
, imode
, 0);
523 gcc_assert (imode
!= BLKmode
);
524 op0
= gen_lowpart (imode
, op0
);
529 /* We may be accessing data outside the field, which means
530 we can alias adjacent data. */
531 /* ?? not always for C++0x memory model ?? */
534 op0
= shallow_copy_rtx (op0
);
535 set_mem_alias_set (op0
, 0);
536 set_mem_expr (op0
, 0);
539 /* If OP0 is a register, BITPOS must count within a word.
540 But as we have it, it counts within whatever size OP0 now has.
541 On a bigendian machine, these are not the same, so convert. */
544 && unit
> GET_MODE_BITSIZE (GET_MODE (op0
)))
545 bitpos
+= unit
- GET_MODE_BITSIZE (GET_MODE (op0
));
547 /* Storing an lsb-aligned field in a register
548 can be done with a movestrict instruction. */
551 && (BYTES_BIG_ENDIAN
? bitpos
+ bitsize
== unit
: bitpos
== 0)
552 && bitsize
== GET_MODE_BITSIZE (fieldmode
)
553 && optab_handler (movstrict_optab
, fieldmode
) != CODE_FOR_nothing
)
555 struct expand_operand ops
[2];
556 enum insn_code icode
= optab_handler (movstrict_optab
, fieldmode
);
558 unsigned HOST_WIDE_INT subreg_off
;
560 if (GET_CODE (arg0
) == SUBREG
)
562 /* Else we've got some float mode source being extracted into
563 a different float mode destination -- this combination of
564 subregs results in Severe Tire Damage. */
565 gcc_assert (GET_MODE (SUBREG_REG (arg0
)) == fieldmode
566 || GET_MODE_CLASS (fieldmode
) == MODE_INT
567 || GET_MODE_CLASS (fieldmode
) == MODE_PARTIAL_INT
);
568 arg0
= SUBREG_REG (arg0
);
571 subreg_off
= (bitnum
% BITS_PER_WORD
) / BITS_PER_UNIT
572 + (offset
* UNITS_PER_WORD
);
573 if (validate_subreg (fieldmode
, GET_MODE (arg0
), arg0
, subreg_off
))
575 arg0
= gen_rtx_SUBREG (fieldmode
, arg0
, subreg_off
);
577 create_fixed_operand (&ops
[0], arg0
);
578 /* Shrink the source operand to FIELDMODE. */
579 create_convert_operand_to (&ops
[1], value
, fieldmode
, false);
580 if (maybe_expand_insn (icode
, 2, ops
))
585 /* Handle fields bigger than a word. */
587 if (bitsize
> BITS_PER_WORD
)
589 /* Here we transfer the words of the field
590 in the order least significant first.
591 This is because the most significant word is the one which may
593 However, only do that if the value is not BLKmode. */
595 unsigned int backwards
= WORDS_BIG_ENDIAN
&& fieldmode
!= BLKmode
;
596 unsigned int nwords
= (bitsize
+ (BITS_PER_WORD
- 1)) / BITS_PER_WORD
;
600 /* This is the mode we must force value to, so that there will be enough
601 subwords to extract. Note that fieldmode will often (always?) be
602 VOIDmode, because that is what store_field uses to indicate that this
603 is a bit field, but passing VOIDmode to operand_subword_force
605 fieldmode
= GET_MODE (value
);
606 if (fieldmode
== VOIDmode
)
607 fieldmode
= smallest_mode_for_size (nwords
* BITS_PER_WORD
, MODE_INT
);
609 last
= get_last_insn ();
610 for (i
= 0; i
< nwords
; i
++)
612 /* If I is 0, use the low-order word in both field and target;
613 if I is 1, use the next to lowest word; and so on. */
614 unsigned int wordnum
= (backwards
615 ? GET_MODE_SIZE (fieldmode
) / UNITS_PER_WORD
618 unsigned int bit_offset
= (backwards
619 ? MAX ((int) bitsize
- ((int) i
+ 1)
622 : (int) i
* BITS_PER_WORD
);
623 rtx value_word
= operand_subword_force (value
, wordnum
, fieldmode
);
624 unsigned HOST_WIDE_INT new_bitsize
=
625 MIN (BITS_PER_WORD
, bitsize
- i
* BITS_PER_WORD
);
627 /* If the remaining chunk doesn't have full wordsize we have
628 to make sure that for big endian machines the higher order
630 if (new_bitsize
< BITS_PER_WORD
&& BYTES_BIG_ENDIAN
&& !backwards
)
631 value_word
= simplify_expand_binop (word_mode
, lshr_optab
,
633 GEN_INT (BITS_PER_WORD
638 if (!store_bit_field_1 (op0
, new_bitsize
,
640 bitregion_start
, bitregion_end
,
642 value_word
, fallback_p
))
644 delete_insns_since (last
);
651 /* From here on we can assume that the field to be stored in is
652 a full-word (whatever type that is), since it is shorter than a word. */
654 /* OFFSET is the number of words or bytes (UNIT says which)
655 from STR_RTX to the first word or byte containing part of the field. */
660 || GET_MODE_SIZE (GET_MODE (op0
)) > UNITS_PER_WORD
)
664 /* Since this is a destination (lvalue), we can't copy
665 it to a pseudo. We can remove a SUBREG that does not
666 change the size of the operand. Such a SUBREG may
667 have been added above. */
668 gcc_assert (GET_CODE (op0
) == SUBREG
669 && (GET_MODE_SIZE (GET_MODE (op0
))
670 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0
)))));
671 op0
= SUBREG_REG (op0
);
673 op0
= gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD
, MODE_INT
, 0),
674 op0
, (offset
* UNITS_PER_WORD
));
679 /* If VALUE has a floating-point or complex mode, access it as an
680 integer of the corresponding size. This can occur on a machine
681 with 64 bit registers that uses SFmode for float. It can also
682 occur for unaligned float or complex fields. */
684 if (GET_MODE (value
) != VOIDmode
685 && GET_MODE_CLASS (GET_MODE (value
)) != MODE_INT
686 && GET_MODE_CLASS (GET_MODE (value
)) != MODE_PARTIAL_INT
)
688 value
= gen_reg_rtx (int_mode_for_mode (GET_MODE (value
)));
689 emit_move_insn (gen_lowpart (GET_MODE (orig_value
), value
), orig_value
);
692 /* Now OFFSET is nonzero only if OP0 is memory
693 and is therefore always measured in bytes. */
696 && GET_MODE (value
) != BLKmode
698 && GET_MODE_BITSIZE (op_mode
) >= bitsize
699 /* Do not use insv for volatile bitfields when
700 -fstrict-volatile-bitfields is in effect. */
701 && !(MEM_P (op0
) && MEM_VOLATILE_P (op0
)
702 && flag_strict_volatile_bitfields
> 0)
703 && ! ((REG_P (op0
) || GET_CODE (op0
) == SUBREG
)
704 && (bitsize
+ bitpos
> GET_MODE_BITSIZE (op_mode
)))
705 /* Do not use insv if the bit region is restricted and
706 op_mode integer at offset doesn't fit into the
707 restricted region. */
708 && !(MEM_P (op0
) && bitregion_end
709 && bitnum
- bitpos
+ GET_MODE_BITSIZE (op_mode
)
710 > bitregion_end
+ 1))
712 struct expand_operand ops
[4];
713 int xbitpos
= bitpos
;
716 rtx last
= get_last_insn ();
717 bool copy_back
= false;
719 /* Add OFFSET into OP0's address. */
721 xop0
= adjust_address (xop0
, byte_mode
, offset
);
723 /* If xop0 is a register, we need it in OP_MODE
724 to make it acceptable to the format of insv. */
725 if (GET_CODE (xop0
) == SUBREG
)
726 /* We can't just change the mode, because this might clobber op0,
727 and we will need the original value of op0 if insv fails. */
728 xop0
= gen_rtx_SUBREG (op_mode
, SUBREG_REG (xop0
), SUBREG_BYTE (xop0
));
729 if (REG_P (xop0
) && GET_MODE (xop0
) != op_mode
)
730 xop0
= gen_lowpart_SUBREG (op_mode
, xop0
);
732 /* If the destination is a paradoxical subreg such that we need a
733 truncate to the inner mode, perform the insertion on a temporary and
734 truncate the result to the original destination. Note that we can't
735 just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
736 X) 0)) is (reg:N X). */
737 if (GET_CODE (xop0
) == SUBREG
738 && REG_P (SUBREG_REG (xop0
))
739 && (!TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0
)),
742 rtx tem
= gen_reg_rtx (op_mode
);
743 emit_move_insn (tem
, xop0
);
748 /* We have been counting XBITPOS within UNIT.
749 Count instead within the size of the register. */
750 if (BYTES_BIG_ENDIAN
&& !MEM_P (xop0
))
751 xbitpos
+= GET_MODE_BITSIZE (op_mode
) - unit
;
753 unit
= GET_MODE_BITSIZE (op_mode
);
755 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
756 "backwards" from the size of the unit we are inserting into.
757 Otherwise, we count bits from the most significant on a
758 BYTES/BITS_BIG_ENDIAN machine. */
760 if (BITS_BIG_ENDIAN
!= BYTES_BIG_ENDIAN
)
761 xbitpos
= unit
- bitsize
- xbitpos
;
763 /* Convert VALUE to op_mode (which insv insn wants) in VALUE1. */
765 if (GET_MODE (value
) != op_mode
)
767 if (GET_MODE_BITSIZE (GET_MODE (value
)) >= bitsize
)
769 /* Optimization: Don't bother really extending VALUE
770 if it has all the bits we will actually use. However,
771 if we must narrow it, be sure we do it correctly. */
773 if (GET_MODE_SIZE (GET_MODE (value
)) < GET_MODE_SIZE (op_mode
))
777 tmp
= simplify_subreg (op_mode
, value1
, GET_MODE (value
), 0);
779 tmp
= simplify_gen_subreg (op_mode
,
780 force_reg (GET_MODE (value
),
782 GET_MODE (value
), 0);
786 value1
= gen_lowpart (op_mode
, value1
);
788 else if (CONST_INT_P (value
))
789 value1
= gen_int_mode (INTVAL (value
), op_mode
);
791 /* Parse phase is supposed to make VALUE's data type
792 match that of the component reference, which is a type
793 at least as wide as the field; so VALUE should have
794 a mode that corresponds to that type. */
795 gcc_assert (CONSTANT_P (value
));
798 create_fixed_operand (&ops
[0], xop0
);
799 create_integer_operand (&ops
[1], bitsize
);
800 create_integer_operand (&ops
[2], xbitpos
);
801 create_input_operand (&ops
[3], value1
, op_mode
);
802 if (maybe_expand_insn (CODE_FOR_insv
, 4, ops
))
805 convert_move (op0
, xop0
, true);
808 delete_insns_since (last
);
811 /* If OP0 is a memory, try copying it to a register and seeing if a
812 cheap register alternative is available. */
813 if (HAVE_insv
&& MEM_P (op0
))
815 enum machine_mode bestmode
;
816 unsigned HOST_WIDE_INT maxbits
= MAX_FIXED_MODE_SIZE
;
819 maxbits
= bitregion_end
- bitregion_start
+ 1;
821 /* Get the mode to use for inserting into this field. If OP0 is
822 BLKmode, get the smallest mode consistent with the alignment. If
823 OP0 is a non-BLKmode object that is no wider than OP_MODE, use its
824 mode. Otherwise, use the smallest mode containing the field. */
826 if (GET_MODE (op0
) == BLKmode
827 || GET_MODE_BITSIZE (GET_MODE (op0
)) > maxbits
828 || (op_mode
!= MAX_MACHINE_MODE
829 && GET_MODE_SIZE (GET_MODE (op0
)) > GET_MODE_SIZE (op_mode
)))
830 bestmode
= get_best_mode (bitsize
, bitnum
,
831 bitregion_start
, bitregion_end
,
833 (op_mode
== MAX_MACHINE_MODE
834 ? VOIDmode
: op_mode
),
835 MEM_VOLATILE_P (op0
));
837 bestmode
= GET_MODE (op0
);
839 if (bestmode
!= VOIDmode
840 && GET_MODE_SIZE (bestmode
) >= GET_MODE_SIZE (fieldmode
)
841 && !(SLOW_UNALIGNED_ACCESS (bestmode
, MEM_ALIGN (op0
))
842 && GET_MODE_BITSIZE (bestmode
) > MEM_ALIGN (op0
)))
844 rtx last
, tempreg
, xop0
;
845 unsigned HOST_WIDE_INT xoffset
, xbitpos
;
847 last
= get_last_insn ();
849 /* Adjust address to point to the containing unit of
850 that mode. Compute the offset as a multiple of this unit,
851 counting in bytes. */
852 unit
= GET_MODE_BITSIZE (bestmode
);
853 xoffset
= (bitnum
/ unit
) * GET_MODE_SIZE (bestmode
);
854 xbitpos
= bitnum
% unit
;
855 xop0
= adjust_address (op0
, bestmode
, xoffset
);
857 /* Fetch that unit, store the bitfield in it, then store
859 tempreg
= copy_to_reg (xop0
);
860 if (store_bit_field_1 (tempreg
, bitsize
, xbitpos
,
861 bitregion_start
, bitregion_end
,
862 fieldmode
, orig_value
, false))
864 emit_move_insn (xop0
, tempreg
);
867 delete_insns_since (last
);
874 store_fixed_bit_field (op0
, offset
, bitsize
, bitpos
,
875 bitregion_start
, bitregion_end
, value
);
/* Generate code to store value from rtx VALUE
   into a bit-field within structure STR_RTX
   containing BITSIZE bits starting at bit BITNUM.

   BITREGION_START is bitpos of the first bitfield in this region.
   BITREGION_END is the bitpos of the ending bitfield in this region.
   These two fields are 0, if the C++ memory model does not apply,
   or we are not interested in keeping track of bitfield regions.

   FIELDMODE is the machine-mode of the FIELD_DECL node for this field.  */
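/* Sketch of the bit-region case: for something like
   struct { char a; int b : 24; } under the C++11 memory model, the bit
   region for B excludes A's byte, so the adjustment below keeps the
   store from widening into bytes it must not touch.  */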
891 store_bit_field (rtx str_rtx
, unsigned HOST_WIDE_INT bitsize
,
892 unsigned HOST_WIDE_INT bitnum
,
893 unsigned HOST_WIDE_INT bitregion_start
,
894 unsigned HOST_WIDE_INT bitregion_end
,
895 enum machine_mode fieldmode
,
898 /* Under the C++0x memory model, we must not touch bits outside the
899 bit region. Adjust the address to start at the beginning of the
901 if (MEM_P (str_rtx
) && bitregion_start
> 0)
903 enum machine_mode bestmode
;
904 enum machine_mode op_mode
;
905 unsigned HOST_WIDE_INT offset
;
907 op_mode
= mode_for_extraction (EP_insv
, 3);
908 if (op_mode
== MAX_MACHINE_MODE
)
911 gcc_assert ((bitregion_start
% BITS_PER_UNIT
) == 0);
913 offset
= bitregion_start
/ BITS_PER_UNIT
;
914 bitnum
-= bitregion_start
;
915 bitregion_end
-= bitregion_start
;
917 bestmode
= get_best_mode (bitsize
, bitnum
,
918 bitregion_start
, bitregion_end
,
921 MEM_VOLATILE_P (str_rtx
));
922 str_rtx
= adjust_address (str_rtx
, bestmode
, offset
);
925 if (!store_bit_field_1 (str_rtx
, bitsize
, bitnum
,
926 bitregion_start
, bitregion_end
,
927 fieldmode
, value
, true))
/* Use shifts and boolean operations to store VALUE
   into a bit field of width BITSIZE
   in a memory location specified by OP0 except offset by OFFSET bytes.
     (OFFSET must be 0 if OP0 is a register.)
   The field starts at position BITPOS within the byte.
     (If OP0 is a register, it may be a full word or a narrower mode,
      but BITPOS still counts within a full word,
      which is significant on bigendian machines.)  */
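/* The emitted sequence amounts to, roughly,
     op0 = (op0 & ~(mask << bitpos)) | ((value & mask) << bitpos);
   with the mask built by mask_rtx and the shifts performed in the chosen
   MODE.  */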
941 store_fixed_bit_field (rtx op0
, unsigned HOST_WIDE_INT offset
,
942 unsigned HOST_WIDE_INT bitsize
,
943 unsigned HOST_WIDE_INT bitpos
,
944 unsigned HOST_WIDE_INT bitregion_start
,
945 unsigned HOST_WIDE_INT bitregion_end
,
948 enum machine_mode mode
;
949 unsigned int total_bits
= BITS_PER_WORD
;
954 /* There is a case not handled here:
955 a structure with a known alignment of just a halfword
956 and a field split across two aligned halfwords within the structure.
957 Or likewise a structure with a known alignment of just a byte
958 and a field split across two bytes.
959 Such cases are not supposed to be able to occur. */
961 if (REG_P (op0
) || GET_CODE (op0
) == SUBREG
)
963 gcc_assert (!offset
);
964 /* Special treatment for a bit field split across two registers. */
965 if (bitsize
+ bitpos
> BITS_PER_WORD
)
967 store_split_bit_field (op0
, bitsize
, bitpos
,
968 bitregion_start
, bitregion_end
,
975 unsigned HOST_WIDE_INT maxbits
= MAX_FIXED_MODE_SIZE
;
978 maxbits
= bitregion_end
- bitregion_start
+ 1;
980 /* Get the proper mode to use for this field. We want a mode that
981 includes the entire field. If such a mode would be larger than
982 a word, we won't be doing the extraction the normal way.
983 We don't want a mode bigger than the destination. */
985 mode
= GET_MODE (op0
);
986 if (GET_MODE_BITSIZE (mode
) == 0
987 || GET_MODE_BITSIZE (mode
) > GET_MODE_BITSIZE (word_mode
))
990 if (MEM_VOLATILE_P (op0
)
991 && GET_MODE_BITSIZE (GET_MODE (op0
)) > 0
992 && GET_MODE_BITSIZE (GET_MODE (op0
)) <= maxbits
993 && flag_strict_volatile_bitfields
> 0)
994 mode
= GET_MODE (op0
);
996 mode
= get_best_mode (bitsize
, bitpos
+ offset
* BITS_PER_UNIT
,
997 bitregion_start
, bitregion_end
,
998 MEM_ALIGN (op0
), mode
, MEM_VOLATILE_P (op0
));
1000 if (mode
== VOIDmode
)
1002 /* The only way this should occur is if the field spans word
1004 store_split_bit_field (op0
, bitsize
, bitpos
+ offset
* BITS_PER_UNIT
,
1005 bitregion_start
, bitregion_end
, value
);
1009 total_bits
= GET_MODE_BITSIZE (mode
);
1011 /* Make sure bitpos is valid for the chosen mode. Adjust BITPOS to
1012 be in the range 0 to total_bits-1, and put any excess bytes in
1014 if (bitpos
>= total_bits
)
1016 offset
+= (bitpos
/ total_bits
) * (total_bits
/ BITS_PER_UNIT
);
1017 bitpos
-= ((bitpos
/ total_bits
) * (total_bits
/ BITS_PER_UNIT
)
1021 /* Get ref to an aligned byte, halfword, or word containing the field.
1022 Adjust BITPOS to be position within a word,
1023 and OFFSET to be the offset of that word.
1024 Then alter OP0 to refer to that word. */
1025 bitpos
+= (offset
% (total_bits
/ BITS_PER_UNIT
)) * BITS_PER_UNIT
;
1026 offset
-= (offset
% (total_bits
/ BITS_PER_UNIT
));
1027 op0
= adjust_address (op0
, mode
, offset
);
1030 mode
= GET_MODE (op0
);
1032 /* Now MODE is either some integral mode for a MEM as OP0,
1033 or is a full-word for a REG as OP0. TOTAL_BITS corresponds.
1034 The bit field is contained entirely within OP0.
1035 BITPOS is the starting bit number within OP0.
1036 (OP0's mode may actually be narrower than MODE.) */
1038 if (BYTES_BIG_ENDIAN
)
1039 /* BITPOS is the distance between our msb
1040 and that of the containing datum.
1041 Convert it to the distance from the lsb. */
1042 bitpos
= total_bits
- bitsize
- bitpos
;
1044 /* Now BITPOS is always the distance between our lsb
1047 /* Shift VALUE left by BITPOS bits. If VALUE is not constant,
1048 we must first convert its mode to MODE. */
1050 if (CONST_INT_P (value
))
1052 HOST_WIDE_INT v
= INTVAL (value
);
1054 if (bitsize
< HOST_BITS_PER_WIDE_INT
)
1055 v
&= ((HOST_WIDE_INT
) 1 << bitsize
) - 1;
1059 else if ((bitsize
< HOST_BITS_PER_WIDE_INT
1060 && v
== ((HOST_WIDE_INT
) 1 << bitsize
) - 1)
1061 || (bitsize
== HOST_BITS_PER_WIDE_INT
&& v
== -1))
1064 value
= lshift_value (mode
, value
, bitpos
, bitsize
);
1068 int must_and
= (GET_MODE_BITSIZE (GET_MODE (value
)) != bitsize
1069 && bitpos
+ bitsize
!= GET_MODE_BITSIZE (mode
));
1071 if (GET_MODE (value
) != mode
)
1072 value
= convert_to_mode (mode
, value
, 1);
1075 value
= expand_binop (mode
, and_optab
, value
,
1076 mask_rtx (mode
, 0, bitsize
, 0),
1077 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
1079 value
= expand_shift (LSHIFT_EXPR
, mode
, value
,
1080 bitpos
, NULL_RTX
, 1);
1083 /* Now clear the chosen bits in OP0,
1084 except that if VALUE is -1 we need not bother. */
1085 /* We keep the intermediates in registers to allow CSE to combine
1086 consecutive bitfield assignments. */
1088 temp
= force_reg (mode
, op0
);
1092 temp
= expand_binop (mode
, and_optab
, temp
,
1093 mask_rtx (mode
, bitpos
, bitsize
, 1),
1094 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
1095 temp
= force_reg (mode
, temp
);
1098 /* Now logical-or VALUE into OP0, unless it is zero. */
1102 temp
= expand_binop (mode
, ior_optab
, temp
, value
,
1103 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
1104 temp
= force_reg (mode
, temp
);
1109 op0
= copy_rtx (op0
);
1110 emit_move_insn (op0
, temp
);
/* Store a bit field that is split across multiple accessible memory objects.

   OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
   BITSIZE is the field width; BITPOS the position of its first bit.
   VALUE is the value to store.

   This does not yet handle fields wider than BITS_PER_WORD.  */
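/* For instance, a 20-bit field that starts 8 bits before a word boundary
   is written as two parts: 8 bits into the first word and the remaining
   12 bits into the next, each piece going through store_fixed_bit_field.  */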
1124 store_split_bit_field (rtx op0
, unsigned HOST_WIDE_INT bitsize
,
1125 unsigned HOST_WIDE_INT bitpos
,
1126 unsigned HOST_WIDE_INT bitregion_start
,
1127 unsigned HOST_WIDE_INT bitregion_end
,
1131 unsigned int bitsdone
= 0;
1133 /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1135 if (REG_P (op0
) || GET_CODE (op0
) == SUBREG
)
1136 unit
= BITS_PER_WORD
;
1138 unit
= MIN (MEM_ALIGN (op0
), BITS_PER_WORD
);
1140 /* If VALUE is a constant other than a CONST_INT, get it into a register in
1141 WORD_MODE. If we can do this using gen_lowpart_common, do so. Note
1142 that VALUE might be a floating-point constant. */
1143 if (CONSTANT_P (value
) && !CONST_INT_P (value
))
1145 rtx word
= gen_lowpart_common (word_mode
, value
);
1147 if (word
&& (value
!= word
))
1150 value
= gen_lowpart_common (word_mode
,
1151 force_reg (GET_MODE (value
) != VOIDmode
1153 : word_mode
, value
));
1156 while (bitsdone
< bitsize
)
1158 unsigned HOST_WIDE_INT thissize
;
1160 unsigned HOST_WIDE_INT thispos
;
1161 unsigned HOST_WIDE_INT offset
;
1163 offset
= (bitpos
+ bitsdone
) / unit
;
1164 thispos
= (bitpos
+ bitsdone
) % unit
;
1166 /* When region of bytes we can touch is restricted, decrease
1167 UNIT close to the end of the region as needed. */
1169 && unit
> BITS_PER_UNIT
1170 && bitpos
+ bitsdone
- thispos
+ unit
> bitregion_end
+ 1)
1176 /* THISSIZE must not overrun a word boundary. Otherwise,
1177 store_fixed_bit_field will call us again, and we will mutually
1179 thissize
= MIN (bitsize
- bitsdone
, BITS_PER_WORD
);
1180 thissize
= MIN (thissize
, unit
- thispos
);
1182 if (BYTES_BIG_ENDIAN
)
1186 /* We must do an endian conversion exactly the same way as it is
1187 done in extract_bit_field, so that the two calls to
1188 extract_fixed_bit_field will have comparable arguments. */
1189 if (!MEM_P (value
) || GET_MODE (value
) == BLKmode
)
1190 total_bits
= BITS_PER_WORD
;
1192 total_bits
= GET_MODE_BITSIZE (GET_MODE (value
));
1194 /* Fetch successively less significant portions. */
1195 if (CONST_INT_P (value
))
1196 part
= GEN_INT (((unsigned HOST_WIDE_INT
) (INTVAL (value
))
1197 >> (bitsize
- bitsdone
- thissize
))
1198 & (((HOST_WIDE_INT
) 1 << thissize
) - 1));
1200 /* The args are chosen so that the last part includes the
1201 lsb. Give extract_bit_field the value it needs (with
1202 endianness compensation) to fetch the piece we want. */
1203 part
= extract_fixed_bit_field (word_mode
, value
, 0, thissize
,
1204 total_bits
- bitsize
+ bitsdone
,
1205 NULL_RTX
, 1, false);
1209 /* Fetch successively more significant portions. */
1210 if (CONST_INT_P (value
))
1211 part
= GEN_INT (((unsigned HOST_WIDE_INT
) (INTVAL (value
))
1213 & (((HOST_WIDE_INT
) 1 << thissize
) - 1));
1215 part
= extract_fixed_bit_field (word_mode
, value
, 0, thissize
,
1216 bitsdone
, NULL_RTX
, 1, false);
1219 /* If OP0 is a register, then handle OFFSET here.
1221 When handling multiword bitfields, extract_bit_field may pass
1222 down a word_mode SUBREG of a larger REG for a bitfield that actually
1223 crosses a word boundary. Thus, for a SUBREG, we must find
1224 the current word starting from the base register. */
1225 if (GET_CODE (op0
) == SUBREG
)
1227 int word_offset
= (SUBREG_BYTE (op0
) / UNITS_PER_WORD
) + offset
;
1228 enum machine_mode sub_mode
= GET_MODE (SUBREG_REG (op0
));
1229 if (sub_mode
!= BLKmode
&& GET_MODE_SIZE (sub_mode
) < UNITS_PER_WORD
)
1230 word
= word_offset
? const0_rtx
: op0
;
1232 word
= operand_subword_force (SUBREG_REG (op0
), word_offset
,
1233 GET_MODE (SUBREG_REG (op0
)));
1236 else if (REG_P (op0
))
1238 enum machine_mode op0_mode
= GET_MODE (op0
);
1239 if (op0_mode
!= BLKmode
&& GET_MODE_SIZE (op0_mode
) < UNITS_PER_WORD
)
1240 word
= offset
? const0_rtx
: op0
;
1242 word
= operand_subword_force (op0
, offset
, GET_MODE (op0
));
1248 /* OFFSET is in UNITs, and UNIT is in bits.
1249 store_fixed_bit_field wants offset in bytes. If WORD is const0_rtx,
1250 it is just an out-of-bounds access. Ignore it. */
1251 if (word
!= const0_rtx
)
1252 store_fixed_bit_field (word
, offset
* unit
/ BITS_PER_UNIT
, thissize
,
1253 thispos
, bitregion_start
, bitregion_end
, part
);
1254 bitsdone
+= thissize
;
/* A subroutine of extract_bit_field_1 that converts return value X
   to either MODE or TMODE.  MODE, TMODE and UNSIGNEDP are arguments
   to extract_bit_field.  */

static rtx
convert_extracted_bit_field (rtx x, enum machine_mode mode,
                             enum machine_mode tmode, bool unsignedp)
{
  if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
    return x;

  /* If the x mode is not a scalar integral, first convert to the
     integer mode of that size and then access it as a floating-point
     value via a SUBREG.  */
  if (!SCALAR_INT_MODE_P (tmode))
    {
      enum machine_mode smode;

      smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
      x = convert_to_mode (smode, x, unsignedp);
      x = force_reg (smode, x);
      return gen_lowpart (tmode, x);
    }

  return convert_to_mode (tmode, x, unsignedp);
}
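/* E.g. a value extracted for a floating-point TMODE is first converted
   in the integer mode of the same size and then re-typed with
   gen_lowpart, so the bits are reinterpreted rather than numerically
   converted.  */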
/* A subroutine of extract_bit_field, with the same arguments.
   If FALLBACK_P is true, fall back to extract_fixed_bit_field
   if we can find no other means of implementing the operation.
   If FALLBACK_P is false, return NULL instead.  */
1291 extract_bit_field_1 (rtx str_rtx
, unsigned HOST_WIDE_INT bitsize
,
1292 unsigned HOST_WIDE_INT bitnum
,
1293 int unsignedp
, bool packedp
, rtx target
,
1294 enum machine_mode mode
, enum machine_mode tmode
,
1298 = (MEM_P (str_rtx
)) ? BITS_PER_UNIT
: BITS_PER_WORD
;
1299 unsigned HOST_WIDE_INT offset
, bitpos
;
1301 enum machine_mode int_mode
;
1302 enum machine_mode ext_mode
;
1303 enum machine_mode mode1
;
1306 if (tmode
== VOIDmode
)
1309 while (GET_CODE (op0
) == SUBREG
)
1311 bitnum
+= SUBREG_BYTE (op0
) * BITS_PER_UNIT
;
1312 op0
= SUBREG_REG (op0
);
1315 /* If we have an out-of-bounds access to a register, just return an
1316 uninitialized register of the required mode. This can occur if the
1317 source code contains an out-of-bounds access to a small array. */
1318 if (REG_P (op0
) && bitnum
>= GET_MODE_BITSIZE (GET_MODE (op0
)))
1319 return gen_reg_rtx (tmode
);
1322 && mode
== GET_MODE (op0
)
1324 && bitsize
== GET_MODE_BITSIZE (GET_MODE (op0
)))
1326 /* We're trying to extract a full register from itself. */
1330 /* See if we can get a better vector mode before extracting. */
1331 if (VECTOR_MODE_P (GET_MODE (op0
))
1333 && GET_MODE_INNER (GET_MODE (op0
)) != tmode
)
1335 enum machine_mode new_mode
;
1337 if (GET_MODE_CLASS (tmode
) == MODE_FLOAT
)
1338 new_mode
= MIN_MODE_VECTOR_FLOAT
;
1339 else if (GET_MODE_CLASS (tmode
) == MODE_FRACT
)
1340 new_mode
= MIN_MODE_VECTOR_FRACT
;
1341 else if (GET_MODE_CLASS (tmode
) == MODE_UFRACT
)
1342 new_mode
= MIN_MODE_VECTOR_UFRACT
;
1343 else if (GET_MODE_CLASS (tmode
) == MODE_ACCUM
)
1344 new_mode
= MIN_MODE_VECTOR_ACCUM
;
1345 else if (GET_MODE_CLASS (tmode
) == MODE_UACCUM
)
1346 new_mode
= MIN_MODE_VECTOR_UACCUM
;
1348 new_mode
= MIN_MODE_VECTOR_INT
;
1350 for (; new_mode
!= VOIDmode
; new_mode
= GET_MODE_WIDER_MODE (new_mode
))
1351 if (GET_MODE_SIZE (new_mode
) == GET_MODE_SIZE (GET_MODE (op0
))
1352 && targetm
.vector_mode_supported_p (new_mode
))
1354 if (new_mode
!= VOIDmode
)
1355 op0
= gen_lowpart (new_mode
, op0
);
1358 /* Use vec_extract patterns for extracting parts of vectors whenever
1360 if (VECTOR_MODE_P (GET_MODE (op0
))
1362 && optab_handler (vec_extract_optab
, GET_MODE (op0
)) != CODE_FOR_nothing
1363 && ((bitnum
+ bitsize
- 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0
)))
1364 == bitnum
/ GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0
)))))
1366 struct expand_operand ops
[3];
1367 enum machine_mode outermode
= GET_MODE (op0
);
1368 enum machine_mode innermode
= GET_MODE_INNER (outermode
);
1369 enum insn_code icode
= optab_handler (vec_extract_optab
, outermode
);
1370 unsigned HOST_WIDE_INT pos
= bitnum
/ GET_MODE_BITSIZE (innermode
);
1372 create_output_operand (&ops
[0], target
, innermode
);
1373 create_input_operand (&ops
[1], op0
, outermode
);
1374 create_integer_operand (&ops
[2], pos
);
1375 if (maybe_expand_insn (icode
, 3, ops
))
1377 target
= ops
[0].value
;
1378 if (GET_MODE (target
) != mode
)
1379 return gen_lowpart (tmode
, target
);
1384 /* Make sure we are playing with integral modes. Pun with subregs
1387 enum machine_mode imode
= int_mode_for_mode (GET_MODE (op0
));
1388 if (imode
!= GET_MODE (op0
))
1391 op0
= adjust_address (op0
, imode
, 0);
1392 else if (imode
!= BLKmode
)
1394 op0
= gen_lowpart (imode
, op0
);
1396 /* If we got a SUBREG, force it into a register since we
1397 aren't going to be able to do another SUBREG on it. */
1398 if (GET_CODE (op0
) == SUBREG
)
1399 op0
= force_reg (imode
, op0
);
1401 else if (REG_P (op0
))
1404 imode
= smallest_mode_for_size (GET_MODE_BITSIZE (GET_MODE (op0
)),
1406 reg
= gen_reg_rtx (imode
);
1407 subreg
= gen_lowpart_SUBREG (GET_MODE (op0
), reg
);
1408 emit_move_insn (subreg
, op0
);
1410 bitnum
+= SUBREG_BYTE (subreg
) * BITS_PER_UNIT
;
1414 rtx mem
= assign_stack_temp (GET_MODE (op0
),
1415 GET_MODE_SIZE (GET_MODE (op0
)));
1416 emit_move_insn (mem
, op0
);
1417 op0
= adjust_address (mem
, BLKmode
, 0);
1422 /* We may be accessing data outside the field, which means
1423 we can alias adjacent data. */
1426 op0
= shallow_copy_rtx (op0
);
1427 set_mem_alias_set (op0
, 0);
1428 set_mem_expr (op0
, 0);
1431 /* Extraction of a full-word or multi-word value from a structure
1432 in a register or aligned memory can be done with just a SUBREG.
1433 A subword value in the least significant part of a register
1434 can also be extracted with a SUBREG. For this, we need the
1435 byte offset of the value in op0. */
1437 bitpos
= bitnum
% unit
;
1438 offset
= bitnum
/ unit
;
1439 byte_offset
= bitpos
/ BITS_PER_UNIT
+ offset
* UNITS_PER_WORD
;
1441 /* If OP0 is a register, BITPOS must count within a word.
1442 But as we have it, it counts within whatever size OP0 now has.
1443 On a bigendian machine, these are not the same, so convert. */
1444 if (BYTES_BIG_ENDIAN
1446 && unit
> GET_MODE_BITSIZE (GET_MODE (op0
)))
1447 bitpos
+= unit
- GET_MODE_BITSIZE (GET_MODE (op0
));
1449 /* ??? We currently assume TARGET is at least as big as BITSIZE.
1450 If that's wrong, the solution is to test for it and set TARGET to 0
1453 /* Only scalar integer modes can be converted via subregs. There is an
1454 additional problem for FP modes here in that they can have a precision
1455 which is different from the size. mode_for_size uses precision, but
1456 we want a mode based on the size, so we must avoid calling it for FP
1458 mode1
= (SCALAR_INT_MODE_P (tmode
)
1459 ? mode_for_size (bitsize
, GET_MODE_CLASS (tmode
), 0)
1462 /* If the bitfield is volatile, we need to make sure the access
1463 remains on a type-aligned boundary. */
1464 if (GET_CODE (op0
) == MEM
1465 && MEM_VOLATILE_P (op0
)
1466 && GET_MODE_BITSIZE (GET_MODE (op0
)) > 0
1467 && flag_strict_volatile_bitfields
> 0)
1468 goto no_subreg_mode_swap
;
1470 if (((bitsize
>= BITS_PER_WORD
&& bitsize
== GET_MODE_BITSIZE (mode
)
1471 && bitpos
% BITS_PER_WORD
== 0)
1472 || (mode1
!= BLKmode
1473 /* ??? The big endian test here is wrong. This is correct
1474 if the value is in a register, and if mode_for_size is not
1475 the same mode as op0. This causes us to get unnecessarily
1476 inefficient code from the Thumb port when -mbig-endian. */
1477 && (BYTES_BIG_ENDIAN
1478 ? bitpos
+ bitsize
== BITS_PER_WORD
1481 && TRULY_NOOP_TRUNCATION_MODES_P (mode1
, GET_MODE (op0
))
1482 && GET_MODE_SIZE (mode1
) != 0
1483 && byte_offset
% GET_MODE_SIZE (mode1
) == 0)
1485 && (! SLOW_UNALIGNED_ACCESS (mode
, MEM_ALIGN (op0
))
1486 || (offset
* BITS_PER_UNIT
% bitsize
== 0
1487 && MEM_ALIGN (op0
) % bitsize
== 0)))))
1490 op0
= adjust_address (op0
, mode1
, offset
);
1491 else if (mode1
!= GET_MODE (op0
))
1493 rtx sub
= simplify_gen_subreg (mode1
, op0
, GET_MODE (op0
),
1496 goto no_subreg_mode_swap
;
1500 return convert_to_mode (tmode
, op0
, unsignedp
);
1503 no_subreg_mode_swap
:
1505 /* Handle fields bigger than a word. */
1507 if (bitsize
> BITS_PER_WORD
)
1509 /* Here we transfer the words of the field
1510 in the order least significant first.
1511 This is because the most significant word is the one which may
1512 be less than full. */
1514 unsigned int nwords
= (bitsize
+ (BITS_PER_WORD
- 1)) / BITS_PER_WORD
;
1517 if (target
== 0 || !REG_P (target
) || !valid_multiword_target_p (target
))
1518 target
= gen_reg_rtx (mode
);
1520 /* Indicate for flow that the entire target reg is being set. */
1521 emit_clobber (target
);
1523 for (i
= 0; i
< nwords
; i
++)
1525 /* If I is 0, use the low-order word in both field and target;
1526 if I is 1, use the next to lowest word; and so on. */
1527 /* Word number in TARGET to use. */
1528 unsigned int wordnum
1530 ? GET_MODE_SIZE (GET_MODE (target
)) / UNITS_PER_WORD
- i
- 1
1532 /* Offset from start of field in OP0. */
1533 unsigned int bit_offset
= (WORDS_BIG_ENDIAN
1534 ? MAX (0, ((int) bitsize
- ((int) i
+ 1)
1535 * (int) BITS_PER_WORD
))
1536 : (int) i
* BITS_PER_WORD
);
1537 rtx target_part
= operand_subword (target
, wordnum
, 1, VOIDmode
);
1539 = extract_bit_field (op0
, MIN (BITS_PER_WORD
,
1540 bitsize
- i
* BITS_PER_WORD
),
1541 bitnum
+ bit_offset
, 1, false, target_part
, mode
,
1544 gcc_assert (target_part
);
1546 if (result_part
!= target_part
)
1547 emit_move_insn (target_part
, result_part
);
1552 /* Unless we've filled TARGET, the upper regs in a multi-reg value
1553 need to be zero'd out. */
1554 if (GET_MODE_SIZE (GET_MODE (target
)) > nwords
* UNITS_PER_WORD
)
1556 unsigned int i
, total_words
;
1558 total_words
= GET_MODE_SIZE (GET_MODE (target
)) / UNITS_PER_WORD
;
1559 for (i
= nwords
; i
< total_words
; i
++)
1561 (operand_subword (target
,
1562 WORDS_BIG_ENDIAN
? total_words
- i
- 1 : i
,
1569 /* Signed bit field: sign-extend with two arithmetic shifts. */
1570 target
= expand_shift (LSHIFT_EXPR
, mode
, target
,
1571 GET_MODE_BITSIZE (mode
) - bitsize
, NULL_RTX
, 0);
1572 return expand_shift (RSHIFT_EXPR
, mode
, target
,
1573 GET_MODE_BITSIZE (mode
) - bitsize
, NULL_RTX
, 0);
1576 /* From here on we know the desired field is smaller than a word. */
1578 /* Check if there is a correspondingly-sized integer field, so we can
1579 safely extract it as one size of integer, if necessary; then
1580 truncate or extend to the size that is wanted; then use SUBREGs or
1581 convert_to_mode to get one of the modes we really wanted. */
1583 int_mode
= int_mode_for_mode (tmode
);
1584 if (int_mode
== BLKmode
)
1585 int_mode
= int_mode_for_mode (mode
);
1586 /* Should probably push op0 out to memory and then do a load. */
1587 gcc_assert (int_mode
!= BLKmode
);
1589 /* OFFSET is the number of words or bytes (UNIT says which)
1590 from STR_RTX to the first word or byte containing part of the field. */
1594 || GET_MODE_SIZE (GET_MODE (op0
)) > UNITS_PER_WORD
)
1597 op0
= copy_to_reg (op0
);
1598 op0
= gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD
, MODE_INT
, 0),
1599 op0
, (offset
* UNITS_PER_WORD
));
1604 /* Now OFFSET is nonzero only for memory operands. */
1605 ext_mode
= mode_for_extraction (unsignedp
? EP_extzv
: EP_extv
, 0);
1606 if (ext_mode
!= MAX_MACHINE_MODE
1608 && GET_MODE_BITSIZE (ext_mode
) >= bitsize
1609 /* Do not use extv/extzv for volatile bitfields when
1610 -fstrict-volatile-bitfields is in effect. */
1611 && !(MEM_P (op0
) && MEM_VOLATILE_P (op0
)
1612 && flag_strict_volatile_bitfields
> 0)
1613 /* If op0 is a register, we need it in EXT_MODE to make it
1614 acceptable to the format of ext(z)v. */
1615 && !(GET_CODE (op0
) == SUBREG
&& GET_MODE (op0
) != ext_mode
)
1616 && !((REG_P (op0
) || GET_CODE (op0
) == SUBREG
)
1617 && (bitsize
+ bitpos
> GET_MODE_BITSIZE (ext_mode
))))
1619 struct expand_operand ops
[4];
1620 unsigned HOST_WIDE_INT xbitpos
= bitpos
, xoffset
= offset
;
1622 rtx xtarget
= target
;
1623 rtx xspec_target
= target
;
1624 rtx xspec_target_subreg
= 0;
1626 /* If op0 is a register, we need it in EXT_MODE to make it
1627 acceptable to the format of ext(z)v. */
1628 if (REG_P (xop0
) && GET_MODE (xop0
) != ext_mode
)
1629 xop0
= gen_lowpart_SUBREG (ext_mode
, xop0
);
1631 /* Get ref to first byte containing part of the field. */
1632 xop0
= adjust_address (xop0
, byte_mode
, xoffset
);
1634 /* Now convert from counting within UNIT to counting in EXT_MODE. */
1635 if (BYTES_BIG_ENDIAN
&& !MEM_P (xop0
))
1636 xbitpos
+= GET_MODE_BITSIZE (ext_mode
) - unit
;
1638 unit
= GET_MODE_BITSIZE (ext_mode
);
1640 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1641 "backwards" from the size of the unit we are extracting from.
1642 Otherwise, we count bits from the most significant on a
1643 BYTES/BITS_BIG_ENDIAN machine. */
1645 if (BITS_BIG_ENDIAN
!= BYTES_BIG_ENDIAN
)
1646 xbitpos
= unit
- bitsize
- xbitpos
;
1649 xtarget
= xspec_target
= gen_reg_rtx (tmode
);
1651 if (GET_MODE (xtarget
) != ext_mode
)
1653 /* Don't use LHS paradoxical subreg if explicit truncation is needed
1654 between the mode of the extraction (word_mode) and the target
1655 mode. Instead, create a temporary and use convert_move to set
1658 && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (xtarget
), ext_mode
))
1660 xtarget
= gen_lowpart (ext_mode
, xtarget
);
1661 if (GET_MODE_PRECISION (ext_mode
)
1662 > GET_MODE_PRECISION (GET_MODE (xspec_target
)))
1663 xspec_target_subreg
= xtarget
;
1666 xtarget
= gen_reg_rtx (ext_mode
);
1669 create_output_operand (&ops
[0], xtarget
, ext_mode
);
1670 create_fixed_operand (&ops
[1], xop0
);
1671 create_integer_operand (&ops
[2], bitsize
);
1672 create_integer_operand (&ops
[3], xbitpos
);
1673 if (maybe_expand_insn (unsignedp
? CODE_FOR_extzv
: CODE_FOR_extv
,
1676 xtarget
= ops
[0].value
;
1677 if (xtarget
== xspec_target
)
1679 if (xtarget
== xspec_target_subreg
)
1680 return xspec_target
;
1681 return convert_extracted_bit_field (xtarget
, mode
, tmode
, unsignedp
);
1685 /* If OP0 is a memory, try copying it to a register and seeing if a
1686 cheap register alternative is available. */
1687 if (ext_mode
!= MAX_MACHINE_MODE
&& MEM_P (op0
))
1689 enum machine_mode bestmode
;
1691 /* Get the mode to use for inserting into this field. If
1692 OP0 is BLKmode, get the smallest mode consistent with the
1693 alignment. If OP0 is a non-BLKmode object that is no
1694 wider than EXT_MODE, use its mode. Otherwise, use the
1695 smallest mode containing the field. */
1697 if (GET_MODE (op0
) == BLKmode
1698 || (ext_mode
!= MAX_MACHINE_MODE
1699 && GET_MODE_SIZE (GET_MODE (op0
)) > GET_MODE_SIZE (ext_mode
)))
1700 bestmode
= get_best_mode (bitsize
, bitnum
, 0, 0, MEM_ALIGN (op0
),
1701 (ext_mode
== MAX_MACHINE_MODE
1702 ? VOIDmode
: ext_mode
),
1703 MEM_VOLATILE_P (op0
));
1705 bestmode
= GET_MODE (op0
);
1707 if (bestmode
!= VOIDmode
1708 && !(SLOW_UNALIGNED_ACCESS (bestmode
, MEM_ALIGN (op0
))
1709 && GET_MODE_BITSIZE (bestmode
) > MEM_ALIGN (op0
)))
1711 unsigned HOST_WIDE_INT xoffset
, xbitpos
;
1713 /* Compute the offset as a multiple of this unit,
1714 counting in bytes. */
1715 unit
= GET_MODE_BITSIZE (bestmode
);
1716 xoffset
= (bitnum
/ unit
) * GET_MODE_SIZE (bestmode
);
1717 xbitpos
= bitnum
% unit
;
1719 /* Make sure the register is big enough for the whole field. */
1720 if (xoffset
* BITS_PER_UNIT
+ unit
1721 >= offset
* BITS_PER_UNIT
+ bitsize
)
1723 rtx last
, result
, xop0
;
1725 last
= get_last_insn ();
1727 /* Fetch it to a register in that size. */
1728 xop0
= adjust_address (op0
, bestmode
, xoffset
);
1729 xop0
= force_reg (bestmode
, xop0
);
1730 result
= extract_bit_field_1 (xop0
, bitsize
, xbitpos
,
1731 unsignedp
, packedp
, target
,
1732 mode
, tmode
, false);
1736 delete_insns_since (last
);
1744 target
= extract_fixed_bit_field (int_mode
, op0
, offset
, bitsize
,
1745 bitpos
, target
, unsignedp
, packedp
);
1746 return convert_extracted_bit_field (target
, mode
, tmode
, unsignedp
);
/* Generate code to extract a byte-field from STR_RTX
   containing BITSIZE bits, starting at BITNUM,
   and put it in TARGET if possible (if TARGET is nonzero).
   Regardless of TARGET, we return the rtx for where the value is placed.

   STR_RTX is the structure containing the byte (a REG or MEM).
   UNSIGNEDP is nonzero if this is an unsigned bit field.
   PACKEDP is nonzero if the field has the packed attribute.
   MODE is the natural mode of the field value once extracted.
   TMODE is the mode the caller would like the value to have;
   but the value may be returned with type MODE instead.

   If a TARGET is specified and we can store in it at no extra cost,
   we do so, and return TARGET.
   Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
   if they are equally easy.  */

rtx
extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
                   unsigned HOST_WIDE_INT bitnum, int unsignedp, bool packedp,
                   rtx target, enum machine_mode mode, enum machine_mode tmode)
{
  return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp, packedp,
                              target, mode, tmode, true);
}
/* Extract a bit field using shifts and boolean operations.
   Returns an rtx to represent the value.
   OP0 addresses a register (word) or memory (byte).
   BITPOS says which bit within the word or byte the bit field starts in.
   OFFSET says how many bytes farther the bit field starts;
     it is 0 if OP0 is a register.
   BITSIZE says how many bits long the bit field is.
     (If OP0 is a register, it may be narrower than a full word,
      but BITPOS still counts within a full word,
      which is significant on bigendian machines.)

   UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
   PACKEDP is true if the field has the packed attribute.

   If TARGET is nonzero, attempts to store the value there
   and return TARGET, but this is not guaranteed.
   If TARGET is not used, create a pseudo-reg of mode TMODE for the value.  */
1794 extract_fixed_bit_field (enum machine_mode tmode
, rtx op0
,
1795 unsigned HOST_WIDE_INT offset
,
1796 unsigned HOST_WIDE_INT bitsize
,
1797 unsigned HOST_WIDE_INT bitpos
, rtx target
,
1798 int unsignedp
, bool packedp
)
1800 unsigned int total_bits
= BITS_PER_WORD
;
1801 enum machine_mode mode
;
1803 if (GET_CODE (op0
) == SUBREG
|| REG_P (op0
))
1805 /* Special treatment for a bit field split across two registers. */
1806 if (bitsize
+ bitpos
> BITS_PER_WORD
)
1807 return extract_split_bit_field (op0
, bitsize
, bitpos
, unsignedp
);
1811 /* Get the proper mode to use for this field. We want a mode that
1812 includes the entire field. If such a mode would be larger than
1813 a word, we won't be doing the extraction the normal way. */
1815 if (MEM_VOLATILE_P (op0
)
1816 && flag_strict_volatile_bitfields
> 0)
1818 if (GET_MODE_BITSIZE (GET_MODE (op0
)) > 0)
1819 mode
= GET_MODE (op0
);
1820 else if (target
&& GET_MODE_BITSIZE (GET_MODE (target
)) > 0)
1821 mode
= GET_MODE (target
);
1826 mode
= get_best_mode (bitsize
, bitpos
+ offset
* BITS_PER_UNIT
, 0, 0,
1827 MEM_ALIGN (op0
), word_mode
, MEM_VOLATILE_P (op0
));
1829 if (mode
== VOIDmode
)
1830 /* The only way this should occur is if the field spans word
1832 return extract_split_bit_field (op0
, bitsize
,
1833 bitpos
+ offset
* BITS_PER_UNIT
,
1836 total_bits
= GET_MODE_BITSIZE (mode
);
1838 /* Make sure bitpos is valid for the chosen mode. Adjust BITPOS to
1839 be in the range 0 to total_bits-1, and put any excess bytes in
1841 if (bitpos
>= total_bits
)
1843 offset
+= (bitpos
/ total_bits
) * (total_bits
/ BITS_PER_UNIT
);
1844 bitpos
-= ((bitpos
/ total_bits
) * (total_bits
/ BITS_PER_UNIT
)
1848 /* If we're accessing a volatile MEM, we can't do the next
1849 alignment step if it results in a multi-word access where we
1850 otherwise wouldn't have one. So, check for that case
1853 && MEM_VOLATILE_P (op0
)
1854 && flag_strict_volatile_bitfields
> 0
1855 && bitpos
+ bitsize
<= total_bits
1856 && bitpos
+ bitsize
+ (offset
% (total_bits
/ BITS_PER_UNIT
)) * BITS_PER_UNIT
> total_bits
)
1858 if (STRICT_ALIGNMENT
)
1860 static bool informed_about_misalignment
= false;
1865 if (bitsize
== total_bits
)
1866 warned
= warning_at (input_location
, OPT_fstrict_volatile_bitfields
,
1867 "multiple accesses to volatile structure member"
1868 " because of packed attribute");
1870 warned
= warning_at (input_location
, OPT_fstrict_volatile_bitfields
,
1871 "multiple accesses to volatile structure bitfield"
1872 " because of packed attribute");
1874 return extract_split_bit_field (op0
, bitsize
,
1875 bitpos
+ offset
* BITS_PER_UNIT
,
1879 if (bitsize
== total_bits
)
1880 warned
= warning_at (input_location
, OPT_fstrict_volatile_bitfields
,
1881 "mis-aligned access used for structure member");
1883 warned
= warning_at (input_location
, OPT_fstrict_volatile_bitfields
,
1884 "mis-aligned access used for structure bitfield");
1886 if (! informed_about_misalignment
&& warned
)
1888 informed_about_misalignment
= true;
1889 inform (input_location
,
1890 "when a volatile object spans multiple type-sized locations,"
1891 " the compiler must choose between using a single mis-aligned access to"
1892 " preserve the volatility, or using multiple aligned accesses to avoid"
1893 " runtime faults; this code may fail at runtime if the hardware does"
1894 " not allow this access");
1901 /* Get ref to an aligned byte, halfword, or word containing the field.
1902 Adjust BITPOS to be position within a word,
1903 and OFFSET to be the offset of that word.
1904 Then alter OP0 to refer to that word. */
1905 bitpos
+= (offset
% (total_bits
/ BITS_PER_UNIT
)) * BITS_PER_UNIT
;
1906 offset
-= (offset
% (total_bits
/ BITS_PER_UNIT
));
1909 op0
= adjust_address (op0
, mode
, offset
);
1912 mode
= GET_MODE (op0
);
1914 if (BYTES_BIG_ENDIAN
)
1915 /* BITPOS is the distance between our msb and that of OP0.
1916 Convert it to the distance from the lsb. */
1917 bitpos
= total_bits
- bitsize
- bitpos
;
1919 /* Now BITPOS is always the distance between the field's lsb and that of OP0.
1920 We have reduced the big-endian case to the little-endian case. */
1926 /* If the field does not already start at the lsb,
1927 shift it so it does. */
1928 /* Maybe propagate the target for the shift. */
1929 rtx subtarget
= (target
!= 0 && REG_P (target
) ? target
: 0);
1932 op0
= expand_shift (RSHIFT_EXPR
, mode
, op0
, bitpos
, subtarget
, 1);
1934 /* Convert the value to the desired mode. */
1936 op0
= convert_to_mode (tmode
, op0
, 1);
1938 /* Unless the msb of the field used to be the msb when we shifted,
1939 mask out the upper bits. */
1941 if (GET_MODE_BITSIZE (mode
) != bitpos
+ bitsize
)
1942 return expand_binop (GET_MODE (op0
), and_optab
, op0
,
1943 mask_rtx (GET_MODE (op0
), 0, bitsize
, 0),
1944 target
, 1, OPTAB_LIB_WIDEN
);
  /* To extract a signed bit-field, first shift its msb to the msb of the word,
     then arithmetic-shift its lsb to the lsb of the word.  */
  op0 = force_reg (mode, op0);

  /* Find the narrowest integer mode that contains the field.  */

  for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
       mode = GET_MODE_WIDER_MODE (mode))
    if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos)
      {
	op0 = convert_to_mode (mode, op0, 0);
	break;
      }

  if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos))
    {
      int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitpos);
      /* Maybe propagate the target for the shift.  */
      rtx subtarget = (target != 0 && REG_P (target) ? target : 0);

      op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
    }

  return expand_shift (RSHIFT_EXPR, mode, op0,
		       GET_MODE_BITSIZE (mode) - bitsize, target, 0);
}
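
/* The two-shift sequence above mirrors the usual C idiom for sign-extending
   a bit field: shift the field's msb up to the word's msb, then
   arithmetic-shift back down.  A minimal host-side sketch follows; it is
   illustrative only (a 32-bit word and an arithmetic right shift of signed
   values are assumed) and is not used by this file.  */
#if 0	/* Illustrative sketch only, not compiled.  */
#include <stdint.h>

/* Extract BITSIZE bits starting at lsb-based BITPOS from WORD and
   sign-extend them; assumes 0 < BITSIZE and BITPOS + BITSIZE <= 32.  */
static int32_t
extract_signed_field (uint32_t word, int bitpos, int bitsize)
{
  /* Shift so the field's msb becomes bit 31 ...  */
  int32_t x = (int32_t) (word << (32 - bitsize - bitpos));
  /* ... then the arithmetic right shift drags the sign bit down.  */
  return x >> (32 - bitsize);
}
#endif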
/* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
   of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
   complement of that if COMPLEMENT.  The mask is truncated if
   necessary to the width of mode MODE.  The mask is zero-extended if
   BITSIZE+BITPOS is too small for MODE.  */

static rtx
mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
{
  double_int mask;

  mask = double_int_mask (bitsize);
  mask = double_int_lshift (mask, bitpos, HOST_BITS_PER_DOUBLE_INT, false);

  if (complement)
    mask = double_int_not (mask);

  return immed_double_int_const (mask, mode);
}
/* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
   VALUE truncated to BITSIZE bits and then shifted left BITPOS bits.  */

static rtx
lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize)
{
  double_int val;

  val = double_int_zext (uhwi_to_double_int (INTVAL (value)), bitsize);
  val = double_int_lshift (val, bitpos, HOST_BITS_PER_DOUBLE_INT, false);

  return immed_double_int_const (val, mode);
}
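
/* For concreteness, the mask built by mask_rtx corresponds to the host-side
   value sketched below.  The helper is hypothetical and assumes a 64-bit
   host word; the real function works on double_ints so that it also covers
   modes wider than one host word.  */
#if 0	/* Illustrative sketch only, not compiled.  */
#include <stdint.h>

/* BITSIZE ones followed by BITPOS zeros, optionally complemented;
   e.g. bitsize 3, bitpos 4 gives 0x70.  */
static uint64_t
mask_value (int bitpos, int bitsize, int complement)
{
  uint64_t mask = (bitsize >= 64 ? ~(uint64_t) 0
		   : (((uint64_t) 1 << bitsize) - 1)) << bitpos;
  return complement ? ~mask : mask;
}
#endif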
/* Extract a bit field that is split across two words
   and return an RTX for the result.

   OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
   BITSIZE is the field width; BITPOS, position of its first bit, in the word.
   UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.  */

static rtx
extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
			 unsigned HOST_WIDE_INT bitpos, int unsignedp)
{
  unsigned int unit;
  unsigned int bitsdone = 0;
  rtx result = NULL_RTX;

  /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
     much at a time.  */
  if (REG_P (op0) || GET_CODE (op0) == SUBREG)
    unit = BITS_PER_WORD;
  else
    unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);

  while (bitsdone < bitsize)
    {
      unsigned HOST_WIDE_INT thissize;
      rtx part, word;
      unsigned HOST_WIDE_INT thispos;
      unsigned HOST_WIDE_INT offset;

      offset = (bitpos + bitsdone) / unit;
      thispos = (bitpos + bitsdone) % unit;

      /* THISSIZE must not overrun a word boundary.  Otherwise,
	 extract_fixed_bit_field will call us again, and we will mutually
	 recurse forever.  */
      thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
      thissize = MIN (thissize, unit - thispos);
2050 /* If OP0 is a register, then handle OFFSET here.
2052 When handling multiword bitfields, extract_bit_field may pass
2053 down a word_mode SUBREG of a larger REG for a bitfield that actually
2054 crosses a word boundary. Thus, for a SUBREG, we must find
2055 the current word starting from the base register. */
2056 if (GET_CODE (op0
) == SUBREG
)
2058 int word_offset
= (SUBREG_BYTE (op0
) / UNITS_PER_WORD
) + offset
;
2059 word
= operand_subword_force (SUBREG_REG (op0
), word_offset
,
2060 GET_MODE (SUBREG_REG (op0
)));
2063 else if (REG_P (op0
))
2065 word
= operand_subword_force (op0
, offset
, GET_MODE (op0
));
2071 /* Extract the parts in bit-counting order,
2072 whose meaning is determined by BYTES_PER_UNIT.
2073 OFFSET is in UNITs, and UNIT is in bits.
2074 extract_fixed_bit_field wants offset in bytes. */
2075 part
= extract_fixed_bit_field (word_mode
, word
,
2076 offset
* unit
/ BITS_PER_UNIT
,
2077 thissize
, thispos
, 0, 1, false);
2078 bitsdone
+= thissize
;
2080 /* Shift this part into place for the result. */
2081 if (BYTES_BIG_ENDIAN
)
2083 if (bitsize
!= bitsdone
)
2084 part
= expand_shift (LSHIFT_EXPR
, word_mode
, part
,
2085 bitsize
- bitsdone
, 0, 1);
2089 if (bitsdone
!= thissize
)
2090 part
= expand_shift (LSHIFT_EXPR
, word_mode
, part
,
2091 bitsdone
- thissize
, 0, 1);
2097 /* Combine the parts with bitwise or. This works
2098 because we extracted each part as an unsigned bit field. */
2099 result
= expand_binop (word_mode
, ior_optab
, part
, result
, NULL_RTX
, 1,
2105 /* Unsigned bit field: we are done. */
2108 /* Signed bit field: sign-extend with two arithmetic shifts. */
2109 result
= expand_shift (LSHIFT_EXPR
, word_mode
, result
,
2110 BITS_PER_WORD
- bitsize
, NULL_RTX
, 0);
2111 return expand_shift (RSHIFT_EXPR
, word_mode
, result
,
2112 BITS_PER_WORD
- bitsize
, NULL_RTX
, 0);
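
/* The per-word loop above corresponds to the following host-side sketch for
   a field that straddles two 32-bit words (little-endian bit numbering; the
   names are illustrative and not part of this file; assumes
   0 <= bitpos < 32 and 1 <= bitsize <= 64).  */
#if 0	/* Illustrative sketch only, not compiled.  */
#include <stdint.h>

/* Extract BITSIZE bits starting at BITPOS from the pair WORDS[0..1],
   zero-extended, where BITPOS + BITSIZE may exceed 32.  */
static uint64_t
extract_split (const uint32_t words[2], int bitpos, int bitsize)
{
  int first = 32 - bitpos;		/* bits taken from words[0] */
  if (first >= bitsize)
    first = bitsize;
  uint64_t lo = (words[0] >> bitpos) & ((1ull << first) - 1);
  uint64_t hi = (uint64_t) words[1] & ((1ull << (bitsize - first)) - 1);
  /* Combine with IOR, shifting the second part into place, just as the
     loop above ORs each THISSIZE-bit chunk into RESULT.  */
  return lo | (hi << first);
}
#endif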
2115 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2116 the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than
2117 MODE, fill the upper bits with zeros. Fail if the layout of either
2118 mode is unknown (as for CC modes) or if the extraction would involve
2119 unprofitable mode punning. Return the value on success, otherwise
2122 This is different from gen_lowpart* in these respects:
2124 - the returned value must always be considered an rvalue
2126 - when MODE is wider than SRC_MODE, the extraction involves
2129 - when MODE is smaller than SRC_MODE, the extraction involves
2130 a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
2132 In other words, this routine performs a computation, whereas the
2133 gen_lowpart* routines are conceptually lvalue or rvalue subreg
2137 extract_low_bits (enum machine_mode mode
, enum machine_mode src_mode
, rtx src
)
2139 enum machine_mode int_mode
, src_int_mode
;
2141 if (mode
== src_mode
)
2144 if (CONSTANT_P (src
))
2146 /* simplify_gen_subreg can't be used here, as if simplify_subreg
2147 fails, it will happily create (subreg (symbol_ref)) or similar
2149 unsigned int byte
= subreg_lowpart_offset (mode
, src_mode
);
2150 rtx ret
= simplify_subreg (mode
, src
, src_mode
, byte
);
2154 if (GET_MODE (src
) == VOIDmode
2155 || !validate_subreg (mode
, src_mode
, src
, byte
))
2158 src
= force_reg (GET_MODE (src
), src
);
2159 return gen_rtx_SUBREG (mode
, src
, byte
);
2162 if (GET_MODE_CLASS (mode
) == MODE_CC
|| GET_MODE_CLASS (src_mode
) == MODE_CC
)
2165 if (GET_MODE_BITSIZE (mode
) == GET_MODE_BITSIZE (src_mode
)
2166 && MODES_TIEABLE_P (mode
, src_mode
))
2168 rtx x
= gen_lowpart_common (mode
, src
);
2173 src_int_mode
= int_mode_for_mode (src_mode
);
2174 int_mode
= int_mode_for_mode (mode
);
2175 if (src_int_mode
== BLKmode
|| int_mode
== BLKmode
)
2178 if (!MODES_TIEABLE_P (src_int_mode
, src_mode
))
2180 if (!MODES_TIEABLE_P (int_mode
, mode
))
2183 src
= gen_lowpart (src_int_mode
, src
);
2184 src
= convert_modes (int_mode
, src_int_mode
, src
, true);
2185 src
= gen_lowpart (mode
, src
);
/* Add INC into TARGET.  */

void
expand_inc (rtx target, rtx inc)
{
  rtx value = expand_binop (GET_MODE (target), add_optab,
			    target, inc,
			    target, 0, OPTAB_LIB_WIDEN);
  if (value != target)
    emit_move_insn (target, value);
}

/* Subtract DEC from TARGET.  */

void
expand_dec (rtx target, rtx dec)
{
  rtx value = expand_binop (GET_MODE (target), sub_optab,
			    target, dec,
			    target, 0, OPTAB_LIB_WIDEN);
  if (value != target)
    emit_move_insn (target, value);
}
/* Output a shift instruction for expression code CODE,
   with SHIFTED being the rtx for the value to shift,
   and AMOUNT the rtx for the amount to shift by.
   Store the result in the rtx TARGET, if that is convenient.
   If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
   Return the rtx for where the value is.  */

static rtx
expand_shift_1 (enum tree_code code, enum machine_mode mode, rtx shifted,
		rtx amount, rtx target, int unsignedp)
{
2225 int left
= (code
== LSHIFT_EXPR
|| code
== LROTATE_EXPR
);
2226 int rotate
= (code
== LROTATE_EXPR
|| code
== RROTATE_EXPR
);
2227 optab lshift_optab
= ashl_optab
;
2228 optab rshift_arith_optab
= ashr_optab
;
2229 optab rshift_uns_optab
= lshr_optab
;
2230 optab lrotate_optab
= rotl_optab
;
2231 optab rrotate_optab
= rotr_optab
;
2232 enum machine_mode op1_mode
;
2234 bool speed
= optimize_insn_for_speed_p ();
2237 op1_mode
= GET_MODE (op1
);
2239 /* Determine whether the shift/rotate amount is a vector, or scalar. If the
2240 shift amount is a vector, use the vector/vector shift patterns. */
2241 if (VECTOR_MODE_P (mode
) && VECTOR_MODE_P (op1_mode
))
2243 lshift_optab
= vashl_optab
;
2244 rshift_arith_optab
= vashr_optab
;
2245 rshift_uns_optab
= vlshr_optab
;
2246 lrotate_optab
= vrotl_optab
;
2247 rrotate_optab
= vrotr_optab
;
2250 /* Previously detected shift-counts computed by NEGATE_EXPR
2251 and shifted in the other direction; but that does not work
2254 if (SHIFT_COUNT_TRUNCATED
)
2256 if (CONST_INT_P (op1
)
2257 && ((unsigned HOST_WIDE_INT
) INTVAL (op1
) >=
2258 (unsigned HOST_WIDE_INT
) GET_MODE_BITSIZE (mode
)))
2259 op1
= GEN_INT ((unsigned HOST_WIDE_INT
) INTVAL (op1
)
2260 % GET_MODE_BITSIZE (mode
));
2261 else if (GET_CODE (op1
) == SUBREG
2262 && subreg_lowpart_p (op1
)
2263 && INTEGRAL_MODE_P (GET_MODE (SUBREG_REG (op1
))))
2264 op1
= SUBREG_REG (op1
);
2267 if (op1
== const0_rtx
)
  /* Check whether it's cheaper to implement a left shift by a constant
     bit count by a sequence of additions.  */
  if (code == LSHIFT_EXPR
      && CONST_INT_P (op1)
      && INTVAL (op1) < GET_MODE_PRECISION (mode)
      && INTVAL (op1) < MAX_BITS_PER_WORD
      && (shift_cost (speed, mode, INTVAL (op1))
	  > INTVAL (op1) * add_cost (speed, mode))
      && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
    {
      int i;

      for (i = 0; i < INTVAL (op1); i++)
	{
	  temp = force_reg (mode, shifted);
	  shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
				  unsignedp, OPTAB_LIB_WIDEN);
	}
      return shifted;
    }
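
/* When the additions are cheaper, the loop above simply doubles the value
   INTVAL (OP1) times.  The same identity in plain C (illustrative only,
   result taken modulo 2^32 just as the RTL works modulo the mode width):  */
#if 0	/* Illustrative sketch only, not compiled.  */
#include <stdint.h>

static uint32_t
shift_left_by_adds (uint32_t x, int count)
{
  /* x << count computed as count self-additions (doublings).  */
  for (int i = 0; i < count; i++)
    x = x + x;
  return x;
}
#endif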
2291 for (attempt
= 0; temp
== 0 && attempt
< 3; attempt
++)
2293 enum optab_methods methods
;
2296 methods
= OPTAB_DIRECT
;
2297 else if (attempt
== 1)
2298 methods
= OPTAB_WIDEN
;
2300 methods
= OPTAB_LIB_WIDEN
;
2304 /* Widening does not work for rotation. */
2305 if (methods
== OPTAB_WIDEN
)
2307 else if (methods
== OPTAB_LIB_WIDEN
)
2309 /* If we have been unable to open-code this by a rotation,
2310 do it as the IOR of two shifts. I.e., to rotate A
2311 by N bits, compute (A << N) | ((unsigned) A >> (C - N))
2312 where C is the bitsize of A.
2314 It is theoretically possible that the target machine might
2315 not be able to perform either shift and hence we would
2316 be making two libcalls rather than just the one for the
2317 shift (similarly if IOR could not be done). We will allow
2318 this extremely unlikely lossage to avoid complicating the
2321 rtx subtarget
= target
== shifted
? 0 : target
;
2322 rtx new_amount
, other_amount
;
2326 if (CONST_INT_P (op1
))
2327 other_amount
= GEN_INT (GET_MODE_BITSIZE (mode
)
2331 = simplify_gen_binary (MINUS
, GET_MODE (op1
),
2332 GEN_INT (GET_MODE_PRECISION (mode
)),
2335 shifted
= force_reg (mode
, shifted
);
2337 temp
= expand_shift_1 (left
? LSHIFT_EXPR
: RSHIFT_EXPR
,
2338 mode
, shifted
, new_amount
, 0, 1);
2339 temp1
= expand_shift_1 (left
? RSHIFT_EXPR
: LSHIFT_EXPR
,
2340 mode
, shifted
, other_amount
,
2342 return expand_binop (mode
, ior_optab
, temp
, temp1
, target
,
2343 unsignedp
, methods
);
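
/* The IOR-of-two-shifts fallback above relies on the standard rotation
   identity.  A minimal host-side version for a 32-bit rotate left follows;
   it is illustrative only and assumes 0 < n < 32 so neither shift count is
   the full width.  */
#if 0	/* Illustrative sketch only, not compiled.  */
#include <stdint.h>

static uint32_t
rotate_left (uint32_t a, int n)		/* 0 < n < 32 */
{
  /* (A << N) | ((unsigned) A >> (C - N)), with C the bitsize of A.  */
  return (a << n) | (a >> (32 - n));
}
#endif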
2346 temp
= expand_binop (mode
,
2347 left
? lrotate_optab
: rrotate_optab
,
2348 shifted
, op1
, target
, unsignedp
, methods
);
2351 temp
= expand_binop (mode
,
2352 left
? lshift_optab
: rshift_uns_optab
,
2353 shifted
, op1
, target
, unsignedp
, methods
);
2355 /* Do arithmetic shifts.
2356 Also, if we are going to widen the operand, we can just as well
2357 use an arithmetic right-shift instead of a logical one. */
2358 if (temp
== 0 && ! rotate
2359 && (! unsignedp
|| (! left
&& methods
== OPTAB_WIDEN
)))
2361 enum optab_methods methods1
= methods
;
2363 /* If trying to widen a log shift to an arithmetic shift,
2364 don't accept an arithmetic shift of the same size. */
2366 methods1
= OPTAB_MUST_WIDEN
;
2368 /* Arithmetic shift */
2370 temp
= expand_binop (mode
,
2371 left
? lshift_optab
: rshift_arith_optab
,
2372 shifted
, op1
, target
, unsignedp
, methods1
);
2375 /* We used to try extzv here for logical right shifts, but that was
2376 only useful for one machine, the VAX, and caused poor code
2377 generation there for lshrdi3, so the code was deleted and a
2378 define_expand for lshrsi3 was added to vax.md. */
/* Output a shift instruction for expression code CODE,
   with SHIFTED being the rtx for the value to shift,
   and AMOUNT the amount to shift by.
   Store the result in the rtx TARGET, if that is convenient.
   If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
   Return the rtx for where the value is.  */

rtx
expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
	      int amount, rtx target, int unsignedp)
{
  return expand_shift_1 (code, mode,
			 shifted, GEN_INT (amount), target, unsignedp);
}

/* Output a shift instruction for expression code CODE,
   with SHIFTED being the rtx for the value to shift,
   and AMOUNT the tree for the amount to shift by.
   Store the result in the rtx TARGET, if that is convenient.
   If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
   Return the rtx for where the value is.  */

rtx
expand_variable_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
		       tree amount, rtx target, int unsignedp)
{
  return expand_shift_1 (code, mode,
			 shifted, expand_normal (amount), target, unsignedp);
}
/* Indicates the type of fixup needed after a constant multiplication.
   BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
   the result should be negated, and ADD_VARIANT means that the
   multiplicand should be added to the result.  */
enum mult_variant {basic_variant, negate_variant, add_variant};

static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
			const struct mult_cost *, enum machine_mode mode);
static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
				 struct algorithm *, enum mult_variant *, int);
static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
			      const struct algorithm *, enum mult_variant);
static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
static rtx extract_high_half (enum machine_mode, rtx);
static rtx expmed_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
static rtx expmed_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
				       int, int);

/* Compute and return the best algorithm for multiplying by T.
   The algorithm must cost less than COST_LIMIT.
   If retval.cost >= COST_LIMIT, no algorithm was found and all
   other fields of the returned struct are undefined.
   MODE is the machine mode of the multiplication.  */
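
/* As a concrete instance of what synth_mult searches for, x * 10 can be
   rendered as shifts and adds; the hand-written equivalent below is the kind
   of sequence the recursion discovers when its cost beats a hardware
   multiply.  Illustrative only; not used by this file.  */
#if 0	/* Illustrative sketch only, not compiled.  */
#include <stdint.h>

static uint32_t
mult_by_10 (uint32_t x)
{
  /* 10 = 2 * 5 = 2 * (4 + 1):  t = (x << 2) + x;  result = t << 1.  */
  uint32_t t = (x << 2) + x;	/* x * 5, a shift-and-add style step */
  return t << 1;		/* final shift step */
}
#endif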
2440 synth_mult (struct algorithm
*alg_out
, unsigned HOST_WIDE_INT t
,
2441 const struct mult_cost
*cost_limit
, enum machine_mode mode
)
2444 struct algorithm
*alg_in
, *best_alg
;
2445 struct mult_cost best_cost
;
2446 struct mult_cost new_limit
;
2447 int op_cost
, op_latency
;
2448 unsigned HOST_WIDE_INT orig_t
= t
;
2449 unsigned HOST_WIDE_INT q
;
2450 int maxm
, hash_index
;
2451 bool cache_hit
= false;
2452 enum alg_code cache_alg
= alg_zero
;
2453 bool speed
= optimize_insn_for_speed_p ();
2454 enum machine_mode imode
;
2455 struct alg_hash_entry
*entry_ptr
;
2457 /* Indicate that no algorithm is yet found. If no algorithm
2458 is found, this value will be returned and indicate failure. */
2459 alg_out
->cost
.cost
= cost_limit
->cost
+ 1;
2460 alg_out
->cost
.latency
= cost_limit
->latency
+ 1;
2462 if (cost_limit
->cost
< 0
2463 || (cost_limit
->cost
== 0 && cost_limit
->latency
<= 0))
2466 /* Be prepared for vector modes. */
2467 imode
= GET_MODE_INNER (mode
);
2468 if (imode
== VOIDmode
)
2471 maxm
= MIN (BITS_PER_WORD
, GET_MODE_BITSIZE (imode
));
2473 /* Restrict the bits of "t" to the multiplication's mode. */
2474 t
&= GET_MODE_MASK (imode
);
2476 /* t == 1 can be done in zero cost. */
2480 alg_out
->cost
.cost
= 0;
2481 alg_out
->cost
.latency
= 0;
2482 alg_out
->op
[0] = alg_m
;
2486 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2490 if (MULT_COST_LESS (cost_limit
, zero_cost (speed
)))
2495 alg_out
->cost
.cost
= zero_cost (speed
);
2496 alg_out
->cost
.latency
= zero_cost (speed
);
2497 alg_out
->op
[0] = alg_zero
;
2502 /* We'll be needing a couple extra algorithm structures now. */
2504 alg_in
= XALLOCA (struct algorithm
);
2505 best_alg
= XALLOCA (struct algorithm
);
2506 best_cost
= *cost_limit
;
2508 /* Compute the hash index. */
2509 hash_index
= (t
^ (unsigned int) mode
^ (speed
* 256)) % NUM_ALG_HASH_ENTRIES
;
2511 /* See if we already know what to do for T. */
2512 entry_ptr
= alg_hash_entry_ptr (hash_index
);
2513 if (entry_ptr
->t
== t
2514 && entry_ptr
->mode
== mode
2515 && entry_ptr
->mode
== mode
2516 && entry_ptr
->speed
== speed
2517 && entry_ptr
->alg
!= alg_unknown
)
2519 cache_alg
= entry_ptr
->alg
;
2521 if (cache_alg
== alg_impossible
)
2523 /* The cache tells us that it's impossible to synthesize
2524 multiplication by T within entry_ptr->cost. */
2525 if (!CHEAPER_MULT_COST (&entry_ptr
->cost
, cost_limit
))
2526 /* COST_LIMIT is at least as restrictive as the one
2527 recorded in the hash table, in which case we have no
2528 hope of synthesizing a multiplication. Just
2532 /* If we get here, COST_LIMIT is less restrictive than the
2533 one recorded in the hash table, so we may be able to
2534 synthesize a multiplication. Proceed as if we didn't
2535 have the cache entry. */
2539 if (CHEAPER_MULT_COST (cost_limit
, &entry_ptr
->cost
))
2540 /* The cached algorithm shows that this multiplication
2541 requires more cost than COST_LIMIT. Just return. This
2542 way, we don't clobber this cache entry with
2543 alg_impossible but retain useful information. */
2555 goto do_alg_addsub_t_m2
;
2557 case alg_add_factor
:
2558 case alg_sub_factor
:
2559 goto do_alg_addsub_factor
;
2562 goto do_alg_add_t2_m
;
2565 goto do_alg_sub_t2_m
;
2573 /* If we have a group of zero bits at the low-order part of T, try
2574 multiplying by the remaining bits and then doing a shift. */
2579 m
= floor_log2 (t
& -t
); /* m = number of low zero bits */
2583 /* The function expand_shift will choose between a shift and
2584 a sequence of additions, so the observed cost is given as
2585 MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)). */
2586 op_cost
= m
* add_cost (speed
, mode
);
2587 if (shift_cost (speed
, mode
, m
) < op_cost
)
2588 op_cost
= shift_cost (speed
, mode
, m
);
2589 new_limit
.cost
= best_cost
.cost
- op_cost
;
2590 new_limit
.latency
= best_cost
.latency
- op_cost
;
2591 synth_mult (alg_in
, q
, &new_limit
, mode
);
2593 alg_in
->cost
.cost
+= op_cost
;
2594 alg_in
->cost
.latency
+= op_cost
;
2595 if (CHEAPER_MULT_COST (&alg_in
->cost
, &best_cost
))
2597 struct algorithm
*x
;
2598 best_cost
= alg_in
->cost
;
2599 x
= alg_in
, alg_in
= best_alg
, best_alg
= x
;
2600 best_alg
->log
[best_alg
->ops
] = m
;
2601 best_alg
->op
[best_alg
->ops
] = alg_shift
;
2604 /* See if treating ORIG_T as a signed number yields a better
2605 sequence. Try this sequence only for a negative ORIG_T
2606 as it would be useless for a non-negative ORIG_T. */
2607 if ((HOST_WIDE_INT
) orig_t
< 0)
2609 /* Shift ORIG_T as follows because a right shift of a
2610 negative-valued signed type is implementation
2612 q
= ~(~orig_t
>> m
);
2613 /* The function expand_shift will choose between a shift
2614 and a sequence of additions, so the observed cost is
2615 given as MIN (m * add_cost(speed, mode),
2616 shift_cost(speed, mode, m)). */
2617 op_cost
= m
* add_cost (speed
, mode
);
2618 if (shift_cost (speed
, mode
, m
) < op_cost
)
2619 op_cost
= shift_cost (speed
, mode
, m
);
2620 new_limit
.cost
= best_cost
.cost
- op_cost
;
2621 new_limit
.latency
= best_cost
.latency
- op_cost
;
2622 synth_mult (alg_in
, q
, &new_limit
, mode
);
2624 alg_in
->cost
.cost
+= op_cost
;
2625 alg_in
->cost
.latency
+= op_cost
;
2626 if (CHEAPER_MULT_COST (&alg_in
->cost
, &best_cost
))
2628 struct algorithm
*x
;
2629 best_cost
= alg_in
->cost
;
2630 x
= alg_in
, alg_in
= best_alg
, best_alg
= x
;
2631 best_alg
->log
[best_alg
->ops
] = m
;
2632 best_alg
->op
[best_alg
->ops
] = alg_shift
;
2640 /* If we have an odd number, add or subtract one. */
2643 unsigned HOST_WIDE_INT w
;
2646 for (w
= 1; (w
& t
) != 0; w
<<= 1)
2648 /* If T was -1, then W will be zero after the loop. This is another
2649 case where T ends with ...111. Handling this with (T + 1) and
2650 subtracting 1 produces slightly better code and results in algorithm
2651 selection much faster than treating it like the ...0111 case
2655 /* Reject the case where t is 3.
2656 Thus we prefer addition in that case. */
2659 /* T ends with ...111. Multiply by (T + 1) and subtract 1. */
2661 op_cost
= add_cost (speed
, mode
);
2662 new_limit
.cost
= best_cost
.cost
- op_cost
;
2663 new_limit
.latency
= best_cost
.latency
- op_cost
;
2664 synth_mult (alg_in
, t
+ 1, &new_limit
, mode
);
2666 alg_in
->cost
.cost
+= op_cost
;
2667 alg_in
->cost
.latency
+= op_cost
;
2668 if (CHEAPER_MULT_COST (&alg_in
->cost
, &best_cost
))
2670 struct algorithm
*x
;
2671 best_cost
= alg_in
->cost
;
2672 x
= alg_in
, alg_in
= best_alg
, best_alg
= x
;
2673 best_alg
->log
[best_alg
->ops
] = 0;
2674 best_alg
->op
[best_alg
->ops
] = alg_sub_t_m2
;
2679 /* T ends with ...01 or ...011. Multiply by (T - 1) and add 1. */
2681 op_cost
= add_cost (speed
, mode
);
2682 new_limit
.cost
= best_cost
.cost
- op_cost
;
2683 new_limit
.latency
= best_cost
.latency
- op_cost
;
2684 synth_mult (alg_in
, t
- 1, &new_limit
, mode
);
2686 alg_in
->cost
.cost
+= op_cost
;
2687 alg_in
->cost
.latency
+= op_cost
;
2688 if (CHEAPER_MULT_COST (&alg_in
->cost
, &best_cost
))
2690 struct algorithm
*x
;
2691 best_cost
= alg_in
->cost
;
2692 x
= alg_in
, alg_in
= best_alg
, best_alg
= x
;
2693 best_alg
->log
[best_alg
->ops
] = 0;
2694 best_alg
->op
[best_alg
->ops
] = alg_add_t_m2
;
2698 /* We may be able to calculate a * -7, a * -15, a * -31, etc
2699 quickly with a - a * n for some appropriate constant n. */
2700 m
= exact_log2 (-orig_t
+ 1);
2701 if (m
>= 0 && m
< maxm
)
2703 op_cost
= shiftsub1_cost (speed
, mode
, m
);
2704 new_limit
.cost
= best_cost
.cost
- op_cost
;
2705 new_limit
.latency
= best_cost
.latency
- op_cost
;
2706 synth_mult (alg_in
, (unsigned HOST_WIDE_INT
) (-orig_t
+ 1) >> m
,
2709 alg_in
->cost
.cost
+= op_cost
;
2710 alg_in
->cost
.latency
+= op_cost
;
2711 if (CHEAPER_MULT_COST (&alg_in
->cost
, &best_cost
))
2713 struct algorithm
*x
;
2714 best_cost
= alg_in
->cost
;
2715 x
= alg_in
, alg_in
= best_alg
, best_alg
= x
;
2716 best_alg
->log
[best_alg
->ops
] = m
;
2717 best_alg
->op
[best_alg
->ops
] = alg_sub_t_m2
;
2725 /* Look for factors of t of the form
2726 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2727 If we find such a factor, we can multiply by t using an algorithm that
2728 multiplies by q, shift the result by m and add/subtract it to itself.
2730 We search for large factors first and loop down, even if large factors
2731 are less probable than small; if we find a large factor we will find a
2732 good sequence quickly, and therefore be able to prune (by decreasing
2733 COST_LIMIT) the search. */
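
/* Concretely, the factor search turns t = 45 = 5 * 9 = 5 * (2^3 + 1) into
   "multiply by 5, then one shift-and-add", recursing on the quotient.  The
   sketch below is illustrative only and is not used by this file.  */
#if 0	/* Illustrative sketch only, not compiled.  */
#include <stdint.h>

static uint32_t
mult_by_45 (uint32_t x)
{
  uint32_t a = (x << 2) + x;	/* factor 5 = 2^2 + 1 */
  return (a << 3) + a;		/* factor 9 = 2^3 + 1, so 45 = 5 * 9 */
}
#endif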
2735 do_alg_addsub_factor
:
2736 for (m
= floor_log2 (t
- 1); m
>= 2; m
--)
2738 unsigned HOST_WIDE_INT d
;
2740 d
= ((unsigned HOST_WIDE_INT
) 1 << m
) + 1;
2741 if (t
% d
== 0 && t
> d
&& m
< maxm
2742 && (!cache_hit
|| cache_alg
== alg_add_factor
))
2744 /* If the target has a cheap shift-and-add instruction use
2745 that in preference to a shift insn followed by an add insn.
2746 Assume that the shift-and-add is "atomic" with a latency
2747 equal to its cost, otherwise assume that on superscalar
2748 hardware the shift may be executed concurrently with the
2749 earlier steps in the algorithm. */
2750 op_cost
= add_cost (speed
, mode
) + shift_cost (speed
, mode
, m
);
2751 if (shiftadd_cost (speed
, mode
, m
) < op_cost
)
2753 op_cost
= shiftadd_cost (speed
, mode
, m
);
2754 op_latency
= op_cost
;
2757 op_latency
= add_cost (speed
, mode
);
2759 new_limit
.cost
= best_cost
.cost
- op_cost
;
2760 new_limit
.latency
= best_cost
.latency
- op_latency
;
2761 synth_mult (alg_in
, t
/ d
, &new_limit
, mode
);
2763 alg_in
->cost
.cost
+= op_cost
;
2764 alg_in
->cost
.latency
+= op_latency
;
2765 if (alg_in
->cost
.latency
< op_cost
)
2766 alg_in
->cost
.latency
= op_cost
;
2767 if (CHEAPER_MULT_COST (&alg_in
->cost
, &best_cost
))
2769 struct algorithm
*x
;
2770 best_cost
= alg_in
->cost
;
2771 x
= alg_in
, alg_in
= best_alg
, best_alg
= x
;
2772 best_alg
->log
[best_alg
->ops
] = m
;
2773 best_alg
->op
[best_alg
->ops
] = alg_add_factor
;
2775 /* Other factors will have been taken care of in the recursion. */
2779 d
= ((unsigned HOST_WIDE_INT
) 1 << m
) - 1;
2780 if (t
% d
== 0 && t
> d
&& m
< maxm
2781 && (!cache_hit
|| cache_alg
== alg_sub_factor
))
2783 /* If the target has a cheap shift-and-subtract insn use
2784 that in preference to a shift insn followed by a sub insn.
2785 Assume that the shift-and-sub is "atomic" with a latency
2786 equal to its cost, otherwise assume that on superscalar
2787 hardware the shift may be executed concurrently with the
2788 earlier steps in the algorithm. */
2789 op_cost
= add_cost (speed
, mode
) + shift_cost (speed
, mode
, m
);
2790 if (shiftsub0_cost (speed
, mode
, m
) < op_cost
)
2792 op_cost
= shiftsub0_cost (speed
, mode
, m
);
2793 op_latency
= op_cost
;
2796 op_latency
= add_cost (speed
, mode
);
2798 new_limit
.cost
= best_cost
.cost
- op_cost
;
2799 new_limit
.latency
= best_cost
.latency
- op_latency
;
2800 synth_mult (alg_in
, t
/ d
, &new_limit
, mode
);
2802 alg_in
->cost
.cost
+= op_cost
;
2803 alg_in
->cost
.latency
+= op_latency
;
2804 if (alg_in
->cost
.latency
< op_cost
)
2805 alg_in
->cost
.latency
= op_cost
;
2806 if (CHEAPER_MULT_COST (&alg_in
->cost
, &best_cost
))
2808 struct algorithm
*x
;
2809 best_cost
= alg_in
->cost
;
2810 x
= alg_in
, alg_in
= best_alg
, best_alg
= x
;
2811 best_alg
->log
[best_alg
->ops
] = m
;
2812 best_alg
->op
[best_alg
->ops
] = alg_sub_factor
;
2820 /* Try shift-and-add (load effective address) instructions,
2821 i.e. do a*3, a*5, a*9. */
2828 if (m
>= 0 && m
< maxm
)
2830 op_cost
= shiftadd_cost (speed
, mode
, m
);
2831 new_limit
.cost
= best_cost
.cost
- op_cost
;
2832 new_limit
.latency
= best_cost
.latency
- op_cost
;
2833 synth_mult (alg_in
, (t
- 1) >> m
, &new_limit
, mode
);
2835 alg_in
->cost
.cost
+= op_cost
;
2836 alg_in
->cost
.latency
+= op_cost
;
2837 if (CHEAPER_MULT_COST (&alg_in
->cost
, &best_cost
))
2839 struct algorithm
*x
;
2840 best_cost
= alg_in
->cost
;
2841 x
= alg_in
, alg_in
= best_alg
, best_alg
= x
;
2842 best_alg
->log
[best_alg
->ops
] = m
;
2843 best_alg
->op
[best_alg
->ops
] = alg_add_t2_m
;
2853 if (m
>= 0 && m
< maxm
)
2855 op_cost
= shiftsub0_cost (speed
, mode
, m
);
2856 new_limit
.cost
= best_cost
.cost
- op_cost
;
2857 new_limit
.latency
= best_cost
.latency
- op_cost
;
2858 synth_mult (alg_in
, (t
+ 1) >> m
, &new_limit
, mode
);
2860 alg_in
->cost
.cost
+= op_cost
;
2861 alg_in
->cost
.latency
+= op_cost
;
2862 if (CHEAPER_MULT_COST (&alg_in
->cost
, &best_cost
))
2864 struct algorithm
*x
;
2865 best_cost
= alg_in
->cost
;
2866 x
= alg_in
, alg_in
= best_alg
, best_alg
= x
;
2867 best_alg
->log
[best_alg
->ops
] = m
;
2868 best_alg
->op
[best_alg
->ops
] = alg_sub_t2_m
;
2876 /* If best_cost has not decreased, we have not found any algorithm. */
2877 if (!CHEAPER_MULT_COST (&best_cost
, cost_limit
))
2879 /* We failed to find an algorithm. Record alg_impossible for
2880 this case (that is, <T, MODE, COST_LIMIT>) so that next time
2881 we are asked to find an algorithm for T within the same or
2882 lower COST_LIMIT, we can immediately return to the
2885 entry_ptr
->mode
= mode
;
2886 entry_ptr
->speed
= speed
;
2887 entry_ptr
->alg
= alg_impossible
;
2888 entry_ptr
->cost
= *cost_limit
;
2892 /* Cache the result. */
2896 entry_ptr
->mode
= mode
;
2897 entry_ptr
->speed
= speed
;
2898 entry_ptr
->alg
= best_alg
->op
[best_alg
->ops
];
2899 entry_ptr
->cost
.cost
= best_cost
.cost
;
2900 entry_ptr
->cost
.latency
= best_cost
.latency
;
2903 /* If we are getting a too long sequence for `struct algorithm'
2904 to record, make this search fail. */
2905 if (best_alg
->ops
== MAX_BITS_PER_WORD
)
2908 /* Copy the algorithm from temporary space to the space at alg_out.
2909 We avoid using structure assignment because the majority of
2910 best_alg is normally undefined, and this is a critical function. */
2911 alg_out
->ops
= best_alg
->ops
+ 1;
2912 alg_out
->cost
= best_cost
;
2913 memcpy (alg_out
->op
, best_alg
->op
,
2914 alg_out
->ops
* sizeof *alg_out
->op
);
2915 memcpy (alg_out
->log
, best_alg
->log
,
2916 alg_out
->ops
* sizeof *alg_out
->log
);
/* Find the cheapest way of multiplying a value of mode MODE by VAL.
   Try three variations:

       - a shift/add sequence based on VAL itself
       - a shift/add sequence based on -VAL, followed by a negation
       - a shift/add sequence based on VAL - 1, followed by an addition.

   Return true if the cheapest of these cost less than MULT_COST,
   describing the algorithm in *ALG and final fixup in *VARIANT.  */
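
/* As an illustration (values chosen for exposition, not taken from the cost
   tables): multiplying by -3 is typically cheapest as the negation variant,
   i.e. synthesize 3*x and negate, rather than synthesizing the huge unsigned
   constant that -3 becomes.  Sketch only; results are modulo 2^32, as for
   the RTL, and nothing below is used by this file.  */
#if 0	/* Illustrative sketch only, not compiled.  */
#include <stdint.h>

static uint32_t
mult_by_minus_3 (uint32_t x)	/* x * -3, modulo 2^32 */
{
  uint32_t t = (x << 1) + x;	/* shift/add sequence for |VAL| = 3 */
  return (uint32_t) 0 - t;	/* negate_variant fixup */
}

static uint32_t
mult_by_9 (uint32_t x)		/* basic_variant: 9 = 8 + 1 */
{
  return (x << 3) + x;
}
#endif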
2930 choose_mult_variant (enum machine_mode mode
, HOST_WIDE_INT val
,
2931 struct algorithm
*alg
, enum mult_variant
*variant
,
2934 struct algorithm alg2
;
2935 struct mult_cost limit
;
2937 bool speed
= optimize_insn_for_speed_p ();
2939 /* Fail quickly for impossible bounds. */
2943 /* Ensure that mult_cost provides a reasonable upper bound.
2944 Any constant multiplication can be performed with less
2945 than 2 * bits additions. */
2946 op_cost
= 2 * GET_MODE_UNIT_BITSIZE (mode
) * add_cost (speed
, mode
);
2947 if (mult_cost
> op_cost
)
2948 mult_cost
= op_cost
;
2950 *variant
= basic_variant
;
2951 limit
.cost
= mult_cost
;
2952 limit
.latency
= mult_cost
;
2953 synth_mult (alg
, val
, &limit
, mode
);
2955 /* This works only if the inverted value actually fits in an
2957 if (HOST_BITS_PER_INT
>= GET_MODE_UNIT_BITSIZE (mode
))
2959 op_cost
= neg_cost(speed
, mode
);
2960 if (MULT_COST_LESS (&alg
->cost
, mult_cost
))
2962 limit
.cost
= alg
->cost
.cost
- op_cost
;
2963 limit
.latency
= alg
->cost
.latency
- op_cost
;
2967 limit
.cost
= mult_cost
- op_cost
;
2968 limit
.latency
= mult_cost
- op_cost
;
2971 synth_mult (&alg2
, -val
, &limit
, mode
);
2972 alg2
.cost
.cost
+= op_cost
;
2973 alg2
.cost
.latency
+= op_cost
;
2974 if (CHEAPER_MULT_COST (&alg2
.cost
, &alg
->cost
))
2975 *alg
= alg2
, *variant
= negate_variant
;
2978 /* This proves very useful for division-by-constant. */
2979 op_cost
= add_cost (speed
, mode
);
2980 if (MULT_COST_LESS (&alg
->cost
, mult_cost
))
2982 limit
.cost
= alg
->cost
.cost
- op_cost
;
2983 limit
.latency
= alg
->cost
.latency
- op_cost
;
2987 limit
.cost
= mult_cost
- op_cost
;
2988 limit
.latency
= mult_cost
- op_cost
;
2991 synth_mult (&alg2
, val
- 1, &limit
, mode
);
2992 alg2
.cost
.cost
+= op_cost
;
2993 alg2
.cost
.latency
+= op_cost
;
2994 if (CHEAPER_MULT_COST (&alg2
.cost
, &alg
->cost
))
2995 *alg
= alg2
, *variant
= add_variant
;
2997 return MULT_COST_LESS (&alg
->cost
, mult_cost
);
3000 /* A subroutine of expand_mult, used for constant multiplications.
3001 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
3002 convenient. Use the shift/add sequence described by ALG and apply
3003 the final fixup specified by VARIANT. */
3006 expand_mult_const (enum machine_mode mode
, rtx op0
, HOST_WIDE_INT val
,
3007 rtx target
, const struct algorithm
*alg
,
3008 enum mult_variant variant
)
3010 HOST_WIDE_INT val_so_far
;
3011 rtx insn
, accum
, tem
;
3013 enum machine_mode nmode
;
3015 /* Avoid referencing memory over and over and invalid sharing
3017 op0
= force_reg (mode
, op0
);
3019 /* ACCUM starts out either as OP0 or as a zero, depending on
3020 the first operation. */
3022 if (alg
->op
[0] == alg_zero
)
3024 accum
= copy_to_mode_reg (mode
, CONST0_RTX (mode
));
3027 else if (alg
->op
[0] == alg_m
)
3029 accum
= copy_to_mode_reg (mode
, op0
);
3035 for (opno
= 1; opno
< alg
->ops
; opno
++)
3037 int log
= alg
->log
[opno
];
3038 rtx shift_subtarget
= optimize
? 0 : accum
;
3040 = (opno
== alg
->ops
- 1 && target
!= 0 && variant
!= add_variant
3043 rtx accum_target
= optimize
? 0 : accum
;
3046 switch (alg
->op
[opno
])
3049 tem
= expand_shift (LSHIFT_EXPR
, mode
, accum
, log
, NULL_RTX
, 0);
3050 /* REG_EQUAL note will be attached to the following insn. */
3051 emit_move_insn (accum
, tem
);
3056 tem
= expand_shift (LSHIFT_EXPR
, mode
, op0
, log
, NULL_RTX
, 0);
3057 accum
= force_operand (gen_rtx_PLUS (mode
, accum
, tem
),
3058 add_target
? add_target
: accum_target
);
3059 val_so_far
+= (HOST_WIDE_INT
) 1 << log
;
3063 tem
= expand_shift (LSHIFT_EXPR
, mode
, op0
, log
, NULL_RTX
, 0);
3064 accum
= force_operand (gen_rtx_MINUS (mode
, accum
, tem
),
3065 add_target
? add_target
: accum_target
);
3066 val_so_far
-= (HOST_WIDE_INT
) 1 << log
;
3070 accum
= expand_shift (LSHIFT_EXPR
, mode
, accum
,
3071 log
, shift_subtarget
, 0);
3072 accum
= force_operand (gen_rtx_PLUS (mode
, accum
, op0
),
3073 add_target
? add_target
: accum_target
);
3074 val_so_far
= (val_so_far
<< log
) + 1;
3078 accum
= expand_shift (LSHIFT_EXPR
, mode
, accum
,
3079 log
, shift_subtarget
, 0);
3080 accum
= force_operand (gen_rtx_MINUS (mode
, accum
, op0
),
3081 add_target
? add_target
: accum_target
);
3082 val_so_far
= (val_so_far
<< log
) - 1;
3085 case alg_add_factor
:
3086 tem
= expand_shift (LSHIFT_EXPR
, mode
, accum
, log
, NULL_RTX
, 0);
3087 accum
= force_operand (gen_rtx_PLUS (mode
, accum
, tem
),
3088 add_target
? add_target
: accum_target
);
3089 val_so_far
+= val_so_far
<< log
;
3092 case alg_sub_factor
:
3093 tem
= expand_shift (LSHIFT_EXPR
, mode
, accum
, log
, NULL_RTX
, 0);
3094 accum
= force_operand (gen_rtx_MINUS (mode
, tem
, accum
),
3096 ? add_target
: (optimize
? 0 : tem
)));
3097 val_so_far
= (val_so_far
<< log
) - val_so_far
;
3104 if (SCALAR_INT_MODE_P (mode
))
3106 /* Write a REG_EQUAL note on the last insn so that we can cse
3107 multiplication sequences. Note that if ACCUM is a SUBREG,
3108 we've set the inner register and must properly indicate that. */
3109 tem
= op0
, nmode
= mode
;
3110 accum_inner
= accum
;
3111 if (GET_CODE (accum
) == SUBREG
)
3113 accum_inner
= SUBREG_REG (accum
);
3114 nmode
= GET_MODE (accum_inner
);
3115 tem
= gen_lowpart (nmode
, op0
);
3118 insn
= get_last_insn ();
3119 set_dst_reg_note (insn
, REG_EQUAL
,
3120 gen_rtx_MULT (nmode
, tem
, GEN_INT (val_so_far
)),
3125 if (variant
== negate_variant
)
3127 val_so_far
= -val_so_far
;
3128 accum
= expand_unop (mode
, neg_optab
, accum
, target
, 0);
3130 else if (variant
== add_variant
)
3132 val_so_far
= val_so_far
+ 1;
3133 accum
= force_operand (gen_rtx_PLUS (mode
, accum
, op0
), target
);
3136 /* Compare only the bits of val and val_so_far that are significant
3137 in the result mode, to avoid sign-/zero-extension confusion. */
3138 nmode
= GET_MODE_INNER (mode
);
3139 if (nmode
== VOIDmode
)
3141 val
&= GET_MODE_MASK (nmode
);
3142 val_so_far
&= GET_MODE_MASK (nmode
);
3143 gcc_assert (val
== val_so_far
);
3148 /* Perform a multiplication and return an rtx for the result.
3149 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3150 TARGET is a suggestion for where to store the result (an rtx).
3152 We check specially for a constant integer as OP1.
3153 If you want this check for OP0 as well, then before calling
3154 you should swap the two operands if OP0 would be constant. */
3157 expand_mult (enum machine_mode mode
, rtx op0
, rtx op1
, rtx target
,
3160 enum mult_variant variant
;
3161 struct algorithm algorithm
;
3164 bool speed
= optimize_insn_for_speed_p ();
3165 bool do_trapv
= flag_trapv
&& SCALAR_INT_MODE_P (mode
) && !unsignedp
;
3167 if (CONSTANT_P (op0
))
3174 /* For vectors, there are several simplifications that can be made if
3175 all elements of the vector constant are identical. */
3177 if (GET_CODE (op1
) == CONST_VECTOR
)
3179 int i
, n
= CONST_VECTOR_NUNITS (op1
);
3180 scalar_op1
= CONST_VECTOR_ELT (op1
, 0);
3181 for (i
= 1; i
< n
; ++i
)
3182 if (!rtx_equal_p (scalar_op1
, CONST_VECTOR_ELT (op1
, i
)))
3186 if (INTEGRAL_MODE_P (mode
))
3189 HOST_WIDE_INT coeff
;
3193 if (op1
== CONST0_RTX (mode
))
3195 if (op1
== CONST1_RTX (mode
))
3197 if (op1
== CONSTM1_RTX (mode
))
3198 return expand_unop (mode
, do_trapv
? negv_optab
: neg_optab
,
3204 /* These are the operations that are potentially turned into
3205 a sequence of shifts and additions. */
3206 mode_bitsize
= GET_MODE_UNIT_BITSIZE (mode
);
3208 /* synth_mult does an `unsigned int' multiply. As long as the mode is
3209 less than or equal in size to `unsigned int' this doesn't matter.
3210 If the mode is larger than `unsigned int', then synth_mult works
3211 only if the constant value exactly fits in an `unsigned int' without
3212 any truncation. This means that multiplying by negative values does
3213 not work; results are off by 2^32 on a 32 bit machine. */
3215 if (CONST_INT_P (scalar_op1
))
3217 coeff
= INTVAL (scalar_op1
);
3220 else if (CONST_DOUBLE_AS_INT_P (scalar_op1
))
3222 /* If we are multiplying in DImode, it may still be a win
3223 to try to work with shifts and adds. */
3224 if (CONST_DOUBLE_HIGH (scalar_op1
) == 0
3225 && CONST_DOUBLE_LOW (scalar_op1
) > 0)
3227 coeff
= CONST_DOUBLE_LOW (scalar_op1
);
3230 else if (CONST_DOUBLE_LOW (scalar_op1
) == 0)
3232 coeff
= CONST_DOUBLE_HIGH (scalar_op1
);
3233 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff
))
3235 int shift
= floor_log2 (coeff
) + HOST_BITS_PER_WIDE_INT
;
3236 if (shift
< HOST_BITS_PER_DOUBLE_INT
- 1
3237 || mode_bitsize
<= HOST_BITS_PER_DOUBLE_INT
)
3238 return expand_shift (LSHIFT_EXPR
, mode
, op0
,
3239 shift
, target
, unsignedp
);
3249 /* We used to test optimize here, on the grounds that it's better to
3250 produce a smaller program when -O is not used. But this causes
3251 such a terrible slowdown sometimes that it seems better to always
3254 /* Special case powers of two. */
3255 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff
))
3256 return expand_shift (LSHIFT_EXPR
, mode
, op0
,
3257 floor_log2 (coeff
), target
, unsignedp
);
3259 fake_reg
= gen_raw_REG (mode
, LAST_VIRTUAL_REGISTER
+ 1);
3261 /* Attempt to handle multiplication of DImode values by negative
3262 coefficients, by performing the multiplication by a positive
3263 multiplier and then inverting the result. */
3264 /* ??? How is this not slightly redundant with the neg variant? */
3265 if (is_neg
&& mode_bitsize
> HOST_BITS_PER_WIDE_INT
)
3267 /* It's safe to use -coeff even for INT_MIN, as the
3268 result is interpreted as an unsigned coefficient.
3269 Exclude cost of op0 from max_cost to match the cost
3270 calculation of the synth_mult. */
3271 max_cost
= (set_src_cost (gen_rtx_MULT (mode
, fake_reg
, op1
), speed
)
3272 - neg_cost(speed
, mode
));
3274 && choose_mult_variant (mode
, -coeff
, &algorithm
,
3275 &variant
, max_cost
))
3277 rtx temp
= expand_mult_const (mode
, op0
, -coeff
, NULL_RTX
,
3278 &algorithm
, variant
);
3279 return expand_unop (mode
, neg_optab
, temp
, target
, 0);
3283 /* Exclude cost of op0 from max_cost to match the cost
3284 calculation of the synth_mult. */
3285 max_cost
= set_src_cost (gen_rtx_MULT (mode
, fake_reg
, op1
), speed
);
3286 if (choose_mult_variant (mode
, coeff
, &algorithm
, &variant
, max_cost
))
3287 return expand_mult_const (mode
, op0
, coeff
, target
,
3288 &algorithm
, variant
);
3292 /* Expand x*2.0 as x+x. */
3293 if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1
))
3296 REAL_VALUE_FROM_CONST_DOUBLE (d
, scalar_op1
);
3298 if (REAL_VALUES_EQUAL (d
, dconst2
))
3300 op0
= force_reg (GET_MODE (op0
), op0
);
3301 return expand_binop (mode
, add_optab
, op0
, op0
,
3302 target
, unsignedp
, OPTAB_LIB_WIDEN
);
3307 /* This used to use umul_optab if unsigned, but for non-widening multiply
3308 there is no difference between signed and unsigned. */
3309 op0
= expand_binop (mode
, do_trapv
? smulv_optab
: smul_optab
,
3310 op0
, op1
, target
, unsignedp
, OPTAB_LIB_WIDEN
);
3315 /* Return a cost estimate for multiplying a register by the given
3316 COEFFicient in the given MODE and SPEED. */
3319 mult_by_coeff_cost (HOST_WIDE_INT coeff
, enum machine_mode mode
, bool speed
)
3322 struct algorithm algorithm
;
3323 enum mult_variant variant
;
3325 rtx fake_reg
= gen_raw_REG (mode
, LAST_VIRTUAL_REGISTER
+ 1);
3326 max_cost
= set_src_cost (gen_rtx_MULT (mode
, fake_reg
, fake_reg
), speed
);
3327 if (choose_mult_variant (mode
, coeff
, &algorithm
, &variant
, max_cost
))
3328 return algorithm
.cost
.cost
;
3333 /* Perform a widening multiplication and return an rtx for the result.
3334 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3335 TARGET is a suggestion for where to store the result (an rtx).
3336 THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3337 or smul_widen_optab.
3339 We check specially for a constant integer as OP1, comparing the
3340 cost of a widening multiply against the cost of a sequence of shifts
3344 expand_widening_mult (enum machine_mode mode
, rtx op0
, rtx op1
, rtx target
,
3345 int unsignedp
, optab this_optab
)
3347 bool speed
= optimize_insn_for_speed_p ();
3350 if (CONST_INT_P (op1
)
3351 && GET_MODE (op0
) != VOIDmode
3352 && (cop1
= convert_modes (mode
, GET_MODE (op0
), op1
,
3353 this_optab
== umul_widen_optab
))
3354 && CONST_INT_P (cop1
)
3355 && (INTVAL (cop1
) >= 0
3356 || HWI_COMPUTABLE_MODE_P (mode
)))
3358 HOST_WIDE_INT coeff
= INTVAL (cop1
);
3360 enum mult_variant variant
;
3361 struct algorithm algorithm
;
3363 /* Special case powers of two. */
3364 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff
))
3366 op0
= convert_to_mode (mode
, op0
, this_optab
== umul_widen_optab
);
3367 return expand_shift (LSHIFT_EXPR
, mode
, op0
,
3368 floor_log2 (coeff
), target
, unsignedp
);
3371 /* Exclude cost of op0 from max_cost to match the cost
3372 calculation of the synth_mult. */
3373 max_cost
= mul_widen_cost (speed
, mode
);
3374 if (choose_mult_variant (mode
, coeff
, &algorithm
, &variant
,
3377 op0
= convert_to_mode (mode
, op0
, this_optab
== umul_widen_optab
);
3378 return expand_mult_const (mode
, op0
, coeff
, target
,
3379 &algorithm
, variant
);
3382 return expand_binop (mode
, this_optab
, op0
, op1
, target
,
3383 unsignedp
, OPTAB_LIB_WIDEN
);
/* Choose a minimal N + 1 bit approximation to 1/D that can be used to
   replace division by D, and put the least significant N bits of the result
   in *MULTIPLIER_PTR and return the most significant bit.

   The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
   needed precision is in PRECISION (should be <= N).

   PRECISION should be as small as possible so this function can choose
   multiplier more freely.

   The rounded-up logarithm of D is placed in *lgup_ptr.  A shift count that
   is to be used for a final right shift is placed in *POST_SHIFT_PTR.

   Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
   where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.  */

unsigned HOST_WIDE_INT
choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
		   unsigned HOST_WIDE_INT *multiplier_ptr,
		   int *post_shift_ptr, int *lgup_ptr)
3407 HOST_WIDE_INT mhigh_hi
, mlow_hi
;
3408 unsigned HOST_WIDE_INT mhigh_lo
, mlow_lo
;
3409 int lgup
, post_shift
;
3411 unsigned HOST_WIDE_INT nl
, dummy1
;
3412 HOST_WIDE_INT nh
, dummy2
;
3414 /* lgup = ceil(log2(divisor)); */
3415 lgup
= ceil_log2 (d
);
3417 gcc_assert (lgup
<= n
);
3420 pow2
= n
+ lgup
- precision
;
3422 /* We could handle this with some effort, but this case is much
3423 better handled directly with a scc insn, so rely on caller using
3425 gcc_assert (pow
!= HOST_BITS_PER_DOUBLE_INT
);
3427 /* mlow = 2^(N + lgup)/d */
3428 if (pow
>= HOST_BITS_PER_WIDE_INT
)
3430 nh
= (HOST_WIDE_INT
) 1 << (pow
- HOST_BITS_PER_WIDE_INT
);
3436 nl
= (unsigned HOST_WIDE_INT
) 1 << pow
;
3438 div_and_round_double (TRUNC_DIV_EXPR
, 1, nl
, nh
, d
, (HOST_WIDE_INT
) 0,
3439 &mlow_lo
, &mlow_hi
, &dummy1
, &dummy2
);
3441 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3442 if (pow2
>= HOST_BITS_PER_WIDE_INT
)
3443 nh
|= (HOST_WIDE_INT
) 1 << (pow2
- HOST_BITS_PER_WIDE_INT
);
3445 nl
|= (unsigned HOST_WIDE_INT
) 1 << pow2
;
3446 div_and_round_double (TRUNC_DIV_EXPR
, 1, nl
, nh
, d
, (HOST_WIDE_INT
) 0,
3447 &mhigh_lo
, &mhigh_hi
, &dummy1
, &dummy2
);
3449 gcc_assert (!mhigh_hi
|| nh
- d
< d
);
3450 gcc_assert (mhigh_hi
<= 1 && mlow_hi
<= 1);
3451 /* Assert that mlow < mhigh. */
3452 gcc_assert (mlow_hi
< mhigh_hi
3453 || (mlow_hi
== mhigh_hi
&& mlow_lo
< mhigh_lo
));
3455 /* If precision == N, then mlow, mhigh exceed 2^N
3456 (but they do not exceed 2^(N+1)). */
3458 /* Reduce to lowest terms. */
3459 for (post_shift
= lgup
; post_shift
> 0; post_shift
--)
3461 unsigned HOST_WIDE_INT ml_lo
= (mlow_hi
<< (HOST_BITS_PER_WIDE_INT
- 1)) | (mlow_lo
>> 1);
3462 unsigned HOST_WIDE_INT mh_lo
= (mhigh_hi
<< (HOST_BITS_PER_WIDE_INT
- 1)) | (mhigh_lo
>> 1);
3472 *post_shift_ptr
= post_shift
;
3474 if (n
< HOST_BITS_PER_WIDE_INT
)
3476 unsigned HOST_WIDE_INT mask
= ((unsigned HOST_WIDE_INT
) 1 << n
) - 1;
3477 *multiplier_ptr
= mhigh_lo
& mask
;
3478 return mhigh_lo
>= mask
;
3482 *multiplier_ptr
= mhigh_lo
;
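
/* A worked instance of the relation promised above: for D = 3 and N = 32
   the multiplier is 0xAAAAAAAB with a post-shift of 1, so an unsigned divide
   by 3 becomes one widening multiply plus a shift by N + 1 = 33 bits.  The
   constants are quoted here purely for illustration; nothing below is used
   by this file.  */
#if 0	/* Illustrative sketch only, not compiled.  */
#include <stdint.h>

static uint32_t
udiv_by_3 (uint32_t x)
{
  /* x / 3 == (x * 0xAAAAAAAB) >> 33 for every 32-bit unsigned x.  */
  return (uint32_t) (((uint64_t) x * 0xAAAAAAABu) >> 33);
}
#endif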
/* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
   congruent to 1 (mod 2**N).  */

static unsigned HOST_WIDE_INT
invert_mod2n (unsigned HOST_WIDE_INT x, int n)
{
  /* Solve x*y == 1 (mod 2^n), where x is odd.  Return y.  */

  /* The algorithm notes that the choice y = x satisfies
     x*y == 1 mod 2^3, since x is assumed odd.
     Each iteration doubles the number of bits of significance in y.  */

  unsigned HOST_WIDE_INT mask;
  unsigned HOST_WIDE_INT y = x;
  int nbit = 3;

  mask = (n == HOST_BITS_PER_WIDE_INT
	  ? ~(unsigned HOST_WIDE_INT) 0
	  : ((unsigned HOST_WIDE_INT) 1 << n) - 1);

  while (nbit < n)
    {
      y = y * (2 - x*y) & mask;		/* Modulo 2^N */
      nbit *= 2;
    }
  return y;
}
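
/* In plain C the same Newton iteration yields a full 64-bit modular inverse
   of any odd X in five steps, since the count of correct low-order bits goes
   3, 6, 12, 24, 48, 96.  Illustrative sketch only; not used by this file.  */
#if 0	/* Illustrative sketch only, not compiled.  */
#include <stdint.h>

static uint64_t
invert_mod2_64 (uint64_t x)	/* x must be odd */
{
  uint64_t y = x;		/* correct to 3 bits: x*x == 1 (mod 8) */
  for (int i = 0; i < 5; i++)
    y = y * (2 - x * y);	/* each step doubles the correct bits */
  return y;			/* x * y == 1 (mod 2^64) */
}
#endif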
3515 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3516 flavor of OP0 and OP1. ADJ_OPERAND is already the high half of the
3517 product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed product
3518 to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3521 The result is put in TARGET if that is convenient.
3523 MODE is the mode of operation. */
3526 expand_mult_highpart_adjust (enum machine_mode mode
, rtx adj_operand
, rtx op0
,
3527 rtx op1
, rtx target
, int unsignedp
)
3530 enum rtx_code adj_code
= unsignedp
? PLUS
: MINUS
;
3532 tem
= expand_shift (RSHIFT_EXPR
, mode
, op0
,
3533 GET_MODE_BITSIZE (mode
) - 1, NULL_RTX
, 0);
3534 tem
= expand_and (mode
, tem
, op1
, NULL_RTX
);
3536 = force_operand (gen_rtx_fmt_ee (adj_code
, mode
, adj_operand
, tem
),
3539 tem
= expand_shift (RSHIFT_EXPR
, mode
, op1
,
3540 GET_MODE_BITSIZE (mode
) - 1, NULL_RTX
, 0);
3541 tem
= expand_and (mode
, tem
, op0
, NULL_RTX
);
3542 target
= force_operand (gen_rtx_fmt_ee (adj_code
, mode
, adj_operand
, tem
),
/* Subroutine of expmed_mult_highpart.  Return the MODE high part of OP.  */

static rtx
extract_high_half (enum machine_mode mode, rtx op)
{
  enum machine_mode wider_mode;

  if (mode == word_mode)
    return gen_highpart (mode, op);

  gcc_assert (!SCALAR_FLOAT_MODE_P (mode));

  wider_mode = GET_MODE_WIDER_MODE (mode);
  op = expand_shift (RSHIFT_EXPR, wider_mode, op,
		     GET_MODE_BITSIZE (mode), 0, 1);
  return convert_modes (mode, wider_mode, op, 0);
}
3566 /* Like expmed_mult_highpart, but only consider using a multiplication
3567 optab. OP1 is an rtx for the constant operand. */
3570 expmed_mult_highpart_optab (enum machine_mode mode
, rtx op0
, rtx op1
,
3571 rtx target
, int unsignedp
, int max_cost
)
3573 rtx narrow_op1
= gen_int_mode (INTVAL (op1
), mode
);
3574 enum machine_mode wider_mode
;
3578 bool speed
= optimize_insn_for_speed_p ();
3580 gcc_assert (!SCALAR_FLOAT_MODE_P (mode
));
3582 wider_mode
= GET_MODE_WIDER_MODE (mode
);
3583 size
= GET_MODE_BITSIZE (mode
);
3585 /* Firstly, try using a multiplication insn that only generates the needed
3586 high part of the product, and in the sign flavor of unsignedp. */
3587 if (mul_highpart_cost (speed
, mode
) < max_cost
)
3589 moptab
= unsignedp
? umul_highpart_optab
: smul_highpart_optab
;
3590 tem
= expand_binop (mode
, moptab
, op0
, narrow_op1
, target
,
3591 unsignedp
, OPTAB_DIRECT
);
3596 /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3597 Need to adjust the result after the multiplication. */
3598 if (size
- 1 < BITS_PER_WORD
3599 && (mul_highpart_cost (speed
, mode
)
3600 + 2 * shift_cost (speed
, mode
, size
-1)
3601 + 4 * add_cost (speed
, mode
) < max_cost
))
3603 moptab
= unsignedp
? smul_highpart_optab
: umul_highpart_optab
;
3604 tem
= expand_binop (mode
, moptab
, op0
, narrow_op1
, target
,
3605 unsignedp
, OPTAB_DIRECT
);
3607 /* We used the wrong signedness. Adjust the result. */
3608 return expand_mult_highpart_adjust (mode
, tem
, op0
, narrow_op1
,
3612 /* Try widening multiplication. */
3613 moptab
= unsignedp
? umul_widen_optab
: smul_widen_optab
;
3614 if (widening_optab_handler (moptab
, wider_mode
, mode
) != CODE_FOR_nothing
3615 && mul_widen_cost (speed
, wider_mode
) < max_cost
)
3617 tem
= expand_binop (wider_mode
, moptab
, op0
, narrow_op1
, 0,
3618 unsignedp
, OPTAB_WIDEN
);
3620 return extract_high_half (mode
, tem
);
3623 /* Try widening the mode and perform a non-widening multiplication. */
3624 if (optab_handler (smul_optab
, wider_mode
) != CODE_FOR_nothing
3625 && size
- 1 < BITS_PER_WORD
3626 && (mul_cost (speed
, wider_mode
) + shift_cost (speed
, mode
, size
-1)
3629 rtx insns
, wop0
, wop1
;
3631 /* We need to widen the operands, for example to ensure the
3632 constant multiplier is correctly sign or zero extended.
3633 Use a sequence to clean-up any instructions emitted by
3634 the conversions if things don't work out. */
3636 wop0
= convert_modes (wider_mode
, mode
, op0
, unsignedp
);
3637 wop1
= convert_modes (wider_mode
, mode
, op1
, unsignedp
);
3638 tem
= expand_binop (wider_mode
, smul_optab
, wop0
, wop1
, 0,
3639 unsignedp
, OPTAB_WIDEN
);
3640 insns
= get_insns ();
3646 return extract_high_half (mode
, tem
);
3650 /* Try widening multiplication of opposite signedness, and adjust. */
3651 moptab
= unsignedp
? smul_widen_optab
: umul_widen_optab
;
3652 if (widening_optab_handler (moptab
, wider_mode
, mode
) != CODE_FOR_nothing
3653 && size
- 1 < BITS_PER_WORD
3654 && (mul_widen_cost (speed
, wider_mode
)
3655 + 2 * shift_cost (speed
, mode
, size
-1)
3656 + 4 * add_cost (speed
, mode
) < max_cost
))
3658 tem
= expand_binop (wider_mode
, moptab
, op0
, narrow_op1
,
3659 NULL_RTX
, ! unsignedp
, OPTAB_WIDEN
);
3662 tem
= extract_high_half (mode
, tem
);
3663 /* We used the wrong signedness. Adjust the result. */
3664 return expand_mult_highpart_adjust (mode
, tem
, op0
, narrow_op1
,
/* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
   putting the high half of the result in TARGET if that is convenient,
   and return where the result is.  If the operation can not be performed,
   0 is returned.

   MODE is the mode of operation and result.

   UNSIGNEDP nonzero means unsigned multiply.

   MAX_COST is the total allowed cost for the expanded RTL.  */

static rtx
expmed_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
		      rtx target, int unsignedp, int max_cost)
{
  enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
  unsigned HOST_WIDE_INT cnst1;
  int extra_cost;
  bool sign_adjust = false;
  enum mult_variant variant;
  struct algorithm alg;
  rtx tem;
  bool speed = optimize_insn_for_speed_p ();

  gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
  /* We can't support modes wider than HOST_BITS_PER_INT.  */
  gcc_assert (HWI_COMPUTABLE_MODE_P (mode));

  cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);

  /* We can't optimize modes wider than BITS_PER_WORD.
     ??? We might be able to perform double-word arithmetic if
     mode == word_mode, however all the cost calculations in
     synth_mult etc. assume single-word operations.  */
  if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
    return expmed_mult_highpart_optab (mode, op0, op1, target,
				       unsignedp, max_cost);

  extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);

  /* Check whether we try to multiply by a negative constant.  */
  if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
    {
      sign_adjust = true;
      extra_cost += add_cost (speed, mode);
    }

  /* See whether shift/add multiplication is cheap enough.  */
  if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
			   max_cost - extra_cost))
    {
      /* See whether the specialized multiplication optabs are
	 cheaper than the shift/add version.  */
      tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
					alg.cost.cost + extra_cost);
      if (tem)
	return tem;

      tem = convert_to_mode (wider_mode, op0, unsignedp);
      tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
      tem = extract_high_half (mode, tem);

      /* Adjust result for signedness.  */
      if (sign_adjust)
	tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);

      return tem;
    }

  return expmed_mult_highpart_optab (mode, op0, op1, target,
				     unsignedp, max_cost);
}
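
/* A note on the sign adjustment above (illustrative, stating the reasoning
   the code relies on): when !UNSIGNEDP and the constant has its sign bit
   set, CNST1 equals OP1 + 2**N as an unsigned number, so the high part of
   OP0 * CNST1 overshoots the desired signed high part by exactly OP0;
   subtracting OP0 (the gen_rtx_MINUS above) restores the correct value.  */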
/* Expand signed modulus of OP0 by a power of two D in mode MODE.  */

static rtx
expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
{
  unsigned HOST_WIDE_INT masklow, maskhigh;
  rtx result, temp, shift, label;
  int logd;

  logd = floor_log2 (d);
  result = gen_reg_rtx (mode);

  /* Avoid conditional branches when they're expensive.  */
  if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
      && optimize_insn_for_speed_p ())
    {
      rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
				      mode, 0, -1);
      if (signmask)
	{
	  signmask = force_reg (mode, signmask);
	  masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
	  shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);

	  /* Use the rtx_cost of a LSHIFTRT instruction to determine
	     which instruction sequence to use.  If logical right shifts
	     are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
	     use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */

	  temp = gen_rtx_LSHIFTRT (mode, result, shift);
	  if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
	      || (set_src_cost (temp, optimize_insn_for_speed_p ())
		  > COSTS_N_INSNS (2)))
	    {
	      temp = expand_binop (mode, xor_optab, op0, signmask,
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	      temp = expand_binop (mode, sub_optab, temp, signmask,
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	      temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	      temp = expand_binop (mode, xor_optab, temp, signmask,
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	      temp = expand_binop (mode, sub_optab, temp, signmask,
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	    }
	  else
	    {
	      signmask = expand_binop (mode, lshr_optab, signmask, shift,
				       NULL_RTX, 1, OPTAB_LIB_WIDEN);
	      signmask = force_reg (mode, signmask);

	      temp = expand_binop (mode, add_optab, op0, signmask,
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	      temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	      temp = expand_binop (mode, sub_optab, temp, signmask,
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	    }
	  return temp;
	}
    }

  /* Mask contains the mode's signbit and the significant bits of the
     modulus.  By including the signbit in the operation, many targets
     can avoid an explicit compare operation in the following comparison
     against zero.  */

  masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
  if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
    {
      masklow |= (HOST_WIDE_INT) -1 << (GET_MODE_BITSIZE (mode) - 1);
      maskhigh = -1;
    }
  else
    maskhigh = (HOST_WIDE_INT) -1
		 << (GET_MODE_BITSIZE (mode) - HOST_BITS_PER_WIDE_INT - 1);

  temp = expand_binop (mode, and_optab, op0,
		       immed_double_const (masklow, maskhigh, mode),
		       result, 1, OPTAB_LIB_WIDEN);
  if (temp != result)
    emit_move_insn (result, temp);

  label = gen_label_rtx ();
  do_cmp_and_jump (result, const0_rtx, GE, mode, label);

  temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
		       0, OPTAB_LIB_WIDEN);
  masklow = (HOST_WIDE_INT) -1 << logd;
  maskhigh = -1;
  temp = expand_binop (mode, ior_optab, temp,
		       immed_double_const (masklow, maskhigh, mode),
		       result, 1, OPTAB_LIB_WIDEN);
  temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
		       0, OPTAB_LIB_WIDEN);
  if (temp != result)
    emit_move_insn (result, temp);
  emit_label (label);
  return result;
}
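
/* Worked example for the branch-less path above (illustrative only):
   for SImode and D = 8 (LOGD = 3), SIGNMASK is -1 when OP0 < 0 and 0
   otherwise, and the first sequence computes

     ((op0 ^ signmask) - signmask) & 7

   and then xors/subtracts SIGNMASK back, i.e. it takes |op0| mod 8 and
   restores the sign, giving the truncating C remainder: op0 = -9 yields
   -1 and op0 = 7 yields 7.  */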
/* Expand signed division of OP0 by a power of two D in mode MODE.
   This routine is only called for positive values of D.  */

static rtx
expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
{
  rtx temp, label;
  int logd;

  logd = floor_log2 (d);

  if (d == 2
      && BRANCH_COST (optimize_insn_for_speed_p (),
		      false) >= 1)
    {
      temp = gen_reg_rtx (mode);
      temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
      temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
			   0, OPTAB_LIB_WIDEN);
      return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
    }

#ifdef HAVE_conditional_move
  if (BRANCH_COST (optimize_insn_for_speed_p (), false)
      >= 2)
    {
      rtx temp2;

      /* ??? emit_conditional_move forces a stack adjustment via
	 compare_from_rtx so, if the sequence is discarded, it will
	 be lost.  Do it now instead.  */
      do_pending_stack_adjust ();

      start_sequence ();
      temp2 = copy_to_mode_reg (mode, op0);
      temp = expand_binop (mode, add_optab, temp2, GEN_INT (d-1),
			   NULL_RTX, 0, OPTAB_LIB_WIDEN);
      temp = force_reg (mode, temp);

      /* Construct "temp2 = (temp2 < 0) ? temp : temp2".  */
      temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
				     mode, temp, temp2, mode, 0);
      if (temp2)
	{
	  rtx seq = get_insns ();
	  end_sequence ();
	  emit_insn (seq);
	  return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
	}
      end_sequence ();
    }
#endif

  if (BRANCH_COST (optimize_insn_for_speed_p (),
		   false) >= 2)
    {
      int ushift = GET_MODE_BITSIZE (mode) - logd;

      temp = gen_reg_rtx (mode);
      temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
      if (shift_cost (optimize_insn_for_speed_p (), mode, ushift)
	  > COSTS_N_INSNS (1))
	temp = expand_binop (mode, and_optab, temp, GEN_INT (d - 1),
			     NULL_RTX, 0, OPTAB_LIB_WIDEN);
      else
	temp = expand_shift (RSHIFT_EXPR, mode, temp,
			     ushift, NULL_RTX, 1);
      temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
			   0, OPTAB_LIB_WIDEN);
      return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
    }

  label = gen_label_rtx ();
  temp = copy_to_mode_reg (mode, op0);
  do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
  expand_inc (temp, GEN_INT (d - 1));
  emit_label (label);
  return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
}
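
/* Worked example for the store-flag path above (illustrative only):
   for SImode, D = 8 (LOGD = 3) and OP0 = -9, TEMP is first -1 (OP0 < 0),
   then either -1 & 7 = 7 or (unsigned) -1 >> 29 = 7; adding it to OP0
   gives -2, and the final arithmetic shift by 3 yields -1, the
   truncating quotient of -9 / 8.  For non-negative OP0 the bias is 0
   and the shift alone gives the quotient.  */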
/* Emit the code to divide OP0 by OP1, putting the result in TARGET
   if that is convenient, and returning where the result is.
   You may request either the quotient or the remainder as the result;
   specify REM_FLAG nonzero to get the remainder.

   CODE is the expression code for which kind of division this is;
   it controls how rounding is done.  MODE is the machine mode to use.
   UNSIGNEDP nonzero means do unsigned division.  */

/* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
   and then correct it by or'ing in missing high bits
   if result of ANDI is nonzero.
   For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
   This could optimize to a bfexts instruction.
   But C doesn't use these operations, so their optimizations are
   left for later.  */
/* ??? For modulo, we don't actually need the highpart of the first product,
   the low part will do nicely.  And for small divisors, the second multiply
   can also be a low-part only multiply or even be completely left out.
   E.g. to calculate the remainder of a division by 3 with a 32 bit
   multiply, multiply with 0x55555556 and extract the upper two bits;
   the result is exact for inputs up to 0x1fffffff.
   The input range can be reduced by using cross-sum rules.
   For odd divisors >= 3, the following table gives right shift counts
   so that if a number is shifted by an integer multiple of the given
   amount, the remainder stays the same:
   2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
   14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
   0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
   20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
   0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12

   Cross-sum rules for even numbers can be derived by leaving as many bits
   to the right alone as the divisor has zeros to the right.
   E.g. if x is an unsigned 32 bit number:
   (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
   */
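
/* Illustration of the remainder-by-3 trick mentioned above (not used
   literally by the code below): for x = 11 the low 32 bits of
   11 * 0x55555556 are 0xAAAAAAB2, whose top two bits are 2 == 11 % 3;
   for x = 12 they are 0x00000008, whose top two bits are 0 == 12 % 3.  */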
rtx
expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
	       rtx op0, rtx op1, rtx target, int unsignedp)
{
  enum machine_mode compute_mode;
  rtx tquotient;
  rtx quotient = 0, remainder = 0;
  rtx last;
  int size;
  rtx insn;
  optab optab1, optab2;
  int op1_is_constant, op1_is_pow2 = 0;
  int max_cost, extra_cost;
  static HOST_WIDE_INT last_div_const = 0;
  static HOST_WIDE_INT ext_op1;
  bool speed = optimize_insn_for_speed_p ();

  op1_is_constant = CONST_INT_P (op1);
  if (op1_is_constant)
    {
      ext_op1 = INTVAL (op1);
      if (unsignedp)
	ext_op1 &= GET_MODE_MASK (mode);
      op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
		     || (! unsignedp
			 && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
    }

  /*
     This is the structure of expand_divmod:

     First comes code to fix up the operands so we can perform the operations
     correctly and efficiently.

     Second comes a switch statement with code specific for each rounding mode.
     For some special operands this code emits all RTL for the desired
     operation, for other cases, it generates only a quotient and stores it in
     QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
     to indicate that it has not done anything.

     Last comes code that finishes the operation.  If QUOTIENT is set and
     REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
     QUOTIENT is not set, it is computed using trunc rounding.

     We try to generate special code for division and remainder when OP1 is a
     constant.  If |OP1| = 2**n we can use shifts and some other fast
     operations.  For other values of OP1, we compute a carefully selected
     fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
     by m.

     In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
     half of the product.  Different strategies for generating the product are
     implemented in expmed_mult_highpart.

     If what we actually want is the remainder, we generate that by another
     by-constant multiplication and a subtraction.  */

  /* We shouldn't be called with OP1 == const1_rtx, but some of the
     code below will malfunction if we are, so check here and handle
     the special case if so.  */
  if (op1 == const1_rtx)
    return rem_flag ? const0_rtx : op0;

  /* When dividing by -1, we could get an overflow.
     negv_optab can handle overflows.  */
  if (! unsignedp && op1 == constm1_rtx)
    {
      if (rem_flag)
	return const0_rtx;
      return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT
			  ? negv_optab : neg_optab, op0, target, 0);
    }
  if (target
      /* Don't use the function value register as a target
	 since we have to read it as well as write it,
	 and function-inlining gets confused by this.  */
      && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
	  /* Don't clobber an operand while doing a multi-step calculation.  */
	  || ((rem_flag || op1_is_constant)
	      && (reg_mentioned_p (target, op0)
		  || (MEM_P (op0) && MEM_P (target))))
	  || reg_mentioned_p (target, op1)
	  || (MEM_P (op1) && MEM_P (target))))
    target = 0;

  /* Get the mode in which to perform this computation.  Normally it will
     be MODE, but sometimes we can't do the desired operation in MODE.
     If so, pick a wider mode in which we can do the operation.  Convert
     to that mode at the start to avoid repeated conversions.

     First see what operations we need.  These depend on the expression
     we are evaluating.  (We assume that divxx3 insns exist under the
     same conditions that modxx3 insns and that these insns don't normally
     fail.  If these assumptions are not correct, we may generate less
     efficient code in some cases.)

     Then see if we find a mode in which we can open-code that operation
     (either a division, modulus, or shift).  Finally, check for the smallest
     mode for which we can do the operation with a library call.  */

  /* We might want to refine this now that we have division-by-constant
     optimization.  Since expmed_mult_highpart tries so many variants, it is
     not straightforward to generalize this.  Maybe we should make an array
     of possible modes in init_expmed?  Save this for GCC 2.7.  */
  optab1 = ((op1_is_pow2 && op1 != const0_rtx)
	    ? (unsignedp ? lshr_optab : ashr_optab)
	    : (unsignedp ? udiv_optab : sdiv_optab));
  optab2 = ((op1_is_pow2 && op1 != const0_rtx)
	    ? optab1
	    : (unsignedp ? udivmod_optab : sdivmod_optab));

  for (compute_mode = mode; compute_mode != VOIDmode;
       compute_mode = GET_MODE_WIDER_MODE (compute_mode))
    if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
	|| optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
      break;

  if (compute_mode == VOIDmode)
    for (compute_mode = mode; compute_mode != VOIDmode;
	 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
      if (optab_libfunc (optab1, compute_mode)
	  || optab_libfunc (optab2, compute_mode))
	break;

  /* If we still couldn't find a mode, use MODE, but expand_binop will
     probably die.  */
  if (compute_mode == VOIDmode)
    compute_mode = mode;
  if (target && GET_MODE (target) == compute_mode)
    tquotient = target;
  else
    tquotient = gen_reg_rtx (compute_mode);

  size = GET_MODE_BITSIZE (compute_mode);
#if 0
  /* It should be possible to restrict the precision to GET_MODE_BITSIZE
     (mode), and thereby get better code when OP1 is a constant.  Do that
     later.  It will require going over all usages of SIZE below.  */
  size = GET_MODE_BITSIZE (mode);
#endif

  /* Only deduct something for a REM if the last divide done was
     for a different constant.   Then set the constant of the last
     divide.  */
  max_cost = (unsignedp
	      ? udiv_cost (speed, compute_mode)
	      : sdiv_cost (speed, compute_mode));
  if (rem_flag && ! (last_div_const != 0 && op1_is_constant
		     && INTVAL (op1) == last_div_const))
    max_cost -= (mul_cost (speed, compute_mode)
		 + add_cost (speed, compute_mode));

  last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
  /* Now convert to the best mode to use.  */
  if (compute_mode != mode)
    {
      op0 = convert_modes (compute_mode, mode, op0, unsignedp);
      op1 = convert_modes (compute_mode, mode, op1, unsignedp);

      /* convert_modes may have placed op1 into a register, so we
	 must recompute the following.  */
      op1_is_constant = CONST_INT_P (op1);
      op1_is_pow2 = (op1_is_constant
		     && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
			  || (! unsignedp
			      && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1)))))) ;
    }

  /* If one of the operands is a volatile MEM, copy it into a register.  */

  if (MEM_P (op0) && MEM_VOLATILE_P (op0))
    op0 = force_reg (compute_mode, op0);
  if (MEM_P (op1) && MEM_VOLATILE_P (op1))
    op1 = force_reg (compute_mode, op1);

  /* If we need the remainder or if OP1 is constant, we need to
     put OP0 in a register in case it has any queued subexpressions.  */
  if (rem_flag || op1_is_constant)
    op0 = force_reg (compute_mode, op0);

  last = get_last_insn ();
  /* Promote floor rounding to trunc rounding for unsigned operations.  */
  if (unsignedp)
    {
      if (code == FLOOR_DIV_EXPR)
	code = TRUNC_DIV_EXPR;
      if (code == FLOOR_MOD_EXPR)
	code = TRUNC_MOD_EXPR;
      if (code == EXACT_DIV_EXPR && op1_is_pow2)
	code = TRUNC_DIV_EXPR;
    }
4160 if (op1
!= const0_rtx
)
4163 case TRUNC_MOD_EXPR
:
4164 case TRUNC_DIV_EXPR
:
4165 if (op1_is_constant
)
4169 unsigned HOST_WIDE_INT mh
, ml
;
4170 int pre_shift
, post_shift
;
4172 unsigned HOST_WIDE_INT d
= (INTVAL (op1
)
4173 & GET_MODE_MASK (compute_mode
));
4175 if (EXACT_POWER_OF_2_OR_ZERO_P (d
))
4177 pre_shift
= floor_log2 (d
);
4181 = expand_binop (compute_mode
, and_optab
, op0
,
4182 GEN_INT (((HOST_WIDE_INT
) 1 << pre_shift
) - 1),
4186 return gen_lowpart (mode
, remainder
);
4188 quotient
= expand_shift (RSHIFT_EXPR
, compute_mode
, op0
,
4189 pre_shift
, tquotient
, 1);
4191 else if (size
<= HOST_BITS_PER_WIDE_INT
)
4193 if (d
>= ((unsigned HOST_WIDE_INT
) 1 << (size
- 1)))
4195 /* Most significant bit of divisor is set; emit an scc
4197 quotient
= emit_store_flag_force (tquotient
, GEU
, op0
, op1
,
4198 compute_mode
, 1, 1);
4202 /* Find a suitable multiplier and right shift count
4203 instead of multiplying with D. */
4205 mh
= choose_multiplier (d
, size
, size
,
4206 &ml
, &post_shift
, &dummy
);
4208 /* If the suggested multiplier is more than SIZE bits,
4209 we can do better for even divisors, using an
4210 initial right shift. */
4211 if (mh
!= 0 && (d
& 1) == 0)
4213 pre_shift
= floor_log2 (d
& -d
);
4214 mh
= choose_multiplier (d
>> pre_shift
, size
,
4216 &ml
, &post_shift
, &dummy
);
4226 if (post_shift
- 1 >= BITS_PER_WORD
)
4230 = (shift_cost (speed
, compute_mode
, post_shift
- 1)
4231 + shift_cost (speed
, compute_mode
, 1)
4232 + 2 * add_cost (speed
, compute_mode
));
4233 t1
= expmed_mult_highpart (compute_mode
, op0
,
4236 max_cost
- extra_cost
);
4239 t2
= force_operand (gen_rtx_MINUS (compute_mode
,
4242 t3
= expand_shift (RSHIFT_EXPR
, compute_mode
,
4243 t2
, 1, NULL_RTX
, 1);
4244 t4
= force_operand (gen_rtx_PLUS (compute_mode
,
4247 quotient
= expand_shift
4248 (RSHIFT_EXPR
, compute_mode
, t4
,
4249 post_shift
- 1, tquotient
, 1);
4255 if (pre_shift
>= BITS_PER_WORD
4256 || post_shift
>= BITS_PER_WORD
)
4260 (RSHIFT_EXPR
, compute_mode
, op0
,
4261 pre_shift
, NULL_RTX
, 1);
4263 = (shift_cost (speed
, compute_mode
, pre_shift
)
4264 + shift_cost (speed
, compute_mode
, post_shift
));
4265 t2
= expmed_mult_highpart (compute_mode
, t1
,
4268 max_cost
- extra_cost
);
4271 quotient
= expand_shift
4272 (RSHIFT_EXPR
, compute_mode
, t2
,
4273 post_shift
, tquotient
, 1);
4277 else /* Too wide mode to use tricky code */
4280 insn
= get_last_insn ();
4282 set_dst_reg_note (insn
, REG_EQUAL
,
4283 gen_rtx_UDIV (compute_mode
, op0
, op1
),
4286 else /* TRUNC_DIV, signed */
4288 unsigned HOST_WIDE_INT ml
;
4289 int lgup
, post_shift
;
4291 HOST_WIDE_INT d
= INTVAL (op1
);
4292 unsigned HOST_WIDE_INT abs_d
;
4294 /* Since d might be INT_MIN, we have to cast to
4295 unsigned HOST_WIDE_INT before negating to avoid
4296 undefined signed overflow. */
4298 ? (unsigned HOST_WIDE_INT
) d
4299 : - (unsigned HOST_WIDE_INT
) d
);
4301 /* n rem d = n rem -d */
4302 if (rem_flag
&& d
< 0)
4305 op1
= gen_int_mode (abs_d
, compute_mode
);
4311 quotient
= expand_unop (compute_mode
, neg_optab
, op0
,
4313 else if (HOST_BITS_PER_WIDE_INT
>= size
4314 && abs_d
== (unsigned HOST_WIDE_INT
) 1 << (size
- 1))
4316 /* This case is not handled correctly below. */
4317 quotient
= emit_store_flag (tquotient
, EQ
, op0
, op1
,
4318 compute_mode
, 1, 1);
4322 else if (EXACT_POWER_OF_2_OR_ZERO_P (d
)
4324 ? smod_pow2_cheap (speed
, compute_mode
)
4325 : sdiv_pow2_cheap (speed
, compute_mode
))
4326 /* We assume that cheap metric is true if the
4327 optab has an expander for this mode. */
4328 && ((optab_handler ((rem_flag
? smod_optab
4331 != CODE_FOR_nothing
)
4332 || (optab_handler (sdivmod_optab
,
4334 != CODE_FOR_nothing
)))
4336 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d
))
4340 remainder
= expand_smod_pow2 (compute_mode
, op0
, d
);
4342 return gen_lowpart (mode
, remainder
);
4345 if (sdiv_pow2_cheap (speed
, compute_mode
)
4346 && ((optab_handler (sdiv_optab
, compute_mode
)
4347 != CODE_FOR_nothing
)
4348 || (optab_handler (sdivmod_optab
, compute_mode
)
4349 != CODE_FOR_nothing
)))
4350 quotient
= expand_divmod (0, TRUNC_DIV_EXPR
,
4352 gen_int_mode (abs_d
,
4356 quotient
= expand_sdiv_pow2 (compute_mode
, op0
, abs_d
);
4358 /* We have computed OP0 / abs(OP1). If OP1 is negative,
4359 negate the quotient. */
4362 insn
= get_last_insn ();
4364 && abs_d
< ((unsigned HOST_WIDE_INT
) 1
4365 << (HOST_BITS_PER_WIDE_INT
- 1)))
4366 set_dst_reg_note (insn
, REG_EQUAL
,
4367 gen_rtx_DIV (compute_mode
, op0
,
4373 quotient
= expand_unop (compute_mode
, neg_optab
,
4374 quotient
, quotient
, 0);
4377 else if (size
<= HOST_BITS_PER_WIDE_INT
)
4379 choose_multiplier (abs_d
, size
, size
- 1,
4380 &ml
, &post_shift
, &lgup
);
4381 if (ml
< (unsigned HOST_WIDE_INT
) 1 << (size
- 1))
4385 if (post_shift
>= BITS_PER_WORD
4386 || size
- 1 >= BITS_PER_WORD
)
4389 extra_cost
= (shift_cost (speed
, compute_mode
, post_shift
)
4390 + shift_cost (speed
, compute_mode
, size
- 1)
4391 + add_cost (speed
, compute_mode
));
4392 t1
= expmed_mult_highpart (compute_mode
, op0
,
4393 GEN_INT (ml
), NULL_RTX
, 0,
4394 max_cost
- extra_cost
);
4398 (RSHIFT_EXPR
, compute_mode
, t1
,
4399 post_shift
, NULL_RTX
, 0);
4401 (RSHIFT_EXPR
, compute_mode
, op0
,
4402 size
- 1, NULL_RTX
, 0);
4405 = force_operand (gen_rtx_MINUS (compute_mode
,
4410 = force_operand (gen_rtx_MINUS (compute_mode
,
4418 if (post_shift
>= BITS_PER_WORD
4419 || size
- 1 >= BITS_PER_WORD
)
4422 ml
|= (~(unsigned HOST_WIDE_INT
) 0) << (size
- 1);
4423 mlr
= gen_int_mode (ml
, compute_mode
);
4424 extra_cost
= (shift_cost (speed
, compute_mode
, post_shift
)
4425 + shift_cost (speed
, compute_mode
, size
- 1)
4426 + 2 * add_cost (speed
, compute_mode
));
4427 t1
= expmed_mult_highpart (compute_mode
, op0
, mlr
,
4429 max_cost
- extra_cost
);
4432 t2
= force_operand (gen_rtx_PLUS (compute_mode
,
4436 (RSHIFT_EXPR
, compute_mode
, t2
,
4437 post_shift
, NULL_RTX
, 0);
4439 (RSHIFT_EXPR
, compute_mode
, op0
,
4440 size
- 1, NULL_RTX
, 0);
4443 = force_operand (gen_rtx_MINUS (compute_mode
,
4448 = force_operand (gen_rtx_MINUS (compute_mode
,
4453 else /* Too wide mode to use tricky code */
4456 insn
= get_last_insn ();
4458 set_dst_reg_note (insn
, REG_EQUAL
,
4459 gen_rtx_DIV (compute_mode
, op0
, op1
),
4465 delete_insns_since (last
);
4468 case FLOOR_DIV_EXPR
:
4469 case FLOOR_MOD_EXPR
:
4470 /* We will come here only for signed operations. */
4471 if (op1_is_constant
&& HOST_BITS_PER_WIDE_INT
>= size
)
4473 unsigned HOST_WIDE_INT mh
, ml
;
4474 int pre_shift
, lgup
, post_shift
;
4475 HOST_WIDE_INT d
= INTVAL (op1
);
4479 /* We could just as easily deal with negative constants here,
4480 but it does not seem worth the trouble for GCC 2.6. */
4481 if (EXACT_POWER_OF_2_OR_ZERO_P (d
))
4483 pre_shift
= floor_log2 (d
);
4486 remainder
= expand_binop (compute_mode
, and_optab
, op0
,
4487 GEN_INT (((HOST_WIDE_INT
) 1 << pre_shift
) - 1),
4488 remainder
, 0, OPTAB_LIB_WIDEN
);
4490 return gen_lowpart (mode
, remainder
);
4492 quotient
= expand_shift
4493 (RSHIFT_EXPR
, compute_mode
, op0
,
4494 pre_shift
, tquotient
, 0);
4500 mh
= choose_multiplier (d
, size
, size
- 1,
4501 &ml
, &post_shift
, &lgup
);
4504 if (post_shift
< BITS_PER_WORD
4505 && size
- 1 < BITS_PER_WORD
)
4508 (RSHIFT_EXPR
, compute_mode
, op0
,
4509 size
- 1, NULL_RTX
, 0);
4510 t2
= expand_binop (compute_mode
, xor_optab
, op0
, t1
,
4511 NULL_RTX
, 0, OPTAB_WIDEN
);
4512 extra_cost
= (shift_cost (speed
, compute_mode
, post_shift
)
4513 + shift_cost (speed
, compute_mode
, size
- 1)
4514 + 2 * add_cost (speed
, compute_mode
));
4515 t3
= expmed_mult_highpart (compute_mode
, t2
,
4516 GEN_INT (ml
), NULL_RTX
, 1,
4517 max_cost
- extra_cost
);
4521 (RSHIFT_EXPR
, compute_mode
, t3
,
4522 post_shift
, NULL_RTX
, 1);
4523 quotient
= expand_binop (compute_mode
, xor_optab
,
4524 t4
, t1
, tquotient
, 0,
4532 rtx nsign
, t1
, t2
, t3
, t4
;
4533 t1
= force_operand (gen_rtx_PLUS (compute_mode
,
4534 op0
, constm1_rtx
), NULL_RTX
);
4535 t2
= expand_binop (compute_mode
, ior_optab
, op0
, t1
, NULL_RTX
,
4537 nsign
= expand_shift
4538 (RSHIFT_EXPR
, compute_mode
, t2
,
4539 size
- 1, NULL_RTX
, 0);
4540 t3
= force_operand (gen_rtx_MINUS (compute_mode
, t1
, nsign
),
4542 t4
= expand_divmod (0, TRUNC_DIV_EXPR
, compute_mode
, t3
, op1
,
4547 t5
= expand_unop (compute_mode
, one_cmpl_optab
, nsign
,
4549 quotient
= force_operand (gen_rtx_PLUS (compute_mode
,
4558 delete_insns_since (last
);
4560 /* Try using an instruction that produces both the quotient and
4561 remainder, using truncation. We can easily compensate the quotient
4562 or remainder to get floor rounding, once we have the remainder.
4563 Notice that we compute also the final remainder value here,
4564 and return the result right away. */
4565 if (target
== 0 || GET_MODE (target
) != compute_mode
)
4566 target
= gen_reg_rtx (compute_mode
);
4571 = REG_P (target
) ? target
: gen_reg_rtx (compute_mode
);
4572 quotient
= gen_reg_rtx (compute_mode
);
4577 = REG_P (target
) ? target
: gen_reg_rtx (compute_mode
);
4578 remainder
= gen_reg_rtx (compute_mode
);
4581 if (expand_twoval_binop (sdivmod_optab
, op0
, op1
,
4582 quotient
, remainder
, 0))
4584 /* This could be computed with a branch-less sequence.
4585 Save that for later. */
4587 rtx label
= gen_label_rtx ();
4588 do_cmp_and_jump (remainder
, const0_rtx
, EQ
, compute_mode
, label
);
4589 tem
= expand_binop (compute_mode
, xor_optab
, op0
, op1
,
4590 NULL_RTX
, 0, OPTAB_WIDEN
);
4591 do_cmp_and_jump (tem
, const0_rtx
, GE
, compute_mode
, label
);
4592 expand_dec (quotient
, const1_rtx
);
4593 expand_inc (remainder
, op1
);
4595 return gen_lowpart (mode
, rem_flag
? remainder
: quotient
);
4598 /* No luck with division elimination or divmod. Have to do it
4599 by conditionally adjusting op0 *and* the result. */
4601 rtx label1
, label2
, label3
, label4
, label5
;
4605 quotient
= gen_reg_rtx (compute_mode
);
4606 adjusted_op0
= copy_to_mode_reg (compute_mode
, op0
);
4607 label1
= gen_label_rtx ();
4608 label2
= gen_label_rtx ();
4609 label3
= gen_label_rtx ();
4610 label4
= gen_label_rtx ();
4611 label5
= gen_label_rtx ();
4612 do_cmp_and_jump (op1
, const0_rtx
, LT
, compute_mode
, label2
);
4613 do_cmp_and_jump (adjusted_op0
, const0_rtx
, LT
, compute_mode
, label1
);
4614 tem
= expand_binop (compute_mode
, sdiv_optab
, adjusted_op0
, op1
,
4615 quotient
, 0, OPTAB_LIB_WIDEN
);
4616 if (tem
!= quotient
)
4617 emit_move_insn (quotient
, tem
);
4618 emit_jump_insn (gen_jump (label5
));
4620 emit_label (label1
);
4621 expand_inc (adjusted_op0
, const1_rtx
);
4622 emit_jump_insn (gen_jump (label4
));
4624 emit_label (label2
);
4625 do_cmp_and_jump (adjusted_op0
, const0_rtx
, GT
, compute_mode
, label3
);
4626 tem
= expand_binop (compute_mode
, sdiv_optab
, adjusted_op0
, op1
,
4627 quotient
, 0, OPTAB_LIB_WIDEN
);
4628 if (tem
!= quotient
)
4629 emit_move_insn (quotient
, tem
);
4630 emit_jump_insn (gen_jump (label5
));
4632 emit_label (label3
);
4633 expand_dec (adjusted_op0
, const1_rtx
);
4634 emit_label (label4
);
4635 tem
= expand_binop (compute_mode
, sdiv_optab
, adjusted_op0
, op1
,
4636 quotient
, 0, OPTAB_LIB_WIDEN
);
4637 if (tem
!= quotient
)
4638 emit_move_insn (quotient
, tem
);
4639 expand_dec (quotient
, const1_rtx
);
4640 emit_label (label5
);
4648 if (op1_is_constant
&& EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1
)))
4651 unsigned HOST_WIDE_INT d
= INTVAL (op1
);
4652 t1
= expand_shift (RSHIFT_EXPR
, compute_mode
, op0
,
4653 floor_log2 (d
), tquotient
, 1);
4654 t2
= expand_binop (compute_mode
, and_optab
, op0
,
4656 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
4657 t3
= gen_reg_rtx (compute_mode
);
4658 t3
= emit_store_flag (t3
, NE
, t2
, const0_rtx
,
4659 compute_mode
, 1, 1);
4663 lab
= gen_label_rtx ();
4664 do_cmp_and_jump (t2
, const0_rtx
, EQ
, compute_mode
, lab
);
4665 expand_inc (t1
, const1_rtx
);
4670 quotient
= force_operand (gen_rtx_PLUS (compute_mode
,
4676 /* Try using an instruction that produces both the quotient and
4677 remainder, using truncation. We can easily compensate the
4678 quotient or remainder to get ceiling rounding, once we have the
4679 remainder. Notice that we compute also the final remainder
4680 value here, and return the result right away. */
4681 if (target
== 0 || GET_MODE (target
) != compute_mode
)
4682 target
= gen_reg_rtx (compute_mode
);
4686 remainder
= (REG_P (target
)
4687 ? target
: gen_reg_rtx (compute_mode
));
4688 quotient
= gen_reg_rtx (compute_mode
);
4692 quotient
= (REG_P (target
)
4693 ? target
: gen_reg_rtx (compute_mode
));
4694 remainder
= gen_reg_rtx (compute_mode
);
4697 if (expand_twoval_binop (udivmod_optab
, op0
, op1
, quotient
,
4700 /* This could be computed with a branch-less sequence.
4701 Save that for later. */
4702 rtx label
= gen_label_rtx ();
4703 do_cmp_and_jump (remainder
, const0_rtx
, EQ
,
4704 compute_mode
, label
);
4705 expand_inc (quotient
, const1_rtx
);
4706 expand_dec (remainder
, op1
);
4708 return gen_lowpart (mode
, rem_flag
? remainder
: quotient
);
4711 /* No luck with division elimination or divmod. Have to do it
4712 by conditionally adjusting op0 *and* the result. */
4715 rtx adjusted_op0
, tem
;
4717 quotient
= gen_reg_rtx (compute_mode
);
4718 adjusted_op0
= copy_to_mode_reg (compute_mode
, op0
);
4719 label1
= gen_label_rtx ();
4720 label2
= gen_label_rtx ();
4721 do_cmp_and_jump (adjusted_op0
, const0_rtx
, NE
,
4722 compute_mode
, label1
);
4723 emit_move_insn (quotient
, const0_rtx
);
4724 emit_jump_insn (gen_jump (label2
));
4726 emit_label (label1
);
4727 expand_dec (adjusted_op0
, const1_rtx
);
4728 tem
= expand_binop (compute_mode
, udiv_optab
, adjusted_op0
, op1
,
4729 quotient
, 1, OPTAB_LIB_WIDEN
);
4730 if (tem
!= quotient
)
4731 emit_move_insn (quotient
, tem
);
4732 expand_inc (quotient
, const1_rtx
);
4733 emit_label (label2
);
4738 if (op1_is_constant
&& EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1
))
4739 && INTVAL (op1
) >= 0)
4741 /* This is extremely similar to the code for the unsigned case
4742 above. For 2.7 we should merge these variants, but for
4743 2.6.1 I don't want to touch the code for unsigned since that
4744 get used in C. The signed case will only be used by other
4748 unsigned HOST_WIDE_INT d
= INTVAL (op1
);
4749 t1
= expand_shift (RSHIFT_EXPR
, compute_mode
, op0
,
4750 floor_log2 (d
), tquotient
, 0);
4751 t2
= expand_binop (compute_mode
, and_optab
, op0
,
4753 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
4754 t3
= gen_reg_rtx (compute_mode
);
4755 t3
= emit_store_flag (t3
, NE
, t2
, const0_rtx
,
4756 compute_mode
, 1, 1);
4760 lab
= gen_label_rtx ();
4761 do_cmp_and_jump (t2
, const0_rtx
, EQ
, compute_mode
, lab
);
4762 expand_inc (t1
, const1_rtx
);
4767 quotient
= force_operand (gen_rtx_PLUS (compute_mode
,
4773 /* Try using an instruction that produces both the quotient and
4774 remainder, using truncation. We can easily compensate the
4775 quotient or remainder to get ceiling rounding, once we have the
4776 remainder. Notice that we compute also the final remainder
4777 value here, and return the result right away. */
4778 if (target
== 0 || GET_MODE (target
) != compute_mode
)
4779 target
= gen_reg_rtx (compute_mode
);
4782 remainder
= (REG_P (target
)
4783 ? target
: gen_reg_rtx (compute_mode
));
4784 quotient
= gen_reg_rtx (compute_mode
);
4788 quotient
= (REG_P (target
)
4789 ? target
: gen_reg_rtx (compute_mode
));
4790 remainder
= gen_reg_rtx (compute_mode
);
4793 if (expand_twoval_binop (sdivmod_optab
, op0
, op1
, quotient
,
4796 /* This could be computed with a branch-less sequence.
4797 Save that for later. */
4799 rtx label
= gen_label_rtx ();
4800 do_cmp_and_jump (remainder
, const0_rtx
, EQ
,
4801 compute_mode
, label
);
4802 tem
= expand_binop (compute_mode
, xor_optab
, op0
, op1
,
4803 NULL_RTX
, 0, OPTAB_WIDEN
);
4804 do_cmp_and_jump (tem
, const0_rtx
, LT
, compute_mode
, label
);
4805 expand_inc (quotient
, const1_rtx
);
4806 expand_dec (remainder
, op1
);
4808 return gen_lowpart (mode
, rem_flag
? remainder
: quotient
);
4811 /* No luck with division elimination or divmod. Have to do it
4812 by conditionally adjusting op0 *and* the result. */
4814 rtx label1
, label2
, label3
, label4
, label5
;
4818 quotient
= gen_reg_rtx (compute_mode
);
4819 adjusted_op0
= copy_to_mode_reg (compute_mode
, op0
);
4820 label1
= gen_label_rtx ();
4821 label2
= gen_label_rtx ();
4822 label3
= gen_label_rtx ();
4823 label4
= gen_label_rtx ();
4824 label5
= gen_label_rtx ();
4825 do_cmp_and_jump (op1
, const0_rtx
, LT
, compute_mode
, label2
);
4826 do_cmp_and_jump (adjusted_op0
, const0_rtx
, GT
,
4827 compute_mode
, label1
);
4828 tem
= expand_binop (compute_mode
, sdiv_optab
, adjusted_op0
, op1
,
4829 quotient
, 0, OPTAB_LIB_WIDEN
);
4830 if (tem
!= quotient
)
4831 emit_move_insn (quotient
, tem
);
4832 emit_jump_insn (gen_jump (label5
));
4834 emit_label (label1
);
4835 expand_dec (adjusted_op0
, const1_rtx
);
4836 emit_jump_insn (gen_jump (label4
));
4838 emit_label (label2
);
4839 do_cmp_and_jump (adjusted_op0
, const0_rtx
, LT
,
4840 compute_mode
, label3
);
4841 tem
= expand_binop (compute_mode
, sdiv_optab
, adjusted_op0
, op1
,
4842 quotient
, 0, OPTAB_LIB_WIDEN
);
4843 if (tem
!= quotient
)
4844 emit_move_insn (quotient
, tem
);
4845 emit_jump_insn (gen_jump (label5
));
4847 emit_label (label3
);
4848 expand_inc (adjusted_op0
, const1_rtx
);
4849 emit_label (label4
);
4850 tem
= expand_binop (compute_mode
, sdiv_optab
, adjusted_op0
, op1
,
4851 quotient
, 0, OPTAB_LIB_WIDEN
);
4852 if (tem
!= quotient
)
4853 emit_move_insn (quotient
, tem
);
4854 expand_inc (quotient
, const1_rtx
);
4855 emit_label (label5
);
4860 case EXACT_DIV_EXPR
:
4861 if (op1_is_constant
&& HOST_BITS_PER_WIDE_INT
>= size
)
4863 HOST_WIDE_INT d
= INTVAL (op1
);
4864 unsigned HOST_WIDE_INT ml
;
4868 pre_shift
= floor_log2 (d
& -d
);
4869 ml
= invert_mod2n (d
>> pre_shift
, size
);
4870 t1
= expand_shift (RSHIFT_EXPR
, compute_mode
, op0
,
4871 pre_shift
, NULL_RTX
, unsignedp
);
4872 quotient
= expand_mult (compute_mode
, t1
,
4873 gen_int_mode (ml
, compute_mode
),
4876 insn
= get_last_insn ();
4877 set_dst_reg_note (insn
, REG_EQUAL
,
4878 gen_rtx_fmt_ee (unsignedp
? UDIV
: DIV
,
4879 compute_mode
, op0
, op1
),
4884 case ROUND_DIV_EXPR
:
4885 case ROUND_MOD_EXPR
:
4890 label
= gen_label_rtx ();
4891 quotient
= gen_reg_rtx (compute_mode
);
4892 remainder
= gen_reg_rtx (compute_mode
);
4893 if (expand_twoval_binop (udivmod_optab
, op0
, op1
, quotient
, remainder
, 1) == 0)
4896 quotient
= expand_binop (compute_mode
, udiv_optab
, op0
, op1
,
4897 quotient
, 1, OPTAB_LIB_WIDEN
);
4898 tem
= expand_mult (compute_mode
, quotient
, op1
, NULL_RTX
, 1);
4899 remainder
= expand_binop (compute_mode
, sub_optab
, op0
, tem
,
4900 remainder
, 1, OPTAB_LIB_WIDEN
);
4902 tem
= plus_constant (compute_mode
, op1
, -1);
4903 tem
= expand_shift (RSHIFT_EXPR
, compute_mode
, tem
, 1, NULL_RTX
, 1);
4904 do_cmp_and_jump (remainder
, tem
, LEU
, compute_mode
, label
);
4905 expand_inc (quotient
, const1_rtx
);
4906 expand_dec (remainder
, op1
);
4911 rtx abs_rem
, abs_op1
, tem
, mask
;
4913 label
= gen_label_rtx ();
4914 quotient
= gen_reg_rtx (compute_mode
);
4915 remainder
= gen_reg_rtx (compute_mode
);
4916 if (expand_twoval_binop (sdivmod_optab
, op0
, op1
, quotient
, remainder
, 0) == 0)
4919 quotient
= expand_binop (compute_mode
, sdiv_optab
, op0
, op1
,
4920 quotient
, 0, OPTAB_LIB_WIDEN
);
4921 tem
= expand_mult (compute_mode
, quotient
, op1
, NULL_RTX
, 0);
4922 remainder
= expand_binop (compute_mode
, sub_optab
, op0
, tem
,
4923 remainder
, 0, OPTAB_LIB_WIDEN
);
4925 abs_rem
= expand_abs (compute_mode
, remainder
, NULL_RTX
, 1, 0);
4926 abs_op1
= expand_abs (compute_mode
, op1
, NULL_RTX
, 1, 0);
4927 tem
= expand_shift (LSHIFT_EXPR
, compute_mode
, abs_rem
,
4929 do_cmp_and_jump (tem
, abs_op1
, LTU
, compute_mode
, label
);
4930 tem
= expand_binop (compute_mode
, xor_optab
, op0
, op1
,
4931 NULL_RTX
, 0, OPTAB_WIDEN
);
4932 mask
= expand_shift (RSHIFT_EXPR
, compute_mode
, tem
,
4933 size
- 1, NULL_RTX
, 0);
4934 tem
= expand_binop (compute_mode
, xor_optab
, mask
, const1_rtx
,
4935 NULL_RTX
, 0, OPTAB_WIDEN
);
4936 tem
= expand_binop (compute_mode
, sub_optab
, tem
, mask
,
4937 NULL_RTX
, 0, OPTAB_WIDEN
);
4938 expand_inc (quotient
, tem
);
4939 tem
= expand_binop (compute_mode
, xor_optab
, mask
, op1
,
4940 NULL_RTX
, 0, OPTAB_WIDEN
);
4941 tem
= expand_binop (compute_mode
, sub_optab
, tem
, mask
,
4942 NULL_RTX
, 0, OPTAB_WIDEN
);
4943 expand_dec (remainder
, tem
);
4946 return gen_lowpart (mode
, rem_flag
? remainder
: quotient
);
4954 if (target
&& GET_MODE (target
) != compute_mode
)
  /* Try to produce the remainder without producing the quotient.
     If we seem to have a divmod pattern that does not require widening,
     don't try widening here.  We should really have a WIDEN argument
     to expand_twoval_binop, since what we'd really like to do here is
     1) try a mod insn in compute_mode
     2) try a divmod insn in compute_mode
     3) try a div insn in compute_mode and multiply-subtract to get
        remainder
     4) try the same things with widening allowed.  */
4969 = sign_expand_binop (compute_mode
, umod_optab
, smod_optab
,
4972 ((optab_handler (optab2
, compute_mode
)
4973 != CODE_FOR_nothing
)
4974 ? OPTAB_DIRECT
: OPTAB_WIDEN
));
4977 /* No luck there. Can we do remainder and divide at once
4978 without a library call? */
4979 remainder
= gen_reg_rtx (compute_mode
);
4980 if (! expand_twoval_binop ((unsignedp
4984 NULL_RTX
, remainder
, unsignedp
))
4989 return gen_lowpart (mode
, remainder
);
4992 /* Produce the quotient. Try a quotient insn, but not a library call.
4993 If we have a divmod in this mode, use it in preference to widening
4994 the div (for this test we assume it will not fail). Note that optab2
4995 is set to the one of the two optabs that the call below will use. */
4997 = sign_expand_binop (compute_mode
, udiv_optab
, sdiv_optab
,
4998 op0
, op1
, rem_flag
? NULL_RTX
: target
,
5000 ((optab_handler (optab2
, compute_mode
)
5001 != CODE_FOR_nothing
)
5002 ? OPTAB_DIRECT
: OPTAB_WIDEN
));
5006 /* No luck there. Try a quotient-and-remainder insn,
5007 keeping the quotient alone. */
5008 quotient
= gen_reg_rtx (compute_mode
);
5009 if (! expand_twoval_binop (unsignedp
? udivmod_optab
: sdivmod_optab
,
5011 quotient
, NULL_RTX
, unsignedp
))
5015 /* Still no luck. If we are not computing the remainder,
5016 use a library call for the quotient. */
5017 quotient
= sign_expand_binop (compute_mode
,
5018 udiv_optab
, sdiv_optab
,
5020 unsignedp
, OPTAB_LIB_WIDEN
);
5027 if (target
&& GET_MODE (target
) != compute_mode
)
5032 /* No divide instruction either. Use library for remainder. */
5033 remainder
= sign_expand_binop (compute_mode
, umod_optab
, smod_optab
,
5035 unsignedp
, OPTAB_LIB_WIDEN
);
5036 /* No remainder function. Try a quotient-and-remainder
5037 function, keeping the remainder. */
5040 remainder
= gen_reg_rtx (compute_mode
);
5041 if (!expand_twoval_binop_libfunc
5042 (unsignedp
? udivmod_optab
: sdivmod_optab
,
5044 NULL_RTX
, remainder
,
5045 unsignedp
? UMOD
: MOD
))
5046 remainder
= NULL_RTX
;
5051 /* We divided. Now finish doing X - Y * (X / Y). */
5052 remainder
= expand_mult (compute_mode
, quotient
, op1
,
5053 NULL_RTX
, unsignedp
);
5054 remainder
= expand_binop (compute_mode
, sub_optab
, op0
,
5055 remainder
, target
, unsignedp
,
5060 return gen_lowpart (mode
, rem_flag
? remainder
: quotient
);
/* Return a tree node with data type TYPE, describing the value of X.
   Usually this is a VAR_DECL, if there is no obvious better choice.
   X may be an expression, however we only support those expressions
   generated by loop.c.  */
5069 make_tree (tree type
, rtx x
)
5073 switch (GET_CODE (x
))
5077 HOST_WIDE_INT hi
= 0;
5080 && !(TYPE_UNSIGNED (type
)
5081 && (GET_MODE_BITSIZE (TYPE_MODE (type
))
5082 < HOST_BITS_PER_WIDE_INT
)))
5085 t
= build_int_cst_wide (type
, INTVAL (x
), hi
);
5091 if (GET_MODE (x
) == VOIDmode
)
5092 t
= build_int_cst_wide (type
,
5093 CONST_DOUBLE_LOW (x
), CONST_DOUBLE_HIGH (x
));
5098 REAL_VALUE_FROM_CONST_DOUBLE (d
, x
);
5099 t
= build_real (type
, d
);
5106 int units
= CONST_VECTOR_NUNITS (x
);
5107 tree itype
= TREE_TYPE (type
);
5111 /* Build a tree with vector elements. */
5112 elts
= XALLOCAVEC (tree
, units
);
5113 for (i
= units
- 1; i
>= 0; --i
)
5115 rtx elt
= CONST_VECTOR_ELT (x
, i
);
5116 elts
[i
] = make_tree (itype
, elt
);
5119 return build_vector (type
, elts
);
5123 return fold_build2 (PLUS_EXPR
, type
, make_tree (type
, XEXP (x
, 0)),
5124 make_tree (type
, XEXP (x
, 1)));
5127 return fold_build2 (MINUS_EXPR
, type
, make_tree (type
, XEXP (x
, 0)),
5128 make_tree (type
, XEXP (x
, 1)));
5131 return fold_build1 (NEGATE_EXPR
, type
, make_tree (type
, XEXP (x
, 0)));
5134 return fold_build2 (MULT_EXPR
, type
, make_tree (type
, XEXP (x
, 0)),
5135 make_tree (type
, XEXP (x
, 1)));
5138 return fold_build2 (LSHIFT_EXPR
, type
, make_tree (type
, XEXP (x
, 0)),
5139 make_tree (type
, XEXP (x
, 1)));
5142 t
= unsigned_type_for (type
);
5143 return fold_convert (type
, build2 (RSHIFT_EXPR
, t
,
5144 make_tree (t
, XEXP (x
, 0)),
5145 make_tree (type
, XEXP (x
, 1))));
5148 t
= signed_type_for (type
);
5149 return fold_convert (type
, build2 (RSHIFT_EXPR
, t
,
5150 make_tree (t
, XEXP (x
, 0)),
5151 make_tree (type
, XEXP (x
, 1))));
5154 if (TREE_CODE (type
) != REAL_TYPE
)
5155 t
= signed_type_for (type
);
5159 return fold_convert (type
, build2 (TRUNC_DIV_EXPR
, t
,
5160 make_tree (t
, XEXP (x
, 0)),
5161 make_tree (t
, XEXP (x
, 1))));
5163 t
= unsigned_type_for (type
);
5164 return fold_convert (type
, build2 (TRUNC_DIV_EXPR
, t
,
5165 make_tree (t
, XEXP (x
, 0)),
5166 make_tree (t
, XEXP (x
, 1))));
5170 t
= lang_hooks
.types
.type_for_mode (GET_MODE (XEXP (x
, 0)),
5171 GET_CODE (x
) == ZERO_EXTEND
);
5172 return fold_convert (type
, make_tree (t
, XEXP (x
, 0)));
5175 return make_tree (type
, XEXP (x
, 0));
5178 t
= SYMBOL_REF_DECL (x
);
5180 return fold_convert (type
, build_fold_addr_expr (t
));
5181 /* else fall through. */
5184 t
= build_decl (RTL_LOCATION (x
), VAR_DECL
, NULL_TREE
, type
);
5186 /* If TYPE is a POINTER_TYPE, we might need to convert X from
5187 address mode to pointer mode. */
5188 if (POINTER_TYPE_P (type
))
5189 x
= convert_memory_address_addr_space
5190 (TYPE_MODE (type
), x
, TYPE_ADDR_SPACE (TREE_TYPE (type
)));
5192 /* Note that we do *not* use SET_DECL_RTL here, because we do not
5193 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
5194 t
->decl_with_rtl
.rtl
= x
;
/* Compute the logical-and of OP0 and OP1, storing it in TARGET
   and returning TARGET.

   If TARGET is 0, a pseudo-register or constant is returned.  */

rtx
expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
{
  rtx tem = 0;

  if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
    tem = simplify_binary_operation (AND, mode, op0, op1);
  if (tem == 0)
    tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);

  if (target == 0)
    target = tem;
  else if (tem != target)
    emit_move_insn (target, tem);
  return target;
}
5222 /* Helper function for emit_store_flag. */
5224 emit_cstore (rtx target
, enum insn_code icode
, enum rtx_code code
,
5225 enum machine_mode mode
, enum machine_mode compare_mode
,
5226 int unsignedp
, rtx x
, rtx y
, int normalizep
,
5227 enum machine_mode target_mode
)
5229 struct expand_operand ops
[4];
5230 rtx op0
, last
, comparison
, subtarget
;
5231 enum machine_mode result_mode
= insn_data
[(int) icode
].operand
[0].mode
;
5233 last
= get_last_insn ();
5234 x
= prepare_operand (icode
, x
, 2, mode
, compare_mode
, unsignedp
);
5235 y
= prepare_operand (icode
, y
, 3, mode
, compare_mode
, unsignedp
);
5238 delete_insns_since (last
);
5242 if (target_mode
== VOIDmode
)
5243 target_mode
= result_mode
;
5245 target
= gen_reg_rtx (target_mode
);
5247 comparison
= gen_rtx_fmt_ee (code
, result_mode
, x
, y
);
5249 create_output_operand (&ops
[0], optimize
? NULL_RTX
: target
, result_mode
);
5250 create_fixed_operand (&ops
[1], comparison
);
5251 create_fixed_operand (&ops
[2], x
);
5252 create_fixed_operand (&ops
[3], y
);
5253 if (!maybe_expand_insn (icode
, 4, ops
))
5255 delete_insns_since (last
);
5258 subtarget
= ops
[0].value
;
5260 /* If we are converting to a wider mode, first convert to
5261 TARGET_MODE, then normalize. This produces better combining
5262 opportunities on machines that have a SIGN_EXTRACT when we are
5263 testing a single bit. This mostly benefits the 68k.
5265 If STORE_FLAG_VALUE does not have the sign bit set when
5266 interpreted in MODE, we can do this conversion as unsigned, which
5267 is usually more efficient. */
5268 if (GET_MODE_SIZE (target_mode
) > GET_MODE_SIZE (result_mode
))
5270 convert_move (target
, subtarget
,
5271 val_signbit_known_clear_p (result_mode
,
5274 result_mode
= target_mode
;
5279 /* If we want to keep subexpressions around, don't reuse our last
5284 /* Now normalize to the proper value in MODE. Sometimes we don't
5285 have to do anything. */
5286 if (normalizep
== 0 || normalizep
== STORE_FLAG_VALUE
)
5288 /* STORE_FLAG_VALUE might be the most negative number, so write
5289 the comparison this way to avoid a compiler-time warning. */
5290 else if (- normalizep
== STORE_FLAG_VALUE
)
5291 op0
= expand_unop (result_mode
, neg_optab
, op0
, subtarget
, 0);
5293 /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5294 it hard to use a value of just the sign bit due to ANSI integer
5295 constant typing rules. */
5296 else if (val_signbit_known_set_p (result_mode
, STORE_FLAG_VALUE
))
5297 op0
= expand_shift (RSHIFT_EXPR
, result_mode
, op0
,
5298 GET_MODE_BITSIZE (result_mode
) - 1, subtarget
,
5302 gcc_assert (STORE_FLAG_VALUE
& 1);
5304 op0
= expand_and (result_mode
, op0
, const1_rtx
, subtarget
);
5305 if (normalizep
== -1)
5306 op0
= expand_unop (result_mode
, neg_optab
, op0
, op0
, 0);
5309 /* If we were converting to a smaller mode, do the conversion now. */
5310 if (target_mode
!= result_mode
)
5312 convert_move (target
, op0
, 0);
5320 /* A subroutine of emit_store_flag only including "tricks" that do not
5321 need a recursive call. These are kept separate to avoid infinite
5325 emit_store_flag_1 (rtx target
, enum rtx_code code
, rtx op0
, rtx op1
,
5326 enum machine_mode mode
, int unsignedp
, int normalizep
,
5327 enum machine_mode target_mode
)
5330 enum insn_code icode
;
5331 enum machine_mode compare_mode
;
5332 enum mode_class mclass
;
5333 enum rtx_code scode
;
5337 code
= unsigned_condition (code
);
5338 scode
= swap_condition (code
);
5340 /* If one operand is constant, make it the second one. Only do this
5341 if the other operand is not constant as well. */
5343 if (swap_commutative_operands_p (op0
, op1
))
5348 code
= swap_condition (code
);
5351 if (mode
== VOIDmode
)
5352 mode
= GET_MODE (op0
);
5354 /* For some comparisons with 1 and -1, we can convert this to
5355 comparisons with zero. This will often produce more opportunities for
5356 store-flag insns. */
5361 if (op1
== const1_rtx
)
5362 op1
= const0_rtx
, code
= LE
;
5365 if (op1
== constm1_rtx
)
5366 op1
= const0_rtx
, code
= LT
;
5369 if (op1
== const1_rtx
)
5370 op1
= const0_rtx
, code
= GT
;
5373 if (op1
== constm1_rtx
)
5374 op1
= const0_rtx
, code
= GE
;
5377 if (op1
== const1_rtx
)
5378 op1
= const0_rtx
, code
= NE
;
5381 if (op1
== const1_rtx
)
5382 op1
= const0_rtx
, code
= EQ
;
5388 /* If we are comparing a double-word integer with zero or -1, we can
5389 convert the comparison into one involving a single word. */
5390 if (GET_MODE_BITSIZE (mode
) == BITS_PER_WORD
* 2
5391 && GET_MODE_CLASS (mode
) == MODE_INT
5392 && (!MEM_P (op0
) || ! MEM_VOLATILE_P (op0
)))
5394 if ((code
== EQ
|| code
== NE
)
5395 && (op1
== const0_rtx
|| op1
== constm1_rtx
))
5399 /* Do a logical OR or AND of the two words and compare the
5401 op00
= simplify_gen_subreg (word_mode
, op0
, mode
, 0);
5402 op01
= simplify_gen_subreg (word_mode
, op0
, mode
, UNITS_PER_WORD
);
5403 tem
= expand_binop (word_mode
,
5404 op1
== const0_rtx
? ior_optab
: and_optab
,
5405 op00
, op01
, NULL_RTX
, unsignedp
,
5409 tem
= emit_store_flag (NULL_RTX
, code
, tem
, op1
, word_mode
,
5410 unsignedp
, normalizep
);
5412 else if ((code
== LT
|| code
== GE
) && op1
== const0_rtx
)
5416 /* If testing the sign bit, can just test on high word. */
5417 op0h
= simplify_gen_subreg (word_mode
, op0
, mode
,
5418 subreg_highpart_offset (word_mode
,
5420 tem
= emit_store_flag (NULL_RTX
, code
, op0h
, op1
, word_mode
,
5421 unsignedp
, normalizep
);
5428 if (target_mode
== VOIDmode
|| GET_MODE (tem
) == target_mode
)
5431 target
= gen_reg_rtx (target_mode
);
5433 convert_move (target
, tem
,
5434 !val_signbit_known_set_p (word_mode
,
5435 (normalizep
? normalizep
5436 : STORE_FLAG_VALUE
)));
5441 /* If this is A < 0 or A >= 0, we can do this by taking the ones
5442 complement of A (for GE) and shifting the sign bit to the low bit. */
5443 if (op1
== const0_rtx
&& (code
== LT
|| code
== GE
)
5444 && GET_MODE_CLASS (mode
) == MODE_INT
5445 && (normalizep
|| STORE_FLAG_VALUE
== 1
5446 || val_signbit_p (mode
, STORE_FLAG_VALUE
)))
5453 /* If the result is to be wider than OP0, it is best to convert it
5454 first. If it is to be narrower, it is *incorrect* to convert it
5456 else if (GET_MODE_SIZE (target_mode
) > GET_MODE_SIZE (mode
))
5458 op0
= convert_modes (target_mode
, mode
, op0
, 0);
5462 if (target_mode
!= mode
)
5466 op0
= expand_unop (mode
, one_cmpl_optab
, op0
,
5467 ((STORE_FLAG_VALUE
== 1 || normalizep
)
5468 ? 0 : subtarget
), 0);
5470 if (STORE_FLAG_VALUE
== 1 || normalizep
)
5471 /* If we are supposed to produce a 0/1 value, we want to do
5472 a logical shift from the sign bit to the low-order bit; for
5473 a -1/0 value, we do an arithmetic shift. */
5474 op0
= expand_shift (RSHIFT_EXPR
, mode
, op0
,
5475 GET_MODE_BITSIZE (mode
) - 1,
5476 subtarget
, normalizep
!= -1);
5478 if (mode
!= target_mode
)
5479 op0
= convert_modes (target_mode
, mode
, op0
, 0);
5484 mclass
= GET_MODE_CLASS (mode
);
5485 for (compare_mode
= mode
; compare_mode
!= VOIDmode
;
5486 compare_mode
= GET_MODE_WIDER_MODE (compare_mode
))
5488 enum machine_mode optab_mode
= mclass
== MODE_CC
? CCmode
: compare_mode
;
5489 icode
= optab_handler (cstore_optab
, optab_mode
);
5490 if (icode
!= CODE_FOR_nothing
)
5492 do_pending_stack_adjust ();
5493 tem
= emit_cstore (target
, icode
, code
, mode
, compare_mode
,
5494 unsignedp
, op0
, op1
, normalizep
, target_mode
);
5498 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
5500 tem
= emit_cstore (target
, icode
, scode
, mode
, compare_mode
,
5501 unsignedp
, op1
, op0
, normalizep
, target_mode
);
/* Emit a store-flags instruction for comparison CODE on OP0 and OP1
   and storing in TARGET.  Normally return TARGET.
   Return 0 if that cannot be done.

   MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
   it is VOIDmode, they cannot both be CONST_INT.

   UNSIGNEDP is for the case where we have to widen the operands
   to perform the operation.  It says to use zero-extension.

   NORMALIZEP is 1 if we should convert the result to be either zero
   or one.  NORMALIZEP is -1 if we should convert the result to be
   either zero or -1.  If NORMALIZEP is zero, the result will be left
   "raw" out of the scc insn.  */

rtx
emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
		 enum machine_mode mode, int unsignedp, int normalizep)
{
  enum machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
  enum rtx_code rcode;
  rtx subtarget;
  rtx tem, last, trueval;
;
5536 tem
= emit_store_flag_1 (target
, code
, op0
, op1
, mode
, unsignedp
, normalizep
,
5541 /* If we reached here, we can't do this with a scc insn, however there
5542 are some comparisons that can be done in other ways. Don't do any
5543 of these cases if branches are very cheap. */
5544 if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5547 /* See what we need to return. We can only return a 1, -1, or the
5550 if (normalizep
== 0)
5552 if (STORE_FLAG_VALUE
== 1 || STORE_FLAG_VALUE
== -1)
5553 normalizep
= STORE_FLAG_VALUE
;
5555 else if (val_signbit_p (mode
, STORE_FLAG_VALUE
))
5561 last
= get_last_insn ();
5563 /* If optimizing, use different pseudo registers for each insn, instead
5564 of reusing the same pseudo. This leads to better CSE, but slows
5565 down the compiler, since there are more pseudos */
5566 subtarget
= (!optimize
5567 && (target_mode
== mode
)) ? target
: NULL_RTX
;
5568 trueval
= GEN_INT (normalizep
? normalizep
: STORE_FLAG_VALUE
);
5570 /* For floating-point comparisons, try the reverse comparison or try
5571 changing the "orderedness" of the comparison. */
5572 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
5574 enum rtx_code first_code
;
5577 rcode
= reverse_condition_maybe_unordered (code
);
5578 if (can_compare_p (rcode
, mode
, ccp_store_flag
)
5579 && (code
== ORDERED
|| code
== UNORDERED
5580 || (! HONOR_NANS (mode
) && (code
== LTGT
|| code
== UNEQ
))
5581 || (! HONOR_SNANS (mode
) && (code
== EQ
|| code
== NE
))))
5583 int want_add
= ((STORE_FLAG_VALUE
== 1 && normalizep
== -1)
5584 || (STORE_FLAG_VALUE
== -1 && normalizep
== 1));
5586 /* For the reverse comparison, use either an addition or a XOR. */
5588 && rtx_cost (GEN_INT (normalizep
), PLUS
, 1,
5589 optimize_insn_for_speed_p ()) == 0)
5591 tem
= emit_store_flag_1 (subtarget
, rcode
, op0
, op1
, mode
, 0,
5592 STORE_FLAG_VALUE
, target_mode
);
5594 return expand_binop (target_mode
, add_optab
, tem
,
5595 GEN_INT (normalizep
),
5596 target
, 0, OPTAB_WIDEN
);
5599 && rtx_cost (trueval
, XOR
, 1,
5600 optimize_insn_for_speed_p ()) == 0)
5602 tem
= emit_store_flag_1 (subtarget
, rcode
, op0
, op1
, mode
, 0,
5603 normalizep
, target_mode
);
5605 return expand_binop (target_mode
, xor_optab
, tem
, trueval
,
5606 target
, INTVAL (trueval
) >= 0, OPTAB_WIDEN
);
5610 delete_insns_since (last
);
5612 /* Cannot split ORDERED and UNORDERED, only try the above trick. */
5613 if (code
== ORDERED
|| code
== UNORDERED
)
5616 and_them
= split_comparison (code
, mode
, &first_code
, &code
);
5618 /* If there are no NaNs, the first comparison should always fall through.
5619 Effectively change the comparison to the other one. */
5620 if (!HONOR_NANS (mode
))
5622 gcc_assert (first_code
== (and_them
? ORDERED
: UNORDERED
));
5623 return emit_store_flag_1 (target
, code
, op0
, op1
, mode
, 0, normalizep
,
5627 #ifdef HAVE_conditional_move
5628 /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5629 conditional move. */
5630 tem
= emit_store_flag_1 (subtarget
, first_code
, op0
, op1
, mode
, 0,
5631 normalizep
, target_mode
);
5636 tem
= emit_conditional_move (target
, code
, op0
, op1
, mode
,
5637 tem
, const0_rtx
, GET_MODE (tem
), 0);
5639 tem
= emit_conditional_move (target
, code
, op0
, op1
, mode
,
5640 trueval
, tem
, GET_MODE (tem
), 0);
5643 delete_insns_since (last
);
5650 /* The remaining tricks only apply to integer comparisons. */
5652 if (GET_MODE_CLASS (mode
) != MODE_INT
)
5655 /* If this is an equality comparison of integers, we can try to exclusive-or
5656 (or subtract) the two operands and use a recursive call to try the
5657 comparison with zero. Don't do any of these cases if branches are
5660 if ((code
== EQ
|| code
== NE
) && op1
!= const0_rtx
)
5662 tem
= expand_binop (mode
, xor_optab
, op0
, op1
, subtarget
, 1,
5666 tem
= expand_binop (mode
, sub_optab
, op0
, op1
, subtarget
, 1,
5669 tem
= emit_store_flag (target
, code
, tem
, const0_rtx
,
5670 mode
, unsignedp
, normalizep
);
5674 delete_insns_since (last
);
5677 /* For integer comparisons, try the reverse comparison. However, for
5678 small X and if we'd have anyway to extend, implementing "X != 0"
5679 as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0". */
5680 rcode
= reverse_condition (code
);
5681 if (can_compare_p (rcode
, mode
, ccp_store_flag
)
5682 && ! (optab_handler (cstore_optab
, mode
) == CODE_FOR_nothing
5684 && GET_MODE_SIZE (mode
) < UNITS_PER_WORD
5685 && op1
== const0_rtx
))
5687 int want_add
= ((STORE_FLAG_VALUE
== 1 && normalizep
== -1)
5688 || (STORE_FLAG_VALUE
== -1 && normalizep
== 1));
5690 /* Again, for the reverse comparison, use either an addition or a XOR. */
5692 && rtx_cost (GEN_INT (normalizep
), PLUS
, 1,
5693 optimize_insn_for_speed_p ()) == 0)
5695 tem
= emit_store_flag_1 (subtarget
, rcode
, op0
, op1
, mode
, 0,
5696 STORE_FLAG_VALUE
, target_mode
);
5698 tem
= expand_binop (target_mode
, add_optab
, tem
,
5699 GEN_INT (normalizep
), target
, 0, OPTAB_WIDEN
);
      else if (!want_add
               && rtx_cost (trueval, XOR, 1,
                            optimize_insn_for_speed_p ()) == 0)
        {
          tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
                                   normalizep, target_mode);
          if (tem != 0)
            tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
                                INTVAL (trueval) >= 0, OPTAB_WIDEN);
        }

      if (tem != 0)
        return tem;
      delete_insns_since (last);
    }
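  /* Sketch of the "-(int)X >> 31" remark above (illustrative, assuming a
     32-bit int and a narrower unsigned X):

       flag = (unsigned int) -(int) x >> 31;    -- 1 exactly when x != 0

     -(int) x is negative exactly when x is nonzero, so a negate plus a
     logical shift of the sign bit already gives the flag without having to
     invert an "x == 0" result.  */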

  /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
     the constant zero.  Reject all other comparisons at this point.  Only
     do LE and GT if branches are expensive since they are expensive on
     2-operand machines.  */

  if (op1 != const0_rtx
      || (code != EQ && code != NE
          && (BRANCH_COST (optimize_insn_for_speed_p (),
                           false) <= 1 || (code != LE && code != GT))))
    return 0;

  /* Try to put the result of the comparison in the sign bit.  Assume we can't
     do the necessary operation below.  */

  tem = 0;

  /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
     the sign bit set.  */

  if (code == LE)
    {
      /* This is destructive, so SUBTARGET can't be OP0.  */
      if (rtx_equal_p (subtarget, op0))
        subtarget = 0;

      tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
                          OPTAB_WIDEN);
      if (tem)
        tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
                            OPTAB_WIDEN);
    }
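  /* Worked example of the trick above (illustrative 32-bit values):
     A == 5  gives 5 | 4 == 5, sign bit clear;
     A == 0  gives 0 | -1 == -1, sign bit set;
     A == -3 gives -3 | -4 == -3, sign bit set.
     So the sign bit of A | (A - 1) is exactly "A <= 0".  */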

  /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
     number of bits in the mode of OP0, minus one.  */

  if (code == GT)
    {
      if (rtx_equal_p (subtarget, op0))
        subtarget = 0;

      tem = expand_shift (RSHIFT_EXPR, mode, op0,
                          GET_MODE_BITSIZE (mode) - 1,
                          subtarget, 0);
      if (tem)
        tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
                            OPTAB_WIDEN);
    }
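  /* Worked example of the computation above (illustrative 32-bit values):
     A == 5  gives (A >> 31) - A == 0 - 5 == -5, sign bit set;
     A == 0  gives 0, sign bit clear;
     A == -7 gives (A >> 31) - A == -1 + 7 == 6, sign bit clear.
     So the sign bit of the result is exactly "A > 0".  */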

  if (code == EQ || code == NE)
    {
      /* For EQ or NE, one way to do the comparison is to apply an operation
         that converts the operand into a positive number if it is nonzero
         or zero if it was originally zero.  Then, for EQ, we subtract 1 and
         for NE we negate.  This puts the result in the sign bit.  Then we
         normalize with a shift, if needed.

         Two operations that can do the above actions are ABS and FFS, so try
         them.  If that doesn't work, and MODE is smaller than a full word,
         we can use zero-extension to the wider mode (an unsigned conversion)
         as the operation.  */

      /* Note that ABS doesn't yield a positive number for INT_MIN, but
         that is compensated by the subsequent overflow when subtracting
         one / negating.  */

      if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
        tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
      else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
        tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
      else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
        {
          tem = convert_modes (word_mode, mode, op0, 1);
          mode = word_mode;
        }

      if (tem != 0)
        {
          if (code == EQ)
            tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
                                0, OPTAB_WIDEN);
          else
            tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
        }
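      /* Illustration of the scheme above (example values): with X == 7,
         ABS gives 7; subtracting 1 for EQ leaves 6 (sign bit clear), while
         negating for NE gives -7 (sign bit set).  With X == 0, ABS gives 0;
         EQ's subtract yields -1 (sign bit set) and NE's negate yields 0
         (sign bit clear).  FFS and zero extension likewise map zero to zero
         and nonzero values to strictly positive ones, so the same
         subtract / negate step applies.  */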

      /* If we couldn't do it that way, for NE we can "or" the two's
         complement of the value with itself.  For EQ, we take the one's
         complement of that "or", which is an extra insn, so we only handle
         EQ if branches are expensive.  */

      if (tem == 0
          && (code == NE
              || BRANCH_COST (optimize_insn_for_speed_p (),
                              false) > 1))
        {
          if (rtx_equal_p (subtarget, op0))
            subtarget = 0;

          tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
          tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
                              OPTAB_WIDEN);

          if (tem && code == EQ)
            tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
        }
    }
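  /* Illustration of the "-A | A" fallback above (example 32-bit values):
     A == 5 gives -5 | 5 == -1, sign bit set; A == 0 gives 0, sign bit
     clear.  The sign bit of -A | A is therefore "A != 0", and its one's
     complement carries "A == 0" in the sign bit.  */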

  if (tem && normalizep)
    tem = expand_shift (RSHIFT_EXPR, mode, tem,
                        GET_MODE_BITSIZE (mode) - 1,
                        subtarget, normalizep == 1);
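  /* The shift above moves the sign bit down: a logical shift when
     normalizep == 1, giving 0 or 1, and an arithmetic shift otherwise,
     giving 0 or -1.  For instance a 32-bit 0x80000000 becomes 1 under the
     logical shift by 31 and -1 under the arithmetic one.  */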

  if (tem)
    {
      if (!target)
        ;
      else if (GET_MODE (tem) != target_mode)
        {
          convert_move (target, tem, 0);
          tem = target;
        }
      else if (!subtarget)
        {
          emit_move_insn (target, tem);
          tem = target;
        }
    }
  else
    delete_insns_since (last);

  return tem;
}

/* Like emit_store_flag, but always succeeds.  */

rtx
emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
                       enum machine_mode mode, int unsignedp, int normalizep)
{
  rtx tem, label;
  rtx trueval, falseval;

  /* First see if emit_store_flag can do the job.  */
  tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
  if (tem != 0)
    return tem;

  if (!target)
    target = gen_reg_rtx (word_mode);

  /* If this failed, we have to do this with set/compare/jump/set code.
     For foo != 0, if foo is in OP0, just replace it with 1 if nonzero.  */
  trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
  if (code == NE
      && GET_MODE_CLASS (mode) == MODE_INT
      && REG_P (target)
      && op0 == target
      && op1 == const0_rtx)
    {
      label = gen_label_rtx ();
      do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp,
                               mode, NULL_RTX, NULL_RTX, label, -1);
      emit_move_insn (target, trueval);
      emit_label (label);
      return target;
    }
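  /* In the special case just above, the emitted sequence is, schematically:

         if (target == 0) goto label;
         target = trueval;
       label:

     i.e. a nonzero TARGET is simply overwritten with the normalized true
     value.  */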

  if (!REG_P (target)
      || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
    target = gen_reg_rtx (GET_MODE (target));

  /* Jump in the right direction if the target cannot implement CODE
     but can jump on its reverse condition.  */
  falseval = const0_rtx;
  if (! can_compare_p (code, mode, ccp_jump)
      && (! FLOAT_MODE_P (mode)
          || code == ORDERED || code == UNORDERED
          || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
          || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
    {
      enum rtx_code rcode;
      if (FLOAT_MODE_P (mode))
        rcode = reverse_condition_maybe_unordered (code);
      else
        rcode = reverse_condition (code);

      /* Canonicalize to UNORDERED for the libcall.  */
      if (can_compare_p (rcode, mode, ccp_jump)
          || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
        {
          falseval = trueval;
          trueval = const0_rtx;
          code = rcode;
        }
    }
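  /* The general fallback emitted below is, schematically:

         target = trueval;
         if (op0 CODE op1) goto label;
         target = falseval;
       label:

     with TRUEVAL and FALSEVAL possibly swapped just above when only the
     reverse condition is directly usable for the jump.  */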

  emit_move_insn (target, trueval);
  label = gen_label_rtx ();
  do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
                           NULL_RTX, label, -1);

  emit_move_insn (target, falseval);
  emit_label (label);

  return target;
}

/* Perform a possibly multi-word comparison and conditional jump to LABEL
   if ARG1 OP ARG2 is true, where ARG1 and ARG2 are of mode MODE.  This is
   now a thin wrapper around do_compare_rtx_and_jump.  */

static void
do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
                 rtx label)
{
  int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);

  do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode,
                           NULL_RTX, NULL_RTX, label, -1);
}