/* Medium-level subroutines: convert bit-field store and extract
   and shifts, multiplies and divides to rtl instructions.
   Copyright (C) 1987-2017 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "diagnostic-core.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "langhooks.h"

struct target_expmed default_target_expmed;
struct target_expmed *this_target_expmed = &default_target_expmed;

static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
                                   unsigned HOST_WIDE_INT,
                                   unsigned HOST_WIDE_INT,
                                   unsigned HOST_WIDE_INT,
                                   rtx, bool);
static void store_fixed_bit_field_1 (rtx, unsigned HOST_WIDE_INT,
                                     unsigned HOST_WIDE_INT,
                                     rtx, bool);
static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
                                   unsigned HOST_WIDE_INT,
                                   unsigned HOST_WIDE_INT,
                                   unsigned HOST_WIDE_INT,
                                   rtx, bool);
static rtx extract_fixed_bit_field (machine_mode, rtx,
                                    unsigned HOST_WIDE_INT,
                                    unsigned HOST_WIDE_INT, rtx, int, bool);
static rtx extract_fixed_bit_field_1 (machine_mode, rtx,
                                      unsigned HOST_WIDE_INT,
                                      unsigned HOST_WIDE_INT, rtx, int, bool);
static rtx lshift_value (machine_mode, unsigned HOST_WIDE_INT, int);
static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
                                    unsigned HOST_WIDE_INT, int, bool);
static void do_cmp_and_jump (rtx, rtx, enum rtx_code, machine_mode,
                             rtx_code_label *);
static rtx expand_smod_pow2 (machine_mode, rtx, HOST_WIDE_INT);
static rtx expand_sdiv_pow2 (machine_mode, rtx, HOST_WIDE_INT);

/* Return a constant integer mask value of mode MODE with BITSIZE ones
   followed by BITPOS zeros, or the complement of that if COMPLEMENT.
   The mask is truncated if necessary to the width of mode MODE.  The
   mask is zero-extended if BITSIZE+BITPOS is too small for MODE.  */

static rtx
mask_rtx (machine_mode mode, int bitpos, int bitsize, bool complement)
{
  return immed_wide_int_const
    (wi::shifted_mask (bitpos, bitsize, complement,
                       GET_MODE_PRECISION (mode)), mode);
}
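
/* Worked example (illustrative, assuming an 8-bit QImode):
   mask_rtx (QImode, 2, 3, false) builds the constant 0b00011100 (0x1c),
   i.e. three ones above two zeros, while passing complement = true would
   instead give 0b11100011 (0xe3).  */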

/* Test whether a value is zero or a power of two.  */
#define EXACT_POWER_OF_2_OR_ZERO_P(x) \
  (((x) & ((x) - HOST_WIDE_INT_1U)) == 0)
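
/* For illustration: a power of two has exactly one bit set, so subtracting
   one clears that bit and sets all lower bits, making the AND zero.  For
   example 8 & 7 == 0 and 0 & -1 == 0 are accepted, while 6 & 5 == 4 is
   rejected.  */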

struct init_expmed_rtl
{
  rtx reg;
  rtx plus;
  rtx neg;
  rtx mult;
  rtx sdiv;
  rtx udiv;
  rtx sdiv_32;
  rtx smod_32;
  rtx wide_mult;
  rtx wide_lshr;
  rtx wide_trunc;
  rtx shift;
  rtx shift_mult;
  rtx shift_add;
  rtx shift_sub0;
  rtx shift_sub1;
  rtx zext;
  rtx trunc;

  rtx pow2[MAX_BITS_PER_WORD];
  rtx cint[MAX_BITS_PER_WORD];
};

static void
init_expmed_one_conv (struct init_expmed_rtl *all, machine_mode to_mode,
                      machine_mode from_mode, bool speed)
{
  int to_size, from_size;
  rtx which;

  to_size = GET_MODE_PRECISION (to_mode);
  from_size = GET_MODE_PRECISION (from_mode);

  /* Most partial integers have a precision less than the "full"
     integer they require for storage.  In case one doesn't, for
     comparison purposes here, reduce the bit size by one in that
     case.  */
  if (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT
      && pow2p_hwi (to_size))
    to_size --;
  if (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT
      && pow2p_hwi (from_size))
    from_size --;

  /* Assume cost of zero-extend and sign-extend is the same.  */
  which = (to_size < from_size ? all->trunc : all->zext);

  PUT_MODE (all->reg, from_mode);
  set_convert_cost (to_mode, from_mode, speed,
                    set_src_cost (which, to_mode, speed));
}

static void
init_expmed_one_mode (struct init_expmed_rtl *all,
                      machine_mode mode, int speed)
{
  int m, n, mode_bitsize;
  machine_mode mode_from;

  mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);

  PUT_MODE (all->reg, mode);
  PUT_MODE (all->plus, mode);
  PUT_MODE (all->neg, mode);
  PUT_MODE (all->mult, mode);
  PUT_MODE (all->sdiv, mode);
  PUT_MODE (all->udiv, mode);
  PUT_MODE (all->sdiv_32, mode);
  PUT_MODE (all->smod_32, mode);
  PUT_MODE (all->wide_trunc, mode);
  PUT_MODE (all->shift, mode);
  PUT_MODE (all->shift_mult, mode);
  PUT_MODE (all->shift_add, mode);
  PUT_MODE (all->shift_sub0, mode);
  PUT_MODE (all->shift_sub1, mode);
  PUT_MODE (all->zext, mode);
  PUT_MODE (all->trunc, mode);

  set_add_cost (speed, mode, set_src_cost (all->plus, mode, speed));
  set_neg_cost (speed, mode, set_src_cost (all->neg, mode, speed));
  set_mul_cost (speed, mode, set_src_cost (all->mult, mode, speed));
  set_sdiv_cost (speed, mode, set_src_cost (all->sdiv, mode, speed));
  set_udiv_cost (speed, mode, set_src_cost (all->udiv, mode, speed));

  set_sdiv_pow2_cheap (speed, mode, (set_src_cost (all->sdiv_32, mode, speed)
                                     <= 2 * add_cost (speed, mode)));
  set_smod_pow2_cheap (speed, mode, (set_src_cost (all->smod_32, mode, speed)
                                     <= 4 * add_cost (speed, mode)));

  set_shift_cost (speed, mode, 0, 0);
  {
    int cost = add_cost (speed, mode);
    set_shiftadd_cost (speed, mode, 0, cost);
    set_shiftsub0_cost (speed, mode, 0, cost);
    set_shiftsub1_cost (speed, mode, 0, cost);
  }

  n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
  for (m = 1; m < n; m++)
    {
      XEXP (all->shift, 1) = all->cint[m];
      XEXP (all->shift_mult, 1) = all->pow2[m];

      set_shift_cost (speed, mode, m, set_src_cost (all->shift, mode, speed));
      set_shiftadd_cost (speed, mode, m, set_src_cost (all->shift_add, mode,
                                                       speed));
      set_shiftsub0_cost (speed, mode, m, set_src_cost (all->shift_sub0, mode,
                                                        speed));
      set_shiftsub1_cost (speed, mode, m, set_src_cost (all->shift_sub1, mode,
                                                        speed));
    }

  if (SCALAR_INT_MODE_P (mode))
    {
      for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
           mode_from = (machine_mode)(mode_from + 1))
        init_expmed_one_conv (all, mode, mode_from, speed);

      if (GET_MODE_CLASS (mode) == MODE_INT)
        {
          machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
          if (wider_mode != VOIDmode)
            {
              PUT_MODE (all->zext, wider_mode);
              PUT_MODE (all->wide_mult, wider_mode);
              PUT_MODE (all->wide_lshr, wider_mode);
              XEXP (all->wide_lshr, 1) = GEN_INT (mode_bitsize);

              set_mul_widen_cost (speed, wider_mode,
                                  set_src_cost (all->wide_mult, wider_mode,
                                                speed));
              set_mul_highpart_cost (speed, mode,
                                     set_src_cost (all->wide_trunc, mode,
                                                   speed));
            }
        }
    }
}

void
init_expmed (void)
{
  struct init_expmed_rtl all;
  machine_mode mode = QImode;
  int m, speed;

  memset (&all, 0, sizeof all);
  for (m = 1; m < MAX_BITS_PER_WORD; m++)
    {
      all.pow2[m] = GEN_INT (HOST_WIDE_INT_1 << m);
      all.cint[m] = GEN_INT (m);
    }

  /* Avoid using hard regs in ways which may be unsupported.  */
  all.reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
  all.plus = gen_rtx_PLUS (mode, all.reg, all.reg);
  all.neg = gen_rtx_NEG (mode, all.reg);
  all.mult = gen_rtx_MULT (mode, all.reg, all.reg);
  all.sdiv = gen_rtx_DIV (mode, all.reg, all.reg);
  all.udiv = gen_rtx_UDIV (mode, all.reg, all.reg);
  all.sdiv_32 = gen_rtx_DIV (mode, all.reg, all.pow2[5]);
  all.smod_32 = gen_rtx_MOD (mode, all.reg, all.pow2[5]);
  all.zext = gen_rtx_ZERO_EXTEND (mode, all.reg);
  all.wide_mult = gen_rtx_MULT (mode, all.zext, all.zext);
  all.wide_lshr = gen_rtx_LSHIFTRT (mode, all.wide_mult, all.reg);
  all.wide_trunc = gen_rtx_TRUNCATE (mode, all.wide_lshr);
  all.shift = gen_rtx_ASHIFT (mode, all.reg, all.reg);
  all.shift_mult = gen_rtx_MULT (mode, all.reg, all.reg);
  all.shift_add = gen_rtx_PLUS (mode, all.shift_mult, all.reg);
  all.shift_sub0 = gen_rtx_MINUS (mode, all.shift_mult, all.reg);
  all.shift_sub1 = gen_rtx_MINUS (mode, all.reg, all.shift_mult);
  all.trunc = gen_rtx_TRUNCATE (mode, all.reg);

  for (speed = 0; speed < 2; speed++)
    {
      crtl->maybe_hot_insn_p = speed;
      set_zero_cost (speed, set_src_cost (const0_rtx, mode, speed));

      for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
           mode = (machine_mode)(mode + 1))
        init_expmed_one_mode (&all, mode, speed);

      if (MIN_MODE_PARTIAL_INT != VOIDmode)
        for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
             mode = (machine_mode)(mode + 1))
          init_expmed_one_mode (&all, mode, speed);

      if (MIN_MODE_VECTOR_INT != VOIDmode)
        for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
             mode = (machine_mode)(mode + 1))
          init_expmed_one_mode (&all, mode, speed);
    }

  if (alg_hash_used_p ())
    {
      struct alg_hash_entry *p = alg_hash_entry_ptr (0);
      memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
    }
  else
    set_alg_hash_used_p (true);
  default_rtl_profile ();

  ggc_free (all.trunc);
  ggc_free (all.shift_sub1);
  ggc_free (all.shift_sub0);
  ggc_free (all.shift_add);
  ggc_free (all.shift_mult);
  ggc_free (all.shift);
  ggc_free (all.wide_trunc);
  ggc_free (all.wide_lshr);
  ggc_free (all.wide_mult);
  ggc_free (all.smod_32);
  ggc_free (all.sdiv_32);
  ggc_free (all.udiv);
  ggc_free (all.sdiv);
  ggc_free (all.mult);
  ggc_free (all.neg);
  ggc_free (all.plus);
  ggc_free (all.zext);
  ggc_free (all.reg);
}

/* Return an rtx representing minus the value of X.
   MODE is the intended mode of the result,
   useful if X is a CONST_INT.  */

rtx
negate_rtx (machine_mode mode, rtx x)
{
  rtx result = simplify_unary_operation (NEG, mode, x, mode);

  if (result == 0)
    result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);

  return result;
}

/* Whether reverse storage order is supported on the target.  */
static int reverse_storage_order_supported = -1;

/* Check whether reverse storage order is supported on the target.  */

static void
check_reverse_storage_order_support (void)
{
  if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
    {
      reverse_storage_order_supported = 0;
      sorry ("reverse scalar storage order");
    }
  else
    reverse_storage_order_supported = 1;
}

/* Whether reverse FP storage order is supported on the target.  */
static int reverse_float_storage_order_supported = -1;

/* Check whether reverse FP storage order is supported on the target.  */

static void
check_reverse_float_storage_order_support (void)
{
  if (FLOAT_WORDS_BIG_ENDIAN != WORDS_BIG_ENDIAN)
    {
      reverse_float_storage_order_supported = 0;
      sorry ("reverse floating-point scalar storage order");
    }
  else
    reverse_float_storage_order_supported = 1;
}

/* Return an rtx representing value of X with reverse storage order.
   MODE is the intended mode of the result,
   useful if X is a CONST_INT.  */

rtx
flip_storage_order (machine_mode mode, rtx x)
{
  machine_mode int_mode;
  rtx result;

  if (COMPLEX_MODE_P (mode))
    {
      rtx real = read_complex_part (x, false);
      rtx imag = read_complex_part (x, true);

      real = flip_storage_order (GET_MODE_INNER (mode), real);
      imag = flip_storage_order (GET_MODE_INNER (mode), imag);

      return gen_rtx_CONCAT (mode, real, imag);
    }

  if (__builtin_expect (reverse_storage_order_supported < 0, 0))
    check_reverse_storage_order_support ();

  if (SCALAR_INT_MODE_P (mode))
    int_mode = mode;
  else
    {
      if (FLOAT_MODE_P (mode)
          && __builtin_expect (reverse_float_storage_order_supported < 0, 0))
        check_reverse_float_storage_order_support ();

      int_mode = mode_for_size (GET_MODE_PRECISION (mode), MODE_INT, 0);
      if (int_mode == BLKmode)
        {
          sorry ("reverse storage order for %smode", GET_MODE_NAME (mode));
          return x;
        }
      x = gen_lowpart (int_mode, x);
    }

  result = simplify_unary_operation (BSWAP, int_mode, x, int_mode);
  if (result == 0)
    result = expand_unop (int_mode, bswap_optab, x, NULL_RTX, 1);

  if (int_mode != mode)
    result = gen_lowpart (mode, result);

  return result;
}

/* Adjust bitfield memory MEM so that it points to the first unit of mode
   MODE that contains a bitfield of size BITSIZE at bit position BITNUM.
   If MODE is BLKmode, return a reference to every byte in the bitfield.
   Set *NEW_BITNUM to the bit position of the field within the new memory.  */

static rtx
narrow_bit_field_mem (rtx mem, machine_mode mode,
                      unsigned HOST_WIDE_INT bitsize,
                      unsigned HOST_WIDE_INT bitnum,
                      unsigned HOST_WIDE_INT *new_bitnum)
{
  if (mode == BLKmode)
    {
      *new_bitnum = bitnum % BITS_PER_UNIT;
      HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
      HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
                            / BITS_PER_UNIT);
      return adjust_bitfield_address_size (mem, mode, offset, size);
    }
  else
    {
      unsigned int unit = GET_MODE_BITSIZE (mode);
      *new_bitnum = bitnum % unit;
      HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
      return adjust_bitfield_address (mem, mode, offset);
    }
}

/* The caller wants to perform insertion or extraction PATTERN on a
   bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
   BITREGION_START and BITREGION_END are as for store_bit_field
   and FIELDMODE is the natural mode of the field.

   Search for a mode that is compatible with the memory access
   restrictions and (where applicable) with a register insertion or
   extraction.  Return the new memory on success, storing the adjusted
   bit position in *NEW_BITNUM.  Return null otherwise.  */

static rtx
adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
                              rtx op0, HOST_WIDE_INT bitsize,
                              HOST_WIDE_INT bitnum,
                              unsigned HOST_WIDE_INT bitregion_start,
                              unsigned HOST_WIDE_INT bitregion_end,
                              machine_mode fieldmode,
                              unsigned HOST_WIDE_INT *new_bitnum)
{
  bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
                                bitregion_end, MEM_ALIGN (op0),
                                MEM_VOLATILE_P (op0));
  machine_mode best_mode;
  if (iter.next_mode (&best_mode))
    {
      /* We can use a memory in BEST_MODE.  See whether this is true for
         any wider modes.  All other things being equal, we prefer to
         use the widest mode possible because it tends to expose more
         CSE opportunities.  */
      if (!iter.prefer_smaller_modes ())
        {
          /* Limit the search to the mode required by the corresponding
             register insertion or extraction instruction, if any.  */
          machine_mode limit_mode = word_mode;
          extraction_insn insn;
          if (get_best_reg_extraction_insn (&insn, pattern,
                                            GET_MODE_BITSIZE (best_mode),
                                            fieldmode))
            limit_mode = insn.field_mode;

          machine_mode wider_mode;
          while (iter.next_mode (&wider_mode)
                 && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
            best_mode = wider_mode;
        }
      return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
                                   new_bitnum);
    }
  return NULL_RTX;
}

/* Return true if a bitfield of size BITSIZE at bit number BITNUM within
   a structure of mode STRUCT_MODE represents a lowpart subreg.  The subreg
   offset is then BITNUM / BITS_PER_UNIT.  */

static bool
lowpart_bit_field_p (unsigned HOST_WIDE_INT bitnum,
                     unsigned HOST_WIDE_INT bitsize,
                     machine_mode struct_mode)
{
  if (BYTES_BIG_ENDIAN)
    return (bitnum % BITS_PER_UNIT == 0
            && (bitnum + bitsize == GET_MODE_BITSIZE (struct_mode)
                || (bitnum + bitsize) % BITS_PER_WORD == 0));
  else
    return bitnum % BITS_PER_WORD == 0;
}
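
/* Worked example (illustrative, assuming 8-bit units and 32-bit words):
   for a 32-bit STRUCT_MODE, an 8-bit field at BITNUM 24 is a lowpart subreg
   on a big-endian target (24 + 8 == 32), and an 8-bit field at BITNUM 0 is
   a lowpart subreg on a little-endian target; the byte offset is
   BITNUM / BITS_PER_UNIT in both cases.  */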

/* Return true if -fstrict-volatile-bitfields applies to an access of OP0
   containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
   Return false if the access would touch memory outside the range
   BITREGION_START to BITREGION_END for conformance to the C++ memory
   model.  */

static bool
strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
                            unsigned HOST_WIDE_INT bitnum,
                            machine_mode fieldmode,
                            unsigned HOST_WIDE_INT bitregion_start,
                            unsigned HOST_WIDE_INT bitregion_end)
{
  unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode);

  /* -fstrict-volatile-bitfields must be enabled and we must have a
     volatile MEM.  */
  if (!MEM_P (op0)
      || !MEM_VOLATILE_P (op0)
      || flag_strict_volatile_bitfields <= 0)
    return false;

  /* Non-integral modes likely only happen with packed structures.
     Punt.  */
  if (!SCALAR_INT_MODE_P (fieldmode))
    return false;

  /* The bit size must not be larger than the field mode, and
     the field mode must not be larger than a word.  */
  if (bitsize > modesize || modesize > BITS_PER_WORD)
    return false;

  /* Check for cases of unaligned fields that must be split.  */
  if (bitnum % modesize + bitsize > modesize)
    return false;

  /* The memory must be sufficiently aligned for a MODESIZE access.
     This condition guarantees that the memory access will not
     touch anything after the end of the structure.  */
  if (MEM_ALIGN (op0) < modesize)
    return false;

  /* Check for cases where the C++ memory model applies.  */
  if (bitregion_end != 0
      && (bitnum - bitnum % modesize < bitregion_start
          || bitnum - bitnum % modesize + modesize - 1 > bitregion_end))
    return false;

  return true;
}
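
/* Illustrative example: with -fstrict-volatile-bitfields in effect, an 8-bit
   field at bit 8 of a volatile, 32-bit-aligned SImode memory word passes
   every check above (8 % 32 + 8 <= 32 and MEM_ALIGN >= 32), so the callers
   below perform the access in SImode, the declared field mode, rather than
   in a narrower mode.  */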

/* Return true if OP is a memory and if a bitfield of size BITSIZE at
   bit number BITNUM can be treated as a simple value of mode MODE.  */

static bool
simple_mem_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
                       unsigned HOST_WIDE_INT bitnum, machine_mode mode)
{
  return (MEM_P (op0)
          && bitnum % BITS_PER_UNIT == 0
          && bitsize == GET_MODE_BITSIZE (mode)
          && (!SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
              || (bitnum % GET_MODE_ALIGNMENT (mode) == 0
                  && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
}

/* Try to use instruction INSV to store VALUE into a field of OP0.
   BITSIZE and BITNUM are as for store_bit_field.  */

static bool
store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
                            unsigned HOST_WIDE_INT bitsize,
                            unsigned HOST_WIDE_INT bitnum,
                            rtx value)
{
  struct expand_operand ops[4];
  rtx value1;
  rtx xop0 = op0;
  rtx_insn *last = get_last_insn ();
  bool copy_back = false;

  machine_mode op_mode = insv->field_mode;
  unsigned int unit = GET_MODE_BITSIZE (op_mode);
  if (bitsize == 0 || bitsize > unit)
    return false;

  if (MEM_P (xop0))
    /* Get a reference to the first byte of the field.  */
    xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
                                 &bitnum);
  else
    {
      /* Convert from counting within OP0 to counting in OP_MODE.  */
      if (BYTES_BIG_ENDIAN)
        bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));

      /* If xop0 is a register, we need it in OP_MODE
         to make it acceptable to the format of insv.  */
      if (GET_CODE (xop0) == SUBREG)
        /* We can't just change the mode, because this might clobber op0,
           and we will need the original value of op0 if insv fails.  */
        xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
      if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
        xop0 = gen_lowpart_SUBREG (op_mode, xop0);
    }

  /* If the destination is a paradoxical subreg such that we need a
     truncate to the inner mode, perform the insertion on a temporary and
     truncate the result to the original destination.  Note that we can't
     just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
     X) 0)) is (reg:N X).  */
  if (GET_CODE (xop0) == SUBREG
      && REG_P (SUBREG_REG (xop0))
      && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
                                         op_mode))
    {
      rtx tem = gen_reg_rtx (op_mode);
      emit_move_insn (tem, xop0);
      xop0 = tem;
      copy_back = true;
    }

  /* There is a similar overflow check at the start of store_bit_field_1,
     but it only handles the case where the field lies completely outside
     the register.  There are also cases where the field lies partly in
     the register, and for those we need to adjust BITSIZE.  Without this
     fix, pr48335-2.c breaks at big-endian on targets that have a bit
     insert instruction, such as arm and aarch64.  */
  if (bitsize + bitnum > unit && bitnum < unit)
    {
      warning (OPT_Wextra, "write of %wu-bit data outside the bound of "
               "destination object, data truncated into %wu-bit",
               bitsize, unit - bitnum);
      bitsize = unit - bitnum;
    }

  /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
     "backwards" from the size of the unit we are inserting into.
     Otherwise, we count bits from the most significant on a
     BYTES/BITS_BIG_ENDIAN machine.  */

  if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
    bitnum = unit - bitsize - bitnum;
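
  /* Worked example (illustrative): inserting an 8-bit field at BITNUM 8
     into a 32-bit unit on a machine where BITS_BIG_ENDIAN differs from
     BYTES_BIG_ENDIAN remaps the position to 32 - 8 - 8 = 16, so the insv
     pattern sees the bit index counted from the opposite end of the unit.  */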

  /* Convert VALUE to op_mode (which insv insn wants) in VALUE1.  */
  value1 = value;
  if (GET_MODE (value) != op_mode)
    {
      if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
        {
          rtx tmp;
          /* Optimization: Don't bother really extending VALUE
             if it has all the bits we will actually use.  However,
             if we must narrow it, be sure we do it correctly.  */

          if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
            {
              tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
              if (! tmp)
                tmp = simplify_gen_subreg (op_mode,
                                           force_reg (GET_MODE (value),
                                                      value1),
                                           GET_MODE (value), 0);
            }
          else
            {
              tmp = gen_lowpart_if_possible (op_mode, value1);
              if (! tmp)
                tmp = gen_lowpart (op_mode, force_reg (GET_MODE (value),
                                                       value1));
            }
          value1 = tmp;
        }
      else if (CONST_INT_P (value))
        value1 = gen_int_mode (INTVAL (value), op_mode);
      else
        /* Parse phase is supposed to make VALUE's data type
           match that of the component reference, which is a type
           at least as wide as the field; so VALUE should have
           a mode that corresponds to that type.  */
        gcc_assert (CONSTANT_P (value));
    }

  create_fixed_operand (&ops[0], xop0);
  create_integer_operand (&ops[1], bitsize);
  create_integer_operand (&ops[2], bitnum);
  create_input_operand (&ops[3], value1, op_mode);
  if (maybe_expand_insn (insv->icode, 4, ops))
    {
      if (copy_back)
        convert_move (op0, xop0, true);
      return true;
    }
  delete_insns_since (last);
  return false;
}

/* A subroutine of store_bit_field, with the same arguments.  Return true
   if the operation could be implemented.

   If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
   no other way of implementing the operation.  If FALLBACK_P is false,
   return false instead.  */

static bool
store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
                   unsigned HOST_WIDE_INT bitnum,
                   unsigned HOST_WIDE_INT bitregion_start,
                   unsigned HOST_WIDE_INT bitregion_end,
                   machine_mode fieldmode,
                   rtx value, bool reverse, bool fallback_p)
{
  rtx op0 = str_rtx;

  while (GET_CODE (op0) == SUBREG)
    {
      /* The following line once was done only if WORDS_BIG_ENDIAN,
         but I think that is a mistake.  WORDS_BIG_ENDIAN is
         meaningful at a much higher level; when structures are copied
         between memory and regs, the higher-numbered regs
         always get higher addresses.  */
      int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
      int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
      int byte_offset = 0;

      /* Paradoxical subregs need special handling on big-endian machines.  */
      if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
        {
          int difference = inner_mode_size - outer_mode_size;

          if (WORDS_BIG_ENDIAN)
            byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
          if (BYTES_BIG_ENDIAN)
            byte_offset += difference % UNITS_PER_WORD;
        }
      else
        byte_offset = SUBREG_BYTE (op0);

      bitnum += byte_offset * BITS_PER_UNIT;
      op0 = SUBREG_REG (op0);
    }

  /* No action is needed if the target is a register and if the field
     lies completely outside that register.  This can occur if the source
     code contains an out-of-bounds access to a small array.  */
  if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
    return true;

  /* Use vec_set patterns for inserting parts of vectors whenever
     available.  */
  if (VECTOR_MODE_P (GET_MODE (op0))
      && !MEM_P (op0)
      && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
      && fieldmode == GET_MODE_INNER (GET_MODE (op0))
      && bitsize == GET_MODE_UNIT_BITSIZE (GET_MODE (op0))
      && !(bitnum % GET_MODE_UNIT_BITSIZE (GET_MODE (op0))))
    {
      struct expand_operand ops[3];
      machine_mode outermode = GET_MODE (op0);
      machine_mode innermode = GET_MODE_INNER (outermode);
      enum insn_code icode = optab_handler (vec_set_optab, outermode);
      int pos = bitnum / GET_MODE_BITSIZE (innermode);

      create_fixed_operand (&ops[0], op0);
      create_input_operand (&ops[1], value, innermode);
      create_integer_operand (&ops[2], pos);
      if (maybe_expand_insn (icode, 3, ops))
        return true;
    }

  /* If the target is a register, overwriting the entire object, or storing
     a full-word or multi-word field can be done with just a SUBREG.  */
  if (!MEM_P (op0)
      && bitsize == GET_MODE_BITSIZE (fieldmode)
      && ((bitsize == GET_MODE_BITSIZE (GET_MODE (op0)) && bitnum == 0)
          || (bitsize % BITS_PER_WORD == 0 && bitnum % BITS_PER_WORD == 0)))
    {
      /* Use the subreg machinery either to narrow OP0 to the required
         words or to cope with mode punning between equal-sized modes.
         In the latter case, use subreg on the rhs side, not lhs.  */
      rtx sub;

      if (bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
        {
          sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
          if (sub)
            {
              if (reverse)
                sub = flip_storage_order (GET_MODE (op0), sub);
              emit_move_insn (op0, sub);
              return true;
            }
        }
      else
        {
          sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
                                     bitnum / BITS_PER_UNIT);
          if (sub)
            {
              if (reverse)
                value = flip_storage_order (fieldmode, value);
              emit_move_insn (sub, value);
              return true;
            }
        }
    }

  /* If the target is memory, storing any naturally aligned field can be
     done with a simple store.  For targets that support fast unaligned
     memory, any naturally sized, unit aligned field can be done directly.  */
  if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode))
    {
      op0 = adjust_bitfield_address (op0, fieldmode, bitnum / BITS_PER_UNIT);
      if (reverse)
        value = flip_storage_order (fieldmode, value);
      emit_move_insn (op0, value);
      return true;
    }

  /* Make sure we are playing with integral modes.  Pun with subregs
     if we aren't.  This must come after the entire register case above,
     since that case is valid for any mode.  The following cases are only
     valid for integral modes.  */
  {
    machine_mode imode = int_mode_for_mode (GET_MODE (op0));
    if (imode != GET_MODE (op0))
      {
        if (MEM_P (op0))
          op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
        else
          {
            gcc_assert (imode != BLKmode);
            op0 = gen_lowpart (imode, op0);
          }
      }
  }

  /* Storing an lsb-aligned field in a register
     can be done with a movstrict instruction.  */

  if (!MEM_P (op0)
      && !reverse
      && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
      && bitsize == GET_MODE_BITSIZE (fieldmode)
      && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
    {
      struct expand_operand ops[2];
      enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
      rtx arg0 = op0;
      unsigned HOST_WIDE_INT subreg_off;

      if (GET_CODE (arg0) == SUBREG)
        {
          /* Else we've got some float mode source being extracted into
             a different float mode destination -- this combination of
             subregs results in Severe Tire Damage.  */
          gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
                      || GET_MODE_CLASS (fieldmode) == MODE_INT
                      || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
          arg0 = SUBREG_REG (arg0);
        }

      subreg_off = bitnum / BITS_PER_UNIT;
      if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
        {
          arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);

          create_fixed_operand (&ops[0], arg0);
          /* Shrink the source operand to FIELDMODE.  */
          create_convert_operand_to (&ops[1], value, fieldmode, false);
          if (maybe_expand_insn (icode, 2, ops))
            return true;
        }
    }

  /* Handle fields bigger than a word.  */

  if (bitsize > BITS_PER_WORD)
    {
      /* Here we transfer the words of the field
         in the order least significant first.
         This is because the most significant word is the one which may
         be less than full.
         However, only do that if the value is not BLKmode.  */

      const bool backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
      unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
      unsigned int i;
      rtx_insn *last;

      /* This is the mode we must force value to, so that there will be enough
         subwords to extract.  Note that fieldmode will often (always?) be
         VOIDmode, because that is what store_field uses to indicate that this
         is a bit field, but passing VOIDmode to operand_subword_force
         is not allowed.  */
      fieldmode = GET_MODE (value);
      if (fieldmode == VOIDmode)
        fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);

      last = get_last_insn ();
      for (i = 0; i < nwords; i++)
        {
          /* If I is 0, use the low-order word in both field and target;
             if I is 1, use the next to lowest word; and so on.  */
          unsigned int wordnum = (backwards
                                  ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD
                                    - i - 1
                                  : i);
          unsigned int bit_offset = (backwards ^ reverse
                                     ? MAX ((int) bitsize - ((int) i + 1)
                                            * BITS_PER_WORD, 0)
                                     : (int) i * BITS_PER_WORD);
          rtx value_word = operand_subword_force (value, wordnum, fieldmode);
          unsigned HOST_WIDE_INT new_bitsize =
            MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);

          /* If the remaining chunk doesn't have full wordsize we have
             to make sure that for big-endian machines the higher order
             bits are used.  */
          if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
            value_word = simplify_expand_binop (word_mode, lshr_optab,
                                                value_word,
                                                GEN_INT (BITS_PER_WORD
                                                         - new_bitsize),
                                                NULL_RTX, true,
                                                OPTAB_LIB_WIDEN);

          if (!store_bit_field_1 (op0, new_bitsize,
                                  bitnum + bit_offset,
                                  bitregion_start, bitregion_end,
                                  word_mode,
                                  value_word, reverse, fallback_p))
            {
              delete_insns_since (last);
              return false;
            }
        }
      return true;
    }

  /* If VALUE has a floating-point or complex mode, access it as an
     integer of the corresponding size.  This can occur on a machine
     with 64 bit registers that uses SFmode for float.  It can also
     occur for unaligned float or complex fields.  */

  rtx orig_value = value;
  if (GET_MODE (value) != VOIDmode
      && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
      && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
    {
      value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
      emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
    }

  /* If OP0 is a multi-word register, narrow it to the affected word.
     If the region spans two words, defer to store_split_bit_field.
     Don't do this if op0 is a single hard register wider than word
     such as a float or vector register.  */
  if (!MEM_P (op0)
      && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD
      && (!REG_P (op0)
          || !HARD_REGISTER_P (op0)
          || HARD_REGNO_NREGS (REGNO (op0), GET_MODE (op0)) != 1))
    {
      if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
        {
          if (!fallback_p)
            return false;

          store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
                                 bitregion_end, value, reverse);
          return true;
        }
      op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
                                 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
      bitnum %= BITS_PER_WORD;
    }

  /* From here on we can assume that the field to be stored in fits
     within a word.  If the destination is a register, it too fits
     in a word.  */

  extraction_insn insv;
  if (!MEM_P (op0)
      && !reverse
      && get_best_reg_extraction_insn (&insv, EP_insv,
                                       GET_MODE_BITSIZE (GET_MODE (op0)),
                                       fieldmode)
      && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
    return true;

  /* If OP0 is a memory, try copying it to a register and seeing if a
     cheap register alternative is available.  */
  if (MEM_P (op0) && !reverse)
    {
      if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
                                        fieldmode)
          && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
        return true;

      rtx_insn *last = get_last_insn ();

      /* Try loading part of OP0 into a register, inserting the bitfield
         into that, and then copying the result back to OP0.  */
      unsigned HOST_WIDE_INT bitpos;
      rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
                                               bitregion_start, bitregion_end,
                                               fieldmode, &bitpos);
      if (xop0)
        {
          rtx tempreg = copy_to_reg (xop0);
          if (store_bit_field_1 (tempreg, bitsize, bitpos,
                                 bitregion_start, bitregion_end,
                                 fieldmode, orig_value, reverse, false))
            {
              emit_move_insn (xop0, tempreg);
              return true;
            }
          delete_insns_since (last);
        }
    }

  if (!fallback_p)
    return false;

  store_fixed_bit_field (op0, bitsize, bitnum, bitregion_start,
                         bitregion_end, value, reverse);
  return true;
}

/* Generate code to store value from rtx VALUE
   into a bit-field within structure STR_RTX
   containing BITSIZE bits starting at bit BITNUM.

   BITREGION_START is bitpos of the first bitfield in this region.
   BITREGION_END is the bitpos of the ending bitfield in this region.
   These two fields are 0, if the C++ memory model does not apply,
   or we are not interested in keeping track of bitfield regions.

   FIELDMODE is the machine-mode of the FIELD_DECL node for this field.

   If REVERSE is true, the store is to be done in reverse order.  */
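
/* A minimal usage sketch, assuming MEM is a memory reference for a struct
   whose first field is a 5-bit 'int' bitfield (the name "mem" is
   hypothetical):

     store_bit_field (mem, 5, 0, 0, 0, SImode, GEN_INT (3), false);

   stores the constant 3 into that field; the two zeros say that no C++
   memory-model bit region applies.  */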

void
store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
                 unsigned HOST_WIDE_INT bitnum,
                 unsigned HOST_WIDE_INT bitregion_start,
                 unsigned HOST_WIDE_INT bitregion_end,
                 machine_mode fieldmode,
                 rtx value, bool reverse)
{
  /* Handle -fstrict-volatile-bitfields in the cases where it applies.  */
  if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, fieldmode,
                                  bitregion_start, bitregion_end))
    {
      /* Storing of a full word can be done with a simple store.
         We know here that the field can be accessed with one single
         instruction.  For targets that support unaligned memory,
         an unaligned access may be necessary.  */
      if (bitsize == GET_MODE_BITSIZE (fieldmode))
        {
          str_rtx = adjust_bitfield_address (str_rtx, fieldmode,
                                             bitnum / BITS_PER_UNIT);
          if (reverse)
            value = flip_storage_order (fieldmode, value);
          gcc_assert (bitnum % BITS_PER_UNIT == 0);
          emit_move_insn (str_rtx, value);
        }
      else
        {
          rtx temp;

          str_rtx = narrow_bit_field_mem (str_rtx, fieldmode, bitsize, bitnum,
                                          &bitnum);
          gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (fieldmode));
          temp = copy_to_reg (str_rtx);
          if (!store_bit_field_1 (temp, bitsize, bitnum, 0, 0,
                                  fieldmode, value, reverse, true))
            gcc_unreachable ();

          emit_move_insn (str_rtx, temp);
        }

      return;
    }

  /* Under the C++0x memory model, we must not touch bits outside the
     bit region.  Adjust the address to start at the beginning of the
     bit region.  */
  if (MEM_P (str_rtx) && bitregion_start > 0)
    {
      machine_mode bestmode;
      HOST_WIDE_INT offset, size;

      gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);

      offset = bitregion_start / BITS_PER_UNIT;
      bitnum -= bitregion_start;
      size = (bitnum + bitsize + BITS_PER_UNIT - 1) / BITS_PER_UNIT;
      bitregion_end -= bitregion_start;
      bitregion_start = 0;
      bestmode = get_best_mode (bitsize, bitnum,
                                bitregion_start, bitregion_end,
                                MEM_ALIGN (str_rtx), VOIDmode,
                                MEM_VOLATILE_P (str_rtx));
      str_rtx = adjust_bitfield_address_size (str_rtx, bestmode, offset, size);
    }

  if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
                          bitregion_start, bitregion_end,
                          fieldmode, value, reverse, true))
    gcc_unreachable ();
}

/* Use shifts and boolean operations to store VALUE into a bit field of
   width BITSIZE in OP0, starting at bit BITNUM.

   If REVERSE is true, the store is to be done in reverse order.  */

static void
store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
                       unsigned HOST_WIDE_INT bitnum,
                       unsigned HOST_WIDE_INT bitregion_start,
                       unsigned HOST_WIDE_INT bitregion_end,
                       rtx value, bool reverse)
{
  /* There is a case not handled here:
     a structure with a known alignment of just a halfword
     and a field split across two aligned halfwords within the structure.
     Or likewise a structure with a known alignment of just a byte
     and a field split across two bytes.
     Such cases are not supposed to be able to occur.  */

  if (MEM_P (op0))
    {
      machine_mode mode = GET_MODE (op0);
      if (GET_MODE_BITSIZE (mode) == 0
          || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
        mode = word_mode;
      mode = get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
                            MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));

      if (mode == VOIDmode)
        {
          /* The only way this should occur is if the field spans word
             boundaries.  */
          store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
                                 bitregion_end, value, reverse);
          return;
        }

      op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
    }

  store_fixed_bit_field_1 (op0, bitsize, bitnum, value, reverse);
}

/* Helper function for store_fixed_bit_field, stores
   the bit field always using the MODE of OP0.  */

static void
store_fixed_bit_field_1 (rtx op0, unsigned HOST_WIDE_INT bitsize,
                         unsigned HOST_WIDE_INT bitnum,
                         rtx value, bool reverse)
{
  machine_mode mode;
  rtx temp;
  int all_zero = 0;
  int all_one = 0;

  mode = GET_MODE (op0);
  gcc_assert (SCALAR_INT_MODE_P (mode));

  /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
     for invalid input, such as f5 from gcc.dg/pr48335-2.c.  */

  if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
    /* BITNUM is the distance between our msb
       and that of the containing datum.
       Convert it to the distance from the lsb.  */
    bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;

  /* Now BITNUM is always the distance between our lsb
     and that of OP0.  */

  /* Shift VALUE left by BITNUM bits.  If VALUE is not constant,
     we must first convert its mode to MODE.  */

  if (CONST_INT_P (value))
    {
      unsigned HOST_WIDE_INT v = UINTVAL (value);

      if (bitsize < HOST_BITS_PER_WIDE_INT)
        v &= (HOST_WIDE_INT_1U << bitsize) - 1;

      if (v == 0)
        all_zero = 1;
      else if ((bitsize < HOST_BITS_PER_WIDE_INT
                && v == (HOST_WIDE_INT_1U << bitsize) - 1)
               || (bitsize == HOST_BITS_PER_WIDE_INT
                   && v == HOST_WIDE_INT_M1U))
        all_one = 1;

      value = lshift_value (mode, v, bitnum);
    }
  else
    {
      int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
                      && bitnum + bitsize != GET_MODE_BITSIZE (mode));

      if (GET_MODE (value) != mode)
        value = convert_to_mode (mode, value, 1);

      if (must_and)
        value = expand_binop (mode, and_optab, value,
                              mask_rtx (mode, 0, bitsize, 0),
                              NULL_RTX, 1, OPTAB_LIB_WIDEN);
      if (bitnum > 0)
        value = expand_shift (LSHIFT_EXPR, mode, value,
                              bitnum, NULL_RTX, 1);
    }

  if (reverse)
    value = flip_storage_order (mode, value);

  /* Now clear the chosen bits in OP0,
     except that if VALUE is -1 we need not bother.  */
  /* We keep the intermediates in registers to allow CSE to combine
     consecutive bitfield assignments.  */

  temp = force_reg (mode, op0);

  if (! all_one)
    {
      rtx mask = mask_rtx (mode, bitnum, bitsize, 1);
      if (reverse)
        mask = flip_storage_order (mode, mask);
      temp = expand_binop (mode, and_optab, temp, mask,
                           NULL_RTX, 1, OPTAB_LIB_WIDEN);
      temp = force_reg (mode, temp);
    }

  /* Now logical-or VALUE into OP0, unless it is zero.  */

  if (! all_zero)
    {
      temp = expand_binop (mode, ior_optab, temp, value,
                           NULL_RTX, 1, OPTAB_LIB_WIDEN);
      temp = force_reg (mode, temp);
    }

  if (op0 != temp)
    {
      op0 = copy_rtx (op0);
      emit_move_insn (op0, temp);
    }
}

/* Store a bit field that is split across multiple accessible memory objects.

   OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
   BITSIZE is the field width; BITPOS the position of its first bit
   in the object.

   VALUE is the value to store.

   If REVERSE is true, the store is to be done in reverse order.

   This does not yet handle fields wider than BITS_PER_WORD.  */
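
/* Illustrative example: a 16-bit field starting at BITPOS 24 of a memory
   object whose access unit is 32 bits crosses a unit boundary, so the loop
   below stores 8 bits at bit position 24 and the remaining 8 bits at bit
   position 32, each through store_fixed_bit_field.  */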

static void
store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
                       unsigned HOST_WIDE_INT bitpos,
                       unsigned HOST_WIDE_INT bitregion_start,
                       unsigned HOST_WIDE_INT bitregion_end,
                       rtx value, bool reverse)
{
  unsigned int unit, total_bits, bitsdone = 0;

  /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
     much at a time.  */
  if (REG_P (op0) || GET_CODE (op0) == SUBREG)
    unit = BITS_PER_WORD;
  else
    unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);

  /* If OP0 is a memory with a mode, then UNIT must not be larger than
     OP0's mode as well.  Otherwise, store_fixed_bit_field will call us
     again, and we will mutually recurse forever.  */
  if (MEM_P (op0) && GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
    unit = MIN (unit, GET_MODE_BITSIZE (GET_MODE (op0)));

  /* If VALUE is a constant other than a CONST_INT, get it into a register in
     WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
     that VALUE might be a floating-point constant.  */
  if (CONSTANT_P (value) && !CONST_INT_P (value))
    {
      rtx word = gen_lowpart_common (word_mode, value);

      if (word && (value != word))
        value = word;
      else
        value = gen_lowpart_common (word_mode,
                                    force_reg (GET_MODE (value) != VOIDmode
                                               ? GET_MODE (value)
                                               : word_mode, value));
    }

  total_bits = GET_MODE_BITSIZE (GET_MODE (value));

  while (bitsdone < bitsize)
    {
      unsigned HOST_WIDE_INT thissize;
      unsigned HOST_WIDE_INT thispos;
      unsigned HOST_WIDE_INT offset;
      rtx part, word;

      offset = (bitpos + bitsdone) / unit;
      thispos = (bitpos + bitsdone) % unit;

      /* When region of bytes we can touch is restricted, decrease
         UNIT close to the end of the region as needed.  If op0 is a REG
         or SUBREG of REG, don't do this, as there can't be data races
         on a register and we can expand shorter code in some cases.  */
      if (bitregion_end
          && unit > BITS_PER_UNIT
          && bitpos + bitsdone - thispos + unit > bitregion_end + 1
          && !REG_P (op0)
          && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
        {
          unit = unit / 2;
          continue;
        }

      /* THISSIZE must not overrun a word boundary.  Otherwise,
         store_fixed_bit_field will call us again, and we will mutually
         recurse forever.  */
      thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
      thissize = MIN (thissize, unit - thispos);

      if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
        {
          /* Fetch successively less significant portions.  */
          if (CONST_INT_P (value))
            part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
                             >> (bitsize - bitsdone - thissize))
                            & ((HOST_WIDE_INT_1 << thissize) - 1));
          /* Likewise, but the source is little-endian.  */
          else if (reverse)
            part = extract_fixed_bit_field (word_mode, value, thissize,
                                            bitsize - bitsdone - thissize,
                                            NULL_RTX, 1, false);
          else
            {
              int total_bits = GET_MODE_BITSIZE (GET_MODE (value));
              /* The args are chosen so that the last part includes the
                 lsb.  Give extract_bit_field the value it needs (with
                 endianness compensation) to fetch the piece we want.  */
              part = extract_fixed_bit_field (word_mode, value, thissize,
                                              total_bits - bitsize + bitsdone,
                                              NULL_RTX, 1, false);
            }
        }
      else
        {
          /* Fetch successively more significant portions.  */
          if (CONST_INT_P (value))
            part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
                             >> bitsdone)
                            & ((HOST_WIDE_INT_1 << thissize) - 1));
          /* Likewise, but the source is big-endian.  */
          else if (reverse)
            part = extract_fixed_bit_field (word_mode, value, thissize,
                                            total_bits - bitsdone - thissize,
                                            NULL_RTX, 1, false);
          else
            part = extract_fixed_bit_field (word_mode, value, thissize,
                                            bitsdone, NULL_RTX, 1, false);
        }

      /* If OP0 is a register, then handle OFFSET here.  */
      if (SUBREG_P (op0) || REG_P (op0))
        {
          machine_mode op0_mode = GET_MODE (op0);
          if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
            word = offset ? const0_rtx : op0;
          else
            word = operand_subword_force (op0, offset * unit / BITS_PER_WORD,
                                          GET_MODE (op0));
          offset &= BITS_PER_WORD / unit - 1;
        }
      else
        word = op0;

      /* OFFSET is in UNITs, and UNIT is in bits.  If WORD is const0_rtx,
         it is just an out-of-bounds access.  Ignore it.  */
      if (word != const0_rtx)
        store_fixed_bit_field (word, thissize, offset * unit + thispos,
                               bitregion_start, bitregion_end, part,
                               reverse);
      bitsdone += thissize;
    }
}

/* A subroutine of extract_bit_field_1 that converts return value X
   to either MODE or TMODE.  MODE, TMODE and UNSIGNEDP are arguments
   to extract_bit_field.  */

static rtx
convert_extracted_bit_field (rtx x, machine_mode mode,
                             machine_mode tmode, bool unsignedp)
{
  if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
    return x;

  /* If the x mode is not a scalar integral, first convert to the
     integer mode of that size and then access it as a floating-point
     value via a SUBREG.  */
  if (!SCALAR_INT_MODE_P (tmode))
    {
      machine_mode smode;

      smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
      x = convert_to_mode (smode, x, unsignedp);
      x = force_reg (smode, x);
      return gen_lowpart (tmode, x);
    }

  return convert_to_mode (tmode, x, unsignedp);
}

/* Try to use an ext(z)v pattern to extract a field from OP0.
   Return the extracted value on success, otherwise return null.
   EXT_MODE is the mode of the extraction and the other arguments
   are as for extract_bit_field.  */

static rtx
extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
                              unsigned HOST_WIDE_INT bitsize,
                              unsigned HOST_WIDE_INT bitnum,
                              int unsignedp, rtx target,
                              machine_mode mode, machine_mode tmode)
{
  struct expand_operand ops[4];
  rtx spec_target = target;
  rtx spec_target_subreg = 0;
  machine_mode ext_mode = extv->field_mode;
  unsigned unit = GET_MODE_BITSIZE (ext_mode);

  if (bitsize == 0 || unit < bitsize)
    return NULL_RTX;

  if (MEM_P (op0))
    /* Get a reference to the first byte of the field.  */
    op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
                                &bitnum);
  else
    {
      /* Convert from counting within OP0 to counting in EXT_MODE.  */
      if (BYTES_BIG_ENDIAN)
        bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));

      /* If op0 is a register, we need it in EXT_MODE to make it
         acceptable to the format of ext(z)v.  */
      if (GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
        return NULL_RTX;
      if (REG_P (op0) && GET_MODE (op0) != ext_mode)
        op0 = gen_lowpart_SUBREG (ext_mode, op0);
    }

  /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
     "backwards" from the size of the unit we are extracting from.
     Otherwise, we count bits from the most significant on a
     BYTES/BITS_BIG_ENDIAN machine.  */

  if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
    bitnum = unit - bitsize - bitnum;

  if (target == 0)
    target = spec_target = gen_reg_rtx (tmode);

  if (GET_MODE (target) != ext_mode)
    {
      /* Don't use LHS paradoxical subreg if explicit truncation is needed
         between the mode of the extraction (word_mode) and the target
         mode.  Instead, create a temporary and use convert_move to set
         the target.  */
      if (REG_P (target)
          && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode))
        {
          target = gen_lowpart (ext_mode, target);
          if (GET_MODE_PRECISION (ext_mode)
              > GET_MODE_PRECISION (GET_MODE (spec_target)))
            spec_target_subreg = target;
        }
      else
        target = gen_reg_rtx (ext_mode);
    }

  create_output_operand (&ops[0], target, ext_mode);
  create_fixed_operand (&ops[1], op0);
  create_integer_operand (&ops[2], bitsize);
  create_integer_operand (&ops[3], bitnum);
  if (maybe_expand_insn (extv->icode, 4, ops))
    {
      target = ops[0].value;
      if (target == spec_target)
        return target;
      if (target == spec_target_subreg)
        return spec_target;
      return convert_extracted_bit_field (target, mode, tmode, unsignedp);
    }
  return NULL_RTX;
}
1528 /* A subroutine of extract_bit_field, with the same arguments.
1529 If FALLBACK_P is true, fall back to extract_fixed_bit_field
1530 if we can find no other means of implementing the operation.
1531 if FALLBACK_P is false, return NULL instead. */
1534 extract_bit_field_1 (rtx str_rtx
, unsigned HOST_WIDE_INT bitsize
,
1535 unsigned HOST_WIDE_INT bitnum
, int unsignedp
, rtx target
,
1536 machine_mode mode
, machine_mode tmode
,
1537 bool reverse
, bool fallback_p
, rtx
*alt_rtl
)
1540 machine_mode int_mode
;
1543 if (tmode
== VOIDmode
)
1546 while (GET_CODE (op0
) == SUBREG
)
1548 bitnum
+= SUBREG_BYTE (op0
) * BITS_PER_UNIT
;
1549 op0
= SUBREG_REG (op0
);
1552 /* If we have an out-of-bounds access to a register, just return an
1553 uninitialized register of the required mode. This can occur if the
1554 source code contains an out-of-bounds access to a small array. */
1555 if (REG_P (op0
) && bitnum
>= GET_MODE_BITSIZE (GET_MODE (op0
)))
1556 return gen_reg_rtx (tmode
);
1559 && mode
== GET_MODE (op0
)
1561 && bitsize
== GET_MODE_BITSIZE (GET_MODE (op0
)))
1564 op0
= flip_storage_order (mode
, op0
);
1565 /* We're trying to extract a full register from itself. */
1569 /* See if we can get a better vector mode before extracting. */
1570 if (VECTOR_MODE_P (GET_MODE (op0
))
1572 && GET_MODE_INNER (GET_MODE (op0
)) != tmode
)
1574 machine_mode new_mode
;
1576 if (GET_MODE_CLASS (tmode
) == MODE_FLOAT
)
1577 new_mode
= MIN_MODE_VECTOR_FLOAT
;
1578 else if (GET_MODE_CLASS (tmode
) == MODE_FRACT
)
1579 new_mode
= MIN_MODE_VECTOR_FRACT
;
1580 else if (GET_MODE_CLASS (tmode
) == MODE_UFRACT
)
1581 new_mode
= MIN_MODE_VECTOR_UFRACT
;
1582 else if (GET_MODE_CLASS (tmode
) == MODE_ACCUM
)
1583 new_mode
= MIN_MODE_VECTOR_ACCUM
;
1584 else if (GET_MODE_CLASS (tmode
) == MODE_UACCUM
)
1585 new_mode
= MIN_MODE_VECTOR_UACCUM
;
1587 new_mode
= MIN_MODE_VECTOR_INT
;
1589 for (; new_mode
!= VOIDmode
; new_mode
= GET_MODE_WIDER_MODE (new_mode
))
1590 if (GET_MODE_SIZE (new_mode
) == GET_MODE_SIZE (GET_MODE (op0
))
1591 && GET_MODE_UNIT_SIZE (new_mode
) == GET_MODE_SIZE (tmode
)
1592 && targetm
.vector_mode_supported_p (new_mode
))
1594 if (new_mode
!= VOIDmode
)
1595 op0
= gen_lowpart (new_mode
, op0
);
1598 /* Use vec_extract patterns for extracting parts of vectors whenever
1600 if (VECTOR_MODE_P (GET_MODE (op0
))
1602 && optab_handler (vec_extract_optab
, GET_MODE (op0
)) != CODE_FOR_nothing
1603 && ((bitnum
+ bitsize
- 1) / GET_MODE_UNIT_BITSIZE (GET_MODE (op0
))
1604 == bitnum
/ GET_MODE_UNIT_BITSIZE (GET_MODE (op0
))))
1606 struct expand_operand ops
[3];
1607 machine_mode outermode
= GET_MODE (op0
);
1608 machine_mode innermode
= GET_MODE_INNER (outermode
);
1609 enum insn_code icode
= optab_handler (vec_extract_optab
, outermode
);
1610 unsigned HOST_WIDE_INT pos
= bitnum
/ GET_MODE_BITSIZE (innermode
);
1612 create_output_operand (&ops
[0], target
, innermode
);
1614 create_input_operand (&ops
[1], op0
, outermode
);
1615 create_integer_operand (&ops
[2], pos
);
1616 if (maybe_expand_insn (icode
, 3, ops
))
1618 if (alt_rtl
&& ops
[0].target
)
1620 target
= ops
[0].value
;
1621 if (GET_MODE (target
) != mode
)
1622 return gen_lowpart (tmode
, target
);
1627 /* Make sure we are playing with integral modes. Pun with subregs
1630 machine_mode imode
= int_mode_for_mode (GET_MODE (op0
));
1631 if (imode
!= GET_MODE (op0
))
1634 op0
= adjust_bitfield_address_size (op0
, imode
, 0, MEM_SIZE (op0
));
1635 else if (imode
!= BLKmode
)
1637 op0
= gen_lowpart (imode
, op0
);
1639 /* If we got a SUBREG, force it into a register since we
1640 aren't going to be able to do another SUBREG on it. */
1641 if (GET_CODE (op0
) == SUBREG
)
1642 op0
= force_reg (imode
, op0
);
1646 HOST_WIDE_INT size
= GET_MODE_SIZE (GET_MODE (op0
));
1647 rtx mem
= assign_stack_temp (GET_MODE (op0
), size
);
1648 emit_move_insn (mem
, op0
);
1649 op0
= adjust_bitfield_address_size (mem
, BLKmode
, 0, size
);
1654 /* ??? We currently assume TARGET is at least as big as BITSIZE.
1655 If that's wrong, the solution is to test for it and set TARGET to 0
1658 /* Get the mode of the field to use for atomic access or subreg
1661 if (SCALAR_INT_MODE_P (tmode
))
1663 machine_mode try_mode
= mode_for_size (bitsize
,
1664 GET_MODE_CLASS (tmode
), 0);
1665 if (try_mode
!= BLKmode
)
1668 gcc_assert (mode1
!= BLKmode
);
1670 /* Extraction of a full MODE1 value can be done with a subreg as long
1671 as the least significant bit of the value is the least significant
1672 bit of either OP0 or a word of OP0. */
1675 && lowpart_bit_field_p (bitnum
, bitsize
, GET_MODE (op0
))
1676 && bitsize
== GET_MODE_BITSIZE (mode1
)
1677 && TRULY_NOOP_TRUNCATION_MODES_P (mode1
, GET_MODE (op0
)))
1679 rtx sub
= simplify_gen_subreg (mode1
, op0
, GET_MODE (op0
),
1680 bitnum
/ BITS_PER_UNIT
);
1682 return convert_extracted_bit_field (sub
, mode
, tmode
, unsignedp
);
1685 /* Extraction of a full MODE1 value can be done with a load as long as
1686 the field is on a byte boundary and is sufficiently aligned. */
1687 if (simple_mem_bitfield_p (op0
, bitsize
, bitnum
, mode1
))
1689 op0
= adjust_bitfield_address (op0
, mode1
, bitnum
/ BITS_PER_UNIT
);
1691 op0
= flip_storage_order (mode1
, op0
);
1692 return convert_extracted_bit_field (op0
, mode
, tmode
, unsignedp
);
1695 /* Handle fields bigger than a word. */
1697 if (bitsize
> BITS_PER_WORD
)
1699 /* Here we transfer the words of the field
1700 in the order least significant first.
1701 This is because the most significant word is the one which may
1702 be less than full. */
1704 const bool backwards
= WORDS_BIG_ENDIAN
;
1705 unsigned int nwords
= (bitsize
+ (BITS_PER_WORD
- 1)) / BITS_PER_WORD
;
1709 if (target
== 0 || !REG_P (target
) || !valid_multiword_target_p (target
))
1710 target
= gen_reg_rtx (mode
);
1712 /* In case we're about to clobber a base register or something
1713 (see gcc.c-torture/execute/20040625-1.c). */
1714 if (reg_mentioned_p (target
, str_rtx
))
1715 target
= gen_reg_rtx (mode
);
1717 /* Indicate for flow that the entire target reg is being set. */
1718 emit_clobber (target
);
1720 last
= get_last_insn ();
1721 for (i
= 0; i
< nwords
; i
++)
1723 /* If I is 0, use the low-order word in both field and target;
1724 if I is 1, use the next to lowest word; and so on. */
1725 /* Word number in TARGET to use. */
1726 unsigned int wordnum
1728 ? GET_MODE_SIZE (GET_MODE (target
)) / UNITS_PER_WORD
- i
- 1
1730 /* Offset from start of field in OP0. */
1731 unsigned int bit_offset
= (backwards
^ reverse
1732 ? MAX ((int) bitsize
- ((int) i
+ 1)
1735 : (int) i
* BITS_PER_WORD
);
1736 rtx target_part
= operand_subword (target
, wordnum
, 1, VOIDmode
);
1738 = extract_bit_field_1 (op0
, MIN (BITS_PER_WORD
,
1739 bitsize
- i
* BITS_PER_WORD
),
1740 bitnum
+ bit_offset
, 1, target_part
,
1741 mode
, word_mode
, reverse
, fallback_p
, NULL
);
1743 gcc_assert (target_part
);
1746 delete_insns_since (last
);
1750 if (result_part
!= target_part
)
	  emit_move_insn (target_part, result_part);
	}

      /* Unless we've filled TARGET, the upper regs in a multi-reg value
	 need to be zero'd out.  */
      if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
	{
	  unsigned int i, total_words;

	  total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
	  for (i = nwords; i < total_words; i++)
	    emit_move_insn
	      (operand_subword (target,
				backwards ? total_words - i - 1 : i,
				1, VOIDmode),
	       const0_rtx);
	}

      if (unsignedp)
	return target;

      /* Signed bit field: sign-extend with two arithmetic shifts.  */
      target = expand_shift (LSHIFT_EXPR, mode, target,
			     GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
      return expand_shift (RSHIFT_EXPR, mode, target,
			   GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
    }

  /* If OP0 is a multi-word register, narrow it to the affected word.
     If the region spans two words, defer to extract_split_bit_field.  */
  if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
    {
      if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
	{
	  if (!fallback_p)
	    return NULL_RTX;
	  target = extract_split_bit_field (op0, bitsize, bitnum, unsignedp,
					    reverse);
	  return convert_extracted_bit_field (target, mode, tmode, unsignedp);
	}
      op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
				 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
      bitnum %= BITS_PER_WORD;
    }

  /* From here on we know the desired field is smaller than a word.
     If OP0 is a register, it too fits within a word.  */
  enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
  extraction_insn extv;
  if (!MEM_P (op0)
      && !reverse
      /* ??? We could limit the structure size to the part of OP0 that
	 contains the field, with appropriate checks for endianness
	 and TRULY_NOOP_TRUNCATION.  */
      && get_best_reg_extraction_insn (&extv, pattern,
				       GET_MODE_BITSIZE (GET_MODE (op0)),
				       tmode))
    {
      rtx result = extract_bit_field_using_extv (&extv, op0, bitsize, bitnum,
						 unsignedp, target, mode,
						 tmode);
      if (result)
	return result;
    }

  /* If OP0 is a memory, try copying it to a register and seeing if a
     cheap register alternative is available.  */
  if (MEM_P (op0) && !reverse)
    {
      if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
					tmode))
	{
	  rtx result = extract_bit_field_using_extv (&extv, op0, bitsize,
						     bitnum, unsignedp,
						     target, mode, tmode);
	  if (result)
	    return result;
	}

      rtx_insn *last = get_last_insn ();

      /* Try loading part of OP0 into a register and extracting the
	 bitfield from that.  */
      unsigned HOST_WIDE_INT bitpos;
      rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
					       0, 0, tmode, &bitpos);
      if (xop0)
	{
	  xop0 = copy_to_reg (xop0);
	  rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
					    unsignedp, target,
					    mode, tmode, reverse, false, NULL);
	  if (result)
	    return result;
	  delete_insns_since (last);
	}
    }

  if (!fallback_p)
    return NULL;

  /* Find a correspondingly-sized integer field, so we can apply
     shifts and masks to it.  */
  machine_mode int_mode = int_mode_for_mode (tmode);
  if (int_mode == BLKmode)
    int_mode = int_mode_for_mode (mode);
  /* Should probably push op0 out to memory and then do a load.  */
  gcc_assert (int_mode != BLKmode);

  target = extract_fixed_bit_field (int_mode, op0, bitsize, bitnum, target,
				    unsignedp, reverse);

  /* Complex values must be reversed piecewise, so we need to undo the global
     reversal, convert to the complex mode and reverse again.  */
  if (reverse && COMPLEX_MODE_P (tmode))
    {
      target = flip_storage_order (int_mode, target);
      target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
      target = flip_storage_order (tmode, target);
    }
  else
    target = convert_extracted_bit_field (target, mode, tmode, unsignedp);

  return target;
}
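
/* Illustrative sketch (reviewer addition, not part of GCC): the two-shift
   sign extension used by the signed paths above, written for a BITSIZE-bit
   field already sitting in the low bits of a 32-bit value.  Assumes two's
   complement and an arithmetic right shift of signed int, with
   0 < bitsize <= 32.  */
static int
sign_extend_field_sketch (int field, int bitsize)
{
  int shift = 32 - bitsize;
  /* Shift left as unsigned to avoid signed-overflow pitfalls, then shift
     right arithmetically so the field's sign bit is copied downwards.  */
  return (int) ((unsigned int) field << shift) >> shift;
}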

/* Generate code to extract a byte-field from STR_RTX
   containing BITSIZE bits, starting at BITNUM,
   and put it in TARGET if possible (if TARGET is nonzero).
   Regardless of TARGET, we return the rtx for where the value is placed.

   STR_RTX is the structure containing the byte (a REG or MEM).
   UNSIGNEDP is nonzero if this is an unsigned bit field.
   MODE is the natural mode of the field value once extracted.
   TMODE is the mode the caller would like the value to have;
   but the value may be returned with type MODE instead.

   If REVERSE is true, the extraction is to be done in reverse order.

   If a TARGET is specified and we can store in it at no extra cost,
   we do so, and return TARGET.
   Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
   if they are equally easy.  */

rtx
extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
		   unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
		   machine_mode mode, machine_mode tmode, bool reverse,
		   rtx *alt_rtl)
{
  machine_mode mode1;

  /* Handle -fstrict-volatile-bitfields in the cases where it applies.  */
  if (GET_MODE_BITSIZE (GET_MODE (str_rtx)) > 0)
    mode1 = GET_MODE (str_rtx);
  else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
    mode1 = GET_MODE (target);
  else
    mode1 = tmode;

  if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, mode1, 0, 0))
    {
      /* Extraction of a full MODE1 value can be done with a simple load.
	 We know here that the field can be accessed with one single
	 instruction.  For targets that support unaligned memory,
	 an unaligned access may be necessary.  */
      if (bitsize == GET_MODE_BITSIZE (mode1))
	{
	  rtx result = adjust_bitfield_address (str_rtx, mode1,
						bitnum / BITS_PER_UNIT);
	  if (reverse)
	    result = flip_storage_order (mode1, result);
	  gcc_assert (bitnum % BITS_PER_UNIT == 0);
	  return convert_extracted_bit_field (result, mode, tmode, unsignedp);
	}

      str_rtx = narrow_bit_field_mem (str_rtx, mode1, bitsize, bitnum,
				      &bitnum);
      gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (mode1));
      str_rtx = copy_to_reg (str_rtx);
    }

  return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
			      target, mode, tmode, reverse, true, alt_rtl);
}

/* Use shifts and boolean operations to extract a field of BITSIZE bits
   from bit BITNUM of OP0.

   UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
   If REVERSE is true, the extraction is to be done in reverse order.

   If TARGET is nonzero, attempts to store the value there
   and return TARGET, but this is not guaranteed.
   If TARGET is not used, create a pseudo-reg of mode TMODE for the value.  */

static rtx
extract_fixed_bit_field (machine_mode tmode, rtx op0,
			 unsigned HOST_WIDE_INT bitsize,
			 unsigned HOST_WIDE_INT bitnum, rtx target,
			 int unsignedp, bool reverse)
{
  if (MEM_P (op0))
    {
      machine_mode mode
	= get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0), word_mode,
			 MEM_VOLATILE_P (op0));

      if (mode == VOIDmode)
	/* The only way this should occur is if the field spans word
	   boundaries.  */
	return extract_split_bit_field (op0, bitsize, bitnum, unsignedp,
					reverse);

      op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
    }

  return extract_fixed_bit_field_1 (tmode, op0, bitsize, bitnum,
				    target, unsignedp, reverse);
}

/* Helper function for extract_fixed_bit_field, extracts
   the bit field always using the MODE of OP0.  */

static rtx
extract_fixed_bit_field_1 (machine_mode tmode, rtx op0,
			   unsigned HOST_WIDE_INT bitsize,
			   unsigned HOST_WIDE_INT bitnum, rtx target,
			   int unsignedp, bool reverse)
{
  machine_mode mode = GET_MODE (op0);
  gcc_assert (SCALAR_INT_MODE_P (mode));

  /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
     for invalid input, such as extract equivalent of f5 from
     gcc.dg/pr48335-2.c.  */
  if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
    /* BITNUM is the distance between our msb and that of OP0.
       Convert it to the distance from the lsb.  */
    bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;

  /* Now BITNUM is always the distance between the field's lsb and that of OP0.
     We have reduced the big-endian case to the little-endian case.  */
  if (reverse)
    op0 = flip_storage_order (mode, op0);

  if (unsignedp)
    {
      if (bitnum)
	{
	  /* If the field does not already start at the lsb,
	     shift it so it does.  */
	  /* Maybe propagate the target for the shift.  */
	  rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
	  op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
	}

      /* Convert the value to the desired mode.  */
      op0 = convert_to_mode (tmode, op0, 1);

      /* Unless the msb of the field used to be the msb when we shifted,
	 mask out the upper bits.  */
      if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
	return expand_binop (GET_MODE (op0), and_optab, op0,
			     mask_rtx (GET_MODE (op0), 0, bitsize, 0),
			     target, 1, OPTAB_LIB_WIDEN);
      return op0;
    }

  /* To extract a signed bit-field, first shift its msb to the msb of the word,
     then arithmetic-shift its lsb to the lsb of the word.  */
  op0 = force_reg (mode, op0);

  /* Find the narrowest integer mode that contains the field.  */
  for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
       mode = GET_MODE_WIDER_MODE (mode))
    if (GET_MODE_BITSIZE (mode) >= bitsize + bitnum)
      {
	op0 = convert_to_mode (mode, op0, 0);
	break;
      }

  if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
    {
      int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
      /* Maybe propagate the target for the shift.  */
      rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
      op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
    }

  return expand_shift (RSHIFT_EXPR, mode, op0,
		       GET_MODE_BITSIZE (mode) - bitsize, target, 0);
}
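
/* Illustrative sketch (reviewer addition, not part of GCC): the shift-and-mask
   extraction performed by the unsigned path above, for a field of BITSIZE
   bits starting at bit BITNUM of a 32-bit word (little-endian bit numbering,
   0 < bitsize < 32).  */
static unsigned int
extract_uns_field_sketch (unsigned int word, int bitnum, int bitsize)
{
  return (word >> bitnum) & ((1u << bitsize) - 1);
}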

/* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
   VALUE << BITPOS.  */

static rtx
lshift_value (machine_mode mode, unsigned HOST_WIDE_INT value,
	      int bitpos)
{
  return immed_wide_int_const (wi::lshift (value, bitpos), mode);
}

/* Extract a bit field that is split across two words
   and return an RTX for the result.

   OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
   BITSIZE is the field width; BITPOS, position of its first bit, in the word.
   UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.

   If REVERSE is true, the extraction is to be done in reverse order.  */

static rtx
extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
			 unsigned HOST_WIDE_INT bitpos, int unsignedp,
			 bool reverse)
{
  unsigned int unit;
  unsigned int bitsdone = 0;
  rtx result = NULL_RTX;
  int first = 1;

  /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
     much at a time.  */
  if (REG_P (op0) || GET_CODE (op0) == SUBREG)
    unit = BITS_PER_WORD;
  else
    unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);

  while (bitsdone < bitsize)
    {
      unsigned HOST_WIDE_INT thissize;
      rtx part, word;
      unsigned HOST_WIDE_INT thispos;
      unsigned HOST_WIDE_INT offset;

      offset = (bitpos + bitsdone) / unit;
      thispos = (bitpos + bitsdone) % unit;

      /* THISSIZE must not overrun a word boundary.  Otherwise,
	 extract_fixed_bit_field will call us again, and we will mutually
	 recurse forever.  */
      thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
      thissize = MIN (thissize, unit - thispos);

      /* If OP0 is a register, then handle OFFSET here.  */
      if (SUBREG_P (op0) || REG_P (op0))
	{
	  word = operand_subword_force (op0, offset, GET_MODE (op0));
	  offset = 0;
	}
      else
	word = op0;

      /* Extract the parts in bit-counting order,
	 whose meaning is determined by BYTES_PER_UNIT.
	 OFFSET is in UNITs, and UNIT is in bits.  */
      part = extract_fixed_bit_field (word_mode, word, thissize,
				      offset * unit + thispos, 0, 1, reverse);
      bitsdone += thissize;

      /* Shift this part into place for the result.  */
      if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
	{
	  if (bitsize != bitsdone)
	    part = expand_shift (LSHIFT_EXPR, word_mode, part,
				 bitsize - bitsdone, 0, 1);
	}
      else
	{
	  if (bitsdone != thissize)
	    part = expand_shift (LSHIFT_EXPR, word_mode, part,
				 bitsdone - thissize, 0, 1);
	}

      if (first)
	result = part;
      else
	/* Combine the parts with bitwise or.  This works
	   because we extracted each part as an unsigned bit field.  */
	result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
			       OPTAB_LIB_WIDEN);
      first = 0;
    }

  /* Unsigned bit field: we are done.  */
  if (unsignedp)
    return result;

  /* Signed bit field: sign-extend with two arithmetic shifts.  */
  result = expand_shift (LSHIFT_EXPR, word_mode, result,
			 BITS_PER_WORD - bitsize, NULL_RTX, 0);
  return expand_shift (RSHIFT_EXPR, word_mode, result,
		       BITS_PER_WORD - bitsize, NULL_RTX, 0);
}
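
/* Illustrative sketch (reviewer addition, not part of GCC): extracting an
   unsigned field that straddles two 32-bit words in little-endian bit order,
   by taking one piece from each word and OR-ing them together, much as the
   loop above does one piece at a time.  Assumes 0 < bitpos < 32,
   bitsize <= 32 and bitpos + bitsize > 32, i.e. the field really is split.  */
static unsigned int
extract_split_sketch (unsigned int lo_word, unsigned int hi_word,
		      int bitpos, int bitsize)
{
  int lo_bits = 32 - bitpos;			/* piece held in LO_WORD */
  int hi_bits = bitsize - lo_bits;		/* piece held in HI_WORD */
  unsigned int lo_part = lo_word >> bitpos;	/* already at the bottom */
  unsigned int hi_part = hi_word & ((1u << hi_bits) - 1);
  return lo_part | (hi_part << lo_bits);
}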

/* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
   the bit pattern.  SRC_MODE is the mode of SRC; if this is smaller than
   MODE, fill the upper bits with zeros.  Fail if the layout of either
   mode is unknown (as for CC modes) or if the extraction would involve
   unprofitable mode punning.  Return the value on success, otherwise
   return null.

   This is different from gen_lowpart* in these respects:

     - the returned value must always be considered an rvalue

     - when MODE is wider than SRC_MODE, the extraction involves
       a zero extension

     - when MODE is smaller than SRC_MODE, the extraction involves
       a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).

   In other words, this routine performs a computation, whereas the
   gen_lowpart* routines are conceptually lvalue or rvalue subreg
   operations.  */

rtx
extract_low_bits (machine_mode mode, machine_mode src_mode, rtx src)
{
  machine_mode int_mode, src_int_mode;

  if (mode == src_mode)
    return src;

  if (CONSTANT_P (src))
    {
      /* simplify_gen_subreg can't be used here, as if simplify_subreg
	 fails, it will happily create (subreg (symbol_ref)) or similar
	 invalid SUBREGs.  */
      unsigned int byte = subreg_lowpart_offset (mode, src_mode);
      rtx ret = simplify_subreg (mode, src, src_mode, byte);
      if (ret)
	return ret;

      if (GET_MODE (src) == VOIDmode
	  || !validate_subreg (mode, src_mode, src, byte))
	return NULL_RTX;

      src = force_reg (GET_MODE (src), src);
      return gen_rtx_SUBREG (mode, src, byte);
    }

  if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
    return NULL_RTX;

  if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
      && MODES_TIEABLE_P (mode, src_mode))
    {
      rtx x = gen_lowpart_common (mode, src);
      if (x)
	return x;
    }

  src_int_mode = int_mode_for_mode (src_mode);
  int_mode = int_mode_for_mode (mode);
  if (src_int_mode == BLKmode || int_mode == BLKmode)
    return NULL_RTX;

  if (!MODES_TIEABLE_P (src_int_mode, src_mode))
    return NULL_RTX;
  if (!MODES_TIEABLE_P (int_mode, mode))
    return NULL_RTX;

  src = gen_lowpart (src_int_mode, src);
  src = convert_modes (int_mode, src_int_mode, src, true);
  src = gen_lowpart (mode, src);
  return src;
}

/* Add INC into TARGET.  */

void
expand_inc (rtx target, rtx inc)
{
  rtx value = expand_binop (GET_MODE (target), add_optab,
			    target, inc,
			    target, 0, OPTAB_LIB_WIDEN);
  if (value != target)
    emit_move_insn (target, value);
}

/* Subtract DEC from TARGET.  */

void
expand_dec (rtx target, rtx dec)
{
  rtx value = expand_binop (GET_MODE (target), sub_optab,
			    target, dec,
			    target, 0, OPTAB_LIB_WIDEN);
  if (value != target)
    emit_move_insn (target, value);
}

/* Output a shift instruction for expression code CODE,
   with SHIFTED being the rtx for the value to shift,
   and AMOUNT the rtx for the amount to shift by.
   Store the result in the rtx TARGET, if that is convenient.
   If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
   Return the rtx for where the value is.
   If that cannot be done, abort the compilation unless MAY_FAIL is true,
   in which case 0 is returned.  */
static rtx
expand_shift_1 (enum tree_code code, machine_mode mode, rtx shifted,
		rtx amount, rtx target, int unsignedp, bool may_fail = false)
{
  rtx op1, temp = 0;
  int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
  int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
  optab lshift_optab = ashl_optab;
  optab rshift_arith_optab = ashr_optab;
  optab rshift_uns_optab = lshr_optab;
  optab lrotate_optab = rotl_optab;
  optab rrotate_optab = rotr_optab;
  machine_mode op1_mode;
  machine_mode scalar_mode = mode;
  int attempt;
  bool speed = optimize_insn_for_speed_p ();

  if (VECTOR_MODE_P (mode))
    scalar_mode = GET_MODE_INNER (mode);

  op1 = amount;
  op1_mode = GET_MODE (op1);

  /* Determine whether the shift/rotate amount is a vector, or scalar.  If the
     shift amount is a vector, use the vector/vector shift patterns.  */
  if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
    {
      lshift_optab = vashl_optab;
      rshift_arith_optab = vashr_optab;
      rshift_uns_optab = vlshr_optab;
      lrotate_optab = vrotl_optab;
      rrotate_optab = vrotr_optab;
    }

  /* Previously detected shift-counts computed by NEGATE_EXPR
     and shifted in the other direction; but that does not work
     on all machines.  */
  if (SHIFT_COUNT_TRUNCATED)
    {
      if (CONST_INT_P (op1)
	  && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
	      (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (scalar_mode)))
	op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
		       % GET_MODE_BITSIZE (scalar_mode));
      else if (GET_CODE (op1) == SUBREG
	       && subreg_lowpart_p (op1)
	       && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
	       && SCALAR_INT_MODE_P (GET_MODE (op1)))
	op1 = SUBREG_REG (op1);
    }

  /* Canonicalize rotates by constant amount.  If op1 is bitsize / 2,
     prefer left rotation, if op1 is from bitsize / 2 + 1 to
     bitsize - 1, use other direction of rotate with 1 .. bitsize / 2 - 1
     amount instead.  */
  if (rotate
      && CONST_INT_P (op1)
      && IN_RANGE (INTVAL (op1), GET_MODE_BITSIZE (scalar_mode) / 2 + left,
		   GET_MODE_BITSIZE (scalar_mode) - 1))
    {
      op1 = GEN_INT (GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1));
      left = !left;
      code = left ? LROTATE_EXPR : RROTATE_EXPR;
    }

  /* Rotation of 16bit values by 8 bits is effectively equivalent to a bswaphi.
     Note that this is not the case for bigger values.  For instance a rotation
     of 0x01020304 by 16 bits gives 0x03040102 which is different from
     0x04030201 (bswapsi).  */
  if (rotate
      && CONST_INT_P (op1)
      && INTVAL (op1) == BITS_PER_UNIT
      && GET_MODE_SIZE (scalar_mode) == 2
      && optab_handler (bswap_optab, HImode) != CODE_FOR_nothing)
    return expand_unop (HImode, bswap_optab, shifted, NULL_RTX, unsignedp);

  if (op1 == const0_rtx)
    return shifted;

  /* Check whether it's cheaper to implement a left shift by a constant
     bit count by a sequence of additions.  */
  if (code == LSHIFT_EXPR
      && CONST_INT_P (op1)
      && INTVAL (op1) > 0
      && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode)
      && INTVAL (op1) < MAX_BITS_PER_WORD
      && (shift_cost (speed, mode, INTVAL (op1))
	  > INTVAL (op1) * add_cost (speed, mode))
      && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
    {
      int i;
      for (i = 0; i < INTVAL (op1); i++)
	{
	  temp = force_reg (mode, shifted);
	  shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
				  unsignedp, OPTAB_LIB_WIDEN);
	}
      return shifted;
    }

  for (attempt = 0; temp == 0 && attempt < 3; attempt++)
    {
      enum optab_methods methods;

      if (attempt == 0)
	methods = OPTAB_DIRECT;
      else if (attempt == 1)
	methods = OPTAB_WIDEN;
      else
	methods = OPTAB_LIB_WIDEN;

      if (rotate)
	{
	  /* Widening does not work for rotation.  */
	  if (methods == OPTAB_WIDEN)
	    continue;
	  else if (methods == OPTAB_LIB_WIDEN)
	    {
	      /* If we have been unable to open-code this by a rotation,
		 do it as the IOR of two shifts.  I.e., to rotate A
		 by N bits, compute
		 (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
		 where C is the bitsize of A.

		 It is theoretically possible that the target machine might
		 not be able to perform either shift and hence we would
		 be making two libcalls rather than just the one for the
		 shift (similarly if IOR could not be done).  We will allow
		 this extremely unlikely lossage to avoid complicating the
		 code.  */
	      rtx subtarget = target == shifted ? 0 : target;
	      rtx new_amount, other_amount;
	      rtx temp1;

	      new_amount = op1;
	      if (op1 == const0_rtx)
		other_amount = op1;
	      else if (CONST_INT_P (op1))
		other_amount = GEN_INT (GET_MODE_BITSIZE (scalar_mode)
					- INTVAL (op1));
	      else
		{
		  other_amount
		    = simplify_gen_unary (NEG, GET_MODE (op1),
					  op1, GET_MODE (op1));
		  HOST_WIDE_INT mask = GET_MODE_PRECISION (scalar_mode) - 1;
		  other_amount
		    = simplify_gen_binary (AND, GET_MODE (op1), other_amount,
					   gen_int_mode (mask, GET_MODE (op1)));
		}

	      shifted = force_reg (mode, shifted);

	      temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
				     mode, shifted, new_amount, 0, 1);
	      temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
				      mode, shifted, other_amount,
				      subtarget, 1);
	      return expand_binop (mode, ior_optab, temp, temp1, target,
				   unsignedp, methods);
	    }

	  temp = expand_binop (mode,
			       left ? lrotate_optab : rrotate_optab,
			       shifted, op1, target, unsignedp, methods);
	}
      else if (unsignedp)
	temp = expand_binop (mode,
			     left ? lshift_optab : rshift_uns_optab,
			     shifted, op1, target, unsignedp, methods);

      /* Do arithmetic shifts.
	 Also, if we are going to widen the operand, we can just as well
	 use an arithmetic right-shift instead of a logical one.  */
      if (temp == 0 && ! rotate
	  && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
	{
	  enum optab_methods methods1 = methods;

	  /* If trying to widen a log shift to an arithmetic shift,
	     don't accept an arithmetic shift of the same size.  */
	  if (unsignedp)
	    methods1 = OPTAB_MUST_WIDEN;

	  /* Arithmetic shift */
	  temp = expand_binop (mode,
			       left ? lshift_optab : rshift_arith_optab,
			       shifted, op1, target, unsignedp, methods1);
	}

      /* We used to try extzv here for logical right shifts, but that was
	 only useful for one machine, the VAX, and caused poor code
	 generation there for lshrdi3, so the code was deleted and a
	 define_expand for lshrsi3 was added to vax.md.  */
    }

  gcc_assert (temp != NULL_RTX || may_fail);
  return temp;
}
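
/* Illustrative sketch (reviewer addition, not part of GCC): the rotate
   fallback above relies on the identity
   (A rotl N) == (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
   for a C-bit value A.  A standalone 32-bit version; masking the counts also
   keeps the N == 0 case well defined.  */
static unsigned int
rotl32_sketch (unsigned int a, unsigned int n)
{
  n &= 31;
  return (a << n) | (a >> ((-n) & 31));
}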

/* Output a shift instruction for expression code CODE,
   with SHIFTED being the rtx for the value to shift,
   and AMOUNT the amount to shift by.
   Store the result in the rtx TARGET, if that is convenient.
   If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
   Return the rtx for where the value is.  */

rtx
expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
	      int amount, rtx target, int unsignedp)
{
  return expand_shift_1 (code, mode,
			 shifted, GEN_INT (amount), target, unsignedp);
}

/* Likewise, but return 0 if that cannot be done.  */

rtx
maybe_expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
		    int amount, rtx target, int unsignedp)
{
  return expand_shift_1 (code, mode,
			 shifted, GEN_INT (amount), target, unsignedp, true);
}

/* Output a shift instruction for expression code CODE,
   with SHIFTED being the rtx for the value to shift,
   and AMOUNT the tree for the amount to shift by.
   Store the result in the rtx TARGET, if that is convenient.
   If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
   Return the rtx for where the value is.  */

rtx
expand_variable_shift (enum tree_code code, machine_mode mode, rtx shifted,
		       tree amount, rtx target, int unsignedp)
{
  return expand_shift_1 (code, mode,
			 shifted, expand_normal (amount), target, unsignedp);
}

static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
			const struct mult_cost *, machine_mode mode);
static rtx expand_mult_const (machine_mode, rtx, HOST_WIDE_INT, rtx,
			      const struct algorithm *, enum mult_variant);
static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
static rtx extract_high_half (machine_mode, rtx);
static rtx expmed_mult_highpart (machine_mode, rtx, rtx, rtx, int, int);
static rtx expmed_mult_highpart_optab (machine_mode, rtx, rtx, rtx,
				       int, int);

/* Compute and return the best algorithm for multiplying by T.
   The algorithm must cost less than cost_limit.
   If retval.cost >= COST_LIMIT, no algorithm was found and all
   other fields of the returned struct are undefined.
   MODE is the machine mode of the multiplication.  */
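
/* Illustrative sketch (reviewer addition, not part of GCC): the kind of
   shift-and-add sequence synth_mult searches for.  Multiplying by 10 can be
   decomposed as x * 10 = (x + (x << 2)) << 1, i.e. one shift-and-add followed
   by one shift, which is typically cheaper than a general multiplication when
   multiplies are slow.  */
static unsigned int
mult_by_10_sketch (unsigned int x)
{
  unsigned int t = x + (x << 2);	/* x * 5  */
  return t << 1;			/* x * 10 */
}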
2525 synth_mult (struct algorithm
*alg_out
, unsigned HOST_WIDE_INT t
,
2526 const struct mult_cost
*cost_limit
, machine_mode mode
)
2529 struct algorithm
*alg_in
, *best_alg
;
2530 struct mult_cost best_cost
;
2531 struct mult_cost new_limit
;
2532 int op_cost
, op_latency
;
2533 unsigned HOST_WIDE_INT orig_t
= t
;
2534 unsigned HOST_WIDE_INT q
;
2535 int maxm
, hash_index
;
2536 bool cache_hit
= false;
2537 enum alg_code cache_alg
= alg_zero
;
2538 bool speed
= optimize_insn_for_speed_p ();
2540 struct alg_hash_entry
*entry_ptr
;
2542 /* Indicate that no algorithm is yet found. If no algorithm
2543 is found, this value will be returned and indicate failure. */
2544 alg_out
->cost
.cost
= cost_limit
->cost
+ 1;
2545 alg_out
->cost
.latency
= cost_limit
->latency
+ 1;
2547 if (cost_limit
->cost
< 0
2548 || (cost_limit
->cost
== 0 && cost_limit
->latency
<= 0))
2551 /* Be prepared for vector modes. */
2552 imode
= GET_MODE_INNER (mode
);
2554 maxm
= MIN (BITS_PER_WORD
, GET_MODE_BITSIZE (imode
));
2556 /* Restrict the bits of "t" to the multiplication's mode. */
2557 t
&= GET_MODE_MASK (imode
);
2559 /* t == 1 can be done in zero cost. */
2563 alg_out
->cost
.cost
= 0;
2564 alg_out
->cost
.latency
= 0;
2565 alg_out
->op
[0] = alg_m
;
2569 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2573 if (MULT_COST_LESS (cost_limit
, zero_cost (speed
)))
2578 alg_out
->cost
.cost
= zero_cost (speed
);
2579 alg_out
->cost
.latency
= zero_cost (speed
);
2580 alg_out
->op
[0] = alg_zero
;
2585 /* We'll be needing a couple extra algorithm structures now. */
2587 alg_in
= XALLOCA (struct algorithm
);
2588 best_alg
= XALLOCA (struct algorithm
);
2589 best_cost
= *cost_limit
;
2591 /* Compute the hash index. */
2592 hash_index
= (t
^ (unsigned int) mode
^ (speed
* 256)) % NUM_ALG_HASH_ENTRIES
;
2594 /* See if we already know what to do for T. */
2595 entry_ptr
= alg_hash_entry_ptr (hash_index
);
2596 if (entry_ptr
->t
== t
2597 && entry_ptr
->mode
== mode
2598 && entry_ptr
->speed
== speed
2599 && entry_ptr
->alg
!= alg_unknown
)
2601 cache_alg
= entry_ptr
->alg
;
2603 if (cache_alg
== alg_impossible
)
2605 /* The cache tells us that it's impossible to synthesize
2606 multiplication by T within entry_ptr->cost. */
2607 if (!CHEAPER_MULT_COST (&entry_ptr
->cost
, cost_limit
))
2608 /* COST_LIMIT is at least as restrictive as the one
2609 recorded in the hash table, in which case we have no
2610 hope of synthesizing a multiplication. Just
2614 /* If we get here, COST_LIMIT is less restrictive than the
2615 one recorded in the hash table, so we may be able to
2616 synthesize a multiplication. Proceed as if we didn't
2617 have the cache entry. */
2621 if (CHEAPER_MULT_COST (cost_limit
, &entry_ptr
->cost
))
2622 /* The cached algorithm shows that this multiplication
2623 requires more cost than COST_LIMIT. Just return. This
2624 way, we don't clobber this cache entry with
2625 alg_impossible but retain useful information. */
2637 goto do_alg_addsub_t_m2
;
2639 case alg_add_factor
:
2640 case alg_sub_factor
:
2641 goto do_alg_addsub_factor
;
2644 goto do_alg_add_t2_m
;
2647 goto do_alg_sub_t2_m
;
2655 /* If we have a group of zero bits at the low-order part of T, try
2656 multiplying by the remaining bits and then doing a shift. */
2661 m
= ctz_or_zero (t
); /* m = number of low zero bits */
2665 /* The function expand_shift will choose between a shift and
2666 a sequence of additions, so the observed cost is given as
2667 MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)). */
2668 op_cost
= m
* add_cost (speed
, mode
);
2669 if (shift_cost (speed
, mode
, m
) < op_cost
)
2670 op_cost
= shift_cost (speed
, mode
, m
);
2671 new_limit
.cost
= best_cost
.cost
- op_cost
;
2672 new_limit
.latency
= best_cost
.latency
- op_cost
;
2673 synth_mult (alg_in
, q
, &new_limit
, mode
);
2675 alg_in
->cost
.cost
+= op_cost
;
2676 alg_in
->cost
.latency
+= op_cost
;
2677 if (CHEAPER_MULT_COST (&alg_in
->cost
, &best_cost
))
2679 best_cost
= alg_in
->cost
;
2680 std::swap (alg_in
, best_alg
);
2681 best_alg
->log
[best_alg
->ops
] = m
;
2682 best_alg
->op
[best_alg
->ops
] = alg_shift
;
2685 /* See if treating ORIG_T as a signed number yields a better
2686 sequence. Try this sequence only for a negative ORIG_T
2687 as it would be useless for a non-negative ORIG_T. */
2688 if ((HOST_WIDE_INT
) orig_t
< 0)
2690 /* Shift ORIG_T as follows because a right shift of a
2691 negative-valued signed type is implementation
2693 q
= ~(~orig_t
>> m
);
2694 /* The function expand_shift will choose between a shift
2695 and a sequence of additions, so the observed cost is
2696 given as MIN (m * add_cost(speed, mode),
2697 shift_cost(speed, mode, m)). */
2698 op_cost
= m
* add_cost (speed
, mode
);
2699 if (shift_cost (speed
, mode
, m
) < op_cost
)
2700 op_cost
= shift_cost (speed
, mode
, m
);
2701 new_limit
.cost
= best_cost
.cost
- op_cost
;
2702 new_limit
.latency
= best_cost
.latency
- op_cost
;
2703 synth_mult (alg_in
, q
, &new_limit
, mode
);
2705 alg_in
->cost
.cost
+= op_cost
;
2706 alg_in
->cost
.latency
+= op_cost
;
2707 if (CHEAPER_MULT_COST (&alg_in
->cost
, &best_cost
))
2709 best_cost
= alg_in
->cost
;
2710 std::swap (alg_in
, best_alg
);
2711 best_alg
->log
[best_alg
->ops
] = m
;
2712 best_alg
->op
[best_alg
->ops
] = alg_shift
;
2720 /* If we have an odd number, add or subtract one. */
2723 unsigned HOST_WIDE_INT w
;
2726 for (w
= 1; (w
& t
) != 0; w
<<= 1)
2728 /* If T was -1, then W will be zero after the loop. This is another
2729 case where T ends with ...111. Handling this with (T + 1) and
2730 subtract 1 produces slightly better code and results in algorithm
2731 selection much faster than treating it like the ...0111 case
2735 /* Reject the case where t is 3.
2736 Thus we prefer addition in that case. */
2739 /* T ends with ...111. Multiply by (T + 1) and subtract T. */
2741 op_cost
= add_cost (speed
, mode
);
2742 new_limit
.cost
= best_cost
.cost
- op_cost
;
2743 new_limit
.latency
= best_cost
.latency
- op_cost
;
2744 synth_mult (alg_in
, t
+ 1, &new_limit
, mode
);
2746 alg_in
->cost
.cost
+= op_cost
;
2747 alg_in
->cost
.latency
+= op_cost
;
2748 if (CHEAPER_MULT_COST (&alg_in
->cost
, &best_cost
))
2750 best_cost
= alg_in
->cost
;
2751 std::swap (alg_in
, best_alg
);
2752 best_alg
->log
[best_alg
->ops
] = 0;
2753 best_alg
->op
[best_alg
->ops
] = alg_sub_t_m2
;
2758 /* T ends with ...01 or ...011. Multiply by (T - 1) and add T. */
2760 op_cost
= add_cost (speed
, mode
);
2761 new_limit
.cost
= best_cost
.cost
- op_cost
;
2762 new_limit
.latency
= best_cost
.latency
- op_cost
;
2763 synth_mult (alg_in
, t
- 1, &new_limit
, mode
);
2765 alg_in
->cost
.cost
+= op_cost
;
2766 alg_in
->cost
.latency
+= op_cost
;
2767 if (CHEAPER_MULT_COST (&alg_in
->cost
, &best_cost
))
2769 best_cost
= alg_in
->cost
;
2770 std::swap (alg_in
, best_alg
);
2771 best_alg
->log
[best_alg
->ops
] = 0;
2772 best_alg
->op
[best_alg
->ops
] = alg_add_t_m2
;
2776 /* We may be able to calculate a * -7, a * -15, a * -31, etc
2777 quickly with a - a * n for some appropriate constant n. */
2778 m
= exact_log2 (-orig_t
+ 1);
2779 if (m
>= 0 && m
< maxm
)
2781 op_cost
= add_cost (speed
, mode
) + shift_cost (speed
, mode
, m
);
2782 /* If the target has a cheap shift-and-subtract insn use
2783 that in preference to a shift insn followed by a sub insn.
2784 Assume that the shift-and-sub is "atomic" with a latency
2785 equal to it's cost, otherwise assume that on superscalar
2786 hardware the shift may be executed concurrently with the
2787 earlier steps in the algorithm. */
2788 if (shiftsub1_cost (speed
, mode
, m
) <= op_cost
)
2790 op_cost
= shiftsub1_cost (speed
, mode
, m
);
2791 op_latency
= op_cost
;
2794 op_latency
= add_cost (speed
, mode
);
2796 new_limit
.cost
= best_cost
.cost
- op_cost
;
2797 new_limit
.latency
= best_cost
.latency
- op_latency
;
2798 synth_mult (alg_in
, (unsigned HOST_WIDE_INT
) (-orig_t
+ 1) >> m
,
2801 alg_in
->cost
.cost
+= op_cost
;
2802 alg_in
->cost
.latency
+= op_latency
;
2803 if (CHEAPER_MULT_COST (&alg_in
->cost
, &best_cost
))
2805 best_cost
= alg_in
->cost
;
2806 std::swap (alg_in
, best_alg
);
2807 best_alg
->log
[best_alg
->ops
] = m
;
2808 best_alg
->op
[best_alg
->ops
] = alg_sub_t_m2
;
2816 /* Look for factors of t of the form
2817 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2818 If we find such a factor, we can multiply by t using an algorithm that
2819 multiplies by q, shift the result by m and add/subtract it to itself.
2821 We search for large factors first and loop down, even if large factors
2822 are less probable than small; if we find a large factor we will find a
2823 good sequence quickly, and therefore be able to prune (by decreasing
2824 COST_LIMIT) the search. */
2826 do_alg_addsub_factor
:
2827 for (m
= floor_log2 (t
- 1); m
>= 2; m
--)
2829 unsigned HOST_WIDE_INT d
;
2831 d
= (HOST_WIDE_INT_1U
<< m
) + 1;
2832 if (t
% d
== 0 && t
> d
&& m
< maxm
2833 && (!cache_hit
|| cache_alg
== alg_add_factor
))
2835 op_cost
= add_cost (speed
, mode
) + shift_cost (speed
, mode
, m
);
2836 if (shiftadd_cost (speed
, mode
, m
) <= op_cost
)
2837 op_cost
= shiftadd_cost (speed
, mode
, m
);
2839 op_latency
= op_cost
;
2842 new_limit
.cost
= best_cost
.cost
- op_cost
;
2843 new_limit
.latency
= best_cost
.latency
- op_latency
;
2844 synth_mult (alg_in
, t
/ d
, &new_limit
, mode
);
2846 alg_in
->cost
.cost
+= op_cost
;
2847 alg_in
->cost
.latency
+= op_latency
;
2848 if (alg_in
->cost
.latency
< op_cost
)
2849 alg_in
->cost
.latency
= op_cost
;
2850 if (CHEAPER_MULT_COST (&alg_in
->cost
, &best_cost
))
2852 best_cost
= alg_in
->cost
;
2853 std::swap (alg_in
, best_alg
);
2854 best_alg
->log
[best_alg
->ops
] = m
;
2855 best_alg
->op
[best_alg
->ops
] = alg_add_factor
;
2857 /* Other factors will have been taken care of in the recursion. */
2861 d
= (HOST_WIDE_INT_1U
<< m
) - 1;
2862 if (t
% d
== 0 && t
> d
&& m
< maxm
2863 && (!cache_hit
|| cache_alg
== alg_sub_factor
))
2865 op_cost
= add_cost (speed
, mode
) + shift_cost (speed
, mode
, m
);
2866 if (shiftsub0_cost (speed
, mode
, m
) <= op_cost
)
2867 op_cost
= shiftsub0_cost (speed
, mode
, m
);
2869 op_latency
= op_cost
;
2871 new_limit
.cost
= best_cost
.cost
- op_cost
;
2872 new_limit
.latency
= best_cost
.latency
- op_latency
;
2873 synth_mult (alg_in
, t
/ d
, &new_limit
, mode
);
2875 alg_in
->cost
.cost
+= op_cost
;
2876 alg_in
->cost
.latency
+= op_latency
;
2877 if (alg_in
->cost
.latency
< op_cost
)
2878 alg_in
->cost
.latency
= op_cost
;
2879 if (CHEAPER_MULT_COST (&alg_in
->cost
, &best_cost
))
2881 best_cost
= alg_in
->cost
;
2882 std::swap (alg_in
, best_alg
);
2883 best_alg
->log
[best_alg
->ops
] = m
;
2884 best_alg
->op
[best_alg
->ops
] = alg_sub_factor
;
2892 /* Try shift-and-add (load effective address) instructions,
2893 i.e. do a*3, a*5, a*9. */
2901 op_cost
= shiftadd_cost (speed
, mode
, m
);
2902 new_limit
.cost
= best_cost
.cost
- op_cost
;
2903 new_limit
.latency
= best_cost
.latency
- op_cost
;
2904 synth_mult (alg_in
, (t
- 1) >> m
, &new_limit
, mode
);
2906 alg_in
->cost
.cost
+= op_cost
;
2907 alg_in
->cost
.latency
+= op_cost
;
2908 if (CHEAPER_MULT_COST (&alg_in
->cost
, &best_cost
))
2910 best_cost
= alg_in
->cost
;
2911 std::swap (alg_in
, best_alg
);
2912 best_alg
->log
[best_alg
->ops
] = m
;
2913 best_alg
->op
[best_alg
->ops
] = alg_add_t2_m
;
2924 op_cost
= shiftsub0_cost (speed
, mode
, m
);
2925 new_limit
.cost
= best_cost
.cost
- op_cost
;
2926 new_limit
.latency
= best_cost
.latency
- op_cost
;
2927 synth_mult (alg_in
, (t
+ 1) >> m
, &new_limit
, mode
);
2929 alg_in
->cost
.cost
+= op_cost
;
2930 alg_in
->cost
.latency
+= op_cost
;
2931 if (CHEAPER_MULT_COST (&alg_in
->cost
, &best_cost
))
2933 best_cost
= alg_in
->cost
;
2934 std::swap (alg_in
, best_alg
);
2935 best_alg
->log
[best_alg
->ops
] = m
;
2936 best_alg
->op
[best_alg
->ops
] = alg_sub_t2_m
;
2944 /* If best_cost has not decreased, we have not found any algorithm. */
2945 if (!CHEAPER_MULT_COST (&best_cost
, cost_limit
))
2947 /* We failed to find an algorithm. Record alg_impossible for
2948 this case (that is, <T, MODE, COST_LIMIT>) so that next time
2949 we are asked to find an algorithm for T within the same or
2950 lower COST_LIMIT, we can immediately return to the
2953 entry_ptr
->mode
= mode
;
2954 entry_ptr
->speed
= speed
;
2955 entry_ptr
->alg
= alg_impossible
;
2956 entry_ptr
->cost
= *cost_limit
;
2960 /* Cache the result. */
2964 entry_ptr
->mode
= mode
;
2965 entry_ptr
->speed
= speed
;
2966 entry_ptr
->alg
= best_alg
->op
[best_alg
->ops
];
2967 entry_ptr
->cost
.cost
= best_cost
.cost
;
2968 entry_ptr
->cost
.latency
= best_cost
.latency
;
2971 /* If we are getting a too long sequence for `struct algorithm'
2972 to record, make this search fail. */
2973 if (best_alg
->ops
== MAX_BITS_PER_WORD
)
2976 /* Copy the algorithm from temporary space to the space at alg_out.
2977 We avoid using structure assignment because the majority of
2978 best_alg is normally undefined, and this is a critical function. */
2979 alg_out
->ops
= best_alg
->ops
+ 1;
2980 alg_out
->cost
= best_cost
;
2981 memcpy (alg_out
->op
, best_alg
->op
,
2982 alg_out
->ops
* sizeof *alg_out
->op
);
2983 memcpy (alg_out
->log
, best_alg
->log
,
2984 alg_out
->ops
* sizeof *alg_out
->log
);
2987 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2988 Try three variations:
2990 - a shift/add sequence based on VAL itself
2991 - a shift/add sequence based on -VAL, followed by a negation
2992 - a shift/add sequence based on VAL - 1, followed by an addition.
2994 Return true if the cheapest of these cost less than MULT_COST,
2995 describing the algorithm in *ALG and final fixup in *VARIANT. */
2998 choose_mult_variant (machine_mode mode
, HOST_WIDE_INT val
,
2999 struct algorithm
*alg
, enum mult_variant
*variant
,
3002 struct algorithm alg2
;
3003 struct mult_cost limit
;
3005 bool speed
= optimize_insn_for_speed_p ();
3007 /* Fail quickly for impossible bounds. */
3011 /* Ensure that mult_cost provides a reasonable upper bound.
3012 Any constant multiplication can be performed with less
3013 than 2 * bits additions. */
3014 op_cost
= 2 * GET_MODE_UNIT_BITSIZE (mode
) * add_cost (speed
, mode
);
3015 if (mult_cost
> op_cost
)
3016 mult_cost
= op_cost
;
3018 *variant
= basic_variant
;
3019 limit
.cost
= mult_cost
;
3020 limit
.latency
= mult_cost
;
3021 synth_mult (alg
, val
, &limit
, mode
);
3023 /* This works only if the inverted value actually fits in an
3025 if (HOST_BITS_PER_INT
>= GET_MODE_UNIT_BITSIZE (mode
))
3027 op_cost
= neg_cost (speed
, mode
);
3028 if (MULT_COST_LESS (&alg
->cost
, mult_cost
))
3030 limit
.cost
= alg
->cost
.cost
- op_cost
;
3031 limit
.latency
= alg
->cost
.latency
- op_cost
;
3035 limit
.cost
= mult_cost
- op_cost
;
3036 limit
.latency
= mult_cost
- op_cost
;
3039 synth_mult (&alg2
, -val
, &limit
, mode
);
3040 alg2
.cost
.cost
+= op_cost
;
3041 alg2
.cost
.latency
+= op_cost
;
3042 if (CHEAPER_MULT_COST (&alg2
.cost
, &alg
->cost
))
3043 *alg
= alg2
, *variant
= negate_variant
;
3046 /* This proves very useful for division-by-constant. */
3047 op_cost
= add_cost (speed
, mode
);
3048 if (MULT_COST_LESS (&alg
->cost
, mult_cost
))
3050 limit
.cost
= alg
->cost
.cost
- op_cost
;
3051 limit
.latency
= alg
->cost
.latency
- op_cost
;
3055 limit
.cost
= mult_cost
- op_cost
;
3056 limit
.latency
= mult_cost
- op_cost
;
3059 synth_mult (&alg2
, val
- 1, &limit
, mode
);
3060 alg2
.cost
.cost
+= op_cost
;
3061 alg2
.cost
.latency
+= op_cost
;
3062 if (CHEAPER_MULT_COST (&alg2
.cost
, &alg
->cost
))
3063 *alg
= alg2
, *variant
= add_variant
;
3065 return MULT_COST_LESS (&alg
->cost
, mult_cost
);
3068 /* A subroutine of expand_mult, used for constant multiplications.
3069 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
3070 convenient. Use the shift/add sequence described by ALG and apply
3071 the final fixup specified by VARIANT. */
3074 expand_mult_const (machine_mode mode
, rtx op0
, HOST_WIDE_INT val
,
3075 rtx target
, const struct algorithm
*alg
,
3076 enum mult_variant variant
)
3078 unsigned HOST_WIDE_INT val_so_far
;
3084 /* Avoid referencing memory over and over and invalid sharing
3086 op0
= force_reg (mode
, op0
);
3088 /* ACCUM starts out either as OP0 or as a zero, depending on
3089 the first operation. */
3091 if (alg
->op
[0] == alg_zero
)
3093 accum
= copy_to_mode_reg (mode
, CONST0_RTX (mode
));
3096 else if (alg
->op
[0] == alg_m
)
3098 accum
= copy_to_mode_reg (mode
, op0
);
3104 for (opno
= 1; opno
< alg
->ops
; opno
++)
3106 int log
= alg
->log
[opno
];
3107 rtx shift_subtarget
= optimize
? 0 : accum
;
3109 = (opno
== alg
->ops
- 1 && target
!= 0 && variant
!= add_variant
3112 rtx accum_target
= optimize
? 0 : accum
;
3115 switch (alg
->op
[opno
])
3118 tem
= expand_shift (LSHIFT_EXPR
, mode
, accum
, log
, NULL_RTX
, 0);
3119 /* REG_EQUAL note will be attached to the following insn. */
3120 emit_move_insn (accum
, tem
);
3125 tem
= expand_shift (LSHIFT_EXPR
, mode
, op0
, log
, NULL_RTX
, 0);
3126 accum
= force_operand (gen_rtx_PLUS (mode
, accum
, tem
),
3127 add_target
? add_target
: accum_target
);
3128 val_so_far
+= HOST_WIDE_INT_1U
<< log
;
3132 tem
= expand_shift (LSHIFT_EXPR
, mode
, op0
, log
, NULL_RTX
, 0);
3133 accum
= force_operand (gen_rtx_MINUS (mode
, accum
, tem
),
3134 add_target
? add_target
: accum_target
);
3135 val_so_far
-= HOST_WIDE_INT_1U
<< log
;
3139 accum
= expand_shift (LSHIFT_EXPR
, mode
, accum
,
3140 log
, shift_subtarget
, 0);
3141 accum
= force_operand (gen_rtx_PLUS (mode
, accum
, op0
),
3142 add_target
? add_target
: accum_target
);
3143 val_so_far
= (val_so_far
<< log
) + 1;
3147 accum
= expand_shift (LSHIFT_EXPR
, mode
, accum
,
3148 log
, shift_subtarget
, 0);
3149 accum
= force_operand (gen_rtx_MINUS (mode
, accum
, op0
),
3150 add_target
? add_target
: accum_target
);
3151 val_so_far
= (val_so_far
<< log
) - 1;
3154 case alg_add_factor
:
3155 tem
= expand_shift (LSHIFT_EXPR
, mode
, accum
, log
, NULL_RTX
, 0);
3156 accum
= force_operand (gen_rtx_PLUS (mode
, accum
, tem
),
3157 add_target
? add_target
: accum_target
);
3158 val_so_far
+= val_so_far
<< log
;
3161 case alg_sub_factor
:
3162 tem
= expand_shift (LSHIFT_EXPR
, mode
, accum
, log
, NULL_RTX
, 0);
3163 accum
= force_operand (gen_rtx_MINUS (mode
, tem
, accum
),
3165 ? add_target
: (optimize
? 0 : tem
)));
3166 val_so_far
= (val_so_far
<< log
) - val_so_far
;
3173 if (SCALAR_INT_MODE_P (mode
))
3175 /* Write a REG_EQUAL note on the last insn so that we can cse
3176 multiplication sequences. Note that if ACCUM is a SUBREG,
3177 we've set the inner register and must properly indicate that. */
3178 tem
= op0
, nmode
= mode
;
3179 accum_inner
= accum
;
3180 if (GET_CODE (accum
) == SUBREG
)
3182 accum_inner
= SUBREG_REG (accum
);
3183 nmode
= GET_MODE (accum_inner
);
3184 tem
= gen_lowpart (nmode
, op0
);
3187 insn
= get_last_insn ();
3188 set_dst_reg_note (insn
, REG_EQUAL
,
3189 gen_rtx_MULT (nmode
, tem
,
3190 gen_int_mode (val_so_far
, nmode
)),
3195 if (variant
== negate_variant
)
3197 val_so_far
= -val_so_far
;
3198 accum
= expand_unop (mode
, neg_optab
, accum
, target
, 0);
3200 else if (variant
== add_variant
)
3202 val_so_far
= val_so_far
+ 1;
3203 accum
= force_operand (gen_rtx_PLUS (mode
, accum
, op0
), target
);
3206 /* Compare only the bits of val and val_so_far that are significant
3207 in the result mode, to avoid sign-/zero-extension confusion. */
3208 nmode
= GET_MODE_INNER (mode
);
3209 val
&= GET_MODE_MASK (nmode
);
3210 val_so_far
&= GET_MODE_MASK (nmode
);
3211 gcc_assert (val
== (HOST_WIDE_INT
) val_so_far
);
3216 /* Perform a multiplication and return an rtx for the result.
3217 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3218 TARGET is a suggestion for where to store the result (an rtx).
3220 We check specially for a constant integer as OP1.
3221 If you want this check for OP0 as well, then before calling
3222 you should swap the two operands if OP0 would be constant. */
3225 expand_mult (machine_mode mode
, rtx op0
, rtx op1
, rtx target
,
3228 enum mult_variant variant
;
3229 struct algorithm algorithm
;
3232 bool speed
= optimize_insn_for_speed_p ();
3233 bool do_trapv
= flag_trapv
&& SCALAR_INT_MODE_P (mode
) && !unsignedp
;
3235 if (CONSTANT_P (op0
))
3236 std::swap (op0
, op1
);
3238 /* For vectors, there are several simplifications that can be made if
3239 all elements of the vector constant are identical. */
3240 scalar_op1
= unwrap_const_vec_duplicate (op1
);
3242 if (INTEGRAL_MODE_P (mode
))
3245 HOST_WIDE_INT coeff
;
3249 if (op1
== CONST0_RTX (mode
))
3251 if (op1
== CONST1_RTX (mode
))
3253 if (op1
== CONSTM1_RTX (mode
))
3254 return expand_unop (mode
, do_trapv
? negv_optab
: neg_optab
,
3260 /* If mode is integer vector mode, check if the backend supports
3261 vector lshift (by scalar or vector) at all. If not, we can't use
3262 synthetized multiply. */
3263 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
3264 && optab_handler (vashl_optab
, mode
) == CODE_FOR_nothing
3265 && optab_handler (ashl_optab
, mode
) == CODE_FOR_nothing
)
3268 /* These are the operations that are potentially turned into
3269 a sequence of shifts and additions. */
3270 mode_bitsize
= GET_MODE_UNIT_BITSIZE (mode
);
3272 /* synth_mult does an `unsigned int' multiply. As long as the mode is
3273 less than or equal in size to `unsigned int' this doesn't matter.
3274 If the mode is larger than `unsigned int', then synth_mult works
3275 only if the constant value exactly fits in an `unsigned int' without
3276 any truncation. This means that multiplying by negative values does
3277 not work; results are off by 2^32 on a 32 bit machine. */
3278 if (CONST_INT_P (scalar_op1
))
3280 coeff
= INTVAL (scalar_op1
);
3283 #if TARGET_SUPPORTS_WIDE_INT
3284 else if (CONST_WIDE_INT_P (scalar_op1
))
3286 else if (CONST_DOUBLE_AS_INT_P (scalar_op1
))
3289 int shift
= wi::exact_log2 (rtx_mode_t (scalar_op1
, mode
));
3290 /* Perfect power of 2 (other than 1, which is handled above). */
3292 return expand_shift (LSHIFT_EXPR
, mode
, op0
,
3293 shift
, target
, unsignedp
);
3300 /* We used to test optimize here, on the grounds that it's better to
3301 produce a smaller program when -O is not used. But this causes
3302 such a terrible slowdown sometimes that it seems better to always
3305 /* Special case powers of two. */
3306 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff
)
3307 && !(is_neg
&& mode_bitsize
> HOST_BITS_PER_WIDE_INT
))
3308 return expand_shift (LSHIFT_EXPR
, mode
, op0
,
3309 floor_log2 (coeff
), target
, unsignedp
);
3311 fake_reg
= gen_raw_REG (mode
, LAST_VIRTUAL_REGISTER
+ 1);
3313 /* Attempt to handle multiplication of DImode values by negative
3314 coefficients, by performing the multiplication by a positive
3315 multiplier and then inverting the result. */
3316 if (is_neg
&& mode_bitsize
> HOST_BITS_PER_WIDE_INT
)
3318 /* Its safe to use -coeff even for INT_MIN, as the
3319 result is interpreted as an unsigned coefficient.
3320 Exclude cost of op0 from max_cost to match the cost
3321 calculation of the synth_mult. */
3322 coeff
= -(unsigned HOST_WIDE_INT
) coeff
;
3323 max_cost
= (set_src_cost (gen_rtx_MULT (mode
, fake_reg
, op1
),
3325 - neg_cost (speed
, mode
));
3329 /* Special case powers of two. */
3330 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff
))
3332 rtx temp
= expand_shift (LSHIFT_EXPR
, mode
, op0
,
3333 floor_log2 (coeff
), target
, unsignedp
);
3334 return expand_unop (mode
, neg_optab
, temp
, target
, 0);
3337 if (choose_mult_variant (mode
, coeff
, &algorithm
, &variant
,
3340 rtx temp
= expand_mult_const (mode
, op0
, coeff
, NULL_RTX
,
3341 &algorithm
, variant
);
3342 return expand_unop (mode
, neg_optab
, temp
, target
, 0);
3347 /* Exclude cost of op0 from max_cost to match the cost
3348 calculation of the synth_mult. */
3349 max_cost
= set_src_cost (gen_rtx_MULT (mode
, fake_reg
, op1
), mode
, speed
);
3350 if (choose_mult_variant (mode
, coeff
, &algorithm
, &variant
, max_cost
))
3351 return expand_mult_const (mode
, op0
, coeff
, target
,
3352 &algorithm
, variant
);
3356 /* Expand x*2.0 as x+x. */
3357 if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1
)
3358 && real_equal (CONST_DOUBLE_REAL_VALUE (scalar_op1
), &dconst2
))
3360 op0
= force_reg (GET_MODE (op0
), op0
);
3361 return expand_binop (mode
, add_optab
, op0
, op0
,
3362 target
, unsignedp
, OPTAB_LIB_WIDEN
);
3365 /* This used to use umul_optab if unsigned, but for non-widening multiply
3366 there is no difference between signed and unsigned. */
3367 op0
= expand_binop (mode
, do_trapv
? smulv_optab
: smul_optab
,
3368 op0
, op1
, target
, unsignedp
, OPTAB_LIB_WIDEN
);
3373 /* Return a cost estimate for multiplying a register by the given
3374 COEFFicient in the given MODE and SPEED. */
3377 mult_by_coeff_cost (HOST_WIDE_INT coeff
, machine_mode mode
, bool speed
)
3380 struct algorithm algorithm
;
3381 enum mult_variant variant
;
3383 rtx fake_reg
= gen_raw_REG (mode
, LAST_VIRTUAL_REGISTER
+ 1);
3384 max_cost
= set_src_cost (gen_rtx_MULT (mode
, fake_reg
, fake_reg
),
3386 if (choose_mult_variant (mode
, coeff
, &algorithm
, &variant
, max_cost
))
3387 return algorithm
.cost
.cost
;
3392 /* Perform a widening multiplication and return an rtx for the result.
3393 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3394 TARGET is a suggestion for where to store the result (an rtx).
3395 THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3396 or smul_widen_optab.
3398 We check specially for a constant integer as OP1, comparing the
3399 cost of a widening multiply against the cost of a sequence of shifts
3403 expand_widening_mult (machine_mode mode
, rtx op0
, rtx op1
, rtx target
,
3404 int unsignedp
, optab this_optab
)
3406 bool speed
= optimize_insn_for_speed_p ();
3409 if (CONST_INT_P (op1
)
3410 && GET_MODE (op0
) != VOIDmode
3411 && (cop1
= convert_modes (mode
, GET_MODE (op0
), op1
,
3412 this_optab
== umul_widen_optab
))
3413 && CONST_INT_P (cop1
)
3414 && (INTVAL (cop1
) >= 0
3415 || HWI_COMPUTABLE_MODE_P (mode
)))
3417 HOST_WIDE_INT coeff
= INTVAL (cop1
);
3419 enum mult_variant variant
;
3420 struct algorithm algorithm
;
3423 return CONST0_RTX (mode
);
3425 /* Special case powers of two. */
3426 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff
))
3428 op0
= convert_to_mode (mode
, op0
, this_optab
== umul_widen_optab
);
3429 return expand_shift (LSHIFT_EXPR
, mode
, op0
,
3430 floor_log2 (coeff
), target
, unsignedp
);
3433 /* Exclude cost of op0 from max_cost to match the cost
3434 calculation of the synth_mult. */
3435 max_cost
= mul_widen_cost (speed
, mode
);
3436 if (choose_mult_variant (mode
, coeff
, &algorithm
, &variant
,
3439 op0
= convert_to_mode (mode
, op0
, this_optab
== umul_widen_optab
);
3440 return expand_mult_const (mode
, op0
, coeff
, target
,
3441 &algorithm
, variant
);
3444 return expand_binop (mode
, this_optab
, op0
, op1
, target
,
3445 unsignedp
, OPTAB_LIB_WIDEN
);

/* Choose a minimal N + 1 bit approximation to 1/D that can be used to
   replace division by D, and put the least significant N bits of the result
   in *MULTIPLIER_PTR and return the most significant bit.

   The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
   needed precision is in PRECISION (should be <= N).

   PRECISION should be as small as possible so this function can choose
   multiplier more freely.

   The rounded-up logarithm of D is placed in *lgup_ptr.  A shift count that
   is to be used for a final right shift is placed in *POST_SHIFT_PTR.

   Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
   where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.  */

unsigned HOST_WIDE_INT
choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
		   unsigned HOST_WIDE_INT *multiplier_ptr,
		   int *post_shift_ptr, int *lgup_ptr)
{
  int lgup, post_shift;
  int pow, pow2;

  /* lgup = ceil(log2(divisor)); */
  lgup = ceil_log2 (d);

  gcc_assert (lgup <= n);

  pow = n + lgup;
  pow2 = n + lgup - precision;

  /* mlow = 2^(N + lgup)/d */
  wide_int val = wi::set_bit_in_zero (pow, HOST_BITS_PER_DOUBLE_INT);
  wide_int mlow = wi::udiv_trunc (val, d);

  /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
  val |= wi::set_bit_in_zero (pow2, HOST_BITS_PER_DOUBLE_INT);
  wide_int mhigh = wi::udiv_trunc (val, d);

  /* If precision == N, then mlow, mhigh exceed 2^N
     (but they do not exceed 2^(N+1)).  */

  /* Reduce to lowest terms.  */
  for (post_shift = lgup; post_shift > 0; post_shift--)
    {
      unsigned HOST_WIDE_INT ml_lo = wi::extract_uhwi (mlow, 1,
						       HOST_BITS_PER_WIDE_INT);
      unsigned HOST_WIDE_INT mh_lo = wi::extract_uhwi (mhigh, 1,
						       HOST_BITS_PER_WIDE_INT);
      if (ml_lo >= mh_lo)
	break;

      mlow = wi::uhwi (ml_lo, HOST_BITS_PER_DOUBLE_INT);
      mhigh = wi::uhwi (mh_lo, HOST_BITS_PER_DOUBLE_INT);
    }

  *post_shift_ptr = post_shift;
  *lgup_ptr = lgup;
  if (n < HOST_BITS_PER_WIDE_INT)
    {
      unsigned HOST_WIDE_INT mask = (HOST_WIDE_INT_1U << n) - 1;
      *multiplier_ptr = mhigh.to_uhwi () & mask;
      return mhigh.to_uhwi () >= mask;
    }
  else
    {
      *multiplier_ptr = mhigh.to_uhwi ();
      return wi::extract_uhwi (mhigh, HOST_BITS_PER_WIDE_INT, 1);
    }
}
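
/* Illustrative sketch (reviewer addition, not part of GCC): one common
   lowering that a choose_multiplier-style reciprocal enables.  For unsigned
   32-bit division by 10, the multiplier 0xCCCCCCCD together with a total
   right shift of 35 after a widening multiply yields the exact quotient for
   every 32-bit input.  Assumes `unsigned int' is 32 bits wide and
   `unsigned long long' is at least 64.  */
static unsigned int
udiv10_sketch (unsigned int x)
{
  return (unsigned int) (((unsigned long long) x * 0xCCCCCCCDu) >> 35);
}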

/* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
   congruent to 1 (mod 2**N).  */

static unsigned HOST_WIDE_INT
invert_mod2n (unsigned HOST_WIDE_INT x, int n)
{
  /* Solve x*y == 1 (mod 2^n), where x is odd.  Return y.  */

  /* The algorithm notes that the choice y = x satisfies
     x*y == 1 mod 2^3, since x is assumed odd.
     Each iteration doubles the number of bits of significance in y.  */

  unsigned HOST_WIDE_INT mask;
  unsigned HOST_WIDE_INT y = x;
  int nbit = 3;

  mask = (n == HOST_BITS_PER_WIDE_INT
	  ? HOST_WIDE_INT_M1U
	  : (HOST_WIDE_INT_1U << n) - 1);

  while (nbit < n)
    {
      y = y * (2 - x*y) & mask;		/* Modulo 2^N */
      nbit *= 2;
    }
  return y;
}
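
/* Illustrative sketch (reviewer addition, not part of GCC): the same
   Newton-style iteration for a 32-bit odd value, assuming `unsigned int' is
   32 bits wide.  Each step doubles the number of correct low-order bits of
   the inverse, so a handful of iterations suffice.  */
static unsigned int
invert_mod2_32_sketch (unsigned int x)
{
  unsigned int y = x;		/* already correct to 3 bits, since x is odd */
  for (int i = 0; i < 5; i++)
    y = y * (2 - x * y);	/* unsigned arithmetic is naturally mod 2^32 */
  return y;			/* now x * y == 1 (mod 2^32) */
}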
3548 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3549 flavor of OP0 and OP1. ADJ_OPERAND is already the high half of the
3550 product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed product
3551 to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3554 The result is put in TARGET if that is convenient.
3556 MODE is the mode of operation. */
3559 expand_mult_highpart_adjust (machine_mode mode
, rtx adj_operand
, rtx op0
,
3560 rtx op1
, rtx target
, int unsignedp
)
3563 enum rtx_code adj_code
= unsignedp
? PLUS
: MINUS
;
3565 tem
= expand_shift (RSHIFT_EXPR
, mode
, op0
,
3566 GET_MODE_BITSIZE (mode
) - 1, NULL_RTX
, 0);
3567 tem
= expand_and (mode
, tem
, op1
, NULL_RTX
);
3569 = force_operand (gen_rtx_fmt_ee (adj_code
, mode
, adj_operand
, tem
),
3572 tem
= expand_shift (RSHIFT_EXPR
, mode
, op1
,
3573 GET_MODE_BITSIZE (mode
) - 1, NULL_RTX
, 0);
3574 tem
= expand_and (mode
, tem
, op0
, NULL_RTX
);
3575 target
= force_operand (gen_rtx_fmt_ee (adj_code
, mode
, adj_operand
, tem
),
3581 /* Subroutine of expmed_mult_highpart. Return the MODE high part of OP. */
3584 extract_high_half (machine_mode mode
, rtx op
)
3586 machine_mode wider_mode
;
3588 if (mode
== word_mode
)
3589 return gen_highpart (mode
, op
);
3591 gcc_assert (!SCALAR_FLOAT_MODE_P (mode
));
3593 wider_mode
= GET_MODE_WIDER_MODE (mode
);
3594 op
= expand_shift (RSHIFT_EXPR
, wider_mode
, op
,
3595 GET_MODE_BITSIZE (mode
), 0, 1);
3596 return convert_modes (mode
, wider_mode
, op
, 0);
3599 /* Like expmed_mult_highpart, but only consider using a multiplication
3600 optab. OP1 is an rtx for the constant operand. */
3603 expmed_mult_highpart_optab (machine_mode mode
, rtx op0
, rtx op1
,
3604 rtx target
, int unsignedp
, int max_cost
)
3606 rtx narrow_op1
= gen_int_mode (INTVAL (op1
), mode
);
3607 machine_mode wider_mode
;
3611 bool speed
= optimize_insn_for_speed_p ();
3613 gcc_assert (!SCALAR_FLOAT_MODE_P (mode
));
3615 wider_mode
= GET_MODE_WIDER_MODE (mode
);
3616 size
= GET_MODE_BITSIZE (mode
);
3618 /* Firstly, try using a multiplication insn that only generates the needed
3619 high part of the product, and in the sign flavor of unsignedp. */
3620 if (mul_highpart_cost (speed
, mode
) < max_cost
)
3622 moptab
= unsignedp
? umul_highpart_optab
: smul_highpart_optab
;
3623 tem
= expand_binop (mode
, moptab
, op0
, narrow_op1
, target
,
3624 unsignedp
, OPTAB_DIRECT
);
3629 /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3630 Need to adjust the result after the multiplication. */
3631 if (size
- 1 < BITS_PER_WORD
3632 && (mul_highpart_cost (speed
, mode
)
3633 + 2 * shift_cost (speed
, mode
, size
-1)
3634 + 4 * add_cost (speed
, mode
) < max_cost
))
3636 moptab
= unsignedp
? smul_highpart_optab
: umul_highpart_optab
;
3637 tem
= expand_binop (mode
, moptab
, op0
, narrow_op1
, target
,
3638 unsignedp
, OPTAB_DIRECT
);
3640 /* We used the wrong signedness. Adjust the result. */
3641 return expand_mult_highpart_adjust (mode
, tem
, op0
, narrow_op1
,
3645 /* Try widening multiplication. */
3646 moptab
= unsignedp
? umul_widen_optab
: smul_widen_optab
;
3647 if (widening_optab_handler (moptab
, wider_mode
, mode
) != CODE_FOR_nothing
3648 && mul_widen_cost (speed
, wider_mode
) < max_cost
)
3650 tem
= expand_binop (wider_mode
, moptab
, op0
, narrow_op1
, 0,
3651 unsignedp
, OPTAB_WIDEN
);
3653 return extract_high_half (mode
, tem
);
3656 /* Try widening the mode and perform a non-widening multiplication. */
3657 if (optab_handler (smul_optab
, wider_mode
) != CODE_FOR_nothing
3658 && size
- 1 < BITS_PER_WORD
3659 && (mul_cost (speed
, wider_mode
) + shift_cost (speed
, mode
, size
-1)
3665 /* We need to widen the operands, for example to ensure the
3666 constant multiplier is correctly sign or zero extended.
3667 Use a sequence to clean-up any instructions emitted by
3668 the conversions if things don't work out. */
3670 wop0
= convert_modes (wider_mode
, mode
, op0
, unsignedp
);
3671 wop1
= convert_modes (wider_mode
, mode
, op1
, unsignedp
);
3672 tem
= expand_binop (wider_mode
, smul_optab
, wop0
, wop1
, 0,
3673 unsignedp
, OPTAB_WIDEN
);
3674 insns
= get_insns ();
3680 return extract_high_half (mode
, tem
);
3684 /* Try widening multiplication of opposite signedness, and adjust. */
3685 moptab
= unsignedp
? smul_widen_optab
: umul_widen_optab
;
3686 if (widening_optab_handler (moptab
, wider_mode
, mode
) != CODE_FOR_nothing
3687 && size
- 1 < BITS_PER_WORD
3688 && (mul_widen_cost (speed
, wider_mode
)
3689 + 2 * shift_cost (speed
, mode
, size
-1)
3690 + 4 * add_cost (speed
, mode
) < max_cost
))
3692 tem
= expand_binop (wider_mode
, moptab
, op0
, narrow_op1
,
3693 NULL_RTX
, ! unsignedp
, OPTAB_WIDEN
);
3696 tem
= extract_high_half (mode
, tem
);
3697 /* We used the wrong signedness. Adjust the result. */
3698 return expand_mult_highpart_adjust (mode
, tem
, op0
, narrow_op1
,
/* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
   putting the high half of the result in TARGET if that is convenient,
   and return where the result is.  If the operation cannot be performed,
   0 is returned.

   MODE is the mode of operation and result.

   UNSIGNEDP nonzero means unsigned multiply.

   MAX_COST is the total allowed cost for the expanded RTL.  */

static rtx
expmed_mult_highpart (machine_mode mode, rtx op0, rtx op1,
                      rtx target, int unsignedp, int max_cost)
{
  machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
  unsigned HOST_WIDE_INT cnst1;
  int extra_cost;
  bool sign_adjust = false;
  enum mult_variant variant;
  struct algorithm alg;
  rtx tem;
  bool speed = optimize_insn_for_speed_p ();

  gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
  /* We can't support modes wider than HOST_BITS_PER_INT.  */
  gcc_assert (HWI_COMPUTABLE_MODE_P (mode));

  cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);

  /* We can't optimize modes wider than BITS_PER_WORD.
     ??? We might be able to perform double-word arithmetic if
     mode == word_mode, however all the cost calculations in
     synth_mult etc. assume single-word operations.  */
  if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
    return expmed_mult_highpart_optab (mode, op0, op1, target,
                                       unsignedp, max_cost);

  extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);

  /* Check whether we try to multiply by a negative constant.  */
  if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
    {
      sign_adjust = true;
      extra_cost += add_cost (speed, mode);
    }

  /* See whether shift/add multiplication is cheap enough.  */
  if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
                           max_cost - extra_cost))
    {
      /* See whether the specialized multiplication optabs are
         cheaper than the shift/add version.  */
      tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
                                        alg.cost.cost + extra_cost);
      if (tem)
        return tem;

      tem = convert_to_mode (wider_mode, op0, unsignedp);
      tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
      tem = extract_high_half (mode, tem);

      /* Adjust result for signedness.  */
      if (sign_adjust)
        tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);

      return tem;
    }
  return expmed_mult_highpart_optab (mode, op0, op1, target,
                                     unsignedp, max_cost);
}
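
/* Illustrative sketch (not part of GCC itself): the sign adjustment applied
   above when the constant multiplier is negative.  The synthesized multiply
   uses the constant's unsigned N-bit image CNST1, so the true signed high
   half is recovered by subtracting OP0 afterwards.  Shown for 32-bit int,
   64-bit long long, and a hypothetical constant of -7.  */

static inline int
example_smul_highpart_by_minus7 (int a)
{
  unsigned int cnst1 = (unsigned int) -7;     /* -7 masked to the mode */
  long long wide = (long long) a * cnst1;     /* sign- times zero-extended */
  int high = (int) (wide >> 32);              /* high half of that product */
  return high - a;                            /* == high half of a * -7 */
}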
/* Expand signed modulus of OP0 by a power of two D in mode MODE.  */

static rtx
expand_smod_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d)
{
  rtx result, temp, shift;
  rtx_code_label *label;
  int logd;
  int prec = GET_MODE_PRECISION (mode);

  logd = floor_log2 (d);
  result = gen_reg_rtx (mode);

  /* Avoid conditional branches when they're expensive.  */
  if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
      && optimize_insn_for_speed_p ())
    {
      rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
                                      mode, 0, -1);
      if (signmask)
        {
          HOST_WIDE_INT masklow = (HOST_WIDE_INT_1 << logd) - 1;
          signmask = force_reg (mode, signmask);
          shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);

          /* Use the rtx_cost of a LSHIFTRT instruction to determine
             which instruction sequence to use.  If logical right shifts
             are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
             use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */

          temp = gen_rtx_LSHIFTRT (mode, result, shift);
          if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
              || (set_src_cost (temp, mode, optimize_insn_for_speed_p ())
                  > COSTS_N_INSNS (2)))
            {
              temp = expand_binop (mode, xor_optab, op0, signmask,
                                   NULL_RTX, 1, OPTAB_LIB_WIDEN);
              temp = expand_binop (mode, sub_optab, temp, signmask,
                                   NULL_RTX, 1, OPTAB_LIB_WIDEN);
              temp = expand_binop (mode, and_optab, temp,
                                   gen_int_mode (masklow, mode),
                                   NULL_RTX, 1, OPTAB_LIB_WIDEN);
              temp = expand_binop (mode, xor_optab, temp, signmask,
                                   NULL_RTX, 1, OPTAB_LIB_WIDEN);
              temp = expand_binop (mode, sub_optab, temp, signmask,
                                   NULL_RTX, 1, OPTAB_LIB_WIDEN);
            }
          else
            {
              signmask = expand_binop (mode, lshr_optab, signmask, shift,
                                       NULL_RTX, 1, OPTAB_LIB_WIDEN);
              signmask = force_reg (mode, signmask);

              temp = expand_binop (mode, add_optab, op0, signmask,
                                   NULL_RTX, 1, OPTAB_LIB_WIDEN);
              temp = expand_binop (mode, and_optab, temp,
                                   gen_int_mode (masklow, mode),
                                   NULL_RTX, 1, OPTAB_LIB_WIDEN);
              temp = expand_binop (mode, sub_optab, temp, signmask,
                                   NULL_RTX, 1, OPTAB_LIB_WIDEN);
            }
          return temp;
        }
    }

  /* Mask contains the mode's signbit and the significant bits of the
     modulus.  By including the signbit in the operation, many targets
     can avoid an explicit compare operation in the following comparison
     against zero.  */
  wide_int mask = wi::mask (logd, false, prec);
  mask = wi::set_bit (mask, prec - 1);

  temp = expand_binop (mode, and_optab, op0,
                       immed_wide_int_const (mask, mode),
                       result, 1, OPTAB_LIB_WIDEN);
  if (temp != result)
    emit_move_insn (result, temp);

  label = gen_label_rtx ();
  do_cmp_and_jump (result, const0_rtx, GE, mode, label);

  temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
                       0, OPTAB_LIB_WIDEN);
  mask = wi::mask (logd, true, prec);
  temp = expand_binop (mode, ior_optab, temp,
                       immed_wide_int_const (mask, mode),
                       result, 1, OPTAB_LIB_WIDEN);
  temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
                       0, OPTAB_LIB_WIDEN);
  if (temp != result)
    emit_move_insn (result, temp);

  emit_label (label);
  return result;
}
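
/* Illustrative sketch (not part of GCC itself): the branch-free sequence
   generated above, written in plain C for 32-bit int and a power-of-two
   modulus D = 1 << LOGD.  It relies on wrapping two's-complement
   arithmetic, which the generated RTL is allowed to assume.  */

static inline int
example_smod_pow2 (int x, int logd)
{
  unsigned int masklow = (1u << logd) - 1;
  /* Low LOGD bits of the sign mask: MASKLOW when x < 0, else 0.  */
  unsigned int bias = x < 0 ? masklow : 0;
  return (int) ((((unsigned int) x + bias) & masklow) - bias);
}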
/* Expand signed division of OP0 by a power of two D in mode MODE.
   This routine is only called for positive values of D.  */

static rtx
expand_sdiv_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d)
{
  rtx temp;
  rtx_code_label *label;
  int logd;

  logd = floor_log2 (d);

  if (d == 2
      && BRANCH_COST (optimize_insn_for_speed_p (),
                      false) >= 1)
    {
      temp = gen_reg_rtx (mode);
      temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
      temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
                           0, OPTAB_LIB_WIDEN);
      return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
    }

  if (HAVE_conditional_move
      && BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2)
    {
      rtx temp2;

      start_sequence ();
      temp2 = copy_to_mode_reg (mode, op0);
      temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
                           NULL_RTX, 0, OPTAB_LIB_WIDEN);
      temp = force_reg (mode, temp);

      /* Construct "temp2 = (temp2 < 0) ? temp : temp2".  */
      temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
                                     mode, temp, temp2, mode, 0);
      if (temp2)
        {
          rtx_insn *seq = get_insns ();
          end_sequence ();
          emit_insn (seq);
          return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
        }
      end_sequence ();
    }

  if (BRANCH_COST (optimize_insn_for_speed_p (),
                   false) >= 2)
    {
      int ushift = GET_MODE_BITSIZE (mode) - logd;

      temp = gen_reg_rtx (mode);
      temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
      if (GET_MODE_BITSIZE (mode) >= BITS_PER_WORD
          || shift_cost (optimize_insn_for_speed_p (), mode, ushift)
             > COSTS_N_INSNS (1))
        temp = expand_binop (mode, and_optab, temp, gen_int_mode (d - 1, mode),
                             NULL_RTX, 0, OPTAB_LIB_WIDEN);
      else
        temp = expand_shift (RSHIFT_EXPR, mode, temp,
                             ushift, NULL_RTX, 1);
      temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
                           0, OPTAB_LIB_WIDEN);
      return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
    }

  label = gen_label_rtx ();
  temp = copy_to_mode_reg (mode, op0);
  do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
  expand_inc (temp, gen_int_mode (d - 1, mode));
  emit_label (label);
  return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
}
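
/* Illustrative sketch (not part of GCC itself): the branch-free rounding
   adjustment generated above, in plain C for 32-bit int and a power-of-two
   divisor D = 1 << LOGD.  A negative dividend is biased by D - 1 before the
   arithmetic shift so that the shift rounds toward zero instead of toward
   negative infinity.  Assumes an arithmetic right shift on signed int.  */

static inline int
example_sdiv_pow2 (int x, int logd)
{
  int bias = (x >> 31) & ((1 << logd) - 1);   /* d - 1 if x < 0, else 0 */
  return (x + bias) >> logd;
}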
/* Emit the code to divide OP0 by OP1, putting the result in TARGET
   if that is convenient, and returning where the result is.
   You may request either the quotient or the remainder as the result;
   specify REM_FLAG nonzero to get the remainder.

   CODE is the expression code for which kind of division this is;
   it controls how rounding is done.  MODE is the machine mode to use.
   UNSIGNEDP nonzero means do unsigned division.  */

/* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
   and then correct it by or'ing in missing high bits
   if result of ANDI is nonzero.
   For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
   This could optimize to a bfexts instruction.
   But C doesn't use these operations, so their optimizations are
   left for later versions of GCC.  */

/* ??? For modulo, we don't actually need the highpart of the first product,
   the low part will do nicely.  And for small divisors, the second multiply
   can also be a low-part only multiply or even be completely left out.
   E.g. to calculate the remainder of a division by 3 with a 32 bit
   multiply, multiply with 0x55555556 and extract the upper two bits;
   the result is exact for inputs up to 0x1fffffff.
   The input range can be reduced by using cross-sum rules.
   For odd divisors >= 3, the following table gives right shift counts
   so that if a number is shifted by an integer multiple of the given
   amount, the remainder stays the same:
   2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
   14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
   0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
   20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
   0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12

   Cross-sum rules for even numbers can be derived by leaving as many bits
   to the right alone as the divisor has zeros to the right.
   E.g. if x is an unsigned 32 bit number:
   (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
   */
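
/* Illustrative sketch (not part of GCC itself) of the remainder-by-3 trick
   described above, for a 32-bit unsigned X no larger than 0x1fffffff:

     unsigned int
     example_urem3 (unsigned int x)
     {
       return (x * 0x55555556u) >> 30;
     }

   0x55555556 is (2**32 + 2) / 3, so for each residue of X mod 3 the low 32
   bits of the product fall into disjoint ranges whose upper two bits are
   exactly X mod 3.  */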
rtx
expand_divmod (int rem_flag, enum tree_code code, machine_mode mode,
               rtx op0, rtx op1, rtx target, int unsignedp)
{
  machine_mode compute_mode;
  rtx tquotient;
  rtx quotient = 0, remainder = 0;
  rtx_insn *last;
  int size;
  rtx_insn *insn;
  optab optab1, optab2;
  int op1_is_constant, op1_is_pow2 = 0;
  int max_cost, extra_cost;
  static HOST_WIDE_INT last_div_const = 0;
  bool speed = optimize_insn_for_speed_p ();

  op1_is_constant = CONST_INT_P (op1);
  if (op1_is_constant)
    {
      wide_int ext_op1 = rtx_mode_t (op1, mode);
      op1_is_pow2 = (wi::popcount (ext_op1) == 1
                     || (! unsignedp
                         && wi::popcount (wi::neg (ext_op1)) == 1));
    }

  /*
     This is the structure of expand_divmod:

     First comes code to fix up the operands so we can perform the operations
     correctly and efficiently.

     Second comes a switch statement with code specific for each rounding mode.
     For some special operands this code emits all RTL for the desired
     operation, for other cases, it generates only a quotient and stores it in
     QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
     to indicate that it has not done anything.

     Last comes code that finishes the operation.  If QUOTIENT is set and
     REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
     QUOTIENT is not set, it is computed using trunc rounding.

     We try to generate special code for division and remainder when OP1 is a
     constant.  If |OP1| = 2**n we can use shifts and some other fast
     operations.  For other values of OP1, we compute a carefully selected
     fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
     by m.

     In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
     half of the product.  Different strategies for generating the product are
     implemented in expmed_mult_highpart.

     If what we actually want is the remainder, we generate that by another
     by-constant multiplication and a subtraction.  */
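
  /* Illustrative sketch (not part of GCC itself) of the multiply-by-reciprocal
     strategy described above, specialized to unsigned 32-bit division by 10.
     0xCCCCCCCD is the rounded-up fixed-point approximation of 1/10 scaled by
     2**35, so the quotient is the upper half of a widening multiply followed
     by a small right shift:

       unsigned int
       example_udiv10 (unsigned int x)
       {
         return (unsigned int) (((unsigned long long) x * 0xCCCCCCCDu) >> 35);
       }

     choose_multiplier below picks such a constant and shift count for an
     arbitrary divisor.  */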
4042 /* We shouldn't be called with OP1 == const1_rtx, but some of the
4043 code below will malfunction if we are, so check here and handle
4044 the special case if so. */
4045 if (op1
== const1_rtx
)
4046 return rem_flag
? const0_rtx
: op0
;
4048 /* When dividing by -1, we could get an overflow.
4049 negv_optab can handle overflows. */
4050 if (! unsignedp
&& op1
== constm1_rtx
)
4054 return expand_unop (mode
, flag_trapv
&& GET_MODE_CLASS (mode
) == MODE_INT
4055 ? negv_optab
: neg_optab
, op0
, target
, 0);
4059 /* Don't use the function value register as a target
4060 since we have to read it as well as write it,
4061 and function-inlining gets confused by this. */
4062 && ((REG_P (target
) && REG_FUNCTION_VALUE_P (target
))
4063 /* Don't clobber an operand while doing a multi-step calculation. */
4064 || ((rem_flag
|| op1_is_constant
)
4065 && (reg_mentioned_p (target
, op0
)
4066 || (MEM_P (op0
) && MEM_P (target
))))
4067 || reg_mentioned_p (target
, op1
)
4068 || (MEM_P (op1
) && MEM_P (target
))))
  /* Get the mode in which to perform this computation.  Normally it will
     be MODE, but sometimes we can't do the desired operation in MODE.
     If so, pick a wider mode in which we can do the operation.  Convert
     to that mode at the start to avoid repeated conversions.

     First see what operations we need.  These depend on the expression
     we are evaluating.  (We assume that divxx3 insns exist under the
     same conditions that modxx3 insns and that these insns don't normally
     fail.  If these assumptions are not correct, we may generate less
     efficient code in some cases.)

     Then see if we find a mode in which we can open-code that operation
     (either a division, modulus, or shift).  Finally, check for the smallest
     mode for which we can do the operation with a library call.  */

  /* We might want to refine this now that we have division-by-constant
     optimization.  Since expmed_mult_highpart tries so many variants, it is
     not straightforward to generalize this.  Maybe we should make an array
     of possible modes in init_expmed?  Save this for GCC 2.7.  */
4091 optab1
= (op1_is_pow2
4092 ? (unsignedp
? lshr_optab
: ashr_optab
)
4093 : (unsignedp
? udiv_optab
: sdiv_optab
));
4094 optab2
= (op1_is_pow2
? optab1
4095 : (unsignedp
? udivmod_optab
: sdivmod_optab
));
4097 for (compute_mode
= mode
; compute_mode
!= VOIDmode
;
4098 compute_mode
= GET_MODE_WIDER_MODE (compute_mode
))
4099 if (optab_handler (optab1
, compute_mode
) != CODE_FOR_nothing
4100 || optab_handler (optab2
, compute_mode
) != CODE_FOR_nothing
)
4103 if (compute_mode
== VOIDmode
)
4104 for (compute_mode
= mode
; compute_mode
!= VOIDmode
;
4105 compute_mode
= GET_MODE_WIDER_MODE (compute_mode
))
4106 if (optab_libfunc (optab1
, compute_mode
)
4107 || optab_libfunc (optab2
, compute_mode
))
4110 /* If we still couldn't find a mode, use MODE, but expand_binop will
4112 if (compute_mode
== VOIDmode
)
4113 compute_mode
= mode
;
4115 if (target
&& GET_MODE (target
) == compute_mode
)
4118 tquotient
= gen_reg_rtx (compute_mode
);
4120 size
= GET_MODE_BITSIZE (compute_mode
);
4122 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
4123 (mode), and thereby get better code when OP1 is a constant. Do that
4124 later. It will require going over all usages of SIZE below. */
4125 size
= GET_MODE_BITSIZE (mode
);
4128 /* Only deduct something for a REM if the last divide done was
4129 for a different constant. Then set the constant of the last
4131 max_cost
= (unsignedp
4132 ? udiv_cost (speed
, compute_mode
)
4133 : sdiv_cost (speed
, compute_mode
));
4134 if (rem_flag
&& ! (last_div_const
!= 0 && op1_is_constant
4135 && INTVAL (op1
) == last_div_const
))
4136 max_cost
-= (mul_cost (speed
, compute_mode
)
4137 + add_cost (speed
, compute_mode
));
4139 last_div_const
= ! rem_flag
&& op1_is_constant
? INTVAL (op1
) : 0;
4141 /* Now convert to the best mode to use. */
4142 if (compute_mode
!= mode
)
4144 op0
= convert_modes (compute_mode
, mode
, op0
, unsignedp
);
4145 op1
= convert_modes (compute_mode
, mode
, op1
, unsignedp
);
4147 /* convert_modes may have placed op1 into a register, so we
4148 must recompute the following. */
4149 op1_is_constant
= CONST_INT_P (op1
);
4150 if (op1_is_constant
)
4152 wide_int ext_op1
= rtx_mode_t (op1
, compute_mode
);
4153 op1_is_pow2
= (wi::popcount (ext_op1
) == 1
4155 && wi::popcount (wi::neg (ext_op1
)) == 1));
4161 /* If one of the operands is a volatile MEM, copy it into a register. */
4163 if (MEM_P (op0
) && MEM_VOLATILE_P (op0
))
4164 op0
= force_reg (compute_mode
, op0
);
4165 if (MEM_P (op1
) && MEM_VOLATILE_P (op1
))
4166 op1
= force_reg (compute_mode
, op1
);
4168 /* If we need the remainder or if OP1 is constant, we need to
4169 put OP0 in a register in case it has any queued subexpressions. */
4170 if (rem_flag
|| op1_is_constant
)
4171 op0
= force_reg (compute_mode
, op0
);
4173 last
= get_last_insn ();
4175 /* Promote floor rounding to trunc rounding for unsigned operations. */
4178 if (code
== FLOOR_DIV_EXPR
)
4179 code
= TRUNC_DIV_EXPR
;
4180 if (code
== FLOOR_MOD_EXPR
)
4181 code
= TRUNC_MOD_EXPR
;
4182 if (code
== EXACT_DIV_EXPR
&& op1_is_pow2
)
4183 code
= TRUNC_DIV_EXPR
;
4186 if (op1
!= const0_rtx
)
4189 case TRUNC_MOD_EXPR
:
4190 case TRUNC_DIV_EXPR
:
4191 if (op1_is_constant
)
4195 unsigned HOST_WIDE_INT mh
, ml
;
4196 int pre_shift
, post_shift
;
4198 wide_int wd
= rtx_mode_t (op1
, compute_mode
);
4199 unsigned HOST_WIDE_INT d
= wd
.to_uhwi ();
4201 if (wi::popcount (wd
) == 1)
4203 pre_shift
= floor_log2 (d
);
4206 unsigned HOST_WIDE_INT mask
4207 = (HOST_WIDE_INT_1U
<< pre_shift
) - 1;
4209 = expand_binop (compute_mode
, and_optab
, op0
,
4210 gen_int_mode (mask
, compute_mode
),
4214 return gen_lowpart (mode
, remainder
);
4216 quotient
= expand_shift (RSHIFT_EXPR
, compute_mode
, op0
,
4217 pre_shift
, tquotient
, 1);
4219 else if (size
<= HOST_BITS_PER_WIDE_INT
)
4221 if (d
>= (HOST_WIDE_INT_1U
<< (size
- 1)))
4223 /* Most significant bit of divisor is set; emit an scc
4225 quotient
= emit_store_flag_force (tquotient
, GEU
, op0
, op1
,
4226 compute_mode
, 1, 1);
4230 /* Find a suitable multiplier and right shift count
4231 instead of multiplying with D. */
4233 mh
= choose_multiplier (d
, size
, size
,
4234 &ml
, &post_shift
, &dummy
);
4236 /* If the suggested multiplier is more than SIZE bits,
4237 we can do better for even divisors, using an
4238 initial right shift. */
4239 if (mh
!= 0 && (d
& 1) == 0)
4241 pre_shift
= ctz_or_zero (d
);
4242 mh
= choose_multiplier (d
>> pre_shift
, size
,
4244 &ml
, &post_shift
, &dummy
);
4254 if (post_shift
- 1 >= BITS_PER_WORD
)
4258 = (shift_cost (speed
, compute_mode
, post_shift
- 1)
4259 + shift_cost (speed
, compute_mode
, 1)
4260 + 2 * add_cost (speed
, compute_mode
));
4261 t1
= expmed_mult_highpart
4263 gen_int_mode (ml
, compute_mode
),
4264 NULL_RTX
, 1, max_cost
- extra_cost
);
4267 t2
= force_operand (gen_rtx_MINUS (compute_mode
,
4270 t3
= expand_shift (RSHIFT_EXPR
, compute_mode
,
4271 t2
, 1, NULL_RTX
, 1);
4272 t4
= force_operand (gen_rtx_PLUS (compute_mode
,
4275 quotient
= expand_shift
4276 (RSHIFT_EXPR
, compute_mode
, t4
,
4277 post_shift
- 1, tquotient
, 1);
4283 if (pre_shift
>= BITS_PER_WORD
4284 || post_shift
>= BITS_PER_WORD
)
4288 (RSHIFT_EXPR
, compute_mode
, op0
,
4289 pre_shift
, NULL_RTX
, 1);
4291 = (shift_cost (speed
, compute_mode
, pre_shift
)
4292 + shift_cost (speed
, compute_mode
, post_shift
));
4293 t2
= expmed_mult_highpart
4295 gen_int_mode (ml
, compute_mode
),
4296 NULL_RTX
, 1, max_cost
- extra_cost
);
4299 quotient
= expand_shift
4300 (RSHIFT_EXPR
, compute_mode
, t2
,
4301 post_shift
, tquotient
, 1);
4305 else /* Too wide mode to use tricky code */
4308 insn
= get_last_insn ();
4310 set_dst_reg_note (insn
, REG_EQUAL
,
4311 gen_rtx_UDIV (compute_mode
, op0
, op1
),
4314 else /* TRUNC_DIV, signed */
4316 unsigned HOST_WIDE_INT ml
;
4317 int lgup
, post_shift
;
4319 HOST_WIDE_INT d
= INTVAL (op1
);
4320 unsigned HOST_WIDE_INT abs_d
;
4322 /* Since d might be INT_MIN, we have to cast to
4323 unsigned HOST_WIDE_INT before negating to avoid
4324 undefined signed overflow. */
4326 ? (unsigned HOST_WIDE_INT
) d
4327 : - (unsigned HOST_WIDE_INT
) d
);
4329 /* n rem d = n rem -d */
4330 if (rem_flag
&& d
< 0)
4333 op1
= gen_int_mode (abs_d
, compute_mode
);
4339 quotient
= expand_unop (compute_mode
, neg_optab
, op0
,
4341 else if (size
<= HOST_BITS_PER_WIDE_INT
4342 && abs_d
== HOST_WIDE_INT_1U
<< (size
- 1))
4344 /* This case is not handled correctly below. */
4345 quotient
= emit_store_flag (tquotient
, EQ
, op0
, op1
,
4346 compute_mode
, 1, 1);
4350 else if (EXACT_POWER_OF_2_OR_ZERO_P (d
)
4351 && (size
<= HOST_BITS_PER_WIDE_INT
|| d
>= 0)
4353 ? smod_pow2_cheap (speed
, compute_mode
)
4354 : sdiv_pow2_cheap (speed
, compute_mode
))
4355 /* We assume that cheap metric is true if the
4356 optab has an expander for this mode. */
4357 && ((optab_handler ((rem_flag
? smod_optab
4360 != CODE_FOR_nothing
)
4361 || (optab_handler (sdivmod_optab
,
4363 != CODE_FOR_nothing
)))
4365 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d
)
4366 && (size
<= HOST_BITS_PER_WIDE_INT
4367 || abs_d
!= (unsigned HOST_WIDE_INT
) d
))
4371 remainder
= expand_smod_pow2 (compute_mode
, op0
, d
);
4373 return gen_lowpart (mode
, remainder
);
4376 if (sdiv_pow2_cheap (speed
, compute_mode
)
4377 && ((optab_handler (sdiv_optab
, compute_mode
)
4378 != CODE_FOR_nothing
)
4379 || (optab_handler (sdivmod_optab
, compute_mode
)
4380 != CODE_FOR_nothing
)))
4381 quotient
= expand_divmod (0, TRUNC_DIV_EXPR
,
4383 gen_int_mode (abs_d
,
4387 quotient
= expand_sdiv_pow2 (compute_mode
, op0
, abs_d
);
4389 /* We have computed OP0 / abs(OP1). If OP1 is negative,
4390 negate the quotient. */
4393 insn
= get_last_insn ();
4395 && abs_d
< (HOST_WIDE_INT_1U
4396 << (HOST_BITS_PER_WIDE_INT
- 1)))
4397 set_dst_reg_note (insn
, REG_EQUAL
,
4398 gen_rtx_DIV (compute_mode
, op0
,
4404 quotient
= expand_unop (compute_mode
, neg_optab
,
4405 quotient
, quotient
, 0);
4408 else if (size
<= HOST_BITS_PER_WIDE_INT
)
4410 choose_multiplier (abs_d
, size
, size
- 1,
4411 &ml
, &post_shift
, &lgup
);
4412 if (ml
< HOST_WIDE_INT_1U
<< (size
- 1))
4416 if (post_shift
>= BITS_PER_WORD
4417 || size
- 1 >= BITS_PER_WORD
)
4420 extra_cost
= (shift_cost (speed
, compute_mode
, post_shift
)
4421 + shift_cost (speed
, compute_mode
, size
- 1)
4422 + add_cost (speed
, compute_mode
));
4423 t1
= expmed_mult_highpart
4424 (compute_mode
, op0
, gen_int_mode (ml
, compute_mode
),
4425 NULL_RTX
, 0, max_cost
- extra_cost
);
4429 (RSHIFT_EXPR
, compute_mode
, t1
,
4430 post_shift
, NULL_RTX
, 0);
4432 (RSHIFT_EXPR
, compute_mode
, op0
,
4433 size
- 1, NULL_RTX
, 0);
4436 = force_operand (gen_rtx_MINUS (compute_mode
,
4441 = force_operand (gen_rtx_MINUS (compute_mode
,
4449 if (post_shift
>= BITS_PER_WORD
4450 || size
- 1 >= BITS_PER_WORD
)
4453 ml
|= HOST_WIDE_INT_M1U
<< (size
- 1);
4454 mlr
= gen_int_mode (ml
, compute_mode
);
4455 extra_cost
= (shift_cost (speed
, compute_mode
, post_shift
)
4456 + shift_cost (speed
, compute_mode
, size
- 1)
4457 + 2 * add_cost (speed
, compute_mode
));
4458 t1
= expmed_mult_highpart (compute_mode
, op0
, mlr
,
4460 max_cost
- extra_cost
);
4463 t2
= force_operand (gen_rtx_PLUS (compute_mode
,
4467 (RSHIFT_EXPR
, compute_mode
, t2
,
4468 post_shift
, NULL_RTX
, 0);
4470 (RSHIFT_EXPR
, compute_mode
, op0
,
4471 size
- 1, NULL_RTX
, 0);
4474 = force_operand (gen_rtx_MINUS (compute_mode
,
4479 = force_operand (gen_rtx_MINUS (compute_mode
,
4484 else /* Too wide mode to use tricky code */
4487 insn
= get_last_insn ();
4489 set_dst_reg_note (insn
, REG_EQUAL
,
4490 gen_rtx_DIV (compute_mode
, op0
, op1
),
4496 delete_insns_since (last
);
4499 case FLOOR_DIV_EXPR
:
4500 case FLOOR_MOD_EXPR
:
4501 /* We will come here only for signed operations. */
4502 if (op1_is_constant
&& size
<= HOST_BITS_PER_WIDE_INT
)
4504 unsigned HOST_WIDE_INT mh
, ml
;
4505 int pre_shift
, lgup
, post_shift
;
4506 HOST_WIDE_INT d
= INTVAL (op1
);
4510 /* We could just as easily deal with negative constants here,
4511 but it does not seem worth the trouble for GCC 2.6. */
4512 if (EXACT_POWER_OF_2_OR_ZERO_P (d
))
4514 pre_shift
= floor_log2 (d
);
4517 unsigned HOST_WIDE_INT mask
4518 = (HOST_WIDE_INT_1U
<< pre_shift
) - 1;
4519 remainder
= expand_binop
4520 (compute_mode
, and_optab
, op0
,
4521 gen_int_mode (mask
, compute_mode
),
4522 remainder
, 0, OPTAB_LIB_WIDEN
);
4524 return gen_lowpart (mode
, remainder
);
4526 quotient
= expand_shift
4527 (RSHIFT_EXPR
, compute_mode
, op0
,
4528 pre_shift
, tquotient
, 0);
4534 mh
= choose_multiplier (d
, size
, size
- 1,
4535 &ml
, &post_shift
, &lgup
);
4538 if (post_shift
< BITS_PER_WORD
4539 && size
- 1 < BITS_PER_WORD
)
4542 (RSHIFT_EXPR
, compute_mode
, op0
,
4543 size
- 1, NULL_RTX
, 0);
4544 t2
= expand_binop (compute_mode
, xor_optab
, op0
, t1
,
4545 NULL_RTX
, 0, OPTAB_WIDEN
);
4546 extra_cost
= (shift_cost (speed
, compute_mode
, post_shift
)
4547 + shift_cost (speed
, compute_mode
, size
- 1)
4548 + 2 * add_cost (speed
, compute_mode
));
4549 t3
= expmed_mult_highpart
4550 (compute_mode
, t2
, gen_int_mode (ml
, compute_mode
),
4551 NULL_RTX
, 1, max_cost
- extra_cost
);
4555 (RSHIFT_EXPR
, compute_mode
, t3
,
4556 post_shift
, NULL_RTX
, 1);
4557 quotient
= expand_binop (compute_mode
, xor_optab
,
4558 t4
, t1
, tquotient
, 0,
4566 rtx nsign
, t1
, t2
, t3
, t4
;
4567 t1
= force_operand (gen_rtx_PLUS (compute_mode
,
4568 op0
, constm1_rtx
), NULL_RTX
);
4569 t2
= expand_binop (compute_mode
, ior_optab
, op0
, t1
, NULL_RTX
,
4571 nsign
= expand_shift (RSHIFT_EXPR
, compute_mode
, t2
,
4572 size
- 1, NULL_RTX
, 0);
4573 t3
= force_operand (gen_rtx_MINUS (compute_mode
, t1
, nsign
),
4575 t4
= expand_divmod (0, TRUNC_DIV_EXPR
, compute_mode
, t3
, op1
,
4580 t5
= expand_unop (compute_mode
, one_cmpl_optab
, nsign
,
4582 quotient
= force_operand (gen_rtx_PLUS (compute_mode
,
4591 delete_insns_since (last
);
      /* Try using an instruction that produces both the quotient and
         remainder, using truncation.  We can easily compensate the quotient
         or remainder to get floor rounding, once we have the remainder.
         Notice that we compute also the final remainder value here,
         and return the result right away.  */
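      /* Illustrative sketch (not part of GCC itself): deriving floor
         division and modulo from C's truncating operators, which is the
         compensation applied below once the truncating remainder is known:

           q = x / y;  r = x % y;
           if (r != 0 && (r < 0) != (y < 0))
             {
               q -= 1;
               r += y;
             }
      */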
4598 if (target
== 0 || GET_MODE (target
) != compute_mode
)
4599 target
= gen_reg_rtx (compute_mode
);
4604 = REG_P (target
) ? target
: gen_reg_rtx (compute_mode
);
4605 quotient
= gen_reg_rtx (compute_mode
);
4610 = REG_P (target
) ? target
: gen_reg_rtx (compute_mode
);
4611 remainder
= gen_reg_rtx (compute_mode
);
4614 if (expand_twoval_binop (sdivmod_optab
, op0
, op1
,
4615 quotient
, remainder
, 0))
4617 /* This could be computed with a branch-less sequence.
4618 Save that for later. */
4620 rtx_code_label
*label
= gen_label_rtx ();
4621 do_cmp_and_jump (remainder
, const0_rtx
, EQ
, compute_mode
, label
);
4622 tem
= expand_binop (compute_mode
, xor_optab
, op0
, op1
,
4623 NULL_RTX
, 0, OPTAB_WIDEN
);
4624 do_cmp_and_jump (tem
, const0_rtx
, GE
, compute_mode
, label
);
4625 expand_dec (quotient
, const1_rtx
);
4626 expand_inc (remainder
, op1
);
4628 return gen_lowpart (mode
, rem_flag
? remainder
: quotient
);
      /* No luck with division elimination or divmod.  Have to do it
         by conditionally adjusting op0 *and* the result.  */
4634 rtx_code_label
*label1
, *label2
, *label3
, *label4
, *label5
;
4638 quotient
= gen_reg_rtx (compute_mode
);
4639 adjusted_op0
= copy_to_mode_reg (compute_mode
, op0
);
4640 label1
= gen_label_rtx ();
4641 label2
= gen_label_rtx ();
4642 label3
= gen_label_rtx ();
4643 label4
= gen_label_rtx ();
4644 label5
= gen_label_rtx ();
4645 do_cmp_and_jump (op1
, const0_rtx
, LT
, compute_mode
, label2
);
4646 do_cmp_and_jump (adjusted_op0
, const0_rtx
, LT
, compute_mode
, label1
);
4647 tem
= expand_binop (compute_mode
, sdiv_optab
, adjusted_op0
, op1
,
4648 quotient
, 0, OPTAB_LIB_WIDEN
);
4649 if (tem
!= quotient
)
4650 emit_move_insn (quotient
, tem
);
4651 emit_jump_insn (targetm
.gen_jump (label5
));
4653 emit_label (label1
);
4654 expand_inc (adjusted_op0
, const1_rtx
);
4655 emit_jump_insn (targetm
.gen_jump (label4
));
4657 emit_label (label2
);
4658 do_cmp_and_jump (adjusted_op0
, const0_rtx
, GT
, compute_mode
, label3
);
4659 tem
= expand_binop (compute_mode
, sdiv_optab
, adjusted_op0
, op1
,
4660 quotient
, 0, OPTAB_LIB_WIDEN
);
4661 if (tem
!= quotient
)
4662 emit_move_insn (quotient
, tem
);
4663 emit_jump_insn (targetm
.gen_jump (label5
));
4665 emit_label (label3
);
4666 expand_dec (adjusted_op0
, const1_rtx
);
4667 emit_label (label4
);
4668 tem
= expand_binop (compute_mode
, sdiv_optab
, adjusted_op0
, op1
,
4669 quotient
, 0, OPTAB_LIB_WIDEN
);
4670 if (tem
!= quotient
)
4671 emit_move_insn (quotient
, tem
);
4672 expand_dec (quotient
, const1_rtx
);
4673 emit_label (label5
);
4682 && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1
))
4683 && (size
<= HOST_BITS_PER_WIDE_INT
4684 || INTVAL (op1
) >= 0))
4687 unsigned HOST_WIDE_INT d
= INTVAL (op1
);
4688 t1
= expand_shift (RSHIFT_EXPR
, compute_mode
, op0
,
4689 floor_log2 (d
), tquotient
, 1);
4690 t2
= expand_binop (compute_mode
, and_optab
, op0
,
4691 gen_int_mode (d
- 1, compute_mode
),
4692 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
4693 t3
= gen_reg_rtx (compute_mode
);
4694 t3
= emit_store_flag (t3
, NE
, t2
, const0_rtx
,
4695 compute_mode
, 1, 1);
4698 rtx_code_label
*lab
;
4699 lab
= gen_label_rtx ();
4700 do_cmp_and_jump (t2
, const0_rtx
, EQ
, compute_mode
, lab
);
4701 expand_inc (t1
, const1_rtx
);
4706 quotient
= force_operand (gen_rtx_PLUS (compute_mode
,
        /* Try using an instruction that produces both the quotient and
           remainder, using truncation.  We can easily compensate the
           quotient or remainder to get ceiling rounding, once we have the
           remainder.  Notice that we compute also the final remainder
           value here, and return the result right away.  */
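        /* Illustrative sketch (not part of GCC itself): the ceiling
           compensation applied below, written with C's truncating operators
           for unsigned operands:

             q = x / y;  r = x % y;
             if (r != 0)
               {
                 q += 1;
                 r -= y;
               }
        */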
4717 if (target
== 0 || GET_MODE (target
) != compute_mode
)
4718 target
= gen_reg_rtx (compute_mode
);
4722 remainder
= (REG_P (target
)
4723 ? target
: gen_reg_rtx (compute_mode
));
4724 quotient
= gen_reg_rtx (compute_mode
);
4728 quotient
= (REG_P (target
)
4729 ? target
: gen_reg_rtx (compute_mode
));
4730 remainder
= gen_reg_rtx (compute_mode
);
4733 if (expand_twoval_binop (udivmod_optab
, op0
, op1
, quotient
,
4736 /* This could be computed with a branch-less sequence.
4737 Save that for later. */
4738 rtx_code_label
*label
= gen_label_rtx ();
4739 do_cmp_and_jump (remainder
, const0_rtx
, EQ
,
4740 compute_mode
, label
);
4741 expand_inc (quotient
, const1_rtx
);
4742 expand_dec (remainder
, op1
);
4744 return gen_lowpart (mode
, rem_flag
? remainder
: quotient
);
4747 /* No luck with division elimination or divmod. Have to do it
4748 by conditionally adjusting op0 *and* the result. */
4750 rtx_code_label
*label1
, *label2
;
4751 rtx adjusted_op0
, tem
;
4753 quotient
= gen_reg_rtx (compute_mode
);
4754 adjusted_op0
= copy_to_mode_reg (compute_mode
, op0
);
4755 label1
= gen_label_rtx ();
4756 label2
= gen_label_rtx ();
4757 do_cmp_and_jump (adjusted_op0
, const0_rtx
, NE
,
4758 compute_mode
, label1
);
4759 emit_move_insn (quotient
, const0_rtx
);
4760 emit_jump_insn (targetm
.gen_jump (label2
));
4762 emit_label (label1
);
4763 expand_dec (adjusted_op0
, const1_rtx
);
4764 tem
= expand_binop (compute_mode
, udiv_optab
, adjusted_op0
, op1
,
4765 quotient
, 1, OPTAB_LIB_WIDEN
);
4766 if (tem
!= quotient
)
4767 emit_move_insn (quotient
, tem
);
4768 expand_inc (quotient
, const1_rtx
);
4769 emit_label (label2
);
4774 if (op1_is_constant
&& EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1
))
4775 && INTVAL (op1
) >= 0)
          /* This is extremely similar to the code for the unsigned case
             above.  For 2.7 we should merge these variants, but for
             2.6.1 I don't want to touch the code for unsigned since that
             gets used in C.  The signed case will only be used by other
             languages (Ada).  */
4784 unsigned HOST_WIDE_INT d
= INTVAL (op1
);
4785 t1
= expand_shift (RSHIFT_EXPR
, compute_mode
, op0
,
4786 floor_log2 (d
), tquotient
, 0);
4787 t2
= expand_binop (compute_mode
, and_optab
, op0
,
4788 gen_int_mode (d
- 1, compute_mode
),
4789 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
4790 t3
= gen_reg_rtx (compute_mode
);
4791 t3
= emit_store_flag (t3
, NE
, t2
, const0_rtx
,
4792 compute_mode
, 1, 1);
4795 rtx_code_label
*lab
;
4796 lab
= gen_label_rtx ();
4797 do_cmp_and_jump (t2
, const0_rtx
, EQ
, compute_mode
, lab
);
4798 expand_inc (t1
, const1_rtx
);
4803 quotient
= force_operand (gen_rtx_PLUS (compute_mode
,
        /* Try using an instruction that produces both the quotient and
           remainder, using truncation.  We can easily compensate the
           quotient or remainder to get ceiling rounding, once we have the
           remainder.  Notice that we compute also the final remainder
           value here, and return the result right away.  */
4814 if (target
== 0 || GET_MODE (target
) != compute_mode
)
4815 target
= gen_reg_rtx (compute_mode
);
4818 remainder
= (REG_P (target
)
4819 ? target
: gen_reg_rtx (compute_mode
));
4820 quotient
= gen_reg_rtx (compute_mode
);
4824 quotient
= (REG_P (target
)
4825 ? target
: gen_reg_rtx (compute_mode
));
4826 remainder
= gen_reg_rtx (compute_mode
);
4829 if (expand_twoval_binop (sdivmod_optab
, op0
, op1
, quotient
,
4832 /* This could be computed with a branch-less sequence.
4833 Save that for later. */
4835 rtx_code_label
*label
= gen_label_rtx ();
4836 do_cmp_and_jump (remainder
, const0_rtx
, EQ
,
4837 compute_mode
, label
);
4838 tem
= expand_binop (compute_mode
, xor_optab
, op0
, op1
,
4839 NULL_RTX
, 0, OPTAB_WIDEN
);
4840 do_cmp_and_jump (tem
, const0_rtx
, LT
, compute_mode
, label
);
4841 expand_inc (quotient
, const1_rtx
);
4842 expand_dec (remainder
, op1
);
4844 return gen_lowpart (mode
, rem_flag
? remainder
: quotient
);
4847 /* No luck with division elimination or divmod. Have to do it
4848 by conditionally adjusting op0 *and* the result. */
4850 rtx_code_label
*label1
, *label2
, *label3
, *label4
, *label5
;
4854 quotient
= gen_reg_rtx (compute_mode
);
4855 adjusted_op0
= copy_to_mode_reg (compute_mode
, op0
);
4856 label1
= gen_label_rtx ();
4857 label2
= gen_label_rtx ();
4858 label3
= gen_label_rtx ();
4859 label4
= gen_label_rtx ();
4860 label5
= gen_label_rtx ();
4861 do_cmp_and_jump (op1
, const0_rtx
, LT
, compute_mode
, label2
);
4862 do_cmp_and_jump (adjusted_op0
, const0_rtx
, GT
,
4863 compute_mode
, label1
);
4864 tem
= expand_binop (compute_mode
, sdiv_optab
, adjusted_op0
, op1
,
4865 quotient
, 0, OPTAB_LIB_WIDEN
);
4866 if (tem
!= quotient
)
4867 emit_move_insn (quotient
, tem
);
4868 emit_jump_insn (targetm
.gen_jump (label5
));
4870 emit_label (label1
);
4871 expand_dec (adjusted_op0
, const1_rtx
);
4872 emit_jump_insn (targetm
.gen_jump (label4
));
4874 emit_label (label2
);
4875 do_cmp_and_jump (adjusted_op0
, const0_rtx
, LT
,
4876 compute_mode
, label3
);
4877 tem
= expand_binop (compute_mode
, sdiv_optab
, adjusted_op0
, op1
,
4878 quotient
, 0, OPTAB_LIB_WIDEN
);
4879 if (tem
!= quotient
)
4880 emit_move_insn (quotient
, tem
);
4881 emit_jump_insn (targetm
.gen_jump (label5
));
4883 emit_label (label3
);
4884 expand_inc (adjusted_op0
, const1_rtx
);
4885 emit_label (label4
);
4886 tem
= expand_binop (compute_mode
, sdiv_optab
, adjusted_op0
, op1
,
4887 quotient
, 0, OPTAB_LIB_WIDEN
);
4888 if (tem
!= quotient
)
4889 emit_move_insn (quotient
, tem
);
4890 expand_inc (quotient
, const1_rtx
);
4891 emit_label (label5
);
4896 case EXACT_DIV_EXPR
:
4897 if (op1_is_constant
&& size
<= HOST_BITS_PER_WIDE_INT
)
4899 HOST_WIDE_INT d
= INTVAL (op1
);
4900 unsigned HOST_WIDE_INT ml
;
4904 pre_shift
= ctz_or_zero (d
);
4905 ml
= invert_mod2n (d
>> pre_shift
, size
);
4906 t1
= expand_shift (RSHIFT_EXPR
, compute_mode
, op0
,
4907 pre_shift
, NULL_RTX
, unsignedp
);
4908 quotient
= expand_mult (compute_mode
, t1
,
4909 gen_int_mode (ml
, compute_mode
),
4912 insn
= get_last_insn ();
4913 set_dst_reg_note (insn
, REG_EQUAL
,
4914 gen_rtx_fmt_ee (unsignedp
? UDIV
: DIV
,
4915 compute_mode
, op0
, op1
),
4920 case ROUND_DIV_EXPR
:
4921 case ROUND_MOD_EXPR
:
4925 rtx_code_label
*label
;
4926 label
= gen_label_rtx ();
4927 quotient
= gen_reg_rtx (compute_mode
);
4928 remainder
= gen_reg_rtx (compute_mode
);
4929 if (expand_twoval_binop (udivmod_optab
, op0
, op1
, quotient
, remainder
, 1) == 0)
4932 quotient
= expand_binop (compute_mode
, udiv_optab
, op0
, op1
,
4933 quotient
, 1, OPTAB_LIB_WIDEN
);
4934 tem
= expand_mult (compute_mode
, quotient
, op1
, NULL_RTX
, 1);
4935 remainder
= expand_binop (compute_mode
, sub_optab
, op0
, tem
,
4936 remainder
, 1, OPTAB_LIB_WIDEN
);
4938 tem
= plus_constant (compute_mode
, op1
, -1);
4939 tem
= expand_shift (RSHIFT_EXPR
, compute_mode
, tem
, 1, NULL_RTX
, 1);
4940 do_cmp_and_jump (remainder
, tem
, LEU
, compute_mode
, label
);
4941 expand_inc (quotient
, const1_rtx
);
4942 expand_dec (remainder
, op1
);
4947 rtx abs_rem
, abs_op1
, tem
, mask
;
4948 rtx_code_label
*label
;
4949 label
= gen_label_rtx ();
4950 quotient
= gen_reg_rtx (compute_mode
);
4951 remainder
= gen_reg_rtx (compute_mode
);
4952 if (expand_twoval_binop (sdivmod_optab
, op0
, op1
, quotient
, remainder
, 0) == 0)
4955 quotient
= expand_binop (compute_mode
, sdiv_optab
, op0
, op1
,
4956 quotient
, 0, OPTAB_LIB_WIDEN
);
4957 tem
= expand_mult (compute_mode
, quotient
, op1
, NULL_RTX
, 0);
4958 remainder
= expand_binop (compute_mode
, sub_optab
, op0
, tem
,
4959 remainder
, 0, OPTAB_LIB_WIDEN
);
4961 abs_rem
= expand_abs (compute_mode
, remainder
, NULL_RTX
, 1, 0);
4962 abs_op1
= expand_abs (compute_mode
, op1
, NULL_RTX
, 1, 0);
4963 tem
= expand_shift (LSHIFT_EXPR
, compute_mode
, abs_rem
,
4965 do_cmp_and_jump (tem
, abs_op1
, LTU
, compute_mode
, label
);
4966 tem
= expand_binop (compute_mode
, xor_optab
, op0
, op1
,
4967 NULL_RTX
, 0, OPTAB_WIDEN
);
4968 mask
= expand_shift (RSHIFT_EXPR
, compute_mode
, tem
,
4969 size
- 1, NULL_RTX
, 0);
4970 tem
= expand_binop (compute_mode
, xor_optab
, mask
, const1_rtx
,
4971 NULL_RTX
, 0, OPTAB_WIDEN
);
4972 tem
= expand_binop (compute_mode
, sub_optab
, tem
, mask
,
4973 NULL_RTX
, 0, OPTAB_WIDEN
);
4974 expand_inc (quotient
, tem
);
4975 tem
= expand_binop (compute_mode
, xor_optab
, mask
, op1
,
4976 NULL_RTX
, 0, OPTAB_WIDEN
);
4977 tem
= expand_binop (compute_mode
, sub_optab
, tem
, mask
,
4978 NULL_RTX
, 0, OPTAB_WIDEN
);
4979 expand_dec (remainder
, tem
);
4982 return gen_lowpart (mode
, rem_flag
? remainder
: quotient
);
4990 if (target
&& GET_MODE (target
) != compute_mode
)
      /* Try to produce the remainder without producing the quotient.
         If we seem to have a divmod pattern that does not require widening,
         don't try widening here.  We should really have a WIDEN argument
         to expand_twoval_binop, since what we'd really like to do here is
         1) try a mod insn in compute_mode
         2) try a divmod insn in compute_mode
         3) try a div insn in compute_mode and multiply-subtract to get
            remainder
         4) try the same things with widening allowed.  */
5005 = sign_expand_binop (compute_mode
, umod_optab
, smod_optab
,
5008 ((optab_handler (optab2
, compute_mode
)
5009 != CODE_FOR_nothing
)
5010 ? OPTAB_DIRECT
: OPTAB_WIDEN
));
5013 /* No luck there. Can we do remainder and divide at once
5014 without a library call? */
5015 remainder
= gen_reg_rtx (compute_mode
);
5016 if (! expand_twoval_binop ((unsignedp
5020 NULL_RTX
, remainder
, unsignedp
))
5025 return gen_lowpart (mode
, remainder
);
      /* Produce the quotient.  Try a quotient insn, but not a library call.
         If we have a divmod in this mode, use it in preference to widening
         the div (for this test we assume it will not fail).  Note that optab2
         is set to the one of the two optabs that the call below will use.  */
5033 = sign_expand_binop (compute_mode
, udiv_optab
, sdiv_optab
,
5034 op0
, op1
, rem_flag
? NULL_RTX
: target
,
5036 ((optab_handler (optab2
, compute_mode
)
5037 != CODE_FOR_nothing
)
5038 ? OPTAB_DIRECT
: OPTAB_WIDEN
));
5042 /* No luck there. Try a quotient-and-remainder insn,
5043 keeping the quotient alone. */
5044 quotient
= gen_reg_rtx (compute_mode
);
5045 if (! expand_twoval_binop (unsignedp
? udivmod_optab
: sdivmod_optab
,
5047 quotient
, NULL_RTX
, unsignedp
))
5051 /* Still no luck. If we are not computing the remainder,
5052 use a library call for the quotient. */
5053 quotient
= sign_expand_binop (compute_mode
,
5054 udiv_optab
, sdiv_optab
,
5056 unsignedp
, OPTAB_LIB_WIDEN
);
5063 if (target
&& GET_MODE (target
) != compute_mode
)
5068 /* No divide instruction either. Use library for remainder. */
5069 remainder
= sign_expand_binop (compute_mode
, umod_optab
, smod_optab
,
5071 unsignedp
, OPTAB_LIB_WIDEN
);
5072 /* No remainder function. Try a quotient-and-remainder
5073 function, keeping the remainder. */
5076 remainder
= gen_reg_rtx (compute_mode
);
5077 if (!expand_twoval_binop_libfunc
5078 (unsignedp
? udivmod_optab
: sdivmod_optab
,
5080 NULL_RTX
, remainder
,
5081 unsignedp
? UMOD
: MOD
))
5082 remainder
= NULL_RTX
;
5087 /* We divided. Now finish doing X - Y * (X / Y). */
5088 remainder
= expand_mult (compute_mode
, quotient
, op1
,
5089 NULL_RTX
, unsignedp
);
5090 remainder
= expand_binop (compute_mode
, sub_optab
, op0
,
5091 remainder
, target
, unsignedp
,
5096 return gen_lowpart (mode
, rem_flag
? remainder
: quotient
);
/* Return a tree node with data type TYPE, describing the value of X.
   Usually this is a VAR_DECL, if there is no obvious better choice.
   X may be an expression, however we only support those expressions
   generated by loop.c.  */
5105 make_tree (tree type
, rtx x
)
5109 switch (GET_CODE (x
))
5112 case CONST_WIDE_INT
:
5113 t
= wide_int_to_tree (type
, rtx_mode_t (x
, TYPE_MODE (type
)));
5117 STATIC_ASSERT (HOST_BITS_PER_WIDE_INT
* 2 <= MAX_BITSIZE_MODE_ANY_INT
);
5118 if (TARGET_SUPPORTS_WIDE_INT
== 0 && GET_MODE (x
) == VOIDmode
)
5119 t
= wide_int_to_tree (type
,
5120 wide_int::from_array (&CONST_DOUBLE_LOW (x
), 2,
5121 HOST_BITS_PER_WIDE_INT
* 2));
5123 t
= build_real (type
, *CONST_DOUBLE_REAL_VALUE (x
));
5129 int units
= CONST_VECTOR_NUNITS (x
);
5130 tree itype
= TREE_TYPE (type
);
5134 /* Build a tree with vector elements. */
5135 elts
= XALLOCAVEC (tree
, units
);
5136 for (i
= units
- 1; i
>= 0; --i
)
5138 rtx elt
= CONST_VECTOR_ELT (x
, i
);
5139 elts
[i
] = make_tree (itype
, elt
);
5142 return build_vector (type
, elts
);
5146 return fold_build2 (PLUS_EXPR
, type
, make_tree (type
, XEXP (x
, 0)),
5147 make_tree (type
, XEXP (x
, 1)));
5150 return fold_build2 (MINUS_EXPR
, type
, make_tree (type
, XEXP (x
, 0)),
5151 make_tree (type
, XEXP (x
, 1)));
5154 return fold_build1 (NEGATE_EXPR
, type
, make_tree (type
, XEXP (x
, 0)));
5157 return fold_build2 (MULT_EXPR
, type
, make_tree (type
, XEXP (x
, 0)),
5158 make_tree (type
, XEXP (x
, 1)));
5161 return fold_build2 (LSHIFT_EXPR
, type
, make_tree (type
, XEXP (x
, 0)),
5162 make_tree (type
, XEXP (x
, 1)));
5165 t
= unsigned_type_for (type
);
5166 return fold_convert (type
, build2 (RSHIFT_EXPR
, t
,
5167 make_tree (t
, XEXP (x
, 0)),
5168 make_tree (type
, XEXP (x
, 1))));
5171 t
= signed_type_for (type
);
5172 return fold_convert (type
, build2 (RSHIFT_EXPR
, t
,
5173 make_tree (t
, XEXP (x
, 0)),
5174 make_tree (type
, XEXP (x
, 1))));
5177 if (TREE_CODE (type
) != REAL_TYPE
)
5178 t
= signed_type_for (type
);
5182 return fold_convert (type
, build2 (TRUNC_DIV_EXPR
, t
,
5183 make_tree (t
, XEXP (x
, 0)),
5184 make_tree (t
, XEXP (x
, 1))));
5186 t
= unsigned_type_for (type
);
5187 return fold_convert (type
, build2 (TRUNC_DIV_EXPR
, t
,
5188 make_tree (t
, XEXP (x
, 0)),
5189 make_tree (t
, XEXP (x
, 1))));
5193 t
= lang_hooks
.types
.type_for_mode (GET_MODE (XEXP (x
, 0)),
5194 GET_CODE (x
) == ZERO_EXTEND
);
5195 return fold_convert (type
, make_tree (t
, XEXP (x
, 0)));
5198 return make_tree (type
, XEXP (x
, 0));
5201 t
= SYMBOL_REF_DECL (x
);
5203 return fold_convert (type
, build_fold_addr_expr (t
));
5207 t
= build_decl (RTL_LOCATION (x
), VAR_DECL
, NULL_TREE
, type
);
5209 /* If TYPE is a POINTER_TYPE, we might need to convert X from
5210 address mode to pointer mode. */
5211 if (POINTER_TYPE_P (type
))
5212 x
= convert_memory_address_addr_space
5213 (TYPE_MODE (type
), x
, TYPE_ADDR_SPACE (TREE_TYPE (type
)));
5215 /* Note that we do *not* use SET_DECL_RTL here, because we do not
5216 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
5217 t
->decl_with_rtl
.rtl
= x
;
/* Compute the logical-and of OP0 and OP1, storing it in TARGET
   and returning TARGET.

   If TARGET is 0, a pseudo-register or constant is returned.  */
5229 expand_and (machine_mode mode
, rtx op0
, rtx op1
, rtx target
)
5233 if (GET_MODE (op0
) == VOIDmode
&& GET_MODE (op1
) == VOIDmode
)
5234 tem
= simplify_binary_operation (AND
, mode
, op0
, op1
);
5236 tem
= expand_binop (mode
, and_optab
, op0
, op1
, target
, 0, OPTAB_LIB_WIDEN
);
5240 else if (tem
!= target
)
5241 emit_move_insn (target
, tem
);
5245 /* Helper function for emit_store_flag. */
5247 emit_cstore (rtx target
, enum insn_code icode
, enum rtx_code code
,
5248 machine_mode mode
, machine_mode compare_mode
,
5249 int unsignedp
, rtx x
, rtx y
, int normalizep
,
5250 machine_mode target_mode
)
5252 struct expand_operand ops
[4];
5253 rtx op0
, comparison
, subtarget
;
5255 machine_mode result_mode
= targetm
.cstore_mode (icode
);
5257 last
= get_last_insn ();
5258 x
= prepare_operand (icode
, x
, 2, mode
, compare_mode
, unsignedp
);
5259 y
= prepare_operand (icode
, y
, 3, mode
, compare_mode
, unsignedp
);
5262 delete_insns_since (last
);
5266 if (target_mode
== VOIDmode
)
5267 target_mode
= result_mode
;
5269 target
= gen_reg_rtx (target_mode
);
5271 comparison
= gen_rtx_fmt_ee (code
, result_mode
, x
, y
);
5273 create_output_operand (&ops
[0], optimize
? NULL_RTX
: target
, result_mode
);
5274 create_fixed_operand (&ops
[1], comparison
);
5275 create_fixed_operand (&ops
[2], x
);
5276 create_fixed_operand (&ops
[3], y
);
5277 if (!maybe_expand_insn (icode
, 4, ops
))
5279 delete_insns_since (last
);
5282 subtarget
= ops
[0].value
;
  /* If we are converting to a wider mode, first convert to
     TARGET_MODE, then normalize.  This produces better combining
     opportunities on machines that have a SIGN_EXTRACT when we are
     testing a single bit.  This mostly benefits the 68k.

     If STORE_FLAG_VALUE does not have the sign bit set when
     interpreted in MODE, we can do this conversion as unsigned, which
     is usually more efficient.  */
5292 if (GET_MODE_SIZE (target_mode
) > GET_MODE_SIZE (result_mode
))
5294 convert_move (target
, subtarget
,
5295 val_signbit_known_clear_p (result_mode
,
5298 result_mode
= target_mode
;
5303 /* If we want to keep subexpressions around, don't reuse our last
5308 /* Now normalize to the proper value in MODE. Sometimes we don't
5309 have to do anything. */
5310 if (normalizep
== 0 || normalizep
== STORE_FLAG_VALUE
)
5312 /* STORE_FLAG_VALUE might be the most negative number, so write
5313 the comparison this way to avoid a compiler-time warning. */
5314 else if (- normalizep
== STORE_FLAG_VALUE
)
5315 op0
= expand_unop (result_mode
, neg_optab
, op0
, subtarget
, 0);
5317 /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5318 it hard to use a value of just the sign bit due to ANSI integer
5319 constant typing rules. */
5320 else if (val_signbit_known_set_p (result_mode
, STORE_FLAG_VALUE
))
5321 op0
= expand_shift (RSHIFT_EXPR
, result_mode
, op0
,
5322 GET_MODE_BITSIZE (result_mode
) - 1, subtarget
,
5326 gcc_assert (STORE_FLAG_VALUE
& 1);
5328 op0
= expand_and (result_mode
, op0
, const1_rtx
, subtarget
);
5329 if (normalizep
== -1)
5330 op0
= expand_unop (result_mode
, neg_optab
, op0
, op0
, 0);
5333 /* If we were converting to a smaller mode, do the conversion now. */
5334 if (target_mode
!= result_mode
)
5336 convert_move (target
, op0
, 0);
/* A subroutine of emit_store_flag only including "tricks" that do not
   need a recursive call.  These are kept separate to avoid infinite
   loops.  */
5349 emit_store_flag_1 (rtx target
, enum rtx_code code
, rtx op0
, rtx op1
,
5350 machine_mode mode
, int unsignedp
, int normalizep
,
5351 machine_mode target_mode
)
5354 enum insn_code icode
;
5355 machine_mode compare_mode
;
5356 enum mode_class mclass
;
5357 enum rtx_code scode
;
5360 code
= unsigned_condition (code
);
5361 scode
= swap_condition (code
);
5363 /* If one operand is constant, make it the second one. Only do this
5364 if the other operand is not constant as well. */
5366 if (swap_commutative_operands_p (op0
, op1
))
5368 std::swap (op0
, op1
);
5369 code
= swap_condition (code
);
5372 if (mode
== VOIDmode
)
5373 mode
= GET_MODE (op0
);
5375 /* For some comparisons with 1 and -1, we can convert this to
5376 comparisons with zero. This will often produce more opportunities for
5377 store-flag insns. */
5382 if (op1
== const1_rtx
)
5383 op1
= const0_rtx
, code
= LE
;
5386 if (op1
== constm1_rtx
)
5387 op1
= const0_rtx
, code
= LT
;
5390 if (op1
== const1_rtx
)
5391 op1
= const0_rtx
, code
= GT
;
5394 if (op1
== constm1_rtx
)
5395 op1
= const0_rtx
, code
= GE
;
5398 if (op1
== const1_rtx
)
5399 op1
= const0_rtx
, code
= NE
;
5402 if (op1
== const1_rtx
)
5403 op1
= const0_rtx
, code
= EQ
;
  /* If we are comparing a double-word integer with zero or -1, we can
     convert the comparison into one involving a single word.  */
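  /* Illustrative sketch (not part of GCC itself): for a double-word value
     split into words HI and LO, the equality tests handled below reduce to
     a single-word comparison:

       x == 0   becomes   (hi | lo) == 0
       x == -1  becomes   (hi & lo) == -1

     which is exactly the ior_optab/and_optab choice made in the code that
     follows.  */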
5411 if (GET_MODE_BITSIZE (mode
) == BITS_PER_WORD
* 2
5412 && GET_MODE_CLASS (mode
) == MODE_INT
5413 && (!MEM_P (op0
) || ! MEM_VOLATILE_P (op0
)))
5416 if ((code
== EQ
|| code
== NE
)
5417 && (op1
== const0_rtx
|| op1
== constm1_rtx
))
5421 /* Do a logical OR or AND of the two words and compare the
5423 op00
= simplify_gen_subreg (word_mode
, op0
, mode
, 0);
5424 op01
= simplify_gen_subreg (word_mode
, op0
, mode
, UNITS_PER_WORD
);
5425 tem
= expand_binop (word_mode
,
5426 op1
== const0_rtx
? ior_optab
: and_optab
,
5427 op00
, op01
, NULL_RTX
, unsignedp
,
5431 tem
= emit_store_flag (NULL_RTX
, code
, tem
, op1
, word_mode
,
5432 unsignedp
, normalizep
);
5434 else if ((code
== LT
|| code
== GE
) && op1
== const0_rtx
)
5438 /* If testing the sign bit, can just test on high word. */
5439 op0h
= simplify_gen_subreg (word_mode
, op0
, mode
,
5440 subreg_highpart_offset (word_mode
,
5442 tem
= emit_store_flag (NULL_RTX
, code
, op0h
, op1
, word_mode
,
5443 unsignedp
, normalizep
);
5450 if (target_mode
== VOIDmode
|| GET_MODE (tem
) == target_mode
)
5453 target
= gen_reg_rtx (target_mode
);
5455 convert_move (target
, tem
,
5456 !val_signbit_known_set_p (word_mode
,
5457 (normalizep
? normalizep
5458 : STORE_FLAG_VALUE
)));
  /* If this is A < 0 or A >= 0, we can do this by taking the ones
     complement of A (for GE) and shifting the sign bit to the low bit.  */
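  /* Illustrative sketch (not part of GCC itself), for 32-bit values and a
     0/1 result (STORE_FLAG_VALUE == 1):

       a < 0    is    (unsigned int)  a >> 31
       a >= 0   is    (unsigned int) ~a >> 31
  */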
5465 if (op1
== const0_rtx
&& (code
== LT
|| code
== GE
)
5466 && GET_MODE_CLASS (mode
) == MODE_INT
5467 && (normalizep
|| STORE_FLAG_VALUE
== 1
5468 || val_signbit_p (mode
, STORE_FLAG_VALUE
)))
5475 /* If the result is to be wider than OP0, it is best to convert it
5476 first. If it is to be narrower, it is *incorrect* to convert it
5478 else if (GET_MODE_SIZE (target_mode
) > GET_MODE_SIZE (mode
))
5480 op0
= convert_modes (target_mode
, mode
, op0
, 0);
5484 if (target_mode
!= mode
)
5488 op0
= expand_unop (mode
, one_cmpl_optab
, op0
,
5489 ((STORE_FLAG_VALUE
== 1 || normalizep
)
5490 ? 0 : subtarget
), 0);
5492 if (STORE_FLAG_VALUE
== 1 || normalizep
)
5493 /* If we are supposed to produce a 0/1 value, we want to do
5494 a logical shift from the sign bit to the low-order bit; for
5495 a -1/0 value, we do an arithmetic shift. */
5496 op0
= expand_shift (RSHIFT_EXPR
, mode
, op0
,
5497 GET_MODE_BITSIZE (mode
) - 1,
5498 subtarget
, normalizep
!= -1);
5500 if (mode
!= target_mode
)
5501 op0
= convert_modes (target_mode
, mode
, op0
, 0);
5506 mclass
= GET_MODE_CLASS (mode
);
5507 for (compare_mode
= mode
; compare_mode
!= VOIDmode
;
5508 compare_mode
= GET_MODE_WIDER_MODE (compare_mode
))
5510 machine_mode optab_mode
= mclass
== MODE_CC
? CCmode
: compare_mode
;
5511 icode
= optab_handler (cstore_optab
, optab_mode
);
5512 if (icode
!= CODE_FOR_nothing
)
5514 do_pending_stack_adjust ();
5515 rtx tem
= emit_cstore (target
, icode
, code
, mode
, compare_mode
,
5516 unsignedp
, op0
, op1
, normalizep
, target_mode
);
5520 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
5522 tem
= emit_cstore (target
, icode
, scode
, mode
, compare_mode
,
5523 unsignedp
, op1
, op0
, normalizep
, target_mode
);
/* Emit a store-flags instruction for comparison CODE on OP0 and OP1
   and storing in TARGET.  Normally return TARGET.
   Return 0 if that cannot be done.

   MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
   it is VOIDmode, they cannot both be CONST_INT.

   UNSIGNEDP is for the case where we have to widen the operands
   to perform the operation.  It says to use zero-extension.

   NORMALIZEP is 1 if we should convert the result to be either zero
   or one.  NORMALIZEP is -1 if we should convert the result to be
   either zero or -1.  If NORMALIZEP is zero, the result will be left
   "raw" out of the scc insn.  */
5550 emit_store_flag (rtx target
, enum rtx_code code
, rtx op0
, rtx op1
,
5551 machine_mode mode
, int unsignedp
, int normalizep
)
5553 machine_mode target_mode
= target
? GET_MODE (target
) : VOIDmode
;
5554 enum rtx_code rcode
;
5559 /* If we compare constants, we shouldn't use a store-flag operation,
5560 but a constant load. We can get there via the vanilla route that
5561 usually generates a compare-branch sequence, but will in this case
5562 fold the comparison to a constant, and thus elide the branch. */
5563 if (CONSTANT_P (op0
) && CONSTANT_P (op1
))
5566 tem
= emit_store_flag_1 (target
, code
, op0
, op1
, mode
, unsignedp
, normalizep
,
5571 /* If we reached here, we can't do this with a scc insn, however there
5572 are some comparisons that can be done in other ways. Don't do any
5573 of these cases if branches are very cheap. */
5574 if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5577 /* See what we need to return. We can only return a 1, -1, or the
5580 if (normalizep
== 0)
5582 if (STORE_FLAG_VALUE
== 1 || STORE_FLAG_VALUE
== -1)
5583 normalizep
= STORE_FLAG_VALUE
;
5585 else if (val_signbit_p (mode
, STORE_FLAG_VALUE
))
5591 last
= get_last_insn ();
5593 /* If optimizing, use different pseudo registers for each insn, instead
5594 of reusing the same pseudo. This leads to better CSE, but slows
5595 down the compiler, since there are more pseudos */
5596 subtarget
= (!optimize
5597 && (target_mode
== mode
)) ? target
: NULL_RTX
;
5598 trueval
= GEN_INT (normalizep
? normalizep
: STORE_FLAG_VALUE
);
5600 /* For floating-point comparisons, try the reverse comparison or try
5601 changing the "orderedness" of the comparison. */
5602 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
5604 enum rtx_code first_code
;
5607 rcode
= reverse_condition_maybe_unordered (code
);
5608 if (can_compare_p (rcode
, mode
, ccp_store_flag
)
5609 && (code
== ORDERED
|| code
== UNORDERED
5610 || (! HONOR_NANS (mode
) && (code
== LTGT
|| code
== UNEQ
))
5611 || (! HONOR_SNANS (mode
) && (code
== EQ
|| code
== NE
))))
5613 int want_add
= ((STORE_FLAG_VALUE
== 1 && normalizep
== -1)
5614 || (STORE_FLAG_VALUE
== -1 && normalizep
== 1));
5616 /* For the reverse comparison, use either an addition or a XOR. */
5618 && rtx_cost (GEN_INT (normalizep
), mode
, PLUS
, 1,
5619 optimize_insn_for_speed_p ()) == 0)
5621 tem
= emit_store_flag_1 (subtarget
, rcode
, op0
, op1
, mode
, 0,
5622 STORE_FLAG_VALUE
, target_mode
);
5624 return expand_binop (target_mode
, add_optab
, tem
,
5625 gen_int_mode (normalizep
, target_mode
),
5626 target
, 0, OPTAB_WIDEN
);
5629 && rtx_cost (trueval
, mode
, XOR
, 1,
5630 optimize_insn_for_speed_p ()) == 0)
5632 tem
= emit_store_flag_1 (subtarget
, rcode
, op0
, op1
, mode
, 0,
5633 normalizep
, target_mode
);
5635 return expand_binop (target_mode
, xor_optab
, tem
, trueval
,
5636 target
, INTVAL (trueval
) >= 0, OPTAB_WIDEN
);
5640 delete_insns_since (last
);
5642 /* Cannot split ORDERED and UNORDERED, only try the above trick. */
5643 if (code
== ORDERED
|| code
== UNORDERED
)
5646 and_them
= split_comparison (code
, mode
, &first_code
, &code
);
5648 /* If there are no NaNs, the first comparison should always fall through.
5649 Effectively change the comparison to the other one. */
5650 if (!HONOR_NANS (mode
))
5652 gcc_assert (first_code
== (and_them
? ORDERED
: UNORDERED
));
5653 return emit_store_flag_1 (target
, code
, op0
, op1
, mode
, 0, normalizep
,
5657 if (!HAVE_conditional_move
)
5660 /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5661 conditional move. */
5662 tem
= emit_store_flag_1 (subtarget
, first_code
, op0
, op1
, mode
, 0,
5663 normalizep
, target_mode
);
5668 tem
= emit_conditional_move (target
, code
, op0
, op1
, mode
,
5669 tem
, const0_rtx
, GET_MODE (tem
), 0);
5671 tem
= emit_conditional_move (target
, code
, op0
, op1
, mode
,
5672 trueval
, tem
, GET_MODE (tem
), 0);
5675 delete_insns_since (last
);
5679 /* The remaining tricks only apply to integer comparisons. */
5681 if (GET_MODE_CLASS (mode
) != MODE_INT
)
5684 /* If this is an equality comparison of integers, we can try to exclusive-or
5685 (or subtract) the two operands and use a recursive call to try the
5686 comparison with zero. Don't do any of these cases if branches are
5689 if ((code
== EQ
|| code
== NE
) && op1
!= const0_rtx
)
5691 tem
= expand_binop (mode
, xor_optab
, op0
, op1
, subtarget
, 1,
5695 tem
= expand_binop (mode
, sub_optab
, op0
, op1
, subtarget
, 1,
5698 tem
= emit_store_flag (target
, code
, tem
, const0_rtx
,
5699 mode
, unsignedp
, normalizep
);
5703 delete_insns_since (last
);
  /* For integer comparisons, try the reverse comparison.  However, for
     small X and if we'd have anyway to extend, implementing "X != 0"
     as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0".  */
  rcode = reverse_condition (code);
  if (can_compare_p (rcode, mode, ccp_store_flag)
      && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
            && code == NE
            && GET_MODE_SIZE (mode) < UNITS_PER_WORD
            && op1 == const0_rtx))
    {
      int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
                      || (STORE_FLAG_VALUE == -1 && normalizep == 1));

      /* Again, for the reverse comparison, use either an addition or a XOR.  */
      if (want_add
          && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
                       optimize_insn_for_speed_p ()) == 0)
        {
          tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
                                   STORE_FLAG_VALUE, target_mode);
          if (tem != 0)
            tem = expand_binop (target_mode, add_optab, tem,
                                gen_int_mode (normalizep, target_mode),
                                target, 0, OPTAB_WIDEN);
        }
      else if (!want_add
               && rtx_cost (trueval, mode, XOR, 1,
                            optimize_insn_for_speed_p ()) == 0)
        {
          tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
                                   normalizep, target_mode);
          if (tem != 0)
            tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
                                INTVAL (trueval) >= 0, OPTAB_WIDEN);
        }

      if (tem != 0)
        return tem;
      delete_insns_since (last);
    }
  /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
     the constant zero.  Reject all other comparisons at this point.  Only
     do LE and GT if branches are expensive since they are expensive on
     2-operand machines.  */

  if (op1 != const0_rtx
      || (code != EQ && code != NE
          && (BRANCH_COST (optimize_insn_for_speed_p (),
                           false) <= 1 || (code != LE && code != GT))))
    return 0;

  /* Try to put the result of the comparison in the sign bit.  Assume we can't
     do the necessary operation below.  */

  tem = 0;

  /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
     the sign bit set.  */
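  /* For instance, in 8-bit two's complement arithmetic: A = 3 gives
     3 | 2 = 0b00000011 (sign clear); A = 0 gives 0 | -1 = 0b11111111
     (sign set); A = -4 gives -4 | -5 = 0b11111111 (sign set).  The sign
     bit ends up set exactly when A <= 0.  */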
  if (code == LE)
    {
      /* This is destructive, so SUBTARGET can't be OP0.  */
      if (rtx_equal_p (subtarget, op0))
        subtarget = 0;

      tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
                          OPTAB_WIDEN);
      if (tem)
        tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
                            OPTAB_WIDEN);
    }
  /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
     number of bits in the mode of OP0, minus one.  */
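  /* For instance, in an 8-bit mode: A = 5 gives (5 >> 7) - 5 = -5 (sign
     set, so A > 0); A = 0 gives 0 - 0 = 0 (sign clear); A = -3 gives
     (-3 >> 7) - (-3) = -1 + 3 = 2 (sign clear).  The shift is an
     arithmetic one, so only A > 0 produces a negative result.  */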
  if (code == GT)
    {
      if (rtx_equal_p (subtarget, op0))
        subtarget = 0;

      tem = maybe_expand_shift (RSHIFT_EXPR, mode, op0,
                                GET_MODE_BITSIZE (mode) - 1,
                                subtarget, 0);
      if (tem)
        tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
                            OPTAB_WIDEN);
    }
  if (code == EQ || code == NE)
    {
      /* For EQ or NE, one way to do the comparison is to apply an operation
         that converts the operand into a positive number if it is nonzero
         or zero if it was originally zero.  Then, for EQ, we subtract 1 and
         for NE we negate.  This puts the result in the sign bit.  Then we
         normalize with a shift, if needed.

         Two operations that can do the above actions are ABS and FFS, so try
         them.  If that doesn't work, and MODE is smaller than a full word,
         we can use zero-extension to the wider mode (an unsigned conversion)
         as the operation.  */

      /* Note that ABS doesn't yield a positive number for INT_MIN, but
         that is compensated by the subsequent overflow when subtracting
         one / negating.  */
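      /* For instance, with ABS as the operation: A = 5 gives ABS = 5, so
         EQ computes 5 - 1 = 4 (sign clear) and NE computes -5 (sign set);
         A = 0 gives ABS = 0, so EQ computes 0 - 1 = -1 (sign set) and NE
         computes 0 (sign clear).  */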
      if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
        tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
      else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
        tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
      else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
        {
          tem = convert_modes (word_mode, mode, op0, 1);
          mode = word_mode;
        }

      if (tem != 0)
        {
          if (code == EQ)
            tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
                                0, OPTAB_WIDEN);
          else
            tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
        }

      /* If we couldn't do it that way, for NE we can "or" the two's complement
         of the value with itself.  For EQ, we take the one's complement of
         that "or", which is an extra insn, so we only handle EQ if branches
         are expensive.  */

      if (tem == 0
          && (code == NE
              || BRANCH_COST (optimize_insn_for_speed_p (),
                              false) > 1))
        {
          if (rtx_equal_p (subtarget, op0))
            subtarget = 0;

          tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
          tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
                              OPTAB_WIDEN);

          if (tem && code == EQ)
            tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
        }
    }
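  /* All of the tricks above leave the answer in the sign bit of TEM, so a
     final arithmetic right shift by the width minus one yields 0 / -1 and
     a logical one yields 0 / 1, as requested by NORMALIZEP.  */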
  if (tem && normalizep)
    tem = maybe_expand_shift (RSHIFT_EXPR, mode, tem,
                              GET_MODE_BITSIZE (mode) - 1,
                              subtarget, normalizep == 1);

  if (tem)
    {
      if (!target)
        ;
      else if (GET_MODE (tem) != target_mode)
        {
          convert_move (target, tem, 0);
          tem = target;
        }
      else if (!subtarget)
        {
          emit_move_insn (target, tem);
          tem = target;
        }
    }
  else
    delete_insns_since (last);

  return tem;
}
/* Like emit_store_flag, but always succeeds.  */

rtx
emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
                       machine_mode mode, int unsignedp, int normalizep)
{
  rtx tem;
  rtx_code_label *label;
  rtx trueval, falseval;

  /* First see if emit_store_flag can do the job.  */
  tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
  if (tem != 0)
    return tem;
  if (!target)
    target = gen_reg_rtx (word_mode);

  /* If this failed, we have to do this with set/compare/jump/set code.
     For foo != 0, if foo is in OP0, just replace it with 1 if nonzero.  */
  trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
  if (code == NE
      && GET_MODE_CLASS (mode) == MODE_INT
      && REG_P (target)
      && op0 == target
      && op1 == const0_rtx)
    {
      label = gen_label_rtx ();
      do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp, mode,
                               NULL_RTX, NULL, label,
                               profile_probability::uninitialized ());
      emit_move_insn (target, trueval);
      emit_label (label);
      return target;
    }

  if (!REG_P (target)
      || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
    target = gen_reg_rtx (GET_MODE (target));
  /* Jump in the right direction if the target cannot implement CODE
     but can jump on its reverse condition.  */
  falseval = const0_rtx;
  if (! can_compare_p (code, mode, ccp_jump)
      && (! FLOAT_MODE_P (mode)
          || code == ORDERED || code == UNORDERED
          || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
          || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
    {
      enum rtx_code rcode;
      if (FLOAT_MODE_P (mode))
        rcode = reverse_condition_maybe_unordered (code);
      else
        rcode = reverse_condition (code);

      /* Canonicalize to UNORDERED for the libcall.  */
      if (can_compare_p (rcode, mode, ccp_jump)
          || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
        {
          falseval = trueval;
          trueval = const0_rtx;
          code = rcode;
        }
    }
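  /* TARGET is first set to TRUEVAL; the jump below then skips the FALSEVAL
     store exactly when CODE holds (with TRUEVAL, FALSEVAL and CODE possibly
     swapped and reversed above to suit what the target can jump on).  */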
  emit_move_insn (target, trueval);
  label = gen_label_rtx ();
  do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX, NULL,
                           label, profile_probability::uninitialized ());

  emit_move_insn (target, falseval);
  emit_label (label);

  return target;
}
/* Perform possibly multi-word comparison and conditional jump to LABEL
   if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE.  This is
   now a thin wrapper around do_compare_rtx_and_jump.  */

static void
do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, machine_mode mode,
                 rtx_code_label *label)
{
  int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
  do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode, NULL_RTX,
                           NULL, label, profile_probability::uninitialized ());
}