1 /* Medium-level subroutines: convert bit-field store and extract
2 and shifts, multiplies and divides to rtl instructions.
3 Copyright (C) 1987-2017 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
35 #include "diagnostic-core.h"
36 #include "fold-const.h"
37 #include "stor-layout.h"
41 #include "langhooks.h"
43 struct target_expmed default_target_expmed
;
45 struct target_expmed
*this_target_expmed
= &default_target_expmed
;
48 static void store_fixed_bit_field (rtx
, unsigned HOST_WIDE_INT
,
49 unsigned HOST_WIDE_INT
,
50 unsigned HOST_WIDE_INT
,
51 unsigned HOST_WIDE_INT
,
53 static void store_fixed_bit_field_1 (rtx
, unsigned HOST_WIDE_INT
,
54 unsigned HOST_WIDE_INT
,
56 static void store_split_bit_field (rtx
, unsigned HOST_WIDE_INT
,
57 unsigned HOST_WIDE_INT
,
58 unsigned HOST_WIDE_INT
,
59 unsigned HOST_WIDE_INT
,
61 static rtx
extract_fixed_bit_field (machine_mode
, rtx
,
62 unsigned HOST_WIDE_INT
,
63 unsigned HOST_WIDE_INT
, rtx
, int, bool);
64 static rtx
extract_fixed_bit_field_1 (machine_mode
, rtx
,
65 unsigned HOST_WIDE_INT
,
66 unsigned HOST_WIDE_INT
, rtx
, int, bool);
67 static rtx
lshift_value (machine_mode
, unsigned HOST_WIDE_INT
, int);
68 static rtx
extract_split_bit_field (rtx
, unsigned HOST_WIDE_INT
,
69 unsigned HOST_WIDE_INT
, int, bool);
70 static void do_cmp_and_jump (rtx
, rtx
, enum rtx_code
, machine_mode
, rtx_code_label
*);
71 static rtx
expand_smod_pow2 (machine_mode
, rtx
, HOST_WIDE_INT
);
72 static rtx
expand_sdiv_pow2 (machine_mode
, rtx
, HOST_WIDE_INT
);
74 /* Return a constant integer mask value of mode MODE with BITSIZE ones
75 followed by BITPOS zeros, or the complement of that if COMPLEMENT.
76 The mask is truncated if necessary to the width of mode MODE. The
77 mask is zero-extended if BITSIZE+BITPOS is too small for MODE. */
80 mask_rtx (machine_mode mode
, int bitpos
, int bitsize
, bool complement
)
82 return immed_wide_int_const
83 (wi::shifted_mask (bitpos
, bitsize
, complement
,
84 GET_MODE_PRECISION (mode
)), mode
);
/* Test whether a value is zero or a power of two.  */
88 #define EXACT_POWER_OF_2_OR_ZERO_P(x) \
89 (((x) & ((x) - HOST_WIDE_INT_1U)) == 0)
91 struct init_expmed_rtl
112 rtx pow2
[MAX_BITS_PER_WORD
];
113 rtx cint
[MAX_BITS_PER_WORD
];
117 init_expmed_one_conv (struct init_expmed_rtl
*all
, machine_mode to_mode
,
118 machine_mode from_mode
, bool speed
)
120 int to_size
, from_size
;
123 to_size
= GET_MODE_PRECISION (to_mode
);
124 from_size
= GET_MODE_PRECISION (from_mode
);
126 /* Most partial integers have a precision less than the "full"
127 integer it requires for storage. In case one doesn't, for
128 comparison purposes here, reduce the bit size by one in that
130 if (GET_MODE_CLASS (to_mode
) == MODE_PARTIAL_INT
131 && pow2p_hwi (to_size
))
133 if (GET_MODE_CLASS (from_mode
) == MODE_PARTIAL_INT
134 && pow2p_hwi (from_size
))
137 /* Assume cost of zero-extend and sign-extend is the same. */
138 which
= (to_size
< from_size
? all
->trunc
: all
->zext
);
140 PUT_MODE (all
->reg
, from_mode
);
141 set_convert_cost (to_mode
, from_mode
, speed
,
142 set_src_cost (which
, to_mode
, speed
));
146 init_expmed_one_mode (struct init_expmed_rtl
*all
,
147 machine_mode mode
, int speed
)
149 int m
, n
, mode_bitsize
;
150 machine_mode mode_from
;
152 mode_bitsize
= GET_MODE_UNIT_BITSIZE (mode
);
154 PUT_MODE (all
->reg
, mode
);
155 PUT_MODE (all
->plus
, mode
);
156 PUT_MODE (all
->neg
, mode
);
157 PUT_MODE (all
->mult
, mode
);
158 PUT_MODE (all
->sdiv
, mode
);
159 PUT_MODE (all
->udiv
, mode
);
160 PUT_MODE (all
->sdiv_32
, mode
);
161 PUT_MODE (all
->smod_32
, mode
);
162 PUT_MODE (all
->wide_trunc
, mode
);
163 PUT_MODE (all
->shift
, mode
);
164 PUT_MODE (all
->shift_mult
, mode
);
165 PUT_MODE (all
->shift_add
, mode
);
166 PUT_MODE (all
->shift_sub0
, mode
);
167 PUT_MODE (all
->shift_sub1
, mode
);
168 PUT_MODE (all
->zext
, mode
);
169 PUT_MODE (all
->trunc
, mode
);
171 set_add_cost (speed
, mode
, set_src_cost (all
->plus
, mode
, speed
));
172 set_neg_cost (speed
, mode
, set_src_cost (all
->neg
, mode
, speed
));
173 set_mul_cost (speed
, mode
, set_src_cost (all
->mult
, mode
, speed
));
174 set_sdiv_cost (speed
, mode
, set_src_cost (all
->sdiv
, mode
, speed
));
175 set_udiv_cost (speed
, mode
, set_src_cost (all
->udiv
, mode
, speed
));
177 set_sdiv_pow2_cheap (speed
, mode
, (set_src_cost (all
->sdiv_32
, mode
, speed
)
178 <= 2 * add_cost (speed
, mode
)));
179 set_smod_pow2_cheap (speed
, mode
, (set_src_cost (all
->smod_32
, mode
, speed
)
180 <= 4 * add_cost (speed
, mode
)));
182 set_shift_cost (speed
, mode
, 0, 0);
184 int cost
= add_cost (speed
, mode
);
185 set_shiftadd_cost (speed
, mode
, 0, cost
);
186 set_shiftsub0_cost (speed
, mode
, 0, cost
);
187 set_shiftsub1_cost (speed
, mode
, 0, cost
);
190 n
= MIN (MAX_BITS_PER_WORD
, mode_bitsize
);
191 for (m
= 1; m
< n
; m
++)
193 XEXP (all
->shift
, 1) = all
->cint
[m
];
194 XEXP (all
->shift_mult
, 1) = all
->pow2
[m
];
196 set_shift_cost (speed
, mode
, m
, set_src_cost (all
->shift
, mode
, speed
));
197 set_shiftadd_cost (speed
, mode
, m
, set_src_cost (all
->shift_add
, mode
,
199 set_shiftsub0_cost (speed
, mode
, m
, set_src_cost (all
->shift_sub0
, mode
,
201 set_shiftsub1_cost (speed
, mode
, m
, set_src_cost (all
->shift_sub1
, mode
,
205 if (SCALAR_INT_MODE_P (mode
))
207 for (mode_from
= MIN_MODE_INT
; mode_from
<= MAX_MODE_INT
;
208 mode_from
= (machine_mode
)(mode_from
+ 1))
209 init_expmed_one_conv (all
, mode
, mode_from
, speed
);
211 if (GET_MODE_CLASS (mode
) == MODE_INT
)
213 machine_mode wider_mode
= GET_MODE_WIDER_MODE (mode
);
214 if (wider_mode
!= VOIDmode
)
216 PUT_MODE (all
->zext
, wider_mode
);
217 PUT_MODE (all
->wide_mult
, wider_mode
);
218 PUT_MODE (all
->wide_lshr
, wider_mode
);
219 XEXP (all
->wide_lshr
, 1) = GEN_INT (mode_bitsize
);
221 set_mul_widen_cost (speed
, wider_mode
,
222 set_src_cost (all
->wide_mult
, wider_mode
, speed
));
223 set_mul_highpart_cost (speed
, mode
,
224 set_src_cost (all
->wide_trunc
, mode
, speed
));
232 struct init_expmed_rtl all
;
233 machine_mode mode
= QImode
;
236 memset (&all
, 0, sizeof all
);
237 for (m
= 1; m
< MAX_BITS_PER_WORD
; m
++)
239 all
.pow2
[m
] = GEN_INT (HOST_WIDE_INT_1
<< m
);
240 all
.cint
[m
] = GEN_INT (m
);
243 /* Avoid using hard regs in ways which may be unsupported. */
244 all
.reg
= gen_raw_REG (mode
, LAST_VIRTUAL_REGISTER
+ 1);
245 all
.plus
= gen_rtx_PLUS (mode
, all
.reg
, all
.reg
);
246 all
.neg
= gen_rtx_NEG (mode
, all
.reg
);
247 all
.mult
= gen_rtx_MULT (mode
, all
.reg
, all
.reg
);
248 all
.sdiv
= gen_rtx_DIV (mode
, all
.reg
, all
.reg
);
249 all
.udiv
= gen_rtx_UDIV (mode
, all
.reg
, all
.reg
);
250 all
.sdiv_32
= gen_rtx_DIV (mode
, all
.reg
, all
.pow2
[5]);
251 all
.smod_32
= gen_rtx_MOD (mode
, all
.reg
, all
.pow2
[5]);
252 all
.zext
= gen_rtx_ZERO_EXTEND (mode
, all
.reg
);
253 all
.wide_mult
= gen_rtx_MULT (mode
, all
.zext
, all
.zext
);
254 all
.wide_lshr
= gen_rtx_LSHIFTRT (mode
, all
.wide_mult
, all
.reg
);
255 all
.wide_trunc
= gen_rtx_TRUNCATE (mode
, all
.wide_lshr
);
256 all
.shift
= gen_rtx_ASHIFT (mode
, all
.reg
, all
.reg
);
257 all
.shift_mult
= gen_rtx_MULT (mode
, all
.reg
, all
.reg
);
258 all
.shift_add
= gen_rtx_PLUS (mode
, all
.shift_mult
, all
.reg
);
259 all
.shift_sub0
= gen_rtx_MINUS (mode
, all
.shift_mult
, all
.reg
);
260 all
.shift_sub1
= gen_rtx_MINUS (mode
, all
.reg
, all
.shift_mult
);
261 all
.trunc
= gen_rtx_TRUNCATE (mode
, all
.reg
);
263 for (speed
= 0; speed
< 2; speed
++)
265 crtl
->maybe_hot_insn_p
= speed
;
266 set_zero_cost (speed
, set_src_cost (const0_rtx
, mode
, speed
));
268 for (mode
= MIN_MODE_INT
; mode
<= MAX_MODE_INT
;
269 mode
= (machine_mode
)(mode
+ 1))
270 init_expmed_one_mode (&all
, mode
, speed
);
272 if (MIN_MODE_PARTIAL_INT
!= VOIDmode
)
273 for (mode
= MIN_MODE_PARTIAL_INT
; mode
<= MAX_MODE_PARTIAL_INT
;
274 mode
= (machine_mode
)(mode
+ 1))
275 init_expmed_one_mode (&all
, mode
, speed
);
277 if (MIN_MODE_VECTOR_INT
!= VOIDmode
)
278 for (mode
= MIN_MODE_VECTOR_INT
; mode
<= MAX_MODE_VECTOR_INT
;
279 mode
= (machine_mode
)(mode
+ 1))
280 init_expmed_one_mode (&all
, mode
, speed
);
283 if (alg_hash_used_p ())
285 struct alg_hash_entry
*p
= alg_hash_entry_ptr (0);
286 memset (p
, 0, sizeof (*p
) * NUM_ALG_HASH_ENTRIES
);
289 set_alg_hash_used_p (true);
290 default_rtl_profile ();
292 ggc_free (all
.trunc
);
293 ggc_free (all
.shift_sub1
);
294 ggc_free (all
.shift_sub0
);
295 ggc_free (all
.shift_add
);
296 ggc_free (all
.shift_mult
);
297 ggc_free (all
.shift
);
298 ggc_free (all
.wide_trunc
);
299 ggc_free (all
.wide_lshr
);
300 ggc_free (all
.wide_mult
);
302 ggc_free (all
.smod_32
);
303 ggc_free (all
.sdiv_32
);
312 /* Return an rtx representing minus the value of X.
313 MODE is the intended mode of the result,
314 useful if X is a CONST_INT. */
317 negate_rtx (machine_mode mode
, rtx x
)
319 rtx result
= simplify_unary_operation (NEG
, mode
, x
, mode
);
322 result
= expand_unop (mode
, neg_optab
, x
, NULL_RTX
, 0);
327 /* Whether reverse storage order is supported on the target. */
328 static int reverse_storage_order_supported
= -1;
330 /* Check whether reverse storage order is supported on the target. */
333 check_reverse_storage_order_support (void)
335 if (BYTES_BIG_ENDIAN
!= WORDS_BIG_ENDIAN
)
337 reverse_storage_order_supported
= 0;
338 sorry ("reverse scalar storage order");
341 reverse_storage_order_supported
= 1;
344 /* Whether reverse FP storage order is supported on the target. */
345 static int reverse_float_storage_order_supported
= -1;
347 /* Check whether reverse FP storage order is supported on the target. */
350 check_reverse_float_storage_order_support (void)
352 if (FLOAT_WORDS_BIG_ENDIAN
!= WORDS_BIG_ENDIAN
)
354 reverse_float_storage_order_supported
= 0;
355 sorry ("reverse floating-point scalar storage order");
358 reverse_float_storage_order_supported
= 1;
361 /* Return an rtx representing value of X with reverse storage order.
362 MODE is the intended mode of the result,
363 useful if X is a CONST_INT. */
366 flip_storage_order (enum machine_mode mode
, rtx x
)
368 enum machine_mode int_mode
;
374 if (COMPLEX_MODE_P (mode
))
376 rtx real
= read_complex_part (x
, false);
377 rtx imag
= read_complex_part (x
, true);
379 real
= flip_storage_order (GET_MODE_INNER (mode
), real
);
380 imag
= flip_storage_order (GET_MODE_INNER (mode
), imag
);
382 return gen_rtx_CONCAT (mode
, real
, imag
);
385 if (__builtin_expect (reverse_storage_order_supported
< 0, 0))
386 check_reverse_storage_order_support ();
388 if (SCALAR_INT_MODE_P (mode
))
392 if (FLOAT_MODE_P (mode
)
393 && __builtin_expect (reverse_float_storage_order_supported
< 0, 0))
394 check_reverse_float_storage_order_support ();
396 int_mode
= mode_for_size (GET_MODE_PRECISION (mode
), MODE_INT
, 0);
397 if (int_mode
== BLKmode
)
399 sorry ("reverse storage order for %smode", GET_MODE_NAME (mode
));
402 x
= gen_lowpart (int_mode
, x
);
405 result
= simplify_unary_operation (BSWAP
, int_mode
, x
, int_mode
);
407 result
= expand_unop (int_mode
, bswap_optab
, x
, NULL_RTX
, 1);
409 if (int_mode
!= mode
)
410 result
= gen_lowpart (mode
, result
);
415 /* Adjust bitfield memory MEM so that it points to the first unit of mode
416 MODE that contains a bitfield of size BITSIZE at bit position BITNUM.
417 If MODE is BLKmode, return a reference to every byte in the bitfield.
418 Set *NEW_BITNUM to the bit position of the field within the new memory. */
421 narrow_bit_field_mem (rtx mem
, machine_mode mode
,
422 unsigned HOST_WIDE_INT bitsize
,
423 unsigned HOST_WIDE_INT bitnum
,
424 unsigned HOST_WIDE_INT
*new_bitnum
)
428 *new_bitnum
= bitnum
% BITS_PER_UNIT
;
429 HOST_WIDE_INT offset
= bitnum
/ BITS_PER_UNIT
;
430 HOST_WIDE_INT size
= ((*new_bitnum
+ bitsize
+ BITS_PER_UNIT
- 1)
432 return adjust_bitfield_address_size (mem
, mode
, offset
, size
);
436 unsigned int unit
= GET_MODE_BITSIZE (mode
);
437 *new_bitnum
= bitnum
% unit
;
438 HOST_WIDE_INT offset
= (bitnum
- *new_bitnum
) / BITS_PER_UNIT
;
439 return adjust_bitfield_address (mem
, mode
, offset
);
443 /* The caller wants to perform insertion or extraction PATTERN on a
444 bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
445 BITREGION_START and BITREGION_END are as for store_bit_field
446 and FIELDMODE is the natural mode of the field.
448 Search for a mode that is compatible with the memory access
449 restrictions and (where applicable) with a register insertion or
450 extraction. Return the new memory on success, storing the adjusted
451 bit position in *NEW_BITNUM. Return null otherwise. */
454 adjust_bit_field_mem_for_reg (enum extraction_pattern pattern
,
455 rtx op0
, HOST_WIDE_INT bitsize
,
456 HOST_WIDE_INT bitnum
,
457 unsigned HOST_WIDE_INT bitregion_start
,
458 unsigned HOST_WIDE_INT bitregion_end
,
459 machine_mode fieldmode
,
460 unsigned HOST_WIDE_INT
*new_bitnum
)
462 bit_field_mode_iterator
iter (bitsize
, bitnum
, bitregion_start
,
463 bitregion_end
, MEM_ALIGN (op0
),
464 MEM_VOLATILE_P (op0
));
465 machine_mode best_mode
;
466 if (iter
.next_mode (&best_mode
))
468 /* We can use a memory in BEST_MODE. See whether this is true for
469 any wider modes. All other things being equal, we prefer to
470 use the widest mode possible because it tends to expose more
471 CSE opportunities. */
472 if (!iter
.prefer_smaller_modes ())
474 /* Limit the search to the mode required by the corresponding
475 register insertion or extraction instruction, if any. */
476 machine_mode limit_mode
= word_mode
;
477 extraction_insn insn
;
478 if (get_best_reg_extraction_insn (&insn
, pattern
,
479 GET_MODE_BITSIZE (best_mode
),
481 limit_mode
= insn
.field_mode
;
483 machine_mode wider_mode
;
484 while (iter
.next_mode (&wider_mode
)
485 && GET_MODE_SIZE (wider_mode
) <= GET_MODE_SIZE (limit_mode
))
486 best_mode
= wider_mode
;
488 return narrow_bit_field_mem (op0
, best_mode
, bitsize
, bitnum
,
494 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
495 a structure of mode STRUCT_MODE represents a lowpart subreg. The subreg
496 offset is then BITNUM / BITS_PER_UNIT. */
499 lowpart_bit_field_p (unsigned HOST_WIDE_INT bitnum
,
500 unsigned HOST_WIDE_INT bitsize
,
501 machine_mode struct_mode
)
503 if (BYTES_BIG_ENDIAN
)
504 return (bitnum
% BITS_PER_UNIT
== 0
505 && (bitnum
+ bitsize
== GET_MODE_BITSIZE (struct_mode
)
506 || (bitnum
+ bitsize
) % BITS_PER_WORD
== 0));
508 return bitnum
% BITS_PER_WORD
== 0;
511 /* Return true if -fstrict-volatile-bitfields applies to an access of OP0
512 containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
513 Return false if the access would touch memory outside the range
514 BITREGION_START to BITREGION_END for conformance to the C++ memory
518 strict_volatile_bitfield_p (rtx op0
, unsigned HOST_WIDE_INT bitsize
,
519 unsigned HOST_WIDE_INT bitnum
,
520 machine_mode fieldmode
,
521 unsigned HOST_WIDE_INT bitregion_start
,
522 unsigned HOST_WIDE_INT bitregion_end
)
524 unsigned HOST_WIDE_INT modesize
= GET_MODE_BITSIZE (fieldmode
);
526 /* -fstrict-volatile-bitfields must be enabled and we must have a
529 || !MEM_VOLATILE_P (op0
)
530 || flag_strict_volatile_bitfields
<= 0)
533 /* Non-integral modes likely only happen with packed structures.
535 if (!SCALAR_INT_MODE_P (fieldmode
))
538 /* The bit size must not be larger than the field mode, and
539 the field mode must not be larger than a word. */
540 if (bitsize
> modesize
|| modesize
> BITS_PER_WORD
)
543 /* Check for cases of unaligned fields that must be split. */
544 if (bitnum
% modesize
+ bitsize
> modesize
)
547 /* The memory must be sufficiently aligned for a MODESIZE access.
548 This condition guarantees, that the memory access will not
549 touch anything after the end of the structure. */
550 if (MEM_ALIGN (op0
) < modesize
)
553 /* Check for cases where the C++ memory model applies. */
554 if (bitregion_end
!= 0
555 && (bitnum
- bitnum
% modesize
< bitregion_start
556 || bitnum
- bitnum
% modesize
+ modesize
- 1 > bitregion_end
))
562 /* Return true if OP is a memory and if a bitfield of size BITSIZE at
563 bit number BITNUM can be treated as a simple value of mode MODE. */
566 simple_mem_bitfield_p (rtx op0
, unsigned HOST_WIDE_INT bitsize
,
567 unsigned HOST_WIDE_INT bitnum
, machine_mode mode
)
570 && bitnum
% BITS_PER_UNIT
== 0
571 && bitsize
== GET_MODE_BITSIZE (mode
)
572 && (!SLOW_UNALIGNED_ACCESS (mode
, MEM_ALIGN (op0
))
573 || (bitnum
% GET_MODE_ALIGNMENT (mode
) == 0
574 && MEM_ALIGN (op0
) >= GET_MODE_ALIGNMENT (mode
))));
577 /* Try to use instruction INSV to store VALUE into a field of OP0.
578 BITSIZE and BITNUM are as for store_bit_field. */
581 store_bit_field_using_insv (const extraction_insn
*insv
, rtx op0
,
582 unsigned HOST_WIDE_INT bitsize
,
583 unsigned HOST_WIDE_INT bitnum
,
586 struct expand_operand ops
[4];
589 rtx_insn
*last
= get_last_insn ();
590 bool copy_back
= false;
592 machine_mode op_mode
= insv
->field_mode
;
593 unsigned int unit
= GET_MODE_BITSIZE (op_mode
);
594 if (bitsize
== 0 || bitsize
> unit
)
598 /* Get a reference to the first byte of the field. */
599 xop0
= narrow_bit_field_mem (xop0
, insv
->struct_mode
, bitsize
, bitnum
,
603 /* Convert from counting within OP0 to counting in OP_MODE. */
604 if (BYTES_BIG_ENDIAN
)
605 bitnum
+= unit
- GET_MODE_BITSIZE (GET_MODE (op0
));
607 /* If xop0 is a register, we need it in OP_MODE
608 to make it acceptable to the format of insv. */
609 if (GET_CODE (xop0
) == SUBREG
)
610 /* We can't just change the mode, because this might clobber op0,
611 and we will need the original value of op0 if insv fails. */
612 xop0
= gen_rtx_SUBREG (op_mode
, SUBREG_REG (xop0
), SUBREG_BYTE (xop0
));
613 if (REG_P (xop0
) && GET_MODE (xop0
) != op_mode
)
614 xop0
= gen_lowpart_SUBREG (op_mode
, xop0
);
617 /* If the destination is a paradoxical subreg such that we need a
618 truncate to the inner mode, perform the insertion on a temporary and
619 truncate the result to the original destination. Note that we can't
620 just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
621 X) 0)) is (reg:N X). */
622 if (GET_CODE (xop0
) == SUBREG
623 && REG_P (SUBREG_REG (xop0
))
624 && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0
)),
627 rtx tem
= gen_reg_rtx (op_mode
);
628 emit_move_insn (tem
, xop0
);
633 /* There are similar overflow check at the start of store_bit_field_1,
634 but that only check the situation where the field lies completely
635 outside the register, while there do have situation where the field
636 lies partialy in the register, we need to adjust bitsize for this
637 partial overflow situation. Without this fix, pr48335-2.c on big-endian
638 will broken on those arch support bit insert instruction, like arm, aarch64
640 if (bitsize
+ bitnum
> unit
&& bitnum
< unit
)
642 warning (OPT_Wextra
, "write of %wu-bit data outside the bound of "
643 "destination object, data truncated into %wu-bit",
644 bitsize
, unit
- bitnum
);
645 bitsize
= unit
- bitnum
;
648 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
649 "backwards" from the size of the unit we are inserting into.
650 Otherwise, we count bits from the most significant on a
651 BYTES/BITS_BIG_ENDIAN machine. */
653 if (BITS_BIG_ENDIAN
!= BYTES_BIG_ENDIAN
)
654 bitnum
= unit
- bitsize
- bitnum
;
656 /* Convert VALUE to op_mode (which insv insn wants) in VALUE1. */
658 if (GET_MODE (value
) != op_mode
)
660 if (GET_MODE_BITSIZE (GET_MODE (value
)) >= bitsize
)
663 /* Optimization: Don't bother really extending VALUE
664 if it has all the bits we will actually use. However,
665 if we must narrow it, be sure we do it correctly. */
667 if (GET_MODE_SIZE (GET_MODE (value
)) < GET_MODE_SIZE (op_mode
))
669 tmp
= simplify_subreg (op_mode
, value1
, GET_MODE (value
), 0);
671 tmp
= simplify_gen_subreg (op_mode
,
672 force_reg (GET_MODE (value
),
674 GET_MODE (value
), 0);
678 tmp
= gen_lowpart_if_possible (op_mode
, value1
);
680 tmp
= gen_lowpart (op_mode
, force_reg (GET_MODE (value
),
685 else if (CONST_INT_P (value
))
686 value1
= gen_int_mode (INTVAL (value
), op_mode
);
688 /* Parse phase is supposed to make VALUE's data type
689 match that of the component reference, which is a type
690 at least as wide as the field; so VALUE should have
691 a mode that corresponds to that type. */
692 gcc_assert (CONSTANT_P (value
));
695 create_fixed_operand (&ops
[0], xop0
);
696 create_integer_operand (&ops
[1], bitsize
);
697 create_integer_operand (&ops
[2], bitnum
);
698 create_input_operand (&ops
[3], value1
, op_mode
);
699 if (maybe_expand_insn (insv
->icode
, 4, ops
))
702 convert_move (op0
, xop0
, true);
705 delete_insns_since (last
);
709 /* A subroutine of store_bit_field, with the same arguments. Return true
710 if the operation could be implemented.
712 If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
713 no other way of implementing the operation. If FALLBACK_P is false,
714 return false instead. */
717 store_bit_field_1 (rtx str_rtx
, unsigned HOST_WIDE_INT bitsize
,
718 unsigned HOST_WIDE_INT bitnum
,
719 unsigned HOST_WIDE_INT bitregion_start
,
720 unsigned HOST_WIDE_INT bitregion_end
,
721 machine_mode fieldmode
,
722 rtx value
, bool reverse
, bool fallback_p
)
727 while (GET_CODE (op0
) == SUBREG
)
729 /* The following line once was done only if WORDS_BIG_ENDIAN,
730 but I think that is a mistake. WORDS_BIG_ENDIAN is
731 meaningful at a much higher level; when structures are copied
732 between memory and regs, the higher-numbered regs
733 always get higher addresses. */
734 int inner_mode_size
= GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0
)));
735 int outer_mode_size
= GET_MODE_SIZE (GET_MODE (op0
));
738 /* Paradoxical subregs need special handling on big-endian machines. */
739 if (SUBREG_BYTE (op0
) == 0 && inner_mode_size
< outer_mode_size
)
741 int difference
= inner_mode_size
- outer_mode_size
;
743 if (WORDS_BIG_ENDIAN
)
744 byte_offset
+= (difference
/ UNITS_PER_WORD
) * UNITS_PER_WORD
;
745 if (BYTES_BIG_ENDIAN
)
746 byte_offset
+= difference
% UNITS_PER_WORD
;
749 byte_offset
= SUBREG_BYTE (op0
);
751 bitnum
+= byte_offset
* BITS_PER_UNIT
;
752 op0
= SUBREG_REG (op0
);
755 /* No action is needed if the target is a register and if the field
756 lies completely outside that register. This can occur if the source
757 code contains an out-of-bounds access to a small array. */
758 if (REG_P (op0
) && bitnum
>= GET_MODE_BITSIZE (GET_MODE (op0
)))
761 /* Use vec_set patterns for inserting parts of vectors whenever
763 if (VECTOR_MODE_P (GET_MODE (op0
))
765 && optab_handler (vec_set_optab
, GET_MODE (op0
)) != CODE_FOR_nothing
766 && fieldmode
== GET_MODE_INNER (GET_MODE (op0
))
767 && bitsize
== GET_MODE_UNIT_BITSIZE (GET_MODE (op0
))
768 && !(bitnum
% GET_MODE_UNIT_BITSIZE (GET_MODE (op0
))))
770 struct expand_operand ops
[3];
771 machine_mode outermode
= GET_MODE (op0
);
772 machine_mode innermode
= GET_MODE_INNER (outermode
);
773 enum insn_code icode
= optab_handler (vec_set_optab
, outermode
);
774 int pos
= bitnum
/ GET_MODE_BITSIZE (innermode
);
776 create_fixed_operand (&ops
[0], op0
);
777 create_input_operand (&ops
[1], value
, innermode
);
778 create_integer_operand (&ops
[2], pos
);
779 if (maybe_expand_insn (icode
, 3, ops
))
783 /* If the target is a register, overwriting the entire object, or storing
784 a full-word or multi-word field can be done with just a SUBREG. */
786 && bitsize
== GET_MODE_BITSIZE (fieldmode
)
787 && ((bitsize
== GET_MODE_BITSIZE (GET_MODE (op0
)) && bitnum
== 0)
788 || (bitsize
% BITS_PER_WORD
== 0 && bitnum
% BITS_PER_WORD
== 0)))
790 /* Use the subreg machinery either to narrow OP0 to the required
791 words or to cope with mode punning between equal-sized modes.
792 In the latter case, use subreg on the rhs side, not lhs. */
795 if (bitsize
== GET_MODE_BITSIZE (GET_MODE (op0
)))
797 sub
= simplify_gen_subreg (GET_MODE (op0
), value
, fieldmode
, 0);
801 sub
= flip_storage_order (GET_MODE (op0
), sub
);
802 emit_move_insn (op0
, sub
);
808 sub
= simplify_gen_subreg (fieldmode
, op0
, GET_MODE (op0
),
809 bitnum
/ BITS_PER_UNIT
);
813 value
= flip_storage_order (fieldmode
, value
);
814 emit_move_insn (sub
, value
);
820 /* If the target is memory, storing any naturally aligned field can be
821 done with a simple store. For targets that support fast unaligned
822 memory, any naturally sized, unit aligned field can be done directly. */
823 if (simple_mem_bitfield_p (op0
, bitsize
, bitnum
, fieldmode
))
825 op0
= adjust_bitfield_address (op0
, fieldmode
, bitnum
/ BITS_PER_UNIT
);
827 value
= flip_storage_order (fieldmode
, value
);
828 emit_move_insn (op0
, value
);
832 /* Make sure we are playing with integral modes. Pun with subregs
833 if we aren't. This must come after the entire register case above,
834 since that case is valid for any mode. The following cases are only
835 valid for integral modes. */
837 machine_mode imode
= int_mode_for_mode (GET_MODE (op0
));
838 if (imode
!= GET_MODE (op0
))
841 op0
= adjust_bitfield_address_size (op0
, imode
, 0, MEM_SIZE (op0
));
844 gcc_assert (imode
!= BLKmode
);
845 op0
= gen_lowpart (imode
, op0
);
850 /* Storing an lsb-aligned field in a register
851 can be done with a movstrict instruction. */
855 && lowpart_bit_field_p (bitnum
, bitsize
, GET_MODE (op0
))
856 && bitsize
== GET_MODE_BITSIZE (fieldmode
)
857 && optab_handler (movstrict_optab
, fieldmode
) != CODE_FOR_nothing
)
859 struct expand_operand ops
[2];
860 enum insn_code icode
= optab_handler (movstrict_optab
, fieldmode
);
862 unsigned HOST_WIDE_INT subreg_off
;
864 if (GET_CODE (arg0
) == SUBREG
)
866 /* Else we've got some float mode source being extracted into
867 a different float mode destination -- this combination of
868 subregs results in Severe Tire Damage. */
869 gcc_assert (GET_MODE (SUBREG_REG (arg0
)) == fieldmode
870 || GET_MODE_CLASS (fieldmode
) == MODE_INT
871 || GET_MODE_CLASS (fieldmode
) == MODE_PARTIAL_INT
);
872 arg0
= SUBREG_REG (arg0
);
875 subreg_off
= bitnum
/ BITS_PER_UNIT
;
876 if (validate_subreg (fieldmode
, GET_MODE (arg0
), arg0
, subreg_off
))
878 arg0
= gen_rtx_SUBREG (fieldmode
, arg0
, subreg_off
);
880 create_fixed_operand (&ops
[0], arg0
);
881 /* Shrink the source operand to FIELDMODE. */
882 create_convert_operand_to (&ops
[1], value
, fieldmode
, false);
883 if (maybe_expand_insn (icode
, 2, ops
))
888 /* Handle fields bigger than a word. */
890 if (bitsize
> BITS_PER_WORD
)
892 /* Here we transfer the words of the field
893 in the order least significant first.
894 This is because the most significant word is the one which may
896 However, only do that if the value is not BLKmode. */
898 const bool backwards
= WORDS_BIG_ENDIAN
&& fieldmode
!= BLKmode
;
899 unsigned int nwords
= (bitsize
+ (BITS_PER_WORD
- 1)) / BITS_PER_WORD
;
903 /* This is the mode we must force value to, so that there will be enough
904 subwords to extract. Note that fieldmode will often (always?) be
905 VOIDmode, because that is what store_field uses to indicate that this
906 is a bit field, but passing VOIDmode to operand_subword_force
908 fieldmode
= GET_MODE (value
);
909 if (fieldmode
== VOIDmode
)
910 fieldmode
= smallest_mode_for_size (nwords
* BITS_PER_WORD
, MODE_INT
);
912 last
= get_last_insn ();
913 for (i
= 0; i
< nwords
; i
++)
915 /* If I is 0, use the low-order word in both field and target;
916 if I is 1, use the next to lowest word; and so on. */
917 unsigned int wordnum
= (backwards
918 ? GET_MODE_SIZE (fieldmode
) / UNITS_PER_WORD
921 unsigned int bit_offset
= (backwards
^ reverse
922 ? MAX ((int) bitsize
- ((int) i
+ 1)
925 : (int) i
* BITS_PER_WORD
);
926 rtx value_word
= operand_subword_force (value
, wordnum
, fieldmode
);
927 unsigned HOST_WIDE_INT new_bitsize
=
928 MIN (BITS_PER_WORD
, bitsize
- i
* BITS_PER_WORD
);
930 /* If the remaining chunk doesn't have full wordsize we have
931 to make sure that for big-endian machines the higher order
933 if (new_bitsize
< BITS_PER_WORD
&& BYTES_BIG_ENDIAN
&& !backwards
)
934 value_word
= simplify_expand_binop (word_mode
, lshr_optab
,
936 GEN_INT (BITS_PER_WORD
941 if (!store_bit_field_1 (op0
, new_bitsize
,
943 bitregion_start
, bitregion_end
,
945 value_word
, reverse
, fallback_p
))
947 delete_insns_since (last
);
954 /* If VALUE has a floating-point or complex mode, access it as an
955 integer of the corresponding size. This can occur on a machine
956 with 64 bit registers that uses SFmode for float. It can also
957 occur for unaligned float or complex fields. */
959 if (GET_MODE (value
) != VOIDmode
960 && GET_MODE_CLASS (GET_MODE (value
)) != MODE_INT
961 && GET_MODE_CLASS (GET_MODE (value
)) != MODE_PARTIAL_INT
)
963 value
= gen_reg_rtx (int_mode_for_mode (GET_MODE (value
)));
964 emit_move_insn (gen_lowpart (GET_MODE (orig_value
), value
), orig_value
);
967 /* If OP0 is a multi-word register, narrow it to the affected word.
968 If the region spans two words, defer to store_split_bit_field. */
969 if (!MEM_P (op0
) && GET_MODE_SIZE (GET_MODE (op0
)) > UNITS_PER_WORD
)
971 if (bitnum
% BITS_PER_WORD
+ bitsize
> BITS_PER_WORD
)
976 store_split_bit_field (op0
, bitsize
, bitnum
, bitregion_start
,
977 bitregion_end
, value
, reverse
);
980 op0
= simplify_gen_subreg (word_mode
, op0
, GET_MODE (op0
),
981 bitnum
/ BITS_PER_WORD
* UNITS_PER_WORD
);
983 bitnum
%= BITS_PER_WORD
;
986 /* From here on we can assume that the field to be stored in fits
987 within a word. If the destination is a register, it too fits
990 extraction_insn insv
;
993 && get_best_reg_extraction_insn (&insv
, EP_insv
,
994 GET_MODE_BITSIZE (GET_MODE (op0
)),
996 && store_bit_field_using_insv (&insv
, op0
, bitsize
, bitnum
, value
))
999 /* If OP0 is a memory, try copying it to a register and seeing if a
1000 cheap register alternative is available. */
1001 if (MEM_P (op0
) && !reverse
)
1003 if (get_best_mem_extraction_insn (&insv
, EP_insv
, bitsize
, bitnum
,
1005 && store_bit_field_using_insv (&insv
, op0
, bitsize
, bitnum
, value
))
1008 rtx_insn
*last
= get_last_insn ();
1010 /* Try loading part of OP0 into a register, inserting the bitfield
1011 into that, and then copying the result back to OP0. */
1012 unsigned HOST_WIDE_INT bitpos
;
1013 rtx xop0
= adjust_bit_field_mem_for_reg (EP_insv
, op0
, bitsize
, bitnum
,
1014 bitregion_start
, bitregion_end
,
1015 fieldmode
, &bitpos
);
1018 rtx tempreg
= copy_to_reg (xop0
);
1019 if (store_bit_field_1 (tempreg
, bitsize
, bitpos
,
1020 bitregion_start
, bitregion_end
,
1021 fieldmode
, orig_value
, reverse
, false))
1023 emit_move_insn (xop0
, tempreg
);
1026 delete_insns_since (last
);
1033 store_fixed_bit_field (op0
, bitsize
, bitnum
, bitregion_start
,
1034 bitregion_end
, value
, reverse
);
1038 /* Generate code to store value from rtx VALUE
1039 into a bit-field within structure STR_RTX
1040 containing BITSIZE bits starting at bit BITNUM.
1042 BITREGION_START is bitpos of the first bitfield in this region.
1043 BITREGION_END is the bitpos of the ending bitfield in this region.
1044 These two fields are 0, if the C++ memory model does not apply,
1045 or we are not interested in keeping track of bitfield regions.
1047 FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
1049 If REVERSE is true, the store is to be done in reverse order. */
1052 store_bit_field (rtx str_rtx
, unsigned HOST_WIDE_INT bitsize
,
1053 unsigned HOST_WIDE_INT bitnum
,
1054 unsigned HOST_WIDE_INT bitregion_start
,
1055 unsigned HOST_WIDE_INT bitregion_end
,
1056 machine_mode fieldmode
,
1057 rtx value
, bool reverse
)
1059 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
1060 if (strict_volatile_bitfield_p (str_rtx
, bitsize
, bitnum
, fieldmode
,
1061 bitregion_start
, bitregion_end
))
1063 /* Storing of a full word can be done with a simple store.
1064 We know here that the field can be accessed with one single
1065 instruction. For targets that support unaligned memory,
1066 an unaligned access may be necessary. */
1067 if (bitsize
== GET_MODE_BITSIZE (fieldmode
))
1069 str_rtx
= adjust_bitfield_address (str_rtx
, fieldmode
,
1070 bitnum
/ BITS_PER_UNIT
);
1072 value
= flip_storage_order (fieldmode
, value
);
1073 gcc_assert (bitnum
% BITS_PER_UNIT
== 0);
1074 emit_move_insn (str_rtx
, value
);
1080 str_rtx
= narrow_bit_field_mem (str_rtx
, fieldmode
, bitsize
, bitnum
,
1082 gcc_assert (bitnum
+ bitsize
<= GET_MODE_BITSIZE (fieldmode
));
1083 temp
= copy_to_reg (str_rtx
);
1084 if (!store_bit_field_1 (temp
, bitsize
, bitnum
, 0, 0,
1085 fieldmode
, value
, reverse
, true))
1088 emit_move_insn (str_rtx
, temp
);
1094 /* Under the C++0x memory model, we must not touch bits outside the
1095 bit region. Adjust the address to start at the beginning of the
1097 if (MEM_P (str_rtx
) && bitregion_start
> 0)
1099 machine_mode bestmode
;
1100 HOST_WIDE_INT offset
, size
;
1102 gcc_assert ((bitregion_start
% BITS_PER_UNIT
) == 0);
1104 offset
= bitregion_start
/ BITS_PER_UNIT
;
1105 bitnum
-= bitregion_start
;
1106 size
= (bitnum
+ bitsize
+ BITS_PER_UNIT
- 1) / BITS_PER_UNIT
;
1107 bitregion_end
-= bitregion_start
;
1108 bitregion_start
= 0;
1109 bestmode
= get_best_mode (bitsize
, bitnum
,
1110 bitregion_start
, bitregion_end
,
1111 MEM_ALIGN (str_rtx
), VOIDmode
,
1112 MEM_VOLATILE_P (str_rtx
));
1113 str_rtx
= adjust_bitfield_address_size (str_rtx
, bestmode
, offset
, size
);
1116 if (!store_bit_field_1 (str_rtx
, bitsize
, bitnum
,
1117 bitregion_start
, bitregion_end
,
1118 fieldmode
, value
, reverse
, true))
1122 /* Use shifts and boolean operations to store VALUE into a bit field of
1123 width BITSIZE in OP0, starting at bit BITNUM.
1125 If REVERSE is true, the store is to be done in reverse order. */
1128 store_fixed_bit_field (rtx op0
, unsigned HOST_WIDE_INT bitsize
,
1129 unsigned HOST_WIDE_INT bitnum
,
1130 unsigned HOST_WIDE_INT bitregion_start
,
1131 unsigned HOST_WIDE_INT bitregion_end
,
1132 rtx value
, bool reverse
)
1134 /* There is a case not handled here:
1135 a structure with a known alignment of just a halfword
1136 and a field split across two aligned halfwords within the structure.
1137 Or likewise a structure with a known alignment of just a byte
1138 and a field split across two bytes.
1139 Such cases are not supposed to be able to occur. */
1143 machine_mode mode
= GET_MODE (op0
);
1144 if (GET_MODE_BITSIZE (mode
) == 0
1145 || GET_MODE_BITSIZE (mode
) > GET_MODE_BITSIZE (word_mode
))
1147 mode
= get_best_mode (bitsize
, bitnum
, bitregion_start
, bitregion_end
,
1148 MEM_ALIGN (op0
), mode
, MEM_VOLATILE_P (op0
));
1150 if (mode
== VOIDmode
)
1152 /* The only way this should occur is if the field spans word
1154 store_split_bit_field (op0
, bitsize
, bitnum
, bitregion_start
,
1155 bitregion_end
, value
, reverse
);
1159 op0
= narrow_bit_field_mem (op0
, mode
, bitsize
, bitnum
, &bitnum
);
1162 store_fixed_bit_field_1 (op0
, bitsize
, bitnum
, value
, reverse
);
1165 /* Helper function for store_fixed_bit_field, stores
1166 the bit field always using the MODE of OP0. */
1169 store_fixed_bit_field_1 (rtx op0
, unsigned HOST_WIDE_INT bitsize
,
1170 unsigned HOST_WIDE_INT bitnum
,
1171 rtx value
, bool reverse
)
1178 mode
= GET_MODE (op0
);
1179 gcc_assert (SCALAR_INT_MODE_P (mode
));
1181 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1182 for invalid input, such as f5 from gcc.dg/pr48335-2.c. */
1184 if (reverse
? !BYTES_BIG_ENDIAN
: BYTES_BIG_ENDIAN
)
1185 /* BITNUM is the distance between our msb
1186 and that of the containing datum.
1187 Convert it to the distance from the lsb. */
1188 bitnum
= GET_MODE_BITSIZE (mode
) - bitsize
- bitnum
;
1190 /* Now BITNUM is always the distance between our lsb
1193 /* Shift VALUE left by BITNUM bits. If VALUE is not constant,
1194 we must first convert its mode to MODE. */
1196 if (CONST_INT_P (value
))
1198 unsigned HOST_WIDE_INT v
= UINTVAL (value
);
1200 if (bitsize
< HOST_BITS_PER_WIDE_INT
)
1201 v
&= (HOST_WIDE_INT_1U
<< bitsize
) - 1;
1205 else if ((bitsize
< HOST_BITS_PER_WIDE_INT
1206 && v
== (HOST_WIDE_INT_1U
<< bitsize
) - 1)
1207 || (bitsize
== HOST_BITS_PER_WIDE_INT
1208 && v
== HOST_WIDE_INT_M1U
))
1211 value
= lshift_value (mode
, v
, bitnum
);
1215 int must_and
= (GET_MODE_BITSIZE (GET_MODE (value
)) != bitsize
1216 && bitnum
+ bitsize
!= GET_MODE_BITSIZE (mode
));
1218 if (GET_MODE (value
) != mode
)
1219 value
= convert_to_mode (mode
, value
, 1);
1222 value
= expand_binop (mode
, and_optab
, value
,
1223 mask_rtx (mode
, 0, bitsize
, 0),
1224 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
1226 value
= expand_shift (LSHIFT_EXPR
, mode
, value
,
1227 bitnum
, NULL_RTX
, 1);
1231 value
= flip_storage_order (mode
, value
);
1233 /* Now clear the chosen bits in OP0,
1234 except that if VALUE is -1 we need not bother. */
1235 /* We keep the intermediates in registers to allow CSE to combine
1236 consecutive bitfield assignments. */
1238 temp
= force_reg (mode
, op0
);
1242 rtx mask
= mask_rtx (mode
, bitnum
, bitsize
, 1);
1244 mask
= flip_storage_order (mode
, mask
);
1245 temp
= expand_binop (mode
, and_optab
, temp
, mask
,
1246 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
1247 temp
= force_reg (mode
, temp
);
1250 /* Now logical-or VALUE into OP0, unless it is zero. */
1254 temp
= expand_binop (mode
, ior_optab
, temp
, value
,
1255 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
1256 temp
= force_reg (mode
, temp
);
1261 op0
= copy_rtx (op0
);
1262 emit_move_insn (op0
, temp
);
1266 /* Store a bit field that is split across multiple accessible memory objects.
1268 OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1269 BITSIZE is the field width; BITPOS the position of its first bit
1271 VALUE is the value to store.
1273 If REVERSE is true, the store is to be done in reverse order.
1275 This does not yet handle fields wider than BITS_PER_WORD. */
1278 store_split_bit_field (rtx op0
, unsigned HOST_WIDE_INT bitsize
,
1279 unsigned HOST_WIDE_INT bitpos
,
1280 unsigned HOST_WIDE_INT bitregion_start
,
1281 unsigned HOST_WIDE_INT bitregion_end
,
1282 rtx value
, bool reverse
)
1284 unsigned int unit
, total_bits
, bitsdone
= 0;
1286 /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1288 if (REG_P (op0
) || GET_CODE (op0
) == SUBREG
)
1289 unit
= BITS_PER_WORD
;
1291 unit
= MIN (MEM_ALIGN (op0
), BITS_PER_WORD
);
1293 /* If OP0 is a memory with a mode, then UNIT must not be larger than
1294 OP0's mode as well. Otherwise, store_fixed_bit_field will call us
1295 again, and we will mutually recurse forever. */
1296 if (MEM_P (op0
) && GET_MODE_BITSIZE (GET_MODE (op0
)) > 0)
1297 unit
= MIN (unit
, GET_MODE_BITSIZE (GET_MODE (op0
)));
1299 /* If VALUE is a constant other than a CONST_INT, get it into a register in
1300 WORD_MODE. If we can do this using gen_lowpart_common, do so. Note
1301 that VALUE might be a floating-point constant. */
1302 if (CONSTANT_P (value
) && !CONST_INT_P (value
))
1304 rtx word
= gen_lowpart_common (word_mode
, value
);
1306 if (word
&& (value
!= word
))
1309 value
= gen_lowpart_common (word_mode
,
1310 force_reg (GET_MODE (value
) != VOIDmode
1312 : word_mode
, value
));
1315 total_bits
= GET_MODE_BITSIZE (GET_MODE (value
));
1317 while (bitsdone
< bitsize
)
1319 unsigned HOST_WIDE_INT thissize
;
1320 unsigned HOST_WIDE_INT thispos
;
1321 unsigned HOST_WIDE_INT offset
;
1324 offset
= (bitpos
+ bitsdone
) / unit
;
1325 thispos
= (bitpos
+ bitsdone
) % unit
;
1327 /* When region of bytes we can touch is restricted, decrease
1328 UNIT close to the end of the region as needed. If op0 is a REG
1329 or SUBREG of REG, don't do this, as there can't be data races
1330 on a register and we can expand shorter code in some cases. */
1332 && unit
> BITS_PER_UNIT
1333 && bitpos
+ bitsdone
- thispos
+ unit
> bitregion_end
+ 1
1335 && (GET_CODE (op0
) != SUBREG
|| !REG_P (SUBREG_REG (op0
))))
1341 /* THISSIZE must not overrun a word boundary. Otherwise,
1342 store_fixed_bit_field will call us again, and we will mutually
1344 thissize
= MIN (bitsize
- bitsdone
, BITS_PER_WORD
);
1345 thissize
= MIN (thissize
, unit
- thispos
);
1347 if (reverse
? !BYTES_BIG_ENDIAN
: BYTES_BIG_ENDIAN
)
1349 /* Fetch successively less significant portions. */
1350 if (CONST_INT_P (value
))
1351 part
= GEN_INT (((unsigned HOST_WIDE_INT
) (INTVAL (value
))
1352 >> (bitsize
- bitsdone
- thissize
))
1353 & ((HOST_WIDE_INT_1
<< thissize
) - 1));
1354 /* Likewise, but the source is little-endian. */
1356 part
= extract_fixed_bit_field (word_mode
, value
, thissize
,
1357 bitsize
- bitsdone
- thissize
,
1358 NULL_RTX
, 1, false);
1361 int total_bits
= GET_MODE_BITSIZE (GET_MODE (value
));
1362 /* The args are chosen so that the last part includes the
1363 lsb. Give extract_bit_field the value it needs (with
1364 endianness compensation) to fetch the piece we want. */
1365 part
= extract_fixed_bit_field (word_mode
, value
, thissize
,
1366 total_bits
- bitsize
+ bitsdone
,
1367 NULL_RTX
, 1, false);
1372 /* Fetch successively more significant portions. */
1373 if (CONST_INT_P (value
))
1374 part
= GEN_INT (((unsigned HOST_WIDE_INT
) (INTVAL (value
))
1376 & ((HOST_WIDE_INT_1
<< thissize
) - 1));
1377 /* Likewise, but the source is big-endian. */
1379 part
= extract_fixed_bit_field (word_mode
, value
, thissize
,
1380 total_bits
- bitsdone
- thissize
,
1381 NULL_RTX
, 1, false);
1383 part
= extract_fixed_bit_field (word_mode
, value
, thissize
,
1384 bitsdone
, NULL_RTX
, 1, false);
1387 /* If OP0 is a register, then handle OFFSET here. */
1388 if (SUBREG_P (op0
) || REG_P (op0
))
1390 machine_mode op0_mode
= GET_MODE (op0
);
1391 if (op0_mode
!= BLKmode
&& GET_MODE_SIZE (op0_mode
) < UNITS_PER_WORD
)
1392 word
= offset
? const0_rtx
: op0
;
1394 word
= operand_subword_force (op0
, offset
* unit
/ BITS_PER_WORD
,
1396 offset
&= BITS_PER_WORD
/ unit
- 1;
1401 /* OFFSET is in UNITs, and UNIT is in bits. If WORD is const0_rtx,
1402 it is just an out-of-bounds access. Ignore it. */
1403 if (word
!= const0_rtx
)
1404 store_fixed_bit_field (word
, thissize
, offset
* unit
+ thispos
,
1405 bitregion_start
, bitregion_end
, part
,
1407 bitsdone
+= thissize
;
1411 /* A subroutine of extract_bit_field_1 that converts return value X
1412 to either MODE or TMODE. MODE, TMODE and UNSIGNEDP are arguments
1413 to extract_bit_field. */
1416 convert_extracted_bit_field (rtx x
, machine_mode mode
,
1417 machine_mode tmode
, bool unsignedp
)
1419 if (GET_MODE (x
) == tmode
|| GET_MODE (x
) == mode
)
1422 /* If the x mode is not a scalar integral, first convert to the
1423 integer mode of that size and then access it as a floating-point
1424 value via a SUBREG. */
1425 if (!SCALAR_INT_MODE_P (tmode
))
1429 smode
= mode_for_size (GET_MODE_BITSIZE (tmode
), MODE_INT
, 0);
1430 x
= convert_to_mode (smode
, x
, unsignedp
);
1431 x
= force_reg (smode
, x
);
1432 return gen_lowpart (tmode
, x
);
1435 return convert_to_mode (tmode
, x
, unsignedp
);
1438 /* Try to use an ext(z)v pattern to extract a field from OP0.
1439 Return the extracted value on success, otherwise return null.
1440 EXT_MODE is the mode of the extraction and the other arguments
1441 are as for extract_bit_field. */
1444 extract_bit_field_using_extv (const extraction_insn
*extv
, rtx op0
,
1445 unsigned HOST_WIDE_INT bitsize
,
1446 unsigned HOST_WIDE_INT bitnum
,
1447 int unsignedp
, rtx target
,
1448 machine_mode mode
, machine_mode tmode
)
1450 struct expand_operand ops
[4];
1451 rtx spec_target
= target
;
1452 rtx spec_target_subreg
= 0;
1453 machine_mode ext_mode
= extv
->field_mode
;
1454 unsigned unit
= GET_MODE_BITSIZE (ext_mode
);
1456 if (bitsize
== 0 || unit
< bitsize
)
1460 /* Get a reference to the first byte of the field. */
1461 op0
= narrow_bit_field_mem (op0
, extv
->struct_mode
, bitsize
, bitnum
,
1465 /* Convert from counting within OP0 to counting in EXT_MODE. */
1466 if (BYTES_BIG_ENDIAN
)
1467 bitnum
+= unit
- GET_MODE_BITSIZE (GET_MODE (op0
));
1469 /* If op0 is a register, we need it in EXT_MODE to make it
1470 acceptable to the format of ext(z)v. */
1471 if (GET_CODE (op0
) == SUBREG
&& GET_MODE (op0
) != ext_mode
)
1473 if (REG_P (op0
) && GET_MODE (op0
) != ext_mode
)
1474 op0
= gen_lowpart_SUBREG (ext_mode
, op0
);
1477 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1478 "backwards" from the size of the unit we are extracting from.
1479 Otherwise, we count bits from the most significant on a
1480 BYTES/BITS_BIG_ENDIAN machine. */
1482 if (BITS_BIG_ENDIAN
!= BYTES_BIG_ENDIAN
)
1483 bitnum
= unit
- bitsize
- bitnum
;
1486 target
= spec_target
= gen_reg_rtx (tmode
);
1488 if (GET_MODE (target
) != ext_mode
)
1490 /* Don't use LHS paradoxical subreg if explicit truncation is needed
1491 between the mode of the extraction (word_mode) and the target
1492 mode. Instead, create a temporary and use convert_move to set
1495 && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target
), ext_mode
))
1497 target
= gen_lowpart (ext_mode
, target
);
1498 if (GET_MODE_PRECISION (ext_mode
)
1499 > GET_MODE_PRECISION (GET_MODE (spec_target
)))
1500 spec_target_subreg
= target
;
1503 target
= gen_reg_rtx (ext_mode
);
1506 create_output_operand (&ops
[0], target
, ext_mode
);
1507 create_fixed_operand (&ops
[1], op0
);
1508 create_integer_operand (&ops
[2], bitsize
);
1509 create_integer_operand (&ops
[3], bitnum
);
1510 if (maybe_expand_insn (extv
->icode
, 4, ops
))
1512 target
= ops
[0].value
;
1513 if (target
== spec_target
)
1515 if (target
== spec_target_subreg
)
1517 return convert_extracted_bit_field (target
, mode
, tmode
, unsignedp
);
1522 /* A subroutine of extract_bit_field, with the same arguments.
1523 If FALLBACK_P is true, fall back to extract_fixed_bit_field
1524 if we can find no other means of implementing the operation.
1525 if FALLBACK_P is false, return NULL instead. */
1528 extract_bit_field_1 (rtx str_rtx
, unsigned HOST_WIDE_INT bitsize
,
1529 unsigned HOST_WIDE_INT bitnum
, int unsignedp
, rtx target
,
1530 machine_mode mode
, machine_mode tmode
,
1531 bool reverse
, bool fallback_p
)
1534 machine_mode int_mode
;
1537 if (tmode
== VOIDmode
)
1540 while (GET_CODE (op0
) == SUBREG
)
1542 bitnum
+= SUBREG_BYTE (op0
) * BITS_PER_UNIT
;
1543 op0
= SUBREG_REG (op0
);
1546 /* If we have an out-of-bounds access to a register, just return an
1547 uninitialized register of the required mode. This can occur if the
1548 source code contains an out-of-bounds access to a small array. */
1549 if (REG_P (op0
) && bitnum
>= GET_MODE_BITSIZE (GET_MODE (op0
)))
1550 return gen_reg_rtx (tmode
);
1553 && mode
== GET_MODE (op0
)
1555 && bitsize
== GET_MODE_BITSIZE (GET_MODE (op0
)))
1558 op0
= flip_storage_order (mode
, op0
);
1559 /* We're trying to extract a full register from itself. */
1563 /* See if we can get a better vector mode before extracting. */
1564 if (VECTOR_MODE_P (GET_MODE (op0
))
1566 && GET_MODE_INNER (GET_MODE (op0
)) != tmode
)
1568 machine_mode new_mode
;
1570 if (GET_MODE_CLASS (tmode
) == MODE_FLOAT
)
1571 new_mode
= MIN_MODE_VECTOR_FLOAT
;
1572 else if (GET_MODE_CLASS (tmode
) == MODE_FRACT
)
1573 new_mode
= MIN_MODE_VECTOR_FRACT
;
1574 else if (GET_MODE_CLASS (tmode
) == MODE_UFRACT
)
1575 new_mode
= MIN_MODE_VECTOR_UFRACT
;
1576 else if (GET_MODE_CLASS (tmode
) == MODE_ACCUM
)
1577 new_mode
= MIN_MODE_VECTOR_ACCUM
;
1578 else if (GET_MODE_CLASS (tmode
) == MODE_UACCUM
)
1579 new_mode
= MIN_MODE_VECTOR_UACCUM
;
1581 new_mode
= MIN_MODE_VECTOR_INT
;
1583 for (; new_mode
!= VOIDmode
; new_mode
= GET_MODE_WIDER_MODE (new_mode
))
1584 if (GET_MODE_SIZE (new_mode
) == GET_MODE_SIZE (GET_MODE (op0
))
1585 && GET_MODE_UNIT_SIZE (new_mode
) == GET_MODE_SIZE (tmode
)
1586 && targetm
.vector_mode_supported_p (new_mode
))
1588 if (new_mode
!= VOIDmode
)
1589 op0
= gen_lowpart (new_mode
, op0
);
1592 /* Use vec_extract patterns for extracting parts of vectors whenever
1594 if (VECTOR_MODE_P (GET_MODE (op0
))
1596 && optab_handler (vec_extract_optab
, GET_MODE (op0
)) != CODE_FOR_nothing
1597 && ((bitnum
+ bitsize
- 1) / GET_MODE_UNIT_BITSIZE (GET_MODE (op0
))
1598 == bitnum
/ GET_MODE_UNIT_BITSIZE (GET_MODE (op0
))))
1600 struct expand_operand ops
[3];
1601 machine_mode outermode
= GET_MODE (op0
);
1602 machine_mode innermode
= GET_MODE_INNER (outermode
);
1603 enum insn_code icode
= optab_handler (vec_extract_optab
, outermode
);
1604 unsigned HOST_WIDE_INT pos
= bitnum
/ GET_MODE_BITSIZE (innermode
);
1606 create_output_operand (&ops
[0], target
, innermode
);
1607 create_input_operand (&ops
[1], op0
, outermode
);
1608 create_integer_operand (&ops
[2], pos
);
1609 if (maybe_expand_insn (icode
, 3, ops
))
1611 target
= ops
[0].value
;
1612 if (GET_MODE (target
) != mode
)
1613 return gen_lowpart (tmode
, target
);
1618 /* Make sure we are playing with integral modes. Pun with subregs
1621 machine_mode imode
= int_mode_for_mode (GET_MODE (op0
));
1622 if (imode
!= GET_MODE (op0
))
1625 op0
= adjust_bitfield_address_size (op0
, imode
, 0, MEM_SIZE (op0
));
1626 else if (imode
!= BLKmode
)
1628 op0
= gen_lowpart (imode
, op0
);
1630 /* If we got a SUBREG, force it into a register since we
1631 aren't going to be able to do another SUBREG on it. */
1632 if (GET_CODE (op0
) == SUBREG
)
1633 op0
= force_reg (imode
, op0
);
1637 HOST_WIDE_INT size
= GET_MODE_SIZE (GET_MODE (op0
));
1638 rtx mem
= assign_stack_temp (GET_MODE (op0
), size
);
1639 emit_move_insn (mem
, op0
);
1640 op0
= adjust_bitfield_address_size (mem
, BLKmode
, 0, size
);
1645 /* ??? We currently assume TARGET is at least as big as BITSIZE.
1646 If that's wrong, the solution is to test for it and set TARGET to 0
1649 /* Get the mode of the field to use for atomic access or subreg
1652 if (SCALAR_INT_MODE_P (tmode
))
1654 machine_mode try_mode
= mode_for_size (bitsize
,
1655 GET_MODE_CLASS (tmode
), 0);
1656 if (try_mode
!= BLKmode
)
1659 gcc_assert (mode1
!= BLKmode
);
1661 /* Extraction of a full MODE1 value can be done with a subreg as long
1662 as the least significant bit of the value is the least significant
1663 bit of either OP0 or a word of OP0. */
1666 && lowpart_bit_field_p (bitnum
, bitsize
, GET_MODE (op0
))
1667 && bitsize
== GET_MODE_BITSIZE (mode1
)
1668 && TRULY_NOOP_TRUNCATION_MODES_P (mode1
, GET_MODE (op0
)))
1670 rtx sub
= simplify_gen_subreg (mode1
, op0
, GET_MODE (op0
),
1671 bitnum
/ BITS_PER_UNIT
);
1673 return convert_extracted_bit_field (sub
, mode
, tmode
, unsignedp
);
1676 /* Extraction of a full MODE1 value can be done with a load as long as
1677 the field is on a byte boundary and is sufficiently aligned. */
1678 if (simple_mem_bitfield_p (op0
, bitsize
, bitnum
, mode1
))
1680 op0
= adjust_bitfield_address (op0
, mode1
, bitnum
/ BITS_PER_UNIT
);
1682 op0
= flip_storage_order (mode1
, op0
);
1683 return convert_extracted_bit_field (op0
, mode
, tmode
, unsignedp
);
1686 /* Handle fields bigger than a word. */
1688 if (bitsize
> BITS_PER_WORD
)
1690 /* Here we transfer the words of the field
1691 in the order least significant first.
1692 This is because the most significant word is the one which may
1693 be less than full. */
1695 const bool backwards
= WORDS_BIG_ENDIAN
;
1696 unsigned int nwords
= (bitsize
+ (BITS_PER_WORD
- 1)) / BITS_PER_WORD
;
1700 if (target
== 0 || !REG_P (target
) || !valid_multiword_target_p (target
))
1701 target
= gen_reg_rtx (mode
);
1703 /* In case we're about to clobber a base register or something
1704 (see gcc.c-torture/execute/20040625-1.c). */
1705 if (reg_mentioned_p (target
, str_rtx
))
1706 target
= gen_reg_rtx (mode
);
1708 /* Indicate for flow that the entire target reg is being set. */
1709 emit_clobber (target
);
1711 last
= get_last_insn ();
1712 for (i
= 0; i
< nwords
; i
++)
1714 /* If I is 0, use the low-order word in both field and target;
1715 if I is 1, use the next to lowest word; and so on. */
1716 /* Word number in TARGET to use. */
1717 unsigned int wordnum
1719 ? GET_MODE_SIZE (GET_MODE (target
)) / UNITS_PER_WORD
- i
- 1
1721 /* Offset from start of field in OP0. */
1722 unsigned int bit_offset
= (backwards
^ reverse
1723 ? MAX ((int) bitsize
- ((int) i
+ 1)
1726 : (int) i
* BITS_PER_WORD
);
1727 rtx target_part
= operand_subword (target
, wordnum
, 1, VOIDmode
);
1729 = extract_bit_field_1 (op0
, MIN (BITS_PER_WORD
,
1730 bitsize
- i
* BITS_PER_WORD
),
1731 bitnum
+ bit_offset
, 1, target_part
,
1732 mode
, word_mode
, reverse
, fallback_p
);
1734 gcc_assert (target_part
);
1737 delete_insns_since (last
);
1741 if (result_part
!= target_part
)
1742 emit_move_insn (target_part
, result_part
);
1747 /* Unless we've filled TARGET, the upper regs in a multi-reg value
1748 need to be zero'd out. */
1749 if (GET_MODE_SIZE (GET_MODE (target
)) > nwords
* UNITS_PER_WORD
)
1751 unsigned int i
, total_words
;
1753 total_words
= GET_MODE_SIZE (GET_MODE (target
)) / UNITS_PER_WORD
;
1754 for (i
= nwords
; i
< total_words
; i
++)
1756 (operand_subword (target
,
1757 backwards
? total_words
- i
- 1 : i
,
1764 /* Signed bit field: sign-extend with two arithmetic shifts. */
1765 target
= expand_shift (LSHIFT_EXPR
, mode
, target
,
1766 GET_MODE_BITSIZE (mode
) - bitsize
, NULL_RTX
, 0);
1767 return expand_shift (RSHIFT_EXPR
, mode
, target
,
1768 GET_MODE_BITSIZE (mode
) - bitsize
, NULL_RTX
, 0);
1771 /* If OP0 is a multi-word register, narrow it to the affected word.
1772 If the region spans two words, defer to extract_split_bit_field. */
1773 if (!MEM_P (op0
) && GET_MODE_SIZE (GET_MODE (op0
)) > UNITS_PER_WORD
)
1775 if (bitnum
% BITS_PER_WORD
+ bitsize
> BITS_PER_WORD
)
1779 target
= extract_split_bit_field (op0
, bitsize
, bitnum
, unsignedp
,
1781 return convert_extracted_bit_field (target
, mode
, tmode
, unsignedp
);
1783 op0
= simplify_gen_subreg (word_mode
, op0
, GET_MODE (op0
),
1784 bitnum
/ BITS_PER_WORD
* UNITS_PER_WORD
);
1785 bitnum
%= BITS_PER_WORD
;
1788 /* From here on we know the desired field is smaller than a word.
1789 If OP0 is a register, it too fits within a word. */
1790 enum extraction_pattern pattern
= unsignedp
? EP_extzv
: EP_extv
;
1791 extraction_insn extv
;
1794 /* ??? We could limit the structure size to the part of OP0 that
1795 contains the field, with appropriate checks for endianness
1796 and TRULY_NOOP_TRUNCATION. */
1797 && get_best_reg_extraction_insn (&extv
, pattern
,
1798 GET_MODE_BITSIZE (GET_MODE (op0
)),
1801 rtx result
= extract_bit_field_using_extv (&extv
, op0
, bitsize
, bitnum
,
1802 unsignedp
, target
, mode
,
1808 /* If OP0 is a memory, try copying it to a register and seeing if a
1809 cheap register alternative is available. */
1810 if (MEM_P (op0
) & !reverse
)
1812 if (get_best_mem_extraction_insn (&extv
, pattern
, bitsize
, bitnum
,
1815 rtx result
= extract_bit_field_using_extv (&extv
, op0
, bitsize
,
1823 rtx_insn
*last
= get_last_insn ();
1825 /* Try loading part of OP0 into a register and extracting the
1826 bitfield from that. */
1827 unsigned HOST_WIDE_INT bitpos
;
1828 rtx xop0
= adjust_bit_field_mem_for_reg (pattern
, op0
, bitsize
, bitnum
,
1829 0, 0, tmode
, &bitpos
);
1832 xop0
= copy_to_reg (xop0
);
1833 rtx result
= extract_bit_field_1 (xop0
, bitsize
, bitpos
,
1835 mode
, tmode
, reverse
, false);
1838 delete_insns_since (last
);
1845 /* Find a correspondingly-sized integer field, so we can apply
1846 shifts and masks to it. */
1847 int_mode
= int_mode_for_mode (tmode
);
1848 if (int_mode
== BLKmode
)
1849 int_mode
= int_mode_for_mode (mode
);
1850 /* Should probably push op0 out to memory and then do a load. */
1851 gcc_assert (int_mode
!= BLKmode
);
1853 target
= extract_fixed_bit_field (int_mode
, op0
, bitsize
, bitnum
, target
,
1854 unsignedp
, reverse
);
1856 /* Complex values must be reversed piecewise, so we need to undo the global
1857 reversal, convert to the complex mode and reverse again. */
1858 if (reverse
&& COMPLEX_MODE_P (tmode
))
1860 target
= flip_storage_order (int_mode
, target
);
1861 target
= convert_extracted_bit_field (target
, mode
, tmode
, unsignedp
);
1862 target
= flip_storage_order (tmode
, target
);
1865 target
= convert_extracted_bit_field (target
, mode
, tmode
, unsignedp
);
1870 /* Generate code to extract a byte-field from STR_RTX
1871 containing BITSIZE bits, starting at BITNUM,
1872 and put it in TARGET if possible (if TARGET is nonzero).
1873 Regardless of TARGET, we return the rtx for where the value is placed.
1875 STR_RTX is the structure containing the byte (a REG or MEM).
1876 UNSIGNEDP is nonzero if this is an unsigned bit field.
1877 MODE is the natural mode of the field value once extracted.
1878 TMODE is the mode the caller would like the value to have;
1879 but the value may be returned with type MODE instead.
1881 If REVERSE is true, the extraction is to be done in reverse order.
1883 If a TARGET is specified and we can store in it at no extra cost,
1884 we do so, and return TARGET.
1885 Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1886 if they are equally easy. */
1889 extract_bit_field (rtx str_rtx
, unsigned HOST_WIDE_INT bitsize
,
1890 unsigned HOST_WIDE_INT bitnum
, int unsignedp
, rtx target
,
1891 machine_mode mode
, machine_mode tmode
, bool reverse
)
1895 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
1896 if (GET_MODE_BITSIZE (GET_MODE (str_rtx
)) > 0)
1897 mode1
= GET_MODE (str_rtx
);
1898 else if (target
&& GET_MODE_BITSIZE (GET_MODE (target
)) > 0)
1899 mode1
= GET_MODE (target
);
1903 if (strict_volatile_bitfield_p (str_rtx
, bitsize
, bitnum
, mode1
, 0, 0))
1905 /* Extraction of a full MODE1 value can be done with a simple load.
1906 We know here that the field can be accessed with one single
1907 instruction. For targets that support unaligned memory,
1908 an unaligned access may be necessary. */
1909 if (bitsize
== GET_MODE_BITSIZE (mode1
))
1911 rtx result
= adjust_bitfield_address (str_rtx
, mode1
,
1912 bitnum
/ BITS_PER_UNIT
);
1914 result
= flip_storage_order (mode1
, result
);
1915 gcc_assert (bitnum
% BITS_PER_UNIT
== 0);
1916 return convert_extracted_bit_field (result
, mode
, tmode
, unsignedp
);
1919 str_rtx
= narrow_bit_field_mem (str_rtx
, mode1
, bitsize
, bitnum
,
1921 gcc_assert (bitnum
+ bitsize
<= GET_MODE_BITSIZE (mode1
));
1922 str_rtx
= copy_to_reg (str_rtx
);
1925 return extract_bit_field_1 (str_rtx
, bitsize
, bitnum
, unsignedp
,
1926 target
, mode
, tmode
, reverse
, true);
1929 /* Use shifts and boolean operations to extract a field of BITSIZE bits
1930 from bit BITNUM of OP0.
1932 UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1933 If REVERSE is true, the extraction is to be done in reverse order.
1935 If TARGET is nonzero, attempts to store the value there
1936 and return TARGET, but this is not guaranteed.
1937 If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */
1940 extract_fixed_bit_field (machine_mode tmode
, rtx op0
,
1941 unsigned HOST_WIDE_INT bitsize
,
1942 unsigned HOST_WIDE_INT bitnum
, rtx target
,
1943 int unsignedp
, bool reverse
)
1948 = get_best_mode (bitsize
, bitnum
, 0, 0, MEM_ALIGN (op0
), word_mode
,
1949 MEM_VOLATILE_P (op0
));
1951 if (mode
== VOIDmode
)
1952 /* The only way this should occur is if the field spans word
1954 return extract_split_bit_field (op0
, bitsize
, bitnum
, unsignedp
,
1957 op0
= narrow_bit_field_mem (op0
, mode
, bitsize
, bitnum
, &bitnum
);
1960 return extract_fixed_bit_field_1 (tmode
, op0
, bitsize
, bitnum
,
1961 target
, unsignedp
, reverse
);
1964 /* Helper function for extract_fixed_bit_field, extracts
1965 the bit field always using the MODE of OP0. */
1968 extract_fixed_bit_field_1 (machine_mode tmode
, rtx op0
,
1969 unsigned HOST_WIDE_INT bitsize
,
1970 unsigned HOST_WIDE_INT bitnum
, rtx target
,
1971 int unsignedp
, bool reverse
)
1973 machine_mode mode
= GET_MODE (op0
);
1974 gcc_assert (SCALAR_INT_MODE_P (mode
));
1976 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1977 for invalid input, such as extract equivalent of f5 from
1978 gcc.dg/pr48335-2.c. */
1980 if (reverse
? !BYTES_BIG_ENDIAN
: BYTES_BIG_ENDIAN
)
1981 /* BITNUM is the distance between our msb and that of OP0.
1982 Convert it to the distance from the lsb. */
1983 bitnum
= GET_MODE_BITSIZE (mode
) - bitsize
- bitnum
;
1985 /* Now BITNUM is always the distance between the field's lsb and that of OP0.
1986 We have reduced the big-endian case to the little-endian case. */
1988 op0
= flip_storage_order (mode
, op0
);
1994 /* If the field does not already start at the lsb,
1995 shift it so it does. */
1996 /* Maybe propagate the target for the shift. */
1997 rtx subtarget
= (target
!= 0 && REG_P (target
) ? target
: 0);
2000 op0
= expand_shift (RSHIFT_EXPR
, mode
, op0
, bitnum
, subtarget
, 1);
2002 /* Convert the value to the desired mode. */
2004 op0
= convert_to_mode (tmode
, op0
, 1);
2006 /* Unless the msb of the field used to be the msb when we shifted,
2007 mask out the upper bits. */
2009 if (GET_MODE_BITSIZE (mode
) != bitnum
+ bitsize
)
2010 return expand_binop (GET_MODE (op0
), and_optab
, op0
,
2011 mask_rtx (GET_MODE (op0
), 0, bitsize
, 0),
2012 target
, 1, OPTAB_LIB_WIDEN
);
2016 /* To extract a signed bit-field, first shift its msb to the msb of the word,
2017 then arithmetic-shift its lsb to the lsb of the word. */
2018 op0
= force_reg (mode
, op0
);
2020 /* Find the narrowest integer mode that contains the field. */
2022 for (mode
= GET_CLASS_NARROWEST_MODE (MODE_INT
); mode
!= VOIDmode
;
2023 mode
= GET_MODE_WIDER_MODE (mode
))
2024 if (GET_MODE_BITSIZE (mode
) >= bitsize
+ bitnum
)
2026 op0
= convert_to_mode (mode
, op0
, 0);
2033 if (GET_MODE_BITSIZE (mode
) != (bitsize
+ bitnum
))
2035 int amount
= GET_MODE_BITSIZE (mode
) - (bitsize
+ bitnum
);
2036 /* Maybe propagate the target for the shift. */
2037 rtx subtarget
= (target
!= 0 && REG_P (target
) ? target
: 0);
2038 op0
= expand_shift (LSHIFT_EXPR
, mode
, op0
, amount
, subtarget
, 1);
2041 return expand_shift (RSHIFT_EXPR
, mode
, op0
,
2042 GET_MODE_BITSIZE (mode
) - bitsize
, target
, 0);
2045 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
2049 lshift_value (machine_mode mode
, unsigned HOST_WIDE_INT value
,
2052 return immed_wide_int_const (wi::lshift (value
, bitpos
), mode
);
2055 /* Extract a bit field that is split across two words
2056 and return an RTX for the result.
2058 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
2059 BITSIZE is the field width; BITPOS, position of its first bit, in the word.
2060 UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.
2062 If REVERSE is true, the extraction is to be done in reverse order. */
2065 extract_split_bit_field (rtx op0
, unsigned HOST_WIDE_INT bitsize
,
2066 unsigned HOST_WIDE_INT bitpos
, int unsignedp
,
2070 unsigned int bitsdone
= 0;
2071 rtx result
= NULL_RTX
;
2074 /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
2076 if (REG_P (op0
) || GET_CODE (op0
) == SUBREG
)
2077 unit
= BITS_PER_WORD
;
2079 unit
= MIN (MEM_ALIGN (op0
), BITS_PER_WORD
);
2081 while (bitsdone
< bitsize
)
2083 unsigned HOST_WIDE_INT thissize
;
2085 unsigned HOST_WIDE_INT thispos
;
2086 unsigned HOST_WIDE_INT offset
;
2088 offset
= (bitpos
+ bitsdone
) / unit
;
2089 thispos
= (bitpos
+ bitsdone
) % unit
;
2091 /* THISSIZE must not overrun a word boundary. Otherwise,
2092 extract_fixed_bit_field will call us again, and we will mutually
2094 thissize
= MIN (bitsize
- bitsdone
, BITS_PER_WORD
);
2095 thissize
= MIN (thissize
, unit
- thispos
);
2097 /* If OP0 is a register, then handle OFFSET here. */
2098 if (SUBREG_P (op0
) || REG_P (op0
))
2100 word
= operand_subword_force (op0
, offset
, GET_MODE (op0
));
2106 /* Extract the parts in bit-counting order,
2107 whose meaning is determined by BYTES_PER_UNIT.
2108 OFFSET is in UNITs, and UNIT is in bits. */
2109 part
= extract_fixed_bit_field (word_mode
, word
, thissize
,
2110 offset
* unit
+ thispos
, 0, 1, reverse
);
2111 bitsdone
+= thissize
;
2113 /* Shift this part into place for the result. */
2114 if (reverse
? !BYTES_BIG_ENDIAN
: BYTES_BIG_ENDIAN
)
2116 if (bitsize
!= bitsdone
)
2117 part
= expand_shift (LSHIFT_EXPR
, word_mode
, part
,
2118 bitsize
- bitsdone
, 0, 1);
2122 if (bitsdone
!= thissize
)
2123 part
= expand_shift (LSHIFT_EXPR
, word_mode
, part
,
2124 bitsdone
- thissize
, 0, 1);
2130 /* Combine the parts with bitwise or. This works
2131 because we extracted each part as an unsigned bit field. */
2132 result
= expand_binop (word_mode
, ior_optab
, part
, result
, NULL_RTX
, 1,
2138 /* Unsigned bit field: we are done. */
2141 /* Signed bit field: sign-extend with two arithmetic shifts. */
2142 result
= expand_shift (LSHIFT_EXPR
, word_mode
, result
,
2143 BITS_PER_WORD
- bitsize
, NULL_RTX
, 0);
2144 return expand_shift (RSHIFT_EXPR
, word_mode
, result
,
2145 BITS_PER_WORD
- bitsize
, NULL_RTX
, 0);
2148 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2149 the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than
2150 MODE, fill the upper bits with zeros. Fail if the layout of either
2151 mode is unknown (as for CC modes) or if the extraction would involve
2152 unprofitable mode punning. Return the value on success, otherwise
2155 This is different from gen_lowpart* in these respects:
2157 - the returned value must always be considered an rvalue
2159 - when MODE is wider than SRC_MODE, the extraction involves
2162 - when MODE is smaller than SRC_MODE, the extraction involves
2163 a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
2165 In other words, this routine performs a computation, whereas the
2166 gen_lowpart* routines are conceptually lvalue or rvalue subreg
2170 extract_low_bits (machine_mode mode
, machine_mode src_mode
, rtx src
)
2172 machine_mode int_mode
, src_int_mode
;
2174 if (mode
== src_mode
)
2177 if (CONSTANT_P (src
))
2179 /* simplify_gen_subreg can't be used here, as if simplify_subreg
2180 fails, it will happily create (subreg (symbol_ref)) or similar
2182 unsigned int byte
= subreg_lowpart_offset (mode
, src_mode
);
2183 rtx ret
= simplify_subreg (mode
, src
, src_mode
, byte
);
2187 if (GET_MODE (src
) == VOIDmode
2188 || !validate_subreg (mode
, src_mode
, src
, byte
))
2191 src
= force_reg (GET_MODE (src
), src
);
2192 return gen_rtx_SUBREG (mode
, src
, byte
);
2195 if (GET_MODE_CLASS (mode
) == MODE_CC
|| GET_MODE_CLASS (src_mode
) == MODE_CC
)
2198 if (GET_MODE_BITSIZE (mode
) == GET_MODE_BITSIZE (src_mode
)
2199 && MODES_TIEABLE_P (mode
, src_mode
))
2201 rtx x
= gen_lowpart_common (mode
, src
);
2206 src_int_mode
= int_mode_for_mode (src_mode
);
2207 int_mode
= int_mode_for_mode (mode
);
2208 if (src_int_mode
== BLKmode
|| int_mode
== BLKmode
)
2211 if (!MODES_TIEABLE_P (src_int_mode
, src_mode
))
2213 if (!MODES_TIEABLE_P (int_mode
, mode
))
2216 src
= gen_lowpart (src_int_mode
, src
);
2217 src
= convert_modes (int_mode
, src_int_mode
, src
, true);
2218 src
= gen_lowpart (mode
, src
);
2222 /* Add INC into TARGET. */
2225 expand_inc (rtx target
, rtx inc
)
2227 rtx value
= expand_binop (GET_MODE (target
), add_optab
,
2229 target
, 0, OPTAB_LIB_WIDEN
);
2230 if (value
!= target
)
2231 emit_move_insn (target
, value
);
2234 /* Subtract DEC from TARGET. */
2237 expand_dec (rtx target
, rtx dec
)
2239 rtx value
= expand_binop (GET_MODE (target
), sub_optab
,
2241 target
, 0, OPTAB_LIB_WIDEN
);
2242 if (value
!= target
)
2243 emit_move_insn (target
, value
);
2246 /* Output a shift instruction for expression code CODE,
2247 with SHIFTED being the rtx for the value to shift,
2248 and AMOUNT the rtx for the amount to shift by.
2249 Store the result in the rtx TARGET, if that is convenient.
2250 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2251 Return the rtx for where the value is.
2252 If that cannot be done, abort the compilation unless MAY_FAIL is true,
2253 in which case 0 is returned. */
2256 expand_shift_1 (enum tree_code code
, machine_mode mode
, rtx shifted
,
2257 rtx amount
, rtx target
, int unsignedp
, bool may_fail
= false)
2260 int left
= (code
== LSHIFT_EXPR
|| code
== LROTATE_EXPR
);
2261 int rotate
= (code
== LROTATE_EXPR
|| code
== RROTATE_EXPR
);
2262 optab lshift_optab
= ashl_optab
;
2263 optab rshift_arith_optab
= ashr_optab
;
2264 optab rshift_uns_optab
= lshr_optab
;
2265 optab lrotate_optab
= rotl_optab
;
2266 optab rrotate_optab
= rotr_optab
;
2267 machine_mode op1_mode
;
2268 machine_mode scalar_mode
= mode
;
2270 bool speed
= optimize_insn_for_speed_p ();
2272 if (VECTOR_MODE_P (mode
))
2273 scalar_mode
= GET_MODE_INNER (mode
);
2275 op1_mode
= GET_MODE (op1
);
2277 /* Determine whether the shift/rotate amount is a vector, or scalar. If the
2278 shift amount is a vector, use the vector/vector shift patterns. */
2279 if (VECTOR_MODE_P (mode
) && VECTOR_MODE_P (op1_mode
))
2281 lshift_optab
= vashl_optab
;
2282 rshift_arith_optab
= vashr_optab
;
2283 rshift_uns_optab
= vlshr_optab
;
2284 lrotate_optab
= vrotl_optab
;
2285 rrotate_optab
= vrotr_optab
;
2288 /* Previously detected shift-counts computed by NEGATE_EXPR
2289 and shifted in the other direction; but that does not work
2292 if (SHIFT_COUNT_TRUNCATED
)
2294 if (CONST_INT_P (op1
)
2295 && ((unsigned HOST_WIDE_INT
) INTVAL (op1
) >=
2296 (unsigned HOST_WIDE_INT
) GET_MODE_BITSIZE (scalar_mode
)))
2297 op1
= GEN_INT ((unsigned HOST_WIDE_INT
) INTVAL (op1
)
2298 % GET_MODE_BITSIZE (scalar_mode
));
2299 else if (GET_CODE (op1
) == SUBREG
2300 && subreg_lowpart_p (op1
)
2301 && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1
)))
2302 && SCALAR_INT_MODE_P (GET_MODE (op1
)))
2303 op1
= SUBREG_REG (op1
);
2306 /* Canonicalize rotates by constant amount. If op1 is bitsize / 2,
2307 prefer left rotation, if op1 is from bitsize / 2 + 1 to
2308 bitsize - 1, use other direction of rotate with 1 .. bitsize / 2 - 1
2311 && CONST_INT_P (op1
)
2312 && IN_RANGE (INTVAL (op1
), GET_MODE_BITSIZE (scalar_mode
) / 2 + left
,
2313 GET_MODE_BITSIZE (scalar_mode
) - 1))
2315 op1
= GEN_INT (GET_MODE_BITSIZE (scalar_mode
) - INTVAL (op1
));
2317 code
= left
? LROTATE_EXPR
: RROTATE_EXPR
;
2320 /* Rotation of 16bit values by 8 bits is effectively equivalent to a bswaphi.
2321 Note that this is not the case for bigger values. For instance a rotation
2322 of 0x01020304 by 16 bits gives 0x03040102 which is different from
2323 0x04030201 (bswapsi). */
2325 && CONST_INT_P (op1
)
2326 && INTVAL (op1
) == BITS_PER_UNIT
2327 && GET_MODE_SIZE (scalar_mode
) == 2
2328 && optab_handler (bswap_optab
, HImode
) != CODE_FOR_nothing
)
2329 return expand_unop (HImode
, bswap_optab
, shifted
, NULL_RTX
,
2332 if (op1
== const0_rtx
)
2335 /* Check whether its cheaper to implement a left shift by a constant
2336 bit count by a sequence of additions. */
2337 if (code
== LSHIFT_EXPR
2338 && CONST_INT_P (op1
)
2340 && INTVAL (op1
) < GET_MODE_PRECISION (scalar_mode
)
2341 && INTVAL (op1
) < MAX_BITS_PER_WORD
2342 && (shift_cost (speed
, mode
, INTVAL (op1
))
2343 > INTVAL (op1
) * add_cost (speed
, mode
))
2344 && shift_cost (speed
, mode
, INTVAL (op1
)) != MAX_COST
)
2347 for (i
= 0; i
< INTVAL (op1
); i
++)
2349 temp
= force_reg (mode
, shifted
);
2350 shifted
= expand_binop (mode
, add_optab
, temp
, temp
, NULL_RTX
,
2351 unsignedp
, OPTAB_LIB_WIDEN
);
2356 for (attempt
= 0; temp
== 0 && attempt
< 3; attempt
++)
2358 enum optab_methods methods
;
2361 methods
= OPTAB_DIRECT
;
2362 else if (attempt
== 1)
2363 methods
= OPTAB_WIDEN
;
2365 methods
= OPTAB_LIB_WIDEN
;
2369 /* Widening does not work for rotation. */
2370 if (methods
== OPTAB_WIDEN
)
2372 else if (methods
== OPTAB_LIB_WIDEN
)
2374 /* If we have been unable to open-code this by a rotation,
2375 do it as the IOR of two shifts. I.e., to rotate A
2377 (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
2378 where C is the bitsize of A.
2380 It is theoretically possible that the target machine might
2381 not be able to perform either shift and hence we would
2382 be making two libcalls rather than just the one for the
2383 shift (similarly if IOR could not be done). We will allow
2384 this extremely unlikely lossage to avoid complicating the
2387 rtx subtarget
= target
== shifted
? 0 : target
;
2388 rtx new_amount
, other_amount
;
2392 if (op1
== const0_rtx
)
2394 else if (CONST_INT_P (op1
))
2395 other_amount
= GEN_INT (GET_MODE_BITSIZE (scalar_mode
)
2400 = simplify_gen_unary (NEG
, GET_MODE (op1
),
2401 op1
, GET_MODE (op1
));
2402 HOST_WIDE_INT mask
= GET_MODE_PRECISION (scalar_mode
) - 1;
2404 = simplify_gen_binary (AND
, GET_MODE (op1
), other_amount
,
2405 gen_int_mode (mask
, GET_MODE (op1
)));
2408 shifted
= force_reg (mode
, shifted
);
2410 temp
= expand_shift_1 (left
? LSHIFT_EXPR
: RSHIFT_EXPR
,
2411 mode
, shifted
, new_amount
, 0, 1);
2412 temp1
= expand_shift_1 (left
? RSHIFT_EXPR
: LSHIFT_EXPR
,
2413 mode
, shifted
, other_amount
,
2415 return expand_binop (mode
, ior_optab
, temp
, temp1
, target
,
2416 unsignedp
, methods
);
2419 temp
= expand_binop (mode
,
2420 left
? lrotate_optab
: rrotate_optab
,
2421 shifted
, op1
, target
, unsignedp
, methods
);
2424 temp
= expand_binop (mode
,
2425 left
? lshift_optab
: rshift_uns_optab
,
2426 shifted
, op1
, target
, unsignedp
, methods
);
2428 /* Do arithmetic shifts.
2429 Also, if we are going to widen the operand, we can just as well
2430 use an arithmetic right-shift instead of a logical one. */
2431 if (temp
== 0 && ! rotate
2432 && (! unsignedp
|| (! left
&& methods
== OPTAB_WIDEN
)))
2434 enum optab_methods methods1
= methods
;
2436 /* If trying to widen a log shift to an arithmetic shift,
2437 don't accept an arithmetic shift of the same size. */
2439 methods1
= OPTAB_MUST_WIDEN
;
2441 /* Arithmetic shift */
2443 temp
= expand_binop (mode
,
2444 left
? lshift_optab
: rshift_arith_optab
,
2445 shifted
, op1
, target
, unsignedp
, methods1
);
2448 /* We used to try extzv here for logical right shifts, but that was
2449 only useful for one machine, the VAX, and caused poor code
2450 generation there for lshrdi3, so the code was deleted and a
2451 define_expand for lshrsi3 was added to vax.md. */
2454 gcc_assert (temp
!= NULL_RTX
|| may_fail
);
2458 /* Output a shift instruction for expression code CODE,
2459 with SHIFTED being the rtx for the value to shift,
2460 and AMOUNT the amount to shift by.
2461 Store the result in the rtx TARGET, if that is convenient.
2462 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2463 Return the rtx for where the value is. */
2466 expand_shift (enum tree_code code
, machine_mode mode
, rtx shifted
,
2467 int amount
, rtx target
, int unsignedp
)
2469 return expand_shift_1 (code
, mode
,
2470 shifted
, GEN_INT (amount
), target
, unsignedp
);
2473 /* Likewise, but return 0 if that cannot be done. */
2476 maybe_expand_shift (enum tree_code code
, machine_mode mode
, rtx shifted
,
2477 int amount
, rtx target
, int unsignedp
)
2479 return expand_shift_1 (code
, mode
,
2480 shifted
, GEN_INT (amount
), target
, unsignedp
, true);
2483 /* Output a shift instruction for expression code CODE,
2484 with SHIFTED being the rtx for the value to shift,
2485 and AMOUNT the tree for the amount to shift by.
2486 Store the result in the rtx TARGET, if that is convenient.
2487 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2488 Return the rtx for where the value is. */
2491 expand_variable_shift (enum tree_code code
, machine_mode mode
, rtx shifted
,
2492 tree amount
, rtx target
, int unsignedp
)
2494 return expand_shift_1 (code
, mode
,
2495 shifted
, expand_normal (amount
), target
, unsignedp
);
2499 static void synth_mult (struct algorithm
*, unsigned HOST_WIDE_INT
,
2500 const struct mult_cost
*, machine_mode mode
);
2501 static rtx
expand_mult_const (machine_mode
, rtx
, HOST_WIDE_INT
, rtx
,
2502 const struct algorithm
*, enum mult_variant
);
2503 static unsigned HOST_WIDE_INT
invert_mod2n (unsigned HOST_WIDE_INT
, int);
2504 static rtx
extract_high_half (machine_mode
, rtx
);
2505 static rtx
expmed_mult_highpart (machine_mode
, rtx
, rtx
, rtx
, int, int);
2506 static rtx
expmed_mult_highpart_optab (machine_mode
, rtx
, rtx
, rtx
,
2508 /* Compute and return the best algorithm for multiplying by T.
2509 The algorithm must cost less than cost_limit
2510 If retval.cost >= COST_LIMIT, no algorithm was found and all
2511 other field of the returned struct are undefined.
2512 MODE is the machine mode of the multiplication. */
2515 synth_mult (struct algorithm
*alg_out
, unsigned HOST_WIDE_INT t
,
2516 const struct mult_cost
*cost_limit
, machine_mode mode
)
2519 struct algorithm
*alg_in
, *best_alg
;
2520 struct mult_cost best_cost
;
2521 struct mult_cost new_limit
;
2522 int op_cost
, op_latency
;
2523 unsigned HOST_WIDE_INT orig_t
= t
;
2524 unsigned HOST_WIDE_INT q
;
2525 int maxm
, hash_index
;
2526 bool cache_hit
= false;
2527 enum alg_code cache_alg
= alg_zero
;
2528 bool speed
= optimize_insn_for_speed_p ();
2530 struct alg_hash_entry
*entry_ptr
;
2532 /* Indicate that no algorithm is yet found. If no algorithm
2533 is found, this value will be returned and indicate failure. */
2534 alg_out
->cost
.cost
= cost_limit
->cost
+ 1;
2535 alg_out
->cost
.latency
= cost_limit
->latency
+ 1;
2537 if (cost_limit
->cost
< 0
2538 || (cost_limit
->cost
== 0 && cost_limit
->latency
<= 0))
2541 /* Be prepared for vector modes. */
2542 imode
= GET_MODE_INNER (mode
);
2544 maxm
= MIN (BITS_PER_WORD
, GET_MODE_BITSIZE (imode
));
2546 /* Restrict the bits of "t" to the multiplication's mode. */
2547 t
&= GET_MODE_MASK (imode
);
2549 /* t == 1 can be done in zero cost. */
2553 alg_out
->cost
.cost
= 0;
2554 alg_out
->cost
.latency
= 0;
2555 alg_out
->op
[0] = alg_m
;
2559 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2563 if (MULT_COST_LESS (cost_limit
, zero_cost (speed
)))
2568 alg_out
->cost
.cost
= zero_cost (speed
);
2569 alg_out
->cost
.latency
= zero_cost (speed
);
2570 alg_out
->op
[0] = alg_zero
;
2575 /* We'll be needing a couple extra algorithm structures now. */
2577 alg_in
= XALLOCA (struct algorithm
);
2578 best_alg
= XALLOCA (struct algorithm
);
2579 best_cost
= *cost_limit
;
2581 /* Compute the hash index. */
2582 hash_index
= (t
^ (unsigned int) mode
^ (speed
* 256)) % NUM_ALG_HASH_ENTRIES
;
2584 /* See if we already know what to do for T. */
2585 entry_ptr
= alg_hash_entry_ptr (hash_index
);
2586 if (entry_ptr
->t
== t
2587 && entry_ptr
->mode
== mode
2588 && entry_ptr
->speed
== speed
2589 && entry_ptr
->alg
!= alg_unknown
)
2591 cache_alg
= entry_ptr
->alg
;
2593 if (cache_alg
== alg_impossible
)
2595 /* The cache tells us that it's impossible to synthesize
2596 multiplication by T within entry_ptr->cost. */
2597 if (!CHEAPER_MULT_COST (&entry_ptr
->cost
, cost_limit
))
2598 /* COST_LIMIT is at least as restrictive as the one
2599 recorded in the hash table, in which case we have no
2600 hope of synthesizing a multiplication. Just
2604 /* If we get here, COST_LIMIT is less restrictive than the
2605 one recorded in the hash table, so we may be able to
2606 synthesize a multiplication. Proceed as if we didn't
2607 have the cache entry. */
2611 if (CHEAPER_MULT_COST (cost_limit
, &entry_ptr
->cost
))
2612 /* The cached algorithm shows that this multiplication
2613 requires more cost than COST_LIMIT. Just return. This
2614 way, we don't clobber this cache entry with
2615 alg_impossible but retain useful information. */
2627 goto do_alg_addsub_t_m2
;
2629 case alg_add_factor
:
2630 case alg_sub_factor
:
2631 goto do_alg_addsub_factor
;
2634 goto do_alg_add_t2_m
;
2637 goto do_alg_sub_t2_m
;
2645 /* If we have a group of zero bits at the low-order part of T, try
2646 multiplying by the remaining bits and then doing a shift. */
2651 m
= ctz_or_zero (t
); /* m = number of low zero bits */
2655 /* The function expand_shift will choose between a shift and
2656 a sequence of additions, so the observed cost is given as
2657 MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)). */
2658 op_cost
= m
* add_cost (speed
, mode
);
2659 if (shift_cost (speed
, mode
, m
) < op_cost
)
2660 op_cost
= shift_cost (speed
, mode
, m
);
2661 new_limit
.cost
= best_cost
.cost
- op_cost
;
2662 new_limit
.latency
= best_cost
.latency
- op_cost
;
2663 synth_mult (alg_in
, q
, &new_limit
, mode
);
2665 alg_in
->cost
.cost
+= op_cost
;
2666 alg_in
->cost
.latency
+= op_cost
;
2667 if (CHEAPER_MULT_COST (&alg_in
->cost
, &best_cost
))
2669 best_cost
= alg_in
->cost
;
2670 std::swap (alg_in
, best_alg
);
2671 best_alg
->log
[best_alg
->ops
] = m
;
2672 best_alg
->op
[best_alg
->ops
] = alg_shift
;
2675 /* See if treating ORIG_T as a signed number yields a better
2676 sequence. Try this sequence only for a negative ORIG_T
2677 as it would be useless for a non-negative ORIG_T. */
2678 if ((HOST_WIDE_INT
) orig_t
< 0)
2680 /* Shift ORIG_T as follows because a right shift of a
2681 negative-valued signed type is implementation
2683 q
= ~(~orig_t
>> m
);
2684 /* The function expand_shift will choose between a shift
2685 and a sequence of additions, so the observed cost is
2686 given as MIN (m * add_cost(speed, mode),
2687 shift_cost(speed, mode, m)). */
2688 op_cost
= m
* add_cost (speed
, mode
);
2689 if (shift_cost (speed
, mode
, m
) < op_cost
)
2690 op_cost
= shift_cost (speed
, mode
, m
);
2691 new_limit
.cost
= best_cost
.cost
- op_cost
;
2692 new_limit
.latency
= best_cost
.latency
- op_cost
;
2693 synth_mult (alg_in
, q
, &new_limit
, mode
);
2695 alg_in
->cost
.cost
+= op_cost
;
2696 alg_in
->cost
.latency
+= op_cost
;
2697 if (CHEAPER_MULT_COST (&alg_in
->cost
, &best_cost
))
2699 best_cost
= alg_in
->cost
;
2700 std::swap (alg_in
, best_alg
);
2701 best_alg
->log
[best_alg
->ops
] = m
;
2702 best_alg
->op
[best_alg
->ops
] = alg_shift
;
2710 /* If we have an odd number, add or subtract one. */
2713 unsigned HOST_WIDE_INT w
;
2716 for (w
= 1; (w
& t
) != 0; w
<<= 1)
2718 /* If T was -1, then W will be zero after the loop. This is another
2719 case where T ends with ...111. Handling this with (T + 1) and
2720 subtract 1 produces slightly better code and results in algorithm
2721 selection much faster than treating it like the ...0111 case
2725 /* Reject the case where t is 3.
2726 Thus we prefer addition in that case. */
2729 /* T ends with ...111. Multiply by (T + 1) and subtract T. */
2731 op_cost
= add_cost (speed
, mode
);
2732 new_limit
.cost
= best_cost
.cost
- op_cost
;
2733 new_limit
.latency
= best_cost
.latency
- op_cost
;
2734 synth_mult (alg_in
, t
+ 1, &new_limit
, mode
);
2736 alg_in
->cost
.cost
+= op_cost
;
2737 alg_in
->cost
.latency
+= op_cost
;
2738 if (CHEAPER_MULT_COST (&alg_in
->cost
, &best_cost
))
2740 best_cost
= alg_in
->cost
;
2741 std::swap (alg_in
, best_alg
);
2742 best_alg
->log
[best_alg
->ops
] = 0;
2743 best_alg
->op
[best_alg
->ops
] = alg_sub_t_m2
;
2748 /* T ends with ...01 or ...011. Multiply by (T - 1) and add T. */
2750 op_cost
= add_cost (speed
, mode
);
2751 new_limit
.cost
= best_cost
.cost
- op_cost
;
2752 new_limit
.latency
= best_cost
.latency
- op_cost
;
2753 synth_mult (alg_in
, t
- 1, &new_limit
, mode
);
2755 alg_in
->cost
.cost
+= op_cost
;
2756 alg_in
->cost
.latency
+= op_cost
;
2757 if (CHEAPER_MULT_COST (&alg_in
->cost
, &best_cost
))
2759 best_cost
= alg_in
->cost
;
2760 std::swap (alg_in
, best_alg
);
2761 best_alg
->log
[best_alg
->ops
] = 0;
2762 best_alg
->op
[best_alg
->ops
] = alg_add_t_m2
;
2766 /* We may be able to calculate a * -7, a * -15, a * -31, etc
2767 quickly with a - a * n for some appropriate constant n. */
2768 m
= exact_log2 (-orig_t
+ 1);
2769 if (m
>= 0 && m
< maxm
)
2771 op_cost
= add_cost (speed
, mode
) + shift_cost (speed
, mode
, m
);
2772 /* If the target has a cheap shift-and-subtract insn use
2773 that in preference to a shift insn followed by a sub insn.
2774 Assume that the shift-and-sub is "atomic" with a latency
2775 equal to it's cost, otherwise assume that on superscalar
2776 hardware the shift may be executed concurrently with the
2777 earlier steps in the algorithm. */
2778 if (shiftsub1_cost (speed
, mode
, m
) <= op_cost
)
2780 op_cost
= shiftsub1_cost (speed
, mode
, m
);
2781 op_latency
= op_cost
;
2784 op_latency
= add_cost (speed
, mode
);
2786 new_limit
.cost
= best_cost
.cost
- op_cost
;
2787 new_limit
.latency
= best_cost
.latency
- op_latency
;
2788 synth_mult (alg_in
, (unsigned HOST_WIDE_INT
) (-orig_t
+ 1) >> m
,
2791 alg_in
->cost
.cost
+= op_cost
;
2792 alg_in
->cost
.latency
+= op_latency
;
2793 if (CHEAPER_MULT_COST (&alg_in
->cost
, &best_cost
))
2795 best_cost
= alg_in
->cost
;
2796 std::swap (alg_in
, best_alg
);
2797 best_alg
->log
[best_alg
->ops
] = m
;
2798 best_alg
->op
[best_alg
->ops
] = alg_sub_t_m2
;
2806 /* Look for factors of t of the form
2807 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2808 If we find such a factor, we can multiply by t using an algorithm that
2809 multiplies by q, shift the result by m and add/subtract it to itself.
2811 We search for large factors first and loop down, even if large factors
2812 are less probable than small; if we find a large factor we will find a
2813 good sequence quickly, and therefore be able to prune (by decreasing
2814 COST_LIMIT) the search. */
2816 do_alg_addsub_factor
:
2817 for (m
= floor_log2 (t
- 1); m
>= 2; m
--)
2819 unsigned HOST_WIDE_INT d
;
2821 d
= (HOST_WIDE_INT_1U
<< m
) + 1;
2822 if (t
% d
== 0 && t
> d
&& m
< maxm
2823 && (!cache_hit
|| cache_alg
== alg_add_factor
))
2825 op_cost
= add_cost (speed
, mode
) + shift_cost (speed
, mode
, m
);
2826 if (shiftadd_cost (speed
, mode
, m
) <= op_cost
)
2827 op_cost
= shiftadd_cost (speed
, mode
, m
);
2829 op_latency
= op_cost
;
2832 new_limit
.cost
= best_cost
.cost
- op_cost
;
2833 new_limit
.latency
= best_cost
.latency
- op_latency
;
2834 synth_mult (alg_in
, t
/ d
, &new_limit
, mode
);
2836 alg_in
->cost
.cost
+= op_cost
;
2837 alg_in
->cost
.latency
+= op_latency
;
2838 if (alg_in
->cost
.latency
< op_cost
)
2839 alg_in
->cost
.latency
= op_cost
;
2840 if (CHEAPER_MULT_COST (&alg_in
->cost
, &best_cost
))
2842 best_cost
= alg_in
->cost
;
2843 std::swap (alg_in
, best_alg
);
2844 best_alg
->log
[best_alg
->ops
] = m
;
2845 best_alg
->op
[best_alg
->ops
] = alg_add_factor
;
2847 /* Other factors will have been taken care of in the recursion. */
2851 d
= (HOST_WIDE_INT_1U
<< m
) - 1;
2852 if (t
% d
== 0 && t
> d
&& m
< maxm
2853 && (!cache_hit
|| cache_alg
== alg_sub_factor
))
2855 op_cost
= add_cost (speed
, mode
) + shift_cost (speed
, mode
, m
);
2856 if (shiftsub0_cost (speed
, mode
, m
) <= op_cost
)
2857 op_cost
= shiftsub0_cost (speed
, mode
, m
);
2859 op_latency
= op_cost
;
2861 new_limit
.cost
= best_cost
.cost
- op_cost
;
2862 new_limit
.latency
= best_cost
.latency
- op_latency
;
2863 synth_mult (alg_in
, t
/ d
, &new_limit
, mode
);
2865 alg_in
->cost
.cost
+= op_cost
;
2866 alg_in
->cost
.latency
+= op_latency
;
2867 if (alg_in
->cost
.latency
< op_cost
)
2868 alg_in
->cost
.latency
= op_cost
;
2869 if (CHEAPER_MULT_COST (&alg_in
->cost
, &best_cost
))
2871 best_cost
= alg_in
->cost
;
2872 std::swap (alg_in
, best_alg
);
2873 best_alg
->log
[best_alg
->ops
] = m
;
2874 best_alg
->op
[best_alg
->ops
] = alg_sub_factor
;
2882 /* Try shift-and-add (load effective address) instructions,
2883 i.e. do a*3, a*5, a*9. */
2891 op_cost
= shiftadd_cost (speed
, mode
, m
);
2892 new_limit
.cost
= best_cost
.cost
- op_cost
;
2893 new_limit
.latency
= best_cost
.latency
- op_cost
;
2894 synth_mult (alg_in
, (t
- 1) >> m
, &new_limit
, mode
);
2896 alg_in
->cost
.cost
+= op_cost
;
2897 alg_in
->cost
.latency
+= op_cost
;
2898 if (CHEAPER_MULT_COST (&alg_in
->cost
, &best_cost
))
2900 best_cost
= alg_in
->cost
;
2901 std::swap (alg_in
, best_alg
);
2902 best_alg
->log
[best_alg
->ops
] = m
;
2903 best_alg
->op
[best_alg
->ops
] = alg_add_t2_m
;
2914 op_cost
= shiftsub0_cost (speed
, mode
, m
);
2915 new_limit
.cost
= best_cost
.cost
- op_cost
;
2916 new_limit
.latency
= best_cost
.latency
- op_cost
;
2917 synth_mult (alg_in
, (t
+ 1) >> m
, &new_limit
, mode
);
2919 alg_in
->cost
.cost
+= op_cost
;
2920 alg_in
->cost
.latency
+= op_cost
;
2921 if (CHEAPER_MULT_COST (&alg_in
->cost
, &best_cost
))
2923 best_cost
= alg_in
->cost
;
2924 std::swap (alg_in
, best_alg
);
2925 best_alg
->log
[best_alg
->ops
] = m
;
2926 best_alg
->op
[best_alg
->ops
] = alg_sub_t2_m
;
2934 /* If best_cost has not decreased, we have not found any algorithm. */
2935 if (!CHEAPER_MULT_COST (&best_cost
, cost_limit
))
2937 /* We failed to find an algorithm. Record alg_impossible for
2938 this case (that is, <T, MODE, COST_LIMIT>) so that next time
2939 we are asked to find an algorithm for T within the same or
2940 lower COST_LIMIT, we can immediately return to the
2943 entry_ptr
->mode
= mode
;
2944 entry_ptr
->speed
= speed
;
2945 entry_ptr
->alg
= alg_impossible
;
2946 entry_ptr
->cost
= *cost_limit
;
2950 /* Cache the result. */
2954 entry_ptr
->mode
= mode
;
2955 entry_ptr
->speed
= speed
;
2956 entry_ptr
->alg
= best_alg
->op
[best_alg
->ops
];
2957 entry_ptr
->cost
.cost
= best_cost
.cost
;
2958 entry_ptr
->cost
.latency
= best_cost
.latency
;
2961 /* If we are getting a too long sequence for `struct algorithm'
2962 to record, make this search fail. */
2963 if (best_alg
->ops
== MAX_BITS_PER_WORD
)
2966 /* Copy the algorithm from temporary space to the space at alg_out.
2967 We avoid using structure assignment because the majority of
2968 best_alg is normally undefined, and this is a critical function. */
2969 alg_out
->ops
= best_alg
->ops
+ 1;
2970 alg_out
->cost
= best_cost
;
2971 memcpy (alg_out
->op
, best_alg
->op
,
2972 alg_out
->ops
* sizeof *alg_out
->op
);
2973 memcpy (alg_out
->log
, best_alg
->log
,
2974 alg_out
->ops
* sizeof *alg_out
->log
);
2977 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2978 Try three variations:
2980 - a shift/add sequence based on VAL itself
2981 - a shift/add sequence based on -VAL, followed by a negation
2982 - a shift/add sequence based on VAL - 1, followed by an addition.
2984 Return true if the cheapest of these cost less than MULT_COST,
2985 describing the algorithm in *ALG and final fixup in *VARIANT. */
2988 choose_mult_variant (machine_mode mode
, HOST_WIDE_INT val
,
2989 struct algorithm
*alg
, enum mult_variant
*variant
,
2992 struct algorithm alg2
;
2993 struct mult_cost limit
;
2995 bool speed
= optimize_insn_for_speed_p ();
2997 /* Fail quickly for impossible bounds. */
3001 /* Ensure that mult_cost provides a reasonable upper bound.
3002 Any constant multiplication can be performed with less
3003 than 2 * bits additions. */
3004 op_cost
= 2 * GET_MODE_UNIT_BITSIZE (mode
) * add_cost (speed
, mode
);
3005 if (mult_cost
> op_cost
)
3006 mult_cost
= op_cost
;
3008 *variant
= basic_variant
;
3009 limit
.cost
= mult_cost
;
3010 limit
.latency
= mult_cost
;
3011 synth_mult (alg
, val
, &limit
, mode
);
3013 /* This works only if the inverted value actually fits in an
3015 if (HOST_BITS_PER_INT
>= GET_MODE_UNIT_BITSIZE (mode
))
3017 op_cost
= neg_cost (speed
, mode
);
3018 if (MULT_COST_LESS (&alg
->cost
, mult_cost
))
3020 limit
.cost
= alg
->cost
.cost
- op_cost
;
3021 limit
.latency
= alg
->cost
.latency
- op_cost
;
3025 limit
.cost
= mult_cost
- op_cost
;
3026 limit
.latency
= mult_cost
- op_cost
;
3029 synth_mult (&alg2
, -val
, &limit
, mode
);
3030 alg2
.cost
.cost
+= op_cost
;
3031 alg2
.cost
.latency
+= op_cost
;
3032 if (CHEAPER_MULT_COST (&alg2
.cost
, &alg
->cost
))
3033 *alg
= alg2
, *variant
= negate_variant
;
3036 /* This proves very useful for division-by-constant. */
3037 op_cost
= add_cost (speed
, mode
);
3038 if (MULT_COST_LESS (&alg
->cost
, mult_cost
))
3040 limit
.cost
= alg
->cost
.cost
- op_cost
;
3041 limit
.latency
= alg
->cost
.latency
- op_cost
;
3045 limit
.cost
= mult_cost
- op_cost
;
3046 limit
.latency
= mult_cost
- op_cost
;
3049 synth_mult (&alg2
, val
- 1, &limit
, mode
);
3050 alg2
.cost
.cost
+= op_cost
;
3051 alg2
.cost
.latency
+= op_cost
;
3052 if (CHEAPER_MULT_COST (&alg2
.cost
, &alg
->cost
))
3053 *alg
= alg2
, *variant
= add_variant
;
3055 return MULT_COST_LESS (&alg
->cost
, mult_cost
);
3058 /* A subroutine of expand_mult, used for constant multiplications.
3059 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
3060 convenient. Use the shift/add sequence described by ALG and apply
3061 the final fixup specified by VARIANT. */
3064 expand_mult_const (machine_mode mode
, rtx op0
, HOST_WIDE_INT val
,
3065 rtx target
, const struct algorithm
*alg
,
3066 enum mult_variant variant
)
3068 unsigned HOST_WIDE_INT val_so_far
;
3074 /* Avoid referencing memory over and over and invalid sharing
3076 op0
= force_reg (mode
, op0
);
3078 /* ACCUM starts out either as OP0 or as a zero, depending on
3079 the first operation. */
3081 if (alg
->op
[0] == alg_zero
)
3083 accum
= copy_to_mode_reg (mode
, CONST0_RTX (mode
));
3086 else if (alg
->op
[0] == alg_m
)
3088 accum
= copy_to_mode_reg (mode
, op0
);
3094 for (opno
= 1; opno
< alg
->ops
; opno
++)
3096 int log
= alg
->log
[opno
];
3097 rtx shift_subtarget
= optimize
? 0 : accum
;
3099 = (opno
== alg
->ops
- 1 && target
!= 0 && variant
!= add_variant
3102 rtx accum_target
= optimize
? 0 : accum
;
3105 switch (alg
->op
[opno
])
3108 tem
= expand_shift (LSHIFT_EXPR
, mode
, accum
, log
, NULL_RTX
, 0);
3109 /* REG_EQUAL note will be attached to the following insn. */
3110 emit_move_insn (accum
, tem
);
3115 tem
= expand_shift (LSHIFT_EXPR
, mode
, op0
, log
, NULL_RTX
, 0);
3116 accum
= force_operand (gen_rtx_PLUS (mode
, accum
, tem
),
3117 add_target
? add_target
: accum_target
);
3118 val_so_far
+= HOST_WIDE_INT_1U
<< log
;
3122 tem
= expand_shift (LSHIFT_EXPR
, mode
, op0
, log
, NULL_RTX
, 0);
3123 accum
= force_operand (gen_rtx_MINUS (mode
, accum
, tem
),
3124 add_target
? add_target
: accum_target
);
3125 val_so_far
-= HOST_WIDE_INT_1U
<< log
;
3129 accum
= expand_shift (LSHIFT_EXPR
, mode
, accum
,
3130 log
, shift_subtarget
, 0);
3131 accum
= force_operand (gen_rtx_PLUS (mode
, accum
, op0
),
3132 add_target
? add_target
: accum_target
);
3133 val_so_far
= (val_so_far
<< log
) + 1;
3137 accum
= expand_shift (LSHIFT_EXPR
, mode
, accum
,
3138 log
, shift_subtarget
, 0);
3139 accum
= force_operand (gen_rtx_MINUS (mode
, accum
, op0
),
3140 add_target
? add_target
: accum_target
);
3141 val_so_far
= (val_so_far
<< log
) - 1;
3144 case alg_add_factor
:
3145 tem
= expand_shift (LSHIFT_EXPR
, mode
, accum
, log
, NULL_RTX
, 0);
3146 accum
= force_operand (gen_rtx_PLUS (mode
, accum
, tem
),
3147 add_target
? add_target
: accum_target
);
3148 val_so_far
+= val_so_far
<< log
;
3151 case alg_sub_factor
:
3152 tem
= expand_shift (LSHIFT_EXPR
, mode
, accum
, log
, NULL_RTX
, 0);
3153 accum
= force_operand (gen_rtx_MINUS (mode
, tem
, accum
),
3155 ? add_target
: (optimize
? 0 : tem
)));
3156 val_so_far
= (val_so_far
<< log
) - val_so_far
;
3163 if (SCALAR_INT_MODE_P (mode
))
3165 /* Write a REG_EQUAL note on the last insn so that we can cse
3166 multiplication sequences. Note that if ACCUM is a SUBREG,
3167 we've set the inner register and must properly indicate that. */
3168 tem
= op0
, nmode
= mode
;
3169 accum_inner
= accum
;
3170 if (GET_CODE (accum
) == SUBREG
)
3172 accum_inner
= SUBREG_REG (accum
);
3173 nmode
= GET_MODE (accum_inner
);
3174 tem
= gen_lowpart (nmode
, op0
);
3177 insn
= get_last_insn ();
3178 set_dst_reg_note (insn
, REG_EQUAL
,
3179 gen_rtx_MULT (nmode
, tem
,
3180 gen_int_mode (val_so_far
, nmode
)),
3185 if (variant
== negate_variant
)
3187 val_so_far
= -val_so_far
;
3188 accum
= expand_unop (mode
, neg_optab
, accum
, target
, 0);
3190 else if (variant
== add_variant
)
3192 val_so_far
= val_so_far
+ 1;
3193 accum
= force_operand (gen_rtx_PLUS (mode
, accum
, op0
), target
);
3196 /* Compare only the bits of val and val_so_far that are significant
3197 in the result mode, to avoid sign-/zero-extension confusion. */
3198 nmode
= GET_MODE_INNER (mode
);
3199 val
&= GET_MODE_MASK (nmode
);
3200 val_so_far
&= GET_MODE_MASK (nmode
);
3201 gcc_assert (val
== (HOST_WIDE_INT
) val_so_far
);
3206 /* Perform a multiplication and return an rtx for the result.
3207 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3208 TARGET is a suggestion for where to store the result (an rtx).
3210 We check specially for a constant integer as OP1.
3211 If you want this check for OP0 as well, then before calling
3212 you should swap the two operands if OP0 would be constant. */
3215 expand_mult (machine_mode mode
, rtx op0
, rtx op1
, rtx target
,
3218 enum mult_variant variant
;
3219 struct algorithm algorithm
;
3222 bool speed
= optimize_insn_for_speed_p ();
3223 bool do_trapv
= flag_trapv
&& SCALAR_INT_MODE_P (mode
) && !unsignedp
;
3225 if (CONSTANT_P (op0
))
3226 std::swap (op0
, op1
);
3228 /* For vectors, there are several simplifications that can be made if
3229 all elements of the vector constant are identical. */
3230 scalar_op1
= unwrap_const_vec_duplicate (op1
);
3232 if (INTEGRAL_MODE_P (mode
))
3235 HOST_WIDE_INT coeff
;
3239 if (op1
== CONST0_RTX (mode
))
3241 if (op1
== CONST1_RTX (mode
))
3243 if (op1
== CONSTM1_RTX (mode
))
3244 return expand_unop (mode
, do_trapv
? negv_optab
: neg_optab
,
3250 /* If mode is integer vector mode, check if the backend supports
3251 vector lshift (by scalar or vector) at all. If not, we can't use
3252 synthetized multiply. */
3253 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
3254 && optab_handler (vashl_optab
, mode
) == CODE_FOR_nothing
3255 && optab_handler (ashl_optab
, mode
) == CODE_FOR_nothing
)
3258 /* These are the operations that are potentially turned into
3259 a sequence of shifts and additions. */
3260 mode_bitsize
= GET_MODE_UNIT_BITSIZE (mode
);
3262 /* synth_mult does an `unsigned int' multiply. As long as the mode is
3263 less than or equal in size to `unsigned int' this doesn't matter.
3264 If the mode is larger than `unsigned int', then synth_mult works
3265 only if the constant value exactly fits in an `unsigned int' without
3266 any truncation. This means that multiplying by negative values does
3267 not work; results are off by 2^32 on a 32 bit machine. */
3268 if (CONST_INT_P (scalar_op1
))
3270 coeff
= INTVAL (scalar_op1
);
3273 #if TARGET_SUPPORTS_WIDE_INT
3274 else if (CONST_WIDE_INT_P (scalar_op1
))
3276 else if (CONST_DOUBLE_AS_INT_P (scalar_op1
))
3279 int shift
= wi::exact_log2 (rtx_mode_t (scalar_op1
, mode
));
3280 /* Perfect power of 2 (other than 1, which is handled above). */
3282 return expand_shift (LSHIFT_EXPR
, mode
, op0
,
3283 shift
, target
, unsignedp
);
3290 /* We used to test optimize here, on the grounds that it's better to
3291 produce a smaller program when -O is not used. But this causes
3292 such a terrible slowdown sometimes that it seems better to always
3295 /* Special case powers of two. */
3296 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff
)
3297 && !(is_neg
&& mode_bitsize
> HOST_BITS_PER_WIDE_INT
))
3298 return expand_shift (LSHIFT_EXPR
, mode
, op0
,
3299 floor_log2 (coeff
), target
, unsignedp
);
3301 fake_reg
= gen_raw_REG (mode
, LAST_VIRTUAL_REGISTER
+ 1);
3303 /* Attempt to handle multiplication of DImode values by negative
3304 coefficients, by performing the multiplication by a positive
3305 multiplier and then inverting the result. */
3306 if (is_neg
&& mode_bitsize
> HOST_BITS_PER_WIDE_INT
)
3308 /* Its safe to use -coeff even for INT_MIN, as the
3309 result is interpreted as an unsigned coefficient.
3310 Exclude cost of op0 from max_cost to match the cost
3311 calculation of the synth_mult. */
3312 coeff
= -(unsigned HOST_WIDE_INT
) coeff
;
3313 max_cost
= (set_src_cost (gen_rtx_MULT (mode
, fake_reg
, op1
),
3315 - neg_cost (speed
, mode
));
3319 /* Special case powers of two. */
3320 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff
))
3322 rtx temp
= expand_shift (LSHIFT_EXPR
, mode
, op0
,
3323 floor_log2 (coeff
), target
, unsignedp
);
3324 return expand_unop (mode
, neg_optab
, temp
, target
, 0);
3327 if (choose_mult_variant (mode
, coeff
, &algorithm
, &variant
,
3330 rtx temp
= expand_mult_const (mode
, op0
, coeff
, NULL_RTX
,
3331 &algorithm
, variant
);
3332 return expand_unop (mode
, neg_optab
, temp
, target
, 0);
3337 /* Exclude cost of op0 from max_cost to match the cost
3338 calculation of the synth_mult. */
3339 max_cost
= set_src_cost (gen_rtx_MULT (mode
, fake_reg
, op1
), mode
, speed
);
3340 if (choose_mult_variant (mode
, coeff
, &algorithm
, &variant
, max_cost
))
3341 return expand_mult_const (mode
, op0
, coeff
, target
,
3342 &algorithm
, variant
);
3346 /* Expand x*2.0 as x+x. */
3347 if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1
)
3348 && real_equal (CONST_DOUBLE_REAL_VALUE (scalar_op1
), &dconst2
))
3350 op0
= force_reg (GET_MODE (op0
), op0
);
3351 return expand_binop (mode
, add_optab
, op0
, op0
,
3352 target
, unsignedp
, OPTAB_LIB_WIDEN
);
3355 /* This used to use umul_optab if unsigned, but for non-widening multiply
3356 there is no difference between signed and unsigned. */
3357 op0
= expand_binop (mode
, do_trapv
? smulv_optab
: smul_optab
,
3358 op0
, op1
, target
, unsignedp
, OPTAB_LIB_WIDEN
);
3363 /* Return a cost estimate for multiplying a register by the given
3364 COEFFicient in the given MODE and SPEED. */
3367 mult_by_coeff_cost (HOST_WIDE_INT coeff
, machine_mode mode
, bool speed
)
3370 struct algorithm algorithm
;
3371 enum mult_variant variant
;
3373 rtx fake_reg
= gen_raw_REG (mode
, LAST_VIRTUAL_REGISTER
+ 1);
3374 max_cost
= set_src_cost (gen_rtx_MULT (mode
, fake_reg
, fake_reg
),
3376 if (choose_mult_variant (mode
, coeff
, &algorithm
, &variant
, max_cost
))
3377 return algorithm
.cost
.cost
;
3382 /* Perform a widening multiplication and return an rtx for the result.
3383 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3384 TARGET is a suggestion for where to store the result (an rtx).
3385 THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3386 or smul_widen_optab.
3388 We check specially for a constant integer as OP1, comparing the
3389 cost of a widening multiply against the cost of a sequence of shifts
3393 expand_widening_mult (machine_mode mode
, rtx op0
, rtx op1
, rtx target
,
3394 int unsignedp
, optab this_optab
)
3396 bool speed
= optimize_insn_for_speed_p ();
3399 if (CONST_INT_P (op1
)
3400 && GET_MODE (op0
) != VOIDmode
3401 && (cop1
= convert_modes (mode
, GET_MODE (op0
), op1
,
3402 this_optab
== umul_widen_optab
))
3403 && CONST_INT_P (cop1
)
3404 && (INTVAL (cop1
) >= 0
3405 || HWI_COMPUTABLE_MODE_P (mode
)))
3407 HOST_WIDE_INT coeff
= INTVAL (cop1
);
3409 enum mult_variant variant
;
3410 struct algorithm algorithm
;
3413 return CONST0_RTX (mode
);
3415 /* Special case powers of two. */
3416 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff
))
3418 op0
= convert_to_mode (mode
, op0
, this_optab
== umul_widen_optab
);
3419 return expand_shift (LSHIFT_EXPR
, mode
, op0
,
3420 floor_log2 (coeff
), target
, unsignedp
);
3423 /* Exclude cost of op0 from max_cost to match the cost
3424 calculation of the synth_mult. */
3425 max_cost
= mul_widen_cost (speed
, mode
);
3426 if (choose_mult_variant (mode
, coeff
, &algorithm
, &variant
,
3429 op0
= convert_to_mode (mode
, op0
, this_optab
== umul_widen_optab
);
3430 return expand_mult_const (mode
, op0
, coeff
, target
,
3431 &algorithm
, variant
);
3434 return expand_binop (mode
, this_optab
, op0
, op1
, target
,
3435 unsignedp
, OPTAB_LIB_WIDEN
);
3438 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3439 replace division by D, and put the least significant N bits of the result
3440 in *MULTIPLIER_PTR and return the most significant bit.
3442 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3443 needed precision is in PRECISION (should be <= N).
3445 PRECISION should be as small as possible so this function can choose
3446 multiplier more freely.
3448 The rounded-up logarithm of D is placed in *lgup_ptr. A shift count that
3449 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3451 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3452 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
3454 unsigned HOST_WIDE_INT
3455 choose_multiplier (unsigned HOST_WIDE_INT d
, int n
, int precision
,
3456 unsigned HOST_WIDE_INT
*multiplier_ptr
,
3457 int *post_shift_ptr
, int *lgup_ptr
)
3459 int lgup
, post_shift
;
3462 /* lgup = ceil(log2(divisor)); */
3463 lgup
= ceil_log2 (d
);
3465 gcc_assert (lgup
<= n
);
3468 pow2
= n
+ lgup
- precision
;
3470 /* mlow = 2^(N + lgup)/d */
3471 wide_int val
= wi::set_bit_in_zero (pow
, HOST_BITS_PER_DOUBLE_INT
);
3472 wide_int mlow
= wi::udiv_trunc (val
, d
);
3474 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3475 val
|= wi::set_bit_in_zero (pow2
, HOST_BITS_PER_DOUBLE_INT
);
3476 wide_int mhigh
= wi::udiv_trunc (val
, d
);
3478 /* If precision == N, then mlow, mhigh exceed 2^N
3479 (but they do not exceed 2^(N+1)). */
3481 /* Reduce to lowest terms. */
3482 for (post_shift
= lgup
; post_shift
> 0; post_shift
--)
3484 unsigned HOST_WIDE_INT ml_lo
= wi::extract_uhwi (mlow
, 1,
3485 HOST_BITS_PER_WIDE_INT
);
3486 unsigned HOST_WIDE_INT mh_lo
= wi::extract_uhwi (mhigh
, 1,
3487 HOST_BITS_PER_WIDE_INT
);
3491 mlow
= wi::uhwi (ml_lo
, HOST_BITS_PER_DOUBLE_INT
);
3492 mhigh
= wi::uhwi (mh_lo
, HOST_BITS_PER_DOUBLE_INT
);
3495 *post_shift_ptr
= post_shift
;
3497 if (n
< HOST_BITS_PER_WIDE_INT
)
3499 unsigned HOST_WIDE_INT mask
= (HOST_WIDE_INT_1U
<< n
) - 1;
3500 *multiplier_ptr
= mhigh
.to_uhwi () & mask
;
3501 return mhigh
.to_uhwi () >= mask
;
3505 *multiplier_ptr
= mhigh
.to_uhwi ();
3506 return wi::extract_uhwi (mhigh
, HOST_BITS_PER_WIDE_INT
, 1);
3510 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3511 congruent to 1 (mod 2**N). */
3513 static unsigned HOST_WIDE_INT
3514 invert_mod2n (unsigned HOST_WIDE_INT x
, int n
)
3516 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
3518 /* The algorithm notes that the choice y = x satisfies
3519 x*y == 1 mod 2^3, since x is assumed odd.
3520 Each iteration doubles the number of bits of significance in y. */
3522 unsigned HOST_WIDE_INT mask
;
3523 unsigned HOST_WIDE_INT y
= x
;
3526 mask
= (n
== HOST_BITS_PER_WIDE_INT
3528 : (HOST_WIDE_INT_1U
<< n
) - 1);
3532 y
= y
* (2 - x
*y
) & mask
; /* Modulo 2^N */
3538 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3539 flavor of OP0 and OP1. ADJ_OPERAND is already the high half of the
3540 product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed product
3541 to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3544 The result is put in TARGET if that is convenient.
3546 MODE is the mode of operation. */
3549 expand_mult_highpart_adjust (machine_mode mode
, rtx adj_operand
, rtx op0
,
3550 rtx op1
, rtx target
, int unsignedp
)
3553 enum rtx_code adj_code
= unsignedp
? PLUS
: MINUS
;
3555 tem
= expand_shift (RSHIFT_EXPR
, mode
, op0
,
3556 GET_MODE_BITSIZE (mode
) - 1, NULL_RTX
, 0);
3557 tem
= expand_and (mode
, tem
, op1
, NULL_RTX
);
3559 = force_operand (gen_rtx_fmt_ee (adj_code
, mode
, adj_operand
, tem
),
3562 tem
= expand_shift (RSHIFT_EXPR
, mode
, op1
,
3563 GET_MODE_BITSIZE (mode
) - 1, NULL_RTX
, 0);
3564 tem
= expand_and (mode
, tem
, op0
, NULL_RTX
);
3565 target
= force_operand (gen_rtx_fmt_ee (adj_code
, mode
, adj_operand
, tem
),
3571 /* Subroutine of expmed_mult_highpart. Return the MODE high part of OP. */
3574 extract_high_half (machine_mode mode
, rtx op
)
3576 machine_mode wider_mode
;
3578 if (mode
== word_mode
)
3579 return gen_highpart (mode
, op
);
3581 gcc_assert (!SCALAR_FLOAT_MODE_P (mode
));
3583 wider_mode
= GET_MODE_WIDER_MODE (mode
);
3584 op
= expand_shift (RSHIFT_EXPR
, wider_mode
, op
,
3585 GET_MODE_BITSIZE (mode
), 0, 1);
3586 return convert_modes (mode
, wider_mode
, op
, 0);
3589 /* Like expmed_mult_highpart, but only consider using a multiplication
3590 optab. OP1 is an rtx for the constant operand. */
3593 expmed_mult_highpart_optab (machine_mode mode
, rtx op0
, rtx op1
,
3594 rtx target
, int unsignedp
, int max_cost
)
3596 rtx narrow_op1
= gen_int_mode (INTVAL (op1
), mode
);
3597 machine_mode wider_mode
;
3601 bool speed
= optimize_insn_for_speed_p ();
3603 gcc_assert (!SCALAR_FLOAT_MODE_P (mode
));
3605 wider_mode
= GET_MODE_WIDER_MODE (mode
);
3606 size
= GET_MODE_BITSIZE (mode
);
3608 /* Firstly, try using a multiplication insn that only generates the needed
3609 high part of the product, and in the sign flavor of unsignedp. */
3610 if (mul_highpart_cost (speed
, mode
) < max_cost
)
3612 moptab
= unsignedp
? umul_highpart_optab
: smul_highpart_optab
;
3613 tem
= expand_binop (mode
, moptab
, op0
, narrow_op1
, target
,
3614 unsignedp
, OPTAB_DIRECT
);
3619 /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3620 Need to adjust the result after the multiplication. */
3621 if (size
- 1 < BITS_PER_WORD
3622 && (mul_highpart_cost (speed
, mode
)
3623 + 2 * shift_cost (speed
, mode
, size
-1)
3624 + 4 * add_cost (speed
, mode
) < max_cost
))
3626 moptab
= unsignedp
? smul_highpart_optab
: umul_highpart_optab
;
3627 tem
= expand_binop (mode
, moptab
, op0
, narrow_op1
, target
,
3628 unsignedp
, OPTAB_DIRECT
);
3630 /* We used the wrong signedness. Adjust the result. */
3631 return expand_mult_highpart_adjust (mode
, tem
, op0
, narrow_op1
,
3635 /* Try widening multiplication. */
3636 moptab
= unsignedp
? umul_widen_optab
: smul_widen_optab
;
3637 if (widening_optab_handler (moptab
, wider_mode
, mode
) != CODE_FOR_nothing
3638 && mul_widen_cost (speed
, wider_mode
) < max_cost
)
3640 tem
= expand_binop (wider_mode
, moptab
, op0
, narrow_op1
, 0,
3641 unsignedp
, OPTAB_WIDEN
);
3643 return extract_high_half (mode
, tem
);
3646 /* Try widening the mode and perform a non-widening multiplication. */
3647 if (optab_handler (smul_optab
, wider_mode
) != CODE_FOR_nothing
3648 && size
- 1 < BITS_PER_WORD
3649 && (mul_cost (speed
, wider_mode
) + shift_cost (speed
, mode
, size
-1)
3655 /* We need to widen the operands, for example to ensure the
3656 constant multiplier is correctly sign or zero extended.
3657 Use a sequence to clean-up any instructions emitted by
3658 the conversions if things don't work out. */
3660 wop0
= convert_modes (wider_mode
, mode
, op0
, unsignedp
);
3661 wop1
= convert_modes (wider_mode
, mode
, op1
, unsignedp
);
3662 tem
= expand_binop (wider_mode
, smul_optab
, wop0
, wop1
, 0,
3663 unsignedp
, OPTAB_WIDEN
);
3664 insns
= get_insns ();
3670 return extract_high_half (mode
, tem
);
3674 /* Try widening multiplication of opposite signedness, and adjust. */
3675 moptab
= unsignedp
? smul_widen_optab
: umul_widen_optab
;
3676 if (widening_optab_handler (moptab
, wider_mode
, mode
) != CODE_FOR_nothing
3677 && size
- 1 < BITS_PER_WORD
3678 && (mul_widen_cost (speed
, wider_mode
)
3679 + 2 * shift_cost (speed
, mode
, size
-1)
3680 + 4 * add_cost (speed
, mode
) < max_cost
))
3682 tem
= expand_binop (wider_mode
, moptab
, op0
, narrow_op1
,
3683 NULL_RTX
, ! unsignedp
, OPTAB_WIDEN
);
3686 tem
= extract_high_half (mode
, tem
);
3687 /* We used the wrong signedness. Adjust the result. */
3688 return expand_mult_highpart_adjust (mode
, tem
, op0
, narrow_op1
,
3696 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3697 putting the high half of the result in TARGET if that is convenient,
3698 and return where the result is. If the operation can not be performed,
3701 MODE is the mode of operation and result.
3703 UNSIGNEDP nonzero means unsigned multiply.
3705 MAX_COST is the total allowed cost for the expanded RTL. */
3708 expmed_mult_highpart (machine_mode mode
, rtx op0
, rtx op1
,
3709 rtx target
, int unsignedp
, int max_cost
)
3711 machine_mode wider_mode
= GET_MODE_WIDER_MODE (mode
);
3712 unsigned HOST_WIDE_INT cnst1
;
3714 bool sign_adjust
= false;
3715 enum mult_variant variant
;
3716 struct algorithm alg
;
3718 bool speed
= optimize_insn_for_speed_p ();
3720 gcc_assert (!SCALAR_FLOAT_MODE_P (mode
));
3721 /* We can't support modes wider than HOST_BITS_PER_INT. */
3722 gcc_assert (HWI_COMPUTABLE_MODE_P (mode
));
3724 cnst1
= INTVAL (op1
) & GET_MODE_MASK (mode
);
3726 /* We can't optimize modes wider than BITS_PER_WORD.
3727 ??? We might be able to perform double-word arithmetic if
3728 mode == word_mode, however all the cost calculations in
3729 synth_mult etc. assume single-word operations. */
3730 if (GET_MODE_BITSIZE (wider_mode
) > BITS_PER_WORD
)
3731 return expmed_mult_highpart_optab (mode
, op0
, op1
, target
,
3732 unsignedp
, max_cost
);
3734 extra_cost
= shift_cost (speed
, mode
, GET_MODE_BITSIZE (mode
) - 1);
3736 /* Check whether we try to multiply by a negative constant. */
3737 if (!unsignedp
&& ((cnst1
>> (GET_MODE_BITSIZE (mode
) - 1)) & 1))
3740 extra_cost
+= add_cost (speed
, mode
);
3743 /* See whether shift/add multiplication is cheap enough. */
3744 if (choose_mult_variant (wider_mode
, cnst1
, &alg
, &variant
,
3745 max_cost
- extra_cost
))
3747 /* See whether the specialized multiplication optabs are
3748 cheaper than the shift/add version. */
3749 tem
= expmed_mult_highpart_optab (mode
, op0
, op1
, target
, unsignedp
,
3750 alg
.cost
.cost
+ extra_cost
);
3754 tem
= convert_to_mode (wider_mode
, op0
, unsignedp
);
3755 tem
= expand_mult_const (wider_mode
, tem
, cnst1
, 0, &alg
, variant
);
3756 tem
= extract_high_half (mode
, tem
);
3758 /* Adjust result for signedness. */
3760 tem
= force_operand (gen_rtx_MINUS (mode
, tem
, op0
), tem
);
3764 return expmed_mult_highpart_optab (mode
, op0
, op1
, target
,
3765 unsignedp
, max_cost
);
3769 /* Expand signed modulus of OP0 by a power of two D in mode MODE. */
3772 expand_smod_pow2 (machine_mode mode
, rtx op0
, HOST_WIDE_INT d
)
3774 rtx result
, temp
, shift
;
3775 rtx_code_label
*label
;
3777 int prec
= GET_MODE_PRECISION (mode
);
3779 logd
= floor_log2 (d
);
3780 result
= gen_reg_rtx (mode
);
3782 /* Avoid conditional branches when they're expensive. */
3783 if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3784 && optimize_insn_for_speed_p ())
3786 rtx signmask
= emit_store_flag (result
, LT
, op0
, const0_rtx
,
3790 HOST_WIDE_INT masklow
= (HOST_WIDE_INT_1
<< logd
) - 1;
3791 signmask
= force_reg (mode
, signmask
);
3792 shift
= GEN_INT (GET_MODE_BITSIZE (mode
) - logd
);
3794 /* Use the rtx_cost of a LSHIFTRT instruction to determine
3795 which instruction sequence to use. If logical right shifts
3796 are expensive the use 2 XORs, 2 SUBs and an AND, otherwise
3797 use a LSHIFTRT, 1 ADD, 1 SUB and an AND. */
3799 temp
= gen_rtx_LSHIFTRT (mode
, result
, shift
);
3800 if (optab_handler (lshr_optab
, mode
) == CODE_FOR_nothing
3801 || (set_src_cost (temp
, mode
, optimize_insn_for_speed_p ())
3802 > COSTS_N_INSNS (2)))
3804 temp
= expand_binop (mode
, xor_optab
, op0
, signmask
,
3805 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
3806 temp
= expand_binop (mode
, sub_optab
, temp
, signmask
,
3807 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
3808 temp
= expand_binop (mode
, and_optab
, temp
,
3809 gen_int_mode (masklow
, mode
),
3810 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
3811 temp
= expand_binop (mode
, xor_optab
, temp
, signmask
,
3812 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
3813 temp
= expand_binop (mode
, sub_optab
, temp
, signmask
,
3814 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
3818 signmask
= expand_binop (mode
, lshr_optab
, signmask
, shift
,
3819 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
3820 signmask
= force_reg (mode
, signmask
);
3822 temp
= expand_binop (mode
, add_optab
, op0
, signmask
,
3823 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
3824 temp
= expand_binop (mode
, and_optab
, temp
,
3825 gen_int_mode (masklow
, mode
),
3826 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
3827 temp
= expand_binop (mode
, sub_optab
, temp
, signmask
,
3828 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
3834 /* Mask contains the mode's signbit and the significant bits of the
3835 modulus. By including the signbit in the operation, many targets
3836 can avoid an explicit compare operation in the following comparison
3838 wide_int mask
= wi::mask (logd
, false, prec
);
3839 mask
= wi::set_bit (mask
, prec
- 1);
3841 temp
= expand_binop (mode
, and_optab
, op0
,
3842 immed_wide_int_const (mask
, mode
),
3843 result
, 1, OPTAB_LIB_WIDEN
);
3845 emit_move_insn (result
, temp
);
3847 label
= gen_label_rtx ();
3848 do_cmp_and_jump (result
, const0_rtx
, GE
, mode
, label
);
3850 temp
= expand_binop (mode
, sub_optab
, result
, const1_rtx
, result
,
3851 0, OPTAB_LIB_WIDEN
);
3853 mask
= wi::mask (logd
, true, prec
);
3854 temp
= expand_binop (mode
, ior_optab
, temp
,
3855 immed_wide_int_const (mask
, mode
),
3856 result
, 1, OPTAB_LIB_WIDEN
);
3857 temp
= expand_binop (mode
, add_optab
, temp
, const1_rtx
, result
,
3858 0, OPTAB_LIB_WIDEN
);
3860 emit_move_insn (result
, temp
);
3865 /* Expand signed division of OP0 by a power of two D in mode MODE.
3866 This routine is only called for positive values of D. */
3869 expand_sdiv_pow2 (machine_mode mode
, rtx op0
, HOST_WIDE_INT d
)
3872 rtx_code_label
*label
;
3875 logd
= floor_log2 (d
);
3878 && BRANCH_COST (optimize_insn_for_speed_p (),
3881 temp
= gen_reg_rtx (mode
);
3882 temp
= emit_store_flag (temp
, LT
, op0
, const0_rtx
, mode
, 0, 1);
3883 temp
= expand_binop (mode
, add_optab
, temp
, op0
, NULL_RTX
,
3884 0, OPTAB_LIB_WIDEN
);
3885 return expand_shift (RSHIFT_EXPR
, mode
, temp
, logd
, NULL_RTX
, 0);
3888 if (HAVE_conditional_move
3889 && BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2)
3894 temp2
= copy_to_mode_reg (mode
, op0
);
3895 temp
= expand_binop (mode
, add_optab
, temp2
, gen_int_mode (d
- 1, mode
),
3896 NULL_RTX
, 0, OPTAB_LIB_WIDEN
);
3897 temp
= force_reg (mode
, temp
);
3899 /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */
3900 temp2
= emit_conditional_move (temp2
, LT
, temp2
, const0_rtx
,
3901 mode
, temp
, temp2
, mode
, 0);
3904 rtx_insn
*seq
= get_insns ();
3907 return expand_shift (RSHIFT_EXPR
, mode
, temp2
, logd
, NULL_RTX
, 0);
3912 if (BRANCH_COST (optimize_insn_for_speed_p (),
3915 int ushift
= GET_MODE_BITSIZE (mode
) - logd
;
3917 temp
= gen_reg_rtx (mode
);
3918 temp
= emit_store_flag (temp
, LT
, op0
, const0_rtx
, mode
, 0, -1);
3919 if (GET_MODE_BITSIZE (mode
) >= BITS_PER_WORD
3920 || shift_cost (optimize_insn_for_speed_p (), mode
, ushift
)
3921 > COSTS_N_INSNS (1))
3922 temp
= expand_binop (mode
, and_optab
, temp
, gen_int_mode (d
- 1, mode
),
3923 NULL_RTX
, 0, OPTAB_LIB_WIDEN
);
3925 temp
= expand_shift (RSHIFT_EXPR
, mode
, temp
,
3926 ushift
, NULL_RTX
, 1);
3927 temp
= expand_binop (mode
, add_optab
, temp
, op0
, NULL_RTX
,
3928 0, OPTAB_LIB_WIDEN
);
3929 return expand_shift (RSHIFT_EXPR
, mode
, temp
, logd
, NULL_RTX
, 0);
3932 label
= gen_label_rtx ();
3933 temp
= copy_to_mode_reg (mode
, op0
);
3934 do_cmp_and_jump (temp
, const0_rtx
, GE
, mode
, label
);
3935 expand_inc (temp
, gen_int_mode (d
- 1, mode
));
3937 return expand_shift (RSHIFT_EXPR
, mode
, temp
, logd
, NULL_RTX
, 0);
3940 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3941 if that is convenient, and returning where the result is.
3942 You may request either the quotient or the remainder as the result;
3943 specify REM_FLAG nonzero to get the remainder.
3945 CODE is the expression code for which kind of division this is;
3946 it controls how rounding is done. MODE is the machine mode to use.
3947 UNSIGNEDP nonzero means do unsigned division. */
3949 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3950 and then correct it by or'ing in missing high bits
3951 if result of ANDI is nonzero.
3952 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3953 This could optimize to a bfexts instruction.
3954 But C doesn't use these operations, so their optimizations are
3956 /* ??? For modulo, we don't actually need the highpart of the first product,
3957 the low part will do nicely. And for small divisors, the second multiply
3958 can also be a low-part only multiply or even be completely left out.
3959 E.g. to calculate the remainder of a division by 3 with a 32 bit
3960 multiply, multiply with 0x55555556 and extract the upper two bits;
3961 the result is exact for inputs up to 0x1fffffff.
3962 The input range can be reduced by using cross-sum rules.
3963 For odd divisors >= 3, the following table gives right shift counts
3964 so that if a number is shifted by an integer multiple of the given
3965 amount, the remainder stays the same:
3966 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3967 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3968 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3969 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3970 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3972 Cross-sum rules for even numbers can be derived by leaving as many bits
3973 to the right alone as the divisor has zeros to the right.
3974 E.g. if x is an unsigned 32 bit number:
3975 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3979 expand_divmod (int rem_flag
, enum tree_code code
, machine_mode mode
,
3980 rtx op0
, rtx op1
, rtx target
, int unsignedp
)
3982 machine_mode compute_mode
;
3984 rtx quotient
= 0, remainder
= 0;
3988 optab optab1
, optab2
;
3989 int op1_is_constant
, op1_is_pow2
= 0;
3990 int max_cost
, extra_cost
;
3991 static HOST_WIDE_INT last_div_const
= 0;
3992 bool speed
= optimize_insn_for_speed_p ();
3994 op1_is_constant
= CONST_INT_P (op1
);
3995 if (op1_is_constant
)
3997 wide_int ext_op1
= rtx_mode_t (op1
, mode
);
3998 op1_is_pow2
= (wi::popcount (ext_op1
) == 1
4000 && wi::popcount (wi::neg (ext_op1
)) == 1));
4004 This is the structure of expand_divmod:
4006 First comes code to fix up the operands so we can perform the operations
4007 correctly and efficiently.
4009 Second comes a switch statement with code specific for each rounding mode.
4010 For some special operands this code emits all RTL for the desired
4011 operation, for other cases, it generates only a quotient and stores it in
4012 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
4013 to indicate that it has not done anything.
4015 Last comes code that finishes the operation. If QUOTIENT is set and
4016 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
4017 QUOTIENT is not set, it is computed using trunc rounding.
4019 We try to generate special code for division and remainder when OP1 is a
4020 constant. If |OP1| = 2**n we can use shifts and some other fast
4021 operations. For other values of OP1, we compute a carefully selected
4022 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
4025 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
4026 half of the product. Different strategies for generating the product are
4027 implemented in expmed_mult_highpart.
4029 If what we actually want is the remainder, we generate that by another
4030 by-constant multiplication and a subtraction. */
4032 /* We shouldn't be called with OP1 == const1_rtx, but some of the
4033 code below will malfunction if we are, so check here and handle
4034 the special case if so. */
4035 if (op1
== const1_rtx
)
4036 return rem_flag
? const0_rtx
: op0
;
4038 /* When dividing by -1, we could get an overflow.
4039 negv_optab can handle overflows. */
4040 if (! unsignedp
&& op1
== constm1_rtx
)
4044 return expand_unop (mode
, flag_trapv
&& GET_MODE_CLASS (mode
) == MODE_INT
4045 ? negv_optab
: neg_optab
, op0
, target
, 0);
4049 /* Don't use the function value register as a target
4050 since we have to read it as well as write it,
4051 and function-inlining gets confused by this. */
4052 && ((REG_P (target
) && REG_FUNCTION_VALUE_P (target
))
4053 /* Don't clobber an operand while doing a multi-step calculation. */
4054 || ((rem_flag
|| op1_is_constant
)
4055 && (reg_mentioned_p (target
, op0
)
4056 || (MEM_P (op0
) && MEM_P (target
))))
4057 || reg_mentioned_p (target
, op1
)
4058 || (MEM_P (op1
) && MEM_P (target
))))
4061 /* Get the mode in which to perform this computation. Normally it will
4062 be MODE, but sometimes we can't do the desired operation in MODE.
4063 If so, pick a wider mode in which we can do the operation. Convert
4064 to that mode at the start to avoid repeated conversions.
4066 First see what operations we need. These depend on the expression
4067 we are evaluating. (We assume that divxx3 insns exist under the
4068 same conditions that modxx3 insns and that these insns don't normally
4069 fail. If these assumptions are not correct, we may generate less
4070 efficient code in some cases.)
4072 Then see if we find a mode in which we can open-code that operation
4073 (either a division, modulus, or shift). Finally, check for the smallest
4074 mode for which we can do the operation with a library call. */
4076 /* We might want to refine this now that we have division-by-constant
4077 optimization. Since expmed_mult_highpart tries so many variants, it is
4078 not straightforward to generalize this. Maybe we should make an array
4079 of possible modes in init_expmed? Save this for GCC 2.7. */
4081 optab1
= (op1_is_pow2
4082 ? (unsignedp
? lshr_optab
: ashr_optab
)
4083 : (unsignedp
? udiv_optab
: sdiv_optab
));
4084 optab2
= (op1_is_pow2
? optab1
4085 : (unsignedp
? udivmod_optab
: sdivmod_optab
));
4087 for (compute_mode
= mode
; compute_mode
!= VOIDmode
;
4088 compute_mode
= GET_MODE_WIDER_MODE (compute_mode
))
4089 if (optab_handler (optab1
, compute_mode
) != CODE_FOR_nothing
4090 || optab_handler (optab2
, compute_mode
) != CODE_FOR_nothing
)
4093 if (compute_mode
== VOIDmode
)
4094 for (compute_mode
= mode
; compute_mode
!= VOIDmode
;
4095 compute_mode
= GET_MODE_WIDER_MODE (compute_mode
))
4096 if (optab_libfunc (optab1
, compute_mode
)
4097 || optab_libfunc (optab2
, compute_mode
))
4100 /* If we still couldn't find a mode, use MODE, but expand_binop will
4102 if (compute_mode
== VOIDmode
)
4103 compute_mode
= mode
;
4105 if (target
&& GET_MODE (target
) == compute_mode
)
4108 tquotient
= gen_reg_rtx (compute_mode
);
4110 size
= GET_MODE_BITSIZE (compute_mode
);
4112 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
4113 (mode), and thereby get better code when OP1 is a constant. Do that
4114 later. It will require going over all usages of SIZE below. */
4115 size
= GET_MODE_BITSIZE (mode
);
4118 /* Only deduct something for a REM if the last divide done was
4119 for a different constant. Then set the constant of the last
4121 max_cost
= (unsignedp
4122 ? udiv_cost (speed
, compute_mode
)
4123 : sdiv_cost (speed
, compute_mode
));
4124 if (rem_flag
&& ! (last_div_const
!= 0 && op1_is_constant
4125 && INTVAL (op1
) == last_div_const
))
4126 max_cost
-= (mul_cost (speed
, compute_mode
)
4127 + add_cost (speed
, compute_mode
));
4129 last_div_const
= ! rem_flag
&& op1_is_constant
? INTVAL (op1
) : 0;
4131 /* Now convert to the best mode to use. */
4132 if (compute_mode
!= mode
)
4134 op0
= convert_modes (compute_mode
, mode
, op0
, unsignedp
);
4135 op1
= convert_modes (compute_mode
, mode
, op1
, unsignedp
);
4137 /* convert_modes may have placed op1 into a register, so we
4138 must recompute the following. */
4139 op1_is_constant
= CONST_INT_P (op1
);
4140 if (op1_is_constant
)
4142 wide_int ext_op1
= rtx_mode_t (op1
, compute_mode
);
4143 op1_is_pow2
= (wi::popcount (ext_op1
) == 1
4145 && wi::popcount (wi::neg (ext_op1
)) == 1));
4151 /* If one of the operands is a volatile MEM, copy it into a register. */
4153 if (MEM_P (op0
) && MEM_VOLATILE_P (op0
))
4154 op0
= force_reg (compute_mode
, op0
);
4155 if (MEM_P (op1
) && MEM_VOLATILE_P (op1
))
4156 op1
= force_reg (compute_mode
, op1
);
4158 /* If we need the remainder or if OP1 is constant, we need to
4159 put OP0 in a register in case it has any queued subexpressions. */
4160 if (rem_flag
|| op1_is_constant
)
4161 op0
= force_reg (compute_mode
, op0
);
4163 last
= get_last_insn ();
4165 /* Promote floor rounding to trunc rounding for unsigned operations. */
4168 if (code
== FLOOR_DIV_EXPR
)
4169 code
= TRUNC_DIV_EXPR
;
4170 if (code
== FLOOR_MOD_EXPR
)
4171 code
= TRUNC_MOD_EXPR
;
4172 if (code
== EXACT_DIV_EXPR
&& op1_is_pow2
)
4173 code
= TRUNC_DIV_EXPR
;
4176 if (op1
!= const0_rtx
)
4179 case TRUNC_MOD_EXPR
:
4180 case TRUNC_DIV_EXPR
:
4181 if (op1_is_constant
)
4185 unsigned HOST_WIDE_INT mh
, ml
;
4186 int pre_shift
, post_shift
;
4188 wide_int wd
= rtx_mode_t (op1
, compute_mode
);
4189 unsigned HOST_WIDE_INT d
= wd
.to_uhwi ();
4191 if (wi::popcount (wd
) == 1)
4193 pre_shift
= floor_log2 (d
);
4196 unsigned HOST_WIDE_INT mask
4197 = (HOST_WIDE_INT_1U
<< pre_shift
) - 1;
4199 = expand_binop (compute_mode
, and_optab
, op0
,
4200 gen_int_mode (mask
, compute_mode
),
4204 return gen_lowpart (mode
, remainder
);
4206 quotient
= expand_shift (RSHIFT_EXPR
, compute_mode
, op0
,
4207 pre_shift
, tquotient
, 1);
4209 else if (size
<= HOST_BITS_PER_WIDE_INT
)
4211 if (d
>= (HOST_WIDE_INT_1U
<< (size
- 1)))
4213 /* Most significant bit of divisor is set; emit an scc
4215 quotient
= emit_store_flag_force (tquotient
, GEU
, op0
, op1
,
4216 compute_mode
, 1, 1);
4220 /* Find a suitable multiplier and right shift count
4221 instead of multiplying with D. */
4223 mh
= choose_multiplier (d
, size
, size
,
4224 &ml
, &post_shift
, &dummy
);
4226 /* If the suggested multiplier is more than SIZE bits,
4227 we can do better for even divisors, using an
4228 initial right shift. */
4229 if (mh
!= 0 && (d
& 1) == 0)
4231 pre_shift
= ctz_or_zero (d
);
4232 mh
= choose_multiplier (d
>> pre_shift
, size
,
4234 &ml
, &post_shift
, &dummy
);
4244 if (post_shift
- 1 >= BITS_PER_WORD
)
4248 = (shift_cost (speed
, compute_mode
, post_shift
- 1)
4249 + shift_cost (speed
, compute_mode
, 1)
4250 + 2 * add_cost (speed
, compute_mode
));
4251 t1
= expmed_mult_highpart
4253 gen_int_mode (ml
, compute_mode
),
4254 NULL_RTX
, 1, max_cost
- extra_cost
);
4257 t2
= force_operand (gen_rtx_MINUS (compute_mode
,
4260 t3
= expand_shift (RSHIFT_EXPR
, compute_mode
,
4261 t2
, 1, NULL_RTX
, 1);
4262 t4
= force_operand (gen_rtx_PLUS (compute_mode
,
4265 quotient
= expand_shift
4266 (RSHIFT_EXPR
, compute_mode
, t4
,
4267 post_shift
- 1, tquotient
, 1);
4273 if (pre_shift
>= BITS_PER_WORD
4274 || post_shift
>= BITS_PER_WORD
)
4278 (RSHIFT_EXPR
, compute_mode
, op0
,
4279 pre_shift
, NULL_RTX
, 1);
4281 = (shift_cost (speed
, compute_mode
, pre_shift
)
4282 + shift_cost (speed
, compute_mode
, post_shift
));
4283 t2
= expmed_mult_highpart
4285 gen_int_mode (ml
, compute_mode
),
4286 NULL_RTX
, 1, max_cost
- extra_cost
);
4289 quotient
= expand_shift
4290 (RSHIFT_EXPR
, compute_mode
, t2
,
4291 post_shift
, tquotient
, 1);
4295 else /* Too wide mode to use tricky code */
4298 insn
= get_last_insn ();
4300 set_dst_reg_note (insn
, REG_EQUAL
,
4301 gen_rtx_UDIV (compute_mode
, op0
, op1
),
4304 else /* TRUNC_DIV, signed */
4306 unsigned HOST_WIDE_INT ml
;
4307 int lgup
, post_shift
;
4309 HOST_WIDE_INT d
= INTVAL (op1
);
4310 unsigned HOST_WIDE_INT abs_d
;
4312 /* Since d might be INT_MIN, we have to cast to
4313 unsigned HOST_WIDE_INT before negating to avoid
4314 undefined signed overflow. */
4316 ? (unsigned HOST_WIDE_INT
) d
4317 : - (unsigned HOST_WIDE_INT
) d
);
4319 /* n rem d = n rem -d */
4320 if (rem_flag
&& d
< 0)
4323 op1
= gen_int_mode (abs_d
, compute_mode
);
4329 quotient
= expand_unop (compute_mode
, neg_optab
, op0
,
4331 else if (size
<= HOST_BITS_PER_WIDE_INT
4332 && abs_d
== HOST_WIDE_INT_1U
<< (size
- 1))
4334 /* This case is not handled correctly below. */
4335 quotient
= emit_store_flag (tquotient
, EQ
, op0
, op1
,
4336 compute_mode
, 1, 1);
4340 else if (EXACT_POWER_OF_2_OR_ZERO_P (d
)
4341 && (size
<= HOST_BITS_PER_WIDE_INT
|| d
>= 0)
4343 ? smod_pow2_cheap (speed
, compute_mode
)
4344 : sdiv_pow2_cheap (speed
, compute_mode
))
4345 /* We assume that cheap metric is true if the
4346 optab has an expander for this mode. */
4347 && ((optab_handler ((rem_flag
? smod_optab
4350 != CODE_FOR_nothing
)
4351 || (optab_handler (sdivmod_optab
,
4353 != CODE_FOR_nothing
)))
4355 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d
)
4356 && (size
<= HOST_BITS_PER_WIDE_INT
4357 || abs_d
!= (unsigned HOST_WIDE_INT
) d
))
4361 remainder
= expand_smod_pow2 (compute_mode
, op0
, d
);
4363 return gen_lowpart (mode
, remainder
);
4366 if (sdiv_pow2_cheap (speed
, compute_mode
)
4367 && ((optab_handler (sdiv_optab
, compute_mode
)
4368 != CODE_FOR_nothing
)
4369 || (optab_handler (sdivmod_optab
, compute_mode
)
4370 != CODE_FOR_nothing
)))
4371 quotient
= expand_divmod (0, TRUNC_DIV_EXPR
,
4373 gen_int_mode (abs_d
,
4377 quotient
= expand_sdiv_pow2 (compute_mode
, op0
, abs_d
);
4379 /* We have computed OP0 / abs(OP1). If OP1 is negative,
4380 negate the quotient. */
4383 insn
= get_last_insn ();
4385 && abs_d
< (HOST_WIDE_INT_1U
4386 << (HOST_BITS_PER_WIDE_INT
- 1)))
4387 set_dst_reg_note (insn
, REG_EQUAL
,
4388 gen_rtx_DIV (compute_mode
, op0
,
4394 quotient
= expand_unop (compute_mode
, neg_optab
,
4395 quotient
, quotient
, 0);
4398 else if (size
<= HOST_BITS_PER_WIDE_INT
)
4400 choose_multiplier (abs_d
, size
, size
- 1,
4401 &ml
, &post_shift
, &lgup
);
4402 if (ml
< HOST_WIDE_INT_1U
<< (size
- 1))
4406 if (post_shift
>= BITS_PER_WORD
4407 || size
- 1 >= BITS_PER_WORD
)
4410 extra_cost
= (shift_cost (speed
, compute_mode
, post_shift
)
4411 + shift_cost (speed
, compute_mode
, size
- 1)
4412 + add_cost (speed
, compute_mode
));
4413 t1
= expmed_mult_highpart
4414 (compute_mode
, op0
, gen_int_mode (ml
, compute_mode
),
4415 NULL_RTX
, 0, max_cost
- extra_cost
);
4419 (RSHIFT_EXPR
, compute_mode
, t1
,
4420 post_shift
, NULL_RTX
, 0);
4422 (RSHIFT_EXPR
, compute_mode
, op0
,
4423 size
- 1, NULL_RTX
, 0);
4426 = force_operand (gen_rtx_MINUS (compute_mode
,
4431 = force_operand (gen_rtx_MINUS (compute_mode
,
4439 if (post_shift
>= BITS_PER_WORD
4440 || size
- 1 >= BITS_PER_WORD
)
4443 ml
|= HOST_WIDE_INT_M1U
<< (size
- 1);
4444 mlr
= gen_int_mode (ml
, compute_mode
);
4445 extra_cost
= (shift_cost (speed
, compute_mode
, post_shift
)
4446 + shift_cost (speed
, compute_mode
, size
- 1)
4447 + 2 * add_cost (speed
, compute_mode
));
4448 t1
= expmed_mult_highpart (compute_mode
, op0
, mlr
,
4450 max_cost
- extra_cost
);
4453 t2
= force_operand (gen_rtx_PLUS (compute_mode
,
4457 (RSHIFT_EXPR
, compute_mode
, t2
,
4458 post_shift
, NULL_RTX
, 0);
4460 (RSHIFT_EXPR
, compute_mode
, op0
,
4461 size
- 1, NULL_RTX
, 0);
4464 = force_operand (gen_rtx_MINUS (compute_mode
,
4469 = force_operand (gen_rtx_MINUS (compute_mode
,
4474 else /* Too wide mode to use tricky code */
4477 insn
= get_last_insn ();
4479 set_dst_reg_note (insn
, REG_EQUAL
,
4480 gen_rtx_DIV (compute_mode
, op0
, op1
),
4486 delete_insns_since (last
);
4489 case FLOOR_DIV_EXPR
:
4490 case FLOOR_MOD_EXPR
:
4491 /* We will come here only for signed operations. */
4492 if (op1_is_constant
&& size
<= HOST_BITS_PER_WIDE_INT
)
4494 unsigned HOST_WIDE_INT mh
, ml
;
4495 int pre_shift
, lgup
, post_shift
;
4496 HOST_WIDE_INT d
= INTVAL (op1
);
4500 /* We could just as easily deal with negative constants here,
4501 but it does not seem worth the trouble for GCC 2.6. */
4502 if (EXACT_POWER_OF_2_OR_ZERO_P (d
))
4504 pre_shift
= floor_log2 (d
);
4507 unsigned HOST_WIDE_INT mask
4508 = (HOST_WIDE_INT_1U
<< pre_shift
) - 1;
4509 remainder
= expand_binop
4510 (compute_mode
, and_optab
, op0
,
4511 gen_int_mode (mask
, compute_mode
),
4512 remainder
, 0, OPTAB_LIB_WIDEN
);
4514 return gen_lowpart (mode
, remainder
);
4516 quotient
= expand_shift
4517 (RSHIFT_EXPR
, compute_mode
, op0
,
4518 pre_shift
, tquotient
, 0);
4524 mh
= choose_multiplier (d
, size
, size
- 1,
4525 &ml
, &post_shift
, &lgup
);
4528 if (post_shift
< BITS_PER_WORD
4529 && size
- 1 < BITS_PER_WORD
)
4532 (RSHIFT_EXPR
, compute_mode
, op0
,
4533 size
- 1, NULL_RTX
, 0);
4534 t2
= expand_binop (compute_mode
, xor_optab
, op0
, t1
,
4535 NULL_RTX
, 0, OPTAB_WIDEN
);
4536 extra_cost
= (shift_cost (speed
, compute_mode
, post_shift
)
4537 + shift_cost (speed
, compute_mode
, size
- 1)
4538 + 2 * add_cost (speed
, compute_mode
));
4539 t3
= expmed_mult_highpart
4540 (compute_mode
, t2
, gen_int_mode (ml
, compute_mode
),
4541 NULL_RTX
, 1, max_cost
- extra_cost
);
4545 (RSHIFT_EXPR
, compute_mode
, t3
,
4546 post_shift
, NULL_RTX
, 1);
4547 quotient
= expand_binop (compute_mode
, xor_optab
,
4548 t4
, t1
, tquotient
, 0,
4556 rtx nsign
, t1
, t2
, t3
, t4
;
4557 t1
= force_operand (gen_rtx_PLUS (compute_mode
,
4558 op0
, constm1_rtx
), NULL_RTX
);
4559 t2
= expand_binop (compute_mode
, ior_optab
, op0
, t1
, NULL_RTX
,
4561 nsign
= expand_shift (RSHIFT_EXPR
, compute_mode
, t2
,
4562 size
- 1, NULL_RTX
, 0);
4563 t3
= force_operand (gen_rtx_MINUS (compute_mode
, t1
, nsign
),
4565 t4
= expand_divmod (0, TRUNC_DIV_EXPR
, compute_mode
, t3
, op1
,
4570 t5
= expand_unop (compute_mode
, one_cmpl_optab
, nsign
,
4572 quotient
= force_operand (gen_rtx_PLUS (compute_mode
,
4581 delete_insns_since (last
);
4583 /* Try using an instruction that produces both the quotient and
4584 remainder, using truncation. We can easily compensate the quotient
4585 or remainder to get floor rounding, once we have the remainder.
4586 Notice that we compute also the final remainder value here,
4587 and return the result right away. */
4588 if (target
== 0 || GET_MODE (target
) != compute_mode
)
4589 target
= gen_reg_rtx (compute_mode
);
4594 = REG_P (target
) ? target
: gen_reg_rtx (compute_mode
);
4595 quotient
= gen_reg_rtx (compute_mode
);
4600 = REG_P (target
) ? target
: gen_reg_rtx (compute_mode
);
4601 remainder
= gen_reg_rtx (compute_mode
);
4604 if (expand_twoval_binop (sdivmod_optab
, op0
, op1
,
4605 quotient
, remainder
, 0))
4607 /* This could be computed with a branch-less sequence.
4608 Save that for later. */
4610 rtx_code_label
*label
= gen_label_rtx ();
4611 do_cmp_and_jump (remainder
, const0_rtx
, EQ
, compute_mode
, label
);
4612 tem
= expand_binop (compute_mode
, xor_optab
, op0
, op1
,
4613 NULL_RTX
, 0, OPTAB_WIDEN
);
4614 do_cmp_and_jump (tem
, const0_rtx
, GE
, compute_mode
, label
);
4615 expand_dec (quotient
, const1_rtx
);
4616 expand_inc (remainder
, op1
);
4618 return gen_lowpart (mode
, rem_flag
? remainder
: quotient
);
4621 /* No luck with division elimination or divmod. Have to do it
4622 by conditionally adjusting op0 *and* the result. */
4624 rtx_code_label
*label1
, *label2
, *label3
, *label4
, *label5
;
4628 quotient
= gen_reg_rtx (compute_mode
);
4629 adjusted_op0
= copy_to_mode_reg (compute_mode
, op0
);
4630 label1
= gen_label_rtx ();
4631 label2
= gen_label_rtx ();
4632 label3
= gen_label_rtx ();
4633 label4
= gen_label_rtx ();
4634 label5
= gen_label_rtx ();
4635 do_cmp_and_jump (op1
, const0_rtx
, LT
, compute_mode
, label2
);
4636 do_cmp_and_jump (adjusted_op0
, const0_rtx
, LT
, compute_mode
, label1
);
4637 tem
= expand_binop (compute_mode
, sdiv_optab
, adjusted_op0
, op1
,
4638 quotient
, 0, OPTAB_LIB_WIDEN
);
4639 if (tem
!= quotient
)
4640 emit_move_insn (quotient
, tem
);
4641 emit_jump_insn (targetm
.gen_jump (label5
));
4643 emit_label (label1
);
4644 expand_inc (adjusted_op0
, const1_rtx
);
4645 emit_jump_insn (targetm
.gen_jump (label4
));
4647 emit_label (label2
);
4648 do_cmp_and_jump (adjusted_op0
, const0_rtx
, GT
, compute_mode
, label3
);
4649 tem
= expand_binop (compute_mode
, sdiv_optab
, adjusted_op0
, op1
,
4650 quotient
, 0, OPTAB_LIB_WIDEN
);
4651 if (tem
!= quotient
)
4652 emit_move_insn (quotient
, tem
);
4653 emit_jump_insn (targetm
.gen_jump (label5
));
4655 emit_label (label3
);
4656 expand_dec (adjusted_op0
, const1_rtx
);
4657 emit_label (label4
);
4658 tem
= expand_binop (compute_mode
, sdiv_optab
, adjusted_op0
, op1
,
4659 quotient
, 0, OPTAB_LIB_WIDEN
);
4660 if (tem
!= quotient
)
4661 emit_move_insn (quotient
, tem
);
4662 expand_dec (quotient
, const1_rtx
);
4663 emit_label (label5
);
4672 && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1
))
4673 && (size
<= HOST_BITS_PER_WIDE_INT
4674 || INTVAL (op1
) >= 0))
4677 unsigned HOST_WIDE_INT d
= INTVAL (op1
);
4678 t1
= expand_shift (RSHIFT_EXPR
, compute_mode
, op0
,
4679 floor_log2 (d
), tquotient
, 1);
4680 t2
= expand_binop (compute_mode
, and_optab
, op0
,
4681 gen_int_mode (d
- 1, compute_mode
),
4682 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
4683 t3
= gen_reg_rtx (compute_mode
);
4684 t3
= emit_store_flag (t3
, NE
, t2
, const0_rtx
,
4685 compute_mode
, 1, 1);
4688 rtx_code_label
*lab
;
4689 lab
= gen_label_rtx ();
4690 do_cmp_and_jump (t2
, const0_rtx
, EQ
, compute_mode
, lab
);
4691 expand_inc (t1
, const1_rtx
);
4696 quotient
= force_operand (gen_rtx_PLUS (compute_mode
,
4702 /* Try using an instruction that produces both the quotient and
4703 remainder, using truncation. We can easily compensate the
4704 quotient or remainder to get ceiling rounding, once we have the
4705 remainder. Notice that we compute also the final remainder
4706 value here, and return the result right away. */
4707 if (target
== 0 || GET_MODE (target
) != compute_mode
)
4708 target
= gen_reg_rtx (compute_mode
);
4712 remainder
= (REG_P (target
)
4713 ? target
: gen_reg_rtx (compute_mode
));
4714 quotient
= gen_reg_rtx (compute_mode
);
4718 quotient
= (REG_P (target
)
4719 ? target
: gen_reg_rtx (compute_mode
));
4720 remainder
= gen_reg_rtx (compute_mode
);
4723 if (expand_twoval_binop (udivmod_optab
, op0
, op1
, quotient
,
4726 /* This could be computed with a branch-less sequence.
4727 Save that for later. */
4728 rtx_code_label
*label
= gen_label_rtx ();
4729 do_cmp_and_jump (remainder
, const0_rtx
, EQ
,
4730 compute_mode
, label
);
4731 expand_inc (quotient
, const1_rtx
);
4732 expand_dec (remainder
, op1
);
4734 return gen_lowpart (mode
, rem_flag
? remainder
: quotient
);
4737 /* No luck with division elimination or divmod. Have to do it
4738 by conditionally adjusting op0 *and* the result. */
4740 rtx_code_label
*label1
, *label2
;
4741 rtx adjusted_op0
, tem
;
4743 quotient
= gen_reg_rtx (compute_mode
);
4744 adjusted_op0
= copy_to_mode_reg (compute_mode
, op0
);
4745 label1
= gen_label_rtx ();
4746 label2
= gen_label_rtx ();
4747 do_cmp_and_jump (adjusted_op0
, const0_rtx
, NE
,
4748 compute_mode
, label1
);
4749 emit_move_insn (quotient
, const0_rtx
);
4750 emit_jump_insn (targetm
.gen_jump (label2
));
4752 emit_label (label1
);
4753 expand_dec (adjusted_op0
, const1_rtx
);
4754 tem
= expand_binop (compute_mode
, udiv_optab
, adjusted_op0
, op1
,
4755 quotient
, 1, OPTAB_LIB_WIDEN
);
4756 if (tem
!= quotient
)
4757 emit_move_insn (quotient
, tem
);
4758 expand_inc (quotient
, const1_rtx
);
4759 emit_label (label2
);
4764 if (op1_is_constant
&& EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1
))
4765 && INTVAL (op1
) >= 0)
4767 /* This is extremely similar to the code for the unsigned case
4768 above. For 2.7 we should merge these variants, but for
4769 2.6.1 I don't want to touch the code for unsigned since that
4770 get used in C. The signed case will only be used by other
4774 unsigned HOST_WIDE_INT d
= INTVAL (op1
);
4775 t1
= expand_shift (RSHIFT_EXPR
, compute_mode
, op0
,
4776 floor_log2 (d
), tquotient
, 0);
4777 t2
= expand_binop (compute_mode
, and_optab
, op0
,
4778 gen_int_mode (d
- 1, compute_mode
),
4779 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
4780 t3
= gen_reg_rtx (compute_mode
);
4781 t3
= emit_store_flag (t3
, NE
, t2
, const0_rtx
,
4782 compute_mode
, 1, 1);
4785 rtx_code_label
*lab
;
4786 lab
= gen_label_rtx ();
4787 do_cmp_and_jump (t2
, const0_rtx
, EQ
, compute_mode
, lab
);
4788 expand_inc (t1
, const1_rtx
);
4793 quotient
= force_operand (gen_rtx_PLUS (compute_mode
,
4799 /* Try using an instruction that produces both the quotient and
4800 remainder, using truncation. We can easily compensate the
4801 quotient or remainder to get ceiling rounding, once we have the
4802 remainder. Notice that we compute also the final remainder
4803 value here, and return the result right away. */
4804 if (target
== 0 || GET_MODE (target
) != compute_mode
)
4805 target
= gen_reg_rtx (compute_mode
);
4808 remainder
= (REG_P (target
)
4809 ? target
: gen_reg_rtx (compute_mode
));
4810 quotient
= gen_reg_rtx (compute_mode
);
4814 quotient
= (REG_P (target
)
4815 ? target
: gen_reg_rtx (compute_mode
));
4816 remainder
= gen_reg_rtx (compute_mode
);
4819 if (expand_twoval_binop (sdivmod_optab
, op0
, op1
, quotient
,
4822 /* This could be computed with a branch-less sequence.
4823 Save that for later. */
4825 rtx_code_label
*label
= gen_label_rtx ();
4826 do_cmp_and_jump (remainder
, const0_rtx
, EQ
,
4827 compute_mode
, label
);
4828 tem
= expand_binop (compute_mode
, xor_optab
, op0
, op1
,
4829 NULL_RTX
, 0, OPTAB_WIDEN
);
4830 do_cmp_and_jump (tem
, const0_rtx
, LT
, compute_mode
, label
);
4831 expand_inc (quotient
, const1_rtx
);
4832 expand_dec (remainder
, op1
);
4834 return gen_lowpart (mode
, rem_flag
? remainder
: quotient
);
4837 /* No luck with division elimination or divmod. Have to do it
4838 by conditionally adjusting op0 *and* the result. */
4840 rtx_code_label
*label1
, *label2
, *label3
, *label4
, *label5
;
4844 quotient
= gen_reg_rtx (compute_mode
);
4845 adjusted_op0
= copy_to_mode_reg (compute_mode
, op0
);
4846 label1
= gen_label_rtx ();
4847 label2
= gen_label_rtx ();
4848 label3
= gen_label_rtx ();
4849 label4
= gen_label_rtx ();
4850 label5
= gen_label_rtx ();
4851 do_cmp_and_jump (op1
, const0_rtx
, LT
, compute_mode
, label2
);
4852 do_cmp_and_jump (adjusted_op0
, const0_rtx
, GT
,
4853 compute_mode
, label1
);
4854 tem
= expand_binop (compute_mode
, sdiv_optab
, adjusted_op0
, op1
,
4855 quotient
, 0, OPTAB_LIB_WIDEN
);
4856 if (tem
!= quotient
)
4857 emit_move_insn (quotient
, tem
);
4858 emit_jump_insn (targetm
.gen_jump (label5
));
4860 emit_label (label1
);
4861 expand_dec (adjusted_op0
, const1_rtx
);
4862 emit_jump_insn (targetm
.gen_jump (label4
));
4864 emit_label (label2
);
4865 do_cmp_and_jump (adjusted_op0
, const0_rtx
, LT
,
4866 compute_mode
, label3
);
4867 tem
= expand_binop (compute_mode
, sdiv_optab
, adjusted_op0
, op1
,
4868 quotient
, 0, OPTAB_LIB_WIDEN
);
4869 if (tem
!= quotient
)
4870 emit_move_insn (quotient
, tem
);
4871 emit_jump_insn (targetm
.gen_jump (label5
));
4873 emit_label (label3
);
4874 expand_inc (adjusted_op0
, const1_rtx
);
4875 emit_label (label4
);
4876 tem
= expand_binop (compute_mode
, sdiv_optab
, adjusted_op0
, op1
,
4877 quotient
, 0, OPTAB_LIB_WIDEN
);
4878 if (tem
!= quotient
)
4879 emit_move_insn (quotient
, tem
);
4880 expand_inc (quotient
, const1_rtx
);
4881 emit_label (label5
);
4886 case EXACT_DIV_EXPR
:
4887 if (op1_is_constant
&& size
<= HOST_BITS_PER_WIDE_INT
)
4889 HOST_WIDE_INT d
= INTVAL (op1
);
4890 unsigned HOST_WIDE_INT ml
;
4894 pre_shift
= ctz_or_zero (d
);
4895 ml
= invert_mod2n (d
>> pre_shift
, size
);
4896 t1
= expand_shift (RSHIFT_EXPR
, compute_mode
, op0
,
4897 pre_shift
, NULL_RTX
, unsignedp
);
4898 quotient
= expand_mult (compute_mode
, t1
,
4899 gen_int_mode (ml
, compute_mode
),
4902 insn
= get_last_insn ();
4903 set_dst_reg_note (insn
, REG_EQUAL
,
4904 gen_rtx_fmt_ee (unsignedp
? UDIV
: DIV
,
4905 compute_mode
, op0
, op1
),
4910 case ROUND_DIV_EXPR
:
4911 case ROUND_MOD_EXPR
:
4915 rtx_code_label
*label
;
4916 label
= gen_label_rtx ();
4917 quotient
= gen_reg_rtx (compute_mode
);
4918 remainder
= gen_reg_rtx (compute_mode
);
4919 if (expand_twoval_binop (udivmod_optab
, op0
, op1
, quotient
, remainder
, 1) == 0)
4922 quotient
= expand_binop (compute_mode
, udiv_optab
, op0
, op1
,
4923 quotient
, 1, OPTAB_LIB_WIDEN
);
4924 tem
= expand_mult (compute_mode
, quotient
, op1
, NULL_RTX
, 1);
4925 remainder
= expand_binop (compute_mode
, sub_optab
, op0
, tem
,
4926 remainder
, 1, OPTAB_LIB_WIDEN
);
4928 tem
= plus_constant (compute_mode
, op1
, -1);
4929 tem
= expand_shift (RSHIFT_EXPR
, compute_mode
, tem
, 1, NULL_RTX
, 1);
4930 do_cmp_and_jump (remainder
, tem
, LEU
, compute_mode
, label
);
4931 expand_inc (quotient
, const1_rtx
);
4932 expand_dec (remainder
, op1
);
4937 rtx abs_rem
, abs_op1
, tem
, mask
;
4938 rtx_code_label
*label
;
4939 label
= gen_label_rtx ();
4940 quotient
= gen_reg_rtx (compute_mode
);
4941 remainder
= gen_reg_rtx (compute_mode
);
4942 if (expand_twoval_binop (sdivmod_optab
, op0
, op1
, quotient
, remainder
, 0) == 0)
4945 quotient
= expand_binop (compute_mode
, sdiv_optab
, op0
, op1
,
4946 quotient
, 0, OPTAB_LIB_WIDEN
);
4947 tem
= expand_mult (compute_mode
, quotient
, op1
, NULL_RTX
, 0);
4948 remainder
= expand_binop (compute_mode
, sub_optab
, op0
, tem
,
4949 remainder
, 0, OPTAB_LIB_WIDEN
);
4951 abs_rem
= expand_abs (compute_mode
, remainder
, NULL_RTX
, 1, 0);
4952 abs_op1
= expand_abs (compute_mode
, op1
, NULL_RTX
, 1, 0);
4953 tem
= expand_shift (LSHIFT_EXPR
, compute_mode
, abs_rem
,
4955 do_cmp_and_jump (tem
, abs_op1
, LTU
, compute_mode
, label
);
4956 tem
= expand_binop (compute_mode
, xor_optab
, op0
, op1
,
4957 NULL_RTX
, 0, OPTAB_WIDEN
);
4958 mask
= expand_shift (RSHIFT_EXPR
, compute_mode
, tem
,
4959 size
- 1, NULL_RTX
, 0);
4960 tem
= expand_binop (compute_mode
, xor_optab
, mask
, const1_rtx
,
4961 NULL_RTX
, 0, OPTAB_WIDEN
);
4962 tem
= expand_binop (compute_mode
, sub_optab
, tem
, mask
,
4963 NULL_RTX
, 0, OPTAB_WIDEN
);
4964 expand_inc (quotient
, tem
);
4965 tem
= expand_binop (compute_mode
, xor_optab
, mask
, op1
,
4966 NULL_RTX
, 0, OPTAB_WIDEN
);
4967 tem
= expand_binop (compute_mode
, sub_optab
, tem
, mask
,
4968 NULL_RTX
, 0, OPTAB_WIDEN
);
4969 expand_dec (remainder
, tem
);
4972 return gen_lowpart (mode
, rem_flag
? remainder
: quotient
);
4980 if (target
&& GET_MODE (target
) != compute_mode
)
4985 /* Try to produce the remainder without producing the quotient.
4986 If we seem to have a divmod pattern that does not require widening,
4987 don't try widening here. We should really have a WIDEN argument
4988 to expand_twoval_binop, since what we'd really like to do here is
4989 1) try a mod insn in compute_mode
4990 2) try a divmod insn in compute_mode
4991 3) try a div insn in compute_mode and multiply-subtract to get
4993 4) try the same things with widening allowed. */
4995 = sign_expand_binop (compute_mode
, umod_optab
, smod_optab
,
4998 ((optab_handler (optab2
, compute_mode
)
4999 != CODE_FOR_nothing
)
5000 ? OPTAB_DIRECT
: OPTAB_WIDEN
));
5003 /* No luck there. Can we do remainder and divide at once
5004 without a library call? */
5005 remainder
= gen_reg_rtx (compute_mode
);
5006 if (! expand_twoval_binop ((unsignedp
5010 NULL_RTX
, remainder
, unsignedp
))
5015 return gen_lowpart (mode
, remainder
);
5018 /* Produce the quotient. Try a quotient insn, but not a library call.
5019 If we have a divmod in this mode, use it in preference to widening
5020 the div (for this test we assume it will not fail). Note that optab2
5021 is set to the one of the two optabs that the call below will use. */
5023 = sign_expand_binop (compute_mode
, udiv_optab
, sdiv_optab
,
5024 op0
, op1
, rem_flag
? NULL_RTX
: target
,
5026 ((optab_handler (optab2
, compute_mode
)
5027 != CODE_FOR_nothing
)
5028 ? OPTAB_DIRECT
: OPTAB_WIDEN
));
5032 /* No luck there. Try a quotient-and-remainder insn,
5033 keeping the quotient alone. */
5034 quotient
= gen_reg_rtx (compute_mode
);
5035 if (! expand_twoval_binop (unsignedp
? udivmod_optab
: sdivmod_optab
,
5037 quotient
, NULL_RTX
, unsignedp
))
5041 /* Still no luck. If we are not computing the remainder,
5042 use a library call for the quotient. */
5043 quotient
= sign_expand_binop (compute_mode
,
5044 udiv_optab
, sdiv_optab
,
5046 unsignedp
, OPTAB_LIB_WIDEN
);
5053 if (target
&& GET_MODE (target
) != compute_mode
)
5058 /* No divide instruction either. Use library for remainder. */
5059 remainder
= sign_expand_binop (compute_mode
, umod_optab
, smod_optab
,
5061 unsignedp
, OPTAB_LIB_WIDEN
);
5062 /* No remainder function. Try a quotient-and-remainder
5063 function, keeping the remainder. */
5066 remainder
= gen_reg_rtx (compute_mode
);
5067 if (!expand_twoval_binop_libfunc
5068 (unsignedp
? udivmod_optab
: sdivmod_optab
,
5070 NULL_RTX
, remainder
,
5071 unsignedp
? UMOD
: MOD
))
5072 remainder
= NULL_RTX
;
5077 /* We divided. Now finish doing X - Y * (X / Y). */
5078 remainder
= expand_mult (compute_mode
, quotient
, op1
,
5079 NULL_RTX
, unsignedp
);
5080 remainder
= expand_binop (compute_mode
, sub_optab
, op0
,
5081 remainder
, target
, unsignedp
,
5086 return gen_lowpart (mode
, rem_flag
? remainder
: quotient
);
5089 /* Return a tree node with data type TYPE, describing the value of X.
5090 Usually this is an VAR_DECL, if there is no obvious better choice.
5091 X may be an expression, however we only support those expressions
5092 generated by loop.c. */
5095 make_tree (tree type
, rtx x
)
5099 switch (GET_CODE (x
))
5102 case CONST_WIDE_INT
:
5103 t
= wide_int_to_tree (type
, rtx_mode_t (x
, TYPE_MODE (type
)));
5107 STATIC_ASSERT (HOST_BITS_PER_WIDE_INT
* 2 <= MAX_BITSIZE_MODE_ANY_INT
);
5108 if (TARGET_SUPPORTS_WIDE_INT
== 0 && GET_MODE (x
) == VOIDmode
)
5109 t
= wide_int_to_tree (type
,
5110 wide_int::from_array (&CONST_DOUBLE_LOW (x
), 2,
5111 HOST_BITS_PER_WIDE_INT
* 2));
5113 t
= build_real (type
, *CONST_DOUBLE_REAL_VALUE (x
));
5119 int units
= CONST_VECTOR_NUNITS (x
);
5120 tree itype
= TREE_TYPE (type
);
5124 /* Build a tree with vector elements. */
5125 elts
= XALLOCAVEC (tree
, units
);
5126 for (i
= units
- 1; i
>= 0; --i
)
5128 rtx elt
= CONST_VECTOR_ELT (x
, i
);
5129 elts
[i
] = make_tree (itype
, elt
);
5132 return build_vector (type
, elts
);
5136 return fold_build2 (PLUS_EXPR
, type
, make_tree (type
, XEXP (x
, 0)),
5137 make_tree (type
, XEXP (x
, 1)));
5140 return fold_build2 (MINUS_EXPR
, type
, make_tree (type
, XEXP (x
, 0)),
5141 make_tree (type
, XEXP (x
, 1)));
5144 return fold_build1 (NEGATE_EXPR
, type
, make_tree (type
, XEXP (x
, 0)));
5147 return fold_build2 (MULT_EXPR
, type
, make_tree (type
, XEXP (x
, 0)),
5148 make_tree (type
, XEXP (x
, 1)));
5151 return fold_build2 (LSHIFT_EXPR
, type
, make_tree (type
, XEXP (x
, 0)),
5152 make_tree (type
, XEXP (x
, 1)));
5155 t
= unsigned_type_for (type
);
5156 return fold_convert (type
, build2 (RSHIFT_EXPR
, t
,
5157 make_tree (t
, XEXP (x
, 0)),
5158 make_tree (type
, XEXP (x
, 1))));
5161 t
= signed_type_for (type
);
5162 return fold_convert (type
, build2 (RSHIFT_EXPR
, t
,
5163 make_tree (t
, XEXP (x
, 0)),
5164 make_tree (type
, XEXP (x
, 1))));
5167 if (TREE_CODE (type
) != REAL_TYPE
)
5168 t
= signed_type_for (type
);
5172 return fold_convert (type
, build2 (TRUNC_DIV_EXPR
, t
,
5173 make_tree (t
, XEXP (x
, 0)),
5174 make_tree (t
, XEXP (x
, 1))));
5176 t
= unsigned_type_for (type
);
5177 return fold_convert (type
, build2 (TRUNC_DIV_EXPR
, t
,
5178 make_tree (t
, XEXP (x
, 0)),
5179 make_tree (t
, XEXP (x
, 1))));
5183 t
= lang_hooks
.types
.type_for_mode (GET_MODE (XEXP (x
, 0)),
5184 GET_CODE (x
) == ZERO_EXTEND
);
5185 return fold_convert (type
, make_tree (t
, XEXP (x
, 0)));
5188 return make_tree (type
, XEXP (x
, 0));
5191 t
= SYMBOL_REF_DECL (x
);
5193 return fold_convert (type
, build_fold_addr_expr (t
));
5197 t
= build_decl (RTL_LOCATION (x
), VAR_DECL
, NULL_TREE
, type
);
5199 /* If TYPE is a POINTER_TYPE, we might need to convert X from
5200 address mode to pointer mode. */
5201 if (POINTER_TYPE_P (type
))
5202 x
= convert_memory_address_addr_space
5203 (TYPE_MODE (type
), x
, TYPE_ADDR_SPACE (TREE_TYPE (type
)));
5205 /* Note that we do *not* use SET_DECL_RTL here, because we do not
5206 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
5207 t
->decl_with_rtl
.rtl
= x
;
5213 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5214 and returning TARGET.
5216 If TARGET is 0, a pseudo-register or constant is returned. */
5219 expand_and (machine_mode mode
, rtx op0
, rtx op1
, rtx target
)
5223 if (GET_MODE (op0
) == VOIDmode
&& GET_MODE (op1
) == VOIDmode
)
5224 tem
= simplify_binary_operation (AND
, mode
, op0
, op1
);
5226 tem
= expand_binop (mode
, and_optab
, op0
, op1
, target
, 0, OPTAB_LIB_WIDEN
);
5230 else if (tem
!= target
)
5231 emit_move_insn (target
, tem
);
5235 /* Helper function for emit_store_flag. */
5237 emit_cstore (rtx target
, enum insn_code icode
, enum rtx_code code
,
5238 machine_mode mode
, machine_mode compare_mode
,
5239 int unsignedp
, rtx x
, rtx y
, int normalizep
,
5240 machine_mode target_mode
)
5242 struct expand_operand ops
[4];
5243 rtx op0
, comparison
, subtarget
;
5245 machine_mode result_mode
= targetm
.cstore_mode (icode
);
5247 last
= get_last_insn ();
5248 x
= prepare_operand (icode
, x
, 2, mode
, compare_mode
, unsignedp
);
5249 y
= prepare_operand (icode
, y
, 3, mode
, compare_mode
, unsignedp
);
5252 delete_insns_since (last
);
5256 if (target_mode
== VOIDmode
)
5257 target_mode
= result_mode
;
5259 target
= gen_reg_rtx (target_mode
);
5261 comparison
= gen_rtx_fmt_ee (code
, result_mode
, x
, y
);
5263 create_output_operand (&ops
[0], optimize
? NULL_RTX
: target
, result_mode
);
5264 create_fixed_operand (&ops
[1], comparison
);
5265 create_fixed_operand (&ops
[2], x
);
5266 create_fixed_operand (&ops
[3], y
);
5267 if (!maybe_expand_insn (icode
, 4, ops
))
5269 delete_insns_since (last
);
5272 subtarget
= ops
[0].value
;
5274 /* If we are converting to a wider mode, first convert to
5275 TARGET_MODE, then normalize. This produces better combining
5276 opportunities on machines that have a SIGN_EXTRACT when we are
5277 testing a single bit. This mostly benefits the 68k.
5279 If STORE_FLAG_VALUE does not have the sign bit set when
5280 interpreted in MODE, we can do this conversion as unsigned, which
5281 is usually more efficient. */
5282 if (GET_MODE_SIZE (target_mode
) > GET_MODE_SIZE (result_mode
))
5284 convert_move (target
, subtarget
,
5285 val_signbit_known_clear_p (result_mode
,
5288 result_mode
= target_mode
;
5293 /* If we want to keep subexpressions around, don't reuse our last
5298 /* Now normalize to the proper value in MODE. Sometimes we don't
5299 have to do anything. */
5300 if (normalizep
== 0 || normalizep
== STORE_FLAG_VALUE
)
5302 /* STORE_FLAG_VALUE might be the most negative number, so write
5303 the comparison this way to avoid a compiler-time warning. */
5304 else if (- normalizep
== STORE_FLAG_VALUE
)
5305 op0
= expand_unop (result_mode
, neg_optab
, op0
, subtarget
, 0);
5307 /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5308 it hard to use a value of just the sign bit due to ANSI integer
5309 constant typing rules. */
5310 else if (val_signbit_known_set_p (result_mode
, STORE_FLAG_VALUE
))
5311 op0
= expand_shift (RSHIFT_EXPR
, result_mode
, op0
,
5312 GET_MODE_BITSIZE (result_mode
) - 1, subtarget
,
5316 gcc_assert (STORE_FLAG_VALUE
& 1);
5318 op0
= expand_and (result_mode
, op0
, const1_rtx
, subtarget
);
5319 if (normalizep
== -1)
5320 op0
= expand_unop (result_mode
, neg_optab
, op0
, op0
, 0);
5323 /* If we were converting to a smaller mode, do the conversion now. */
5324 if (target_mode
!= result_mode
)
5326 convert_move (target
, op0
, 0);
5334 /* A subroutine of emit_store_flag only including "tricks" that do not
5335 need a recursive call. These are kept separate to avoid infinite
5339 emit_store_flag_1 (rtx target
, enum rtx_code code
, rtx op0
, rtx op1
,
5340 machine_mode mode
, int unsignedp
, int normalizep
,
5341 machine_mode target_mode
)
5344 enum insn_code icode
;
5345 machine_mode compare_mode
;
5346 enum mode_class mclass
;
5347 enum rtx_code scode
;
5350 code
= unsigned_condition (code
);
5351 scode
= swap_condition (code
);
5353 /* If one operand is constant, make it the second one. Only do this
5354 if the other operand is not constant as well. */
5356 if (swap_commutative_operands_p (op0
, op1
))
5358 std::swap (op0
, op1
);
5359 code
= swap_condition (code
);
5362 if (mode
== VOIDmode
)
5363 mode
= GET_MODE (op0
);
5365 /* For some comparisons with 1 and -1, we can convert this to
5366 comparisons with zero. This will often produce more opportunities for
5367 store-flag insns. */
5372 if (op1
== const1_rtx
)
5373 op1
= const0_rtx
, code
= LE
;
5376 if (op1
== constm1_rtx
)
5377 op1
= const0_rtx
, code
= LT
;
5380 if (op1
== const1_rtx
)
5381 op1
= const0_rtx
, code
= GT
;
5384 if (op1
== constm1_rtx
)
5385 op1
= const0_rtx
, code
= GE
;
5388 if (op1
== const1_rtx
)
5389 op1
= const0_rtx
, code
= NE
;
5392 if (op1
== const1_rtx
)
5393 op1
= const0_rtx
, code
= EQ
;
5399 /* If we are comparing a double-word integer with zero or -1, we can
5400 convert the comparison into one involving a single word. */
5401 if (GET_MODE_BITSIZE (mode
) == BITS_PER_WORD
* 2
5402 && GET_MODE_CLASS (mode
) == MODE_INT
5403 && (!MEM_P (op0
) || ! MEM_VOLATILE_P (op0
)))
5406 if ((code
== EQ
|| code
== NE
)
5407 && (op1
== const0_rtx
|| op1
== constm1_rtx
))
5411 /* Do a logical OR or AND of the two words and compare the
5413 op00
= simplify_gen_subreg (word_mode
, op0
, mode
, 0);
5414 op01
= simplify_gen_subreg (word_mode
, op0
, mode
, UNITS_PER_WORD
);
5415 tem
= expand_binop (word_mode
,
5416 op1
== const0_rtx
? ior_optab
: and_optab
,
5417 op00
, op01
, NULL_RTX
, unsignedp
,
5421 tem
= emit_store_flag (NULL_RTX
, code
, tem
, op1
, word_mode
,
5422 unsignedp
, normalizep
);
5424 else if ((code
== LT
|| code
== GE
) && op1
== const0_rtx
)
5428 /* If testing the sign bit, can just test on high word. */
5429 op0h
= simplify_gen_subreg (word_mode
, op0
, mode
,
5430 subreg_highpart_offset (word_mode
,
5432 tem
= emit_store_flag (NULL_RTX
, code
, op0h
, op1
, word_mode
,
5433 unsignedp
, normalizep
);
5440 if (target_mode
== VOIDmode
|| GET_MODE (tem
) == target_mode
)
5443 target
= gen_reg_rtx (target_mode
);
5445 convert_move (target
, tem
,
5446 !val_signbit_known_set_p (word_mode
,
5447 (normalizep
? normalizep
5448 : STORE_FLAG_VALUE
)));
5453 /* If this is A < 0 or A >= 0, we can do this by taking the ones
5454 complement of A (for GE) and shifting the sign bit to the low bit. */
5455 if (op1
== const0_rtx
&& (code
== LT
|| code
== GE
)
5456 && GET_MODE_CLASS (mode
) == MODE_INT
5457 && (normalizep
|| STORE_FLAG_VALUE
== 1
5458 || val_signbit_p (mode
, STORE_FLAG_VALUE
)))
5465 /* If the result is to be wider than OP0, it is best to convert it
5466 first. If it is to be narrower, it is *incorrect* to convert it
5468 else if (GET_MODE_SIZE (target_mode
) > GET_MODE_SIZE (mode
))
5470 op0
= convert_modes (target_mode
, mode
, op0
, 0);
5474 if (target_mode
!= mode
)
5478 op0
= expand_unop (mode
, one_cmpl_optab
, op0
,
5479 ((STORE_FLAG_VALUE
== 1 || normalizep
)
5480 ? 0 : subtarget
), 0);
5482 if (STORE_FLAG_VALUE
== 1 || normalizep
)
5483 /* If we are supposed to produce a 0/1 value, we want to do
5484 a logical shift from the sign bit to the low-order bit; for
5485 a -1/0 value, we do an arithmetic shift. */
5486 op0
= expand_shift (RSHIFT_EXPR
, mode
, op0
,
5487 GET_MODE_BITSIZE (mode
) - 1,
5488 subtarget
, normalizep
!= -1);
5490 if (mode
!= target_mode
)
5491 op0
= convert_modes (target_mode
, mode
, op0
, 0);
5496 mclass
= GET_MODE_CLASS (mode
);
5497 for (compare_mode
= mode
; compare_mode
!= VOIDmode
;
5498 compare_mode
= GET_MODE_WIDER_MODE (compare_mode
))
5500 machine_mode optab_mode
= mclass
== MODE_CC
? CCmode
: compare_mode
;
5501 icode
= optab_handler (cstore_optab
, optab_mode
);
5502 if (icode
!= CODE_FOR_nothing
)
5504 do_pending_stack_adjust ();
5505 rtx tem
= emit_cstore (target
, icode
, code
, mode
, compare_mode
,
5506 unsignedp
, op0
, op1
, normalizep
, target_mode
);
5510 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
5512 tem
= emit_cstore (target
, icode
, scode
, mode
, compare_mode
,
5513 unsignedp
, op1
, op0
, normalizep
, target_mode
);
5524 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5525 and storing in TARGET. Normally return TARGET.
5526 Return 0 if that cannot be done.
5528 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
5529 it is VOIDmode, they cannot both be CONST_INT.
5531 UNSIGNEDP is for the case where we have to widen the operands
5532 to perform the operation. It says to use zero-extension.
5534 NORMALIZEP is 1 if we should convert the result to be either zero
5535 or one. Normalize is -1 if we should convert the result to be
5536 either zero or -1. If NORMALIZEP is zero, the result will be left
5537 "raw" out of the scc insn. */
5540 emit_store_flag (rtx target
, enum rtx_code code
, rtx op0
, rtx op1
,
5541 machine_mode mode
, int unsignedp
, int normalizep
)
5543 machine_mode target_mode
= target
? GET_MODE (target
) : VOIDmode
;
5544 enum rtx_code rcode
;
5549 /* If we compare constants, we shouldn't use a store-flag operation,
5550 but a constant load. We can get there via the vanilla route that
5551 usually generates a compare-branch sequence, but will in this case
5552 fold the comparison to a constant, and thus elide the branch. */
5553 if (CONSTANT_P (op0
) && CONSTANT_P (op1
))
5556 tem
= emit_store_flag_1 (target
, code
, op0
, op1
, mode
, unsignedp
, normalizep
,
5561 /* If we reached here, we can't do this with a scc insn, however there
5562 are some comparisons that can be done in other ways. Don't do any
5563 of these cases if branches are very cheap. */
5564 if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5567 /* See what we need to return. We can only return a 1, -1, or the
5570 if (normalizep
== 0)
5572 if (STORE_FLAG_VALUE
== 1 || STORE_FLAG_VALUE
== -1)
5573 normalizep
= STORE_FLAG_VALUE
;
5575 else if (val_signbit_p (mode
, STORE_FLAG_VALUE
))
5581 last
= get_last_insn ();
5583 /* If optimizing, use different pseudo registers for each insn, instead
5584 of reusing the same pseudo. This leads to better CSE, but slows
5585 down the compiler, since there are more pseudos */
5586 subtarget
= (!optimize
5587 && (target_mode
== mode
)) ? target
: NULL_RTX
;
5588 trueval
= GEN_INT (normalizep
? normalizep
: STORE_FLAG_VALUE
);
5590 /* For floating-point comparisons, try the reverse comparison or try
5591 changing the "orderedness" of the comparison. */
5592 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
5594 enum rtx_code first_code
;
5597 rcode
= reverse_condition_maybe_unordered (code
);
5598 if (can_compare_p (rcode
, mode
, ccp_store_flag
)
5599 && (code
== ORDERED
|| code
== UNORDERED
5600 || (! HONOR_NANS (mode
) && (code
== LTGT
|| code
== UNEQ
))
5601 || (! HONOR_SNANS (mode
) && (code
== EQ
|| code
== NE
))))
5603 int want_add
= ((STORE_FLAG_VALUE
== 1 && normalizep
== -1)
5604 || (STORE_FLAG_VALUE
== -1 && normalizep
== 1));
5606 /* For the reverse comparison, use either an addition or a XOR. */
5608 && rtx_cost (GEN_INT (normalizep
), mode
, PLUS
, 1,
5609 optimize_insn_for_speed_p ()) == 0)
5611 tem
= emit_store_flag_1 (subtarget
, rcode
, op0
, op1
, mode
, 0,
5612 STORE_FLAG_VALUE
, target_mode
);
5614 return expand_binop (target_mode
, add_optab
, tem
,
5615 gen_int_mode (normalizep
, target_mode
),
5616 target
, 0, OPTAB_WIDEN
);
5619 && rtx_cost (trueval
, mode
, XOR
, 1,
5620 optimize_insn_for_speed_p ()) == 0)
5622 tem
= emit_store_flag_1 (subtarget
, rcode
, op0
, op1
, mode
, 0,
5623 normalizep
, target_mode
);
5625 return expand_binop (target_mode
, xor_optab
, tem
, trueval
,
5626 target
, INTVAL (trueval
) >= 0, OPTAB_WIDEN
);
5630 delete_insns_since (last
);
5632 /* Cannot split ORDERED and UNORDERED, only try the above trick. */
5633 if (code
== ORDERED
|| code
== UNORDERED
)
5636 and_them
= split_comparison (code
, mode
, &first_code
, &code
);
5638 /* If there are no NaNs, the first comparison should always fall through.
5639 Effectively change the comparison to the other one. */
5640 if (!HONOR_NANS (mode
))
5642 gcc_assert (first_code
== (and_them
? ORDERED
: UNORDERED
));
5643 return emit_store_flag_1 (target
, code
, op0
, op1
, mode
, 0, normalizep
,
5647 if (!HAVE_conditional_move
)
5650 /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5651 conditional move. */
5652 tem
= emit_store_flag_1 (subtarget
, first_code
, op0
, op1
, mode
, 0,
5653 normalizep
, target_mode
);
5658 tem
= emit_conditional_move (target
, code
, op0
, op1
, mode
,
5659 tem
, const0_rtx
, GET_MODE (tem
), 0);
5661 tem
= emit_conditional_move (target
, code
, op0
, op1
, mode
,
5662 trueval
, tem
, GET_MODE (tem
), 0);
5665 delete_insns_since (last
);
5669 /* The remaining tricks only apply to integer comparisons. */
5671 if (GET_MODE_CLASS (mode
) != MODE_INT
)
5674 /* If this is an equality comparison of integers, we can try to exclusive-or
5675 (or subtract) the two operands and use a recursive call to try the
5676 comparison with zero. Don't do any of these cases if branches are
5679 if ((code
== EQ
|| code
== NE
) && op1
!= const0_rtx
)
5681 tem
= expand_binop (mode
, xor_optab
, op0
, op1
, subtarget
, 1,
5685 tem
= expand_binop (mode
, sub_optab
, op0
, op1
, subtarget
, 1,
5688 tem
= emit_store_flag (target
, code
, tem
, const0_rtx
,
5689 mode
, unsignedp
, normalizep
);
5693 delete_insns_since (last
);
5696 /* For integer comparisons, try the reverse comparison. However, for
5697 small X and if we'd have anyway to extend, implementing "X != 0"
5698 as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0". */
5699 rcode
= reverse_condition (code
);
5700 if (can_compare_p (rcode
, mode
, ccp_store_flag
)
5701 && ! (optab_handler (cstore_optab
, mode
) == CODE_FOR_nothing
5703 && GET_MODE_SIZE (mode
) < UNITS_PER_WORD
5704 && op1
== const0_rtx
))
5706 int want_add
= ((STORE_FLAG_VALUE
== 1 && normalizep
== -1)
5707 || (STORE_FLAG_VALUE
== -1 && normalizep
== 1));
5709 /* Again, for the reverse comparison, use either an addition or a XOR. */
5711 && rtx_cost (GEN_INT (normalizep
), mode
, PLUS
, 1,
5712 optimize_insn_for_speed_p ()) == 0)
5714 tem
= emit_store_flag_1 (subtarget
, rcode
, op0
, op1
, mode
, 0,
5715 STORE_FLAG_VALUE
, target_mode
);
5717 tem
= expand_binop (target_mode
, add_optab
, tem
,
5718 gen_int_mode (normalizep
, target_mode
),
5719 target
, 0, OPTAB_WIDEN
);
5722 && rtx_cost (trueval
, mode
, XOR
, 1,
5723 optimize_insn_for_speed_p ()) == 0)
5725 tem
= emit_store_flag_1 (subtarget
, rcode
, op0
, op1
, mode
, 0,
5726 normalizep
, target_mode
);
5728 tem
= expand_binop (target_mode
, xor_optab
, tem
, trueval
, target
,
5729 INTVAL (trueval
) >= 0, OPTAB_WIDEN
);
5734 delete_insns_since (last
);
5737 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5738 the constant zero. Reject all other comparisons at this point. Only
5739 do LE and GT if branches are expensive since they are expensive on
5740 2-operand machines. */
5742 if (op1
!= const0_rtx
5743 || (code
!= EQ
&& code
!= NE
5744 && (BRANCH_COST (optimize_insn_for_speed_p (),
5745 false) <= 1 || (code
!= LE
&& code
!= GT
))))
5748 /* Try to put the result of the comparison in the sign bit. Assume we can't
5749 do the necessary operation below. */
5753 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
5754 the sign bit set. */
5758 /* This is destructive, so SUBTARGET can't be OP0. */
5759 if (rtx_equal_p (subtarget
, op0
))
5762 tem
= expand_binop (mode
, sub_optab
, op0
, const1_rtx
, subtarget
, 0,
5765 tem
= expand_binop (mode
, ior_optab
, op0
, tem
, subtarget
, 0,
5769 /* To see if A > 0, compute (((signed) A) << BITS) - A, where BITS is the
5770 number of bits in the mode of OP0, minus one. */
5774 if (rtx_equal_p (subtarget
, op0
))
5777 tem
= maybe_expand_shift (RSHIFT_EXPR
, mode
, op0
,
5778 GET_MODE_BITSIZE (mode
) - 1,
5781 tem
= expand_binop (mode
, sub_optab
, tem
, op0
, subtarget
, 0,
5785 if (code
== EQ
|| code
== NE
)
5787 /* For EQ or NE, one way to do the comparison is to apply an operation
5788 that converts the operand into a positive number if it is nonzero
5789 or zero if it was originally zero. Then, for EQ, we subtract 1 and
5790 for NE we negate. This puts the result in the sign bit. Then we
5791 normalize with a shift, if needed.
5793 Two operations that can do the above actions are ABS and FFS, so try
5794 them. If that doesn't work, and MODE is smaller than a full word,
5795 we can use zero-extension to the wider mode (an unsigned conversion)
5796 as the operation. */
5798 /* Note that ABS doesn't yield a positive number for INT_MIN, but
5799 that is compensated by the subsequent overflow when subtracting
5802 if (optab_handler (abs_optab
, mode
) != CODE_FOR_nothing
)
5803 tem
= expand_unop (mode
, abs_optab
, op0
, subtarget
, 1);
5804 else if (optab_handler (ffs_optab
, mode
) != CODE_FOR_nothing
)
5805 tem
= expand_unop (mode
, ffs_optab
, op0
, subtarget
, 1);
5806 else if (GET_MODE_SIZE (mode
) < UNITS_PER_WORD
)
5808 tem
= convert_modes (word_mode
, mode
, op0
, 1);
5815 tem
= expand_binop (mode
, sub_optab
, tem
, const1_rtx
, subtarget
,
5818 tem
= expand_unop (mode
, neg_optab
, tem
, subtarget
, 0);
5821 /* If we couldn't do it that way, for NE we can "or" the two's complement
5822 of the value with itself. For EQ, we take the one's complement of
5823 that "or", which is an extra insn, so we only handle EQ if branches
5828 || BRANCH_COST (optimize_insn_for_speed_p (),
5831 if (rtx_equal_p (subtarget
, op0
))
5834 tem
= expand_unop (mode
, neg_optab
, op0
, subtarget
, 0);
5835 tem
= expand_binop (mode
, ior_optab
, tem
, op0
, subtarget
, 0,
5838 if (tem
&& code
== EQ
)
5839 tem
= expand_unop (mode
, one_cmpl_optab
, tem
, subtarget
, 0);
5843 if (tem
&& normalizep
)
5844 tem
= maybe_expand_shift (RSHIFT_EXPR
, mode
, tem
,
5845 GET_MODE_BITSIZE (mode
) - 1,
5846 subtarget
, normalizep
== 1);
5852 else if (GET_MODE (tem
) != target_mode
)
5854 convert_move (target
, tem
, 0);
5857 else if (!subtarget
)
5859 emit_move_insn (target
, tem
);
5864 delete_insns_since (last
);
5869 /* Like emit_store_flag, but always succeeds. */
5872 emit_store_flag_force (rtx target
, enum rtx_code code
, rtx op0
, rtx op1
,
5873 machine_mode mode
, int unsignedp
, int normalizep
)
5876 rtx_code_label
*label
;
5877 rtx trueval
, falseval
;
5879 /* First see if emit_store_flag can do the job. */
5880 tem
= emit_store_flag (target
, code
, op0
, op1
, mode
, unsignedp
, normalizep
);
5885 target
= gen_reg_rtx (word_mode
);
5887 /* If this failed, we have to do this with set/compare/jump/set code.
5888 For foo != 0, if foo is in OP0, just replace it with 1 if nonzero. */
5889 trueval
= normalizep
? GEN_INT (normalizep
) : const1_rtx
;
5891 && GET_MODE_CLASS (mode
) == MODE_INT
5894 && op1
== const0_rtx
)
5896 label
= gen_label_rtx ();
5897 do_compare_rtx_and_jump (target
, const0_rtx
, EQ
, unsignedp
, mode
,
5898 NULL_RTX
, NULL
, label
, -1);
5899 emit_move_insn (target
, trueval
);
5905 || reg_mentioned_p (target
, op0
) || reg_mentioned_p (target
, op1
))
5906 target
= gen_reg_rtx (GET_MODE (target
));
5908 /* Jump in the right direction if the target cannot implement CODE
5909 but can jump on its reverse condition. */
5910 falseval
= const0_rtx
;
5911 if (! can_compare_p (code
, mode
, ccp_jump
)
5912 && (! FLOAT_MODE_P (mode
)
5913 || code
== ORDERED
|| code
== UNORDERED
5914 || (! HONOR_NANS (mode
) && (code
== LTGT
|| code
== UNEQ
))
5915 || (! HONOR_SNANS (mode
) && (code
== EQ
|| code
== NE
))))
5917 enum rtx_code rcode
;
5918 if (FLOAT_MODE_P (mode
))
5919 rcode
= reverse_condition_maybe_unordered (code
);
5921 rcode
= reverse_condition (code
);
5923 /* Canonicalize to UNORDERED for the libcall. */
5924 if (can_compare_p (rcode
, mode
, ccp_jump
)
5925 || (code
== ORDERED
&& ! can_compare_p (ORDERED
, mode
, ccp_jump
)))
5928 trueval
= const0_rtx
;
5933 emit_move_insn (target
, trueval
);
5934 label
= gen_label_rtx ();
5935 do_compare_rtx_and_jump (op0
, op1
, code
, unsignedp
, mode
, NULL_RTX
, NULL
,
5938 emit_move_insn (target
, falseval
);
5944 /* Perform possibly multi-word comparison and conditional jump to LABEL
5945 if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE. This is
5946 now a thin wrapper around do_compare_rtx_and_jump. */
5949 do_cmp_and_jump (rtx arg1
, rtx arg2
, enum rtx_code op
, machine_mode mode
,
5950 rtx_code_label
*label
)
5952 int unsignedp
= (op
== LTU
|| op
== LEU
|| op
== GTU
|| op
== GEU
);
5953 do_compare_rtx_and_jump (arg1
, arg2
, op
, unsignedp
, mode
, NULL_RTX
,