/* Medium-level subroutines: convert bit-field store and extract
   and shifts, multiplies and divides to rtl instructions.
   Copyright (C) 1987-2019 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "diagnostic-core.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "langhooks.h"
#include "tree-vector-builder.h"
struct target_expmed default_target_expmed;
struct target_expmed *this_target_expmed = &default_target_expmed;
static bool store_integral_bit_field (rtx, opt_scalar_int_mode,
                                      unsigned HOST_WIDE_INT,
                                      unsigned HOST_WIDE_INT,
                                      poly_uint64, poly_uint64,
                                      machine_mode, rtx, bool, bool);
static void store_fixed_bit_field (rtx, opt_scalar_int_mode,
                                   unsigned HOST_WIDE_INT,
                                   unsigned HOST_WIDE_INT,
                                   poly_uint64, poly_uint64,
                                   rtx, scalar_int_mode, bool);
static void store_fixed_bit_field_1 (rtx, scalar_int_mode,
                                     unsigned HOST_WIDE_INT,
                                     unsigned HOST_WIDE_INT,
                                     rtx, scalar_int_mode, bool);
static void store_split_bit_field (rtx, opt_scalar_int_mode,
                                   unsigned HOST_WIDE_INT,
                                   unsigned HOST_WIDE_INT,
                                   poly_uint64, poly_uint64,
                                   rtx, scalar_int_mode, bool);
static rtx extract_integral_bit_field (rtx, opt_scalar_int_mode,
                                       unsigned HOST_WIDE_INT,
                                       unsigned HOST_WIDE_INT, int, rtx,
                                       machine_mode, machine_mode, bool, bool);
static rtx extract_fixed_bit_field (machine_mode, rtx, opt_scalar_int_mode,
                                    unsigned HOST_WIDE_INT,
                                    unsigned HOST_WIDE_INT, rtx, int, bool);
static rtx extract_fixed_bit_field_1 (machine_mode, rtx, scalar_int_mode,
                                      unsigned HOST_WIDE_INT,
                                      unsigned HOST_WIDE_INT, rtx, int, bool);
static rtx lshift_value (machine_mode, unsigned HOST_WIDE_INT, int);
static rtx extract_split_bit_field (rtx, opt_scalar_int_mode,
                                    unsigned HOST_WIDE_INT,
                                    unsigned HOST_WIDE_INT, int, bool);
static void do_cmp_and_jump (rtx, rtx, enum rtx_code, machine_mode, rtx_code_label *);
static rtx expand_smod_pow2 (scalar_int_mode, rtx, HOST_WIDE_INT);
static rtx expand_sdiv_pow2 (scalar_int_mode, rtx, HOST_WIDE_INT);
/* Return a constant integer mask value of mode MODE with BITSIZE ones
   followed by BITPOS zeros, or the complement of that if COMPLEMENT.
   The mask is truncated if necessary to the width of mode MODE.  The
   mask is zero-extended if BITSIZE+BITPOS is too small for MODE.  */

mask_rtx (scalar_int_mode mode, int bitpos, int bitsize, bool complement)
  return immed_wide_int_const
    (wi::shifted_mask (bitpos, bitsize, complement,
                       GET_MODE_PRECISION (mode)), mode);
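/* For illustration only (not part of the original source, assuming a
   32-bit SImode): mask_rtx (SImode, 4, 8, false) describes the constant
   0x00000ff0 -- eight one bits starting at bit 4 -- while passing
   complement = true would instead describe 0xfffff00f.  */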
/* Test whether a value is zero or a power of two.  */
#define EXACT_POWER_OF_2_OR_ZERO_P(x) \
  (((x) & ((x) - HOST_WIDE_INT_1U)) == 0)
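/* For illustration only (not part of the original source): subtracting 1
   clears the lowest set bit of X, so the AND is zero exactly when X has
   at most one bit set; the macro is true for 0, 1, 2, 4, 8, ... and
   false for values such as 6 (6 & 5 == 4).  */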
struct init_expmed_rtl
  rtx pow2[MAX_BITS_PER_WORD];
  rtx cint[MAX_BITS_PER_WORD];
init_expmed_one_conv (struct init_expmed_rtl *all, scalar_int_mode to_mode,
                      scalar_int_mode from_mode, bool speed)
  int to_size, from_size;

  to_size = GET_MODE_PRECISION (to_mode);
  from_size = GET_MODE_PRECISION (from_mode);

  /* Most partial integers have a precision less than the "full"
     integer it requires for storage.  In case one doesn't, for
     comparison purposes here, reduce the bit size by one in that
     case.  */
  if (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT
      && pow2p_hwi (to_size))
  if (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT
      && pow2p_hwi (from_size))

  /* Assume cost of zero-extend and sign-extend is the same.  */
  which = (to_size < from_size ? all->trunc : all->zext);

  PUT_MODE (all->reg, from_mode);
  set_convert_cost (to_mode, from_mode, speed,
                    set_src_cost (which, to_mode, speed));
init_expmed_one_mode (struct init_expmed_rtl *all,
                      machine_mode mode, int speed)
  int m, n, mode_bitsize;
  machine_mode mode_from;

  mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);

  PUT_MODE (all->reg, mode);
  PUT_MODE (all->plus, mode);
  PUT_MODE (all->neg, mode);
  PUT_MODE (all->mult, mode);
  PUT_MODE (all->sdiv, mode);
  PUT_MODE (all->udiv, mode);
  PUT_MODE (all->sdiv_32, mode);
  PUT_MODE (all->smod_32, mode);
  PUT_MODE (all->wide_trunc, mode);
  PUT_MODE (all->shift, mode);
  PUT_MODE (all->shift_mult, mode);
  PUT_MODE (all->shift_add, mode);
  PUT_MODE (all->shift_sub0, mode);
  PUT_MODE (all->shift_sub1, mode);
  PUT_MODE (all->zext, mode);
  PUT_MODE (all->trunc, mode);

  set_add_cost (speed, mode, set_src_cost (all->plus, mode, speed));
  set_neg_cost (speed, mode, set_src_cost (all->neg, mode, speed));
  set_mul_cost (speed, mode, set_src_cost (all->mult, mode, speed));
  set_sdiv_cost (speed, mode, set_src_cost (all->sdiv, mode, speed));
  set_udiv_cost (speed, mode, set_src_cost (all->udiv, mode, speed));

  set_sdiv_pow2_cheap (speed, mode, (set_src_cost (all->sdiv_32, mode, speed)
                                     <= 2 * add_cost (speed, mode)));
  set_smod_pow2_cheap (speed, mode, (set_src_cost (all->smod_32, mode, speed)
                                     <= 4 * add_cost (speed, mode)));

  set_shift_cost (speed, mode, 0, 0);
    int cost = add_cost (speed, mode);
    set_shiftadd_cost (speed, mode, 0, cost);
    set_shiftsub0_cost (speed, mode, 0, cost);
    set_shiftsub1_cost (speed, mode, 0, cost);

  n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
  for (m = 1; m < n; m++)
      XEXP (all->shift, 1) = all->cint[m];
      XEXP (all->shift_mult, 1) = all->pow2[m];

      set_shift_cost (speed, mode, m, set_src_cost (all->shift, mode, speed));
      set_shiftadd_cost (speed, mode, m, set_src_cost (all->shift_add, mode,
      set_shiftsub0_cost (speed, mode, m, set_src_cost (all->shift_sub0, mode,
      set_shiftsub1_cost (speed, mode, m, set_src_cost (all->shift_sub1, mode,

  scalar_int_mode int_mode_to;
  if (is_a <scalar_int_mode> (mode, &int_mode_to))
      for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
           mode_from = (machine_mode)(mode_from + 1))
        init_expmed_one_conv (all, int_mode_to,
                              as_a <scalar_int_mode> (mode_from), speed);

      scalar_int_mode wider_mode;
      if (GET_MODE_CLASS (int_mode_to) == MODE_INT
          && GET_MODE_WIDER_MODE (int_mode_to).exists (&wider_mode))
          PUT_MODE (all->zext, wider_mode);
          PUT_MODE (all->wide_mult, wider_mode);
          PUT_MODE (all->wide_lshr, wider_mode);
          XEXP (all->wide_lshr, 1)
            = gen_int_shift_amount (wider_mode, mode_bitsize);

          set_mul_widen_cost (speed, wider_mode,
                              set_src_cost (all->wide_mult, wider_mode, speed));
          set_mul_highpart_cost (speed, int_mode_to,
                                 set_src_cost (all->wide_trunc,
                                               int_mode_to, speed));
  struct init_expmed_rtl all;
  machine_mode mode = QImode;

  memset (&all, 0, sizeof all);
  for (m = 1; m < MAX_BITS_PER_WORD; m++)
      all.pow2[m] = GEN_INT (HOST_WIDE_INT_1 << m);
      all.cint[m] = GEN_INT (m);

  /* Avoid using hard regs in ways which may be unsupported.  */
  all.reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
  all.plus = gen_rtx_PLUS (mode, all.reg, all.reg);
  all.neg = gen_rtx_NEG (mode, all.reg);
  all.mult = gen_rtx_MULT (mode, all.reg, all.reg);
  all.sdiv = gen_rtx_DIV (mode, all.reg, all.reg);
  all.udiv = gen_rtx_UDIV (mode, all.reg, all.reg);
  all.sdiv_32 = gen_rtx_DIV (mode, all.reg, all.pow2[5]);
  all.smod_32 = gen_rtx_MOD (mode, all.reg, all.pow2[5]);
  all.zext = gen_rtx_ZERO_EXTEND (mode, all.reg);
  all.wide_mult = gen_rtx_MULT (mode, all.zext, all.zext);
  all.wide_lshr = gen_rtx_LSHIFTRT (mode, all.wide_mult, all.reg);
  all.wide_trunc = gen_rtx_TRUNCATE (mode, all.wide_lshr);
  all.shift = gen_rtx_ASHIFT (mode, all.reg, all.reg);
  all.shift_mult = gen_rtx_MULT (mode, all.reg, all.reg);
  all.shift_add = gen_rtx_PLUS (mode, all.shift_mult, all.reg);
  all.shift_sub0 = gen_rtx_MINUS (mode, all.shift_mult, all.reg);
  all.shift_sub1 = gen_rtx_MINUS (mode, all.reg, all.shift_mult);
  all.trunc = gen_rtx_TRUNCATE (mode, all.reg);

  for (speed = 0; speed < 2; speed++)
      crtl->maybe_hot_insn_p = speed;
      set_zero_cost (speed, set_src_cost (const0_rtx, mode, speed));

      for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
           mode = (machine_mode)(mode + 1))
        init_expmed_one_mode (&all, mode, speed);

      if (MIN_MODE_PARTIAL_INT != VOIDmode)
        for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
             mode = (machine_mode)(mode + 1))
          init_expmed_one_mode (&all, mode, speed);

      if (MIN_MODE_VECTOR_INT != VOIDmode)
        for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
             mode = (machine_mode)(mode + 1))
          init_expmed_one_mode (&all, mode, speed);

  if (alg_hash_used_p ())
      struct alg_hash_entry *p = alg_hash_entry_ptr (0);
      memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
    set_alg_hash_used_p (true);
  default_rtl_profile ();

  ggc_free (all.trunc);
  ggc_free (all.shift_sub1);
  ggc_free (all.shift_sub0);
  ggc_free (all.shift_add);
  ggc_free (all.shift_mult);
  ggc_free (all.shift);
  ggc_free (all.wide_trunc);
  ggc_free (all.wide_lshr);
  ggc_free (all.wide_mult);
  ggc_free (all.smod_32);
  ggc_free (all.sdiv_32);
/* Return an rtx representing minus the value of X.
   MODE is the intended mode of the result,
   useful if X is a CONST_INT.  */

negate_rtx (machine_mode mode, rtx x)
  rtx result = simplify_unary_operation (NEG, mode, x, mode);
    result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
/* Whether reverse storage order is supported on the target.  */
static int reverse_storage_order_supported = -1;

/* Check whether reverse storage order is supported on the target.  */

check_reverse_storage_order_support (void)
  if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
      reverse_storage_order_supported = 0;
      sorry ("reverse scalar storage order");
    reverse_storage_order_supported = 1;

/* Whether reverse FP storage order is supported on the target.  */
static int reverse_float_storage_order_supported = -1;

/* Check whether reverse FP storage order is supported on the target.  */

check_reverse_float_storage_order_support (void)
  if (FLOAT_WORDS_BIG_ENDIAN != WORDS_BIG_ENDIAN)
      reverse_float_storage_order_supported = 0;
      sorry ("reverse floating-point scalar storage order");
    reverse_float_storage_order_supported = 1;
/* Return an rtx representing value of X with reverse storage order.
   MODE is the intended mode of the result,
   useful if X is a CONST_INT.  */

flip_storage_order (machine_mode mode, rtx x)
  scalar_int_mode int_mode;

  if (COMPLEX_MODE_P (mode))
      rtx real = read_complex_part (x, false);
      rtx imag = read_complex_part (x, true);

      real = flip_storage_order (GET_MODE_INNER (mode), real);
      imag = flip_storage_order (GET_MODE_INNER (mode), imag);

      return gen_rtx_CONCAT (mode, real, imag);

  if (__builtin_expect (reverse_storage_order_supported < 0, 0))
    check_reverse_storage_order_support ();

  if (!is_a <scalar_int_mode> (mode, &int_mode))
      if (FLOAT_MODE_P (mode)
          && __builtin_expect (reverse_float_storage_order_supported < 0, 0))
        check_reverse_float_storage_order_support ();

      if (!int_mode_for_size (GET_MODE_PRECISION (mode), 0).exists (&int_mode))
          sorry ("reverse storage order for %smode", GET_MODE_NAME (mode));
      x = gen_lowpart (int_mode, x);

  result = simplify_unary_operation (BSWAP, int_mode, x, int_mode);
    result = expand_unop (int_mode, bswap_optab, x, NULL_RTX, 1);

  if (int_mode != mode)
    result = gen_lowpart (mode, result);
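/* For illustration only (not part of the original source): for a plain
   scalar integer such as SImode the reversal above is just a byte swap,
   so a value holding 0x01020304 comes back as 0x04030201; complex modes
   recurse on the real and imaginary parts separately.  */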
/* If MODE is set, adjust bitfield memory MEM so that it points to the
   first unit of mode MODE that contains a bitfield of size BITSIZE at
   bit position BITNUM.  If MODE is not set, return a BLKmode reference
   to every byte in the bitfield.  Set *NEW_BITNUM to the bit position
   of the field within the new memory.  */

narrow_bit_field_mem (rtx mem, opt_scalar_int_mode mode,
                      unsigned HOST_WIDE_INT bitsize,
                      unsigned HOST_WIDE_INT bitnum,
                      unsigned HOST_WIDE_INT *new_bitnum)
  scalar_int_mode imode;
  if (mode.exists (&imode))
      unsigned int unit = GET_MODE_BITSIZE (imode);
      *new_bitnum = bitnum % unit;
      HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
      return adjust_bitfield_address (mem, imode, offset);

      *new_bitnum = bitnum % BITS_PER_UNIT;
      HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
      HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
      return adjust_bitfield_address_size (mem, BLKmode, offset, size);
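/* For illustration only (not part of the original source): narrowing a
   MEM to QImode for a field at BITNUM 21 yields *NEW_BITNUM = 21 % 8 = 5
   and a byte offset of (21 - 5) / 8 = 2, i.e. the field starts 5 bits
   into the third byte of the original reference.  */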
/* The caller wants to perform insertion or extraction PATTERN on a
   bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
   BITREGION_START and BITREGION_END are as for store_bit_field
   and FIELDMODE is the natural mode of the field.

   Search for a mode that is compatible with the memory access
   restrictions and (where applicable) with a register insertion or
   extraction.  Return the new memory on success, storing the adjusted
   bit position in *NEW_BITNUM.  Return null otherwise.  */

adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
                              rtx op0, HOST_WIDE_INT bitsize,
                              HOST_WIDE_INT bitnum,
                              poly_uint64 bitregion_start,
                              poly_uint64 bitregion_end,
                              machine_mode fieldmode,
                              unsigned HOST_WIDE_INT *new_bitnum)
  bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
                                bitregion_end, MEM_ALIGN (op0),
                                MEM_VOLATILE_P (op0));
  scalar_int_mode best_mode;
  if (iter.next_mode (&best_mode))
      /* We can use a memory in BEST_MODE.  See whether this is true for
         any wider modes.  All other things being equal, we prefer to
         use the widest mode possible because it tends to expose more
         CSE opportunities.  */
      if (!iter.prefer_smaller_modes ())
          /* Limit the search to the mode required by the corresponding
             register insertion or extraction instruction, if any.  */
          scalar_int_mode limit_mode = word_mode;
          extraction_insn insn;
          if (get_best_reg_extraction_insn (&insn, pattern,
                                            GET_MODE_BITSIZE (best_mode),
            limit_mode = insn.field_mode;

          scalar_int_mode wider_mode;
          while (iter.next_mode (&wider_mode)
                 && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
            best_mode = wider_mode;

      return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
/* Return true if a bitfield of size BITSIZE at bit number BITNUM within
   a structure of mode STRUCT_MODE represents a lowpart subreg.  The subreg
   offset is then BITNUM / BITS_PER_UNIT.  */

lowpart_bit_field_p (poly_uint64 bitnum, poly_uint64 bitsize,
                     machine_mode struct_mode)
  poly_uint64 regsize = REGMODE_NATURAL_SIZE (struct_mode);
  if (BYTES_BIG_ENDIAN)
    return (multiple_p (bitnum, BITS_PER_UNIT)
            && (known_eq (bitnum + bitsize, GET_MODE_BITSIZE (struct_mode))
                || multiple_p (bitnum + bitsize,
                               regsize * BITS_PER_UNIT)));

  return multiple_p (bitnum, regsize * BITS_PER_UNIT);
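/* For illustration only (not part of the original source, assuming 4-byte
   registers and a DImode structure): a 16-bit field at bit 0 is a lowpart
   on a little-endian target but not on a big-endian one, while a 16-bit
   field ending on the register boundary (bits 16..31) is a lowpart only
   on the big-endian target.  */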
/* Return true if -fstrict-volatile-bitfields applies to an access of OP0
   containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
   Return false if the access would touch memory outside the range
   BITREGION_START to BITREGION_END for conformance to the C++ memory
   model.  */

strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
                            unsigned HOST_WIDE_INT bitnum,
                            scalar_int_mode fieldmode,
                            poly_uint64 bitregion_start,
                            poly_uint64 bitregion_end)
  unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode);

  /* -fstrict-volatile-bitfields must be enabled and we must have a
     volatile MEM.  */
      || !MEM_VOLATILE_P (op0)
      || flag_strict_volatile_bitfields <= 0)

  /* The bit size must not be larger than the field mode, and
     the field mode must not be larger than a word.  */
  if (bitsize > modesize || modesize > BITS_PER_WORD)

  /* Check for cases of unaligned fields that must be split.  */
  if (bitnum % modesize + bitsize > modesize)

  /* The memory must be sufficiently aligned for a MODESIZE access.
     This condition guarantees that the memory access will not
     touch anything after the end of the structure.  */
  if (MEM_ALIGN (op0) < modesize)

  /* Check for cases where the C++ memory model applies.  */
  if (maybe_ne (bitregion_end, 0U)
      && (maybe_lt (bitnum - bitnum % modesize, bitregion_start)
          || maybe_gt (bitnum - bitnum % modesize + modesize - 1,
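/* For illustration only (not part of the original source, assuming a
   typical 32-bit target with -fstrict-volatile-bitfields in effect): a
   volatile SImode bit-field occupying bits 8..15 of an aligned word
   passes all of the checks above, so the access is performed as a single
   SImode load or store of the containing word rather than being narrowed
   to a smaller mode.  */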
/* Return true if OP is a memory and if a bitfield of size BITSIZE at
   bit number BITNUM can be treated as a simple value of mode MODE.
   Store the byte offset in *BYTENUM if so.  */

simple_mem_bitfield_p (rtx op0, poly_uint64 bitsize, poly_uint64 bitnum,
                       machine_mode mode, poly_uint64 *bytenum)
          && multiple_p (bitnum, BITS_PER_UNIT, bytenum)
          && known_eq (bitsize, GET_MODE_BITSIZE (mode))
          && (!targetm.slow_unaligned_access (mode, MEM_ALIGN (op0))
              || (multiple_p (bitnum, GET_MODE_ALIGNMENT (mode))
                  && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
/* Try to use instruction INSV to store VALUE into a field of OP0.
   If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is a
   BLKmode MEM.  VALUE_MODE is the mode of VALUE.  BITSIZE and BITNUM
   are as for store_bit_field.  */

store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
                            opt_scalar_int_mode op0_mode,
                            unsigned HOST_WIDE_INT bitsize,
                            unsigned HOST_WIDE_INT bitnum,
                            rtx value, scalar_int_mode value_mode)
  class expand_operand ops[4];
  rtx_insn *last = get_last_insn ();
  bool copy_back = false;

  scalar_int_mode op_mode = insv->field_mode;
  unsigned int unit = GET_MODE_BITSIZE (op_mode);
  if (bitsize == 0 || bitsize > unit)

      /* Get a reference to the first byte of the field.  */
      xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,

      /* Convert from counting within OP0 to counting in OP_MODE.  */
      if (BYTES_BIG_ENDIAN)
        bitnum += unit - GET_MODE_BITSIZE (op0_mode.require ());

      /* If xop0 is a register, we need it in OP_MODE
         to make it acceptable to the format of insv.  */
      if (GET_CODE (xop0) == SUBREG)
        /* We can't just change the mode, because this might clobber op0,
           and we will need the original value of op0 if insv fails.  */
        xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
      if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
        xop0 = gen_lowpart_SUBREG (op_mode, xop0);

  /* If the destination is a paradoxical subreg such that we need a
     truncate to the inner mode, perform the insertion on a temporary and
     truncate the result to the original destination.  Note that we can't
     just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
     X) 0)) is (reg:N X).  */
  if (GET_CODE (xop0) == SUBREG
      && REG_P (SUBREG_REG (xop0))
      && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
      rtx tem = gen_reg_rtx (op_mode);
      emit_move_insn (tem, xop0);
  /* There is a similar overflow check at the start of store_bit_field_1,
     but that only checks the case where the field lies completely outside
     the register.  There are also cases where the field lies partially in
     the register, and BITSIZE needs to be adjusted for that partial
     overflow.  Without this fix, pr48335-2.c would be broken on big-endian
     targets that have a bit-insert instruction, such as arm and
     aarch64.  */
  if (bitsize + bitnum > unit && bitnum < unit)
      warning (OPT_Wextra, "write of %wu-bit data outside the bound of "
               "destination object, data truncated into %wu-bit",
               bitsize, unit - bitnum);
      bitsize = unit - bitnum;

  /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
     "backwards" from the size of the unit we are inserting into.
     Otherwise, we count bits from the most significant on a
     BYTES/BITS_BIG_ENDIAN machine.  */

  if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
    bitnum = unit - bitsize - bitnum;

  /* Convert VALUE to op_mode (which insv insn wants) in VALUE1.  */
  if (value_mode != op_mode)
      if (GET_MODE_BITSIZE (value_mode) >= bitsize)
          /* Optimization: Don't bother really extending VALUE
             if it has all the bits we will actually use.  However,
             if we must narrow it, be sure we do it correctly.  */

          if (GET_MODE_SIZE (value_mode) < GET_MODE_SIZE (op_mode))
              tmp = simplify_subreg (op_mode, value1, value_mode, 0);
                tmp = simplify_gen_subreg (op_mode,
                                           force_reg (value_mode, value1),
              tmp = gen_lowpart_if_possible (op_mode, value1);
                tmp = gen_lowpart (op_mode, force_reg (value_mode, value1));
      else if (CONST_INT_P (value))
        value1 = gen_int_mode (INTVAL (value), op_mode);
        /* Parse phase is supposed to make VALUE's data type
           match that of the component reference, which is a type
           at least as wide as the field; so VALUE should have
           a mode that corresponds to that type.  */
        gcc_assert (CONSTANT_P (value));

  create_fixed_operand (&ops[0], xop0);
  create_integer_operand (&ops[1], bitsize);
  create_integer_operand (&ops[2], bitnum);
  create_input_operand (&ops[3], value1, op_mode);
  if (maybe_expand_insn (insv->icode, 4, ops))
        convert_move (op0, xop0, true);

  delete_insns_since (last);
/* A subroutine of store_bit_field, with the same arguments.  Return true
   if the operation could be implemented.

   If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
   no other way of implementing the operation.  If FALLBACK_P is false,
   return false instead.  */

store_bit_field_1 (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
                   poly_uint64 bitregion_start, poly_uint64 bitregion_end,
                   machine_mode fieldmode,
                   rtx value, bool reverse, bool fallback_p)
  while (GET_CODE (op0) == SUBREG)
      bitnum += subreg_memory_offset (op0) * BITS_PER_UNIT;
      op0 = SUBREG_REG (op0);

  /* No action is needed if the target is a register and if the field
     lies completely outside that register.  This can occur if the source
     code contains an out-of-bounds access to a small array.  */
  if (REG_P (op0) && known_ge (bitnum, GET_MODE_BITSIZE (GET_MODE (op0))))

  /* Use vec_set patterns for inserting parts of vectors whenever
     available.  */
  machine_mode outermode = GET_MODE (op0);
  scalar_mode innermode = GET_MODE_INNER (outermode);
  if (VECTOR_MODE_P (outermode)
      && optab_handler (vec_set_optab, outermode) != CODE_FOR_nothing
      && fieldmode == innermode
      && known_eq (bitsize, GET_MODE_BITSIZE (innermode))
      && multiple_p (bitnum, GET_MODE_BITSIZE (innermode), &pos))
      class expand_operand ops[3];
      enum insn_code icode = optab_handler (vec_set_optab, outermode);

      create_fixed_operand (&ops[0], op0);
      create_input_operand (&ops[1], value, innermode);
      create_integer_operand (&ops[2], pos);
      if (maybe_expand_insn (icode, 3, ops))

  /* If the target is a register, overwriting the entire object, or storing
     a full-word or multi-word field can be done with just a SUBREG.  */
      && known_eq (bitsize, GET_MODE_BITSIZE (fieldmode)))
      /* Use the subreg machinery either to narrow OP0 to the required
         words or to cope with mode punning between equal-sized modes.
         In the latter case, use subreg on the rhs side, not lhs.  */
      HOST_WIDE_INT regnum;
      poly_uint64 regsize = REGMODE_NATURAL_SIZE (GET_MODE (op0));
      if (known_eq (bitnum, 0U)
          && known_eq (bitsize, GET_MODE_BITSIZE (GET_MODE (op0))))
          sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
                sub = flip_storage_order (GET_MODE (op0), sub);
              emit_move_insn (op0, sub);
      else if (constant_multiple_p (bitnum, regsize * BITS_PER_UNIT, &regnum)
               && multiple_p (bitsize, regsize * BITS_PER_UNIT))
          sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
                value = flip_storage_order (fieldmode, value);
              emit_move_insn (sub, value);

  /* If the target is memory, storing any naturally aligned field can be
     done with a simple store.  For targets that support fast unaligned
     memory, any naturally sized, unit aligned field can be done directly.  */
  if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode, &bytenum))
      op0 = adjust_bitfield_address (op0, fieldmode, bytenum);
        value = flip_storage_order (fieldmode, value);
      emit_move_insn (op0, value);

  /* It's possible we'll need to handle other cases here for
     polynomial bitnum and bitsize.  */

  /* From here on we need to be looking at a fixed-size insertion.  */
  unsigned HOST_WIDE_INT ibitsize = bitsize.to_constant ();
  unsigned HOST_WIDE_INT ibitnum = bitnum.to_constant ();

  /* Make sure we are playing with integral modes.  Pun with subregs
     if we aren't.  This must come after the entire register case above,
     since that case is valid for any mode.  The following cases are only
     valid for integral modes.  */
  opt_scalar_int_mode op0_mode = int_mode_for_mode (GET_MODE (op0));
  scalar_int_mode imode;
  if (!op0_mode.exists (&imode) || imode != GET_MODE (op0))
        op0 = adjust_bitfield_address_size (op0, op0_mode.else_blk (),
        op0 = gen_lowpart (op0_mode.require (), op0);

  return store_integral_bit_field (op0, op0_mode, ibitsize, ibitnum,
                                   bitregion_start, bitregion_end,
                                   fieldmode, value, reverse, fallback_p);
/* Subroutine of store_bit_field_1, with the same arguments, except
   that BITSIZE and BITNUM are constant.  Handle cases specific to
   integral modes.  If OP0_MODE is defined, it is the mode of OP0,
   otherwise OP0 is a BLKmode MEM.  */

store_integral_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
                          unsigned HOST_WIDE_INT bitsize,
                          unsigned HOST_WIDE_INT bitnum,
                          poly_uint64 bitregion_start,
                          poly_uint64 bitregion_end,
                          machine_mode fieldmode,
                          rtx value, bool reverse, bool fallback_p)
  /* Storing an lsb-aligned field in a register
     can be done with a movstrict instruction.  */
      && lowpart_bit_field_p (bitnum, bitsize, op0_mode.require ())
      && known_eq (bitsize, GET_MODE_BITSIZE (fieldmode))
      && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
      class expand_operand ops[2];
      enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
      unsigned HOST_WIDE_INT subreg_off;

      if (GET_CODE (arg0) == SUBREG)
          /* Else we've got some float mode source being extracted into
             a different float mode destination -- this combination of
             subregs results in Severe Tire Damage.  */
          gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
                      || GET_MODE_CLASS (fieldmode) == MODE_INT
                      || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
          arg0 = SUBREG_REG (arg0);

      subreg_off = bitnum / BITS_PER_UNIT;
      if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
          arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);

          create_fixed_operand (&ops[0], arg0);
          /* Shrink the source operand to FIELDMODE.  */
          create_convert_operand_to (&ops[1], value, fieldmode, false);
          if (maybe_expand_insn (icode, 2, ops))

  /* Handle fields bigger than a word.  */

  if (bitsize > BITS_PER_WORD)
      /* Here we transfer the words of the field
         in the order least significant first.
         This is because the most significant word is the one which may
         be less than full.
         However, only do that if the value is not BLKmode.  */

      const bool backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
      unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;

      /* This is the mode we must force value to, so that there will be enough
         subwords to extract.  Note that fieldmode will often (always?) be
         VOIDmode, because that is what store_field uses to indicate that this
         is a bit field, but passing VOIDmode to operand_subword_force

         The mode must be fixed-size, since insertions into variable-sized
         objects are meant to be handled before calling this function.  */
      fixed_size_mode value_mode = as_a <fixed_size_mode> (GET_MODE (value));
      if (value_mode == VOIDmode)
        value_mode = smallest_int_mode_for_size (nwords * BITS_PER_WORD);

      last = get_last_insn ();
      for (i = 0; i < nwords; i++)
          /* If I is 0, use the low-order word in both field and target;
             if I is 1, use the next to lowest word; and so on.  */
          unsigned int wordnum = (backwards
                                  ? GET_MODE_SIZE (value_mode) / UNITS_PER_WORD
          unsigned int bit_offset = (backwards ^ reverse
                                     ? MAX ((int) bitsize - ((int) i + 1)
                                     : (int) i * BITS_PER_WORD);
          rtx value_word = operand_subword_force (value, wordnum, value_mode);
          unsigned HOST_WIDE_INT new_bitsize =
            MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);

          /* If the remaining chunk doesn't have full wordsize we have
             to make sure that for big-endian machines the higher order
             bits are used.  */
          if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
              int shift = BITS_PER_WORD - new_bitsize;
              rtx shift_rtx = gen_int_shift_amount (word_mode, shift);
              value_word = simplify_expand_binop (word_mode, lshr_optab,
                                                  value_word, shift_rtx,

          if (!store_bit_field_1 (op0, new_bitsize,
                                  bitregion_start, bitregion_end,
                                  value_word, reverse, fallback_p))
              delete_insns_since (last);
  /* If VALUE has a floating-point or complex mode, access it as an
     integer of the corresponding size.  This can occur on a machine
     with 64 bit registers that uses SFmode for float.  It can also
     occur for unaligned float or complex fields.  */
  rtx orig_value = value;
  scalar_int_mode value_mode;
  if (GET_MODE (value) == VOIDmode)
    /* By this point we've dealt with values that are bigger than a word,
       so word_mode is a conservatively correct choice.  */
    value_mode = word_mode;
  else if (!is_a <scalar_int_mode> (GET_MODE (value), &value_mode))
      value_mode = int_mode_for_mode (GET_MODE (value)).require ();
      value = gen_reg_rtx (value_mode);
      emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);

  /* If OP0 is a multi-word register, narrow it to the affected word.
     If the region spans two words, defer to store_split_bit_field.
     Don't do this if op0 is a single hard register wider than word
     such as a float or vector register.  */
      && GET_MODE_SIZE (op0_mode.require ()) > UNITS_PER_WORD
          || !HARD_REGISTER_P (op0)
          || hard_regno_nregs (REGNO (op0), op0_mode.require ()) != 1))
      if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
          store_split_bit_field (op0, op0_mode, bitsize, bitnum,
                                 bitregion_start, bitregion_end,
                                 value, value_mode, reverse);
      op0 = simplify_gen_subreg (word_mode, op0, op0_mode.require (),
                                 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
      op0_mode = word_mode;
      bitnum %= BITS_PER_WORD;

  /* From here on we can assume that the field to be stored in fits
     within a word.  If the destination is a register, it too fits
     in a word.  */

  extraction_insn insv;
      && get_best_reg_extraction_insn (&insv, EP_insv,
                                       GET_MODE_BITSIZE (op0_mode.require ()),
      && store_bit_field_using_insv (&insv, op0, op0_mode,
                                     bitsize, bitnum, value, value_mode))

  /* If OP0 is a memory, try copying it to a register and seeing if a
     cheap register alternative is available.  */
  if (MEM_P (op0) && !reverse)
      if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
          && store_bit_field_using_insv (&insv, op0, op0_mode,
                                         bitsize, bitnum, value, value_mode))

      rtx_insn *last = get_last_insn ();

      /* Try loading part of OP0 into a register, inserting the bitfield
         into that, and then copying the result back to OP0.  */
      unsigned HOST_WIDE_INT bitpos;
      rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
                                               bitregion_start, bitregion_end,
                                               fieldmode, &bitpos);
          rtx tempreg = copy_to_reg (xop0);
          if (store_bit_field_1 (tempreg, bitsize, bitpos,
                                 bitregion_start, bitregion_end,
                                 fieldmode, orig_value, reverse, false))
              emit_move_insn (xop0, tempreg);
      delete_insns_since (last);

  store_fixed_bit_field (op0, op0_mode, bitsize, bitnum, bitregion_start,
                         bitregion_end, value, value_mode, reverse);
/* Generate code to store value from rtx VALUE
   into a bit-field within structure STR_RTX
   containing BITSIZE bits starting at bit BITNUM.

   BITREGION_START is bitpos of the first bitfield in this region.
   BITREGION_END is the bitpos of the ending bitfield in this region.
   These two fields are 0, if the C++ memory model does not apply,
   or we are not interested in keeping track of bitfield regions.

   FIELDMODE is the machine-mode of the FIELD_DECL node for this field.

   If REVERSE is true, the store is to be done in reverse order.  */

store_bit_field (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
                 poly_uint64 bitregion_start, poly_uint64 bitregion_end,
                 machine_mode fieldmode,
                 rtx value, bool reverse)
  /* Handle -fstrict-volatile-bitfields in the cases where it applies.  */
  unsigned HOST_WIDE_INT ibitsize = 0, ibitnum = 0;
  scalar_int_mode int_mode;
  if (bitsize.is_constant (&ibitsize)
      && bitnum.is_constant (&ibitnum)
      && is_a <scalar_int_mode> (fieldmode, &int_mode)
      && strict_volatile_bitfield_p (str_rtx, ibitsize, ibitnum, int_mode,
                                     bitregion_start, bitregion_end))
      /* Storing of a full word can be done with a simple store.
         We know here that the field can be accessed with one single
         instruction.  For targets that support unaligned memory,
         an unaligned access may be necessary.  */
      if (ibitsize == GET_MODE_BITSIZE (int_mode))
          str_rtx = adjust_bitfield_address (str_rtx, int_mode,
                                             ibitnum / BITS_PER_UNIT);
            value = flip_storage_order (int_mode, value);
          gcc_assert (ibitnum % BITS_PER_UNIT == 0);
          emit_move_insn (str_rtx, value);

          str_rtx = narrow_bit_field_mem (str_rtx, int_mode, ibitsize,
          gcc_assert (ibitnum + ibitsize <= GET_MODE_BITSIZE (int_mode));
          temp = copy_to_reg (str_rtx);
          if (!store_bit_field_1 (temp, ibitsize, ibitnum, 0, 0,
                                  int_mode, value, reverse, true))
          emit_move_insn (str_rtx, temp);

  /* Under the C++0x memory model, we must not touch bits outside the
     bit region.  Adjust the address to start at the beginning of the
     bit region.  */
  if (MEM_P (str_rtx) && maybe_ne (bitregion_start, 0U))
      scalar_int_mode best_mode;
      machine_mode addr_mode = VOIDmode;

      poly_uint64 offset = exact_div (bitregion_start, BITS_PER_UNIT);
      bitnum -= bitregion_start;
      poly_int64 size = bits_to_bytes_round_up (bitnum + bitsize);
      bitregion_end -= bitregion_start;
      bitregion_start = 0;
      if (bitsize.is_constant (&ibitsize)
          && bitnum.is_constant (&ibitnum)
          && get_best_mode (ibitsize, ibitnum,
                            bitregion_start, bitregion_end,
                            MEM_ALIGN (str_rtx), INT_MAX,
                            MEM_VOLATILE_P (str_rtx), &best_mode))
        addr_mode = best_mode;
      str_rtx = adjust_bitfield_address_size (str_rtx, addr_mode,

  if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
                          bitregion_start, bitregion_end,
                          fieldmode, value, reverse, true))
/* Use shifts and boolean operations to store VALUE into a bit field of
   width BITSIZE in OP0, starting at bit BITNUM.  If OP0_MODE is defined,
   it is the mode of OP0, otherwise OP0 is a BLKmode MEM.  VALUE_MODE is
   the mode of VALUE.

   If REVERSE is true, the store is to be done in reverse order.  */

store_fixed_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
                       unsigned HOST_WIDE_INT bitsize,
                       unsigned HOST_WIDE_INT bitnum,
                       poly_uint64 bitregion_start, poly_uint64 bitregion_end,
                       rtx value, scalar_int_mode value_mode, bool reverse)
  /* There is a case not handled here:
     a structure with a known alignment of just a halfword
     and a field split across two aligned halfwords within the structure.
     Or likewise a structure with a known alignment of just a byte
     and a field split across two bytes.
     Such cases are not supposed to be able to occur.  */

  scalar_int_mode best_mode;
      unsigned int max_bitsize = BITS_PER_WORD;
      scalar_int_mode imode;
      if (op0_mode.exists (&imode) && GET_MODE_BITSIZE (imode) < max_bitsize)
        max_bitsize = GET_MODE_BITSIZE (imode);

      if (!get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
                          MEM_ALIGN (op0), max_bitsize, MEM_VOLATILE_P (op0),
          /* The only way this should occur is if the field spans word
             boundaries.  */
          store_split_bit_field (op0, op0_mode, bitsize, bitnum,
                                 bitregion_start, bitregion_end,
                                 value, value_mode, reverse);

      op0 = narrow_bit_field_mem (op0, best_mode, bitsize, bitnum, &bitnum);
    best_mode = op0_mode.require ();

  store_fixed_bit_field_1 (op0, best_mode, bitsize, bitnum,
                           value, value_mode, reverse);
/* Helper function for store_fixed_bit_field, stores
   the bit field always using MODE, which is the mode of OP0.  The other
   arguments are as for store_fixed_bit_field.  */

store_fixed_bit_field_1 (rtx op0, scalar_int_mode mode,
                         unsigned HOST_WIDE_INT bitsize,
                         unsigned HOST_WIDE_INT bitnum,
                         rtx value, scalar_int_mode value_mode, bool reverse)
  /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
     for invalid input, such as f5 from gcc.dg/pr48335-2.c.  */

  if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
    /* BITNUM is the distance between our msb
       and that of the containing datum.
       Convert it to the distance from the lsb.  */
    bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;

  /* Now BITNUM is always the distance between our lsb
     and that of OP0.  */

  /* Shift VALUE left by BITNUM bits.  If VALUE is not constant,
     we must first convert its mode to MODE.  */

  if (CONST_INT_P (value))
      unsigned HOST_WIDE_INT v = UINTVAL (value);

      if (bitsize < HOST_BITS_PER_WIDE_INT)
        v &= (HOST_WIDE_INT_1U << bitsize) - 1;

      else if ((bitsize < HOST_BITS_PER_WIDE_INT
                && v == (HOST_WIDE_INT_1U << bitsize) - 1)
               || (bitsize == HOST_BITS_PER_WIDE_INT
                   && v == HOST_WIDE_INT_M1U))
        value = lshift_value (mode, v, bitnum);

      int must_and = (GET_MODE_BITSIZE (value_mode) != bitsize
                      && bitnum + bitsize != GET_MODE_BITSIZE (mode));

      if (value_mode != mode)
        value = convert_to_mode (mode, value, 1);
        value = expand_binop (mode, and_optab, value,
                              mask_rtx (mode, 0, bitsize, 0),
                              NULL_RTX, 1, OPTAB_LIB_WIDEN);
        value = expand_shift (LSHIFT_EXPR, mode, value,
                              bitnum, NULL_RTX, 1);

    value = flip_storage_order (mode, value);

  /* Now clear the chosen bits in OP0,
     except that if VALUE is -1 we need not bother.  */
  /* We keep the intermediates in registers to allow CSE to combine
     consecutive bitfield assignments.  */

  temp = force_reg (mode, op0);
      rtx mask = mask_rtx (mode, bitnum, bitsize, 1);
        mask = flip_storage_order (mode, mask);
      temp = expand_binop (mode, and_optab, temp, mask,
                           NULL_RTX, 1, OPTAB_LIB_WIDEN);
      temp = force_reg (mode, temp);

  /* Now logical-or VALUE into OP0, unless it is zero.  */
      temp = expand_binop (mode, ior_optab, temp, value,
                           NULL_RTX, 1, OPTAB_LIB_WIDEN);
      temp = force_reg (mode, temp);

    op0 = copy_rtx (op0);
  emit_move_insn (op0, temp);
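/* For illustration only (not part of the original source): the sequence
   above amounts to the usual read-modify-write idiom, roughly

     op0 = (op0 & ~(mask << bitnum)) | ((value & mask) << bitnum)

   with mask = (1 << bitsize) - 1, except that the AND and IOR steps are
   skipped when VALUE is known to be all-ones or zero, respectively.  */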
/* Store a bit field that is split across multiple accessible memory objects.

   OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
   BITSIZE is the field width; BITPOS the position of its first bit
   VALUE is the value to store, which has mode VALUE_MODE.
   If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is
   a BLKmode MEM.

   If REVERSE is true, the store is to be done in reverse order.

   This does not yet handle fields wider than BITS_PER_WORD.  */

store_split_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
                       unsigned HOST_WIDE_INT bitsize,
                       unsigned HOST_WIDE_INT bitpos,
                       poly_uint64 bitregion_start, poly_uint64 bitregion_end,
                       rtx value, scalar_int_mode value_mode, bool reverse)
  unsigned int unit, total_bits, bitsdone = 0;

  /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
     much at a time.  */
  if (REG_P (op0) || GET_CODE (op0) == SUBREG)
    unit = BITS_PER_WORD;
    unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);

  /* If OP0 is a memory with a mode, then UNIT must not be larger than
     OP0's mode as well.  Otherwise, store_fixed_bit_field will call us
     again, and we will mutually recurse forever.  */
  if (MEM_P (op0) && op0_mode.exists ())
    unit = MIN (unit, GET_MODE_BITSIZE (op0_mode.require ()));

  /* If VALUE is a constant other than a CONST_INT, get it into a register in
     WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
     that VALUE might be a floating-point constant.  */
  if (CONSTANT_P (value) && !CONST_INT_P (value))
      rtx word = gen_lowpart_common (word_mode, value);

      if (word && (value != word))
          value = gen_lowpart_common (word_mode, force_reg (value_mode, value));
      value_mode = word_mode;

  total_bits = GET_MODE_BITSIZE (value_mode);

  while (bitsdone < bitsize)
      unsigned HOST_WIDE_INT thissize;
      unsigned HOST_WIDE_INT thispos;
      unsigned HOST_WIDE_INT offset;

      offset = (bitpos + bitsdone) / unit;
      thispos = (bitpos + bitsdone) % unit;

      /* When region of bytes we can touch is restricted, decrease
         UNIT close to the end of the region as needed.  If op0 is a REG
         or SUBREG of REG, don't do this, as there can't be data races
         on a register and we can expand shorter code in some cases.  */
      if (maybe_ne (bitregion_end, 0U)
          && unit > BITS_PER_UNIT
          && maybe_gt (bitpos + bitsdone - thispos + unit, bitregion_end + 1)
          && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))

      /* THISSIZE must not overrun a word boundary.  Otherwise,
         store_fixed_bit_field will call us again, and we will mutually
         recurse forever.  */
      thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
      thissize = MIN (thissize, unit - thispos);

      if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
          /* Fetch successively less significant portions.  */
          if (CONST_INT_P (value))
            part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
                             >> (bitsize - bitsdone - thissize))
                            & ((HOST_WIDE_INT_1 << thissize) - 1));
          /* Likewise, but the source is little-endian.  */
            part = extract_fixed_bit_field (word_mode, value, value_mode,
                                            bitsize - bitsdone - thissize,
                                            NULL_RTX, 1, false);
            /* The args are chosen so that the last part includes the
               lsb.  Give extract_bit_field the value it needs (with
               endianness compensation) to fetch the piece we want.  */
            part = extract_fixed_bit_field (word_mode, value, value_mode,
                                            total_bits - bitsize + bitsdone,
                                            NULL_RTX, 1, false);
          /* Fetch successively more significant portions.  */
          if (CONST_INT_P (value))
            part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
                            & ((HOST_WIDE_INT_1 << thissize) - 1));
          /* Likewise, but the source is big-endian.  */
            part = extract_fixed_bit_field (word_mode, value, value_mode,
                                            total_bits - bitsdone - thissize,
                                            NULL_RTX, 1, false);
            part = extract_fixed_bit_field (word_mode, value, value_mode,
                                            thissize, bitsdone, NULL_RTX,

      /* If OP0 is a register, then handle OFFSET here.  */
      rtx op0_piece = op0;
      opt_scalar_int_mode op0_piece_mode = op0_mode;
      if (SUBREG_P (op0) || REG_P (op0))
          scalar_int_mode imode;
          if (op0_mode.exists (&imode)
              && GET_MODE_SIZE (imode) < UNITS_PER_WORD)
              op0_piece = const0_rtx;
            op0_piece = operand_subword_force (op0,
                                               offset * unit / BITS_PER_WORD,
          op0_piece_mode = word_mode;
          offset &= BITS_PER_WORD / unit - 1;

      /* OFFSET is in UNITs, and UNIT is in bits.  If WORD is const0_rtx,
         it is just an out-of-bounds access.  Ignore it.  */
      if (op0_piece != const0_rtx)
        store_fixed_bit_field (op0_piece, op0_piece_mode, thissize,
                               offset * unit + thispos, bitregion_start,
                               bitregion_end, part, word_mode, reverse);
      bitsdone += thissize;
/* A subroutine of extract_bit_field_1 that converts return value X
   to either MODE or TMODE.  MODE, TMODE and UNSIGNEDP are arguments
   to extract_bit_field.  */

convert_extracted_bit_field (rtx x, machine_mode mode,
                             machine_mode tmode, bool unsignedp)
  if (GET_MODE (x) == tmode || GET_MODE (x) == mode)

  /* If the x mode is not a scalar integral, first convert to the
     integer mode of that size and then access it as a floating-point
     value via a SUBREG.  */
  if (!SCALAR_INT_MODE_P (tmode))
      scalar_int_mode int_mode = int_mode_for_mode (tmode).require ();
      x = convert_to_mode (int_mode, x, unsignedp);
      x = force_reg (int_mode, x);
      return gen_lowpart (tmode, x);

  return convert_to_mode (tmode, x, unsignedp);
/* Try to use an ext(z)v pattern to extract a field from OP0.
   Return the extracted value on success, otherwise return null.
   EXTV describes the extraction instruction to use.  If OP0_MODE
   is defined, it is the mode of OP0, otherwise OP0 is a BLKmode MEM.
   The other arguments are as for extract_bit_field.  */

extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
                              opt_scalar_int_mode op0_mode,
                              unsigned HOST_WIDE_INT bitsize,
                              unsigned HOST_WIDE_INT bitnum,
                              int unsignedp, rtx target,
                              machine_mode mode, machine_mode tmode)
  class expand_operand ops[4];
  rtx spec_target = target;
  rtx spec_target_subreg = 0;
  scalar_int_mode ext_mode = extv->field_mode;
  unsigned unit = GET_MODE_BITSIZE (ext_mode);

  if (bitsize == 0 || unit < bitsize)

      /* Get a reference to the first byte of the field.  */
      op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,

      /* Convert from counting within OP0 to counting in EXT_MODE.  */
      if (BYTES_BIG_ENDIAN)
        bitnum += unit - GET_MODE_BITSIZE (op0_mode.require ());

      /* If op0 is a register, we need it in EXT_MODE to make it
         acceptable to the format of ext(z)v.  */
      if (GET_CODE (op0) == SUBREG && op0_mode.require () != ext_mode)
      if (REG_P (op0) && op0_mode.require () != ext_mode)
        op0 = gen_lowpart_SUBREG (ext_mode, op0);

  /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
     "backwards" from the size of the unit we are extracting from.
     Otherwise, we count bits from the most significant on a
     BYTES/BITS_BIG_ENDIAN machine.  */

  if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
    bitnum = unit - bitsize - bitnum;

    target = spec_target = gen_reg_rtx (tmode);

  if (GET_MODE (target) != ext_mode)
      /* Don't use LHS paradoxical subreg if explicit truncation is needed
         between the mode of the extraction (word_mode) and the target
         mode.  Instead, create a temporary and use convert_move to set
         the target.  */
          && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode))
          target = gen_lowpart (ext_mode, target);
          if (partial_subreg_p (GET_MODE (spec_target), ext_mode))
            spec_target_subreg = target;
        target = gen_reg_rtx (ext_mode);

  create_output_operand (&ops[0], target, ext_mode);
  create_fixed_operand (&ops[1], op0);
  create_integer_operand (&ops[2], bitsize);
  create_integer_operand (&ops[3], bitnum);
  if (maybe_expand_insn (extv->icode, 4, ops))
      target = ops[0].value;
      if (target == spec_target)
      if (target == spec_target_subreg)
      return convert_extracted_bit_field (target, mode, tmode, unsignedp);
/* See whether it would be valid to extract the part of OP0 described
   by BITNUM and BITSIZE into a value of mode MODE using a subreg
   operation.  Return the subreg if so, otherwise return null.  */

extract_bit_field_as_subreg (machine_mode mode, rtx op0,
                             poly_uint64 bitsize, poly_uint64 bitnum)
  poly_uint64 bytenum;
  if (multiple_p (bitnum, BITS_PER_UNIT, &bytenum)
      && known_eq (bitsize, GET_MODE_BITSIZE (mode))
      && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
      && TRULY_NOOP_TRUNCATION_MODES_P (mode, GET_MODE (op0)))
    return simplify_gen_subreg (mode, op0, GET_MODE (op0), bytenum);
/* A subroutine of extract_bit_field, with the same arguments.
   If FALLBACK_P is true, fall back to extract_fixed_bit_field
   if we can find no other means of implementing the operation.
   if FALLBACK_P is false, return NULL instead.  */

extract_bit_field_1 (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
                     int unsignedp, rtx target, machine_mode mode,
                     machine_mode tmode, bool reverse, bool fallback_p,
  if (tmode == VOIDmode)

  while (GET_CODE (op0) == SUBREG)
      bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
      op0 = SUBREG_REG (op0);

  /* If we have an out-of-bounds access to a register, just return an
     uninitialized register of the required mode.  This can occur if the
     source code contains an out-of-bounds access to a small array.  */
  if (REG_P (op0) && known_ge (bitnum, GET_MODE_BITSIZE (GET_MODE (op0))))
    return gen_reg_rtx (tmode);

      && mode == GET_MODE (op0)
      && known_eq (bitnum, 0U)
      && known_eq (bitsize, GET_MODE_BITSIZE (GET_MODE (op0))))
        op0 = flip_storage_order (mode, op0);
      /* We're trying to extract a full register from itself.  */

  /* First try to check for vector from vector extractions.  */
  if (VECTOR_MODE_P (GET_MODE (op0))
      && VECTOR_MODE_P (tmode)
      && known_eq (bitsize, GET_MODE_BITSIZE (tmode))
      && maybe_gt (GET_MODE_SIZE (GET_MODE (op0)), GET_MODE_SIZE (tmode)))
      machine_mode new_mode = GET_MODE (op0);
      if (GET_MODE_INNER (new_mode) != GET_MODE_INNER (tmode))
          scalar_mode inner_mode = GET_MODE_INNER (tmode);
          if (!multiple_p (GET_MODE_BITSIZE (GET_MODE (op0)),
                           GET_MODE_UNIT_BITSIZE (tmode), &nunits)
              || !mode_for_vector (inner_mode, nunits).exists (&new_mode)
              || !VECTOR_MODE_P (new_mode)
              || maybe_ne (GET_MODE_SIZE (new_mode),
                           GET_MODE_SIZE (GET_MODE (op0)))
              || GET_MODE_INNER (new_mode) != GET_MODE_INNER (tmode)
              || !targetm.vector_mode_supported_p (new_mode))
            new_mode = VOIDmode;

      if (new_mode != VOIDmode
          && (convert_optab_handler (vec_extract_optab, new_mode, tmode)
              != CODE_FOR_nothing)
          && multiple_p (bitnum, GET_MODE_BITSIZE (tmode), &pos))
          class expand_operand ops[3];
          machine_mode outermode = new_mode;
          machine_mode innermode = tmode;
          enum insn_code icode
            = convert_optab_handler (vec_extract_optab, outermode, innermode);

          if (new_mode != GET_MODE (op0))
            op0 = gen_lowpart (new_mode, op0);
          create_output_operand (&ops[0], target, innermode);
          create_input_operand (&ops[1], op0, outermode);
          create_integer_operand (&ops[2], pos);
          if (maybe_expand_insn (icode, 3, ops))
              if (alt_rtl && ops[0].target)
              target = ops[0].value;
              if (GET_MODE (target) != mode)
                return gen_lowpart (tmode, target);
1682 /* See if we can get a better vector mode before extracting. */
1683 if (VECTOR_MODE_P (GET_MODE (op0
))
1685 && GET_MODE_INNER (GET_MODE (op0
)) != tmode
)
1687 machine_mode new_mode
;
1689 if (GET_MODE_CLASS (tmode
) == MODE_FLOAT
)
1690 new_mode
= MIN_MODE_VECTOR_FLOAT
;
1691 else if (GET_MODE_CLASS (tmode
) == MODE_FRACT
)
1692 new_mode
= MIN_MODE_VECTOR_FRACT
;
1693 else if (GET_MODE_CLASS (tmode
) == MODE_UFRACT
)
1694 new_mode
= MIN_MODE_VECTOR_UFRACT
;
1695 else if (GET_MODE_CLASS (tmode
) == MODE_ACCUM
)
1696 new_mode
= MIN_MODE_VECTOR_ACCUM
;
1697 else if (GET_MODE_CLASS (tmode
) == MODE_UACCUM
)
1698 new_mode
= MIN_MODE_VECTOR_UACCUM
;
1700 new_mode
= MIN_MODE_VECTOR_INT
;
1702 FOR_EACH_MODE_FROM (new_mode
, new_mode
)
1703 if (known_eq (GET_MODE_SIZE (new_mode
), GET_MODE_SIZE (GET_MODE (op0
)))
1704 && known_eq (GET_MODE_UNIT_SIZE (new_mode
), GET_MODE_SIZE (tmode
))
1705 && targetm
.vector_mode_supported_p (new_mode
))
1707 if (new_mode
!= VOIDmode
)
1708 op0
= gen_lowpart (new_mode
, op0
);
1711 /* Use vec_extract patterns for extracting parts of vectors whenever
1712 available. If that fails, see whether the current modes and bitregion
1713 give a natural subreg. */
1714 machine_mode outermode
= GET_MODE (op0
);
1715 if (VECTOR_MODE_P (outermode
) && !MEM_P (op0
))
1717 scalar_mode innermode
= GET_MODE_INNER (outermode
);
1718 enum insn_code icode
1719 = convert_optab_handler (vec_extract_optab
, outermode
, innermode
);
1721 if (icode
!= CODE_FOR_nothing
1722 && known_eq (bitsize
, GET_MODE_BITSIZE (innermode
))
1723 && multiple_p (bitnum
, GET_MODE_BITSIZE (innermode
), &pos
))
1725 class expand_operand ops
[3];
1727 create_output_operand (&ops
[0], target
, innermode
);
1729 create_input_operand (&ops
[1], op0
, outermode
);
1730 create_integer_operand (&ops
[2], pos
);
1731 if (maybe_expand_insn (icode
, 3, ops
))
1733 if (alt_rtl
&& ops
[0].target
)
1735 target
= ops
[0].value
;
1736 if (GET_MODE (target
) != mode
)
1737 return gen_lowpart (tmode
, target
);
1741 /* Using subregs is useful if we're extracting one register vector
1742 from a multi-register vector. extract_bit_field_as_subreg checks
1743 for valid bitsize and bitnum, so we don't need to do that here. */
1744 if (VECTOR_MODE_P (mode
))
1746 rtx sub
= extract_bit_field_as_subreg (mode
, op0
, bitsize
, bitnum
);
1752 /* Make sure we are playing with integral modes. Pun with subregs
1754 opt_scalar_int_mode op0_mode
= int_mode_for_mode (GET_MODE (op0
));
1755 scalar_int_mode imode
;
1756 if (!op0_mode
.exists (&imode
) || imode
!= GET_MODE (op0
))
1759 op0
= adjust_bitfield_address_size (op0
, op0_mode
.else_blk (),
1761 else if (op0_mode
.exists (&imode
))
1763 op0
= gen_lowpart (imode
, op0
);
1765 /* If we got a SUBREG, force it into a register since we
1766 aren't going to be able to do another SUBREG on it. */
1767 if (GET_CODE (op0
) == SUBREG
)
1768 op0
= force_reg (imode
, op0
);
1772 poly_int64 size
= GET_MODE_SIZE (GET_MODE (op0
));
1773 rtx mem
= assign_stack_temp (GET_MODE (op0
), size
);
1774 emit_move_insn (mem
, op0
);
1775 op0
= adjust_bitfield_address_size (mem
, BLKmode
, 0, size
);
  /* ??? We currently assume TARGET is at least as big as BITSIZE.
     If that's wrong, the solution is to test for it and set TARGET to 0
     if needed.  */

  /* Get the mode of the field to use for atomic access or subreg
     conversion.  */
  if (!SCALAR_INT_MODE_P (tmode)
      || !mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0).exists (&mode1))
    mode1 = mode;
  gcc_assert (mode1 != BLKmode);

  /* Extraction of a full MODE1 value can be done with a subreg as long
     as the least significant bit of the value is the least significant
     bit of either OP0 or a word of OP0.  */
  if (!MEM_P (op0) && !reverse)
    {
      rtx sub = extract_bit_field_as_subreg (mode1, op0, bitsize, bitnum);
      if (sub)
	return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
    }

  /* Extraction of a full MODE1 value can be done with a load as long as
     the field is on a byte boundary and is sufficiently aligned.  */
  poly_uint64 bytenum;
  if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1, &bytenum))
    {
      op0 = adjust_bitfield_address (op0, mode1, bytenum);
      if (reverse)
	op0 = flip_storage_order (mode1, op0);
      return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
    }
  /* If we have a memory source and a non-constant bit offset, restrict
     the memory to the referenced bytes.  This is a worst-case fallback
     but is useful for things like vector booleans.  */
  if (MEM_P (op0) && !bitnum.is_constant ())
    {
      bytenum = bits_to_bytes_round_down (bitnum);
      bitnum = num_trailing_bits (bitnum);
      poly_uint64 bytesize = bits_to_bytes_round_up (bitnum + bitsize);
      op0 = adjust_bitfield_address_size (op0, BLKmode, bytenum, bytesize);
      op0_mode = opt_scalar_int_mode ();
    }

  /* It's possible we'll need to handle other cases here for
     polynomial bitnum and bitsize.  */

  /* From here on we need to be looking at a fixed-size extraction.  */
  return extract_integral_bit_field (op0, op0_mode, bitsize.to_constant (),
				     bitnum.to_constant (), unsignedp,
				     target, mode, tmode, reverse, fallback_p);
}
/* Subroutine of extract_bit_field_1, with the same arguments, except
   that BITSIZE and BITNUM are constant.  Handle cases specific to
   integral modes.  If OP0_MODE is defined, it is the mode of OP0,
   otherwise OP0 is a BLKmode MEM.  */

static rtx
extract_integral_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
			    unsigned HOST_WIDE_INT bitsize,
			    unsigned HOST_WIDE_INT bitnum, int unsignedp,
			    rtx target, machine_mode mode, machine_mode tmode,
			    bool reverse, bool fallback_p)
{
  /* Handle fields bigger than a word.  */

  if (bitsize > BITS_PER_WORD)
    {
      /* Here we transfer the words of the field
	 in the order least significant first.
	 This is because the most significant word is the one which may
	 be less than full.  */

      const bool backwards = WORDS_BIG_ENDIAN;
      unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
      unsigned int i;
      rtx_insn *last;

      if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
	target = gen_reg_rtx (mode);

      /* In case we're about to clobber a base register or something
	 (see gcc.c-torture/execute/20040625-1.c).  */
      if (reg_mentioned_p (target, op0))
	target = gen_reg_rtx (mode);

      /* Indicate for flow that the entire target reg is being set.  */
      emit_clobber (target);

      /* The mode must be fixed-size, since extract_bit_field_1 handles
	 extractions from variable-sized objects before calling this
	 function.  */
      unsigned int target_size
	= GET_MODE_SIZE (GET_MODE (target)).to_constant ();
      last = get_last_insn ();
      for (i = 0; i < nwords; i++)
	{
	  /* If I is 0, use the low-order word in both field and target;
	     if I is 1, use the next to lowest word; and so on.  */
	  /* Word number in TARGET to use.  */
	  unsigned int wordnum
	    = (backwards ? target_size / UNITS_PER_WORD - i - 1 : i);
	  /* Offset from start of field in OP0.  */
	  unsigned int bit_offset = (backwards ^ reverse
				     ? MAX ((int) bitsize - ((int) i + 1)
					    * BITS_PER_WORD, 0)
				     : (int) i * BITS_PER_WORD);
	  rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
	  rtx result_part
	    = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
					     bitsize - i * BITS_PER_WORD),
				   bitnum + bit_offset, 1, target_part,
				   mode, word_mode, reverse, fallback_p, NULL);

	  gcc_assert (target_part);
	  if (!result_part)
	    {
	      delete_insns_since (last);
	      return NULL;
	    }

	  if (result_part != target_part)
	    emit_move_insn (target_part, result_part);
	}

      if (unsignedp)
	{
	  /* Unless we've filled TARGET, the upper regs in a multi-reg value
	     need to be zero'd out.  */
	  if (target_size > nwords * UNITS_PER_WORD)
	    {
	      unsigned int i, total_words;

	      total_words = target_size / UNITS_PER_WORD;
	      for (i = nwords; i < total_words; i++)
		emit_move_insn
		  (operand_subword (target,
				    backwards ? total_words - i - 1 : i,
				    1, VOIDmode),
		   const0_rtx);
	    }
	  return target;
	}

      /* Signed bit field: sign-extend with two arithmetic shifts.  */
      target = expand_shift (LSHIFT_EXPR, mode, target,
			     GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
      return expand_shift (RSHIFT_EXPR, mode, target,
			   GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
    }
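  /* Worked example (illustrative): on a target with 32-bit words, a signed
     48-bit field is handled by the block above as two word-sized pieces
     (nwords == 2), least significant word first.  Only part of the upper
     word is defined, so the block finishes with a left shift by
     GET_MODE_BITSIZE (mode) - 48 followed by an arithmetic right shift of
     the same amount to sign-extend; for an unsigned field the surplus
     words of TARGET are simply zeroed instead.  */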
  /* If OP0 is a multi-word register, narrow it to the affected word.
     If the region spans two words, defer to extract_split_bit_field.  */
  if (!MEM_P (op0) && GET_MODE_SIZE (op0_mode.require ()) > UNITS_PER_WORD)
    {
      if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
	{
	  if (!fallback_p)
	    return NULL_RTX;
	  target = extract_split_bit_field (op0, op0_mode, bitsize, bitnum,
					    unsignedp, reverse);
	  return convert_extracted_bit_field (target, mode, tmode, unsignedp);
	}
      op0 = simplify_gen_subreg (word_mode, op0, op0_mode.require (),
				 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
      op0_mode = word_mode;
      bitnum %= BITS_PER_WORD;
    }
  /* From here on we know the desired field is smaller than a word.
     If OP0 is a register, it too fits within a word.  */
  enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
  extraction_insn extv;
  if (!MEM_P (op0)
      && !reverse
      /* ??? We could limit the structure size to the part of OP0 that
	 contains the field, with appropriate checks for endianness
	 and TARGET_TRULY_NOOP_TRUNCATION.  */
      && get_best_reg_extraction_insn (&extv, pattern,
				       GET_MODE_BITSIZE (op0_mode.require ()),
				       tmode))
    {
      rtx result = extract_bit_field_using_extv (&extv, op0, op0_mode,
						 bitsize, bitnum,
						 unsignedp, target, mode,
						 tmode);
      if (result)
	return result;
    }

  /* If OP0 is a memory, try copying it to a register and seeing if a
     cheap register alternative is available.  */
  if (MEM_P (op0) && !reverse)
    {
      if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
					tmode))
	{
	  rtx result = extract_bit_field_using_extv (&extv, op0, op0_mode,
						     bitsize, bitnum,
						     unsignedp, target, mode,
						     tmode);
	  if (result)
	    return result;
	}

      rtx_insn *last = get_last_insn ();

      /* Try loading part of OP0 into a register and extracting the
	 bitfield from that.  */
      unsigned HOST_WIDE_INT bitpos;
      rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
					       0, 0, tmode, &bitpos);
      if (xop0)
	{
	  xop0 = copy_to_reg (xop0);
	  rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
					    unsignedp, target,
					    mode, tmode, reverse, false, NULL);
	  if (result)
	    return result;
	  delete_insns_since (last);
	}
    }

  if (!fallback_p)
    return NULL;
  /* Find a correspondingly-sized integer field, so we can apply
     shifts and masks to it.  */
  scalar_int_mode int_mode;
  if (!int_mode_for_mode (tmode).exists (&int_mode))
    /* If this fails, we should probably push op0 out to memory and then
       do a load.  */
    int_mode = int_mode_for_mode (mode).require ();

  target = extract_fixed_bit_field (int_mode, op0, op0_mode, bitsize,
				    bitnum, target, unsignedp, reverse);

  /* Complex values must be reversed piecewise, so we need to undo the global
     reversal, convert to the complex mode and reverse again.  */
  if (reverse && COMPLEX_MODE_P (tmode))
    {
      target = flip_storage_order (int_mode, target);
      target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
      target = flip_storage_order (tmode, target);
    }
  else
    target = convert_extracted_bit_field (target, mode, tmode, unsignedp);

  return target;
}
/* Generate code to extract a bit-field from STR_RTX
   containing BITSIZE bits, starting at BITNUM,
   and put it in TARGET if possible (if TARGET is nonzero).
   Regardless of TARGET, we return the rtx for where the value is placed.

   STR_RTX is the structure containing the field (a REG or MEM).
   UNSIGNEDP is nonzero if this is an unsigned bit field.
   MODE is the natural mode of the field value once extracted.
   TMODE is the mode the caller would like the value to have;
   but the value may be returned with type MODE instead.

   If REVERSE is true, the extraction is to be done in reverse order.

   If a TARGET is specified and we can store in it at no extra cost,
   we do so, and return TARGET.
   Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
   if they are equally easy.

   If the result can be stored at TARGET, and ALT_RTL is non-NULL,
   then *ALT_RTL is set to TARGET (before legitimization).  */

rtx
extract_bit_field (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
		   int unsignedp, rtx target, machine_mode mode,
		   machine_mode tmode, bool reverse, rtx *alt_rtl)
{
  machine_mode mode1;
  /* Handle -fstrict-volatile-bitfields in the cases where it applies.  */
  if (maybe_ne (GET_MODE_BITSIZE (GET_MODE (str_rtx)), 0))
    mode1 = GET_MODE (str_rtx);
  else if (target && maybe_ne (GET_MODE_BITSIZE (GET_MODE (target)), 0))
    mode1 = GET_MODE (target);
  else
    mode1 = tmode;

  unsigned HOST_WIDE_INT ibitsize, ibitnum;
  scalar_int_mode int_mode;
  if (bitsize.is_constant (&ibitsize)
      && bitnum.is_constant (&ibitnum)
      && is_a <scalar_int_mode> (mode1, &int_mode)
      && strict_volatile_bitfield_p (str_rtx, ibitsize, ibitnum,
				     int_mode, 0, 0))
    {
      /* Extraction of a full INT_MODE value can be done with a simple load.
	 We know here that the field can be accessed with one single
	 instruction.  For targets that support unaligned memory,
	 an unaligned access may be necessary.  */
      if (ibitsize == GET_MODE_BITSIZE (int_mode))
	{
	  rtx result = adjust_bitfield_address (str_rtx, int_mode,
						ibitnum / BITS_PER_UNIT);
	  if (reverse)
	    result = flip_storage_order (int_mode, result);
	  gcc_assert (ibitnum % BITS_PER_UNIT == 0);
	  return convert_extracted_bit_field (result, mode, tmode, unsignedp);
	}

      str_rtx = narrow_bit_field_mem (str_rtx, int_mode, ibitsize, ibitnum,
				      &ibitnum);
      gcc_assert (ibitnum + ibitsize <= GET_MODE_BITSIZE (int_mode));
      str_rtx = copy_to_reg (str_rtx);
      return extract_bit_field_1 (str_rtx, ibitsize, ibitnum, unsignedp,
				  target, mode, tmode, reverse, true, alt_rtl);
    }

  return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
			      target, mode, tmode, reverse, true, alt_rtl);
}
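/* Usage sketch (illustrative only; the operands are invented): a caller
   that wants bits [8, 24) of a DImode register REG as a zero-extended
   HImode value could write

     rtx piece = extract_bit_field (reg, 16, 8, 1, NULL_RTX,
				    HImode, HImode, false, NULL);

   and then use PIECE wherever an HImode rvalue is expected.  Note that the
   result may be a new pseudo rather than a caller-supplied target.  */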
/* Use shifts and boolean operations to extract a field of BITSIZE bits
   from bit BITNUM of OP0.  If OP0_MODE is defined, it is the mode of OP0,
   otherwise OP0 is a BLKmode MEM.

   UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
   If REVERSE is true, the extraction is to be done in reverse order.

   If TARGET is nonzero, attempts to store the value there
   and return TARGET, but this is not guaranteed.
   If TARGET is not used, create a pseudo-reg of mode TMODE for the value.  */

static rtx
extract_fixed_bit_field (machine_mode tmode, rtx op0,
			 opt_scalar_int_mode op0_mode,
			 unsigned HOST_WIDE_INT bitsize,
			 unsigned HOST_WIDE_INT bitnum, rtx target,
			 int unsignedp, bool reverse)
{
  scalar_int_mode mode;
  if (MEM_P (op0))
    {
      if (!get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0),
			  BITS_PER_WORD, MEM_VOLATILE_P (op0), &mode))
	/* The only way this should occur is if the field spans word
	   boundaries.  */
	return extract_split_bit_field (op0, op0_mode, bitsize, bitnum,
					unsignedp, reverse);

      op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
    }
  else
    mode = op0_mode.require ();

  return extract_fixed_bit_field_1 (tmode, op0, mode, bitsize, bitnum,
				    target, unsignedp, reverse);
}
/* Helper function for extract_fixed_bit_field, extracts
   the bit field always using MODE, which is the mode of OP0.
   The other arguments are as for extract_fixed_bit_field.  */

static rtx
extract_fixed_bit_field_1 (machine_mode tmode, rtx op0, scalar_int_mode mode,
			   unsigned HOST_WIDE_INT bitsize,
			   unsigned HOST_WIDE_INT bitnum, rtx target,
			   int unsignedp, bool reverse)
{
  /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
     for invalid input, such as extract equivalent of f5 from
     gcc.dg/pr48335-2.c.  */

  if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
    /* BITNUM is the distance between our msb and that of OP0.
       Convert it to the distance from the lsb.  */
    bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;

  /* Now BITNUM is always the distance between the field's lsb and that of OP0.
     We have reduced the big-endian case to the little-endian case.  */
  if (reverse)
    op0 = flip_storage_order (mode, op0);

  if (unsignedp)
    {
      if (bitnum)
	{
	  /* If the field does not already start at the lsb,
	     shift it so it does.  */
	  /* Maybe propagate the target for the shift.  */
	  rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
	  op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
	}

      /* Convert the value to the desired mode.  TMODE must also be a
	 scalar integer for this conversion to make sense, since we
	 shouldn't reinterpret the bits.  */
      scalar_int_mode new_mode = as_a <scalar_int_mode> (tmode);
      if (mode != new_mode)
	op0 = convert_to_mode (new_mode, op0, 1);

      /* Unless the msb of the field used to be the msb when we shifted,
	 mask out the upper bits.  */

      if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
	return expand_binop (new_mode, and_optab, op0,
			     mask_rtx (new_mode, 0, bitsize, 0),
			     target, 1, OPTAB_LIB_WIDEN);
      return op0;
    }

  /* To extract a signed bit-field, first shift its msb to the msb of the word,
     then arithmetic-shift its lsb to the lsb of the word.  */
  op0 = force_reg (mode, op0);

  /* Find the narrowest integer mode that contains the field.  */

  opt_scalar_int_mode mode_iter;
  FOR_EACH_MODE_IN_CLASS (mode_iter, MODE_INT)
    if (GET_MODE_BITSIZE (mode_iter.require ()) >= bitsize + bitnum)
      break;

  mode = mode_iter.require ();
  op0 = convert_to_mode (mode, op0, 0);

  if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
    {
      int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
      /* Maybe propagate the target for the shift.  */
      rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
      op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
    }

  return expand_shift (RSHIFT_EXPR, mode, op0,
		       GET_MODE_BITSIZE (mode) - bitsize, target, 0);
}
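/* Worked example: extracting a signed 13-bit field that starts at bit 18
   of an SImode value.  bitsize + bitnum == 31, so the narrowest containing
   integer mode is SImode; the code above shifts left by 32 - 31 == 1 so the
   field's msb becomes the register's msb, then shifts right arithmetically
   by 32 - 13 == 19, leaving the sign-extended field in the low 13 bits.  */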
/* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
   VALUE << BITPOS.  */

static rtx
lshift_value (machine_mode mode, unsigned HOST_WIDE_INT value, int bitpos)
{
  return immed_wide_int_const (wi::lshift (value, bitpos), mode);
}

/* Extract a bit field that is split across two words
   and return an RTX for the result.

   OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
   BITSIZE is the field width; BITPOS, position of its first bit, in the word.
   UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.
   If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is
   a BLKmode MEM.

   If REVERSE is true, the extraction is to be done in reverse order.  */
static rtx
extract_split_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
			 unsigned HOST_WIDE_INT bitsize,
			 unsigned HOST_WIDE_INT bitpos, int unsignedp,
			 bool reverse)
{
  unsigned int unit;
  unsigned int bitsdone = 0;
  rtx result = NULL_RTX;

  /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
     much at a time.  */
  if (REG_P (op0) || GET_CODE (op0) == SUBREG)
    unit = BITS_PER_WORD;
  else
    unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);

  while (bitsdone < bitsize)
    {
      unsigned HOST_WIDE_INT thissize;
      rtx part;
      unsigned HOST_WIDE_INT thispos;
      unsigned HOST_WIDE_INT offset;

      offset = (bitpos + bitsdone) / unit;
      thispos = (bitpos + bitsdone) % unit;

      /* THISSIZE must not overrun a word boundary.  Otherwise,
	 extract_fixed_bit_field will call us again, and we will mutually
	 recurse forever.  */
      thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
      thissize = MIN (thissize, unit - thispos);

      /* If OP0 is a register, then handle OFFSET here.  */
      rtx op0_piece = op0;
      opt_scalar_int_mode op0_piece_mode = op0_mode;
      if (SUBREG_P (op0) || REG_P (op0))
	{
	  op0_piece = operand_subword_force (op0, offset, op0_mode.require ());
	  op0_piece_mode = word_mode;
	  offset = 0;
	}

      /* Extract the parts in bit-counting order,
	 whose meaning is determined by BYTES_PER_UNIT.
	 OFFSET is in UNITs, and UNIT is in bits.  */
      part = extract_fixed_bit_field (word_mode, op0_piece, op0_piece_mode,
				      thissize, offset * unit + thispos,
				      0, 1, reverse);
      bitsdone += thissize;

      /* Shift this part into place for the result.  */
      if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
	{
	  if (bitsize != bitsdone)
	    part = expand_shift (LSHIFT_EXPR, word_mode, part,
				 bitsize - bitsdone, 0, 1);
	}
      else
	{
	  if (bitsdone != thissize)
	    part = expand_shift (LSHIFT_EXPR, word_mode, part,
				 bitsdone - thissize, 0, 1);
	}

      if (result == NULL_RTX)
	result = part;
      else
	/* Combine the parts with bitwise or.  This works
	   because we extracted each part as an unsigned bit field.  */
	result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
			       OPTAB_LIB_WIDEN);
    }

  /* Unsigned bit field: we are done.  */
  if (unsignedp)
    return result;

  /* Signed bit field: sign-extend with two arithmetic shifts.  */
  result = expand_shift (LSHIFT_EXPR, word_mode, result,
			 BITS_PER_WORD - bitsize, NULL_RTX, 0);
  return expand_shift (RSHIFT_EXPR, word_mode, result,
		       BITS_PER_WORD - bitsize, NULL_RTX, 0);
}
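/* Worked example (little-endian, 32-bit words): a 20-bit field at bit
   position 24 spans a word boundary.  The loop above extracts 8 bits from
   position 24 of word 0 and 12 bits from position 0 of word 1, shifts the
   second piece left by 8, and IORs the pieces together; a signed field is
   then finished by the pair of arithmetic shifts (left and right by
   32 - 20 == 12).  */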
/* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
   the bit pattern.  SRC_MODE is the mode of SRC; if this is smaller than
   MODE, fill the upper bits with zeros.  Fail if the layout of either
   mode is unknown (as for CC modes) or if the extraction would involve
   unprofitable mode punning.  Return the value on success, otherwise
   return null.

   This is different from gen_lowpart* in these respects:

     - the returned value must always be considered an rvalue

     - when MODE is wider than SRC_MODE, the extraction involves
       a zero extension

     - when MODE is smaller than SRC_MODE, the extraction involves
       a truncation (and is thus subject to TARGET_TRULY_NOOP_TRUNCATION).

   In other words, this routine performs a computation, whereas the
   gen_lowpart* routines are conceptually lvalue or rvalue subreg
   operations.  */
rtx
extract_low_bits (machine_mode mode, machine_mode src_mode, rtx src)
{
  scalar_int_mode int_mode, src_int_mode;

  if (mode == src_mode)
    return src;

  if (CONSTANT_P (src))
    {
      /* simplify_gen_subreg can't be used here, as if simplify_subreg
	 fails, it will happily create (subreg (symbol_ref)) or similar
	 invalid SUBREGs.  */
      poly_uint64 byte = subreg_lowpart_offset (mode, src_mode);
      rtx ret = simplify_subreg (mode, src, src_mode, byte);
      if (ret)
	return ret;

      if (GET_MODE (src) == VOIDmode
	  || !validate_subreg (mode, src_mode, src, byte))
	return NULL_RTX;

      src = force_reg (GET_MODE (src), src);
      return gen_rtx_SUBREG (mode, src, byte);
    }

  if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
    return NULL_RTX;

  if (known_eq (GET_MODE_BITSIZE (mode), GET_MODE_BITSIZE (src_mode))
      && targetm.modes_tieable_p (mode, src_mode))
    {
      rtx x = gen_lowpart_common (mode, src);
      if (x)
	return x;
    }

  if (!int_mode_for_mode (src_mode).exists (&src_int_mode)
      || !int_mode_for_mode (mode).exists (&int_mode))
    return NULL_RTX;

  if (!targetm.modes_tieable_p (src_int_mode, src_mode))
    return NULL_RTX;
  if (!targetm.modes_tieable_p (int_mode, mode))
    return NULL_RTX;

  src = gen_lowpart (src_int_mode, src);
  if (!validate_subreg (int_mode, src_int_mode, src,
			subreg_lowpart_offset (int_mode, src_int_mode)))
    return NULL_RTX;

  src = convert_modes (int_mode, src_int_mode, src, true);
  src = gen_lowpart (mode, src);
  return src;
}
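/* Illustrative uses: taking the low SImode part of a DImode value (a
   truncation), widening an HImode value to SImode with zero-filled upper
   bits, or reading the bit pattern of an SFmode value as SImode via the
   tieable integer modes chosen above.  Each case returns NULL_RTX cleanly
   when the target reports that the modes cannot be tied.  */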
/* Add INC into TARGET.  */

void
expand_inc (rtx target, rtx inc)
{
  rtx value = expand_binop (GET_MODE (target), add_optab,
			    target, inc,
			    target, 0, OPTAB_LIB_WIDEN);
  if (value != target)
    emit_move_insn (target, value);
}

/* Subtract DEC from TARGET.  */

void
expand_dec (rtx target, rtx dec)
{
  rtx value = expand_binop (GET_MODE (target), sub_optab,
			    target, dec,
			    target, 0, OPTAB_LIB_WIDEN);
  if (value != target)
    emit_move_insn (target, value);
}
/* Output a shift instruction for expression code CODE,
   with SHIFTED being the rtx for the value to shift,
   and AMOUNT the rtx for the amount to shift by.
   Store the result in the rtx TARGET, if that is convenient.
   If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
   Return the rtx for where the value is.
   If that cannot be done, abort the compilation unless MAY_FAIL is true,
   in which case 0 is returned.  */

static rtx
expand_shift_1 (enum tree_code code, machine_mode mode, rtx shifted,
		rtx amount, rtx target, int unsignedp, bool may_fail = false)
{
2445 int left
= (code
== LSHIFT_EXPR
|| code
== LROTATE_EXPR
);
2446 int rotate
= (code
== LROTATE_EXPR
|| code
== RROTATE_EXPR
);
2447 optab lshift_optab
= ashl_optab
;
2448 optab rshift_arith_optab
= ashr_optab
;
2449 optab rshift_uns_optab
= lshr_optab
;
2450 optab lrotate_optab
= rotl_optab
;
2451 optab rrotate_optab
= rotr_optab
;
2452 machine_mode op1_mode
;
2453 scalar_mode scalar_mode
= GET_MODE_INNER (mode
);
2455 bool speed
= optimize_insn_for_speed_p ();
2458 op1_mode
= GET_MODE (op1
);
2460 /* Determine whether the shift/rotate amount is a vector, or scalar. If the
2461 shift amount is a vector, use the vector/vector shift patterns. */
2462 if (VECTOR_MODE_P (mode
) && VECTOR_MODE_P (op1_mode
))
2464 lshift_optab
= vashl_optab
;
2465 rshift_arith_optab
= vashr_optab
;
2466 rshift_uns_optab
= vlshr_optab
;
2467 lrotate_optab
= vrotl_optab
;
2468 rrotate_optab
= vrotr_optab
;
2471 /* Previously detected shift-counts computed by NEGATE_EXPR
2472 and shifted in the other direction; but that does not work
2475 if (SHIFT_COUNT_TRUNCATED
)
2477 if (CONST_INT_P (op1
)
2478 && ((unsigned HOST_WIDE_INT
) INTVAL (op1
) >=
2479 (unsigned HOST_WIDE_INT
) GET_MODE_BITSIZE (scalar_mode
)))
2480 op1
= gen_int_shift_amount (mode
,
2481 (unsigned HOST_WIDE_INT
) INTVAL (op1
)
2482 % GET_MODE_BITSIZE (scalar_mode
));
2483 else if (GET_CODE (op1
) == SUBREG
2484 && subreg_lowpart_p (op1
)
2485 && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1
)))
2486 && SCALAR_INT_MODE_P (GET_MODE (op1
)))
2487 op1
= SUBREG_REG (op1
);
2490 /* Canonicalize rotates by constant amount. If op1 is bitsize / 2,
2491 prefer left rotation, if op1 is from bitsize / 2 + 1 to
2492 bitsize - 1, use other direction of rotate with 1 .. bitsize / 2 - 1
2495 && CONST_INT_P (op1
)
2496 && IN_RANGE (INTVAL (op1
), GET_MODE_BITSIZE (scalar_mode
) / 2 + left
,
2497 GET_MODE_BITSIZE (scalar_mode
) - 1))
2499 op1
= gen_int_shift_amount (mode
, (GET_MODE_BITSIZE (scalar_mode
)
2502 code
= left
? LROTATE_EXPR
: RROTATE_EXPR
;
2505 /* Rotation of 16bit values by 8 bits is effectively equivalent to a bswaphi.
2506 Note that this is not the case for bigger values. For instance a rotation
2507 of 0x01020304 by 16 bits gives 0x03040102 which is different from
2508 0x04030201 (bswapsi). */
2510 && CONST_INT_P (op1
)
2511 && INTVAL (op1
) == BITS_PER_UNIT
2512 && GET_MODE_SIZE (scalar_mode
) == 2
2513 && optab_handler (bswap_optab
, mode
) != CODE_FOR_nothing
)
2514 return expand_unop (mode
, bswap_optab
, shifted
, NULL_RTX
, unsignedp
);
2516 if (op1
== const0_rtx
)
  /* Check whether it's cheaper to implement a left shift by a constant
     bit count by a sequence of additions.  */
2521 if (code
== LSHIFT_EXPR
2522 && CONST_INT_P (op1
)
2524 && INTVAL (op1
) < GET_MODE_PRECISION (scalar_mode
)
2525 && INTVAL (op1
) < MAX_BITS_PER_WORD
2526 && (shift_cost (speed
, mode
, INTVAL (op1
))
2527 > INTVAL (op1
) * add_cost (speed
, mode
))
2528 && shift_cost (speed
, mode
, INTVAL (op1
)) != MAX_COST
)
2531 for (i
= 0; i
< INTVAL (op1
); i
++)
2533 temp
= force_reg (mode
, shifted
);
2534 shifted
= expand_binop (mode
, add_optab
, temp
, temp
, NULL_RTX
,
2535 unsignedp
, OPTAB_LIB_WIDEN
);
2540 for (attempt
= 0; temp
== 0 && attempt
< 3; attempt
++)
2542 enum optab_methods methods
;
2545 methods
= OPTAB_DIRECT
;
2546 else if (attempt
== 1)
2547 methods
= OPTAB_WIDEN
;
2549 methods
= OPTAB_LIB_WIDEN
;
2553 /* Widening does not work for rotation. */
2554 if (methods
== OPTAB_WIDEN
)
2556 else if (methods
== OPTAB_LIB_WIDEN
)
	  /* If we have been unable to open-code this by a rotation,
	     do it as the IOR of two shifts.  I.e., to rotate A
	     by N bits, compute
	     (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
	     where C is the bitsize of A.

	     It is theoretically possible that the target machine might
	     not be able to perform either shift and hence we would
	     be making two libcalls rather than just the one for the
	     shift (similarly if IOR could not be done).  We will allow
	     this extremely unlikely lossage to avoid complicating the
	     translation.  */
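	  /* Concrete instance of the identity above, for a 32-bit mode:
	     a left rotation becomes
	       (A << N) | ((unsigned) A >> ((-N) & 31))
	     so rotating by 8 is (A << 8) | ((unsigned) A >> 24).  Masking
	     the negated amount with C - 1 keeps the right-shift count in
	     range even when N is 0.  */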
2571 rtx subtarget
= target
== shifted
? 0 : target
;
2572 rtx new_amount
, other_amount
;
2576 if (op1
== const0_rtx
)
2578 else if (CONST_INT_P (op1
))
2579 other_amount
= gen_int_shift_amount
2580 (mode
, GET_MODE_BITSIZE (scalar_mode
) - INTVAL (op1
));
2584 = simplify_gen_unary (NEG
, GET_MODE (op1
),
2585 op1
, GET_MODE (op1
));
2586 HOST_WIDE_INT mask
= GET_MODE_PRECISION (scalar_mode
) - 1;
2588 = simplify_gen_binary (AND
, GET_MODE (op1
), other_amount
,
2589 gen_int_mode (mask
, GET_MODE (op1
)));
2592 shifted
= force_reg (mode
, shifted
);
2594 temp
= expand_shift_1 (left
? LSHIFT_EXPR
: RSHIFT_EXPR
,
2595 mode
, shifted
, new_amount
, 0, 1);
2596 temp1
= expand_shift_1 (left
? RSHIFT_EXPR
: LSHIFT_EXPR
,
2597 mode
, shifted
, other_amount
,
2599 return expand_binop (mode
, ior_optab
, temp
, temp1
, target
,
2600 unsignedp
, methods
);
2603 temp
= expand_binop (mode
,
2604 left
? lrotate_optab
: rrotate_optab
,
2605 shifted
, op1
, target
, unsignedp
, methods
);
2608 temp
= expand_binop (mode
,
2609 left
? lshift_optab
: rshift_uns_optab
,
2610 shifted
, op1
, target
, unsignedp
, methods
);
2612 /* Do arithmetic shifts.
2613 Also, if we are going to widen the operand, we can just as well
2614 use an arithmetic right-shift instead of a logical one. */
2615 if (temp
== 0 && ! rotate
2616 && (! unsignedp
|| (! left
&& methods
== OPTAB_WIDEN
)))
2618 enum optab_methods methods1
= methods
;
2620 /* If trying to widen a log shift to an arithmetic shift,
2621 don't accept an arithmetic shift of the same size. */
2623 methods1
= OPTAB_MUST_WIDEN
;
2625 /* Arithmetic shift */
2627 temp
= expand_binop (mode
,
2628 left
? lshift_optab
: rshift_arith_optab
,
2629 shifted
, op1
, target
, unsignedp
, methods1
);
2632 /* We used to try extzv here for logical right shifts, but that was
2633 only useful for one machine, the VAX, and caused poor code
2634 generation there for lshrdi3, so the code was deleted and a
2635 define_expand for lshrsi3 was added to vax.md. */
2638 gcc_assert (temp
!= NULL_RTX
|| may_fail
);
/* Output a shift instruction for expression code CODE,
   with SHIFTED being the rtx for the value to shift,
   and AMOUNT the amount to shift by.
   Store the result in the rtx TARGET, if that is convenient.
   If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
   Return the rtx for where the value is.  */

rtx
expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
	      poly_int64 amount, rtx target, int unsignedp)
{
  return expand_shift_1 (code, mode, shifted,
			 gen_int_shift_amount (mode, amount),
			 target, unsignedp);
}

/* Likewise, but return 0 if that cannot be done.  */

static rtx
maybe_expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
		    int amount, rtx target, int unsignedp)
{
  return expand_shift_1 (code, mode,
			 shifted, GEN_INT (amount), target, unsignedp, true);
}

/* Output a shift instruction for expression code CODE,
   with SHIFTED being the rtx for the value to shift,
   and AMOUNT the tree for the amount to shift by.
   Store the result in the rtx TARGET, if that is convenient.
   If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
   Return the rtx for where the value is.  */

rtx
expand_variable_shift (enum tree_code code, machine_mode mode, rtx shifted,
		       tree amount, rtx target, int unsignedp)
{
  return expand_shift_1 (code, mode,
			 shifted, expand_normal (amount), target, unsignedp);
}
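/* For reference: the three entry points above differ only in how the shift
   amount is supplied -- expand_shift takes a poly_int64 known at expand
   time, maybe_expand_shift takes a plain int and may return 0 on failure,
   and expand_variable_shift takes a GENERIC tree that is expanded first.
   All of them funnel into expand_shift_1.  */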
static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
			const struct mult_cost *, machine_mode mode);
static rtx expand_mult_const (machine_mode, rtx, HOST_WIDE_INT, rtx,
			      const struct algorithm *, enum mult_variant);
static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
static rtx extract_high_half (scalar_int_mode, rtx);
static rtx expmed_mult_highpart (scalar_int_mode, rtx, rtx, rtx, int, int);
static rtx expmed_mult_highpart_optab (scalar_int_mode, rtx, rtx, rtx,
				       int, int);

/* Compute and return the best algorithm for multiplying by T.
   The algorithm must cost less than COST_LIMIT.
   If retval.cost >= COST_LIMIT, no algorithm was found and all
   other fields of the returned struct are undefined.
   MODE is the machine mode of the multiplication.  */

static void
synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
	    const struct mult_cost *cost_limit, machine_mode mode)
{
2704 struct algorithm
*alg_in
, *best_alg
;
2705 struct mult_cost best_cost
;
2706 struct mult_cost new_limit
;
2707 int op_cost
, op_latency
;
2708 unsigned HOST_WIDE_INT orig_t
= t
;
2709 unsigned HOST_WIDE_INT q
;
2710 int maxm
, hash_index
;
2711 bool cache_hit
= false;
2712 enum alg_code cache_alg
= alg_zero
;
2713 bool speed
= optimize_insn_for_speed_p ();
2714 scalar_int_mode imode
;
2715 struct alg_hash_entry
*entry_ptr
;
2717 /* Indicate that no algorithm is yet found. If no algorithm
2718 is found, this value will be returned and indicate failure. */
2719 alg_out
->cost
.cost
= cost_limit
->cost
+ 1;
2720 alg_out
->cost
.latency
= cost_limit
->latency
+ 1;
2722 if (cost_limit
->cost
< 0
2723 || (cost_limit
->cost
== 0 && cost_limit
->latency
<= 0))
2726 /* Be prepared for vector modes. */
2727 imode
= as_a
<scalar_int_mode
> (GET_MODE_INNER (mode
));
2729 maxm
= MIN (BITS_PER_WORD
, GET_MODE_BITSIZE (imode
));
2731 /* Restrict the bits of "t" to the multiplication's mode. */
2732 t
&= GET_MODE_MASK (imode
);
2734 /* t == 1 can be done in zero cost. */
2738 alg_out
->cost
.cost
= 0;
2739 alg_out
->cost
.latency
= 0;
2740 alg_out
->op
[0] = alg_m
;
2744 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2748 if (MULT_COST_LESS (cost_limit
, zero_cost (speed
)))
2753 alg_out
->cost
.cost
= zero_cost (speed
);
2754 alg_out
->cost
.latency
= zero_cost (speed
);
2755 alg_out
->op
[0] = alg_zero
;
2760 /* We'll be needing a couple extra algorithm structures now. */
2762 alg_in
= XALLOCA (struct algorithm
);
2763 best_alg
= XALLOCA (struct algorithm
);
2764 best_cost
= *cost_limit
;
2766 /* Compute the hash index. */
2767 hash_index
= (t
^ (unsigned int) mode
^ (speed
* 256)) % NUM_ALG_HASH_ENTRIES
;
2769 /* See if we already know what to do for T. */
2770 entry_ptr
= alg_hash_entry_ptr (hash_index
);
2771 if (entry_ptr
->t
== t
2772 && entry_ptr
->mode
== mode
2773 && entry_ptr
->speed
== speed
2774 && entry_ptr
->alg
!= alg_unknown
)
2776 cache_alg
= entry_ptr
->alg
;
2778 if (cache_alg
== alg_impossible
)
2780 /* The cache tells us that it's impossible to synthesize
2781 multiplication by T within entry_ptr->cost. */
2782 if (!CHEAPER_MULT_COST (&entry_ptr
->cost
, cost_limit
))
2783 /* COST_LIMIT is at least as restrictive as the one
2784 recorded in the hash table, in which case we have no
2785 hope of synthesizing a multiplication. Just
2789 /* If we get here, COST_LIMIT is less restrictive than the
2790 one recorded in the hash table, so we may be able to
2791 synthesize a multiplication. Proceed as if we didn't
2792 have the cache entry. */
2796 if (CHEAPER_MULT_COST (cost_limit
, &entry_ptr
->cost
))
2797 /* The cached algorithm shows that this multiplication
2798 requires more cost than COST_LIMIT. Just return. This
2799 way, we don't clobber this cache entry with
2800 alg_impossible but retain useful information. */
2812 goto do_alg_addsub_t_m2
;
2814 case alg_add_factor
:
2815 case alg_sub_factor
:
2816 goto do_alg_addsub_factor
;
2819 goto do_alg_add_t2_m
;
2822 goto do_alg_sub_t2_m
;
2830 /* If we have a group of zero bits at the low-order part of T, try
2831 multiplying by the remaining bits and then doing a shift. */
2836 m
= ctz_or_zero (t
); /* m = number of low zero bits */
2840 /* The function expand_shift will choose between a shift and
2841 a sequence of additions, so the observed cost is given as
2842 MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)). */
2843 op_cost
= m
* add_cost (speed
, mode
);
2844 if (shift_cost (speed
, mode
, m
) < op_cost
)
2845 op_cost
= shift_cost (speed
, mode
, m
);
2846 new_limit
.cost
= best_cost
.cost
- op_cost
;
2847 new_limit
.latency
= best_cost
.latency
- op_cost
;
2848 synth_mult (alg_in
, q
, &new_limit
, mode
);
2850 alg_in
->cost
.cost
+= op_cost
;
2851 alg_in
->cost
.latency
+= op_cost
;
2852 if (CHEAPER_MULT_COST (&alg_in
->cost
, &best_cost
))
2854 best_cost
= alg_in
->cost
;
2855 std::swap (alg_in
, best_alg
);
2856 best_alg
->log
[best_alg
->ops
] = m
;
2857 best_alg
->op
[best_alg
->ops
] = alg_shift
;
2860 /* See if treating ORIG_T as a signed number yields a better
2861 sequence. Try this sequence only for a negative ORIG_T
2862 as it would be useless for a non-negative ORIG_T. */
2863 if ((HOST_WIDE_INT
) orig_t
< 0)
2865 /* Shift ORIG_T as follows because a right shift of a
2866 negative-valued signed type is implementation
2868 q
= ~(~orig_t
>> m
);
2869 /* The function expand_shift will choose between a shift
2870 and a sequence of additions, so the observed cost is
2871 given as MIN (m * add_cost(speed, mode),
2872 shift_cost(speed, mode, m)). */
2873 op_cost
= m
* add_cost (speed
, mode
);
2874 if (shift_cost (speed
, mode
, m
) < op_cost
)
2875 op_cost
= shift_cost (speed
, mode
, m
);
2876 new_limit
.cost
= best_cost
.cost
- op_cost
;
2877 new_limit
.latency
= best_cost
.latency
- op_cost
;
2878 synth_mult (alg_in
, q
, &new_limit
, mode
);
2880 alg_in
->cost
.cost
+= op_cost
;
2881 alg_in
->cost
.latency
+= op_cost
;
2882 if (CHEAPER_MULT_COST (&alg_in
->cost
, &best_cost
))
2884 best_cost
= alg_in
->cost
;
2885 std::swap (alg_in
, best_alg
);
2886 best_alg
->log
[best_alg
->ops
] = m
;
2887 best_alg
->op
[best_alg
->ops
] = alg_shift
;
2895 /* If we have an odd number, add or subtract one. */
2898 unsigned HOST_WIDE_INT w
;
2901 for (w
= 1; (w
& t
) != 0; w
<<= 1)
2903 /* If T was -1, then W will be zero after the loop. This is another
2904 case where T ends with ...111. Handling this with (T + 1) and
2905 subtract 1 produces slightly better code and results in algorithm
2906 selection much faster than treating it like the ...0111 case
2910 /* Reject the case where t is 3.
2911 Thus we prefer addition in that case. */
2914 /* T ends with ...111. Multiply by (T + 1) and subtract T. */
2916 op_cost
= add_cost (speed
, mode
);
2917 new_limit
.cost
= best_cost
.cost
- op_cost
;
2918 new_limit
.latency
= best_cost
.latency
- op_cost
;
2919 synth_mult (alg_in
, t
+ 1, &new_limit
, mode
);
2921 alg_in
->cost
.cost
+= op_cost
;
2922 alg_in
->cost
.latency
+= op_cost
;
2923 if (CHEAPER_MULT_COST (&alg_in
->cost
, &best_cost
))
2925 best_cost
= alg_in
->cost
;
2926 std::swap (alg_in
, best_alg
);
2927 best_alg
->log
[best_alg
->ops
] = 0;
2928 best_alg
->op
[best_alg
->ops
] = alg_sub_t_m2
;
2933 /* T ends with ...01 or ...011. Multiply by (T - 1) and add T. */
2935 op_cost
= add_cost (speed
, mode
);
2936 new_limit
.cost
= best_cost
.cost
- op_cost
;
2937 new_limit
.latency
= best_cost
.latency
- op_cost
;
2938 synth_mult (alg_in
, t
- 1, &new_limit
, mode
);
2940 alg_in
->cost
.cost
+= op_cost
;
2941 alg_in
->cost
.latency
+= op_cost
;
2942 if (CHEAPER_MULT_COST (&alg_in
->cost
, &best_cost
))
2944 best_cost
= alg_in
->cost
;
2945 std::swap (alg_in
, best_alg
);
2946 best_alg
->log
[best_alg
->ops
] = 0;
2947 best_alg
->op
[best_alg
->ops
] = alg_add_t_m2
;
2951 /* We may be able to calculate a * -7, a * -15, a * -31, etc
2952 quickly with a - a * n for some appropriate constant n. */
2953 m
= exact_log2 (-orig_t
+ 1);
2954 if (m
>= 0 && m
< maxm
)
2956 op_cost
= add_cost (speed
, mode
) + shift_cost (speed
, mode
, m
);
	  /* If the target has a cheap shift-and-subtract insn use
	     that in preference to a shift insn followed by a sub insn.
	     Assume that the shift-and-sub is "atomic" with a latency
	     equal to its cost, otherwise assume that on superscalar
	     hardware the shift may be executed concurrently with the
	     earlier steps in the algorithm.  */
2963 if (shiftsub1_cost (speed
, mode
, m
) <= op_cost
)
2965 op_cost
= shiftsub1_cost (speed
, mode
, m
);
2966 op_latency
= op_cost
;
2969 op_latency
= add_cost (speed
, mode
);
2971 new_limit
.cost
= best_cost
.cost
- op_cost
;
2972 new_limit
.latency
= best_cost
.latency
- op_latency
;
2973 synth_mult (alg_in
, (unsigned HOST_WIDE_INT
) (-orig_t
+ 1) >> m
,
2976 alg_in
->cost
.cost
+= op_cost
;
2977 alg_in
->cost
.latency
+= op_latency
;
2978 if (CHEAPER_MULT_COST (&alg_in
->cost
, &best_cost
))
2980 best_cost
= alg_in
->cost
;
2981 std::swap (alg_in
, best_alg
);
2982 best_alg
->log
[best_alg
->ops
] = m
;
2983 best_alg
->op
[best_alg
->ops
] = alg_sub_t_m2
;
2991 /* Look for factors of t of the form
2992 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2993 If we find such a factor, we can multiply by t using an algorithm that
2994 multiplies by q, shift the result by m and add/subtract it to itself.
2996 We search for large factors first and loop down, even if large factors
2997 are less probable than small; if we find a large factor we will find a
2998 good sequence quickly, and therefore be able to prune (by decreasing
2999 COST_LIMIT) the search. */
3001 do_alg_addsub_factor
:
3002 for (m
= floor_log2 (t
- 1); m
>= 2; m
--)
3004 unsigned HOST_WIDE_INT d
;
3006 d
= (HOST_WIDE_INT_1U
<< m
) + 1;
3007 if (t
% d
== 0 && t
> d
&& m
< maxm
3008 && (!cache_hit
|| cache_alg
== alg_add_factor
))
3010 op_cost
= add_cost (speed
, mode
) + shift_cost (speed
, mode
, m
);
3011 if (shiftadd_cost (speed
, mode
, m
) <= op_cost
)
3012 op_cost
= shiftadd_cost (speed
, mode
, m
);
3014 op_latency
= op_cost
;
3017 new_limit
.cost
= best_cost
.cost
- op_cost
;
3018 new_limit
.latency
= best_cost
.latency
- op_latency
;
3019 synth_mult (alg_in
, t
/ d
, &new_limit
, mode
);
3021 alg_in
->cost
.cost
+= op_cost
;
3022 alg_in
->cost
.latency
+= op_latency
;
3023 if (alg_in
->cost
.latency
< op_cost
)
3024 alg_in
->cost
.latency
= op_cost
;
3025 if (CHEAPER_MULT_COST (&alg_in
->cost
, &best_cost
))
3027 best_cost
= alg_in
->cost
;
3028 std::swap (alg_in
, best_alg
);
3029 best_alg
->log
[best_alg
->ops
] = m
;
3030 best_alg
->op
[best_alg
->ops
] = alg_add_factor
;
3032 /* Other factors will have been taken care of in the recursion. */
3036 d
= (HOST_WIDE_INT_1U
<< m
) - 1;
3037 if (t
% d
== 0 && t
> d
&& m
< maxm
3038 && (!cache_hit
|| cache_alg
== alg_sub_factor
))
3040 op_cost
= add_cost (speed
, mode
) + shift_cost (speed
, mode
, m
);
3041 if (shiftsub0_cost (speed
, mode
, m
) <= op_cost
)
3042 op_cost
= shiftsub0_cost (speed
, mode
, m
);
3044 op_latency
= op_cost
;
3046 new_limit
.cost
= best_cost
.cost
- op_cost
;
3047 new_limit
.latency
= best_cost
.latency
- op_latency
;
3048 synth_mult (alg_in
, t
/ d
, &new_limit
, mode
);
3050 alg_in
->cost
.cost
+= op_cost
;
3051 alg_in
->cost
.latency
+= op_latency
;
3052 if (alg_in
->cost
.latency
< op_cost
)
3053 alg_in
->cost
.latency
= op_cost
;
3054 if (CHEAPER_MULT_COST (&alg_in
->cost
, &best_cost
))
3056 best_cost
= alg_in
->cost
;
3057 std::swap (alg_in
, best_alg
);
3058 best_alg
->log
[best_alg
->ops
] = m
;
3059 best_alg
->op
[best_alg
->ops
] = alg_sub_factor
;
3067 /* Try shift-and-add (load effective address) instructions,
3068 i.e. do a*3, a*5, a*9. */
3076 op_cost
= shiftadd_cost (speed
, mode
, m
);
3077 new_limit
.cost
= best_cost
.cost
- op_cost
;
3078 new_limit
.latency
= best_cost
.latency
- op_cost
;
3079 synth_mult (alg_in
, (t
- 1) >> m
, &new_limit
, mode
);
3081 alg_in
->cost
.cost
+= op_cost
;
3082 alg_in
->cost
.latency
+= op_cost
;
3083 if (CHEAPER_MULT_COST (&alg_in
->cost
, &best_cost
))
3085 best_cost
= alg_in
->cost
;
3086 std::swap (alg_in
, best_alg
);
3087 best_alg
->log
[best_alg
->ops
] = m
;
3088 best_alg
->op
[best_alg
->ops
] = alg_add_t2_m
;
3099 op_cost
= shiftsub0_cost (speed
, mode
, m
);
3100 new_limit
.cost
= best_cost
.cost
- op_cost
;
3101 new_limit
.latency
= best_cost
.latency
- op_cost
;
3102 synth_mult (alg_in
, (t
+ 1) >> m
, &new_limit
, mode
);
3104 alg_in
->cost
.cost
+= op_cost
;
3105 alg_in
->cost
.latency
+= op_cost
;
3106 if (CHEAPER_MULT_COST (&alg_in
->cost
, &best_cost
))
3108 best_cost
= alg_in
->cost
;
3109 std::swap (alg_in
, best_alg
);
3110 best_alg
->log
[best_alg
->ops
] = m
;
3111 best_alg
->op
[best_alg
->ops
] = alg_sub_t2_m
;
3119 /* If best_cost has not decreased, we have not found any algorithm. */
3120 if (!CHEAPER_MULT_COST (&best_cost
, cost_limit
))
3122 /* We failed to find an algorithm. Record alg_impossible for
3123 this case (that is, <T, MODE, COST_LIMIT>) so that next time
3124 we are asked to find an algorithm for T within the same or
3125 lower COST_LIMIT, we can immediately return to the
3128 entry_ptr
->mode
= mode
;
3129 entry_ptr
->speed
= speed
;
3130 entry_ptr
->alg
= alg_impossible
;
3131 entry_ptr
->cost
= *cost_limit
;
3135 /* Cache the result. */
3139 entry_ptr
->mode
= mode
;
3140 entry_ptr
->speed
= speed
;
3141 entry_ptr
->alg
= best_alg
->op
[best_alg
->ops
];
3142 entry_ptr
->cost
.cost
= best_cost
.cost
;
3143 entry_ptr
->cost
.latency
= best_cost
.latency
;
3146 /* If we are getting a too long sequence for `struct algorithm'
3147 to record, make this search fail. */
3148 if (best_alg
->ops
== MAX_BITS_PER_WORD
)
3151 /* Copy the algorithm from temporary space to the space at alg_out.
3152 We avoid using structure assignment because the majority of
3153 best_alg is normally undefined, and this is a critical function. */
3154 alg_out
->ops
= best_alg
->ops
+ 1;
3155 alg_out
->cost
= best_cost
;
3156 memcpy (alg_out
->op
, best_alg
->op
,
3157 alg_out
->ops
* sizeof *alg_out
->op
);
3158 memcpy (alg_out
->log
, best_alg
->log
,
3159 alg_out
->ops
* sizeof *alg_out
->log
);
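/* Worked example (one possible outcome; the choice depends on the target's
   add, shift and shift-add costs): for T == 45, synth_mult can return the
   sequence

     t1 = (x << 2) + x;	   -> x * 5	(alg_add_t2_m)
     t2 = (t1 << 3) + t1;  -> x * 45	(alg_add_factor)

   i.e. 45 is decomposed as (2^2 + 1) * (2^3 + 1), two shift-and-add steps
   instead of a multiplication.  */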
/* Find the cheapest way of multiplying a value of mode MODE by VAL.
   Try three variations:

       - a shift/add sequence based on VAL itself
       - a shift/add sequence based on -VAL, followed by a negation
       - a shift/add sequence based on VAL - 1, followed by an addition.

   Return true if the cheapest of these costs less than MULT_COST,
   describing the algorithm in *ALG and final fixup in *VARIANT.  */

bool
choose_mult_variant (machine_mode mode, HOST_WIDE_INT val,
		     struct algorithm *alg, enum mult_variant *variant,
		     int mult_cost)
{
3177 struct algorithm alg2
;
3178 struct mult_cost limit
;
3180 bool speed
= optimize_insn_for_speed_p ();
3182 /* Fail quickly for impossible bounds. */
3186 /* Ensure that mult_cost provides a reasonable upper bound.
3187 Any constant multiplication can be performed with less
3188 than 2 * bits additions. */
3189 op_cost
= 2 * GET_MODE_UNIT_BITSIZE (mode
) * add_cost (speed
, mode
);
3190 if (mult_cost
> op_cost
)
3191 mult_cost
= op_cost
;
3193 *variant
= basic_variant
;
3194 limit
.cost
= mult_cost
;
3195 limit
.latency
= mult_cost
;
3196 synth_mult (alg
, val
, &limit
, mode
);
3198 /* This works only if the inverted value actually fits in an
3200 if (HOST_BITS_PER_INT
>= GET_MODE_UNIT_BITSIZE (mode
))
3202 op_cost
= neg_cost (speed
, mode
);
3203 if (MULT_COST_LESS (&alg
->cost
, mult_cost
))
3205 limit
.cost
= alg
->cost
.cost
- op_cost
;
3206 limit
.latency
= alg
->cost
.latency
- op_cost
;
3210 limit
.cost
= mult_cost
- op_cost
;
3211 limit
.latency
= mult_cost
- op_cost
;
3214 synth_mult (&alg2
, -val
, &limit
, mode
);
3215 alg2
.cost
.cost
+= op_cost
;
3216 alg2
.cost
.latency
+= op_cost
;
3217 if (CHEAPER_MULT_COST (&alg2
.cost
, &alg
->cost
))
3218 *alg
= alg2
, *variant
= negate_variant
;
3221 /* This proves very useful for division-by-constant. */
3222 op_cost
= add_cost (speed
, mode
);
3223 if (MULT_COST_LESS (&alg
->cost
, mult_cost
))
3225 limit
.cost
= alg
->cost
.cost
- op_cost
;
3226 limit
.latency
= alg
->cost
.latency
- op_cost
;
3230 limit
.cost
= mult_cost
- op_cost
;
3231 limit
.latency
= mult_cost
- op_cost
;
3234 synth_mult (&alg2
, val
- 1, &limit
, mode
);
3235 alg2
.cost
.cost
+= op_cost
;
3236 alg2
.cost
.latency
+= op_cost
;
3237 if (CHEAPER_MULT_COST (&alg2
.cost
, &alg
->cost
))
3238 *alg
= alg2
, *variant
= add_variant
;
3240 return MULT_COST_LESS (&alg
->cost
, mult_cost
);
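/* Illustrative outcomes: multiplying by 65537 is usually cheapest via the
   VAL - 1 sequence, (x << 16) plus one addition (add_variant); a negative
   value such as -5 may be done as the sequence for 5 followed by a negation
   (negate_variant); basic_variant is used when the sequence for VAL itself
   wins.  */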
3243 /* A subroutine of expand_mult, used for constant multiplications.
3244 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
3245 convenient. Use the shift/add sequence described by ALG and apply
3246 the final fixup specified by VARIANT. */
3249 expand_mult_const (machine_mode mode
, rtx op0
, HOST_WIDE_INT val
,
3250 rtx target
, const struct algorithm
*alg
,
3251 enum mult_variant variant
)
3253 unsigned HOST_WIDE_INT val_so_far
;
3259 /* Avoid referencing memory over and over and invalid sharing
3261 op0
= force_reg (mode
, op0
);
3263 /* ACCUM starts out either as OP0 or as a zero, depending on
3264 the first operation. */
3266 if (alg
->op
[0] == alg_zero
)
3268 accum
= copy_to_mode_reg (mode
, CONST0_RTX (mode
));
3271 else if (alg
->op
[0] == alg_m
)
3273 accum
= copy_to_mode_reg (mode
, op0
);
3279 for (opno
= 1; opno
< alg
->ops
; opno
++)
3281 int log
= alg
->log
[opno
];
3282 rtx shift_subtarget
= optimize
? 0 : accum
;
3284 = (opno
== alg
->ops
- 1 && target
!= 0 && variant
!= add_variant
3287 rtx accum_target
= optimize
? 0 : accum
;
3290 switch (alg
->op
[opno
])
3293 tem
= expand_shift (LSHIFT_EXPR
, mode
, accum
, log
, NULL_RTX
, 0);
3294 /* REG_EQUAL note will be attached to the following insn. */
3295 emit_move_insn (accum
, tem
);
3300 tem
= expand_shift (LSHIFT_EXPR
, mode
, op0
, log
, NULL_RTX
, 0);
3301 accum
= force_operand (gen_rtx_PLUS (mode
, accum
, tem
),
3302 add_target
? add_target
: accum_target
);
3303 val_so_far
+= HOST_WIDE_INT_1U
<< log
;
3307 tem
= expand_shift (LSHIFT_EXPR
, mode
, op0
, log
, NULL_RTX
, 0);
3308 accum
= force_operand (gen_rtx_MINUS (mode
, accum
, tem
),
3309 add_target
? add_target
: accum_target
);
3310 val_so_far
-= HOST_WIDE_INT_1U
<< log
;
3314 accum
= expand_shift (LSHIFT_EXPR
, mode
, accum
,
3315 log
, shift_subtarget
, 0);
3316 accum
= force_operand (gen_rtx_PLUS (mode
, accum
, op0
),
3317 add_target
? add_target
: accum_target
);
3318 val_so_far
= (val_so_far
<< log
) + 1;
3322 accum
= expand_shift (LSHIFT_EXPR
, mode
, accum
,
3323 log
, shift_subtarget
, 0);
3324 accum
= force_operand (gen_rtx_MINUS (mode
, accum
, op0
),
3325 add_target
? add_target
: accum_target
);
3326 val_so_far
= (val_so_far
<< log
) - 1;
3329 case alg_add_factor
:
3330 tem
= expand_shift (LSHIFT_EXPR
, mode
, accum
, log
, NULL_RTX
, 0);
3331 accum
= force_operand (gen_rtx_PLUS (mode
, accum
, tem
),
3332 add_target
? add_target
: accum_target
);
3333 val_so_far
+= val_so_far
<< log
;
3336 case alg_sub_factor
:
3337 tem
= expand_shift (LSHIFT_EXPR
, mode
, accum
, log
, NULL_RTX
, 0);
3338 accum
= force_operand (gen_rtx_MINUS (mode
, tem
, accum
),
3340 ? add_target
: (optimize
? 0 : tem
)));
3341 val_so_far
= (val_so_far
<< log
) - val_so_far
;
3348 if (SCALAR_INT_MODE_P (mode
))
3350 /* Write a REG_EQUAL note on the last insn so that we can cse
3351 multiplication sequences. Note that if ACCUM is a SUBREG,
3352 we've set the inner register and must properly indicate that. */
3353 tem
= op0
, nmode
= mode
;
3354 accum_inner
= accum
;
3355 if (GET_CODE (accum
) == SUBREG
)
3357 accum_inner
= SUBREG_REG (accum
);
3358 nmode
= GET_MODE (accum_inner
);
3359 tem
= gen_lowpart (nmode
, op0
);
3362 /* Don't add a REG_EQUAL note if tem is a paradoxical SUBREG.
3363 In that case, only the low bits of accum would be guaranteed to
3364 be equal to the content of the REG_EQUAL note, the upper bits
3366 if (!paradoxical_subreg_p (tem
))
3368 insn
= get_last_insn ();
3369 wide_int wval_so_far
3370 = wi::uhwi (val_so_far
,
3371 GET_MODE_PRECISION (as_a
<scalar_mode
> (nmode
)));
3372 rtx c
= immed_wide_int_const (wval_so_far
, nmode
);
3373 set_dst_reg_note (insn
, REG_EQUAL
, gen_rtx_MULT (nmode
, tem
, c
),
3379 if (variant
== negate_variant
)
3381 val_so_far
= -val_so_far
;
3382 accum
= expand_unop (mode
, neg_optab
, accum
, target
, 0);
3384 else if (variant
== add_variant
)
3386 val_so_far
= val_so_far
+ 1;
3387 accum
= force_operand (gen_rtx_PLUS (mode
, accum
, op0
), target
);
3390 /* Compare only the bits of val and val_so_far that are significant
3391 in the result mode, to avoid sign-/zero-extension confusion. */
3392 nmode
= GET_MODE_INNER (mode
);
3393 val
&= GET_MODE_MASK (nmode
);
3394 val_so_far
&= GET_MODE_MASK (nmode
);
3395 gcc_assert (val
== (HOST_WIDE_INT
) val_so_far
);
/* Perform a multiplication and return an rtx for the result.
   MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
   TARGET is a suggestion for where to store the result (an rtx).

   We check specially for a constant integer as OP1.
   If you want this check for OP0 as well, then before calling
   you should swap the two operands if OP0 would be constant.  */

rtx
expand_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
	     int unsignedp, bool no_libcall)
{
3412 enum mult_variant variant
;
3413 struct algorithm algorithm
;
3416 bool speed
= optimize_insn_for_speed_p ();
3417 bool do_trapv
= flag_trapv
&& SCALAR_INT_MODE_P (mode
) && !unsignedp
;
3419 if (CONSTANT_P (op0
))
3420 std::swap (op0
, op1
);
3422 /* For vectors, there are several simplifications that can be made if
3423 all elements of the vector constant are identical. */
3424 scalar_op1
= unwrap_const_vec_duplicate (op1
);
3426 if (INTEGRAL_MODE_P (mode
))
3429 HOST_WIDE_INT coeff
;
3433 if (op1
== CONST0_RTX (mode
))
3435 if (op1
== CONST1_RTX (mode
))
3437 if (op1
== CONSTM1_RTX (mode
))
3438 return expand_unop (mode
, do_trapv
? negv_optab
: neg_optab
,
      /* If mode is integer vector mode, check if the backend supports
	 vector lshift (by scalar or vector) at all.  If not, we can't use
	 synthesized multiply.  */
3447 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
3448 && optab_handler (vashl_optab
, mode
) == CODE_FOR_nothing
3449 && optab_handler (ashl_optab
, mode
) == CODE_FOR_nothing
)
3452 /* These are the operations that are potentially turned into
3453 a sequence of shifts and additions. */
3454 mode_bitsize
= GET_MODE_UNIT_BITSIZE (mode
);
3456 /* synth_mult does an `unsigned int' multiply. As long as the mode is
3457 less than or equal in size to `unsigned int' this doesn't matter.
3458 If the mode is larger than `unsigned int', then synth_mult works
3459 only if the constant value exactly fits in an `unsigned int' without
3460 any truncation. This means that multiplying by negative values does
3461 not work; results are off by 2^32 on a 32 bit machine. */
3462 if (CONST_INT_P (scalar_op1
))
3464 coeff
= INTVAL (scalar_op1
);
3467 #if TARGET_SUPPORTS_WIDE_INT
3468 else if (CONST_WIDE_INT_P (scalar_op1
))
3470 else if (CONST_DOUBLE_AS_INT_P (scalar_op1
))
3473 int shift
= wi::exact_log2 (rtx_mode_t (scalar_op1
, mode
));
3474 /* Perfect power of 2 (other than 1, which is handled above). */
3476 return expand_shift (LSHIFT_EXPR
, mode
, op0
,
3477 shift
, target
, unsignedp
);
3484 /* We used to test optimize here, on the grounds that it's better to
3485 produce a smaller program when -O is not used. But this causes
3486 such a terrible slowdown sometimes that it seems better to always
3489 /* Special case powers of two. */
3490 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff
)
3491 && !(is_neg
&& mode_bitsize
> HOST_BITS_PER_WIDE_INT
))
3492 return expand_shift (LSHIFT_EXPR
, mode
, op0
,
3493 floor_log2 (coeff
), target
, unsignedp
);
3495 fake_reg
= gen_raw_REG (mode
, LAST_VIRTUAL_REGISTER
+ 1);
3497 /* Attempt to handle multiplication of DImode values by negative
3498 coefficients, by performing the multiplication by a positive
3499 multiplier and then inverting the result. */
3500 if (is_neg
&& mode_bitsize
> HOST_BITS_PER_WIDE_INT
)
	  /* It's safe to use -coeff even for INT_MIN, as the
	     result is interpreted as an unsigned coefficient.
	     Exclude cost of op0 from max_cost to match the cost
	     calculation of the synth_mult.  */
3506 coeff
= -(unsigned HOST_WIDE_INT
) coeff
;
3507 max_cost
= (set_src_cost (gen_rtx_MULT (mode
, fake_reg
, op1
),
3509 - neg_cost (speed
, mode
));
3513 /* Special case powers of two. */
3514 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff
))
3516 rtx temp
= expand_shift (LSHIFT_EXPR
, mode
, op0
,
3517 floor_log2 (coeff
), target
, unsignedp
);
3518 return expand_unop (mode
, neg_optab
, temp
, target
, 0);
3521 if (choose_mult_variant (mode
, coeff
, &algorithm
, &variant
,
3524 rtx temp
= expand_mult_const (mode
, op0
, coeff
, NULL_RTX
,
3525 &algorithm
, variant
);
3526 return expand_unop (mode
, neg_optab
, temp
, target
, 0);
3531 /* Exclude cost of op0 from max_cost to match the cost
3532 calculation of the synth_mult. */
3533 max_cost
= set_src_cost (gen_rtx_MULT (mode
, fake_reg
, op1
), mode
, speed
);
3534 if (choose_mult_variant (mode
, coeff
, &algorithm
, &variant
, max_cost
))
3535 return expand_mult_const (mode
, op0
, coeff
, target
,
3536 &algorithm
, variant
);
3540 /* Expand x*2.0 as x+x. */
3541 if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1
)
3542 && real_equal (CONST_DOUBLE_REAL_VALUE (scalar_op1
), &dconst2
))
3544 op0
= force_reg (GET_MODE (op0
), op0
);
3545 return expand_binop (mode
, add_optab
, op0
, op0
,
3547 no_libcall
? OPTAB_WIDEN
: OPTAB_LIB_WIDEN
);
3550 /* This used to use umul_optab if unsigned, but for non-widening multiply
3551 there is no difference between signed and unsigned. */
3552 op0
= expand_binop (mode
, do_trapv
? smulv_optab
: smul_optab
,
3553 op0
, op1
, target
, unsignedp
,
3554 no_libcall
? OPTAB_WIDEN
: OPTAB_LIB_WIDEN
);
3555 gcc_assert (op0
|| no_libcall
);
3559 /* Return a cost estimate for multiplying a register by the given
3560 COEFFicient in the given MODE and SPEED. */
3563 mult_by_coeff_cost (HOST_WIDE_INT coeff
, machine_mode mode
, bool speed
)
3566 struct algorithm algorithm
;
3567 enum mult_variant variant
;
3569 rtx fake_reg
= gen_raw_REG (mode
, LAST_VIRTUAL_REGISTER
+ 1);
3570 max_cost
= set_src_cost (gen_rtx_MULT (mode
, fake_reg
, fake_reg
),
3572 if (choose_mult_variant (mode
, coeff
, &algorithm
, &variant
, max_cost
))
3573 return algorithm
.cost
.cost
;
3578 /* Perform a widening multiplication and return an rtx for the result.
3579 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3580 TARGET is a suggestion for where to store the result (an rtx).
3581 THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3582 or smul_widen_optab.
3584 We check specially for a constant integer as OP1, comparing the
3585 cost of a widening multiply against the cost of a sequence of shifts
3589 expand_widening_mult (machine_mode mode
, rtx op0
, rtx op1
, rtx target
,
3590 int unsignedp
, optab this_optab
)
3592 bool speed
= optimize_insn_for_speed_p ();
3595 if (CONST_INT_P (op1
)
3596 && GET_MODE (op0
) != VOIDmode
3597 && (cop1
= convert_modes (mode
, GET_MODE (op0
), op1
,
3598 this_optab
== umul_widen_optab
))
3599 && CONST_INT_P (cop1
)
3600 && (INTVAL (cop1
) >= 0
3601 || HWI_COMPUTABLE_MODE_P (mode
)))
3603 HOST_WIDE_INT coeff
= INTVAL (cop1
);
3605 enum mult_variant variant
;
3606 struct algorithm algorithm
;
3609 return CONST0_RTX (mode
);
3611 /* Special case powers of two. */
3612 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff
))
3614 op0
= convert_to_mode (mode
, op0
, this_optab
== umul_widen_optab
);
3615 return expand_shift (LSHIFT_EXPR
, mode
, op0
,
3616 floor_log2 (coeff
), target
, unsignedp
);
3619 /* Exclude cost of op0 from max_cost to match the cost
3620 calculation of the synth_mult. */
3621 max_cost
= mul_widen_cost (speed
, mode
);
3622 if (choose_mult_variant (mode
, coeff
, &algorithm
, &variant
,
3625 op0
= convert_to_mode (mode
, op0
, this_optab
== umul_widen_optab
);
3626 return expand_mult_const (mode
, op0
, coeff
, target
,
3627 &algorithm
, variant
);
3630 return expand_binop (mode
, this_optab
, op0
, op1
, target
,
3631 unsignedp
, OPTAB_LIB_WIDEN
);
/* Choose a minimal N + 1 bit approximation to 1/D that can be used to
   replace division by D, and put the least significant N bits of the result
   in *MULTIPLIER_PTR and return the most significant bit.

   The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
   needed precision is in PRECISION (should be <= N).

   PRECISION should be as small as possible so this function can choose
   multiplier more freely.

   The rounded-up logarithm of D is placed in *LGUP_PTR.  A shift count that
   is to be used for a final right shift is placed in *POST_SHIFT_PTR.

   Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
   where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.  */

unsigned HOST_WIDE_INT
choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
		   unsigned HOST_WIDE_INT *multiplier_ptr,
		   int *post_shift_ptr, int *lgup_ptr)
{
3655 int lgup
, post_shift
;
3658 /* lgup = ceil(log2(divisor)); */
3659 lgup
= ceil_log2 (d
);
3661 gcc_assert (lgup
<= n
);
3664 pow2
= n
+ lgup
- precision
;
3666 /* mlow = 2^(N + lgup)/d */
3667 wide_int val
= wi::set_bit_in_zero (pow
, HOST_BITS_PER_DOUBLE_INT
);
3668 wide_int mlow
= wi::udiv_trunc (val
, d
);
3670 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3671 val
|= wi::set_bit_in_zero (pow2
, HOST_BITS_PER_DOUBLE_INT
);
3672 wide_int mhigh
= wi::udiv_trunc (val
, d
);
3674 /* If precision == N, then mlow, mhigh exceed 2^N
3675 (but they do not exceed 2^(N+1)). */
3677 /* Reduce to lowest terms. */
3678 for (post_shift
= lgup
; post_shift
> 0; post_shift
--)
3680 unsigned HOST_WIDE_INT ml_lo
= wi::extract_uhwi (mlow
, 1,
3681 HOST_BITS_PER_WIDE_INT
);
3682 unsigned HOST_WIDE_INT mh_lo
= wi::extract_uhwi (mhigh
, 1,
3683 HOST_BITS_PER_WIDE_INT
);
3687 mlow
= wi::uhwi (ml_lo
, HOST_BITS_PER_DOUBLE_INT
);
3688 mhigh
= wi::uhwi (mh_lo
, HOST_BITS_PER_DOUBLE_INT
);
3691 *post_shift_ptr
= post_shift
;
3693 if (n
< HOST_BITS_PER_WIDE_INT
)
3695 unsigned HOST_WIDE_INT mask
= (HOST_WIDE_INT_1U
<< n
) - 1;
3696 *multiplier_ptr
= mhigh
.to_uhwi () & mask
;
3697 return mhigh
.to_uhwi () > mask
;
3701 *multiplier_ptr
= mhigh
.to_uhwi ();
3702 return wi::extract_uhwi (mhigh
, HOST_BITS_PER_WIDE_INT
, 1);
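/* Illustrative sketch, not part of the original file: the kind of sequence
   the multiplier computed above enables.  For N = 32 and D = 3 the minimal
   N + 1 bit approximation to 1/D is 0xAAAAAAAB = ceil(2^33 / 3) with a
   post shift of 1, so x / 3 == (x * 0xAAAAAAAB) >> 33 for every 32-bit x.
   The helper name is invented for the example; it relies only on
   HOST_WIDE_INT being at least 64 bits wide.  */

static inline unsigned HOST_WIDE_INT
udiv3_by_multiply_sketch (unsigned HOST_WIDE_INT x)
{
  /* 0xAAAAAAAB is the 33-bit fixed-point approximation of 1/3.  */
  unsigned HOST_WIDE_INT m = 0xAAAAAAAB;
  /* The full product fits in 64 bits because x < 2^32 and m < 2^33.  */
  return (x * m) >> 33;		/* equals x / 3 whenever x < 2^32 */
}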
/* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
   congruent to 1 (mod 2**N).  */

static unsigned HOST_WIDE_INT
invert_mod2n (unsigned HOST_WIDE_INT x, int n)
{
  /* Solve x*y == 1 (mod 2^n), where x is odd.  Return y.  */

  /* The algorithm notes that the choice y = x satisfies
     x*y == 1 mod 2^3, since x is assumed odd.
     Each iteration doubles the number of bits of significance in y.  */

  unsigned HOST_WIDE_INT mask;
  unsigned HOST_WIDE_INT y = x;
  int nbit = 3;

  mask = (n == HOST_BITS_PER_WIDE_INT
	  ? HOST_WIDE_INT_M1U
	  : (HOST_WIDE_INT_1U << n) - 1);

  while (nbit < n)
    {
      y = y * (2 - x * y) & mask;	/* Modulo 2^N */
      nbit *= 2;
    }
  return y;
}
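/* Illustrative check, not part of the original file: a worked instance of
   the doubling property used above.  For x = 7 and n = 8, starting from
   y = x (already an inverse mod 2^3 because 7 * 7 == 49 == 1 mod 8), two
   Newton steps give y = 183, and 7 * 183 == 1281 == 5 * 256 + 1, i.e. the
   inverse of 7 mod 2^8.  The helper name is invented for the example.  */

static inline unsigned HOST_WIDE_INT
invert_mod2n_example_sketch (void)
{
  unsigned HOST_WIDE_INT x = 7, y = x;
  unsigned HOST_WIDE_INT mask = (HOST_WIDE_INT_1U << 8) - 1;
  for (int nbit = 3; nbit < 8; nbit *= 2)
    y = y * (2 - x * y) & mask;	/* each step doubles the correct bits */
  /* Here y == 183 and (x * y) & mask == 1.  */
  return y;
}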
/* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
   flavor of OP0 and OP1.  ADJ_OPERAND is already the high half of the
   product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed product
   to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
   become signed.

   The result is put in TARGET if that is convenient.

   MODE is the mode of operation.  */

rtx
expand_mult_highpart_adjust (scalar_int_mode mode, rtx adj_operand, rtx op0,
			     rtx op1, rtx target, int unsignedp)
{
  rtx tem;
  enum rtx_code adj_code = unsignedp ? PLUS : MINUS;

  tem = expand_shift (RSHIFT_EXPR, mode, op0,
		      GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
  tem = expand_and (mode, tem, op1, NULL_RTX);
  adj_operand
    = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
		     adj_operand);

  tem = expand_shift (RSHIFT_EXPR, mode, op1,
		      GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
  tem = expand_and (mode, tem, op0, NULL_RTX);
  target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
			  target);

  return target;
}
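/* Illustrative sketch, not part of the original file: the identity behind
   the two adjustment steps above, written out for 32-bit operands.  The
   high half of the signed product equals the high half of the unsigned
   product minus "op1 if op0 is negative" minus "op0 if op1 is negative",
   all modulo 2^32; the expander materializes those two terms with an
   arithmetic shift and an AND.  Names and types here are invented for the
   example.  */

static inline unsigned int
signed_highpart_from_unsigned_sketch (unsigned int u0, unsigned int u1)
{
  unsigned int high_u
    = (unsigned int) (((unsigned long long) u0 * u1) >> 32);
  unsigned int adj0 = (u0 >> 31 ? u1 : 0);	/* u1 if op0's sign bit is set */
  unsigned int adj1 = (u1 >> 31 ? u0 : 0);	/* u0 if op1's sign bit is set */
  return high_u - adj0 - adj1;	/* high half of the signed product */
}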
/* Subroutine of expmed_mult_highpart.  Return the MODE high part of OP.  */

static rtx
extract_high_half (scalar_int_mode mode, rtx op)
{
  if (mode == word_mode)
    return gen_highpart (mode, op);

  scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();

  op = expand_shift (RSHIFT_EXPR, wider_mode, op,
		     GET_MODE_BITSIZE (mode), 0, 1);
  return convert_modes (mode, wider_mode, op, 0);
}
/* Like expmed_mult_highpart, but only consider using a multiplication
   optab.  OP1 is an rtx for the constant operand.  */

static rtx
expmed_mult_highpart_optab (scalar_int_mode mode, rtx op0, rtx op1,
			    rtx target, int unsignedp, int max_cost)
{
  rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
  optab moptab;
  rtx tem;
  int size;
  bool speed = optimize_insn_for_speed_p ();

  scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();

  size = GET_MODE_BITSIZE (mode);

  /* Firstly, try using a multiplication insn that only generates the needed
     high part of the product, and in the sign flavor of unsignedp.  */
  if (mul_highpart_cost (speed, mode) < max_cost)
    {
      moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
      tem = expand_binop (mode, moptab, op0, narrow_op1, target,
			  unsignedp, OPTAB_DIRECT);
      if (tem)
	return tem;
    }

  /* Secondly, same as above, but use sign flavor opposite of unsignedp.
     Need to adjust the result after the multiplication.  */
  if (size - 1 < BITS_PER_WORD
      && (mul_highpart_cost (speed, mode)
	  + 2 * shift_cost (speed, mode, size-1)
	  + 4 * add_cost (speed, mode) < max_cost))
    {
      moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
      tem = expand_binop (mode, moptab, op0, narrow_op1, target,
			  unsignedp, OPTAB_DIRECT);
      if (tem)
	/* We used the wrong signedness.  Adjust the result.  */
	return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
					    tem, unsignedp);
    }

  /* Try widening multiplication.  */
  moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
  if (convert_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
      && mul_widen_cost (speed, wider_mode) < max_cost)
    {
      tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
			  unsignedp, OPTAB_WIDEN);
      if (tem)
	return extract_high_half (mode, tem);
    }

  /* Try widening the mode and perform a non-widening multiplication.  */
  if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
      && size - 1 < BITS_PER_WORD
      && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
	  < max_cost))
    {
      rtx_insn *insns;
      rtx wop0, wop1;

      /* We need to widen the operands, for example to ensure the
	 constant multiplier is correctly sign or zero extended.
	 Use a sequence to clean-up any instructions emitted by
	 the conversions if things don't work out.  */
      start_sequence ();
      wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
      wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
      tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
			  unsignedp, OPTAB_WIDEN);
      insns = get_insns ();
      end_sequence ();

      if (tem)
	{
	  emit_insn (insns);
	  return extract_high_half (mode, tem);
	}
    }

  /* Try widening multiplication of opposite signedness, and adjust.  */
  moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
  if (convert_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
      && size - 1 < BITS_PER_WORD
      && (mul_widen_cost (speed, wider_mode)
	  + 2 * shift_cost (speed, mode, size-1)
	  + 4 * add_cost (speed, mode) < max_cost))
    {
      tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
			  NULL_RTX, ! unsignedp, OPTAB_WIDEN);
      if (tem != 0)
	{
	  tem = extract_high_half (mode, tem);
	  /* We used the wrong signedness.  Adjust the result.  */
	  return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
					      target, unsignedp);
	}
    }

  return 0;
}
/* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
   putting the high half of the result in TARGET if that is convenient,
   and return where the result is.  If the operation cannot be performed,
   0 is returned.

   MODE is the mode of operation and result.

   UNSIGNEDP nonzero means unsigned multiply.

   MAX_COST is the total allowed cost for the expanded RTL.  */

static rtx
expmed_mult_highpart (scalar_int_mode mode, rtx op0, rtx op1,
		      rtx target, int unsignedp, int max_cost)
{
  unsigned HOST_WIDE_INT cnst1;
  int extra_cost;
  bool sign_adjust = false;
  enum mult_variant variant;
  struct algorithm alg;
  rtx tem;
  bool speed = optimize_insn_for_speed_p ();

  /* We can't support modes wider than HOST_BITS_PER_INT.  */
  gcc_assert (HWI_COMPUTABLE_MODE_P (mode));

  cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);

  /* We can't optimize modes wider than BITS_PER_WORD.
     ??? We might be able to perform double-word arithmetic if
     mode == word_mode, however all the cost calculations in
     synth_mult etc. assume single-word operations.  */
  scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
  if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
    return expmed_mult_highpart_optab (mode, op0, op1, target,
				       unsignedp, max_cost);

  extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);

  /* Check whether we try to multiply by a negative constant.  */
  if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
    {
      sign_adjust = true;
      extra_cost += add_cost (speed, mode);
    }

  /* See whether shift/add multiplication is cheap enough.  */
  if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
			   max_cost - extra_cost))
    {
      /* See whether the specialized multiplication optabs are
	 cheaper than the shift/add version.  */
      tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
					alg.cost.cost + extra_cost);
      if (tem)
	return tem;

      tem = convert_to_mode (wider_mode, op0, unsignedp);
      tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
      tem = extract_high_half (mode, tem);

      /* Adjust result for signedness.  */
      if (sign_adjust)
	tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);

      return tem;
    }
  return expmed_mult_highpart_optab (mode, op0, op1, target,
				     unsignedp, max_cost);
}
/* Expand signed modulus of OP0 by a power of two D in mode MODE.  */

static rtx
expand_smod_pow2 (scalar_int_mode mode, rtx op0, HOST_WIDE_INT d)
{
  rtx result, temp, shift;
  rtx_code_label *label;
  int logd;
  int prec = GET_MODE_PRECISION (mode);

  logd = floor_log2 (d);
  result = gen_reg_rtx (mode);

  /* Avoid conditional branches when they're expensive.  */
  if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
      && optimize_insn_for_speed_p ())
    {
      rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
				      mode, 0, -1);
      if (signmask)
	{
	  HOST_WIDE_INT masklow = (HOST_WIDE_INT_1 << logd) - 1;
	  signmask = force_reg (mode, signmask);
	  shift = gen_int_shift_amount (mode, GET_MODE_BITSIZE (mode) - logd);

	  /* Use the rtx_cost of a LSHIFTRT instruction to determine
	     which instruction sequence to use.  If logical right shifts
	     are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
	     use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */

	  temp = gen_rtx_LSHIFTRT (mode, result, shift);
	  if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
	      || (set_src_cost (temp, mode, optimize_insn_for_speed_p ())
		  > COSTS_N_INSNS (2)))
	    {
	      temp = expand_binop (mode, xor_optab, op0, signmask,
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	      temp = expand_binop (mode, sub_optab, temp, signmask,
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	      temp = expand_binop (mode, and_optab, temp,
				   gen_int_mode (masklow, mode),
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	      temp = expand_binop (mode, xor_optab, temp, signmask,
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	      temp = expand_binop (mode, sub_optab, temp, signmask,
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	    }
	  else
	    {
	      signmask = expand_binop (mode, lshr_optab, signmask, shift,
				       NULL_RTX, 1, OPTAB_LIB_WIDEN);
	      signmask = force_reg (mode, signmask);

	      temp = expand_binop (mode, add_optab, op0, signmask,
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	      temp = expand_binop (mode, and_optab, temp,
				   gen_int_mode (masklow, mode),
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	      temp = expand_binop (mode, sub_optab, temp, signmask,
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	    }
	  return temp;
	}
    }

  /* Mask contains the mode's signbit and the significant bits of the
     modulus.  By including the signbit in the operation, many targets
     can avoid an explicit compare operation in the following comparison
     against zero.  */
  wide_int mask = wi::mask (logd, false, prec);
  mask = wi::set_bit (mask, prec - 1);

  temp = expand_binop (mode, and_optab, op0,
		       immed_wide_int_const (mask, mode),
		       result, 1, OPTAB_LIB_WIDEN);
  if (temp != result)
    emit_move_insn (result, temp);

  label = gen_label_rtx ();
  do_cmp_and_jump (result, const0_rtx, GE, mode, label);

  temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
		       0, OPTAB_LIB_WIDEN);
  mask = wi::mask (logd, true, prec);
  temp = expand_binop (mode, ior_optab, temp,
		       immed_wide_int_const (mask, mode),
		       result, 1, OPTAB_LIB_WIDEN);
  temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
		       0, OPTAB_LIB_WIDEN);
  if (temp != result)
    emit_move_insn (result, temp);

  emit_label (label);
  return result;
}
/* Expand signed division of OP0 by a power of two D in mode MODE.
   This routine is only called for positive values of D.  */

static rtx
expand_sdiv_pow2 (scalar_int_mode mode, rtx op0, HOST_WIDE_INT d)
{
  rtx temp;
  rtx_code_label *label;
  int logd;

  logd = floor_log2 (d);

  if (d == 2
      && BRANCH_COST (optimize_insn_for_speed_p (),
		      false) >= 1)
    {
      temp = gen_reg_rtx (mode);
      temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
      temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
			   0, OPTAB_LIB_WIDEN);
      return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
    }

  if (HAVE_conditional_move
      && BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2)
    {
      rtx temp2;

      start_sequence ();
      temp2 = copy_to_mode_reg (mode, op0);
      temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
			   NULL_RTX, 0, OPTAB_LIB_WIDEN);
      temp = force_reg (mode, temp);

      /* Construct "temp2 = (temp2 < 0) ? temp : temp2".  */
      temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
				     mode, temp, temp2, mode, 0);
      if (temp2)
	{
	  rtx_insn *seq = get_insns ();
	  end_sequence ();
	  emit_insn (seq);
	  return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
	}
      end_sequence ();
    }

  if (BRANCH_COST (optimize_insn_for_speed_p (),
		   false) >= 2)
    {
      int ushift = GET_MODE_BITSIZE (mode) - logd;

      temp = gen_reg_rtx (mode);
      temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
      if (GET_MODE_BITSIZE (mode) >= BITS_PER_WORD
	  || shift_cost (optimize_insn_for_speed_p (), mode, ushift)
	     > COSTS_N_INSNS (1))
	temp = expand_binop (mode, and_optab, temp, gen_int_mode (d - 1, mode),
			     NULL_RTX, 0, OPTAB_LIB_WIDEN);
      else
	temp = expand_shift (RSHIFT_EXPR, mode, temp,
			     ushift, NULL_RTX, 1);
      temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
			   0, OPTAB_LIB_WIDEN);
      return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
    }

  label = gen_label_rtx ();
  temp = copy_to_mode_reg (mode, op0);
  do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
  expand_inc (temp, gen_int_mode (d - 1, mode));
  emit_label (label);
  return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
}
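/* Illustrative sketch, not part of the original file: the adjust-then-shift
   identity that the branchless paths above implement.  For positive
   D = 2^logd, truncating signed division is (x + (x < 0 ? d - 1 : 0)) >> logd;
   the code above computes the conditional addend with a store-flag, a
   conditional move, or a masked sign copy instead of the C conditional used
   here.  The helper name is invented, and the final shift assumes the host
   compiler implements >> on negative values as an arithmetic shift.  */

static inline HOST_WIDE_INT
sdiv_pow2_sketch (HOST_WIDE_INT x, int logd)
{
  HOST_WIDE_INT d = HOST_WIDE_INT_1 << logd;
  HOST_WIDE_INT adjusted = x + (x < 0 ? d - 1 : 0);
  return adjusted >> logd;	/* truncated x / d */
}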
/* Emit the code to divide OP0 by OP1, putting the result in TARGET
   if that is convenient, and returning where the result is.
   You may request either the quotient or the remainder as the result;
   specify REM_FLAG nonzero to get the remainder.

   CODE is the expression code for which kind of division this is;
   it controls how rounding is done.  MODE is the machine mode to use.
   UNSIGNEDP nonzero means do unsigned division.  */

/* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
   and then correct it by or'ing in missing high bits
   if result of ANDI is nonzero.
   For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
   This could optimize to a bfexts instruction.
   But C doesn't use these operations, so their optimizations are
   left for later.  */

/* ??? For modulo, we don't actually need the highpart of the first product,
   the low part will do nicely.  And for small divisors, the second multiply
   can also be a low-part only multiply or even be completely left out.
   E.g. to calculate the remainder of a division by 3 with a 32 bit
   multiply, multiply with 0x55555556 and extract the upper two bits;
   the result is exact for inputs up to 0x1fffffff.
   The input range can be reduced by using cross-sum rules.
   For odd divisors >= 3, the following table gives right shift counts
   so that if a number is shifted by an integer multiple of the given
   amount, the remainder stays the same:
   2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
   14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
   0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
   20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
   0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12

   Cross-sum rules for even numbers can be derived by leaving as many bits
   to the right alone as the divisor has zeros to the right.
   E.g. if x is an unsigned 32 bit number:
   (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
   */
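/* Illustrative sketch, not part of the original file, of the remainder-by-3
   trick described in the comment above: multiply by 0x55555556 (about
   2^32 / 3) and the two bits just below bit 32 of the product hold the
   remainder.  As the comment notes, the result is exact for inputs up to
   0x1fffffff.  The helper name is invented for the example.  */

static inline unsigned int
umod3_by_multiply_sketch (unsigned int x)
{
  unsigned int low = x * 0x55555556u;	/* low 32 bits of the product */
  return low >> 30;			/* the upper two bits */
}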
4169 expand_divmod (int rem_flag
, enum tree_code code
, machine_mode mode
,
4170 rtx op0
, rtx op1
, rtx target
, int unsignedp
)
4172 machine_mode compute_mode
;
4174 rtx quotient
= 0, remainder
= 0;
4177 optab optab1
, optab2
;
4178 int op1_is_constant
, op1_is_pow2
= 0;
4179 int max_cost
, extra_cost
;
4180 static HOST_WIDE_INT last_div_const
= 0;
4181 bool speed
= optimize_insn_for_speed_p ();
4183 op1_is_constant
= CONST_INT_P (op1
);
4184 if (op1_is_constant
)
4186 wide_int ext_op1
= rtx_mode_t (op1
, mode
);
4187 op1_is_pow2
= (wi::popcount (ext_op1
) == 1
4189 && wi::popcount (wi::neg (ext_op1
)) == 1));
4193 This is the structure of expand_divmod:
4195 First comes code to fix up the operands so we can perform the operations
4196 correctly and efficiently.
4198 Second comes a switch statement with code specific for each rounding mode.
4199 For some special operands this code emits all RTL for the desired
4200 operation, for other cases, it generates only a quotient and stores it in
4201 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
4202 to indicate that it has not done anything.
4204 Last comes code that finishes the operation. If QUOTIENT is set and
4205 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
4206 QUOTIENT is not set, it is computed using trunc rounding.
4208 We try to generate special code for division and remainder when OP1 is a
4209 constant. If |OP1| = 2**n we can use shifts and some other fast
4210 operations. For other values of OP1, we compute a carefully selected
4211 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
4214 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
4215 half of the product. Different strategies for generating the product are
4216 implemented in expmed_mult_highpart.
4218 If what we actually want is the remainder, we generate that by another
4219 by-constant multiplication and a subtraction. */
4221 /* We shouldn't be called with OP1 == const1_rtx, but some of the
4222 code below will malfunction if we are, so check here and handle
4223 the special case if so. */
4224 if (op1
== const1_rtx
)
4225 return rem_flag
? const0_rtx
: op0
;
4227 /* When dividing by -1, we could get an overflow.
4228 negv_optab can handle overflows. */
4229 if (! unsignedp
&& op1
== constm1_rtx
)
4233 return expand_unop (mode
, flag_trapv
&& GET_MODE_CLASS (mode
) == MODE_INT
4234 ? negv_optab
: neg_optab
, op0
, target
, 0);
4238 /* Don't use the function value register as a target
4239 since we have to read it as well as write it,
4240 and function-inlining gets confused by this. */
4241 && ((REG_P (target
) && REG_FUNCTION_VALUE_P (target
))
4242 /* Don't clobber an operand while doing a multi-step calculation. */
4243 || ((rem_flag
|| op1_is_constant
)
4244 && (reg_mentioned_p (target
, op0
)
4245 || (MEM_P (op0
) && MEM_P (target
))))
4246 || reg_mentioned_p (target
, op1
)
4247 || (MEM_P (op1
) && MEM_P (target
))))
4250 /* Get the mode in which to perform this computation. Normally it will
4251 be MODE, but sometimes we can't do the desired operation in MODE.
4252 If so, pick a wider mode in which we can do the operation. Convert
4253 to that mode at the start to avoid repeated conversions.
4255 First see what operations we need. These depend on the expression
4256 we are evaluating. (We assume that divxx3 insns exist under the
4257 same conditions that modxx3 insns and that these insns don't normally
4258 fail. If these assumptions are not correct, we may generate less
4259 efficient code in some cases.)
4261 Then see if we find a mode in which we can open-code that operation
4262 (either a division, modulus, or shift). Finally, check for the smallest
4263 mode for which we can do the operation with a library call. */
4265 /* We might want to refine this now that we have division-by-constant
4266 optimization. Since expmed_mult_highpart tries so many variants, it is
4267 not straightforward to generalize this. Maybe we should make an array
4268 of possible modes in init_expmed? Save this for GCC 2.7. */
4270 optab1
= (op1_is_pow2
4271 ? (unsignedp
? lshr_optab
: ashr_optab
)
4272 : (unsignedp
? udiv_optab
: sdiv_optab
));
4273 optab2
= (op1_is_pow2
? optab1
4274 : (unsignedp
? udivmod_optab
: sdivmod_optab
));
4276 FOR_EACH_MODE_FROM (compute_mode
, mode
)
4277 if (optab_handler (optab1
, compute_mode
) != CODE_FOR_nothing
4278 || optab_handler (optab2
, compute_mode
) != CODE_FOR_nothing
)
4281 if (compute_mode
== VOIDmode
)
4282 FOR_EACH_MODE_FROM (compute_mode
, mode
)
4283 if (optab_libfunc (optab1
, compute_mode
)
4284 || optab_libfunc (optab2
, compute_mode
))
4287 /* If we still couldn't find a mode, use MODE, but expand_binop will
4289 if (compute_mode
== VOIDmode
)
4290 compute_mode
= mode
;
4292 if (target
&& GET_MODE (target
) == compute_mode
)
4295 tquotient
= gen_reg_rtx (compute_mode
);
4298 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
4299 (mode), and thereby get better code when OP1 is a constant. Do that
4300 later. It will require going over all usages of SIZE below. */
4301 size
= GET_MODE_BITSIZE (mode
);
4304 /* Only deduct something for a REM if the last divide done was
4305 for a different constant. Then set the constant of the last
4307 max_cost
= (unsignedp
4308 ? udiv_cost (speed
, compute_mode
)
4309 : sdiv_cost (speed
, compute_mode
));
4310 if (rem_flag
&& ! (last_div_const
!= 0 && op1_is_constant
4311 && INTVAL (op1
) == last_div_const
))
4312 max_cost
-= (mul_cost (speed
, compute_mode
)
4313 + add_cost (speed
, compute_mode
));
4315 last_div_const
= ! rem_flag
&& op1_is_constant
? INTVAL (op1
) : 0;
4317 /* Now convert to the best mode to use. */
4318 if (compute_mode
!= mode
)
4320 op0
= convert_modes (compute_mode
, mode
, op0
, unsignedp
);
4321 op1
= convert_modes (compute_mode
, mode
, op1
, unsignedp
);
4323 /* convert_modes may have placed op1 into a register, so we
4324 must recompute the following. */
4325 op1_is_constant
= CONST_INT_P (op1
);
4326 if (op1_is_constant
)
4328 wide_int ext_op1
= rtx_mode_t (op1
, compute_mode
);
4329 op1_is_pow2
= (wi::popcount (ext_op1
) == 1
4331 && wi::popcount (wi::neg (ext_op1
)) == 1));
4337 /* If one of the operands is a volatile MEM, copy it into a register. */
4339 if (MEM_P (op0
) && MEM_VOLATILE_P (op0
))
4340 op0
= force_reg (compute_mode
, op0
);
4341 if (MEM_P (op1
) && MEM_VOLATILE_P (op1
))
4342 op1
= force_reg (compute_mode
, op1
);
4344 /* If we need the remainder or if OP1 is constant, we need to
4345 put OP0 in a register in case it has any queued subexpressions. */
4346 if (rem_flag
|| op1_is_constant
)
4347 op0
= force_reg (compute_mode
, op0
);
4349 last
= get_last_insn ();
4351 /* Promote floor rounding to trunc rounding for unsigned operations. */
4354 if (code
== FLOOR_DIV_EXPR
)
4355 code
= TRUNC_DIV_EXPR
;
4356 if (code
== FLOOR_MOD_EXPR
)
4357 code
= TRUNC_MOD_EXPR
;
4358 if (code
== EXACT_DIV_EXPR
&& op1_is_pow2
)
4359 code
= TRUNC_DIV_EXPR
;
4362 if (op1
!= const0_rtx
)
4365 case TRUNC_MOD_EXPR
:
4366 case TRUNC_DIV_EXPR
:
4367 if (op1_is_constant
)
4369 scalar_int_mode int_mode
= as_a
<scalar_int_mode
> (compute_mode
);
4370 int size
= GET_MODE_BITSIZE (int_mode
);
4373 unsigned HOST_WIDE_INT mh
, ml
;
4374 int pre_shift
, post_shift
;
4376 wide_int wd
= rtx_mode_t (op1
, int_mode
);
4377 unsigned HOST_WIDE_INT d
= wd
.to_uhwi ();
4379 if (wi::popcount (wd
) == 1)
4381 pre_shift
= floor_log2 (d
);
4384 unsigned HOST_WIDE_INT mask
4385 = (HOST_WIDE_INT_1U
<< pre_shift
) - 1;
4387 = expand_binop (int_mode
, and_optab
, op0
,
4388 gen_int_mode (mask
, int_mode
),
4392 return gen_lowpart (mode
, remainder
);
4394 quotient
= expand_shift (RSHIFT_EXPR
, int_mode
, op0
,
4395 pre_shift
, tquotient
, 1);
4397 else if (size
<= HOST_BITS_PER_WIDE_INT
)
4399 if (d
>= (HOST_WIDE_INT_1U
<< (size
- 1)))
4401 /* Most significant bit of divisor is set; emit an scc
4403 quotient
= emit_store_flag_force (tquotient
, GEU
, op0
, op1
,
4408 /* Find a suitable multiplier and right shift count
4409 instead of multiplying with D. */
4411 mh
= choose_multiplier (d
, size
, size
,
4412 &ml
, &post_shift
, &dummy
);
4414 /* If the suggested multiplier is more than SIZE bits,
4415 we can do better for even divisors, using an
4416 initial right shift. */
4417 if (mh
!= 0 && (d
& 1) == 0)
4419 pre_shift
= ctz_or_zero (d
);
4420 mh
= choose_multiplier (d
>> pre_shift
, size
,
4422 &ml
, &post_shift
, &dummy
);
4432 if (post_shift
- 1 >= BITS_PER_WORD
)
4436 = (shift_cost (speed
, int_mode
, post_shift
- 1)
4437 + shift_cost (speed
, int_mode
, 1)
4438 + 2 * add_cost (speed
, int_mode
));
4439 t1
= expmed_mult_highpart
4440 (int_mode
, op0
, gen_int_mode (ml
, int_mode
),
4441 NULL_RTX
, 1, max_cost
- extra_cost
);
4444 t2
= force_operand (gen_rtx_MINUS (int_mode
,
4447 t3
= expand_shift (RSHIFT_EXPR
, int_mode
,
4448 t2
, 1, NULL_RTX
, 1);
4449 t4
= force_operand (gen_rtx_PLUS (int_mode
,
4452 quotient
= expand_shift
4453 (RSHIFT_EXPR
, int_mode
, t4
,
4454 post_shift
- 1, tquotient
, 1);
4460 if (pre_shift
>= BITS_PER_WORD
4461 || post_shift
>= BITS_PER_WORD
)
4465 (RSHIFT_EXPR
, int_mode
, op0
,
4466 pre_shift
, NULL_RTX
, 1);
4468 = (shift_cost (speed
, int_mode
, pre_shift
)
4469 + shift_cost (speed
, int_mode
, post_shift
));
4470 t2
= expmed_mult_highpart
4472 gen_int_mode (ml
, int_mode
),
4473 NULL_RTX
, 1, max_cost
- extra_cost
);
4476 quotient
= expand_shift
4477 (RSHIFT_EXPR
, int_mode
, t2
,
4478 post_shift
, tquotient
, 1);
4482 else /* Too wide mode to use tricky code */
4485 insn
= get_last_insn ();
4487 set_dst_reg_note (insn
, REG_EQUAL
,
4488 gen_rtx_UDIV (int_mode
, op0
, op1
),
4491 else /* TRUNC_DIV, signed */
4493 unsigned HOST_WIDE_INT ml
;
4494 int lgup
, post_shift
;
4496 HOST_WIDE_INT d
= INTVAL (op1
);
4497 unsigned HOST_WIDE_INT abs_d
;
4499 /* Not prepared to handle division/remainder by
4500 0xffffffffffffffff8000000000000000 etc. */
4501 if (d
== HOST_WIDE_INT_MIN
&& size
> HOST_BITS_PER_WIDE_INT
)
4504 /* Since d might be INT_MIN, we have to cast to
4505 unsigned HOST_WIDE_INT before negating to avoid
4506 undefined signed overflow. */
4508 ? (unsigned HOST_WIDE_INT
) d
4509 : - (unsigned HOST_WIDE_INT
) d
);
4511 /* n rem d = n rem -d */
4512 if (rem_flag
&& d
< 0)
4515 op1
= gen_int_mode (abs_d
, int_mode
);
4521 quotient
= expand_unop (int_mode
, neg_optab
, op0
,
4523 else if (size
<= HOST_BITS_PER_WIDE_INT
4524 && abs_d
== HOST_WIDE_INT_1U
<< (size
- 1))
4526 /* This case is not handled correctly below. */
4527 quotient
= emit_store_flag (tquotient
, EQ
, op0
, op1
,
4532 else if (EXACT_POWER_OF_2_OR_ZERO_P (d
)
4533 && (size
<= HOST_BITS_PER_WIDE_INT
|| d
>= 0)
4535 ? smod_pow2_cheap (speed
, int_mode
)
4536 : sdiv_pow2_cheap (speed
, int_mode
))
4537 /* We assume that cheap metric is true if the
4538 optab has an expander for this mode. */
4539 && ((optab_handler ((rem_flag
? smod_optab
4542 != CODE_FOR_nothing
)
4543 || (optab_handler (sdivmod_optab
, int_mode
)
4544 != CODE_FOR_nothing
)))
4546 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d
))
4550 remainder
= expand_smod_pow2 (int_mode
, op0
, d
);
4552 return gen_lowpart (mode
, remainder
);
4555 if (sdiv_pow2_cheap (speed
, int_mode
)
4556 && ((optab_handler (sdiv_optab
, int_mode
)
4557 != CODE_FOR_nothing
)
4558 || (optab_handler (sdivmod_optab
, int_mode
)
4559 != CODE_FOR_nothing
)))
4560 quotient
= expand_divmod (0, TRUNC_DIV_EXPR
,
4562 gen_int_mode (abs_d
,
4566 quotient
= expand_sdiv_pow2 (int_mode
, op0
, abs_d
);
4568 /* We have computed OP0 / abs(OP1). If OP1 is negative,
4569 negate the quotient. */
4572 insn
= get_last_insn ();
4574 && abs_d
< (HOST_WIDE_INT_1U
4575 << (HOST_BITS_PER_WIDE_INT
- 1)))
4576 set_dst_reg_note (insn
, REG_EQUAL
,
4577 gen_rtx_DIV (int_mode
, op0
,
4583 quotient
= expand_unop (int_mode
, neg_optab
,
4584 quotient
, quotient
, 0);
4587 else if (size
<= HOST_BITS_PER_WIDE_INT
)
4589 choose_multiplier (abs_d
, size
, size
- 1,
4590 &ml
, &post_shift
, &lgup
);
4591 if (ml
< HOST_WIDE_INT_1U
<< (size
- 1))
4595 if (post_shift
>= BITS_PER_WORD
4596 || size
- 1 >= BITS_PER_WORD
)
4599 extra_cost
= (shift_cost (speed
, int_mode
, post_shift
)
4600 + shift_cost (speed
, int_mode
, size
- 1)
4601 + add_cost (speed
, int_mode
));
4602 t1
= expmed_mult_highpart
4603 (int_mode
, op0
, gen_int_mode (ml
, int_mode
),
4604 NULL_RTX
, 0, max_cost
- extra_cost
);
4608 (RSHIFT_EXPR
, int_mode
, t1
,
4609 post_shift
, NULL_RTX
, 0);
4611 (RSHIFT_EXPR
, int_mode
, op0
,
4612 size
- 1, NULL_RTX
, 0);
4615 = force_operand (gen_rtx_MINUS (int_mode
, t3
, t2
),
4619 = force_operand (gen_rtx_MINUS (int_mode
, t2
, t3
),
4626 if (post_shift
>= BITS_PER_WORD
4627 || size
- 1 >= BITS_PER_WORD
)
4630 ml
|= HOST_WIDE_INT_M1U
<< (size
- 1);
4631 mlr
= gen_int_mode (ml
, int_mode
);
4632 extra_cost
= (shift_cost (speed
, int_mode
, post_shift
)
4633 + shift_cost (speed
, int_mode
, size
- 1)
4634 + 2 * add_cost (speed
, int_mode
));
4635 t1
= expmed_mult_highpart (int_mode
, op0
, mlr
,
4637 max_cost
- extra_cost
);
4640 t2
= force_operand (gen_rtx_PLUS (int_mode
, t1
, op0
),
4643 (RSHIFT_EXPR
, int_mode
, t2
,
4644 post_shift
, NULL_RTX
, 0);
4646 (RSHIFT_EXPR
, int_mode
, op0
,
4647 size
- 1, NULL_RTX
, 0);
4650 = force_operand (gen_rtx_MINUS (int_mode
, t4
, t3
),
4654 = force_operand (gen_rtx_MINUS (int_mode
, t3
, t4
),
4658 else /* Too wide mode to use tricky code */
4661 insn
= get_last_insn ();
4663 set_dst_reg_note (insn
, REG_EQUAL
,
4664 gen_rtx_DIV (int_mode
, op0
, op1
),
4670 delete_insns_since (last
);
4673 case FLOOR_DIV_EXPR
:
4674 case FLOOR_MOD_EXPR
:
4675 /* We will come here only for signed operations. */
4676 if (op1_is_constant
&& HWI_COMPUTABLE_MODE_P (compute_mode
))
4678 scalar_int_mode int_mode
= as_a
<scalar_int_mode
> (compute_mode
);
4679 int size
= GET_MODE_BITSIZE (int_mode
);
4680 unsigned HOST_WIDE_INT mh
, ml
;
4681 int pre_shift
, lgup
, post_shift
;
4682 HOST_WIDE_INT d
= INTVAL (op1
);
4686 /* We could just as easily deal with negative constants here,
4687 but it does not seem worth the trouble for GCC 2.6. */
4688 if (EXACT_POWER_OF_2_OR_ZERO_P (d
))
4690 pre_shift
= floor_log2 (d
);
4693 unsigned HOST_WIDE_INT mask
4694 = (HOST_WIDE_INT_1U
<< pre_shift
) - 1;
4695 remainder
= expand_binop
4696 (int_mode
, and_optab
, op0
,
4697 gen_int_mode (mask
, int_mode
),
4698 remainder
, 0, OPTAB_LIB_WIDEN
);
4700 return gen_lowpart (mode
, remainder
);
4702 quotient
= expand_shift
4703 (RSHIFT_EXPR
, int_mode
, op0
,
4704 pre_shift
, tquotient
, 0);
4710 mh
= choose_multiplier (d
, size
, size
- 1,
4711 &ml
, &post_shift
, &lgup
);
4714 if (post_shift
< BITS_PER_WORD
4715 && size
- 1 < BITS_PER_WORD
)
4718 (RSHIFT_EXPR
, int_mode
, op0
,
4719 size
- 1, NULL_RTX
, 0);
4720 t2
= expand_binop (int_mode
, xor_optab
, op0
, t1
,
4721 NULL_RTX
, 0, OPTAB_WIDEN
);
4722 extra_cost
= (shift_cost (speed
, int_mode
, post_shift
)
4723 + shift_cost (speed
, int_mode
, size
- 1)
4724 + 2 * add_cost (speed
, int_mode
));
4725 t3
= expmed_mult_highpart
4726 (int_mode
, t2
, gen_int_mode (ml
, int_mode
),
4727 NULL_RTX
, 1, max_cost
- extra_cost
);
4731 (RSHIFT_EXPR
, int_mode
, t3
,
4732 post_shift
, NULL_RTX
, 1);
4733 quotient
= expand_binop (int_mode
, xor_optab
,
4734 t4
, t1
, tquotient
, 0,
4742 rtx nsign
, t1
, t2
, t3
, t4
;
4743 t1
= force_operand (gen_rtx_PLUS (int_mode
,
4744 op0
, constm1_rtx
), NULL_RTX
);
4745 t2
= expand_binop (int_mode
, ior_optab
, op0
, t1
, NULL_RTX
,
4747 nsign
= expand_shift (RSHIFT_EXPR
, int_mode
, t2
,
4748 size
- 1, NULL_RTX
, 0);
4749 t3
= force_operand (gen_rtx_MINUS (int_mode
, t1
, nsign
),
4751 t4
= expand_divmod (0, TRUNC_DIV_EXPR
, int_mode
, t3
, op1
,
4756 t5
= expand_unop (int_mode
, one_cmpl_optab
, nsign
,
4758 quotient
= force_operand (gen_rtx_PLUS (int_mode
, t4
, t5
),
4766 delete_insns_since (last
);
4768 /* Try using an instruction that produces both the quotient and
4769 remainder, using truncation. We can easily compensate the quotient
4770 or remainder to get floor rounding, once we have the remainder.
4771 Notice that we compute also the final remainder value here,
4772 and return the result right away. */
4773 if (target
== 0 || GET_MODE (target
) != compute_mode
)
4774 target
= gen_reg_rtx (compute_mode
);
4779 = REG_P (target
) ? target
: gen_reg_rtx (compute_mode
);
4780 quotient
= gen_reg_rtx (compute_mode
);
4785 = REG_P (target
) ? target
: gen_reg_rtx (compute_mode
);
4786 remainder
= gen_reg_rtx (compute_mode
);
4789 if (expand_twoval_binop (sdivmod_optab
, op0
, op1
,
4790 quotient
, remainder
, 0))
4792 /* This could be computed with a branch-less sequence.
4793 Save that for later. */
4795 rtx_code_label
*label
= gen_label_rtx ();
4796 do_cmp_and_jump (remainder
, const0_rtx
, EQ
, compute_mode
, label
);
4797 tem
= expand_binop (compute_mode
, xor_optab
, op0
, op1
,
4798 NULL_RTX
, 0, OPTAB_WIDEN
);
4799 do_cmp_and_jump (tem
, const0_rtx
, GE
, compute_mode
, label
);
4800 expand_dec (quotient
, const1_rtx
);
4801 expand_inc (remainder
, op1
);
4803 return gen_lowpart (mode
, rem_flag
? remainder
: quotient
);
4806 /* No luck with division elimination or divmod. Have to do it
4807 by conditionally adjusting op0 *and* the result. */
4809 rtx_code_label
*label1
, *label2
, *label3
, *label4
, *label5
;
4813 quotient
= gen_reg_rtx (compute_mode
);
4814 adjusted_op0
= copy_to_mode_reg (compute_mode
, op0
);
4815 label1
= gen_label_rtx ();
4816 label2
= gen_label_rtx ();
4817 label3
= gen_label_rtx ();
4818 label4
= gen_label_rtx ();
4819 label5
= gen_label_rtx ();
4820 do_cmp_and_jump (op1
, const0_rtx
, LT
, compute_mode
, label2
);
4821 do_cmp_and_jump (adjusted_op0
, const0_rtx
, LT
, compute_mode
, label1
);
4822 tem
= expand_binop (compute_mode
, sdiv_optab
, adjusted_op0
, op1
,
4823 quotient
, 0, OPTAB_LIB_WIDEN
);
4824 if (tem
!= quotient
)
4825 emit_move_insn (quotient
, tem
);
4826 emit_jump_insn (targetm
.gen_jump (label5
));
4828 emit_label (label1
);
4829 expand_inc (adjusted_op0
, const1_rtx
);
4830 emit_jump_insn (targetm
.gen_jump (label4
));
4832 emit_label (label2
);
4833 do_cmp_and_jump (adjusted_op0
, const0_rtx
, GT
, compute_mode
, label3
);
4834 tem
= expand_binop (compute_mode
, sdiv_optab
, adjusted_op0
, op1
,
4835 quotient
, 0, OPTAB_LIB_WIDEN
);
4836 if (tem
!= quotient
)
4837 emit_move_insn (quotient
, tem
);
4838 emit_jump_insn (targetm
.gen_jump (label5
));
4840 emit_label (label3
);
4841 expand_dec (adjusted_op0
, const1_rtx
);
4842 emit_label (label4
);
4843 tem
= expand_binop (compute_mode
, sdiv_optab
, adjusted_op0
, op1
,
4844 quotient
, 0, OPTAB_LIB_WIDEN
);
4845 if (tem
!= quotient
)
4846 emit_move_insn (quotient
, tem
);
4847 expand_dec (quotient
, const1_rtx
);
4848 emit_label (label5
);
4857 && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1
))
4858 && (HWI_COMPUTABLE_MODE_P (compute_mode
)
4859 || INTVAL (op1
) >= 0))
4861 scalar_int_mode int_mode
4862 = as_a
<scalar_int_mode
> (compute_mode
);
4864 unsigned HOST_WIDE_INT d
= INTVAL (op1
);
4865 t1
= expand_shift (RSHIFT_EXPR
, int_mode
, op0
,
4866 floor_log2 (d
), tquotient
, 1);
4867 t2
= expand_binop (int_mode
, and_optab
, op0
,
4868 gen_int_mode (d
- 1, int_mode
),
4869 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
4870 t3
= gen_reg_rtx (int_mode
);
4871 t3
= emit_store_flag (t3
, NE
, t2
, const0_rtx
, int_mode
, 1, 1);
4874 rtx_code_label
*lab
;
4875 lab
= gen_label_rtx ();
4876 do_cmp_and_jump (t2
, const0_rtx
, EQ
, int_mode
, lab
);
4877 expand_inc (t1
, const1_rtx
);
4882 quotient
= force_operand (gen_rtx_PLUS (int_mode
, t1
, t3
),
4887 /* Try using an instruction that produces both the quotient and
4888 remainder, using truncation. We can easily compensate the
4889 quotient or remainder to get ceiling rounding, once we have the
4890 remainder. Notice that we compute also the final remainder
4891 value here, and return the result right away. */
4892 if (target
== 0 || GET_MODE (target
) != compute_mode
)
4893 target
= gen_reg_rtx (compute_mode
);
4897 remainder
= (REG_P (target
)
4898 ? target
: gen_reg_rtx (compute_mode
));
4899 quotient
= gen_reg_rtx (compute_mode
);
4903 quotient
= (REG_P (target
)
4904 ? target
: gen_reg_rtx (compute_mode
));
4905 remainder
= gen_reg_rtx (compute_mode
);
4908 if (expand_twoval_binop (udivmod_optab
, op0
, op1
, quotient
,
4911 /* This could be computed with a branch-less sequence.
4912 Save that for later. */
4913 rtx_code_label
*label
= gen_label_rtx ();
4914 do_cmp_and_jump (remainder
, const0_rtx
, EQ
,
4915 compute_mode
, label
);
4916 expand_inc (quotient
, const1_rtx
);
4917 expand_dec (remainder
, op1
);
4919 return gen_lowpart (mode
, rem_flag
? remainder
: quotient
);
4922 /* No luck with division elimination or divmod. Have to do it
4923 by conditionally adjusting op0 *and* the result. */
4925 rtx_code_label
*label1
, *label2
;
4926 rtx adjusted_op0
, tem
;
4928 quotient
= gen_reg_rtx (compute_mode
);
4929 adjusted_op0
= copy_to_mode_reg (compute_mode
, op0
);
4930 label1
= gen_label_rtx ();
4931 label2
= gen_label_rtx ();
4932 do_cmp_and_jump (adjusted_op0
, const0_rtx
, NE
,
4933 compute_mode
, label1
);
4934 emit_move_insn (quotient
, const0_rtx
);
4935 emit_jump_insn (targetm
.gen_jump (label2
));
4937 emit_label (label1
);
4938 expand_dec (adjusted_op0
, const1_rtx
);
4939 tem
= expand_binop (compute_mode
, udiv_optab
, adjusted_op0
, op1
,
4940 quotient
, 1, OPTAB_LIB_WIDEN
);
4941 if (tem
!= quotient
)
4942 emit_move_insn (quotient
, tem
);
4943 expand_inc (quotient
, const1_rtx
);
4944 emit_label (label2
);
4949 if (op1_is_constant
&& EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1
))
4950 && INTVAL (op1
) >= 0)
4952 /* This is extremely similar to the code for the unsigned case
4953 above. For 2.7 we should merge these variants, but for
4954 2.6.1 I don't want to touch the code for unsigned since that
4955 get used in C. The signed case will only be used by other
4959 unsigned HOST_WIDE_INT d
= INTVAL (op1
);
4960 t1
= expand_shift (RSHIFT_EXPR
, compute_mode
, op0
,
4961 floor_log2 (d
), tquotient
, 0);
4962 t2
= expand_binop (compute_mode
, and_optab
, op0
,
4963 gen_int_mode (d
- 1, compute_mode
),
4964 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
4965 t3
= gen_reg_rtx (compute_mode
);
4966 t3
= emit_store_flag (t3
, NE
, t2
, const0_rtx
,
4967 compute_mode
, 1, 1);
4970 rtx_code_label
*lab
;
4971 lab
= gen_label_rtx ();
4972 do_cmp_and_jump (t2
, const0_rtx
, EQ
, compute_mode
, lab
);
4973 expand_inc (t1
, const1_rtx
);
4978 quotient
= force_operand (gen_rtx_PLUS (compute_mode
,
4984 /* Try using an instruction that produces both the quotient and
4985 remainder, using truncation. We can easily compensate the
4986 quotient or remainder to get ceiling rounding, once we have the
4987 remainder. Notice that we compute also the final remainder
4988 value here, and return the result right away. */
4989 if (target
== 0 || GET_MODE (target
) != compute_mode
)
4990 target
= gen_reg_rtx (compute_mode
);
4993 remainder
= (REG_P (target
)
4994 ? target
: gen_reg_rtx (compute_mode
));
4995 quotient
= gen_reg_rtx (compute_mode
);
4999 quotient
= (REG_P (target
)
5000 ? target
: gen_reg_rtx (compute_mode
));
5001 remainder
= gen_reg_rtx (compute_mode
);
5004 if (expand_twoval_binop (sdivmod_optab
, op0
, op1
, quotient
,
5007 /* This could be computed with a branch-less sequence.
5008 Save that for later. */
5010 rtx_code_label
*label
= gen_label_rtx ();
5011 do_cmp_and_jump (remainder
, const0_rtx
, EQ
,
5012 compute_mode
, label
);
5013 tem
= expand_binop (compute_mode
, xor_optab
, op0
, op1
,
5014 NULL_RTX
, 0, OPTAB_WIDEN
);
5015 do_cmp_and_jump (tem
, const0_rtx
, LT
, compute_mode
, label
);
5016 expand_inc (quotient
, const1_rtx
);
5017 expand_dec (remainder
, op1
);
5019 return gen_lowpart (mode
, rem_flag
? remainder
: quotient
);
5022 /* No luck with division elimination or divmod. Have to do it
5023 by conditionally adjusting op0 *and* the result. */
5025 rtx_code_label
*label1
, *label2
, *label3
, *label4
, *label5
;
5029 quotient
= gen_reg_rtx (compute_mode
);
5030 adjusted_op0
= copy_to_mode_reg (compute_mode
, op0
);
5031 label1
= gen_label_rtx ();
5032 label2
= gen_label_rtx ();
5033 label3
= gen_label_rtx ();
5034 label4
= gen_label_rtx ();
5035 label5
= gen_label_rtx ();
5036 do_cmp_and_jump (op1
, const0_rtx
, LT
, compute_mode
, label2
);
5037 do_cmp_and_jump (adjusted_op0
, const0_rtx
, GT
,
5038 compute_mode
, label1
);
5039 tem
= expand_binop (compute_mode
, sdiv_optab
, adjusted_op0
, op1
,
5040 quotient
, 0, OPTAB_LIB_WIDEN
);
5041 if (tem
!= quotient
)
5042 emit_move_insn (quotient
, tem
);
5043 emit_jump_insn (targetm
.gen_jump (label5
));
5045 emit_label (label1
);
5046 expand_dec (adjusted_op0
, const1_rtx
);
5047 emit_jump_insn (targetm
.gen_jump (label4
));
5049 emit_label (label2
);
5050 do_cmp_and_jump (adjusted_op0
, const0_rtx
, LT
,
5051 compute_mode
, label3
);
5052 tem
= expand_binop (compute_mode
, sdiv_optab
, adjusted_op0
, op1
,
5053 quotient
, 0, OPTAB_LIB_WIDEN
);
5054 if (tem
!= quotient
)
5055 emit_move_insn (quotient
, tem
);
5056 emit_jump_insn (targetm
.gen_jump (label5
));
5058 emit_label (label3
);
5059 expand_inc (adjusted_op0
, const1_rtx
);
5060 emit_label (label4
);
5061 tem
= expand_binop (compute_mode
, sdiv_optab
, adjusted_op0
, op1
,
5062 quotient
, 0, OPTAB_LIB_WIDEN
);
5063 if (tem
!= quotient
)
5064 emit_move_insn (quotient
, tem
);
5065 expand_inc (quotient
, const1_rtx
);
5066 emit_label (label5
);
5071 case EXACT_DIV_EXPR
:
5072 if (op1_is_constant
&& HWI_COMPUTABLE_MODE_P (compute_mode
))
5074 scalar_int_mode int_mode
= as_a
<scalar_int_mode
> (compute_mode
);
5075 int size
= GET_MODE_BITSIZE (int_mode
);
5076 HOST_WIDE_INT d
= INTVAL (op1
);
5077 unsigned HOST_WIDE_INT ml
;
5081 pre_shift
= ctz_or_zero (d
);
5082 ml
= invert_mod2n (d
>> pre_shift
, size
);
5083 t1
= expand_shift (RSHIFT_EXPR
, int_mode
, op0
,
5084 pre_shift
, NULL_RTX
, unsignedp
);
5085 quotient
= expand_mult (int_mode
, t1
, gen_int_mode (ml
, int_mode
),
5088 insn
= get_last_insn ();
5089 set_dst_reg_note (insn
, REG_EQUAL
,
5090 gen_rtx_fmt_ee (unsignedp
? UDIV
: DIV
,
5091 int_mode
, op0
, op1
),
5096 case ROUND_DIV_EXPR
:
5097 case ROUND_MOD_EXPR
:
5100 scalar_int_mode int_mode
= as_a
<scalar_int_mode
> (compute_mode
);
5102 rtx_code_label
*label
;
5103 label
= gen_label_rtx ();
5104 quotient
= gen_reg_rtx (int_mode
);
5105 remainder
= gen_reg_rtx (int_mode
);
5106 if (expand_twoval_binop (udivmod_optab
, op0
, op1
, quotient
, remainder
, 1) == 0)
5109 quotient
= expand_binop (int_mode
, udiv_optab
, op0
, op1
,
5110 quotient
, 1, OPTAB_LIB_WIDEN
);
5111 tem
= expand_mult (int_mode
, quotient
, op1
, NULL_RTX
, 1);
5112 remainder
= expand_binop (int_mode
, sub_optab
, op0
, tem
,
5113 remainder
, 1, OPTAB_LIB_WIDEN
);
5115 tem
= plus_constant (int_mode
, op1
, -1);
5116 tem
= expand_shift (RSHIFT_EXPR
, int_mode
, tem
, 1, NULL_RTX
, 1);
5117 do_cmp_and_jump (remainder
, tem
, LEU
, int_mode
, label
);
5118 expand_inc (quotient
, const1_rtx
);
5119 expand_dec (remainder
, op1
);
5124 scalar_int_mode int_mode
= as_a
<scalar_int_mode
> (compute_mode
);
5125 int size
= GET_MODE_BITSIZE (int_mode
);
5126 rtx abs_rem
, abs_op1
, tem
, mask
;
5127 rtx_code_label
*label
;
5128 label
= gen_label_rtx ();
5129 quotient
= gen_reg_rtx (int_mode
);
5130 remainder
= gen_reg_rtx (int_mode
);
5131 if (expand_twoval_binop (sdivmod_optab
, op0
, op1
, quotient
, remainder
, 0) == 0)
5134 quotient
= expand_binop (int_mode
, sdiv_optab
, op0
, op1
,
5135 quotient
, 0, OPTAB_LIB_WIDEN
);
5136 tem
= expand_mult (int_mode
, quotient
, op1
, NULL_RTX
, 0);
5137 remainder
= expand_binop (int_mode
, sub_optab
, op0
, tem
,
5138 remainder
, 0, OPTAB_LIB_WIDEN
);
5140 abs_rem
= expand_abs (int_mode
, remainder
, NULL_RTX
, 1, 0);
5141 abs_op1
= expand_abs (int_mode
, op1
, NULL_RTX
, 1, 0);
5142 tem
= expand_shift (LSHIFT_EXPR
, int_mode
, abs_rem
,
5144 do_cmp_and_jump (tem
, abs_op1
, LTU
, int_mode
, label
);
5145 tem
= expand_binop (int_mode
, xor_optab
, op0
, op1
,
5146 NULL_RTX
, 0, OPTAB_WIDEN
);
5147 mask
= expand_shift (RSHIFT_EXPR
, int_mode
, tem
,
5148 size
- 1, NULL_RTX
, 0);
5149 tem
= expand_binop (int_mode
, xor_optab
, mask
, const1_rtx
,
5150 NULL_RTX
, 0, OPTAB_WIDEN
);
5151 tem
= expand_binop (int_mode
, sub_optab
, tem
, mask
,
5152 NULL_RTX
, 0, OPTAB_WIDEN
);
5153 expand_inc (quotient
, tem
);
5154 tem
= expand_binop (int_mode
, xor_optab
, mask
, op1
,
5155 NULL_RTX
, 0, OPTAB_WIDEN
);
5156 tem
= expand_binop (int_mode
, sub_optab
, tem
, mask
,
5157 NULL_RTX
, 0, OPTAB_WIDEN
);
5158 expand_dec (remainder
, tem
);
5161 return gen_lowpart (mode
, rem_flag
? remainder
: quotient
);
5169 if (target
&& GET_MODE (target
) != compute_mode
)
5174 /* Try to produce the remainder without producing the quotient.
5175 If we seem to have a divmod pattern that does not require widening,
5176 don't try widening here. We should really have a WIDEN argument
5177 to expand_twoval_binop, since what we'd really like to do here is
5178 1) try a mod insn in compute_mode
5179 2) try a divmod insn in compute_mode
5180 3) try a div insn in compute_mode and multiply-subtract to get
5182 4) try the same things with widening allowed. */
5184 = sign_expand_binop (compute_mode
, umod_optab
, smod_optab
,
5187 ((optab_handler (optab2
, compute_mode
)
5188 != CODE_FOR_nothing
)
5189 ? OPTAB_DIRECT
: OPTAB_WIDEN
));
5192 /* No luck there. Can we do remainder and divide at once
5193 without a library call? */
5194 remainder
= gen_reg_rtx (compute_mode
);
5195 if (! expand_twoval_binop ((unsignedp
5199 NULL_RTX
, remainder
, unsignedp
))
5204 return gen_lowpart (mode
, remainder
);
5207 /* Produce the quotient. Try a quotient insn, but not a library call.
5208 If we have a divmod in this mode, use it in preference to widening
5209 the div (for this test we assume it will not fail). Note that optab2
5210 is set to the one of the two optabs that the call below will use. */
5212 = sign_expand_binop (compute_mode
, udiv_optab
, sdiv_optab
,
5213 op0
, op1
, rem_flag
? NULL_RTX
: target
,
5215 ((optab_handler (optab2
, compute_mode
)
5216 != CODE_FOR_nothing
)
5217 ? OPTAB_DIRECT
: OPTAB_WIDEN
));
5221 /* No luck there. Try a quotient-and-remainder insn,
5222 keeping the quotient alone. */
5223 quotient
= gen_reg_rtx (compute_mode
);
5224 if (! expand_twoval_binop (unsignedp
? udivmod_optab
: sdivmod_optab
,
5226 quotient
, NULL_RTX
, unsignedp
))
5230 /* Still no luck. If we are not computing the remainder,
5231 use a library call for the quotient. */
5232 quotient
= sign_expand_binop (compute_mode
,
5233 udiv_optab
, sdiv_optab
,
5235 unsignedp
, OPTAB_LIB_WIDEN
);
5242 if (target
&& GET_MODE (target
) != compute_mode
)
5247 /* No divide instruction either. Use library for remainder. */
5248 remainder
= sign_expand_binop (compute_mode
, umod_optab
, smod_optab
,
5250 unsignedp
, OPTAB_LIB_WIDEN
);
5251 /* No remainder function. Try a quotient-and-remainder
5252 function, keeping the remainder. */
5255 remainder
= gen_reg_rtx (compute_mode
);
5256 if (!expand_twoval_binop_libfunc
5257 (unsignedp
? udivmod_optab
: sdivmod_optab
,
5259 NULL_RTX
, remainder
,
5260 unsignedp
? UMOD
: MOD
))
5261 remainder
= NULL_RTX
;
5266 /* We divided. Now finish doing X - Y * (X / Y). */
5267 remainder
= expand_mult (compute_mode
, quotient
, op1
,
5268 NULL_RTX
, unsignedp
);
5269 remainder
= expand_binop (compute_mode
, sub_optab
, op0
,
5270 remainder
, target
, unsignedp
,
5275 return gen_lowpart (mode
, rem_flag
? remainder
: quotient
);
/* Return a tree node with data type TYPE, describing the value of X.
   Usually this is a VAR_DECL, if there is no obvious better choice.
   X may be an expression, however we only support those expressions
   generated by loop.c.  */
5284 make_tree (tree type
, rtx x
)
5288 switch (GET_CODE (x
))
5291 case CONST_WIDE_INT
:
5292 t
= wide_int_to_tree (type
, rtx_mode_t (x
, TYPE_MODE (type
)));
5296 STATIC_ASSERT (HOST_BITS_PER_WIDE_INT
* 2 <= MAX_BITSIZE_MODE_ANY_INT
);
5297 if (TARGET_SUPPORTS_WIDE_INT
== 0 && GET_MODE (x
) == VOIDmode
)
5298 t
= wide_int_to_tree (type
,
5299 wide_int::from_array (&CONST_DOUBLE_LOW (x
), 2,
5300 HOST_BITS_PER_WIDE_INT
* 2));
5302 t
= build_real (type
, *CONST_DOUBLE_REAL_VALUE (x
));
5308 unsigned int npatterns
= CONST_VECTOR_NPATTERNS (x
);
5309 unsigned int nelts_per_pattern
= CONST_VECTOR_NELTS_PER_PATTERN (x
);
5310 tree itype
= TREE_TYPE (type
);
5312 /* Build a tree with vector elements. */
5313 tree_vector_builder
elts (type
, npatterns
, nelts_per_pattern
);
5314 unsigned int count
= elts
.encoded_nelts ();
5315 for (unsigned int i
= 0; i
< count
; ++i
)
5317 rtx elt
= CONST_VECTOR_ELT (x
, i
);
5318 elts
.quick_push (make_tree (itype
, elt
));
5321 return elts
.build ();
5325 return fold_build2 (PLUS_EXPR
, type
, make_tree (type
, XEXP (x
, 0)),
5326 make_tree (type
, XEXP (x
, 1)));
5329 return fold_build2 (MINUS_EXPR
, type
, make_tree (type
, XEXP (x
, 0)),
5330 make_tree (type
, XEXP (x
, 1)));
5333 return fold_build1 (NEGATE_EXPR
, type
, make_tree (type
, XEXP (x
, 0)));
5336 return fold_build2 (MULT_EXPR
, type
, make_tree (type
, XEXP (x
, 0)),
5337 make_tree (type
, XEXP (x
, 1)));
5340 return fold_build2 (LSHIFT_EXPR
, type
, make_tree (type
, XEXP (x
, 0)),
5341 make_tree (type
, XEXP (x
, 1)));
5344 t
= unsigned_type_for (type
);
5345 return fold_convert (type
, build2 (RSHIFT_EXPR
, t
,
5346 make_tree (t
, XEXP (x
, 0)),
5347 make_tree (type
, XEXP (x
, 1))));
5350 t
= signed_type_for (type
);
5351 return fold_convert (type
, build2 (RSHIFT_EXPR
, t
,
5352 make_tree (t
, XEXP (x
, 0)),
5353 make_tree (type
, XEXP (x
, 1))));
5356 if (TREE_CODE (type
) != REAL_TYPE
)
5357 t
= signed_type_for (type
);
5361 return fold_convert (type
, build2 (TRUNC_DIV_EXPR
, t
,
5362 make_tree (t
, XEXP (x
, 0)),
5363 make_tree (t
, XEXP (x
, 1))));
5365 t
= unsigned_type_for (type
);
5366 return fold_convert (type
, build2 (TRUNC_DIV_EXPR
, t
,
5367 make_tree (t
, XEXP (x
, 0)),
5368 make_tree (t
, XEXP (x
, 1))));
5372 t
= lang_hooks
.types
.type_for_mode (GET_MODE (XEXP (x
, 0)),
5373 GET_CODE (x
) == ZERO_EXTEND
);
5374 return fold_convert (type
, make_tree (t
, XEXP (x
, 0)));
5377 return make_tree (type
, XEXP (x
, 0));
5380 t
= SYMBOL_REF_DECL (x
);
5382 return fold_convert (type
, build_fold_addr_expr (t
));
5386 if (CONST_POLY_INT_P (x
))
5387 return wide_int_to_tree (t
, const_poly_int_value (x
));
5389 t
= build_decl (RTL_LOCATION (x
), VAR_DECL
, NULL_TREE
, type
);
5391 /* If TYPE is a POINTER_TYPE, we might need to convert X from
5392 address mode to pointer mode. */
5393 if (POINTER_TYPE_P (type
))
5394 x
= convert_memory_address_addr_space
5395 (SCALAR_INT_TYPE_MODE (type
), x
, TYPE_ADDR_SPACE (TREE_TYPE (type
)));
5397 /* Note that we do *not* use SET_DECL_RTL here, because we do not
5398 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
5399 t
->decl_with_rtl
.rtl
= x
;
/* Compute the logical-and of OP0 and OP1, storing it in TARGET
   and returning TARGET.

   If TARGET is 0, a pseudo-register or constant is returned.  */

rtx
expand_and (machine_mode mode, rtx op0, rtx op1, rtx target)
{
  rtx tem = 0;

  if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
    tem = simplify_binary_operation (AND, mode, op0, op1);
  if (tem == 0)
    tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);

  if (target == 0)
    target = tem;
  else if (tem != target)
    emit_move_insn (target, tem);
  return target;
}
5427 /* Helper function for emit_store_flag. */
5429 emit_cstore (rtx target
, enum insn_code icode
, enum rtx_code code
,
5430 machine_mode mode
, machine_mode compare_mode
,
5431 int unsignedp
, rtx x
, rtx y
, int normalizep
,
5432 machine_mode target_mode
)
5434 class expand_operand ops
[4];
5435 rtx op0
, comparison
, subtarget
;
5437 scalar_int_mode result_mode
= targetm
.cstore_mode (icode
);
5438 scalar_int_mode int_target_mode
;
5440 last
= get_last_insn ();
5441 x
= prepare_operand (icode
, x
, 2, mode
, compare_mode
, unsignedp
);
5442 y
= prepare_operand (icode
, y
, 3, mode
, compare_mode
, unsignedp
);
5445 delete_insns_since (last
);
5449 if (target_mode
== VOIDmode
)
5450 int_target_mode
= result_mode
;
5452 int_target_mode
= as_a
<scalar_int_mode
> (target_mode
);
5454 target
= gen_reg_rtx (int_target_mode
);
5456 comparison
= gen_rtx_fmt_ee (code
, result_mode
, x
, y
);
5458 create_output_operand (&ops
[0], optimize
? NULL_RTX
: target
, result_mode
);
5459 create_fixed_operand (&ops
[1], comparison
);
5460 create_fixed_operand (&ops
[2], x
);
5461 create_fixed_operand (&ops
[3], y
);
5462 if (!maybe_expand_insn (icode
, 4, ops
))
5464 delete_insns_since (last
);
5467 subtarget
= ops
[0].value
;
5469 /* If we are converting to a wider mode, first convert to
5470 INT_TARGET_MODE, then normalize. This produces better combining
5471 opportunities on machines that have a SIGN_EXTRACT when we are
5472 testing a single bit. This mostly benefits the 68k.
5474 If STORE_FLAG_VALUE does not have the sign bit set when
5475 interpreted in MODE, we can do this conversion as unsigned, which
5476 is usually more efficient. */
5477 if (GET_MODE_PRECISION (int_target_mode
) > GET_MODE_PRECISION (result_mode
))
5479 gcc_assert (GET_MODE_PRECISION (result_mode
) != 1
5480 || STORE_FLAG_VALUE
== 1 || STORE_FLAG_VALUE
== -1);
5482 bool unsignedp
= (STORE_FLAG_VALUE
>= 0);
5483 convert_move (target
, subtarget
, unsignedp
);
5486 result_mode
= int_target_mode
;
5491 /* If we want to keep subexpressions around, don't reuse our last
5496 /* Now normalize to the proper value in MODE. Sometimes we don't
5497 have to do anything. */
5498 if (normalizep
== 0 || normalizep
== STORE_FLAG_VALUE
)
5500 /* STORE_FLAG_VALUE might be the most negative number, so write
5501 the comparison this way to avoid a compiler-time warning. */
5502 else if (- normalizep
== STORE_FLAG_VALUE
)
5503 op0
= expand_unop (result_mode
, neg_optab
, op0
, subtarget
, 0);
5505 /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5506 it hard to use a value of just the sign bit due to ANSI integer
5507 constant typing rules. */
5508 else if (val_signbit_known_set_p (result_mode
, STORE_FLAG_VALUE
))
5509 op0
= expand_shift (RSHIFT_EXPR
, result_mode
, op0
,
5510 GET_MODE_BITSIZE (result_mode
) - 1, subtarget
,
5514 gcc_assert (STORE_FLAG_VALUE
& 1);
5516 op0
= expand_and (result_mode
, op0
, const1_rtx
, subtarget
);
5517 if (normalizep
== -1)
5518 op0
= expand_unop (result_mode
, neg_optab
, op0
, op0
, 0);
5521 /* If we were converting to a smaller mode, do the conversion now. */
5522 if (int_target_mode
!= result_mode
)
5524 convert_move (target
, op0
, 0);
5532 /* A subroutine of emit_store_flag only including "tricks" that do not
5533 need a recursive call. These are kept separate to avoid infinite
5537 emit_store_flag_1 (rtx target
, enum rtx_code code
, rtx op0
, rtx op1
,
5538 machine_mode mode
, int unsignedp
, int normalizep
,
5539 machine_mode target_mode
)
5542 enum insn_code icode
;
5543 machine_mode compare_mode
;
5544 enum mode_class mclass
;
5545 enum rtx_code scode
;
5548 code
= unsigned_condition (code
);
5549 scode
= swap_condition (code
);
5551 /* If one operand is constant, make it the second one. Only do this
5552 if the other operand is not constant as well. */
5554 if (swap_commutative_operands_p (op0
, op1
))
5556 std::swap (op0
, op1
);
5557 code
= swap_condition (code
);
5560 if (mode
== VOIDmode
)
5561 mode
= GET_MODE (op0
);
5563 if (CONST_SCALAR_INT_P (op1
))
5564 canonicalize_comparison (mode
, &code
, &op1
);
5566 /* For some comparisons with 1 and -1, we can convert this to
5567 comparisons with zero. This will often produce more opportunities for
5568 store-flag insns. */
  switch (code)
    {
    case LT:
      if (op1 == const1_rtx)
        op1 = const0_rtx, code = LE;
      break;
    case LE:
      if (op1 == constm1_rtx)
        op1 = const0_rtx, code = LT;
      break;
    case GE:
      if (op1 == const1_rtx)
        op1 = const0_rtx, code = GT;
      break;
    case GT:
      if (op1 == constm1_rtx)
        op1 = const0_rtx, code = GE;
      break;
    case GEU:
      if (op1 == const1_rtx)
        op1 = const0_rtx, code = NE;
      break;
    case LTU:
      if (op1 == const1_rtx)
        op1 = const0_rtx, code = EQ;
      break;
    default:
      break;
    }
  /* If we are comparing a double-word integer with zero or -1, we can
     convert the comparison into one involving a single word.  */
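  /* For example, on a 32-bit target a DImode "x == 0" can be done by
     OR-ing the two SImode halves of X and testing the single-word result
     against zero, while "x == -1" uses AND instead of OR.  */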
  scalar_int_mode int_mode;
  if (is_int_mode (mode, &int_mode)
      && GET_MODE_BITSIZE (int_mode) == BITS_PER_WORD * 2
      && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
    {
      rtx op00, op01, op0h, tem;

      if ((code == EQ || code == NE)
          && (op1 == const0_rtx || op1 == constm1_rtx))
        {
          /* Do a logical OR or AND of the two words and compare the
             result.  */
          op00 = simplify_gen_subreg (word_mode, op0, int_mode, 0);
          op01 = simplify_gen_subreg (word_mode, op0, int_mode, UNITS_PER_WORD);
          tem = expand_binop (word_mode,
                              op1 == const0_rtx ? ior_optab : and_optab,
                              op00, op01, NULL_RTX, unsignedp,
                              OPTAB_DIRECT);

          if (tem)
            tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
                                   unsignedp, normalizep);
        }
      else if ((code == LT || code == GE) && op1 == const0_rtx)
        {
          /* If testing the sign bit, can just test on high word.  */
          op0h = simplify_gen_subreg (word_mode, op0, int_mode,
                                      subreg_highpart_offset (word_mode,
                                                              int_mode));
          tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
                                 unsignedp, normalizep);
        }
      else
        tem = NULL_RTX;

      if (tem)
        {
          if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
            return tem;
          if (!target)
            target = gen_reg_rtx (target_mode);

          convert_move (target, tem,
                        !val_signbit_known_set_p (word_mode,
                                                  (normalizep ? normalizep
                                                   : STORE_FLAG_VALUE)));
          return target;
        }
    }
  /* If this is A < 0 or A >= 0, we can do this by taking the ones
     complement of A (for GE) and shifting the sign bit to the low bit.  */
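  /* For example, with a 32-bit A and NORMALIZEP == 1, "A < 0" becomes the
     logical shift "(unsigned) A >> 31" and "A >= 0" becomes
     "(unsigned) ~A >> 31"; with NORMALIZEP == -1 the shift is arithmetic
     instead, giving 0 or -1.  */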
  if (op1 == const0_rtx && (code == LT || code == GE)
      && is_int_mode (mode, &int_mode)
      && (normalizep || STORE_FLAG_VALUE == 1
          || val_signbit_p (int_mode, STORE_FLAG_VALUE)))
    {
      scalar_int_mode int_target_mode;
      subtarget = target;

      if (!target)
        int_target_mode = int_mode;
      else
        {
          /* If the result is to be wider than OP0, it is best to convert it
             first.  If it is to be narrower, it is *incorrect* to convert it
             first.  */
          int_target_mode = as_a <scalar_int_mode> (target_mode);
          if (GET_MODE_SIZE (int_target_mode) > GET_MODE_SIZE (int_mode))
            {
              op0 = convert_modes (int_target_mode, int_mode, op0, 0);
              int_mode = int_target_mode;
            }
        }

      if (int_target_mode != int_mode)
        subtarget = 0;

      if (code == GE)
        op0 = expand_unop (int_mode, one_cmpl_optab, op0,
                           ((STORE_FLAG_VALUE == 1 || normalizep)
                            ? 0 : subtarget), 0);

      if (STORE_FLAG_VALUE == 1 || normalizep)
        /* If we are supposed to produce a 0/1 value, we want to do
           a logical shift from the sign bit to the low-order bit; for
           a -1/0 value, we do an arithmetic shift.  */
        op0 = expand_shift (RSHIFT_EXPR, int_mode, op0,
                            GET_MODE_BITSIZE (int_mode) - 1,
                            subtarget, normalizep != -1);

      if (int_mode != int_target_mode)
        op0 = convert_modes (int_target_mode, int_mode, op0, 0);

      return op0;
    }
  mclass = GET_MODE_CLASS (mode);
  FOR_EACH_MODE_FROM (compare_mode, mode)
    {
      machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
      icode = optab_handler (cstore_optab, optab_mode);
      if (icode != CODE_FOR_nothing)
        {
          do_pending_stack_adjust ();
          rtx tem = emit_cstore (target, icode, code, mode, compare_mode,
                                 unsignedp, op0, op1, normalizep, target_mode);
          if (tem)
            return tem;

          if (GET_MODE_CLASS (mode) == MODE_FLOAT)
            {
              tem = emit_cstore (target, icode, scode, mode, compare_mode,
                                 unsignedp, op1, op0, normalizep, target_mode);
              if (tem)
                return tem;
            }
          break;
        }
    }

  return 0;
}

/* Subroutine of emit_store_flag that handles cases in which the operands
   are scalar integers.  SUBTARGET is the target to use for temporary
   operations and TRUEVAL is the value to store when the condition is
   true.  All other arguments are as for emit_store_flag.  */

static rtx
emit_store_flag_int (rtx target, rtx subtarget, enum rtx_code code, rtx op0,
                     rtx op1, scalar_int_mode mode, int unsignedp,
                     int normalizep, rtx trueval)
{
  machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
  rtx_insn *last = get_last_insn ();
  /* If this is an equality comparison of integers, we can try to exclusive-or
     (or subtract) the two operands and use a recursive call to try the
     comparison with zero.  Don't do any of these cases if branches are
     very cheap.  */
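  /* For example, "a == b" is retried as "(a ^ b) == 0", or as
     "(a - b) == 0" when no xor pattern is available.  */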
  if ((code == EQ || code == NE) && op1 != const0_rtx)
    {
      rtx tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
                              OPTAB_WIDEN);

      if (tem == 0)
        tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
                            OPTAB_WIDEN);
      if (tem != 0)
        tem = emit_store_flag (target, code, tem, const0_rtx,
                               mode, unsignedp, normalizep);
      if (tem != 0)
        return tem;

      delete_insns_since (last);
    }
  /* For integer comparisons, try the reverse comparison.  However, for
     small X and if we'd have anyway to extend, implementing "X != 0"
     as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0".  */
  rtx_code rcode = reverse_condition (code);
  if (can_compare_p (rcode, mode, ccp_store_flag)
      && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
            && code == NE
            && GET_MODE_SIZE (mode) < UNITS_PER_WORD
            && op1 == const0_rtx))
    {
      int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
                      || (STORE_FLAG_VALUE == -1 && normalizep == 1));

      /* Again, for the reverse comparison, use either an addition or a XOR.  */
      if (want_add
          && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
                       optimize_insn_for_speed_p ()) == 0)
        {
          rtx tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
                                       STORE_FLAG_VALUE, target_mode);
          if (tem != 0)
            tem = expand_binop (target_mode, add_optab, tem,
                                gen_int_mode (normalizep, target_mode),
                                target, 0, OPTAB_WIDEN);
          if (tem != 0)
            return tem;
        }
      else if (!want_add
               && rtx_cost (trueval, mode, XOR, 1,
                            optimize_insn_for_speed_p ()) == 0)
        {
          rtx tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
                                       normalizep, target_mode);
          if (tem != 0)
            tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
                                INTVAL (trueval) >= 0, OPTAB_WIDEN);
          if (tem != 0)
            return tem;
        }

      delete_insns_since (last);
    }
  /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
     the constant zero.  Reject all other comparisons at this point.  Only
     do LE and GT if branches are expensive since they are expensive on
     2-operand machines.  */
  if (op1 != const0_rtx
      || (code != EQ && code != NE
          && (BRANCH_COST (optimize_insn_for_speed_p (),
                           false) <= 1 || (code != LE && code != GT))))
    return 0;

  /* Try to put the result of the comparison in the sign bit.  Assume we can't
     do the necessary operation below.  */

  rtx tem = 0;
  /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
     the sign bit set.  */
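  /* For example, A == 5 gives 5 | 4 = 5 (sign bit clear), while A == 0
     gives 0 | -1 = -1 and A == -3 gives -3 | -4 = -3, both with the sign
     bit set.  */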
  if (code == LE)
    {
      /* This is destructive, so SUBTARGET can't be OP0.  */
      if (rtx_equal_p (subtarget, op0))
        subtarget = 0;

      tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
                          OPTAB_WIDEN);
      if (tem)
        tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
                            OPTAB_WIDEN);
    }
  /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
     number of bits in the mode of OP0, minus one.  */
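  /* For example, A == 5 gives 0 - 5 = -5 (sign bit set), while A == 0
     gives 0 - 0 = 0 and A == -3 gives -1 - (-3) = 2, both with the sign
     bit clear.  */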
  if (code == GT)
    {
      if (rtx_equal_p (subtarget, op0))
        subtarget = 0;

      tem = maybe_expand_shift (RSHIFT_EXPR, mode, op0,
                                GET_MODE_BITSIZE (mode) - 1,
                                subtarget, 0);
      if (tem)
        tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
                            OPTAB_WIDEN);
    }
  if (code == EQ || code == NE)
    {
      /* For EQ or NE, one way to do the comparison is to apply an operation
         that converts the operand into a positive number if it is nonzero
         or zero if it was originally zero.  Then, for EQ, we subtract 1 and
         for NE we negate.  This puts the result in the sign bit.  Then we
         normalize with a shift, if needed.

         Two operations that can do the above actions are ABS and FFS, so try
         them.  If that doesn't work, and MODE is smaller than a full word,
         we can use zero-extension to the wider mode (an unsigned conversion)
         as the operation.  */
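      /* For example, with a 32-bit X and NORMALIZEP == 1, NE is the sign
         bit of -abs(X), which is set exactly when X is nonzero, while EQ
         is the sign bit of abs(X) - 1, which is set only when X is zero.  */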
      /* Note that ABS doesn't yield a positive number for INT_MIN, but
         that is compensated by the subsequent overflow when subtracting
         one / negating.  */

      if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
        tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
      else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
        tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
      else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
        {
          tem = convert_modes (word_mode, mode, op0, 1);
          mode = word_mode;
        }

      if (tem != 0)
        {
          if (code == EQ)
            tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
                                0, OPTAB_WIDEN);
          else
            tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
        }
      /* If we couldn't do it that way, for NE we can "or" the two's complement
         of the value with itself.  For EQ, we take the one's complement of
         that "or", which is an extra insn, so we only handle EQ if branches
         are expensive.  */
      if (tem == 0
          && (code == NE
              || BRANCH_COST (optimize_insn_for_speed_p (),
                              false) > 1))
        {
          if (rtx_equal_p (subtarget, op0))
            subtarget = 0;

          tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
          tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
                              OPTAB_WIDEN);

          if (tem && code == EQ)
            tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
        }
    }
  if (tem && normalizep)
    tem = maybe_expand_shift (RSHIFT_EXPR, mode, tem,
                              GET_MODE_BITSIZE (mode) - 1,
                              subtarget, normalizep == 1);

  if (tem)
    {
      if (!target)
        ;
      else if (GET_MODE (tem) != target_mode)
        {
          convert_move (target, tem, 0);
          tem = target;
        }
      else if (!subtarget)
        {
          emit_move_insn (target, tem);
          tem = target;
        }
    }
  else
    delete_insns_since (last);

  return tem;
}

/* Emit a store-flags instruction for comparison CODE on OP0 and OP1
   and storing in TARGET.  Normally return TARGET.
   Return 0 if that cannot be done.

   MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
   it is VOIDmode, they cannot both be CONST_INT.

   UNSIGNEDP is for the case where we have to widen the operands
   to perform the operation.  It says to use zero-extension.

   NORMALIZEP is 1 if we should convert the result to be either zero
   or one.  NORMALIZEP is -1 if we should convert the result to be
   either zero or -1.  If NORMALIZEP is zero, the result will be left
   "raw" out of the scc insn.  */
rtx
emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
                 machine_mode mode, int unsignedp, int normalizep)
{
  machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
  enum rtx_code rcode;
  rtx subtarget;
  rtx tem, trueval;
  rtx_insn *last;

  /* If we compare constants, we shouldn't use a store-flag operation,
     but a constant load.  We can get there via the vanilla route that
     usually generates a compare-branch sequence, but will in this case
     fold the comparison to a constant, and thus elide the branch.  */
  if (CONSTANT_P (op0) && CONSTANT_P (op1))
    return NULL_RTX;

  tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
                           target_mode);
  if (tem)
    return tem;

  /* If we reached here, we can't do this with a scc insn, however there
     are some comparisons that can be done in other ways.  Don't do any
     of these cases if branches are very cheap.  */
  if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
    return 0;

  /* See what we need to return.  We can only return a 1, -1, or the
     sign bit.  */

  if (normalizep == 0)
    {
      if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
        normalizep = STORE_FLAG_VALUE;

      else if (val_signbit_p (mode, STORE_FLAG_VALUE))
        ;
      else
        return 0;
    }

  last = get_last_insn ();

  /* If optimizing, use different pseudo registers for each insn, instead
     of reusing the same pseudo.  This leads to better CSE, but slows
     down the compiler, since there are more pseudos.  */
  subtarget = (!optimize
               && (target_mode == mode)) ? target : NULL_RTX;
  trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
  /* For floating-point comparisons, try the reverse comparison or try
     changing the "orderedness" of the comparison.  */
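  /* For example, when STORE_FLAG_VALUE is 1 and NORMALIZEP is -1,
     "a unordered b" can be computed as "(a ordered b) - 1", which turns
     the reversed 0/1 result into the requested 0/-1 value.  */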
  if (GET_MODE_CLASS (mode) == MODE_FLOAT)
    {
      enum rtx_code first_code;
      bool and_them;

      rcode = reverse_condition_maybe_unordered (code);
      if (can_compare_p (rcode, mode, ccp_store_flag)
          && (code == ORDERED || code == UNORDERED
              || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
              || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
        {
          int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
                          || (STORE_FLAG_VALUE == -1 && normalizep == 1));

          /* For the reverse comparison, use either an addition or a XOR.  */
          if (want_add
              && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
                           optimize_insn_for_speed_p ()) == 0)
            {
              tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
                                       STORE_FLAG_VALUE, target_mode);
              if (tem)
                return expand_binop (target_mode, add_optab, tem,
                                     gen_int_mode (normalizep, target_mode),
                                     target, 0, OPTAB_WIDEN);
            }
          else if (!want_add
                   && rtx_cost (trueval, mode, XOR, 1,
                                optimize_insn_for_speed_p ()) == 0)
            {
              tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
                                       normalizep, target_mode);
              if (tem)
                return expand_binop (target_mode, xor_optab, tem, trueval,
                                     target, INTVAL (trueval) >= 0,
                                     OPTAB_WIDEN);
            }
        }

      delete_insns_since (last);

      /* Cannot split ORDERED and UNORDERED, only try the above trick.  */
      if (code == ORDERED || code == UNORDERED)
        return 0;

      and_them = split_comparison (code, mode, &first_code, &code);

      /* If there are no NaNs, the first comparison should always fall through.
         Effectively change the comparison to the other one.  */
      if (!HONOR_NANS (mode))
        {
          gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
          return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
                                    target_mode);
        }

      if (!HAVE_conditional_move)
        return 0;

      /* Do not turn a trapping comparison into a non-trapping one.  */
      if ((code != EQ && code != NE && code != UNEQ && code != LTGT)
          && flag_trapping_math)
        return 0;

      /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
         conditional move.  */
      tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
                               normalizep, target_mode);
      if (tem == 0)
        return 0;

      if (and_them)
        tem = emit_conditional_move (target, code, op0, op1, mode,
                                     tem, const0_rtx, GET_MODE (tem), 0);
      else
        tem = emit_conditional_move (target, code, op0, op1, mode,
                                     trueval, tem, GET_MODE (tem), 0);

      if (tem == 0)
        delete_insns_since (last);
      return tem;
    }
  /* The remaining tricks only apply to integer comparisons.  */

  scalar_int_mode int_mode;
  if (is_int_mode (mode, &int_mode))
    return emit_store_flag_int (target, subtarget, code, op0, op1, int_mode,
                                unsignedp, normalizep, trueval);

  return 0;
}

/* Like emit_store_flag, but always succeeds.  */

rtx
emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
                       machine_mode mode, int unsignedp, int normalizep)
{
  rtx tem;
  rtx_code_label *label;
  rtx trueval, falseval;

  /* First see if emit_store_flag can do the job.  */
  tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
  if (tem != 0)
    return tem;

  /* If one operand is constant, make it the second one.  Only do this
     if the other operand is not constant as well.  */
  if (swap_commutative_operands_p (op0, op1))
    {
      std::swap (op0, op1);
      code = swap_condition (code);
    }

  if (mode == VOIDmode)
    mode = GET_MODE (op0);

  if (!target)
    target = gen_reg_rtx (word_mode);
  /* If this failed, we have to do this with set/compare/jump/set code.
     For foo != 0, if foo is in OP0, just replace it with 1 if nonzero.  */
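  /* The fallback emitted at the end of this function is roughly
     "target = trueval; if (op0 <code> op1) goto label;
      target = falseval; label:", i.e. the flag value is built with a
     branch when no scc-style sequence is available.  */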
  trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
  if (code == NE
      && GET_MODE_CLASS (mode) == MODE_INT
      && REG_P (target)
      && op0 == target
      && op1 == const0_rtx)
    {
      label = gen_label_rtx ();
      do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp, mode,
                               NULL_RTX, NULL, label,
                               profile_probability::uninitialized ());
      emit_move_insn (target, trueval);
      emit_label (label);
      return target;
    }

  if (!REG_P (target)
      || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
    target = gen_reg_rtx (GET_MODE (target));

  /* Jump in the right direction if the target cannot implement CODE
     but can jump on its reverse condition.  */
  falseval = const0_rtx;
  if (! can_compare_p (code, mode, ccp_jump)
      && (! FLOAT_MODE_P (mode)
          || code == ORDERED || code == UNORDERED
          || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
          || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
    {
      enum rtx_code rcode;
      if (FLOAT_MODE_P (mode))
        rcode = reverse_condition_maybe_unordered (code);
      else
        rcode = reverse_condition (code);

      /* Canonicalize to UNORDERED for the libcall.  */
      if (can_compare_p (rcode, mode, ccp_jump)
          || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
        {
          falseval = trueval;
          trueval = const0_rtx;
          code = rcode;
        }
    }

  emit_move_insn (target, trueval);
  label = gen_label_rtx ();
  do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX, NULL,
                           label, profile_probability::uninitialized ());

  emit_move_insn (target, falseval);
  emit_label (label);

  return target;
}

/* Helper function for canonicalize_cmp_for_target.  Swap between inclusive
   and exclusive ranges in order to create an equivalent comparison.  See
   canonicalize_cmp_for_target for the possible cases.  */

static enum rtx_code
equivalent_cmp_code (enum rtx_code code)

/* Choose the more appropriate immediate in scalar integer comparisons.  The
   purpose of this is to end up with an immediate which can be loaded into a
   register in fewer moves, if possible.

   For each integer comparison there exists an equivalent choice:
     i)   a >  b or a >= b + 1
     ii)  a <= b or a <  b + 1
     iii) a >= b or a >  b - 1
     iv)  a <  b or a <= b - 1
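
   For example, if the constant 255 can be loaded into a register more
   cheaply than 256, an unsigned "a < 256" is better emitted as
   "a <= 255" (case iv).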

   MODE is the mode of the first operand.
   CODE points to the comparison code.
   IMM points to the rtx containing the immediate.  *IMM must satisfy
   CONST_SCALAR_INT_P on entry and continues to satisfy CONST_SCALAR_INT_P
   on exit.  */

void
canonicalize_comparison (machine_mode mode, enum rtx_code *code, rtx *imm)
{
  if (!SCALAR_INT_MODE_P (mode))
    return;

  int to_add = 0;
  enum signop sgn = unsigned_condition_p (*code) ? UNSIGNED : SIGNED;

  /* Extract the immediate value from the rtx.  */
  wide_int imm_val = rtx_mode_t (*imm, mode);

  if (*code == GT || *code == GTU || *code == LE || *code == LEU)
    to_add = 1;
  else if (*code == GE || *code == GEU || *code == LT || *code == LTU)
    to_add = -1;
  else
    return;

  /* Check for overflow/underflow in the case of signed values and
     wrapping around in the case of unsigned values.  If any occur
     cancel the optimization.  */
  wi::overflow_type overflow = wi::OVF_NONE;
  wide_int imm_modif;

  if (to_add == 1)
    imm_modif = wi::add (imm_val, 1, sgn, &overflow);
  else
    imm_modif = wi::sub (imm_val, 1, sgn, &overflow);

  if (overflow)
    return;

  /* The following creates a pseudo; if we cannot do that, bail out.  */
  if (!can_create_pseudo_p ())
    return;

  rtx reg = gen_rtx_REG (mode, LAST_VIRTUAL_REGISTER + 1);
  rtx new_imm = immed_wide_int_const (imm_modif, mode);

  rtx_insn *old_rtx = gen_move_insn (reg, *imm);
  rtx_insn *new_rtx = gen_move_insn (reg, new_imm);

  /* Update the immediate and the code.  */
  if (insn_cost (old_rtx, true) > insn_cost (new_rtx, true))
    {
      *code = equivalent_cmp_code (*code);
      *imm = new_imm;
    }
}

/* Perform possibly multi-word comparison and conditional jump to LABEL
   if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE.  This is
   now a thin wrapper around do_compare_rtx_and_jump.  */

static void
do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, machine_mode mode,
                 rtx_code_label *label)
{
  int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
  do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode, NULL_RTX,
                           NULL, label, profile_probability::uninitialized ());
}