1 /* Medium-level subroutines: convert bit-field store and extract
2 and shifts, multiplies and divides to rtl instructions.
3 Copyright (C) 1987-2020 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* Work around tree-optimization/91825. */
22 #pragma GCC diagnostic warning "-Wmaybe-uninitialized"
26 #include "coretypes.h"
38 #include "diagnostic-core.h"
39 #include "fold-const.h"
40 #include "stor-layout.h"
44 #include "langhooks.h"
45 #include "tree-vector-builder.h"
/* Default instance of the target-specific expmed (expand-medium) state,
   plus the pointer through which the currently active instance is reached.
   This is GCC's standard pattern for per-target global state.  */
struct target_expmed default_target_expmed;

struct target_expmed *this_target_expmed = &default_target_expmed;
52 static bool store_integral_bit_field (rtx
, opt_scalar_int_mode
,
53 unsigned HOST_WIDE_INT
,
54 unsigned HOST_WIDE_INT
,
55 poly_uint64
, poly_uint64
,
56 machine_mode
, rtx
, bool, bool);
57 static void store_fixed_bit_field (rtx
, opt_scalar_int_mode
,
58 unsigned HOST_WIDE_INT
,
59 unsigned HOST_WIDE_INT
,
60 poly_uint64
, poly_uint64
,
61 rtx
, scalar_int_mode
, bool);
62 static void store_fixed_bit_field_1 (rtx
, scalar_int_mode
,
63 unsigned HOST_WIDE_INT
,
64 unsigned HOST_WIDE_INT
,
65 rtx
, scalar_int_mode
, bool);
66 static void store_split_bit_field (rtx
, opt_scalar_int_mode
,
67 unsigned HOST_WIDE_INT
,
68 unsigned HOST_WIDE_INT
,
69 poly_uint64
, poly_uint64
,
70 rtx
, scalar_int_mode
, bool);
71 static rtx
extract_integral_bit_field (rtx
, opt_scalar_int_mode
,
72 unsigned HOST_WIDE_INT
,
73 unsigned HOST_WIDE_INT
, int, rtx
,
74 machine_mode
, machine_mode
, bool, bool);
75 static rtx
extract_fixed_bit_field (machine_mode
, rtx
, opt_scalar_int_mode
,
76 unsigned HOST_WIDE_INT
,
77 unsigned HOST_WIDE_INT
, rtx
, int, bool);
78 static rtx
extract_fixed_bit_field_1 (machine_mode
, rtx
, scalar_int_mode
,
79 unsigned HOST_WIDE_INT
,
80 unsigned HOST_WIDE_INT
, rtx
, int, bool);
81 static rtx
lshift_value (machine_mode
, unsigned HOST_WIDE_INT
, int);
82 static rtx
extract_split_bit_field (rtx
, opt_scalar_int_mode
,
83 unsigned HOST_WIDE_INT
,
84 unsigned HOST_WIDE_INT
, int, bool);
85 static void do_cmp_and_jump (rtx
, rtx
, enum rtx_code
, machine_mode
, rtx_code_label
*);
86 static rtx
expand_smod_pow2 (scalar_int_mode
, rtx
, HOST_WIDE_INT
);
87 static rtx
expand_sdiv_pow2 (scalar_int_mode
, rtx
, HOST_WIDE_INT
);
/* Return a constant integer mask value of mode MODE with BITSIZE ones
   followed by BITPOS zeros, or the complement of that if COMPLEMENT.
   The mask is truncated if necessary to the width of mode MODE.  The
   mask is zero-extended if BITSIZE+BITPOS is too small for MODE.

   NOTE(review): the function's return-type line and body braces appear
   to have been dropped by the extraction; restore them from the upstream
   file before compiling.  */
mask_rtx (scalar_int_mode mode, int bitpos, int bitsize, bool complement)
  /* wi::shifted_mask builds the wide-int mask; immed_wide_int_const turns
     it into a CONST_INT/CONST_WIDE_INT rtx of MODE.  */
  return immed_wide_int_const
    (wi::shifted_mask (bitpos, bitsize, complement,
		       GET_MODE_PRECISION (mode)), mode);
/* Test whether a value is zero or a power of two.  Uses the classic
   x & (x - 1) trick: clearing the lowest set bit leaves zero iff at
   most one bit was set.  */
#define EXACT_POWER_OF_2_OR_ZERO_P(x) \
  (((x) & ((x) - HOST_WIDE_INT_1U)) == 0)
/* Scratch rtxes used while precomputing per-mode operation costs.
   POW2[m] caches the constant 1<<m; CINT[m] caches the constant m
   (used as shift amounts below).

   NOTE(review): the fields between the struct header and these arrays
   (reg, plus, neg, mult, sdiv, udiv, zext, shift, ... — referenced by
   the init routines below) and the closing brace appear to be missing
   from this extraction.  */
struct init_expmed_rtl
  rtx pow2[MAX_BITS_PER_WORD];
  rtx cint[MAX_BITS_PER_WORD];
/* Record the cost of converting FROM_MODE to TO_MODE (for the given
   SPEED setting) by pricing a truncate or zero-extend of ALL's scratch
   register.

   NOTE(review): the statements inside the two MODE_PARTIAL_INT `if's
   (which reduce TO_SIZE/FROM_SIZE), the braces, and the declaration of
   `which' appear to be missing from this extraction.  */
init_expmed_one_conv (struct init_expmed_rtl *all, scalar_int_mode to_mode,
		      scalar_int_mode from_mode, bool speed)
  int to_size, from_size;

  to_size = GET_MODE_PRECISION (to_mode);
  from_size = GET_MODE_PRECISION (from_mode);

  /* Most partial integers have a precision less than the "full"
     integer it requires for storage.  In case one doesn't, for
     comparison purposes here, reduce the bit size by one in that
     case.  */
  if (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT
      && pow2p_hwi (to_size))
  if (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT
      && pow2p_hwi (from_size))

  /* Assume cost of zero-extend and sign-extend is the same.  */
  which = (to_size < from_size ? all->trunc : all->zext);

  /* Re-mode the shared scratch register so the costed rtx matches the
     conversion's source mode.  */
  PUT_MODE (all->reg, from_mode);
  set_convert_cost (to_mode, from_mode, speed,
		    set_src_cost (which, to_mode, speed));
/* Precompute and cache the costs of the common arithmetic operations
   (add, neg, mul, div, shifts, shift-and-add/sub, widening multiply,
   highpart multiply) in MODE for the given SPEED setting, by re-moding
   ALL's scratch rtxes and pricing them with set_src_cost.

   NOTE(review): several closing fragments are missing from this
   extraction — the `speed' arguments and closing parens of the
   set_shiftadd/shiftsub0/shiftsub1 calls in the loop, and various
   braces.  Compare against the upstream file before compiling.  */
init_expmed_one_mode (struct init_expmed_rtl *all,
		      machine_mode mode, int speed)
  int m, n, mode_bitsize;
  machine_mode mode_from;

  mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);

  /* Point every scratch expression at MODE so the costs below are
     computed for this mode.  */
  PUT_MODE (all->reg, mode);
  PUT_MODE (all->plus, mode);
  PUT_MODE (all->neg, mode);
  PUT_MODE (all->mult, mode);
  PUT_MODE (all->sdiv, mode);
  PUT_MODE (all->udiv, mode);
  PUT_MODE (all->sdiv_32, mode);
  PUT_MODE (all->smod_32, mode);
  PUT_MODE (all->wide_trunc, mode);
  PUT_MODE (all->shift, mode);
  PUT_MODE (all->shift_mult, mode);
  PUT_MODE (all->shift_add, mode);
  PUT_MODE (all->shift_sub0, mode);
  PUT_MODE (all->shift_sub1, mode);
  PUT_MODE (all->zext, mode);
  PUT_MODE (all->trunc, mode);

  set_add_cost (speed, mode, set_src_cost (all->plus, mode, speed));
  set_neg_cost (speed, mode, set_src_cost (all->neg, mode, speed));
  set_mul_cost (speed, mode, set_src_cost (all->mult, mode, speed));
  set_sdiv_cost (speed, mode, set_src_cost (all->sdiv, mode, speed));
  set_udiv_cost (speed, mode, set_src_cost (all->udiv, mode, speed));

  /* Is a signed divide/mod by a power of two cheaper than the generic
     expansion (a couple of adds/shifts)?  */
  set_sdiv_pow2_cheap (speed, mode, (set_src_cost (all->sdiv_32, mode, speed)
				     <= 2 * add_cost (speed, mode)));
  set_smod_pow2_cheap (speed, mode, (set_src_cost (all->smod_32, mode, speed)
				     <= 4 * add_cost (speed, mode)));

  /* A shift by zero is free; shift-and-add/sub by zero cost one add.  */
  set_shift_cost (speed, mode, 0, 0);
  int cost = add_cost (speed, mode);
  set_shiftadd_cost (speed, mode, 0, cost);
  set_shiftsub0_cost (speed, mode, 0, cost);
  set_shiftsub1_cost (speed, mode, 0, cost);

  /* Cost each shift amount 1..n-1 by patching the shift count / power
     of two into the scratch expressions.  */
  n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
  for (m = 1; m < n; m++)
    XEXP (all->shift, 1) = all->cint[m];
    XEXP (all->shift_mult, 1) = all->pow2[m];

    set_shift_cost (speed, mode, m, set_src_cost (all->shift, mode, speed));
    set_shiftadd_cost (speed, mode, m, set_src_cost (all->shift_add, mode,
    set_shiftsub0_cost (speed, mode, m, set_src_cost (all->shift_sub0, mode,
    set_shiftsub1_cost (speed, mode, m, set_src_cost (all->shift_sub1, mode,

  scalar_int_mode int_mode_to;
  if (is_a <scalar_int_mode> (mode, &int_mode_to))
    /* Record conversion costs from every integer mode into this one.  */
    for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
	 mode_from = (machine_mode)(mode_from + 1))
      init_expmed_one_conv (all, int_mode_to,
			    as_a <scalar_int_mode> (mode_from), speed);

    scalar_int_mode wider_mode;
    if (GET_MODE_CLASS (int_mode_to) == MODE_INT
	&& GET_MODE_WIDER_MODE (int_mode_to).exists (&wider_mode))
      /* Price widening multiply and highpart multiply via the next
	 wider integer mode.  */
      PUT_MODE (all->zext, wider_mode);
      PUT_MODE (all->wide_mult, wider_mode);
      PUT_MODE (all->wide_lshr, wider_mode);
      XEXP (all->wide_lshr, 1)
	= gen_int_shift_amount (wider_mode, mode_bitsize);

      set_mul_widen_cost (speed, wider_mode,
			  set_src_cost (all->wide_mult, wider_mode, speed));
      set_mul_highpart_cost (speed, int_mode_to,
			     set_src_cost (all->wide_trunc,
					   int_mode_to, speed));
/* Body of init_expmed: build the scratch rtxes once, then walk every
   integer, partial-integer and vector-integer mode for both speed
   settings (size vs. speed), caching operation costs via
   init_expmed_one_mode.  The scratch rtxes are freed at the end.

   NOTE(review): the function signature, the declarations of `m' and
   `speed', braces, and an apparent `else' before set_alg_hash_used_p
   are missing from this extraction.  */
  struct init_expmed_rtl all;
  machine_mode mode = QImode;

  memset (&all, 0, sizeof all);
  for (m = 1; m < MAX_BITS_PER_WORD; m++)
    all.pow2[m] = GEN_INT (HOST_WIDE_INT_1 << m);
    all.cint[m] = GEN_INT (m);

  /* Avoid using hard regs in ways which may be unsupported.  */
  all.reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
  all.plus = gen_rtx_PLUS (mode, all.reg, all.reg);
  all.neg = gen_rtx_NEG (mode, all.reg);
  all.mult = gen_rtx_MULT (mode, all.reg, all.reg);
  all.sdiv = gen_rtx_DIV (mode, all.reg, all.reg);
  all.udiv = gen_rtx_UDIV (mode, all.reg, all.reg);
  all.sdiv_32 = gen_rtx_DIV (mode, all.reg, all.pow2[5]);
  all.smod_32 = gen_rtx_MOD (mode, all.reg, all.pow2[5]);
  all.zext = gen_rtx_ZERO_EXTEND (mode, all.reg);
  all.wide_mult = gen_rtx_MULT (mode, all.zext, all.zext);
  all.wide_lshr = gen_rtx_LSHIFTRT (mode, all.wide_mult, all.reg);
  all.wide_trunc = gen_rtx_TRUNCATE (mode, all.wide_lshr);
  all.shift = gen_rtx_ASHIFT (mode, all.reg, all.reg);
  all.shift_mult = gen_rtx_MULT (mode, all.reg, all.reg);
  all.shift_add = gen_rtx_PLUS (mode, all.shift_mult, all.reg);
  all.shift_sub0 = gen_rtx_MINUS (mode, all.shift_mult, all.reg);
  all.shift_sub1 = gen_rtx_MINUS (mode, all.reg, all.shift_mult);
  all.trunc = gen_rtx_TRUNCATE (mode, all.reg);

  /* speed==0 prices for size, speed==1 for speed; toggling
     maybe_hot_insn_p makes the rtx cost hooks see the right setting.  */
  for (speed = 0; speed < 2; speed++)
    crtl->maybe_hot_insn_p = speed;
    set_zero_cost (speed, set_src_cost (const0_rtx, mode, speed));

    for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
	 mode = (machine_mode)(mode + 1))
      init_expmed_one_mode (&all, mode, speed);

    if (MIN_MODE_PARTIAL_INT != VOIDmode)
      for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
	   mode = (machine_mode)(mode + 1))
	init_expmed_one_mode (&all, mode, speed);

    if (MIN_MODE_VECTOR_INT != VOIDmode)
      for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
	   mode = (machine_mode)(mode + 1))
	init_expmed_one_mode (&all, mode, speed);

  /* Clear the stale synth_mult algorithm cache if it was populated on a
     previous (re-)initialization.  */
  if (alg_hash_used_p ())
    struct alg_hash_entry *p = alg_hash_entry_ptr (0);
    memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
  set_alg_hash_used_p (true);
  default_rtl_profile ();

  /* Release the GC-allocated scratch rtxes built above.  */
  ggc_free (all.trunc);
  ggc_free (all.shift_sub1);
  ggc_free (all.shift_sub0);
  ggc_free (all.shift_add);
  ggc_free (all.shift_mult);
  ggc_free (all.shift);
  ggc_free (all.wide_trunc);
  ggc_free (all.wide_lshr);
  ggc_free (all.wide_mult);
  ggc_free (all.smod_32);
  ggc_free (all.sdiv_32);
/* Return an rtx representing minus the value of X.
   MODE is the intended mode of the result,
   useful if X is a CONST_INT.

   First tries constant folding (simplify_unary_operation); only falls
   back to emitting a real negation.  NOTE(review): the return type,
   braces, the `if (result == 0)' guard and the final `return result;'
   appear to be missing from this extraction.  */
negate_rtx (machine_mode mode, rtx x)
  rtx result = simplify_unary_operation (NEG, mode, x, mode);

  result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
/* Whether reverse storage order is supported on the target.
   -1 means "not determined yet"; lazily computed below.  */
static int reverse_storage_order_supported = -1;

/* Check whether reverse storage order is supported on the target.
   Unsupported (and diagnosed via sorry ()) when byte and word
   endianness disagree.  NOTE(review): return type, braces and the
   `else' appear to be missing from this extraction.  */
check_reverse_storage_order_support (void)
  if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
    reverse_storage_order_supported = 0;
    sorry ("reverse scalar storage order");
  reverse_storage_order_supported = 1;
/* Whether reverse FP storage order is supported on the target.
   -1 means "not determined yet"; lazily computed below.  */
static int reverse_float_storage_order_supported = -1;

/* Check whether reverse FP storage order is supported on the target.
   Unsupported (and diagnosed via sorry ()) when FP word order and
   integer word order disagree.  NOTE(review): return type, braces and
   the `else' appear to be missing from this extraction.  */
check_reverse_float_storage_order_support (void)
  if (FLOAT_WORDS_BIG_ENDIAN != WORDS_BIG_ENDIAN)
    reverse_float_storage_order_supported = 0;
    sorry ("reverse floating-point scalar storage order");
  reverse_float_storage_order_supported = 1;
/* Return an rtx representing value of X with reverse storage order.
   MODE is the intended mode of the result,
   useful if X is a CONST_INT.

   Complex modes recurse on the real and imaginary parts; scalar modes
   are punned to an integer mode and byte-swapped (constant-folded via
   simplify_unary_operation when possible, otherwise expanded through
   bswap_optab).  NOTE(review): the return type, braces, declaration of
   `result', the `if (result == 0)' guard and the final return appear to
   be missing from this extraction.  */
flip_storage_order (machine_mode mode, rtx x)
  scalar_int_mode int_mode;

  if (COMPLEX_MODE_P (mode))
    rtx real = read_complex_part (x, false);
    rtx imag = read_complex_part (x, true);

    real = flip_storage_order (GET_MODE_INNER (mode), real);
    imag = flip_storage_order (GET_MODE_INNER (mode), imag);

    return gen_rtx_CONCAT (mode, real, imag);

  /* Lazily determine (once) whether the target can do this at all.  */
  if (__builtin_expect (reverse_storage_order_supported < 0, 0))
    check_reverse_storage_order_support ();

  if (!is_a <scalar_int_mode> (mode, &int_mode))
    if (FLOAT_MODE_P (mode)
	&& __builtin_expect (reverse_float_storage_order_supported < 0, 0))
      check_reverse_float_storage_order_support ();

    /* Pun the non-integer value to an integer mode of the same
       precision so it can be byte-swapped.  */
    if (!int_mode_for_size (GET_MODE_PRECISION (mode), 0).exists (&int_mode))
      sorry ("reverse storage order for %smode", GET_MODE_NAME (mode));
    x = gen_lowpart (int_mode, x);

  result = simplify_unary_operation (BSWAP, int_mode, x, int_mode);
  result = expand_unop (int_mode, bswap_optab, x, NULL_RTX, 1);

  if (int_mode != mode)
    result = gen_lowpart (mode, result);
/* If MODE is set, adjust bitfield memory MEM so that it points to the
   first unit of mode MODE that contains a bitfield of size BITSIZE at
   bit position BITNUM.  If MODE is not set, return a BLKmode reference
   to every byte in the bitfield.  Set *NEW_BITNUM to the bit position
   of the field within the new memory.

   NOTE(review): the return type, braces, and the `/ BITS_PER_UNIT'
   continuation of the `size' computation appear to be missing from
   this extraction.  */
narrow_bit_field_mem (rtx mem, opt_scalar_int_mode mode,
		      unsigned HOST_WIDE_INT bitsize,
		      unsigned HOST_WIDE_INT bitnum,
		      unsigned HOST_WIDE_INT *new_bitnum)
  scalar_int_mode imode;
  if (mode.exists (&imode))
    /* Align the reference down to a whole IMODE unit.  */
    unsigned int unit = GET_MODE_BITSIZE (imode);
    *new_bitnum = bitnum % unit;
    HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
    return adjust_bitfield_address (mem, imode, offset);

  /* No mode given: return a BLKmode reference covering exactly the
     bytes the field touches (size rounded up to whole bytes).  */
  *new_bitnum = bitnum % BITS_PER_UNIT;
  HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
  HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
  return adjust_bitfield_address_size (mem, BLKmode, offset, size);
/* The caller wants to perform insertion or extraction PATTERN on a
   bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
   BITREGION_START and BITREGION_END are as for store_bit_field
   and FIELDMODE is the natural mode of the field.

   Search for a mode that is compatible with the memory access
   restrictions and (where applicable) with a register insertion or
   extraction.  Return the new memory on success, storing the adjusted
   bit position in *NEW_BITNUM.  Return null otherwise.

   NOTE(review): the return type, braces, the trailing arguments of the
   get_best_reg_extraction_insn call and of the final
   narrow_bit_field_mem call, and the failure `return NULL_RTX;' appear
   to be missing from this extraction.  */
adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
			      rtx op0, HOST_WIDE_INT bitsize,
			      HOST_WIDE_INT bitnum,
			      poly_uint64 bitregion_start,
			      poly_uint64 bitregion_end,
			      machine_mode fieldmode,
			      unsigned HOST_WIDE_INT *new_bitnum)
  bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
				bitregion_end, MEM_ALIGN (op0),
				MEM_VOLATILE_P (op0));
  scalar_int_mode best_mode;
  if (iter.next_mode (&best_mode))
    /* We can use a memory in BEST_MODE.  See whether this is true for
       any wider modes.  All other things being equal, we prefer to
       use the widest mode possible because it tends to expose more
       CSE opportunities.  */
    if (!iter.prefer_smaller_modes ())
      /* Limit the search to the mode required by the corresponding
	 register insertion or extraction instruction, if any.  */
      scalar_int_mode limit_mode = word_mode;
      extraction_insn insn;
      if (get_best_reg_extraction_insn (&insn, pattern,
					GET_MODE_BITSIZE (best_mode),
	limit_mode = insn.field_mode;

      /* Keep widening while the iterator still accepts the mode and it
	 does not exceed the instruction's limit.  */
      scalar_int_mode wider_mode;
      while (iter.next_mode (&wider_mode)
	     && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
	best_mode = wider_mode;

    return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
/* Return true if a bitfield of size BITSIZE at bit number BITNUM within
   a structure of mode STRUCT_MODE represents a lowpart subreg.  The subreg
   offset is then BITNUM / BITS_PER_UNIT.

   On big-endian targets the "low part" sits at the high bit positions,
   hence the end-of-mode / register-boundary test.  NOTE(review): the
   return type, braces and an `else' before the final return appear to
   be missing from this extraction.  */
lowpart_bit_field_p (poly_uint64 bitnum, poly_uint64 bitsize,
		     machine_mode struct_mode)
  poly_uint64 regsize = REGMODE_NATURAL_SIZE (struct_mode);
  if (BYTES_BIG_ENDIAN)
    return (multiple_p (bitnum, BITS_PER_UNIT)
	    && (known_eq (bitnum + bitsize, GET_MODE_BITSIZE (struct_mode))
		|| multiple_p (bitnum + bitsize,
			       regsize * BITS_PER_UNIT)));
  return multiple_p (bitnum, regsize * BITS_PER_UNIT);
/* Return true if -fstrict-volatile-bitfields applies to an access of OP0
   containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
   Return false if the access would touch memory outside the range
   BITREGION_START to BITREGION_END for conformance to the C++ memory
   model.

   NOTE(review): the return type, the leading `if (!MEM_P (op0)' of the
   first condition, the `return false;'/`return true;' statements and
   the tail of the final maybe_gt comparison appear to be missing from
   this extraction.  */
strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
			    unsigned HOST_WIDE_INT bitnum,
			    scalar_int_mode fieldmode,
			    poly_uint64 bitregion_start,
			    poly_uint64 bitregion_end)
  unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode);

  /* -fstrict-volatile-bitfields must be enabled and we must have a
     volatile MEM.  */
      || !MEM_VOLATILE_P (op0)
      || flag_strict_volatile_bitfields <= 0)

  /* The bit size must not be larger than the field mode, and
     the field mode must not be larger than a word.  */
  if (bitsize > modesize || modesize > BITS_PER_WORD)

  /* Check for cases of unaligned fields that must be split.  */
  if (bitnum % modesize + bitsize > modesize)

  /* The memory must be sufficiently aligned for a MODESIZE access.
     This condition guarantees, that the memory access will not
     touch anything after the end of the structure.  */
  if (MEM_ALIGN (op0) < modesize)

  /* Check for cases where the C++ memory model applies.  */
  if (maybe_ne (bitregion_end, 0U)
      && (maybe_lt (bitnum - bitnum % modesize, bitregion_start)
	  || maybe_gt (bitnum - bitnum % modesize + modesize - 1,
/* Return true if OP is a memory and if a bitfield of size BITSIZE at
   bit number BITNUM can be treated as a simple value of mode MODE.
   Store the byte offset in *BYTENUM if so.

   The field qualifies when it starts on a byte boundary, exactly fills
   MODE, and is either on a target with fast unaligned access or is
   sufficiently aligned for MODE.  NOTE(review): the return type, braces
   and the leading `return (MEM_P (op0)' of the condition appear to be
   missing from this extraction.  */
simple_mem_bitfield_p (rtx op0, poly_uint64 bitsize, poly_uint64 bitnum,
		       machine_mode mode, poly_uint64 *bytenum)
	  && multiple_p (bitnum, BITS_PER_UNIT, bytenum)
	  && known_eq (bitsize, GET_MODE_BITSIZE (mode))
	  && (!targetm.slow_unaligned_access (mode, MEM_ALIGN (op0))
	      || (multiple_p (bitnum, GET_MODE_ALIGNMENT (mode))
		  && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
/* Try to use instruction INSV to store VALUE into a field of OP0.
   If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is a
   BLKmode MEM.  VALUE_MODE is the mode of VALUE.  BITSIZE and BITNUM
   are as for store_bit_field.

   NOTE(review): many lines are missing from this extraction — the
   return type, the declarations of xop0/value1/tmp, several `return
   false;'/`return true;' statements, `else' keywords, and the braces.
   Do not compile as-is; diff against the upstream file.  */
store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
			    opt_scalar_int_mode op0_mode,
			    unsigned HOST_WIDE_INT bitsize,
			    unsigned HOST_WIDE_INT bitnum,
			    rtx value, scalar_int_mode value_mode)
  class expand_operand ops[4];
  rtx_insn *last = get_last_insn ();
  bool copy_back = false;

  scalar_int_mode op_mode = insv->field_mode;
  unsigned int unit = GET_MODE_BITSIZE (op_mode);
  if (bitsize == 0 || bitsize > unit)

  /* Get a reference to the first byte of the field.  */
  xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,

  /* Convert from counting within OP0 to counting in OP_MODE.  */
  if (BYTES_BIG_ENDIAN)
    bitnum += unit - GET_MODE_BITSIZE (op0_mode.require ());

  /* If xop0 is a register, we need it in OP_MODE
     to make it acceptable to the format of insv.  */
  if (GET_CODE (xop0) == SUBREG)
    /* We can't just change the mode, because this might clobber op0,
       and we will need the original value of op0 if insv fails.  */
    xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
  if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
    xop0 = gen_lowpart_SUBREG (op_mode, xop0);

  /* If the destination is a paradoxical subreg such that we need a
     truncate to the inner mode, perform the insertion on a temporary and
     truncate the result to the original destination.  Note that we can't
     just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
     X) 0)) is (reg:N X).  */
  if (GET_CODE (xop0) == SUBREG
      && REG_P (SUBREG_REG (xop0))
      && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
    rtx tem = gen_reg_rtx (op_mode);
    emit_move_insn (tem, xop0);

  /* There are similar overflow check at the start of store_bit_field_1,
     but that only check the situation where the field lies completely
     outside the register, while there do have situation where the field
     lies partialy in the register, we need to adjust bitsize for this
     partial overflow situation.  Without this fix, pr48335-2.c on big-endian
     will broken on those arch support bit insert instruction, like arm, aarch64
     etc.  */
  if (bitsize + bitnum > unit && bitnum < unit)
    warning (OPT_Wextra, "write of %wu-bit data outside the bound of "
	     "destination object, data truncated into %wu-bit",
	     bitsize, unit - bitnum);
    bitsize = unit - bitnum;

  /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
     "backwards" from the size of the unit we are inserting into.
     Otherwise, we count bits from the most significant on a
     BYTES/BITS_BIG_ENDIAN machine.  */
  if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
    bitnum = unit - bitsize - bitnum;

  /* Convert VALUE to op_mode (which insv insn wants) in VALUE1.  */
  if (value_mode != op_mode)
    if (GET_MODE_BITSIZE (value_mode) >= bitsize)
      /* Optimization: Don't bother really extending VALUE
	 if it has all the bits we will actually use.  However,
	 if we must narrow it, be sure we do it correctly.  */
      if (GET_MODE_SIZE (value_mode) < GET_MODE_SIZE (op_mode))
	tmp = simplify_subreg (op_mode, value1, value_mode, 0);
	tmp = simplify_gen_subreg (op_mode,
				   force_reg (value_mode, value1),
      tmp = gen_lowpart_if_possible (op_mode, value1);
      tmp = gen_lowpart (op_mode, force_reg (value_mode, value1));
    else if (CONST_INT_P (value))
      value1 = gen_int_mode (INTVAL (value), op_mode);
      /* Parse phase is supposed to make VALUE's data type
	 match that of the component reference, which is a type
	 at least as wide as the field; so VALUE should have
	 a mode that corresponds to that type.  */
      gcc_assert (CONSTANT_P (value));

  /* Hand (dest, bitsize, bitnum, value) to the target's insv pattern.  */
  create_fixed_operand (&ops[0], xop0);
  create_integer_operand (&ops[1], bitsize);
  create_integer_operand (&ops[2], bitnum);
  create_input_operand (&ops[3], value1, op_mode);
  if (maybe_expand_insn (insv->icode, 4, ops))
    convert_move (op0, xop0, true);
  /* On failure, throw away everything emitted since LAST.  */
  delete_insns_since (last);
/* A subroutine of store_bit_field, with the same arguments.  Return true
   if the operation could be implemented.

   If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
   no other way of implementing the operation.  If FALLBACK_P is false,
   return false instead.

   NOTE(review): this extraction is missing many lines — the return
   type, the `rtx op0 = str_rtx;' initialization, several `return
   true;' statements, `if (!MEM_P ...' condition heads, `else'
   keywords, declarations (sub, bytenum, pos) and braces.  Do not
   compile as-is; diff against the upstream file.  */
store_bit_field_1 (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
		   poly_uint64 bitregion_start, poly_uint64 bitregion_end,
		   machine_mode fieldmode,
		   rtx value, bool reverse, bool fallback_p)
  /* Strip (possibly nested) subregs, folding their byte offsets into
     the bit position.  */
  while (GET_CODE (op0) == SUBREG)
    bitnum += subreg_memory_offset (op0) * BITS_PER_UNIT;
    op0 = SUBREG_REG (op0);

  /* No action is needed if the target is a register and if the field
     lies completely outside that register.  This can occur if the source
     code contains an out-of-bounds access to a small array.  */
  if (REG_P (op0) && known_ge (bitnum, GET_MODE_BITSIZE (GET_MODE (op0))))

  /* Use vec_set patterns for inserting parts of vectors whenever
     available.  */
  machine_mode outermode = GET_MODE (op0);
  scalar_mode innermode = GET_MODE_INNER (outermode);
  if (VECTOR_MODE_P (outermode)
      && optab_handler (vec_set_optab, outermode) != CODE_FOR_nothing
      && fieldmode == innermode
      && known_eq (bitsize, GET_MODE_BITSIZE (innermode))
      && multiple_p (bitnum, GET_MODE_BITSIZE (innermode), &pos))
    class expand_operand ops[3];
    enum insn_code icode = optab_handler (vec_set_optab, outermode);

    create_fixed_operand (&ops[0], op0);
    create_input_operand (&ops[1], value, innermode);
    create_integer_operand (&ops[2], pos);
    if (maybe_expand_insn (icode, 3, ops))

  /* If the target is a register, overwriting the entire object, or storing
     a full-word or multi-word field can be done with just a SUBREG.  */
      && known_eq (bitsize, GET_MODE_BITSIZE (fieldmode)))
    /* Use the subreg machinery either to narrow OP0 to the required
       words or to cope with mode punning between equal-sized modes.
       In the latter case, use subreg on the rhs side, not lhs.  */
    HOST_WIDE_INT regnum;
    poly_uint64 regsize = REGMODE_NATURAL_SIZE (GET_MODE (op0));
    if (known_eq (bitnum, 0U)
	&& known_eq (bitsize, GET_MODE_BITSIZE (GET_MODE (op0))))
      sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
	sub = flip_storage_order (GET_MODE (op0), sub);
      emit_move_insn (op0, sub);
    else if (constant_multiple_p (bitnum, regsize * BITS_PER_UNIT, &regnum)
	     && multiple_p (bitsize, regsize * BITS_PER_UNIT))
      sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
	value = flip_storage_order (fieldmode, value);
      emit_move_insn (sub, value);

  /* If the target is memory, storing any naturally aligned field can be
     done with a simple store.  For targets that support fast unaligned
     memory, any naturally sized, unit aligned field can be done directly.  */
  if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode, &bytenum))
    op0 = adjust_bitfield_address (op0, fieldmode, bytenum);
      value = flip_storage_order (fieldmode, value);
    emit_move_insn (op0, value);

  /* It's possible we'll need to handle other cases here for
     polynomial bitnum and bitsize.  */

  /* From here on we need to be looking at a fixed-size insertion.  */
  unsigned HOST_WIDE_INT ibitsize = bitsize.to_constant ();
  unsigned HOST_WIDE_INT ibitnum = bitnum.to_constant ();

  /* Make sure we are playing with integral modes.  Pun with subregs
     if we aren't.  This must come after the entire register case above,
     since that case is valid for any mode.  The following cases are only
     valid for integral modes.  */
  opt_scalar_int_mode op0_mode = int_mode_for_mode (GET_MODE (op0));
  scalar_int_mode imode;
  if (!op0_mode.exists (&imode) || imode != GET_MODE (op0))
      op0 = adjust_bitfield_address_size (op0, op0_mode.else_blk (),
    else if (!op0_mode.exists ())
	  && known_eq (ibitsize, GET_MODE_BITSIZE (GET_MODE (op0)))
	  value = adjust_address (value, GET_MODE (op0), 0);
	  emit_move_insn (op0, value);
	  /* Bounce the value through a stack temporary when OP0 cannot
	     be re-punned in place, then recurse on the temporary.  */
	  rtx temp = assign_stack_temp (GET_MODE (op0),
					GET_MODE_SIZE (GET_MODE (op0)));
	  emit_move_insn (temp, op0);
	  store_bit_field_1 (temp, bitsize, bitnum, 0, 0, fieldmode, value,
			     reverse, fallback_p);
	  emit_move_insn (op0, temp);
      op0 = gen_lowpart (op0_mode.require (), op0);

  return store_integral_bit_field (op0, op0_mode, ibitsize, ibitnum,
				   bitregion_start, bitregion_end,
				   fieldmode, value, reverse, fallback_p);
873 /* Subroutine of store_bit_field_1, with the same arguments, except
874 that BITSIZE and BITNUM are constant. Handle cases specific to
875 integral modes. If OP0_MODE is defined, it is the mode of OP0,
876 otherwise OP0 is a BLKmode MEM. */
879 store_integral_bit_field (rtx op0
, opt_scalar_int_mode op0_mode
,
880 unsigned HOST_WIDE_INT bitsize
,
881 unsigned HOST_WIDE_INT bitnum
,
882 poly_uint64 bitregion_start
,
883 poly_uint64 bitregion_end
,
884 machine_mode fieldmode
,
885 rtx value
, bool reverse
, bool fallback_p
)
887 /* Storing an lsb-aligned field in a register
888 can be done with a movstrict instruction. */
892 && lowpart_bit_field_p (bitnum
, bitsize
, op0_mode
.require ())
893 && known_eq (bitsize
, GET_MODE_BITSIZE (fieldmode
))
894 && optab_handler (movstrict_optab
, fieldmode
) != CODE_FOR_nothing
)
896 class expand_operand ops
[2];
897 enum insn_code icode
= optab_handler (movstrict_optab
, fieldmode
);
899 unsigned HOST_WIDE_INT subreg_off
;
901 if (GET_CODE (arg0
) == SUBREG
)
903 /* Else we've got some float mode source being extracted into
904 a different float mode destination -- this combination of
905 subregs results in Severe Tire Damage. */
906 gcc_assert (GET_MODE (SUBREG_REG (arg0
)) == fieldmode
907 || GET_MODE_CLASS (fieldmode
) == MODE_INT
908 || GET_MODE_CLASS (fieldmode
) == MODE_PARTIAL_INT
);
909 arg0
= SUBREG_REG (arg0
);
912 subreg_off
= bitnum
/ BITS_PER_UNIT
;
913 if (validate_subreg (fieldmode
, GET_MODE (arg0
), arg0
, subreg_off
))
915 arg0
= gen_rtx_SUBREG (fieldmode
, arg0
, subreg_off
);
917 create_fixed_operand (&ops
[0], arg0
);
918 /* Shrink the source operand to FIELDMODE. */
919 create_convert_operand_to (&ops
[1], value
, fieldmode
, false);
920 if (maybe_expand_insn (icode
, 2, ops
))
925 /* Handle fields bigger than a word. */
927 if (bitsize
> BITS_PER_WORD
)
929 /* Here we transfer the words of the field
930 in the order least significant first.
931 This is because the most significant word is the one which may
933 However, only do that if the value is not BLKmode. */
935 const bool backwards
= WORDS_BIG_ENDIAN
&& fieldmode
!= BLKmode
;
936 unsigned int nwords
= (bitsize
+ (BITS_PER_WORD
- 1)) / BITS_PER_WORD
;
940 /* This is the mode we must force value to, so that there will be enough
941 subwords to extract. Note that fieldmode will often (always?) be
942 VOIDmode, because that is what store_field uses to indicate that this
943 is a bit field, but passing VOIDmode to operand_subword_force
946 The mode must be fixed-size, since insertions into variable-sized
947 objects are meant to be handled before calling this function. */
948 fixed_size_mode value_mode
= as_a
<fixed_size_mode
> (GET_MODE (value
));
949 if (value_mode
== VOIDmode
)
950 value_mode
= smallest_int_mode_for_size (nwords
* BITS_PER_WORD
);
952 last
= get_last_insn ();
953 for (i
= 0; i
< nwords
; i
++)
955 /* If I is 0, use the low-order word in both field and target;
956 if I is 1, use the next to lowest word; and so on. */
957 unsigned int wordnum
= (backwards
958 ? GET_MODE_SIZE (value_mode
) / UNITS_PER_WORD
961 unsigned int bit_offset
= (backwards
^ reverse
962 ? MAX ((int) bitsize
- ((int) i
+ 1)
965 : (int) i
* BITS_PER_WORD
);
966 rtx value_word
= operand_subword_force (value
, wordnum
, value_mode
);
967 unsigned HOST_WIDE_INT new_bitsize
=
968 MIN (BITS_PER_WORD
, bitsize
- i
* BITS_PER_WORD
);
970 /* If the remaining chunk doesn't have full wordsize we have
971 to make sure that for big-endian machines the higher order
973 if (new_bitsize
< BITS_PER_WORD
&& BYTES_BIG_ENDIAN
&& !backwards
)
975 int shift
= BITS_PER_WORD
- new_bitsize
;
976 rtx shift_rtx
= gen_int_shift_amount (word_mode
, shift
);
977 value_word
= simplify_expand_binop (word_mode
, lshr_optab
,
978 value_word
, shift_rtx
,
983 if (!store_bit_field_1 (op0
, new_bitsize
,
985 bitregion_start
, bitregion_end
,
987 value_word
, reverse
, fallback_p
))
989 delete_insns_since (last
);
996 /* If VALUE has a floating-point or complex mode, access it as an
997 integer of the corresponding size. This can occur on a machine
998 with 64 bit registers that uses SFmode for float. It can also
999 occur for unaligned float or complex fields. */
1000 rtx orig_value
= value
;
1001 scalar_int_mode value_mode
;
1002 if (GET_MODE (value
) == VOIDmode
)
1003 /* By this point we've dealt with values that are bigger than a word,
1004 so word_mode is a conservatively correct choice. */
1005 value_mode
= word_mode
;
1006 else if (!is_a
<scalar_int_mode
> (GET_MODE (value
), &value_mode
))
1008 value_mode
= int_mode_for_mode (GET_MODE (value
)).require ();
1009 value
= gen_reg_rtx (value_mode
);
1010 emit_move_insn (gen_lowpart (GET_MODE (orig_value
), value
), orig_value
);
1013 /* If OP0 is a multi-word register, narrow it to the affected word.
1014 If the region spans two words, defer to store_split_bit_field.
1015 Don't do this if op0 is a single hard register wider than word
1016 such as a float or vector register. */
1018 && GET_MODE_SIZE (op0_mode
.require ()) > UNITS_PER_WORD
1020 || !HARD_REGISTER_P (op0
)
1021 || hard_regno_nregs (REGNO (op0
), op0_mode
.require ()) != 1))
1023 if (bitnum
% BITS_PER_WORD
+ bitsize
> BITS_PER_WORD
)
1028 store_split_bit_field (op0
, op0_mode
, bitsize
, bitnum
,
1029 bitregion_start
, bitregion_end
,
1030 value
, value_mode
, reverse
);
1033 op0
= simplify_gen_subreg (word_mode
, op0
, op0_mode
.require (),
1034 bitnum
/ BITS_PER_WORD
* UNITS_PER_WORD
);
1036 op0_mode
= word_mode
;
1037 bitnum
%= BITS_PER_WORD
;
1040 /* From here on we can assume that the field to be stored in fits
1041 within a word. If the destination is a register, it too fits
1044 extraction_insn insv
;
1047 && get_best_reg_extraction_insn (&insv
, EP_insv
,
1048 GET_MODE_BITSIZE (op0_mode
.require ()),
1050 && store_bit_field_using_insv (&insv
, op0
, op0_mode
,
1051 bitsize
, bitnum
, value
, value_mode
))
1054 /* If OP0 is a memory, try copying it to a register and seeing if a
1055 cheap register alternative is available. */
1056 if (MEM_P (op0
) && !reverse
)
1058 if (get_best_mem_extraction_insn (&insv
, EP_insv
, bitsize
, bitnum
,
1060 && store_bit_field_using_insv (&insv
, op0
, op0_mode
,
1061 bitsize
, bitnum
, value
, value_mode
))
1064 rtx_insn
*last
= get_last_insn ();
1066 /* Try loading part of OP0 into a register, inserting the bitfield
1067 into that, and then copying the result back to OP0. */
1068 unsigned HOST_WIDE_INT bitpos
;
1069 rtx xop0
= adjust_bit_field_mem_for_reg (EP_insv
, op0
, bitsize
, bitnum
,
1070 bitregion_start
, bitregion_end
,
1071 fieldmode
, &bitpos
);
1074 rtx tempreg
= copy_to_reg (xop0
);
1075 if (store_bit_field_1 (tempreg
, bitsize
, bitpos
,
1076 bitregion_start
, bitregion_end
,
1077 fieldmode
, orig_value
, reverse
, false))
1079 emit_move_insn (xop0
, tempreg
);
1082 delete_insns_since (last
);
1089 store_fixed_bit_field (op0
, op0_mode
, bitsize
, bitnum
, bitregion_start
,
1090 bitregion_end
, value
, value_mode
, reverse
);
1094 /* Generate code to store value from rtx VALUE
1095 into a bit-field within structure STR_RTX
1096 containing BITSIZE bits starting at bit BITNUM.
1098 BITREGION_START is bitpos of the first bitfield in this region.
1099 BITREGION_END is the bitpos of the ending bitfield in this region.
1100 These two fields are 0, if the C++ memory model does not apply,
1101 or we are not interested in keeping track of bitfield regions.
1103 FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
1105 If REVERSE is true, the store is to be done in reverse order. */
1108 store_bit_field (rtx str_rtx
, poly_uint64 bitsize
, poly_uint64 bitnum
,
1109 poly_uint64 bitregion_start
, poly_uint64 bitregion_end
,
1110 machine_mode fieldmode
,
1111 rtx value
, bool reverse
)
1113 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
1114 unsigned HOST_WIDE_INT ibitsize
= 0, ibitnum
= 0;
1115 scalar_int_mode int_mode
;
1116 if (bitsize
.is_constant (&ibitsize
)
1117 && bitnum
.is_constant (&ibitnum
)
1118 && is_a
<scalar_int_mode
> (fieldmode
, &int_mode
)
1119 && strict_volatile_bitfield_p (str_rtx
, ibitsize
, ibitnum
, int_mode
,
1120 bitregion_start
, bitregion_end
))
1122 /* Storing of a full word can be done with a simple store.
1123 We know here that the field can be accessed with one single
1124 instruction. For targets that support unaligned memory,
1125 an unaligned access may be necessary. */
1126 if (ibitsize
== GET_MODE_BITSIZE (int_mode
))
1128 str_rtx
= adjust_bitfield_address (str_rtx
, int_mode
,
1129 ibitnum
/ BITS_PER_UNIT
);
1131 value
= flip_storage_order (int_mode
, value
);
1132 gcc_assert (ibitnum
% BITS_PER_UNIT
== 0);
1133 emit_move_insn (str_rtx
, value
);
1139 str_rtx
= narrow_bit_field_mem (str_rtx
, int_mode
, ibitsize
,
1141 gcc_assert (ibitnum
+ ibitsize
<= GET_MODE_BITSIZE (int_mode
));
1142 temp
= copy_to_reg (str_rtx
);
1143 if (!store_bit_field_1 (temp
, ibitsize
, ibitnum
, 0, 0,
1144 int_mode
, value
, reverse
, true))
1147 emit_move_insn (str_rtx
, temp
);
1153 /* Under the C++0x memory model, we must not touch bits outside the
1154 bit region. Adjust the address to start at the beginning of the
1156 if (MEM_P (str_rtx
) && maybe_ne (bitregion_start
, 0U))
1158 scalar_int_mode best_mode
;
1159 machine_mode addr_mode
= VOIDmode
;
1161 poly_uint64 offset
= exact_div (bitregion_start
, BITS_PER_UNIT
);
1162 bitnum
-= bitregion_start
;
1163 poly_int64 size
= bits_to_bytes_round_up (bitnum
+ bitsize
);
1164 bitregion_end
-= bitregion_start
;
1165 bitregion_start
= 0;
1166 if (bitsize
.is_constant (&ibitsize
)
1167 && bitnum
.is_constant (&ibitnum
)
1168 && get_best_mode (ibitsize
, ibitnum
,
1169 bitregion_start
, bitregion_end
,
1170 MEM_ALIGN (str_rtx
), INT_MAX
,
1171 MEM_VOLATILE_P (str_rtx
), &best_mode
))
1172 addr_mode
= best_mode
;
1173 str_rtx
= adjust_bitfield_address_size (str_rtx
, addr_mode
,
1177 if (!store_bit_field_1 (str_rtx
, bitsize
, bitnum
,
1178 bitregion_start
, bitregion_end
,
1179 fieldmode
, value
, reverse
, true))
1183 /* Use shifts and boolean operations to store VALUE into a bit field of
1184 width BITSIZE in OP0, starting at bit BITNUM. If OP0_MODE is defined,
1185 it is the mode of OP0, otherwise OP0 is a BLKmode MEM. VALUE_MODE is
1188 If REVERSE is true, the store is to be done in reverse order. */
/* NOTE(review): this chunk is a damaged extraction of GCC's expmed.c.
   Stray original line numbers are fused into the text, and many original
   lines are missing from this function (e.g. 1186-1187, 1189-1190, 1196,
   1203, 1205-1206, 1211, 1214-1215, 1217, 1221-1226, 1228, 1231-1232),
   including braces and an else arm.  Do not hand-edit the logic here;
   restore the function from the upstream source first.  */
1191 store_fixed_bit_field (rtx op0
, opt_scalar_int_mode op0_mode
,
1192 unsigned HOST_WIDE_INT bitsize
,
1193 unsigned HOST_WIDE_INT bitnum
,
1194 poly_uint64 bitregion_start
, poly_uint64 bitregion_end
,
1195 rtx value
, scalar_int_mode value_mode
, bool reverse
)
1197 /* There is a case not handled here:
1198 a structure with a known alignment of just a halfword
1199 and a field split across two aligned halfwords within the structure.
1200 Or likewise a structure with a known alignment of just a byte
1201 and a field split across two bytes.
1202 Such cases are not supposed to be able to occur. */
1204 scalar_int_mode best_mode
;
/* Limit the access width to one word, or to OP0's own mode if that is
   narrower.  (The missing line 1206 presumably guards this path with a
   MEM_P test -- TODO confirm against upstream.)  */
1207 unsigned int max_bitsize
= BITS_PER_WORD
;
1208 scalar_int_mode imode
;
1209 if (op0_mode
.exists (&imode
) && GET_MODE_BITSIZE (imode
) < max_bitsize
)
1210 max_bitsize
= GET_MODE_BITSIZE (imode
);
/* If no single access mode can reach the whole field within the allowed
   bit region, fall back to storing the field in word-sized pieces.  */
1212 if (!get_best_mode (bitsize
, bitnum
, bitregion_start
, bitregion_end
,
1213 MEM_ALIGN (op0
), max_bitsize
, MEM_VOLATILE_P (op0
),
1216 /* The only way this should occur is if the field spans word
1218 store_split_bit_field (op0
, op0_mode
, bitsize
, bitnum
,
1219 bitregion_start
, bitregion_end
,
1220 value
, value_mode
, reverse
);
1224 op0
= narrow_bit_field_mem (op0
, best_mode
, bitsize
, bitnum
, &bitnum
);
1227 best_mode
= op0_mode
.require ();
1229 store_fixed_bit_field_1 (op0
, best_mode
, bitsize
, bitnum
,
1230 value
, value_mode
, reverse
);
1233 /* Helper function for store_fixed_bit_field, stores
1234 the bit field always using MODE, which is the mode of OP0. The other
1235 arguments are as for store_fixed_bit_field. */
/* NOTE(review): damaged extraction -- original lines are missing from
   this function (e.g. 1236-1237 with the return type, 1242-1246 with the
   local declarations, 1249, 1255, 1257-1258, 1261, 1263, 1265, 1268-1270,
   1275-1276, 1278-1280, 1283, 1286-1287, 1291, 1294-1296, 1298, 1303,
   1305-1307, 1309, 1314-1315, 1317-1319, 1323-1326, 1329-1330).  The
   local 'temp' used below is declared on one of the lost lines.  Restore
   from upstream before editing.  */
1238 store_fixed_bit_field_1 (rtx op0
, scalar_int_mode mode
,
1239 unsigned HOST_WIDE_INT bitsize
,
1240 unsigned HOST_WIDE_INT bitnum
,
1241 rtx value
, scalar_int_mode value_mode
, bool reverse
)
1247 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1248 for invalid input, such as f5 from gcc.dg/pr48335-2.c. */
/* For a big-endian store (or a reversed little-endian one), rewrite
   BITNUM as a distance from the least significant bit of MODE.  */
1250 if (reverse
? !BYTES_BIG_ENDIAN
: BYTES_BIG_ENDIAN
)
1251 /* BITNUM is the distance between our msb
1252 and that of the containing datum.
1253 Convert it to the distance from the lsb. */
1254 bitnum
= GET_MODE_BITSIZE (mode
) - bitsize
- bitnum
;
1256 /* Now BITNUM is always the distance between our lsb
1259 /* Shift VALUE left by BITNUM bits. If VALUE is not constant,
1260 we must first convert its mode to MODE. */
/* Constant VALUE: mask it down to BITSIZE bits; the else-if below
   recognizes an all-ones field value.  (The all-zero case was handled on
   the lost lines 1268-1270 -- presumably setting a flag consulted later;
   TODO confirm against upstream.)  */
1262 if (CONST_INT_P (value
))
1264 unsigned HOST_WIDE_INT v
= UINTVAL (value
);
1266 if (bitsize
< HOST_BITS_PER_WIDE_INT
)
1267 v
&= (HOST_WIDE_INT_1U
<< bitsize
) - 1;
1271 else if ((bitsize
< HOST_BITS_PER_WIDE_INT
1272 && v
== (HOST_WIDE_INT_1U
<< bitsize
) - 1)
1273 || (bitsize
== HOST_BITS_PER_WIDE_INT
1274 && v
== HOST_WIDE_INT_M1U
))
1277 value
= lshift_value (mode
, v
, bitnum
);
/* Non-constant VALUE: an AND is needed only when VALUE_MODE holds more
   bits than the field and the field does not reach MODE's top bit.  */
1281 int must_and
= (GET_MODE_BITSIZE (value_mode
) != bitsize
1282 && bitnum
+ bitsize
!= GET_MODE_BITSIZE (mode
));
1284 if (value_mode
!= mode
)
1285 value
= convert_to_mode (mode
, value
, 1);
1288 value
= expand_binop (mode
, and_optab
, value
,
1289 mask_rtx (mode
, 0, bitsize
, 0),
1290 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
1292 value
= expand_shift (LSHIFT_EXPR
, mode
, value
,
1293 bitnum
, NULL_RTX
, 1);
1297 value
= flip_storage_order (mode
, value
);
1299 /* Now clear the chosen bits in OP0,
1300 except that if VALUE is -1 we need not bother. */
1301 /* We keep the intermediates in registers to allow CSE to combine
1302 consecutive bitfield assignments. */
/* NOTE(review): 'temp' below has no visible declaration -- it sits on a
   lost line (circa 1243).  */
1304 temp
= force_reg (mode
, op0
);
1308 rtx mask
= mask_rtx (mode
, bitnum
, bitsize
, 1);
1310 mask
= flip_storage_order (mode
, mask
);
1311 temp
= expand_binop (mode
, and_optab
, temp
, mask
,
1312 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
1313 temp
= force_reg (mode
, temp
);
1316 /* Now logical-or VALUE into OP0, unless it is zero. */
1320 temp
= expand_binop (mode
, ior_optab
, temp
, value
,
1321 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
1322 temp
= force_reg (mode
, temp
);
1327 op0
= copy_rtx (op0
);
1328 emit_move_insn (op0
, temp
);
1332 /* Store a bit field that is split across multiple accessible memory objects.
1334 OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1335 BITSIZE is the field width; BITPOS the position of its first bit
1337 VALUE is the value to store, which has mode VALUE_MODE.
1338 If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is
1341 If REVERSE is true, the store is to be done in reverse order.
1343 This does not yet handle fields wider than BITS_PER_WORD. */
1346 store_split_bit_field (rtx op0
, opt_scalar_int_mode op0_mode
,
1347 unsigned HOST_WIDE_INT bitsize
,
1348 unsigned HOST_WIDE_INT bitpos
,
1349 poly_uint64 bitregion_start
, poly_uint64 bitregion_end
,
1350 rtx value
, scalar_int_mode value_mode
, bool reverse
)
1352 unsigned int unit
, total_bits
, bitsdone
= 0;
1354 /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1356 if (REG_P (op0
) || GET_CODE (op0
) == SUBREG
)
1357 unit
= BITS_PER_WORD
;
1359 unit
= MIN (MEM_ALIGN (op0
), BITS_PER_WORD
);
1361 /* If OP0 is a memory with a mode, then UNIT must not be larger than
1362 OP0's mode as well. Otherwise, store_fixed_bit_field will call us
1363 again, and we will mutually recurse forever. */
1364 if (MEM_P (op0
) && op0_mode
.exists ())
1365 unit
= MIN (unit
, GET_MODE_BITSIZE (op0_mode
.require ()));
1367 /* If VALUE is a constant other than a CONST_INT, get it into a register in
1368 WORD_MODE. If we can do this using gen_lowpart_common, do so. Note
1369 that VALUE might be a floating-point constant. */
1370 if (CONSTANT_P (value
) && !CONST_INT_P (value
))
1372 rtx word
= gen_lowpart_common (word_mode
, value
);
1374 if (word
&& (value
!= word
))
1377 value
= gen_lowpart_common (word_mode
, force_reg (value_mode
, value
));
1378 value_mode
= word_mode
;
1381 total_bits
= GET_MODE_BITSIZE (value_mode
);
1383 while (bitsdone
< bitsize
)
1385 unsigned HOST_WIDE_INT thissize
;
1386 unsigned HOST_WIDE_INT thispos
;
1387 unsigned HOST_WIDE_INT offset
;
1390 offset
= (bitpos
+ bitsdone
) / unit
;
1391 thispos
= (bitpos
+ bitsdone
) % unit
;
1393 /* When region of bytes we can touch is restricted, decrease
1394 UNIT close to the end of the region as needed. If op0 is a REG
1395 or SUBREG of REG, don't do this, as there can't be data races
1396 on a register and we can expand shorter code in some cases. */
1397 if (maybe_ne (bitregion_end
, 0U)
1398 && unit
> BITS_PER_UNIT
1399 && maybe_gt (bitpos
+ bitsdone
- thispos
+ unit
, bitregion_end
+ 1)
1401 && (GET_CODE (op0
) != SUBREG
|| !REG_P (SUBREG_REG (op0
))))
1407 /* THISSIZE must not overrun a word boundary. Otherwise,
1408 store_fixed_bit_field will call us again, and we will mutually
1410 thissize
= MIN (bitsize
- bitsdone
, BITS_PER_WORD
);
1411 thissize
= MIN (thissize
, unit
- thispos
);
1413 if (reverse
? !BYTES_BIG_ENDIAN
: BYTES_BIG_ENDIAN
)
1415 /* Fetch successively less significant portions. */
1416 if (CONST_INT_P (value
))
1417 part
= GEN_INT (((unsigned HOST_WIDE_INT
) (INTVAL (value
))
1418 >> (bitsize
- bitsdone
- thissize
))
1419 & ((HOST_WIDE_INT_1
<< thissize
) - 1));
1420 /* Likewise, but the source is little-endian. */
1422 part
= extract_fixed_bit_field (word_mode
, value
, value_mode
,
1424 bitsize
- bitsdone
- thissize
,
1425 NULL_RTX
, 1, false);
1427 /* The args are chosen so that the last part includes the
1428 lsb. Give extract_bit_field the value it needs (with
1429 endianness compensation) to fetch the piece we want. */
1430 part
= extract_fixed_bit_field (word_mode
, value
, value_mode
,
1432 total_bits
- bitsize
+ bitsdone
,
1433 NULL_RTX
, 1, false);
1437 /* Fetch successively more significant portions. */
1438 if (CONST_INT_P (value
))
1439 part
= GEN_INT (((unsigned HOST_WIDE_INT
) (INTVAL (value
))
1441 & ((HOST_WIDE_INT_1
<< thissize
) - 1));
1442 /* Likewise, but the source is big-endian. */
1444 part
= extract_fixed_bit_field (word_mode
, value
, value_mode
,
1446 total_bits
- bitsdone
- thissize
,
1447 NULL_RTX
, 1, false);
1449 part
= extract_fixed_bit_field (word_mode
, value
, value_mode
,
1450 thissize
, bitsdone
, NULL_RTX
,
1454 /* If OP0 is a register, then handle OFFSET here. */
1455 rtx op0_piece
= op0
;
1456 opt_scalar_int_mode op0_piece_mode
= op0_mode
;
1457 if (SUBREG_P (op0
) || REG_P (op0
))
1459 scalar_int_mode imode
;
1460 if (op0_mode
.exists (&imode
)
1461 && GET_MODE_SIZE (imode
) < UNITS_PER_WORD
)
1464 op0_piece
= const0_rtx
;
1468 op0_piece
= operand_subword_force (op0
,
1469 offset
* unit
/ BITS_PER_WORD
,
1471 op0_piece_mode
= word_mode
;
1473 offset
&= BITS_PER_WORD
/ unit
- 1;
1476 /* OFFSET is in UNITs, and UNIT is in bits. If WORD is const0_rtx,
1477 it is just an out-of-bounds access. Ignore it. */
1478 if (op0_piece
!= const0_rtx
)
1479 store_fixed_bit_field (op0_piece
, op0_piece_mode
, thissize
,
1480 offset
* unit
+ thispos
, bitregion_start
,
1481 bitregion_end
, part
, word_mode
, reverse
);
1482 bitsdone
+= thissize
;
/* NOTE(review): damaged extraction of GCC's expmed.c -- the return-type
   line (orig 1489-1490), the opening brace and the early 'return x;'
   (orig 1493, 1495-1496), and the closing braces (orig 1501, 1506-1507,
   1509) are missing from this copy.  Restore from upstream before
   editing.  */
1486 /* A subroutine of extract_bit_field_1 that converts return value X
1487 to either MODE or TMODE. MODE, TMODE and UNSIGNEDP are arguments
1488 to extract_bit_field. */
1491 convert_extracted_bit_field (rtx x
, machine_mode mode
,
1492 machine_mode tmode
, bool unsignedp
)
/* If X already has the wanted mode, it is returned unchanged (the
   'return x;' body of this if sits on a lost line).  */
1494 if (GET_MODE (x
) == tmode
|| GET_MODE (x
) == mode
)
1497 /* If the x mode is not a scalar integral, first convert to the
1498 integer mode of that size and then access it as a floating-point
1499 value via a SUBREG. */
1500 if (!SCALAR_INT_MODE_P (tmode
))
1502 scalar_int_mode int_mode
= int_mode_for_mode (tmode
).require ();
1503 x
= convert_to_mode (int_mode
, x
, unsignedp
);
1504 x
= force_reg (int_mode
, x
);
1505 return gen_lowpart (tmode
, x
);
/* Scalar integral TMODE: a plain (sign- or zero-extending) conversion
   suffices.  */
1508 return convert_to_mode (tmode
, x
, unsignedp
);
1511 /* Try to use an ext(z)v pattern to extract a field from OP0.
1512 Return the extracted value on success, otherwise return null.
1513 EXTV describes the extraction instruction to use. If OP0_MODE
1514 is defined, it is the mode of OP0, otherwise OP0 is a BLKmode MEM.
1515 The other arguments are as for extract_bit_field. */
1518 extract_bit_field_using_extv (const extraction_insn
*extv
, rtx op0
,
1519 opt_scalar_int_mode op0_mode
,
1520 unsigned HOST_WIDE_INT bitsize
,
1521 unsigned HOST_WIDE_INT bitnum
,
1522 int unsignedp
, rtx target
,
1523 machine_mode mode
, machine_mode tmode
)
1525 class expand_operand ops
[4];
1526 rtx spec_target
= target
;
1527 rtx spec_target_subreg
= 0;
1528 scalar_int_mode ext_mode
= extv
->field_mode
;
1529 unsigned unit
= GET_MODE_BITSIZE (ext_mode
);
1531 if (bitsize
== 0 || unit
< bitsize
)
1535 /* Get a reference to the first byte of the field. */
1536 op0
= narrow_bit_field_mem (op0
, extv
->struct_mode
, bitsize
, bitnum
,
1540 /* Convert from counting within OP0 to counting in EXT_MODE. */
1541 if (BYTES_BIG_ENDIAN
)
1542 bitnum
+= unit
- GET_MODE_BITSIZE (op0_mode
.require ());
1544 /* If op0 is a register, we need it in EXT_MODE to make it
1545 acceptable to the format of ext(z)v. */
1546 if (GET_CODE (op0
) == SUBREG
&& op0_mode
.require () != ext_mode
)
1548 if (REG_P (op0
) && op0_mode
.require () != ext_mode
)
1549 op0
= gen_lowpart_SUBREG (ext_mode
, op0
);
1552 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1553 "backwards" from the size of the unit we are extracting from.
1554 Otherwise, we count bits from the most significant on a
1555 BYTES/BITS_BIG_ENDIAN machine. */
1557 if (BITS_BIG_ENDIAN
!= BYTES_BIG_ENDIAN
)
1558 bitnum
= unit
- bitsize
- bitnum
;
1561 target
= spec_target
= gen_reg_rtx (tmode
);
1563 if (GET_MODE (target
) != ext_mode
)
1565 /* Don't use LHS paradoxical subreg if explicit truncation is needed
1566 between the mode of the extraction (word_mode) and the target
1567 mode. Instead, create a temporary and use convert_move to set
1570 && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target
), ext_mode
))
1572 target
= gen_lowpart (ext_mode
, target
);
1573 if (partial_subreg_p (GET_MODE (spec_target
), ext_mode
))
1574 spec_target_subreg
= target
;
1577 target
= gen_reg_rtx (ext_mode
);
1580 create_output_operand (&ops
[0], target
, ext_mode
);
1581 create_fixed_operand (&ops
[1], op0
);
1582 create_integer_operand (&ops
[2], bitsize
);
1583 create_integer_operand (&ops
[3], bitnum
);
1584 if (maybe_expand_insn (extv
->icode
, 4, ops
))
1586 target
= ops
[0].value
;
1587 if (target
== spec_target
)
1589 if (target
== spec_target_subreg
)
1591 return convert_extracted_bit_field (target
, mode
, tmode
, unsignedp
);
1596 /* See whether it would be valid to extract the part of OP0 described
1597 by BITNUM and BITSIZE into a value of mode MODE using a subreg
1598 operation. Return the subreg if so, otherwise return null. */
/* NOTE(review): damaged extraction -- the return-type line (orig
   1599-1600), the opening brace (orig 1603), and the fallthrough
   'return NULL_RTX;' plus closing brace (orig 1610-1611) are missing
   from this copy.  Also note the comment block above is, in this file's
   actual parse, the tail of a comment left unclosed at orig line 1565.
   Restore from upstream before editing.  */
1601 extract_bit_field_as_subreg (machine_mode mode
, rtx op0
,
1602 poly_uint64 bitsize
, poly_uint64 bitnum
)
/* A subreg is usable when the field starts on a byte boundary, exactly
   fills MODE, is a lowpart of OP0, and truncating OP0's mode to MODE is
   a no-op on this target.  */
1604 poly_uint64 bytenum
;
1605 if (multiple_p (bitnum
, BITS_PER_UNIT
, &bytenum
)
1606 && known_eq (bitsize
, GET_MODE_BITSIZE (mode
))
1607 && lowpart_bit_field_p (bitnum
, bitsize
, GET_MODE (op0
))
1608 && TRULY_NOOP_TRUNCATION_MODES_P (mode
, GET_MODE (op0
)))
1609 return simplify_gen_subreg (mode
, op0
, GET_MODE (op0
), bytenum
);
1613 /* A subroutine of extract_bit_field, with the same arguments.
1614 If FALLBACK_P is true, fall back to extract_fixed_bit_field
1615 if we can find no other means of implementing the operation.
1616 if FALLBACK_P is false, return NULL instead. */
1619 extract_bit_field_1 (rtx str_rtx
, poly_uint64 bitsize
, poly_uint64 bitnum
,
1620 int unsignedp
, rtx target
, machine_mode mode
,
1621 machine_mode tmode
, bool reverse
, bool fallback_p
,
1627 if (tmode
== VOIDmode
)
1630 while (GET_CODE (op0
) == SUBREG
)
1632 bitnum
+= SUBREG_BYTE (op0
) * BITS_PER_UNIT
;
1633 op0
= SUBREG_REG (op0
);
1636 /* If we have an out-of-bounds access to a register, just return an
1637 uninitialized register of the required mode. This can occur if the
1638 source code contains an out-of-bounds access to a small array. */
1639 if (REG_P (op0
) && known_ge (bitnum
, GET_MODE_BITSIZE (GET_MODE (op0
))))
1640 return gen_reg_rtx (tmode
);
1643 && mode
== GET_MODE (op0
)
1644 && known_eq (bitnum
, 0U)
1645 && known_eq (bitsize
, GET_MODE_BITSIZE (GET_MODE (op0
))))
1648 op0
= flip_storage_order (mode
, op0
);
1649 /* We're trying to extract a full register from itself. */
1653 /* First try to check for vector from vector extractions. */
1654 if (VECTOR_MODE_P (GET_MODE (op0
))
1656 && VECTOR_MODE_P (tmode
)
1657 && known_eq (bitsize
, GET_MODE_BITSIZE (tmode
))
1658 && maybe_gt (GET_MODE_SIZE (GET_MODE (op0
)), GET_MODE_SIZE (tmode
)))
1660 machine_mode new_mode
= GET_MODE (op0
);
1661 if (GET_MODE_INNER (new_mode
) != GET_MODE_INNER (tmode
))
1663 scalar_mode inner_mode
= GET_MODE_INNER (tmode
);
1665 if (!multiple_p (GET_MODE_BITSIZE (GET_MODE (op0
)),
1666 GET_MODE_UNIT_BITSIZE (tmode
), &nunits
)
1667 || !related_vector_mode (tmode
, inner_mode
,
1668 nunits
).exists (&new_mode
)
1669 || maybe_ne (GET_MODE_SIZE (new_mode
),
1670 GET_MODE_SIZE (GET_MODE (op0
))))
1671 new_mode
= VOIDmode
;
1674 if (new_mode
!= VOIDmode
1675 && (convert_optab_handler (vec_extract_optab
, new_mode
, tmode
)
1676 != CODE_FOR_nothing
)
1677 && multiple_p (bitnum
, GET_MODE_BITSIZE (tmode
), &pos
))
1679 class expand_operand ops
[3];
1680 machine_mode outermode
= new_mode
;
1681 machine_mode innermode
= tmode
;
1682 enum insn_code icode
1683 = convert_optab_handler (vec_extract_optab
, outermode
, innermode
);
1685 if (new_mode
!= GET_MODE (op0
))
1686 op0
= gen_lowpart (new_mode
, op0
);
1687 create_output_operand (&ops
[0], target
, innermode
);
1689 create_input_operand (&ops
[1], op0
, outermode
);
1690 create_integer_operand (&ops
[2], pos
);
1691 if (maybe_expand_insn (icode
, 3, ops
))
1693 if (alt_rtl
&& ops
[0].target
)
1695 target
= ops
[0].value
;
1696 if (GET_MODE (target
) != mode
)
1697 return gen_lowpart (tmode
, target
);
1703 /* See if we can get a better vector mode before extracting. */
1704 if (VECTOR_MODE_P (GET_MODE (op0
))
1706 && GET_MODE_INNER (GET_MODE (op0
)) != tmode
)
1708 machine_mode new_mode
;
1710 if (GET_MODE_CLASS (tmode
) == MODE_FLOAT
)
1711 new_mode
= MIN_MODE_VECTOR_FLOAT
;
1712 else if (GET_MODE_CLASS (tmode
) == MODE_FRACT
)
1713 new_mode
= MIN_MODE_VECTOR_FRACT
;
1714 else if (GET_MODE_CLASS (tmode
) == MODE_UFRACT
)
1715 new_mode
= MIN_MODE_VECTOR_UFRACT
;
1716 else if (GET_MODE_CLASS (tmode
) == MODE_ACCUM
)
1717 new_mode
= MIN_MODE_VECTOR_ACCUM
;
1718 else if (GET_MODE_CLASS (tmode
) == MODE_UACCUM
)
1719 new_mode
= MIN_MODE_VECTOR_UACCUM
;
1721 new_mode
= MIN_MODE_VECTOR_INT
;
1723 FOR_EACH_MODE_FROM (new_mode
, new_mode
)
1724 if (known_eq (GET_MODE_SIZE (new_mode
), GET_MODE_SIZE (GET_MODE (op0
)))
1725 && known_eq (GET_MODE_UNIT_SIZE (new_mode
), GET_MODE_SIZE (tmode
))
1726 && targetm
.vector_mode_supported_p (new_mode
))
1728 if (new_mode
!= VOIDmode
)
1729 op0
= gen_lowpart (new_mode
, op0
);
1732 /* Use vec_extract patterns for extracting parts of vectors whenever
1733 available. If that fails, see whether the current modes and bitregion
1734 give a natural subreg. */
1735 machine_mode outermode
= GET_MODE (op0
);
1736 if (VECTOR_MODE_P (outermode
) && !MEM_P (op0
))
1738 scalar_mode innermode
= GET_MODE_INNER (outermode
);
1739 enum insn_code icode
1740 = convert_optab_handler (vec_extract_optab
, outermode
, innermode
);
1742 if (icode
!= CODE_FOR_nothing
1743 && known_eq (bitsize
, GET_MODE_BITSIZE (innermode
))
1744 && multiple_p (bitnum
, GET_MODE_BITSIZE (innermode
), &pos
))
1746 class expand_operand ops
[3];
1748 create_output_operand (&ops
[0], target
, innermode
);
1750 create_input_operand (&ops
[1], op0
, outermode
);
1751 create_integer_operand (&ops
[2], pos
);
1752 if (maybe_expand_insn (icode
, 3, ops
))
1754 if (alt_rtl
&& ops
[0].target
)
1756 target
= ops
[0].value
;
1757 if (GET_MODE (target
) != mode
)
1758 return gen_lowpart (tmode
, target
);
1762 /* Using subregs is useful if we're extracting one register vector
1763 from a multi-register vector. extract_bit_field_as_subreg checks
1764 for valid bitsize and bitnum, so we don't need to do that here. */
1765 if (VECTOR_MODE_P (mode
))
1767 rtx sub
= extract_bit_field_as_subreg (mode
, op0
, bitsize
, bitnum
);
1773 /* Make sure we are playing with integral modes. Pun with subregs
1775 opt_scalar_int_mode op0_mode
= int_mode_for_mode (GET_MODE (op0
));
1776 scalar_int_mode imode
;
1777 if (!op0_mode
.exists (&imode
) || imode
!= GET_MODE (op0
))
1780 op0
= adjust_bitfield_address_size (op0
, op0_mode
.else_blk (),
1782 else if (op0_mode
.exists (&imode
))
1784 op0
= gen_lowpart (imode
, op0
);
1786 /* If we got a SUBREG, force it into a register since we
1787 aren't going to be able to do another SUBREG on it. */
1788 if (GET_CODE (op0
) == SUBREG
)
1789 op0
= force_reg (imode
, op0
);
1793 poly_int64 size
= GET_MODE_SIZE (GET_MODE (op0
));
1794 rtx mem
= assign_stack_temp (GET_MODE (op0
), size
);
1795 emit_move_insn (mem
, op0
);
1796 op0
= adjust_bitfield_address_size (mem
, BLKmode
, 0, size
);
1800 /* ??? We currently assume TARGET is at least as big as BITSIZE.
1801 If that's wrong, the solution is to test for it and set TARGET to 0
1804 /* Get the mode of the field to use for atomic access or subreg
1806 if (!SCALAR_INT_MODE_P (tmode
)
1807 || !mode_for_size (bitsize
, GET_MODE_CLASS (tmode
), 0).exists (&mode1
))
1809 gcc_assert (mode1
!= BLKmode
);
1811 /* Extraction of a full MODE1 value can be done with a subreg as long
1812 as the least significant bit of the value is the least significant
1813 bit of either OP0 or a word of OP0. */
1814 if (!MEM_P (op0
) && !reverse
)
1816 rtx sub
= extract_bit_field_as_subreg (mode1
, op0
, bitsize
, bitnum
);
1818 return convert_extracted_bit_field (sub
, mode
, tmode
, unsignedp
);
1821 /* Extraction of a full MODE1 value can be done with a load as long as
1822 the field is on a byte boundary and is sufficiently aligned. */
1823 poly_uint64 bytenum
;
1824 if (simple_mem_bitfield_p (op0
, bitsize
, bitnum
, mode1
, &bytenum
))
1826 op0
= adjust_bitfield_address (op0
, mode1
, bytenum
);
1828 op0
= flip_storage_order (mode1
, op0
);
1829 return convert_extracted_bit_field (op0
, mode
, tmode
, unsignedp
);
1832 /* If we have a memory source and a non-constant bit offset, restrict
1833 the memory to the referenced bytes. This is a worst-case fallback
1834 but is useful for things like vector booleans. */
1835 if (MEM_P (op0
) && !bitnum
.is_constant ())
1837 bytenum
= bits_to_bytes_round_down (bitnum
);
1838 bitnum
= num_trailing_bits (bitnum
);
1839 poly_uint64 bytesize
= bits_to_bytes_round_up (bitnum
+ bitsize
);
1840 op0
= adjust_bitfield_address_size (op0
, BLKmode
, bytenum
, bytesize
);
1841 op0_mode
= opt_scalar_int_mode ();
1844 /* It's possible we'll need to handle other cases here for
1845 polynomial bitnum and bitsize. */
1847 /* From here on we need to be looking at a fixed-size insertion. */
1848 return extract_integral_bit_field (op0
, op0_mode
, bitsize
.to_constant (),
1849 bitnum
.to_constant (), unsignedp
,
1850 target
, mode
, tmode
, reverse
, fallback_p
);
1853 /* Subroutine of extract_bit_field_1, with the same arguments, except
1854 that BITSIZE and BITNUM are constant. Handle cases specific to
1855 integral modes. If OP0_MODE is defined, it is the mode of OP0,
1856 otherwise OP0 is a BLKmode MEM. */
/* NOTE(review): damaged extraction of GCC's expmed.c -- many original
   lines are missing throughout this function (braces, conditions,
   declarations and returns, e.g. 1857-1858, 1864, 1866, 1868, 1873,
   1876-1878, 1881, 1886, 1889, 1892, 1897, 1906-1907, 1910, 1915,
   1917-1918, 1920-1922, 1925-1928, 1932, 1934, 1937, 1940-1945,
   1951-1952, 1956, 1958-1960, 1964, 1969-1970, 1975-1976, 1982-1983,
   1985, 1987-1991, 1995, 1997-1998, 2000, 2002-2006, 2008, 2014-2015,
   2018, 2020-2021, 2023-2028, 2034, 2036, 2039, 2043, 2047-2048,
   2050-2051).  Restore from upstream before editing.  */
1859 extract_integral_bit_field (rtx op0
, opt_scalar_int_mode op0_mode
,
1860 unsigned HOST_WIDE_INT bitsize
,
1861 unsigned HOST_WIDE_INT bitnum
, int unsignedp
,
1862 rtx target
, machine_mode mode
, machine_mode tmode
,
1863 bool reverse
, bool fallback_p
)
1865 /* Handle fields bigger than a word. */
1867 if (bitsize
> BITS_PER_WORD
)
1869 /* Here we transfer the words of the field
1870 in the order least significant first.
1871 This is because the most significant word is the one which may
1872 be less than full. */
1874 const bool backwards
= WORDS_BIG_ENDIAN
;
1875 unsigned int nwords
= (bitsize
+ (BITS_PER_WORD
- 1)) / BITS_PER_WORD
;
1879 if (target
== 0 || !REG_P (target
) || !valid_multiword_target_p (target
))
1880 target
= gen_reg_rtx (mode
);
1882 /* In case we're about to clobber a base register or something
1883 (see gcc.c-torture/execute/20040625-1.c). */
1884 if (reg_mentioned_p (target
, op0
))
1885 target
= gen_reg_rtx (mode
);
1887 /* Indicate for flow that the entire target reg is being set. */
1888 emit_clobber (target
);
1890 /* The mode must be fixed-size, since extract_bit_field_1 handles
1891 extractions from variable-sized objects before calling this
1893 unsigned int target_size
1894 = GET_MODE_SIZE (GET_MODE (target
)).to_constant ();
1895 last
= get_last_insn ();
1896 for (i
= 0; i
< nwords
; i
++)
1898 /* If I is 0, use the low-order word in both field and target;
1899 if I is 1, use the next to lowest word; and so on. */
1900 /* Word number in TARGET to use. */
1901 unsigned int wordnum
1902 = (backwards
? target_size
/ UNITS_PER_WORD
- i
- 1 : i
);
1903 /* Offset from start of field in OP0. */
1904 unsigned int bit_offset
= (backwards
^ reverse
1905 ? MAX ((int) bitsize
- ((int) i
+ 1)
1908 : (int) i
* BITS_PER_WORD
);
1909 rtx target_part
= operand_subword (target
, wordnum
, 1, VOIDmode
);
1911 = extract_bit_field_1 (op0
, MIN (BITS_PER_WORD
,
1912 bitsize
- i
* BITS_PER_WORD
),
1913 bitnum
+ bit_offset
, 1, target_part
,
1914 mode
, word_mode
, reverse
, fallback_p
, NULL
);
1916 gcc_assert (target_part
);
1919 delete_insns_since (last
);
1923 if (result_part
!= target_part
)
1924 emit_move_insn (target_part
, result_part
);
1929 /* Unless we've filled TARGET, the upper regs in a multi-reg value
1930 need to be zero'd out. */
1931 if (target_size
> nwords
* UNITS_PER_WORD
)
1933 unsigned int i
, total_words
;
1935 total_words
= target_size
/ UNITS_PER_WORD
;
1936 for (i
= nwords
; i
< total_words
; i
++)
1938 (operand_subword (target
,
1939 backwards
? total_words
- i
- 1 : i
,
1946 /* Signed bit field: sign-extend with two arithmetic shifts. */
1947 target
= expand_shift (LSHIFT_EXPR
, mode
, target
,
1948 GET_MODE_BITSIZE (mode
) - bitsize
, NULL_RTX
, 0);
1949 return expand_shift (RSHIFT_EXPR
, mode
, target
,
1950 GET_MODE_BITSIZE (mode
) - bitsize
, NULL_RTX
, 0);
1953 /* If OP0 is a multi-word register, narrow it to the affected word.
1954 If the region spans two words, defer to extract_split_bit_field. */
1955 if (!MEM_P (op0
) && GET_MODE_SIZE (op0_mode
.require ()) > UNITS_PER_WORD
)
1957 if (bitnum
% BITS_PER_WORD
+ bitsize
> BITS_PER_WORD
)
1961 target
= extract_split_bit_field (op0
, op0_mode
, bitsize
, bitnum
,
1962 unsignedp
, reverse
);
1963 return convert_extracted_bit_field (target
, mode
, tmode
, unsignedp
);
1965 op0
= simplify_gen_subreg (word_mode
, op0
, op0_mode
.require (),
1966 bitnum
/ BITS_PER_WORD
* UNITS_PER_WORD
);
1967 op0_mode
= word_mode
;
1968 bitnum
%= BITS_PER_WORD
;
1971 /* From here on we know the desired field is smaller than a word.
1972 If OP0 is a register, it too fits within a word. */
1973 enum extraction_pattern pattern
= unsignedp
? EP_extzv
: EP_extv
;
1974 extraction_insn extv
;
1977 /* ??? We could limit the structure size to the part of OP0 that
1978 contains the field, with appropriate checks for endianness
1979 and TARGET_TRULY_NOOP_TRUNCATION. */
1980 && get_best_reg_extraction_insn (&extv
, pattern
,
1981 GET_MODE_BITSIZE (op0_mode
.require ()),
1984 rtx result
= extract_bit_field_using_extv (&extv
, op0
, op0_mode
,
1986 unsignedp
, target
, mode
,
1992 /* If OP0 is a memory, try copying it to a register and seeing if a
1993 cheap register alternative is available. */
/* NOTE(review): bitwise '&' below should almost certainly be the logical
   '&&' used by the equivalent store-side check (orig line 1056,
   "MEM_P (op0) && !reverse").  Harmless in effect, since both operands
   are 0/1, but inconsistent -- fix when restoring from upstream.  */
1994 if (MEM_P (op0
) & !reverse
)
1996 if (get_best_mem_extraction_insn (&extv
, pattern
, bitsize
, bitnum
,
1999 rtx result
= extract_bit_field_using_extv (&extv
, op0
, op0_mode
,
2001 unsignedp
, target
, mode
,
2007 rtx_insn
*last
= get_last_insn ();
2009 /* Try loading part of OP0 into a register and extracting the
2010 bitfield from that. */
2011 unsigned HOST_WIDE_INT bitpos
;
2012 rtx xop0
= adjust_bit_field_mem_for_reg (pattern
, op0
, bitsize
, bitnum
,
2013 0, 0, tmode
, &bitpos
);
2016 xop0
= copy_to_reg (xop0
);
2017 rtx result
= extract_bit_field_1 (xop0
, bitsize
, bitpos
,
2019 mode
, tmode
, reverse
, false, NULL
);
2022 delete_insns_since (last
);
2029 /* Find a correspondingly-sized integer field, so we can apply
2030 shifts and masks to it. */
2031 scalar_int_mode int_mode
;
2032 if (!int_mode_for_mode (tmode
).exists (&int_mode
))
2033 /* If this fails, we should probably push op0 out to memory and then
2035 int_mode
= int_mode_for_mode (mode
).require ();
2037 target
= extract_fixed_bit_field (int_mode
, op0
, op0_mode
, bitsize
,
2038 bitnum
, target
, unsignedp
, reverse
);
2040 /* Complex values must be reversed piecewise, so we need to undo the global
2041 reversal, convert to the complex mode and reverse again. */
2042 if (reverse
&& COMPLEX_MODE_P (tmode
))
2044 target
= flip_storage_order (int_mode
, target
);
2045 target
= convert_extracted_bit_field (target
, mode
, tmode
, unsignedp
);
2046 target
= flip_storage_order (tmode
, target
);
2049 target
= convert_extracted_bit_field (target
, mode
, tmode
, unsignedp
);
2054 /* Generate code to extract a byte-field from STR_RTX
2055 containing BITSIZE bits, starting at BITNUM,
2056 and put it in TARGET if possible (if TARGET is nonzero).
2057 Regardless of TARGET, we return the rtx for where the value is placed.
2059 STR_RTX is the structure containing the byte (a REG or MEM).
2060 UNSIGNEDP is nonzero if this is an unsigned bit field.
2061 MODE is the natural mode of the field value once extracted.
2062 TMODE is the mode the caller would like the value to have;
2063 but the value may be returned with type MODE instead.
2065 If REVERSE is true, the extraction is to be done in reverse order.
2067 If a TARGET is specified and we can store in it at no extra cost,
2068 we do so, and return TARGET.
2069 Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
2070 if they are equally easy.
2072 If the result can be stored at TARGET, and ALT_RTL is non-NULL,
2073 then *ALT_RTL is set to TARGET (before legitimziation). */
2076 extract_bit_field (rtx str_rtx
, poly_uint64 bitsize
, poly_uint64 bitnum
,
2077 int unsignedp
, rtx target
, machine_mode mode
,
2078 machine_mode tmode
, bool reverse
, rtx
*alt_rtl
)
2082 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
2083 if (maybe_ne (GET_MODE_BITSIZE (GET_MODE (str_rtx
)), 0))
2084 mode1
= GET_MODE (str_rtx
);
2085 else if (target
&& maybe_ne (GET_MODE_BITSIZE (GET_MODE (target
)), 0))
2086 mode1
= GET_MODE (target
);
2090 unsigned HOST_WIDE_INT ibitsize
, ibitnum
;
2091 scalar_int_mode int_mode
;
2092 if (bitsize
.is_constant (&ibitsize
)
2093 && bitnum
.is_constant (&ibitnum
)
2094 && is_a
<scalar_int_mode
> (mode1
, &int_mode
)
2095 && strict_volatile_bitfield_p (str_rtx
, ibitsize
, ibitnum
,
2098 /* Extraction of a full INT_MODE value can be done with a simple load.
2099 We know here that the field can be accessed with one single
2100 instruction. For targets that support unaligned memory,
2101 an unaligned access may be necessary. */
2102 if (ibitsize
== GET_MODE_BITSIZE (int_mode
))
2104 rtx result
= adjust_bitfield_address (str_rtx
, int_mode
,
2105 ibitnum
/ BITS_PER_UNIT
);
2107 result
= flip_storage_order (int_mode
, result
);
2108 gcc_assert (ibitnum
% BITS_PER_UNIT
== 0);
2109 return convert_extracted_bit_field (result
, mode
, tmode
, unsignedp
);
2112 str_rtx
= narrow_bit_field_mem (str_rtx
, int_mode
, ibitsize
, ibitnum
,
2114 gcc_assert (ibitnum
+ ibitsize
<= GET_MODE_BITSIZE (int_mode
));
2115 str_rtx
= copy_to_reg (str_rtx
);
2116 return extract_bit_field_1 (str_rtx
, ibitsize
, ibitnum
, unsignedp
,
2117 target
, mode
, tmode
, reverse
, true, alt_rtl
);
2120 return extract_bit_field_1 (str_rtx
, bitsize
, bitnum
, unsignedp
,
2121 target
, mode
, tmode
, reverse
, true, alt_rtl
);
2124 /* Use shifts and boolean operations to extract a field of BITSIZE bits
2125 from bit BITNUM of OP0. If OP0_MODE is defined, it is the mode of OP0,
2126 otherwise OP0 is a BLKmode MEM.
2128 UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
2129 If REVERSE is true, the extraction is to be done in reverse order.
2131 If TARGET is nonzero, attempts to store the value there
2132 and return TARGET, but this is not guaranteed.
2133 If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */
2136 extract_fixed_bit_field (machine_mode tmode
, rtx op0
,
2137 opt_scalar_int_mode op0_mode
,
2138 unsigned HOST_WIDE_INT bitsize
,
2139 unsigned HOST_WIDE_INT bitnum
, rtx target
,
2140 int unsignedp
, bool reverse
)
2142 scalar_int_mode mode
;
2145 if (!get_best_mode (bitsize
, bitnum
, 0, 0, MEM_ALIGN (op0
),
2146 BITS_PER_WORD
, MEM_VOLATILE_P (op0
), &mode
))
2147 /* The only way this should occur is if the field spans word
2149 return extract_split_bit_field (op0
, op0_mode
, bitsize
, bitnum
,
2150 unsignedp
, reverse
);
2152 op0
= narrow_bit_field_mem (op0
, mode
, bitsize
, bitnum
, &bitnum
);
2155 mode
= op0_mode
.require ();
2157 return extract_fixed_bit_field_1 (tmode
, op0
, mode
, bitsize
, bitnum
,
2158 target
, unsignedp
, reverse
);
2161 /* Helper function for extract_fixed_bit_field, extracts
2162 the bit field always using MODE, which is the mode of OP0.
2163 The other arguments are as for extract_fixed_bit_field. */
2166 extract_fixed_bit_field_1 (machine_mode tmode
, rtx op0
, scalar_int_mode mode
,
2167 unsigned HOST_WIDE_INT bitsize
,
2168 unsigned HOST_WIDE_INT bitnum
, rtx target
,
2169 int unsignedp
, bool reverse
)
2171 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
2172 for invalid input, such as extract equivalent of f5 from
2173 gcc.dg/pr48335-2.c. */
2175 if (reverse
? !BYTES_BIG_ENDIAN
: BYTES_BIG_ENDIAN
)
2176 /* BITNUM is the distance between our msb and that of OP0.
2177 Convert it to the distance from the lsb. */
2178 bitnum
= GET_MODE_BITSIZE (mode
) - bitsize
- bitnum
;
2180 /* Now BITNUM is always the distance between the field's lsb and that of OP0.
2181 We have reduced the big-endian case to the little-endian case. */
2183 op0
= flip_storage_order (mode
, op0
);
2189 /* If the field does not already start at the lsb,
2190 shift it so it does. */
2191 /* Maybe propagate the target for the shift. */
2192 rtx subtarget
= (target
!= 0 && REG_P (target
) ? target
: 0);
2195 op0
= expand_shift (RSHIFT_EXPR
, mode
, op0
, bitnum
, subtarget
, 1);
2197 /* Convert the value to the desired mode. TMODE must also be a
2198 scalar integer for this conversion to make sense, since we
2199 shouldn't reinterpret the bits. */
2200 scalar_int_mode new_mode
= as_a
<scalar_int_mode
> (tmode
);
2201 if (mode
!= new_mode
)
2202 op0
= convert_to_mode (new_mode
, op0
, 1);
2204 /* Unless the msb of the field used to be the msb when we shifted,
2205 mask out the upper bits. */
2207 if (GET_MODE_BITSIZE (mode
) != bitnum
+ bitsize
)
2208 return expand_binop (new_mode
, and_optab
, op0
,
2209 mask_rtx (new_mode
, 0, bitsize
, 0),
2210 target
, 1, OPTAB_LIB_WIDEN
);
2214 /* To extract a signed bit-field, first shift its msb to the msb of the word,
2215 then arithmetic-shift its lsb to the lsb of the word. */
2216 op0
= force_reg (mode
, op0
);
2218 /* Find the narrowest integer mode that contains the field. */
2220 opt_scalar_int_mode mode_iter
;
2221 FOR_EACH_MODE_IN_CLASS (mode_iter
, MODE_INT
)
2222 if (GET_MODE_BITSIZE (mode_iter
.require ()) >= bitsize
+ bitnum
)
2225 mode
= mode_iter
.require ();
2226 op0
= convert_to_mode (mode
, op0
, 0);
2231 if (GET_MODE_BITSIZE (mode
) != (bitsize
+ bitnum
))
2233 int amount
= GET_MODE_BITSIZE (mode
) - (bitsize
+ bitnum
);
2234 /* Maybe propagate the target for the shift. */
2235 rtx subtarget
= (target
!= 0 && REG_P (target
) ? target
: 0);
2236 op0
= expand_shift (LSHIFT_EXPR
, mode
, op0
, amount
, subtarget
, 1);
2239 return expand_shift (RSHIFT_EXPR
, mode
, op0
,
2240 GET_MODE_BITSIZE (mode
) - bitsize
, target
, 0);
2243 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
2247 lshift_value (machine_mode mode
, unsigned HOST_WIDE_INT value
,
2250 return immed_wide_int_const (wi::lshift (value
, bitpos
), mode
);
2253 /* Extract a bit field that is split across two words
2254 and return an RTX for the result.
2256 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
2257 BITSIZE is the field width; BITPOS, position of its first bit, in the word.
2258 UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.
2259 If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is
2262 If REVERSE is true, the extraction is to be done in reverse order. */
2265 extract_split_bit_field (rtx op0
, opt_scalar_int_mode op0_mode
,
2266 unsigned HOST_WIDE_INT bitsize
,
2267 unsigned HOST_WIDE_INT bitpos
, int unsignedp
,
2271 unsigned int bitsdone
= 0;
2272 rtx result
= NULL_RTX
;
2275 /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
2277 if (REG_P (op0
) || GET_CODE (op0
) == SUBREG
)
2278 unit
= BITS_PER_WORD
;
2280 unit
= MIN (MEM_ALIGN (op0
), BITS_PER_WORD
);
2282 while (bitsdone
< bitsize
)
2284 unsigned HOST_WIDE_INT thissize
;
2286 unsigned HOST_WIDE_INT thispos
;
2287 unsigned HOST_WIDE_INT offset
;
2289 offset
= (bitpos
+ bitsdone
) / unit
;
2290 thispos
= (bitpos
+ bitsdone
) % unit
;
2292 /* THISSIZE must not overrun a word boundary. Otherwise,
2293 extract_fixed_bit_field will call us again, and we will mutually
2295 thissize
= MIN (bitsize
- bitsdone
, BITS_PER_WORD
);
2296 thissize
= MIN (thissize
, unit
- thispos
);
2298 /* If OP0 is a register, then handle OFFSET here. */
2299 rtx op0_piece
= op0
;
2300 opt_scalar_int_mode op0_piece_mode
= op0_mode
;
2301 if (SUBREG_P (op0
) || REG_P (op0
))
2303 op0_piece
= operand_subword_force (op0
, offset
, op0_mode
.require ());
2304 op0_piece_mode
= word_mode
;
2308 /* Extract the parts in bit-counting order,
2309 whose meaning is determined by BYTES_PER_UNIT.
2310 OFFSET is in UNITs, and UNIT is in bits. */
2311 part
= extract_fixed_bit_field (word_mode
, op0_piece
, op0_piece_mode
,
2312 thissize
, offset
* unit
+ thispos
,
2314 bitsdone
+= thissize
;
2316 /* Shift this part into place for the result. */
2317 if (reverse
? !BYTES_BIG_ENDIAN
: BYTES_BIG_ENDIAN
)
2319 if (bitsize
!= bitsdone
)
2320 part
= expand_shift (LSHIFT_EXPR
, word_mode
, part
,
2321 bitsize
- bitsdone
, 0, 1);
2325 if (bitsdone
!= thissize
)
2326 part
= expand_shift (LSHIFT_EXPR
, word_mode
, part
,
2327 bitsdone
- thissize
, 0, 1);
2333 /* Combine the parts with bitwise or. This works
2334 because we extracted each part as an unsigned bit field. */
2335 result
= expand_binop (word_mode
, ior_optab
, part
, result
, NULL_RTX
, 1,
2341 /* Unsigned bit field: we are done. */
2344 /* Signed bit field: sign-extend with two arithmetic shifts. */
2345 result
= expand_shift (LSHIFT_EXPR
, word_mode
, result
,
2346 BITS_PER_WORD
- bitsize
, NULL_RTX
, 0);
2347 return expand_shift (RSHIFT_EXPR
, word_mode
, result
,
2348 BITS_PER_WORD
- bitsize
, NULL_RTX
, 0);
2351 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2352 the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than
2353 MODE, fill the upper bits with zeros. Fail if the layout of either
2354 mode is unknown (as for CC modes) or if the extraction would involve
2355 unprofitable mode punning. Return the value on success, otherwise
2358 This is different from gen_lowpart* in these respects:
2360 - the returned value must always be considered an rvalue
2362 - when MODE is wider than SRC_MODE, the extraction involves
2365 - when MODE is smaller than SRC_MODE, the extraction involves
2366 a truncation (and is thus subject to TARGET_TRULY_NOOP_TRUNCATION).
2368 In other words, this routine performs a computation, whereas the
2369 gen_lowpart* routines are conceptually lvalue or rvalue subreg
2373 extract_low_bits (machine_mode mode
, machine_mode src_mode
, rtx src
)
2375 scalar_int_mode int_mode
, src_int_mode
;
2377 if (mode
== src_mode
)
2380 if (CONSTANT_P (src
))
2382 /* simplify_gen_subreg can't be used here, as if simplify_subreg
2383 fails, it will happily create (subreg (symbol_ref)) or similar
2385 poly_uint64 byte
= subreg_lowpart_offset (mode
, src_mode
);
2386 rtx ret
= simplify_subreg (mode
, src
, src_mode
, byte
);
2390 if (GET_MODE (src
) == VOIDmode
2391 || !validate_subreg (mode
, src_mode
, src
, byte
))
2394 src
= force_reg (GET_MODE (src
), src
);
2395 return gen_rtx_SUBREG (mode
, src
, byte
);
2398 if (GET_MODE_CLASS (mode
) == MODE_CC
|| GET_MODE_CLASS (src_mode
) == MODE_CC
)
2401 if (known_eq (GET_MODE_BITSIZE (mode
), GET_MODE_BITSIZE (src_mode
))
2402 && targetm
.modes_tieable_p (mode
, src_mode
))
2404 rtx x
= gen_lowpart_common (mode
, src
);
2409 if (!int_mode_for_mode (src_mode
).exists (&src_int_mode
)
2410 || !int_mode_for_mode (mode
).exists (&int_mode
))
2413 if (!targetm
.modes_tieable_p (src_int_mode
, src_mode
))
2415 if (!targetm
.modes_tieable_p (int_mode
, mode
))
2418 src
= gen_lowpart (src_int_mode
, src
);
2419 if (!validate_subreg (int_mode
, src_int_mode
, src
,
2420 subreg_lowpart_offset (int_mode
, src_int_mode
)))
2423 src
= convert_modes (int_mode
, src_int_mode
, src
, true);
2424 src
= gen_lowpart (mode
, src
);
2428 /* Add INC into TARGET. */
2431 expand_inc (rtx target
, rtx inc
)
2433 rtx value
= expand_binop (GET_MODE (target
), add_optab
,
2435 target
, 0, OPTAB_LIB_WIDEN
);
2436 if (value
!= target
)
2437 emit_move_insn (target
, value
);
2440 /* Subtract DEC from TARGET. */
2443 expand_dec (rtx target
, rtx dec
)
2445 rtx value
= expand_binop (GET_MODE (target
), sub_optab
,
2447 target
, 0, OPTAB_LIB_WIDEN
);
2448 if (value
!= target
)
2449 emit_move_insn (target
, value
);
2452 /* Output a shift instruction for expression code CODE,
2453 with SHIFTED being the rtx for the value to shift,
2454 and AMOUNT the rtx for the amount to shift by.
2455 Store the result in the rtx TARGET, if that is convenient.
2456 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2457 Return the rtx for where the value is.
2458 If that cannot be done, abort the compilation unless MAY_FAIL is true,
2459 in which case 0 is returned. */
2462 expand_shift_1 (enum tree_code code
, machine_mode mode
, rtx shifted
,
2463 rtx amount
, rtx target
, int unsignedp
, bool may_fail
= false)
2466 int left
= (code
== LSHIFT_EXPR
|| code
== LROTATE_EXPR
);
2467 int rotate
= (code
== LROTATE_EXPR
|| code
== RROTATE_EXPR
);
2468 optab lshift_optab
= ashl_optab
;
2469 optab rshift_arith_optab
= ashr_optab
;
2470 optab rshift_uns_optab
= lshr_optab
;
2471 optab lrotate_optab
= rotl_optab
;
2472 optab rrotate_optab
= rotr_optab
;
2473 machine_mode op1_mode
;
2474 scalar_mode scalar_mode
= GET_MODE_INNER (mode
);
2476 bool speed
= optimize_insn_for_speed_p ();
2479 op1_mode
= GET_MODE (op1
);
2481 /* Determine whether the shift/rotate amount is a vector, or scalar. If the
2482 shift amount is a vector, use the vector/vector shift patterns. */
2483 if (VECTOR_MODE_P (mode
) && VECTOR_MODE_P (op1_mode
))
2485 lshift_optab
= vashl_optab
;
2486 rshift_arith_optab
= vashr_optab
;
2487 rshift_uns_optab
= vlshr_optab
;
2488 lrotate_optab
= vrotl_optab
;
2489 rrotate_optab
= vrotr_optab
;
2492 /* Previously detected shift-counts computed by NEGATE_EXPR
2493 and shifted in the other direction; but that does not work
2496 if (SHIFT_COUNT_TRUNCATED
)
2498 if (CONST_INT_P (op1
)
2499 && ((unsigned HOST_WIDE_INT
) INTVAL (op1
) >=
2500 (unsigned HOST_WIDE_INT
) GET_MODE_BITSIZE (scalar_mode
)))
2501 op1
= gen_int_shift_amount (mode
,
2502 (unsigned HOST_WIDE_INT
) INTVAL (op1
)
2503 % GET_MODE_BITSIZE (scalar_mode
));
2504 else if (GET_CODE (op1
) == SUBREG
2505 && subreg_lowpart_p (op1
)
2506 && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1
)))
2507 && SCALAR_INT_MODE_P (GET_MODE (op1
)))
2508 op1
= SUBREG_REG (op1
);
2511 /* Canonicalize rotates by constant amount. If op1 is bitsize / 2,
2512 prefer left rotation, if op1 is from bitsize / 2 + 1 to
2513 bitsize - 1, use other direction of rotate with 1 .. bitsize / 2 - 1
2516 && CONST_INT_P (op1
)
2517 && IN_RANGE (INTVAL (op1
), GET_MODE_BITSIZE (scalar_mode
) / 2 + left
,
2518 GET_MODE_BITSIZE (scalar_mode
) - 1))
2520 op1
= gen_int_shift_amount (mode
, (GET_MODE_BITSIZE (scalar_mode
)
2523 code
= left
? LROTATE_EXPR
: RROTATE_EXPR
;
2526 /* Rotation of 16bit values by 8 bits is effectively equivalent to a bswaphi.
2527 Note that this is not the case for bigger values. For instance a rotation
2528 of 0x01020304 by 16 bits gives 0x03040102 which is different from
2529 0x04030201 (bswapsi). */
2531 && CONST_INT_P (op1
)
2532 && INTVAL (op1
) == BITS_PER_UNIT
2533 && GET_MODE_SIZE (scalar_mode
) == 2
2534 && optab_handler (bswap_optab
, mode
) != CODE_FOR_nothing
)
2535 return expand_unop (mode
, bswap_optab
, shifted
, NULL_RTX
, unsignedp
);
2537 if (op1
== const0_rtx
)
2540 /* Check whether its cheaper to implement a left shift by a constant
2541 bit count by a sequence of additions. */
2542 if (code
== LSHIFT_EXPR
2543 && CONST_INT_P (op1
)
2545 && INTVAL (op1
) < GET_MODE_PRECISION (scalar_mode
)
2546 && INTVAL (op1
) < MAX_BITS_PER_WORD
2547 && (shift_cost (speed
, mode
, INTVAL (op1
))
2548 > INTVAL (op1
) * add_cost (speed
, mode
))
2549 && shift_cost (speed
, mode
, INTVAL (op1
)) != MAX_COST
)
2552 for (i
= 0; i
< INTVAL (op1
); i
++)
2554 temp
= force_reg (mode
, shifted
);
2555 shifted
= expand_binop (mode
, add_optab
, temp
, temp
, NULL_RTX
,
2556 unsignedp
, OPTAB_LIB_WIDEN
);
2561 for (attempt
= 0; temp
== 0 && attempt
< 3; attempt
++)
2563 enum optab_methods methods
;
2566 methods
= OPTAB_DIRECT
;
2567 else if (attempt
== 1)
2568 methods
= OPTAB_WIDEN
;
2570 methods
= OPTAB_LIB_WIDEN
;
2574 /* Widening does not work for rotation. */
2575 if (methods
== OPTAB_WIDEN
)
2577 else if (methods
== OPTAB_LIB_WIDEN
)
2579 /* If we have been unable to open-code this by a rotation,
2580 do it as the IOR of two shifts. I.e., to rotate A
2582 (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
2583 where C is the bitsize of A.
2585 It is theoretically possible that the target machine might
2586 not be able to perform either shift and hence we would
2587 be making two libcalls rather than just the one for the
2588 shift (similarly if IOR could not be done). We will allow
2589 this extremely unlikely lossage to avoid complicating the
2592 rtx subtarget
= target
== shifted
? 0 : target
;
2593 rtx new_amount
, other_amount
;
2597 if (op1
== const0_rtx
)
2599 else if (CONST_INT_P (op1
))
2600 other_amount
= gen_int_shift_amount
2601 (mode
, GET_MODE_BITSIZE (scalar_mode
) - INTVAL (op1
));
2605 = simplify_gen_unary (NEG
, GET_MODE (op1
),
2606 op1
, GET_MODE (op1
));
2607 HOST_WIDE_INT mask
= GET_MODE_PRECISION (scalar_mode
) - 1;
2609 = simplify_gen_binary (AND
, GET_MODE (op1
), other_amount
,
2610 gen_int_mode (mask
, GET_MODE (op1
)));
2613 shifted
= force_reg (mode
, shifted
);
2615 temp
= expand_shift_1 (left
? LSHIFT_EXPR
: RSHIFT_EXPR
,
2616 mode
, shifted
, new_amount
, 0, 1);
2617 temp1
= expand_shift_1 (left
? RSHIFT_EXPR
: LSHIFT_EXPR
,
2618 mode
, shifted
, other_amount
,
2620 return expand_binop (mode
, ior_optab
, temp
, temp1
, target
,
2621 unsignedp
, methods
);
2624 temp
= expand_binop (mode
,
2625 left
? lrotate_optab
: rrotate_optab
,
2626 shifted
, op1
, target
, unsignedp
, methods
);
2629 temp
= expand_binop (mode
,
2630 left
? lshift_optab
: rshift_uns_optab
,
2631 shifted
, op1
, target
, unsignedp
, methods
);
2633 /* Do arithmetic shifts.
2634 Also, if we are going to widen the operand, we can just as well
2635 use an arithmetic right-shift instead of a logical one. */
2636 if (temp
== 0 && ! rotate
2637 && (! unsignedp
|| (! left
&& methods
== OPTAB_WIDEN
)))
2639 enum optab_methods methods1
= methods
;
2641 /* If trying to widen a log shift to an arithmetic shift,
2642 don't accept an arithmetic shift of the same size. */
2644 methods1
= OPTAB_MUST_WIDEN
;
2646 /* Arithmetic shift */
2648 temp
= expand_binop (mode
,
2649 left
? lshift_optab
: rshift_arith_optab
,
2650 shifted
, op1
, target
, unsignedp
, methods1
);
2653 /* We used to try extzv here for logical right shifts, but that was
2654 only useful for one machine, the VAX, and caused poor code
2655 generation there for lshrdi3, so the code was deleted and a
2656 define_expand for lshrsi3 was added to vax.md. */
2659 gcc_assert (temp
!= NULL_RTX
|| may_fail
);
2663 /* Output a shift instruction for expression code CODE,
2664 with SHIFTED being the rtx for the value to shift,
2665 and AMOUNT the amount to shift by.
2666 Store the result in the rtx TARGET, if that is convenient.
2667 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2668 Return the rtx for where the value is. */
2671 expand_shift (enum tree_code code
, machine_mode mode
, rtx shifted
,
2672 poly_int64 amount
, rtx target
, int unsignedp
)
2674 return expand_shift_1 (code
, mode
, shifted
,
2675 gen_int_shift_amount (mode
, amount
),
2679 /* Likewise, but return 0 if that cannot be done. */
2682 maybe_expand_shift (enum tree_code code
, machine_mode mode
, rtx shifted
,
2683 int amount
, rtx target
, int unsignedp
)
2685 return expand_shift_1 (code
, mode
,
2686 shifted
, GEN_INT (amount
), target
, unsignedp
, true);
2689 /* Output a shift instruction for expression code CODE,
2690 with SHIFTED being the rtx for the value to shift,
2691 and AMOUNT the tree for the amount to shift by.
2692 Store the result in the rtx TARGET, if that is convenient.
2693 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2694 Return the rtx for where the value is. */
2697 expand_variable_shift (enum tree_code code
, machine_mode mode
, rtx shifted
,
2698 tree amount
, rtx target
, int unsignedp
)
2700 return expand_shift_1 (code
, mode
,
2701 shifted
, expand_normal (amount
), target
, unsignedp
);
2705 static void synth_mult (struct algorithm
*, unsigned HOST_WIDE_INT
,
2706 const struct mult_cost
*, machine_mode mode
);
2707 static rtx
expand_mult_const (machine_mode
, rtx
, HOST_WIDE_INT
, rtx
,
2708 const struct algorithm
*, enum mult_variant
);
2709 static unsigned HOST_WIDE_INT
invert_mod2n (unsigned HOST_WIDE_INT
, int);
2710 static rtx
extract_high_half (scalar_int_mode
, rtx
);
2711 static rtx
expmed_mult_highpart (scalar_int_mode
, rtx
, rtx
, rtx
, int, int);
2712 static rtx
expmed_mult_highpart_optab (scalar_int_mode
, rtx
, rtx
, rtx
,
2714 /* Compute and return the best algorithm for multiplying by T.
2715 The algorithm must cost less than cost_limit
2716 If retval.cost >= COST_LIMIT, no algorithm was found and all
2717 other field of the returned struct are undefined.
2718 MODE is the machine mode of the multiplication. */
2721 synth_mult (struct algorithm
*alg_out
, unsigned HOST_WIDE_INT t
,
2722 const struct mult_cost
*cost_limit
, machine_mode mode
)
2725 struct algorithm
*alg_in
, *best_alg
;
2726 struct mult_cost best_cost
;
2727 struct mult_cost new_limit
;
2728 int op_cost
, op_latency
;
2729 unsigned HOST_WIDE_INT orig_t
= t
;
2730 unsigned HOST_WIDE_INT q
;
2731 int maxm
, hash_index
;
2732 bool cache_hit
= false;
2733 enum alg_code cache_alg
= alg_zero
;
2734 bool speed
= optimize_insn_for_speed_p ();
2735 scalar_int_mode imode
;
2736 struct alg_hash_entry
*entry_ptr
;
2738 /* Indicate that no algorithm is yet found. If no algorithm
2739 is found, this value will be returned and indicate failure. */
2740 alg_out
->cost
.cost
= cost_limit
->cost
+ 1;
2741 alg_out
->cost
.latency
= cost_limit
->latency
+ 1;
2743 if (cost_limit
->cost
< 0
2744 || (cost_limit
->cost
== 0 && cost_limit
->latency
<= 0))
2747 /* Be prepared for vector modes. */
2748 imode
= as_a
<scalar_int_mode
> (GET_MODE_INNER (mode
));
2750 maxm
= MIN (BITS_PER_WORD
, GET_MODE_BITSIZE (imode
));
2752 /* Restrict the bits of "t" to the multiplication's mode. */
2753 t
&= GET_MODE_MASK (imode
);
2755 /* t == 1 can be done in zero cost. */
2759 alg_out
->cost
.cost
= 0;
2760 alg_out
->cost
.latency
= 0;
2761 alg_out
->op
[0] = alg_m
;
2765 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2769 if (MULT_COST_LESS (cost_limit
, zero_cost (speed
)))
2774 alg_out
->cost
.cost
= zero_cost (speed
);
2775 alg_out
->cost
.latency
= zero_cost (speed
);
2776 alg_out
->op
[0] = alg_zero
;
2781 /* We'll be needing a couple extra algorithm structures now. */
2783 alg_in
= XALLOCA (struct algorithm
);
2784 best_alg
= XALLOCA (struct algorithm
);
2785 best_cost
= *cost_limit
;
2787 /* Compute the hash index. */
2788 hash_index
= (t
^ (unsigned int) mode
^ (speed
* 256)) % NUM_ALG_HASH_ENTRIES
;
2790 /* See if we already know what to do for T. */
2791 entry_ptr
= alg_hash_entry_ptr (hash_index
);
2792 if (entry_ptr
->t
== t
2793 && entry_ptr
->mode
== mode
2794 && entry_ptr
->speed
== speed
2795 && entry_ptr
->alg
!= alg_unknown
)
2797 cache_alg
= entry_ptr
->alg
;
2799 if (cache_alg
== alg_impossible
)
2801 /* The cache tells us that it's impossible to synthesize
2802 multiplication by T within entry_ptr->cost. */
2803 if (!CHEAPER_MULT_COST (&entry_ptr
->cost
, cost_limit
))
2804 /* COST_LIMIT is at least as restrictive as the one
2805 recorded in the hash table, in which case we have no
2806 hope of synthesizing a multiplication. Just
2810 /* If we get here, COST_LIMIT is less restrictive than the
2811 one recorded in the hash table, so we may be able to
2812 synthesize a multiplication. Proceed as if we didn't
2813 have the cache entry. */
2817 if (CHEAPER_MULT_COST (cost_limit
, &entry_ptr
->cost
))
2818 /* The cached algorithm shows that this multiplication
2819 requires more cost than COST_LIMIT. Just return. This
2820 way, we don't clobber this cache entry with
2821 alg_impossible but retain useful information. */
2833 goto do_alg_addsub_t_m2
;
2835 case alg_add_factor
:
2836 case alg_sub_factor
:
2837 goto do_alg_addsub_factor
;
2840 goto do_alg_add_t2_m
;
2843 goto do_alg_sub_t2_m
;
2851 /* If we have a group of zero bits at the low-order part of T, try
2852 multiplying by the remaining bits and then doing a shift. */
2857 m
= ctz_or_zero (t
); /* m = number of low zero bits */
2861 /* The function expand_shift will choose between a shift and
2862 a sequence of additions, so the observed cost is given as
2863 MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)). */
2864 op_cost
= m
* add_cost (speed
, mode
);
2865 if (shift_cost (speed
, mode
, m
) < op_cost
)
2866 op_cost
= shift_cost (speed
, mode
, m
);
2867 new_limit
.cost
= best_cost
.cost
- op_cost
;
2868 new_limit
.latency
= best_cost
.latency
- op_cost
;
2869 synth_mult (alg_in
, q
, &new_limit
, mode
);
2871 alg_in
->cost
.cost
+= op_cost
;
2872 alg_in
->cost
.latency
+= op_cost
;
2873 if (CHEAPER_MULT_COST (&alg_in
->cost
, &best_cost
))
2875 best_cost
= alg_in
->cost
;
2876 std::swap (alg_in
, best_alg
);
2877 best_alg
->log
[best_alg
->ops
] = m
;
2878 best_alg
->op
[best_alg
->ops
] = alg_shift
;
2881 /* See if treating ORIG_T as a signed number yields a better
2882 sequence. Try this sequence only for a negative ORIG_T
2883 as it would be useless for a non-negative ORIG_T. */
2884 if ((HOST_WIDE_INT
) orig_t
< 0)
2886 /* Shift ORIG_T as follows because a right shift of a
2887 negative-valued signed type is implementation
2889 q
= ~(~orig_t
>> m
);
2890 /* The function expand_shift will choose between a shift
2891 and a sequence of additions, so the observed cost is
2892 given as MIN (m * add_cost(speed, mode),
2893 shift_cost(speed, mode, m)). */
2894 op_cost
= m
* add_cost (speed
, mode
);
2895 if (shift_cost (speed
, mode
, m
) < op_cost
)
2896 op_cost
= shift_cost (speed
, mode
, m
);
2897 new_limit
.cost
= best_cost
.cost
- op_cost
;
2898 new_limit
.latency
= best_cost
.latency
- op_cost
;
2899 synth_mult (alg_in
, q
, &new_limit
, mode
);
2901 alg_in
->cost
.cost
+= op_cost
;
2902 alg_in
->cost
.latency
+= op_cost
;
2903 if (CHEAPER_MULT_COST (&alg_in
->cost
, &best_cost
))
2905 best_cost
= alg_in
->cost
;
2906 std::swap (alg_in
, best_alg
);
2907 best_alg
->log
[best_alg
->ops
] = m
;
2908 best_alg
->op
[best_alg
->ops
] = alg_shift
;
2916 /* If we have an odd number, add or subtract one. */
2919 unsigned HOST_WIDE_INT w
;
2922 for (w
= 1; (w
& t
) != 0; w
<<= 1)
2924 /* If T was -1, then W will be zero after the loop. This is another
2925 case where T ends with ...111. Handling this with (T + 1) and
2926 subtract 1 produces slightly better code and results in algorithm
2927 selection much faster than treating it like the ...0111 case
2931 /* Reject the case where t is 3.
2932 Thus we prefer addition in that case. */
2935 /* T ends with ...111. Multiply by (T + 1) and subtract T. */
2937 op_cost
= add_cost (speed
, mode
);
2938 new_limit
.cost
= best_cost
.cost
- op_cost
;
2939 new_limit
.latency
= best_cost
.latency
- op_cost
;
2940 synth_mult (alg_in
, t
+ 1, &new_limit
, mode
);
2942 alg_in
->cost
.cost
+= op_cost
;
2943 alg_in
->cost
.latency
+= op_cost
;
2944 if (CHEAPER_MULT_COST (&alg_in
->cost
, &best_cost
))
2946 best_cost
= alg_in
->cost
;
2947 std::swap (alg_in
, best_alg
);
2948 best_alg
->log
[best_alg
->ops
] = 0;
2949 best_alg
->op
[best_alg
->ops
] = alg_sub_t_m2
;
2954 /* T ends with ...01 or ...011. Multiply by (T - 1) and add T. */
2956 op_cost
= add_cost (speed
, mode
);
2957 new_limit
.cost
= best_cost
.cost
- op_cost
;
2958 new_limit
.latency
= best_cost
.latency
- op_cost
;
2959 synth_mult (alg_in
, t
- 1, &new_limit
, mode
);
2961 alg_in
->cost
.cost
+= op_cost
;
2962 alg_in
->cost
.latency
+= op_cost
;
2963 if (CHEAPER_MULT_COST (&alg_in
->cost
, &best_cost
))
2965 best_cost
= alg_in
->cost
;
2966 std::swap (alg_in
, best_alg
);
2967 best_alg
->log
[best_alg
->ops
] = 0;
2968 best_alg
->op
[best_alg
->ops
] = alg_add_t_m2
;
2972 /* We may be able to calculate a * -7, a * -15, a * -31, etc
2973 quickly with a - a * n for some appropriate constant n. */
2974 m
= exact_log2 (-orig_t
+ 1);
2975 if (m
>= 0 && m
< maxm
)
2977 op_cost
= add_cost (speed
, mode
) + shift_cost (speed
, mode
, m
);
2978 /* If the target has a cheap shift-and-subtract insn use
2979 that in preference to a shift insn followed by a sub insn.
2980 Assume that the shift-and-sub is "atomic" with a latency
2981 equal to it's cost, otherwise assume that on superscalar
2982 hardware the shift may be executed concurrently with the
2983 earlier steps in the algorithm. */
2984 if (shiftsub1_cost (speed
, mode
, m
) <= op_cost
)
2986 op_cost
= shiftsub1_cost (speed
, mode
, m
);
2987 op_latency
= op_cost
;
2990 op_latency
= add_cost (speed
, mode
);
2992 new_limit
.cost
= best_cost
.cost
- op_cost
;
2993 new_limit
.latency
= best_cost
.latency
- op_latency
;
2994 synth_mult (alg_in
, (unsigned HOST_WIDE_INT
) (-orig_t
+ 1) >> m
,
2997 alg_in
->cost
.cost
+= op_cost
;
2998 alg_in
->cost
.latency
+= op_latency
;
2999 if (CHEAPER_MULT_COST (&alg_in
->cost
, &best_cost
))
3001 best_cost
= alg_in
->cost
;
3002 std::swap (alg_in
, best_alg
);
3003 best_alg
->log
[best_alg
->ops
] = m
;
3004 best_alg
->op
[best_alg
->ops
] = alg_sub_t_m2
;
3012 /* Look for factors of t of the form
3013 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
3014 If we find such a factor, we can multiply by t using an algorithm that
3015 multiplies by q, shift the result by m and add/subtract it to itself.
3017 We search for large factors first and loop down, even if large factors
3018 are less probable than small; if we find a large factor we will find a
3019 good sequence quickly, and therefore be able to prune (by decreasing
3020 COST_LIMIT) the search. */
3022 do_alg_addsub_factor
:
3023 for (m
= floor_log2 (t
- 1); m
>= 2; m
--)
3025 unsigned HOST_WIDE_INT d
;
3027 d
= (HOST_WIDE_INT_1U
<< m
) + 1;
3028 if (t
% d
== 0 && t
> d
&& m
< maxm
3029 && (!cache_hit
|| cache_alg
== alg_add_factor
))
3031 op_cost
= add_cost (speed
, mode
) + shift_cost (speed
, mode
, m
);
3032 if (shiftadd_cost (speed
, mode
, m
) <= op_cost
)
3033 op_cost
= shiftadd_cost (speed
, mode
, m
);
3035 op_latency
= op_cost
;
3038 new_limit
.cost
= best_cost
.cost
- op_cost
;
3039 new_limit
.latency
= best_cost
.latency
- op_latency
;
3040 synth_mult (alg_in
, t
/ d
, &new_limit
, mode
);
3042 alg_in
->cost
.cost
+= op_cost
;
3043 alg_in
->cost
.latency
+= op_latency
;
3044 if (alg_in
->cost
.latency
< op_cost
)
3045 alg_in
->cost
.latency
= op_cost
;
3046 if (CHEAPER_MULT_COST (&alg_in
->cost
, &best_cost
))
3048 best_cost
= alg_in
->cost
;
3049 std::swap (alg_in
, best_alg
);
3050 best_alg
->log
[best_alg
->ops
] = m
;
3051 best_alg
->op
[best_alg
->ops
] = alg_add_factor
;
3053 /* Other factors will have been taken care of in the recursion. */
3057 d
= (HOST_WIDE_INT_1U
<< m
) - 1;
3058 if (t
% d
== 0 && t
> d
&& m
< maxm
3059 && (!cache_hit
|| cache_alg
== alg_sub_factor
))
3061 op_cost
= add_cost (speed
, mode
) + shift_cost (speed
, mode
, m
);
3062 if (shiftsub0_cost (speed
, mode
, m
) <= op_cost
)
3063 op_cost
= shiftsub0_cost (speed
, mode
, m
);
3065 op_latency
= op_cost
;
3067 new_limit
.cost
= best_cost
.cost
- op_cost
;
3068 new_limit
.latency
= best_cost
.latency
- op_latency
;
3069 synth_mult (alg_in
, t
/ d
, &new_limit
, mode
);
3071 alg_in
->cost
.cost
+= op_cost
;
3072 alg_in
->cost
.latency
+= op_latency
;
3073 if (alg_in
->cost
.latency
< op_cost
)
3074 alg_in
->cost
.latency
= op_cost
;
3075 if (CHEAPER_MULT_COST (&alg_in
->cost
, &best_cost
))
3077 best_cost
= alg_in
->cost
;
3078 std::swap (alg_in
, best_alg
);
3079 best_alg
->log
[best_alg
->ops
] = m
;
3080 best_alg
->op
[best_alg
->ops
] = alg_sub_factor
;
3088 /* Try shift-and-add (load effective address) instructions,
3089 i.e. do a*3, a*5, a*9. */
3097 op_cost
= shiftadd_cost (speed
, mode
, m
);
3098 new_limit
.cost
= best_cost
.cost
- op_cost
;
3099 new_limit
.latency
= best_cost
.latency
- op_cost
;
3100 synth_mult (alg_in
, (t
- 1) >> m
, &new_limit
, mode
);
3102 alg_in
->cost
.cost
+= op_cost
;
3103 alg_in
->cost
.latency
+= op_cost
;
3104 if (CHEAPER_MULT_COST (&alg_in
->cost
, &best_cost
))
3106 best_cost
= alg_in
->cost
;
3107 std::swap (alg_in
, best_alg
);
3108 best_alg
->log
[best_alg
->ops
] = m
;
3109 best_alg
->op
[best_alg
->ops
] = alg_add_t2_m
;
3120 op_cost
= shiftsub0_cost (speed
, mode
, m
);
3121 new_limit
.cost
= best_cost
.cost
- op_cost
;
3122 new_limit
.latency
= best_cost
.latency
- op_cost
;
3123 synth_mult (alg_in
, (t
+ 1) >> m
, &new_limit
, mode
);
3125 alg_in
->cost
.cost
+= op_cost
;
3126 alg_in
->cost
.latency
+= op_cost
;
3127 if (CHEAPER_MULT_COST (&alg_in
->cost
, &best_cost
))
3129 best_cost
= alg_in
->cost
;
3130 std::swap (alg_in
, best_alg
);
3131 best_alg
->log
[best_alg
->ops
] = m
;
3132 best_alg
->op
[best_alg
->ops
] = alg_sub_t2_m
;
3140 /* If best_cost has not decreased, we have not found any algorithm. */
3141 if (!CHEAPER_MULT_COST (&best_cost
, cost_limit
))
3143 /* We failed to find an algorithm. Record alg_impossible for
3144 this case (that is, <T, MODE, COST_LIMIT>) so that next time
3145 we are asked to find an algorithm for T within the same or
3146 lower COST_LIMIT, we can immediately return to the
3149 entry_ptr
->mode
= mode
;
3150 entry_ptr
->speed
= speed
;
3151 entry_ptr
->alg
= alg_impossible
;
3152 entry_ptr
->cost
= *cost_limit
;
3156 /* Cache the result. */
3160 entry_ptr
->mode
= mode
;
3161 entry_ptr
->speed
= speed
;
3162 entry_ptr
->alg
= best_alg
->op
[best_alg
->ops
];
3163 entry_ptr
->cost
.cost
= best_cost
.cost
;
3164 entry_ptr
->cost
.latency
= best_cost
.latency
;
3167 /* If we are getting a too long sequence for `struct algorithm'
3168 to record, make this search fail. */
3169 if (best_alg
->ops
== MAX_BITS_PER_WORD
)
3172 /* Copy the algorithm from temporary space to the space at alg_out.
3173 We avoid using structure assignment because the majority of
3174 best_alg is normally undefined, and this is a critical function. */
3175 alg_out
->ops
= best_alg
->ops
+ 1;
3176 alg_out
->cost
= best_cost
;
3177 memcpy (alg_out
->op
, best_alg
->op
,
3178 alg_out
->ops
* sizeof *alg_out
->op
);
3179 memcpy (alg_out
->log
, best_alg
->log
,
3180 alg_out
->ops
* sizeof *alg_out
->log
);
3183 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
3184 Try three variations:
3186 - a shift/add sequence based on VAL itself
3187 - a shift/add sequence based on -VAL, followed by a negation
3188 - a shift/add sequence based on VAL - 1, followed by an addition.
3190 Return true if the cheapest of these cost less than MULT_COST,
3191 describing the algorithm in *ALG and final fixup in *VARIANT. */
3194 choose_mult_variant (machine_mode mode
, HOST_WIDE_INT val
,
3195 struct algorithm
*alg
, enum mult_variant
*variant
,
3198 struct algorithm alg2
;
3199 struct mult_cost limit
;
3201 bool speed
= optimize_insn_for_speed_p ();
3203 /* Fail quickly for impossible bounds. */
3207 /* Ensure that mult_cost provides a reasonable upper bound.
3208 Any constant multiplication can be performed with less
3209 than 2 * bits additions. */
3210 op_cost
= 2 * GET_MODE_UNIT_BITSIZE (mode
) * add_cost (speed
, mode
);
3211 if (mult_cost
> op_cost
)
3212 mult_cost
= op_cost
;
3214 *variant
= basic_variant
;
3215 limit
.cost
= mult_cost
;
3216 limit
.latency
= mult_cost
;
3217 synth_mult (alg
, val
, &limit
, mode
);
3219 /* This works only if the inverted value actually fits in an
3221 if (HOST_BITS_PER_INT
>= GET_MODE_UNIT_BITSIZE (mode
))
3223 op_cost
= neg_cost (speed
, mode
);
3224 if (MULT_COST_LESS (&alg
->cost
, mult_cost
))
3226 limit
.cost
= alg
->cost
.cost
- op_cost
;
3227 limit
.latency
= alg
->cost
.latency
- op_cost
;
3231 limit
.cost
= mult_cost
- op_cost
;
3232 limit
.latency
= mult_cost
- op_cost
;
3235 synth_mult (&alg2
, -val
, &limit
, mode
);
3236 alg2
.cost
.cost
+= op_cost
;
3237 alg2
.cost
.latency
+= op_cost
;
3238 if (CHEAPER_MULT_COST (&alg2
.cost
, &alg
->cost
))
3239 *alg
= alg2
, *variant
= negate_variant
;
3242 /* This proves very useful for division-by-constant. */
3243 op_cost
= add_cost (speed
, mode
);
3244 if (MULT_COST_LESS (&alg
->cost
, mult_cost
))
3246 limit
.cost
= alg
->cost
.cost
- op_cost
;
3247 limit
.latency
= alg
->cost
.latency
- op_cost
;
3251 limit
.cost
= mult_cost
- op_cost
;
3252 limit
.latency
= mult_cost
- op_cost
;
3255 synth_mult (&alg2
, val
- 1, &limit
, mode
);
3256 alg2
.cost
.cost
+= op_cost
;
3257 alg2
.cost
.latency
+= op_cost
;
3258 if (CHEAPER_MULT_COST (&alg2
.cost
, &alg
->cost
))
3259 *alg
= alg2
, *variant
= add_variant
;
3261 return MULT_COST_LESS (&alg
->cost
, mult_cost
);
3264 /* A subroutine of expand_mult, used for constant multiplications.
3265 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
3266 convenient. Use the shift/add sequence described by ALG and apply
3267 the final fixup specified by VARIANT. */
3270 expand_mult_const (machine_mode mode
, rtx op0
, HOST_WIDE_INT val
,
3271 rtx target
, const struct algorithm
*alg
,
3272 enum mult_variant variant
)
3274 unsigned HOST_WIDE_INT val_so_far
;
3280 /* Avoid referencing memory over and over and invalid sharing
3282 op0
= force_reg (mode
, op0
);
3284 /* ACCUM starts out either as OP0 or as a zero, depending on
3285 the first operation. */
3287 if (alg
->op
[0] == alg_zero
)
3289 accum
= copy_to_mode_reg (mode
, CONST0_RTX (mode
));
3292 else if (alg
->op
[0] == alg_m
)
3294 accum
= copy_to_mode_reg (mode
, op0
);
3300 for (opno
= 1; opno
< alg
->ops
; opno
++)
3302 int log
= alg
->log
[opno
];
3303 rtx shift_subtarget
= optimize
? 0 : accum
;
3305 = (opno
== alg
->ops
- 1 && target
!= 0 && variant
!= add_variant
3308 rtx accum_target
= optimize
? 0 : accum
;
3311 switch (alg
->op
[opno
])
3314 tem
= expand_shift (LSHIFT_EXPR
, mode
, accum
, log
, NULL_RTX
, 0);
3315 /* REG_EQUAL note will be attached to the following insn. */
3316 emit_move_insn (accum
, tem
);
3321 tem
= expand_shift (LSHIFT_EXPR
, mode
, op0
, log
, NULL_RTX
, 0);
3322 accum
= force_operand (gen_rtx_PLUS (mode
, accum
, tem
),
3323 add_target
? add_target
: accum_target
);
3324 val_so_far
+= HOST_WIDE_INT_1U
<< log
;
3328 tem
= expand_shift (LSHIFT_EXPR
, mode
, op0
, log
, NULL_RTX
, 0);
3329 accum
= force_operand (gen_rtx_MINUS (mode
, accum
, tem
),
3330 add_target
? add_target
: accum_target
);
3331 val_so_far
-= HOST_WIDE_INT_1U
<< log
;
3335 accum
= expand_shift (LSHIFT_EXPR
, mode
, accum
,
3336 log
, shift_subtarget
, 0);
3337 accum
= force_operand (gen_rtx_PLUS (mode
, accum
, op0
),
3338 add_target
? add_target
: accum_target
);
3339 val_so_far
= (val_so_far
<< log
) + 1;
3343 accum
= expand_shift (LSHIFT_EXPR
, mode
, accum
,
3344 log
, shift_subtarget
, 0);
3345 accum
= force_operand (gen_rtx_MINUS (mode
, accum
, op0
),
3346 add_target
? add_target
: accum_target
);
3347 val_so_far
= (val_so_far
<< log
) - 1;
3350 case alg_add_factor
:
3351 tem
= expand_shift (LSHIFT_EXPR
, mode
, accum
, log
, NULL_RTX
, 0);
3352 accum
= force_operand (gen_rtx_PLUS (mode
, accum
, tem
),
3353 add_target
? add_target
: accum_target
);
3354 val_so_far
+= val_so_far
<< log
;
3357 case alg_sub_factor
:
3358 tem
= expand_shift (LSHIFT_EXPR
, mode
, accum
, log
, NULL_RTX
, 0);
3359 accum
= force_operand (gen_rtx_MINUS (mode
, tem
, accum
),
3361 ? add_target
: (optimize
? 0 : tem
)));
3362 val_so_far
= (val_so_far
<< log
) - val_so_far
;
3369 if (SCALAR_INT_MODE_P (mode
))
3371 /* Write a REG_EQUAL note on the last insn so that we can cse
3372 multiplication sequences. Note that if ACCUM is a SUBREG,
3373 we've set the inner register and must properly indicate that. */
3374 tem
= op0
, nmode
= mode
;
3375 accum_inner
= accum
;
3376 if (GET_CODE (accum
) == SUBREG
)
3378 accum_inner
= SUBREG_REG (accum
);
3379 nmode
= GET_MODE (accum_inner
);
3380 tem
= gen_lowpart (nmode
, op0
);
3383 /* Don't add a REG_EQUAL note if tem is a paradoxical SUBREG.
3384 In that case, only the low bits of accum would be guaranteed to
3385 be equal to the content of the REG_EQUAL note, the upper bits
3387 if (!paradoxical_subreg_p (tem
))
3389 insn
= get_last_insn ();
3390 wide_int wval_so_far
3391 = wi::uhwi (val_so_far
,
3392 GET_MODE_PRECISION (as_a
<scalar_mode
> (nmode
)));
3393 rtx c
= immed_wide_int_const (wval_so_far
, nmode
);
3394 set_dst_reg_note (insn
, REG_EQUAL
, gen_rtx_MULT (nmode
, tem
, c
),
3400 if (variant
== negate_variant
)
3402 val_so_far
= -val_so_far
;
3403 accum
= expand_unop (mode
, neg_optab
, accum
, target
, 0);
3405 else if (variant
== add_variant
)
3407 val_so_far
= val_so_far
+ 1;
3408 accum
= force_operand (gen_rtx_PLUS (mode
, accum
, op0
), target
);
3411 /* Compare only the bits of val and val_so_far that are significant
3412 in the result mode, to avoid sign-/zero-extension confusion. */
3413 nmode
= GET_MODE_INNER (mode
);
3414 val
&= GET_MODE_MASK (nmode
);
3415 val_so_far
&= GET_MODE_MASK (nmode
);
3416 gcc_assert (val
== (HOST_WIDE_INT
) val_so_far
);
3421 /* Perform a multiplication and return an rtx for the result.
3422 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3423 TARGET is a suggestion for where to store the result (an rtx).
3425 We check specially for a constant integer as OP1.
3426 If you want this check for OP0 as well, then before calling
3427 you should swap the two operands if OP0 would be constant. */
3430 expand_mult (machine_mode mode
, rtx op0
, rtx op1
, rtx target
,
3431 int unsignedp
, bool no_libcall
)
3433 enum mult_variant variant
;
3434 struct algorithm algorithm
;
3437 bool speed
= optimize_insn_for_speed_p ();
3438 bool do_trapv
= flag_trapv
&& SCALAR_INT_MODE_P (mode
) && !unsignedp
;
3440 if (CONSTANT_P (op0
))
3441 std::swap (op0
, op1
);
3443 /* For vectors, there are several simplifications that can be made if
3444 all elements of the vector constant are identical. */
3445 scalar_op1
= unwrap_const_vec_duplicate (op1
);
3447 if (INTEGRAL_MODE_P (mode
))
3450 HOST_WIDE_INT coeff
;
3454 if (op1
== CONST0_RTX (mode
))
3456 if (op1
== CONST1_RTX (mode
))
3458 if (op1
== CONSTM1_RTX (mode
))
3459 return expand_unop (mode
, do_trapv
? negv_optab
: neg_optab
,
3465 /* If mode is integer vector mode, check if the backend supports
3466 vector lshift (by scalar or vector) at all. If not, we can't use
3467 synthesized multiply. */
3468 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
3469 && optab_handler (vashl_optab
, mode
) == CODE_FOR_nothing
3470 && optab_handler (ashl_optab
, mode
) == CODE_FOR_nothing
)
3473 /* These are the operations that are potentially turned into
3474 a sequence of shifts and additions. */
3475 mode_bitsize
= GET_MODE_UNIT_BITSIZE (mode
);
3477 /* synth_mult does an `unsigned int' multiply. As long as the mode is
3478 less than or equal in size to `unsigned int' this doesn't matter.
3479 If the mode is larger than `unsigned int', then synth_mult works
3480 only if the constant value exactly fits in an `unsigned int' without
3481 any truncation. This means that multiplying by negative values does
3482 not work; results are off by 2^32 on a 32 bit machine. */
3483 if (CONST_INT_P (scalar_op1
))
3485 coeff
= INTVAL (scalar_op1
);
3488 #if TARGET_SUPPORTS_WIDE_INT
3489 else if (CONST_WIDE_INT_P (scalar_op1
))
3491 else if (CONST_DOUBLE_AS_INT_P (scalar_op1
))
3494 int shift
= wi::exact_log2 (rtx_mode_t (scalar_op1
, mode
));
3495 /* Perfect power of 2 (other than 1, which is handled above). */
3497 return expand_shift (LSHIFT_EXPR
, mode
, op0
,
3498 shift
, target
, unsignedp
);
3505 /* We used to test optimize here, on the grounds that it's better to
3506 produce a smaller program when -O is not used. But this causes
3507 such a terrible slowdown sometimes that it seems better to always
3510 /* Special case powers of two. */
3511 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff
)
3512 && !(is_neg
&& mode_bitsize
> HOST_BITS_PER_WIDE_INT
))
3513 return expand_shift (LSHIFT_EXPR
, mode
, op0
,
3514 floor_log2 (coeff
), target
, unsignedp
);
3516 fake_reg
= gen_raw_REG (mode
, LAST_VIRTUAL_REGISTER
+ 1);
3518 /* Attempt to handle multiplication of DImode values by negative
3519 coefficients, by performing the multiplication by a positive
3520 multiplier and then inverting the result. */
3521 if (is_neg
&& mode_bitsize
> HOST_BITS_PER_WIDE_INT
)
3523 /* It's safe to use -coeff even for INT_MIN, as the
3524 result is interpreted as an unsigned coefficient.
3525 Exclude cost of op0 from max_cost to match the cost
3526 calculation of the synth_mult. */
3527 coeff
= -(unsigned HOST_WIDE_INT
) coeff
;
3528 max_cost
= (set_src_cost (gen_rtx_MULT (mode
, fake_reg
, op1
),
3530 - neg_cost (speed
, mode
));
3534 /* Special case powers of two. */
3535 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff
))
3537 rtx temp
= expand_shift (LSHIFT_EXPR
, mode
, op0
,
3538 floor_log2 (coeff
), target
, unsignedp
);
3539 return expand_unop (mode
, neg_optab
, temp
, target
, 0);
3542 if (choose_mult_variant (mode
, coeff
, &algorithm
, &variant
,
3545 rtx temp
= expand_mult_const (mode
, op0
, coeff
, NULL_RTX
,
3546 &algorithm
, variant
);
3547 return expand_unop (mode
, neg_optab
, temp
, target
, 0);
3552 /* Exclude cost of op0 from max_cost to match the cost
3553 calculation of the synth_mult. */
3554 max_cost
= set_src_cost (gen_rtx_MULT (mode
, fake_reg
, op1
), mode
, speed
);
3555 if (choose_mult_variant (mode
, coeff
, &algorithm
, &variant
, max_cost
))
3556 return expand_mult_const (mode
, op0
, coeff
, target
,
3557 &algorithm
, variant
);
3561 /* Expand x*2.0 as x+x. */
3562 if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1
)
3563 && real_equal (CONST_DOUBLE_REAL_VALUE (scalar_op1
), &dconst2
))
3565 op0
= force_reg (GET_MODE (op0
), op0
);
3566 return expand_binop (mode
, add_optab
, op0
, op0
,
3568 no_libcall
? OPTAB_WIDEN
: OPTAB_LIB_WIDEN
);
3571 /* This used to use umul_optab if unsigned, but for non-widening multiply
3572 there is no difference between signed and unsigned. */
3573 op0
= expand_binop (mode
, do_trapv
? smulv_optab
: smul_optab
,
3574 op0
, op1
, target
, unsignedp
,
3575 no_libcall
? OPTAB_WIDEN
: OPTAB_LIB_WIDEN
);
3576 gcc_assert (op0
|| no_libcall
);
3580 /* Return a cost estimate for multiplying a register by the given
3581 COEFFicient in the given MODE and SPEED. */
3584 mult_by_coeff_cost (HOST_WIDE_INT coeff
, machine_mode mode
, bool speed
)
3587 struct algorithm algorithm
;
3588 enum mult_variant variant
;
3590 rtx fake_reg
= gen_raw_REG (mode
, LAST_VIRTUAL_REGISTER
+ 1);
3591 max_cost
= set_src_cost (gen_rtx_MULT (mode
, fake_reg
, fake_reg
),
3593 if (choose_mult_variant (mode
, coeff
, &algorithm
, &variant
, max_cost
))
3594 return algorithm
.cost
.cost
;
3599 /* Perform a widening multiplication and return an rtx for the result.
3600 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3601 TARGET is a suggestion for where to store the result (an rtx).
3602 THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3603 or smul_widen_optab.
3605 We check specially for a constant integer as OP1, comparing the
3606 cost of a widening multiply against the cost of a sequence of shifts
3610 expand_widening_mult (machine_mode mode
, rtx op0
, rtx op1
, rtx target
,
3611 int unsignedp
, optab this_optab
)
3613 bool speed
= optimize_insn_for_speed_p ();
3616 if (CONST_INT_P (op1
)
3617 && GET_MODE (op0
) != VOIDmode
3618 && (cop1
= convert_modes (mode
, GET_MODE (op0
), op1
,
3619 this_optab
== umul_widen_optab
))
3620 && CONST_INT_P (cop1
)
3621 && (INTVAL (cop1
) >= 0
3622 || HWI_COMPUTABLE_MODE_P (mode
)))
3624 HOST_WIDE_INT coeff
= INTVAL (cop1
);
3626 enum mult_variant variant
;
3627 struct algorithm algorithm
;
3630 return CONST0_RTX (mode
);
3632 /* Special case powers of two. */
3633 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff
))
3635 op0
= convert_to_mode (mode
, op0
, this_optab
== umul_widen_optab
);
3636 return expand_shift (LSHIFT_EXPR
, mode
, op0
,
3637 floor_log2 (coeff
), target
, unsignedp
);
3640 /* Exclude cost of op0 from max_cost to match the cost
3641 calculation of the synth_mult. */
3642 max_cost
= mul_widen_cost (speed
, mode
);
3643 if (choose_mult_variant (mode
, coeff
, &algorithm
, &variant
,
3646 op0
= convert_to_mode (mode
, op0
, this_optab
== umul_widen_optab
);
3647 return expand_mult_const (mode
, op0
, coeff
, target
,
3648 &algorithm
, variant
);
3651 return expand_binop (mode
, this_optab
, op0
, op1
, target
,
3652 unsignedp
, OPTAB_LIB_WIDEN
);
3655 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3656 replace division by D, and put the least significant N bits of the result
3657 in *MULTIPLIER_PTR and return the most significant bit.
3659 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3660 needed precision is in PRECISION (should be <= N).
3662 PRECISION should be as small as possible so this function can choose
3663 multiplier more freely.
3665 The rounded-up logarithm of D is placed in *lgup_ptr. A shift count that
3666 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3668 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3669 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
3671 unsigned HOST_WIDE_INT
3672 choose_multiplier (unsigned HOST_WIDE_INT d
, int n
, int precision
,
3673 unsigned HOST_WIDE_INT
*multiplier_ptr
,
3674 int *post_shift_ptr
, int *lgup_ptr
)
3676 int lgup
, post_shift
;
3679 /* lgup = ceil(log2(divisor)); */
3680 lgup
= ceil_log2 (d
);
3682 gcc_assert (lgup
<= n
);
3685 pow2
= n
+ lgup
- precision
;
3687 /* mlow = 2^(N + lgup)/d */
3688 wide_int val
= wi::set_bit_in_zero (pow
, HOST_BITS_PER_DOUBLE_INT
);
3689 wide_int mlow
= wi::udiv_trunc (val
, d
);
3691 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3692 val
|= wi::set_bit_in_zero (pow2
, HOST_BITS_PER_DOUBLE_INT
);
3693 wide_int mhigh
= wi::udiv_trunc (val
, d
);
3695 /* If precision == N, then mlow, mhigh exceed 2^N
3696 (but they do not exceed 2^(N+1)). */
3698 /* Reduce to lowest terms. */
3699 for (post_shift
= lgup
; post_shift
> 0; post_shift
--)
3701 unsigned HOST_WIDE_INT ml_lo
= wi::extract_uhwi (mlow
, 1,
3702 HOST_BITS_PER_WIDE_INT
);
3703 unsigned HOST_WIDE_INT mh_lo
= wi::extract_uhwi (mhigh
, 1,
3704 HOST_BITS_PER_WIDE_INT
);
3708 mlow
= wi::uhwi (ml_lo
, HOST_BITS_PER_DOUBLE_INT
);
3709 mhigh
= wi::uhwi (mh_lo
, HOST_BITS_PER_DOUBLE_INT
);
3712 *post_shift_ptr
= post_shift
;
3714 if (n
< HOST_BITS_PER_WIDE_INT
)
3716 unsigned HOST_WIDE_INT mask
= (HOST_WIDE_INT_1U
<< n
) - 1;
3717 *multiplier_ptr
= mhigh
.to_uhwi () & mask
;
3718 return mhigh
.to_uhwi () > mask
;
3722 *multiplier_ptr
= mhigh
.to_uhwi ();
3723 return wi::extract_uhwi (mhigh
, HOST_BITS_PER_WIDE_INT
, 1);
3727 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3728 congruent to 1 (mod 2**N). */
3730 static unsigned HOST_WIDE_INT
3731 invert_mod2n (unsigned HOST_WIDE_INT x
, int n
)
3733 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
3735 /* The algorithm notes that the choice y = x satisfies
3736 x*y == 1 mod 2^3, since x is assumed odd.
3737 Each iteration doubles the number of bits of significance in y. */
3739 unsigned HOST_WIDE_INT mask
;
3740 unsigned HOST_WIDE_INT y
= x
;
3743 mask
= (n
== HOST_BITS_PER_WIDE_INT
3745 : (HOST_WIDE_INT_1U
<< n
) - 1);
3749 y
= y
* (2 - x
*y
) & mask
; /* Modulo 2^N */
3755 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3756 flavor of OP0 and OP1. ADJ_OPERAND is already the high half of the
3757 product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed product
3758 to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3761 The result is put in TARGET if that is convenient.
3763 MODE is the mode of operation. */
3766 expand_mult_highpart_adjust (scalar_int_mode mode
, rtx adj_operand
, rtx op0
,
3767 rtx op1
, rtx target
, int unsignedp
)
3770 enum rtx_code adj_code
= unsignedp
? PLUS
: MINUS
;
3772 tem
= expand_shift (RSHIFT_EXPR
, mode
, op0
,
3773 GET_MODE_BITSIZE (mode
) - 1, NULL_RTX
, 0);
3774 tem
= expand_and (mode
, tem
, op1
, NULL_RTX
);
3776 = force_operand (gen_rtx_fmt_ee (adj_code
, mode
, adj_operand
, tem
),
3779 tem
= expand_shift (RSHIFT_EXPR
, mode
, op1
,
3780 GET_MODE_BITSIZE (mode
) - 1, NULL_RTX
, 0);
3781 tem
= expand_and (mode
, tem
, op0
, NULL_RTX
);
3782 target
= force_operand (gen_rtx_fmt_ee (adj_code
, mode
, adj_operand
, tem
),
3788 /* Subroutine of expmed_mult_highpart. Return the MODE high part of OP. */
3791 extract_high_half (scalar_int_mode mode
, rtx op
)
3793 if (mode
== word_mode
)
3794 return gen_highpart (mode
, op
);
3796 scalar_int_mode wider_mode
= GET_MODE_WIDER_MODE (mode
).require ();
3798 op
= expand_shift (RSHIFT_EXPR
, wider_mode
, op
,
3799 GET_MODE_BITSIZE (mode
), 0, 1);
3800 return convert_modes (mode
, wider_mode
, op
, 0);
3803 /* Like expmed_mult_highpart, but only consider using a multiplication
3804 optab. OP1 is an rtx for the constant operand. */
3807 expmed_mult_highpart_optab (scalar_int_mode mode
, rtx op0
, rtx op1
,
3808 rtx target
, int unsignedp
, int max_cost
)
3810 rtx narrow_op1
= gen_int_mode (INTVAL (op1
), mode
);
3814 bool speed
= optimize_insn_for_speed_p ();
3816 scalar_int_mode wider_mode
= GET_MODE_WIDER_MODE (mode
).require ();
3818 size
= GET_MODE_BITSIZE (mode
);
3820 /* Firstly, try using a multiplication insn that only generates the needed
3821 high part of the product, and in the sign flavor of unsignedp. */
3822 if (mul_highpart_cost (speed
, mode
) < max_cost
)
3824 moptab
= unsignedp
? umul_highpart_optab
: smul_highpart_optab
;
3825 tem
= expand_binop (mode
, moptab
, op0
, narrow_op1
, target
,
3826 unsignedp
, OPTAB_DIRECT
);
3831 /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3832 Need to adjust the result after the multiplication. */
3833 if (size
- 1 < BITS_PER_WORD
3834 && (mul_highpart_cost (speed
, mode
)
3835 + 2 * shift_cost (speed
, mode
, size
-1)
3836 + 4 * add_cost (speed
, mode
) < max_cost
))
3838 moptab
= unsignedp
? smul_highpart_optab
: umul_highpart_optab
;
3839 tem
= expand_binop (mode
, moptab
, op0
, narrow_op1
, target
,
3840 unsignedp
, OPTAB_DIRECT
);
3842 /* We used the wrong signedness. Adjust the result. */
3843 return expand_mult_highpart_adjust (mode
, tem
, op0
, narrow_op1
,
3847 /* Try widening multiplication. */
3848 moptab
= unsignedp
? umul_widen_optab
: smul_widen_optab
;
3849 if (convert_optab_handler (moptab
, wider_mode
, mode
) != CODE_FOR_nothing
3850 && mul_widen_cost (speed
, wider_mode
) < max_cost
)
3852 tem
= expand_binop (wider_mode
, moptab
, op0
, narrow_op1
, 0,
3853 unsignedp
, OPTAB_WIDEN
);
3855 return extract_high_half (mode
, tem
);
3858 /* Try widening the mode and perform a non-widening multiplication. */
3859 if (optab_handler (smul_optab
, wider_mode
) != CODE_FOR_nothing
3860 && size
- 1 < BITS_PER_WORD
3861 && (mul_cost (speed
, wider_mode
) + shift_cost (speed
, mode
, size
-1)
3867 /* We need to widen the operands, for example to ensure the
3868 constant multiplier is correctly sign or zero extended.
3869 Use a sequence to clean-up any instructions emitted by
3870 the conversions if things don't work out. */
3872 wop0
= convert_modes (wider_mode
, mode
, op0
, unsignedp
);
3873 wop1
= convert_modes (wider_mode
, mode
, op1
, unsignedp
);
3874 tem
= expand_binop (wider_mode
, smul_optab
, wop0
, wop1
, 0,
3875 unsignedp
, OPTAB_WIDEN
);
3876 insns
= get_insns ();
3882 return extract_high_half (mode
, tem
);
3886 /* Try widening multiplication of opposite signedness, and adjust. */
3887 moptab
= unsignedp
? smul_widen_optab
: umul_widen_optab
;
3888 if (convert_optab_handler (moptab
, wider_mode
, mode
) != CODE_FOR_nothing
3889 && size
- 1 < BITS_PER_WORD
3890 && (mul_widen_cost (speed
, wider_mode
)
3891 + 2 * shift_cost (speed
, mode
, size
-1)
3892 + 4 * add_cost (speed
, mode
) < max_cost
))
3894 tem
= expand_binop (wider_mode
, moptab
, op0
, narrow_op1
,
3895 NULL_RTX
, ! unsignedp
, OPTAB_WIDEN
);
3898 tem
= extract_high_half (mode
, tem
);
3899 /* We used the wrong signedness. Adjust the result. */
3900 return expand_mult_highpart_adjust (mode
, tem
, op0
, narrow_op1
,
3908 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3909 putting the high half of the result in TARGET if that is convenient,
3910 and return where the result is. If the operation cannot be performed,
3913 MODE is the mode of operation and result.
3915 UNSIGNEDP nonzero means unsigned multiply.
3917 MAX_COST is the total allowed cost for the expanded RTL. */
3920 expmed_mult_highpart (scalar_int_mode mode
, rtx op0
, rtx op1
,
3921 rtx target
, int unsignedp
, int max_cost
)
3923 unsigned HOST_WIDE_INT cnst1
;
3925 bool sign_adjust
= false;
3926 enum mult_variant variant
;
3927 struct algorithm alg
;
3929 bool speed
= optimize_insn_for_speed_p ();
3931 /* We can't support modes wider than HOST_BITS_PER_INT. */
3932 gcc_assert (HWI_COMPUTABLE_MODE_P (mode
));
3934 cnst1
= INTVAL (op1
) & GET_MODE_MASK (mode
);
3936 /* We can't optimize modes wider than BITS_PER_WORD.
3937 ??? We might be able to perform double-word arithmetic if
3938 mode == word_mode, however all the cost calculations in
3939 synth_mult etc. assume single-word operations. */
3940 scalar_int_mode wider_mode
= GET_MODE_WIDER_MODE (mode
).require ();
3941 if (GET_MODE_BITSIZE (wider_mode
) > BITS_PER_WORD
)
3942 return expmed_mult_highpart_optab (mode
, op0
, op1
, target
,
3943 unsignedp
, max_cost
);
3945 extra_cost
= shift_cost (speed
, mode
, GET_MODE_BITSIZE (mode
) - 1);
3947 /* Check whether we try to multiply by a negative constant. */
3948 if (!unsignedp
&& ((cnst1
>> (GET_MODE_BITSIZE (mode
) - 1)) & 1))
3951 extra_cost
+= add_cost (speed
, mode
);
3954 /* See whether shift/add multiplication is cheap enough. */
3955 if (choose_mult_variant (wider_mode
, cnst1
, &alg
, &variant
,
3956 max_cost
- extra_cost
))
3958 /* See whether the specialized multiplication optabs are
3959 cheaper than the shift/add version. */
3960 tem
= expmed_mult_highpart_optab (mode
, op0
, op1
, target
, unsignedp
,
3961 alg
.cost
.cost
+ extra_cost
);
3965 tem
= convert_to_mode (wider_mode
, op0
, unsignedp
);
3966 tem
= expand_mult_const (wider_mode
, tem
, cnst1
, 0, &alg
, variant
);
3967 tem
= extract_high_half (mode
, tem
);
3969 /* Adjust result for signedness. */
3971 tem
= force_operand (gen_rtx_MINUS (mode
, tem
, op0
), tem
);
3975 return expmed_mult_highpart_optab (mode
, op0
, op1
, target
,
3976 unsignedp
, max_cost
);
3980 /* Expand signed modulus of OP0 by a power of two D in mode MODE. */
3983 expand_smod_pow2 (scalar_int_mode mode
, rtx op0
, HOST_WIDE_INT d
)
3985 rtx result
, temp
, shift
;
3986 rtx_code_label
*label
;
3988 int prec
= GET_MODE_PRECISION (mode
);
3990 logd
= floor_log2 (d
);
3991 result
= gen_reg_rtx (mode
);
3993 /* Avoid conditional branches when they're expensive. */
3994 if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3995 && optimize_insn_for_speed_p ())
3997 rtx signmask
= emit_store_flag (result
, LT
, op0
, const0_rtx
,
4001 HOST_WIDE_INT masklow
= (HOST_WIDE_INT_1
<< logd
) - 1;
4002 signmask
= force_reg (mode
, signmask
);
4003 shift
= gen_int_shift_amount (mode
, GET_MODE_BITSIZE (mode
) - logd
);
4005 /* Use the rtx_cost of a LSHIFTRT instruction to determine
4006 which instruction sequence to use. If logical right shifts
4007 are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
4008 use a LSHIFTRT, 1 ADD, 1 SUB and an AND. */
4010 temp
= gen_rtx_LSHIFTRT (mode
, result
, shift
);
4011 if (optab_handler (lshr_optab
, mode
) == CODE_FOR_nothing
4012 || (set_src_cost (temp
, mode
, optimize_insn_for_speed_p ())
4013 > COSTS_N_INSNS (2)))
4015 temp
= expand_binop (mode
, xor_optab
, op0
, signmask
,
4016 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
4017 temp
= expand_binop (mode
, sub_optab
, temp
, signmask
,
4018 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
4019 temp
= expand_binop (mode
, and_optab
, temp
,
4020 gen_int_mode (masklow
, mode
),
4021 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
4022 temp
= expand_binop (mode
, xor_optab
, temp
, signmask
,
4023 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
4024 temp
= expand_binop (mode
, sub_optab
, temp
, signmask
,
4025 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
4029 signmask
= expand_binop (mode
, lshr_optab
, signmask
, shift
,
4030 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
4031 signmask
= force_reg (mode
, signmask
);
4033 temp
= expand_binop (mode
, add_optab
, op0
, signmask
,
4034 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
4035 temp
= expand_binop (mode
, and_optab
, temp
,
4036 gen_int_mode (masklow
, mode
),
4037 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
4038 temp
= expand_binop (mode
, sub_optab
, temp
, signmask
,
4039 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
4045 /* Mask contains the mode's signbit and the significant bits of the
4046 modulus. By including the signbit in the operation, many targets
4047 can avoid an explicit compare operation in the following comparison
4049 wide_int mask
= wi::mask (logd
, false, prec
);
4050 mask
= wi::set_bit (mask
, prec
- 1);
4052 temp
= expand_binop (mode
, and_optab
, op0
,
4053 immed_wide_int_const (mask
, mode
),
4054 result
, 1, OPTAB_LIB_WIDEN
);
4056 emit_move_insn (result
, temp
);
4058 label
= gen_label_rtx ();
4059 do_cmp_and_jump (result
, const0_rtx
, GE
, mode
, label
);
4061 temp
= expand_binop (mode
, sub_optab
, result
, const1_rtx
, result
,
4062 0, OPTAB_LIB_WIDEN
);
4064 mask
= wi::mask (logd
, true, prec
);
4065 temp
= expand_binop (mode
, ior_optab
, temp
,
4066 immed_wide_int_const (mask
, mode
),
4067 result
, 1, OPTAB_LIB_WIDEN
);
4068 temp
= expand_binop (mode
, add_optab
, temp
, const1_rtx
, result
,
4069 0, OPTAB_LIB_WIDEN
);
4071 emit_move_insn (result
, temp
);
4076 /* Expand signed division of OP0 by a power of two D in mode MODE.
4077 This routine is only called for positive values of D. */
4080 expand_sdiv_pow2 (scalar_int_mode mode
, rtx op0
, HOST_WIDE_INT d
)
4083 rtx_code_label
*label
;
4086 logd
= floor_log2 (d
);
4089 && BRANCH_COST (optimize_insn_for_speed_p (),
4092 temp
= gen_reg_rtx (mode
);
4093 temp
= emit_store_flag (temp
, LT
, op0
, const0_rtx
, mode
, 0, 1);
4094 temp
= expand_binop (mode
, add_optab
, temp
, op0
, NULL_RTX
,
4095 0, OPTAB_LIB_WIDEN
);
4096 return expand_shift (RSHIFT_EXPR
, mode
, temp
, logd
, NULL_RTX
, 0);
4099 if (HAVE_conditional_move
4100 && BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2)
4105 temp2
= copy_to_mode_reg (mode
, op0
);
4106 temp
= expand_binop (mode
, add_optab
, temp2
, gen_int_mode (d
- 1, mode
),
4107 NULL_RTX
, 0, OPTAB_LIB_WIDEN
);
4108 temp
= force_reg (mode
, temp
);
4110 /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */
4111 temp2
= emit_conditional_move (temp2
, LT
, temp2
, const0_rtx
,
4112 mode
, temp
, temp2
, mode
, 0);
4115 rtx_insn
*seq
= get_insns ();
4118 return expand_shift (RSHIFT_EXPR
, mode
, temp2
, logd
, NULL_RTX
, 0);
4123 if (BRANCH_COST (optimize_insn_for_speed_p (),
4126 int ushift
= GET_MODE_BITSIZE (mode
) - logd
;
4128 temp
= gen_reg_rtx (mode
);
4129 temp
= emit_store_flag (temp
, LT
, op0
, const0_rtx
, mode
, 0, -1);
4130 if (GET_MODE_BITSIZE (mode
) >= BITS_PER_WORD
4131 || shift_cost (optimize_insn_for_speed_p (), mode
, ushift
)
4132 > COSTS_N_INSNS (1))
4133 temp
= expand_binop (mode
, and_optab
, temp
, gen_int_mode (d
- 1, mode
),
4134 NULL_RTX
, 0, OPTAB_LIB_WIDEN
);
4136 temp
= expand_shift (RSHIFT_EXPR
, mode
, temp
,
4137 ushift
, NULL_RTX
, 1);
4138 temp
= expand_binop (mode
, add_optab
, temp
, op0
, NULL_RTX
,
4139 0, OPTAB_LIB_WIDEN
);
4140 return expand_shift (RSHIFT_EXPR
, mode
, temp
, logd
, NULL_RTX
, 0);
4143 label
= gen_label_rtx ();
4144 temp
= copy_to_mode_reg (mode
, op0
);
4145 do_cmp_and_jump (temp
, const0_rtx
, GE
, mode
, label
);
4146 expand_inc (temp
, gen_int_mode (d
- 1, mode
));
4148 return expand_shift (RSHIFT_EXPR
, mode
, temp
, logd
, NULL_RTX
, 0);
4151 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
4152 if that is convenient, and returning where the result is.
4153 You may request either the quotient or the remainder as the result;
4154 specify REM_FLAG nonzero to get the remainder.
4156 CODE is the expression code for which kind of division this is;
4157 it controls how rounding is done. MODE is the machine mode to use.
4158 UNSIGNEDP nonzero means do unsigned division. */
4160 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
4161 and then correct it by or'ing in missing high bits
4162 if result of ANDI is nonzero.
4163 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
4164 This could optimize to a bfexts instruction.
4165 But C doesn't use these operations, so their optimizations are
4167 /* ??? For modulo, we don't actually need the highpart of the first product,
4168 the low part will do nicely. And for small divisors, the second multiply
4169 can also be a low-part only multiply or even be completely left out.
4170 E.g. to calculate the remainder of a division by 3 with a 32 bit
4171 multiply, multiply with 0x55555556 and extract the upper two bits;
4172 the result is exact for inputs up to 0x1fffffff.
4173 The input range can be reduced by using cross-sum rules.
4174 For odd divisors >= 3, the following table gives right shift counts
4175 so that if a number is shifted by an integer multiple of the given
4176 amount, the remainder stays the same:
4177 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
4178 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
4179 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
4180 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
4181 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
4183 Cross-sum rules for even numbers can be derived by leaving as many bits
4184 to the right alone as the divisor has zeros to the right.
4185 E.g. if x is an unsigned 32 bit number:
4186 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
4190 expand_divmod (int rem_flag
, enum tree_code code
, machine_mode mode
,
4191 rtx op0
, rtx op1
, rtx target
, int unsignedp
)
4193 machine_mode compute_mode
;
4195 rtx quotient
= 0, remainder
= 0;
4198 optab optab1
, optab2
;
4199 int op1_is_constant
, op1_is_pow2
= 0;
4200 int max_cost
, extra_cost
;
4201 static HOST_WIDE_INT last_div_const
= 0;
4202 bool speed
= optimize_insn_for_speed_p ();
4204 op1_is_constant
= CONST_INT_P (op1
);
4205 if (op1_is_constant
)
4207 wide_int ext_op1
= rtx_mode_t (op1
, mode
);
4208 op1_is_pow2
= (wi::popcount (ext_op1
) == 1
4210 && wi::popcount (wi::neg (ext_op1
)) == 1));
4214 This is the structure of expand_divmod:
4216 First comes code to fix up the operands so we can perform the operations
4217 correctly and efficiently.
4219 Second comes a switch statement with code specific for each rounding mode.
4220 For some special operands this code emits all RTL for the desired
4221 operation, for other cases, it generates only a quotient and stores it in
4222 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
4223 to indicate that it has not done anything.
4225 Last comes code that finishes the operation. If QUOTIENT is set and
4226 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
4227 QUOTIENT is not set, it is computed using trunc rounding.
4229 We try to generate special code for division and remainder when OP1 is a
4230 constant. If |OP1| = 2**n we can use shifts and some other fast
4231 operations. For other values of OP1, we compute a carefully selected
4232 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
4235 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
4236 half of the product. Different strategies for generating the product are
4237 implemented in expmed_mult_highpart.
4239 If what we actually want is the remainder, we generate that by another
4240 by-constant multiplication and a subtraction. */
4242 /* We shouldn't be called with OP1 == const1_rtx, but some of the
4243 code below will malfunction if we are, so check here and handle
4244 the special case if so. */
4245 if (op1
== const1_rtx
)
4246 return rem_flag
? const0_rtx
: op0
;
4248 /* When dividing by -1, we could get an overflow.
4249 negv_optab can handle overflows. */
4250 if (! unsignedp
&& op1
== constm1_rtx
)
4254 return expand_unop (mode
, flag_trapv
&& GET_MODE_CLASS (mode
) == MODE_INT
4255 ? negv_optab
: neg_optab
, op0
, target
, 0);
4259 /* Don't use the function value register as a target
4260 since we have to read it as well as write it,
4261 and function-inlining gets confused by this. */
4262 && ((REG_P (target
) && REG_FUNCTION_VALUE_P (target
))
4263 /* Don't clobber an operand while doing a multi-step calculation. */
4264 || ((rem_flag
|| op1_is_constant
)
4265 && (reg_mentioned_p (target
, op0
)
4266 || (MEM_P (op0
) && MEM_P (target
))))
4267 || reg_mentioned_p (target
, op1
)
4268 || (MEM_P (op1
) && MEM_P (target
))))
4271 /* Get the mode in which to perform this computation. Normally it will
4272 be MODE, but sometimes we can't do the desired operation in MODE.
4273 If so, pick a wider mode in which we can do the operation. Convert
4274 to that mode at the start to avoid repeated conversions.
4276 First see what operations we need. These depend on the expression
4277 we are evaluating. (We assume that divxx3 insns exist under the
4278 same conditions that modxx3 insns and that these insns don't normally
4279 fail. If these assumptions are not correct, we may generate less
4280 efficient code in some cases.)
4282 Then see if we find a mode in which we can open-code that operation
4283 (either a division, modulus, or shift). Finally, check for the smallest
4284 mode for which we can do the operation with a library call. */
4286 /* We might want to refine this now that we have division-by-constant
4287 optimization. Since expmed_mult_highpart tries so many variants, it is
4288 not straightforward to generalize this. Maybe we should make an array
4289 of possible modes in init_expmed? Save this for GCC 2.7. */
4291 optab1
= (op1_is_pow2
4292 ? (unsignedp
? lshr_optab
: ashr_optab
)
4293 : (unsignedp
? udiv_optab
: sdiv_optab
));
4294 optab2
= (op1_is_pow2
? optab1
4295 : (unsignedp
? udivmod_optab
: sdivmod_optab
));
4297 FOR_EACH_MODE_FROM (compute_mode
, mode
)
4298 if (optab_handler (optab1
, compute_mode
) != CODE_FOR_nothing
4299 || optab_handler (optab2
, compute_mode
) != CODE_FOR_nothing
)
4302 if (compute_mode
== VOIDmode
)
4303 FOR_EACH_MODE_FROM (compute_mode
, mode
)
4304 if (optab_libfunc (optab1
, compute_mode
)
4305 || optab_libfunc (optab2
, compute_mode
))
4308 /* If we still couldn't find a mode, use MODE, but expand_binop will
4310 if (compute_mode
== VOIDmode
)
4311 compute_mode
= mode
;
4313 if (target
&& GET_MODE (target
) == compute_mode
)
4316 tquotient
= gen_reg_rtx (compute_mode
);
4319 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
4320 (mode), and thereby get better code when OP1 is a constant. Do that
4321 later. It will require going over all usages of SIZE below. */
4322 size
= GET_MODE_BITSIZE (mode
);
4325 /* Only deduct something for a REM if the last divide done was
4326 for a different constant. Then set the constant of the last
4328 max_cost
= (unsignedp
4329 ? udiv_cost (speed
, compute_mode
)
4330 : sdiv_cost (speed
, compute_mode
));
4331 if (rem_flag
&& ! (last_div_const
!= 0 && op1_is_constant
4332 && INTVAL (op1
) == last_div_const
))
4333 max_cost
-= (mul_cost (speed
, compute_mode
)
4334 + add_cost (speed
, compute_mode
));
4336 last_div_const
= ! rem_flag
&& op1_is_constant
? INTVAL (op1
) : 0;
4338 /* Now convert to the best mode to use. */
4339 if (compute_mode
!= mode
)
4341 op0
= convert_modes (compute_mode
, mode
, op0
, unsignedp
);
4342 op1
= convert_modes (compute_mode
, mode
, op1
, unsignedp
);
4344 /* convert_modes may have placed op1 into a register, so we
4345 must recompute the following. */
4346 op1_is_constant
= CONST_INT_P (op1
);
4347 if (op1_is_constant
)
4349 wide_int ext_op1
= rtx_mode_t (op1
, compute_mode
);
4350 op1_is_pow2
= (wi::popcount (ext_op1
) == 1
4352 && wi::popcount (wi::neg (ext_op1
)) == 1));
4358 /* If one of the operands is a volatile MEM, copy it into a register. */
4360 if (MEM_P (op0
) && MEM_VOLATILE_P (op0
))
4361 op0
= force_reg (compute_mode
, op0
);
4362 if (MEM_P (op1
) && MEM_VOLATILE_P (op1
))
4363 op1
= force_reg (compute_mode
, op1
);
4365 /* If we need the remainder or if OP1 is constant, we need to
4366 put OP0 in a register in case it has any queued subexpressions. */
4367 if (rem_flag
|| op1_is_constant
)
4368 op0
= force_reg (compute_mode
, op0
);
4370 last
= get_last_insn ();
4372 /* Promote floor rounding to trunc rounding for unsigned operations. */
4375 if (code
== FLOOR_DIV_EXPR
)
4376 code
= TRUNC_DIV_EXPR
;
4377 if (code
== FLOOR_MOD_EXPR
)
4378 code
= TRUNC_MOD_EXPR
;
4379 if (code
== EXACT_DIV_EXPR
&& op1_is_pow2
)
4380 code
= TRUNC_DIV_EXPR
;
4383 if (op1
!= const0_rtx
)
4386 case TRUNC_MOD_EXPR
:
4387 case TRUNC_DIV_EXPR
:
4388 if (op1_is_constant
)
4390 scalar_int_mode int_mode
= as_a
<scalar_int_mode
> (compute_mode
);
4391 int size
= GET_MODE_BITSIZE (int_mode
);
4394 unsigned HOST_WIDE_INT mh
, ml
;
4395 int pre_shift
, post_shift
;
4397 wide_int wd
= rtx_mode_t (op1
, int_mode
);
4398 unsigned HOST_WIDE_INT d
= wd
.to_uhwi ();
4400 if (wi::popcount (wd
) == 1)
4402 pre_shift
= floor_log2 (d
);
4405 unsigned HOST_WIDE_INT mask
4406 = (HOST_WIDE_INT_1U
<< pre_shift
) - 1;
4408 = expand_binop (int_mode
, and_optab
, op0
,
4409 gen_int_mode (mask
, int_mode
),
4413 return gen_lowpart (mode
, remainder
);
4415 quotient
= expand_shift (RSHIFT_EXPR
, int_mode
, op0
,
4416 pre_shift
, tquotient
, 1);
4418 else if (size
<= HOST_BITS_PER_WIDE_INT
)
4420 if (d
>= (HOST_WIDE_INT_1U
<< (size
- 1)))
4422 /* Most significant bit of divisor is set; emit an scc
4424 quotient
= emit_store_flag_force (tquotient
, GEU
, op0
, op1
,
4429 /* Find a suitable multiplier and right shift count
4430 instead of multiplying with D. */
4432 mh
= choose_multiplier (d
, size
, size
,
4433 &ml
, &post_shift
, &dummy
);
4435 /* If the suggested multiplier is more than SIZE bits,
4436 we can do better for even divisors, using an
4437 initial right shift. */
4438 if (mh
!= 0 && (d
& 1) == 0)
4440 pre_shift
= ctz_or_zero (d
);
4441 mh
= choose_multiplier (d
>> pre_shift
, size
,
4443 &ml
, &post_shift
, &dummy
);
4453 if (post_shift
- 1 >= BITS_PER_WORD
)
4457 = (shift_cost (speed
, int_mode
, post_shift
- 1)
4458 + shift_cost (speed
, int_mode
, 1)
4459 + 2 * add_cost (speed
, int_mode
));
4460 t1
= expmed_mult_highpart
4461 (int_mode
, op0
, gen_int_mode (ml
, int_mode
),
4462 NULL_RTX
, 1, max_cost
- extra_cost
);
4465 t2
= force_operand (gen_rtx_MINUS (int_mode
,
4468 t3
= expand_shift (RSHIFT_EXPR
, int_mode
,
4469 t2
, 1, NULL_RTX
, 1);
4470 t4
= force_operand (gen_rtx_PLUS (int_mode
,
4473 quotient
= expand_shift
4474 (RSHIFT_EXPR
, int_mode
, t4
,
4475 post_shift
- 1, tquotient
, 1);
4481 if (pre_shift
>= BITS_PER_WORD
4482 || post_shift
>= BITS_PER_WORD
)
4486 (RSHIFT_EXPR
, int_mode
, op0
,
4487 pre_shift
, NULL_RTX
, 1);
4489 = (shift_cost (speed
, int_mode
, pre_shift
)
4490 + shift_cost (speed
, int_mode
, post_shift
));
4491 t2
= expmed_mult_highpart
4493 gen_int_mode (ml
, int_mode
),
4494 NULL_RTX
, 1, max_cost
- extra_cost
);
4497 quotient
= expand_shift
4498 (RSHIFT_EXPR
, int_mode
, t2
,
4499 post_shift
, tquotient
, 1);
4503 else /* Too wide mode to use tricky code */
4506 insn
= get_last_insn ();
4508 set_dst_reg_note (insn
, REG_EQUAL
,
4509 gen_rtx_UDIV (int_mode
, op0
, op1
),
4512 else /* TRUNC_DIV, signed */
4514 unsigned HOST_WIDE_INT ml
;
4515 int lgup
, post_shift
;
4517 HOST_WIDE_INT d
= INTVAL (op1
);
4518 unsigned HOST_WIDE_INT abs_d
;
4520 /* Not prepared to handle division/remainder by
4521 0xffffffffffffffff8000000000000000 etc. */
4522 if (d
== HOST_WIDE_INT_MIN
&& size
> HOST_BITS_PER_WIDE_INT
)
4525 /* Since d might be INT_MIN, we have to cast to
4526 unsigned HOST_WIDE_INT before negating to avoid
4527 undefined signed overflow. */
4529 ? (unsigned HOST_WIDE_INT
) d
4530 : - (unsigned HOST_WIDE_INT
) d
);
4532 /* n rem d = n rem -d */
4533 if (rem_flag
&& d
< 0)
4536 op1
= gen_int_mode (abs_d
, int_mode
);
4542 quotient
= expand_unop (int_mode
, neg_optab
, op0
,
4544 else if (size
<= HOST_BITS_PER_WIDE_INT
4545 && abs_d
== HOST_WIDE_INT_1U
<< (size
- 1))
4547 /* This case is not handled correctly below. */
4548 quotient
= emit_store_flag (tquotient
, EQ
, op0
, op1
,
4553 else if (EXACT_POWER_OF_2_OR_ZERO_P (d
)
4554 && (size
<= HOST_BITS_PER_WIDE_INT
|| d
>= 0)
4556 ? smod_pow2_cheap (speed
, int_mode
)
4557 : sdiv_pow2_cheap (speed
, int_mode
))
4558 /* We assume that cheap metric is true if the
4559 optab has an expander for this mode. */
4560 && ((optab_handler ((rem_flag
? smod_optab
4563 != CODE_FOR_nothing
)
4564 || (optab_handler (sdivmod_optab
, int_mode
)
4565 != CODE_FOR_nothing
)))
4567 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d
))
4571 remainder
= expand_smod_pow2 (int_mode
, op0
, d
);
4573 return gen_lowpart (mode
, remainder
);
4576 if (sdiv_pow2_cheap (speed
, int_mode
)
4577 && ((optab_handler (sdiv_optab
, int_mode
)
4578 != CODE_FOR_nothing
)
4579 || (optab_handler (sdivmod_optab
, int_mode
)
4580 != CODE_FOR_nothing
)))
4581 quotient
= expand_divmod (0, TRUNC_DIV_EXPR
,
4583 gen_int_mode (abs_d
,
4587 quotient
= expand_sdiv_pow2 (int_mode
, op0
, abs_d
);
4589 /* We have computed OP0 / abs(OP1). If OP1 is negative,
4590 negate the quotient. */
4593 insn
= get_last_insn ();
4595 && abs_d
< (HOST_WIDE_INT_1U
4596 << (HOST_BITS_PER_WIDE_INT
- 1)))
4597 set_dst_reg_note (insn
, REG_EQUAL
,
4598 gen_rtx_DIV (int_mode
, op0
,
4604 quotient
= expand_unop (int_mode
, neg_optab
,
4605 quotient
, quotient
, 0);
4608 else if (size
<= HOST_BITS_PER_WIDE_INT
)
4610 choose_multiplier (abs_d
, size
, size
- 1,
4611 &ml
, &post_shift
, &lgup
);
4612 if (ml
< HOST_WIDE_INT_1U
<< (size
- 1))
4616 if (post_shift
>= BITS_PER_WORD
4617 || size
- 1 >= BITS_PER_WORD
)
4620 extra_cost
= (shift_cost (speed
, int_mode
, post_shift
)
4621 + shift_cost (speed
, int_mode
, size
- 1)
4622 + add_cost (speed
, int_mode
));
4623 t1
= expmed_mult_highpart
4624 (int_mode
, op0
, gen_int_mode (ml
, int_mode
),
4625 NULL_RTX
, 0, max_cost
- extra_cost
);
4629 (RSHIFT_EXPR
, int_mode
, t1
,
4630 post_shift
, NULL_RTX
, 0);
4632 (RSHIFT_EXPR
, int_mode
, op0
,
4633 size
- 1, NULL_RTX
, 0);
4636 = force_operand (gen_rtx_MINUS (int_mode
, t3
, t2
),
4640 = force_operand (gen_rtx_MINUS (int_mode
, t2
, t3
),
4647 if (post_shift
>= BITS_PER_WORD
4648 || size
- 1 >= BITS_PER_WORD
)
4651 ml
|= HOST_WIDE_INT_M1U
<< (size
- 1);
4652 mlr
= gen_int_mode (ml
, int_mode
);
4653 extra_cost
= (shift_cost (speed
, int_mode
, post_shift
)
4654 + shift_cost (speed
, int_mode
, size
- 1)
4655 + 2 * add_cost (speed
, int_mode
));
4656 t1
= expmed_mult_highpart (int_mode
, op0
, mlr
,
4658 max_cost
- extra_cost
);
4661 t2
= force_operand (gen_rtx_PLUS (int_mode
, t1
, op0
),
4664 (RSHIFT_EXPR
, int_mode
, t2
,
4665 post_shift
, NULL_RTX
, 0);
4667 (RSHIFT_EXPR
, int_mode
, op0
,
4668 size
- 1, NULL_RTX
, 0);
4671 = force_operand (gen_rtx_MINUS (int_mode
, t4
, t3
),
4675 = force_operand (gen_rtx_MINUS (int_mode
, t3
, t4
),
4679 else /* Too wide mode to use tricky code */
4682 insn
= get_last_insn ();
4684 set_dst_reg_note (insn
, REG_EQUAL
,
4685 gen_rtx_DIV (int_mode
, op0
, op1
),
4691 delete_insns_since (last
);
4694 case FLOOR_DIV_EXPR
:
4695 case FLOOR_MOD_EXPR
:
4696 /* We will come here only for signed operations. */
4697 if (op1_is_constant
&& HWI_COMPUTABLE_MODE_P (compute_mode
))
4699 scalar_int_mode int_mode
= as_a
<scalar_int_mode
> (compute_mode
);
4700 int size
= GET_MODE_BITSIZE (int_mode
);
4701 unsigned HOST_WIDE_INT mh
, ml
;
4702 int pre_shift
, lgup
, post_shift
;
4703 HOST_WIDE_INT d
= INTVAL (op1
);
4707 /* We could just as easily deal with negative constants here,
4708 but it does not seem worth the trouble for GCC 2.6. */
4709 if (EXACT_POWER_OF_2_OR_ZERO_P (d
))
4711 pre_shift
= floor_log2 (d
);
4714 unsigned HOST_WIDE_INT mask
4715 = (HOST_WIDE_INT_1U
<< pre_shift
) - 1;
4716 remainder
= expand_binop
4717 (int_mode
, and_optab
, op0
,
4718 gen_int_mode (mask
, int_mode
),
4719 remainder
, 0, OPTAB_LIB_WIDEN
);
4721 return gen_lowpart (mode
, remainder
);
4723 quotient
= expand_shift
4724 (RSHIFT_EXPR
, int_mode
, op0
,
4725 pre_shift
, tquotient
, 0);
4731 mh
= choose_multiplier (d
, size
, size
- 1,
4732 &ml
, &post_shift
, &lgup
);
4735 if (post_shift
< BITS_PER_WORD
4736 && size
- 1 < BITS_PER_WORD
)
4739 (RSHIFT_EXPR
, int_mode
, op0
,
4740 size
- 1, NULL_RTX
, 0);
4741 t2
= expand_binop (int_mode
, xor_optab
, op0
, t1
,
4742 NULL_RTX
, 0, OPTAB_WIDEN
);
4743 extra_cost
= (shift_cost (speed
, int_mode
, post_shift
)
4744 + shift_cost (speed
, int_mode
, size
- 1)
4745 + 2 * add_cost (speed
, int_mode
));
4746 t3
= expmed_mult_highpart
4747 (int_mode
, t2
, gen_int_mode (ml
, int_mode
),
4748 NULL_RTX
, 1, max_cost
- extra_cost
);
4752 (RSHIFT_EXPR
, int_mode
, t3
,
4753 post_shift
, NULL_RTX
, 1);
4754 quotient
= expand_binop (int_mode
, xor_optab
,
4755 t4
, t1
, tquotient
, 0,
4763 rtx nsign
, t1
, t2
, t3
, t4
;
4764 t1
= force_operand (gen_rtx_PLUS (int_mode
,
4765 op0
, constm1_rtx
), NULL_RTX
);
4766 t2
= expand_binop (int_mode
, ior_optab
, op0
, t1
, NULL_RTX
,
4768 nsign
= expand_shift (RSHIFT_EXPR
, int_mode
, t2
,
4769 size
- 1, NULL_RTX
, 0);
4770 t3
= force_operand (gen_rtx_MINUS (int_mode
, t1
, nsign
),
4772 t4
= expand_divmod (0, TRUNC_DIV_EXPR
, int_mode
, t3
, op1
,
4777 t5
= expand_unop (int_mode
, one_cmpl_optab
, nsign
,
4779 quotient
= force_operand (gen_rtx_PLUS (int_mode
, t4
, t5
),
4787 delete_insns_since (last
);
4789 /* Try using an instruction that produces both the quotient and
4790 remainder, using truncation. We can easily compensate the quotient
4791 or remainder to get floor rounding, once we have the remainder.
4792 Notice that we compute also the final remainder value here,
4793 and return the result right away. */
4794 if (target
== 0 || GET_MODE (target
) != compute_mode
)
4795 target
= gen_reg_rtx (compute_mode
);
4800 = REG_P (target
) ? target
: gen_reg_rtx (compute_mode
);
4801 quotient
= gen_reg_rtx (compute_mode
);
4806 = REG_P (target
) ? target
: gen_reg_rtx (compute_mode
);
4807 remainder
= gen_reg_rtx (compute_mode
);
4810 if (expand_twoval_binop (sdivmod_optab
, op0
, op1
,
4811 quotient
, remainder
, 0))
4813 /* This could be computed with a branch-less sequence.
4814 Save that for later. */
4816 rtx_code_label
*label
= gen_label_rtx ();
4817 do_cmp_and_jump (remainder
, const0_rtx
, EQ
, compute_mode
, label
);
4818 tem
= expand_binop (compute_mode
, xor_optab
, op0
, op1
,
4819 NULL_RTX
, 0, OPTAB_WIDEN
);
4820 do_cmp_and_jump (tem
, const0_rtx
, GE
, compute_mode
, label
);
4821 expand_dec (quotient
, const1_rtx
);
4822 expand_inc (remainder
, op1
);
4824 return gen_lowpart (mode
, rem_flag
? remainder
: quotient
);
4827 /* No luck with division elimination or divmod. Have to do it
4828 by conditionally adjusting op0 *and* the result. */
4830 rtx_code_label
*label1
, *label2
, *label3
, *label4
, *label5
;
4834 quotient
= gen_reg_rtx (compute_mode
);
4835 adjusted_op0
= copy_to_mode_reg (compute_mode
, op0
);
4836 label1
= gen_label_rtx ();
4837 label2
= gen_label_rtx ();
4838 label3
= gen_label_rtx ();
4839 label4
= gen_label_rtx ();
4840 label5
= gen_label_rtx ();
4841 do_cmp_and_jump (op1
, const0_rtx
, LT
, compute_mode
, label2
);
4842 do_cmp_and_jump (adjusted_op0
, const0_rtx
, LT
, compute_mode
, label1
);
4843 tem
= expand_binop (compute_mode
, sdiv_optab
, adjusted_op0
, op1
,
4844 quotient
, 0, OPTAB_LIB_WIDEN
);
4845 if (tem
!= quotient
)
4846 emit_move_insn (quotient
, tem
);
4847 emit_jump_insn (targetm
.gen_jump (label5
));
4849 emit_label (label1
);
4850 expand_inc (adjusted_op0
, const1_rtx
);
4851 emit_jump_insn (targetm
.gen_jump (label4
));
4853 emit_label (label2
);
4854 do_cmp_and_jump (adjusted_op0
, const0_rtx
, GT
, compute_mode
, label3
);
4855 tem
= expand_binop (compute_mode
, sdiv_optab
, adjusted_op0
, op1
,
4856 quotient
, 0, OPTAB_LIB_WIDEN
);
4857 if (tem
!= quotient
)
4858 emit_move_insn (quotient
, tem
);
4859 emit_jump_insn (targetm
.gen_jump (label5
));
4861 emit_label (label3
);
4862 expand_dec (adjusted_op0
, const1_rtx
);
4863 emit_label (label4
);
4864 tem
= expand_binop (compute_mode
, sdiv_optab
, adjusted_op0
, op1
,
4865 quotient
, 0, OPTAB_LIB_WIDEN
);
4866 if (tem
!= quotient
)
4867 emit_move_insn (quotient
, tem
);
4868 expand_dec (quotient
, const1_rtx
);
4869 emit_label (label5
);
4878 && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1
))
4879 && (HWI_COMPUTABLE_MODE_P (compute_mode
)
4880 || INTVAL (op1
) >= 0))
4882 scalar_int_mode int_mode
4883 = as_a
<scalar_int_mode
> (compute_mode
);
4885 unsigned HOST_WIDE_INT d
= INTVAL (op1
);
4886 t1
= expand_shift (RSHIFT_EXPR
, int_mode
, op0
,
4887 floor_log2 (d
), tquotient
, 1);
4888 t2
= expand_binop (int_mode
, and_optab
, op0
,
4889 gen_int_mode (d
- 1, int_mode
),
4890 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
4891 t3
= gen_reg_rtx (int_mode
);
4892 t3
= emit_store_flag (t3
, NE
, t2
, const0_rtx
, int_mode
, 1, 1);
4895 rtx_code_label
*lab
;
4896 lab
= gen_label_rtx ();
4897 do_cmp_and_jump (t2
, const0_rtx
, EQ
, int_mode
, lab
);
4898 expand_inc (t1
, const1_rtx
);
4903 quotient
= force_operand (gen_rtx_PLUS (int_mode
, t1
, t3
),
4908 /* Try using an instruction that produces both the quotient and
4909 remainder, using truncation. We can easily compensate the
4910 quotient or remainder to get ceiling rounding, once we have the
4911 remainder. Notice that we compute also the final remainder
4912 value here, and return the result right away. */
4913 if (target
== 0 || GET_MODE (target
) != compute_mode
)
4914 target
= gen_reg_rtx (compute_mode
);
4918 remainder
= (REG_P (target
)
4919 ? target
: gen_reg_rtx (compute_mode
));
4920 quotient
= gen_reg_rtx (compute_mode
);
4924 quotient
= (REG_P (target
)
4925 ? target
: gen_reg_rtx (compute_mode
));
4926 remainder
= gen_reg_rtx (compute_mode
);
4929 if (expand_twoval_binop (udivmod_optab
, op0
, op1
, quotient
,
4932 /* This could be computed with a branch-less sequence.
4933 Save that for later. */
4934 rtx_code_label
*label
= gen_label_rtx ();
4935 do_cmp_and_jump (remainder
, const0_rtx
, EQ
,
4936 compute_mode
, label
);
4937 expand_inc (quotient
, const1_rtx
);
4938 expand_dec (remainder
, op1
);
4940 return gen_lowpart (mode
, rem_flag
? remainder
: quotient
);
4943 /* No luck with division elimination or divmod. Have to do it
4944 by conditionally adjusting op0 *and* the result. */
4946 rtx_code_label
*label1
, *label2
;
4947 rtx adjusted_op0
, tem
;
4949 quotient
= gen_reg_rtx (compute_mode
);
4950 adjusted_op0
= copy_to_mode_reg (compute_mode
, op0
);
4951 label1
= gen_label_rtx ();
4952 label2
= gen_label_rtx ();
4953 do_cmp_and_jump (adjusted_op0
, const0_rtx
, NE
,
4954 compute_mode
, label1
);
4955 emit_move_insn (quotient
, const0_rtx
);
4956 emit_jump_insn (targetm
.gen_jump (label2
));
4958 emit_label (label1
);
4959 expand_dec (adjusted_op0
, const1_rtx
);
4960 tem
= expand_binop (compute_mode
, udiv_optab
, adjusted_op0
, op1
,
4961 quotient
, 1, OPTAB_LIB_WIDEN
);
4962 if (tem
!= quotient
)
4963 emit_move_insn (quotient
, tem
);
4964 expand_inc (quotient
, const1_rtx
);
4965 emit_label (label2
);
4970 if (op1_is_constant
&& EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1
))
4971 && INTVAL (op1
) >= 0)
4973 /* This is extremely similar to the code for the unsigned case
4974 above. For 2.7 we should merge these variants, but for
4975 2.6.1 I don't want to touch the code for unsigned since that
4976 get used in C. The signed case will only be used by other
4980 unsigned HOST_WIDE_INT d
= INTVAL (op1
);
4981 t1
= expand_shift (RSHIFT_EXPR
, compute_mode
, op0
,
4982 floor_log2 (d
), tquotient
, 0);
4983 t2
= expand_binop (compute_mode
, and_optab
, op0
,
4984 gen_int_mode (d
- 1, compute_mode
),
4985 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
4986 t3
= gen_reg_rtx (compute_mode
);
4987 t3
= emit_store_flag (t3
, NE
, t2
, const0_rtx
,
4988 compute_mode
, 1, 1);
4991 rtx_code_label
*lab
;
4992 lab
= gen_label_rtx ();
4993 do_cmp_and_jump (t2
, const0_rtx
, EQ
, compute_mode
, lab
);
4994 expand_inc (t1
, const1_rtx
);
4999 quotient
= force_operand (gen_rtx_PLUS (compute_mode
,
5005 /* Try using an instruction that produces both the quotient and
5006 remainder, using truncation. We can easily compensate the
5007 quotient or remainder to get ceiling rounding, once we have the
5008 remainder. Notice that we compute also the final remainder
5009 value here, and return the result right away. */
5010 if (target
== 0 || GET_MODE (target
) != compute_mode
)
5011 target
= gen_reg_rtx (compute_mode
);
5014 remainder
= (REG_P (target
)
5015 ? target
: gen_reg_rtx (compute_mode
));
5016 quotient
= gen_reg_rtx (compute_mode
);
5020 quotient
= (REG_P (target
)
5021 ? target
: gen_reg_rtx (compute_mode
));
5022 remainder
= gen_reg_rtx (compute_mode
);
5025 if (expand_twoval_binop (sdivmod_optab
, op0
, op1
, quotient
,
5028 /* This could be computed with a branch-less sequence.
5029 Save that for later. */
5031 rtx_code_label
*label
= gen_label_rtx ();
5032 do_cmp_and_jump (remainder
, const0_rtx
, EQ
,
5033 compute_mode
, label
);
5034 tem
= expand_binop (compute_mode
, xor_optab
, op0
, op1
,
5035 NULL_RTX
, 0, OPTAB_WIDEN
);
5036 do_cmp_and_jump (tem
, const0_rtx
, LT
, compute_mode
, label
);
5037 expand_inc (quotient
, const1_rtx
);
5038 expand_dec (remainder
, op1
);
5040 return gen_lowpart (mode
, rem_flag
? remainder
: quotient
);
5043 /* No luck with division elimination or divmod. Have to do it
5044 by conditionally adjusting op0 *and* the result. */
5046 rtx_code_label
*label1
, *label2
, *label3
, *label4
, *label5
;
5050 quotient
= gen_reg_rtx (compute_mode
);
5051 adjusted_op0
= copy_to_mode_reg (compute_mode
, op0
);
5052 label1
= gen_label_rtx ();
5053 label2
= gen_label_rtx ();
5054 label3
= gen_label_rtx ();
5055 label4
= gen_label_rtx ();
5056 label5
= gen_label_rtx ();
5057 do_cmp_and_jump (op1
, const0_rtx
, LT
, compute_mode
, label2
);
5058 do_cmp_and_jump (adjusted_op0
, const0_rtx
, GT
,
5059 compute_mode
, label1
);
5060 tem
= expand_binop (compute_mode
, sdiv_optab
, adjusted_op0
, op1
,
5061 quotient
, 0, OPTAB_LIB_WIDEN
);
5062 if (tem
!= quotient
)
5063 emit_move_insn (quotient
, tem
);
5064 emit_jump_insn (targetm
.gen_jump (label5
));
5066 emit_label (label1
);
5067 expand_dec (adjusted_op0
, const1_rtx
);
5068 emit_jump_insn (targetm
.gen_jump (label4
));
5070 emit_label (label2
);
5071 do_cmp_and_jump (adjusted_op0
, const0_rtx
, LT
,
5072 compute_mode
, label3
);
5073 tem
= expand_binop (compute_mode
, sdiv_optab
, adjusted_op0
, op1
,
5074 quotient
, 0, OPTAB_LIB_WIDEN
);
5075 if (tem
!= quotient
)
5076 emit_move_insn (quotient
, tem
);
5077 emit_jump_insn (targetm
.gen_jump (label5
));
5079 emit_label (label3
);
5080 expand_inc (adjusted_op0
, const1_rtx
);
5081 emit_label (label4
);
5082 tem
= expand_binop (compute_mode
, sdiv_optab
, adjusted_op0
, op1
,
5083 quotient
, 0, OPTAB_LIB_WIDEN
);
5084 if (tem
!= quotient
)
5085 emit_move_insn (quotient
, tem
);
5086 expand_inc (quotient
, const1_rtx
);
5087 emit_label (label5
);
5092 case EXACT_DIV_EXPR
:
5093 if (op1_is_constant
&& HWI_COMPUTABLE_MODE_P (compute_mode
))
5095 scalar_int_mode int_mode
= as_a
<scalar_int_mode
> (compute_mode
);
5096 int size
= GET_MODE_BITSIZE (int_mode
);
5097 HOST_WIDE_INT d
= INTVAL (op1
);
5098 unsigned HOST_WIDE_INT ml
;
5102 pre_shift
= ctz_or_zero (d
);
5103 ml
= invert_mod2n (d
>> pre_shift
, size
);
5104 t1
= expand_shift (RSHIFT_EXPR
, int_mode
, op0
,
5105 pre_shift
, NULL_RTX
, unsignedp
);
5106 quotient
= expand_mult (int_mode
, t1
, gen_int_mode (ml
, int_mode
),
5109 insn
= get_last_insn ();
5110 set_dst_reg_note (insn
, REG_EQUAL
,
5111 gen_rtx_fmt_ee (unsignedp
? UDIV
: DIV
,
5112 int_mode
, op0
, op1
),
5117 case ROUND_DIV_EXPR
:
5118 case ROUND_MOD_EXPR
:
5121 scalar_int_mode int_mode
= as_a
<scalar_int_mode
> (compute_mode
);
5123 rtx_code_label
*label
;
5124 label
= gen_label_rtx ();
5125 quotient
= gen_reg_rtx (int_mode
);
5126 remainder
= gen_reg_rtx (int_mode
);
5127 if (expand_twoval_binop (udivmod_optab
, op0
, op1
, quotient
, remainder
, 1) == 0)
5130 quotient
= expand_binop (int_mode
, udiv_optab
, op0
, op1
,
5131 quotient
, 1, OPTAB_LIB_WIDEN
);
5132 tem
= expand_mult (int_mode
, quotient
, op1
, NULL_RTX
, 1);
5133 remainder
= expand_binop (int_mode
, sub_optab
, op0
, tem
,
5134 remainder
, 1, OPTAB_LIB_WIDEN
);
5136 tem
= plus_constant (int_mode
, op1
, -1);
5137 tem
= expand_shift (RSHIFT_EXPR
, int_mode
, tem
, 1, NULL_RTX
, 1);
5138 do_cmp_and_jump (remainder
, tem
, LEU
, int_mode
, label
);
5139 expand_inc (quotient
, const1_rtx
);
5140 expand_dec (remainder
, op1
);
5145 scalar_int_mode int_mode
= as_a
<scalar_int_mode
> (compute_mode
);
5146 int size
= GET_MODE_BITSIZE (int_mode
);
5147 rtx abs_rem
, abs_op1
, tem
, mask
;
5148 rtx_code_label
*label
;
5149 label
= gen_label_rtx ();
5150 quotient
= gen_reg_rtx (int_mode
);
5151 remainder
= gen_reg_rtx (int_mode
);
5152 if (expand_twoval_binop (sdivmod_optab
, op0
, op1
, quotient
, remainder
, 0) == 0)
5155 quotient
= expand_binop (int_mode
, sdiv_optab
, op0
, op1
,
5156 quotient
, 0, OPTAB_LIB_WIDEN
);
5157 tem
= expand_mult (int_mode
, quotient
, op1
, NULL_RTX
, 0);
5158 remainder
= expand_binop (int_mode
, sub_optab
, op0
, tem
,
5159 remainder
, 0, OPTAB_LIB_WIDEN
);
5161 abs_rem
= expand_abs (int_mode
, remainder
, NULL_RTX
, 1, 0);
5162 abs_op1
= expand_abs (int_mode
, op1
, NULL_RTX
, 1, 0);
5163 tem
= expand_shift (LSHIFT_EXPR
, int_mode
, abs_rem
,
5165 do_cmp_and_jump (tem
, abs_op1
, LTU
, int_mode
, label
);
5166 tem
= expand_binop (int_mode
, xor_optab
, op0
, op1
,
5167 NULL_RTX
, 0, OPTAB_WIDEN
);
5168 mask
= expand_shift (RSHIFT_EXPR
, int_mode
, tem
,
5169 size
- 1, NULL_RTX
, 0);
5170 tem
= expand_binop (int_mode
, xor_optab
, mask
, const1_rtx
,
5171 NULL_RTX
, 0, OPTAB_WIDEN
);
5172 tem
= expand_binop (int_mode
, sub_optab
, tem
, mask
,
5173 NULL_RTX
, 0, OPTAB_WIDEN
);
5174 expand_inc (quotient
, tem
);
5175 tem
= expand_binop (int_mode
, xor_optab
, mask
, op1
,
5176 NULL_RTX
, 0, OPTAB_WIDEN
);
5177 tem
= expand_binop (int_mode
, sub_optab
, tem
, mask
,
5178 NULL_RTX
, 0, OPTAB_WIDEN
);
5179 expand_dec (remainder
, tem
);
5182 return gen_lowpart (mode
, rem_flag
? remainder
: quotient
);
5190 if (target
&& GET_MODE (target
) != compute_mode
)
5195 /* Try to produce the remainder without producing the quotient.
5196 If we seem to have a divmod pattern that does not require widening,
5197 don't try widening here. We should really have a WIDEN argument
5198 to expand_twoval_binop, since what we'd really like to do here is
5199 1) try a mod insn in compute_mode
5200 2) try a divmod insn in compute_mode
5201 3) try a div insn in compute_mode and multiply-subtract to get
5203 4) try the same things with widening allowed. */
5205 = sign_expand_binop (compute_mode
, umod_optab
, smod_optab
,
5208 ((optab_handler (optab2
, compute_mode
)
5209 != CODE_FOR_nothing
)
5210 ? OPTAB_DIRECT
: OPTAB_WIDEN
));
5213 /* No luck there. Can we do remainder and divide at once
5214 without a library call? */
5215 remainder
= gen_reg_rtx (compute_mode
);
5216 if (! expand_twoval_binop ((unsignedp
5220 NULL_RTX
, remainder
, unsignedp
))
5225 return gen_lowpart (mode
, remainder
);
5228 /* Produce the quotient. Try a quotient insn, but not a library call.
5229 If we have a divmod in this mode, use it in preference to widening
5230 the div (for this test we assume it will not fail). Note that optab2
5231 is set to the one of the two optabs that the call below will use. */
5233 = sign_expand_binop (compute_mode
, udiv_optab
, sdiv_optab
,
5234 op0
, op1
, rem_flag
? NULL_RTX
: target
,
5236 ((optab_handler (optab2
, compute_mode
)
5237 != CODE_FOR_nothing
)
5238 ? OPTAB_DIRECT
: OPTAB_WIDEN
));
5242 /* No luck there. Try a quotient-and-remainder insn,
5243 keeping the quotient alone. */
5244 quotient
= gen_reg_rtx (compute_mode
);
5245 if (! expand_twoval_binop (unsignedp
? udivmod_optab
: sdivmod_optab
,
5247 quotient
, NULL_RTX
, unsignedp
))
5251 /* Still no luck. If we are not computing the remainder,
5252 use a library call for the quotient. */
5253 quotient
= sign_expand_binop (compute_mode
,
5254 udiv_optab
, sdiv_optab
,
5256 unsignedp
, OPTAB_LIB_WIDEN
);
5263 if (target
&& GET_MODE (target
) != compute_mode
)
5268 /* No divide instruction either. Use library for remainder. */
5269 remainder
= sign_expand_binop (compute_mode
, umod_optab
, smod_optab
,
5271 unsignedp
, OPTAB_LIB_WIDEN
);
5272 /* No remainder function. Try a quotient-and-remainder
5273 function, keeping the remainder. */
5276 remainder
= gen_reg_rtx (compute_mode
);
5277 if (!expand_twoval_binop_libfunc
5278 (unsignedp
? udivmod_optab
: sdivmod_optab
,
5280 NULL_RTX
, remainder
,
5281 unsignedp
? UMOD
: MOD
))
5282 remainder
= NULL_RTX
;
5287 /* We divided. Now finish doing X - Y * (X / Y). */
5288 remainder
= expand_mult (compute_mode
, quotient
, op1
,
5289 NULL_RTX
, unsignedp
);
5290 remainder
= expand_binop (compute_mode
, sub_optab
, op0
,
5291 remainder
, target
, unsignedp
,
5296 return gen_lowpart (mode
, rem_flag
? remainder
: quotient
);
5299 /* Return a tree node with data type TYPE, describing the value of X.
5300 Usually this is a VAR_DECL, if there is no obvious better choice.
5301 X may be an expression, however we only support those expressions
5302 generated by loop.c. */
/* Return a tree node of type TYPE describing the value of rtx X.
   NOTE(review): this extract is elided — the switch's case labels
   (PLUS, MINUS, MULT, shifts, divides, extensions, MEM, SYMBOL_REF,
   the default case) and several braces are missing; the fragments
   below are the bodies of those cases in original source order.  */
5305 make_tree (tree type
, rtx x
)
/* Dispatch on the rtx code of X and build the equivalent tree.  */
5309 switch (GET_CODE (x
))
5312 case CONST_WIDE_INT
:
5313 t
= wide_int_to_tree (type
, rtx_mode_t (x
, TYPE_MODE (type
)));
/* CONST_DOUBLE case: a VOIDmode CONST_DOUBLE holds a 2-word integer
   when the target does not support wide ints.  */
5317 STATIC_ASSERT (HOST_BITS_PER_WIDE_INT
* 2 <= MAX_BITSIZE_MODE_ANY_INT
);
5318 if (TARGET_SUPPORTS_WIDE_INT
== 0 && GET_MODE (x
) == VOIDmode
)
5319 t
= wide_int_to_tree (type
,
5320 wide_int::from_array (&CONST_DOUBLE_LOW (x
), 2,
5321 HOST_BITS_PER_WIDE_INT
* 2));
/* Otherwise the CONST_DOUBLE is a floating-point constant.  */
5323 t
= build_real (type
, *CONST_DOUBLE_REAL_VALUE (x
));
/* CONST_VECTOR case: rebuild the encoded vector element by element,
   recursing on each element with the vector's element type.  */
5329 unsigned int npatterns
= CONST_VECTOR_NPATTERNS (x
);
5330 unsigned int nelts_per_pattern
= CONST_VECTOR_NELTS_PER_PATTERN (x
);
5331 tree itype
= TREE_TYPE (type
);
5333 /* Build a tree with vector elements. */
5334 tree_vector_builder
elts (type
, npatterns
, nelts_per_pattern
);
5335 unsigned int count
= elts
.encoded_nelts ();
5336 for (unsigned int i
= 0; i
< count
; ++i
)
5338 rtx elt
= CONST_VECTOR_ELT (x
, i
);
5339 elts
.quick_push (make_tree (itype
, elt
));
5342 return elts
.build ();
/* Binary arithmetic rtx codes map directly onto fold_build2 with the
   corresponding tree code, recursing on both operands.  */
5346 return fold_build2 (PLUS_EXPR
, type
, make_tree (type
, XEXP (x
, 0)),
5347 make_tree (type
, XEXP (x
, 1)));
5350 return fold_build2 (MINUS_EXPR
, type
, make_tree (type
, XEXP (x
, 0)),
5351 make_tree (type
, XEXP (x
, 1)));
5354 return fold_build1 (NEGATE_EXPR
, type
, make_tree (type
, XEXP (x
, 0)));
5357 return fold_build2 (MULT_EXPR
, type
, make_tree (type
, XEXP (x
, 0)),
5358 make_tree (type
, XEXP (x
, 1)));
5361 return fold_build2 (LSHIFT_EXPR
, type
, make_tree (type
, XEXP (x
, 0)),
5362 make_tree (type
, XEXP (x
, 1)));
/* Logical right shift: force an unsigned type so RSHIFT_EXPR does not
   sign-extend, then convert back to TYPE.  */
5365 t
= unsigned_type_for (type
);
5366 return fold_convert (type
, build2 (RSHIFT_EXPR
, t
,
5367 make_tree (t
, XEXP (x
, 0)),
5368 make_tree (type
, XEXP (x
, 1))));
/* Arithmetic right shift: dual of the above, via a signed type.  */
5371 t
= signed_type_for (type
);
5372 return fold_convert (type
, build2 (RSHIFT_EXPR
, t
,
5373 make_tree (t
, XEXP (x
, 0)),
5374 make_tree (type
, XEXP (x
, 1))));
/* Division: pick signedness explicitly; REAL_TYPE keeps TYPE as-is
   (the intervening else branch is elided in this extract).  */
5377 if (TREE_CODE (type
) != REAL_TYPE
)
5378 t
= signed_type_for (type
);
5382 return fold_convert (type
, build2 (TRUNC_DIV_EXPR
, t
,
5383 make_tree (t
, XEXP (x
, 0)),
5384 make_tree (t
, XEXP (x
, 1))));
/* Unsigned division.  */
5386 t
= unsigned_type_for (type
);
5387 return fold_convert (type
, build2 (TRUNC_DIV_EXPR
, t
,
5388 make_tree (t
, XEXP (x
, 0)),
5389 make_tree (t
, XEXP (x
, 1))));
/* ZERO_EXTEND/SIGN_EXTEND: build the operand in a type matching its
   own mode and signedness, then convert to TYPE.  */
5393 t
= lang_hooks
.types
.type_for_mode (GET_MODE (XEXP (x
, 0)),
5394 GET_CODE (x
) == ZERO_EXTEND
);
5395 return fold_convert (type
, make_tree (t
, XEXP (x
, 0)));
/* CONST wrapper: just unwrap.  */
5398 return make_tree (type
, XEXP (x
, 0));
/* SYMBOL_REF with an attached decl becomes &decl.  */
5401 t
= SYMBOL_REF_DECL (x
);
5403 return fold_convert (type
, build_fold_addr_expr (t
));
/* Default case: polynomial integers get a wide-int tree...  */
5407 if (CONST_POLY_INT_P (x
))
5408 return wide_int_to_tree (t
, const_poly_int_value (x
));
/* ...anything else becomes an anonymous VAR_DECL whose DECL_RTL is X.  */
5410 t
= build_decl (RTL_LOCATION (x
), VAR_DECL
, NULL_TREE
, type
);
5412 /* If TYPE is a POINTER_TYPE, we might need to convert X from
5413 address mode to pointer mode. */
5414 if (POINTER_TYPE_P (type
))
5415 x
= convert_memory_address_addr_space
5416 (SCALAR_INT_TYPE_MODE (type
), x
, TYPE_ADDR_SPACE (TREE_TYPE (type
)));
5418 /* Note that we do *not* use SET_DECL_RTL here, because we do not
5419 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
5420 t
->decl_with_rtl
.rtl
= x
;
5426 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5427 and returning TARGET.
5429 If TARGET is 0, a pseudo-register or constant is returned. */
/* Compute the bitwise AND of OP0 and OP1 in MODE, storing into TARGET
   when possible.  NOTE(review): the extract is elided — the `rtx tem`
   declaration, the branch handling a failed constant fold, the
   `target == 0` case and the final `return target;` are not shown.  */
5432 expand_and (machine_mode mode
, rtx op0
, rtx op1
, rtx target
)
/* Both operands VOIDmode means both are constants: fold at compile
   time instead of emitting an AND insn.  */
5436 if (GET_MODE (op0
) == VOIDmode
&& GET_MODE (op1
) == VOIDmode
)
5437 tem
= simplify_binary_operation (AND
, mode
, op0
, op1
);
/* Otherwise emit a real AND, allowing library/widening fallback.  */
5439 tem
= expand_binop (mode
, and_optab
, op0
, op1
, target
, 0, OPTAB_LIB_WIDEN
);
/* If the result landed somewhere other than TARGET, copy it over.  */
5443 else if (tem
!= target
)
5444 emit_move_insn (target
, tem
);
5448 /* Helper function for emit_store_flag. */
/* Helper for emit_store_flag: emit a cstore insn ICODE computing
   X CODE Y into TARGET, then normalize the result per NORMALIZEP
   (0 = raw, 1 = 0/1, -1 = 0/-1).  Returns the result rtx (the
   `return target;` tail is elided from this extract, as are a few
   intermediate lines and braces).  */
5450 emit_cstore (rtx target
, enum insn_code icode
, enum rtx_code code
,
5451 machine_mode mode
, machine_mode compare_mode
,
5452 int unsignedp
, rtx x
, rtx y
, int normalizep
,
5453 machine_mode target_mode
)
5455 class expand_operand ops
[4];
5456 rtx op0
, comparison
, subtarget
;
/* RESULT_MODE is what the target's cstore pattern produces.  */
5458 scalar_int_mode result_mode
= targetm
.cstore_mode (icode
);
5459 scalar_int_mode int_target_mode
;
/* Remember the insn stream position so a failed attempt can be
   rolled back with delete_insns_since.  */
5461 last
= get_last_insn ();
5462 x
= prepare_operand (icode
, x
, 2, mode
, compare_mode
, unsignedp
);
5463 y
= prepare_operand (icode
, y
, 3, mode
, compare_mode
, unsignedp
);
5466 delete_insns_since (last
);
/* Decide the integer mode the caller ultimately wants.  */
5470 if (target_mode
== VOIDmode
)
5471 int_target_mode
= result_mode
;
5473 int_target_mode
= as_a
<scalar_int_mode
> (target_mode
);
5475 target
= gen_reg_rtx (int_target_mode
);
5477 comparison
= gen_rtx_fmt_ee (code
, result_mode
, x
, y
);
/* Set up the four operands of the cstore pattern and try to emit it.
   When optimizing, let the expander pick the output register.  */
5479 create_output_operand (&ops
[0], optimize
? NULL_RTX
: target
, result_mode
);
5480 create_fixed_operand (&ops
[1], comparison
);
5481 create_fixed_operand (&ops
[2], x
);
5482 create_fixed_operand (&ops
[3], y
);
5483 if (!maybe_expand_insn (icode
, 4, ops
))
5485 delete_insns_since (last
);
5488 subtarget
= ops
[0].value
;
5490 /* If we are converting to a wider mode, first convert to
5491 INT_TARGET_MODE, then normalize. This produces better combining
5492 opportunities on machines that have a SIGN_EXTRACT when we are
5493 testing a single bit. This mostly benefits the 68k.
5495 If STORE_FLAG_VALUE does not have the sign bit set when
5496 interpreted in MODE, we can do this conversion as unsigned, which
5497 is usually more efficient. */
5498 if (GET_MODE_PRECISION (int_target_mode
) > GET_MODE_PRECISION (result_mode
))
5500 gcc_assert (GET_MODE_PRECISION (result_mode
) != 1
5501 || STORE_FLAG_VALUE
== 1 || STORE_FLAG_VALUE
== -1);
5503 bool unsignedp
= (STORE_FLAG_VALUE
>= 0);
5504 convert_move (target
, subtarget
, unsignedp
);
5507 result_mode
= int_target_mode
;
5512 /* If we want to keep subexpressions around, don't reuse our last
5517 /* Now normalize to the proper value in MODE. Sometimes we don't
5518 have to do anything. */
5519 if (normalizep
== 0 || normalizep
== STORE_FLAG_VALUE
)
5521 /* STORE_FLAG_VALUE might be the most negative number, so write
5522 the comparison this way to avoid a compiler-time warning. */
5523 else if (- normalizep
== STORE_FLAG_VALUE
)
5524 op0
= expand_unop (result_mode
, neg_optab
, op0
, subtarget
, 0);
5526 /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5527 it hard to use a value of just the sign bit due to ANSI integer
5528 constant typing rules. */
5529 else if (val_signbit_known_set_p (result_mode
, STORE_FLAG_VALUE
))
5530 op0
= expand_shift (RSHIFT_EXPR
, result_mode
, op0
,
5531 GET_MODE_BITSIZE (result_mode
) - 1, subtarget
,
/* Fallback normalization: mask to the low bit, then negate for
   NORMALIZEP == -1.  Requires STORE_FLAG_VALUE to be odd.  */
5535 gcc_assert (STORE_FLAG_VALUE
& 1);
5537 op0
= expand_and (result_mode
, op0
, const1_rtx
, subtarget
);
5538 if (normalizep
== -1)
5539 op0
= expand_unop (result_mode
, neg_optab
, op0
, op0
, 0);
5542 /* If we were converting to a smaller mode, do the conversion now. */
5543 if (int_target_mode
!= result_mode
)
5545 convert_move (target
, op0
, 0);
5553 /* A subroutine of emit_store_flag only including "tricks" that do not
5554 need a recursive call. These are kept separate to avoid infinite
/* Subroutine of emit_store_flag containing only non-recursive tricks:
   canonicalize the comparison, split double-word comparisons into
   single-word ones, handle sign-bit tests by shifting, and finally
   walk the wider modes looking for a usable cstore pattern.
   NOTE(review): many lines (braces, a few operands, returns) are
   elided in this extract.  */
5558 emit_store_flag_1 (rtx target
, enum rtx_code code
, rtx op0
, rtx op1
,
5559 machine_mode mode
, int unsignedp
, int normalizep
,
5560 machine_mode target_mode
)
5563 enum insn_code icode
;
5564 machine_mode compare_mode
;
5565 enum mode_class mclass
;
5566 enum rtx_code scode
;
/* Under unsignedp, use the unsigned variant of CODE; SCODE is the
   swapped-operand form, used later for float comparisons.  */
5569 code
= unsigned_condition (code
);
5570 scode
= swap_condition (code
);
5572 /* If one operand is constant, make it the second one. Only do this
5573 if the other operand is not constant as well. */
5575 if (swap_commutative_operands_p (op0
, op1
))
5577 std::swap (op0
, op1
);
5578 code
= swap_condition (code
);
5581 if (mode
== VOIDmode
)
5582 mode
= GET_MODE (op0
);
/* Let the target trade the immediate for a cheaper equivalent.  */
5584 if (CONST_SCALAR_INT_P (op1
))
5585 canonicalize_comparison (mode
, &code
, &op1
);
5587 /* For some comparisons with 1 and -1, we can convert this to
5588 comparisons with zero. This will often produce more opportunities for
5589 store-flag insns. */
/* The case labels selecting each rewrite are elided; each pair below
   rewrites one comparison against 1 or -1 into one against zero.  */
5594 if (op1
== const1_rtx
)
5595 op1
= const0_rtx
, code
= LE
;
5598 if (op1
== constm1_rtx
)
5599 op1
= const0_rtx
, code
= LT
;
5602 if (op1
== const1_rtx
)
5603 op1
= const0_rtx
, code
= GT
;
5606 if (op1
== constm1_rtx
)
5607 op1
= const0_rtx
, code
= GE
;
5610 if (op1
== const1_rtx
)
5611 op1
= const0_rtx
, code
= NE
;
5614 if (op1
== const1_rtx
)
5615 op1
= const0_rtx
, code
= EQ
;
5621 /* If we are comparing a double-word integer with zero or -1, we can
5622 convert the comparison into one involving a single word. */
5623 scalar_int_mode int_mode
;
5624 if (is_int_mode (mode
, &int_mode
)
5625 && GET_MODE_BITSIZE (int_mode
) == BITS_PER_WORD
* 2
5626 && (!MEM_P (op0
) || ! MEM_VOLATILE_P (op0
)))
/* EQ/NE against 0 or -1: OR (resp. AND) the two halves together and
   compare the combined word once.  */
5629 if ((code
== EQ
|| code
== NE
)
5630 && (op1
== const0_rtx
|| op1
== constm1_rtx
))
5634 /* Do a logical OR or AND of the two words and compare the
5636 op00
= simplify_gen_subreg (word_mode
, op0
, int_mode
, 0);
5637 op01
= simplify_gen_subreg (word_mode
, op0
, int_mode
, UNITS_PER_WORD
);
5638 tem
= expand_binop (word_mode
,
5639 op1
== const0_rtx
? ior_optab
: and_optab
,
5640 op00
, op01
, NULL_RTX
, unsignedp
,
5644 tem
= emit_store_flag (NULL_RTX
, code
, tem
, op1
, word_mode
,
5645 unsignedp
, normalizep
);
5647 else if ((code
== LT
|| code
== GE
) && op1
== const0_rtx
)
5651 /* If testing the sign bit, can just test on high word. */
5652 op0h
= simplify_gen_subreg (word_mode
, op0
, int_mode
,
5653 subreg_highpart_offset (word_mode
,
5655 tem
= emit_store_flag (NULL_RTX
, code
, op0h
, op1
, word_mode
,
5656 unsignedp
, normalizep
);
/* Widen or copy the word-mode result back to TARGET_MODE; signedness
   of the extension follows the sign of the flag value.  */
5663 if (target_mode
== VOIDmode
|| GET_MODE (tem
) == target_mode
)
5666 target
= gen_reg_rtx (target_mode
);
5668 convert_move (target
, tem
,
5669 !val_signbit_known_set_p (word_mode
,
5670 (normalizep
? normalizep
5671 : STORE_FLAG_VALUE
)));
5676 /* If this is A < 0 or A >= 0, we can do this by taking the ones
5677 complement of A (for GE) and shifting the sign bit to the low bit. */
5678 if (op1
== const0_rtx
&& (code
== LT
|| code
== GE
)
5679 && is_int_mode (mode
, &int_mode
)
5680 && (normalizep
|| STORE_FLAG_VALUE
== 1
5681 || val_signbit_p (int_mode
, STORE_FLAG_VALUE
)))
5683 scalar_int_mode int_target_mode
;
5687 int_target_mode
= int_mode
;
5690 /* If the result is to be wider than OP0, it is best to convert it
5691 first. If it is to be narrower, it is *incorrect* to convert it
5693 int_target_mode
= as_a
<scalar_int_mode
> (target_mode
);
5694 if (GET_MODE_SIZE (int_target_mode
) > GET_MODE_SIZE (int_mode
))
5696 op0
= convert_modes (int_target_mode
, int_mode
, op0
, 0);
5697 int_mode
= int_target_mode
;
5701 if (int_target_mode
!= int_mode
)
/* For GE, complement A first so the sign bit encodes the result.  */
5705 op0
= expand_unop (int_mode
, one_cmpl_optab
, op0
,
5706 ((STORE_FLAG_VALUE
== 1 || normalizep
)
5707 ? 0 : subtarget
), 0);
5709 if (STORE_FLAG_VALUE
== 1 || normalizep
)
5710 /* If we are supposed to produce a 0/1 value, we want to do
5711 a logical shift from the sign bit to the low-order bit; for
5712 a -1/0 value, we do an arithmetic shift. */
5713 op0
= expand_shift (RSHIFT_EXPR
, int_mode
, op0
,
5714 GET_MODE_BITSIZE (int_mode
) - 1,
5715 subtarget
, normalizep
!= -1);
5717 if (int_mode
!= int_target_mode
)
5718 op0
= convert_modes (int_target_mode
, int_mode
, op0
, 0);
/* Last resort: scan MODE and every wider mode for a cstore pattern
   (CCmode for MODE_CC comparisons) and let emit_cstore do the work.  */
5723 mclass
= GET_MODE_CLASS (mode
);
5724 FOR_EACH_MODE_FROM (compare_mode
, mode
)
5726 machine_mode optab_mode
= mclass
== MODE_CC
? CCmode
: compare_mode
;
5727 icode
= optab_handler (cstore_optab
, optab_mode
);
5728 if (icode
!= CODE_FOR_nothing
)
5730 do_pending_stack_adjust ();
5731 rtx tem
= emit_cstore (target
, icode
, code
, mode
, compare_mode
,
5732 unsignedp
, op0
, op1
, normalizep
, target_mode
);
/* Floating-point comparisons may also succeed with the operands
   swapped and the swapped condition SCODE.  */
5736 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
5738 tem
= emit_cstore (target
, icode
, scode
, mode
, compare_mode
,
5739 unsignedp
, op1
, op0
, normalizep
, target_mode
);
5750 /* Subroutine of emit_store_flag that handles cases in which the operands
5751 are scalar integers. SUBTARGET is the target to use for temporary
5752 operations and TRUEVAL is the value to store when the condition is
5753 true. All other arguments are as for emit_store_flag. */
/* Subroutine of emit_store_flag for scalar-integer operands.
   SUBTARGET is scratch for intermediates, TRUEVAL the value to store
   when the condition holds.  Tries XOR/SUB-then-recurse, the reversed
   comparison, and several sign-bit tricks for comparisons with zero.
   NOTE(review): numerous lines (conditions, final operands, braces,
   returns) are elided in this extract.  */
5756 emit_store_flag_int (rtx target
, rtx subtarget
, enum rtx_code code
, rtx op0
,
5757 rtx op1
, scalar_int_mode mode
, int unsignedp
,
5758 int normalizep
, rtx trueval
)
5760 machine_mode target_mode
= target
? GET_MODE (target
) : VOIDmode
;
5761 rtx_insn
*last
= get_last_insn ();
5763 /* If this is an equality comparison of integers, we can try to exclusive-or
5764 (or subtract) the two operands and use a recursive call to try the
5765 comparison with zero. Don't do any of these cases if branches are
5768 if ((code
== EQ
|| code
== NE
) && op1
!= const0_rtx
)
5770 rtx tem
= expand_binop (mode
, xor_optab
, op0
, op1
, subtarget
, 1,
5774 tem
= expand_binop (mode
, sub_optab
, op0
, op1
, subtarget
, 1,
5777 tem
= emit_store_flag (target
, code
, tem
, const0_rtx
,
5778 mode
, unsignedp
, normalizep
);
5782 delete_insns_since (last
);
5785 /* For integer comparisons, try the reverse comparison. However, for
5786 small X and if we'd have anyway to extend, implementing "X != 0"
5787 as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0". */
5788 rtx_code rcode
= reverse_condition (code
);
5789 if (can_compare_p (rcode
, mode
, ccp_store_flag
)
5790 && ! (optab_handler (cstore_optab
, mode
) == CODE_FOR_nothing
5792 && GET_MODE_SIZE (mode
) < UNITS_PER_WORD
5793 && op1
== const0_rtx
))
/* Reversing flips the flag value; fix it up by adding NORMALIZEP or
   XOR-ing with TRUEVAL, whichever the costs justify.  */
5795 int want_add
= ((STORE_FLAG_VALUE
== 1 && normalizep
== -1)
5796 || (STORE_FLAG_VALUE
== -1 && normalizep
== 1));
5798 /* Again, for the reverse comparison, use either an addition or a XOR. */
5800 && rtx_cost (GEN_INT (normalizep
), mode
, PLUS
, 1,
5801 optimize_insn_for_speed_p ()) == 0)
5803 rtx tem
= emit_store_flag_1 (subtarget
, rcode
, op0
, op1
, mode
, 0,
5804 STORE_FLAG_VALUE
, target_mode
);
5806 tem
= expand_binop (target_mode
, add_optab
, tem
,
5807 gen_int_mode (normalizep
, target_mode
),
5808 target
, 0, OPTAB_WIDEN
);
5813 && rtx_cost (trueval
, mode
, XOR
, 1,
5814 optimize_insn_for_speed_p ()) == 0)
5816 rtx tem
= emit_store_flag_1 (subtarget
, rcode
, op0
, op1
, mode
, 0,
5817 normalizep
, target_mode
);
5819 tem
= expand_binop (target_mode
, xor_optab
, tem
, trueval
, target
,
5820 INTVAL (trueval
) >= 0, OPTAB_WIDEN
);
5825 delete_insns_since (last
);
5828 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5829 the constant zero. Reject all other comparisons at this point. Only
5830 do LE and GT if branches are expensive since they are expensive on
5831 2-operand machines. */
5833 if (op1
!= const0_rtx
5834 || (code
!= EQ
&& code
!= NE
5835 && (BRANCH_COST (optimize_insn_for_speed_p (),
5836 false) <= 1 || (code
!= LE
&& code
!= GT
))))
5839 /* Try to put the result of the comparison in the sign bit. Assume we can't
5840 do the necessary operation below. */
5844 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
5845 the sign bit set. */
5849 /* This is destructive, so SUBTARGET can't be OP0. */
5850 if (rtx_equal_p (subtarget
, op0
))
5853 tem
= expand_binop (mode
, sub_optab
, op0
, const1_rtx
, subtarget
, 0,
5856 tem
= expand_binop (mode
, ior_optab
, op0
, tem
, subtarget
, 0,
5860 /* To see if A > 0, compute (((signed) A) << BITS) - A, where BITS is the
5861 number of bits in the mode of OP0, minus one. */
5865 if (rtx_equal_p (subtarget
, op0
))
5868 tem
= maybe_expand_shift (RSHIFT_EXPR
, mode
, op0
,
5869 GET_MODE_BITSIZE (mode
) - 1,
5872 tem
= expand_binop (mode
, sub_optab
, tem
, op0
, subtarget
, 0,
5876 if (code
== EQ
|| code
== NE
)
5878 /* For EQ or NE, one way to do the comparison is to apply an operation
5879 that converts the operand into a positive number if it is nonzero
5880 or zero if it was originally zero. Then, for EQ, we subtract 1 and
5881 for NE we negate. This puts the result in the sign bit. Then we
5882 normalize with a shift, if needed.
5884 Two operations that can do the above actions are ABS and FFS, so try
5885 them. If that doesn't work, and MODE is smaller than a full word,
5886 we can use zero-extension to the wider mode (an unsigned conversion)
5887 as the operation. */
5889 /* Note that ABS doesn't yield a positive number for INT_MIN, but
5890 that is compensated by the subsequent overflow when subtracting
5893 if (optab_handler (abs_optab
, mode
) != CODE_FOR_nothing
)
5894 tem
= expand_unop (mode
, abs_optab
, op0
, subtarget
, 1);
5895 else if (optab_handler (ffs_optab
, mode
) != CODE_FOR_nothing
)
5896 tem
= expand_unop (mode
, ffs_optab
, op0
, subtarget
, 1);
5897 else if (GET_MODE_SIZE (mode
) < UNITS_PER_WORD
)
5899 tem
= convert_modes (word_mode
, mode
, op0
, 1);
/* EQ: subtract 1 so the sign bit is set iff TEM was zero;
   NE: negate so the sign bit is set iff TEM was nonzero.  */
5906 tem
= expand_binop (mode
, sub_optab
, tem
, const1_rtx
, subtarget
,
5909 tem
= expand_unop (mode
, neg_optab
, tem
, subtarget
, 0);
5912 /* If we couldn't do it that way, for NE we can "or" the two's complement
5913 of the value with itself. For EQ, we take the one's complement of
5914 that "or", which is an extra insn, so we only handle EQ if branches
5919 || BRANCH_COST (optimize_insn_for_speed_p (),
5922 if (rtx_equal_p (subtarget
, op0
))
5925 tem
= expand_unop (mode
, neg_optab
, op0
, subtarget
, 0);
5926 tem
= expand_binop (mode
, ior_optab
, tem
, op0
, subtarget
, 0,
5929 if (tem
&& code
== EQ
)
5930 tem
= expand_unop (mode
, one_cmpl_optab
, tem
, subtarget
, 0);
/* The answer now lives in the sign bit; shift it down to produce
   0/1 (logical shift) or 0/-1 (arithmetic shift).  */
5934 if (tem
&& normalizep
)
5935 tem
= maybe_expand_shift (RSHIFT_EXPR
, mode
, tem
,
5936 GET_MODE_BITSIZE (mode
) - 1,
5937 subtarget
, normalizep
== 1);
/* Move/convert the result into TARGET as needed.  */
5943 else if (GET_MODE (tem
) != target_mode
)
5945 convert_move (target
, tem
, 0);
5948 else if (!subtarget
)
5950 emit_move_insn (target
, tem
);
/* Nothing worked: discard everything emitted since LAST.  */
5955 delete_insns_since (last
);
5960 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5961 and storing in TARGET. Normally return TARGET.
5962 Return 0 if that cannot be done.
5964 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
5965 it is VOIDmode, they cannot both be CONST_INT.
5967 UNSIGNEDP is for the case where we have to widen the operands
5968 to perform the operation. It says to use zero-extension.
5970 NORMALIZEP is 1 if we should convert the result to be either zero
5971 or one. Normalize is -1 if we should convert the result to be
5972 either zero or -1. If NORMALIZEP is zero, the result will be left
5973 "raw" out of the scc insn. */
/* Emit a store-flag insn computing OP0 CODE OP1 into TARGET, per the
   contract in the comment above (returns 0 on failure; NORMALIZEP
   selects raw / 0-1 / 0-(-1) results).  First tries emit_store_flag_1;
   then floating-point reversal/splitting tricks; finally hands integer
   modes to emit_store_flag_int.  NOTE(review): several lines (braces,
   trailing arguments, early returns) are elided in this extract.  */
5976 emit_store_flag (rtx target
, enum rtx_code code
, rtx op0
, rtx op1
,
5977 machine_mode mode
, int unsignedp
, int normalizep
)
5979 machine_mode target_mode
= target
? GET_MODE (target
) : VOIDmode
;
5980 enum rtx_code rcode
;
5985 /* If we compare constants, we shouldn't use a store-flag operation,
5986 but a constant load. We can get there via the vanilla route that
5987 usually generates a compare-branch sequence, but will in this case
5988 fold the comparison to a constant, and thus elide the branch. */
5989 if (CONSTANT_P (op0
) && CONSTANT_P (op1
))
/* First attempt: the non-recursive tricks.  */
5992 tem
= emit_store_flag_1 (target
, code
, op0
, op1
, mode
, unsignedp
, normalizep
,
5997 /* If we reached here, we can't do this with a scc insn, however there
5998 are some comparisons that can be done in other ways. Don't do any
5999 of these cases if branches are very cheap. */
6000 if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
6003 /* See what we need to return. We can only return a 1, -1, or the
6006 if (normalizep
== 0)
6008 if (STORE_FLAG_VALUE
== 1 || STORE_FLAG_VALUE
== -1)
6009 normalizep
= STORE_FLAG_VALUE
;
6011 else if (val_signbit_p (mode
, STORE_FLAG_VALUE
))
6017 last
= get_last_insn ();
6019 /* If optimizing, use different pseudo registers for each insn, instead
6020 of reusing the same pseudo. This leads to better CSE, but slows
6021 down the compiler, since there are more pseudos. */
6022 subtarget
= (!optimize
6023 && (target_mode
== mode
)) ? target
: NULL_RTX
;
6024 trueval
= GEN_INT (normalizep
? normalizep
: STORE_FLAG_VALUE
);
6026 /* For floating-point comparisons, try the reverse comparison or try
6027 changing the "orderedness" of the comparison. */
6028 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
6030 enum rtx_code first_code
;
/* Reversal is only safe where NaN behavior permits it.  */
6033 rcode
= reverse_condition_maybe_unordered (code
);
6034 if (can_compare_p (rcode
, mode
, ccp_store_flag
)
6035 && (code
== ORDERED
|| code
== UNORDERED
6036 || (! HONOR_NANS (mode
) && (code
== LTGT
|| code
== UNEQ
))
6037 || (! HONOR_SNANS (mode
) && (code
== EQ
|| code
== NE
))))
6039 int want_add
= ((STORE_FLAG_VALUE
== 1 && normalizep
== -1)
6040 || (STORE_FLAG_VALUE
== -1 && normalizep
== 1));
6042 /* For the reverse comparison, use either an addition or a XOR. */
6044 && rtx_cost (GEN_INT (normalizep
), mode
, PLUS
, 1,
6045 optimize_insn_for_speed_p ()) == 0)
6047 tem
= emit_store_flag_1 (subtarget
, rcode
, op0
, op1
, mode
, 0,
6048 STORE_FLAG_VALUE
, target_mode
);
6050 return expand_binop (target_mode
, add_optab
, tem
,
6051 gen_int_mode (normalizep
, target_mode
),
6052 target
, 0, OPTAB_WIDEN
);
6055 && rtx_cost (trueval
, mode
, XOR
, 1,
6056 optimize_insn_for_speed_p ()) == 0)
6058 tem
= emit_store_flag_1 (subtarget
, rcode
, op0
, op1
, mode
, 0,
6059 normalizep
, target_mode
);
6061 return expand_binop (target_mode
, xor_optab
, tem
, trueval
,
6062 target
, INTVAL (trueval
) >= 0,
6067 delete_insns_since (last
);
6069 /* Cannot split ORDERED and UNORDERED, only try the above trick. */
6070 if (code
== ORDERED
|| code
== UNORDERED
)
/* Split e.g. UNLT into an ordering test plus an ordered comparison.  */
6073 and_them
= split_comparison (code
, mode
, &first_code
, &code
);
6075 /* If there are no NaNs, the first comparison should always fall through.
6076 Effectively change the comparison to the other one. */
6077 if (!HONOR_NANS (mode
))
6079 gcc_assert (first_code
== (and_them
? ORDERED
: UNORDERED
));
6080 return emit_store_flag_1 (target
, code
, op0
, op1
, mode
, 0, normalizep
,
6084 if (!HAVE_conditional_move
)
6087 /* Do not turn a trapping comparison into a non-trapping one. */
6088 if ((code
!= EQ
&& code
!= NE
&& code
!= UNEQ
&& code
!= LTGT
)
6089 && flag_trapping_math
)
6092 /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
6093 conditional move. */
6094 tem
= emit_store_flag_1 (subtarget
, first_code
, op0
, op1
, mode
, 0,
6095 normalizep
, target_mode
);
/* Combine the two halves with a conditional move; the elided branch
   chooses which cmove form based on AND_THEM.  */
6100 tem
= emit_conditional_move (target
, code
, op0
, op1
, mode
,
6101 tem
, const0_rtx
, GET_MODE (tem
), 0);
6103 tem
= emit_conditional_move (target
, code
, op0
, op1
, mode
,
6104 trueval
, tem
, GET_MODE (tem
), 0);
6107 delete_insns_since (last
);
6111 /* The remaining tricks only apply to integer comparisons. */
6113 scalar_int_mode int_mode
;
6114 if (is_int_mode (mode
, &int_mode
))
6115 return emit_store_flag_int (target
, subtarget
, code
, op0
, op1
, int_mode
,
6116 unsignedp
, normalizep
, trueval
);
6121 /* Like emit_store_flag, but always succeeds. */
/* Like emit_store_flag, but never fails: when no store-flag sequence
   exists, fall back to move/compare/jump/move.  NOTE(review): this
   extract is elided — several conditions, braces, the label emissions
   and the final `return target;` are missing.  */
6124 emit_store_flag_force (rtx target
, enum rtx_code code
, rtx op0
, rtx op1
,
6125 machine_mode mode
, int unsignedp
, int normalizep
)
6128 rtx_code_label
*label
;
6129 rtx trueval
, falseval
;
6131 /* First see if emit_store_flag can do the job. */
6132 tem
= emit_store_flag (target
, code
, op0
, op1
, mode
, unsignedp
, normalizep
);
6136 /* If one operand is constant, make it the second one. Only do this
6137 if the other operand is not constant as well. */
6138 if (swap_commutative_operands_p (op0
, op1
))
6140 std::swap (op0
, op1
);
6141 code
= swap_condition (code
);
6144 if (mode
== VOIDmode
)
6145 mode
= GET_MODE (op0
);
6148 target
= gen_reg_rtx (word_mode
);
6150 /* If this failed, we have to do this with set/compare/jump/set code.
6151 For foo != 0, if foo is in OP0, just replace it with 1 if nonzero. */
6152 trueval
= normalizep
? GEN_INT (normalizep
) : const1_rtx
;
/* Special case (conditions partially elided): integer NE against zero
   with the value already in TARGET — skip the store when it is 0.  */
6154 && GET_MODE_CLASS (mode
) == MODE_INT
6157 && op1
== const0_rtx
)
6159 label
= gen_label_rtx ();
6160 do_compare_rtx_and_jump (target
, const0_rtx
, EQ
, unsignedp
, mode
,
6161 NULL_RTX
, NULL
, label
,
6162 profile_probability::uninitialized ());
6163 emit_move_insn (target
, trueval
);
/* TARGET must not overlap the operands, or the first move would
   clobber them before the comparison.  */
6169 || reg_mentioned_p (target
, op0
) || reg_mentioned_p (target
, op1
))
6170 target
= gen_reg_rtx (GET_MODE (target
));
6172 /* Jump in the right direction if the target cannot implement CODE
6173 but can jump on its reverse condition. */
6174 falseval
= const0_rtx
;
6175 if (! can_compare_p (code
, mode
, ccp_jump
)
6176 && (! FLOAT_MODE_P (mode
)
6177 || code
== ORDERED
|| code
== UNORDERED
6178 || (! HONOR_NANS (mode
) && (code
== LTGT
|| code
== UNEQ
))
6179 || (! HONOR_SNANS (mode
) && (code
== EQ
|| code
== NE
))))
6181 enum rtx_code rcode
;
6182 if (FLOAT_MODE_P (mode
))
6183 rcode
= reverse_condition_maybe_unordered (code
);
6185 rcode
= reverse_condition (code
);
6187 /* Canonicalize to UNORDERED for the libcall. */
6188 if (can_compare_p (rcode
, mode
, ccp_jump
)
6189 || (code
== ORDERED
&& ! can_compare_p (ORDERED
, mode
, ccp_jump
)))
/* Use the reversed jump, swapping the stored true/false values.  */
6192 trueval
= const0_rtx
;
/* General fallback: store TRUEVAL, jump over the FALSEVAL store when
   the condition holds.  */
6197 emit_move_insn (target
, trueval
);
6198 label
= gen_label_rtx ();
6199 do_compare_rtx_and_jump (op0
, op1
, code
, unsignedp
, mode
, NULL_RTX
, NULL
,
6200 label
, profile_probability::uninitialized ());
6202 emit_move_insn (target
, falseval
);
6208 /* Helper function for canonicalize_cmp_for_target. Swap between inclusive
6209 and exclusive ranges in order to create an equivalent comparison. See
6210 canonicalize_cmp_for_target for the possible cases. */
6212 static enum rtx_code
6213 equivalent_cmp_code (enum rtx_code code
)
6239 /* Choose the more appropriate immediate in scalar integer comparisons. The
6240 purpose of this is to end up with an immediate which can be loaded into a
6241 register in fewer moves, if possible.
6243 For each integer comparison there exists an equivalent choice:
6244 i) a > b or a >= b + 1
6245 ii) a <= b or a < b + 1
6246 iii) a >= b or a > b - 1
6247 iv) a < b or a <= b - 1
6249 MODE is the mode of the first operand.
6250 CODE points to the comparison code.
6251 IMM points to the rtx containing the immediate. *IMM must satisfy
6252 CONST_SCALAR_INT_P on entry and continues to satisfy CONST_SCALAR_INT_P
/* Rewrite *CODE / *IMM into the equivalent inclusive/exclusive form
   (e.g. a > b  <=>  a >= b+1) when the alternative immediate is
   cheaper to load, per the comment above.  NOTE(review): some lines
   (braces, early returns, the `*imm = new_imm;` store and the
   declaration of imm_modif) are elided in this extract.  */
6256 canonicalize_comparison (machine_mode mode
, enum rtx_code
*code
, rtx
*imm
)
/* Only scalar integer comparisons are handled.  */
6258 if (!SCALAR_INT_MODE_P (mode
))
6262 enum signop sgn
= unsigned_condition_p (*code
) ? UNSIGNED
: SIGNED
;
6264 /* Extract the immediate value from the rtx. */
6265 wide_int imm_val
= rtx_mode_t (*imm
, mode
);
/* GT/GTU/LE/LEU take IMM+1; GE/GEU/LT/LTU take IMM-1 (the branch
   bodies selecting the direction are elided).  */
6267 if (*code
== GT
|| *code
== GTU
|| *code
== LE
|| *code
== LEU
)
6269 else if (*code
== GE
|| *code
== GEU
|| *code
== LT
|| *code
== LTU
)
6274 /* Check for overflow/underflow in the case of signed values and
6275 wrapping around in the case of unsigned values. If any occur
6276 cancel the optimization. */
6277 wi::overflow_type overflow
= wi::OVF_NONE
;
6281 imm_modif
= wi::add (imm_val
, 1, sgn
, &overflow
);
6283 imm_modif
= wi::sub (imm_val
, 1, sgn
, &overflow
);
6288 /* The following creates a pseudo; if we cannot do that, bail out. */
6289 if (!can_create_pseudo_p ())
/* Cost the two candidate immediates as moves into a scratch reg.  */
6292 rtx reg
= gen_rtx_REG (mode
, LAST_VIRTUAL_REGISTER
+ 1);
6293 rtx new_imm
= immed_wide_int_const (imm_modif
, mode
);
6295 rtx_insn
*old_rtx
= gen_move_insn (reg
, *imm
);
6296 rtx_insn
*new_rtx
= gen_move_insn (reg
, new_imm
);
6298 /* Update the immediate and the code. */
6299 if (insn_cost (old_rtx
, true) > insn_cost (new_rtx
, true))
6301 *code
= equivalent_cmp_code (*code
);
6308 /* Perform possibly multi-word comparison and conditional jump to LABEL
6309 if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE. This is
6310 now a thin wrapper around do_compare_rtx_and_jump. */
6313 do_cmp_and_jump (rtx arg1
, rtx arg2
, enum rtx_code op
, machine_mode mode
,
6314 rtx_code_label
*label
)
6316 int unsignedp
= (op
== LTU
|| op
== LEU
|| op
== GTU
|| op
== GEU
);
6317 do_compare_rtx_and_jump (arg1
, arg2
, op
, unsignedp
, mode
, NULL_RTX
,
6318 NULL
, label
, profile_probability::uninitialized ());