1 /* Medium-level subroutines: convert bit-field store and extract
2 and shifts, multiplies and divides to rtl instructions.
3 Copyright (C) 1987-2015 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
26 #include "diagnostic-core.h"
31 #include "double-int.h"
38 #include "fold-const.h"
39 #include "stor-layout.h"
42 #include "insn-config.h"
44 #include "hard-reg-set.h"
46 #include "statistics.h"
48 #include "fixed-value.h"
57 #include "insn-codes.h"
60 #include "langhooks.h"
62 #include "basic-block.h"
66 struct target_expmed default_target_expmed
;
68 struct target_expmed
*this_target_expmed
= &default_target_expmed
;
71 static void store_fixed_bit_field (rtx
, unsigned HOST_WIDE_INT
,
72 unsigned HOST_WIDE_INT
,
73 unsigned HOST_WIDE_INT
,
74 unsigned HOST_WIDE_INT
,
76 static void store_fixed_bit_field_1 (rtx
, unsigned HOST_WIDE_INT
,
77 unsigned HOST_WIDE_INT
,
79 static void store_split_bit_field (rtx
, unsigned HOST_WIDE_INT
,
80 unsigned HOST_WIDE_INT
,
81 unsigned HOST_WIDE_INT
,
82 unsigned HOST_WIDE_INT
,
84 static rtx
extract_fixed_bit_field (machine_mode
, rtx
,
85 unsigned HOST_WIDE_INT
,
86 unsigned HOST_WIDE_INT
, rtx
, int, bool);
87 static rtx
extract_fixed_bit_field_1 (machine_mode
, rtx
,
88 unsigned HOST_WIDE_INT
,
89 unsigned HOST_WIDE_INT
, rtx
, int, bool);
90 static rtx
lshift_value (machine_mode
, unsigned HOST_WIDE_INT
, int);
91 static rtx
extract_split_bit_field (rtx
, unsigned HOST_WIDE_INT
,
92 unsigned HOST_WIDE_INT
, int, bool);
93 static void do_cmp_and_jump (rtx
, rtx
, enum rtx_code
, machine_mode
, rtx_code_label
*);
94 static rtx
expand_smod_pow2 (machine_mode
, rtx
, HOST_WIDE_INT
);
95 static rtx
expand_sdiv_pow2 (machine_mode
, rtx
, HOST_WIDE_INT
);
97 /* Return a constant integer mask value of mode MODE with BITSIZE ones
98 followed by BITPOS zeros, or the complement of that if COMPLEMENT.
99 The mask is truncated if necessary to the width of mode MODE. The
100 mask is zero-extended if BITSIZE+BITPOS is too small for MODE. */
103 mask_rtx (machine_mode mode
, int bitpos
, int bitsize
, bool complement
)
105 return immed_wide_int_const
106 (wi::shifted_mask (bitpos
, bitsize
, complement
,
107 GET_MODE_PRECISION (mode
)), mode
);
/* Test whether a value is zero or a power of two.  Works for any
   unsigned HOST_WIDE_INT argument: x & (x - 1) clears the lowest set
   bit, so the result is zero exactly when at most one bit is set.  */
#define EXACT_POWER_OF_2_OR_ZERO_P(x) \
  (((x) & ((x) - (unsigned HOST_WIDE_INT) 1)) == 0)
114 struct init_expmed_rtl
135 rtx pow2
[MAX_BITS_PER_WORD
];
136 rtx cint
[MAX_BITS_PER_WORD
];
140 init_expmed_one_conv (struct init_expmed_rtl
*all
, machine_mode to_mode
,
141 machine_mode from_mode
, bool speed
)
143 int to_size
, from_size
;
146 to_size
= GET_MODE_PRECISION (to_mode
);
147 from_size
= GET_MODE_PRECISION (from_mode
);
149 /* Most partial integers have a precision less than the "full"
150 integer it requires for storage. In case one doesn't, for
151 comparison purposes here, reduce the bit size by one in that
153 if (GET_MODE_CLASS (to_mode
) == MODE_PARTIAL_INT
154 && exact_log2 (to_size
) != -1)
156 if (GET_MODE_CLASS (from_mode
) == MODE_PARTIAL_INT
157 && exact_log2 (from_size
) != -1)
160 /* Assume cost of zero-extend and sign-extend is the same. */
161 which
= (to_size
< from_size
? all
->trunc
: all
->zext
);
163 PUT_MODE (all
->reg
, from_mode
);
164 set_convert_cost (to_mode
, from_mode
, speed
, set_src_cost (which
, speed
));
168 init_expmed_one_mode (struct init_expmed_rtl
*all
,
169 machine_mode mode
, int speed
)
171 int m
, n
, mode_bitsize
;
172 machine_mode mode_from
;
174 mode_bitsize
= GET_MODE_UNIT_BITSIZE (mode
);
176 PUT_MODE (all
->reg
, mode
);
177 PUT_MODE (all
->plus
, mode
);
178 PUT_MODE (all
->neg
, mode
);
179 PUT_MODE (all
->mult
, mode
);
180 PUT_MODE (all
->sdiv
, mode
);
181 PUT_MODE (all
->udiv
, mode
);
182 PUT_MODE (all
->sdiv_32
, mode
);
183 PUT_MODE (all
->smod_32
, mode
);
184 PUT_MODE (all
->wide_trunc
, mode
);
185 PUT_MODE (all
->shift
, mode
);
186 PUT_MODE (all
->shift_mult
, mode
);
187 PUT_MODE (all
->shift_add
, mode
);
188 PUT_MODE (all
->shift_sub0
, mode
);
189 PUT_MODE (all
->shift_sub1
, mode
);
190 PUT_MODE (all
->zext
, mode
);
191 PUT_MODE (all
->trunc
, mode
);
193 set_add_cost (speed
, mode
, set_src_cost (all
->plus
, speed
));
194 set_neg_cost (speed
, mode
, set_src_cost (all
->neg
, speed
));
195 set_mul_cost (speed
, mode
, set_src_cost (all
->mult
, speed
));
196 set_sdiv_cost (speed
, mode
, set_src_cost (all
->sdiv
, speed
));
197 set_udiv_cost (speed
, mode
, set_src_cost (all
->udiv
, speed
));
199 set_sdiv_pow2_cheap (speed
, mode
, (set_src_cost (all
->sdiv_32
, speed
)
200 <= 2 * add_cost (speed
, mode
)));
201 set_smod_pow2_cheap (speed
, mode
, (set_src_cost (all
->smod_32
, speed
)
202 <= 4 * add_cost (speed
, mode
)));
204 set_shift_cost (speed
, mode
, 0, 0);
206 int cost
= add_cost (speed
, mode
);
207 set_shiftadd_cost (speed
, mode
, 0, cost
);
208 set_shiftsub0_cost (speed
, mode
, 0, cost
);
209 set_shiftsub1_cost (speed
, mode
, 0, cost
);
212 n
= MIN (MAX_BITS_PER_WORD
, mode_bitsize
);
213 for (m
= 1; m
< n
; m
++)
215 XEXP (all
->shift
, 1) = all
->cint
[m
];
216 XEXP (all
->shift_mult
, 1) = all
->pow2
[m
];
218 set_shift_cost (speed
, mode
, m
, set_src_cost (all
->shift
, speed
));
219 set_shiftadd_cost (speed
, mode
, m
, set_src_cost (all
->shift_add
, speed
));
220 set_shiftsub0_cost (speed
, mode
, m
, set_src_cost (all
->shift_sub0
, speed
));
221 set_shiftsub1_cost (speed
, mode
, m
, set_src_cost (all
->shift_sub1
, speed
));
224 if (SCALAR_INT_MODE_P (mode
))
226 for (mode_from
= MIN_MODE_INT
; mode_from
<= MAX_MODE_INT
;
227 mode_from
= (machine_mode
)(mode_from
+ 1))
228 init_expmed_one_conv (all
, mode
, mode_from
, speed
);
230 if (GET_MODE_CLASS (mode
) == MODE_INT
)
232 machine_mode wider_mode
= GET_MODE_WIDER_MODE (mode
);
233 if (wider_mode
!= VOIDmode
)
235 PUT_MODE (all
->zext
, wider_mode
);
236 PUT_MODE (all
->wide_mult
, wider_mode
);
237 PUT_MODE (all
->wide_lshr
, wider_mode
);
238 XEXP (all
->wide_lshr
, 1) = GEN_INT (mode_bitsize
);
240 set_mul_widen_cost (speed
, wider_mode
,
241 set_src_cost (all
->wide_mult
, speed
));
242 set_mul_highpart_cost (speed
, mode
,
243 set_src_cost (all
->wide_trunc
, speed
));
251 struct init_expmed_rtl all
;
252 machine_mode mode
= QImode
;
255 memset (&all
, 0, sizeof all
);
256 for (m
= 1; m
< MAX_BITS_PER_WORD
; m
++)
258 all
.pow2
[m
] = GEN_INT ((HOST_WIDE_INT
) 1 << m
);
259 all
.cint
[m
] = GEN_INT (m
);
262 /* Avoid using hard regs in ways which may be unsupported. */
263 all
.reg
= gen_rtx_raw_REG (mode
, LAST_VIRTUAL_REGISTER
+ 1);
264 all
.plus
= gen_rtx_PLUS (mode
, all
.reg
, all
.reg
);
265 all
.neg
= gen_rtx_NEG (mode
, all
.reg
);
266 all
.mult
= gen_rtx_MULT (mode
, all
.reg
, all
.reg
);
267 all
.sdiv
= gen_rtx_DIV (mode
, all
.reg
, all
.reg
);
268 all
.udiv
= gen_rtx_UDIV (mode
, all
.reg
, all
.reg
);
269 all
.sdiv_32
= gen_rtx_DIV (mode
, all
.reg
, all
.pow2
[5]);
270 all
.smod_32
= gen_rtx_MOD (mode
, all
.reg
, all
.pow2
[5]);
271 all
.zext
= gen_rtx_ZERO_EXTEND (mode
, all
.reg
);
272 all
.wide_mult
= gen_rtx_MULT (mode
, all
.zext
, all
.zext
);
273 all
.wide_lshr
= gen_rtx_LSHIFTRT (mode
, all
.wide_mult
, all
.reg
);
274 all
.wide_trunc
= gen_rtx_TRUNCATE (mode
, all
.wide_lshr
);
275 all
.shift
= gen_rtx_ASHIFT (mode
, all
.reg
, all
.reg
);
276 all
.shift_mult
= gen_rtx_MULT (mode
, all
.reg
, all
.reg
);
277 all
.shift_add
= gen_rtx_PLUS (mode
, all
.shift_mult
, all
.reg
);
278 all
.shift_sub0
= gen_rtx_MINUS (mode
, all
.shift_mult
, all
.reg
);
279 all
.shift_sub1
= gen_rtx_MINUS (mode
, all
.reg
, all
.shift_mult
);
280 all
.trunc
= gen_rtx_TRUNCATE (mode
, all
.reg
);
282 for (speed
= 0; speed
< 2; speed
++)
284 crtl
->maybe_hot_insn_p
= speed
;
285 set_zero_cost (speed
, set_src_cost (const0_rtx
, speed
));
287 for (mode
= MIN_MODE_INT
; mode
<= MAX_MODE_INT
;
288 mode
= (machine_mode
)(mode
+ 1))
289 init_expmed_one_mode (&all
, mode
, speed
);
291 if (MIN_MODE_PARTIAL_INT
!= VOIDmode
)
292 for (mode
= MIN_MODE_PARTIAL_INT
; mode
<= MAX_MODE_PARTIAL_INT
;
293 mode
= (machine_mode
)(mode
+ 1))
294 init_expmed_one_mode (&all
, mode
, speed
);
296 if (MIN_MODE_VECTOR_INT
!= VOIDmode
)
297 for (mode
= MIN_MODE_VECTOR_INT
; mode
<= MAX_MODE_VECTOR_INT
;
298 mode
= (machine_mode
)(mode
+ 1))
299 init_expmed_one_mode (&all
, mode
, speed
);
302 if (alg_hash_used_p ())
304 struct alg_hash_entry
*p
= alg_hash_entry_ptr (0);
305 memset (p
, 0, sizeof (*p
) * NUM_ALG_HASH_ENTRIES
);
308 set_alg_hash_used_p (true);
309 default_rtl_profile ();
311 ggc_free (all
.trunc
);
312 ggc_free (all
.shift_sub1
);
313 ggc_free (all
.shift_sub0
);
314 ggc_free (all
.shift_add
);
315 ggc_free (all
.shift_mult
);
316 ggc_free (all
.shift
);
317 ggc_free (all
.wide_trunc
);
318 ggc_free (all
.wide_lshr
);
319 ggc_free (all
.wide_mult
);
321 ggc_free (all
.smod_32
);
322 ggc_free (all
.sdiv_32
);
331 /* Return an rtx representing minus the value of X.
332 MODE is the intended mode of the result,
333 useful if X is a CONST_INT. */
336 negate_rtx (machine_mode mode
, rtx x
)
338 rtx result
= simplify_unary_operation (NEG
, mode
, x
, mode
);
341 result
= expand_unop (mode
, neg_optab
, x
, NULL_RTX
, 0);
346 /* Return an rtx representing value of X with reverse storage order.
347 MODE is the intended mode of the result,
348 useful if X is a CONST_INT. */
351 flip_storage_order (enum machine_mode mode
, rtx x
)
353 enum machine_mode int_mode
;
359 if (SCALAR_INT_MODE_P (mode
))
363 int_mode
= int_mode_for_mode (mode
);
364 gcc_assert (int_mode
!= BLKmode
);
365 x
= gen_lowpart (int_mode
, x
);
368 result
= simplify_unary_operation (BSWAP
, int_mode
, x
, int_mode
);
370 result
= expand_unop (int_mode
, bswap_optab
, x
, NULL_RTX
, 1);
372 if (int_mode
!= mode
)
373 result
= gen_lowpart (mode
, result
);
378 /* Adjust bitfield memory MEM so that it points to the first unit of mode
379 MODE that contains a bitfield of size BITSIZE at bit position BITNUM.
380 If MODE is BLKmode, return a reference to every byte in the bitfield.
381 Set *NEW_BITNUM to the bit position of the field within the new memory. */
384 narrow_bit_field_mem (rtx mem
, machine_mode mode
,
385 unsigned HOST_WIDE_INT bitsize
,
386 unsigned HOST_WIDE_INT bitnum
,
387 unsigned HOST_WIDE_INT
*new_bitnum
)
391 *new_bitnum
= bitnum
% BITS_PER_UNIT
;
392 HOST_WIDE_INT offset
= bitnum
/ BITS_PER_UNIT
;
393 HOST_WIDE_INT size
= ((*new_bitnum
+ bitsize
+ BITS_PER_UNIT
- 1)
395 return adjust_bitfield_address_size (mem
, mode
, offset
, size
);
399 unsigned int unit
= GET_MODE_BITSIZE (mode
);
400 *new_bitnum
= bitnum
% unit
;
401 HOST_WIDE_INT offset
= (bitnum
- *new_bitnum
) / BITS_PER_UNIT
;
402 return adjust_bitfield_address (mem
, mode
, offset
);
406 /* The caller wants to perform insertion or extraction PATTERN on a
407 bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
408 BITREGION_START and BITREGION_END are as for store_bit_field
409 and FIELDMODE is the natural mode of the field.
411 Search for a mode that is compatible with the memory access
412 restrictions and (where applicable) with a register insertion or
413 extraction. Return the new memory on success, storing the adjusted
414 bit position in *NEW_BITNUM. Return null otherwise. */
417 adjust_bit_field_mem_for_reg (enum extraction_pattern pattern
,
418 rtx op0
, HOST_WIDE_INT bitsize
,
419 HOST_WIDE_INT bitnum
,
420 unsigned HOST_WIDE_INT bitregion_start
,
421 unsigned HOST_WIDE_INT bitregion_end
,
422 machine_mode fieldmode
,
423 unsigned HOST_WIDE_INT
*new_bitnum
)
425 bit_field_mode_iterator
iter (bitsize
, bitnum
, bitregion_start
,
426 bitregion_end
, MEM_ALIGN (op0
),
427 MEM_VOLATILE_P (op0
));
428 machine_mode best_mode
;
429 if (iter
.next_mode (&best_mode
))
431 /* We can use a memory in BEST_MODE. See whether this is true for
432 any wider modes. All other things being equal, we prefer to
433 use the widest mode possible because it tends to expose more
434 CSE opportunities. */
435 if (!iter
.prefer_smaller_modes ())
437 /* Limit the search to the mode required by the corresponding
438 register insertion or extraction instruction, if any. */
439 machine_mode limit_mode
= word_mode
;
440 extraction_insn insn
;
441 if (get_best_reg_extraction_insn (&insn
, pattern
,
442 GET_MODE_BITSIZE (best_mode
),
444 limit_mode
= insn
.field_mode
;
446 machine_mode wider_mode
;
447 while (iter
.next_mode (&wider_mode
)
448 && GET_MODE_SIZE (wider_mode
) <= GET_MODE_SIZE (limit_mode
))
449 best_mode
= wider_mode
;
451 return narrow_bit_field_mem (op0
, best_mode
, bitsize
, bitnum
,
457 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
458 a structure of mode STRUCT_MODE represents a lowpart subreg. The subreg
459 offset is then BITNUM / BITS_PER_UNIT. */
462 lowpart_bit_field_p (unsigned HOST_WIDE_INT bitnum
,
463 unsigned HOST_WIDE_INT bitsize
,
464 machine_mode struct_mode
)
466 if (BYTES_BIG_ENDIAN
)
467 return (bitnum
% BITS_PER_UNIT
== 0
468 && (bitnum
+ bitsize
== GET_MODE_BITSIZE (struct_mode
)
469 || (bitnum
+ bitsize
) % BITS_PER_WORD
== 0));
471 return bitnum
% BITS_PER_WORD
== 0;
474 /* Return true if -fstrict-volatile-bitfields applies to an access of OP0
475 containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
476 Return false if the access would touch memory outside the range
477 BITREGION_START to BITREGION_END for conformance to the C++ memory
481 strict_volatile_bitfield_p (rtx op0
, unsigned HOST_WIDE_INT bitsize
,
482 unsigned HOST_WIDE_INT bitnum
,
483 machine_mode fieldmode
,
484 unsigned HOST_WIDE_INT bitregion_start
,
485 unsigned HOST_WIDE_INT bitregion_end
)
487 unsigned HOST_WIDE_INT modesize
= GET_MODE_BITSIZE (fieldmode
);
489 /* -fstrict-volatile-bitfields must be enabled and we must have a
492 || !MEM_VOLATILE_P (op0
)
493 || flag_strict_volatile_bitfields
<= 0)
496 /* Non-integral modes likely only happen with packed structures.
498 if (!SCALAR_INT_MODE_P (fieldmode
))
501 /* The bit size must not be larger than the field mode, and
502 the field mode must not be larger than a word. */
503 if (bitsize
> modesize
|| modesize
> BITS_PER_WORD
)
506 /* Check for cases of unaligned fields that must be split. */
507 if (bitnum
% modesize
+ bitsize
> modesize
)
510 /* The memory must be sufficiently aligned for a MODESIZE access.
511 This condition guarantees, that the memory access will not
512 touch anything after the end of the structure. */
513 if (MEM_ALIGN (op0
) < modesize
)
516 /* Check for cases where the C++ memory model applies. */
517 if (bitregion_end
!= 0
518 && (bitnum
- bitnum
% modesize
< bitregion_start
519 || bitnum
- bitnum
% modesize
+ modesize
- 1 > bitregion_end
))
525 /* Return true if OP is a memory and if a bitfield of size BITSIZE at
526 bit number BITNUM can be treated as a simple value of mode MODE. */
529 simple_mem_bitfield_p (rtx op0
, unsigned HOST_WIDE_INT bitsize
,
530 unsigned HOST_WIDE_INT bitnum
, machine_mode mode
)
533 && bitnum
% BITS_PER_UNIT
== 0
534 && bitsize
== GET_MODE_BITSIZE (mode
)
535 && (!SLOW_UNALIGNED_ACCESS (mode
, MEM_ALIGN (op0
))
536 || (bitnum
% GET_MODE_ALIGNMENT (mode
) == 0
537 && MEM_ALIGN (op0
) >= GET_MODE_ALIGNMENT (mode
))));
540 /* Try to use instruction INSV to store VALUE into a field of OP0.
541 BITSIZE and BITNUM are as for store_bit_field. */
544 store_bit_field_using_insv (const extraction_insn
*insv
, rtx op0
,
545 unsigned HOST_WIDE_INT bitsize
,
546 unsigned HOST_WIDE_INT bitnum
,
549 struct expand_operand ops
[4];
552 rtx_insn
*last
= get_last_insn ();
553 bool copy_back
= false;
555 machine_mode op_mode
= insv
->field_mode
;
556 unsigned int unit
= GET_MODE_BITSIZE (op_mode
);
557 if (bitsize
== 0 || bitsize
> unit
)
561 /* Get a reference to the first byte of the field. */
562 xop0
= narrow_bit_field_mem (xop0
, insv
->struct_mode
, bitsize
, bitnum
,
566 /* Convert from counting within OP0 to counting in OP_MODE. */
567 if (BYTES_BIG_ENDIAN
)
568 bitnum
+= unit
- GET_MODE_BITSIZE (GET_MODE (op0
));
570 /* If xop0 is a register, we need it in OP_MODE
571 to make it acceptable to the format of insv. */
572 if (GET_CODE (xop0
) == SUBREG
)
573 /* We can't just change the mode, because this might clobber op0,
574 and we will need the original value of op0 if insv fails. */
575 xop0
= gen_rtx_SUBREG (op_mode
, SUBREG_REG (xop0
), SUBREG_BYTE (xop0
));
576 if (REG_P (xop0
) && GET_MODE (xop0
) != op_mode
)
577 xop0
= gen_lowpart_SUBREG (op_mode
, xop0
);
580 /* If the destination is a paradoxical subreg such that we need a
581 truncate to the inner mode, perform the insertion on a temporary and
582 truncate the result to the original destination. Note that we can't
583 just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
584 X) 0)) is (reg:N X). */
585 if (GET_CODE (xop0
) == SUBREG
586 && REG_P (SUBREG_REG (xop0
))
587 && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0
)),
590 rtx tem
= gen_reg_rtx (op_mode
);
591 emit_move_insn (tem
, xop0
);
596 /* There are similar overflow check at the start of store_bit_field_1,
597 but that only check the situation where the field lies completely
598 outside the register, while there do have situation where the field
599 lies partialy in the register, we need to adjust bitsize for this
600 partial overflow situation. Without this fix, pr48335-2.c on big-endian
601 will broken on those arch support bit insert instruction, like arm, aarch64
603 if (bitsize
+ bitnum
> unit
&& bitnum
< unit
)
605 warning (OPT_Wextra
, "write of %wu-bit data outside the bound of "
606 "destination object, data truncated into %wu-bit",
607 bitsize
, unit
- bitnum
);
608 bitsize
= unit
- bitnum
;
611 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
612 "backwards" from the size of the unit we are inserting into.
613 Otherwise, we count bits from the most significant on a
614 BYTES/BITS_BIG_ENDIAN machine. */
616 if (BITS_BIG_ENDIAN
!= BYTES_BIG_ENDIAN
)
617 bitnum
= unit
- bitsize
- bitnum
;
619 /* Convert VALUE to op_mode (which insv insn wants) in VALUE1. */
621 if (GET_MODE (value
) != op_mode
)
623 if (GET_MODE_BITSIZE (GET_MODE (value
)) >= bitsize
)
625 /* Optimization: Don't bother really extending VALUE
626 if it has all the bits we will actually use. However,
627 if we must narrow it, be sure we do it correctly. */
629 if (GET_MODE_SIZE (GET_MODE (value
)) < GET_MODE_SIZE (op_mode
))
633 tmp
= simplify_subreg (op_mode
, value1
, GET_MODE (value
), 0);
635 tmp
= simplify_gen_subreg (op_mode
,
636 force_reg (GET_MODE (value
),
638 GET_MODE (value
), 0);
642 value1
= gen_lowpart (op_mode
, value1
);
644 else if (CONST_INT_P (value
))
645 value1
= gen_int_mode (INTVAL (value
), op_mode
);
647 /* Parse phase is supposed to make VALUE's data type
648 match that of the component reference, which is a type
649 at least as wide as the field; so VALUE should have
650 a mode that corresponds to that type. */
651 gcc_assert (CONSTANT_P (value
));
654 create_fixed_operand (&ops
[0], xop0
);
655 create_integer_operand (&ops
[1], bitsize
);
656 create_integer_operand (&ops
[2], bitnum
);
657 create_input_operand (&ops
[3], value1
, op_mode
);
658 if (maybe_expand_insn (insv
->icode
, 4, ops
))
661 convert_move (op0
, xop0
, true);
664 delete_insns_since (last
);
668 /* A subroutine of store_bit_field, with the same arguments. Return true
669 if the operation could be implemented.
671 If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
672 no other way of implementing the operation. If FALLBACK_P is false,
673 return false instead. */
676 store_bit_field_1 (rtx str_rtx
, unsigned HOST_WIDE_INT bitsize
,
677 unsigned HOST_WIDE_INT bitnum
,
678 unsigned HOST_WIDE_INT bitregion_start
,
679 unsigned HOST_WIDE_INT bitregion_end
,
680 machine_mode fieldmode
,
681 rtx value
, bool reverse
, bool fallback_p
)
686 while (GET_CODE (op0
) == SUBREG
)
688 /* The following line once was done only if WORDS_BIG_ENDIAN,
689 but I think that is a mistake. WORDS_BIG_ENDIAN is
690 meaningful at a much higher level; when structures are copied
691 between memory and regs, the higher-numbered regs
692 always get higher addresses. */
693 int inner_mode_size
= GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0
)));
694 int outer_mode_size
= GET_MODE_SIZE (GET_MODE (op0
));
697 /* Paradoxical subregs need special handling on big endian machines. */
698 if (SUBREG_BYTE (op0
) == 0 && inner_mode_size
< outer_mode_size
)
700 int difference
= inner_mode_size
- outer_mode_size
;
702 if (WORDS_BIG_ENDIAN
)
703 byte_offset
+= (difference
/ UNITS_PER_WORD
) * UNITS_PER_WORD
;
704 if (BYTES_BIG_ENDIAN
)
705 byte_offset
+= difference
% UNITS_PER_WORD
;
708 byte_offset
= SUBREG_BYTE (op0
);
710 bitnum
+= byte_offset
* BITS_PER_UNIT
;
711 op0
= SUBREG_REG (op0
);
714 /* No action is needed if the target is a register and if the field
715 lies completely outside that register. This can occur if the source
716 code contains an out-of-bounds access to a small array. */
717 if (REG_P (op0
) && bitnum
>= GET_MODE_BITSIZE (GET_MODE (op0
)))
720 /* Use vec_set patterns for inserting parts of vectors whenever
722 if (VECTOR_MODE_P (GET_MODE (op0
))
724 && optab_handler (vec_set_optab
, GET_MODE (op0
)) != CODE_FOR_nothing
725 && fieldmode
== GET_MODE_INNER (GET_MODE (op0
))
726 && bitsize
== GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0
)))
727 && !(bitnum
% GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0
)))))
729 struct expand_operand ops
[3];
730 machine_mode outermode
= GET_MODE (op0
);
731 machine_mode innermode
= GET_MODE_INNER (outermode
);
732 enum insn_code icode
= optab_handler (vec_set_optab
, outermode
);
733 int pos
= bitnum
/ GET_MODE_BITSIZE (innermode
);
735 create_fixed_operand (&ops
[0], op0
);
736 create_input_operand (&ops
[1], value
, innermode
);
737 create_integer_operand (&ops
[2], pos
);
738 if (maybe_expand_insn (icode
, 3, ops
))
742 /* If the target is a register, overwriting the entire object, or storing
743 a full-word or multi-word field can be done with just a SUBREG. */
745 && bitsize
== GET_MODE_BITSIZE (fieldmode
)
746 && ((bitsize
== GET_MODE_BITSIZE (GET_MODE (op0
)) && bitnum
== 0)
747 || (bitsize
% BITS_PER_WORD
== 0 && bitnum
% BITS_PER_WORD
== 0)))
749 /* Use the subreg machinery either to narrow OP0 to the required
750 words or to cope with mode punning between equal-sized modes.
751 In the latter case, use subreg on the rhs side, not lhs. */
754 if (bitsize
== GET_MODE_BITSIZE (GET_MODE (op0
)))
756 sub
= simplify_gen_subreg (GET_MODE (op0
), value
, fieldmode
, 0);
760 sub
= flip_storage_order (GET_MODE (op0
), sub
);
761 emit_move_insn (op0
, sub
);
767 sub
= simplify_gen_subreg (fieldmode
, op0
, GET_MODE (op0
),
768 bitnum
/ BITS_PER_UNIT
);
772 value
= flip_storage_order (fieldmode
, value
);
773 emit_move_insn (sub
, value
);
779 /* If the target is memory, storing any naturally aligned field can be
780 done with a simple store. For targets that support fast unaligned
781 memory, any naturally sized, unit aligned field can be done directly. */
782 if (simple_mem_bitfield_p (op0
, bitsize
, bitnum
, fieldmode
))
784 op0
= adjust_bitfield_address (op0
, fieldmode
, bitnum
/ BITS_PER_UNIT
);
786 value
= flip_storage_order (fieldmode
, value
);
787 emit_move_insn (op0
, value
);
791 /* Make sure we are playing with integral modes. Pun with subregs
792 if we aren't. This must come after the entire register case above,
793 since that case is valid for any mode. The following cases are only
794 valid for integral modes. */
796 machine_mode imode
= int_mode_for_mode (GET_MODE (op0
));
797 if (imode
!= GET_MODE (op0
))
800 op0
= adjust_bitfield_address_size (op0
, imode
, 0, MEM_SIZE (op0
));
803 gcc_assert (imode
!= BLKmode
);
804 op0
= gen_lowpart (imode
, op0
);
809 /* Storing an lsb-aligned field in a register
810 can be done with a movstrict instruction. */
814 && lowpart_bit_field_p (bitnum
, bitsize
, GET_MODE (op0
))
815 && bitsize
== GET_MODE_BITSIZE (fieldmode
)
816 && optab_handler (movstrict_optab
, fieldmode
) != CODE_FOR_nothing
)
818 struct expand_operand ops
[2];
819 enum insn_code icode
= optab_handler (movstrict_optab
, fieldmode
);
821 unsigned HOST_WIDE_INT subreg_off
;
823 if (GET_CODE (arg0
) == SUBREG
)
825 /* Else we've got some float mode source being extracted into
826 a different float mode destination -- this combination of
827 subregs results in Severe Tire Damage. */
828 gcc_assert (GET_MODE (SUBREG_REG (arg0
)) == fieldmode
829 || GET_MODE_CLASS (fieldmode
) == MODE_INT
830 || GET_MODE_CLASS (fieldmode
) == MODE_PARTIAL_INT
);
831 arg0
= SUBREG_REG (arg0
);
834 subreg_off
= bitnum
/ BITS_PER_UNIT
;
835 if (validate_subreg (fieldmode
, GET_MODE (arg0
), arg0
, subreg_off
))
837 arg0
= gen_rtx_SUBREG (fieldmode
, arg0
, subreg_off
);
839 create_fixed_operand (&ops
[0], arg0
);
840 /* Shrink the source operand to FIELDMODE. */
841 create_convert_operand_to (&ops
[1], value
, fieldmode
, false);
842 if (maybe_expand_insn (icode
, 2, ops
))
847 /* Handle fields bigger than a word. */
849 if (bitsize
> BITS_PER_WORD
)
851 /* Here we transfer the words of the field
852 in the order least significant first.
853 This is because the most significant word is the one which may
855 However, only do that if the value is not BLKmode. */
857 const bool backwards
= WORDS_BIG_ENDIAN
&& fieldmode
!= BLKmode
;
858 unsigned int nwords
= (bitsize
+ (BITS_PER_WORD
- 1)) / BITS_PER_WORD
;
862 /* This is the mode we must force value to, so that there will be enough
863 subwords to extract. Note that fieldmode will often (always?) be
864 VOIDmode, because that is what store_field uses to indicate that this
865 is a bit field, but passing VOIDmode to operand_subword_force
867 fieldmode
= GET_MODE (value
);
868 if (fieldmode
== VOIDmode
)
869 fieldmode
= smallest_mode_for_size (nwords
* BITS_PER_WORD
, MODE_INT
);
871 last
= get_last_insn ();
872 for (i
= 0; i
< nwords
; i
++)
874 /* If I is 0, use the low-order word in both field and target;
875 if I is 1, use the next to lowest word; and so on. */
876 unsigned int wordnum
= (backwards
877 ? GET_MODE_SIZE (fieldmode
) / UNITS_PER_WORD
880 unsigned int bit_offset
= (backwards
^ reverse
881 ? MAX ((int) bitsize
- ((int) i
+ 1)
884 : (int) i
* BITS_PER_WORD
);
885 rtx value_word
= operand_subword_force (value
, wordnum
, fieldmode
);
886 unsigned HOST_WIDE_INT new_bitsize
=
887 MIN (BITS_PER_WORD
, bitsize
- i
* BITS_PER_WORD
);
889 /* If the remaining chunk doesn't have full wordsize we have
890 to make sure that for big endian machines the higher order
892 if (new_bitsize
< BITS_PER_WORD
&& BYTES_BIG_ENDIAN
&& !backwards
)
893 value_word
= simplify_expand_binop (word_mode
, lshr_optab
,
895 GEN_INT (BITS_PER_WORD
900 if (!store_bit_field_1 (op0
, new_bitsize
,
902 bitregion_start
, bitregion_end
,
904 value_word
, reverse
, fallback_p
))
906 delete_insns_since (last
);
913 /* If VALUE has a floating-point or complex mode, access it as an
914 integer of the corresponding size. This can occur on a machine
915 with 64 bit registers that uses SFmode for float. It can also
916 occur for unaligned float or complex fields. */
918 if (GET_MODE (value
) != VOIDmode
919 && GET_MODE_CLASS (GET_MODE (value
)) != MODE_INT
920 && GET_MODE_CLASS (GET_MODE (value
)) != MODE_PARTIAL_INT
)
922 value
= gen_reg_rtx (int_mode_for_mode (GET_MODE (value
)));
923 emit_move_insn (gen_lowpart (GET_MODE (orig_value
), value
), orig_value
);
926 /* If OP0 is a multi-word register, narrow it to the affected word.
927 If the region spans two words, defer to store_split_bit_field. */
928 if (!MEM_P (op0
) && GET_MODE_SIZE (GET_MODE (op0
)) > UNITS_PER_WORD
)
930 op0
= simplify_gen_subreg (word_mode
, op0
, GET_MODE (op0
),
931 bitnum
/ BITS_PER_WORD
* UNITS_PER_WORD
);
933 bitnum
%= BITS_PER_WORD
;
934 if (bitnum
+ bitsize
> BITS_PER_WORD
)
939 store_split_bit_field (op0
, bitsize
, bitnum
, bitregion_start
,
940 bitregion_end
, value
, reverse
);
945 /* From here on we can assume that the field to be stored in fits
946 within a word. If the destination is a register, it too fits
949 extraction_insn insv
;
952 && get_best_reg_extraction_insn (&insv
, EP_insv
,
953 GET_MODE_BITSIZE (GET_MODE (op0
)),
955 && store_bit_field_using_insv (&insv
, op0
, bitsize
, bitnum
, value
))
958 /* If OP0 is a memory, try copying it to a register and seeing if a
959 cheap register alternative is available. */
960 if (MEM_P (op0
) && !reverse
)
962 if (get_best_mem_extraction_insn (&insv
, EP_insv
, bitsize
, bitnum
,
964 && store_bit_field_using_insv (&insv
, op0
, bitsize
, bitnum
, value
))
967 rtx_insn
*last
= get_last_insn ();
969 /* Try loading part of OP0 into a register, inserting the bitfield
970 into that, and then copying the result back to OP0. */
971 unsigned HOST_WIDE_INT bitpos
;
972 rtx xop0
= adjust_bit_field_mem_for_reg (EP_insv
, op0
, bitsize
, bitnum
,
973 bitregion_start
, bitregion_end
,
977 rtx tempreg
= copy_to_reg (xop0
);
978 if (store_bit_field_1 (tempreg
, bitsize
, bitpos
,
979 bitregion_start
, bitregion_end
,
980 fieldmode
, orig_value
, reverse
, false))
982 emit_move_insn (xop0
, tempreg
);
985 delete_insns_since (last
);
992 store_fixed_bit_field (op0
, bitsize
, bitnum
, bitregion_start
,
993 bitregion_end
, value
, reverse
);
997 /* Generate code to store value from rtx VALUE
998 into a bit-field within structure STR_RTX
999 containing BITSIZE bits starting at bit BITNUM.
1001 BITREGION_START is bitpos of the first bitfield in this region.
1002 BITREGION_END is the bitpos of the ending bitfield in this region.
1003 These two fields are 0, if the C++ memory model does not apply,
1004 or we are not interested in keeping track of bitfield regions.
1006 FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
1008 If REVERSE is true, the store is to be done in reverse order. */
1011 store_bit_field (rtx str_rtx
, unsigned HOST_WIDE_INT bitsize
,
1012 unsigned HOST_WIDE_INT bitnum
,
1013 unsigned HOST_WIDE_INT bitregion_start
,
1014 unsigned HOST_WIDE_INT bitregion_end
,
1015 machine_mode fieldmode
,
1016 rtx value
, bool reverse
)
1018 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
1019 if (strict_volatile_bitfield_p (str_rtx
, bitsize
, bitnum
, fieldmode
,
1020 bitregion_start
, bitregion_end
))
1022 /* Storing of a full word can be done with a simple store.
1023 We know here that the field can be accessed with one single
1024 instruction. For targets that support unaligned memory,
1025 an unaligned access may be necessary. */
1026 if (bitsize
== GET_MODE_BITSIZE (fieldmode
))
1028 str_rtx
= adjust_bitfield_address (str_rtx
, fieldmode
,
1029 bitnum
/ BITS_PER_UNIT
);
1031 value
= flip_storage_order (fieldmode
, value
);
1032 gcc_assert (bitnum
% BITS_PER_UNIT
== 0);
1033 emit_move_insn (str_rtx
, value
);
1039 str_rtx
= narrow_bit_field_mem (str_rtx
, fieldmode
, bitsize
, bitnum
,
1041 gcc_assert (bitnum
+ bitsize
<= GET_MODE_BITSIZE (fieldmode
));
1042 temp
= copy_to_reg (str_rtx
);
1043 if (!store_bit_field_1 (temp
, bitsize
, bitnum
, 0, 0,
1044 fieldmode
, value
, reverse
, true))
1047 emit_move_insn (str_rtx
, temp
);
1053 /* Under the C++0x memory model, we must not touch bits outside the
1054 bit region. Adjust the address to start at the beginning of the
1056 if (MEM_P (str_rtx
) && bitregion_start
> 0)
1058 machine_mode bestmode
;
1059 HOST_WIDE_INT offset
, size
;
1061 gcc_assert ((bitregion_start
% BITS_PER_UNIT
) == 0);
1063 offset
= bitregion_start
/ BITS_PER_UNIT
;
1064 bitnum
-= bitregion_start
;
1065 size
= (bitnum
+ bitsize
+ BITS_PER_UNIT
- 1) / BITS_PER_UNIT
;
1066 bitregion_end
-= bitregion_start
;
1067 bitregion_start
= 0;
1068 bestmode
= get_best_mode (bitsize
, bitnum
,
1069 bitregion_start
, bitregion_end
,
1070 MEM_ALIGN (str_rtx
), VOIDmode
,
1071 MEM_VOLATILE_P (str_rtx
));
1072 str_rtx
= adjust_bitfield_address_size (str_rtx
, bestmode
, offset
, size
);
1075 if (!store_bit_field_1 (str_rtx
, bitsize
, bitnum
,
1076 bitregion_start
, bitregion_end
,
1077 fieldmode
, value
, reverse
, true))
1081 /* Use shifts and boolean operations to store VALUE into a bit field of
1082 width BITSIZE in OP0, starting at bit BITNUM.
1084 If REVERSE is true, the store is to be done in reverse order. */
1087 store_fixed_bit_field (rtx op0
, unsigned HOST_WIDE_INT bitsize
,
1088 unsigned HOST_WIDE_INT bitnum
,
1089 unsigned HOST_WIDE_INT bitregion_start
,
1090 unsigned HOST_WIDE_INT bitregion_end
,
1091 rtx value
, bool reverse
)
1093 /* There is a case not handled here:
1094 a structure with a known alignment of just a halfword
1095 and a field split across two aligned halfwords within the structure.
1096 Or likewise a structure with a known alignment of just a byte
1097 and a field split across two bytes.
1098 Such cases are not supposed to be able to occur. */
1102 machine_mode mode
= GET_MODE (op0
);
1103 if (GET_MODE_BITSIZE (mode
) == 0
1104 || GET_MODE_BITSIZE (mode
) > GET_MODE_BITSIZE (word_mode
))
1106 mode
= get_best_mode (bitsize
, bitnum
, bitregion_start
, bitregion_end
,
1107 MEM_ALIGN (op0
), mode
, MEM_VOLATILE_P (op0
));
1109 if (mode
== VOIDmode
)
1111 /* The only way this should occur is if the field spans word
1113 store_split_bit_field (op0
, bitsize
, bitnum
, bitregion_start
,
1114 bitregion_end
, value
, reverse
);
1118 op0
= narrow_bit_field_mem (op0
, mode
, bitsize
, bitnum
, &bitnum
);
1121 store_fixed_bit_field_1 (op0
, bitsize
, bitnum
, value
, reverse
);
1124 /* Helper function for store_fixed_bit_field, stores
1125 the bit field always using the MODE of OP0. */
1128 store_fixed_bit_field_1 (rtx op0
, unsigned HOST_WIDE_INT bitsize
,
1129 unsigned HOST_WIDE_INT bitnum
,
1130 rtx value
, bool reverse
)
1137 mode
= GET_MODE (op0
);
1138 gcc_assert (SCALAR_INT_MODE_P (mode
));
1140 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1141 for invalid input, such as f5 from gcc.dg/pr48335-2.c. */
1143 if (reverse
? !BYTES_BIG_ENDIAN
: BYTES_BIG_ENDIAN
)
1144 /* BITNUM is the distance between our msb
1145 and that of the containing datum.
1146 Convert it to the distance from the lsb. */
1147 bitnum
= GET_MODE_BITSIZE (mode
) - bitsize
- bitnum
;
1149 /* Now BITNUM is always the distance between our lsb
1152 /* Shift VALUE left by BITNUM bits. If VALUE is not constant,
1153 we must first convert its mode to MODE. */
1155 if (CONST_INT_P (value
))
1157 unsigned HOST_WIDE_INT v
= UINTVAL (value
);
1159 if (bitsize
< HOST_BITS_PER_WIDE_INT
)
1160 v
&= ((unsigned HOST_WIDE_INT
) 1 << bitsize
) - 1;
1164 else if ((bitsize
< HOST_BITS_PER_WIDE_INT
1165 && v
== ((unsigned HOST_WIDE_INT
) 1 << bitsize
) - 1)
1166 || (bitsize
== HOST_BITS_PER_WIDE_INT
1167 && v
== (unsigned HOST_WIDE_INT
) -1))
1170 value
= lshift_value (mode
, v
, bitnum
);
1174 int must_and
= (GET_MODE_BITSIZE (GET_MODE (value
)) != bitsize
1175 && bitnum
+ bitsize
!= GET_MODE_BITSIZE (mode
));
1177 if (GET_MODE (value
) != mode
)
1178 value
= convert_to_mode (mode
, value
, 1);
1181 value
= expand_binop (mode
, and_optab
, value
,
1182 mask_rtx (mode
, 0, bitsize
, 0),
1183 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
1185 value
= expand_shift (LSHIFT_EXPR
, mode
, value
,
1186 bitnum
, NULL_RTX
, 1);
1190 value
= flip_storage_order (mode
, value
);
1192 /* Now clear the chosen bits in OP0,
1193 except that if VALUE is -1 we need not bother. */
1194 /* We keep the intermediates in registers to allow CSE to combine
1195 consecutive bitfield assignments. */
1197 temp
= force_reg (mode
, op0
);
1201 rtx mask
= mask_rtx (mode
, bitnum
, bitsize
, 1);
1203 mask
= flip_storage_order (mode
, mask
);
1204 temp
= expand_binop (mode
, and_optab
, temp
, mask
,
1205 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
1206 temp
= force_reg (mode
, temp
);
1209 /* Now logical-or VALUE into OP0, unless it is zero. */
1213 temp
= expand_binop (mode
, ior_optab
, temp
, value
,
1214 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
1215 temp
= force_reg (mode
, temp
);
1220 op0
= copy_rtx (op0
);
1221 emit_move_insn (op0
, temp
);
1225 /* Store a bit field that is split across multiple accessible memory objects.
1227 OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1228 BITSIZE is the field width; BITPOS the position of its first bit
1230 VALUE is the value to store.
1232 If REVERSE is true, the store is to be done in reverse order.
1234 This does not yet handle fields wider than BITS_PER_WORD. */
1237 store_split_bit_field (rtx op0
, unsigned HOST_WIDE_INT bitsize
,
1238 unsigned HOST_WIDE_INT bitpos
,
1239 unsigned HOST_WIDE_INT bitregion_start
,
1240 unsigned HOST_WIDE_INT bitregion_end
,
1241 rtx value
, bool reverse
)
1243 unsigned int unit
, total_bits
, bitsdone
= 0;
1245 /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1247 if (REG_P (op0
) || GET_CODE (op0
) == SUBREG
)
1248 unit
= BITS_PER_WORD
;
1250 unit
= MIN (MEM_ALIGN (op0
), BITS_PER_WORD
);
1252 /* If OP0 is a memory with a mode, then UNIT must not be larger than
1253 OP0's mode as well. Otherwise, store_fixed_bit_field will call us
1254 again, and we will mutually recurse forever. */
1255 if (MEM_P (op0
) && GET_MODE_BITSIZE (GET_MODE (op0
)) > 0)
1256 unit
= MIN (unit
, GET_MODE_BITSIZE (GET_MODE (op0
)));
1258 /* If VALUE is a constant other than a CONST_INT, get it into a register in
1259 WORD_MODE. If we can do this using gen_lowpart_common, do so. Note
1260 that VALUE might be a floating-point constant. */
1261 if (CONSTANT_P (value
) && !CONST_INT_P (value
))
1263 rtx word
= gen_lowpart_common (word_mode
, value
);
1265 if (word
&& (value
!= word
))
1268 value
= gen_lowpart_common (word_mode
,
1269 force_reg (GET_MODE (value
) != VOIDmode
1271 : word_mode
, value
));
1274 total_bits
= GET_MODE_BITSIZE (GET_MODE (value
));
1276 while (bitsdone
< bitsize
)
1278 unsigned HOST_WIDE_INT thissize
;
1279 unsigned HOST_WIDE_INT thispos
;
1280 unsigned HOST_WIDE_INT offset
;
1283 offset
= (bitpos
+ bitsdone
) / unit
;
1284 thispos
= (bitpos
+ bitsdone
) % unit
;
1286 /* When region of bytes we can touch is restricted, decrease
1287 UNIT close to the end of the region as needed. If op0 is a REG
1288 or SUBREG of REG, don't do this, as there can't be data races
1289 on a register and we can expand shorter code in some cases. */
1291 && unit
> BITS_PER_UNIT
1292 && bitpos
+ bitsdone
- thispos
+ unit
> bitregion_end
+ 1
1294 && (GET_CODE (op0
) != SUBREG
|| !REG_P (SUBREG_REG (op0
))))
1300 /* THISSIZE must not overrun a word boundary. Otherwise,
1301 store_fixed_bit_field will call us again, and we will mutually
1303 thissize
= MIN (bitsize
- bitsdone
, BITS_PER_WORD
);
1304 thissize
= MIN (thissize
, unit
- thispos
);
1306 if (reverse
? !BYTES_BIG_ENDIAN
: BYTES_BIG_ENDIAN
)
1308 /* Fetch successively less significant portions. */
1309 if (CONST_INT_P (value
))
1310 part
= GEN_INT (((unsigned HOST_WIDE_INT
) (INTVAL (value
))
1311 >> (bitsize
- bitsdone
- thissize
))
1312 & (((HOST_WIDE_INT
) 1 << thissize
) - 1));
1313 /* Likewise, but the source is little-endian. */
1315 part
= extract_fixed_bit_field (word_mode
, value
, thissize
,
1316 bitsize
- bitsdone
- thissize
,
1317 NULL_RTX
, 1, false);
1320 int total_bits
= GET_MODE_BITSIZE (GET_MODE (value
));
1321 /* The args are chosen so that the last part includes the
1322 lsb. Give extract_bit_field the value it needs (with
1323 endianness compensation) to fetch the piece we want. */
1324 part
= extract_fixed_bit_field (word_mode
, value
, thissize
,
1325 total_bits
- bitsize
+ bitsdone
,
1326 NULL_RTX
, 1, false);
1331 /* Fetch successively more significant portions. */
1332 if (CONST_INT_P (value
))
1333 part
= GEN_INT (((unsigned HOST_WIDE_INT
) (INTVAL (value
))
1335 & (((HOST_WIDE_INT
) 1 << thissize
) - 1));
1336 /* Likewise, but the source is big-endian. */
1338 part
= extract_fixed_bit_field (word_mode
, value
, thissize
,
1339 total_bits
- bitsdone
- thissize
,
1340 NULL_RTX
, 1, false);
1342 part
= extract_fixed_bit_field (word_mode
, value
, thissize
,
1343 bitsdone
, NULL_RTX
, 1, false);
1346 /* If OP0 is a register, then handle OFFSET here.
1348 When handling multiword bitfields, extract_bit_field may pass
1349 down a word_mode SUBREG of a larger REG for a bitfield that actually
1350 crosses a word boundary. Thus, for a SUBREG, we must find
1351 the current word starting from the base register. */
1352 if (GET_CODE (op0
) == SUBREG
)
1354 int word_offset
= (SUBREG_BYTE (op0
) / UNITS_PER_WORD
)
1355 + (offset
* unit
/ BITS_PER_WORD
);
1356 machine_mode sub_mode
= GET_MODE (SUBREG_REG (op0
));
1357 if (sub_mode
!= BLKmode
&& GET_MODE_SIZE (sub_mode
) < UNITS_PER_WORD
)
1358 word
= word_offset
? const0_rtx
: op0
;
1360 word
= operand_subword_force (SUBREG_REG (op0
), word_offset
,
1361 GET_MODE (SUBREG_REG (op0
)));
1362 offset
&= BITS_PER_WORD
/ unit
- 1;
1364 else if (REG_P (op0
))
1366 machine_mode op0_mode
= GET_MODE (op0
);
1367 if (op0_mode
!= BLKmode
&& GET_MODE_SIZE (op0_mode
) < UNITS_PER_WORD
)
1368 word
= offset
? const0_rtx
: op0
;
1370 word
= operand_subword_force (op0
, offset
* unit
/ BITS_PER_WORD
,
1372 offset
&= BITS_PER_WORD
/ unit
- 1;
1377 /* OFFSET is in UNITs, and UNIT is in bits. If WORD is const0_rtx,
1378 it is just an out-of-bounds access. Ignore it. */
1379 if (word
!= const0_rtx
)
1380 store_fixed_bit_field (word
, thissize
, offset
* unit
+ thispos
,
1381 bitregion_start
, bitregion_end
, part
,
1383 bitsdone
+= thissize
;
1387 /* A subroutine of extract_bit_field_1 that converts return value X
1388 to either MODE or TMODE. MODE, TMODE and UNSIGNEDP are arguments
1389 to extract_bit_field. */
1392 convert_extracted_bit_field (rtx x
, machine_mode mode
,
1393 machine_mode tmode
, bool unsignedp
)
1395 if (GET_MODE (x
) == tmode
|| GET_MODE (x
) == mode
)
1398 /* If the x mode is not a scalar integral, first convert to the
1399 integer mode of that size and then access it as a floating-point
1400 value via a SUBREG. */
1401 if (!SCALAR_INT_MODE_P (tmode
))
1405 smode
= mode_for_size (GET_MODE_BITSIZE (tmode
), MODE_INT
, 0);
1406 x
= convert_to_mode (smode
, x
, unsignedp
);
1407 x
= force_reg (smode
, x
);
1408 return gen_lowpart (tmode
, x
);
1411 return convert_to_mode (tmode
, x
, unsignedp
);
1414 /* Try to use an ext(z)v pattern to extract a field from OP0.
1415 Return the extracted value on success, otherwise return null.
1416 EXT_MODE is the mode of the extraction and the other arguments
1417 are as for extract_bit_field. */
1420 extract_bit_field_using_extv (const extraction_insn
*extv
, rtx op0
,
1421 unsigned HOST_WIDE_INT bitsize
,
1422 unsigned HOST_WIDE_INT bitnum
,
1423 int unsignedp
, rtx target
,
1424 machine_mode mode
, machine_mode tmode
)
1426 struct expand_operand ops
[4];
1427 rtx spec_target
= target
;
1428 rtx spec_target_subreg
= 0;
1429 machine_mode ext_mode
= extv
->field_mode
;
1430 unsigned unit
= GET_MODE_BITSIZE (ext_mode
);
1432 if (bitsize
== 0 || unit
< bitsize
)
1436 /* Get a reference to the first byte of the field. */
1437 op0
= narrow_bit_field_mem (op0
, extv
->struct_mode
, bitsize
, bitnum
,
1441 /* Convert from counting within OP0 to counting in EXT_MODE. */
1442 if (BYTES_BIG_ENDIAN
)
1443 bitnum
+= unit
- GET_MODE_BITSIZE (GET_MODE (op0
));
1445 /* If op0 is a register, we need it in EXT_MODE to make it
1446 acceptable to the format of ext(z)v. */
1447 if (GET_CODE (op0
) == SUBREG
&& GET_MODE (op0
) != ext_mode
)
1449 if (REG_P (op0
) && GET_MODE (op0
) != ext_mode
)
1450 op0
= gen_lowpart_SUBREG (ext_mode
, op0
);
1453 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1454 "backwards" from the size of the unit we are extracting from.
1455 Otherwise, we count bits from the most significant on a
1456 BYTES/BITS_BIG_ENDIAN machine. */
1458 if (BITS_BIG_ENDIAN
!= BYTES_BIG_ENDIAN
)
1459 bitnum
= unit
- bitsize
- bitnum
;
1462 target
= spec_target
= gen_reg_rtx (tmode
);
1464 if (GET_MODE (target
) != ext_mode
)
1466 /* Don't use LHS paradoxical subreg if explicit truncation is needed
1467 between the mode of the extraction (word_mode) and the target
1468 mode. Instead, create a temporary and use convert_move to set
1471 && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target
), ext_mode
))
1473 target
= gen_lowpart (ext_mode
, target
);
1474 if (GET_MODE_PRECISION (ext_mode
)
1475 > GET_MODE_PRECISION (GET_MODE (spec_target
)))
1476 spec_target_subreg
= target
;
1479 target
= gen_reg_rtx (ext_mode
);
1482 create_output_operand (&ops
[0], target
, ext_mode
);
1483 create_fixed_operand (&ops
[1], op0
);
1484 create_integer_operand (&ops
[2], bitsize
);
1485 create_integer_operand (&ops
[3], bitnum
);
1486 if (maybe_expand_insn (extv
->icode
, 4, ops
))
1488 target
= ops
[0].value
;
1489 if (target
== spec_target
)
1491 if (target
== spec_target_subreg
)
1493 return convert_extracted_bit_field (target
, mode
, tmode
, unsignedp
);
1498 /* A subroutine of extract_bit_field, with the same arguments.
1499 If FALLBACK_P is true, fall back to extract_fixed_bit_field
1500 if we can find no other means of implementing the operation.
1501 if FALLBACK_P is false, return NULL instead. */
1504 extract_bit_field_1 (rtx str_rtx
, unsigned HOST_WIDE_INT bitsize
,
1505 unsigned HOST_WIDE_INT bitnum
, int unsignedp
, rtx target
,
1506 machine_mode mode
, machine_mode tmode
,
1507 bool reverse
, bool fallback_p
)
1510 machine_mode int_mode
;
1513 if (tmode
== VOIDmode
)
1516 while (GET_CODE (op0
) == SUBREG
)
1518 bitnum
+= SUBREG_BYTE (op0
) * BITS_PER_UNIT
;
1519 op0
= SUBREG_REG (op0
);
1522 /* If we have an out-of-bounds access to a register, just return an
1523 uninitialized register of the required mode. This can occur if the
1524 source code contains an out-of-bounds access to a small array. */
1525 if (REG_P (op0
) && bitnum
>= GET_MODE_BITSIZE (GET_MODE (op0
)))
1526 return gen_reg_rtx (tmode
);
1529 && mode
== GET_MODE (op0
)
1531 && bitsize
== GET_MODE_BITSIZE (GET_MODE (op0
)))
1534 op0
= flip_storage_order (mode
, op0
);
1535 /* We're trying to extract a full register from itself. */
1539 /* See if we can get a better vector mode before extracting. */
1540 if (VECTOR_MODE_P (GET_MODE (op0
))
1542 && GET_MODE_INNER (GET_MODE (op0
)) != tmode
)
1544 machine_mode new_mode
;
1546 if (GET_MODE_CLASS (tmode
) == MODE_FLOAT
)
1547 new_mode
= MIN_MODE_VECTOR_FLOAT
;
1548 else if (GET_MODE_CLASS (tmode
) == MODE_FRACT
)
1549 new_mode
= MIN_MODE_VECTOR_FRACT
;
1550 else if (GET_MODE_CLASS (tmode
) == MODE_UFRACT
)
1551 new_mode
= MIN_MODE_VECTOR_UFRACT
;
1552 else if (GET_MODE_CLASS (tmode
) == MODE_ACCUM
)
1553 new_mode
= MIN_MODE_VECTOR_ACCUM
;
1554 else if (GET_MODE_CLASS (tmode
) == MODE_UACCUM
)
1555 new_mode
= MIN_MODE_VECTOR_UACCUM
;
1557 new_mode
= MIN_MODE_VECTOR_INT
;
1559 for (; new_mode
!= VOIDmode
; new_mode
= GET_MODE_WIDER_MODE (new_mode
))
1560 if (GET_MODE_SIZE (new_mode
) == GET_MODE_SIZE (GET_MODE (op0
))
1561 && targetm
.vector_mode_supported_p (new_mode
))
1563 if (new_mode
!= VOIDmode
)
1564 op0
= gen_lowpart (new_mode
, op0
);
1567 /* Use vec_extract patterns for extracting parts of vectors whenever
1569 if (VECTOR_MODE_P (GET_MODE (op0
))
1571 && optab_handler (vec_extract_optab
, GET_MODE (op0
)) != CODE_FOR_nothing
1572 && ((bitnum
+ bitsize
- 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0
)))
1573 == bitnum
/ GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0
)))))
1575 struct expand_operand ops
[3];
1576 machine_mode outermode
= GET_MODE (op0
);
1577 machine_mode innermode
= GET_MODE_INNER (outermode
);
1578 enum insn_code icode
= optab_handler (vec_extract_optab
, outermode
);
1579 unsigned HOST_WIDE_INT pos
= bitnum
/ GET_MODE_BITSIZE (innermode
);
1581 create_output_operand (&ops
[0], target
, innermode
);
1582 create_input_operand (&ops
[1], op0
, outermode
);
1583 create_integer_operand (&ops
[2], pos
);
1584 if (maybe_expand_insn (icode
, 3, ops
))
1586 target
= ops
[0].value
;
1587 if (GET_MODE (target
) != mode
)
1588 return gen_lowpart (tmode
, target
);
1593 /* Make sure we are playing with integral modes. Pun with subregs
1596 machine_mode imode
= int_mode_for_mode (GET_MODE (op0
));
1597 if (imode
!= GET_MODE (op0
))
1600 op0
= adjust_bitfield_address_size (op0
, imode
, 0, MEM_SIZE (op0
));
1601 else if (imode
!= BLKmode
)
1603 op0
= gen_lowpart (imode
, op0
);
1605 /* If we got a SUBREG, force it into a register since we
1606 aren't going to be able to do another SUBREG on it. */
1607 if (GET_CODE (op0
) == SUBREG
)
1608 op0
= force_reg (imode
, op0
);
1610 else if (REG_P (op0
))
1613 imode
= smallest_mode_for_size (GET_MODE_BITSIZE (GET_MODE (op0
)),
1615 reg
= gen_reg_rtx (imode
);
1616 subreg
= gen_lowpart_SUBREG (GET_MODE (op0
), reg
);
1617 emit_move_insn (subreg
, op0
);
1619 bitnum
+= SUBREG_BYTE (subreg
) * BITS_PER_UNIT
;
1623 HOST_WIDE_INT size
= GET_MODE_SIZE (GET_MODE (op0
));
1624 rtx mem
= assign_stack_temp (GET_MODE (op0
), size
);
1625 emit_move_insn (mem
, op0
);
1626 op0
= adjust_bitfield_address_size (mem
, BLKmode
, 0, size
);
1631 /* ??? We currently assume TARGET is at least as big as BITSIZE.
1632 If that's wrong, the solution is to test for it and set TARGET to 0
1635 /* Get the mode of the field to use for atomic access or subreg
1638 if (SCALAR_INT_MODE_P (tmode
))
1640 machine_mode try_mode
= mode_for_size (bitsize
,
1641 GET_MODE_CLASS (tmode
), 0);
1642 if (try_mode
!= BLKmode
)
1645 gcc_assert (mode1
!= BLKmode
);
1647 /* Extraction of a full MODE1 value can be done with a subreg as long
1648 as the least significant bit of the value is the least significant
1649 bit of either OP0 or a word of OP0. */
1652 && lowpart_bit_field_p (bitnum
, bitsize
, GET_MODE (op0
))
1653 && bitsize
== GET_MODE_BITSIZE (mode1
)
1654 && TRULY_NOOP_TRUNCATION_MODES_P (mode1
, GET_MODE (op0
)))
1656 rtx sub
= simplify_gen_subreg (mode1
, op0
, GET_MODE (op0
),
1657 bitnum
/ BITS_PER_UNIT
);
1659 return convert_extracted_bit_field (sub
, mode
, tmode
, unsignedp
);
1662 /* Extraction of a full MODE1 value can be done with a load as long as
1663 the field is on a byte boundary and is sufficiently aligned. */
1664 if (simple_mem_bitfield_p (op0
, bitsize
, bitnum
, mode1
))
1666 op0
= adjust_bitfield_address (op0
, mode1
, bitnum
/ BITS_PER_UNIT
);
1668 op0
= flip_storage_order (mode1
, op0
);
1669 return convert_extracted_bit_field (op0
, mode
, tmode
, unsignedp
);
1672 /* Handle fields bigger than a word. */
1674 if (bitsize
> BITS_PER_WORD
)
1676 /* Here we transfer the words of the field
1677 in the order least significant first.
1678 This is because the most significant word is the one which may
1679 be less than full. */
1681 const bool backwards
= WORDS_BIG_ENDIAN
;
1682 unsigned int nwords
= (bitsize
+ (BITS_PER_WORD
- 1)) / BITS_PER_WORD
;
1686 if (target
== 0 || !REG_P (target
) || !valid_multiword_target_p (target
))
1687 target
= gen_reg_rtx (mode
);
1689 /* Indicate for flow that the entire target reg is being set. */
1690 emit_clobber (target
);
1692 last
= get_last_insn ();
1693 for (i
= 0; i
< nwords
; i
++)
1695 /* If I is 0, use the low-order word in both field and target;
1696 if I is 1, use the next to lowest word; and so on. */
1697 /* Word number in TARGET to use. */
1698 unsigned int wordnum
1700 ? GET_MODE_SIZE (GET_MODE (target
)) / UNITS_PER_WORD
- i
- 1
1702 /* Offset from start of field in OP0. */
1703 unsigned int bit_offset
= (backwards
^ reverse
1704 ? MAX ((int) bitsize
- ((int) i
+ 1)
1707 : (int) i
* BITS_PER_WORD
);
1708 rtx target_part
= operand_subword (target
, wordnum
, 1, VOIDmode
);
1710 = extract_bit_field_1 (op0
, MIN (BITS_PER_WORD
,
1711 bitsize
- i
* BITS_PER_WORD
),
1712 bitnum
+ bit_offset
, 1, target_part
,
1713 mode
, word_mode
, reverse
, fallback_p
);
1715 gcc_assert (target_part
);
1718 delete_insns_since (last
);
1722 if (result_part
!= target_part
)
1723 emit_move_insn (target_part
, result_part
);
1728 /* Unless we've filled TARGET, the upper regs in a multi-reg value
1729 need to be zero'd out. */
1730 if (GET_MODE_SIZE (GET_MODE (target
)) > nwords
* UNITS_PER_WORD
)
1732 unsigned int i
, total_words
;
1734 total_words
= GET_MODE_SIZE (GET_MODE (target
)) / UNITS_PER_WORD
;
1735 for (i
= nwords
; i
< total_words
; i
++)
1737 (operand_subword (target
,
1738 backwards
? total_words
- i
- 1 : i
,
1745 /* Signed bit field: sign-extend with two arithmetic shifts. */
1746 target
= expand_shift (LSHIFT_EXPR
, mode
, target
,
1747 GET_MODE_BITSIZE (mode
) - bitsize
, NULL_RTX
, 0);
1748 return expand_shift (RSHIFT_EXPR
, mode
, target
,
1749 GET_MODE_BITSIZE (mode
) - bitsize
, NULL_RTX
, 0);
1752 /* If OP0 is a multi-word register, narrow it to the affected word.
1753 If the region spans two words, defer to extract_split_bit_field. */
1754 if (!MEM_P (op0
) && GET_MODE_SIZE (GET_MODE (op0
)) > UNITS_PER_WORD
)
1756 op0
= simplify_gen_subreg (word_mode
, op0
, GET_MODE (op0
),
1757 bitnum
/ BITS_PER_WORD
* UNITS_PER_WORD
);
1758 bitnum
%= BITS_PER_WORD
;
1759 if (bitnum
+ bitsize
> BITS_PER_WORD
)
1763 target
= extract_split_bit_field (op0
, bitsize
, bitnum
, unsignedp
,
1765 return convert_extracted_bit_field (target
, mode
, tmode
, unsignedp
);
1769 /* From here on we know the desired field is smaller than a word.
1770 If OP0 is a register, it too fits within a word. */
1771 enum extraction_pattern pattern
= unsignedp
? EP_extzv
: EP_extv
;
1772 extraction_insn extv
;
1775 /* ??? We could limit the structure size to the part of OP0 that
1776 contains the field, with appropriate checks for endianness
1777 and TRULY_NOOP_TRUNCATION. */
1778 && get_best_reg_extraction_insn (&extv
, pattern
,
1779 GET_MODE_BITSIZE (GET_MODE (op0
)),
1782 rtx result
= extract_bit_field_using_extv (&extv
, op0
, bitsize
, bitnum
,
1783 unsignedp
, target
, mode
,
1789 /* If OP0 is a memory, try copying it to a register and seeing if a
1790 cheap register alternative is available. */
1791 if (MEM_P (op0
) & !reverse
)
1793 if (get_best_mem_extraction_insn (&extv
, pattern
, bitsize
, bitnum
,
1796 rtx result
= extract_bit_field_using_extv (&extv
, op0
, bitsize
,
1804 rtx_insn
*last
= get_last_insn ();
1806 /* Try loading part of OP0 into a register and extracting the
1807 bitfield from that. */
1808 unsigned HOST_WIDE_INT bitpos
;
1809 rtx xop0
= adjust_bit_field_mem_for_reg (pattern
, op0
, bitsize
, bitnum
,
1810 0, 0, tmode
, &bitpos
);
1813 xop0
= copy_to_reg (xop0
);
1814 rtx result
= extract_bit_field_1 (xop0
, bitsize
, bitpos
,
1816 mode
, tmode
, reverse
, false);
1819 delete_insns_since (last
);
1826 /* Find a correspondingly-sized integer field, so we can apply
1827 shifts and masks to it. */
1828 int_mode
= int_mode_for_mode (tmode
);
1829 if (int_mode
== BLKmode
)
1830 int_mode
= int_mode_for_mode (mode
);
1831 /* Should probably push op0 out to memory and then do a load. */
1832 gcc_assert (int_mode
!= BLKmode
);
1834 target
= extract_fixed_bit_field (int_mode
, op0
, bitsize
, bitnum
,
1835 target
, unsignedp
, reverse
);
1836 return convert_extracted_bit_field (target
, mode
, tmode
, unsignedp
);
1839 /* Generate code to extract a byte-field from STR_RTX
1840 containing BITSIZE bits, starting at BITNUM,
1841 and put it in TARGET if possible (if TARGET is nonzero).
1842 Regardless of TARGET, we return the rtx for where the value is placed.
1844 STR_RTX is the structure containing the byte (a REG or MEM).
1845 UNSIGNEDP is nonzero if this is an unsigned bit field.
1846 MODE is the natural mode of the field value once extracted.
1847 TMODE is the mode the caller would like the value to have;
1848 but the value may be returned with type MODE instead.
1850 If REVERSE is true, the extraction is to be done in reverse order.
1852 If a TARGET is specified and we can store in it at no extra cost,
1853 we do so, and return TARGET.
1854 Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1855 if they are equally easy. */
1858 extract_bit_field (rtx str_rtx
, unsigned HOST_WIDE_INT bitsize
,
1859 unsigned HOST_WIDE_INT bitnum
, int unsignedp
, rtx target
,
1860 machine_mode mode
, machine_mode tmode
, bool reverse
)
1864 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
1865 if (GET_MODE_BITSIZE (GET_MODE (str_rtx
)) > 0)
1866 mode1
= GET_MODE (str_rtx
);
1867 else if (target
&& GET_MODE_BITSIZE (GET_MODE (target
)) > 0)
1868 mode1
= GET_MODE (target
);
1872 if (strict_volatile_bitfield_p (str_rtx
, bitsize
, bitnum
, mode1
, 0, 0))
1874 /* Extraction of a full MODE1 value can be done with a simple load.
1875 We know here that the field can be accessed with one single
1876 instruction. For targets that support unaligned memory,
1877 an unaligned access may be necessary. */
1878 if (bitsize
== GET_MODE_BITSIZE (mode1
))
1880 rtx result
= adjust_bitfield_address (str_rtx
, mode1
,
1881 bitnum
/ BITS_PER_UNIT
);
1883 result
= flip_storage_order (mode1
, result
);
1884 gcc_assert (bitnum
% BITS_PER_UNIT
== 0);
1885 return convert_extracted_bit_field (result
, mode
, tmode
, unsignedp
);
1888 str_rtx
= narrow_bit_field_mem (str_rtx
, mode1
, bitsize
, bitnum
,
1890 gcc_assert (bitnum
+ bitsize
<= GET_MODE_BITSIZE (mode1
));
1891 str_rtx
= copy_to_reg (str_rtx
);
1894 return extract_bit_field_1 (str_rtx
, bitsize
, bitnum
, unsignedp
,
1895 target
, mode
, tmode
, reverse
, true);
1898 /* Use shifts and boolean operations to extract a field of BITSIZE bits
1899 from bit BITNUM of OP0.
1901 UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1902 If REVERSE is true, the extraction is to be done in reverse order.
1904 If TARGET is nonzero, attempts to store the value there
1905 and return TARGET, but this is not guaranteed.
1906 If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */
1909 extract_fixed_bit_field (machine_mode tmode
, rtx op0
,
1910 unsigned HOST_WIDE_INT bitsize
,
1911 unsigned HOST_WIDE_INT bitnum
, rtx target
,
1912 int unsignedp
, bool reverse
)
1917 = get_best_mode (bitsize
, bitnum
, 0, 0, MEM_ALIGN (op0
), word_mode
,
1918 MEM_VOLATILE_P (op0
));
1920 if (mode
== VOIDmode
)
1921 /* The only way this should occur is if the field spans word
1923 return extract_split_bit_field (op0
, bitsize
, bitnum
, unsignedp
,
1926 op0
= narrow_bit_field_mem (op0
, mode
, bitsize
, bitnum
, &bitnum
);
1929 return extract_fixed_bit_field_1 (tmode
, op0
, bitsize
, bitnum
,
1930 target
, unsignedp
, reverse
);
1933 /* Helper function for extract_fixed_bit_field, extracts
1934 the bit field always using the MODE of OP0. */
1937 extract_fixed_bit_field_1 (machine_mode tmode
, rtx op0
,
1938 unsigned HOST_WIDE_INT bitsize
,
1939 unsigned HOST_WIDE_INT bitnum
, rtx target
,
1940 int unsignedp
, bool reverse
)
1942 machine_mode mode
= GET_MODE (op0
);
1943 gcc_assert (SCALAR_INT_MODE_P (mode
));
1945 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1946 for invalid input, such as extract equivalent of f5 from
1947 gcc.dg/pr48335-2.c. */
1949 if (reverse
? !BYTES_BIG_ENDIAN
: BYTES_BIG_ENDIAN
)
1950 /* BITNUM is the distance between our msb and that of OP0.
1951 Convert it to the distance from the lsb. */
1952 bitnum
= GET_MODE_BITSIZE (mode
) - bitsize
- bitnum
;
1954 /* Now BITNUM is always the distance between the field's lsb and that of OP0.
1955 We have reduced the big-endian case to the little-endian case. */
1957 op0
= flip_storage_order (mode
, op0
);
1963 /* If the field does not already start at the lsb,
1964 shift it so it does. */
1965 /* Maybe propagate the target for the shift. */
1966 rtx subtarget
= (target
!= 0 && REG_P (target
) ? target
: 0);
1969 op0
= expand_shift (RSHIFT_EXPR
, mode
, op0
, bitnum
, subtarget
, 1);
1971 /* Convert the value to the desired mode. */
1973 op0
= convert_to_mode (tmode
, op0
, 1);
1975 /* Unless the msb of the field used to be the msb when we shifted,
1976 mask out the upper bits. */
1978 if (GET_MODE_BITSIZE (mode
) != bitnum
+ bitsize
)
1979 return expand_binop (GET_MODE (op0
), and_optab
, op0
,
1980 mask_rtx (GET_MODE (op0
), 0, bitsize
, 0),
1981 target
, 1, OPTAB_LIB_WIDEN
);
1985 /* To extract a signed bit-field, first shift its msb to the msb of the word,
1986 then arithmetic-shift its lsb to the lsb of the word. */
1987 op0
= force_reg (mode
, op0
);
1989 /* Find the narrowest integer mode that contains the field. */
1991 for (mode
= GET_CLASS_NARROWEST_MODE (MODE_INT
); mode
!= VOIDmode
;
1992 mode
= GET_MODE_WIDER_MODE (mode
))
1993 if (GET_MODE_BITSIZE (mode
) >= bitsize
+ bitnum
)
1995 op0
= convert_to_mode (mode
, op0
, 0);
2002 if (GET_MODE_BITSIZE (mode
) != (bitsize
+ bitnum
))
2004 int amount
= GET_MODE_BITSIZE (mode
) - (bitsize
+ bitnum
);
2005 /* Maybe propagate the target for the shift. */
2006 rtx subtarget
= (target
!= 0 && REG_P (target
) ? target
: 0);
2007 op0
= expand_shift (LSHIFT_EXPR
, mode
, op0
, amount
, subtarget
, 1);
2010 return expand_shift (RSHIFT_EXPR
, mode
, op0
,
2011 GET_MODE_BITSIZE (mode
) - bitsize
, target
, 0);
2014 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
2018 lshift_value (machine_mode mode
, unsigned HOST_WIDE_INT value
,
2021 return immed_wide_int_const (wi::lshift (value
, bitpos
), mode
);
2024 /* Extract a bit field that is split across two words
2025 and return an RTX for the result.
2027 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
2028 BITSIZE is the field width; BITPOS, position of its first bit, in the word.
2029 UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.
2031 If REVERSE is true, the extraction is to be done in reverse order. */
2034 extract_split_bit_field (rtx op0
, unsigned HOST_WIDE_INT bitsize
,
2035 unsigned HOST_WIDE_INT bitpos
, int unsignedp
,
2039 unsigned int bitsdone
= 0;
2040 rtx result
= NULL_RTX
;
2043 /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
2045 if (REG_P (op0
) || GET_CODE (op0
) == SUBREG
)
2046 unit
= BITS_PER_WORD
;
2048 unit
= MIN (MEM_ALIGN (op0
), BITS_PER_WORD
);
2050 while (bitsdone
< bitsize
)
2052 unsigned HOST_WIDE_INT thissize
;
2054 unsigned HOST_WIDE_INT thispos
;
2055 unsigned HOST_WIDE_INT offset
;
2057 offset
= (bitpos
+ bitsdone
) / unit
;
2058 thispos
= (bitpos
+ bitsdone
) % unit
;
2060 /* THISSIZE must not overrun a word boundary. Otherwise,
2061 extract_fixed_bit_field will call us again, and we will mutually
2063 thissize
= MIN (bitsize
- bitsdone
, BITS_PER_WORD
);
2064 thissize
= MIN (thissize
, unit
- thispos
);
2066 /* If OP0 is a register, then handle OFFSET here.
2068 When handling multiword bitfields, extract_bit_field may pass
2069 down a word_mode SUBREG of a larger REG for a bitfield that actually
2070 crosses a word boundary. Thus, for a SUBREG, we must find
2071 the current word starting from the base register. */
2072 if (GET_CODE (op0
) == SUBREG
)
2074 int word_offset
= (SUBREG_BYTE (op0
) / UNITS_PER_WORD
) + offset
;
2075 word
= operand_subword_force (SUBREG_REG (op0
), word_offset
,
2076 GET_MODE (SUBREG_REG (op0
)));
2079 else if (REG_P (op0
))
2081 word
= operand_subword_force (op0
, offset
, GET_MODE (op0
));
2087 /* Extract the parts in bit-counting order,
2088 whose meaning is determined by BYTES_PER_UNIT.
2089 OFFSET is in UNITs, and UNIT is in bits. */
2090 part
= extract_fixed_bit_field (word_mode
, word
, thissize
,
2091 offset
* unit
+ thispos
, 0, 1, reverse
);
2092 bitsdone
+= thissize
;
2094 /* Shift this part into place for the result. */
2095 if (reverse
? !BYTES_BIG_ENDIAN
: BYTES_BIG_ENDIAN
)
2097 if (bitsize
!= bitsdone
)
2098 part
= expand_shift (LSHIFT_EXPR
, word_mode
, part
,
2099 bitsize
- bitsdone
, 0, 1);
2103 if (bitsdone
!= thissize
)
2104 part
= expand_shift (LSHIFT_EXPR
, word_mode
, part
,
2105 bitsdone
- thissize
, 0, 1);
2111 /* Combine the parts with bitwise or. This works
2112 because we extracted each part as an unsigned bit field. */
2113 result
= expand_binop (word_mode
, ior_optab
, part
, result
, NULL_RTX
, 1,
2119 /* Unsigned bit field: we are done. */
2122 /* Signed bit field: sign-extend with two arithmetic shifts. */
2123 result
= expand_shift (LSHIFT_EXPR
, word_mode
, result
,
2124 BITS_PER_WORD
- bitsize
, NULL_RTX
, 0);
2125 return expand_shift (RSHIFT_EXPR
, word_mode
, result
,
2126 BITS_PER_WORD
- bitsize
, NULL_RTX
, 0);
/* NOTE(review): garbled extraction of extract_low_bits.  The leading block
   comment and body survive only in fragments with the original line
   numbers fused in; several lines are missing (e.g. the `return src;' for
   the mode == src_mode case and various `return NULL_RTX;' fallthroughs
   implied by the comments).  Visible behavior: try simplify_subreg for
   constants, refuse CC modes, use gen_lowpart_common for same-size
   tieable modes, otherwise round-trip through the corresponding integer
   modes with convert_modes.  Code preserved byte-for-byte; restore from
   the canonical source before compiling.  */
2129 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2130 the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than
2131 MODE, fill the upper bits with zeros. Fail if the layout of either
2132 mode is unknown (as for CC modes) or if the extraction would involve
2133 unprofitable mode punning. Return the value on success, otherwise
2136 This is different from gen_lowpart* in these respects:
2138 - the returned value must always be considered an rvalue
2140 - when MODE is wider than SRC_MODE, the extraction involves
2143 - when MODE is smaller than SRC_MODE, the extraction involves
2144 a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
2146 In other words, this routine performs a computation, whereas the
2147 gen_lowpart* routines are conceptually lvalue or rvalue subreg
2151 extract_low_bits (machine_mode mode
, machine_mode src_mode
, rtx src
)
2153 machine_mode int_mode
, src_int_mode
;
2155 if (mode
== src_mode
)
2158 if (CONSTANT_P (src
))
2160 /* simplify_gen_subreg can't be used here, as if simplify_subreg
2161 fails, it will happily create (subreg (symbol_ref)) or similar
2163 unsigned int byte
= subreg_lowpart_offset (mode
, src_mode
);
2164 rtx ret
= simplify_subreg (mode
, src
, src_mode
, byte
);
2168 if (GET_MODE (src
) == VOIDmode
2169 || !validate_subreg (mode
, src_mode
, src
, byte
))
2172 src
= force_reg (GET_MODE (src
), src
);
2173 return gen_rtx_SUBREG (mode
, src
, byte
);
2176 if (GET_MODE_CLASS (mode
) == MODE_CC
|| GET_MODE_CLASS (src_mode
) == MODE_CC
)
2179 if (GET_MODE_BITSIZE (mode
) == GET_MODE_BITSIZE (src_mode
)
2180 && MODES_TIEABLE_P (mode
, src_mode
))
2182 rtx x
= gen_lowpart_common (mode
, src
);
2187 src_int_mode
= int_mode_for_mode (src_mode
);
2188 int_mode
= int_mode_for_mode (mode
);
2189 if (src_int_mode
== BLKmode
|| int_mode
== BLKmode
)
2192 if (!MODES_TIEABLE_P (src_int_mode
, src_mode
))
2194 if (!MODES_TIEABLE_P (int_mode
, mode
))
2197 src
= gen_lowpart (src_int_mode
, src
);
2198 src
= convert_modes (int_mode
, src_int_mode
, src
, true);
2199 src
= gen_lowpart (mode
, src
);
2203 /* Add INC into TARGET. */
2206 expand_inc (rtx target
, rtx inc
)
2208 rtx value
= expand_binop (GET_MODE (target
), add_optab
,
2210 target
, 0, OPTAB_LIB_WIDEN
);
2211 if (value
!= target
)
2212 emit_move_insn (target
, value
);
2215 /* Subtract DEC from TARGET. */
2218 expand_dec (rtx target
, rtx dec
)
2220 rtx value
= expand_binop (GET_MODE (target
), sub_optab
,
2222 target
, 0, OPTAB_LIB_WIDEN
);
2223 if (value
!= target
)
2224 emit_move_insn (target
, value
);
/* NOTE(review): garbled extraction of expand_shift_1.  Fragmented lines,
   embedded original line numbers, and missing lines (return type, braces,
   several `if' headers and `return' statements implied by the comments).
   Visible behavior: emit a shift or rotate of SHIFTED by AMOUNT in MODE;
   select vector optabs when both mode and shift amount are vectors;
   canonicalize the amount under SHIFT_COUNT_TRUNCATED; canonicalize
   constant rotates into the cheaper direction; special-case a 16-bit
   rotate by 8 as bswaphi; optionally synthesize a cheap constant left
   shift as repeated additions; then try OPTAB_DIRECT/WIDEN/LIB_WIDEN in
   turn, falling back for rotates to (A << N) | (A >> ((-N) & (C-1))) and
   for logical shifts to an arithmetic shift when widening.  Code
   preserved byte-for-byte; restore from the canonical source before
   compiling.  */
2227 /* Output a shift instruction for expression code CODE,
2228 with SHIFTED being the rtx for the value to shift,
2229 and AMOUNT the rtx for the amount to shift by.
2230 Store the result in the rtx TARGET, if that is convenient.
2231 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2232 Return the rtx for where the value is. */
2235 expand_shift_1 (enum tree_code code
, machine_mode mode
, rtx shifted
,
2236 rtx amount
, rtx target
, int unsignedp
)
2239 int left
= (code
== LSHIFT_EXPR
|| code
== LROTATE_EXPR
);
2240 int rotate
= (code
== LROTATE_EXPR
|| code
== RROTATE_EXPR
);
2241 optab lshift_optab
= ashl_optab
;
2242 optab rshift_arith_optab
= ashr_optab
;
2243 optab rshift_uns_optab
= lshr_optab
;
2244 optab lrotate_optab
= rotl_optab
;
2245 optab rrotate_optab
= rotr_optab
;
2246 machine_mode op1_mode
;
2247 machine_mode scalar_mode
= mode
;
2249 bool speed
= optimize_insn_for_speed_p ();
2251 if (VECTOR_MODE_P (mode
))
2252 scalar_mode
= GET_MODE_INNER (mode
);
2254 op1_mode
= GET_MODE (op1
);
2256 /* Determine whether the shift/rotate amount is a vector, or scalar. If the
2257 shift amount is a vector, use the vector/vector shift patterns. */
2258 if (VECTOR_MODE_P (mode
) && VECTOR_MODE_P (op1_mode
))
2260 lshift_optab
= vashl_optab
;
2261 rshift_arith_optab
= vashr_optab
;
2262 rshift_uns_optab
= vlshr_optab
;
2263 lrotate_optab
= vrotl_optab
;
2264 rrotate_optab
= vrotr_optab
;
2267 /* Previously detected shift-counts computed by NEGATE_EXPR
2268 and shifted in the other direction; but that does not work
2271 if (SHIFT_COUNT_TRUNCATED
)
2273 if (CONST_INT_P (op1
)
2274 && ((unsigned HOST_WIDE_INT
) INTVAL (op1
) >=
2275 (unsigned HOST_WIDE_INT
) GET_MODE_BITSIZE (scalar_mode
)))
2276 op1
= GEN_INT ((unsigned HOST_WIDE_INT
) INTVAL (op1
)
2277 % GET_MODE_BITSIZE (scalar_mode
));
2278 else if (GET_CODE (op1
) == SUBREG
2279 && subreg_lowpart_p (op1
)
2280 && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1
)))
2281 && SCALAR_INT_MODE_P (GET_MODE (op1
)))
2282 op1
= SUBREG_REG (op1
);
2285 /* Canonicalize rotates by constant amount. If op1 is bitsize / 2,
2286 prefer left rotation, if op1 is from bitsize / 2 + 1 to
2287 bitsize - 1, use other direction of rotate with 1 .. bitsize / 2 - 1
2290 && CONST_INT_P (op1
)
2291 && IN_RANGE (INTVAL (op1
), GET_MODE_BITSIZE (scalar_mode
) / 2 + left
,
2292 GET_MODE_BITSIZE (scalar_mode
) - 1))
2294 op1
= GEN_INT (GET_MODE_BITSIZE (scalar_mode
) - INTVAL (op1
));
2296 code
= left
? LROTATE_EXPR
: RROTATE_EXPR
;
2299 /* Rotation of 16bit values by 8 bits is effectively equivalent to a bswaphi.
2300 Note that this is not the case for bigger values. For instance a rotation
2301 of 0x01020304 by 16 bits gives 0x03040102 which is different from
2302 0x04030201 (bswapsi). */
2304 && CONST_INT_P (op1
)
2305 && INTVAL (op1
) == BITS_PER_UNIT
2306 && GET_MODE_SIZE (scalar_mode
) == 2
2307 && optab_handler (bswap_optab
, HImode
) != CODE_FOR_nothing
)
2308 return expand_unop (HImode
, bswap_optab
, shifted
, NULL_RTX
,
2311 if (op1
== const0_rtx
)
2314 /* Check whether its cheaper to implement a left shift by a constant
2315 bit count by a sequence of additions. */
2316 if (code
== LSHIFT_EXPR
2317 && CONST_INT_P (op1
)
2319 && INTVAL (op1
) < GET_MODE_PRECISION (scalar_mode
)
2320 && INTVAL (op1
) < MAX_BITS_PER_WORD
2321 && (shift_cost (speed
, mode
, INTVAL (op1
))
2322 > INTVAL (op1
) * add_cost (speed
, mode
))
2323 && shift_cost (speed
, mode
, INTVAL (op1
)) != MAX_COST
)
2326 for (i
= 0; i
< INTVAL (op1
); i
++)
2328 temp
= force_reg (mode
, shifted
);
2329 shifted
= expand_binop (mode
, add_optab
, temp
, temp
, NULL_RTX
,
2330 unsignedp
, OPTAB_LIB_WIDEN
);
2335 for (attempt
= 0; temp
== 0 && attempt
< 3; attempt
++)
2337 enum optab_methods methods
;
2340 methods
= OPTAB_DIRECT
;
2341 else if (attempt
== 1)
2342 methods
= OPTAB_WIDEN
;
2344 methods
= OPTAB_LIB_WIDEN
;
2348 /* Widening does not work for rotation. */
2349 if (methods
== OPTAB_WIDEN
)
2351 else if (methods
== OPTAB_LIB_WIDEN
)
2353 /* If we have been unable to open-code this by a rotation,
2354 do it as the IOR of two shifts. I.e., to rotate A
2356 (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
2357 where C is the bitsize of A.
2359 It is theoretically possible that the target machine might
2360 not be able to perform either shift and hence we would
2361 be making two libcalls rather than just the one for the
2362 shift (similarly if IOR could not be done). We will allow
2363 this extremely unlikely lossage to avoid complicating the
2366 rtx subtarget
= target
== shifted
? 0 : target
;
2367 rtx new_amount
, other_amount
;
2371 if (op1
== const0_rtx
)
2373 else if (CONST_INT_P (op1
))
2374 other_amount
= GEN_INT (GET_MODE_BITSIZE (scalar_mode
)
2379 = simplify_gen_unary (NEG
, GET_MODE (op1
),
2380 op1
, GET_MODE (op1
));
2381 HOST_WIDE_INT mask
= GET_MODE_PRECISION (scalar_mode
) - 1;
2383 = simplify_gen_binary (AND
, GET_MODE (op1
), other_amount
,
2384 gen_int_mode (mask
, GET_MODE (op1
)));
2387 shifted
= force_reg (mode
, shifted
);
2389 temp
= expand_shift_1 (left
? LSHIFT_EXPR
: RSHIFT_EXPR
,
2390 mode
, shifted
, new_amount
, 0, 1);
2391 temp1
= expand_shift_1 (left
? RSHIFT_EXPR
: LSHIFT_EXPR
,
2392 mode
, shifted
, other_amount
,
2394 return expand_binop (mode
, ior_optab
, temp
, temp1
, target
,
2395 unsignedp
, methods
);
2398 temp
= expand_binop (mode
,
2399 left
? lrotate_optab
: rrotate_optab
,
2400 shifted
, op1
, target
, unsignedp
, methods
);
2403 temp
= expand_binop (mode
,
2404 left
? lshift_optab
: rshift_uns_optab
,
2405 shifted
, op1
, target
, unsignedp
, methods
);
2407 /* Do arithmetic shifts.
2408 Also, if we are going to widen the operand, we can just as well
2409 use an arithmetic right-shift instead of a logical one. */
2410 if (temp
== 0 && ! rotate
2411 && (! unsignedp
|| (! left
&& methods
== OPTAB_WIDEN
)))
2413 enum optab_methods methods1
= methods
;
2415 /* If trying to widen a log shift to an arithmetic shift,
2416 don't accept an arithmetic shift of the same size. */
2418 methods1
= OPTAB_MUST_WIDEN
;
2420 /* Arithmetic shift */
2422 temp
= expand_binop (mode
,
2423 left
? lshift_optab
: rshift_arith_optab
,
2424 shifted
, op1
, target
, unsignedp
, methods1
);
2427 /* We used to try extzv here for logical right shifts, but that was
2428 only useful for one machine, the VAX, and caused poor code
2429 generation there for lshrdi3, so the code was deleted and a
2430 define_expand for lshrsi3 was added to vax.md. */
2437 /* Output a shift instruction for expression code CODE,
2438 with SHIFTED being the rtx for the value to shift,
2439 and AMOUNT the amount to shift by.
2440 Store the result in the rtx TARGET, if that is convenient.
2441 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2442 Return the rtx for where the value is. */
2445 expand_shift (enum tree_code code
, machine_mode mode
, rtx shifted
,
2446 int amount
, rtx target
, int unsignedp
)
2448 return expand_shift_1 (code
, mode
,
2449 shifted
, GEN_INT (amount
), target
, unsignedp
);
2452 /* Output a shift instruction for expression code CODE,
2453 with SHIFTED being the rtx for the value to shift,
2454 and AMOUNT the tree for the amount to shift by.
2455 Store the result in the rtx TARGET, if that is convenient.
2456 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2457 Return the rtx for where the value is. */
2460 expand_variable_shift (enum tree_code code
, machine_mode mode
, rtx shifted
,
2461 tree amount
, rtx target
, int unsignedp
)
2463 return expand_shift_1 (code
, mode
,
2464 shifted
, expand_normal (amount
), target
, unsignedp
);
/* Indicates the type of fixup needed after a constant multiplication.
   BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
   the result should be negated, and ADD_VARIANT means that the
   multiplicand should be added to the result.  */
enum mult_variant {basic_variant, negate_variant, add_variant};
2474 static void synth_mult (struct algorithm
*, unsigned HOST_WIDE_INT
,
2475 const struct mult_cost
*, machine_mode mode
);
2476 static bool choose_mult_variant (machine_mode
, HOST_WIDE_INT
,
2477 struct algorithm
*, enum mult_variant
*, int);
2478 static rtx
expand_mult_const (machine_mode
, rtx
, HOST_WIDE_INT
, rtx
,
2479 const struct algorithm
*, enum mult_variant
);
2480 static unsigned HOST_WIDE_INT
invert_mod2n (unsigned HOST_WIDE_INT
, int);
2481 static rtx
extract_high_half (machine_mode
, rtx
);
2482 static rtx
expmed_mult_highpart (machine_mode
, rtx
, rtx
, rtx
, int, int);
2483 static rtx
expmed_mult_highpart_optab (machine_mode
, rtx
, rtx
, rtx
,
/* NOTE(review): garbled extraction of synth_mult.  Fragmented lines with
   the original line numbers fused in, and many missing lines (the
   `static void' header, braces, several `case'/`goto' labels, the t==0/
   t==1 early-return guards, and the `do_alg_shift' / `do_alg_add_t_m2'
   etc. labels that the visible gotos target).  Visible behavior: a
   recursive branch-and-bound search for the cheapest shift/add/subtract
   sequence multiplying by constant T in MODE, pruned by COST_LIMIT,
   memoized in a hash table (alg_hash_entry_ptr), trying in turn: shift
   out trailing zeros, add/subtract 1 for odd T, factors 2**m +- 1, and
   shift-and-add (t-1)/(t+1) >> m forms; on failure the <T, MODE, limit>
   triple is cached as alg_impossible.  NOTE(review): the cache-hit test
   repeats `entry_ptr->mode == mode' twice (orig lines 2566/2567) — looks
   redundant; verify against upstream before "fixing".  Code preserved
   byte-for-byte; restore from the canonical source before compiling.  */
2485 /* Compute and return the best algorithm for multiplying by T.
2486 The algorithm must cost less than cost_limit
2487 If retval.cost >= COST_LIMIT, no algorithm was found and all
2488 other field of the returned struct are undefined.
2489 MODE is the machine mode of the multiplication. */
2492 synth_mult (struct algorithm
*alg_out
, unsigned HOST_WIDE_INT t
,
2493 const struct mult_cost
*cost_limit
, machine_mode mode
)
2496 struct algorithm
*alg_in
, *best_alg
;
2497 struct mult_cost best_cost
;
2498 struct mult_cost new_limit
;
2499 int op_cost
, op_latency
;
2500 unsigned HOST_WIDE_INT orig_t
= t
;
2501 unsigned HOST_WIDE_INT q
;
2502 int maxm
, hash_index
;
2503 bool cache_hit
= false;
2504 enum alg_code cache_alg
= alg_zero
;
2505 bool speed
= optimize_insn_for_speed_p ();
2507 struct alg_hash_entry
*entry_ptr
;
2509 /* Indicate that no algorithm is yet found. If no algorithm
2510 is found, this value will be returned and indicate failure. */
2511 alg_out
->cost
.cost
= cost_limit
->cost
+ 1;
2512 alg_out
->cost
.latency
= cost_limit
->latency
+ 1;
2514 if (cost_limit
->cost
< 0
2515 || (cost_limit
->cost
== 0 && cost_limit
->latency
<= 0))
2518 /* Be prepared for vector modes. */
2519 imode
= GET_MODE_INNER (mode
);
2520 if (imode
== VOIDmode
)
2523 maxm
= MIN (BITS_PER_WORD
, GET_MODE_BITSIZE (imode
));
2525 /* Restrict the bits of "t" to the multiplication's mode. */
2526 t
&= GET_MODE_MASK (imode
);
2528 /* t == 1 can be done in zero cost. */
2532 alg_out
->cost
.cost
= 0;
2533 alg_out
->cost
.latency
= 0;
2534 alg_out
->op
[0] = alg_m
;
2538 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2542 if (MULT_COST_LESS (cost_limit
, zero_cost (speed
)))
2547 alg_out
->cost
.cost
= zero_cost (speed
);
2548 alg_out
->cost
.latency
= zero_cost (speed
);
2549 alg_out
->op
[0] = alg_zero
;
2554 /* We'll be needing a couple extra algorithm structures now. */
2556 alg_in
= XALLOCA (struct algorithm
);
2557 best_alg
= XALLOCA (struct algorithm
);
2558 best_cost
= *cost_limit
;
2560 /* Compute the hash index. */
2561 hash_index
= (t
^ (unsigned int) mode
^ (speed
* 256)) % NUM_ALG_HASH_ENTRIES
;
2563 /* See if we already know what to do for T. */
2564 entry_ptr
= alg_hash_entry_ptr (hash_index
);
2565 if (entry_ptr
->t
== t
2566 && entry_ptr
->mode
== mode
2567 && entry_ptr
->mode
== mode
2568 && entry_ptr
->speed
== speed
2569 && entry_ptr
->alg
!= alg_unknown
)
2571 cache_alg
= entry_ptr
->alg
;
2573 if (cache_alg
== alg_impossible
)
2575 /* The cache tells us that it's impossible to synthesize
2576 multiplication by T within entry_ptr->cost. */
2577 if (!CHEAPER_MULT_COST (&entry_ptr
->cost
, cost_limit
))
2578 /* COST_LIMIT is at least as restrictive as the one
2579 recorded in the hash table, in which case we have no
2580 hope of synthesizing a multiplication. Just
2584 /* If we get here, COST_LIMIT is less restrictive than the
2585 one recorded in the hash table, so we may be able to
2586 synthesize a multiplication. Proceed as if we didn't
2587 have the cache entry. */
2591 if (CHEAPER_MULT_COST (cost_limit
, &entry_ptr
->cost
))
2592 /* The cached algorithm shows that this multiplication
2593 requires more cost than COST_LIMIT. Just return. This
2594 way, we don't clobber this cache entry with
2595 alg_impossible but retain useful information. */
2607 goto do_alg_addsub_t_m2
;
2609 case alg_add_factor
:
2610 case alg_sub_factor
:
2611 goto do_alg_addsub_factor
;
2614 goto do_alg_add_t2_m
;
2617 goto do_alg_sub_t2_m
;
2625 /* If we have a group of zero bits at the low-order part of T, try
2626 multiplying by the remaining bits and then doing a shift. */
2631 m
= floor_log2 (t
& -t
); /* m = number of low zero bits */
2635 /* The function expand_shift will choose between a shift and
2636 a sequence of additions, so the observed cost is given as
2637 MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)). */
2638 op_cost
= m
* add_cost (speed
, mode
);
2639 if (shift_cost (speed
, mode
, m
) < op_cost
)
2640 op_cost
= shift_cost (speed
, mode
, m
);
2641 new_limit
.cost
= best_cost
.cost
- op_cost
;
2642 new_limit
.latency
= best_cost
.latency
- op_cost
;
2643 synth_mult (alg_in
, q
, &new_limit
, mode
);
2645 alg_in
->cost
.cost
+= op_cost
;
2646 alg_in
->cost
.latency
+= op_cost
;
2647 if (CHEAPER_MULT_COST (&alg_in
->cost
, &best_cost
))
2649 best_cost
= alg_in
->cost
;
2650 std::swap (alg_in
, best_alg
);
2651 best_alg
->log
[best_alg
->ops
] = m
;
2652 best_alg
->op
[best_alg
->ops
] = alg_shift
;
2655 /* See if treating ORIG_T as a signed number yields a better
2656 sequence. Try this sequence only for a negative ORIG_T
2657 as it would be useless for a non-negative ORIG_T. */
2658 if ((HOST_WIDE_INT
) orig_t
< 0)
2660 /* Shift ORIG_T as follows because a right shift of a
2661 negative-valued signed type is implementation
2663 q
= ~(~orig_t
>> m
);
2664 /* The function expand_shift will choose between a shift
2665 and a sequence of additions, so the observed cost is
2666 given as MIN (m * add_cost(speed, mode),
2667 shift_cost(speed, mode, m)). */
2668 op_cost
= m
* add_cost (speed
, mode
);
2669 if (shift_cost (speed
, mode
, m
) < op_cost
)
2670 op_cost
= shift_cost (speed
, mode
, m
);
2671 new_limit
.cost
= best_cost
.cost
- op_cost
;
2672 new_limit
.latency
= best_cost
.latency
- op_cost
;
2673 synth_mult (alg_in
, q
, &new_limit
, mode
);
2675 alg_in
->cost
.cost
+= op_cost
;
2676 alg_in
->cost
.latency
+= op_cost
;
2677 if (CHEAPER_MULT_COST (&alg_in
->cost
, &best_cost
))
2679 best_cost
= alg_in
->cost
;
2680 std::swap (alg_in
, best_alg
);
2681 best_alg
->log
[best_alg
->ops
] = m
;
2682 best_alg
->op
[best_alg
->ops
] = alg_shift
;
2690 /* If we have an odd number, add or subtract one. */
2693 unsigned HOST_WIDE_INT w
;
2696 for (w
= 1; (w
& t
) != 0; w
<<= 1)
2698 /* If T was -1, then W will be zero after the loop. This is another
2699 case where T ends with ...111. Handling this with (T + 1) and
2700 subtract 1 produces slightly better code and results in algorithm
2701 selection much faster than treating it like the ...0111 case
2705 /* Reject the case where t is 3.
2706 Thus we prefer addition in that case. */
2709 /* T ends with ...111. Multiply by (T + 1) and subtract T. */
2711 op_cost
= add_cost (speed
, mode
);
2712 new_limit
.cost
= best_cost
.cost
- op_cost
;
2713 new_limit
.latency
= best_cost
.latency
- op_cost
;
2714 synth_mult (alg_in
, t
+ 1, &new_limit
, mode
);
2716 alg_in
->cost
.cost
+= op_cost
;
2717 alg_in
->cost
.latency
+= op_cost
;
2718 if (CHEAPER_MULT_COST (&alg_in
->cost
, &best_cost
))
2720 best_cost
= alg_in
->cost
;
2721 std::swap (alg_in
, best_alg
);
2722 best_alg
->log
[best_alg
->ops
] = 0;
2723 best_alg
->op
[best_alg
->ops
] = alg_sub_t_m2
;
2728 /* T ends with ...01 or ...011. Multiply by (T - 1) and add T. */
2730 op_cost
= add_cost (speed
, mode
);
2731 new_limit
.cost
= best_cost
.cost
- op_cost
;
2732 new_limit
.latency
= best_cost
.latency
- op_cost
;
2733 synth_mult (alg_in
, t
- 1, &new_limit
, mode
);
2735 alg_in
->cost
.cost
+= op_cost
;
2736 alg_in
->cost
.latency
+= op_cost
;
2737 if (CHEAPER_MULT_COST (&alg_in
->cost
, &best_cost
))
2739 best_cost
= alg_in
->cost
;
2740 std::swap (alg_in
, best_alg
);
2741 best_alg
->log
[best_alg
->ops
] = 0;
2742 best_alg
->op
[best_alg
->ops
] = alg_add_t_m2
;
2746 /* We may be able to calculate a * -7, a * -15, a * -31, etc
2747 quickly with a - a * n for some appropriate constant n. */
2748 m
= exact_log2 (-orig_t
+ 1);
2749 if (m
>= 0 && m
< maxm
)
2751 op_cost
= add_cost (speed
, mode
) + shift_cost (speed
, mode
, m
);
2752 /* If the target has a cheap shift-and-subtract insn use
2753 that in preference to a shift insn followed by a sub insn.
2754 Assume that the shift-and-sub is "atomic" with a latency
2755 equal to it's cost, otherwise assume that on superscalar
2756 hardware the shift may be executed concurrently with the
2757 earlier steps in the algorithm. */
2758 if (shiftsub1_cost (speed
, mode
, m
) <= op_cost
)
2760 op_cost
= shiftsub1_cost (speed
, mode
, m
);
2761 op_latency
= op_cost
;
2764 op_latency
= add_cost (speed
, mode
);
2766 new_limit
.cost
= best_cost
.cost
- op_cost
;
2767 new_limit
.latency
= best_cost
.latency
- op_latency
;
2768 synth_mult (alg_in
, (unsigned HOST_WIDE_INT
) (-orig_t
+ 1) >> m
,
2771 alg_in
->cost
.cost
+= op_cost
;
2772 alg_in
->cost
.latency
+= op_latency
;
2773 if (CHEAPER_MULT_COST (&alg_in
->cost
, &best_cost
))
2775 best_cost
= alg_in
->cost
;
2776 std::swap (alg_in
, best_alg
);
2777 best_alg
->log
[best_alg
->ops
] = m
;
2778 best_alg
->op
[best_alg
->ops
] = alg_sub_t_m2
;
2786 /* Look for factors of t of the form
2787 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2788 If we find such a factor, we can multiply by t using an algorithm that
2789 multiplies by q, shift the result by m and add/subtract it to itself.
2791 We search for large factors first and loop down, even if large factors
2792 are less probable than small; if we find a large factor we will find a
2793 good sequence quickly, and therefore be able to prune (by decreasing
2794 COST_LIMIT) the search. */
2796 do_alg_addsub_factor
:
2797 for (m
= floor_log2 (t
- 1); m
>= 2; m
--)
2799 unsigned HOST_WIDE_INT d
;
2801 d
= ((unsigned HOST_WIDE_INT
) 1 << m
) + 1;
2802 if (t
% d
== 0 && t
> d
&& m
< maxm
2803 && (!cache_hit
|| cache_alg
== alg_add_factor
))
2805 op_cost
= add_cost (speed
, mode
) + shift_cost (speed
, mode
, m
);
2806 if (shiftadd_cost (speed
, mode
, m
) <= op_cost
)
2807 op_cost
= shiftadd_cost (speed
, mode
, m
);
2809 op_latency
= op_cost
;
2812 new_limit
.cost
= best_cost
.cost
- op_cost
;
2813 new_limit
.latency
= best_cost
.latency
- op_latency
;
2814 synth_mult (alg_in
, t
/ d
, &new_limit
, mode
);
2816 alg_in
->cost
.cost
+= op_cost
;
2817 alg_in
->cost
.latency
+= op_latency
;
2818 if (alg_in
->cost
.latency
< op_cost
)
2819 alg_in
->cost
.latency
= op_cost
;
2820 if (CHEAPER_MULT_COST (&alg_in
->cost
, &best_cost
))
2822 best_cost
= alg_in
->cost
;
2823 std::swap (alg_in
, best_alg
);
2824 best_alg
->log
[best_alg
->ops
] = m
;
2825 best_alg
->op
[best_alg
->ops
] = alg_add_factor
;
2827 /* Other factors will have been taken care of in the recursion. */
2831 d
= ((unsigned HOST_WIDE_INT
) 1 << m
) - 1;
2832 if (t
% d
== 0 && t
> d
&& m
< maxm
2833 && (!cache_hit
|| cache_alg
== alg_sub_factor
))
2835 op_cost
= add_cost (speed
, mode
) + shift_cost (speed
, mode
, m
);
2836 if (shiftsub0_cost (speed
, mode
, m
) <= op_cost
)
2837 op_cost
= shiftsub0_cost (speed
, mode
, m
);
2839 op_latency
= op_cost
;
2841 new_limit
.cost
= best_cost
.cost
- op_cost
;
2842 new_limit
.latency
= best_cost
.latency
- op_latency
;
2843 synth_mult (alg_in
, t
/ d
, &new_limit
, mode
);
2845 alg_in
->cost
.cost
+= op_cost
;
2846 alg_in
->cost
.latency
+= op_latency
;
2847 if (alg_in
->cost
.latency
< op_cost
)
2848 alg_in
->cost
.latency
= op_cost
;
2849 if (CHEAPER_MULT_COST (&alg_in
->cost
, &best_cost
))
2851 best_cost
= alg_in
->cost
;
2852 std::swap (alg_in
, best_alg
);
2853 best_alg
->log
[best_alg
->ops
] = m
;
2854 best_alg
->op
[best_alg
->ops
] = alg_sub_factor
;
2862 /* Try shift-and-add (load effective address) instructions,
2863 i.e. do a*3, a*5, a*9. */
2870 if (m
>= 0 && m
< maxm
)
2872 op_cost
= shiftadd_cost (speed
, mode
, m
);
2873 new_limit
.cost
= best_cost
.cost
- op_cost
;
2874 new_limit
.latency
= best_cost
.latency
- op_cost
;
2875 synth_mult (alg_in
, (t
- 1) >> m
, &new_limit
, mode
);
2877 alg_in
->cost
.cost
+= op_cost
;
2878 alg_in
->cost
.latency
+= op_cost
;
2879 if (CHEAPER_MULT_COST (&alg_in
->cost
, &best_cost
))
2881 best_cost
= alg_in
->cost
;
2882 std::swap (alg_in
, best_alg
);
2883 best_alg
->log
[best_alg
->ops
] = m
;
2884 best_alg
->op
[best_alg
->ops
] = alg_add_t2_m
;
2894 if (m
>= 0 && m
< maxm
)
2896 op_cost
= shiftsub0_cost (speed
, mode
, m
);
2897 new_limit
.cost
= best_cost
.cost
- op_cost
;
2898 new_limit
.latency
= best_cost
.latency
- op_cost
;
2899 synth_mult (alg_in
, (t
+ 1) >> m
, &new_limit
, mode
);
2901 alg_in
->cost
.cost
+= op_cost
;
2902 alg_in
->cost
.latency
+= op_cost
;
2903 if (CHEAPER_MULT_COST (&alg_in
->cost
, &best_cost
))
2905 best_cost
= alg_in
->cost
;
2906 std::swap (alg_in
, best_alg
);
2907 best_alg
->log
[best_alg
->ops
] = m
;
2908 best_alg
->op
[best_alg
->ops
] = alg_sub_t2_m
;
2916 /* If best_cost has not decreased, we have not found any algorithm. */
2917 if (!CHEAPER_MULT_COST (&best_cost
, cost_limit
))
2919 /* We failed to find an algorithm. Record alg_impossible for
2920 this case (that is, <T, MODE, COST_LIMIT>) so that next time
2921 we are asked to find an algorithm for T within the same or
2922 lower COST_LIMIT, we can immediately return to the
2925 entry_ptr
->mode
= mode
;
2926 entry_ptr
->speed
= speed
;
2927 entry_ptr
->alg
= alg_impossible
;
2928 entry_ptr
->cost
= *cost_limit
;
2932 /* Cache the result. */
2936 entry_ptr
->mode
= mode
;
2937 entry_ptr
->speed
= speed
;
2938 entry_ptr
->alg
= best_alg
->op
[best_alg
->ops
];
2939 entry_ptr
->cost
.cost
= best_cost
.cost
;
2940 entry_ptr
->cost
.latency
= best_cost
.latency
;
2943 /* If we are getting a too long sequence for `struct algorithm'
2944 to record, make this search fail. */
2945 if (best_alg
->ops
== MAX_BITS_PER_WORD
)
2948 /* Copy the algorithm from temporary space to the space at alg_out.
2949 We avoid using structure assignment because the majority of
2950 best_alg is normally undefined, and this is a critical function. */
2951 alg_out
->ops
= best_alg
->ops
+ 1;
2952 alg_out
->cost
= best_cost
;
2953 memcpy (alg_out
->op
, best_alg
->op
,
2954 alg_out
->ops
* sizeof *alg_out
->op
);
2955 memcpy (alg_out
->log
, best_alg
->log
,
2956 alg_out
->ops
* sizeof *alg_out
->log
);
/* NOTE(review): garbled extraction of choose_mult_variant.  Fragmented
   lines with embedded original line numbers; missing lines include the
   `static bool' header, the `int mult_cost' parameter, braces, and the
   early `return false' for impossible bounds.  Visible behavior: cap
   MULT_COST at 2 * bits * add_cost, run synth_mult on VAL, then on -VAL
   (plus a negation) and on VAL - 1 (plus an addition), keeping whichever
   algorithm is cheapest in *ALG / *VARIANT and returning whether it
   beats MULT_COST.  Code preserved byte-for-byte; restore from the
   canonical source before compiling.  */
2959 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2960 Try three variations:
2962 - a shift/add sequence based on VAL itself
2963 - a shift/add sequence based on -VAL, followed by a negation
2964 - a shift/add sequence based on VAL - 1, followed by an addition.
2966 Return true if the cheapest of these cost less than MULT_COST,
2967 describing the algorithm in *ALG and final fixup in *VARIANT. */
2970 choose_mult_variant (machine_mode mode
, HOST_WIDE_INT val
,
2971 struct algorithm
*alg
, enum mult_variant
*variant
,
2974 struct algorithm alg2
;
2975 struct mult_cost limit
;
2977 bool speed
= optimize_insn_for_speed_p ();
2979 /* Fail quickly for impossible bounds. */
2983 /* Ensure that mult_cost provides a reasonable upper bound.
2984 Any constant multiplication can be performed with less
2985 than 2 * bits additions. */
2986 op_cost
= 2 * GET_MODE_UNIT_BITSIZE (mode
) * add_cost (speed
, mode
);
2987 if (mult_cost
> op_cost
)
2988 mult_cost
= op_cost
;
2990 *variant
= basic_variant
;
2991 limit
.cost
= mult_cost
;
2992 limit
.latency
= mult_cost
;
2993 synth_mult (alg
, val
, &limit
, mode
);
2995 /* This works only if the inverted value actually fits in an
2997 if (HOST_BITS_PER_INT
>= GET_MODE_UNIT_BITSIZE (mode
))
2999 op_cost
= neg_cost (speed
, mode
);
3000 if (MULT_COST_LESS (&alg
->cost
, mult_cost
))
3002 limit
.cost
= alg
->cost
.cost
- op_cost
;
3003 limit
.latency
= alg
->cost
.latency
- op_cost
;
3007 limit
.cost
= mult_cost
- op_cost
;
3008 limit
.latency
= mult_cost
- op_cost
;
3011 synth_mult (&alg2
, -val
, &limit
, mode
);
3012 alg2
.cost
.cost
+= op_cost
;
3013 alg2
.cost
.latency
+= op_cost
;
3014 if (CHEAPER_MULT_COST (&alg2
.cost
, &alg
->cost
))
3015 *alg
= alg2
, *variant
= negate_variant
;
3018 /* This proves very useful for division-by-constant. */
3019 op_cost
= add_cost (speed
, mode
);
3020 if (MULT_COST_LESS (&alg
->cost
, mult_cost
))
3022 limit
.cost
= alg
->cost
.cost
- op_cost
;
3023 limit
.latency
= alg
->cost
.latency
- op_cost
;
3027 limit
.cost
= mult_cost
- op_cost
;
3028 limit
.latency
= mult_cost
- op_cost
;
3031 synth_mult (&alg2
, val
- 1, &limit
, mode
);
3032 alg2
.cost
.cost
+= op_cost
;
3033 alg2
.cost
.latency
+= op_cost
;
3034 if (CHEAPER_MULT_COST (&alg2
.cost
, &alg
->cost
))
3035 *alg
= alg2
, *variant
= add_variant
;
3037 return MULT_COST_LESS (&alg
->cost
, mult_cost
);
/* NOTE(review): garbled extraction of expand_mult_const.  Fragmented
   lines with embedded original line numbers; missing lines include the
   `static rtx' header, braces, the declarations of `accum'/`tem'/`opno'/
   `insn'/`nmode'/`accum_inner'/`add_target', the `case alg_shift:' /
   `case alg_add_t_m2:' etc. labels for the visible switch arms, the
   gcc_unreachable default, and the final `return accum;'.  Visible
   behavior: replay the shift/add algorithm chosen by synth_mult op by
   op, tracking val_so_far as a self-check, attach a REG_EQUAL MULT note
   for CSE, apply the negate/add fixup for the chosen variant, and assert
   val == val_so_far modulo the mode mask.  Code preserved byte-for-byte;
   restore from the canonical source before compiling.  */
3040 /* A subroutine of expand_mult, used for constant multiplications.
3041 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
3042 convenient. Use the shift/add sequence described by ALG and apply
3043 the final fixup specified by VARIANT. */
3046 expand_mult_const (machine_mode mode
, rtx op0
, HOST_WIDE_INT val
,
3047 rtx target
, const struct algorithm
*alg
,
3048 enum mult_variant variant
)
3050 HOST_WIDE_INT val_so_far
;
3056 /* Avoid referencing memory over and over and invalid sharing
3058 op0
= force_reg (mode
, op0
);
3060 /* ACCUM starts out either as OP0 or as a zero, depending on
3061 the first operation. */
3063 if (alg
->op
[0] == alg_zero
)
3065 accum
= copy_to_mode_reg (mode
, CONST0_RTX (mode
));
3068 else if (alg
->op
[0] == alg_m
)
3070 accum
= copy_to_mode_reg (mode
, op0
);
3076 for (opno
= 1; opno
< alg
->ops
; opno
++)
3078 int log
= alg
->log
[opno
];
3079 rtx shift_subtarget
= optimize
? 0 : accum
;
3081 = (opno
== alg
->ops
- 1 && target
!= 0 && variant
!= add_variant
3084 rtx accum_target
= optimize
? 0 : accum
;
3087 switch (alg
->op
[opno
])
3090 tem
= expand_shift (LSHIFT_EXPR
, mode
, accum
, log
, NULL_RTX
, 0);
3091 /* REG_EQUAL note will be attached to the following insn. */
3092 emit_move_insn (accum
, tem
);
3097 tem
= expand_shift (LSHIFT_EXPR
, mode
, op0
, log
, NULL_RTX
, 0);
3098 accum
= force_operand (gen_rtx_PLUS (mode
, accum
, tem
),
3099 add_target
? add_target
: accum_target
);
3100 val_so_far
+= (HOST_WIDE_INT
) 1 << log
;
3104 tem
= expand_shift (LSHIFT_EXPR
, mode
, op0
, log
, NULL_RTX
, 0);
3105 accum
= force_operand (gen_rtx_MINUS (mode
, accum
, tem
),
3106 add_target
? add_target
: accum_target
);
3107 val_so_far
-= (HOST_WIDE_INT
) 1 << log
;
3111 accum
= expand_shift (LSHIFT_EXPR
, mode
, accum
,
3112 log
, shift_subtarget
, 0);
3113 accum
= force_operand (gen_rtx_PLUS (mode
, accum
, op0
),
3114 add_target
? add_target
: accum_target
);
3115 val_so_far
= (val_so_far
<< log
) + 1;
3119 accum
= expand_shift (LSHIFT_EXPR
, mode
, accum
,
3120 log
, shift_subtarget
, 0);
3121 accum
= force_operand (gen_rtx_MINUS (mode
, accum
, op0
),
3122 add_target
? add_target
: accum_target
);
3123 val_so_far
= (val_so_far
<< log
) - 1;
3126 case alg_add_factor
:
3127 tem
= expand_shift (LSHIFT_EXPR
, mode
, accum
, log
, NULL_RTX
, 0);
3128 accum
= force_operand (gen_rtx_PLUS (mode
, accum
, tem
),
3129 add_target
? add_target
: accum_target
);
3130 val_so_far
+= val_so_far
<< log
;
3133 case alg_sub_factor
:
3134 tem
= expand_shift (LSHIFT_EXPR
, mode
, accum
, log
, NULL_RTX
, 0);
3135 accum
= force_operand (gen_rtx_MINUS (mode
, tem
, accum
),
3137 ? add_target
: (optimize
? 0 : tem
)));
3138 val_so_far
= (val_so_far
<< log
) - val_so_far
;
3145 if (SCALAR_INT_MODE_P (mode
))
3147 /* Write a REG_EQUAL note on the last insn so that we can cse
3148 multiplication sequences. Note that if ACCUM is a SUBREG,
3149 we've set the inner register and must properly indicate that. */
3150 tem
= op0
, nmode
= mode
;
3151 accum_inner
= accum
;
3152 if (GET_CODE (accum
) == SUBREG
)
3154 accum_inner
= SUBREG_REG (accum
);
3155 nmode
= GET_MODE (accum_inner
);
3156 tem
= gen_lowpart (nmode
, op0
);
3159 insn
= get_last_insn ();
3160 set_dst_reg_note (insn
, REG_EQUAL
,
3161 gen_rtx_MULT (nmode
, tem
,
3162 gen_int_mode (val_so_far
, nmode
)),
3167 if (variant
== negate_variant
)
3169 val_so_far
= -val_so_far
;
3170 accum
= expand_unop (mode
, neg_optab
, accum
, target
, 0);
3172 else if (variant
== add_variant
)
3174 val_so_far
= val_so_far
+ 1;
3175 accum
= force_operand (gen_rtx_PLUS (mode
, accum
, op0
), target
);
3178 /* Compare only the bits of val and val_so_far that are significant
3179 in the result mode, to avoid sign-/zero-extension confusion. */
3180 nmode
= GET_MODE_INNER (mode
);
3181 if (nmode
== VOIDmode
)
3183 val
&= GET_MODE_MASK (nmode
);
3184 val_so_far
&= GET_MODE_MASK (nmode
);
3185 gcc_assert (val
== val_so_far
);
3190 /* Perform a multiplication and return an rtx for the result.
3191 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3192 TARGET is a suggestion for where to store the result (an rtx).
3194 We check specially for a constant integer as OP1.
3195 If you want this check for OP0 as well, then before calling
3196 you should swap the two operands if OP0 would be constant. */
/* Dispatch summary (from the visible code): a constant operand is
   canonicalized into OP1; vector constants with identical elements are
   reduced to one scalar element; integer constants are lowered to
   shift/add sequences via choose_mult_variant / expand_mult_const when
   cheaper than a real multiply; x*2.0 becomes x+x; everything else
   falls through to a plain smul/smulv expand_binop call.
   NOTE(review): this listing is lossy — several original lines
   (returns, braces) are missing between the numbered fragments.  */
3199 expand_mult (machine_mode mode
, rtx op0
, rtx op1
, rtx target
,
3202 enum mult_variant variant
;
3203 struct algorithm algorithm
;
3206 bool speed
= optimize_insn_for_speed_p ();
/* -ftrapv only matters for signed scalar integer multiply.  */
3207 bool do_trapv
= flag_trapv
&& SCALAR_INT_MODE_P (mode
) && !unsignedp
;
3209 if (CONSTANT_P (op0
))
3210 std::swap (op0
, op1
);
3212 /* For vectors, there are several simplifications that can be made if
3213 all elements of the vector constant are identical. */
3215 if (GET_CODE (op1
) == CONST_VECTOR
)
3217 int i
, n
= CONST_VECTOR_NUNITS (op1
);
3218 scalar_op1
= CONST_VECTOR_ELT (op1
, 0);
3219 for (i
= 1; i
< n
; ++i
)
3220 if (!rtx_equal_p (scalar_op1
, CONST_VECTOR_ELT (op1
, i
)))
/* Trivial multipliers: 0, 1 and -1 need no multiply at all.  */
3224 if (INTEGRAL_MODE_P (mode
))
3227 HOST_WIDE_INT coeff
;
3231 if (op1
== CONST0_RTX (mode
))
3233 if (op1
== CONST1_RTX (mode
))
3235 if (op1
== CONSTM1_RTX (mode
))
3236 return expand_unop (mode
, do_trapv
? negv_optab
: neg_optab
,
3242 /* If mode is integer vector mode, check if the backend supports
3243 vector lshift (by scalar or vector) at all. If not, we can't use
3244 synthetized multiply. */
3245 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
3246 && optab_handler (vashl_optab
, mode
) == CODE_FOR_nothing
3247 && optab_handler (ashl_optab
, mode
) == CODE_FOR_nothing
)
3250 /* These are the operations that are potentially turned into
3251 a sequence of shifts and additions. */
3252 mode_bitsize
= GET_MODE_UNIT_BITSIZE (mode
);
3254 /* synth_mult does an `unsigned int' multiply. As long as the mode is
3255 less than or equal in size to `unsigned int' this doesn't matter.
3256 If the mode is larger than `unsigned int', then synth_mult works
3257 only if the constant value exactly fits in an `unsigned int' without
3258 any truncation. This means that multiplying by negative values does
3259 not work; results are off by 2^32 on a 32 bit machine. */
3260 if (CONST_INT_P (scalar_op1
))
3262 coeff
= INTVAL (scalar_op1
);
3265 #if TARGET_SUPPORTS_WIDE_INT
3266 else if (CONST_WIDE_INT_P (scalar_op1
))
3268 else if (CONST_DOUBLE_AS_INT_P (scalar_op1
))
3271 int shift
= wi::exact_log2 (std::make_pair (scalar_op1
, mode
));
3272 /* Perfect power of 2 (other than 1, which is handled above). */
3274 return expand_shift (LSHIFT_EXPR
, mode
, op0
,
3275 shift
, target
, unsignedp
);
3282 /* We used to test optimize here, on the grounds that it's better to
3283 produce a smaller program when -O is not used. But this causes
3284 such a terrible slowdown sometimes that it seems better to always
3287 /* Special case powers of two. */
3288 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff
)
3289 && !(is_neg
&& mode_bitsize
> HOST_BITS_PER_WIDE_INT
))
3290 return expand_shift (LSHIFT_EXPR
, mode
, op0
,
3291 floor_log2 (coeff
), target
, unsignedp
);
/* Fake register used only inside cost rtxes; never emitted.  */
3293 fake_reg
= gen_raw_REG (mode
, LAST_VIRTUAL_REGISTER
+ 1);
3295 /* Attempt to handle multiplication of DImode values by negative
3296 coefficients, by performing the multiplication by a positive
3297 multiplier and then inverting the result. */
3298 if (is_neg
&& mode_bitsize
> HOST_BITS_PER_WIDE_INT
)
3300 /* Its safe to use -coeff even for INT_MIN, as the
3301 result is interpreted as an unsigned coefficient.
3302 Exclude cost of op0 from max_cost to match the cost
3303 calculation of the synth_mult. */
3304 coeff
= -(unsigned HOST_WIDE_INT
) coeff
;
3305 max_cost
= (set_src_cost (gen_rtx_MULT (mode
, fake_reg
, op1
), speed
)
3306 - neg_cost (speed
, mode
));
3310 /* Special case powers of two. */
3311 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff
))
3313 rtx temp
= expand_shift (LSHIFT_EXPR
, mode
, op0
,
3314 floor_log2 (coeff
), target
, unsignedp
);
3315 return expand_unop (mode
, neg_optab
, temp
, target
, 0);
3318 if (choose_mult_variant (mode
, coeff
, &algorithm
, &variant
,
3321 rtx temp
= expand_mult_const (mode
, op0
, coeff
, NULL_RTX
,
3322 &algorithm
, variant
);
3323 return expand_unop (mode
, neg_optab
, temp
, target
, 0);
3328 /* Exclude cost of op0 from max_cost to match the cost
3329 calculation of the synth_mult. */
3330 max_cost
= set_src_cost (gen_rtx_MULT (mode
, fake_reg
, op1
), speed
);
3331 if (choose_mult_variant (mode
, coeff
, &algorithm
, &variant
, max_cost
))
3332 return expand_mult_const (mode
, op0
, coeff
, target
,
3333 &algorithm
, variant
);
3337 /* Expand x*2.0 as x+x. */
3338 if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1
))
3341 REAL_VALUE_FROM_CONST_DOUBLE (d
, scalar_op1
);
3343 if (REAL_VALUES_EQUAL (d
, dconst2
))
3345 op0
= force_reg (GET_MODE (op0
), op0
);
3346 return expand_binop (mode
, add_optab
, op0
, op0
,
3347 target
, unsignedp
, OPTAB_LIB_WIDEN
);
3352 /* This used to use umul_optab if unsigned, but for non-widening multiply
3353 there is no difference between signed and unsigned. */
3354 op0
= expand_binop (mode
, do_trapv
? smulv_optab
: smul_optab
,
3355 op0
, op1
, target
, unsignedp
, OPTAB_LIB_WIDEN
);
3360 /* Return a cost estimate for multiplying a register by the given
3361 COEFFicient in the given MODE and SPEED. */
/* Builds a throwaway MULT rtx on a fake (never-emitted) register
   purely to price a general multiply instruction, then lets
   choose_mult_variant see if a synthetic shift/add sequence beats
   that budget; if so, the synthetic cost is returned instead.
   NOTE(review): the fallthrough return for the "no cheaper variant"
   case is missing from this lossy listing.  */
3364 mult_by_coeff_cost (HOST_WIDE_INT coeff
, machine_mode mode
, bool speed
)
3367 struct algorithm algorithm
;
3368 enum mult_variant variant
;
3370 rtx fake_reg
= gen_raw_REG (mode
, LAST_VIRTUAL_REGISTER
+ 1);
3371 max_cost
= set_src_cost (gen_rtx_MULT (mode
, fake_reg
, fake_reg
), speed
);
3372 if (choose_mult_variant (mode
, coeff
, &algorithm
, &variant
, max_cost
))
3373 return algorithm
.cost
.cost
;
3378 /* Perform a widening multiplication and return an rtx for the result.
3379 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3380 TARGET is a suggestion for where to store the result (an rtx).
3381 THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3382 or smul_widen_optab.
3384 We check specially for a constant integer as OP1, comparing the
3385 cost of a widening multiply against the cost of a sequence of shifts
/* For a constant OP1 (first re-extended into MODE with the signedness
   implied by THIS_OPTAB): multiply-by-0 yields CONST0_RTX, powers of
   two become a widening shift, and other constants use a synth_mult
   shift/add sequence when it beats mul_widen_cost; everything else
   falls back to the widening-multiply optab via expand_binop.  */
3389 expand_widening_mult (machine_mode mode
, rtx op0
, rtx op1
, rtx target
,
3390 int unsignedp
, optab this_optab
)
3392 bool speed
= optimize_insn_for_speed_p ();
3395 if (CONST_INT_P (op1
)
3396 && GET_MODE (op0
) != VOIDmode
3397 && (cop1
= convert_modes (mode
, GET_MODE (op0
), op1
,
3398 this_optab
== umul_widen_optab
))
3399 && CONST_INT_P (cop1
)
3400 && (INTVAL (cop1
) >= 0
3401 || HWI_COMPUTABLE_MODE_P (mode
)))
3403 HOST_WIDE_INT coeff
= INTVAL (cop1
);
3405 enum mult_variant variant
;
3406 struct algorithm algorithm
;
3409 return CONST0_RTX (mode
);
3411 /* Special case powers of two. */
3412 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff
))
3414 op0
= convert_to_mode (mode
, op0
, this_optab
== umul_widen_optab
);
3415 return expand_shift (LSHIFT_EXPR
, mode
, op0
,
3416 floor_log2 (coeff
), target
, unsignedp
);
3419 /* Exclude cost of op0 from max_cost to match the cost
3420 calculation of the synth_mult. */
3421 max_cost
= mul_widen_cost (speed
, mode
);
3422 if (choose_mult_variant (mode
, coeff
, &algorithm
, &variant
,
3425 op0
= convert_to_mode (mode
, op0
, this_optab
== umul_widen_optab
);
3426 return expand_mult_const (mode
, op0
, coeff
, target
,
3427 &algorithm
, variant
);
3430 return expand_binop (mode
, this_optab
, op0
, op1
, target
,
3431 unsignedp
, OPTAB_LIB_WIDEN
);
3434 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3435 replace division by D, and put the least significant N bits of the result
3436 in *MULTIPLIER_PTR and return the most significant bit.
3438 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3439 needed precision is in PRECISION (should be <= N).
3441 PRECISION should be as small as possible so this function can choose
3442 multiplier more freely.
3444 The rounded-up logarithm of D is placed in *lgup_ptr. A shift count that
3445 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3447 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3448 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
/* Classic Granlund/Montgomery magic-number computation: derive mlow
   and mhigh by dividing 2^(N+lgup) (resp. plus 2^pow2) by D in
   double-width wide_int arithmetic, then strip common trailing zero
   bits to minimize the post shift.
   NOTE(review): the loop body's break condition and some fragments are
   missing from this lossy listing; `pow` at original line 3467 is
   presumably the N+lgup exponent paired with `pow2` — confirm against
   the full source.  */
3450 unsigned HOST_WIDE_INT
3451 choose_multiplier (unsigned HOST_WIDE_INT d
, int n
, int precision
,
3452 unsigned HOST_WIDE_INT
*multiplier_ptr
,
3453 int *post_shift_ptr
, int *lgup_ptr
)
3455 int lgup
, post_shift
;
3458 /* lgup = ceil(log2(divisor)); */
3459 lgup
= ceil_log2 (d
);
3461 gcc_assert (lgup
<= n
);
3464 pow2
= n
+ lgup
- precision
;
3466 /* mlow = 2^(N + lgup)/d */
3467 wide_int val
= wi::set_bit_in_zero (pow
, HOST_BITS_PER_DOUBLE_INT
);
3468 wide_int mlow
= wi::udiv_trunc (val
, d
);
3470 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3471 val
|= wi::set_bit_in_zero (pow2
, HOST_BITS_PER_DOUBLE_INT
);
3472 wide_int mhigh
= wi::udiv_trunc (val
, d
);
3474 /* If precision == N, then mlow, mhigh exceed 2^N
3475 (but they do not exceed 2^(N+1)). */
3477 /* Reduce to lowest terms. */
3478 for (post_shift
= lgup
; post_shift
> 0; post_shift
--)
3480 unsigned HOST_WIDE_INT ml_lo
= wi::extract_uhwi (mlow
, 1,
3481 HOST_BITS_PER_WIDE_INT
);
3482 unsigned HOST_WIDE_INT mh_lo
= wi::extract_uhwi (mhigh
, 1,
3483 HOST_BITS_PER_WIDE_INT
);
3487 mlow
= wi::uhwi (ml_lo
, HOST_BITS_PER_DOUBLE_INT
);
3488 mhigh
= wi::uhwi (mh_lo
, HOST_BITS_PER_DOUBLE_INT
);
3491 *post_shift_ptr
= post_shift
;
3493 if (n
< HOST_BITS_PER_WIDE_INT
)
3495 unsigned HOST_WIDE_INT mask
= ((unsigned HOST_WIDE_INT
) 1 << n
) - 1;
3496 *multiplier_ptr
= mhigh
.to_uhwi () & mask
;
3497 return mhigh
.to_uhwi () >= mask
;
3501 *multiplier_ptr
= mhigh
.to_uhwi ();
3502 return wi::extract_uhwi (mhigh
, HOST_BITS_PER_WIDE_INT
, 1);
3506 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3507 congruent to 1 (mod 2**N). */
/* Newton-style doubling: starting from y = x (already an inverse mod
   2^3 for odd x, per the comment below), each y = y * (2 - x*y) step
   doubles the number of correct low-order bits, masked back to N bits.
   NOTE(review): the surrounding loop header/return are missing from
   this lossy listing — only the update step is visible.  */
3509 static unsigned HOST_WIDE_INT
3510 invert_mod2n (unsigned HOST_WIDE_INT x
, int n
)
3512 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
3514 /* The algorithm notes that the choice y = x satisfies
3515 x*y == 1 mod 2^3, since x is assumed odd.
3516 Each iteration doubles the number of bits of significance in y. */
3518 unsigned HOST_WIDE_INT mask
;
3519 unsigned HOST_WIDE_INT y
= x
;
/* Avoid UB from a full-width shift when n == HOST_BITS_PER_WIDE_INT.  */
3522 mask
= (n
== HOST_BITS_PER_WIDE_INT
3523 ? ~(unsigned HOST_WIDE_INT
) 0
3524 : ((unsigned HOST_WIDE_INT
) 1 << n
) - 1);
3528 y
= y
* (2 - x
*y
) & mask
; /* Modulo 2^N */
3534 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3535 flavor of OP0 and OP1. ADJ_OPERAND is already the high half of the
3536 product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed product
3537 to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3540 The result is put in TARGET if that is convenient.
3542 MODE is the mode of operation. */
/* For each operand, broadcast its sign bit (arithmetic shift right by
   BITSIZE-1), AND with the other operand, and fold the term into
   ADJ_OPERAND — PLUS when converting signed->unsigned, MINUS for the
   reverse.  This is the standard (op < 0 ? other : 0) correction
   between signed and unsigned high-part products.  */
3545 expand_mult_highpart_adjust (machine_mode mode
, rtx adj_operand
, rtx op0
,
3546 rtx op1
, rtx target
, int unsignedp
)
3549 enum rtx_code adj_code
= unsignedp
? PLUS
: MINUS
;
3551 tem
= expand_shift (RSHIFT_EXPR
, mode
, op0
,
3552 GET_MODE_BITSIZE (mode
) - 1, NULL_RTX
, 0);
3553 tem
= expand_and (mode
, tem
, op1
, NULL_RTX
);
3555 = force_operand (gen_rtx_fmt_ee (adj_code
, mode
, adj_operand
, tem
),
3558 tem
= expand_shift (RSHIFT_EXPR
, mode
, op1
,
3559 GET_MODE_BITSIZE (mode
) - 1, NULL_RTX
, 0);
3560 tem
= expand_and (mode
, tem
, op0
, NULL_RTX
);
3561 target
= force_operand (gen_rtx_fmt_ee (adj_code
, mode
, adj_operand
, tem
),
3567 /* Subroutine of expmed_mult_highpart. Return the MODE high part of OP. */
/* OP lives in the mode one step wider than MODE (except when MODE is
   word_mode, where gen_highpart can take the upper word directly):
   shift right by MODE's bit size in the wider mode, then truncate
   back down with convert_modes.  */
3570 extract_high_half (machine_mode mode
, rtx op
)
3572 machine_mode wider_mode
;
3574 if (mode
== word_mode
)
3575 return gen_highpart (mode
, op
);
/* Vector float modes have no "high half" in this sense.  */
3577 gcc_assert (!SCALAR_FLOAT_MODE_P (mode
));
3579 wider_mode
= GET_MODE_WIDER_MODE (mode
);
3580 op
= expand_shift (RSHIFT_EXPR
, wider_mode
, op
,
3581 GET_MODE_BITSIZE (mode
), 0, 1);
3582 return convert_modes (mode
, wider_mode
, op
, 0);
3585 /* Like expmed_mult_highpart, but only consider using a multiplication
3586 optab. OP1 is an rtx for the constant operand. */
/* Strategies tried in order, each gated by its cost vs MAX_COST:
   1. direct {u,s}mul_highpart optab of the requested signedness;
   2. the opposite-signedness highpart optab plus
      expand_mult_highpart_adjust correction;
   3. a widening multiply, taking the high half;
   4. a plain multiply carried out in the wider mode (operands widened
      first, wrapped in a discardable insn sequence);
   5. an opposite-signedness widening multiply plus adjust.
   NOTE(review): the failure-return lines between branches are missing
   from this lossy listing.  */
3589 expmed_mult_highpart_optab (machine_mode mode
, rtx op0
, rtx op1
,
3590 rtx target
, int unsignedp
, int max_cost
)
3592 rtx narrow_op1
= gen_int_mode (INTVAL (op1
), mode
);
3593 machine_mode wider_mode
;
3597 bool speed
= optimize_insn_for_speed_p ();
3599 gcc_assert (!SCALAR_FLOAT_MODE_P (mode
));
3601 wider_mode
= GET_MODE_WIDER_MODE (mode
);
3602 size
= GET_MODE_BITSIZE (mode
);
3604 /* Firstly, try using a multiplication insn that only generates the needed
3605 high part of the product, and in the sign flavor of unsignedp. */
3606 if (mul_highpart_cost (speed
, mode
) < max_cost
)
3608 moptab
= unsignedp
? umul_highpart_optab
: smul_highpart_optab
;
3609 tem
= expand_binop (mode
, moptab
, op0
, narrow_op1
, target
,
3610 unsignedp
, OPTAB_DIRECT
);
3615 /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3616 Need to adjust the result after the multiplication. */
3617 if (size
- 1 < BITS_PER_WORD
3618 && (mul_highpart_cost (speed
, mode
)
3619 + 2 * shift_cost (speed
, mode
, size
-1)
3620 + 4 * add_cost (speed
, mode
) < max_cost
))
3622 moptab
= unsignedp
? smul_highpart_optab
: umul_highpart_optab
;
3623 tem
= expand_binop (mode
, moptab
, op0
, narrow_op1
, target
,
3624 unsignedp
, OPTAB_DIRECT
);
3626 /* We used the wrong signedness. Adjust the result. */
3627 return expand_mult_highpart_adjust (mode
, tem
, op0
, narrow_op1
,
3631 /* Try widening multiplication. */
3632 moptab
= unsignedp
? umul_widen_optab
: smul_widen_optab
;
3633 if (widening_optab_handler (moptab
, wider_mode
, mode
) != CODE_FOR_nothing
3634 && mul_widen_cost (speed
, wider_mode
) < max_cost
)
3636 tem
= expand_binop (wider_mode
, moptab
, op0
, narrow_op1
, 0,
3637 unsignedp
, OPTAB_WIDEN
);
3639 return extract_high_half (mode
, tem
);
3642 /* Try widening the mode and perform a non-widening multiplication. */
3643 if (optab_handler (smul_optab
, wider_mode
) != CODE_FOR_nothing
3644 && size
- 1 < BITS_PER_WORD
3645 && (mul_cost (speed
, wider_mode
) + shift_cost (speed
, mode
, size
-1)
3651 /* We need to widen the operands, for example to ensure the
3652 constant multiplier is correctly sign or zero extended.
3653 Use a sequence to clean-up any instructions emitted by
3654 the conversions if things don't work out. */
3656 wop0
= convert_modes (wider_mode
, mode
, op0
, unsignedp
);
3657 wop1
= convert_modes (wider_mode
, mode
, op1
, unsignedp
);
3658 tem
= expand_binop (wider_mode
, smul_optab
, wop0
, wop1
, 0,
3659 unsignedp
, OPTAB_WIDEN
);
3660 insns
= get_insns ();
3666 return extract_high_half (mode
, tem
);
3670 /* Try widening multiplication of opposite signedness, and adjust. */
3671 moptab
= unsignedp
? smul_widen_optab
: umul_widen_optab
;
3672 if (widening_optab_handler (moptab
, wider_mode
, mode
) != CODE_FOR_nothing
3673 && size
- 1 < BITS_PER_WORD
3674 && (mul_widen_cost (speed
, wider_mode
)
3675 + 2 * shift_cost (speed
, mode
, size
-1)
3676 + 4 * add_cost (speed
, mode
) < max_cost
))
3678 tem
= expand_binop (wider_mode
, moptab
, op0
, narrow_op1
,
3679 NULL_RTX
, ! unsignedp
, OPTAB_WIDEN
);
3682 tem
= extract_high_half (mode
, tem
);
3683 /* We used the wrong signedness. Adjust the result. */
3684 return expand_mult_highpart_adjust (mode
, tem
, op0
, narrow_op1
,
3692 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3693 putting the high half of the result in TARGET if that is convenient,
3694 and return where the result is. If the operation can not be performed,
3697 MODE is the mode of operation and result.
3699 UNSIGNEDP nonzero means unsigned multiply.
3701 MAX_COST is the total allowed cost for the expanded RTL.
/* Cost-compares a synth_mult shift/add expansion done in WIDER_MODE
   against the optab-based paths in expmed_mult_highpart_optab, and
   emits the cheaper.  A negative signed constant charges an extra
   add_cost up front and (per the line-3756 fixup) subtracts OP0 from
   the extracted high half afterwards — presumably driven by
   sign_adjust, whose assignment is missing from this lossy listing;
   confirm against the full source.  */
3704 expmed_mult_highpart (machine_mode mode
, rtx op0
, rtx op1
,
3705 rtx target
, int unsignedp
, int max_cost
)
3707 machine_mode wider_mode
= GET_MODE_WIDER_MODE (mode
);
3708 unsigned HOST_WIDE_INT cnst1
;
3710 bool sign_adjust
= false;
3711 enum mult_variant variant
;
3712 struct algorithm alg
;
3714 bool speed
= optimize_insn_for_speed_p ();
3716 gcc_assert (!SCALAR_FLOAT_MODE_P (mode
));
3717 /* We can't support modes wider than HOST_BITS_PER_INT. */
3718 gcc_assert (HWI_COMPUTABLE_MODE_P (mode
));
3720 cnst1
= INTVAL (op1
) & GET_MODE_MASK (mode
);
3722 /* We can't optimize modes wider than BITS_PER_WORD.
3723 ??? We might be able to perform double-word arithmetic if
3724 mode == word_mode, however all the cost calculations in
3725 synth_mult etc. assume single-word operations. */
3726 if (GET_MODE_BITSIZE (wider_mode
) > BITS_PER_WORD
)
3727 return expmed_mult_highpart_optab (mode
, op0
, op1
, target
,
3728 unsignedp
, max_cost
);
3730 extra_cost
= shift_cost (speed
, mode
, GET_MODE_BITSIZE (mode
) - 1);
3732 /* Check whether we try to multiply by a negative constant. */
3733 if (!unsignedp
&& ((cnst1
>> (GET_MODE_BITSIZE (mode
) - 1)) & 1))
3736 extra_cost
+= add_cost (speed
, mode
);
3739 /* See whether shift/add multiplication is cheap enough. */
3740 if (choose_mult_variant (wider_mode
, cnst1
, &alg
, &variant
,
3741 max_cost
- extra_cost
))
3743 /* See whether the specialized multiplication optabs are
3744 cheaper than the shift/add version. */
3745 tem
= expmed_mult_highpart_optab (mode
, op0
, op1
, target
, unsignedp
,
3746 alg
.cost
.cost
+ extra_cost
);
3750 tem
= convert_to_mode (wider_mode
, op0
, unsignedp
);
3751 tem
= expand_mult_const (wider_mode
, tem
, cnst1
, 0, &alg
, variant
);
3752 tem
= extract_high_half (mode
, tem
);
3754 /* Adjust result for signedness. */
3756 tem
= force_operand (gen_rtx_MINUS (mode
, tem
, op0
), tem
);
3760 return expmed_mult_highpart_optab (mode
, op0
, op1
, target
,
3761 unsignedp
, max_cost
);
3765 /* Expand signed modulus of OP0 by a power of two D in mode MODE. */
/* Two strategies, chosen by branch cost:
   - Branch-free: build signmask = (op0 < 0 ? all-ones : 0), then
     either the 2xXOR/2xSUB/AND form or the cheaper
     LSHIFTRT/ADD/AND/SUB form depending on shift cost, so the mask of
     low bits is applied symmetrically around zero.
   - Branching: AND op0 with (signbit | low bits) — including the sign
     bit lets the following GE-vs-zero test avoid a separate compare —
     and, on the negative path after the label, OR in the high bits and
     re-bias with the SUB 1 / ADD 1 pair.  */
3768 expand_smod_pow2 (machine_mode mode
, rtx op0
, HOST_WIDE_INT d
)
3770 rtx result
, temp
, shift
;
3771 rtx_code_label
*label
;
3773 int prec
= GET_MODE_PRECISION (mode
);
3775 logd
= floor_log2 (d
);
3776 result
= gen_reg_rtx (mode
);
3778 /* Avoid conditional branches when they're expensive. */
3779 if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3780 && optimize_insn_for_speed_p ())
3782 rtx signmask
= emit_store_flag (result
, LT
, op0
, const0_rtx
,
3786 HOST_WIDE_INT masklow
= ((HOST_WIDE_INT
) 1 << logd
) - 1;
3787 signmask
= force_reg (mode
, signmask
);
3788 shift
= GEN_INT (GET_MODE_BITSIZE (mode
) - logd
);
3790 /* Use the rtx_cost of a LSHIFTRT instruction to determine
3791 which instruction sequence to use. If logical right shifts
3792 are expensive the use 2 XORs, 2 SUBs and an AND, otherwise
3793 use a LSHIFTRT, 1 ADD, 1 SUB and an AND. */
3795 temp
= gen_rtx_LSHIFTRT (mode
, result
, shift
);
3796 if (optab_handler (lshr_optab
, mode
) == CODE_FOR_nothing
3797 || (set_src_cost (temp
, optimize_insn_for_speed_p ())
3798 > COSTS_N_INSNS (2)))
3800 temp
= expand_binop (mode
, xor_optab
, op0
, signmask
,
3801 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
3802 temp
= expand_binop (mode
, sub_optab
, temp
, signmask
,
3803 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
3804 temp
= expand_binop (mode
, and_optab
, temp
,
3805 gen_int_mode (masklow
, mode
),
3806 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
3807 temp
= expand_binop (mode
, xor_optab
, temp
, signmask
,
3808 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
3809 temp
= expand_binop (mode
, sub_optab
, temp
, signmask
,
3810 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
3814 signmask
= expand_binop (mode
, lshr_optab
, signmask
, shift
,
3815 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
3816 signmask
= force_reg (mode
, signmask
);
3818 temp
= expand_binop (mode
, add_optab
, op0
, signmask
,
3819 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
3820 temp
= expand_binop (mode
, and_optab
, temp
,
3821 gen_int_mode (masklow
, mode
),
3822 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
3823 temp
= expand_binop (mode
, sub_optab
, temp
, signmask
,
3824 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
3830 /* Mask contains the mode's signbit and the significant bits of the
3831 modulus. By including the signbit in the operation, many targets
3832 can avoid an explicit compare operation in the following comparison
3834 wide_int mask
= wi::mask (logd
, false, prec
);
3835 mask
= wi::set_bit (mask
, prec
- 1);
3837 temp
= expand_binop (mode
, and_optab
, op0
,
3838 immed_wide_int_const (mask
, mode
),
3839 result
, 1, OPTAB_LIB_WIDEN
);
3841 emit_move_insn (result
, temp
);
3843 label
= gen_label_rtx ();
3844 do_cmp_and_jump (result
, const0_rtx
, GE
, mode
, label
);
3846 temp
= expand_binop (mode
, sub_optab
, result
, const1_rtx
, result
,
3847 0, OPTAB_LIB_WIDEN
);
3849 mask
= wi::mask (logd
, true, prec
);
3850 temp
= expand_binop (mode
, ior_optab
, temp
,
3851 immed_wide_int_const (mask
, mode
),
3852 result
, 1, OPTAB_LIB_WIDEN
);
3853 temp
= expand_binop (mode
, add_optab
, temp
, const1_rtx
, result
,
3854 0, OPTAB_LIB_WIDEN
);
3856 emit_move_insn (result
, temp
);
3861 /* Expand signed division of OP0 by a power of two D in mode MODE.
3862 This routine is only called for positive values of D.
/* Rounds toward zero by adding D-1 to negative dividends before the
   arithmetic right shift.  Four strategies, chosen by branch cost and
   availability:
   1. (D == 2 case, per the store-flag of 1): add the LT store-flag
      result to op0, then shift;
   2. conditional move of op0+D-1 over op0 when op0 < 0;
   3. branch-free sign-mask: store-flag -1 on negative, keep only the
      low logd bits (via AND or a shift pair, whichever is cheaper),
      add, shift;
   4. explicit compare-and-jump around an expand_inc of D-1.
   NOTE(review): several condition fragments (e.g. the guard before
   line 3874 and the cmove cost test at 3885) are truncated in this
   lossy listing.  */
3865 expand_sdiv_pow2 (machine_mode mode
, rtx op0
, HOST_WIDE_INT d
)
3868 rtx_code_label
*label
;
3871 logd
= floor_log2 (d
);
3874 && BRANCH_COST (optimize_insn_for_speed_p (),
3877 temp
= gen_reg_rtx (mode
);
3878 temp
= emit_store_flag (temp
, LT
, op0
, const0_rtx
, mode
, 0, 1);
3879 temp
= expand_binop (mode
, add_optab
, temp
, op0
, NULL_RTX
,
3880 0, OPTAB_LIB_WIDEN
);
3881 return expand_shift (RSHIFT_EXPR
, mode
, temp
, logd
, NULL_RTX
, 0);
3884 #ifdef HAVE_conditional_move
3885 if (BRANCH_COST (optimize_insn_for_speed_p (), false)
3891 temp2
= copy_to_mode_reg (mode
, op0
);
3892 temp
= expand_binop (mode
, add_optab
, temp2
, gen_int_mode (d
- 1, mode
),
3893 NULL_RTX
, 0, OPTAB_LIB_WIDEN
);
3894 temp
= force_reg (mode
, temp
);
3896 /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */
3897 temp2
= emit_conditional_move (temp2
, LT
, temp2
, const0_rtx
,
3898 mode
, temp
, temp2
, mode
, 0);
3901 rtx_insn
*seq
= get_insns ();
3904 return expand_shift (RSHIFT_EXPR
, mode
, temp2
, logd
, NULL_RTX
, 0);
3910 if (BRANCH_COST (optimize_insn_for_speed_p (),
3913 int ushift
= GET_MODE_BITSIZE (mode
) - logd
;
3915 temp
= gen_reg_rtx (mode
);
3916 temp
= emit_store_flag (temp
, LT
, op0
, const0_rtx
, mode
, 0, -1);
3917 if (GET_MODE_BITSIZE (mode
) >= BITS_PER_WORD
3918 || shift_cost (optimize_insn_for_speed_p (), mode
, ushift
)
3919 > COSTS_N_INSNS (1))
3920 temp
= expand_binop (mode
, and_optab
, temp
, gen_int_mode (d
- 1, mode
),
3921 NULL_RTX
, 0, OPTAB_LIB_WIDEN
);
3923 temp
= expand_shift (RSHIFT_EXPR
, mode
, temp
,
3924 ushift
, NULL_RTX
, 1);
3925 temp
= expand_binop (mode
, add_optab
, temp
, op0
, NULL_RTX
,
3926 0, OPTAB_LIB_WIDEN
);
3927 return expand_shift (RSHIFT_EXPR
, mode
, temp
, logd
, NULL_RTX
, 0);
3930 label
= gen_label_rtx ();
3931 temp
= copy_to_mode_reg (mode
, op0
);
3932 do_cmp_and_jump (temp
, const0_rtx
, GE
, mode
, label
);
3933 expand_inc (temp
, gen_int_mode (d
- 1, mode
));
3935 return expand_shift (RSHIFT_EXPR
, mode
, temp
, logd
, NULL_RTX
, 0);
3938 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3939 if that is convenient, and returning where the result is.
3940 You may request either the quotient or the remainder as the result;
3941 specify REM_FLAG nonzero to get the remainder.
3943 CODE is the expression code for which kind of division this is;
3944 it controls how rounding is done. MODE is the machine mode to use.
3945 UNSIGNEDP nonzero means do unsigned division. */
3947 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3948 and then correct it by or'ing in missing high bits
3949 if result of ANDI is nonzero.
3950 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3951 This could optimize to a bfexts instruction.
3952 But C doesn't use these operations, so their optimizations are
3954 /* ??? For modulo, we don't actually need the highpart of the first product,
3955 the low part will do nicely. And for small divisors, the second multiply
3956 can also be a low-part only multiply or even be completely left out.
3957 E.g. to calculate the remainder of a division by 3 with a 32 bit
3958 multiply, multiply with 0x55555556 and extract the upper two bits;
3959 the result is exact for inputs up to 0x1fffffff.
3960 The input range can be reduced by using cross-sum rules.
3961 For odd divisors >= 3, the following table gives right shift counts
3962 so that if a number is shifted by an integer multiple of the given
3963 amount, the remainder stays the same:
3964 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3965 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3966 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3967 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3968 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3970 Cross-sum rules for even numbers can be derived by leaving as many bits
3971 to the right alone as the divisor has zeros to the right.
3972 E.g. if x is an unsigned 32 bit number:
3973 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3977 expand_divmod (int rem_flag
, enum tree_code code
, machine_mode mode
,
3978 rtx op0
, rtx op1
, rtx target
, int unsignedp
)
3980 machine_mode compute_mode
;
3982 rtx quotient
= 0, remainder
= 0;
3986 optab optab1
, optab2
;
3987 int op1_is_constant
, op1_is_pow2
= 0;
3988 int max_cost
, extra_cost
;
3989 static HOST_WIDE_INT last_div_const
= 0;
3990 bool speed
= optimize_insn_for_speed_p ();
3992 op1_is_constant
= CONST_INT_P (op1
);
3993 if (op1_is_constant
)
3995 unsigned HOST_WIDE_INT ext_op1
= UINTVAL (op1
);
3997 ext_op1
&= GET_MODE_MASK (mode
);
3998 op1_is_pow2
= ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1
)
3999 || (! unsignedp
&& EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1
))));
4003 This is the structure of expand_divmod:
4005 First comes code to fix up the operands so we can perform the operations
4006 correctly and efficiently.
4008 Second comes a switch statement with code specific for each rounding mode.
4009 For some special operands this code emits all RTL for the desired
4010 operation, for other cases, it generates only a quotient and stores it in
4011 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
4012 to indicate that it has not done anything.
4014 Last comes code that finishes the operation. If QUOTIENT is set and
4015 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
4016 QUOTIENT is not set, it is computed using trunc rounding.
4018 We try to generate special code for division and remainder when OP1 is a
4019 constant. If |OP1| = 2**n we can use shifts and some other fast
4020 operations. For other values of OP1, we compute a carefully selected
4021 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
4024 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
4025 half of the product. Different strategies for generating the product are
4026 implemented in expmed_mult_highpart.
4028 If what we actually want is the remainder, we generate that by another
4029 by-constant multiplication and a subtraction. */
4031 /* We shouldn't be called with OP1 == const1_rtx, but some of the
4032 code below will malfunction if we are, so check here and handle
4033 the special case if so. */
4034 if (op1
== const1_rtx
)
4035 return rem_flag
? const0_rtx
: op0
;
4037 /* When dividing by -1, we could get an overflow.
4038 negv_optab can handle overflows. */
4039 if (! unsignedp
&& op1
== constm1_rtx
)
4043 return expand_unop (mode
, flag_trapv
&& GET_MODE_CLASS (mode
) == MODE_INT
4044 ? negv_optab
: neg_optab
, op0
, target
, 0);
4048 /* Don't use the function value register as a target
4049 since we have to read it as well as write it,
4050 and function-inlining gets confused by this. */
4051 && ((REG_P (target
) && REG_FUNCTION_VALUE_P (target
))
4052 /* Don't clobber an operand while doing a multi-step calculation. */
4053 || ((rem_flag
|| op1_is_constant
)
4054 && (reg_mentioned_p (target
, op0
)
4055 || (MEM_P (op0
) && MEM_P (target
))))
4056 || reg_mentioned_p (target
, op1
)
4057 || (MEM_P (op1
) && MEM_P (target
))))
4060 /* Get the mode in which to perform this computation. Normally it will
4061 be MODE, but sometimes we can't do the desired operation in MODE.
4062 If so, pick a wider mode in which we can do the operation. Convert
4063 to that mode at the start to avoid repeated conversions.
4065 First see what operations we need. These depend on the expression
4066 we are evaluating. (We assume that divxx3 insns exist under the
4067 same conditions that modxx3 insns and that these insns don't normally
4068 fail. If these assumptions are not correct, we may generate less
4069 efficient code in some cases.)
4071 Then see if we find a mode in which we can open-code that operation
4072 (either a division, modulus, or shift). Finally, check for the smallest
4073 mode for which we can do the operation with a library call. */
4075 /* We might want to refine this now that we have division-by-constant
4076 optimization. Since expmed_mult_highpart tries so many variants, it is
4077 not straightforward to generalize this. Maybe we should make an array
4078 of possible modes in init_expmed? Save this for GCC 2.7. */
4080 optab1
= ((op1_is_pow2
&& op1
!= const0_rtx
)
4081 ? (unsignedp
? lshr_optab
: ashr_optab
)
4082 : (unsignedp
? udiv_optab
: sdiv_optab
));
4083 optab2
= ((op1_is_pow2
&& op1
!= const0_rtx
)
4085 : (unsignedp
? udivmod_optab
: sdivmod_optab
));
4087 for (compute_mode
= mode
; compute_mode
!= VOIDmode
;
4088 compute_mode
= GET_MODE_WIDER_MODE (compute_mode
))
4089 if (optab_handler (optab1
, compute_mode
) != CODE_FOR_nothing
4090 || optab_handler (optab2
, compute_mode
) != CODE_FOR_nothing
)
4093 if (compute_mode
== VOIDmode
)
4094 for (compute_mode
= mode
; compute_mode
!= VOIDmode
;
4095 compute_mode
= GET_MODE_WIDER_MODE (compute_mode
))
4096 if (optab_libfunc (optab1
, compute_mode
)
4097 || optab_libfunc (optab2
, compute_mode
))
4100 /* If we still couldn't find a mode, use MODE, but expand_binop will
4102 if (compute_mode
== VOIDmode
)
4103 compute_mode
= mode
;
4105 if (target
&& GET_MODE (target
) == compute_mode
)
4108 tquotient
= gen_reg_rtx (compute_mode
);
4110 size
= GET_MODE_BITSIZE (compute_mode
);
4112 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
4113 (mode), and thereby get better code when OP1 is a constant. Do that
4114 later. It will require going over all usages of SIZE below. */
4115 size
= GET_MODE_BITSIZE (mode
);
4118 /* Only deduct something for a REM if the last divide done was
4119 for a different constant. Then set the constant of the last
4121 max_cost
= (unsignedp
4122 ? udiv_cost (speed
, compute_mode
)
4123 : sdiv_cost (speed
, compute_mode
));
4124 if (rem_flag
&& ! (last_div_const
!= 0 && op1_is_constant
4125 && INTVAL (op1
) == last_div_const
))
4126 max_cost
-= (mul_cost (speed
, compute_mode
)
4127 + add_cost (speed
, compute_mode
));
4129 last_div_const
= ! rem_flag
&& op1_is_constant
? INTVAL (op1
) : 0;
4131 /* Now convert to the best mode to use. */
4132 if (compute_mode
!= mode
)
4134 op0
= convert_modes (compute_mode
, mode
, op0
, unsignedp
);
4135 op1
= convert_modes (compute_mode
, mode
, op1
, unsignedp
);
4137 /* convert_modes may have placed op1 into a register, so we
4138 must recompute the following. */
4139 op1_is_constant
= CONST_INT_P (op1
);
4140 op1_is_pow2
= (op1_is_constant
4141 && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1
))
4143 && EXACT_POWER_OF_2_OR_ZERO_P (-UINTVAL (op1
))))));
4146 /* If one of the operands is a volatile MEM, copy it into a register. */
4148 if (MEM_P (op0
) && MEM_VOLATILE_P (op0
))
4149 op0
= force_reg (compute_mode
, op0
);
4150 if (MEM_P (op1
) && MEM_VOLATILE_P (op1
))
4151 op1
= force_reg (compute_mode
, op1
);
4153 /* If we need the remainder or if OP1 is constant, we need to
4154 put OP0 in a register in case it has any queued subexpressions. */
4155 if (rem_flag
|| op1_is_constant
)
4156 op0
= force_reg (compute_mode
, op0
);
4158 last
= get_last_insn ();
4160 /* Promote floor rounding to trunc rounding for unsigned operations. */
4163 if (code
== FLOOR_DIV_EXPR
)
4164 code
= TRUNC_DIV_EXPR
;
4165 if (code
== FLOOR_MOD_EXPR
)
4166 code
= TRUNC_MOD_EXPR
;
4167 if (code
== EXACT_DIV_EXPR
&& op1_is_pow2
)
4168 code
= TRUNC_DIV_EXPR
;
4171 if (op1
!= const0_rtx
)
4174 case TRUNC_MOD_EXPR
:
4175 case TRUNC_DIV_EXPR
:
4176 if (op1_is_constant
)
4180 unsigned HOST_WIDE_INT mh
, ml
;
4181 int pre_shift
, post_shift
;
4183 unsigned HOST_WIDE_INT d
= (INTVAL (op1
)
4184 & GET_MODE_MASK (compute_mode
));
4186 if (EXACT_POWER_OF_2_OR_ZERO_P (d
))
4188 pre_shift
= floor_log2 (d
);
4191 unsigned HOST_WIDE_INT mask
4192 = ((unsigned HOST_WIDE_INT
) 1 << pre_shift
) - 1;
4194 = expand_binop (compute_mode
, and_optab
, op0
,
4195 gen_int_mode (mask
, compute_mode
),
4199 return gen_lowpart (mode
, remainder
);
4201 quotient
= expand_shift (RSHIFT_EXPR
, compute_mode
, op0
,
4202 pre_shift
, tquotient
, 1);
4204 else if (size
<= HOST_BITS_PER_WIDE_INT
)
4206 if (d
>= ((unsigned HOST_WIDE_INT
) 1 << (size
- 1)))
4208 /* Most significant bit of divisor is set; emit an scc
4210 quotient
= emit_store_flag_force (tquotient
, GEU
, op0
, op1
,
4211 compute_mode
, 1, 1);
4215 /* Find a suitable multiplier and right shift count
4216 instead of multiplying with D. */
4218 mh
= choose_multiplier (d
, size
, size
,
4219 &ml
, &post_shift
, &dummy
);
4221 /* If the suggested multiplier is more than SIZE bits,
4222 we can do better for even divisors, using an
4223 initial right shift. */
4224 if (mh
!= 0 && (d
& 1) == 0)
4226 pre_shift
= floor_log2 (d
& -d
);
4227 mh
= choose_multiplier (d
>> pre_shift
, size
,
4229 &ml
, &post_shift
, &dummy
);
4239 if (post_shift
- 1 >= BITS_PER_WORD
)
4243 = (shift_cost (speed
, compute_mode
, post_shift
- 1)
4244 + shift_cost (speed
, compute_mode
, 1)
4245 + 2 * add_cost (speed
, compute_mode
));
4246 t1
= expmed_mult_highpart
4248 gen_int_mode (ml
, compute_mode
),
4249 NULL_RTX
, 1, max_cost
- extra_cost
);
4252 t2
= force_operand (gen_rtx_MINUS (compute_mode
,
4255 t3
= expand_shift (RSHIFT_EXPR
, compute_mode
,
4256 t2
, 1, NULL_RTX
, 1);
4257 t4
= force_operand (gen_rtx_PLUS (compute_mode
,
4260 quotient
= expand_shift
4261 (RSHIFT_EXPR
, compute_mode
, t4
,
4262 post_shift
- 1, tquotient
, 1);
4268 if (pre_shift
>= BITS_PER_WORD
4269 || post_shift
>= BITS_PER_WORD
)
4273 (RSHIFT_EXPR
, compute_mode
, op0
,
4274 pre_shift
, NULL_RTX
, 1);
4276 = (shift_cost (speed
, compute_mode
, pre_shift
)
4277 + shift_cost (speed
, compute_mode
, post_shift
));
4278 t2
= expmed_mult_highpart
4280 gen_int_mode (ml
, compute_mode
),
4281 NULL_RTX
, 1, max_cost
- extra_cost
);
4284 quotient
= expand_shift
4285 (RSHIFT_EXPR
, compute_mode
, t2
,
4286 post_shift
, tquotient
, 1);
4290 else /* Too wide mode to use tricky code */
4293 insn
= get_last_insn ();
4295 set_dst_reg_note (insn
, REG_EQUAL
,
4296 gen_rtx_UDIV (compute_mode
, op0
, op1
),
4299 else /* TRUNC_DIV, signed */
4301 unsigned HOST_WIDE_INT ml
;
4302 int lgup
, post_shift
;
4304 HOST_WIDE_INT d
= INTVAL (op1
);
4305 unsigned HOST_WIDE_INT abs_d
;
4307 /* Since d might be INT_MIN, we have to cast to
4308 unsigned HOST_WIDE_INT before negating to avoid
4309 undefined signed overflow. */
4311 ? (unsigned HOST_WIDE_INT
) d
4312 : - (unsigned HOST_WIDE_INT
) d
);
4314 /* n rem d = n rem -d */
4315 if (rem_flag
&& d
< 0)
4318 op1
= gen_int_mode (abs_d
, compute_mode
);
4324 quotient
= expand_unop (compute_mode
, neg_optab
, op0
,
4326 else if (HOST_BITS_PER_WIDE_INT
>= size
4327 && abs_d
== (unsigned HOST_WIDE_INT
) 1 << (size
- 1))
4329 /* This case is not handled correctly below. */
4330 quotient
= emit_store_flag (tquotient
, EQ
, op0
, op1
,
4331 compute_mode
, 1, 1);
4335 else if (EXACT_POWER_OF_2_OR_ZERO_P (d
)
4337 ? smod_pow2_cheap (speed
, compute_mode
)
4338 : sdiv_pow2_cheap (speed
, compute_mode
))
4339 /* We assume that cheap metric is true if the
4340 optab has an expander for this mode. */
4341 && ((optab_handler ((rem_flag
? smod_optab
4344 != CODE_FOR_nothing
)
4345 || (optab_handler (sdivmod_optab
,
4347 != CODE_FOR_nothing
)))
4349 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d
))
4353 remainder
= expand_smod_pow2 (compute_mode
, op0
, d
);
4355 return gen_lowpart (mode
, remainder
);
4358 if (sdiv_pow2_cheap (speed
, compute_mode
)
4359 && ((optab_handler (sdiv_optab
, compute_mode
)
4360 != CODE_FOR_nothing
)
4361 || (optab_handler (sdivmod_optab
, compute_mode
)
4362 != CODE_FOR_nothing
)))
4363 quotient
= expand_divmod (0, TRUNC_DIV_EXPR
,
4365 gen_int_mode (abs_d
,
4369 quotient
= expand_sdiv_pow2 (compute_mode
, op0
, abs_d
);
4371 /* We have computed OP0 / abs(OP1). If OP1 is negative,
4372 negate the quotient. */
4375 insn
= get_last_insn ();
4377 && abs_d
< ((unsigned HOST_WIDE_INT
) 1
4378 << (HOST_BITS_PER_WIDE_INT
- 1)))
4379 set_dst_reg_note (insn
, REG_EQUAL
,
4380 gen_rtx_DIV (compute_mode
, op0
,
4386 quotient
= expand_unop (compute_mode
, neg_optab
,
4387 quotient
, quotient
, 0);
4390 else if (size
<= HOST_BITS_PER_WIDE_INT
)
4392 choose_multiplier (abs_d
, size
, size
- 1,
4393 &ml
, &post_shift
, &lgup
);
4394 if (ml
< (unsigned HOST_WIDE_INT
) 1 << (size
- 1))
4398 if (post_shift
>= BITS_PER_WORD
4399 || size
- 1 >= BITS_PER_WORD
)
4402 extra_cost
= (shift_cost (speed
, compute_mode
, post_shift
)
4403 + shift_cost (speed
, compute_mode
, size
- 1)
4404 + add_cost (speed
, compute_mode
));
4405 t1
= expmed_mult_highpart
4406 (compute_mode
, op0
, gen_int_mode (ml
, compute_mode
),
4407 NULL_RTX
, 0, max_cost
- extra_cost
);
4411 (RSHIFT_EXPR
, compute_mode
, t1
,
4412 post_shift
, NULL_RTX
, 0);
4414 (RSHIFT_EXPR
, compute_mode
, op0
,
4415 size
- 1, NULL_RTX
, 0);
4418 = force_operand (gen_rtx_MINUS (compute_mode
,
4423 = force_operand (gen_rtx_MINUS (compute_mode
,
4431 if (post_shift
>= BITS_PER_WORD
4432 || size
- 1 >= BITS_PER_WORD
)
4435 ml
|= (~(unsigned HOST_WIDE_INT
) 0) << (size
- 1);
4436 mlr
= gen_int_mode (ml
, compute_mode
);
4437 extra_cost
= (shift_cost (speed
, compute_mode
, post_shift
)
4438 + shift_cost (speed
, compute_mode
, size
- 1)
4439 + 2 * add_cost (speed
, compute_mode
));
4440 t1
= expmed_mult_highpart (compute_mode
, op0
, mlr
,
4442 max_cost
- extra_cost
);
4445 t2
= force_operand (gen_rtx_PLUS (compute_mode
,
4449 (RSHIFT_EXPR
, compute_mode
, t2
,
4450 post_shift
, NULL_RTX
, 0);
4452 (RSHIFT_EXPR
, compute_mode
, op0
,
4453 size
- 1, NULL_RTX
, 0);
4456 = force_operand (gen_rtx_MINUS (compute_mode
,
4461 = force_operand (gen_rtx_MINUS (compute_mode
,
4466 else /* Too wide mode to use tricky code */
4469 insn
= get_last_insn ();
4471 set_dst_reg_note (insn
, REG_EQUAL
,
4472 gen_rtx_DIV (compute_mode
, op0
, op1
),
4478 delete_insns_since (last
);
4481 case FLOOR_DIV_EXPR
:
4482 case FLOOR_MOD_EXPR
:
4483 /* We will come here only for signed operations. */
4484 if (op1_is_constant
&& HOST_BITS_PER_WIDE_INT
>= size
)
4486 unsigned HOST_WIDE_INT mh
, ml
;
4487 int pre_shift
, lgup
, post_shift
;
4488 HOST_WIDE_INT d
= INTVAL (op1
);
4492 /* We could just as easily deal with negative constants here,
4493 but it does not seem worth the trouble for GCC 2.6. */
4494 if (EXACT_POWER_OF_2_OR_ZERO_P (d
))
4496 pre_shift
= floor_log2 (d
);
4499 unsigned HOST_WIDE_INT mask
4500 = ((unsigned HOST_WIDE_INT
) 1 << pre_shift
) - 1;
4501 remainder
= expand_binop
4502 (compute_mode
, and_optab
, op0
,
4503 gen_int_mode (mask
, compute_mode
),
4504 remainder
, 0, OPTAB_LIB_WIDEN
);
4506 return gen_lowpart (mode
, remainder
);
4508 quotient
= expand_shift
4509 (RSHIFT_EXPR
, compute_mode
, op0
,
4510 pre_shift
, tquotient
, 0);
4516 mh
= choose_multiplier (d
, size
, size
- 1,
4517 &ml
, &post_shift
, &lgup
);
4520 if (post_shift
< BITS_PER_WORD
4521 && size
- 1 < BITS_PER_WORD
)
4524 (RSHIFT_EXPR
, compute_mode
, op0
,
4525 size
- 1, NULL_RTX
, 0);
4526 t2
= expand_binop (compute_mode
, xor_optab
, op0
, t1
,
4527 NULL_RTX
, 0, OPTAB_WIDEN
);
4528 extra_cost
= (shift_cost (speed
, compute_mode
, post_shift
)
4529 + shift_cost (speed
, compute_mode
, size
- 1)
4530 + 2 * add_cost (speed
, compute_mode
));
4531 t3
= expmed_mult_highpart
4532 (compute_mode
, t2
, gen_int_mode (ml
, compute_mode
),
4533 NULL_RTX
, 1, max_cost
- extra_cost
);
4537 (RSHIFT_EXPR
, compute_mode
, t3
,
4538 post_shift
, NULL_RTX
, 1);
4539 quotient
= expand_binop (compute_mode
, xor_optab
,
4540 t4
, t1
, tquotient
, 0,
4548 rtx nsign
, t1
, t2
, t3
, t4
;
4549 t1
= force_operand (gen_rtx_PLUS (compute_mode
,
4550 op0
, constm1_rtx
), NULL_RTX
);
4551 t2
= expand_binop (compute_mode
, ior_optab
, op0
, t1
, NULL_RTX
,
4553 nsign
= expand_shift
4554 (RSHIFT_EXPR
, compute_mode
, t2
,
4555 size
- 1, NULL_RTX
, 0);
4556 t3
= force_operand (gen_rtx_MINUS (compute_mode
, t1
, nsign
),
4558 t4
= expand_divmod (0, TRUNC_DIV_EXPR
, compute_mode
, t3
, op1
,
4563 t5
= expand_unop (compute_mode
, one_cmpl_optab
, nsign
,
4565 quotient
= force_operand (gen_rtx_PLUS (compute_mode
,
4574 delete_insns_since (last
);
4576 /* Try using an instruction that produces both the quotient and
4577 remainder, using truncation. We can easily compensate the quotient
4578 or remainder to get floor rounding, once we have the remainder.
4579 Notice that we compute also the final remainder value here,
4580 and return the result right away. */
4581 if (target
== 0 || GET_MODE (target
) != compute_mode
)
4582 target
= gen_reg_rtx (compute_mode
);
4587 = REG_P (target
) ? target
: gen_reg_rtx (compute_mode
);
4588 quotient
= gen_reg_rtx (compute_mode
);
4593 = REG_P (target
) ? target
: gen_reg_rtx (compute_mode
);
4594 remainder
= gen_reg_rtx (compute_mode
);
4597 if (expand_twoval_binop (sdivmod_optab
, op0
, op1
,
4598 quotient
, remainder
, 0))
4600 /* This could be computed with a branch-less sequence.
4601 Save that for later. */
4603 rtx_code_label
*label
= gen_label_rtx ();
4604 do_cmp_and_jump (remainder
, const0_rtx
, EQ
, compute_mode
, label
);
4605 tem
= expand_binop (compute_mode
, xor_optab
, op0
, op1
,
4606 NULL_RTX
, 0, OPTAB_WIDEN
);
4607 do_cmp_and_jump (tem
, const0_rtx
, GE
, compute_mode
, label
);
4608 expand_dec (quotient
, const1_rtx
);
4609 expand_inc (remainder
, op1
);
4611 return gen_lowpart (mode
, rem_flag
? remainder
: quotient
);
4614 /* No luck with division elimination or divmod. Have to do it
4615 by conditionally adjusting op0 *and* the result. */
4617 rtx_code_label
*label1
, *label2
, *label3
, *label4
, *label5
;
4621 quotient
= gen_reg_rtx (compute_mode
);
4622 adjusted_op0
= copy_to_mode_reg (compute_mode
, op0
);
4623 label1
= gen_label_rtx ();
4624 label2
= gen_label_rtx ();
4625 label3
= gen_label_rtx ();
4626 label4
= gen_label_rtx ();
4627 label5
= gen_label_rtx ();
4628 do_cmp_and_jump (op1
, const0_rtx
, LT
, compute_mode
, label2
);
4629 do_cmp_and_jump (adjusted_op0
, const0_rtx
, LT
, compute_mode
, label1
);
4630 tem
= expand_binop (compute_mode
, sdiv_optab
, adjusted_op0
, op1
,
4631 quotient
, 0, OPTAB_LIB_WIDEN
);
4632 if (tem
!= quotient
)
4633 emit_move_insn (quotient
, tem
);
4634 emit_jump_insn (gen_jump (label5
));
4636 emit_label (label1
);
4637 expand_inc (adjusted_op0
, const1_rtx
);
4638 emit_jump_insn (gen_jump (label4
));
4640 emit_label (label2
);
4641 do_cmp_and_jump (adjusted_op0
, const0_rtx
, GT
, compute_mode
, label3
);
4642 tem
= expand_binop (compute_mode
, sdiv_optab
, adjusted_op0
, op1
,
4643 quotient
, 0, OPTAB_LIB_WIDEN
);
4644 if (tem
!= quotient
)
4645 emit_move_insn (quotient
, tem
);
4646 emit_jump_insn (gen_jump (label5
));
4648 emit_label (label3
);
4649 expand_dec (adjusted_op0
, const1_rtx
);
4650 emit_label (label4
);
4651 tem
= expand_binop (compute_mode
, sdiv_optab
, adjusted_op0
, op1
,
4652 quotient
, 0, OPTAB_LIB_WIDEN
);
4653 if (tem
!= quotient
)
4654 emit_move_insn (quotient
, tem
);
4655 expand_dec (quotient
, const1_rtx
);
4656 emit_label (label5
);
4664 if (op1_is_constant
&& EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1
)))
4667 unsigned HOST_WIDE_INT d
= INTVAL (op1
);
4668 t1
= expand_shift (RSHIFT_EXPR
, compute_mode
, op0
,
4669 floor_log2 (d
), tquotient
, 1);
4670 t2
= expand_binop (compute_mode
, and_optab
, op0
,
4671 gen_int_mode (d
- 1, compute_mode
),
4672 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
4673 t3
= gen_reg_rtx (compute_mode
);
4674 t3
= emit_store_flag (t3
, NE
, t2
, const0_rtx
,
4675 compute_mode
, 1, 1);
4678 rtx_code_label
*lab
;
4679 lab
= gen_label_rtx ();
4680 do_cmp_and_jump (t2
, const0_rtx
, EQ
, compute_mode
, lab
);
4681 expand_inc (t1
, const1_rtx
);
4686 quotient
= force_operand (gen_rtx_PLUS (compute_mode
,
4692 /* Try using an instruction that produces both the quotient and
4693 remainder, using truncation. We can easily compensate the
4694 quotient or remainder to get ceiling rounding, once we have the
4695 remainder. Notice that we compute also the final remainder
4696 value here, and return the result right away. */
4697 if (target
== 0 || GET_MODE (target
) != compute_mode
)
4698 target
= gen_reg_rtx (compute_mode
);
4702 remainder
= (REG_P (target
)
4703 ? target
: gen_reg_rtx (compute_mode
));
4704 quotient
= gen_reg_rtx (compute_mode
);
4708 quotient
= (REG_P (target
)
4709 ? target
: gen_reg_rtx (compute_mode
));
4710 remainder
= gen_reg_rtx (compute_mode
);
4713 if (expand_twoval_binop (udivmod_optab
, op0
, op1
, quotient
,
4716 /* This could be computed with a branch-less sequence.
4717 Save that for later. */
4718 rtx_code_label
*label
= gen_label_rtx ();
4719 do_cmp_and_jump (remainder
, const0_rtx
, EQ
,
4720 compute_mode
, label
);
4721 expand_inc (quotient
, const1_rtx
);
4722 expand_dec (remainder
, op1
);
4724 return gen_lowpart (mode
, rem_flag
? remainder
: quotient
);
4727 /* No luck with division elimination or divmod. Have to do it
4728 by conditionally adjusting op0 *and* the result. */
4730 rtx_code_label
*label1
, *label2
;
4731 rtx adjusted_op0
, tem
;
4733 quotient
= gen_reg_rtx (compute_mode
);
4734 adjusted_op0
= copy_to_mode_reg (compute_mode
, op0
);
4735 label1
= gen_label_rtx ();
4736 label2
= gen_label_rtx ();
4737 do_cmp_and_jump (adjusted_op0
, const0_rtx
, NE
,
4738 compute_mode
, label1
);
4739 emit_move_insn (quotient
, const0_rtx
);
4740 emit_jump_insn (gen_jump (label2
));
4742 emit_label (label1
);
4743 expand_dec (adjusted_op0
, const1_rtx
);
4744 tem
= expand_binop (compute_mode
, udiv_optab
, adjusted_op0
, op1
,
4745 quotient
, 1, OPTAB_LIB_WIDEN
);
4746 if (tem
!= quotient
)
4747 emit_move_insn (quotient
, tem
);
4748 expand_inc (quotient
, const1_rtx
);
4749 emit_label (label2
);
4754 if (op1_is_constant
&& EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1
))
4755 && INTVAL (op1
) >= 0)
4757 /* This is extremely similar to the code for the unsigned case
4758 above. For 2.7 we should merge these variants, but for
4759 2.6.1 I don't want to touch the code for unsigned since that
4760 get used in C. The signed case will only be used by other
4764 unsigned HOST_WIDE_INT d
= INTVAL (op1
);
4765 t1
= expand_shift (RSHIFT_EXPR
, compute_mode
, op0
,
4766 floor_log2 (d
), tquotient
, 0);
4767 t2
= expand_binop (compute_mode
, and_optab
, op0
,
4768 gen_int_mode (d
- 1, compute_mode
),
4769 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
4770 t3
= gen_reg_rtx (compute_mode
);
4771 t3
= emit_store_flag (t3
, NE
, t2
, const0_rtx
,
4772 compute_mode
, 1, 1);
4775 rtx_code_label
*lab
;
4776 lab
= gen_label_rtx ();
4777 do_cmp_and_jump (t2
, const0_rtx
, EQ
, compute_mode
, lab
);
4778 expand_inc (t1
, const1_rtx
);
4783 quotient
= force_operand (gen_rtx_PLUS (compute_mode
,
4789 /* Try using an instruction that produces both the quotient and
4790 remainder, using truncation. We can easily compensate the
4791 quotient or remainder to get ceiling rounding, once we have the
4792 remainder. Notice that we compute also the final remainder
4793 value here, and return the result right away. */
4794 if (target
== 0 || GET_MODE (target
) != compute_mode
)
4795 target
= gen_reg_rtx (compute_mode
);
4798 remainder
= (REG_P (target
)
4799 ? target
: gen_reg_rtx (compute_mode
));
4800 quotient
= gen_reg_rtx (compute_mode
);
4804 quotient
= (REG_P (target
)
4805 ? target
: gen_reg_rtx (compute_mode
));
4806 remainder
= gen_reg_rtx (compute_mode
);
4809 if (expand_twoval_binop (sdivmod_optab
, op0
, op1
, quotient
,
4812 /* This could be computed with a branch-less sequence.
4813 Save that for later. */
4815 rtx_code_label
*label
= gen_label_rtx ();
4816 do_cmp_and_jump (remainder
, const0_rtx
, EQ
,
4817 compute_mode
, label
);
4818 tem
= expand_binop (compute_mode
, xor_optab
, op0
, op1
,
4819 NULL_RTX
, 0, OPTAB_WIDEN
);
4820 do_cmp_and_jump (tem
, const0_rtx
, LT
, compute_mode
, label
);
4821 expand_inc (quotient
, const1_rtx
);
4822 expand_dec (remainder
, op1
);
4824 return gen_lowpart (mode
, rem_flag
? remainder
: quotient
);
4827 /* No luck with division elimination or divmod. Have to do it
4828 by conditionally adjusting op0 *and* the result. */
4830 rtx_code_label
*label1
, *label2
, *label3
, *label4
, *label5
;
4834 quotient
= gen_reg_rtx (compute_mode
);
4835 adjusted_op0
= copy_to_mode_reg (compute_mode
, op0
);
4836 label1
= gen_label_rtx ();
4837 label2
= gen_label_rtx ();
4838 label3
= gen_label_rtx ();
4839 label4
= gen_label_rtx ();
4840 label5
= gen_label_rtx ();
4841 do_cmp_and_jump (op1
, const0_rtx
, LT
, compute_mode
, label2
);
4842 do_cmp_and_jump (adjusted_op0
, const0_rtx
, GT
,
4843 compute_mode
, label1
);
4844 tem
= expand_binop (compute_mode
, sdiv_optab
, adjusted_op0
, op1
,
4845 quotient
, 0, OPTAB_LIB_WIDEN
);
4846 if (tem
!= quotient
)
4847 emit_move_insn (quotient
, tem
);
4848 emit_jump_insn (gen_jump (label5
));
4850 emit_label (label1
);
4851 expand_dec (adjusted_op0
, const1_rtx
);
4852 emit_jump_insn (gen_jump (label4
));
4854 emit_label (label2
);
4855 do_cmp_and_jump (adjusted_op0
, const0_rtx
, LT
,
4856 compute_mode
, label3
);
4857 tem
= expand_binop (compute_mode
, sdiv_optab
, adjusted_op0
, op1
,
4858 quotient
, 0, OPTAB_LIB_WIDEN
);
4859 if (tem
!= quotient
)
4860 emit_move_insn (quotient
, tem
);
4861 emit_jump_insn (gen_jump (label5
));
4863 emit_label (label3
);
4864 expand_inc (adjusted_op0
, const1_rtx
);
4865 emit_label (label4
);
4866 tem
= expand_binop (compute_mode
, sdiv_optab
, adjusted_op0
, op1
,
4867 quotient
, 0, OPTAB_LIB_WIDEN
);
4868 if (tem
!= quotient
)
4869 emit_move_insn (quotient
, tem
);
4870 expand_inc (quotient
, const1_rtx
);
4871 emit_label (label5
);
4876 case EXACT_DIV_EXPR
:
4877 if (op1_is_constant
&& HOST_BITS_PER_WIDE_INT
>= size
)
4879 HOST_WIDE_INT d
= INTVAL (op1
);
4880 unsigned HOST_WIDE_INT ml
;
4884 pre_shift
= floor_log2 (d
& -d
);
4885 ml
= invert_mod2n (d
>> pre_shift
, size
);
4886 t1
= expand_shift (RSHIFT_EXPR
, compute_mode
, op0
,
4887 pre_shift
, NULL_RTX
, unsignedp
);
4888 quotient
= expand_mult (compute_mode
, t1
,
4889 gen_int_mode (ml
, compute_mode
),
4892 insn
= get_last_insn ();
4893 set_dst_reg_note (insn
, REG_EQUAL
,
4894 gen_rtx_fmt_ee (unsignedp
? UDIV
: DIV
,
4895 compute_mode
, op0
, op1
),
4900 case ROUND_DIV_EXPR
:
4901 case ROUND_MOD_EXPR
:
4905 rtx_code_label
*label
;
4906 label
= gen_label_rtx ();
4907 quotient
= gen_reg_rtx (compute_mode
);
4908 remainder
= gen_reg_rtx (compute_mode
);
4909 if (expand_twoval_binop (udivmod_optab
, op0
, op1
, quotient
, remainder
, 1) == 0)
4912 quotient
= expand_binop (compute_mode
, udiv_optab
, op0
, op1
,
4913 quotient
, 1, OPTAB_LIB_WIDEN
);
4914 tem
= expand_mult (compute_mode
, quotient
, op1
, NULL_RTX
, 1);
4915 remainder
= expand_binop (compute_mode
, sub_optab
, op0
, tem
,
4916 remainder
, 1, OPTAB_LIB_WIDEN
);
4918 tem
= plus_constant (compute_mode
, op1
, -1);
4919 tem
= expand_shift (RSHIFT_EXPR
, compute_mode
, tem
, 1, NULL_RTX
, 1);
4920 do_cmp_and_jump (remainder
, tem
, LEU
, compute_mode
, label
);
4921 expand_inc (quotient
, const1_rtx
);
4922 expand_dec (remainder
, op1
);
4927 rtx abs_rem
, abs_op1
, tem
, mask
;
4928 rtx_code_label
*label
;
4929 label
= gen_label_rtx ();
4930 quotient
= gen_reg_rtx (compute_mode
);
4931 remainder
= gen_reg_rtx (compute_mode
);
4932 if (expand_twoval_binop (sdivmod_optab
, op0
, op1
, quotient
, remainder
, 0) == 0)
4935 quotient
= expand_binop (compute_mode
, sdiv_optab
, op0
, op1
,
4936 quotient
, 0, OPTAB_LIB_WIDEN
);
4937 tem
= expand_mult (compute_mode
, quotient
, op1
, NULL_RTX
, 0);
4938 remainder
= expand_binop (compute_mode
, sub_optab
, op0
, tem
,
4939 remainder
, 0, OPTAB_LIB_WIDEN
);
4941 abs_rem
= expand_abs (compute_mode
, remainder
, NULL_RTX
, 1, 0);
4942 abs_op1
= expand_abs (compute_mode
, op1
, NULL_RTX
, 1, 0);
4943 tem
= expand_shift (LSHIFT_EXPR
, compute_mode
, abs_rem
,
4945 do_cmp_and_jump (tem
, abs_op1
, LTU
, compute_mode
, label
);
4946 tem
= expand_binop (compute_mode
, xor_optab
, op0
, op1
,
4947 NULL_RTX
, 0, OPTAB_WIDEN
);
4948 mask
= expand_shift (RSHIFT_EXPR
, compute_mode
, tem
,
4949 size
- 1, NULL_RTX
, 0);
4950 tem
= expand_binop (compute_mode
, xor_optab
, mask
, const1_rtx
,
4951 NULL_RTX
, 0, OPTAB_WIDEN
);
4952 tem
= expand_binop (compute_mode
, sub_optab
, tem
, mask
,
4953 NULL_RTX
, 0, OPTAB_WIDEN
);
4954 expand_inc (quotient
, tem
);
4955 tem
= expand_binop (compute_mode
, xor_optab
, mask
, op1
,
4956 NULL_RTX
, 0, OPTAB_WIDEN
);
4957 tem
= expand_binop (compute_mode
, sub_optab
, tem
, mask
,
4958 NULL_RTX
, 0, OPTAB_WIDEN
);
4959 expand_dec (remainder
, tem
);
4962 return gen_lowpart (mode
, rem_flag
? remainder
: quotient
);
4970 if (target
&& GET_MODE (target
) != compute_mode
)
4975 /* Try to produce the remainder without producing the quotient.
4976 If we seem to have a divmod pattern that does not require widening,
4977 don't try widening here. We should really have a WIDEN argument
4978 to expand_twoval_binop, since what we'd really like to do here is
4979 1) try a mod insn in compute_mode
4980 2) try a divmod insn in compute_mode
4981 3) try a div insn in compute_mode and multiply-subtract to get
4983 4) try the same things with widening allowed. */
4985 = sign_expand_binop (compute_mode
, umod_optab
, smod_optab
,
4988 ((optab_handler (optab2
, compute_mode
)
4989 != CODE_FOR_nothing
)
4990 ? OPTAB_DIRECT
: OPTAB_WIDEN
));
4993 /* No luck there. Can we do remainder and divide at once
4994 without a library call? */
4995 remainder
= gen_reg_rtx (compute_mode
);
4996 if (! expand_twoval_binop ((unsignedp
5000 NULL_RTX
, remainder
, unsignedp
))
5005 return gen_lowpart (mode
, remainder
);
5008 /* Produce the quotient. Try a quotient insn, but not a library call.
5009 If we have a divmod in this mode, use it in preference to widening
5010 the div (for this test we assume it will not fail). Note that optab2
5011 is set to the one of the two optabs that the call below will use. */
5013 = sign_expand_binop (compute_mode
, udiv_optab
, sdiv_optab
,
5014 op0
, op1
, rem_flag
? NULL_RTX
: target
,
5016 ((optab_handler (optab2
, compute_mode
)
5017 != CODE_FOR_nothing
)
5018 ? OPTAB_DIRECT
: OPTAB_WIDEN
));
5022 /* No luck there. Try a quotient-and-remainder insn,
5023 keeping the quotient alone. */
5024 quotient
= gen_reg_rtx (compute_mode
);
5025 if (! expand_twoval_binop (unsignedp
? udivmod_optab
: sdivmod_optab
,
5027 quotient
, NULL_RTX
, unsignedp
))
5031 /* Still no luck. If we are not computing the remainder,
5032 use a library call for the quotient. */
5033 quotient
= sign_expand_binop (compute_mode
,
5034 udiv_optab
, sdiv_optab
,
5036 unsignedp
, OPTAB_LIB_WIDEN
);
5043 if (target
&& GET_MODE (target
) != compute_mode
)
5048 /* No divide instruction either. Use library for remainder. */
5049 remainder
= sign_expand_binop (compute_mode
, umod_optab
, smod_optab
,
5051 unsignedp
, OPTAB_LIB_WIDEN
);
5052 /* No remainder function. Try a quotient-and-remainder
5053 function, keeping the remainder. */
5056 remainder
= gen_reg_rtx (compute_mode
);
5057 if (!expand_twoval_binop_libfunc
5058 (unsignedp
? udivmod_optab
: sdivmod_optab
,
5060 NULL_RTX
, remainder
,
5061 unsignedp
? UMOD
: MOD
))
5062 remainder
= NULL_RTX
;
5067 /* We divided. Now finish doing X - Y * (X / Y). */
5068 remainder
= expand_mult (compute_mode
, quotient
, op1
,
5069 NULL_RTX
, unsignedp
);
5070 remainder
= expand_binop (compute_mode
, sub_optab
, op0
,
5071 remainder
, target
, unsignedp
,
5076 return gen_lowpart (mode
, rem_flag
? remainder
: quotient
);
5079 /* Return a tree node with data type TYPE, describing the value of X.
5080 Usually this is an VAR_DECL, if there is no obvious better choice.
5081 X may be an expression, however we only support those expressions
5082 generated by loop.c. */
5085 make_tree (tree type
, rtx x
)
5089 switch (GET_CODE (x
))
5092 case CONST_WIDE_INT
:
5093 t
= wide_int_to_tree (type
, std::make_pair (x
, TYPE_MODE (type
)));
5097 STATIC_ASSERT (HOST_BITS_PER_WIDE_INT
* 2 <= MAX_BITSIZE_MODE_ANY_INT
);
5098 if (TARGET_SUPPORTS_WIDE_INT
== 0 && GET_MODE (x
) == VOIDmode
)
5099 t
= wide_int_to_tree (type
,
5100 wide_int::from_array (&CONST_DOUBLE_LOW (x
), 2,
5101 HOST_BITS_PER_WIDE_INT
* 2));
5106 REAL_VALUE_FROM_CONST_DOUBLE (d
, x
);
5107 t
= build_real (type
, d
);
5114 int units
= CONST_VECTOR_NUNITS (x
);
5115 tree itype
= TREE_TYPE (type
);
5119 /* Build a tree with vector elements. */
5120 elts
= XALLOCAVEC (tree
, units
);
5121 for (i
= units
- 1; i
>= 0; --i
)
5123 rtx elt
= CONST_VECTOR_ELT (x
, i
);
5124 elts
[i
] = make_tree (itype
, elt
);
5127 return build_vector (type
, elts
);
5131 return fold_build2 (PLUS_EXPR
, type
, make_tree (type
, XEXP (x
, 0)),
5132 make_tree (type
, XEXP (x
, 1)));
5135 return fold_build2 (MINUS_EXPR
, type
, make_tree (type
, XEXP (x
, 0)),
5136 make_tree (type
, XEXP (x
, 1)));
5139 return fold_build1 (NEGATE_EXPR
, type
, make_tree (type
, XEXP (x
, 0)));
5142 return fold_build2 (MULT_EXPR
, type
, make_tree (type
, XEXP (x
, 0)),
5143 make_tree (type
, XEXP (x
, 1)));
5146 return fold_build2 (LSHIFT_EXPR
, type
, make_tree (type
, XEXP (x
, 0)),
5147 make_tree (type
, XEXP (x
, 1)));
5150 t
= unsigned_type_for (type
);
5151 return fold_convert (type
, build2 (RSHIFT_EXPR
, t
,
5152 make_tree (t
, XEXP (x
, 0)),
5153 make_tree (type
, XEXP (x
, 1))));
5156 t
= signed_type_for (type
);
5157 return fold_convert (type
, build2 (RSHIFT_EXPR
, t
,
5158 make_tree (t
, XEXP (x
, 0)),
5159 make_tree (type
, XEXP (x
, 1))));
5162 if (TREE_CODE (type
) != REAL_TYPE
)
5163 t
= signed_type_for (type
);
5167 return fold_convert (type
, build2 (TRUNC_DIV_EXPR
, t
,
5168 make_tree (t
, XEXP (x
, 0)),
5169 make_tree (t
, XEXP (x
, 1))));
5171 t
= unsigned_type_for (type
);
5172 return fold_convert (type
, build2 (TRUNC_DIV_EXPR
, t
,
5173 make_tree (t
, XEXP (x
, 0)),
5174 make_tree (t
, XEXP (x
, 1))));
5178 t
= lang_hooks
.types
.type_for_mode (GET_MODE (XEXP (x
, 0)),
5179 GET_CODE (x
) == ZERO_EXTEND
);
5180 return fold_convert (type
, make_tree (t
, XEXP (x
, 0)));
5183 return make_tree (type
, XEXP (x
, 0));
5186 t
= SYMBOL_REF_DECL (x
);
5188 return fold_convert (type
, build_fold_addr_expr (t
));
5189 /* else fall through. */
5192 t
= build_decl (RTL_LOCATION (x
), VAR_DECL
, NULL_TREE
, type
);
5194 /* If TYPE is a POINTER_TYPE, we might need to convert X from
5195 address mode to pointer mode. */
5196 if (POINTER_TYPE_P (type
))
5197 x
= convert_memory_address_addr_space
5198 (TYPE_MODE (type
), x
, TYPE_ADDR_SPACE (TREE_TYPE (type
)));
5200 /* Note that we do *not* use SET_DECL_RTL here, because we do not
5201 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
5202 t
->decl_with_rtl
.rtl
= x
;
5208 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5209 and returning TARGET.
5211 If TARGET is 0, a pseudo-register or constant is returned. */
5214 expand_and (machine_mode mode
, rtx op0
, rtx op1
, rtx target
)
5218 if (GET_MODE (op0
) == VOIDmode
&& GET_MODE (op1
) == VOIDmode
)
5219 tem
= simplify_binary_operation (AND
, mode
, op0
, op1
);
5221 tem
= expand_binop (mode
, and_optab
, op0
, op1
, target
, 0, OPTAB_LIB_WIDEN
);
5225 else if (tem
!= target
)
5226 emit_move_insn (target
, tem
);
5230 /* Helper function for emit_store_flag. */
5232 emit_cstore (rtx target
, enum insn_code icode
, enum rtx_code code
,
5233 machine_mode mode
, machine_mode compare_mode
,
5234 int unsignedp
, rtx x
, rtx y
, int normalizep
,
5235 machine_mode target_mode
)
5237 struct expand_operand ops
[4];
5238 rtx op0
, comparison
, subtarget
;
5240 machine_mode result_mode
= targetm
.cstore_mode (icode
);
5242 last
= get_last_insn ();
5243 x
= prepare_operand (icode
, x
, 2, mode
, compare_mode
, unsignedp
);
5244 y
= prepare_operand (icode
, y
, 3, mode
, compare_mode
, unsignedp
);
5247 delete_insns_since (last
);
5251 if (target_mode
== VOIDmode
)
5252 target_mode
= result_mode
;
5254 target
= gen_reg_rtx (target_mode
);
5256 comparison
= gen_rtx_fmt_ee (code
, result_mode
, x
, y
);
5258 create_output_operand (&ops
[0], optimize
? NULL_RTX
: target
, result_mode
);
5259 create_fixed_operand (&ops
[1], comparison
);
5260 create_fixed_operand (&ops
[2], x
);
5261 create_fixed_operand (&ops
[3], y
);
5262 if (!maybe_expand_insn (icode
, 4, ops
))
5264 delete_insns_since (last
);
5267 subtarget
= ops
[0].value
;
5269 /* If we are converting to a wider mode, first convert to
5270 TARGET_MODE, then normalize. This produces better combining
5271 opportunities on machines that have a SIGN_EXTRACT when we are
5272 testing a single bit. This mostly benefits the 68k.
5274 If STORE_FLAG_VALUE does not have the sign bit set when
5275 interpreted in MODE, we can do this conversion as unsigned, which
5276 is usually more efficient. */
5277 if (GET_MODE_SIZE (target_mode
) > GET_MODE_SIZE (result_mode
))
5279 convert_move (target
, subtarget
,
5280 val_signbit_known_clear_p (result_mode
,
5283 result_mode
= target_mode
;
5288 /* If we want to keep subexpressions around, don't reuse our last
5293 /* Now normalize to the proper value in MODE. Sometimes we don't
5294 have to do anything. */
5295 if (normalizep
== 0 || normalizep
== STORE_FLAG_VALUE
)
5297 /* STORE_FLAG_VALUE might be the most negative number, so write
5298 the comparison this way to avoid a compiler-time warning. */
5299 else if (- normalizep
== STORE_FLAG_VALUE
)
5300 op0
= expand_unop (result_mode
, neg_optab
, op0
, subtarget
, 0);
5302 /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5303 it hard to use a value of just the sign bit due to ANSI integer
5304 constant typing rules. */
5305 else if (val_signbit_known_set_p (result_mode
, STORE_FLAG_VALUE
))
5306 op0
= expand_shift (RSHIFT_EXPR
, result_mode
, op0
,
5307 GET_MODE_BITSIZE (result_mode
) - 1, subtarget
,
5311 gcc_assert (STORE_FLAG_VALUE
& 1);
5313 op0
= expand_and (result_mode
, op0
, const1_rtx
, subtarget
);
5314 if (normalizep
== -1)
5315 op0
= expand_unop (result_mode
, neg_optab
, op0
, op0
, 0);
5318 /* If we were converting to a smaller mode, do the conversion now. */
5319 if (target_mode
!= result_mode
)
5321 convert_move (target
, op0
, 0);
5329 /* A subroutine of emit_store_flag only including "tricks" that do not
5330 need a recursive call. These are kept separate to avoid infinite
5334 emit_store_flag_1 (rtx target
, enum rtx_code code
, rtx op0
, rtx op1
,
5335 machine_mode mode
, int unsignedp
, int normalizep
,
5336 machine_mode target_mode
)
5339 enum insn_code icode
;
5340 machine_mode compare_mode
;
5341 enum mode_class mclass
;
5342 enum rtx_code scode
;
5346 code
= unsigned_condition (code
);
5347 scode
= swap_condition (code
);
5349 /* If one operand is constant, make it the second one. Only do this
5350 if the other operand is not constant as well. */
5352 if (swap_commutative_operands_p (op0
, op1
))
5357 code
= swap_condition (code
);
5360 if (mode
== VOIDmode
)
5361 mode
= GET_MODE (op0
);
5363 /* For some comparisons with 1 and -1, we can convert this to
5364 comparisons with zero. This will often produce more opportunities for
5365 store-flag insns. */
5370 if (op1
== const1_rtx
)
5371 op1
= const0_rtx
, code
= LE
;
5374 if (op1
== constm1_rtx
)
5375 op1
= const0_rtx
, code
= LT
;
5378 if (op1
== const1_rtx
)
5379 op1
= const0_rtx
, code
= GT
;
5382 if (op1
== constm1_rtx
)
5383 op1
= const0_rtx
, code
= GE
;
5386 if (op1
== const1_rtx
)
5387 op1
= const0_rtx
, code
= NE
;
5390 if (op1
== const1_rtx
)
5391 op1
= const0_rtx
, code
= EQ
;
5397 /* If we are comparing a double-word integer with zero or -1, we can
5398 convert the comparison into one involving a single word. */
5399 if (GET_MODE_BITSIZE (mode
) == BITS_PER_WORD
* 2
5400 && GET_MODE_CLASS (mode
) == MODE_INT
5401 && (!MEM_P (op0
) || ! MEM_VOLATILE_P (op0
)))
5403 if ((code
== EQ
|| code
== NE
)
5404 && (op1
== const0_rtx
|| op1
== constm1_rtx
))
5408 /* Do a logical OR or AND of the two words and compare the
5410 op00
= simplify_gen_subreg (word_mode
, op0
, mode
, 0);
5411 op01
= simplify_gen_subreg (word_mode
, op0
, mode
, UNITS_PER_WORD
);
5412 tem
= expand_binop (word_mode
,
5413 op1
== const0_rtx
? ior_optab
: and_optab
,
5414 op00
, op01
, NULL_RTX
, unsignedp
,
5418 tem
= emit_store_flag (NULL_RTX
, code
, tem
, op1
, word_mode
,
5419 unsignedp
, normalizep
);
5421 else if ((code
== LT
|| code
== GE
) && op1
== const0_rtx
)
5425 /* If testing the sign bit, can just test on high word. */
5426 op0h
= simplify_gen_subreg (word_mode
, op0
, mode
,
5427 subreg_highpart_offset (word_mode
,
5429 tem
= emit_store_flag (NULL_RTX
, code
, op0h
, op1
, word_mode
,
5430 unsignedp
, normalizep
);
5437 if (target_mode
== VOIDmode
|| GET_MODE (tem
) == target_mode
)
5440 target
= gen_reg_rtx (target_mode
);
5442 convert_move (target
, tem
,
5443 !val_signbit_known_set_p (word_mode
,
5444 (normalizep
? normalizep
5445 : STORE_FLAG_VALUE
)));
5450 /* If this is A < 0 or A >= 0, we can do this by taking the ones
5451 complement of A (for GE) and shifting the sign bit to the low bit. */
5452 if (op1
== const0_rtx
&& (code
== LT
|| code
== GE
)
5453 && GET_MODE_CLASS (mode
) == MODE_INT
5454 && (normalizep
|| STORE_FLAG_VALUE
== 1
5455 || val_signbit_p (mode
, STORE_FLAG_VALUE
)))
5462 /* If the result is to be wider than OP0, it is best to convert it
5463 first. If it is to be narrower, it is *incorrect* to convert it
5465 else if (GET_MODE_SIZE (target_mode
) > GET_MODE_SIZE (mode
))
5467 op0
= convert_modes (target_mode
, mode
, op0
, 0);
5471 if (target_mode
!= mode
)
5475 op0
= expand_unop (mode
, one_cmpl_optab
, op0
,
5476 ((STORE_FLAG_VALUE
== 1 || normalizep
)
5477 ? 0 : subtarget
), 0);
5479 if (STORE_FLAG_VALUE
== 1 || normalizep
)
5480 /* If we are supposed to produce a 0/1 value, we want to do
5481 a logical shift from the sign bit to the low-order bit; for
5482 a -1/0 value, we do an arithmetic shift. */
5483 op0
= expand_shift (RSHIFT_EXPR
, mode
, op0
,
5484 GET_MODE_BITSIZE (mode
) - 1,
5485 subtarget
, normalizep
!= -1);
5487 if (mode
!= target_mode
)
5488 op0
= convert_modes (target_mode
, mode
, op0
, 0);
5493 mclass
= GET_MODE_CLASS (mode
);
5494 for (compare_mode
= mode
; compare_mode
!= VOIDmode
;
5495 compare_mode
= GET_MODE_WIDER_MODE (compare_mode
))
5497 machine_mode optab_mode
= mclass
== MODE_CC
? CCmode
: compare_mode
;
5498 icode
= optab_handler (cstore_optab
, optab_mode
);
5499 if (icode
!= CODE_FOR_nothing
)
5501 do_pending_stack_adjust ();
5502 tem
= emit_cstore (target
, icode
, code
, mode
, compare_mode
,
5503 unsignedp
, op0
, op1
, normalizep
, target_mode
);
5507 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
5509 tem
= emit_cstore (target
, icode
, scode
, mode
, compare_mode
,
5510 unsignedp
, op1
, op0
, normalizep
, target_mode
);
5521 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5522 and storing in TARGET. Normally return TARGET.
5523 Return 0 if that cannot be done.
5525 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
5526 it is VOIDmode, they cannot both be CONST_INT.
5528 UNSIGNEDP is for the case where we have to widen the operands
5529 to perform the operation. It says to use zero-extension.
5531 NORMALIZEP is 1 if we should convert the result to be either zero
5532 or one. Normalize is -1 if we should convert the result to be
5533 either zero or -1. If NORMALIZEP is zero, the result will be left
5534 "raw" out of the scc insn. */
5537 emit_store_flag (rtx target
, enum rtx_code code
, rtx op0
, rtx op1
,
5538 machine_mode mode
, int unsignedp
, int normalizep
)
5540 machine_mode target_mode
= target
? GET_MODE (target
) : VOIDmode
;
5541 enum rtx_code rcode
;
5546 /* If we compare constants, we shouldn't use a store-flag operation,
5547 but a constant load. We can get there via the vanilla route that
5548 usually generates a compare-branch sequence, but will in this case
5549 fold the comparison to a constant, and thus elide the branch. */
5550 if (CONSTANT_P (op0
) && CONSTANT_P (op1
))
5553 tem
= emit_store_flag_1 (target
, code
, op0
, op1
, mode
, unsignedp
, normalizep
,
5558 /* If we reached here, we can't do this with a scc insn, however there
5559 are some comparisons that can be done in other ways. Don't do any
5560 of these cases if branches are very cheap. */
5561 if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5564 /* See what we need to return. We can only return a 1, -1, or the
5567 if (normalizep
== 0)
5569 if (STORE_FLAG_VALUE
== 1 || STORE_FLAG_VALUE
== -1)
5570 normalizep
= STORE_FLAG_VALUE
;
5572 else if (val_signbit_p (mode
, STORE_FLAG_VALUE
))
5578 last
= get_last_insn ();
5580 /* If optimizing, use different pseudo registers for each insn, instead
5581 of reusing the same pseudo. This leads to better CSE, but slows
5582 down the compiler, since there are more pseudos */
5583 subtarget
= (!optimize
5584 && (target_mode
== mode
)) ? target
: NULL_RTX
;
5585 trueval
= GEN_INT (normalizep
? normalizep
: STORE_FLAG_VALUE
);
5587 /* For floating-point comparisons, try the reverse comparison or try
5588 changing the "orderedness" of the comparison. */
5589 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
5591 enum rtx_code first_code
;
5594 rcode
= reverse_condition_maybe_unordered (code
);
5595 if (can_compare_p (rcode
, mode
, ccp_store_flag
)
5596 && (code
== ORDERED
|| code
== UNORDERED
5597 || (! HONOR_NANS (mode
) && (code
== LTGT
|| code
== UNEQ
))
5598 || (! HONOR_SNANS (mode
) && (code
== EQ
|| code
== NE
))))
5600 int want_add
= ((STORE_FLAG_VALUE
== 1 && normalizep
== -1)
5601 || (STORE_FLAG_VALUE
== -1 && normalizep
== 1));
5603 /* For the reverse comparison, use either an addition or a XOR. */
5605 && rtx_cost (GEN_INT (normalizep
), PLUS
, 1,
5606 optimize_insn_for_speed_p ()) == 0)
5608 tem
= emit_store_flag_1 (subtarget
, rcode
, op0
, op1
, mode
, 0,
5609 STORE_FLAG_VALUE
, target_mode
);
5611 return expand_binop (target_mode
, add_optab
, tem
,
5612 gen_int_mode (normalizep
, target_mode
),
5613 target
, 0, OPTAB_WIDEN
);
5616 && rtx_cost (trueval
, XOR
, 1,
5617 optimize_insn_for_speed_p ()) == 0)
5619 tem
= emit_store_flag_1 (subtarget
, rcode
, op0
, op1
, mode
, 0,
5620 normalizep
, target_mode
);
5622 return expand_binop (target_mode
, xor_optab
, tem
, trueval
,
5623 target
, INTVAL (trueval
) >= 0, OPTAB_WIDEN
);
5627 delete_insns_since (last
);
5629 /* Cannot split ORDERED and UNORDERED, only try the above trick. */
5630 if (code
== ORDERED
|| code
== UNORDERED
)
5633 and_them
= split_comparison (code
, mode
, &first_code
, &code
);
5635 /* If there are no NaNs, the first comparison should always fall through.
5636 Effectively change the comparison to the other one. */
5637 if (!HONOR_NANS (mode
))
5639 gcc_assert (first_code
== (and_them
? ORDERED
: UNORDERED
));
5640 return emit_store_flag_1 (target
, code
, op0
, op1
, mode
, 0, normalizep
,
5644 #ifdef HAVE_conditional_move
5645 /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5646 conditional move. */
5647 tem
= emit_store_flag_1 (subtarget
, first_code
, op0
, op1
, mode
, 0,
5648 normalizep
, target_mode
);
5653 tem
= emit_conditional_move (target
, code
, op0
, op1
, mode
,
5654 tem
, const0_rtx
, GET_MODE (tem
), 0);
5656 tem
= emit_conditional_move (target
, code
, op0
, op1
, mode
,
5657 trueval
, tem
, GET_MODE (tem
), 0);
5660 delete_insns_since (last
);
5667 /* The remaining tricks only apply to integer comparisons. */
5669 if (GET_MODE_CLASS (mode
) != MODE_INT
)
5672 /* If this is an equality comparison of integers, we can try to exclusive-or
5673 (or subtract) the two operands and use a recursive call to try the
5674 comparison with zero. Don't do any of these cases if branches are
5677 if ((code
== EQ
|| code
== NE
) && op1
!= const0_rtx
)
5679 tem
= expand_binop (mode
, xor_optab
, op0
, op1
, subtarget
, 1,
5683 tem
= expand_binop (mode
, sub_optab
, op0
, op1
, subtarget
, 1,
5686 tem
= emit_store_flag (target
, code
, tem
, const0_rtx
,
5687 mode
, unsignedp
, normalizep
);
5691 delete_insns_since (last
);
5694 /* For integer comparisons, try the reverse comparison. However, for
5695 small X and if we'd have anyway to extend, implementing "X != 0"
5696 as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0". */
5697 rcode
= reverse_condition (code
);
5698 if (can_compare_p (rcode
, mode
, ccp_store_flag
)
5699 && ! (optab_handler (cstore_optab
, mode
) == CODE_FOR_nothing
5701 && GET_MODE_SIZE (mode
) < UNITS_PER_WORD
5702 && op1
== const0_rtx
))
5704 int want_add
= ((STORE_FLAG_VALUE
== 1 && normalizep
== -1)
5705 || (STORE_FLAG_VALUE
== -1 && normalizep
== 1));
5707 /* Again, for the reverse comparison, use either an addition or a XOR. */
5709 && rtx_cost (GEN_INT (normalizep
), PLUS
, 1,
5710 optimize_insn_for_speed_p ()) == 0)
5712 tem
= emit_store_flag_1 (subtarget
, rcode
, op0
, op1
, mode
, 0,
5713 STORE_FLAG_VALUE
, target_mode
);
5715 tem
= expand_binop (target_mode
, add_optab
, tem
,
5716 gen_int_mode (normalizep
, target_mode
),
5717 target
, 0, OPTAB_WIDEN
);
5720 && rtx_cost (trueval
, XOR
, 1,
5721 optimize_insn_for_speed_p ()) == 0)
5723 tem
= emit_store_flag_1 (subtarget
, rcode
, op0
, op1
, mode
, 0,
5724 normalizep
, target_mode
);
5726 tem
= expand_binop (target_mode
, xor_optab
, tem
, trueval
, target
,
5727 INTVAL (trueval
) >= 0, OPTAB_WIDEN
);
5732 delete_insns_since (last
);
5735 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5736 the constant zero. Reject all other comparisons at this point. Only
5737 do LE and GT if branches are expensive since they are expensive on
5738 2-operand machines. */
5740 if (op1
!= const0_rtx
5741 || (code
!= EQ
&& code
!= NE
5742 && (BRANCH_COST (optimize_insn_for_speed_p (),
5743 false) <= 1 || (code
!= LE
&& code
!= GT
))))
5746 /* Try to put the result of the comparison in the sign bit. Assume we can't
5747 do the necessary operation below. */
5751 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
5752 the sign bit set. */
5756 /* This is destructive, so SUBTARGET can't be OP0. */
5757 if (rtx_equal_p (subtarget
, op0
))
5760 tem
= expand_binop (mode
, sub_optab
, op0
, const1_rtx
, subtarget
, 0,
5763 tem
= expand_binop (mode
, ior_optab
, op0
, tem
, subtarget
, 0,
5767 /* To see if A > 0, compute (((signed) A) << BITS) - A, where BITS is the
5768 number of bits in the mode of OP0, minus one. */
5772 if (rtx_equal_p (subtarget
, op0
))
5775 tem
= expand_shift (RSHIFT_EXPR
, mode
, op0
,
5776 GET_MODE_BITSIZE (mode
) - 1,
5778 tem
= expand_binop (mode
, sub_optab
, tem
, op0
, subtarget
, 0,
5782 if (code
== EQ
|| code
== NE
)
5784 /* For EQ or NE, one way to do the comparison is to apply an operation
5785 that converts the operand into a positive number if it is nonzero
5786 or zero if it was originally zero. Then, for EQ, we subtract 1 and
5787 for NE we negate. This puts the result in the sign bit. Then we
5788 normalize with a shift, if needed.
5790 Two operations that can do the above actions are ABS and FFS, so try
5791 them. If that doesn't work, and MODE is smaller than a full word,
5792 we can use zero-extension to the wider mode (an unsigned conversion)
5793 as the operation. */
5795 /* Note that ABS doesn't yield a positive number for INT_MIN, but
5796 that is compensated by the subsequent overflow when subtracting
5799 if (optab_handler (abs_optab
, mode
) != CODE_FOR_nothing
)
5800 tem
= expand_unop (mode
, abs_optab
, op0
, subtarget
, 1);
5801 else if (optab_handler (ffs_optab
, mode
) != CODE_FOR_nothing
)
5802 tem
= expand_unop (mode
, ffs_optab
, op0
, subtarget
, 1);
5803 else if (GET_MODE_SIZE (mode
) < UNITS_PER_WORD
)
5805 tem
= convert_modes (word_mode
, mode
, op0
, 1);
5812 tem
= expand_binop (mode
, sub_optab
, tem
, const1_rtx
, subtarget
,
5815 tem
= expand_unop (mode
, neg_optab
, tem
, subtarget
, 0);
5818 /* If we couldn't do it that way, for NE we can "or" the two's complement
5819 of the value with itself. For EQ, we take the one's complement of
5820 that "or", which is an extra insn, so we only handle EQ if branches
5825 || BRANCH_COST (optimize_insn_for_speed_p (),
5828 if (rtx_equal_p (subtarget
, op0
))
5831 tem
= expand_unop (mode
, neg_optab
, op0
, subtarget
, 0);
5832 tem
= expand_binop (mode
, ior_optab
, tem
, op0
, subtarget
, 0,
5835 if (tem
&& code
== EQ
)
5836 tem
= expand_unop (mode
, one_cmpl_optab
, tem
, subtarget
, 0);
5840 if (tem
&& normalizep
)
5841 tem
= expand_shift (RSHIFT_EXPR
, mode
, tem
,
5842 GET_MODE_BITSIZE (mode
) - 1,
5843 subtarget
, normalizep
== 1);
5849 else if (GET_MODE (tem
) != target_mode
)
5851 convert_move (target
, tem
, 0);
5854 else if (!subtarget
)
5856 emit_move_insn (target
, tem
);
5861 delete_insns_since (last
);
5866 /* Like emit_store_flag, but always succeeds. */
5869 emit_store_flag_force (rtx target
, enum rtx_code code
, rtx op0
, rtx op1
,
5870 machine_mode mode
, int unsignedp
, int normalizep
)
5873 rtx_code_label
*label
;
5874 rtx trueval
, falseval
;
5876 /* First see if emit_store_flag can do the job. */
5877 tem
= emit_store_flag (target
, code
, op0
, op1
, mode
, unsignedp
, normalizep
);
5882 target
= gen_reg_rtx (word_mode
);
5884 /* If this failed, we have to do this with set/compare/jump/set code.
5885 For foo != 0, if foo is in OP0, just replace it with 1 if nonzero. */
5886 trueval
= normalizep
? GEN_INT (normalizep
) : const1_rtx
;
5888 && GET_MODE_CLASS (mode
) == MODE_INT
5891 && op1
== const0_rtx
)
5893 label
= gen_label_rtx ();
5894 do_compare_rtx_and_jump (target
, const0_rtx
, EQ
, unsignedp
,
5895 mode
, NULL_RTX
, NULL_RTX
, label
, -1);
5896 emit_move_insn (target
, trueval
);
5902 || reg_mentioned_p (target
, op0
) || reg_mentioned_p (target
, op1
))
5903 target
= gen_reg_rtx (GET_MODE (target
));
5905 /* Jump in the right direction if the target cannot implement CODE
5906 but can jump on its reverse condition. */
5907 falseval
= const0_rtx
;
5908 if (! can_compare_p (code
, mode
, ccp_jump
)
5909 && (! FLOAT_MODE_P (mode
)
5910 || code
== ORDERED
|| code
== UNORDERED
5911 || (! HONOR_NANS (mode
) && (code
== LTGT
|| code
== UNEQ
))
5912 || (! HONOR_SNANS (mode
) && (code
== EQ
|| code
== NE
))))
5914 enum rtx_code rcode
;
5915 if (FLOAT_MODE_P (mode
))
5916 rcode
= reverse_condition_maybe_unordered (code
);
5918 rcode
= reverse_condition (code
);
5920 /* Canonicalize to UNORDERED for the libcall. */
5921 if (can_compare_p (rcode
, mode
, ccp_jump
)
5922 || (code
== ORDERED
&& ! can_compare_p (ORDERED
, mode
, ccp_jump
)))
5925 trueval
= const0_rtx
;
5930 emit_move_insn (target
, trueval
);
5931 label
= gen_label_rtx ();
5932 do_compare_rtx_and_jump (op0
, op1
, code
, unsignedp
, mode
, NULL_RTX
,
5933 NULL_RTX
, label
, -1);
5935 emit_move_insn (target
, falseval
);
5941 /* Perform possibly multi-word comparison and conditional jump to LABEL
5942 if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE. This is
5943 now a thin wrapper around do_compare_rtx_and_jump. */
5946 do_cmp_and_jump (rtx arg1
, rtx arg2
, enum rtx_code op
, machine_mode mode
,
5947 rtx_code_label
*label
)
5949 int unsignedp
= (op
== LTU
|| op
== LEU
|| op
== GTU
|| op
== GEU
);
5950 do_compare_rtx_and_jump (arg1
, arg2
, op
, unsignedp
, mode
,
5951 NULL_RTX
, NULL_RTX
, label
, -1);