1 /* Medium-level subroutines: convert bit-field store and extract
2 and shifts, multiplies and divides to rtl instructions.
3 Copyright (C) 1987-2016 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
34 #include "diagnostic-core.h"
35 #include "fold-const.h"
36 #include "stor-layout.h"
40 #include "langhooks.h"
/* Default instance of the per-target tables used by this file
   (shift/add/mul/div costs etc. -- the struct is declared elsewhere).  */
struct target_expmed default_target_expmed;

/* Active instance; initially points at the default.  NOTE(review):
   presumably re-pointed for switchable targets -- not visible here.  */
struct target_expmed *this_target_expmed = &default_target_expmed;
47 static void store_fixed_bit_field (rtx
, unsigned HOST_WIDE_INT
,
48 unsigned HOST_WIDE_INT
,
49 unsigned HOST_WIDE_INT
,
50 unsigned HOST_WIDE_INT
,
52 static void store_fixed_bit_field_1 (rtx
, unsigned HOST_WIDE_INT
,
53 unsigned HOST_WIDE_INT
,
55 static void store_split_bit_field (rtx
, unsigned HOST_WIDE_INT
,
56 unsigned HOST_WIDE_INT
,
57 unsigned HOST_WIDE_INT
,
58 unsigned HOST_WIDE_INT
,
60 static rtx
extract_fixed_bit_field (machine_mode
, rtx
,
61 unsigned HOST_WIDE_INT
,
62 unsigned HOST_WIDE_INT
, rtx
, int, bool);
63 static rtx
extract_fixed_bit_field_1 (machine_mode
, rtx
,
64 unsigned HOST_WIDE_INT
,
65 unsigned HOST_WIDE_INT
, rtx
, int, bool);
66 static rtx
lshift_value (machine_mode
, unsigned HOST_WIDE_INT
, int);
67 static rtx
extract_split_bit_field (rtx
, unsigned HOST_WIDE_INT
,
68 unsigned HOST_WIDE_INT
, int, bool);
69 static void do_cmp_and_jump (rtx
, rtx
, enum rtx_code
, machine_mode
, rtx_code_label
*);
70 static rtx
expand_smod_pow2 (machine_mode
, rtx
, HOST_WIDE_INT
);
71 static rtx
expand_sdiv_pow2 (machine_mode
, rtx
, HOST_WIDE_INT
);
73 /* Return a constant integer mask value of mode MODE with BITSIZE ones
74 followed by BITPOS zeros, or the complement of that if COMPLEMENT.
75 The mask is truncated if necessary to the width of mode MODE. The
76 mask is zero-extended if BITSIZE+BITPOS is too small for MODE. */
79 mask_rtx (machine_mode mode
, int bitpos
, int bitsize
, bool complement
)
81 return immed_wide_int_const
82 (wi::shifted_mask (bitpos
, bitsize
, complement
,
83 GET_MODE_PRECISION (mode
)), mode
);
86 /* Test whether a value is zero or a power of two. */
87 #define EXACT_POWER_OF_2_OR_ZERO_P(x) \
88 (((x) & ((x) - (unsigned HOST_WIDE_INT) 1)) == 0)
90 struct init_expmed_rtl
111 rtx pow2
[MAX_BITS_PER_WORD
];
112 rtx cint
[MAX_BITS_PER_WORD
];
116 init_expmed_one_conv (struct init_expmed_rtl
*all
, machine_mode to_mode
,
117 machine_mode from_mode
, bool speed
)
119 int to_size
, from_size
;
122 to_size
= GET_MODE_PRECISION (to_mode
);
123 from_size
= GET_MODE_PRECISION (from_mode
);
125 /* Most partial integers have a precision less than the "full"
126 integer it requires for storage. In case one doesn't, for
127 comparison purposes here, reduce the bit size by one in that
129 if (GET_MODE_CLASS (to_mode
) == MODE_PARTIAL_INT
130 && exact_log2 (to_size
) != -1)
132 if (GET_MODE_CLASS (from_mode
) == MODE_PARTIAL_INT
133 && exact_log2 (from_size
) != -1)
136 /* Assume cost of zero-extend and sign-extend is the same. */
137 which
= (to_size
< from_size
? all
->trunc
: all
->zext
);
139 PUT_MODE (all
->reg
, from_mode
);
140 set_convert_cost (to_mode
, from_mode
, speed
,
141 set_src_cost (which
, to_mode
, speed
));
145 init_expmed_one_mode (struct init_expmed_rtl
*all
,
146 machine_mode mode
, int speed
)
148 int m
, n
, mode_bitsize
;
149 machine_mode mode_from
;
151 mode_bitsize
= GET_MODE_UNIT_BITSIZE (mode
);
153 PUT_MODE (all
->reg
, mode
);
154 PUT_MODE (all
->plus
, mode
);
155 PUT_MODE (all
->neg
, mode
);
156 PUT_MODE (all
->mult
, mode
);
157 PUT_MODE (all
->sdiv
, mode
);
158 PUT_MODE (all
->udiv
, mode
);
159 PUT_MODE (all
->sdiv_32
, mode
);
160 PUT_MODE (all
->smod_32
, mode
);
161 PUT_MODE (all
->wide_trunc
, mode
);
162 PUT_MODE (all
->shift
, mode
);
163 PUT_MODE (all
->shift_mult
, mode
);
164 PUT_MODE (all
->shift_add
, mode
);
165 PUT_MODE (all
->shift_sub0
, mode
);
166 PUT_MODE (all
->shift_sub1
, mode
);
167 PUT_MODE (all
->zext
, mode
);
168 PUT_MODE (all
->trunc
, mode
);
170 set_add_cost (speed
, mode
, set_src_cost (all
->plus
, mode
, speed
));
171 set_neg_cost (speed
, mode
, set_src_cost (all
->neg
, mode
, speed
));
172 set_mul_cost (speed
, mode
, set_src_cost (all
->mult
, mode
, speed
));
173 set_sdiv_cost (speed
, mode
, set_src_cost (all
->sdiv
, mode
, speed
));
174 set_udiv_cost (speed
, mode
, set_src_cost (all
->udiv
, mode
, speed
));
176 set_sdiv_pow2_cheap (speed
, mode
, (set_src_cost (all
->sdiv_32
, mode
, speed
)
177 <= 2 * add_cost (speed
, mode
)));
178 set_smod_pow2_cheap (speed
, mode
, (set_src_cost (all
->smod_32
, mode
, speed
)
179 <= 4 * add_cost (speed
, mode
)));
181 set_shift_cost (speed
, mode
, 0, 0);
183 int cost
= add_cost (speed
, mode
);
184 set_shiftadd_cost (speed
, mode
, 0, cost
);
185 set_shiftsub0_cost (speed
, mode
, 0, cost
);
186 set_shiftsub1_cost (speed
, mode
, 0, cost
);
189 n
= MIN (MAX_BITS_PER_WORD
, mode_bitsize
);
190 for (m
= 1; m
< n
; m
++)
192 XEXP (all
->shift
, 1) = all
->cint
[m
];
193 XEXP (all
->shift_mult
, 1) = all
->pow2
[m
];
195 set_shift_cost (speed
, mode
, m
, set_src_cost (all
->shift
, mode
, speed
));
196 set_shiftadd_cost (speed
, mode
, m
, set_src_cost (all
->shift_add
, mode
,
198 set_shiftsub0_cost (speed
, mode
, m
, set_src_cost (all
->shift_sub0
, mode
,
200 set_shiftsub1_cost (speed
, mode
, m
, set_src_cost (all
->shift_sub1
, mode
,
204 if (SCALAR_INT_MODE_P (mode
))
206 for (mode_from
= MIN_MODE_INT
; mode_from
<= MAX_MODE_INT
;
207 mode_from
= (machine_mode
)(mode_from
+ 1))
208 init_expmed_one_conv (all
, mode
, mode_from
, speed
);
210 if (GET_MODE_CLASS (mode
) == MODE_INT
)
212 machine_mode wider_mode
= GET_MODE_WIDER_MODE (mode
);
213 if (wider_mode
!= VOIDmode
)
215 PUT_MODE (all
->zext
, wider_mode
);
216 PUT_MODE (all
->wide_mult
, wider_mode
);
217 PUT_MODE (all
->wide_lshr
, wider_mode
);
218 XEXP (all
->wide_lshr
, 1) = GEN_INT (mode_bitsize
);
220 set_mul_widen_cost (speed
, wider_mode
,
221 set_src_cost (all
->wide_mult
, wider_mode
, speed
));
222 set_mul_highpart_cost (speed
, mode
,
223 set_src_cost (all
->wide_trunc
, mode
, speed
));
231 struct init_expmed_rtl all
;
232 machine_mode mode
= QImode
;
235 memset (&all
, 0, sizeof all
);
236 for (m
= 1; m
< MAX_BITS_PER_WORD
; m
++)
238 all
.pow2
[m
] = GEN_INT ((HOST_WIDE_INT
) 1 << m
);
239 all
.cint
[m
] = GEN_INT (m
);
242 /* Avoid using hard regs in ways which may be unsupported. */
243 all
.reg
= gen_raw_REG (mode
, LAST_VIRTUAL_REGISTER
+ 1);
244 all
.plus
= gen_rtx_PLUS (mode
, all
.reg
, all
.reg
);
245 all
.neg
= gen_rtx_NEG (mode
, all
.reg
);
246 all
.mult
= gen_rtx_MULT (mode
, all
.reg
, all
.reg
);
247 all
.sdiv
= gen_rtx_DIV (mode
, all
.reg
, all
.reg
);
248 all
.udiv
= gen_rtx_UDIV (mode
, all
.reg
, all
.reg
);
249 all
.sdiv_32
= gen_rtx_DIV (mode
, all
.reg
, all
.pow2
[5]);
250 all
.smod_32
= gen_rtx_MOD (mode
, all
.reg
, all
.pow2
[5]);
251 all
.zext
= gen_rtx_ZERO_EXTEND (mode
, all
.reg
);
252 all
.wide_mult
= gen_rtx_MULT (mode
, all
.zext
, all
.zext
);
253 all
.wide_lshr
= gen_rtx_LSHIFTRT (mode
, all
.wide_mult
, all
.reg
);
254 all
.wide_trunc
= gen_rtx_TRUNCATE (mode
, all
.wide_lshr
);
255 all
.shift
= gen_rtx_ASHIFT (mode
, all
.reg
, all
.reg
);
256 all
.shift_mult
= gen_rtx_MULT (mode
, all
.reg
, all
.reg
);
257 all
.shift_add
= gen_rtx_PLUS (mode
, all
.shift_mult
, all
.reg
);
258 all
.shift_sub0
= gen_rtx_MINUS (mode
, all
.shift_mult
, all
.reg
);
259 all
.shift_sub1
= gen_rtx_MINUS (mode
, all
.reg
, all
.shift_mult
);
260 all
.trunc
= gen_rtx_TRUNCATE (mode
, all
.reg
);
262 for (speed
= 0; speed
< 2; speed
++)
264 crtl
->maybe_hot_insn_p
= speed
;
265 set_zero_cost (speed
, set_src_cost (const0_rtx
, mode
, speed
));
267 for (mode
= MIN_MODE_INT
; mode
<= MAX_MODE_INT
;
268 mode
= (machine_mode
)(mode
+ 1))
269 init_expmed_one_mode (&all
, mode
, speed
);
271 if (MIN_MODE_PARTIAL_INT
!= VOIDmode
)
272 for (mode
= MIN_MODE_PARTIAL_INT
; mode
<= MAX_MODE_PARTIAL_INT
;
273 mode
= (machine_mode
)(mode
+ 1))
274 init_expmed_one_mode (&all
, mode
, speed
);
276 if (MIN_MODE_VECTOR_INT
!= VOIDmode
)
277 for (mode
= MIN_MODE_VECTOR_INT
; mode
<= MAX_MODE_VECTOR_INT
;
278 mode
= (machine_mode
)(mode
+ 1))
279 init_expmed_one_mode (&all
, mode
, speed
);
282 if (alg_hash_used_p ())
284 struct alg_hash_entry
*p
= alg_hash_entry_ptr (0);
285 memset (p
, 0, sizeof (*p
) * NUM_ALG_HASH_ENTRIES
);
288 set_alg_hash_used_p (true);
289 default_rtl_profile ();
291 ggc_free (all
.trunc
);
292 ggc_free (all
.shift_sub1
);
293 ggc_free (all
.shift_sub0
);
294 ggc_free (all
.shift_add
);
295 ggc_free (all
.shift_mult
);
296 ggc_free (all
.shift
);
297 ggc_free (all
.wide_trunc
);
298 ggc_free (all
.wide_lshr
);
299 ggc_free (all
.wide_mult
);
301 ggc_free (all
.smod_32
);
302 ggc_free (all
.sdiv_32
);
311 /* Return an rtx representing minus the value of X.
312 MODE is the intended mode of the result,
313 useful if X is a CONST_INT. */
316 negate_rtx (machine_mode mode
, rtx x
)
318 rtx result
= simplify_unary_operation (NEG
, mode
, x
, mode
);
321 result
= expand_unop (mode
, neg_optab
, x
, NULL_RTX
, 0);
326 /* Whether reverse storage order is supported on the target. */
327 static int reverse_storage_order_supported
= -1;
329 /* Check whether reverse storage order is supported on the target. */
332 check_reverse_storage_order_support (void)
334 if (BYTES_BIG_ENDIAN
!= WORDS_BIG_ENDIAN
)
336 reverse_storage_order_supported
= 0;
337 sorry ("reverse scalar storage order");
340 reverse_storage_order_supported
= 1;
343 /* Whether reverse FP storage order is supported on the target. */
344 static int reverse_float_storage_order_supported
= -1;
346 /* Check whether reverse FP storage order is supported on the target. */
349 check_reverse_float_storage_order_support (void)
351 if (FLOAT_WORDS_BIG_ENDIAN
!= WORDS_BIG_ENDIAN
)
353 reverse_float_storage_order_supported
= 0;
354 sorry ("reverse floating-point scalar storage order");
357 reverse_float_storage_order_supported
= 1;
360 /* Return an rtx representing value of X with reverse storage order.
361 MODE is the intended mode of the result,
362 useful if X is a CONST_INT. */
365 flip_storage_order (enum machine_mode mode
, rtx x
)
367 enum machine_mode int_mode
;
373 if (COMPLEX_MODE_P (mode
))
375 rtx real
= read_complex_part (x
, false);
376 rtx imag
= read_complex_part (x
, true);
378 real
= flip_storage_order (GET_MODE_INNER (mode
), real
);
379 imag
= flip_storage_order (GET_MODE_INNER (mode
), imag
);
381 return gen_rtx_CONCAT (mode
, real
, imag
);
384 if (__builtin_expect (reverse_storage_order_supported
< 0, 0))
385 check_reverse_storage_order_support ();
387 if (SCALAR_INT_MODE_P (mode
))
391 if (FLOAT_MODE_P (mode
)
392 && __builtin_expect (reverse_float_storage_order_supported
< 0, 0))
393 check_reverse_float_storage_order_support ();
395 int_mode
= mode_for_size (GET_MODE_PRECISION (mode
), MODE_INT
, 0);
396 if (int_mode
== BLKmode
)
398 sorry ("reverse storage order for %smode", GET_MODE_NAME (mode
));
401 x
= gen_lowpart (int_mode
, x
);
404 result
= simplify_unary_operation (BSWAP
, int_mode
, x
, int_mode
);
406 result
= expand_unop (int_mode
, bswap_optab
, x
, NULL_RTX
, 1);
408 if (int_mode
!= mode
)
409 result
= gen_lowpart (mode
, result
);
414 /* Adjust bitfield memory MEM so that it points to the first unit of mode
415 MODE that contains a bitfield of size BITSIZE at bit position BITNUM.
416 If MODE is BLKmode, return a reference to every byte in the bitfield.
417 Set *NEW_BITNUM to the bit position of the field within the new memory. */
420 narrow_bit_field_mem (rtx mem
, machine_mode mode
,
421 unsigned HOST_WIDE_INT bitsize
,
422 unsigned HOST_WIDE_INT bitnum
,
423 unsigned HOST_WIDE_INT
*new_bitnum
)
427 *new_bitnum
= bitnum
% BITS_PER_UNIT
;
428 HOST_WIDE_INT offset
= bitnum
/ BITS_PER_UNIT
;
429 HOST_WIDE_INT size
= ((*new_bitnum
+ bitsize
+ BITS_PER_UNIT
- 1)
431 return adjust_bitfield_address_size (mem
, mode
, offset
, size
);
435 unsigned int unit
= GET_MODE_BITSIZE (mode
);
436 *new_bitnum
= bitnum
% unit
;
437 HOST_WIDE_INT offset
= (bitnum
- *new_bitnum
) / BITS_PER_UNIT
;
438 return adjust_bitfield_address (mem
, mode
, offset
);
442 /* The caller wants to perform insertion or extraction PATTERN on a
443 bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
444 BITREGION_START and BITREGION_END are as for store_bit_field
445 and FIELDMODE is the natural mode of the field.
447 Search for a mode that is compatible with the memory access
448 restrictions and (where applicable) with a register insertion or
449 extraction. Return the new memory on success, storing the adjusted
450 bit position in *NEW_BITNUM. Return null otherwise. */
453 adjust_bit_field_mem_for_reg (enum extraction_pattern pattern
,
454 rtx op0
, HOST_WIDE_INT bitsize
,
455 HOST_WIDE_INT bitnum
,
456 unsigned HOST_WIDE_INT bitregion_start
,
457 unsigned HOST_WIDE_INT bitregion_end
,
458 machine_mode fieldmode
,
459 unsigned HOST_WIDE_INT
*new_bitnum
)
461 bit_field_mode_iterator
iter (bitsize
, bitnum
, bitregion_start
,
462 bitregion_end
, MEM_ALIGN (op0
),
463 MEM_VOLATILE_P (op0
));
464 machine_mode best_mode
;
465 if (iter
.next_mode (&best_mode
))
467 /* We can use a memory in BEST_MODE. See whether this is true for
468 any wider modes. All other things being equal, we prefer to
469 use the widest mode possible because it tends to expose more
470 CSE opportunities. */
471 if (!iter
.prefer_smaller_modes ())
473 /* Limit the search to the mode required by the corresponding
474 register insertion or extraction instruction, if any. */
475 machine_mode limit_mode
= word_mode
;
476 extraction_insn insn
;
477 if (get_best_reg_extraction_insn (&insn
, pattern
,
478 GET_MODE_BITSIZE (best_mode
),
480 limit_mode
= insn
.field_mode
;
482 machine_mode wider_mode
;
483 while (iter
.next_mode (&wider_mode
)
484 && GET_MODE_SIZE (wider_mode
) <= GET_MODE_SIZE (limit_mode
))
485 best_mode
= wider_mode
;
487 return narrow_bit_field_mem (op0
, best_mode
, bitsize
, bitnum
,
493 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
494 a structure of mode STRUCT_MODE represents a lowpart subreg. The subreg
495 offset is then BITNUM / BITS_PER_UNIT. */
498 lowpart_bit_field_p (unsigned HOST_WIDE_INT bitnum
,
499 unsigned HOST_WIDE_INT bitsize
,
500 machine_mode struct_mode
)
502 if (BYTES_BIG_ENDIAN
)
503 return (bitnum
% BITS_PER_UNIT
== 0
504 && (bitnum
+ bitsize
== GET_MODE_BITSIZE (struct_mode
)
505 || (bitnum
+ bitsize
) % BITS_PER_WORD
== 0));
507 return bitnum
% BITS_PER_WORD
== 0;
510 /* Return true if -fstrict-volatile-bitfields applies to an access of OP0
511 containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
512 Return false if the access would touch memory outside the range
513 BITREGION_START to BITREGION_END for conformance to the C++ memory
517 strict_volatile_bitfield_p (rtx op0
, unsigned HOST_WIDE_INT bitsize
,
518 unsigned HOST_WIDE_INT bitnum
,
519 machine_mode fieldmode
,
520 unsigned HOST_WIDE_INT bitregion_start
,
521 unsigned HOST_WIDE_INT bitregion_end
)
523 unsigned HOST_WIDE_INT modesize
= GET_MODE_BITSIZE (fieldmode
);
525 /* -fstrict-volatile-bitfields must be enabled and we must have a
528 || !MEM_VOLATILE_P (op0
)
529 || flag_strict_volatile_bitfields
<= 0)
532 /* Non-integral modes likely only happen with packed structures.
534 if (!SCALAR_INT_MODE_P (fieldmode
))
537 /* The bit size must not be larger than the field mode, and
538 the field mode must not be larger than a word. */
539 if (bitsize
> modesize
|| modesize
> BITS_PER_WORD
)
542 /* Check for cases of unaligned fields that must be split. */
543 if (bitnum
% modesize
+ bitsize
> modesize
)
546 /* The memory must be sufficiently aligned for a MODESIZE access.
547 This condition guarantees, that the memory access will not
548 touch anything after the end of the structure. */
549 if (MEM_ALIGN (op0
) < modesize
)
552 /* Check for cases where the C++ memory model applies. */
553 if (bitregion_end
!= 0
554 && (bitnum
- bitnum
% modesize
< bitregion_start
555 || bitnum
- bitnum
% modesize
+ modesize
- 1 > bitregion_end
))
561 /* Return true if OP is a memory and if a bitfield of size BITSIZE at
562 bit number BITNUM can be treated as a simple value of mode MODE. */
565 simple_mem_bitfield_p (rtx op0
, unsigned HOST_WIDE_INT bitsize
,
566 unsigned HOST_WIDE_INT bitnum
, machine_mode mode
)
569 && bitnum
% BITS_PER_UNIT
== 0
570 && bitsize
== GET_MODE_BITSIZE (mode
)
571 && (!SLOW_UNALIGNED_ACCESS (mode
, MEM_ALIGN (op0
))
572 || (bitnum
% GET_MODE_ALIGNMENT (mode
) == 0
573 && MEM_ALIGN (op0
) >= GET_MODE_ALIGNMENT (mode
))));
576 /* Try to use instruction INSV to store VALUE into a field of OP0.
577 BITSIZE and BITNUM are as for store_bit_field. */
580 store_bit_field_using_insv (const extraction_insn
*insv
, rtx op0
,
581 unsigned HOST_WIDE_INT bitsize
,
582 unsigned HOST_WIDE_INT bitnum
,
585 struct expand_operand ops
[4];
588 rtx_insn
*last
= get_last_insn ();
589 bool copy_back
= false;
591 machine_mode op_mode
= insv
->field_mode
;
592 unsigned int unit
= GET_MODE_BITSIZE (op_mode
);
593 if (bitsize
== 0 || bitsize
> unit
)
597 /* Get a reference to the first byte of the field. */
598 xop0
= narrow_bit_field_mem (xop0
, insv
->struct_mode
, bitsize
, bitnum
,
602 /* Convert from counting within OP0 to counting in OP_MODE. */
603 if (BYTES_BIG_ENDIAN
)
604 bitnum
+= unit
- GET_MODE_BITSIZE (GET_MODE (op0
));
606 /* If xop0 is a register, we need it in OP_MODE
607 to make it acceptable to the format of insv. */
608 if (GET_CODE (xop0
) == SUBREG
)
609 /* We can't just change the mode, because this might clobber op0,
610 and we will need the original value of op0 if insv fails. */
611 xop0
= gen_rtx_SUBREG (op_mode
, SUBREG_REG (xop0
), SUBREG_BYTE (xop0
));
612 if (REG_P (xop0
) && GET_MODE (xop0
) != op_mode
)
613 xop0
= gen_lowpart_SUBREG (op_mode
, xop0
);
616 /* If the destination is a paradoxical subreg such that we need a
617 truncate to the inner mode, perform the insertion on a temporary and
618 truncate the result to the original destination. Note that we can't
619 just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
620 X) 0)) is (reg:N X). */
621 if (GET_CODE (xop0
) == SUBREG
622 && REG_P (SUBREG_REG (xop0
))
623 && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0
)),
626 rtx tem
= gen_reg_rtx (op_mode
);
627 emit_move_insn (tem
, xop0
);
632 /* There is a similar overflow check at the start of store_bit_field_1,
633 but that only checks the case where the field lies completely
634 outside the register; there are also situations where the field
635 lies partially in the register, and we need to adjust bitsize for this
636 partial overflow situation. Without this fix, pr48335-2.c on big-endian
637 will be broken on those archs that support a bit insert instruction, like arm, aarch64
639 if (bitsize
+ bitnum
> unit
&& bitnum
< unit
)
641 warning (OPT_Wextra
, "write of %wu-bit data outside the bound of "
642 "destination object, data truncated into %wu-bit",
643 bitsize
, unit
- bitnum
);
644 bitsize
= unit
- bitnum
;
647 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
648 "backwards" from the size of the unit we are inserting into.
649 Otherwise, we count bits from the most significant on a
650 BYTES/BITS_BIG_ENDIAN machine. */
652 if (BITS_BIG_ENDIAN
!= BYTES_BIG_ENDIAN
)
653 bitnum
= unit
- bitsize
- bitnum
;
655 /* Convert VALUE to op_mode (which insv insn wants) in VALUE1. */
657 if (GET_MODE (value
) != op_mode
)
659 if (GET_MODE_BITSIZE (GET_MODE (value
)) >= bitsize
)
661 /* Optimization: Don't bother really extending VALUE
662 if it has all the bits we will actually use. However,
663 if we must narrow it, be sure we do it correctly. */
665 if (GET_MODE_SIZE (GET_MODE (value
)) < GET_MODE_SIZE (op_mode
))
669 tmp
= simplify_subreg (op_mode
, value1
, GET_MODE (value
), 0);
671 tmp
= simplify_gen_subreg (op_mode
,
672 force_reg (GET_MODE (value
),
674 GET_MODE (value
), 0);
678 value1
= gen_lowpart (op_mode
, value1
);
680 else if (CONST_INT_P (value
))
681 value1
= gen_int_mode (INTVAL (value
), op_mode
);
683 /* Parse phase is supposed to make VALUE's data type
684 match that of the component reference, which is a type
685 at least as wide as the field; so VALUE should have
686 a mode that corresponds to that type. */
687 gcc_assert (CONSTANT_P (value
));
690 create_fixed_operand (&ops
[0], xop0
);
691 create_integer_operand (&ops
[1], bitsize
);
692 create_integer_operand (&ops
[2], bitnum
);
693 create_input_operand (&ops
[3], value1
, op_mode
);
694 if (maybe_expand_insn (insv
->icode
, 4, ops
))
697 convert_move (op0
, xop0
, true);
700 delete_insns_since (last
);
704 /* A subroutine of store_bit_field, with the same arguments. Return true
705 if the operation could be implemented.
707 If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
708 no other way of implementing the operation. If FALLBACK_P is false,
709 return false instead. */
712 store_bit_field_1 (rtx str_rtx
, unsigned HOST_WIDE_INT bitsize
,
713 unsigned HOST_WIDE_INT bitnum
,
714 unsigned HOST_WIDE_INT bitregion_start
,
715 unsigned HOST_WIDE_INT bitregion_end
,
716 machine_mode fieldmode
,
717 rtx value
, bool reverse
, bool fallback_p
)
722 while (GET_CODE (op0
) == SUBREG
)
724 /* The following line once was done only if WORDS_BIG_ENDIAN,
725 but I think that is a mistake. WORDS_BIG_ENDIAN is
726 meaningful at a much higher level; when structures are copied
727 between memory and regs, the higher-numbered regs
728 always get higher addresses. */
729 int inner_mode_size
= GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0
)));
730 int outer_mode_size
= GET_MODE_SIZE (GET_MODE (op0
));
733 /* Paradoxical subregs need special handling on big-endian machines. */
734 if (SUBREG_BYTE (op0
) == 0 && inner_mode_size
< outer_mode_size
)
736 int difference
= inner_mode_size
- outer_mode_size
;
738 if (WORDS_BIG_ENDIAN
)
739 byte_offset
+= (difference
/ UNITS_PER_WORD
) * UNITS_PER_WORD
;
740 if (BYTES_BIG_ENDIAN
)
741 byte_offset
+= difference
% UNITS_PER_WORD
;
744 byte_offset
= SUBREG_BYTE (op0
);
746 bitnum
+= byte_offset
* BITS_PER_UNIT
;
747 op0
= SUBREG_REG (op0
);
750 /* No action is needed if the target is a register and if the field
751 lies completely outside that register. This can occur if the source
752 code contains an out-of-bounds access to a small array. */
753 if (REG_P (op0
) && bitnum
>= GET_MODE_BITSIZE (GET_MODE (op0
)))
756 /* Use vec_set patterns for inserting parts of vectors whenever
758 if (VECTOR_MODE_P (GET_MODE (op0
))
760 && optab_handler (vec_set_optab
, GET_MODE (op0
)) != CODE_FOR_nothing
761 && fieldmode
== GET_MODE_INNER (GET_MODE (op0
))
762 && bitsize
== GET_MODE_UNIT_BITSIZE (GET_MODE (op0
))
763 && !(bitnum
% GET_MODE_UNIT_BITSIZE (GET_MODE (op0
))))
765 struct expand_operand ops
[3];
766 machine_mode outermode
= GET_MODE (op0
);
767 machine_mode innermode
= GET_MODE_INNER (outermode
);
768 enum insn_code icode
= optab_handler (vec_set_optab
, outermode
);
769 int pos
= bitnum
/ GET_MODE_BITSIZE (innermode
);
771 create_fixed_operand (&ops
[0], op0
);
772 create_input_operand (&ops
[1], value
, innermode
);
773 create_integer_operand (&ops
[2], pos
);
774 if (maybe_expand_insn (icode
, 3, ops
))
778 /* If the target is a register, overwriting the entire object, or storing
779 a full-word or multi-word field can be done with just a SUBREG. */
781 && bitsize
== GET_MODE_BITSIZE (fieldmode
)
782 && ((bitsize
== GET_MODE_BITSIZE (GET_MODE (op0
)) && bitnum
== 0)
783 || (bitsize
% BITS_PER_WORD
== 0 && bitnum
% BITS_PER_WORD
== 0)))
785 /* Use the subreg machinery either to narrow OP0 to the required
786 words or to cope with mode punning between equal-sized modes.
787 In the latter case, use subreg on the rhs side, not lhs. */
790 if (bitsize
== GET_MODE_BITSIZE (GET_MODE (op0
)))
792 sub
= simplify_gen_subreg (GET_MODE (op0
), value
, fieldmode
, 0);
796 sub
= flip_storage_order (GET_MODE (op0
), sub
);
797 emit_move_insn (op0
, sub
);
803 sub
= simplify_gen_subreg (fieldmode
, op0
, GET_MODE (op0
),
804 bitnum
/ BITS_PER_UNIT
);
808 value
= flip_storage_order (fieldmode
, value
);
809 emit_move_insn (sub
, value
);
815 /* If the target is memory, storing any naturally aligned field can be
816 done with a simple store. For targets that support fast unaligned
817 memory, any naturally sized, unit aligned field can be done directly. */
818 if (simple_mem_bitfield_p (op0
, bitsize
, bitnum
, fieldmode
))
820 op0
= adjust_bitfield_address (op0
, fieldmode
, bitnum
/ BITS_PER_UNIT
);
822 value
= flip_storage_order (fieldmode
, value
);
823 emit_move_insn (op0
, value
);
827 /* Make sure we are playing with integral modes. Pun with subregs
828 if we aren't. This must come after the entire register case above,
829 since that case is valid for any mode. The following cases are only
830 valid for integral modes. */
832 machine_mode imode
= int_mode_for_mode (GET_MODE (op0
));
833 if (imode
!= GET_MODE (op0
))
836 op0
= adjust_bitfield_address_size (op0
, imode
, 0, MEM_SIZE (op0
));
839 gcc_assert (imode
!= BLKmode
);
840 op0
= gen_lowpart (imode
, op0
);
845 /* Storing an lsb-aligned field in a register
846 can be done with a movstrict instruction. */
850 && lowpart_bit_field_p (bitnum
, bitsize
, GET_MODE (op0
))
851 && bitsize
== GET_MODE_BITSIZE (fieldmode
)
852 && optab_handler (movstrict_optab
, fieldmode
) != CODE_FOR_nothing
)
854 struct expand_operand ops
[2];
855 enum insn_code icode
= optab_handler (movstrict_optab
, fieldmode
);
857 unsigned HOST_WIDE_INT subreg_off
;
859 if (GET_CODE (arg0
) == SUBREG
)
861 /* Else we've got some float mode source being extracted into
862 a different float mode destination -- this combination of
863 subregs results in Severe Tire Damage. */
864 gcc_assert (GET_MODE (SUBREG_REG (arg0
)) == fieldmode
865 || GET_MODE_CLASS (fieldmode
) == MODE_INT
866 || GET_MODE_CLASS (fieldmode
) == MODE_PARTIAL_INT
);
867 arg0
= SUBREG_REG (arg0
);
870 subreg_off
= bitnum
/ BITS_PER_UNIT
;
871 if (validate_subreg (fieldmode
, GET_MODE (arg0
), arg0
, subreg_off
))
873 arg0
= gen_rtx_SUBREG (fieldmode
, arg0
, subreg_off
);
875 create_fixed_operand (&ops
[0], arg0
);
876 /* Shrink the source operand to FIELDMODE. */
877 create_convert_operand_to (&ops
[1], value
, fieldmode
, false);
878 if (maybe_expand_insn (icode
, 2, ops
))
883 /* Handle fields bigger than a word. */
885 if (bitsize
> BITS_PER_WORD
)
887 /* Here we transfer the words of the field
888 in the order least significant first.
889 This is because the most significant word is the one which may
891 However, only do that if the value is not BLKmode. */
893 const bool backwards
= WORDS_BIG_ENDIAN
&& fieldmode
!= BLKmode
;
894 unsigned int nwords
= (bitsize
+ (BITS_PER_WORD
- 1)) / BITS_PER_WORD
;
898 /* This is the mode we must force value to, so that there will be enough
899 subwords to extract. Note that fieldmode will often (always?) be
900 VOIDmode, because that is what store_field uses to indicate that this
901 is a bit field, but passing VOIDmode to operand_subword_force
903 fieldmode
= GET_MODE (value
);
904 if (fieldmode
== VOIDmode
)
905 fieldmode
= smallest_mode_for_size (nwords
* BITS_PER_WORD
, MODE_INT
);
907 last
= get_last_insn ();
908 for (i
= 0; i
< nwords
; i
++)
910 /* If I is 0, use the low-order word in both field and target;
911 if I is 1, use the next to lowest word; and so on. */
912 unsigned int wordnum
= (backwards
913 ? GET_MODE_SIZE (fieldmode
) / UNITS_PER_WORD
916 unsigned int bit_offset
= (backwards
^ reverse
917 ? MAX ((int) bitsize
- ((int) i
+ 1)
920 : (int) i
* BITS_PER_WORD
);
921 rtx value_word
= operand_subword_force (value
, wordnum
, fieldmode
);
922 unsigned HOST_WIDE_INT new_bitsize
=
923 MIN (BITS_PER_WORD
, bitsize
- i
* BITS_PER_WORD
);
925 /* If the remaining chunk doesn't have full wordsize we have
926 to make sure that for big-endian machines the higher order
928 if (new_bitsize
< BITS_PER_WORD
&& BYTES_BIG_ENDIAN
&& !backwards
)
929 value_word
= simplify_expand_binop (word_mode
, lshr_optab
,
931 GEN_INT (BITS_PER_WORD
936 if (!store_bit_field_1 (op0
, new_bitsize
,
938 bitregion_start
, bitregion_end
,
940 value_word
, reverse
, fallback_p
))
942 delete_insns_since (last
);
949 /* If VALUE has a floating-point or complex mode, access it as an
950 integer of the corresponding size. This can occur on a machine
951 with 64 bit registers that uses SFmode for float. It can also
952 occur for unaligned float or complex fields. */
954 if (GET_MODE (value
) != VOIDmode
955 && GET_MODE_CLASS (GET_MODE (value
)) != MODE_INT
956 && GET_MODE_CLASS (GET_MODE (value
)) != MODE_PARTIAL_INT
)
958 value
= gen_reg_rtx (int_mode_for_mode (GET_MODE (value
)));
959 emit_move_insn (gen_lowpart (GET_MODE (orig_value
), value
), orig_value
);
962 /* If OP0 is a multi-word register, narrow it to the affected word.
963 If the region spans two words, defer to store_split_bit_field. */
964 if (!MEM_P (op0
) && GET_MODE_SIZE (GET_MODE (op0
)) > UNITS_PER_WORD
)
966 op0
= simplify_gen_subreg (word_mode
, op0
, GET_MODE (op0
),
967 bitnum
/ BITS_PER_WORD
* UNITS_PER_WORD
);
969 bitnum
%= BITS_PER_WORD
;
970 if (bitnum
+ bitsize
> BITS_PER_WORD
)
975 store_split_bit_field (op0
, bitsize
, bitnum
, bitregion_start
,
976 bitregion_end
, value
, reverse
);
981 /* From here on we can assume that the field to be stored in fits
982 within a word. If the destination is a register, it too fits
985 extraction_insn insv
;
988 && get_best_reg_extraction_insn (&insv
, EP_insv
,
989 GET_MODE_BITSIZE (GET_MODE (op0
)),
991 && store_bit_field_using_insv (&insv
, op0
, bitsize
, bitnum
, value
))
994 /* If OP0 is a memory, try copying it to a register and seeing if a
995 cheap register alternative is available. */
996 if (MEM_P (op0
) && !reverse
)
998 if (get_best_mem_extraction_insn (&insv
, EP_insv
, bitsize
, bitnum
,
1000 && store_bit_field_using_insv (&insv
, op0
, bitsize
, bitnum
, value
))
1003 rtx_insn
*last
= get_last_insn ();
1005 /* Try loading part of OP0 into a register, inserting the bitfield
1006 into that, and then copying the result back to OP0. */
1007 unsigned HOST_WIDE_INT bitpos
;
1008 rtx xop0
= adjust_bit_field_mem_for_reg (EP_insv
, op0
, bitsize
, bitnum
,
1009 bitregion_start
, bitregion_end
,
1010 fieldmode
, &bitpos
);
1013 rtx tempreg
= copy_to_reg (xop0
);
1014 if (store_bit_field_1 (tempreg
, bitsize
, bitpos
,
1015 bitregion_start
, bitregion_end
,
1016 fieldmode
, orig_value
, reverse
, false))
1018 emit_move_insn (xop0
, tempreg
);
1021 delete_insns_since (last
);
1028 store_fixed_bit_field (op0
, bitsize
, bitnum
, bitregion_start
,
1029 bitregion_end
, value
, reverse
);
/* Generate code to store value from rtx VALUE
   into a bit-field within structure STR_RTX
   containing BITSIZE bits starting at bit BITNUM.

   BITREGION_START is bitpos of the first bitfield in this region.
   BITREGION_END is the bitpos of the ending bitfield in this region.
   These two fields are 0, if the C++ memory model does not apply,
   or we are not interested in keeping track of bitfield regions.

   FIELDMODE is the machine-mode of the FIELD_DECL node for this field.

   If REVERSE is true, the store is to be done in reverse order.  */

void
store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
		 unsigned HOST_WIDE_INT bitnum,
		 unsigned HOST_WIDE_INT bitregion_start,
		 unsigned HOST_WIDE_INT bitregion_end,
		 machine_mode fieldmode,
		 rtx value, bool reverse)
{
  /* Handle -fstrict-volatile-bitfields in the cases where it applies.  */
  if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, fieldmode,
				  bitregion_start, bitregion_end))
    {
      /* Storing of a full word can be done with a simple store.
	 We know here that the field can be accessed with one single
	 instruction.  For targets that support unaligned memory,
	 an unaligned access may be necessary.  */
      if (bitsize == GET_MODE_BITSIZE (fieldmode))
	{
	  str_rtx = adjust_bitfield_address (str_rtx, fieldmode,
					     bitnum / BITS_PER_UNIT);
	  if (reverse)
	    value = flip_storage_order (fieldmode, value);
	  gcc_assert (bitnum % BITS_PER_UNIT == 0);
	  emit_move_insn (str_rtx, value);
	}
      else
	{
	  rtx temp;

	  /* Partial-word field: load the containing FIELDMODE word into a
	     register, insert the field there, and write the word back.  */
	  str_rtx = narrow_bit_field_mem (str_rtx, fieldmode, bitsize, bitnum,
					  &bitnum);
	  gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (fieldmode));
	  temp = copy_to_reg (str_rtx);
	  if (!store_bit_field_1 (temp, bitsize, bitnum, 0, 0,
				  fieldmode, value, reverse, true))
	    gcc_unreachable ();

	  emit_move_insn (str_rtx, temp);
	}

      return;
    }

  /* Under the C++0x memory model, we must not touch bits outside the
     bit region.  Adjust the address to start at the beginning of the
     bit region.  */
  if (MEM_P (str_rtx) && bitregion_start > 0)
    {
      machine_mode bestmode;
      HOST_WIDE_INT offset, size;

      gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);

      /* Rebase the memory reference and all bit positions so the region
	 starts at bit 0 of the adjusted address.  */
      offset = bitregion_start / BITS_PER_UNIT;
      bitnum -= bitregion_start;
      size = (bitnum + bitsize + BITS_PER_UNIT - 1) / BITS_PER_UNIT;
      bitregion_end -= bitregion_start;
      bitregion_start = 0;
      bestmode = get_best_mode (bitsize, bitnum,
				bitregion_start, bitregion_end,
				MEM_ALIGN (str_rtx), VOIDmode,
				MEM_VOLATILE_P (str_rtx));
      str_rtx = adjust_bitfield_address_size (str_rtx, bestmode, offset, size);
    }

  if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
			  bitregion_start, bitregion_end,
			  fieldmode, value, reverse, true))
    gcc_unreachable ();
}
/* Use shifts and boolean operations to store VALUE into a bit field of
   width BITSIZE in OP0, starting at bit BITNUM.

   If REVERSE is true, the store is to be done in reverse order.  */

static void
store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
		       unsigned HOST_WIDE_INT bitnum,
		       unsigned HOST_WIDE_INT bitregion_start,
		       unsigned HOST_WIDE_INT bitregion_end,
		       rtx value, bool reverse)
{
  /* There is a case not handled here:
     a structure with a known alignment of just a halfword
     and a field split across two aligned halfwords within the structure.
     Or likewise a structure with a known alignment of just a byte
     and a field split across two bytes.
     Such cases are not supposed to be able to occur.  */

  if (MEM_P (op0))
    {
      machine_mode mode = GET_MODE (op0);
      /* Fall back to word_mode when OP0's mode is unusable as an access
	 unit (BLKmode, or wider than a word).  */
      if (GET_MODE_BITSIZE (mode) == 0
	  || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
	mode = word_mode;
      mode = get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
			    MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));

      if (mode == VOIDmode)
	{
	  /* The only way this should occur is if the field spans word
	     boundaries.  */
	  store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
				 bitregion_end, value, reverse);
	  return;
	}

      op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
    }

  store_fixed_bit_field_1 (op0, bitsize, bitnum, value, reverse);
}
/* Helper function for store_fixed_bit_field, stores
   the bit field always using the MODE of OP0.  */

static void
store_fixed_bit_field_1 (rtx op0, unsigned HOST_WIDE_INT bitsize,
			 unsigned HOST_WIDE_INT bitnum,
			 rtx value, bool reverse)
{
  machine_mode mode;
  rtx temp;
  /* ALL_ZERO / ALL_ONE record whether a CONST_INT VALUE is all-zeros or
     all-ones in the field, letting us skip the OR or the AND below.  */
  int all_zero = 0;
  int all_one = 0;

  mode = GET_MODE (op0);
  gcc_assert (SCALAR_INT_MODE_P (mode));

  /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
     for invalid input, such as f5 from gcc.dg/pr48335-2.c.  */

  if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
    /* BITNUM is the distance between our msb
       and that of the containing datum.
       Convert it to the distance from the lsb.  */
    bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;

  /* Now BITNUM is always the distance between our lsb
     and that of OP0.  */

  /* Shift VALUE left by BITNUM bits.  If VALUE is not constant,
     we must first convert its mode to MODE.  */

  if (CONST_INT_P (value))
    {
      unsigned HOST_WIDE_INT v = UINTVAL (value);

      if (bitsize < HOST_BITS_PER_WIDE_INT)
	v &= ((unsigned HOST_WIDE_INT) 1 << bitsize) - 1;

      if (v == 0)
	all_zero = 1;
      else if ((bitsize < HOST_BITS_PER_WIDE_INT
		&& v == ((unsigned HOST_WIDE_INT) 1 << bitsize) - 1)
	       || (bitsize == HOST_BITS_PER_WIDE_INT
		   && v == (unsigned HOST_WIDE_INT) -1))
	all_one = 1;

      value = lshift_value (mode, v, bitnum);
    }
  else
    {
      /* Masking is needed only when VALUE's mode is wider than the field
	 and the field does not reach the msb of MODE (otherwise the shift
	 below discards the excess bits).  */
      int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
		      && bitnum + bitsize != GET_MODE_BITSIZE (mode));

      if (GET_MODE (value) != mode)
	value = convert_to_mode (mode, value, 1);

      if (must_and)
	value = expand_binop (mode, and_optab, value,
			      mask_rtx (mode, 0, bitsize, 0),
			      NULL_RTX, 1, OPTAB_LIB_WIDEN);
      if (bitnum > 0)
	value = expand_shift (LSHIFT_EXPR, mode, value,
			      bitnum, NULL_RTX, 1);
    }

  if (reverse)
    value = flip_storage_order (mode, value);

  /* Now clear the chosen bits in OP0,
     except that if VALUE is -1 we need not bother.  */
  /* We keep the intermediates in registers to allow CSE to combine
     consecutive bitfield assignments.  */

  temp = force_reg (mode, op0);

  if (! all_one)
    {
      rtx mask = mask_rtx (mode, bitnum, bitsize, 1);
      if (reverse)
	mask = flip_storage_order (mode, mask);
      temp = expand_binop (mode, and_optab, temp, mask,
			   NULL_RTX, 1, OPTAB_LIB_WIDEN);
      temp = force_reg (mode, temp);
    }

  /* Now logical-or VALUE into OP0, unless it is zero.  */

  if (! all_zero)
    {
      temp = expand_binop (mode, ior_optab, temp, value,
			   NULL_RTX, 1, OPTAB_LIB_WIDEN);
      temp = force_reg (mode, temp);
    }

  if (op0 != temp)
    {
      op0 = copy_rtx (op0);
      emit_move_insn (op0, temp);
    }
}
/* Store a bit field that is split across multiple accessible memory objects.

   OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
   BITSIZE is the field width; BITPOS the position of its first bit
   within the object.
   VALUE is the value to store.

   If REVERSE is true, the store is to be done in reverse order.

   This does not yet handle fields wider than BITS_PER_WORD.  */

static void
store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
		       unsigned HOST_WIDE_INT bitpos,
		       unsigned HOST_WIDE_INT bitregion_start,
		       unsigned HOST_WIDE_INT bitregion_end,
		       rtx value, bool reverse)
{
  unsigned int unit, total_bits, bitsdone = 0;

  /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
     much at a time.  */
  if (REG_P (op0) || GET_CODE (op0) == SUBREG)
    unit = BITS_PER_WORD;
  else
    unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);

  /* If OP0 is a memory with a mode, then UNIT must not be larger than
     OP0's mode as well.  Otherwise, store_fixed_bit_field will call us
     again, and we will mutually recurse forever.  */
  if (MEM_P (op0) && GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
    unit = MIN (unit, GET_MODE_BITSIZE (GET_MODE (op0)));

  /* If VALUE is a constant other than a CONST_INT, get it into a register in
     WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
     that VALUE might be a floating-point constant.  */
  if (CONSTANT_P (value) && !CONST_INT_P (value))
    {
      rtx word = gen_lowpart_common (word_mode, value);

      if (word && (value != word))
	value = word;
      else
	value = gen_lowpart_common (word_mode,
				    force_reg (GET_MODE (value) != VOIDmode
					       ? GET_MODE (value)
					       : word_mode, value));
    }

  total_bits = GET_MODE_BITSIZE (GET_MODE (value));

  /* Store the field UNIT-sized (or smaller) piece by piece, tracking the
     number of bits already stored in BITSDONE.  */
  while (bitsdone < bitsize)
    {
      unsigned HOST_WIDE_INT thissize;
      unsigned HOST_WIDE_INT thispos;
      unsigned HOST_WIDE_INT offset;
      rtx part, word;

      offset = (bitpos + bitsdone) / unit;
      thispos = (bitpos + bitsdone) % unit;

      /* When region of bytes we can touch is restricted, decrease
	 UNIT close to the end of the region as needed.  If op0 is a REG
	 or SUBREG of REG, don't do this, as there can't be data races
	 on a register and we can expand shorter code in some cases.  */
      if (bitregion_end
	  && unit > BITS_PER_UNIT
	  && bitpos + bitsdone - thispos + unit > bitregion_end + 1
	  && !REG_P (op0)
	  && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
	{
	  unit = unit / 2;
	  continue;
	}

      /* THISSIZE must not overrun a word boundary.  Otherwise,
	 store_fixed_bit_field will call us again, and we will mutually
	 recurse forever.  */
      thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
      thissize = MIN (thissize, unit - thispos);

      if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
	{
	  /* Fetch successively less significant portions.  */
	  if (CONST_INT_P (value))
	    part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
			     >> (bitsize - bitsdone - thissize))
			    & (((HOST_WIDE_INT) 1 << thissize) - 1));
	  /* Likewise, but the source is little-endian.  */
	  else if (reverse)
	    part = extract_fixed_bit_field (word_mode, value, thissize,
					    bitsize - bitsdone - thissize,
					    NULL_RTX, 1, false);
	  else
	    {
	      int total_bits = GET_MODE_BITSIZE (GET_MODE (value));
	      /* The args are chosen so that the last part includes the
		 lsb.  Give extract_bit_field the value it needs (with
		 endianness compensation) to fetch the piece we want.  */
	      part = extract_fixed_bit_field (word_mode, value, thissize,
					      total_bits - bitsize + bitsdone,
					      NULL_RTX, 1, false);
	    }
	}
      else
	{
	  /* Fetch successively more significant portions.  */
	  if (CONST_INT_P (value))
	    part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
			     >> bitsdone)
			    & (((HOST_WIDE_INT) 1 << thissize) - 1));
	  /* Likewise, but the source is big-endian.  */
	  else if (reverse)
	    part = extract_fixed_bit_field (word_mode, value, thissize,
					    total_bits - bitsdone - thissize,
					    NULL_RTX, 1, false);
	  else
	    part = extract_fixed_bit_field (word_mode, value, thissize,
					    bitsdone, NULL_RTX, 1, false);
	}

      /* If OP0 is a register, then handle OFFSET here.

	 When handling multiword bitfields, extract_bit_field may pass
	 down a word_mode SUBREG of a larger REG for a bitfield that actually
	 crosses a word boundary.  Thus, for a SUBREG, we must find
	 the current word starting from the base register.  */
      if (GET_CODE (op0) == SUBREG)
	{
	  int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD)
			    + (offset * unit / BITS_PER_WORD);
	  machine_mode sub_mode = GET_MODE (SUBREG_REG (op0));
	  if (sub_mode != BLKmode && GET_MODE_SIZE (sub_mode) < UNITS_PER_WORD)
	    word = word_offset ? const0_rtx : op0;
	  else
	    word = operand_subword_force (SUBREG_REG (op0), word_offset,
					  GET_MODE (SUBREG_REG (op0)));
	  offset &= BITS_PER_WORD / unit - 1;
	}
      else if (REG_P (op0))
	{
	  machine_mode op0_mode = GET_MODE (op0);
	  if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
	    word = offset ? const0_rtx : op0;
	  else
	    word = operand_subword_force (op0, offset * unit / BITS_PER_WORD,
					  GET_MODE (op0));
	  offset &= BITS_PER_WORD / unit - 1;
	}
      else
	word = op0;

      /* OFFSET is in UNITs, and UNIT is in bits.  If WORD is const0_rtx,
	 it is just an out-of-bounds access.  Ignore it.  */
      if (word != const0_rtx)
	store_fixed_bit_field (word, thissize, offset * unit + thispos,
			       bitregion_start, bitregion_end, part,
			       reverse);
      bitsdone += thissize;
    }
}
/* A subroutine of extract_bit_field_1 that converts return value X
   to either MODE or TMODE.  MODE, TMODE and UNSIGNEDP are arguments
   to extract_bit_field.  */

static rtx
convert_extracted_bit_field (rtx x, machine_mode mode,
			     machine_mode tmode, bool unsignedp)
{
  /* Already in an acceptable mode: nothing to do.  */
  if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
    return x;

  /* If the x mode is not a scalar integral, first convert to the
     integer mode of that size and then access it as a floating-point
     value via a SUBREG.  */
  if (!SCALAR_INT_MODE_P (tmode))
    {
      machine_mode smode;

      smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
      x = convert_to_mode (smode, x, unsignedp);
      x = force_reg (smode, x);
      return gen_lowpart (tmode, x);
    }

  return convert_to_mode (tmode, x, unsignedp);
}
/* Try to use an ext(z)v pattern to extract a field from OP0.
   Return the extracted value on success, otherwise return null.
   EXT_MODE is the mode of the extraction and the other arguments
   are as for extract_bit_field.  */

static rtx
extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
			      unsigned HOST_WIDE_INT bitsize,
			      unsigned HOST_WIDE_INT bitnum,
			      int unsignedp, rtx target,
			      machine_mode mode, machine_mode tmode)
{
  struct expand_operand ops[4];
  rtx spec_target = target;
  rtx spec_target_subreg = 0;
  machine_mode ext_mode = extv->field_mode;
  unsigned unit = GET_MODE_BITSIZE (ext_mode);

  /* The extraction mode must be able to hold the whole field.  */
  if (bitsize == 0 || unit < bitsize)
    return NULL_RTX;

  if (MEM_P (op0))
    /* Get a reference to the first byte of the field.  */
    op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
				&bitnum);
  else
    {
      /* Convert from counting within OP0 to counting in EXT_MODE.  */
      if (BYTES_BIG_ENDIAN)
	bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));

      /* If op0 is a register, we need it in EXT_MODE to make it
	 acceptable to the format of ext(z)v.  */
      if (GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
	return NULL_RTX;
      if (REG_P (op0) && GET_MODE (op0) != ext_mode)
	op0 = gen_lowpart_SUBREG (ext_mode, op0);
    }

  /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
     "backwards" from the size of the unit we are extracting from.
     Otherwise, we count bits from the most significant on a
     BYTES/BITS_BIG_ENDIAN machine.  */

  if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
    bitnum = unit - bitsize - bitnum;

  if (target == 0)
    target = spec_target = gen_reg_rtx (tmode);

  if (GET_MODE (target) != ext_mode)
    {
      /* Don't use LHS paradoxical subreg if explicit truncation is needed
	 between the mode of the extraction (word_mode) and the target
	 mode.  Instead, create a temporary and use convert_move to set
	 the target.  */
      if (REG_P (target)
	  && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode))
	{
	  target = gen_lowpart (ext_mode, target);
	  if (GET_MODE_PRECISION (ext_mode)
	      > GET_MODE_PRECISION (GET_MODE (spec_target)))
	    spec_target_subreg = target;
	}
      else
	target = gen_reg_rtx (ext_mode);
    }

  create_output_operand (&ops[0], target, ext_mode);
  create_fixed_operand (&ops[1], op0);
  create_integer_operand (&ops[2], bitsize);
  create_integer_operand (&ops[3], bitnum);
  if (maybe_expand_insn (extv->icode, 4, ops))
    {
      target = ops[0].value;
      if (target == spec_target)
	return target;
      if (target == spec_target_subreg)
	return spec_target;
      return convert_extracted_bit_field (target, mode, tmode, unsignedp);
    }
  return NULL_RTX;
}
1534 /* A subroutine of extract_bit_field, with the same arguments.
1535 If FALLBACK_P is true, fall back to extract_fixed_bit_field
1536 if we can find no other means of implementing the operation.
1537 if FALLBACK_P is false, return NULL instead. */
1540 extract_bit_field_1 (rtx str_rtx
, unsigned HOST_WIDE_INT bitsize
,
1541 unsigned HOST_WIDE_INT bitnum
, int unsignedp
, rtx target
,
1542 machine_mode mode
, machine_mode tmode
,
1543 bool reverse
, bool fallback_p
)
1546 machine_mode int_mode
;
1549 if (tmode
== VOIDmode
)
1552 while (GET_CODE (op0
) == SUBREG
)
1554 bitnum
+= SUBREG_BYTE (op0
) * BITS_PER_UNIT
;
1555 op0
= SUBREG_REG (op0
);
1558 /* If we have an out-of-bounds access to a register, just return an
1559 uninitialized register of the required mode. This can occur if the
1560 source code contains an out-of-bounds access to a small array. */
1561 if (REG_P (op0
) && bitnum
>= GET_MODE_BITSIZE (GET_MODE (op0
)))
1562 return gen_reg_rtx (tmode
);
1565 && mode
== GET_MODE (op0
)
1567 && bitsize
== GET_MODE_BITSIZE (GET_MODE (op0
)))
1570 op0
= flip_storage_order (mode
, op0
);
1571 /* We're trying to extract a full register from itself. */
1575 /* See if we can get a better vector mode before extracting. */
1576 if (VECTOR_MODE_P (GET_MODE (op0
))
1578 && GET_MODE_INNER (GET_MODE (op0
)) != tmode
)
1580 machine_mode new_mode
;
1582 if (GET_MODE_CLASS (tmode
) == MODE_FLOAT
)
1583 new_mode
= MIN_MODE_VECTOR_FLOAT
;
1584 else if (GET_MODE_CLASS (tmode
) == MODE_FRACT
)
1585 new_mode
= MIN_MODE_VECTOR_FRACT
;
1586 else if (GET_MODE_CLASS (tmode
) == MODE_UFRACT
)
1587 new_mode
= MIN_MODE_VECTOR_UFRACT
;
1588 else if (GET_MODE_CLASS (tmode
) == MODE_ACCUM
)
1589 new_mode
= MIN_MODE_VECTOR_ACCUM
;
1590 else if (GET_MODE_CLASS (tmode
) == MODE_UACCUM
)
1591 new_mode
= MIN_MODE_VECTOR_UACCUM
;
1593 new_mode
= MIN_MODE_VECTOR_INT
;
1595 for (; new_mode
!= VOIDmode
; new_mode
= GET_MODE_WIDER_MODE (new_mode
))
1596 if (GET_MODE_SIZE (new_mode
) == GET_MODE_SIZE (GET_MODE (op0
))
1597 && targetm
.vector_mode_supported_p (new_mode
))
1599 if (new_mode
!= VOIDmode
)
1600 op0
= gen_lowpart (new_mode
, op0
);
1603 /* Use vec_extract patterns for extracting parts of vectors whenever
1605 if (VECTOR_MODE_P (GET_MODE (op0
))
1607 && optab_handler (vec_extract_optab
, GET_MODE (op0
)) != CODE_FOR_nothing
1608 && ((bitnum
+ bitsize
- 1) / GET_MODE_UNIT_BITSIZE (GET_MODE (op0
))
1609 == bitnum
/ GET_MODE_UNIT_BITSIZE (GET_MODE (op0
))))
1611 struct expand_operand ops
[3];
1612 machine_mode outermode
= GET_MODE (op0
);
1613 machine_mode innermode
= GET_MODE_INNER (outermode
);
1614 enum insn_code icode
= optab_handler (vec_extract_optab
, outermode
);
1615 unsigned HOST_WIDE_INT pos
= bitnum
/ GET_MODE_BITSIZE (innermode
);
1617 create_output_operand (&ops
[0], target
, innermode
);
1618 create_input_operand (&ops
[1], op0
, outermode
);
1619 create_integer_operand (&ops
[2], pos
);
1620 if (maybe_expand_insn (icode
, 3, ops
))
1622 target
= ops
[0].value
;
1623 if (GET_MODE (target
) != mode
)
1624 return gen_lowpart (tmode
, target
);
1629 /* Make sure we are playing with integral modes. Pun with subregs
1632 machine_mode imode
= int_mode_for_mode (GET_MODE (op0
));
1633 if (imode
!= GET_MODE (op0
))
1636 op0
= adjust_bitfield_address_size (op0
, imode
, 0, MEM_SIZE (op0
));
1637 else if (imode
!= BLKmode
)
1639 op0
= gen_lowpart (imode
, op0
);
1641 /* If we got a SUBREG, force it into a register since we
1642 aren't going to be able to do another SUBREG on it. */
1643 if (GET_CODE (op0
) == SUBREG
)
1644 op0
= force_reg (imode
, op0
);
1646 else if (REG_P (op0
))
1649 imode
= smallest_mode_for_size (GET_MODE_BITSIZE (GET_MODE (op0
)),
1651 reg
= gen_reg_rtx (imode
);
1652 subreg
= gen_lowpart_SUBREG (GET_MODE (op0
), reg
);
1653 emit_move_insn (subreg
, op0
);
1655 bitnum
+= SUBREG_BYTE (subreg
) * BITS_PER_UNIT
;
1659 HOST_WIDE_INT size
= GET_MODE_SIZE (GET_MODE (op0
));
1660 rtx mem
= assign_stack_temp (GET_MODE (op0
), size
);
1661 emit_move_insn (mem
, op0
);
1662 op0
= adjust_bitfield_address_size (mem
, BLKmode
, 0, size
);
1667 /* ??? We currently assume TARGET is at least as big as BITSIZE.
1668 If that's wrong, the solution is to test for it and set TARGET to 0
1671 /* Get the mode of the field to use for atomic access or subreg
1674 if (SCALAR_INT_MODE_P (tmode
))
1676 machine_mode try_mode
= mode_for_size (bitsize
,
1677 GET_MODE_CLASS (tmode
), 0);
1678 if (try_mode
!= BLKmode
)
1681 gcc_assert (mode1
!= BLKmode
);
1683 /* Extraction of a full MODE1 value can be done with a subreg as long
1684 as the least significant bit of the value is the least significant
1685 bit of either OP0 or a word of OP0. */
1688 && lowpart_bit_field_p (bitnum
, bitsize
, GET_MODE (op0
))
1689 && bitsize
== GET_MODE_BITSIZE (mode1
)
1690 && TRULY_NOOP_TRUNCATION_MODES_P (mode1
, GET_MODE (op0
)))
1692 rtx sub
= simplify_gen_subreg (mode1
, op0
, GET_MODE (op0
),
1693 bitnum
/ BITS_PER_UNIT
);
1695 return convert_extracted_bit_field (sub
, mode
, tmode
, unsignedp
);
1698 /* Extraction of a full MODE1 value can be done with a load as long as
1699 the field is on a byte boundary and is sufficiently aligned. */
1700 if (simple_mem_bitfield_p (op0
, bitsize
, bitnum
, mode1
))
1702 op0
= adjust_bitfield_address (op0
, mode1
, bitnum
/ BITS_PER_UNIT
);
1704 op0
= flip_storage_order (mode1
, op0
);
1705 return convert_extracted_bit_field (op0
, mode
, tmode
, unsignedp
);
1708 /* Handle fields bigger than a word. */
1710 if (bitsize
> BITS_PER_WORD
)
1712 /* Here we transfer the words of the field
1713 in the order least significant first.
1714 This is because the most significant word is the one which may
1715 be less than full. */
1717 const bool backwards
= WORDS_BIG_ENDIAN
;
1718 unsigned int nwords
= (bitsize
+ (BITS_PER_WORD
- 1)) / BITS_PER_WORD
;
1722 if (target
== 0 || !REG_P (target
) || !valid_multiword_target_p (target
))
1723 target
= gen_reg_rtx (mode
);
1725 /* In case we're about to clobber a base register or something
1726 (see gcc.c-torture/execute/20040625-1.c). */
1727 if (reg_mentioned_p (target
, str_rtx
))
1728 target
= gen_reg_rtx (mode
);
1730 /* Indicate for flow that the entire target reg is being set. */
1731 emit_clobber (target
);
1733 last
= get_last_insn ();
1734 for (i
= 0; i
< nwords
; i
++)
1736 /* If I is 0, use the low-order word in both field and target;
1737 if I is 1, use the next to lowest word; and so on. */
1738 /* Word number in TARGET to use. */
1739 unsigned int wordnum
1741 ? GET_MODE_SIZE (GET_MODE (target
)) / UNITS_PER_WORD
- i
- 1
1743 /* Offset from start of field in OP0. */
1744 unsigned int bit_offset
= (backwards
^ reverse
1745 ? MAX ((int) bitsize
- ((int) i
+ 1)
1748 : (int) i
* BITS_PER_WORD
);
1749 rtx target_part
= operand_subword (target
, wordnum
, 1, VOIDmode
);
1751 = extract_bit_field_1 (op0
, MIN (BITS_PER_WORD
,
1752 bitsize
- i
* BITS_PER_WORD
),
1753 bitnum
+ bit_offset
, 1, target_part
,
1754 mode
, word_mode
, reverse
, fallback_p
);
1756 gcc_assert (target_part
);
1759 delete_insns_since (last
);
1763 if (result_part
!= target_part
)
1764 emit_move_insn (target_part
, result_part
);
1769 /* Unless we've filled TARGET, the upper regs in a multi-reg value
1770 need to be zero'd out. */
1771 if (GET_MODE_SIZE (GET_MODE (target
)) > nwords
* UNITS_PER_WORD
)
1773 unsigned int i
, total_words
;
1775 total_words
= GET_MODE_SIZE (GET_MODE (target
)) / UNITS_PER_WORD
;
1776 for (i
= nwords
; i
< total_words
; i
++)
1778 (operand_subword (target
,
1779 backwards
? total_words
- i
- 1 : i
,
1786 /* Signed bit field: sign-extend with two arithmetic shifts. */
1787 target
= expand_shift (LSHIFT_EXPR
, mode
, target
,
1788 GET_MODE_BITSIZE (mode
) - bitsize
, NULL_RTX
, 0);
1789 return expand_shift (RSHIFT_EXPR
, mode
, target
,
1790 GET_MODE_BITSIZE (mode
) - bitsize
, NULL_RTX
, 0);
1793 /* If OP0 is a multi-word register, narrow it to the affected word.
1794 If the region spans two words, defer to extract_split_bit_field. */
1795 if (!MEM_P (op0
) && GET_MODE_SIZE (GET_MODE (op0
)) > UNITS_PER_WORD
)
1797 op0
= simplify_gen_subreg (word_mode
, op0
, GET_MODE (op0
),
1798 bitnum
/ BITS_PER_WORD
* UNITS_PER_WORD
);
1799 bitnum
%= BITS_PER_WORD
;
1800 if (bitnum
+ bitsize
> BITS_PER_WORD
)
1804 target
= extract_split_bit_field (op0
, bitsize
, bitnum
, unsignedp
,
1806 return convert_extracted_bit_field (target
, mode
, tmode
, unsignedp
);
1810 /* From here on we know the desired field is smaller than a word.
1811 If OP0 is a register, it too fits within a word. */
1812 enum extraction_pattern pattern
= unsignedp
? EP_extzv
: EP_extv
;
1813 extraction_insn extv
;
1816 /* ??? We could limit the structure size to the part of OP0 that
1817 contains the field, with appropriate checks for endianness
1818 and TRULY_NOOP_TRUNCATION. */
1819 && get_best_reg_extraction_insn (&extv
, pattern
,
1820 GET_MODE_BITSIZE (GET_MODE (op0
)),
1823 rtx result
= extract_bit_field_using_extv (&extv
, op0
, bitsize
, bitnum
,
1824 unsignedp
, target
, mode
,
1830 /* If OP0 is a memory, try copying it to a register and seeing if a
1831 cheap register alternative is available. */
1832 if (MEM_P (op0
) & !reverse
)
1834 if (get_best_mem_extraction_insn (&extv
, pattern
, bitsize
, bitnum
,
1837 rtx result
= extract_bit_field_using_extv (&extv
, op0
, bitsize
,
1845 rtx_insn
*last
= get_last_insn ();
1847 /* Try loading part of OP0 into a register and extracting the
1848 bitfield from that. */
1849 unsigned HOST_WIDE_INT bitpos
;
1850 rtx xop0
= adjust_bit_field_mem_for_reg (pattern
, op0
, bitsize
, bitnum
,
1851 0, 0, tmode
, &bitpos
);
1854 xop0
= copy_to_reg (xop0
);
1855 rtx result
= extract_bit_field_1 (xop0
, bitsize
, bitpos
,
1857 mode
, tmode
, reverse
, false);
1860 delete_insns_since (last
);
1867 /* Find a correspondingly-sized integer field, so we can apply
1868 shifts and masks to it. */
1869 int_mode
= int_mode_for_mode (tmode
);
1870 if (int_mode
== BLKmode
)
1871 int_mode
= int_mode_for_mode (mode
);
1872 /* Should probably push op0 out to memory and then do a load. */
1873 gcc_assert (int_mode
!= BLKmode
);
1875 target
= extract_fixed_bit_field (int_mode
, op0
, bitsize
, bitnum
, target
,
1876 unsignedp
, reverse
);
1878 /* Complex values must be reversed piecewise, so we need to undo the global
1879 reversal, convert to the complex mode and reverse again. */
1880 if (reverse
&& COMPLEX_MODE_P (tmode
))
1882 target
= flip_storage_order (int_mode
, target
);
1883 target
= convert_extracted_bit_field (target
, mode
, tmode
, unsignedp
);
1884 target
= flip_storage_order (tmode
, target
);
1887 target
= convert_extracted_bit_field (target
, mode
, tmode
, unsignedp
);
/* Generate code to extract a byte-field from STR_RTX
   containing BITSIZE bits, starting at BITNUM,
   and put it in TARGET if possible (if TARGET is nonzero).
   Regardless of TARGET, we return the rtx for where the value is placed.

   STR_RTX is the structure containing the byte (a REG or MEM).
   UNSIGNEDP is nonzero if this is an unsigned bit field.
   MODE is the natural mode of the field value once extracted.
   TMODE is the mode the caller would like the value to have;
   but the value may be returned with type MODE instead.

   If REVERSE is true, the extraction is to be done in reverse order.

   If a TARGET is specified and we can store in it at no extra cost,
   we do so, and return TARGET.
   Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
   if they are equally easy.  */

rtx
extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
		   unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
		   machine_mode mode, machine_mode tmode, bool reverse)
{
  machine_mode mode1;

  /* Handle -fstrict-volatile-bitfields in the cases where it applies.  */
  if (GET_MODE_BITSIZE (GET_MODE (str_rtx)) > 0)
    mode1 = GET_MODE (str_rtx);
  else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
    mode1 = GET_MODE (target);
  else
    mode1 = tmode;

  if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, mode1, 0, 0))
    {
      /* Extraction of a full MODE1 value can be done with a simple load.
	 We know here that the field can be accessed with one single
	 instruction.  For targets that support unaligned memory,
	 an unaligned access may be necessary.  */
      if (bitsize == GET_MODE_BITSIZE (mode1))
	{
	  rtx result = adjust_bitfield_address (str_rtx, mode1,
						bitnum / BITS_PER_UNIT);
	  if (reverse)
	    result = flip_storage_order (mode1, result);
	  gcc_assert (bitnum % BITS_PER_UNIT == 0);
	  return convert_extracted_bit_field (result, mode, tmode, unsignedp);
	}

      /* Partial-word field: load the containing MODE1 word into a register
	 and let extract_bit_field_1 pick the field out of it.  */
      str_rtx = narrow_bit_field_mem (str_rtx, mode1, bitsize, bitnum,
				      &bitnum);
      gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (mode1));
      str_rtx = copy_to_reg (str_rtx);
    }

  return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
			      target, mode, tmode, reverse, true);
}
/* Use shifts and boolean operations to extract a field of BITSIZE bits
   from bit BITNUM of OP0.

   UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
   If REVERSE is true, the extraction is to be done in reverse order.

   If TARGET is nonzero, attempts to store the value there
   and return TARGET, but this is not guaranteed.
   If TARGET is not used, create a pseudo-reg of mode TMODE for the value.  */

static rtx
extract_fixed_bit_field (machine_mode tmode, rtx op0,
			 unsigned HOST_WIDE_INT bitsize,
			 unsigned HOST_WIDE_INT bitnum, rtx target,
			 int unsignedp, bool reverse)
{
  if (MEM_P (op0))
    {
      machine_mode mode
	= get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0), word_mode,
			 MEM_VOLATILE_P (op0));

      if (mode == VOIDmode)
	/* The only way this should occur is if the field spans word
	   boundaries.  */
	return extract_split_bit_field (op0, bitsize, bitnum, unsignedp,
					reverse);

      op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
    }

  return extract_fixed_bit_field_1 (tmode, op0, bitsize, bitnum,
				    target, unsignedp, reverse);
}
/* Helper function for extract_fixed_bit_field, extracts
   the bit field always using the MODE of OP0.  */

static rtx
extract_fixed_bit_field_1 (machine_mode tmode, rtx op0,
			   unsigned HOST_WIDE_INT bitsize,
			   unsigned HOST_WIDE_INT bitnum, rtx target,
			   int unsignedp, bool reverse)
{
  machine_mode mode = GET_MODE (op0);
  gcc_assert (SCALAR_INT_MODE_P (mode));

  /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
     for invalid input, such as extract equivalent of f5 from
     gcc.dg/pr48335-2.c.  */

  if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
    /* BITNUM is the distance between our msb and that of OP0.
       Convert it to the distance from the lsb.  */
    bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;

  /* Now BITNUM is always the distance between the field's lsb and that of OP0.
     We have reduced the big-endian case to the little-endian case.  */
  if (reverse)
    op0 = flip_storage_order (mode, op0);

  if (unsignedp)
    {
      if (bitnum)
	{
	  /* If the field does not already start at the lsb,
	     shift it so it does.  */
	  /* Maybe propagate the target for the shift.  */
	  rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
	  if (tmode != mode)
	    subtarget = 0;
	  op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
	}
      /* Convert the value to the desired mode.  */
      if (mode != tmode)
	op0 = convert_to_mode (tmode, op0, 1);

      /* Unless the msb of the field used to be the msb when we shifted,
	 mask out the upper bits.  */

      if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
	return expand_binop (GET_MODE (op0), and_optab, op0,
			     mask_rtx (GET_MODE (op0), 0, bitsize, 0),
			     target, 1, OPTAB_LIB_WIDEN);
      return op0;
    }

  /* To extract a signed bit-field, first shift its msb to the msb of the word,
     then arithmetic-shift its lsb to the lsb of the word.  */
  op0 = force_reg (mode, op0);

  /* Find the narrowest integer mode that contains the field.  */

  for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
       mode = GET_MODE_WIDER_MODE (mode))
    if (GET_MODE_BITSIZE (mode) >= bitsize + bitnum)
      {
	op0 = convert_to_mode (mode, op0, 0);
	break;
      }

  /* TARGET has TMODE; it cannot receive the shift results once we have
     switched to the narrowest containing MODE.  */
  if (mode != tmode)
    target = 0;

  if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
    {
      int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
      /* Maybe propagate the target for the shift.  */
      rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
      op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
    }

  return expand_shift (RSHIFT_EXPR, mode, op0,
		       GET_MODE_BITSIZE (mode) - bitsize, target, 0);
}
/* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
   VALUE << BITPOS in mode MODE.  */

static rtx
lshift_value (machine_mode mode, unsigned HOST_WIDE_INT value,
	      int bitpos)
{
  return immed_wide_int_const (wi::lshift (value, bitpos), mode);
}
2077 /* Extract a bit field that is split across two words
2078 and return an RTX for the result.
2080 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
2081 BITSIZE is the field width; BITPOS, position of its first bit, in the word.
2082 UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.
2084 If REVERSE is true, the extraction is to be done in reverse order. */
2087 extract_split_bit_field (rtx op0
, unsigned HOST_WIDE_INT bitsize
,
2088 unsigned HOST_WIDE_INT bitpos
, int unsignedp
,
2092 unsigned int bitsdone
= 0;
2093 rtx result
= NULL_RTX
;
2096 /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
2098 if (REG_P (op0
) || GET_CODE (op0
) == SUBREG
)
2099 unit
= BITS_PER_WORD
;
2101 unit
= MIN (MEM_ALIGN (op0
), BITS_PER_WORD
);
2103 while (bitsdone
< bitsize
)
2105 unsigned HOST_WIDE_INT thissize
;
2107 unsigned HOST_WIDE_INT thispos
;
2108 unsigned HOST_WIDE_INT offset
;
2110 offset
= (bitpos
+ bitsdone
) / unit
;
2111 thispos
= (bitpos
+ bitsdone
) % unit
;
2113 /* THISSIZE must not overrun a word boundary. Otherwise,
2114 extract_fixed_bit_field will call us again, and we will mutually
2116 thissize
= MIN (bitsize
- bitsdone
, BITS_PER_WORD
);
2117 thissize
= MIN (thissize
, unit
- thispos
);
2119 /* If OP0 is a register, then handle OFFSET here.
2121 When handling multiword bitfields, extract_bit_field may pass
2122 down a word_mode SUBREG of a larger REG for a bitfield that actually
2123 crosses a word boundary. Thus, for a SUBREG, we must find
2124 the current word starting from the base register. */
2125 if (GET_CODE (op0
) == SUBREG
)
2127 int word_offset
= (SUBREG_BYTE (op0
) / UNITS_PER_WORD
) + offset
;
2128 word
= operand_subword_force (SUBREG_REG (op0
), word_offset
,
2129 GET_MODE (SUBREG_REG (op0
)));
2132 else if (REG_P (op0
))
2134 word
= operand_subword_force (op0
, offset
, GET_MODE (op0
));
2140 /* Extract the parts in bit-counting order,
2141 whose meaning is determined by BYTES_PER_UNIT.
2142 OFFSET is in UNITs, and UNIT is in bits. */
2143 part
= extract_fixed_bit_field (word_mode
, word
, thissize
,
2144 offset
* unit
+ thispos
, 0, 1, reverse
);
2145 bitsdone
+= thissize
;
2147 /* Shift this part into place for the result. */
2148 if (reverse
? !BYTES_BIG_ENDIAN
: BYTES_BIG_ENDIAN
)
2150 if (bitsize
!= bitsdone
)
2151 part
= expand_shift (LSHIFT_EXPR
, word_mode
, part
,
2152 bitsize
- bitsdone
, 0, 1);
2156 if (bitsdone
!= thissize
)
2157 part
= expand_shift (LSHIFT_EXPR
, word_mode
, part
,
2158 bitsdone
- thissize
, 0, 1);
2164 /* Combine the parts with bitwise or. This works
2165 because we extracted each part as an unsigned bit field. */
2166 result
= expand_binop (word_mode
, ior_optab
, part
, result
, NULL_RTX
, 1,
2172 /* Unsigned bit field: we are done. */
2175 /* Signed bit field: sign-extend with two arithmetic shifts. */
2176 result
= expand_shift (LSHIFT_EXPR
, word_mode
, result
,
2177 BITS_PER_WORD
- bitsize
, NULL_RTX
, 0);
2178 return expand_shift (RSHIFT_EXPR
, word_mode
, result
,
2179 BITS_PER_WORD
- bitsize
, NULL_RTX
, 0);
2182 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2183 the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than
2184 MODE, fill the upper bits with zeros. Fail if the layout of either
2185 mode is unknown (as for CC modes) or if the extraction would involve
2186 unprofitable mode punning. Return the value on success, otherwise
2189 This is different from gen_lowpart* in these respects:
2191 - the returned value must always be considered an rvalue
2193 - when MODE is wider than SRC_MODE, the extraction involves
2196 - when MODE is smaller than SRC_MODE, the extraction involves
2197 a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
2199 In other words, this routine performs a computation, whereas the
2200 gen_lowpart* routines are conceptually lvalue or rvalue subreg
2204 extract_low_bits (machine_mode mode
, machine_mode src_mode
, rtx src
)
2206 machine_mode int_mode
, src_int_mode
;
2208 if (mode
== src_mode
)
2211 if (CONSTANT_P (src
))
2213 /* simplify_gen_subreg can't be used here, as if simplify_subreg
2214 fails, it will happily create (subreg (symbol_ref)) or similar
2216 unsigned int byte
= subreg_lowpart_offset (mode
, src_mode
);
2217 rtx ret
= simplify_subreg (mode
, src
, src_mode
, byte
);
2221 if (GET_MODE (src
) == VOIDmode
2222 || !validate_subreg (mode
, src_mode
, src
, byte
))
2225 src
= force_reg (GET_MODE (src
), src
);
2226 return gen_rtx_SUBREG (mode
, src
, byte
);
2229 if (GET_MODE_CLASS (mode
) == MODE_CC
|| GET_MODE_CLASS (src_mode
) == MODE_CC
)
2232 if (GET_MODE_BITSIZE (mode
) == GET_MODE_BITSIZE (src_mode
)
2233 && MODES_TIEABLE_P (mode
, src_mode
))
2235 rtx x
= gen_lowpart_common (mode
, src
);
2240 src_int_mode
= int_mode_for_mode (src_mode
);
2241 int_mode
= int_mode_for_mode (mode
);
2242 if (src_int_mode
== BLKmode
|| int_mode
== BLKmode
)
2245 if (!MODES_TIEABLE_P (src_int_mode
, src_mode
))
2247 if (!MODES_TIEABLE_P (int_mode
, mode
))
2250 src
= gen_lowpart (src_int_mode
, src
);
2251 src
= convert_modes (int_mode
, src_int_mode
, src
, true);
2252 src
= gen_lowpart (mode
, src
);
2256 /* Add INC into TARGET. */
2259 expand_inc (rtx target
, rtx inc
)
2261 rtx value
= expand_binop (GET_MODE (target
), add_optab
,
2263 target
, 0, OPTAB_LIB_WIDEN
);
2264 if (value
!= target
)
2265 emit_move_insn (target
, value
);
2268 /* Subtract DEC from TARGET. */
2271 expand_dec (rtx target
, rtx dec
)
2273 rtx value
= expand_binop (GET_MODE (target
), sub_optab
,
2275 target
, 0, OPTAB_LIB_WIDEN
);
2276 if (value
!= target
)
2277 emit_move_insn (target
, value
);
2280 /* Output a shift instruction for expression code CODE,
2281 with SHIFTED being the rtx for the value to shift,
2282 and AMOUNT the rtx for the amount to shift by.
2283 Store the result in the rtx TARGET, if that is convenient.
2284 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2285 Return the rtx for where the value is. */
2288 expand_shift_1 (enum tree_code code
, machine_mode mode
, rtx shifted
,
2289 rtx amount
, rtx target
, int unsignedp
)
2292 int left
= (code
== LSHIFT_EXPR
|| code
== LROTATE_EXPR
);
2293 int rotate
= (code
== LROTATE_EXPR
|| code
== RROTATE_EXPR
);
2294 optab lshift_optab
= ashl_optab
;
2295 optab rshift_arith_optab
= ashr_optab
;
2296 optab rshift_uns_optab
= lshr_optab
;
2297 optab lrotate_optab
= rotl_optab
;
2298 optab rrotate_optab
= rotr_optab
;
2299 machine_mode op1_mode
;
2300 machine_mode scalar_mode
= mode
;
2302 bool speed
= optimize_insn_for_speed_p ();
2304 if (VECTOR_MODE_P (mode
))
2305 scalar_mode
= GET_MODE_INNER (mode
);
2307 op1_mode
= GET_MODE (op1
);
2309 /* Determine whether the shift/rotate amount is a vector, or scalar. If the
2310 shift amount is a vector, use the vector/vector shift patterns. */
2311 if (VECTOR_MODE_P (mode
) && VECTOR_MODE_P (op1_mode
))
2313 lshift_optab
= vashl_optab
;
2314 rshift_arith_optab
= vashr_optab
;
2315 rshift_uns_optab
= vlshr_optab
;
2316 lrotate_optab
= vrotl_optab
;
2317 rrotate_optab
= vrotr_optab
;
2320 /* Previously detected shift-counts computed by NEGATE_EXPR
2321 and shifted in the other direction; but that does not work
2324 if (SHIFT_COUNT_TRUNCATED
)
2326 if (CONST_INT_P (op1
)
2327 && ((unsigned HOST_WIDE_INT
) INTVAL (op1
) >=
2328 (unsigned HOST_WIDE_INT
) GET_MODE_BITSIZE (scalar_mode
)))
2329 op1
= GEN_INT ((unsigned HOST_WIDE_INT
) INTVAL (op1
)
2330 % GET_MODE_BITSIZE (scalar_mode
));
2331 else if (GET_CODE (op1
) == SUBREG
2332 && subreg_lowpart_p (op1
)
2333 && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1
)))
2334 && SCALAR_INT_MODE_P (GET_MODE (op1
)))
2335 op1
= SUBREG_REG (op1
);
2338 /* Canonicalize rotates by constant amount. If op1 is bitsize / 2,
2339 prefer left rotation, if op1 is from bitsize / 2 + 1 to
2340 bitsize - 1, use other direction of rotate with 1 .. bitsize / 2 - 1
2343 && CONST_INT_P (op1
)
2344 && IN_RANGE (INTVAL (op1
), GET_MODE_BITSIZE (scalar_mode
) / 2 + left
,
2345 GET_MODE_BITSIZE (scalar_mode
) - 1))
2347 op1
= GEN_INT (GET_MODE_BITSIZE (scalar_mode
) - INTVAL (op1
));
2349 code
= left
? LROTATE_EXPR
: RROTATE_EXPR
;
2352 /* Rotation of 16bit values by 8 bits is effectively equivalent to a bswaphi.
2353 Note that this is not the case for bigger values. For instance a rotation
2354 of 0x01020304 by 16 bits gives 0x03040102 which is different from
2355 0x04030201 (bswapsi). */
2357 && CONST_INT_P (op1
)
2358 && INTVAL (op1
) == BITS_PER_UNIT
2359 && GET_MODE_SIZE (scalar_mode
) == 2
2360 && optab_handler (bswap_optab
, HImode
) != CODE_FOR_nothing
)
2361 return expand_unop (HImode
, bswap_optab
, shifted
, NULL_RTX
,
2364 if (op1
== const0_rtx
)
2367 /* Check whether its cheaper to implement a left shift by a constant
2368 bit count by a sequence of additions. */
2369 if (code
== LSHIFT_EXPR
2370 && CONST_INT_P (op1
)
2372 && INTVAL (op1
) < GET_MODE_PRECISION (scalar_mode
)
2373 && INTVAL (op1
) < MAX_BITS_PER_WORD
2374 && (shift_cost (speed
, mode
, INTVAL (op1
))
2375 > INTVAL (op1
) * add_cost (speed
, mode
))
2376 && shift_cost (speed
, mode
, INTVAL (op1
)) != MAX_COST
)
2379 for (i
= 0; i
< INTVAL (op1
); i
++)
2381 temp
= force_reg (mode
, shifted
);
2382 shifted
= expand_binop (mode
, add_optab
, temp
, temp
, NULL_RTX
,
2383 unsignedp
, OPTAB_LIB_WIDEN
);
2388 for (attempt
= 0; temp
== 0 && attempt
< 3; attempt
++)
2390 enum optab_methods methods
;
2393 methods
= OPTAB_DIRECT
;
2394 else if (attempt
== 1)
2395 methods
= OPTAB_WIDEN
;
2397 methods
= OPTAB_LIB_WIDEN
;
2401 /* Widening does not work for rotation. */
2402 if (methods
== OPTAB_WIDEN
)
2404 else if (methods
== OPTAB_LIB_WIDEN
)
2406 /* If we have been unable to open-code this by a rotation,
2407 do it as the IOR of two shifts. I.e., to rotate A
2409 (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
2410 where C is the bitsize of A.
2412 It is theoretically possible that the target machine might
2413 not be able to perform either shift and hence we would
2414 be making two libcalls rather than just the one for the
2415 shift (similarly if IOR could not be done). We will allow
2416 this extremely unlikely lossage to avoid complicating the
2419 rtx subtarget
= target
== shifted
? 0 : target
;
2420 rtx new_amount
, other_amount
;
2424 if (op1
== const0_rtx
)
2426 else if (CONST_INT_P (op1
))
2427 other_amount
= GEN_INT (GET_MODE_BITSIZE (scalar_mode
)
2432 = simplify_gen_unary (NEG
, GET_MODE (op1
),
2433 op1
, GET_MODE (op1
));
2434 HOST_WIDE_INT mask
= GET_MODE_PRECISION (scalar_mode
) - 1;
2436 = simplify_gen_binary (AND
, GET_MODE (op1
), other_amount
,
2437 gen_int_mode (mask
, GET_MODE (op1
)));
2440 shifted
= force_reg (mode
, shifted
);
2442 temp
= expand_shift_1 (left
? LSHIFT_EXPR
: RSHIFT_EXPR
,
2443 mode
, shifted
, new_amount
, 0, 1);
2444 temp1
= expand_shift_1 (left
? RSHIFT_EXPR
: LSHIFT_EXPR
,
2445 mode
, shifted
, other_amount
,
2447 return expand_binop (mode
, ior_optab
, temp
, temp1
, target
,
2448 unsignedp
, methods
);
2451 temp
= expand_binop (mode
,
2452 left
? lrotate_optab
: rrotate_optab
,
2453 shifted
, op1
, target
, unsignedp
, methods
);
2456 temp
= expand_binop (mode
,
2457 left
? lshift_optab
: rshift_uns_optab
,
2458 shifted
, op1
, target
, unsignedp
, methods
);
2460 /* Do arithmetic shifts.
2461 Also, if we are going to widen the operand, we can just as well
2462 use an arithmetic right-shift instead of a logical one. */
2463 if (temp
== 0 && ! rotate
2464 && (! unsignedp
|| (! left
&& methods
== OPTAB_WIDEN
)))
2466 enum optab_methods methods1
= methods
;
2468 /* If trying to widen a log shift to an arithmetic shift,
2469 don't accept an arithmetic shift of the same size. */
2471 methods1
= OPTAB_MUST_WIDEN
;
2473 /* Arithmetic shift */
2475 temp
= expand_binop (mode
,
2476 left
? lshift_optab
: rshift_arith_optab
,
2477 shifted
, op1
, target
, unsignedp
, methods1
);
2480 /* We used to try extzv here for logical right shifts, but that was
2481 only useful for one machine, the VAX, and caused poor code
2482 generation there for lshrdi3, so the code was deleted and a
2483 define_expand for lshrsi3 was added to vax.md. */
2490 /* Output a shift instruction for expression code CODE,
2491 with SHIFTED being the rtx for the value to shift,
2492 and AMOUNT the amount to shift by.
2493 Store the result in the rtx TARGET, if that is convenient.
2494 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2495 Return the rtx for where the value is. */
2498 expand_shift (enum tree_code code
, machine_mode mode
, rtx shifted
,
2499 int amount
, rtx target
, int unsignedp
)
2501 return expand_shift_1 (code
, mode
,
2502 shifted
, GEN_INT (amount
), target
, unsignedp
);
2505 /* Output a shift instruction for expression code CODE,
2506 with SHIFTED being the rtx for the value to shift,
2507 and AMOUNT the tree for the amount to shift by.
2508 Store the result in the rtx TARGET, if that is convenient.
2509 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2510 Return the rtx for where the value is. */
2513 expand_variable_shift (enum tree_code code
, machine_mode mode
, rtx shifted
,
2514 tree amount
, rtx target
, int unsignedp
)
2516 return expand_shift_1 (code
, mode
,
2517 shifted
, expand_normal (amount
), target
, unsignedp
);
2521 /* Indicates the type of fixup needed after a constant multiplication.
2522 BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2523 the result should be negated, and ADD_VARIANT means that the
2524 multiplicand should be added to the result. */
2525 enum mult_variant
{basic_variant
, negate_variant
, add_variant
};
2527 static void synth_mult (struct algorithm
*, unsigned HOST_WIDE_INT
,
2528 const struct mult_cost
*, machine_mode mode
);
2529 static bool choose_mult_variant (machine_mode
, HOST_WIDE_INT
,
2530 struct algorithm
*, enum mult_variant
*, int);
2531 static rtx
expand_mult_const (machine_mode
, rtx
, HOST_WIDE_INT
, rtx
,
2532 const struct algorithm
*, enum mult_variant
);
2533 static unsigned HOST_WIDE_INT
invert_mod2n (unsigned HOST_WIDE_INT
, int);
2534 static rtx
extract_high_half (machine_mode
, rtx
);
2535 static rtx
expmed_mult_highpart (machine_mode
, rtx
, rtx
, rtx
, int, int);
2536 static rtx
expmed_mult_highpart_optab (machine_mode
, rtx
, rtx
, rtx
,
2538 /* Compute and return the best algorithm for multiplying by T.
2539 The algorithm must cost less than cost_limit
2540 If retval.cost >= COST_LIMIT, no algorithm was found and all
2541 other field of the returned struct are undefined.
2542 MODE is the machine mode of the multiplication. */
2545 synth_mult (struct algorithm
*alg_out
, unsigned HOST_WIDE_INT t
,
2546 const struct mult_cost
*cost_limit
, machine_mode mode
)
2549 struct algorithm
*alg_in
, *best_alg
;
2550 struct mult_cost best_cost
;
2551 struct mult_cost new_limit
;
2552 int op_cost
, op_latency
;
2553 unsigned HOST_WIDE_INT orig_t
= t
;
2554 unsigned HOST_WIDE_INT q
;
2555 int maxm
, hash_index
;
2556 bool cache_hit
= false;
2557 enum alg_code cache_alg
= alg_zero
;
2558 bool speed
= optimize_insn_for_speed_p ();
2560 struct alg_hash_entry
*entry_ptr
;
2562 /* Indicate that no algorithm is yet found. If no algorithm
2563 is found, this value will be returned and indicate failure. */
2564 alg_out
->cost
.cost
= cost_limit
->cost
+ 1;
2565 alg_out
->cost
.latency
= cost_limit
->latency
+ 1;
2567 if (cost_limit
->cost
< 0
2568 || (cost_limit
->cost
== 0 && cost_limit
->latency
<= 0))
2571 /* Be prepared for vector modes. */
2572 imode
= GET_MODE_INNER (mode
);
2574 maxm
= MIN (BITS_PER_WORD
, GET_MODE_BITSIZE (imode
));
2576 /* Restrict the bits of "t" to the multiplication's mode. */
2577 t
&= GET_MODE_MASK (imode
);
2579 /* t == 1 can be done in zero cost. */
2583 alg_out
->cost
.cost
= 0;
2584 alg_out
->cost
.latency
= 0;
2585 alg_out
->op
[0] = alg_m
;
2589 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2593 if (MULT_COST_LESS (cost_limit
, zero_cost (speed
)))
2598 alg_out
->cost
.cost
= zero_cost (speed
);
2599 alg_out
->cost
.latency
= zero_cost (speed
);
2600 alg_out
->op
[0] = alg_zero
;
2605 /* We'll be needing a couple extra algorithm structures now. */
2607 alg_in
= XALLOCA (struct algorithm
);
2608 best_alg
= XALLOCA (struct algorithm
);
2609 best_cost
= *cost_limit
;
2611 /* Compute the hash index. */
2612 hash_index
= (t
^ (unsigned int) mode
^ (speed
* 256)) % NUM_ALG_HASH_ENTRIES
;
2614 /* See if we already know what to do for T. */
2615 entry_ptr
= alg_hash_entry_ptr (hash_index
);
2616 if (entry_ptr
->t
== t
2617 && entry_ptr
->mode
== mode
2618 && entry_ptr
->mode
== mode
2619 && entry_ptr
->speed
== speed
2620 && entry_ptr
->alg
!= alg_unknown
)
2622 cache_alg
= entry_ptr
->alg
;
2624 if (cache_alg
== alg_impossible
)
2626 /* The cache tells us that it's impossible to synthesize
2627 multiplication by T within entry_ptr->cost. */
2628 if (!CHEAPER_MULT_COST (&entry_ptr
->cost
, cost_limit
))
2629 /* COST_LIMIT is at least as restrictive as the one
2630 recorded in the hash table, in which case we have no
2631 hope of synthesizing a multiplication. Just
2635 /* If we get here, COST_LIMIT is less restrictive than the
2636 one recorded in the hash table, so we may be able to
2637 synthesize a multiplication. Proceed as if we didn't
2638 have the cache entry. */
2642 if (CHEAPER_MULT_COST (cost_limit
, &entry_ptr
->cost
))
2643 /* The cached algorithm shows that this multiplication
2644 requires more cost than COST_LIMIT. Just return. This
2645 way, we don't clobber this cache entry with
2646 alg_impossible but retain useful information. */
2658 goto do_alg_addsub_t_m2
;
2660 case alg_add_factor
:
2661 case alg_sub_factor
:
2662 goto do_alg_addsub_factor
;
2665 goto do_alg_add_t2_m
;
2668 goto do_alg_sub_t2_m
;
2676 /* If we have a group of zero bits at the low-order part of T, try
2677 multiplying by the remaining bits and then doing a shift. */
2682 m
= floor_log2 (t
& -t
); /* m = number of low zero bits */
2686 /* The function expand_shift will choose between a shift and
2687 a sequence of additions, so the observed cost is given as
2688 MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)). */
2689 op_cost
= m
* add_cost (speed
, mode
);
2690 if (shift_cost (speed
, mode
, m
) < op_cost
)
2691 op_cost
= shift_cost (speed
, mode
, m
);
2692 new_limit
.cost
= best_cost
.cost
- op_cost
;
2693 new_limit
.latency
= best_cost
.latency
- op_cost
;
2694 synth_mult (alg_in
, q
, &new_limit
, mode
);
2696 alg_in
->cost
.cost
+= op_cost
;
2697 alg_in
->cost
.latency
+= op_cost
;
2698 if (CHEAPER_MULT_COST (&alg_in
->cost
, &best_cost
))
2700 best_cost
= alg_in
->cost
;
2701 std::swap (alg_in
, best_alg
);
2702 best_alg
->log
[best_alg
->ops
] = m
;
2703 best_alg
->op
[best_alg
->ops
] = alg_shift
;
2706 /* See if treating ORIG_T as a signed number yields a better
2707 sequence. Try this sequence only for a negative ORIG_T
2708 as it would be useless for a non-negative ORIG_T. */
2709 if ((HOST_WIDE_INT
) orig_t
< 0)
2711 /* Shift ORIG_T as follows because a right shift of a
2712 negative-valued signed type is implementation
2714 q
= ~(~orig_t
>> m
);
2715 /* The function expand_shift will choose between a shift
2716 and a sequence of additions, so the observed cost is
2717 given as MIN (m * add_cost(speed, mode),
2718 shift_cost(speed, mode, m)). */
2719 op_cost
= m
* add_cost (speed
, mode
);
2720 if (shift_cost (speed
, mode
, m
) < op_cost
)
2721 op_cost
= shift_cost (speed
, mode
, m
);
2722 new_limit
.cost
= best_cost
.cost
- op_cost
;
2723 new_limit
.latency
= best_cost
.latency
- op_cost
;
2724 synth_mult (alg_in
, q
, &new_limit
, mode
);
2726 alg_in
->cost
.cost
+= op_cost
;
2727 alg_in
->cost
.latency
+= op_cost
;
2728 if (CHEAPER_MULT_COST (&alg_in
->cost
, &best_cost
))
2730 best_cost
= alg_in
->cost
;
2731 std::swap (alg_in
, best_alg
);
2732 best_alg
->log
[best_alg
->ops
] = m
;
2733 best_alg
->op
[best_alg
->ops
] = alg_shift
;
2741 /* If we have an odd number, add or subtract one. */
2744 unsigned HOST_WIDE_INT w
;
2747 for (w
= 1; (w
& t
) != 0; w
<<= 1)
2749 /* If T was -1, then W will be zero after the loop. This is another
2750 case where T ends with ...111. Handling this with (T + 1) and
2751 subtract 1 produces slightly better code and results in algorithm
2752 selection much faster than treating it like the ...0111 case
2756 /* Reject the case where t is 3.
2757 Thus we prefer addition in that case. */
2760 /* T ends with ...111. Multiply by (T + 1) and subtract T. */
2762 op_cost
= add_cost (speed
, mode
);
2763 new_limit
.cost
= best_cost
.cost
- op_cost
;
2764 new_limit
.latency
= best_cost
.latency
- op_cost
;
2765 synth_mult (alg_in
, t
+ 1, &new_limit
, mode
);
2767 alg_in
->cost
.cost
+= op_cost
;
2768 alg_in
->cost
.latency
+= op_cost
;
2769 if (CHEAPER_MULT_COST (&alg_in
->cost
, &best_cost
))
2771 best_cost
= alg_in
->cost
;
2772 std::swap (alg_in
, best_alg
);
2773 best_alg
->log
[best_alg
->ops
] = 0;
2774 best_alg
->op
[best_alg
->ops
] = alg_sub_t_m2
;
2779 /* T ends with ...01 or ...011. Multiply by (T - 1) and add T. */
2781 op_cost
= add_cost (speed
, mode
);
2782 new_limit
.cost
= best_cost
.cost
- op_cost
;
2783 new_limit
.latency
= best_cost
.latency
- op_cost
;
2784 synth_mult (alg_in
, t
- 1, &new_limit
, mode
);
2786 alg_in
->cost
.cost
+= op_cost
;
2787 alg_in
->cost
.latency
+= op_cost
;
2788 if (CHEAPER_MULT_COST (&alg_in
->cost
, &best_cost
))
2790 best_cost
= alg_in
->cost
;
2791 std::swap (alg_in
, best_alg
);
2792 best_alg
->log
[best_alg
->ops
] = 0;
2793 best_alg
->op
[best_alg
->ops
] = alg_add_t_m2
;
2797 /* We may be able to calculate a * -7, a * -15, a * -31, etc
2798 quickly with a - a * n for some appropriate constant n. */
2799 m
= exact_log2 (-orig_t
+ 1);
2800 if (m
>= 0 && m
< maxm
)
2802 op_cost
= add_cost (speed
, mode
) + shift_cost (speed
, mode
, m
);
2803 /* If the target has a cheap shift-and-subtract insn use
2804 that in preference to a shift insn followed by a sub insn.
2805 Assume that the shift-and-sub is "atomic" with a latency
2806 equal to it's cost, otherwise assume that on superscalar
2807 hardware the shift may be executed concurrently with the
2808 earlier steps in the algorithm. */
2809 if (shiftsub1_cost (speed
, mode
, m
) <= op_cost
)
2811 op_cost
= shiftsub1_cost (speed
, mode
, m
);
2812 op_latency
= op_cost
;
2815 op_latency
= add_cost (speed
, mode
);
2817 new_limit
.cost
= best_cost
.cost
- op_cost
;
2818 new_limit
.latency
= best_cost
.latency
- op_latency
;
2819 synth_mult (alg_in
, (unsigned HOST_WIDE_INT
) (-orig_t
+ 1) >> m
,
2822 alg_in
->cost
.cost
+= op_cost
;
2823 alg_in
->cost
.latency
+= op_latency
;
2824 if (CHEAPER_MULT_COST (&alg_in
->cost
, &best_cost
))
2826 best_cost
= alg_in
->cost
;
2827 std::swap (alg_in
, best_alg
);
2828 best_alg
->log
[best_alg
->ops
] = m
;
2829 best_alg
->op
[best_alg
->ops
] = alg_sub_t_m2
;
2837 /* Look for factors of t of the form
2838 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2839 If we find such a factor, we can multiply by t using an algorithm that
2840 multiplies by q, shift the result by m and add/subtract it to itself.
2842 We search for large factors first and loop down, even if large factors
2843 are less probable than small; if we find a large factor we will find a
2844 good sequence quickly, and therefore be able to prune (by decreasing
2845 COST_LIMIT) the search. */
2847 do_alg_addsub_factor
:
2848 for (m
= floor_log2 (t
- 1); m
>= 2; m
--)
2850 unsigned HOST_WIDE_INT d
;
2852 d
= ((unsigned HOST_WIDE_INT
) 1 << m
) + 1;
2853 if (t
% d
== 0 && t
> d
&& m
< maxm
2854 && (!cache_hit
|| cache_alg
== alg_add_factor
))
2856 op_cost
= add_cost (speed
, mode
) + shift_cost (speed
, mode
, m
);
2857 if (shiftadd_cost (speed
, mode
, m
) <= op_cost
)
2858 op_cost
= shiftadd_cost (speed
, mode
, m
);
2860 op_latency
= op_cost
;
2863 new_limit
.cost
= best_cost
.cost
- op_cost
;
2864 new_limit
.latency
= best_cost
.latency
- op_latency
;
2865 synth_mult (alg_in
, t
/ d
, &new_limit
, mode
);
2867 alg_in
->cost
.cost
+= op_cost
;
2868 alg_in
->cost
.latency
+= op_latency
;
2869 if (alg_in
->cost
.latency
< op_cost
)
2870 alg_in
->cost
.latency
= op_cost
;
2871 if (CHEAPER_MULT_COST (&alg_in
->cost
, &best_cost
))
2873 best_cost
= alg_in
->cost
;
2874 std::swap (alg_in
, best_alg
);
2875 best_alg
->log
[best_alg
->ops
] = m
;
2876 best_alg
->op
[best_alg
->ops
] = alg_add_factor
;
2878 /* Other factors will have been taken care of in the recursion. */
2882 d
= ((unsigned HOST_WIDE_INT
) 1 << m
) - 1;
2883 if (t
% d
== 0 && t
> d
&& m
< maxm
2884 && (!cache_hit
|| cache_alg
== alg_sub_factor
))
2886 op_cost
= add_cost (speed
, mode
) + shift_cost (speed
, mode
, m
);
2887 if (shiftsub0_cost (speed
, mode
, m
) <= op_cost
)
2888 op_cost
= shiftsub0_cost (speed
, mode
, m
);
2890 op_latency
= op_cost
;
2892 new_limit
.cost
= best_cost
.cost
- op_cost
;
2893 new_limit
.latency
= best_cost
.latency
- op_latency
;
2894 synth_mult (alg_in
, t
/ d
, &new_limit
, mode
);
2896 alg_in
->cost
.cost
+= op_cost
;
2897 alg_in
->cost
.latency
+= op_latency
;
2898 if (alg_in
->cost
.latency
< op_cost
)
2899 alg_in
->cost
.latency
= op_cost
;
2900 if (CHEAPER_MULT_COST (&alg_in
->cost
, &best_cost
))
2902 best_cost
= alg_in
->cost
;
2903 std::swap (alg_in
, best_alg
);
2904 best_alg
->log
[best_alg
->ops
] = m
;
2905 best_alg
->op
[best_alg
->ops
] = alg_sub_factor
;
2913 /* Try shift-and-add (load effective address) instructions,
2914 i.e. do a*3, a*5, a*9. */
2921 if (m
>= 0 && m
< maxm
)
2923 op_cost
= shiftadd_cost (speed
, mode
, m
);
2924 new_limit
.cost
= best_cost
.cost
- op_cost
;
2925 new_limit
.latency
= best_cost
.latency
- op_cost
;
2926 synth_mult (alg_in
, (t
- 1) >> m
, &new_limit
, mode
);
2928 alg_in
->cost
.cost
+= op_cost
;
2929 alg_in
->cost
.latency
+= op_cost
;
2930 if (CHEAPER_MULT_COST (&alg_in
->cost
, &best_cost
))
2932 best_cost
= alg_in
->cost
;
2933 std::swap (alg_in
, best_alg
);
2934 best_alg
->log
[best_alg
->ops
] = m
;
2935 best_alg
->op
[best_alg
->ops
] = alg_add_t2_m
;
2945 if (m
>= 0 && m
< maxm
)
2947 op_cost
= shiftsub0_cost (speed
, mode
, m
);
2948 new_limit
.cost
= best_cost
.cost
- op_cost
;
2949 new_limit
.latency
= best_cost
.latency
- op_cost
;
2950 synth_mult (alg_in
, (t
+ 1) >> m
, &new_limit
, mode
);
2952 alg_in
->cost
.cost
+= op_cost
;
2953 alg_in
->cost
.latency
+= op_cost
;
2954 if (CHEAPER_MULT_COST (&alg_in
->cost
, &best_cost
))
2956 best_cost
= alg_in
->cost
;
2957 std::swap (alg_in
, best_alg
);
2958 best_alg
->log
[best_alg
->ops
] = m
;
2959 best_alg
->op
[best_alg
->ops
] = alg_sub_t2_m
;
2967 /* If best_cost has not decreased, we have not found any algorithm. */
2968 if (!CHEAPER_MULT_COST (&best_cost
, cost_limit
))
2970 /* We failed to find an algorithm. Record alg_impossible for
2971 this case (that is, <T, MODE, COST_LIMIT>) so that next time
2972 we are asked to find an algorithm for T within the same or
2973 lower COST_LIMIT, we can immediately return to the
2976 entry_ptr
->mode
= mode
;
2977 entry_ptr
->speed
= speed
;
2978 entry_ptr
->alg
= alg_impossible
;
2979 entry_ptr
->cost
= *cost_limit
;
2983 /* Cache the result. */
2987 entry_ptr
->mode
= mode
;
2988 entry_ptr
->speed
= speed
;
2989 entry_ptr
->alg
= best_alg
->op
[best_alg
->ops
];
2990 entry_ptr
->cost
.cost
= best_cost
.cost
;
2991 entry_ptr
->cost
.latency
= best_cost
.latency
;
2994 /* If we are getting a too long sequence for `struct algorithm'
2995 to record, make this search fail. */
2996 if (best_alg
->ops
== MAX_BITS_PER_WORD
)
2999 /* Copy the algorithm from temporary space to the space at alg_out.
3000 We avoid using structure assignment because the majority of
3001 best_alg is normally undefined, and this is a critical function. */
3002 alg_out
->ops
= best_alg
->ops
+ 1;
3003 alg_out
->cost
= best_cost
;
3004 memcpy (alg_out
->op
, best_alg
->op
,
3005 alg_out
->ops
* sizeof *alg_out
->op
);
3006 memcpy (alg_out
->log
, best_alg
->log
,
3007 alg_out
->ops
* sizeof *alg_out
->log
);
3010 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
3011 Try three variations:
3013 - a shift/add sequence based on VAL itself
3014 - a shift/add sequence based on -VAL, followed by a negation
3015 - a shift/add sequence based on VAL - 1, followed by an addition.
3017 Return true if the cheapest of these cost less than MULT_COST,
3018 describing the algorithm in *ALG and final fixup in *VARIANT. */
3021 choose_mult_variant (machine_mode mode
, HOST_WIDE_INT val
,
3022 struct algorithm
*alg
, enum mult_variant
*variant
,
3025 struct algorithm alg2
;
3026 struct mult_cost limit
;
3028 bool speed
= optimize_insn_for_speed_p ();
3030 /* Fail quickly for impossible bounds. */
3034 /* Ensure that mult_cost provides a reasonable upper bound.
3035 Any constant multiplication can be performed with less
3036 than 2 * bits additions. */
3037 op_cost
= 2 * GET_MODE_UNIT_BITSIZE (mode
) * add_cost (speed
, mode
);
3038 if (mult_cost
> op_cost
)
3039 mult_cost
= op_cost
;
3041 *variant
= basic_variant
;
3042 limit
.cost
= mult_cost
;
3043 limit
.latency
= mult_cost
;
3044 synth_mult (alg
, val
, &limit
, mode
);
3046 /* This works only if the inverted value actually fits in an
3048 if (HOST_BITS_PER_INT
>= GET_MODE_UNIT_BITSIZE (mode
))
3050 op_cost
= neg_cost (speed
, mode
);
3051 if (MULT_COST_LESS (&alg
->cost
, mult_cost
))
3053 limit
.cost
= alg
->cost
.cost
- op_cost
;
3054 limit
.latency
= alg
->cost
.latency
- op_cost
;
3058 limit
.cost
= mult_cost
- op_cost
;
3059 limit
.latency
= mult_cost
- op_cost
;
3062 synth_mult (&alg2
, -val
, &limit
, mode
);
3063 alg2
.cost
.cost
+= op_cost
;
3064 alg2
.cost
.latency
+= op_cost
;
3065 if (CHEAPER_MULT_COST (&alg2
.cost
, &alg
->cost
))
3066 *alg
= alg2
, *variant
= negate_variant
;
3069 /* This proves very useful for division-by-constant. */
3070 op_cost
= add_cost (speed
, mode
);
3071 if (MULT_COST_LESS (&alg
->cost
, mult_cost
))
3073 limit
.cost
= alg
->cost
.cost
- op_cost
;
3074 limit
.latency
= alg
->cost
.latency
- op_cost
;
3078 limit
.cost
= mult_cost
- op_cost
;
3079 limit
.latency
= mult_cost
- op_cost
;
3082 synth_mult (&alg2
, val
- 1, &limit
, mode
);
3083 alg2
.cost
.cost
+= op_cost
;
3084 alg2
.cost
.latency
+= op_cost
;
3085 if (CHEAPER_MULT_COST (&alg2
.cost
, &alg
->cost
))
3086 *alg
= alg2
, *variant
= add_variant
;
3088 return MULT_COST_LESS (&alg
->cost
, mult_cost
);
/* A subroutine of expand_mult, used for constant multiplications.
   Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
   convenient.  Use the shift/add sequence described by ALG and apply
   the final fixup specified by VARIANT.

   The synthetic sequence is replayed step by step; VAL_SO_FAR tracks
   the constant the accumulator holds after each step so the final
   gcc_assert can verify the whole sequence against VAL.  */

static rtx
expand_mult_const (machine_mode mode, rtx op0, HOST_WIDE_INT val,
                   rtx target, const struct algorithm *alg,
                   enum mult_variant variant)
{
  HOST_WIDE_INT val_so_far;
  rtx_insn *insn;
  rtx accum, tem;
  int opno;
  machine_mode nmode;

  /* Avoid referencing memory over and over and invalid sharing
     on SUBREGs.  */
  op0 = force_reg (mode, op0);

  /* ACCUM starts out either as OP0 or as a zero, depending on
     the first operation.  */

  if (alg->op[0] == alg_zero)
    {
      accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
      val_so_far = 0;
    }
  else if (alg->op[0] == alg_m)
    {
      accum = copy_to_mode_reg (mode, op0);
      val_so_far = 1;
    }
  else
    gcc_unreachable ();

  for (opno = 1; opno < alg->ops; opno++)
    {
      int log = alg->log[opno];
      rtx shift_subtarget = optimize ? 0 : accum;
      /* Only hand TARGET to the very last add/sub (and only when the
         final variant will not add another fixup on top of it).  */
      rtx add_target
        = (opno == alg->ops - 1 && target != 0 && variant != add_variant
           && !optimize)
          ? target : 0;
      rtx accum_target = optimize ? 0 : accum;
      rtx accum_inner;

      switch (alg->op[opno])
        {
        case alg_shift:
          tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
          /* REG_EQUAL note will be attached to the following insn.  */
          emit_move_insn (accum, tem);
          val_so_far <<= log;
          break;

        case alg_add_t_m2:
          tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
          accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
                                 add_target ? add_target : accum_target);
          val_so_far += (HOST_WIDE_INT) 1 << log;
          break;

        case alg_sub_t_m2:
          tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
          accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
                                 add_target ? add_target : accum_target);
          val_so_far -= (HOST_WIDE_INT) 1 << log;
          break;

        case alg_add_t2_m:
          accum = expand_shift (LSHIFT_EXPR, mode, accum,
                                log, shift_subtarget, 0);
          accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
                                 add_target ? add_target : accum_target);
          val_so_far = (val_so_far << log) + 1;
          break;

        case alg_sub_t2_m:
          accum = expand_shift (LSHIFT_EXPR, mode, accum,
                                log, shift_subtarget, 0);
          accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
                                 add_target ? add_target : accum_target);
          val_so_far = (val_so_far << log) - 1;
          break;

        case alg_add_factor:
          tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
          accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
                                 add_target ? add_target : accum_target);
          val_so_far += val_so_far << log;
          break;

        case alg_sub_factor:
          tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
          accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
                                 (add_target
                                  ? add_target : (optimize ? 0 : tem)));
          val_so_far = (val_so_far << log) - val_so_far;
          break;

        default:
          gcc_unreachable ();
        }

      if (SCALAR_INT_MODE_P (mode))
        {
          /* Write a REG_EQUAL note on the last insn so that we can cse
             multiplication sequences.  Note that if ACCUM is a SUBREG,
             we've set the inner register and must properly indicate that.  */
          tem = op0, nmode = mode;
          accum_inner = accum;
          if (GET_CODE (accum) == SUBREG)
            {
              accum_inner = SUBREG_REG (accum);
              nmode = GET_MODE (accum_inner);
              tem = gen_lowpart (nmode, op0);
            }

          insn = get_last_insn ();
          set_dst_reg_note (insn, REG_EQUAL,
                            gen_rtx_MULT (nmode, tem,
                                          gen_int_mode (val_so_far, nmode)),
                            accum_inner);
        }
    }

  if (variant == negate_variant)
    {
      val_so_far = -val_so_far;
      accum = expand_unop (mode, neg_optab, accum, target, 0);
    }
  else if (variant == add_variant)
    {
      val_so_far = val_so_far + 1;
      accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
    }

  /* Compare only the bits of val and val_so_far that are significant
     in the result mode, to avoid sign-/zero-extension confusion.  */
  nmode = GET_MODE_INNER (mode);
  val &= GET_MODE_MASK (nmode);
  val_so_far &= GET_MODE_MASK (nmode);
  gcc_assert (val == val_so_far);

  return accum;
}
/* Perform a multiplication and return an rtx for the result.
   MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
   TARGET is a suggestion for where to store the result (an rtx).

   We check specially for a constant integer as OP1.
   If you want this check for OP0 as well, then before calling
   you should swap the two operands if OP0 would be constant.  */

rtx
expand_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
             int unsignedp)
{
  enum mult_variant variant;
  struct algorithm algorithm;
  rtx scalar_op1;
  int max_cost;
  bool speed = optimize_insn_for_speed_p ();
  bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;

  if (CONSTANT_P (op0))
    std::swap (op0, op1);

  /* For vectors, there are several simplifications that can be made if
     all elements of the vector constant are identical.  */
  scalar_op1 = unwrap_const_vec_duplicate (op1);

  if (INTEGRAL_MODE_P (mode))
    {
      rtx fake_reg;
      HOST_WIDE_INT coeff;
      bool is_neg;
      int mode_bitsize;

      /* Trivial constants fold without emitting any code.  */
      if (op1 == CONST0_RTX (mode))
        return op1;
      if (op1 == CONST1_RTX (mode))
        return op0;
      if (op1 == CONSTM1_RTX (mode))
        return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
                            op0, target, 0);

      /* Synthetic shift/add sequences cannot report overflow, so they
         are unusable under -ftrapv.  */
      if (do_trapv)
        goto skip_synth;

      /* If mode is integer vector mode, check if the backend supports
         vector lshift (by scalar or vector) at all.  If not, we can't use
         synthetized multiply.  */
      if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
          && optab_handler (vashl_optab, mode) == CODE_FOR_nothing
          && optab_handler (ashl_optab, mode) == CODE_FOR_nothing)
        goto skip_synth;

      /* These are the operations that are potentially turned into
         a sequence of shifts and additions.  */
      mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);

      /* synth_mult does an `unsigned int' multiply.  As long as the mode is
         less than or equal in size to `unsigned int' this doesn't matter.
         If the mode is larger than `unsigned int', then synth_mult works
         only if the constant value exactly fits in an `unsigned int' without
         any truncation.  This means that multiplying by negative values does
         not work; results are off by 2^32 on a 32 bit machine.  */
      if (CONST_INT_P (scalar_op1))
        {
          coeff = INTVAL (scalar_op1);
          is_neg = coeff < 0;
        }
#if TARGET_SUPPORTS_WIDE_INT
      else if (CONST_WIDE_INT_P (scalar_op1))
#else
      else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
#endif
        {
          int shift = wi::exact_log2 (std::make_pair (scalar_op1, mode));
          /* Perfect power of 2 (other than 1, which is handled above).  */
          if (shift > 0)
            return expand_shift (LSHIFT_EXPR, mode, op0,
                                 shift, target, unsignedp);
          else
            goto skip_synth;
        }
      else
        goto skip_synth;

      /* We used to test optimize here, on the grounds that it's better to
         produce a smaller program when -O is not used.  But this causes
         such a terrible slowdown sometimes that it seems better to always
         use synth_mult.  */

      /* Special case powers of two.  */
      if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
          && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
        return expand_shift (LSHIFT_EXPR, mode, op0,
                             floor_log2 (coeff), target, unsignedp);

      fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);

      /* Attempt to handle multiplication of DImode values by negative
         coefficients, by performing the multiplication by a positive
         multiplier and then inverting the result.  */
      if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
        {
          /* Its safe to use -coeff even for INT_MIN, as the
             result is interpreted as an unsigned coefficient.
             Exclude cost of op0 from max_cost to match the cost
             calculation of the synth_mult.  */
          coeff = -(unsigned HOST_WIDE_INT) coeff;
          max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1),
                                    mode, speed)
                      - neg_cost (speed, mode));
          if (max_cost <= 0)
            goto skip_synth;

          /* Special case powers of two.  */
          if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
            {
              rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
                                       floor_log2 (coeff), target, unsignedp);
              return expand_unop (mode, neg_optab, temp, target, 0);
            }

          if (choose_mult_variant (mode, coeff, &algorithm, &variant,
                                   max_cost))
            {
              rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
                                            &algorithm, variant);
              return expand_unop (mode, neg_optab, temp, target, 0);
            }
          goto skip_synth;
        }

      /* Exclude cost of op0 from max_cost to match the cost
         calculation of the synth_mult.  */
      max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), mode, speed);
      if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
        return expand_mult_const (mode, op0, coeff, target,
                                  &algorithm, variant);
    }
 skip_synth:

  /* Expand x*2.0 as x+x.  */
  if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1)
      && real_equal (CONST_DOUBLE_REAL_VALUE (scalar_op1), &dconst2))
    {
      op0 = force_reg (GET_MODE (op0), op0);
      return expand_binop (mode, add_optab, op0, op0,
                           target, unsignedp, OPTAB_LIB_WIDEN);
    }

  /* This used to use umul_optab if unsigned, but for non-widening multiply
     there is no difference between signed and unsigned.  */
  op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
                      op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
  gcc_assert (op0);
  return op0;
}
/* Return a cost estimate for multiplying a register by the given
   COEFFicient in the given MODE and SPEED.

   If a synthetic shift/add sequence beats a hardware multiply, return
   the sequence's cost; otherwise fall back to the cost of the multiply
   itself (computed against a fake pseudo so no RTL is emitted).  */

int
mult_by_coeff_cost (HOST_WIDE_INT coeff, machine_mode mode, bool speed)
{
  int max_cost;
  struct algorithm algorithm;
  enum mult_variant variant;

  /* A scratch register purely for costing; it never reaches the insn
     stream.  */
  rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
  max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg),
                           mode, speed);
  if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
    return algorithm.cost.cost;
  else
    return max_cost;
}
/* Perform a widening multiplication and return an rtx for the result.
   MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
   TARGET is a suggestion for where to store the result (an rtx).
   THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
   or smul_widen_optab.

   We check specially for a constant integer as OP1, comparing the
   cost of a widening multiply against the cost of a sequence of shifts
   and adds.  */

rtx
expand_widening_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
                      int unsignedp, optab this_optab)
{
  bool speed = optimize_insn_for_speed_p ();
  rtx cop1;

  /* Try to replace a widening multiply by a constant with a shift or a
     shift/add sequence in the wide mode.  The constant must survive
     conversion to MODE unchanged and must be representable in a
     HOST_WIDE_INT computation.  */
  if (CONST_INT_P (op1)
      && GET_MODE (op0) != VOIDmode
      && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
                                this_optab == umul_widen_optab))
      && CONST_INT_P (cop1)
      && (INTVAL (cop1) >= 0
          || HWI_COMPUTABLE_MODE_P (mode)))
    {
      HOST_WIDE_INT coeff = INTVAL (cop1);
      int max_cost;
      enum mult_variant variant;
      struct algorithm algorithm;

      if (coeff == 0)
        return CONST0_RTX (mode);

      /* Special case powers of two.  */
      if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
        {
          op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
          return expand_shift (LSHIFT_EXPR, mode, op0,
                               floor_log2 (coeff), target, unsignedp);
        }

      /* Exclude cost of op0 from max_cost to match the cost
         calculation of the synth_mult.  */
      max_cost = mul_widen_cost (speed, mode);
      if (choose_mult_variant (mode, coeff, &algorithm, &variant,
                               max_cost))
        {
          op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
          return expand_mult_const (mode, op0, coeff, target,
                                    &algorithm, variant);
        }
    }
  return expand_binop (mode, this_optab, op0, op1, target,
                       unsignedp, OPTAB_LIB_WIDEN);
}
/* Choose a minimal N + 1 bit approximation to 1/D that can be used to
   replace division by D, and put the least significant N bits of the result
   in *MULTIPLIER_PTR and return the most significant bit.

   The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
   needed precision is in PRECISION (should be <= N).

   PRECISION should be as small as possible so this function can choose
   multiplier more freely.

   The rounded-up logarithm of D is placed in *lgup_ptr.  A shift count that
   is to be used for a final right shift is placed in *POST_SHIFT_PTR.

   Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
   where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.  */

unsigned HOST_WIDE_INT
choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
                   unsigned HOST_WIDE_INT *multiplier_ptr,
                   int *post_shift_ptr, int *lgup_ptr)
{
  int lgup, post_shift;
  int pow, pow2;

  /* lgup = ceil(log2(divisor)); */
  lgup = ceil_log2 (d);

  gcc_assert (lgup <= n);

  pow = n + lgup;
  pow2 = n + lgup - precision;

  /* mlow = 2^(N + lgup)/d */
  wide_int val = wi::set_bit_in_zero (pow, HOST_BITS_PER_DOUBLE_INT);
  wide_int mlow = wi::udiv_trunc (val, d);

  /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
  val |= wi::set_bit_in_zero (pow2, HOST_BITS_PER_DOUBLE_INT);
  wide_int mhigh = wi::udiv_trunc (val, d);

  /* If precision == N, then mlow, mhigh exceed 2^N
     (but they do not exceed 2^(N+1)).  */

  /* Reduce to lowest terms.  Halve both bounds while they still agree;
     each halving lets us drop one bit of post shift.  */
  for (post_shift = lgup; post_shift > 0; post_shift--)
    {
      unsigned HOST_WIDE_INT ml_lo = wi::extract_uhwi (mlow, 1,
                                                       HOST_BITS_PER_WIDE_INT);
      unsigned HOST_WIDE_INT mh_lo = wi::extract_uhwi (mhigh, 1,
                                                       HOST_BITS_PER_WIDE_INT);
      if (ml_lo >= mh_lo)
        break;

      mlow = wi::uhwi (ml_lo, HOST_BITS_PER_DOUBLE_INT);
      mhigh = wi::uhwi (mh_lo, HOST_BITS_PER_DOUBLE_INT);
    }

  *post_shift_ptr = post_shift;
  *lgup_ptr = lgup;
  if (n < HOST_BITS_PER_WIDE_INT)
    {
      unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
      *multiplier_ptr = mhigh.to_uhwi () & mask;
      return mhigh.to_uhwi () >= mask;
    }
  else
    {
      *multiplier_ptr = mhigh.to_uhwi ();
      /* The N+1st bit of the multiplier is the return value.  */
      return wi::extract_uhwi (mhigh, HOST_BITS_PER_WIDE_INT, 1);
    }
}
3543 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3544 congruent to 1 (mod 2**N). */
3546 static unsigned HOST_WIDE_INT
3547 invert_mod2n (unsigned HOST_WIDE_INT x
, int n
)
3549 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
3551 /* The algorithm notes that the choice y = x satisfies
3552 x*y == 1 mod 2^3, since x is assumed odd.
3553 Each iteration doubles the number of bits of significance in y. */
3555 unsigned HOST_WIDE_INT mask
;
3556 unsigned HOST_WIDE_INT y
= x
;
3559 mask
= (n
== HOST_BITS_PER_WIDE_INT
3560 ? ~(unsigned HOST_WIDE_INT
) 0
3561 : ((unsigned HOST_WIDE_INT
) 1 << n
) - 1);
3565 y
= y
* (2 - x
*y
) & mask
; /* Modulo 2^N */
3571 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3572 flavor of OP0 and OP1. ADJ_OPERAND is already the high half of the
3573 product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed product
3574 to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3577 The result is put in TARGET if that is convenient.
3579 MODE is the mode of operation. */
3582 expand_mult_highpart_adjust (machine_mode mode
, rtx adj_operand
, rtx op0
,
3583 rtx op1
, rtx target
, int unsignedp
)
3586 enum rtx_code adj_code
= unsignedp
? PLUS
: MINUS
;
3588 tem
= expand_shift (RSHIFT_EXPR
, mode
, op0
,
3589 GET_MODE_BITSIZE (mode
) - 1, NULL_RTX
, 0);
3590 tem
= expand_and (mode
, tem
, op1
, NULL_RTX
);
3592 = force_operand (gen_rtx_fmt_ee (adj_code
, mode
, adj_operand
, tem
),
3595 tem
= expand_shift (RSHIFT_EXPR
, mode
, op1
,
3596 GET_MODE_BITSIZE (mode
) - 1, NULL_RTX
, 0);
3597 tem
= expand_and (mode
, tem
, op0
, NULL_RTX
);
3598 target
= force_operand (gen_rtx_fmt_ee (adj_code
, mode
, adj_operand
, tem
),
3604 /* Subroutine of expmed_mult_highpart. Return the MODE high part of OP. */
3607 extract_high_half (machine_mode mode
, rtx op
)
3609 machine_mode wider_mode
;
3611 if (mode
== word_mode
)
3612 return gen_highpart (mode
, op
);
3614 gcc_assert (!SCALAR_FLOAT_MODE_P (mode
));
3616 wider_mode
= GET_MODE_WIDER_MODE (mode
);
3617 op
= expand_shift (RSHIFT_EXPR
, wider_mode
, op
,
3618 GET_MODE_BITSIZE (mode
), 0, 1);
3619 return convert_modes (mode
, wider_mode
, op
, 0);
/* Like expmed_mult_highpart, but only consider using a multiplication
   optab.  OP1 is an rtx for the constant operand.

   Strategies are tried from cheapest to most elaborate, each gated by a
   cost test against MAX_COST; return 0 if none fits the budget.  */

static rtx
expmed_mult_highpart_optab (machine_mode mode, rtx op0, rtx op1,
                            rtx target, int unsignedp, int max_cost)
{
  rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
  machine_mode wider_mode;
  optab moptab;
  rtx tem;
  int size;
  bool speed = optimize_insn_for_speed_p ();

  gcc_assert (!SCALAR_FLOAT_MODE_P (mode));

  wider_mode = GET_MODE_WIDER_MODE (mode);
  size = GET_MODE_BITSIZE (mode);

  /* Firstly, try using a multiplication insn that only generates the needed
     high part of the product, and in the sign flavor of unsignedp.  */
  if (mul_highpart_cost (speed, mode) < max_cost)
    {
      moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
      tem = expand_binop (mode, moptab, op0, narrow_op1, target,
                          unsignedp, OPTAB_DIRECT);
      if (tem)
        return tem;
    }

  /* Secondly, same as above, but use sign flavor opposite of unsignedp.
     Need to adjust the result after the multiplication.  */
  if (size - 1 < BITS_PER_WORD
      && (mul_highpart_cost (speed, mode)
          + 2 * shift_cost (speed, mode, size-1)
          + 4 * add_cost (speed, mode) < max_cost))
    {
      moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
      tem = expand_binop (mode, moptab, op0, narrow_op1, target,
                          unsignedp, OPTAB_DIRECT);
      if (tem)
        /* We used the wrong signedness.  Adjust the result.  */
        return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
                                            tem, unsignedp);
    }

  /* Try widening multiplication.  */
  moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
  if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
      && mul_widen_cost (speed, wider_mode) < max_cost)
    {
      tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
                          unsignedp, OPTAB_WIDEN);
      if (tem)
        return extract_high_half (mode, tem);
    }

  /* Try widening the mode and perform a non-widening multiplication.  */
  if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
      && size - 1 < BITS_PER_WORD
      && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
          < max_cost))
    {
      rtx_insn *insns;
      rtx wop0, wop1;

      /* We need to widen the operands, for example to ensure the
         constant multiplier is correctly sign or zero extended.
         Use a sequence to clean-up any instructions emitted by
         the conversions if things don't work out.  */
      start_sequence ();
      wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
      wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
      tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
                          unsignedp, OPTAB_WIDEN);
      insns = get_insns ();
      end_sequence ();

      if (tem)
        {
          emit_insn (insns);
          return extract_high_half (mode, tem);
        }
    }

  /* Try widening multiplication of opposite signedness, and adjust.  */
  moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
  if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
      && size - 1 < BITS_PER_WORD
      && (mul_widen_cost (speed, wider_mode)
          + 2 * shift_cost (speed, mode, size-1)
          + 4 * add_cost (speed, mode) < max_cost))
    {
      tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
                          NULL_RTX, ! unsignedp, OPTAB_WIDEN);
      if (tem != 0)
        {
          tem = extract_high_half (mode, tem);
          /* We used the wrong signedness.  Adjust the result.  */
          return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
                                              target, unsignedp);
        }
    }

  return 0;
}
/* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
   putting the high half of the result in TARGET if that is convenient,
   and return where the result is.  If the operation can not be performed,
   0 is returned.

   MODE is the mode of operation and result.

   UNSIGNEDP nonzero means unsigned multiply.

   MAX_COST is the total allowed cost for the expanded RTL.  */

static rtx
expmed_mult_highpart (machine_mode mode, rtx op0, rtx op1,
                      rtx target, int unsignedp, int max_cost)
{
  machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
  unsigned HOST_WIDE_INT cnst1;
  int extra_cost;
  bool sign_adjust = false;
  enum mult_variant variant;
  struct algorithm alg;
  rtx tem;
  bool speed = optimize_insn_for_speed_p ();

  gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
  /* We can't support modes wider than HOST_BITS_PER_INT.  */
  gcc_assert (HWI_COMPUTABLE_MODE_P (mode));

  cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);

  /* We can't optimize modes wider than BITS_PER_WORD.
     ??? We might be able to perform double-word arithmetic if
     mode == word_mode, however all the cost calculations in
     synth_mult etc. assume single-word operations.  */
  if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
    return expmed_mult_highpart_optab (mode, op0, op1, target,
                                       unsignedp, max_cost);

  /* Every synthetic route needs the final high-half extraction shift.  */
  extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);

  /* Check whether we try to multiply by a negative constant.  */
  if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
    {
      sign_adjust = true;
      extra_cost += add_cost (speed, mode);
    }

  /* See whether shift/add multiplication is cheap enough.  */
  if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
                           max_cost - extra_cost))
    {
      /* See whether the specialized multiplication optabs are
         cheaper than the shift/add version.  */
      tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
                                        alg.cost.cost + extra_cost);
      if (tem)
        return tem;

      tem = convert_to_mode (wider_mode, op0, unsignedp);
      tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
      tem = extract_high_half (mode, tem);

      /* Adjust result for signedness.  */
      if (sign_adjust)
        tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);

      return tem;
    }
  return expmed_mult_highpart_optab (mode, op0, op1, target,
                                     unsignedp, max_cost);
}
/* Expand signed modulus of OP0 by a power of two D in mode MODE.
   D must be a positive power of two; the result has the sign of OP0
   (truncating division semantics).  */

static rtx
expand_smod_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d)
{
  int logd;
  rtx result, temp, shift;
  rtx_code_label *label;
  int prec = GET_MODE_PRECISION (mode);

  logd = floor_log2 (d);
  result = gen_reg_rtx (mode);

  /* Avoid conditional branches when they're expensive.  */
  if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
      && optimize_insn_for_speed_p ())
    {
      /* signmask is -1 if op0 < 0, else 0 — the branch-free key.  */
      rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
                                      mode, 0, -1);
      if (signmask)
        {
          HOST_WIDE_INT masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
          signmask = force_reg (mode, signmask);
          shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);

          /* Use the rtx_cost of a LSHIFTRT instruction to determine
             which instruction sequence to use.  If logical right shifts
             are expensive the use 2 XORs, 2 SUBs and an AND, otherwise
             use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */

          temp = gen_rtx_LSHIFTRT (mode, result, shift);
          if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
              || (set_src_cost (temp, mode, optimize_insn_for_speed_p ())
                  > COSTS_N_INSNS (2)))
            {
              /* ((op0 ^ mask) - mask) & low ... undone afterwards:
                 conditional negate, mask, conditional negate back.  */
              temp = expand_binop (mode, xor_optab, op0, signmask,
                                   NULL_RTX, 1, OPTAB_LIB_WIDEN);
              temp = expand_binop (mode, sub_optab, temp, signmask,
                                   NULL_RTX, 1, OPTAB_LIB_WIDEN);
              temp = expand_binop (mode, and_optab, temp,
                                   gen_int_mode (masklow, mode),
                                   NULL_RTX, 1, OPTAB_LIB_WIDEN);
              temp = expand_binop (mode, xor_optab, temp, signmask,
                                   NULL_RTX, 1, OPTAB_LIB_WIDEN);
              temp = expand_binop (mode, sub_optab, temp, signmask,
                                   NULL_RTX, 1, OPTAB_LIB_WIDEN);
            }
          else
            {
              /* Bias op0 by (op0 < 0 ? d-1 : 0) via an unsigned shift of
                 the all-ones mask, mask, then un-bias.  */
              signmask = expand_binop (mode, lshr_optab, signmask, shift,
                                       NULL_RTX, 1, OPTAB_LIB_WIDEN);
              signmask = force_reg (mode, signmask);

              temp = expand_binop (mode, add_optab, op0, signmask,
                                   NULL_RTX, 1, OPTAB_LIB_WIDEN);
              temp = expand_binop (mode, and_optab, temp,
                                   gen_int_mode (masklow, mode),
                                   NULL_RTX, 1, OPTAB_LIB_WIDEN);
              temp = expand_binop (mode, sub_optab, temp, signmask,
                                   NULL_RTX, 1, OPTAB_LIB_WIDEN);
            }
          return temp;
        }
    }

  /* Mask contains the mode's signbit and the significant bits of the
     modulus.  By including the signbit in the operation, many targets
     can avoid an explicit compare operation in the following comparison
     against zero.  */
  wide_int mask = wi::mask (logd, false, prec);
  mask = wi::set_bit (mask, prec - 1);

  temp = expand_binop (mode, and_optab, op0,
                       immed_wide_int_const (mask, mode),
                       result, 1, OPTAB_LIB_WIDEN);
  if (temp != result)
    emit_move_insn (result, temp);

  label = gen_label_rtx ();
  do_cmp_and_jump (result, const0_rtx, GE, mode, label);

  /* Negative case: sign-extend the low bits back to a negative
     remainder via decrement / set-high-bits / increment.  */
  temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
                       0, OPTAB_LIB_WIDEN);

  mask = wi::mask (logd, true, prec);
  temp = expand_binop (mode, ior_optab, temp,
                       immed_wide_int_const (mask, mode),
                       result, 1, OPTAB_LIB_WIDEN);
  temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
                       0, OPTAB_LIB_WIDEN);
  if (temp != result)
    emit_move_insn (result, temp);

  emit_label (label);
  return result;
}
/* Expand signed division of OP0 by a power of two D in mode MODE.
   This routine is only called for positive values of D.

   The common idea of every variant: add D-1 to OP0 only when OP0 is
   negative, then do an arithmetic right shift — yielding truncating
   (round toward zero) division.  */

static rtx
expand_sdiv_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d)
{
  rtx temp;
  rtx_code_label *label;
  int logd;

  logd = floor_log2 (d);

  /* d == 2: the bias is exactly (op0 < 0), a single store-flag.  */
  if (d == 2
      && BRANCH_COST (optimize_insn_for_speed_p (),
                      false) >= 1)
    {
      temp = gen_reg_rtx (mode);
      temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
      temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
                           0, OPTAB_LIB_WIDEN);
      return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
    }

  if (HAVE_conditional_move
      && BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2)
    {
      rtx temp2;

      start_sequence ();
      temp2 = copy_to_mode_reg (mode, op0);
      temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
                           NULL_RTX, 0, OPTAB_LIB_WIDEN);
      temp = force_reg (mode, temp);

      /* Construct "temp2 = (temp2 < 0) ? temp : temp2".  */
      temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
                                     mode, temp, temp2, mode, 0);
      if (temp2)
        {
          rtx_insn *seq = get_insns ();
          end_sequence ();
          emit_insn (seq);
          return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
        }
      /* Conditional move unavailable after all; discard the sequence.  */
      end_sequence ();
    }

  if (BRANCH_COST (optimize_insn_for_speed_p (),
                   false) >= 2)
    {
      int ushift = GET_MODE_BITSIZE (mode) - logd;

      /* temp is -1 when op0 < 0, else 0; turn it into the bias d-1
         either by masking or by an unsigned shift, whichever is
         cheaper on this target.  */
      temp = gen_reg_rtx (mode);
      temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
      if (GET_MODE_BITSIZE (mode) >= BITS_PER_WORD
          || shift_cost (optimize_insn_for_speed_p (), mode, ushift)
             > COSTS_N_INSNS (1))
        temp = expand_binop (mode, and_optab, temp, gen_int_mode (d - 1, mode),
                             NULL_RTX, 0, OPTAB_LIB_WIDEN);
      else
        temp = expand_shift (RSHIFT_EXPR, mode, temp,
                             ushift, NULL_RTX, 1);
      temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
                           0, OPTAB_LIB_WIDEN);
      return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
    }

  /* Fallback: branch around the bias when branches are cheap.  */
  label = gen_label_rtx ();
  temp = copy_to_mode_reg (mode, op0);
  do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
  expand_inc (temp, gen_int_mode (d - 1, mode));
  emit_label (label);
  return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
}
3973 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3974 if that is convenient, and returning where the result is.
3975 You may request either the quotient or the remainder as the result;
3976 specify REM_FLAG nonzero to get the remainder.
3978 CODE is the expression code for which kind of division this is;
3979 it controls how rounding is done. MODE is the machine mode to use.
3980 UNSIGNEDP nonzero means do unsigned division. */
3982 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3983 and then correct it by or'ing in missing high bits
3984 if result of ANDI is nonzero.
3985 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3986 This could optimize to a bfexts instruction.
3987 But C doesn't use these operations, so their optimizations are
3989 /* ??? For modulo, we don't actually need the highpart of the first product,
3990 the low part will do nicely. And for small divisors, the second multiply
3991 can also be a low-part only multiply or even be completely left out.
3992 E.g. to calculate the remainder of a division by 3 with a 32 bit
3993 multiply, multiply with 0x55555556 and extract the upper two bits;
3994 the result is exact for inputs up to 0x1fffffff.
3995 The input range can be reduced by using cross-sum rules.
3996 For odd divisors >= 3, the following table gives right shift counts
3997 so that if a number is shifted by an integer multiple of the given
3998 amount, the remainder stays the same:
3999 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
4000 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
4001 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
4002 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
4003 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
4005 Cross-sum rules for even numbers can be derived by leaving as many bits
4006 to the right alone as the divisor has zeros to the right.
4007 E.g. if x is an unsigned 32 bit number:
4008 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
4012 expand_divmod (int rem_flag
, enum tree_code code
, machine_mode mode
,
4013 rtx op0
, rtx op1
, rtx target
, int unsignedp
)
4015 machine_mode compute_mode
;
4017 rtx quotient
= 0, remainder
= 0;
4021 optab optab1
, optab2
;
4022 int op1_is_constant
, op1_is_pow2
= 0;
4023 int max_cost
, extra_cost
;
4024 static HOST_WIDE_INT last_div_const
= 0;
4025 bool speed
= optimize_insn_for_speed_p ();
4027 op1_is_constant
= CONST_INT_P (op1
);
4028 if (op1_is_constant
)
4030 unsigned HOST_WIDE_INT ext_op1
= UINTVAL (op1
);
4032 ext_op1
&= GET_MODE_MASK (mode
);
4033 op1_is_pow2
= ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1
)
4034 || (! unsignedp
&& EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1
))));
4038 This is the structure of expand_divmod:
4040 First comes code to fix up the operands so we can perform the operations
4041 correctly and efficiently.
4043 Second comes a switch statement with code specific for each rounding mode.
4044 For some special operands this code emits all RTL for the desired
4045 operation, for other cases, it generates only a quotient and stores it in
4046 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
4047 to indicate that it has not done anything.
4049 Last comes code that finishes the operation. If QUOTIENT is set and
4050 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
4051 QUOTIENT is not set, it is computed using trunc rounding.
4053 We try to generate special code for division and remainder when OP1 is a
4054 constant. If |OP1| = 2**n we can use shifts and some other fast
4055 operations. For other values of OP1, we compute a carefully selected
4056 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
4059 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
4060 half of the product. Different strategies for generating the product are
4061 implemented in expmed_mult_highpart.
4063 If what we actually want is the remainder, we generate that by another
4064 by-constant multiplication and a subtraction. */
4066 /* We shouldn't be called with OP1 == const1_rtx, but some of the
4067 code below will malfunction if we are, so check here and handle
4068 the special case if so. */
4069 if (op1
== const1_rtx
)
4070 return rem_flag
? const0_rtx
: op0
;
4072 /* When dividing by -1, we could get an overflow.
4073 negv_optab can handle overflows. */
4074 if (! unsignedp
&& op1
== constm1_rtx
)
4078 return expand_unop (mode
, flag_trapv
&& GET_MODE_CLASS (mode
) == MODE_INT
4079 ? negv_optab
: neg_optab
, op0
, target
, 0);
4083 /* Don't use the function value register as a target
4084 since we have to read it as well as write it,
4085 and function-inlining gets confused by this. */
4086 && ((REG_P (target
) && REG_FUNCTION_VALUE_P (target
))
4087 /* Don't clobber an operand while doing a multi-step calculation. */
4088 || ((rem_flag
|| op1_is_constant
)
4089 && (reg_mentioned_p (target
, op0
)
4090 || (MEM_P (op0
) && MEM_P (target
))))
4091 || reg_mentioned_p (target
, op1
)
4092 || (MEM_P (op1
) && MEM_P (target
))))
4095 /* Get the mode in which to perform this computation. Normally it will
4096 be MODE, but sometimes we can't do the desired operation in MODE.
4097 If so, pick a wider mode in which we can do the operation. Convert
4098 to that mode at the start to avoid repeated conversions.
4100 First see what operations we need. These depend on the expression
4101 we are evaluating. (We assume that divxx3 insns exist under the
4102 same conditions that modxx3 insns and that these insns don't normally
4103 fail. If these assumptions are not correct, we may generate less
4104 efficient code in some cases.)
4106 Then see if we find a mode in which we can open-code that operation
4107 (either a division, modulus, or shift). Finally, check for the smallest
4108 mode for which we can do the operation with a library call. */
4110 /* We might want to refine this now that we have division-by-constant
4111 optimization. Since expmed_mult_highpart tries so many variants, it is
4112 not straightforward to generalize this. Maybe we should make an array
4113 of possible modes in init_expmed? Save this for GCC 2.7. */
4115 optab1
= ((op1_is_pow2
&& op1
!= const0_rtx
)
4116 ? (unsignedp
? lshr_optab
: ashr_optab
)
4117 : (unsignedp
? udiv_optab
: sdiv_optab
));
4118 optab2
= ((op1_is_pow2
&& op1
!= const0_rtx
)
4120 : (unsignedp
? udivmod_optab
: sdivmod_optab
));
4122 for (compute_mode
= mode
; compute_mode
!= VOIDmode
;
4123 compute_mode
= GET_MODE_WIDER_MODE (compute_mode
))
4124 if (optab_handler (optab1
, compute_mode
) != CODE_FOR_nothing
4125 || optab_handler (optab2
, compute_mode
) != CODE_FOR_nothing
)
4128 if (compute_mode
== VOIDmode
)
4129 for (compute_mode
= mode
; compute_mode
!= VOIDmode
;
4130 compute_mode
= GET_MODE_WIDER_MODE (compute_mode
))
4131 if (optab_libfunc (optab1
, compute_mode
)
4132 || optab_libfunc (optab2
, compute_mode
))
4135 /* If we still couldn't find a mode, use MODE, but expand_binop will
4137 if (compute_mode
== VOIDmode
)
4138 compute_mode
= mode
;
4140 if (target
&& GET_MODE (target
) == compute_mode
)
4143 tquotient
= gen_reg_rtx (compute_mode
);
4145 size
= GET_MODE_BITSIZE (compute_mode
);
4147 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
4148 (mode), and thereby get better code when OP1 is a constant. Do that
4149 later. It will require going over all usages of SIZE below. */
4150 size
= GET_MODE_BITSIZE (mode
);
4153 /* Only deduct something for a REM if the last divide done was
4154 for a different constant. Then set the constant of the last
4156 max_cost
= (unsignedp
4157 ? udiv_cost (speed
, compute_mode
)
4158 : sdiv_cost (speed
, compute_mode
));
4159 if (rem_flag
&& ! (last_div_const
!= 0 && op1_is_constant
4160 && INTVAL (op1
) == last_div_const
))
4161 max_cost
-= (mul_cost (speed
, compute_mode
)
4162 + add_cost (speed
, compute_mode
));
4164 last_div_const
= ! rem_flag
&& op1_is_constant
? INTVAL (op1
) : 0;
4166 /* Now convert to the best mode to use. */
4167 if (compute_mode
!= mode
)
4169 op0
= convert_modes (compute_mode
, mode
, op0
, unsignedp
);
4170 op1
= convert_modes (compute_mode
, mode
, op1
, unsignedp
);
4172 /* convert_modes may have placed op1 into a register, so we
4173 must recompute the following. */
4174 op1_is_constant
= CONST_INT_P (op1
);
4175 op1_is_pow2
= (op1_is_constant
4176 && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1
))
4178 && EXACT_POWER_OF_2_OR_ZERO_P (-UINTVAL (op1
))))));
4181 /* If one of the operands is a volatile MEM, copy it into a register. */
4183 if (MEM_P (op0
) && MEM_VOLATILE_P (op0
))
4184 op0
= force_reg (compute_mode
, op0
);
4185 if (MEM_P (op1
) && MEM_VOLATILE_P (op1
))
4186 op1
= force_reg (compute_mode
, op1
);
4188 /* If we need the remainder or if OP1 is constant, we need to
4189 put OP0 in a register in case it has any queued subexpressions. */
4190 if (rem_flag
|| op1_is_constant
)
4191 op0
= force_reg (compute_mode
, op0
);
4193 last
= get_last_insn ();
4195 /* Promote floor rounding to trunc rounding for unsigned operations. */
4198 if (code
== FLOOR_DIV_EXPR
)
4199 code
= TRUNC_DIV_EXPR
;
4200 if (code
== FLOOR_MOD_EXPR
)
4201 code
= TRUNC_MOD_EXPR
;
4202 if (code
== EXACT_DIV_EXPR
&& op1_is_pow2
)
4203 code
= TRUNC_DIV_EXPR
;
4206 if (op1
!= const0_rtx
)
4209 case TRUNC_MOD_EXPR
:
4210 case TRUNC_DIV_EXPR
:
4211 if (op1_is_constant
)
4215 unsigned HOST_WIDE_INT mh
, ml
;
4216 int pre_shift
, post_shift
;
4218 unsigned HOST_WIDE_INT d
= (INTVAL (op1
)
4219 & GET_MODE_MASK (compute_mode
));
4221 if (EXACT_POWER_OF_2_OR_ZERO_P (d
))
4223 pre_shift
= floor_log2 (d
);
4226 unsigned HOST_WIDE_INT mask
4227 = ((unsigned HOST_WIDE_INT
) 1 << pre_shift
) - 1;
4229 = expand_binop (compute_mode
, and_optab
, op0
,
4230 gen_int_mode (mask
, compute_mode
),
4234 return gen_lowpart (mode
, remainder
);
4236 quotient
= expand_shift (RSHIFT_EXPR
, compute_mode
, op0
,
4237 pre_shift
, tquotient
, 1);
4239 else if (size
<= HOST_BITS_PER_WIDE_INT
)
4241 if (d
>= ((unsigned HOST_WIDE_INT
) 1 << (size
- 1)))
4243 /* Most significant bit of divisor is set; emit an scc
4245 quotient
= emit_store_flag_force (tquotient
, GEU
, op0
, op1
,
4246 compute_mode
, 1, 1);
4250 /* Find a suitable multiplier and right shift count
4251 instead of multiplying with D. */
4253 mh
= choose_multiplier (d
, size
, size
,
4254 &ml
, &post_shift
, &dummy
);
4256 /* If the suggested multiplier is more than SIZE bits,
4257 we can do better for even divisors, using an
4258 initial right shift. */
4259 if (mh
!= 0 && (d
& 1) == 0)
4261 pre_shift
= floor_log2 (d
& -d
);
4262 mh
= choose_multiplier (d
>> pre_shift
, size
,
4264 &ml
, &post_shift
, &dummy
);
4274 if (post_shift
- 1 >= BITS_PER_WORD
)
4278 = (shift_cost (speed
, compute_mode
, post_shift
- 1)
4279 + shift_cost (speed
, compute_mode
, 1)
4280 + 2 * add_cost (speed
, compute_mode
));
4281 t1
= expmed_mult_highpart
4283 gen_int_mode (ml
, compute_mode
),
4284 NULL_RTX
, 1, max_cost
- extra_cost
);
4287 t2
= force_operand (gen_rtx_MINUS (compute_mode
,
4290 t3
= expand_shift (RSHIFT_EXPR
, compute_mode
,
4291 t2
, 1, NULL_RTX
, 1);
4292 t4
= force_operand (gen_rtx_PLUS (compute_mode
,
4295 quotient
= expand_shift
4296 (RSHIFT_EXPR
, compute_mode
, t4
,
4297 post_shift
- 1, tquotient
, 1);
4303 if (pre_shift
>= BITS_PER_WORD
4304 || post_shift
>= BITS_PER_WORD
)
4308 (RSHIFT_EXPR
, compute_mode
, op0
,
4309 pre_shift
, NULL_RTX
, 1);
4311 = (shift_cost (speed
, compute_mode
, pre_shift
)
4312 + shift_cost (speed
, compute_mode
, post_shift
));
4313 t2
= expmed_mult_highpart
4315 gen_int_mode (ml
, compute_mode
),
4316 NULL_RTX
, 1, max_cost
- extra_cost
);
4319 quotient
= expand_shift
4320 (RSHIFT_EXPR
, compute_mode
, t2
,
4321 post_shift
, tquotient
, 1);
4325 else /* Too wide mode to use tricky code */
4328 insn
= get_last_insn ();
4330 set_dst_reg_note (insn
, REG_EQUAL
,
4331 gen_rtx_UDIV (compute_mode
, op0
, op1
),
4334 else /* TRUNC_DIV, signed */
4336 unsigned HOST_WIDE_INT ml
;
4337 int lgup
, post_shift
;
4339 HOST_WIDE_INT d
= INTVAL (op1
);
4340 unsigned HOST_WIDE_INT abs_d
;
4342 /* Since d might be INT_MIN, we have to cast to
4343 unsigned HOST_WIDE_INT before negating to avoid
4344 undefined signed overflow. */
4346 ? (unsigned HOST_WIDE_INT
) d
4347 : - (unsigned HOST_WIDE_INT
) d
);
4349 /* n rem d = n rem -d */
4350 if (rem_flag
&& d
< 0)
4353 op1
= gen_int_mode (abs_d
, compute_mode
);
4359 quotient
= expand_unop (compute_mode
, neg_optab
, op0
,
4361 else if (HOST_BITS_PER_WIDE_INT
>= size
4362 && abs_d
== (unsigned HOST_WIDE_INT
) 1 << (size
- 1))
4364 /* This case is not handled correctly below. */
4365 quotient
= emit_store_flag (tquotient
, EQ
, op0
, op1
,
4366 compute_mode
, 1, 1);
4370 else if (EXACT_POWER_OF_2_OR_ZERO_P (d
)
4372 ? smod_pow2_cheap (speed
, compute_mode
)
4373 : sdiv_pow2_cheap (speed
, compute_mode
))
4374 /* We assume that cheap metric is true if the
4375 optab has an expander for this mode. */
4376 && ((optab_handler ((rem_flag
? smod_optab
4379 != CODE_FOR_nothing
)
4380 || (optab_handler (sdivmod_optab
,
4382 != CODE_FOR_nothing
)))
4384 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d
))
4388 remainder
= expand_smod_pow2 (compute_mode
, op0
, d
);
4390 return gen_lowpart (mode
, remainder
);
4393 if (sdiv_pow2_cheap (speed
, compute_mode
)
4394 && ((optab_handler (sdiv_optab
, compute_mode
)
4395 != CODE_FOR_nothing
)
4396 || (optab_handler (sdivmod_optab
, compute_mode
)
4397 != CODE_FOR_nothing
)))
4398 quotient
= expand_divmod (0, TRUNC_DIV_EXPR
,
4400 gen_int_mode (abs_d
,
4404 quotient
= expand_sdiv_pow2 (compute_mode
, op0
, abs_d
);
4406 /* We have computed OP0 / abs(OP1). If OP1 is negative,
4407 negate the quotient. */
4410 insn
= get_last_insn ();
4412 && abs_d
< ((unsigned HOST_WIDE_INT
) 1
4413 << (HOST_BITS_PER_WIDE_INT
- 1)))
4414 set_dst_reg_note (insn
, REG_EQUAL
,
4415 gen_rtx_DIV (compute_mode
, op0
,
4421 quotient
= expand_unop (compute_mode
, neg_optab
,
4422 quotient
, quotient
, 0);
4425 else if (size
<= HOST_BITS_PER_WIDE_INT
)
4427 choose_multiplier (abs_d
, size
, size
- 1,
4428 &ml
, &post_shift
, &lgup
);
4429 if (ml
< (unsigned HOST_WIDE_INT
) 1 << (size
- 1))
4433 if (post_shift
>= BITS_PER_WORD
4434 || size
- 1 >= BITS_PER_WORD
)
4437 extra_cost
= (shift_cost (speed
, compute_mode
, post_shift
)
4438 + shift_cost (speed
, compute_mode
, size
- 1)
4439 + add_cost (speed
, compute_mode
));
4440 t1
= expmed_mult_highpart
4441 (compute_mode
, op0
, gen_int_mode (ml
, compute_mode
),
4442 NULL_RTX
, 0, max_cost
- extra_cost
);
4446 (RSHIFT_EXPR
, compute_mode
, t1
,
4447 post_shift
, NULL_RTX
, 0);
4449 (RSHIFT_EXPR
, compute_mode
, op0
,
4450 size
- 1, NULL_RTX
, 0);
4453 = force_operand (gen_rtx_MINUS (compute_mode
,
4458 = force_operand (gen_rtx_MINUS (compute_mode
,
4466 if (post_shift
>= BITS_PER_WORD
4467 || size
- 1 >= BITS_PER_WORD
)
4470 ml
|= (~(unsigned HOST_WIDE_INT
) 0) << (size
- 1);
4471 mlr
= gen_int_mode (ml
, compute_mode
);
4472 extra_cost
= (shift_cost (speed
, compute_mode
, post_shift
)
4473 + shift_cost (speed
, compute_mode
, size
- 1)
4474 + 2 * add_cost (speed
, compute_mode
));
4475 t1
= expmed_mult_highpart (compute_mode
, op0
, mlr
,
4477 max_cost
- extra_cost
);
4480 t2
= force_operand (gen_rtx_PLUS (compute_mode
,
4484 (RSHIFT_EXPR
, compute_mode
, t2
,
4485 post_shift
, NULL_RTX
, 0);
4487 (RSHIFT_EXPR
, compute_mode
, op0
,
4488 size
- 1, NULL_RTX
, 0);
4491 = force_operand (gen_rtx_MINUS (compute_mode
,
4496 = force_operand (gen_rtx_MINUS (compute_mode
,
4501 else /* Too wide mode to use tricky code */
4504 insn
= get_last_insn ();
4506 set_dst_reg_note (insn
, REG_EQUAL
,
4507 gen_rtx_DIV (compute_mode
, op0
, op1
),
4513 delete_insns_since (last
);
4516 case FLOOR_DIV_EXPR
:
4517 case FLOOR_MOD_EXPR
:
4518 /* We will come here only for signed operations. */
4519 if (op1_is_constant
&& HOST_BITS_PER_WIDE_INT
>= size
)
4521 unsigned HOST_WIDE_INT mh
, ml
;
4522 int pre_shift
, lgup
, post_shift
;
4523 HOST_WIDE_INT d
= INTVAL (op1
);
4527 /* We could just as easily deal with negative constants here,
4528 but it does not seem worth the trouble for GCC 2.6. */
4529 if (EXACT_POWER_OF_2_OR_ZERO_P (d
))
4531 pre_shift
= floor_log2 (d
);
4534 unsigned HOST_WIDE_INT mask
4535 = ((unsigned HOST_WIDE_INT
) 1 << pre_shift
) - 1;
4536 remainder
= expand_binop
4537 (compute_mode
, and_optab
, op0
,
4538 gen_int_mode (mask
, compute_mode
),
4539 remainder
, 0, OPTAB_LIB_WIDEN
);
4541 return gen_lowpart (mode
, remainder
);
4543 quotient
= expand_shift
4544 (RSHIFT_EXPR
, compute_mode
, op0
,
4545 pre_shift
, tquotient
, 0);
4551 mh
= choose_multiplier (d
, size
, size
- 1,
4552 &ml
, &post_shift
, &lgup
);
4555 if (post_shift
< BITS_PER_WORD
4556 && size
- 1 < BITS_PER_WORD
)
4559 (RSHIFT_EXPR
, compute_mode
, op0
,
4560 size
- 1, NULL_RTX
, 0);
4561 t2
= expand_binop (compute_mode
, xor_optab
, op0
, t1
,
4562 NULL_RTX
, 0, OPTAB_WIDEN
);
4563 extra_cost
= (shift_cost (speed
, compute_mode
, post_shift
)
4564 + shift_cost (speed
, compute_mode
, size
- 1)
4565 + 2 * add_cost (speed
, compute_mode
));
4566 t3
= expmed_mult_highpart
4567 (compute_mode
, t2
, gen_int_mode (ml
, compute_mode
),
4568 NULL_RTX
, 1, max_cost
- extra_cost
);
4572 (RSHIFT_EXPR
, compute_mode
, t3
,
4573 post_shift
, NULL_RTX
, 1);
4574 quotient
= expand_binop (compute_mode
, xor_optab
,
4575 t4
, t1
, tquotient
, 0,
4583 rtx nsign
, t1
, t2
, t3
, t4
;
4584 t1
= force_operand (gen_rtx_PLUS (compute_mode
,
4585 op0
, constm1_rtx
), NULL_RTX
);
4586 t2
= expand_binop (compute_mode
, ior_optab
, op0
, t1
, NULL_RTX
,
4588 nsign
= expand_shift
4589 (RSHIFT_EXPR
, compute_mode
, t2
,
4590 size
- 1, NULL_RTX
, 0);
4591 t3
= force_operand (gen_rtx_MINUS (compute_mode
, t1
, nsign
),
4593 t4
= expand_divmod (0, TRUNC_DIV_EXPR
, compute_mode
, t3
, op1
,
4598 t5
= expand_unop (compute_mode
, one_cmpl_optab
, nsign
,
4600 quotient
= force_operand (gen_rtx_PLUS (compute_mode
,
4609 delete_insns_since (last
);
4611 /* Try using an instruction that produces both the quotient and
4612 remainder, using truncation. We can easily compensate the quotient
4613 or remainder to get floor rounding, once we have the remainder.
4614 Notice that we compute also the final remainder value here,
4615 and return the result right away. */
4616 if (target
== 0 || GET_MODE (target
) != compute_mode
)
4617 target
= gen_reg_rtx (compute_mode
);
4622 = REG_P (target
) ? target
: gen_reg_rtx (compute_mode
);
4623 quotient
= gen_reg_rtx (compute_mode
);
4628 = REG_P (target
) ? target
: gen_reg_rtx (compute_mode
);
4629 remainder
= gen_reg_rtx (compute_mode
);
4632 if (expand_twoval_binop (sdivmod_optab
, op0
, op1
,
4633 quotient
, remainder
, 0))
4635 /* This could be computed with a branch-less sequence.
4636 Save that for later. */
4638 rtx_code_label
*label
= gen_label_rtx ();
4639 do_cmp_and_jump (remainder
, const0_rtx
, EQ
, compute_mode
, label
);
4640 tem
= expand_binop (compute_mode
, xor_optab
, op0
, op1
,
4641 NULL_RTX
, 0, OPTAB_WIDEN
);
4642 do_cmp_and_jump (tem
, const0_rtx
, GE
, compute_mode
, label
);
4643 expand_dec (quotient
, const1_rtx
);
4644 expand_inc (remainder
, op1
);
4646 return gen_lowpart (mode
, rem_flag
? remainder
: quotient
);
4649 /* No luck with division elimination or divmod. Have to do it
4650 by conditionally adjusting op0 *and* the result. */
4652 rtx_code_label
*label1
, *label2
, *label3
, *label4
, *label5
;
4656 quotient
= gen_reg_rtx (compute_mode
);
4657 adjusted_op0
= copy_to_mode_reg (compute_mode
, op0
);
4658 label1
= gen_label_rtx ();
4659 label2
= gen_label_rtx ();
4660 label3
= gen_label_rtx ();
4661 label4
= gen_label_rtx ();
4662 label5
= gen_label_rtx ();
4663 do_cmp_and_jump (op1
, const0_rtx
, LT
, compute_mode
, label2
);
4664 do_cmp_and_jump (adjusted_op0
, const0_rtx
, LT
, compute_mode
, label1
);
4665 tem
= expand_binop (compute_mode
, sdiv_optab
, adjusted_op0
, op1
,
4666 quotient
, 0, OPTAB_LIB_WIDEN
);
4667 if (tem
!= quotient
)
4668 emit_move_insn (quotient
, tem
);
4669 emit_jump_insn (targetm
.gen_jump (label5
));
4671 emit_label (label1
);
4672 expand_inc (adjusted_op0
, const1_rtx
);
4673 emit_jump_insn (targetm
.gen_jump (label4
));
4675 emit_label (label2
);
4676 do_cmp_and_jump (adjusted_op0
, const0_rtx
, GT
, compute_mode
, label3
);
4677 tem
= expand_binop (compute_mode
, sdiv_optab
, adjusted_op0
, op1
,
4678 quotient
, 0, OPTAB_LIB_WIDEN
);
4679 if (tem
!= quotient
)
4680 emit_move_insn (quotient
, tem
);
4681 emit_jump_insn (targetm
.gen_jump (label5
));
4683 emit_label (label3
);
4684 expand_dec (adjusted_op0
, const1_rtx
);
4685 emit_label (label4
);
4686 tem
= expand_binop (compute_mode
, sdiv_optab
, adjusted_op0
, op1
,
4687 quotient
, 0, OPTAB_LIB_WIDEN
);
4688 if (tem
!= quotient
)
4689 emit_move_insn (quotient
, tem
);
4690 expand_dec (quotient
, const1_rtx
);
4691 emit_label (label5
);
4699 if (op1_is_constant
&& EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1
)))
4702 unsigned HOST_WIDE_INT d
= INTVAL (op1
);
4703 t1
= expand_shift (RSHIFT_EXPR
, compute_mode
, op0
,
4704 floor_log2 (d
), tquotient
, 1);
4705 t2
= expand_binop (compute_mode
, and_optab
, op0
,
4706 gen_int_mode (d
- 1, compute_mode
),
4707 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
4708 t3
= gen_reg_rtx (compute_mode
);
4709 t3
= emit_store_flag (t3
, NE
, t2
, const0_rtx
,
4710 compute_mode
, 1, 1);
4713 rtx_code_label
*lab
;
4714 lab
= gen_label_rtx ();
4715 do_cmp_and_jump (t2
, const0_rtx
, EQ
, compute_mode
, lab
);
4716 expand_inc (t1
, const1_rtx
);
4721 quotient
= force_operand (gen_rtx_PLUS (compute_mode
,
4727 /* Try using an instruction that produces both the quotient and
4728 remainder, using truncation. We can easily compensate the
4729 quotient or remainder to get ceiling rounding, once we have the
4730 remainder. Notice that we compute also the final remainder
4731 value here, and return the result right away. */
4732 if (target
== 0 || GET_MODE (target
) != compute_mode
)
4733 target
= gen_reg_rtx (compute_mode
);
4737 remainder
= (REG_P (target
)
4738 ? target
: gen_reg_rtx (compute_mode
));
4739 quotient
= gen_reg_rtx (compute_mode
);
4743 quotient
= (REG_P (target
)
4744 ? target
: gen_reg_rtx (compute_mode
));
4745 remainder
= gen_reg_rtx (compute_mode
);
4748 if (expand_twoval_binop (udivmod_optab
, op0
, op1
, quotient
,
4751 /* This could be computed with a branch-less sequence.
4752 Save that for later. */
4753 rtx_code_label
*label
= gen_label_rtx ();
4754 do_cmp_and_jump (remainder
, const0_rtx
, EQ
,
4755 compute_mode
, label
);
4756 expand_inc (quotient
, const1_rtx
);
4757 expand_dec (remainder
, op1
);
4759 return gen_lowpart (mode
, rem_flag
? remainder
: quotient
);
4762 /* No luck with division elimination or divmod. Have to do it
4763 by conditionally adjusting op0 *and* the result. */
4765 rtx_code_label
*label1
, *label2
;
4766 rtx adjusted_op0
, tem
;
4768 quotient
= gen_reg_rtx (compute_mode
);
4769 adjusted_op0
= copy_to_mode_reg (compute_mode
, op0
);
4770 label1
= gen_label_rtx ();
4771 label2
= gen_label_rtx ();
4772 do_cmp_and_jump (adjusted_op0
, const0_rtx
, NE
,
4773 compute_mode
, label1
);
4774 emit_move_insn (quotient
, const0_rtx
);
4775 emit_jump_insn (targetm
.gen_jump (label2
));
4777 emit_label (label1
);
4778 expand_dec (adjusted_op0
, const1_rtx
);
4779 tem
= expand_binop (compute_mode
, udiv_optab
, adjusted_op0
, op1
,
4780 quotient
, 1, OPTAB_LIB_WIDEN
);
4781 if (tem
!= quotient
)
4782 emit_move_insn (quotient
, tem
);
4783 expand_inc (quotient
, const1_rtx
);
4784 emit_label (label2
);
4789 if (op1_is_constant
&& EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1
))
4790 && INTVAL (op1
) >= 0)
4792 /* This is extremely similar to the code for the unsigned case
4793 above. For 2.7 we should merge these variants, but for
4794 2.6.1 I don't want to touch the code for unsigned since that
4795 get used in C. The signed case will only be used by other
4799 unsigned HOST_WIDE_INT d
= INTVAL (op1
);
4800 t1
= expand_shift (RSHIFT_EXPR
, compute_mode
, op0
,
4801 floor_log2 (d
), tquotient
, 0);
4802 t2
= expand_binop (compute_mode
, and_optab
, op0
,
4803 gen_int_mode (d
- 1, compute_mode
),
4804 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
4805 t3
= gen_reg_rtx (compute_mode
);
4806 t3
= emit_store_flag (t3
, NE
, t2
, const0_rtx
,
4807 compute_mode
, 1, 1);
4810 rtx_code_label
*lab
;
4811 lab
= gen_label_rtx ();
4812 do_cmp_and_jump (t2
, const0_rtx
, EQ
, compute_mode
, lab
);
4813 expand_inc (t1
, const1_rtx
);
4818 quotient
= force_operand (gen_rtx_PLUS (compute_mode
,
4824 /* Try using an instruction that produces both the quotient and
4825 remainder, using truncation. We can easily compensate the
4826 quotient or remainder to get ceiling rounding, once we have the
4827 remainder. Notice that we compute also the final remainder
4828 value here, and return the result right away. */
4829 if (target
== 0 || GET_MODE (target
) != compute_mode
)
4830 target
= gen_reg_rtx (compute_mode
);
4833 remainder
= (REG_P (target
)
4834 ? target
: gen_reg_rtx (compute_mode
));
4835 quotient
= gen_reg_rtx (compute_mode
);
4839 quotient
= (REG_P (target
)
4840 ? target
: gen_reg_rtx (compute_mode
));
4841 remainder
= gen_reg_rtx (compute_mode
);
4844 if (expand_twoval_binop (sdivmod_optab
, op0
, op1
, quotient
,
4847 /* This could be computed with a branch-less sequence.
4848 Save that for later. */
4850 rtx_code_label
*label
= gen_label_rtx ();
4851 do_cmp_and_jump (remainder
, const0_rtx
, EQ
,
4852 compute_mode
, label
);
4853 tem
= expand_binop (compute_mode
, xor_optab
, op0
, op1
,
4854 NULL_RTX
, 0, OPTAB_WIDEN
);
4855 do_cmp_and_jump (tem
, const0_rtx
, LT
, compute_mode
, label
);
4856 expand_inc (quotient
, const1_rtx
);
4857 expand_dec (remainder
, op1
);
4859 return gen_lowpart (mode
, rem_flag
? remainder
: quotient
);
4862 /* No luck with division elimination or divmod. Have to do it
4863 by conditionally adjusting op0 *and* the result. */
4865 rtx_code_label
*label1
, *label2
, *label3
, *label4
, *label5
;
4869 quotient
= gen_reg_rtx (compute_mode
);
4870 adjusted_op0
= copy_to_mode_reg (compute_mode
, op0
);
4871 label1
= gen_label_rtx ();
4872 label2
= gen_label_rtx ();
4873 label3
= gen_label_rtx ();
4874 label4
= gen_label_rtx ();
4875 label5
= gen_label_rtx ();
4876 do_cmp_and_jump (op1
, const0_rtx
, LT
, compute_mode
, label2
);
4877 do_cmp_and_jump (adjusted_op0
, const0_rtx
, GT
,
4878 compute_mode
, label1
);
4879 tem
= expand_binop (compute_mode
, sdiv_optab
, adjusted_op0
, op1
,
4880 quotient
, 0, OPTAB_LIB_WIDEN
);
4881 if (tem
!= quotient
)
4882 emit_move_insn (quotient
, tem
);
4883 emit_jump_insn (targetm
.gen_jump (label5
));
4885 emit_label (label1
);
4886 expand_dec (adjusted_op0
, const1_rtx
);
4887 emit_jump_insn (targetm
.gen_jump (label4
));
4889 emit_label (label2
);
4890 do_cmp_and_jump (adjusted_op0
, const0_rtx
, LT
,
4891 compute_mode
, label3
);
4892 tem
= expand_binop (compute_mode
, sdiv_optab
, adjusted_op0
, op1
,
4893 quotient
, 0, OPTAB_LIB_WIDEN
);
4894 if (tem
!= quotient
)
4895 emit_move_insn (quotient
, tem
);
4896 emit_jump_insn (targetm
.gen_jump (label5
));
4898 emit_label (label3
);
4899 expand_inc (adjusted_op0
, const1_rtx
);
4900 emit_label (label4
);
4901 tem
= expand_binop (compute_mode
, sdiv_optab
, adjusted_op0
, op1
,
4902 quotient
, 0, OPTAB_LIB_WIDEN
);
4903 if (tem
!= quotient
)
4904 emit_move_insn (quotient
, tem
);
4905 expand_inc (quotient
, const1_rtx
);
4906 emit_label (label5
);
4911 case EXACT_DIV_EXPR
:
4912 if (op1_is_constant
&& HOST_BITS_PER_WIDE_INT
>= size
)
4914 HOST_WIDE_INT d
= INTVAL (op1
);
4915 unsigned HOST_WIDE_INT ml
;
4919 pre_shift
= floor_log2 (d
& -d
);
4920 ml
= invert_mod2n (d
>> pre_shift
, size
);
4921 t1
= expand_shift (RSHIFT_EXPR
, compute_mode
, op0
,
4922 pre_shift
, NULL_RTX
, unsignedp
);
4923 quotient
= expand_mult (compute_mode
, t1
,
4924 gen_int_mode (ml
, compute_mode
),
4927 insn
= get_last_insn ();
4928 set_dst_reg_note (insn
, REG_EQUAL
,
4929 gen_rtx_fmt_ee (unsignedp
? UDIV
: DIV
,
4930 compute_mode
, op0
, op1
),
4935 case ROUND_DIV_EXPR
:
4936 case ROUND_MOD_EXPR
:
4940 rtx_code_label
*label
;
4941 label
= gen_label_rtx ();
4942 quotient
= gen_reg_rtx (compute_mode
);
4943 remainder
= gen_reg_rtx (compute_mode
);
4944 if (expand_twoval_binop (udivmod_optab
, op0
, op1
, quotient
, remainder
, 1) == 0)
4947 quotient
= expand_binop (compute_mode
, udiv_optab
, op0
, op1
,
4948 quotient
, 1, OPTAB_LIB_WIDEN
);
4949 tem
= expand_mult (compute_mode
, quotient
, op1
, NULL_RTX
, 1);
4950 remainder
= expand_binop (compute_mode
, sub_optab
, op0
, tem
,
4951 remainder
, 1, OPTAB_LIB_WIDEN
);
4953 tem
= plus_constant (compute_mode
, op1
, -1);
4954 tem
= expand_shift (RSHIFT_EXPR
, compute_mode
, tem
, 1, NULL_RTX
, 1);
4955 do_cmp_and_jump (remainder
, tem
, LEU
, compute_mode
, label
);
4956 expand_inc (quotient
, const1_rtx
);
4957 expand_dec (remainder
, op1
);
4962 rtx abs_rem
, abs_op1
, tem
, mask
;
4963 rtx_code_label
*label
;
4964 label
= gen_label_rtx ();
4965 quotient
= gen_reg_rtx (compute_mode
);
4966 remainder
= gen_reg_rtx (compute_mode
);
4967 if (expand_twoval_binop (sdivmod_optab
, op0
, op1
, quotient
, remainder
, 0) == 0)
4970 quotient
= expand_binop (compute_mode
, sdiv_optab
, op0
, op1
,
4971 quotient
, 0, OPTAB_LIB_WIDEN
);
4972 tem
= expand_mult (compute_mode
, quotient
, op1
, NULL_RTX
, 0);
4973 remainder
= expand_binop (compute_mode
, sub_optab
, op0
, tem
,
4974 remainder
, 0, OPTAB_LIB_WIDEN
);
4976 abs_rem
= expand_abs (compute_mode
, remainder
, NULL_RTX
, 1, 0);
4977 abs_op1
= expand_abs (compute_mode
, op1
, NULL_RTX
, 1, 0);
4978 tem
= expand_shift (LSHIFT_EXPR
, compute_mode
, abs_rem
,
4980 do_cmp_and_jump (tem
, abs_op1
, LTU
, compute_mode
, label
);
4981 tem
= expand_binop (compute_mode
, xor_optab
, op0
, op1
,
4982 NULL_RTX
, 0, OPTAB_WIDEN
);
4983 mask
= expand_shift (RSHIFT_EXPR
, compute_mode
, tem
,
4984 size
- 1, NULL_RTX
, 0);
4985 tem
= expand_binop (compute_mode
, xor_optab
, mask
, const1_rtx
,
4986 NULL_RTX
, 0, OPTAB_WIDEN
);
4987 tem
= expand_binop (compute_mode
, sub_optab
, tem
, mask
,
4988 NULL_RTX
, 0, OPTAB_WIDEN
);
4989 expand_inc (quotient
, tem
);
4990 tem
= expand_binop (compute_mode
, xor_optab
, mask
, op1
,
4991 NULL_RTX
, 0, OPTAB_WIDEN
);
4992 tem
= expand_binop (compute_mode
, sub_optab
, tem
, mask
,
4993 NULL_RTX
, 0, OPTAB_WIDEN
);
4994 expand_dec (remainder
, tem
);
4997 return gen_lowpart (mode
, rem_flag
? remainder
: quotient
);
5005 if (target
&& GET_MODE (target
) != compute_mode
)
5010 /* Try to produce the remainder without producing the quotient.
5011 If we seem to have a divmod pattern that does not require widening,
5012 don't try widening here. We should really have a WIDEN argument
5013 to expand_twoval_binop, since what we'd really like to do here is
5014 1) try a mod insn in compute_mode
5015 2) try a divmod insn in compute_mode
5016 3) try a div insn in compute_mode and multiply-subtract to get
5018 4) try the same things with widening allowed. */
5020 = sign_expand_binop (compute_mode
, umod_optab
, smod_optab
,
5023 ((optab_handler (optab2
, compute_mode
)
5024 != CODE_FOR_nothing
)
5025 ? OPTAB_DIRECT
: OPTAB_WIDEN
));
5028 /* No luck there. Can we do remainder and divide at once
5029 without a library call? */
5030 remainder
= gen_reg_rtx (compute_mode
);
5031 if (! expand_twoval_binop ((unsignedp
5035 NULL_RTX
, remainder
, unsignedp
))
5040 return gen_lowpart (mode
, remainder
);
5043 /* Produce the quotient. Try a quotient insn, but not a library call.
5044 If we have a divmod in this mode, use it in preference to widening
5045 the div (for this test we assume it will not fail). Note that optab2
5046 is set to the one of the two optabs that the call below will use. */
5048 = sign_expand_binop (compute_mode
, udiv_optab
, sdiv_optab
,
5049 op0
, op1
, rem_flag
? NULL_RTX
: target
,
5051 ((optab_handler (optab2
, compute_mode
)
5052 != CODE_FOR_nothing
)
5053 ? OPTAB_DIRECT
: OPTAB_WIDEN
));
5057 /* No luck there. Try a quotient-and-remainder insn,
5058 keeping the quotient alone. */
5059 quotient
= gen_reg_rtx (compute_mode
);
5060 if (! expand_twoval_binop (unsignedp
? udivmod_optab
: sdivmod_optab
,
5062 quotient
, NULL_RTX
, unsignedp
))
5066 /* Still no luck. If we are not computing the remainder,
5067 use a library call for the quotient. */
5068 quotient
= sign_expand_binop (compute_mode
,
5069 udiv_optab
, sdiv_optab
,
5071 unsignedp
, OPTAB_LIB_WIDEN
);
5078 if (target
&& GET_MODE (target
) != compute_mode
)
5083 /* No divide instruction either. Use library for remainder. */
5084 remainder
= sign_expand_binop (compute_mode
, umod_optab
, smod_optab
,
5086 unsignedp
, OPTAB_LIB_WIDEN
);
5087 /* No remainder function. Try a quotient-and-remainder
5088 function, keeping the remainder. */
5091 remainder
= gen_reg_rtx (compute_mode
);
5092 if (!expand_twoval_binop_libfunc
5093 (unsignedp
? udivmod_optab
: sdivmod_optab
,
5095 NULL_RTX
, remainder
,
5096 unsignedp
? UMOD
: MOD
))
5097 remainder
= NULL_RTX
;
5102 /* We divided. Now finish doing X - Y * (X / Y). */
5103 remainder
= expand_mult (compute_mode
, quotient
, op1
,
5104 NULL_RTX
, unsignedp
);
5105 remainder
= expand_binop (compute_mode
, sub_optab
, op0
,
5106 remainder
, target
, unsignedp
,
5111 return gen_lowpart (mode
, rem_flag
? remainder
: quotient
);
5114 /* Return a tree node with data type TYPE, describing the value of X.
5115 Usually this is an VAR_DECL, if there is no obvious better choice.
5116 X may be an expression, however we only support those expressions
5117 generated by loop.c. */
/* NOTE(review): this extraction is lossy -- the embedded original line
   numbers jump (5117 -> 5120, 5124 -> 5127, ...), so the return type,
   braces, several case labels and some returns are missing from view.
   The comments added below describe only what the visible fragments
   establish; do not treat this text as compilable.  */
5120 make_tree (tree type
, rtx x
)
/* Dispatch on the RTL code of X and build the equivalent tree node.  */
5124 switch (GET_CODE (x
))
/* CONST_WIDE_INT: convert directly, pairing X with TYPE's mode.  */
5127 case CONST_WIDE_INT
:
5128 t
= wide_int_to_tree (type
, std::make_pair (x
, TYPE_MODE (type
)));
/* Presumably the CONST_DOUBLE case (label not visible): when the target
   does not use wide ints and X carries no mode, the CONST_DOUBLE holds a
   2-word integer; the build_real fragment below handles the
   floating-point flavour.  */
5132 STATIC_ASSERT (HOST_BITS_PER_WIDE_INT
* 2 <= MAX_BITSIZE_MODE_ANY_INT
);
5133 if (TARGET_SUPPORTS_WIDE_INT
== 0 && GET_MODE (x
) == VOIDmode
)
5134 t
= wide_int_to_tree (type
,
5135 wide_int::from_array (&CONST_DOUBLE_LOW (x
), 2,
5136 HOST_BITS_PER_WIDE_INT
* 2));
/* Floating-point CONST_DOUBLE: build a REAL_CST from its value.  */
5138 t
= build_real (type
, *CONST_DOUBLE_REAL_VALUE (x
));
/* Presumably CONST_VECTOR (label not visible): build the vector constant
   element by element, recursing with the vector's element type.  */
5144 int units
= CONST_VECTOR_NUNITS (x
);
5145 tree itype
= TREE_TYPE (type
);
5149 /* Build a tree with vector elements. */
5150 elts
= XALLOCAVEC (tree
, units
);
5151 for (i
= units
- 1; i
>= 0; --i
)
5153 rtx elt
= CONST_VECTOR_ELT (x
, i
);
5154 elts
[i
] = make_tree (itype
, elt
);
5157 return build_vector (type
, elts
);
/* Arithmetic codes map 1:1 onto tree codes, recursing on operands.  */
5161 return fold_build2 (PLUS_EXPR
, type
, make_tree (type
, XEXP (x
, 0)),
5162 make_tree (type
, XEXP (x
, 1)));
5165 return fold_build2 (MINUS_EXPR
, type
, make_tree (type
, XEXP (x
, 0)),
5166 make_tree (type
, XEXP (x
, 1)));
5169 return fold_build1 (NEGATE_EXPR
, type
, make_tree (type
, XEXP (x
, 0)));
5172 return fold_build2 (MULT_EXPR
, type
, make_tree (type
, XEXP (x
, 0)),
5173 make_tree (type
, XEXP (x
, 1)));
5176 return fold_build2 (LSHIFT_EXPR
, type
, make_tree (type
, XEXP (x
, 0)),
5177 make_tree (type
, XEXP (x
, 1)));
/* Logical right shift: force an unsigned type so RSHIFT_EXPR does not
   sign-extend, then convert the result back to TYPE.  */
5180 t
= unsigned_type_for (type
);
5181 return fold_convert (type
, build2 (RSHIFT_EXPR
, t
,
5182 make_tree (t
, XEXP (x
, 0)),
5183 make_tree (type
, XEXP (x
, 1))));
/* Arithmetic right shift: force a signed type for the same reason.  */
5186 t
= signed_type_for (type
);
5187 return fold_convert (type
, build2 (RSHIFT_EXPR
, t
,
5188 make_tree (t
, XEXP (x
, 0)),
5189 make_tree (type
, XEXP (x
, 1))));
/* Signed division: use an explicitly signed type for non-REAL TYPEs.  */
5192 if (TREE_CODE (type
) != REAL_TYPE
)
5193 t
= signed_type_for (type
);
5197 return fold_convert (type
, build2 (TRUNC_DIV_EXPR
, t
,
5198 make_tree (t
, XEXP (x
, 0)),
5199 make_tree (t
, XEXP (x
, 1))));
/* Unsigned division: same shape with an unsigned type.  */
5201 t
= unsigned_type_for (type
);
5202 return fold_convert (type
, build2 (TRUNC_DIV_EXPR
, t
,
5203 make_tree (t
, XEXP (x
, 0)),
5204 make_tree (t
, XEXP (x
, 1))));
/* SIGN_EXTEND / ZERO_EXTEND: ask the language hook for a type of the
   operand's mode with matching signedness, then convert to TYPE.  */
5208 t
= lang_hooks
.types
.type_for_mode (GET_MODE (XEXP (x
, 0)),
5209 GET_CODE (x
) == ZERO_EXTEND
);
5210 return fold_convert (type
, make_tree (t
, XEXP (x
, 0)));
/* CONST wrapper: just unwrap and recurse.  */
5213 return make_tree (type
, XEXP (x
, 0));
/* SYMBOL_REF with a known decl: return the decl's address.  */
5216 t
= SYMBOL_REF_DECL (x
);
5218 return fold_convert (type
, build_fold_addr_expr (t
));
5219 /* else fall through. */
/* Default: wrap X in a fresh VAR_DECL whose DECL_RTL is X itself.  */
5222 t
= build_decl (RTL_LOCATION (x
), VAR_DECL
, NULL_TREE
, type
);
5224 /* If TYPE is a POINTER_TYPE, we might need to convert X from
5225 address mode to pointer mode. */
5226 if (POINTER_TYPE_P (type
))
5227 x
= convert_memory_address_addr_space
5228 (TYPE_MODE (type
), x
, TYPE_ADDR_SPACE (TREE_TYPE (type
)));
5230 /* Note that we do *not* use SET_DECL_RTL here, because we do not
5231 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
5232 t
->decl_with_rtl
.rtl
= x
;
5238 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5239 and returning TARGET.
5241 If TARGET is 0, a pseudo-register or constant is returned. */
/* NOTE(review): lines are missing from this extraction (5239 -> 5241,
   5244 -> 5248, ...): the return type, the declaration of TEM, the
   branch structure and the final return are not visible here.  */
5244 expand_and (machine_mode mode
, rtx op0
, rtx op1
, rtx target
)
/* If both operands are mode-less constants, try constant folding
   first via simplify_binary_operation.  */
5248 if (GET_MODE (op0
) == VOIDmode
&& GET_MODE (op1
) == VOIDmode
)
5249 tem
= simplify_binary_operation (AND
, mode
, op0
, op1
);
/* Otherwise emit an AND through the optab machinery, allowing widening
   or a library call (the guard choosing this path is not visible).  */
5251 tem
= expand_binop (mode
, and_optab
, op0
, op1
, target
, 0, OPTAB_LIB_WIDEN
);
/* Copy the result into TARGET when the expansion landed elsewhere.  */
5255 else if (tem
!= target
)
5256 emit_move_insn (target
, tem
);
5260 /* Helper function for emit_store_flag. */
/* NOTE(review): this extraction is lossy (line numbers jump, e.g.
   5274 -> 5277, 5292 -> 5294); failure guards, returns and some
   arguments are missing from view.  Comments below describe only the
   visible fragments.  Emits the cstore pattern ICODE computing
   CODE(X, Y) and normalizes the result per NORMALIZEP.  */
5262 emit_cstore (rtx target
, enum insn_code icode
, enum rtx_code code
,
5263 machine_mode mode
, machine_mode compare_mode
,
5264 int unsignedp
, rtx x
, rtx y
, int normalizep
,
5265 machine_mode target_mode
)
5267 struct expand_operand ops
[4];
5268 rtx op0
, comparison
, subtarget
;
/* The mode in which the pattern produces its result.  */
5270 machine_mode result_mode
= targetm
.cstore_mode (icode
);
/* Remember the last insn so a failed attempt can be deleted.  */
5272 last
= get_last_insn ();
/* Coerce X and Y into operands 2 and 3 of the cstore pattern.  */
5273 x
= prepare_operand (icode
, x
, 2, mode
, compare_mode
, unsignedp
);
5274 y
= prepare_operand (icode
, y
, 3, mode
, compare_mode
, unsignedp
);
/* Presumably the bail-out when prepare_operand failed (guard not
   visible): discard any insns emitted so far.  */
5277 delete_insns_since (last
);
/* Default the requested result mode to the pattern's own mode.  */
5281 if (target_mode
== VOIDmode
)
5282 target_mode
= result_mode
;
/* Allocate a fresh target (the guard selecting this is not visible).  */
5284 target
= gen_reg_rtx (target_mode
);
5286 comparison
= gen_rtx_fmt_ee (code
, result_mode
, x
, y
);
/* Fill the operand array and try to emit the cstore pattern; when
   optimizing, let the expander pick the output register.  */
5288 create_output_operand (&ops
[0], optimize
? NULL_RTX
: target
, result_mode
);
5289 create_fixed_operand (&ops
[1], comparison
);
5290 create_fixed_operand (&ops
[2], x
);
5291 create_fixed_operand (&ops
[3], y
);
5292 if (!maybe_expand_insn (icode
, 4, ops
))
/* Pattern did not match: undo and (presumably) return failure.  */
5294 delete_insns_since (last
);
/* The register the pattern actually wrote.  */
5297 subtarget
= ops
[0].value
;
5299 /* If we are converting to a wider mode, first convert to
5300 TARGET_MODE, then normalize. This produces better combining
5301 opportunities on machines that have a SIGN_EXTRACT when we are
5302 testing a single bit. This mostly benefits the 68k.
5304 If STORE_FLAG_VALUE does not have the sign bit set when
5305 interpreted in MODE, we can do this conversion as unsigned, which
5306 is usually more efficient. */
5307 if (GET_MODE_SIZE (target_mode
) > GET_MODE_SIZE (result_mode
))
5309 convert_move (target
, subtarget
,
5310 val_signbit_known_clear_p (result_mode
,
5313 result_mode
= target_mode
;
5318 /* If we want to keep subexpressions around, don't reuse our last
5323 /* Now normalize to the proper value in MODE. Sometimes we don't
5324 have to do anything. */
5325 if (normalizep
== 0 || normalizep
== STORE_FLAG_VALUE
)
5327 /* STORE_FLAG_VALUE might be the most negative number, so write
5328 the comparison this way to avoid a compiler-time warning. */
5329 else if (- normalizep
== STORE_FLAG_VALUE
)
5330 op0
= expand_unop (result_mode
, neg_optab
, op0
, subtarget
, 0);
5332 /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5333 it hard to use a value of just the sign bit due to ANSI integer
5334 constant typing rules. */
5335 else if (val_signbit_known_set_p (result_mode
, STORE_FLAG_VALUE
))
/* Sign bit set: arithmetic-shift it down to produce 0 / -1.  */
5336 op0
= expand_shift (RSHIFT_EXPR
, result_mode
, op0
,
5337 GET_MODE_BITSIZE (result_mode
) - 1, subtarget
,
/* Fallback path requires STORE_FLAG_VALUE to be odd, so masking with 1
   yields exactly 0 or 1; negate afterwards if -1 was requested.  */
5341 gcc_assert (STORE_FLAG_VALUE
& 1);
5343 op0
= expand_and (result_mode
, op0
, const1_rtx
, subtarget
);
5344 if (normalizep
== -1)
5345 op0
= expand_unop (result_mode
, neg_optab
, op0
, op0
, 0);
5348 /* If we were converting to a smaller mode, do the conversion now. */
5349 if (target_mode
!= result_mode
)
5351 convert_move (target
, op0
, 0);
5359 /* A subroutine of emit_store_flag only including "tricks" that do not
5360 need a recursive call. These are kept separate to avoid infinite
/* ... recursion (the next comment line is missing from this excerpt).
   Interface mirrors emit_store_flag: try to compute CODE(OP0, OP1) as a
   flag value into TARGET (mode TARGET_MODE), normalized per NORMALIZEP.
   NOTE(review): many original lines (braces, returns, several guard
   conditions) are absent from this excerpt; comments below only describe
   the visible code.  */
5364 emit_store_flag_1 (rtx target
, enum rtx_code code
, rtx op0
, rtx op1
,
5365 machine_mode mode
, int unsignedp
, int normalizep
,
5366 machine_mode target_mode
)
5369 enum insn_code icode
;
5370 machine_mode compare_mode
;
5371 enum mode_class mclass
;
5372 enum rtx_code scode
;
/* NOTE(review): the `if (unsignedp)` guard expected before this
   canonicalization is not visible in the excerpt.  */
5375 code
= unsigned_condition (code
);
/* SCODE is CODE with the operands swapped, used for the float retry at
   the bottom of the function.  */
5376 scode
= swap_condition (code
);
5378 /* If one operand is constant, make it the second one. Only do this
5379 if the other operand is not constant as well. */
5381 if (swap_commutative_operands_p (op0
, op1
))
5383 std::swap (op0
, op1
);
5384 code
= swap_condition (code
);
5387 if (mode
== VOIDmode
)
5388 mode
= GET_MODE (op0
);
5390 /* For some comparisons with 1 and -1, we can convert this to
5391 comparisons with zero. This will often produce more opportunities for
5392 store-flag insns. */
/* NOTE(review): the switch/case selectors (LT/LE/GE/GT/NE/EQ on CODE)
   that choose among the six rewrites below are missing from the excerpt;
   each pair rewrites e.g. "x < 1" as "x <= 0", etc.  */
5397 if (op1
== const1_rtx
)
5398 op1
= const0_rtx
, code
= LE
;
5401 if (op1
== constm1_rtx
)
5402 op1
= const0_rtx
, code
= LT
;
5405 if (op1
== const1_rtx
)
5406 op1
= const0_rtx
, code
= GT
;
5409 if (op1
== constm1_rtx
)
5410 op1
= const0_rtx
, code
= GE
;
5413 if (op1
== const1_rtx
)
5414 op1
= const0_rtx
, code
= NE
;
5417 if (op1
== const1_rtx
)
5418 op1
= const0_rtx
, code
= EQ
;
5424 /* If we are comparing a double-word integer with zero or -1, we can
5425 convert the comparison into one involving a single word. */
5426 if (GET_MODE_BITSIZE (mode
) == BITS_PER_WORD
* 2
5427 && GET_MODE_CLASS (mode
) == MODE_INT
5428 && (!MEM_P (op0
) || ! MEM_VOLATILE_P (op0
)))
5431 if ((code
== EQ
|| code
== NE
)
5432 && (op1
== const0_rtx
|| op1
== constm1_rtx
))
5436 /* Do a logical OR or AND of the two words and compare the
/* ... result (continuation line missing from excerpt).  OR detects
   "any bit set" for == 0 / != 0; AND detects "all bits set" for -1.  */
5438 op00
= simplify_gen_subreg (word_mode
, op0
, mode
, 0);
5439 op01
= simplify_gen_subreg (word_mode
, op0
, mode
, UNITS_PER_WORD
);
5440 tem
= expand_binop (word_mode
,
5441 op1
== const0_rtx
? ior_optab
: and_optab
,
5442 op00
, op01
, NULL_RTX
, unsignedp
,
5446 tem
= emit_store_flag (NULL_RTX
, code
, tem
, op1
, word_mode
,
5447 unsignedp
, normalizep
);
5449 else if ((code
== LT
|| code
== GE
) && op1
== const0_rtx
)
5453 /* If testing the sign bit, can just test on high word. */
5454 op0h
= simplify_gen_subreg (word_mode
, op0
, mode
,
5455 subreg_highpart_offset (word_mode
,
5457 tem
= emit_store_flag (NULL_RTX
, code
, op0h
, op1
, word_mode
,
5458 unsignedp
, normalizep
);
/* If the single-word result already has the right mode, use it directly;
   otherwise sign- or zero-extend it into TARGET.  */
5465 if (target_mode
== VOIDmode
|| GET_MODE (tem
) == target_mode
)
5468 target
= gen_reg_rtx (target_mode
);
/* Extend unsigned unless the flag value has its sign bit set.  */
5470 convert_move (target
, tem
,
5471 !val_signbit_known_set_p (word_mode
,
5472 (normalizep
? normalizep
5473 : STORE_FLAG_VALUE
)));
5478 /* If this is A < 0 or A >= 0, we can do this by taking the ones
5479 complement of A (for GE) and shifting the sign bit to the low bit. */
5480 if (op1
== const0_rtx
&& (code
== LT
|| code
== GE
)
5481 && GET_MODE_CLASS (mode
) == MODE_INT
5482 && (normalizep
|| STORE_FLAG_VALUE
== 1
5483 || val_signbit_p (mode
, STORE_FLAG_VALUE
)))
5490 /* If the result is to be wider than OP0, it is best to convert it
5491 first. If it is to be narrower, it is *incorrect* to convert it
/* ... (continuation missing); hence the narrowing happens after the
   shift, at line 5516 below.  */
5493 else if (GET_MODE_SIZE (target_mode
) > GET_MODE_SIZE (mode
))
5495 op0
= convert_modes (target_mode
, mode
, op0
, 0);
5499 if (target_mode
!= mode
)
/* For GE, complement A first so its sign bit becomes the LT answer.
   NOTE(review): the `if (code == GE)` guard is missing from the excerpt.  */
5503 op0
= expand_unop (mode
, one_cmpl_optab
, op0
,
5504 ((STORE_FLAG_VALUE
== 1 || normalizep
)
5505 ? 0 : subtarget
), 0);
5507 if (STORE_FLAG_VALUE
== 1 || normalizep
)
5508 /* If we are supposed to produce a 0/1 value, we want to do
5509 a logical shift from the sign bit to the low-order bit; for
5510 a -1/0 value, we do an arithmetic shift. */
5511 op0
= expand_shift (RSHIFT_EXPR
, mode
, op0
,
5512 GET_MODE_BITSIZE (mode
) - 1,
5513 subtarget
, normalizep
!= -1);
5515 if (mode
!= target_mode
)
5516 op0
= convert_modes (target_mode
, mode
, op0
, 0);
/* Last resort: walk MODE and every wider mode looking for a cstore
   pattern the target actually implements.  */
5521 mclass
= GET_MODE_CLASS (mode
);
5522 for (compare_mode
= mode
; compare_mode
!= VOIDmode
;
5523 compare_mode
= GET_MODE_WIDER_MODE (compare_mode
))
5525 machine_mode optab_mode
= mclass
== MODE_CC
? CCmode
: compare_mode
;
5526 icode
= optab_handler (cstore_optab
, optab_mode
);
5527 if (icode
!= CODE_FOR_nothing
)
5529 do_pending_stack_adjust ();
5530 rtx tem
= emit_cstore (target
, icode
, code
, mode
, compare_mode
,
5531 unsignedp
, op0
, op1
, normalizep
, target_mode
);
/* For floats, retry with the operands swapped and the swapped
   condition SCODE in case the target only handles that form.  */
5535 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
5537 tem
= emit_cstore (target
, icode
, scode
, mode
, compare_mode
,
5538 unsignedp
, op1
, op0
, normalizep
, target_mode
);
5549 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5550 and storing in TARGET. Normally return TARGET.
5551 Return 0 if that cannot be done.
5553 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
5554 it is VOIDmode, they cannot both be CONST_INT.
5556 UNSIGNEDP is for the case where we have to widen the operands
5557 to perform the operation. It says to use zero-extension.
5559 NORMALIZEP is 1 if we should convert the result to be either zero
5560 or one. NORMALIZEP is -1 if we should convert the result to be
5561 either zero or -1. If NORMALIZEP is zero, the result will be left
5562 "raw" out of the scc insn. */
/* NOTE(review): numerous original lines (braces, returns, several guard
   conditions) are missing from this excerpt; comments below describe only
   the visible code.  */
5565 emit_store_flag (rtx target
, enum rtx_code code
, rtx op0
, rtx op1
,
5566 machine_mode mode
, int unsignedp
, int normalizep
)
5568 machine_mode target_mode
= target
? GET_MODE (target
) : VOIDmode
;
5569 enum rtx_code rcode
;
5574 /* If we compare constants, we shouldn't use a store-flag operation,
5575 but a constant load. We can get there via the vanilla route that
5576 usually generates a compare-branch sequence, but will in this case
5577 fold the comparison to a constant, and thus elide the branch. */
5578 if (CONSTANT_P (op0
) && CONSTANT_P (op1
))
/* First try the non-recursive tricks.  NOTE(review): the success check
   and `return tem` following this call are missing from the excerpt.  */
5581 tem
= emit_store_flag_1 (target
, code
, op0
, op1
, mode
, unsignedp
, normalizep
,
5586 /* If we reached here, we can't do this with a scc insn, however there
5587 are some comparisons that can be done in other ways. Don't do any
5588 of these cases if branches are very cheap. */
5589 if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5592 /* See what we need to return. We can only return a 1, -1, or the
/* ... sign bit (continuation line missing from excerpt).  */
5595 if (normalizep
== 0)
5597 if (STORE_FLAG_VALUE
== 1 || STORE_FLAG_VALUE
== -1)
5598 normalizep
= STORE_FLAG_VALUE
;
5600 else if (val_signbit_p (mode
, STORE_FLAG_VALUE
))
/* Checkpoint for the rollback calls (delete_insns_since) below.  */
5606 last
= get_last_insn ();
5608 /* If optimizing, use different pseudo registers for each insn, instead
5609 of reusing the same pseudo. This leads to better CSE, but slows
5610 down the compiler, since there are more pseudos */
5611 subtarget
= (!optimize
5612 && (target_mode
== mode
)) ? target
: NULL_RTX
;
/* TRUEVAL is the integer the comparison should yield when true.  */
5613 trueval
= GEN_INT (normalizep
? normalizep
: STORE_FLAG_VALUE
);
5615 /* For floating-point comparisons, try the reverse comparison or try
5616 changing the "orderedness" of the comparison. */
5617 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
5619 enum rtx_code first_code
;
5622 rcode
= reverse_condition_maybe_unordered (code
);
5623 if (can_compare_p (rcode
, mode
, ccp_store_flag
)
5624 && (code
== ORDERED
|| code
== UNORDERED
5625 || (! HONOR_NANS (mode
) && (code
== LTGT
|| code
== UNEQ
))
5626 || (! HONOR_SNANS (mode
) && (code
== EQ
|| code
== NE
))))
5628 int want_add
= ((STORE_FLAG_VALUE
== 1 && normalizep
== -1)
5629 || (STORE_FLAG_VALUE
== -1 && normalizep
== 1));
5631 /* For the reverse comparison, use either an addition or a XOR. */
/* NOTE(review): the `if (want_add && ...)` / `else if (... && want_add == 0
   ...)` heads of the two branches below are partially missing.  */
5633 && rtx_cost (GEN_INT (normalizep
), mode
, PLUS
, 1,
5634 optimize_insn_for_speed_p ()) == 0)
5636 tem
= emit_store_flag_1 (subtarget
, rcode
, op0
, op1
, mode
, 0,
5637 STORE_FLAG_VALUE
, target_mode
);
/* Reversed flag + NORMALIZEP gives the desired flag value.  */
5639 return expand_binop (target_mode
, add_optab
, tem
,
5640 gen_int_mode (normalizep
, target_mode
),
5641 target
, 0, OPTAB_WIDEN
);
5644 && rtx_cost (trueval
, mode
, XOR
, 1,
5645 optimize_insn_for_speed_p ()) == 0)
5647 tem
= emit_store_flag_1 (subtarget
, rcode
, op0
, op1
, mode
, 0,
5648 normalizep
, target_mode
);
/* Reversed flag XOR TRUEVAL flips it back to the desired value.  */
5650 return expand_binop (target_mode
, xor_optab
, tem
, trueval
,
5651 target
, INTVAL (trueval
) >= 0, OPTAB_WIDEN
);
5655 delete_insns_since (last
);
5657 /* Cannot split ORDERED and UNORDERED, only try the above trick. */
5658 if (code
== ORDERED
|| code
== UNORDERED
)
/* Split e.g. LE into an ordered test plus an ordinary comparison.  */
5661 and_them
= split_comparison (code
, mode
, &first_code
, &code
);
5663 /* If there are no NaNs, the first comparison should always fall through.
5664 Effectively change the comparison to the other one. */
5665 if (!HONOR_NANS (mode
))
5667 gcc_assert (first_code
== (and_them
? ORDERED
: UNORDERED
));
5668 return emit_store_flag_1 (target
, code
, op0
, op1
, mode
, 0, normalizep
,
5672 if (!HAVE_conditional_move
)
5675 /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5676 conditional move. */
5677 tem
= emit_store_flag_1 (subtarget
, first_code
, op0
, op1
, mode
, 0,
5678 normalizep
, target_mode
);
/* NOTE(review): the and_them/if-else selecting between these two
   conditional-move forms is missing from the excerpt.  */
5683 tem
= emit_conditional_move (target
, code
, op0
, op1
, mode
,
5684 tem
, const0_rtx
, GET_MODE (tem
), 0);
5686 tem
= emit_conditional_move (target
, code
, op0
, op1
, mode
,
5687 trueval
, tem
, GET_MODE (tem
), 0);
5690 delete_insns_since (last
);
5694 /* The remaining tricks only apply to integer comparisons. */
5696 if (GET_MODE_CLASS (mode
) != MODE_INT
)
5699 /* If this is an equality comparison of integers, we can try to exclusive-or
5700 (or subtract) the two operands and use a recursive call to try the
5701 comparison with zero. Don't do any of these cases if branches are
/* ... very cheap (continuation line missing from excerpt).  */
5704 if ((code
== EQ
|| code
== NE
) && op1
!= const0_rtx
)
5706 tem
= expand_binop (mode
, xor_optab
, op0
, op1
, subtarget
, 1,
/* Fall back to subtraction if XOR could not be expanded.  */
5710 tem
= expand_binop (mode
, sub_optab
, op0
, op1
, subtarget
, 1,
/* Recurse: (op0 ^ op1) ==/!= 0 is equivalent to op0 ==/!= op1.  */
5713 tem
= emit_store_flag (target
, code
, tem
, const0_rtx
,
5714 mode
, unsignedp
, normalizep
);
5718 delete_insns_since (last
);
5721 /* For integer comparisons, try the reverse comparison. However, for
5722 small X and if we'd have anyway to extend, implementing "X != 0"
5723 as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0". */
5724 rcode
= reverse_condition (code
);
5725 if (can_compare_p (rcode
, mode
, ccp_store_flag
)
5726 && ! (optab_handler (cstore_optab
, mode
) == CODE_FOR_nothing
5728 && GET_MODE_SIZE (mode
) < UNITS_PER_WORD
5729 && op1
== const0_rtx
))
5731 int want_add
= ((STORE_FLAG_VALUE
== 1 && normalizep
== -1)
5732 || (STORE_FLAG_VALUE
== -1 && normalizep
== 1));
5734 /* Again, for the reverse comparison, use either an addition or a XOR. */
5736 && rtx_cost (GEN_INT (normalizep
), mode
, PLUS
, 1,
5737 optimize_insn_for_speed_p ()) == 0)
5739 tem
= emit_store_flag_1 (subtarget
, rcode
, op0
, op1
, mode
, 0,
5740 STORE_FLAG_VALUE
, target_mode
);
5742 tem
= expand_binop (target_mode
, add_optab
, tem
,
5743 gen_int_mode (normalizep
, target_mode
),
5744 target
, 0, OPTAB_WIDEN
);
5747 && rtx_cost (trueval
, mode
, XOR
, 1,
5748 optimize_insn_for_speed_p ()) == 0)
5750 tem
= emit_store_flag_1 (subtarget
, rcode
, op0
, op1
, mode
, 0,
5751 normalizep
, target_mode
);
5753 tem
= expand_binop (target_mode
, xor_optab
, tem
, trueval
, target
,
5754 INTVAL (trueval
) >= 0, OPTAB_WIDEN
);
5759 delete_insns_since (last
);
5762 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5763 the constant zero. Reject all other comparisons at this point. Only
5764 do LE and GT if branches are expensive since they are expensive on
5765 2-operand machines. */
5767 if (op1
!= const0_rtx
5768 || (code
!= EQ
&& code
!= NE
5769 && (BRANCH_COST (optimize_insn_for_speed_p (),
5770 false) <= 1 || (code
!= LE
&& code
!= GT
))))
5773 /* Try to put the result of the comparison in the sign bit. Assume we can't
5774 do the necessary operation below. */
5778 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
5779 the sign bit set. */
/* NOTE(review): the `if (code == LE)` selector for this branch is missing
   from the excerpt.  */
5783 /* This is destructive, so SUBTARGET can't be OP0. */
5784 if (rtx_equal_p (subtarget
, op0
))
5787 tem
= expand_binop (mode
, sub_optab
, op0
, const1_rtx
, subtarget
, 0,
5790 tem
= expand_binop (mode
, ior_optab
, op0
, tem
, subtarget
, 0,
5794 /* To see if A > 0, compute (((signed) A) << BITS) - A, where BITS is the
5795 number of bits in the mode of OP0, minus one. */
5799 if (rtx_equal_p (subtarget
, op0
))
5802 tem
= expand_shift (RSHIFT_EXPR
, mode
, op0
,
5803 GET_MODE_BITSIZE (mode
) - 1,
5805 tem
= expand_binop (mode
, sub_optab
, tem
, op0
, subtarget
, 0,
5809 if (code
== EQ
|| code
== NE
)
5811 /* For EQ or NE, one way to do the comparison is to apply an operation
5812 that converts the operand into a positive number if it is nonzero
5813 or zero if it was originally zero. Then, for EQ, we subtract 1 and
5814 for NE we negate. This puts the result in the sign bit. Then we
5815 normalize with a shift, if needed.
5817 Two operations that can do the above actions are ABS and FFS, so try
5818 them. If that doesn't work, and MODE is smaller than a full word,
5819 we can use zero-extension to the wider mode (an unsigned conversion)
5820 as the operation. */
5822 /* Note that ABS doesn't yield a positive number for INT_MIN, but
5823 that is compensated by the subsequent overflow when subtracting
/* ... 1 (continuation line missing from excerpt).  */
5826 if (optab_handler (abs_optab
, mode
) != CODE_FOR_nothing
)
5827 tem
= expand_unop (mode
, abs_optab
, op0
, subtarget
, 1);
5828 else if (optab_handler (ffs_optab
, mode
) != CODE_FOR_nothing
)
5829 tem
= expand_unop (mode
, ffs_optab
, op0
, subtarget
, 1);
5830 else if (GET_MODE_SIZE (mode
) < UNITS_PER_WORD
)
5832 tem
= convert_modes (word_mode
, mode
, op0
, 1);
/* NOTE(review): the EQ/NE dispatch around these two is missing:
   subtract 1 for EQ, negate for NE, putting the answer in the sign bit.  */
5839 tem
= expand_binop (mode
, sub_optab
, tem
, const1_rtx
, subtarget
,
5842 tem
= expand_unop (mode
, neg_optab
, tem
, subtarget
, 0);
5845 /* If we couldn't do it that way, for NE we can "or" the two's complement
5846 of the value with itself. For EQ, we take the one's complement of
5847 that "or", which is an extra insn, so we only handle EQ if branches
/* ... are expensive (continuation missing from excerpt).  */
5852 || BRANCH_COST (optimize_insn_for_speed_p (),
5855 if (rtx_equal_p (subtarget
, op0
))
5858 tem
= expand_unop (mode
, neg_optab
, op0
, subtarget
, 0);
5859 tem
= expand_binop (mode
, ior_optab
, tem
, op0
, subtarget
, 0,
5862 if (tem
&& code
== EQ
)
5863 tem
= expand_unop (mode
, one_cmpl_optab
, tem
, subtarget
, 0);
/* Shift the sign-bit answer down to bit 0 (logical shift for 0/1,
   arithmetic for 0/-1).  */
5867 if (tem
&& normalizep
)
5868 tem
= expand_shift (RSHIFT_EXPR
, mode
, tem
,
5869 GET_MODE_BITSIZE (mode
) - 1,
5870 subtarget
, normalizep
== 1);
5876 else if (GET_MODE (tem
) != target_mode
)
5878 convert_move (target
, tem
, 0);
5881 else if (!subtarget
)
5883 emit_move_insn (target
, tem
);
/* All tricks failed: discard everything emitted since LAST.
   NOTE(review): the final `return 0` is missing from the excerpt.  */
5888 delete_insns_since (last
);
5893 /* Like emit_store_flag, but always succeeds. */
/* Falls back to an explicit move/compare/jump/move sequence when no
   store-flag idiom is available.  NOTE(review): several original lines
   (braces, returns, parts of some conditions) are missing from this
   excerpt; comments below describe only the visible code.  */
5896 emit_store_flag_force (rtx target
, enum rtx_code code
, rtx op0
, rtx op1
,
5897 machine_mode mode
, int unsignedp
, int normalizep
)
5900 rtx_code_label
*label
;
5901 rtx trueval
, falseval
;
5903 /* First see if emit_store_flag can do the job. */
5904 tem
= emit_store_flag (target
, code
, op0
, op1
, mode
, unsignedp
, normalizep
);
/* NOTE(review): the guard before this (presumably `if (!target)`) is
   missing from the excerpt.  */
5909 target
= gen_reg_rtx (word_mode
);
5911 /* If this failed, we have to do this with set/compare/jump/set code.
5912 For foo != 0, if foo is in OP0, just replace it with 1 if nonzero. */
/* TRUEVAL: the value TARGET receives when the comparison holds.  */
5913 trueval
= normalizep
? GEN_INT (normalizep
) : const1_rtx
;
/* NOTE(review): the first conjuncts of this fast-path condition
   (code == NE, target == op0, ...) are missing from the excerpt.  */
5915 && GET_MODE_CLASS (mode
) == MODE_INT
5918 && op1
== const0_rtx
)
5920 label
= gen_label_rtx ();
/* Skip the move when TARGET is already zero, leaving 0/TRUEVAL.  */
5921 do_compare_rtx_and_jump (target
, const0_rtx
, EQ
, unsignedp
, mode
,
5922 NULL_RTX
, NULL
, label
, -1);
5923 emit_move_insn (target
, trueval
);
/* If TARGET overlaps the operands (or the other missing conditions
   hold), compute into a fresh pseudo instead.  */
5929 || reg_mentioned_p (target
, op0
) || reg_mentioned_p (target
, op1
))
5930 target
= gen_reg_rtx (GET_MODE (target
));
5932 /* Jump in the right direction if the target cannot implement CODE
5933 but can jump on its reverse condition. */
5934 falseval
= const0_rtx
;
5935 if (! can_compare_p (code
, mode
, ccp_jump
)
5936 && (! FLOAT_MODE_P (mode
)
5937 || code
== ORDERED
|| code
== UNORDERED
5938 || (! HONOR_NANS (mode
) && (code
== LTGT
|| code
== UNEQ
))
5939 || (! HONOR_SNANS (mode
) && (code
== EQ
|| code
== NE
))))
5941 enum rtx_code rcode
;
5942 if (FLOAT_MODE_P (mode
))
5943 rcode
= reverse_condition_maybe_unordered (code
);
5945 rcode
= reverse_condition (code
);
5947 /* Canonicalize to UNORDERED for the libcall. */
5948 if (can_compare_p (rcode
, mode
, ccp_jump
)
5949 || (code
== ORDERED
&& ! can_compare_p (ORDERED
, mode
, ccp_jump
)))
/* Branch on the reversed condition, so swap the two result values.
   NOTE(review): the `falseval = trueval` half of the swap is missing
   from the excerpt.  */
5952 trueval
= const0_rtx
;
/* Emit: TARGET = TRUEVAL; if (!cond) goto label; TARGET = FALSEVAL.  */
5957 emit_move_insn (target
, trueval
);
5958 label
= gen_label_rtx ();
5959 do_compare_rtx_and_jump (op0
, op1
, code
, unsignedp
, mode
, NULL_RTX
, NULL
,
5962 emit_move_insn (target
, falseval
);
5968 /* Perform possibly multi-word comparison and conditional jump to LABEL
5969 if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE. This is
5970 now a thin wrapper around do_compare_rtx_and_jump. */
5973 do_cmp_and_jump (rtx arg1
, rtx arg2
, enum rtx_code op
, machine_mode mode
,
5974 rtx_code_label
*label
)
5976 int unsignedp
= (op
== LTU
|| op
== LEU
|| op
== GTU
|| op
== GEU
);
5977 do_compare_rtx_and_jump (arg1
, arg2
, op
, unsignedp
, mode
, NULL_RTX
,