1 /* Medium-level subroutines: convert bit-field store and extract
2 and shifts, multiplies and divides to rtl instructions.
3 Copyright (C) 1987-2016 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
34 #include "diagnostic-core.h"
35 #include "fold-const.h"
36 #include "stor-layout.h"
40 #include "langhooks.h"
42 struct target_expmed default_target_expmed
;
44 struct target_expmed
*this_target_expmed
= &default_target_expmed
;
47 static void store_fixed_bit_field (rtx
, unsigned HOST_WIDE_INT
,
48 unsigned HOST_WIDE_INT
,
49 unsigned HOST_WIDE_INT
,
50 unsigned HOST_WIDE_INT
,
52 static void store_fixed_bit_field_1 (rtx
, unsigned HOST_WIDE_INT
,
53 unsigned HOST_WIDE_INT
,
55 static void store_split_bit_field (rtx
, unsigned HOST_WIDE_INT
,
56 unsigned HOST_WIDE_INT
,
57 unsigned HOST_WIDE_INT
,
58 unsigned HOST_WIDE_INT
,
60 static rtx
extract_fixed_bit_field (machine_mode
, rtx
,
61 unsigned HOST_WIDE_INT
,
62 unsigned HOST_WIDE_INT
, rtx
, int, bool);
63 static rtx
extract_fixed_bit_field_1 (machine_mode
, rtx
,
64 unsigned HOST_WIDE_INT
,
65 unsigned HOST_WIDE_INT
, rtx
, int, bool);
66 static rtx
lshift_value (machine_mode
, unsigned HOST_WIDE_INT
, int);
67 static rtx
extract_split_bit_field (rtx
, unsigned HOST_WIDE_INT
,
68 unsigned HOST_WIDE_INT
, int, bool);
69 static void do_cmp_and_jump (rtx
, rtx
, enum rtx_code
, machine_mode
, rtx_code_label
*);
70 static rtx
expand_smod_pow2 (machine_mode
, rtx
, HOST_WIDE_INT
);
71 static rtx
expand_sdiv_pow2 (machine_mode
, rtx
, HOST_WIDE_INT
);
73 /* Return a constant integer mask value of mode MODE with BITSIZE ones
74 followed by BITPOS zeros, or the complement of that if COMPLEMENT.
75 The mask is truncated if necessary to the width of mode MODE. The
76 mask is zero-extended if BITSIZE+BITPOS is too small for MODE. */
79 mask_rtx (machine_mode mode
, int bitpos
, int bitsize
, bool complement
)
81 return immed_wide_int_const
82 (wi::shifted_mask (bitpos
, bitsize
, complement
,
83 GET_MODE_PRECISION (mode
)), mode
);
86 /* Test whether a value is zero of a power of two. */
87 #define EXACT_POWER_OF_2_OR_ZERO_P(x) \
88 (((x) & ((x) - (unsigned HOST_WIDE_INT) 1)) == 0)
90 struct init_expmed_rtl
111 rtx pow2
[MAX_BITS_PER_WORD
];
112 rtx cint
[MAX_BITS_PER_WORD
];
116 init_expmed_one_conv (struct init_expmed_rtl
*all
, machine_mode to_mode
,
117 machine_mode from_mode
, bool speed
)
119 int to_size
, from_size
;
122 to_size
= GET_MODE_PRECISION (to_mode
);
123 from_size
= GET_MODE_PRECISION (from_mode
);
125 /* Most partial integers have a precision less than the "full"
126 integer it requires for storage. In case one doesn't, for
127 comparison purposes here, reduce the bit size by one in that
129 if (GET_MODE_CLASS (to_mode
) == MODE_PARTIAL_INT
130 && exact_log2 (to_size
) != -1)
132 if (GET_MODE_CLASS (from_mode
) == MODE_PARTIAL_INT
133 && exact_log2 (from_size
) != -1)
136 /* Assume cost of zero-extend and sign-extend is the same. */
137 which
= (to_size
< from_size
? all
->trunc
: all
->zext
);
139 PUT_MODE (all
->reg
, from_mode
);
140 set_convert_cost (to_mode
, from_mode
, speed
,
141 set_src_cost (which
, to_mode
, speed
));
145 init_expmed_one_mode (struct init_expmed_rtl
*all
,
146 machine_mode mode
, int speed
)
148 int m
, n
, mode_bitsize
;
149 machine_mode mode_from
;
151 mode_bitsize
= GET_MODE_UNIT_BITSIZE (mode
);
153 PUT_MODE (all
->reg
, mode
);
154 PUT_MODE (all
->plus
, mode
);
155 PUT_MODE (all
->neg
, mode
);
156 PUT_MODE (all
->mult
, mode
);
157 PUT_MODE (all
->sdiv
, mode
);
158 PUT_MODE (all
->udiv
, mode
);
159 PUT_MODE (all
->sdiv_32
, mode
);
160 PUT_MODE (all
->smod_32
, mode
);
161 PUT_MODE (all
->wide_trunc
, mode
);
162 PUT_MODE (all
->shift
, mode
);
163 PUT_MODE (all
->shift_mult
, mode
);
164 PUT_MODE (all
->shift_add
, mode
);
165 PUT_MODE (all
->shift_sub0
, mode
);
166 PUT_MODE (all
->shift_sub1
, mode
);
167 PUT_MODE (all
->zext
, mode
);
168 PUT_MODE (all
->trunc
, mode
);
170 set_add_cost (speed
, mode
, set_src_cost (all
->plus
, mode
, speed
));
171 set_neg_cost (speed
, mode
, set_src_cost (all
->neg
, mode
, speed
));
172 set_mul_cost (speed
, mode
, set_src_cost (all
->mult
, mode
, speed
));
173 set_sdiv_cost (speed
, mode
, set_src_cost (all
->sdiv
, mode
, speed
));
174 set_udiv_cost (speed
, mode
, set_src_cost (all
->udiv
, mode
, speed
));
176 set_sdiv_pow2_cheap (speed
, mode
, (set_src_cost (all
->sdiv_32
, mode
, speed
)
177 <= 2 * add_cost (speed
, mode
)));
178 set_smod_pow2_cheap (speed
, mode
, (set_src_cost (all
->smod_32
, mode
, speed
)
179 <= 4 * add_cost (speed
, mode
)));
181 set_shift_cost (speed
, mode
, 0, 0);
183 int cost
= add_cost (speed
, mode
);
184 set_shiftadd_cost (speed
, mode
, 0, cost
);
185 set_shiftsub0_cost (speed
, mode
, 0, cost
);
186 set_shiftsub1_cost (speed
, mode
, 0, cost
);
189 n
= MIN (MAX_BITS_PER_WORD
, mode_bitsize
);
190 for (m
= 1; m
< n
; m
++)
192 XEXP (all
->shift
, 1) = all
->cint
[m
];
193 XEXP (all
->shift_mult
, 1) = all
->pow2
[m
];
195 set_shift_cost (speed
, mode
, m
, set_src_cost (all
->shift
, mode
, speed
));
196 set_shiftadd_cost (speed
, mode
, m
, set_src_cost (all
->shift_add
, mode
,
198 set_shiftsub0_cost (speed
, mode
, m
, set_src_cost (all
->shift_sub0
, mode
,
200 set_shiftsub1_cost (speed
, mode
, m
, set_src_cost (all
->shift_sub1
, mode
,
204 if (SCALAR_INT_MODE_P (mode
))
206 for (mode_from
= MIN_MODE_INT
; mode_from
<= MAX_MODE_INT
;
207 mode_from
= (machine_mode
)(mode_from
+ 1))
208 init_expmed_one_conv (all
, mode
, mode_from
, speed
);
210 if (GET_MODE_CLASS (mode
) == MODE_INT
)
212 machine_mode wider_mode
= GET_MODE_WIDER_MODE (mode
);
213 if (wider_mode
!= VOIDmode
)
215 PUT_MODE (all
->zext
, wider_mode
);
216 PUT_MODE (all
->wide_mult
, wider_mode
);
217 PUT_MODE (all
->wide_lshr
, wider_mode
);
218 XEXP (all
->wide_lshr
, 1) = GEN_INT (mode_bitsize
);
220 set_mul_widen_cost (speed
, wider_mode
,
221 set_src_cost (all
->wide_mult
, wider_mode
, speed
));
222 set_mul_highpart_cost (speed
, mode
,
223 set_src_cost (all
->wide_trunc
, mode
, speed
));
231 struct init_expmed_rtl all
;
232 machine_mode mode
= QImode
;
235 memset (&all
, 0, sizeof all
);
236 for (m
= 1; m
< MAX_BITS_PER_WORD
; m
++)
238 all
.pow2
[m
] = GEN_INT ((HOST_WIDE_INT
) 1 << m
);
239 all
.cint
[m
] = GEN_INT (m
);
242 /* Avoid using hard regs in ways which may be unsupported. */
243 all
.reg
= gen_raw_REG (mode
, LAST_VIRTUAL_REGISTER
+ 1);
244 all
.plus
= gen_rtx_PLUS (mode
, all
.reg
, all
.reg
);
245 all
.neg
= gen_rtx_NEG (mode
, all
.reg
);
246 all
.mult
= gen_rtx_MULT (mode
, all
.reg
, all
.reg
);
247 all
.sdiv
= gen_rtx_DIV (mode
, all
.reg
, all
.reg
);
248 all
.udiv
= gen_rtx_UDIV (mode
, all
.reg
, all
.reg
);
249 all
.sdiv_32
= gen_rtx_DIV (mode
, all
.reg
, all
.pow2
[5]);
250 all
.smod_32
= gen_rtx_MOD (mode
, all
.reg
, all
.pow2
[5]);
251 all
.zext
= gen_rtx_ZERO_EXTEND (mode
, all
.reg
);
252 all
.wide_mult
= gen_rtx_MULT (mode
, all
.zext
, all
.zext
);
253 all
.wide_lshr
= gen_rtx_LSHIFTRT (mode
, all
.wide_mult
, all
.reg
);
254 all
.wide_trunc
= gen_rtx_TRUNCATE (mode
, all
.wide_lshr
);
255 all
.shift
= gen_rtx_ASHIFT (mode
, all
.reg
, all
.reg
);
256 all
.shift_mult
= gen_rtx_MULT (mode
, all
.reg
, all
.reg
);
257 all
.shift_add
= gen_rtx_PLUS (mode
, all
.shift_mult
, all
.reg
);
258 all
.shift_sub0
= gen_rtx_MINUS (mode
, all
.shift_mult
, all
.reg
);
259 all
.shift_sub1
= gen_rtx_MINUS (mode
, all
.reg
, all
.shift_mult
);
260 all
.trunc
= gen_rtx_TRUNCATE (mode
, all
.reg
);
262 for (speed
= 0; speed
< 2; speed
++)
264 crtl
->maybe_hot_insn_p
= speed
;
265 set_zero_cost (speed
, set_src_cost (const0_rtx
, mode
, speed
));
267 for (mode
= MIN_MODE_INT
; mode
<= MAX_MODE_INT
;
268 mode
= (machine_mode
)(mode
+ 1))
269 init_expmed_one_mode (&all
, mode
, speed
);
271 if (MIN_MODE_PARTIAL_INT
!= VOIDmode
)
272 for (mode
= MIN_MODE_PARTIAL_INT
; mode
<= MAX_MODE_PARTIAL_INT
;
273 mode
= (machine_mode
)(mode
+ 1))
274 init_expmed_one_mode (&all
, mode
, speed
);
276 if (MIN_MODE_VECTOR_INT
!= VOIDmode
)
277 for (mode
= MIN_MODE_VECTOR_INT
; mode
<= MAX_MODE_VECTOR_INT
;
278 mode
= (machine_mode
)(mode
+ 1))
279 init_expmed_one_mode (&all
, mode
, speed
);
282 if (alg_hash_used_p ())
284 struct alg_hash_entry
*p
= alg_hash_entry_ptr (0);
285 memset (p
, 0, sizeof (*p
) * NUM_ALG_HASH_ENTRIES
);
288 set_alg_hash_used_p (true);
289 default_rtl_profile ();
291 ggc_free (all
.trunc
);
292 ggc_free (all
.shift_sub1
);
293 ggc_free (all
.shift_sub0
);
294 ggc_free (all
.shift_add
);
295 ggc_free (all
.shift_mult
);
296 ggc_free (all
.shift
);
297 ggc_free (all
.wide_trunc
);
298 ggc_free (all
.wide_lshr
);
299 ggc_free (all
.wide_mult
);
301 ggc_free (all
.smod_32
);
302 ggc_free (all
.sdiv_32
);
311 /* Return an rtx representing minus the value of X.
312 MODE is the intended mode of the result,
313 useful if X is a CONST_INT. */
316 negate_rtx (machine_mode mode
, rtx x
)
318 rtx result
= simplify_unary_operation (NEG
, mode
, x
, mode
);
321 result
= expand_unop (mode
, neg_optab
, x
, NULL_RTX
, 0);
326 /* Whether reverse storage order is supported on the target. */
327 static int reverse_storage_order_supported
= -1;
329 /* Check whether reverse storage order is supported on the target. */
332 check_reverse_storage_order_support (void)
334 if (BYTES_BIG_ENDIAN
!= WORDS_BIG_ENDIAN
)
336 reverse_storage_order_supported
= 0;
337 sorry ("reverse scalar storage order");
340 reverse_storage_order_supported
= 1;
343 /* Whether reverse FP storage order is supported on the target. */
344 static int reverse_float_storage_order_supported
= -1;
346 /* Check whether reverse FP storage order is supported on the target. */
349 check_reverse_float_storage_order_support (void)
351 if (FLOAT_WORDS_BIG_ENDIAN
!= WORDS_BIG_ENDIAN
)
353 reverse_float_storage_order_supported
= 0;
354 sorry ("reverse floating-point scalar storage order");
357 reverse_float_storage_order_supported
= 1;
360 /* Return an rtx representing value of X with reverse storage order.
361 MODE is the intended mode of the result,
362 useful if X is a CONST_INT. */
365 flip_storage_order (enum machine_mode mode
, rtx x
)
367 enum machine_mode int_mode
;
373 if (COMPLEX_MODE_P (mode
))
375 rtx real
= read_complex_part (x
, false);
376 rtx imag
= read_complex_part (x
, true);
378 real
= flip_storage_order (GET_MODE_INNER (mode
), real
);
379 imag
= flip_storage_order (GET_MODE_INNER (mode
), imag
);
381 return gen_rtx_CONCAT (mode
, real
, imag
);
384 if (__builtin_expect (reverse_storage_order_supported
< 0, 0))
385 check_reverse_storage_order_support ();
387 if (SCALAR_INT_MODE_P (mode
))
391 if (FLOAT_MODE_P (mode
)
392 && __builtin_expect (reverse_float_storage_order_supported
< 0, 0))
393 check_reverse_float_storage_order_support ();
395 int_mode
= mode_for_size (GET_MODE_PRECISION (mode
), MODE_INT
, 0);
396 if (int_mode
== BLKmode
)
398 sorry ("reverse storage order for %smode", GET_MODE_NAME (mode
));
401 x
= gen_lowpart (int_mode
, x
);
404 result
= simplify_unary_operation (BSWAP
, int_mode
, x
, int_mode
);
406 result
= expand_unop (int_mode
, bswap_optab
, x
, NULL_RTX
, 1);
408 if (int_mode
!= mode
)
409 result
= gen_lowpart (mode
, result
);
414 /* Adjust bitfield memory MEM so that it points to the first unit of mode
415 MODE that contains a bitfield of size BITSIZE at bit position BITNUM.
416 If MODE is BLKmode, return a reference to every byte in the bitfield.
417 Set *NEW_BITNUM to the bit position of the field within the new memory. */
420 narrow_bit_field_mem (rtx mem
, machine_mode mode
,
421 unsigned HOST_WIDE_INT bitsize
,
422 unsigned HOST_WIDE_INT bitnum
,
423 unsigned HOST_WIDE_INT
*new_bitnum
)
427 *new_bitnum
= bitnum
% BITS_PER_UNIT
;
428 HOST_WIDE_INT offset
= bitnum
/ BITS_PER_UNIT
;
429 HOST_WIDE_INT size
= ((*new_bitnum
+ bitsize
+ BITS_PER_UNIT
- 1)
431 return adjust_bitfield_address_size (mem
, mode
, offset
, size
);
435 unsigned int unit
= GET_MODE_BITSIZE (mode
);
436 *new_bitnum
= bitnum
% unit
;
437 HOST_WIDE_INT offset
= (bitnum
- *new_bitnum
) / BITS_PER_UNIT
;
438 return adjust_bitfield_address (mem
, mode
, offset
);
442 /* The caller wants to perform insertion or extraction PATTERN on a
443 bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
444 BITREGION_START and BITREGION_END are as for store_bit_field
445 and FIELDMODE is the natural mode of the field.
447 Search for a mode that is compatible with the memory access
448 restrictions and (where applicable) with a register insertion or
449 extraction. Return the new memory on success, storing the adjusted
450 bit position in *NEW_BITNUM. Return null otherwise. */
453 adjust_bit_field_mem_for_reg (enum extraction_pattern pattern
,
454 rtx op0
, HOST_WIDE_INT bitsize
,
455 HOST_WIDE_INT bitnum
,
456 unsigned HOST_WIDE_INT bitregion_start
,
457 unsigned HOST_WIDE_INT bitregion_end
,
458 machine_mode fieldmode
,
459 unsigned HOST_WIDE_INT
*new_bitnum
)
461 bit_field_mode_iterator
iter (bitsize
, bitnum
, bitregion_start
,
462 bitregion_end
, MEM_ALIGN (op0
),
463 MEM_VOLATILE_P (op0
));
464 machine_mode best_mode
;
465 if (iter
.next_mode (&best_mode
))
467 /* We can use a memory in BEST_MODE. See whether this is true for
468 any wider modes. All other things being equal, we prefer to
469 use the widest mode possible because it tends to expose more
470 CSE opportunities. */
471 if (!iter
.prefer_smaller_modes ())
473 /* Limit the search to the mode required by the corresponding
474 register insertion or extraction instruction, if any. */
475 machine_mode limit_mode
= word_mode
;
476 extraction_insn insn
;
477 if (get_best_reg_extraction_insn (&insn
, pattern
,
478 GET_MODE_BITSIZE (best_mode
),
480 limit_mode
= insn
.field_mode
;
482 machine_mode wider_mode
;
483 while (iter
.next_mode (&wider_mode
)
484 && GET_MODE_SIZE (wider_mode
) <= GET_MODE_SIZE (limit_mode
))
485 best_mode
= wider_mode
;
487 return narrow_bit_field_mem (op0
, best_mode
, bitsize
, bitnum
,
493 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
494 a structure of mode STRUCT_MODE represents a lowpart subreg. The subreg
495 offset is then BITNUM / BITS_PER_UNIT. */
498 lowpart_bit_field_p (unsigned HOST_WIDE_INT bitnum
,
499 unsigned HOST_WIDE_INT bitsize
,
500 machine_mode struct_mode
)
502 if (BYTES_BIG_ENDIAN
)
503 return (bitnum
% BITS_PER_UNIT
== 0
504 && (bitnum
+ bitsize
== GET_MODE_BITSIZE (struct_mode
)
505 || (bitnum
+ bitsize
) % BITS_PER_WORD
== 0));
507 return bitnum
% BITS_PER_WORD
== 0;
510 /* Return true if -fstrict-volatile-bitfields applies to an access of OP0
511 containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
512 Return false if the access would touch memory outside the range
513 BITREGION_START to BITREGION_END for conformance to the C++ memory
517 strict_volatile_bitfield_p (rtx op0
, unsigned HOST_WIDE_INT bitsize
,
518 unsigned HOST_WIDE_INT bitnum
,
519 machine_mode fieldmode
,
520 unsigned HOST_WIDE_INT bitregion_start
,
521 unsigned HOST_WIDE_INT bitregion_end
)
523 unsigned HOST_WIDE_INT modesize
= GET_MODE_BITSIZE (fieldmode
);
525 /* -fstrict-volatile-bitfields must be enabled and we must have a
528 || !MEM_VOLATILE_P (op0
)
529 || flag_strict_volatile_bitfields
<= 0)
532 /* Non-integral modes likely only happen with packed structures.
534 if (!SCALAR_INT_MODE_P (fieldmode
))
537 /* The bit size must not be larger than the field mode, and
538 the field mode must not be larger than a word. */
539 if (bitsize
> modesize
|| modesize
> BITS_PER_WORD
)
542 /* Check for cases of unaligned fields that must be split. */
543 if (bitnum
% modesize
+ bitsize
> modesize
)
546 /* The memory must be sufficiently aligned for a MODESIZE access.
547 This condition guarantees, that the memory access will not
548 touch anything after the end of the structure. */
549 if (MEM_ALIGN (op0
) < modesize
)
552 /* Check for cases where the C++ memory model applies. */
553 if (bitregion_end
!= 0
554 && (bitnum
- bitnum
% modesize
< bitregion_start
555 || bitnum
- bitnum
% modesize
+ modesize
- 1 > bitregion_end
))
561 /* Return true if OP is a memory and if a bitfield of size BITSIZE at
562 bit number BITNUM can be treated as a simple value of mode MODE. */
565 simple_mem_bitfield_p (rtx op0
, unsigned HOST_WIDE_INT bitsize
,
566 unsigned HOST_WIDE_INT bitnum
, machine_mode mode
)
569 && bitnum
% BITS_PER_UNIT
== 0
570 && bitsize
== GET_MODE_BITSIZE (mode
)
571 && (!SLOW_UNALIGNED_ACCESS (mode
, MEM_ALIGN (op0
))
572 || (bitnum
% GET_MODE_ALIGNMENT (mode
) == 0
573 && MEM_ALIGN (op0
) >= GET_MODE_ALIGNMENT (mode
))));
576 /* Try to use instruction INSV to store VALUE into a field of OP0.
577 BITSIZE and BITNUM are as for store_bit_field. */
580 store_bit_field_using_insv (const extraction_insn
*insv
, rtx op0
,
581 unsigned HOST_WIDE_INT bitsize
,
582 unsigned HOST_WIDE_INT bitnum
,
585 struct expand_operand ops
[4];
588 rtx_insn
*last
= get_last_insn ();
589 bool copy_back
= false;
591 machine_mode op_mode
= insv
->field_mode
;
592 unsigned int unit
= GET_MODE_BITSIZE (op_mode
);
593 if (bitsize
== 0 || bitsize
> unit
)
597 /* Get a reference to the first byte of the field. */
598 xop0
= narrow_bit_field_mem (xop0
, insv
->struct_mode
, bitsize
, bitnum
,
602 /* Convert from counting within OP0 to counting in OP_MODE. */
603 if (BYTES_BIG_ENDIAN
)
604 bitnum
+= unit
- GET_MODE_BITSIZE (GET_MODE (op0
));
606 /* If xop0 is a register, we need it in OP_MODE
607 to make it acceptable to the format of insv. */
608 if (GET_CODE (xop0
) == SUBREG
)
609 /* We can't just change the mode, because this might clobber op0,
610 and we will need the original value of op0 if insv fails. */
611 xop0
= gen_rtx_SUBREG (op_mode
, SUBREG_REG (xop0
), SUBREG_BYTE (xop0
));
612 if (REG_P (xop0
) && GET_MODE (xop0
) != op_mode
)
613 xop0
= gen_lowpart_SUBREG (op_mode
, xop0
);
616 /* If the destination is a paradoxical subreg such that we need a
617 truncate to the inner mode, perform the insertion on a temporary and
618 truncate the result to the original destination. Note that we can't
619 just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
620 X) 0)) is (reg:N X). */
621 if (GET_CODE (xop0
) == SUBREG
622 && REG_P (SUBREG_REG (xop0
))
623 && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0
)),
626 rtx tem
= gen_reg_rtx (op_mode
);
627 emit_move_insn (tem
, xop0
);
632 /* There are similar overflow check at the start of store_bit_field_1,
633 but that only check the situation where the field lies completely
634 outside the register, while there do have situation where the field
635 lies partialy in the register, we need to adjust bitsize for this
636 partial overflow situation. Without this fix, pr48335-2.c on big-endian
637 will broken on those arch support bit insert instruction, like arm, aarch64
639 if (bitsize
+ bitnum
> unit
&& bitnum
< unit
)
641 warning (OPT_Wextra
, "write of %wu-bit data outside the bound of "
642 "destination object, data truncated into %wu-bit",
643 bitsize
, unit
- bitnum
);
644 bitsize
= unit
- bitnum
;
647 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
648 "backwards" from the size of the unit we are inserting into.
649 Otherwise, we count bits from the most significant on a
650 BYTES/BITS_BIG_ENDIAN machine. */
652 if (BITS_BIG_ENDIAN
!= BYTES_BIG_ENDIAN
)
653 bitnum
= unit
- bitsize
- bitnum
;
655 /* Convert VALUE to op_mode (which insv insn wants) in VALUE1. */
657 if (GET_MODE (value
) != op_mode
)
659 if (GET_MODE_BITSIZE (GET_MODE (value
)) >= bitsize
)
662 /* Optimization: Don't bother really extending VALUE
663 if it has all the bits we will actually use. However,
664 if we must narrow it, be sure we do it correctly. */
666 if (GET_MODE_SIZE (GET_MODE (value
)) < GET_MODE_SIZE (op_mode
))
668 tmp
= simplify_subreg (op_mode
, value1
, GET_MODE (value
), 0);
670 tmp
= simplify_gen_subreg (op_mode
,
671 force_reg (GET_MODE (value
),
673 GET_MODE (value
), 0);
677 tmp
= gen_lowpart_if_possible (op_mode
, value1
);
679 tmp
= gen_lowpart (op_mode
, force_reg (GET_MODE (value
),
684 else if (CONST_INT_P (value
))
685 value1
= gen_int_mode (INTVAL (value
), op_mode
);
687 /* Parse phase is supposed to make VALUE's data type
688 match that of the component reference, which is a type
689 at least as wide as the field; so VALUE should have
690 a mode that corresponds to that type. */
691 gcc_assert (CONSTANT_P (value
));
694 create_fixed_operand (&ops
[0], xop0
);
695 create_integer_operand (&ops
[1], bitsize
);
696 create_integer_operand (&ops
[2], bitnum
);
697 create_input_operand (&ops
[3], value1
, op_mode
);
698 if (maybe_expand_insn (insv
->icode
, 4, ops
))
701 convert_move (op0
, xop0
, true);
704 delete_insns_since (last
);
708 /* A subroutine of store_bit_field, with the same arguments. Return true
709 if the operation could be implemented.
711 If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
712 no other way of implementing the operation. If FALLBACK_P is false,
713 return false instead. */
716 store_bit_field_1 (rtx str_rtx
, unsigned HOST_WIDE_INT bitsize
,
717 unsigned HOST_WIDE_INT bitnum
,
718 unsigned HOST_WIDE_INT bitregion_start
,
719 unsigned HOST_WIDE_INT bitregion_end
,
720 machine_mode fieldmode
,
721 rtx value
, bool reverse
, bool fallback_p
)
726 while (GET_CODE (op0
) == SUBREG
)
728 /* The following line once was done only if WORDS_BIG_ENDIAN,
729 but I think that is a mistake. WORDS_BIG_ENDIAN is
730 meaningful at a much higher level; when structures are copied
731 between memory and regs, the higher-numbered regs
732 always get higher addresses. */
733 int inner_mode_size
= GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0
)));
734 int outer_mode_size
= GET_MODE_SIZE (GET_MODE (op0
));
737 /* Paradoxical subregs need special handling on big-endian machines. */
738 if (SUBREG_BYTE (op0
) == 0 && inner_mode_size
< outer_mode_size
)
740 int difference
= inner_mode_size
- outer_mode_size
;
742 if (WORDS_BIG_ENDIAN
)
743 byte_offset
+= (difference
/ UNITS_PER_WORD
) * UNITS_PER_WORD
;
744 if (BYTES_BIG_ENDIAN
)
745 byte_offset
+= difference
% UNITS_PER_WORD
;
748 byte_offset
= SUBREG_BYTE (op0
);
750 bitnum
+= byte_offset
* BITS_PER_UNIT
;
751 op0
= SUBREG_REG (op0
);
754 /* No action is needed if the target is a register and if the field
755 lies completely outside that register. This can occur if the source
756 code contains an out-of-bounds access to a small array. */
757 if (REG_P (op0
) && bitnum
>= GET_MODE_BITSIZE (GET_MODE (op0
)))
760 /* Use vec_set patterns for inserting parts of vectors whenever
762 if (VECTOR_MODE_P (GET_MODE (op0
))
764 && optab_handler (vec_set_optab
, GET_MODE (op0
)) != CODE_FOR_nothing
765 && fieldmode
== GET_MODE_INNER (GET_MODE (op0
))
766 && bitsize
== GET_MODE_UNIT_BITSIZE (GET_MODE (op0
))
767 && !(bitnum
% GET_MODE_UNIT_BITSIZE (GET_MODE (op0
))))
769 struct expand_operand ops
[3];
770 machine_mode outermode
= GET_MODE (op0
);
771 machine_mode innermode
= GET_MODE_INNER (outermode
);
772 enum insn_code icode
= optab_handler (vec_set_optab
, outermode
);
773 int pos
= bitnum
/ GET_MODE_BITSIZE (innermode
);
775 create_fixed_operand (&ops
[0], op0
);
776 create_input_operand (&ops
[1], value
, innermode
);
777 create_integer_operand (&ops
[2], pos
);
778 if (maybe_expand_insn (icode
, 3, ops
))
782 /* If the target is a register, overwriting the entire object, or storing
783 a full-word or multi-word field can be done with just a SUBREG. */
785 && bitsize
== GET_MODE_BITSIZE (fieldmode
)
786 && ((bitsize
== GET_MODE_BITSIZE (GET_MODE (op0
)) && bitnum
== 0)
787 || (bitsize
% BITS_PER_WORD
== 0 && bitnum
% BITS_PER_WORD
== 0)))
789 /* Use the subreg machinery either to narrow OP0 to the required
790 words or to cope with mode punning between equal-sized modes.
791 In the latter case, use subreg on the rhs side, not lhs. */
794 if (bitsize
== GET_MODE_BITSIZE (GET_MODE (op0
)))
796 sub
= simplify_gen_subreg (GET_MODE (op0
), value
, fieldmode
, 0);
800 sub
= flip_storage_order (GET_MODE (op0
), sub
);
801 emit_move_insn (op0
, sub
);
807 sub
= simplify_gen_subreg (fieldmode
, op0
, GET_MODE (op0
),
808 bitnum
/ BITS_PER_UNIT
);
812 value
= flip_storage_order (fieldmode
, value
);
813 emit_move_insn (sub
, value
);
819 /* If the target is memory, storing any naturally aligned field can be
820 done with a simple store. For targets that support fast unaligned
821 memory, any naturally sized, unit aligned field can be done directly. */
822 if (simple_mem_bitfield_p (op0
, bitsize
, bitnum
, fieldmode
))
824 op0
= adjust_bitfield_address (op0
, fieldmode
, bitnum
/ BITS_PER_UNIT
);
826 value
= flip_storage_order (fieldmode
, value
);
827 emit_move_insn (op0
, value
);
831 /* Make sure we are playing with integral modes. Pun with subregs
832 if we aren't. This must come after the entire register case above,
833 since that case is valid for any mode. The following cases are only
834 valid for integral modes. */
836 machine_mode imode
= int_mode_for_mode (GET_MODE (op0
));
837 if (imode
!= GET_MODE (op0
))
840 op0
= adjust_bitfield_address_size (op0
, imode
, 0, MEM_SIZE (op0
));
843 gcc_assert (imode
!= BLKmode
);
844 op0
= gen_lowpart (imode
, op0
);
849 /* Storing an lsb-aligned field in a register
850 can be done with a movstrict instruction. */
854 && lowpart_bit_field_p (bitnum
, bitsize
, GET_MODE (op0
))
855 && bitsize
== GET_MODE_BITSIZE (fieldmode
)
856 && optab_handler (movstrict_optab
, fieldmode
) != CODE_FOR_nothing
)
858 struct expand_operand ops
[2];
859 enum insn_code icode
= optab_handler (movstrict_optab
, fieldmode
);
861 unsigned HOST_WIDE_INT subreg_off
;
863 if (GET_CODE (arg0
) == SUBREG
)
865 /* Else we've got some float mode source being extracted into
866 a different float mode destination -- this combination of
867 subregs results in Severe Tire Damage. */
868 gcc_assert (GET_MODE (SUBREG_REG (arg0
)) == fieldmode
869 || GET_MODE_CLASS (fieldmode
) == MODE_INT
870 || GET_MODE_CLASS (fieldmode
) == MODE_PARTIAL_INT
);
871 arg0
= SUBREG_REG (arg0
);
874 subreg_off
= bitnum
/ BITS_PER_UNIT
;
875 if (validate_subreg (fieldmode
, GET_MODE (arg0
), arg0
, subreg_off
))
877 arg0
= gen_rtx_SUBREG (fieldmode
, arg0
, subreg_off
);
879 create_fixed_operand (&ops
[0], arg0
);
880 /* Shrink the source operand to FIELDMODE. */
881 create_convert_operand_to (&ops
[1], value
, fieldmode
, false);
882 if (maybe_expand_insn (icode
, 2, ops
))
887 /* Handle fields bigger than a word. */
889 if (bitsize
> BITS_PER_WORD
)
891 /* Here we transfer the words of the field
892 in the order least significant first.
893 This is because the most significant word is the one which may
895 However, only do that if the value is not BLKmode. */
897 const bool backwards
= WORDS_BIG_ENDIAN
&& fieldmode
!= BLKmode
;
898 unsigned int nwords
= (bitsize
+ (BITS_PER_WORD
- 1)) / BITS_PER_WORD
;
902 /* This is the mode we must force value to, so that there will be enough
903 subwords to extract. Note that fieldmode will often (always?) be
904 VOIDmode, because that is what store_field uses to indicate that this
905 is a bit field, but passing VOIDmode to operand_subword_force
907 fieldmode
= GET_MODE (value
);
908 if (fieldmode
== VOIDmode
)
909 fieldmode
= smallest_mode_for_size (nwords
* BITS_PER_WORD
, MODE_INT
);
911 last
= get_last_insn ();
912 for (i
= 0; i
< nwords
; i
++)
914 /* If I is 0, use the low-order word in both field and target;
915 if I is 1, use the next to lowest word; and so on. */
916 unsigned int wordnum
= (backwards
917 ? GET_MODE_SIZE (fieldmode
) / UNITS_PER_WORD
920 unsigned int bit_offset
= (backwards
^ reverse
921 ? MAX ((int) bitsize
- ((int) i
+ 1)
924 : (int) i
* BITS_PER_WORD
);
925 rtx value_word
= operand_subword_force (value
, wordnum
, fieldmode
);
926 unsigned HOST_WIDE_INT new_bitsize
=
927 MIN (BITS_PER_WORD
, bitsize
- i
* BITS_PER_WORD
);
929 /* If the remaining chunk doesn't have full wordsize we have
930 to make sure that for big-endian machines the higher order
932 if (new_bitsize
< BITS_PER_WORD
&& BYTES_BIG_ENDIAN
&& !backwards
)
933 value_word
= simplify_expand_binop (word_mode
, lshr_optab
,
935 GEN_INT (BITS_PER_WORD
940 if (!store_bit_field_1 (op0
, new_bitsize
,
942 bitregion_start
, bitregion_end
,
944 value_word
, reverse
, fallback_p
))
946 delete_insns_since (last
);
953 /* If VALUE has a floating-point or complex mode, access it as an
954 integer of the corresponding size. This can occur on a machine
955 with 64 bit registers that uses SFmode for float. It can also
956 occur for unaligned float or complex fields. */
958 if (GET_MODE (value
) != VOIDmode
959 && GET_MODE_CLASS (GET_MODE (value
)) != MODE_INT
960 && GET_MODE_CLASS (GET_MODE (value
)) != MODE_PARTIAL_INT
)
962 value
= gen_reg_rtx (int_mode_for_mode (GET_MODE (value
)));
963 emit_move_insn (gen_lowpart (GET_MODE (orig_value
), value
), orig_value
);
966 /* If OP0 is a multi-word register, narrow it to the affected word.
967 If the region spans two words, defer to store_split_bit_field. */
968 if (!MEM_P (op0
) && GET_MODE_SIZE (GET_MODE (op0
)) > UNITS_PER_WORD
)
970 op0
= simplify_gen_subreg (word_mode
, op0
, GET_MODE (op0
),
971 bitnum
/ BITS_PER_WORD
* UNITS_PER_WORD
);
973 bitnum
%= BITS_PER_WORD
;
974 if (bitnum
+ bitsize
> BITS_PER_WORD
)
979 store_split_bit_field (op0
, bitsize
, bitnum
, bitregion_start
,
980 bitregion_end
, value
, reverse
);
985 /* From here on we can assume that the field to be stored in fits
986 within a word. If the destination is a register, it too fits
989 extraction_insn insv
;
992 && get_best_reg_extraction_insn (&insv
, EP_insv
,
993 GET_MODE_BITSIZE (GET_MODE (op0
)),
995 && store_bit_field_using_insv (&insv
, op0
, bitsize
, bitnum
, value
))
998 /* If OP0 is a memory, try copying it to a register and seeing if a
999 cheap register alternative is available. */
1000 if (MEM_P (op0
) && !reverse
)
1002 if (get_best_mem_extraction_insn (&insv
, EP_insv
, bitsize
, bitnum
,
1004 && store_bit_field_using_insv (&insv
, op0
, bitsize
, bitnum
, value
))
1007 rtx_insn
*last
= get_last_insn ();
1009 /* Try loading part of OP0 into a register, inserting the bitfield
1010 into that, and then copying the result back to OP0. */
1011 unsigned HOST_WIDE_INT bitpos
;
1012 rtx xop0
= adjust_bit_field_mem_for_reg (EP_insv
, op0
, bitsize
, bitnum
,
1013 bitregion_start
, bitregion_end
,
1014 fieldmode
, &bitpos
);
1017 rtx tempreg
= copy_to_reg (xop0
);
1018 if (store_bit_field_1 (tempreg
, bitsize
, bitpos
,
1019 bitregion_start
, bitregion_end
,
1020 fieldmode
, orig_value
, reverse
, false))
1022 emit_move_insn (xop0
, tempreg
);
1025 delete_insns_since (last
);
1032 store_fixed_bit_field (op0
, bitsize
, bitnum
, bitregion_start
,
1033 bitregion_end
, value
, reverse
);
1037 /* Generate code to store value from rtx VALUE
1038 into a bit-field within structure STR_RTX
1039 containing BITSIZE bits starting at bit BITNUM.
1041 BITREGION_START is bitpos of the first bitfield in this region.
1042 BITREGION_END is the bitpos of the ending bitfield in this region.
1043 These two fields are 0, if the C++ memory model does not apply,
1044 or we are not interested in keeping track of bitfield regions.
1046 FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
1048 If REVERSE is true, the store is to be done in reverse order. */
1051 store_bit_field (rtx str_rtx
, unsigned HOST_WIDE_INT bitsize
,
1052 unsigned HOST_WIDE_INT bitnum
,
1053 unsigned HOST_WIDE_INT bitregion_start
,
1054 unsigned HOST_WIDE_INT bitregion_end
,
1055 machine_mode fieldmode
,
1056 rtx value
, bool reverse
)
1058 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
1059 if (strict_volatile_bitfield_p (str_rtx
, bitsize
, bitnum
, fieldmode
,
1060 bitregion_start
, bitregion_end
))
1062 /* Storing of a full word can be done with a simple store.
1063 We know here that the field can be accessed with one single
1064 instruction. For targets that support unaligned memory,
1065 an unaligned access may be necessary. */
1066 if (bitsize
== GET_MODE_BITSIZE (fieldmode
))
1068 str_rtx
= adjust_bitfield_address (str_rtx
, fieldmode
,
1069 bitnum
/ BITS_PER_UNIT
);
1071 value
= flip_storage_order (fieldmode
, value
);
1072 gcc_assert (bitnum
% BITS_PER_UNIT
== 0);
1073 emit_move_insn (str_rtx
, value
);
1079 str_rtx
= narrow_bit_field_mem (str_rtx
, fieldmode
, bitsize
, bitnum
,
1081 gcc_assert (bitnum
+ bitsize
<= GET_MODE_BITSIZE (fieldmode
));
1082 temp
= copy_to_reg (str_rtx
);
1083 if (!store_bit_field_1 (temp
, bitsize
, bitnum
, 0, 0,
1084 fieldmode
, value
, reverse
, true))
1087 emit_move_insn (str_rtx
, temp
);
1093 /* Under the C++0x memory model, we must not touch bits outside the
1094 bit region. Adjust the address to start at the beginning of the
1096 if (MEM_P (str_rtx
) && bitregion_start
> 0)
1098 machine_mode bestmode
;
1099 HOST_WIDE_INT offset
, size
;
1101 gcc_assert ((bitregion_start
% BITS_PER_UNIT
) == 0);
1103 offset
= bitregion_start
/ BITS_PER_UNIT
;
1104 bitnum
-= bitregion_start
;
1105 size
= (bitnum
+ bitsize
+ BITS_PER_UNIT
- 1) / BITS_PER_UNIT
;
1106 bitregion_end
-= bitregion_start
;
1107 bitregion_start
= 0;
1108 bestmode
= get_best_mode (bitsize
, bitnum
,
1109 bitregion_start
, bitregion_end
,
1110 MEM_ALIGN (str_rtx
), VOIDmode
,
1111 MEM_VOLATILE_P (str_rtx
));
1112 str_rtx
= adjust_bitfield_address_size (str_rtx
, bestmode
, offset
, size
);
1115 if (!store_bit_field_1 (str_rtx
, bitsize
, bitnum
,
1116 bitregion_start
, bitregion_end
,
1117 fieldmode
, value
, reverse
, true))
1121 /* Use shifts and boolean operations to store VALUE into a bit field of
1122 width BITSIZE in OP0, starting at bit BITNUM.
1124 If REVERSE is true, the store is to be done in reverse order. */
1127 store_fixed_bit_field (rtx op0
, unsigned HOST_WIDE_INT bitsize
,
1128 unsigned HOST_WIDE_INT bitnum
,
1129 unsigned HOST_WIDE_INT bitregion_start
,
1130 unsigned HOST_WIDE_INT bitregion_end
,
1131 rtx value
, bool reverse
)
1133 /* There is a case not handled here:
1134 a structure with a known alignment of just a halfword
1135 and a field split across two aligned halfwords within the structure.
1136 Or likewise a structure with a known alignment of just a byte
1137 and a field split across two bytes.
1138 Such cases are not supposed to be able to occur. */
1142 machine_mode mode
= GET_MODE (op0
);
1143 if (GET_MODE_BITSIZE (mode
) == 0
1144 || GET_MODE_BITSIZE (mode
) > GET_MODE_BITSIZE (word_mode
))
1146 mode
= get_best_mode (bitsize
, bitnum
, bitregion_start
, bitregion_end
,
1147 MEM_ALIGN (op0
), mode
, MEM_VOLATILE_P (op0
));
1149 if (mode
== VOIDmode
)
1151 /* The only way this should occur is if the field spans word
1153 store_split_bit_field (op0
, bitsize
, bitnum
, bitregion_start
,
1154 bitregion_end
, value
, reverse
);
1158 op0
= narrow_bit_field_mem (op0
, mode
, bitsize
, bitnum
, &bitnum
);
1161 store_fixed_bit_field_1 (op0
, bitsize
, bitnum
, value
, reverse
);
1164 /* Helper function for store_fixed_bit_field, stores
1165 the bit field always using the MODE of OP0. */
1168 store_fixed_bit_field_1 (rtx op0
, unsigned HOST_WIDE_INT bitsize
,
1169 unsigned HOST_WIDE_INT bitnum
,
1170 rtx value
, bool reverse
)
1177 mode
= GET_MODE (op0
);
1178 gcc_assert (SCALAR_INT_MODE_P (mode
));
1180 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1181 for invalid input, such as f5 from gcc.dg/pr48335-2.c. */
1183 if (reverse
? !BYTES_BIG_ENDIAN
: BYTES_BIG_ENDIAN
)
1184 /* BITNUM is the distance between our msb
1185 and that of the containing datum.
1186 Convert it to the distance from the lsb. */
1187 bitnum
= GET_MODE_BITSIZE (mode
) - bitsize
- bitnum
;
1189 /* Now BITNUM is always the distance between our lsb
1192 /* Shift VALUE left by BITNUM bits. If VALUE is not constant,
1193 we must first convert its mode to MODE. */
1195 if (CONST_INT_P (value
))
1197 unsigned HOST_WIDE_INT v
= UINTVAL (value
);
1199 if (bitsize
< HOST_BITS_PER_WIDE_INT
)
1200 v
&= ((unsigned HOST_WIDE_INT
) 1 << bitsize
) - 1;
1204 else if ((bitsize
< HOST_BITS_PER_WIDE_INT
1205 && v
== ((unsigned HOST_WIDE_INT
) 1 << bitsize
) - 1)
1206 || (bitsize
== HOST_BITS_PER_WIDE_INT
1207 && v
== (unsigned HOST_WIDE_INT
) -1))
1210 value
= lshift_value (mode
, v
, bitnum
);
1214 int must_and
= (GET_MODE_BITSIZE (GET_MODE (value
)) != bitsize
1215 && bitnum
+ bitsize
!= GET_MODE_BITSIZE (mode
));
1217 if (GET_MODE (value
) != mode
)
1218 value
= convert_to_mode (mode
, value
, 1);
1221 value
= expand_binop (mode
, and_optab
, value
,
1222 mask_rtx (mode
, 0, bitsize
, 0),
1223 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
1225 value
= expand_shift (LSHIFT_EXPR
, mode
, value
,
1226 bitnum
, NULL_RTX
, 1);
1230 value
= flip_storage_order (mode
, value
);
1232 /* Now clear the chosen bits in OP0,
1233 except that if VALUE is -1 we need not bother. */
1234 /* We keep the intermediates in registers to allow CSE to combine
1235 consecutive bitfield assignments. */
1237 temp
= force_reg (mode
, op0
);
1241 rtx mask
= mask_rtx (mode
, bitnum
, bitsize
, 1);
1243 mask
= flip_storage_order (mode
, mask
);
1244 temp
= expand_binop (mode
, and_optab
, temp
, mask
,
1245 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
1246 temp
= force_reg (mode
, temp
);
1249 /* Now logical-or VALUE into OP0, unless it is zero. */
1253 temp
= expand_binop (mode
, ior_optab
, temp
, value
,
1254 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
1255 temp
= force_reg (mode
, temp
);
1260 op0
= copy_rtx (op0
);
1261 emit_move_insn (op0
, temp
);
1265 /* Store a bit field that is split across multiple accessible memory objects.
1267 OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1268 BITSIZE is the field width; BITPOS the position of its first bit
1270 VALUE is the value to store.
1272 If REVERSE is true, the store is to be done in reverse order.
1274 This does not yet handle fields wider than BITS_PER_WORD. */
1277 store_split_bit_field (rtx op0
, unsigned HOST_WIDE_INT bitsize
,
1278 unsigned HOST_WIDE_INT bitpos
,
1279 unsigned HOST_WIDE_INT bitregion_start
,
1280 unsigned HOST_WIDE_INT bitregion_end
,
1281 rtx value
, bool reverse
)
1283 unsigned int unit
, total_bits
, bitsdone
= 0;
1285 /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1287 if (REG_P (op0
) || GET_CODE (op0
) == SUBREG
)
1288 unit
= BITS_PER_WORD
;
1290 unit
= MIN (MEM_ALIGN (op0
), BITS_PER_WORD
);
1292 /* If OP0 is a memory with a mode, then UNIT must not be larger than
1293 OP0's mode as well. Otherwise, store_fixed_bit_field will call us
1294 again, and we will mutually recurse forever. */
1295 if (MEM_P (op0
) && GET_MODE_BITSIZE (GET_MODE (op0
)) > 0)
1296 unit
= MIN (unit
, GET_MODE_BITSIZE (GET_MODE (op0
)));
1298 /* If VALUE is a constant other than a CONST_INT, get it into a register in
1299 WORD_MODE. If we can do this using gen_lowpart_common, do so. Note
1300 that VALUE might be a floating-point constant. */
1301 if (CONSTANT_P (value
) && !CONST_INT_P (value
))
1303 rtx word
= gen_lowpart_common (word_mode
, value
);
1305 if (word
&& (value
!= word
))
1308 value
= gen_lowpart_common (word_mode
,
1309 force_reg (GET_MODE (value
) != VOIDmode
1311 : word_mode
, value
));
1314 total_bits
= GET_MODE_BITSIZE (GET_MODE (value
));
1316 while (bitsdone
< bitsize
)
1318 unsigned HOST_WIDE_INT thissize
;
1319 unsigned HOST_WIDE_INT thispos
;
1320 unsigned HOST_WIDE_INT offset
;
1323 offset
= (bitpos
+ bitsdone
) / unit
;
1324 thispos
= (bitpos
+ bitsdone
) % unit
;
1326 /* When region of bytes we can touch is restricted, decrease
1327 UNIT close to the end of the region as needed. If op0 is a REG
1328 or SUBREG of REG, don't do this, as there can't be data races
1329 on a register and we can expand shorter code in some cases. */
1331 && unit
> BITS_PER_UNIT
1332 && bitpos
+ bitsdone
- thispos
+ unit
> bitregion_end
+ 1
1334 && (GET_CODE (op0
) != SUBREG
|| !REG_P (SUBREG_REG (op0
))))
1340 /* THISSIZE must not overrun a word boundary. Otherwise,
1341 store_fixed_bit_field will call us again, and we will mutually
1343 thissize
= MIN (bitsize
- bitsdone
, BITS_PER_WORD
);
1344 thissize
= MIN (thissize
, unit
- thispos
);
1346 if (reverse
? !BYTES_BIG_ENDIAN
: BYTES_BIG_ENDIAN
)
1348 /* Fetch successively less significant portions. */
1349 if (CONST_INT_P (value
))
1350 part
= GEN_INT (((unsigned HOST_WIDE_INT
) (INTVAL (value
))
1351 >> (bitsize
- bitsdone
- thissize
))
1352 & (((HOST_WIDE_INT
) 1 << thissize
) - 1));
1353 /* Likewise, but the source is little-endian. */
1355 part
= extract_fixed_bit_field (word_mode
, value
, thissize
,
1356 bitsize
- bitsdone
- thissize
,
1357 NULL_RTX
, 1, false);
1360 int total_bits
= GET_MODE_BITSIZE (GET_MODE (value
));
1361 /* The args are chosen so that the last part includes the
1362 lsb. Give extract_bit_field the value it needs (with
1363 endianness compensation) to fetch the piece we want. */
1364 part
= extract_fixed_bit_field (word_mode
, value
, thissize
,
1365 total_bits
- bitsize
+ bitsdone
,
1366 NULL_RTX
, 1, false);
1371 /* Fetch successively more significant portions. */
1372 if (CONST_INT_P (value
))
1373 part
= GEN_INT (((unsigned HOST_WIDE_INT
) (INTVAL (value
))
1375 & (((HOST_WIDE_INT
) 1 << thissize
) - 1));
1376 /* Likewise, but the source is big-endian. */
1378 part
= extract_fixed_bit_field (word_mode
, value
, thissize
,
1379 total_bits
- bitsdone
- thissize
,
1380 NULL_RTX
, 1, false);
1382 part
= extract_fixed_bit_field (word_mode
, value
, thissize
,
1383 bitsdone
, NULL_RTX
, 1, false);
1386 /* If OP0 is a register, then handle OFFSET here.
1388 When handling multiword bitfields, extract_bit_field may pass
1389 down a word_mode SUBREG of a larger REG for a bitfield that actually
1390 crosses a word boundary. Thus, for a SUBREG, we must find
1391 the current word starting from the base register. */
1392 if (GET_CODE (op0
) == SUBREG
)
1394 int word_offset
= (SUBREG_BYTE (op0
) / UNITS_PER_WORD
)
1395 + (offset
* unit
/ BITS_PER_WORD
);
1396 machine_mode sub_mode
= GET_MODE (SUBREG_REG (op0
));
1397 if (sub_mode
!= BLKmode
&& GET_MODE_SIZE (sub_mode
) < UNITS_PER_WORD
)
1398 word
= word_offset
? const0_rtx
: op0
;
1400 word
= operand_subword_force (SUBREG_REG (op0
), word_offset
,
1401 GET_MODE (SUBREG_REG (op0
)));
1402 offset
&= BITS_PER_WORD
/ unit
- 1;
1404 else if (REG_P (op0
))
1406 machine_mode op0_mode
= GET_MODE (op0
);
1407 if (op0_mode
!= BLKmode
&& GET_MODE_SIZE (op0_mode
) < UNITS_PER_WORD
)
1408 word
= offset
? const0_rtx
: op0
;
1410 word
= operand_subword_force (op0
, offset
* unit
/ BITS_PER_WORD
,
1412 offset
&= BITS_PER_WORD
/ unit
- 1;
1417 /* OFFSET is in UNITs, and UNIT is in bits. If WORD is const0_rtx,
1418 it is just an out-of-bounds access. Ignore it. */
1419 if (word
!= const0_rtx
)
1420 store_fixed_bit_field (word
, thissize
, offset
* unit
+ thispos
,
1421 bitregion_start
, bitregion_end
, part
,
1423 bitsdone
+= thissize
;
1427 /* A subroutine of extract_bit_field_1 that converts return value X
1428 to either MODE or TMODE. MODE, TMODE and UNSIGNEDP are arguments
1429 to extract_bit_field. */
1432 convert_extracted_bit_field (rtx x
, machine_mode mode
,
1433 machine_mode tmode
, bool unsignedp
)
1435 if (GET_MODE (x
) == tmode
|| GET_MODE (x
) == mode
)
1438 /* If the x mode is not a scalar integral, first convert to the
1439 integer mode of that size and then access it as a floating-point
1440 value via a SUBREG. */
1441 if (!SCALAR_INT_MODE_P (tmode
))
1445 smode
= mode_for_size (GET_MODE_BITSIZE (tmode
), MODE_INT
, 0);
1446 x
= convert_to_mode (smode
, x
, unsignedp
);
1447 x
= force_reg (smode
, x
);
1448 return gen_lowpart (tmode
, x
);
1451 return convert_to_mode (tmode
, x
, unsignedp
);
1454 /* Try to use an ext(z)v pattern to extract a field from OP0.
1455 Return the extracted value on success, otherwise return null.
1456 EXT_MODE is the mode of the extraction and the other arguments
1457 are as for extract_bit_field. */
1460 extract_bit_field_using_extv (const extraction_insn
*extv
, rtx op0
,
1461 unsigned HOST_WIDE_INT bitsize
,
1462 unsigned HOST_WIDE_INT bitnum
,
1463 int unsignedp
, rtx target
,
1464 machine_mode mode
, machine_mode tmode
)
1466 struct expand_operand ops
[4];
1467 rtx spec_target
= target
;
1468 rtx spec_target_subreg
= 0;
1469 machine_mode ext_mode
= extv
->field_mode
;
1470 unsigned unit
= GET_MODE_BITSIZE (ext_mode
);
1472 if (bitsize
== 0 || unit
< bitsize
)
1476 /* Get a reference to the first byte of the field. */
1477 op0
= narrow_bit_field_mem (op0
, extv
->struct_mode
, bitsize
, bitnum
,
1481 /* Convert from counting within OP0 to counting in EXT_MODE. */
1482 if (BYTES_BIG_ENDIAN
)
1483 bitnum
+= unit
- GET_MODE_BITSIZE (GET_MODE (op0
));
1485 /* If op0 is a register, we need it in EXT_MODE to make it
1486 acceptable to the format of ext(z)v. */
1487 if (GET_CODE (op0
) == SUBREG
&& GET_MODE (op0
) != ext_mode
)
1489 if (REG_P (op0
) && GET_MODE (op0
) != ext_mode
)
1490 op0
= gen_lowpart_SUBREG (ext_mode
, op0
);
1493 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1494 "backwards" from the size of the unit we are extracting from.
1495 Otherwise, we count bits from the most significant on a
1496 BYTES/BITS_BIG_ENDIAN machine. */
1498 if (BITS_BIG_ENDIAN
!= BYTES_BIG_ENDIAN
)
1499 bitnum
= unit
- bitsize
- bitnum
;
1502 target
= spec_target
= gen_reg_rtx (tmode
);
1504 if (GET_MODE (target
) != ext_mode
)
1506 /* Don't use LHS paradoxical subreg if explicit truncation is needed
1507 between the mode of the extraction (word_mode) and the target
1508 mode. Instead, create a temporary and use convert_move to set
1511 && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target
), ext_mode
))
1513 target
= gen_lowpart (ext_mode
, target
);
1514 if (GET_MODE_PRECISION (ext_mode
)
1515 > GET_MODE_PRECISION (GET_MODE (spec_target
)))
1516 spec_target_subreg
= target
;
1519 target
= gen_reg_rtx (ext_mode
);
1522 create_output_operand (&ops
[0], target
, ext_mode
);
1523 create_fixed_operand (&ops
[1], op0
);
1524 create_integer_operand (&ops
[2], bitsize
);
1525 create_integer_operand (&ops
[3], bitnum
);
1526 if (maybe_expand_insn (extv
->icode
, 4, ops
))
1528 target
= ops
[0].value
;
1529 if (target
== spec_target
)
1531 if (target
== spec_target_subreg
)
1533 return convert_extracted_bit_field (target
, mode
, tmode
, unsignedp
);
1538 /* A subroutine of extract_bit_field, with the same arguments.
1539 If FALLBACK_P is true, fall back to extract_fixed_bit_field
1540 if we can find no other means of implementing the operation.
1541 if FALLBACK_P is false, return NULL instead. */
1544 extract_bit_field_1 (rtx str_rtx
, unsigned HOST_WIDE_INT bitsize
,
1545 unsigned HOST_WIDE_INT bitnum
, int unsignedp
, rtx target
,
1546 machine_mode mode
, machine_mode tmode
,
1547 bool reverse
, bool fallback_p
)
1550 machine_mode int_mode
;
1553 if (tmode
== VOIDmode
)
1556 while (GET_CODE (op0
) == SUBREG
)
1558 bitnum
+= SUBREG_BYTE (op0
) * BITS_PER_UNIT
;
1559 op0
= SUBREG_REG (op0
);
1562 /* If we have an out-of-bounds access to a register, just return an
1563 uninitialized register of the required mode. This can occur if the
1564 source code contains an out-of-bounds access to a small array. */
1565 if (REG_P (op0
) && bitnum
>= GET_MODE_BITSIZE (GET_MODE (op0
)))
1566 return gen_reg_rtx (tmode
);
1569 && mode
== GET_MODE (op0
)
1571 && bitsize
== GET_MODE_BITSIZE (GET_MODE (op0
)))
1574 op0
= flip_storage_order (mode
, op0
);
1575 /* We're trying to extract a full register from itself. */
1579 /* See if we can get a better vector mode before extracting. */
1580 if (VECTOR_MODE_P (GET_MODE (op0
))
1582 && GET_MODE_INNER (GET_MODE (op0
)) != tmode
)
1584 machine_mode new_mode
;
1586 if (GET_MODE_CLASS (tmode
) == MODE_FLOAT
)
1587 new_mode
= MIN_MODE_VECTOR_FLOAT
;
1588 else if (GET_MODE_CLASS (tmode
) == MODE_FRACT
)
1589 new_mode
= MIN_MODE_VECTOR_FRACT
;
1590 else if (GET_MODE_CLASS (tmode
) == MODE_UFRACT
)
1591 new_mode
= MIN_MODE_VECTOR_UFRACT
;
1592 else if (GET_MODE_CLASS (tmode
) == MODE_ACCUM
)
1593 new_mode
= MIN_MODE_VECTOR_ACCUM
;
1594 else if (GET_MODE_CLASS (tmode
) == MODE_UACCUM
)
1595 new_mode
= MIN_MODE_VECTOR_UACCUM
;
1597 new_mode
= MIN_MODE_VECTOR_INT
;
1599 for (; new_mode
!= VOIDmode
; new_mode
= GET_MODE_WIDER_MODE (new_mode
))
1600 if (GET_MODE_SIZE (new_mode
) == GET_MODE_SIZE (GET_MODE (op0
))
1601 && targetm
.vector_mode_supported_p (new_mode
))
1603 if (new_mode
!= VOIDmode
)
1604 op0
= gen_lowpart (new_mode
, op0
);
1607 /* Use vec_extract patterns for extracting parts of vectors whenever
1609 if (VECTOR_MODE_P (GET_MODE (op0
))
1611 && optab_handler (vec_extract_optab
, GET_MODE (op0
)) != CODE_FOR_nothing
1612 && ((bitnum
+ bitsize
- 1) / GET_MODE_UNIT_BITSIZE (GET_MODE (op0
))
1613 == bitnum
/ GET_MODE_UNIT_BITSIZE (GET_MODE (op0
))))
1615 struct expand_operand ops
[3];
1616 machine_mode outermode
= GET_MODE (op0
);
1617 machine_mode innermode
= GET_MODE_INNER (outermode
);
1618 enum insn_code icode
= optab_handler (vec_extract_optab
, outermode
);
1619 unsigned HOST_WIDE_INT pos
= bitnum
/ GET_MODE_BITSIZE (innermode
);
1621 create_output_operand (&ops
[0], target
, innermode
);
1622 create_input_operand (&ops
[1], op0
, outermode
);
1623 create_integer_operand (&ops
[2], pos
);
1624 if (maybe_expand_insn (icode
, 3, ops
))
1626 target
= ops
[0].value
;
1627 if (GET_MODE (target
) != mode
)
1628 return gen_lowpart (tmode
, target
);
1633 /* Make sure we are playing with integral modes. Pun with subregs
1636 machine_mode imode
= int_mode_for_mode (GET_MODE (op0
));
1637 if (imode
!= GET_MODE (op0
))
1640 op0
= adjust_bitfield_address_size (op0
, imode
, 0, MEM_SIZE (op0
));
1641 else if (imode
!= BLKmode
)
1643 op0
= gen_lowpart (imode
, op0
);
1645 /* If we got a SUBREG, force it into a register since we
1646 aren't going to be able to do another SUBREG on it. */
1647 if (GET_CODE (op0
) == SUBREG
)
1648 op0
= force_reg (imode
, op0
);
1650 else if (REG_P (op0
))
1653 imode
= smallest_mode_for_size (GET_MODE_BITSIZE (GET_MODE (op0
)),
1655 reg
= gen_reg_rtx (imode
);
1656 subreg
= gen_lowpart_SUBREG (GET_MODE (op0
), reg
);
1657 emit_move_insn (subreg
, op0
);
1659 bitnum
+= SUBREG_BYTE (subreg
) * BITS_PER_UNIT
;
1663 HOST_WIDE_INT size
= GET_MODE_SIZE (GET_MODE (op0
));
1664 rtx mem
= assign_stack_temp (GET_MODE (op0
), size
);
1665 emit_move_insn (mem
, op0
);
1666 op0
= adjust_bitfield_address_size (mem
, BLKmode
, 0, size
);
1671 /* ??? We currently assume TARGET is at least as big as BITSIZE.
1672 If that's wrong, the solution is to test for it and set TARGET to 0
1675 /* Get the mode of the field to use for atomic access or subreg
1678 if (SCALAR_INT_MODE_P (tmode
))
1680 machine_mode try_mode
= mode_for_size (bitsize
,
1681 GET_MODE_CLASS (tmode
), 0);
1682 if (try_mode
!= BLKmode
)
1685 gcc_assert (mode1
!= BLKmode
);
1687 /* Extraction of a full MODE1 value can be done with a subreg as long
1688 as the least significant bit of the value is the least significant
1689 bit of either OP0 or a word of OP0. */
1692 && lowpart_bit_field_p (bitnum
, bitsize
, GET_MODE (op0
))
1693 && bitsize
== GET_MODE_BITSIZE (mode1
)
1694 && TRULY_NOOP_TRUNCATION_MODES_P (mode1
, GET_MODE (op0
)))
1696 rtx sub
= simplify_gen_subreg (mode1
, op0
, GET_MODE (op0
),
1697 bitnum
/ BITS_PER_UNIT
);
1699 return convert_extracted_bit_field (sub
, mode
, tmode
, unsignedp
);
1702 /* Extraction of a full MODE1 value can be done with a load as long as
1703 the field is on a byte boundary and is sufficiently aligned. */
1704 if (simple_mem_bitfield_p (op0
, bitsize
, bitnum
, mode1
))
1706 op0
= adjust_bitfield_address (op0
, mode1
, bitnum
/ BITS_PER_UNIT
);
1708 op0
= flip_storage_order (mode1
, op0
);
1709 return convert_extracted_bit_field (op0
, mode
, tmode
, unsignedp
);
1712 /* Handle fields bigger than a word. */
1714 if (bitsize
> BITS_PER_WORD
)
1716 /* Here we transfer the words of the field
1717 in the order least significant first.
1718 This is because the most significant word is the one which may
1719 be less than full. */
1721 const bool backwards
= WORDS_BIG_ENDIAN
;
1722 unsigned int nwords
= (bitsize
+ (BITS_PER_WORD
- 1)) / BITS_PER_WORD
;
1726 if (target
== 0 || !REG_P (target
) || !valid_multiword_target_p (target
))
1727 target
= gen_reg_rtx (mode
);
1729 /* In case we're about to clobber a base register or something
1730 (see gcc.c-torture/execute/20040625-1.c). */
1731 if (reg_mentioned_p (target
, str_rtx
))
1732 target
= gen_reg_rtx (mode
);
1734 /* Indicate for flow that the entire target reg is being set. */
1735 emit_clobber (target
);
1737 last
= get_last_insn ();
1738 for (i
= 0; i
< nwords
; i
++)
1740 /* If I is 0, use the low-order word in both field and target;
1741 if I is 1, use the next to lowest word; and so on. */
1742 /* Word number in TARGET to use. */
1743 unsigned int wordnum
1745 ? GET_MODE_SIZE (GET_MODE (target
)) / UNITS_PER_WORD
- i
- 1
1747 /* Offset from start of field in OP0. */
1748 unsigned int bit_offset
= (backwards
^ reverse
1749 ? MAX ((int) bitsize
- ((int) i
+ 1)
1752 : (int) i
* BITS_PER_WORD
);
1753 rtx target_part
= operand_subword (target
, wordnum
, 1, VOIDmode
);
1755 = extract_bit_field_1 (op0
, MIN (BITS_PER_WORD
,
1756 bitsize
- i
* BITS_PER_WORD
),
1757 bitnum
+ bit_offset
, 1, target_part
,
1758 mode
, word_mode
, reverse
, fallback_p
);
1760 gcc_assert (target_part
);
1763 delete_insns_since (last
);
1767 if (result_part
!= target_part
)
1768 emit_move_insn (target_part
, result_part
);
1773 /* Unless we've filled TARGET, the upper regs in a multi-reg value
1774 need to be zero'd out. */
1775 if (GET_MODE_SIZE (GET_MODE (target
)) > nwords
* UNITS_PER_WORD
)
1777 unsigned int i
, total_words
;
1779 total_words
= GET_MODE_SIZE (GET_MODE (target
)) / UNITS_PER_WORD
;
1780 for (i
= nwords
; i
< total_words
; i
++)
1782 (operand_subword (target
,
1783 backwards
? total_words
- i
- 1 : i
,
1790 /* Signed bit field: sign-extend with two arithmetic shifts. */
1791 target
= expand_shift (LSHIFT_EXPR
, mode
, target
,
1792 GET_MODE_BITSIZE (mode
) - bitsize
, NULL_RTX
, 0);
1793 return expand_shift (RSHIFT_EXPR
, mode
, target
,
1794 GET_MODE_BITSIZE (mode
) - bitsize
, NULL_RTX
, 0);
1797 /* If OP0 is a multi-word register, narrow it to the affected word.
1798 If the region spans two words, defer to extract_split_bit_field. */
1799 if (!MEM_P (op0
) && GET_MODE_SIZE (GET_MODE (op0
)) > UNITS_PER_WORD
)
1801 op0
= simplify_gen_subreg (word_mode
, op0
, GET_MODE (op0
),
1802 bitnum
/ BITS_PER_WORD
* UNITS_PER_WORD
);
1803 bitnum
%= BITS_PER_WORD
;
1804 if (bitnum
+ bitsize
> BITS_PER_WORD
)
1808 target
= extract_split_bit_field (op0
, bitsize
, bitnum
, unsignedp
,
1810 return convert_extracted_bit_field (target
, mode
, tmode
, unsignedp
);
1814 /* From here on we know the desired field is smaller than a word.
1815 If OP0 is a register, it too fits within a word. */
1816 enum extraction_pattern pattern
= unsignedp
? EP_extzv
: EP_extv
;
1817 extraction_insn extv
;
1820 /* ??? We could limit the structure size to the part of OP0 that
1821 contains the field, with appropriate checks for endianness
1822 and TRULY_NOOP_TRUNCATION. */
1823 && get_best_reg_extraction_insn (&extv
, pattern
,
1824 GET_MODE_BITSIZE (GET_MODE (op0
)),
1827 rtx result
= extract_bit_field_using_extv (&extv
, op0
, bitsize
, bitnum
,
1828 unsignedp
, target
, mode
,
1834 /* If OP0 is a memory, try copying it to a register and seeing if a
1835 cheap register alternative is available. */
1836 if (MEM_P (op0
) & !reverse
)
1838 if (get_best_mem_extraction_insn (&extv
, pattern
, bitsize
, bitnum
,
1841 rtx result
= extract_bit_field_using_extv (&extv
, op0
, bitsize
,
1849 rtx_insn
*last
= get_last_insn ();
1851 /* Try loading part of OP0 into a register and extracting the
1852 bitfield from that. */
1853 unsigned HOST_WIDE_INT bitpos
;
1854 rtx xop0
= adjust_bit_field_mem_for_reg (pattern
, op0
, bitsize
, bitnum
,
1855 0, 0, tmode
, &bitpos
);
1858 xop0
= copy_to_reg (xop0
);
1859 rtx result
= extract_bit_field_1 (xop0
, bitsize
, bitpos
,
1861 mode
, tmode
, reverse
, false);
1864 delete_insns_since (last
);
1871 /* Find a correspondingly-sized integer field, so we can apply
1872 shifts and masks to it. */
1873 int_mode
= int_mode_for_mode (tmode
);
1874 if (int_mode
== BLKmode
)
1875 int_mode
= int_mode_for_mode (mode
);
1876 /* Should probably push op0 out to memory and then do a load. */
1877 gcc_assert (int_mode
!= BLKmode
);
1879 target
= extract_fixed_bit_field (int_mode
, op0
, bitsize
, bitnum
, target
,
1880 unsignedp
, reverse
);
1882 /* Complex values must be reversed piecewise, so we need to undo the global
1883 reversal, convert to the complex mode and reverse again. */
1884 if (reverse
&& COMPLEX_MODE_P (tmode
))
1886 target
= flip_storage_order (int_mode
, target
);
1887 target
= convert_extracted_bit_field (target
, mode
, tmode
, unsignedp
);
1888 target
= flip_storage_order (tmode
, target
);
1891 target
= convert_extracted_bit_field (target
, mode
, tmode
, unsignedp
);
1896 /* Generate code to extract a byte-field from STR_RTX
1897 containing BITSIZE bits, starting at BITNUM,
1898 and put it in TARGET if possible (if TARGET is nonzero).
1899 Regardless of TARGET, we return the rtx for where the value is placed.
1901 STR_RTX is the structure containing the byte (a REG or MEM).
1902 UNSIGNEDP is nonzero if this is an unsigned bit field.
1903 MODE is the natural mode of the field value once extracted.
1904 TMODE is the mode the caller would like the value to have;
1905 but the value may be returned with type MODE instead.
1907 If REVERSE is true, the extraction is to be done in reverse order.
1909 If a TARGET is specified and we can store in it at no extra cost,
1910 we do so, and return TARGET.
1911 Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1912 if they are equally easy. */
1915 extract_bit_field (rtx str_rtx
, unsigned HOST_WIDE_INT bitsize
,
1916 unsigned HOST_WIDE_INT bitnum
, int unsignedp
, rtx target
,
1917 machine_mode mode
, machine_mode tmode
, bool reverse
)
1921 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
1922 if (GET_MODE_BITSIZE (GET_MODE (str_rtx
)) > 0)
1923 mode1
= GET_MODE (str_rtx
);
1924 else if (target
&& GET_MODE_BITSIZE (GET_MODE (target
)) > 0)
1925 mode1
= GET_MODE (target
);
1929 if (strict_volatile_bitfield_p (str_rtx
, bitsize
, bitnum
, mode1
, 0, 0))
1931 /* Extraction of a full MODE1 value can be done with a simple load.
1932 We know here that the field can be accessed with one single
1933 instruction. For targets that support unaligned memory,
1934 an unaligned access may be necessary. */
1935 if (bitsize
== GET_MODE_BITSIZE (mode1
))
1937 rtx result
= adjust_bitfield_address (str_rtx
, mode1
,
1938 bitnum
/ BITS_PER_UNIT
);
1940 result
= flip_storage_order (mode1
, result
);
1941 gcc_assert (bitnum
% BITS_PER_UNIT
== 0);
1942 return convert_extracted_bit_field (result
, mode
, tmode
, unsignedp
);
1945 str_rtx
= narrow_bit_field_mem (str_rtx
, mode1
, bitsize
, bitnum
,
1947 gcc_assert (bitnum
+ bitsize
<= GET_MODE_BITSIZE (mode1
));
1948 str_rtx
= copy_to_reg (str_rtx
);
1951 return extract_bit_field_1 (str_rtx
, bitsize
, bitnum
, unsignedp
,
1952 target
, mode
, tmode
, reverse
, true);
1955 /* Use shifts and boolean operations to extract a field of BITSIZE bits
1956 from bit BITNUM of OP0.
1958 UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1959 If REVERSE is true, the extraction is to be done in reverse order.
1961 If TARGET is nonzero, attempts to store the value there
1962 and return TARGET, but this is not guaranteed.
1963 If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */
1966 extract_fixed_bit_field (machine_mode tmode
, rtx op0
,
1967 unsigned HOST_WIDE_INT bitsize
,
1968 unsigned HOST_WIDE_INT bitnum
, rtx target
,
1969 int unsignedp
, bool reverse
)
1974 = get_best_mode (bitsize
, bitnum
, 0, 0, MEM_ALIGN (op0
), word_mode
,
1975 MEM_VOLATILE_P (op0
));
1977 if (mode
== VOIDmode
)
1978 /* The only way this should occur is if the field spans word
1980 return extract_split_bit_field (op0
, bitsize
, bitnum
, unsignedp
,
1983 op0
= narrow_bit_field_mem (op0
, mode
, bitsize
, bitnum
, &bitnum
);
1986 return extract_fixed_bit_field_1 (tmode
, op0
, bitsize
, bitnum
,
1987 target
, unsignedp
, reverse
);
1990 /* Helper function for extract_fixed_bit_field, extracts
1991 the bit field always using the MODE of OP0. */
1994 extract_fixed_bit_field_1 (machine_mode tmode
, rtx op0
,
1995 unsigned HOST_WIDE_INT bitsize
,
1996 unsigned HOST_WIDE_INT bitnum
, rtx target
,
1997 int unsignedp
, bool reverse
)
1999 machine_mode mode
= GET_MODE (op0
);
2000 gcc_assert (SCALAR_INT_MODE_P (mode
));
2002 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
2003 for invalid input, such as extract equivalent of f5 from
2004 gcc.dg/pr48335-2.c. */
2006 if (reverse
? !BYTES_BIG_ENDIAN
: BYTES_BIG_ENDIAN
)
2007 /* BITNUM is the distance between our msb and that of OP0.
2008 Convert it to the distance from the lsb. */
2009 bitnum
= GET_MODE_BITSIZE (mode
) - bitsize
- bitnum
;
2011 /* Now BITNUM is always the distance between the field's lsb and that of OP0.
2012 We have reduced the big-endian case to the little-endian case. */
2014 op0
= flip_storage_order (mode
, op0
);
2020 /* If the field does not already start at the lsb,
2021 shift it so it does. */
2022 /* Maybe propagate the target for the shift. */
2023 rtx subtarget
= (target
!= 0 && REG_P (target
) ? target
: 0);
2026 op0
= expand_shift (RSHIFT_EXPR
, mode
, op0
, bitnum
, subtarget
, 1);
2028 /* Convert the value to the desired mode. */
2030 op0
= convert_to_mode (tmode
, op0
, 1);
2032 /* Unless the msb of the field used to be the msb when we shifted,
2033 mask out the upper bits. */
2035 if (GET_MODE_BITSIZE (mode
) != bitnum
+ bitsize
)
2036 return expand_binop (GET_MODE (op0
), and_optab
, op0
,
2037 mask_rtx (GET_MODE (op0
), 0, bitsize
, 0),
2038 target
, 1, OPTAB_LIB_WIDEN
);
2042 /* To extract a signed bit-field, first shift its msb to the msb of the word,
2043 then arithmetic-shift its lsb to the lsb of the word. */
2044 op0
= force_reg (mode
, op0
);
2046 /* Find the narrowest integer mode that contains the field. */
2048 for (mode
= GET_CLASS_NARROWEST_MODE (MODE_INT
); mode
!= VOIDmode
;
2049 mode
= GET_MODE_WIDER_MODE (mode
))
2050 if (GET_MODE_BITSIZE (mode
) >= bitsize
+ bitnum
)
2052 op0
= convert_to_mode (mode
, op0
, 0);
2059 if (GET_MODE_BITSIZE (mode
) != (bitsize
+ bitnum
))
2061 int amount
= GET_MODE_BITSIZE (mode
) - (bitsize
+ bitnum
);
2062 /* Maybe propagate the target for the shift. */
2063 rtx subtarget
= (target
!= 0 && REG_P (target
) ? target
: 0);
2064 op0
= expand_shift (LSHIFT_EXPR
, mode
, op0
, amount
, subtarget
, 1);
2067 return expand_shift (RSHIFT_EXPR
, mode
, op0
,
2068 GET_MODE_BITSIZE (mode
) - bitsize
, target
, 0);
2071 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
2075 lshift_value (machine_mode mode
, unsigned HOST_WIDE_INT value
,
2078 return immed_wide_int_const (wi::lshift (value
, bitpos
), mode
);
2081 /* Extract a bit field that is split across two words
2082 and return an RTX for the result.
2084 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
2085 BITSIZE is the field width; BITPOS, position of its first bit, in the word.
2086 UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.
2088 If REVERSE is true, the extraction is to be done in reverse order. */
2091 extract_split_bit_field (rtx op0
, unsigned HOST_WIDE_INT bitsize
,
2092 unsigned HOST_WIDE_INT bitpos
, int unsignedp
,
2096 unsigned int bitsdone
= 0;
2097 rtx result
= NULL_RTX
;
2100 /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
2102 if (REG_P (op0
) || GET_CODE (op0
) == SUBREG
)
2103 unit
= BITS_PER_WORD
;
2105 unit
= MIN (MEM_ALIGN (op0
), BITS_PER_WORD
);
2107 while (bitsdone
< bitsize
)
2109 unsigned HOST_WIDE_INT thissize
;
2111 unsigned HOST_WIDE_INT thispos
;
2112 unsigned HOST_WIDE_INT offset
;
2114 offset
= (bitpos
+ bitsdone
) / unit
;
2115 thispos
= (bitpos
+ bitsdone
) % unit
;
2117 /* THISSIZE must not overrun a word boundary. Otherwise,
2118 extract_fixed_bit_field will call us again, and we will mutually
2120 thissize
= MIN (bitsize
- bitsdone
, BITS_PER_WORD
);
2121 thissize
= MIN (thissize
, unit
- thispos
);
2123 /* If OP0 is a register, then handle OFFSET here.
2125 When handling multiword bitfields, extract_bit_field may pass
2126 down a word_mode SUBREG of a larger REG for a bitfield that actually
2127 crosses a word boundary. Thus, for a SUBREG, we must find
2128 the current word starting from the base register. */
2129 if (GET_CODE (op0
) == SUBREG
)
2131 int word_offset
= (SUBREG_BYTE (op0
) / UNITS_PER_WORD
) + offset
;
2132 word
= operand_subword_force (SUBREG_REG (op0
), word_offset
,
2133 GET_MODE (SUBREG_REG (op0
)));
2136 else if (REG_P (op0
))
2138 word
= operand_subword_force (op0
, offset
, GET_MODE (op0
));
2144 /* Extract the parts in bit-counting order,
2145 whose meaning is determined by BYTES_PER_UNIT.
2146 OFFSET is in UNITs, and UNIT is in bits. */
2147 part
= extract_fixed_bit_field (word_mode
, word
, thissize
,
2148 offset
* unit
+ thispos
, 0, 1, reverse
);
2149 bitsdone
+= thissize
;
2151 /* Shift this part into place for the result. */
2152 if (reverse
? !BYTES_BIG_ENDIAN
: BYTES_BIG_ENDIAN
)
2154 if (bitsize
!= bitsdone
)
2155 part
= expand_shift (LSHIFT_EXPR
, word_mode
, part
,
2156 bitsize
- bitsdone
, 0, 1);
2160 if (bitsdone
!= thissize
)
2161 part
= expand_shift (LSHIFT_EXPR
, word_mode
, part
,
2162 bitsdone
- thissize
, 0, 1);
2168 /* Combine the parts with bitwise or. This works
2169 because we extracted each part as an unsigned bit field. */
2170 result
= expand_binop (word_mode
, ior_optab
, part
, result
, NULL_RTX
, 1,
2176 /* Unsigned bit field: we are done. */
2179 /* Signed bit field: sign-extend with two arithmetic shifts. */
2180 result
= expand_shift (LSHIFT_EXPR
, word_mode
, result
,
2181 BITS_PER_WORD
- bitsize
, NULL_RTX
, 0);
2182 return expand_shift (RSHIFT_EXPR
, word_mode
, result
,
2183 BITS_PER_WORD
- bitsize
, NULL_RTX
, 0);
2186 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2187 the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than
2188 MODE, fill the upper bits with zeros. Fail if the layout of either
2189 mode is unknown (as for CC modes) or if the extraction would involve
2190 unprofitable mode punning. Return the value on success, otherwise
2193 This is different from gen_lowpart* in these respects:
2195 - the returned value must always be considered an rvalue
2197 - when MODE is wider than SRC_MODE, the extraction involves
2200 - when MODE is smaller than SRC_MODE, the extraction involves
2201 a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
2203 In other words, this routine performs a computation, whereas the
2204 gen_lowpart* routines are conceptually lvalue or rvalue subreg
2208 extract_low_bits (machine_mode mode
, machine_mode src_mode
, rtx src
)
2210 machine_mode int_mode
, src_int_mode
;
2212 if (mode
== src_mode
)
2215 if (CONSTANT_P (src
))
2217 /* simplify_gen_subreg can't be used here, as if simplify_subreg
2218 fails, it will happily create (subreg (symbol_ref)) or similar
2220 unsigned int byte
= subreg_lowpart_offset (mode
, src_mode
);
2221 rtx ret
= simplify_subreg (mode
, src
, src_mode
, byte
);
2225 if (GET_MODE (src
) == VOIDmode
2226 || !validate_subreg (mode
, src_mode
, src
, byte
))
2229 src
= force_reg (GET_MODE (src
), src
);
2230 return gen_rtx_SUBREG (mode
, src
, byte
);
2233 if (GET_MODE_CLASS (mode
) == MODE_CC
|| GET_MODE_CLASS (src_mode
) == MODE_CC
)
2236 if (GET_MODE_BITSIZE (mode
) == GET_MODE_BITSIZE (src_mode
)
2237 && MODES_TIEABLE_P (mode
, src_mode
))
2239 rtx x
= gen_lowpart_common (mode
, src
);
2244 src_int_mode
= int_mode_for_mode (src_mode
);
2245 int_mode
= int_mode_for_mode (mode
);
2246 if (src_int_mode
== BLKmode
|| int_mode
== BLKmode
)
2249 if (!MODES_TIEABLE_P (src_int_mode
, src_mode
))
2251 if (!MODES_TIEABLE_P (int_mode
, mode
))
2254 src
= gen_lowpart (src_int_mode
, src
);
2255 src
= convert_modes (int_mode
, src_int_mode
, src
, true);
2256 src
= gen_lowpart (mode
, src
);
2260 /* Add INC into TARGET. */
2263 expand_inc (rtx target
, rtx inc
)
2265 rtx value
= expand_binop (GET_MODE (target
), add_optab
,
2267 target
, 0, OPTAB_LIB_WIDEN
);
2268 if (value
!= target
)
2269 emit_move_insn (target
, value
);
2272 /* Subtract DEC from TARGET. */
2275 expand_dec (rtx target
, rtx dec
)
2277 rtx value
= expand_binop (GET_MODE (target
), sub_optab
,
2279 target
, 0, OPTAB_LIB_WIDEN
);
2280 if (value
!= target
)
2281 emit_move_insn (target
, value
);
2284 /* Output a shift instruction for expression code CODE,
2285 with SHIFTED being the rtx for the value to shift,
2286 and AMOUNT the rtx for the amount to shift by.
2287 Store the result in the rtx TARGET, if that is convenient.
2288 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2289 Return the rtx for where the value is. */
2292 expand_shift_1 (enum tree_code code
, machine_mode mode
, rtx shifted
,
2293 rtx amount
, rtx target
, int unsignedp
)
2296 int left
= (code
== LSHIFT_EXPR
|| code
== LROTATE_EXPR
);
2297 int rotate
= (code
== LROTATE_EXPR
|| code
== RROTATE_EXPR
);
2298 optab lshift_optab
= ashl_optab
;
2299 optab rshift_arith_optab
= ashr_optab
;
2300 optab rshift_uns_optab
= lshr_optab
;
2301 optab lrotate_optab
= rotl_optab
;
2302 optab rrotate_optab
= rotr_optab
;
2303 machine_mode op1_mode
;
2304 machine_mode scalar_mode
= mode
;
2306 bool speed
= optimize_insn_for_speed_p ();
2308 if (VECTOR_MODE_P (mode
))
2309 scalar_mode
= GET_MODE_INNER (mode
);
2311 op1_mode
= GET_MODE (op1
);
2313 /* Determine whether the shift/rotate amount is a vector, or scalar. If the
2314 shift amount is a vector, use the vector/vector shift patterns. */
2315 if (VECTOR_MODE_P (mode
) && VECTOR_MODE_P (op1_mode
))
2317 lshift_optab
= vashl_optab
;
2318 rshift_arith_optab
= vashr_optab
;
2319 rshift_uns_optab
= vlshr_optab
;
2320 lrotate_optab
= vrotl_optab
;
2321 rrotate_optab
= vrotr_optab
;
2324 /* Previously detected shift-counts computed by NEGATE_EXPR
2325 and shifted in the other direction; but that does not work
2328 if (SHIFT_COUNT_TRUNCATED
)
2330 if (CONST_INT_P (op1
)
2331 && ((unsigned HOST_WIDE_INT
) INTVAL (op1
) >=
2332 (unsigned HOST_WIDE_INT
) GET_MODE_BITSIZE (scalar_mode
)))
2333 op1
= GEN_INT ((unsigned HOST_WIDE_INT
) INTVAL (op1
)
2334 % GET_MODE_BITSIZE (scalar_mode
));
2335 else if (GET_CODE (op1
) == SUBREG
2336 && subreg_lowpart_p (op1
)
2337 && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1
)))
2338 && SCALAR_INT_MODE_P (GET_MODE (op1
)))
2339 op1
= SUBREG_REG (op1
);
2342 /* Canonicalize rotates by constant amount. If op1 is bitsize / 2,
2343 prefer left rotation, if op1 is from bitsize / 2 + 1 to
2344 bitsize - 1, use other direction of rotate with 1 .. bitsize / 2 - 1
2347 && CONST_INT_P (op1
)
2348 && IN_RANGE (INTVAL (op1
), GET_MODE_BITSIZE (scalar_mode
) / 2 + left
,
2349 GET_MODE_BITSIZE (scalar_mode
) - 1))
2351 op1
= GEN_INT (GET_MODE_BITSIZE (scalar_mode
) - INTVAL (op1
));
2353 code
= left
? LROTATE_EXPR
: RROTATE_EXPR
;
2356 /* Rotation of 16bit values by 8 bits is effectively equivalent to a bswaphi.
2357 Note that this is not the case for bigger values. For instance a rotation
2358 of 0x01020304 by 16 bits gives 0x03040102 which is different from
2359 0x04030201 (bswapsi). */
2361 && CONST_INT_P (op1
)
2362 && INTVAL (op1
) == BITS_PER_UNIT
2363 && GET_MODE_SIZE (scalar_mode
) == 2
2364 && optab_handler (bswap_optab
, HImode
) != CODE_FOR_nothing
)
2365 return expand_unop (HImode
, bswap_optab
, shifted
, NULL_RTX
,
2368 if (op1
== const0_rtx
)
2371 /* Check whether its cheaper to implement a left shift by a constant
2372 bit count by a sequence of additions. */
2373 if (code
== LSHIFT_EXPR
2374 && CONST_INT_P (op1
)
2376 && INTVAL (op1
) < GET_MODE_PRECISION (scalar_mode
)
2377 && INTVAL (op1
) < MAX_BITS_PER_WORD
2378 && (shift_cost (speed
, mode
, INTVAL (op1
))
2379 > INTVAL (op1
) * add_cost (speed
, mode
))
2380 && shift_cost (speed
, mode
, INTVAL (op1
)) != MAX_COST
)
2383 for (i
= 0; i
< INTVAL (op1
); i
++)
2385 temp
= force_reg (mode
, shifted
);
2386 shifted
= expand_binop (mode
, add_optab
, temp
, temp
, NULL_RTX
,
2387 unsignedp
, OPTAB_LIB_WIDEN
);
2392 for (attempt
= 0; temp
== 0 && attempt
< 3; attempt
++)
2394 enum optab_methods methods
;
2397 methods
= OPTAB_DIRECT
;
2398 else if (attempt
== 1)
2399 methods
= OPTAB_WIDEN
;
2401 methods
= OPTAB_LIB_WIDEN
;
2405 /* Widening does not work for rotation. */
2406 if (methods
== OPTAB_WIDEN
)
2408 else if (methods
== OPTAB_LIB_WIDEN
)
2410 /* If we have been unable to open-code this by a rotation,
2411 do it as the IOR of two shifts. I.e., to rotate A
2413 (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
2414 where C is the bitsize of A.
2416 It is theoretically possible that the target machine might
2417 not be able to perform either shift and hence we would
2418 be making two libcalls rather than just the one for the
2419 shift (similarly if IOR could not be done). We will allow
2420 this extremely unlikely lossage to avoid complicating the
2423 rtx subtarget
= target
== shifted
? 0 : target
;
2424 rtx new_amount
, other_amount
;
2428 if (op1
== const0_rtx
)
2430 else if (CONST_INT_P (op1
))
2431 other_amount
= GEN_INT (GET_MODE_BITSIZE (scalar_mode
)
2436 = simplify_gen_unary (NEG
, GET_MODE (op1
),
2437 op1
, GET_MODE (op1
));
2438 HOST_WIDE_INT mask
= GET_MODE_PRECISION (scalar_mode
) - 1;
2440 = simplify_gen_binary (AND
, GET_MODE (op1
), other_amount
,
2441 gen_int_mode (mask
, GET_MODE (op1
)));
2444 shifted
= force_reg (mode
, shifted
);
2446 temp
= expand_shift_1 (left
? LSHIFT_EXPR
: RSHIFT_EXPR
,
2447 mode
, shifted
, new_amount
, 0, 1);
2448 temp1
= expand_shift_1 (left
? RSHIFT_EXPR
: LSHIFT_EXPR
,
2449 mode
, shifted
, other_amount
,
2451 return expand_binop (mode
, ior_optab
, temp
, temp1
, target
,
2452 unsignedp
, methods
);
2455 temp
= expand_binop (mode
,
2456 left
? lrotate_optab
: rrotate_optab
,
2457 shifted
, op1
, target
, unsignedp
, methods
);
2460 temp
= expand_binop (mode
,
2461 left
? lshift_optab
: rshift_uns_optab
,
2462 shifted
, op1
, target
, unsignedp
, methods
);
2464 /* Do arithmetic shifts.
2465 Also, if we are going to widen the operand, we can just as well
2466 use an arithmetic right-shift instead of a logical one. */
2467 if (temp
== 0 && ! rotate
2468 && (! unsignedp
|| (! left
&& methods
== OPTAB_WIDEN
)))
2470 enum optab_methods methods1
= methods
;
2472 /* If trying to widen a log shift to an arithmetic shift,
2473 don't accept an arithmetic shift of the same size. */
2475 methods1
= OPTAB_MUST_WIDEN
;
2477 /* Arithmetic shift */
2479 temp
= expand_binop (mode
,
2480 left
? lshift_optab
: rshift_arith_optab
,
2481 shifted
, op1
, target
, unsignedp
, methods1
);
2484 /* We used to try extzv here for logical right shifts, but that was
2485 only useful for one machine, the VAX, and caused poor code
2486 generation there for lshrdi3, so the code was deleted and a
2487 define_expand for lshrsi3 was added to vax.md. */
2494 /* Output a shift instruction for expression code CODE,
2495 with SHIFTED being the rtx for the value to shift,
2496 and AMOUNT the amount to shift by.
2497 Store the result in the rtx TARGET, if that is convenient.
2498 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2499 Return the rtx for where the value is. */
2502 expand_shift (enum tree_code code
, machine_mode mode
, rtx shifted
,
2503 int amount
, rtx target
, int unsignedp
)
2505 return expand_shift_1 (code
, mode
,
2506 shifted
, GEN_INT (amount
), target
, unsignedp
);
2509 /* Output a shift instruction for expression code CODE,
2510 with SHIFTED being the rtx for the value to shift,
2511 and AMOUNT the tree for the amount to shift by.
2512 Store the result in the rtx TARGET, if that is convenient.
2513 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2514 Return the rtx for where the value is. */
2517 expand_variable_shift (enum tree_code code
, machine_mode mode
, rtx shifted
,
2518 tree amount
, rtx target
, int unsignedp
)
2520 return expand_shift_1 (code
, mode
,
2521 shifted
, expand_normal (amount
), target
, unsignedp
);
2525 /* Indicates the type of fixup needed after a constant multiplication.
2526 BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2527 the result should be negated, and ADD_VARIANT means that the
2528 multiplicand should be added to the result. */
2529 enum mult_variant
{basic_variant
, negate_variant
, add_variant
};
2531 static void synth_mult (struct algorithm
*, unsigned HOST_WIDE_INT
,
2532 const struct mult_cost
*, machine_mode mode
);
2533 static bool choose_mult_variant (machine_mode
, HOST_WIDE_INT
,
2534 struct algorithm
*, enum mult_variant
*, int);
2535 static rtx
expand_mult_const (machine_mode
, rtx
, HOST_WIDE_INT
, rtx
,
2536 const struct algorithm
*, enum mult_variant
);
2537 static unsigned HOST_WIDE_INT
invert_mod2n (unsigned HOST_WIDE_INT
, int);
2538 static rtx
extract_high_half (machine_mode
, rtx
);
2539 static rtx
expmed_mult_highpart (machine_mode
, rtx
, rtx
, rtx
, int, int);
2540 static rtx
expmed_mult_highpart_optab (machine_mode
, rtx
, rtx
, rtx
,
2542 /* Compute and return the best algorithm for multiplying by T.
2543 The algorithm must cost less than cost_limit
2544 If retval.cost >= COST_LIMIT, no algorithm was found and all
2545 other field of the returned struct are undefined.
2546 MODE is the machine mode of the multiplication. */
2549 synth_mult (struct algorithm
*alg_out
, unsigned HOST_WIDE_INT t
,
2550 const struct mult_cost
*cost_limit
, machine_mode mode
)
2553 struct algorithm
*alg_in
, *best_alg
;
2554 struct mult_cost best_cost
;
2555 struct mult_cost new_limit
;
2556 int op_cost
, op_latency
;
2557 unsigned HOST_WIDE_INT orig_t
= t
;
2558 unsigned HOST_WIDE_INT q
;
2559 int maxm
, hash_index
;
2560 bool cache_hit
= false;
2561 enum alg_code cache_alg
= alg_zero
;
2562 bool speed
= optimize_insn_for_speed_p ();
2564 struct alg_hash_entry
*entry_ptr
;
2566 /* Indicate that no algorithm is yet found. If no algorithm
2567 is found, this value will be returned and indicate failure. */
2568 alg_out
->cost
.cost
= cost_limit
->cost
+ 1;
2569 alg_out
->cost
.latency
= cost_limit
->latency
+ 1;
2571 if (cost_limit
->cost
< 0
2572 || (cost_limit
->cost
== 0 && cost_limit
->latency
<= 0))
2575 /* Be prepared for vector modes. */
2576 imode
= GET_MODE_INNER (mode
);
2578 maxm
= MIN (BITS_PER_WORD
, GET_MODE_BITSIZE (imode
));
2580 /* Restrict the bits of "t" to the multiplication's mode. */
2581 t
&= GET_MODE_MASK (imode
);
2583 /* t == 1 can be done in zero cost. */
2587 alg_out
->cost
.cost
= 0;
2588 alg_out
->cost
.latency
= 0;
2589 alg_out
->op
[0] = alg_m
;
2593 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2597 if (MULT_COST_LESS (cost_limit
, zero_cost (speed
)))
2602 alg_out
->cost
.cost
= zero_cost (speed
);
2603 alg_out
->cost
.latency
= zero_cost (speed
);
2604 alg_out
->op
[0] = alg_zero
;
2609 /* We'll be needing a couple extra algorithm structures now. */
2611 alg_in
= XALLOCA (struct algorithm
);
2612 best_alg
= XALLOCA (struct algorithm
);
2613 best_cost
= *cost_limit
;
2615 /* Compute the hash index. */
2616 hash_index
= (t
^ (unsigned int) mode
^ (speed
* 256)) % NUM_ALG_HASH_ENTRIES
;
2618 /* See if we already know what to do for T. */
2619 entry_ptr
= alg_hash_entry_ptr (hash_index
);
2620 if (entry_ptr
->t
== t
2621 && entry_ptr
->mode
== mode
2622 && entry_ptr
->mode
== mode
2623 && entry_ptr
->speed
== speed
2624 && entry_ptr
->alg
!= alg_unknown
)
2626 cache_alg
= entry_ptr
->alg
;
2628 if (cache_alg
== alg_impossible
)
2630 /* The cache tells us that it's impossible to synthesize
2631 multiplication by T within entry_ptr->cost. */
2632 if (!CHEAPER_MULT_COST (&entry_ptr
->cost
, cost_limit
))
2633 /* COST_LIMIT is at least as restrictive as the one
2634 recorded in the hash table, in which case we have no
2635 hope of synthesizing a multiplication. Just
2639 /* If we get here, COST_LIMIT is less restrictive than the
2640 one recorded in the hash table, so we may be able to
2641 synthesize a multiplication. Proceed as if we didn't
2642 have the cache entry. */
2646 if (CHEAPER_MULT_COST (cost_limit
, &entry_ptr
->cost
))
2647 /* The cached algorithm shows that this multiplication
2648 requires more cost than COST_LIMIT. Just return. This
2649 way, we don't clobber this cache entry with
2650 alg_impossible but retain useful information. */
2662 goto do_alg_addsub_t_m2
;
2664 case alg_add_factor
:
2665 case alg_sub_factor
:
2666 goto do_alg_addsub_factor
;
2669 goto do_alg_add_t2_m
;
2672 goto do_alg_sub_t2_m
;
2680 /* If we have a group of zero bits at the low-order part of T, try
2681 multiplying by the remaining bits and then doing a shift. */
2686 m
= floor_log2 (t
& -t
); /* m = number of low zero bits */
2690 /* The function expand_shift will choose between a shift and
2691 a sequence of additions, so the observed cost is given as
2692 MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)). */
2693 op_cost
= m
* add_cost (speed
, mode
);
2694 if (shift_cost (speed
, mode
, m
) < op_cost
)
2695 op_cost
= shift_cost (speed
, mode
, m
);
2696 new_limit
.cost
= best_cost
.cost
- op_cost
;
2697 new_limit
.latency
= best_cost
.latency
- op_cost
;
2698 synth_mult (alg_in
, q
, &new_limit
, mode
);
2700 alg_in
->cost
.cost
+= op_cost
;
2701 alg_in
->cost
.latency
+= op_cost
;
2702 if (CHEAPER_MULT_COST (&alg_in
->cost
, &best_cost
))
2704 best_cost
= alg_in
->cost
;
2705 std::swap (alg_in
, best_alg
);
2706 best_alg
->log
[best_alg
->ops
] = m
;
2707 best_alg
->op
[best_alg
->ops
] = alg_shift
;
2710 /* See if treating ORIG_T as a signed number yields a better
2711 sequence. Try this sequence only for a negative ORIG_T
2712 as it would be useless for a non-negative ORIG_T. */
2713 if ((HOST_WIDE_INT
) orig_t
< 0)
2715 /* Shift ORIG_T as follows because a right shift of a
2716 negative-valued signed type is implementation
2718 q
= ~(~orig_t
>> m
);
2719 /* The function expand_shift will choose between a shift
2720 and a sequence of additions, so the observed cost is
2721 given as MIN (m * add_cost(speed, mode),
2722 shift_cost(speed, mode, m)). */
2723 op_cost
= m
* add_cost (speed
, mode
);
2724 if (shift_cost (speed
, mode
, m
) < op_cost
)
2725 op_cost
= shift_cost (speed
, mode
, m
);
2726 new_limit
.cost
= best_cost
.cost
- op_cost
;
2727 new_limit
.latency
= best_cost
.latency
- op_cost
;
2728 synth_mult (alg_in
, q
, &new_limit
, mode
);
2730 alg_in
->cost
.cost
+= op_cost
;
2731 alg_in
->cost
.latency
+= op_cost
;
2732 if (CHEAPER_MULT_COST (&alg_in
->cost
, &best_cost
))
2734 best_cost
= alg_in
->cost
;
2735 std::swap (alg_in
, best_alg
);
2736 best_alg
->log
[best_alg
->ops
] = m
;
2737 best_alg
->op
[best_alg
->ops
] = alg_shift
;
2745 /* If we have an odd number, add or subtract one. */
2748 unsigned HOST_WIDE_INT w
;
2751 for (w
= 1; (w
& t
) != 0; w
<<= 1)
2753 /* If T was -1, then W will be zero after the loop. This is another
2754 case where T ends with ...111. Handling this with (T + 1) and
2755 subtract 1 produces slightly better code and results in algorithm
2756 selection much faster than treating it like the ...0111 case
2760 /* Reject the case where t is 3.
2761 Thus we prefer addition in that case. */
2764 /* T ends with ...111. Multiply by (T + 1) and subtract T. */
2766 op_cost
= add_cost (speed
, mode
);
2767 new_limit
.cost
= best_cost
.cost
- op_cost
;
2768 new_limit
.latency
= best_cost
.latency
- op_cost
;
2769 synth_mult (alg_in
, t
+ 1, &new_limit
, mode
);
2771 alg_in
->cost
.cost
+= op_cost
;
2772 alg_in
->cost
.latency
+= op_cost
;
2773 if (CHEAPER_MULT_COST (&alg_in
->cost
, &best_cost
))
2775 best_cost
= alg_in
->cost
;
2776 std::swap (alg_in
, best_alg
);
2777 best_alg
->log
[best_alg
->ops
] = 0;
2778 best_alg
->op
[best_alg
->ops
] = alg_sub_t_m2
;
2783 /* T ends with ...01 or ...011. Multiply by (T - 1) and add T. */
2785 op_cost
= add_cost (speed
, mode
);
2786 new_limit
.cost
= best_cost
.cost
- op_cost
;
2787 new_limit
.latency
= best_cost
.latency
- op_cost
;
2788 synth_mult (alg_in
, t
- 1, &new_limit
, mode
);
2790 alg_in
->cost
.cost
+= op_cost
;
2791 alg_in
->cost
.latency
+= op_cost
;
2792 if (CHEAPER_MULT_COST (&alg_in
->cost
, &best_cost
))
2794 best_cost
= alg_in
->cost
;
2795 std::swap (alg_in
, best_alg
);
2796 best_alg
->log
[best_alg
->ops
] = 0;
2797 best_alg
->op
[best_alg
->ops
] = alg_add_t_m2
;
2801 /* We may be able to calculate a * -7, a * -15, a * -31, etc
2802 quickly with a - a * n for some appropriate constant n. */
2803 m
= exact_log2 (-orig_t
+ 1);
2804 if (m
>= 0 && m
< maxm
)
2806 op_cost
= add_cost (speed
, mode
) + shift_cost (speed
, mode
, m
);
2807 /* If the target has a cheap shift-and-subtract insn use
2808 that in preference to a shift insn followed by a sub insn.
2809 Assume that the shift-and-sub is "atomic" with a latency
2810 equal to it's cost, otherwise assume that on superscalar
2811 hardware the shift may be executed concurrently with the
2812 earlier steps in the algorithm. */
2813 if (shiftsub1_cost (speed
, mode
, m
) <= op_cost
)
2815 op_cost
= shiftsub1_cost (speed
, mode
, m
);
2816 op_latency
= op_cost
;
2819 op_latency
= add_cost (speed
, mode
);
2821 new_limit
.cost
= best_cost
.cost
- op_cost
;
2822 new_limit
.latency
= best_cost
.latency
- op_latency
;
2823 synth_mult (alg_in
, (unsigned HOST_WIDE_INT
) (-orig_t
+ 1) >> m
,
2826 alg_in
->cost
.cost
+= op_cost
;
2827 alg_in
->cost
.latency
+= op_latency
;
2828 if (CHEAPER_MULT_COST (&alg_in
->cost
, &best_cost
))
2830 best_cost
= alg_in
->cost
;
2831 std::swap (alg_in
, best_alg
);
2832 best_alg
->log
[best_alg
->ops
] = m
;
2833 best_alg
->op
[best_alg
->ops
] = alg_sub_t_m2
;
2841 /* Look for factors of t of the form
2842 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2843 If we find such a factor, we can multiply by t using an algorithm that
2844 multiplies by q, shift the result by m and add/subtract it to itself.
2846 We search for large factors first and loop down, even if large factors
2847 are less probable than small; if we find a large factor we will find a
2848 good sequence quickly, and therefore be able to prune (by decreasing
2849 COST_LIMIT) the search. */
2851 do_alg_addsub_factor
:
2852 for (m
= floor_log2 (t
- 1); m
>= 2; m
--)
2854 unsigned HOST_WIDE_INT d
;
2856 d
= ((unsigned HOST_WIDE_INT
) 1 << m
) + 1;
2857 if (t
% d
== 0 && t
> d
&& m
< maxm
2858 && (!cache_hit
|| cache_alg
== alg_add_factor
))
2860 op_cost
= add_cost (speed
, mode
) + shift_cost (speed
, mode
, m
);
2861 if (shiftadd_cost (speed
, mode
, m
) <= op_cost
)
2862 op_cost
= shiftadd_cost (speed
, mode
, m
);
2864 op_latency
= op_cost
;
2867 new_limit
.cost
= best_cost
.cost
- op_cost
;
2868 new_limit
.latency
= best_cost
.latency
- op_latency
;
2869 synth_mult (alg_in
, t
/ d
, &new_limit
, mode
);
2871 alg_in
->cost
.cost
+= op_cost
;
2872 alg_in
->cost
.latency
+= op_latency
;
2873 if (alg_in
->cost
.latency
< op_cost
)
2874 alg_in
->cost
.latency
= op_cost
;
2875 if (CHEAPER_MULT_COST (&alg_in
->cost
, &best_cost
))
2877 best_cost
= alg_in
->cost
;
2878 std::swap (alg_in
, best_alg
);
2879 best_alg
->log
[best_alg
->ops
] = m
;
2880 best_alg
->op
[best_alg
->ops
] = alg_add_factor
;
2882 /* Other factors will have been taken care of in the recursion. */
2886 d
= ((unsigned HOST_WIDE_INT
) 1 << m
) - 1;
2887 if (t
% d
== 0 && t
> d
&& m
< maxm
2888 && (!cache_hit
|| cache_alg
== alg_sub_factor
))
2890 op_cost
= add_cost (speed
, mode
) + shift_cost (speed
, mode
, m
);
2891 if (shiftsub0_cost (speed
, mode
, m
) <= op_cost
)
2892 op_cost
= shiftsub0_cost (speed
, mode
, m
);
2894 op_latency
= op_cost
;
2896 new_limit
.cost
= best_cost
.cost
- op_cost
;
2897 new_limit
.latency
= best_cost
.latency
- op_latency
;
2898 synth_mult (alg_in
, t
/ d
, &new_limit
, mode
);
2900 alg_in
->cost
.cost
+= op_cost
;
2901 alg_in
->cost
.latency
+= op_latency
;
2902 if (alg_in
->cost
.latency
< op_cost
)
2903 alg_in
->cost
.latency
= op_cost
;
2904 if (CHEAPER_MULT_COST (&alg_in
->cost
, &best_cost
))
2906 best_cost
= alg_in
->cost
;
2907 std::swap (alg_in
, best_alg
);
2908 best_alg
->log
[best_alg
->ops
] = m
;
2909 best_alg
->op
[best_alg
->ops
] = alg_sub_factor
;
2917 /* Try shift-and-add (load effective address) instructions,
2918 i.e. do a*3, a*5, a*9. */
2925 if (m
>= 0 && m
< maxm
)
2927 op_cost
= shiftadd_cost (speed
, mode
, m
);
2928 new_limit
.cost
= best_cost
.cost
- op_cost
;
2929 new_limit
.latency
= best_cost
.latency
- op_cost
;
2930 synth_mult (alg_in
, (t
- 1) >> m
, &new_limit
, mode
);
2932 alg_in
->cost
.cost
+= op_cost
;
2933 alg_in
->cost
.latency
+= op_cost
;
2934 if (CHEAPER_MULT_COST (&alg_in
->cost
, &best_cost
))
2936 best_cost
= alg_in
->cost
;
2937 std::swap (alg_in
, best_alg
);
2938 best_alg
->log
[best_alg
->ops
] = m
;
2939 best_alg
->op
[best_alg
->ops
] = alg_add_t2_m
;
2949 if (m
>= 0 && m
< maxm
)
2951 op_cost
= shiftsub0_cost (speed
, mode
, m
);
2952 new_limit
.cost
= best_cost
.cost
- op_cost
;
2953 new_limit
.latency
= best_cost
.latency
- op_cost
;
2954 synth_mult (alg_in
, (t
+ 1) >> m
, &new_limit
, mode
);
2956 alg_in
->cost
.cost
+= op_cost
;
2957 alg_in
->cost
.latency
+= op_cost
;
2958 if (CHEAPER_MULT_COST (&alg_in
->cost
, &best_cost
))
2960 best_cost
= alg_in
->cost
;
2961 std::swap (alg_in
, best_alg
);
2962 best_alg
->log
[best_alg
->ops
] = m
;
2963 best_alg
->op
[best_alg
->ops
] = alg_sub_t2_m
;
2971 /* If best_cost has not decreased, we have not found any algorithm. */
2972 if (!CHEAPER_MULT_COST (&best_cost
, cost_limit
))
2974 /* We failed to find an algorithm. Record alg_impossible for
2975 this case (that is, <T, MODE, COST_LIMIT>) so that next time
2976 we are asked to find an algorithm for T within the same or
2977 lower COST_LIMIT, we can immediately return to the
2980 entry_ptr
->mode
= mode
;
2981 entry_ptr
->speed
= speed
;
2982 entry_ptr
->alg
= alg_impossible
;
2983 entry_ptr
->cost
= *cost_limit
;
2987 /* Cache the result. */
2991 entry_ptr
->mode
= mode
;
2992 entry_ptr
->speed
= speed
;
2993 entry_ptr
->alg
= best_alg
->op
[best_alg
->ops
];
2994 entry_ptr
->cost
.cost
= best_cost
.cost
;
2995 entry_ptr
->cost
.latency
= best_cost
.latency
;
2998 /* If we are getting a too long sequence for `struct algorithm'
2999 to record, make this search fail. */
3000 if (best_alg
->ops
== MAX_BITS_PER_WORD
)
3003 /* Copy the algorithm from temporary space to the space at alg_out.
3004 We avoid using structure assignment because the majority of
3005 best_alg is normally undefined, and this is a critical function. */
3006 alg_out
->ops
= best_alg
->ops
+ 1;
3007 alg_out
->cost
= best_cost
;
3008 memcpy (alg_out
->op
, best_alg
->op
,
3009 alg_out
->ops
* sizeof *alg_out
->op
);
3010 memcpy (alg_out
->log
, best_alg
->log
,
3011 alg_out
->ops
* sizeof *alg_out
->log
);
3014 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
3015 Try three variations:
3017 - a shift/add sequence based on VAL itself
3018 - a shift/add sequence based on -VAL, followed by a negation
3019 - a shift/add sequence based on VAL - 1, followed by an addition.
3021 Return true if the cheapest of these cost less than MULT_COST,
3022 describing the algorithm in *ALG and final fixup in *VARIANT. */
3025 choose_mult_variant (machine_mode mode
, HOST_WIDE_INT val
,
3026 struct algorithm
*alg
, enum mult_variant
*variant
,
3029 struct algorithm alg2
;
3030 struct mult_cost limit
;
3032 bool speed
= optimize_insn_for_speed_p ();
3034 /* Fail quickly for impossible bounds. */
3038 /* Ensure that mult_cost provides a reasonable upper bound.
3039 Any constant multiplication can be performed with less
3040 than 2 * bits additions. */
3041 op_cost
= 2 * GET_MODE_UNIT_BITSIZE (mode
) * add_cost (speed
, mode
);
3042 if (mult_cost
> op_cost
)
3043 mult_cost
= op_cost
;
3045 *variant
= basic_variant
;
3046 limit
.cost
= mult_cost
;
3047 limit
.latency
= mult_cost
;
3048 synth_mult (alg
, val
, &limit
, mode
);
3050 /* This works only if the inverted value actually fits in an
3052 if (HOST_BITS_PER_INT
>= GET_MODE_UNIT_BITSIZE (mode
))
3054 op_cost
= neg_cost (speed
, mode
);
3055 if (MULT_COST_LESS (&alg
->cost
, mult_cost
))
3057 limit
.cost
= alg
->cost
.cost
- op_cost
;
3058 limit
.latency
= alg
->cost
.latency
- op_cost
;
3062 limit
.cost
= mult_cost
- op_cost
;
3063 limit
.latency
= mult_cost
- op_cost
;
3066 synth_mult (&alg2
, -val
, &limit
, mode
);
3067 alg2
.cost
.cost
+= op_cost
;
3068 alg2
.cost
.latency
+= op_cost
;
3069 if (CHEAPER_MULT_COST (&alg2
.cost
, &alg
->cost
))
3070 *alg
= alg2
, *variant
= negate_variant
;
3073 /* This proves very useful for division-by-constant. */
3074 op_cost
= add_cost (speed
, mode
);
3075 if (MULT_COST_LESS (&alg
->cost
, mult_cost
))
3077 limit
.cost
= alg
->cost
.cost
- op_cost
;
3078 limit
.latency
= alg
->cost
.latency
- op_cost
;
3082 limit
.cost
= mult_cost
- op_cost
;
3083 limit
.latency
= mult_cost
- op_cost
;
3086 synth_mult (&alg2
, val
- 1, &limit
, mode
);
3087 alg2
.cost
.cost
+= op_cost
;
3088 alg2
.cost
.latency
+= op_cost
;
3089 if (CHEAPER_MULT_COST (&alg2
.cost
, &alg
->cost
))
3090 *alg
= alg2
, *variant
= add_variant
;
3092 return MULT_COST_LESS (&alg
->cost
, mult_cost
);
3095 /* A subroutine of expand_mult, used for constant multiplications.
3096 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
3097 convenient. Use the shift/add sequence described by ALG and apply
3098 the final fixup specified by VARIANT. */
3101 expand_mult_const (machine_mode mode
, rtx op0
, HOST_WIDE_INT val
,
3102 rtx target
, const struct algorithm
*alg
,
3103 enum mult_variant variant
)
3105 HOST_WIDE_INT val_so_far
;
3111 /* Avoid referencing memory over and over and invalid sharing
3113 op0
= force_reg (mode
, op0
);
3115 /* ACCUM starts out either as OP0 or as a zero, depending on
3116 the first operation. */
3118 if (alg
->op
[0] == alg_zero
)
3120 accum
= copy_to_mode_reg (mode
, CONST0_RTX (mode
));
3123 else if (alg
->op
[0] == alg_m
)
3125 accum
= copy_to_mode_reg (mode
, op0
);
3131 for (opno
= 1; opno
< alg
->ops
; opno
++)
3133 int log
= alg
->log
[opno
];
3134 rtx shift_subtarget
= optimize
? 0 : accum
;
3136 = (opno
== alg
->ops
- 1 && target
!= 0 && variant
!= add_variant
3139 rtx accum_target
= optimize
? 0 : accum
;
3142 switch (alg
->op
[opno
])
3145 tem
= expand_shift (LSHIFT_EXPR
, mode
, accum
, log
, NULL_RTX
, 0);
3146 /* REG_EQUAL note will be attached to the following insn. */
3147 emit_move_insn (accum
, tem
);
3152 tem
= expand_shift (LSHIFT_EXPR
, mode
, op0
, log
, NULL_RTX
, 0);
3153 accum
= force_operand (gen_rtx_PLUS (mode
, accum
, tem
),
3154 add_target
? add_target
: accum_target
);
3155 val_so_far
+= (HOST_WIDE_INT
) 1 << log
;
3159 tem
= expand_shift (LSHIFT_EXPR
, mode
, op0
, log
, NULL_RTX
, 0);
3160 accum
= force_operand (gen_rtx_MINUS (mode
, accum
, tem
),
3161 add_target
? add_target
: accum_target
);
3162 val_so_far
-= (HOST_WIDE_INT
) 1 << log
;
3166 accum
= expand_shift (LSHIFT_EXPR
, mode
, accum
,
3167 log
, shift_subtarget
, 0);
3168 accum
= force_operand (gen_rtx_PLUS (mode
, accum
, op0
),
3169 add_target
? add_target
: accum_target
);
3170 val_so_far
= (val_so_far
<< log
) + 1;
3174 accum
= expand_shift (LSHIFT_EXPR
, mode
, accum
,
3175 log
, shift_subtarget
, 0);
3176 accum
= force_operand (gen_rtx_MINUS (mode
, accum
, op0
),
3177 add_target
? add_target
: accum_target
);
3178 val_so_far
= (val_so_far
<< log
) - 1;
3181 case alg_add_factor
:
3182 tem
= expand_shift (LSHIFT_EXPR
, mode
, accum
, log
, NULL_RTX
, 0);
3183 accum
= force_operand (gen_rtx_PLUS (mode
, accum
, tem
),
3184 add_target
? add_target
: accum_target
);
3185 val_so_far
+= val_so_far
<< log
;
3188 case alg_sub_factor
:
3189 tem
= expand_shift (LSHIFT_EXPR
, mode
, accum
, log
, NULL_RTX
, 0);
3190 accum
= force_operand (gen_rtx_MINUS (mode
, tem
, accum
),
3192 ? add_target
: (optimize
? 0 : tem
)));
3193 val_so_far
= (val_so_far
<< log
) - val_so_far
;
3200 if (SCALAR_INT_MODE_P (mode
))
3202 /* Write a REG_EQUAL note on the last insn so that we can cse
3203 multiplication sequences. Note that if ACCUM is a SUBREG,
3204 we've set the inner register and must properly indicate that. */
3205 tem
= op0
, nmode
= mode
;
3206 accum_inner
= accum
;
3207 if (GET_CODE (accum
) == SUBREG
)
3209 accum_inner
= SUBREG_REG (accum
);
3210 nmode
= GET_MODE (accum_inner
);
3211 tem
= gen_lowpart (nmode
, op0
);
3214 insn
= get_last_insn ();
3215 set_dst_reg_note (insn
, REG_EQUAL
,
3216 gen_rtx_MULT (nmode
, tem
,
3217 gen_int_mode (val_so_far
, nmode
)),
3222 if (variant
== negate_variant
)
3224 val_so_far
= -val_so_far
;
3225 accum
= expand_unop (mode
, neg_optab
, accum
, target
, 0);
3227 else if (variant
== add_variant
)
3229 val_so_far
= val_so_far
+ 1;
3230 accum
= force_operand (gen_rtx_PLUS (mode
, accum
, op0
), target
);
3233 /* Compare only the bits of val and val_so_far that are significant
3234 in the result mode, to avoid sign-/zero-extension confusion. */
3235 nmode
= GET_MODE_INNER (mode
);
3236 val
&= GET_MODE_MASK (nmode
);
3237 val_so_far
&= GET_MODE_MASK (nmode
);
3238 gcc_assert (val
== val_so_far
);
3243 /* Perform a multiplication and return an rtx for the result.
3244 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3245 TARGET is a suggestion for where to store the result (an rtx).
3247 We check specially for a constant integer as OP1.
3248 If you want this check for OP0 as well, then before calling
3249 you should swap the two operands if OP0 would be constant. */
3252 expand_mult (machine_mode mode
, rtx op0
, rtx op1
, rtx target
,
3255 enum mult_variant variant
;
3256 struct algorithm algorithm
;
3259 bool speed
= optimize_insn_for_speed_p ();
3260 bool do_trapv
= flag_trapv
&& SCALAR_INT_MODE_P (mode
) && !unsignedp
;
3262 if (CONSTANT_P (op0
))
3263 std::swap (op0
, op1
);
3265 /* For vectors, there are several simplifications that can be made if
3266 all elements of the vector constant are identical. */
3267 scalar_op1
= unwrap_const_vec_duplicate (op1
);
3269 if (INTEGRAL_MODE_P (mode
))
3272 HOST_WIDE_INT coeff
;
3276 if (op1
== CONST0_RTX (mode
))
3278 if (op1
== CONST1_RTX (mode
))
3280 if (op1
== CONSTM1_RTX (mode
))
3281 return expand_unop (mode
, do_trapv
? negv_optab
: neg_optab
,
3287 /* If mode is integer vector mode, check if the backend supports
3288 vector lshift (by scalar or vector) at all. If not, we can't use
3289 synthetized multiply. */
3290 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
3291 && optab_handler (vashl_optab
, mode
) == CODE_FOR_nothing
3292 && optab_handler (ashl_optab
, mode
) == CODE_FOR_nothing
)
3295 /* These are the operations that are potentially turned into
3296 a sequence of shifts and additions. */
3297 mode_bitsize
= GET_MODE_UNIT_BITSIZE (mode
);
3299 /* synth_mult does an `unsigned int' multiply. As long as the mode is
3300 less than or equal in size to `unsigned int' this doesn't matter.
3301 If the mode is larger than `unsigned int', then synth_mult works
3302 only if the constant value exactly fits in an `unsigned int' without
3303 any truncation. This means that multiplying by negative values does
3304 not work; results are off by 2^32 on a 32 bit machine. */
3305 if (CONST_INT_P (scalar_op1
))
3307 coeff
= INTVAL (scalar_op1
);
3310 #if TARGET_SUPPORTS_WIDE_INT
3311 else if (CONST_WIDE_INT_P (scalar_op1
))
3313 else if (CONST_DOUBLE_AS_INT_P (scalar_op1
))
3316 int shift
= wi::exact_log2 (std::make_pair (scalar_op1
, mode
));
3317 /* Perfect power of 2 (other than 1, which is handled above). */
3319 return expand_shift (LSHIFT_EXPR
, mode
, op0
,
3320 shift
, target
, unsignedp
);
3327 /* We used to test optimize here, on the grounds that it's better to
3328 produce a smaller program when -O is not used. But this causes
3329 such a terrible slowdown sometimes that it seems better to always
3332 /* Special case powers of two. */
3333 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff
)
3334 && !(is_neg
&& mode_bitsize
> HOST_BITS_PER_WIDE_INT
))
3335 return expand_shift (LSHIFT_EXPR
, mode
, op0
,
3336 floor_log2 (coeff
), target
, unsignedp
);
3338 fake_reg
= gen_raw_REG (mode
, LAST_VIRTUAL_REGISTER
+ 1);
3340 /* Attempt to handle multiplication of DImode values by negative
3341 coefficients, by performing the multiplication by a positive
3342 multiplier and then inverting the result. */
3343 if (is_neg
&& mode_bitsize
> HOST_BITS_PER_WIDE_INT
)
3345 /* Its safe to use -coeff even for INT_MIN, as the
3346 result is interpreted as an unsigned coefficient.
3347 Exclude cost of op0 from max_cost to match the cost
3348 calculation of the synth_mult. */
3349 coeff
= -(unsigned HOST_WIDE_INT
) coeff
;
3350 max_cost
= (set_src_cost (gen_rtx_MULT (mode
, fake_reg
, op1
),
3352 - neg_cost (speed
, mode
));
3356 /* Special case powers of two. */
3357 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff
))
3359 rtx temp
= expand_shift (LSHIFT_EXPR
, mode
, op0
,
3360 floor_log2 (coeff
), target
, unsignedp
);
3361 return expand_unop (mode
, neg_optab
, temp
, target
, 0);
3364 if (choose_mult_variant (mode
, coeff
, &algorithm
, &variant
,
3367 rtx temp
= expand_mult_const (mode
, op0
, coeff
, NULL_RTX
,
3368 &algorithm
, variant
);
3369 return expand_unop (mode
, neg_optab
, temp
, target
, 0);
3374 /* Exclude cost of op0 from max_cost to match the cost
3375 calculation of the synth_mult. */
3376 max_cost
= set_src_cost (gen_rtx_MULT (mode
, fake_reg
, op1
), mode
, speed
);
3377 if (choose_mult_variant (mode
, coeff
, &algorithm
, &variant
, max_cost
))
3378 return expand_mult_const (mode
, op0
, coeff
, target
,
3379 &algorithm
, variant
);
3383 /* Expand x*2.0 as x+x. */
3384 if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1
)
3385 && real_equal (CONST_DOUBLE_REAL_VALUE (scalar_op1
), &dconst2
))
3387 op0
= force_reg (GET_MODE (op0
), op0
);
3388 return expand_binop (mode
, add_optab
, op0
, op0
,
3389 target
, unsignedp
, OPTAB_LIB_WIDEN
);
3392 /* This used to use umul_optab if unsigned, but for non-widening multiply
3393 there is no difference between signed and unsigned. */
3394 op0
= expand_binop (mode
, do_trapv
? smulv_optab
: smul_optab
,
3395 op0
, op1
, target
, unsignedp
, OPTAB_LIB_WIDEN
);
3400 /* Return a cost estimate for multiplying a register by the given
3401 COEFFicient in the given MODE and SPEED. */
3404 mult_by_coeff_cost (HOST_WIDE_INT coeff
, machine_mode mode
, bool speed
)
3407 struct algorithm algorithm
;
3408 enum mult_variant variant
;
3410 rtx fake_reg
= gen_raw_REG (mode
, LAST_VIRTUAL_REGISTER
+ 1);
3411 max_cost
= set_src_cost (gen_rtx_MULT (mode
, fake_reg
, fake_reg
),
3413 if (choose_mult_variant (mode
, coeff
, &algorithm
, &variant
, max_cost
))
3414 return algorithm
.cost
.cost
;
3419 /* Perform a widening multiplication and return an rtx for the result.
3420 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3421 TARGET is a suggestion for where to store the result (an rtx).
3422 THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3423 or smul_widen_optab.
3425 We check specially for a constant integer as OP1, comparing the
3426 cost of a widening multiply against the cost of a sequence of shifts
3430 expand_widening_mult (machine_mode mode
, rtx op0
, rtx op1
, rtx target
,
3431 int unsignedp
, optab this_optab
)
3433 bool speed
= optimize_insn_for_speed_p ();
3436 if (CONST_INT_P (op1
)
3437 && GET_MODE (op0
) != VOIDmode
3438 && (cop1
= convert_modes (mode
, GET_MODE (op0
), op1
,
3439 this_optab
== umul_widen_optab
))
3440 && CONST_INT_P (cop1
)
3441 && (INTVAL (cop1
) >= 0
3442 || HWI_COMPUTABLE_MODE_P (mode
)))
3444 HOST_WIDE_INT coeff
= INTVAL (cop1
);
3446 enum mult_variant variant
;
3447 struct algorithm algorithm
;
3450 return CONST0_RTX (mode
);
3452 /* Special case powers of two. */
3453 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff
))
3455 op0
= convert_to_mode (mode
, op0
, this_optab
== umul_widen_optab
);
3456 return expand_shift (LSHIFT_EXPR
, mode
, op0
,
3457 floor_log2 (coeff
), target
, unsignedp
);
3460 /* Exclude cost of op0 from max_cost to match the cost
3461 calculation of the synth_mult. */
3462 max_cost
= mul_widen_cost (speed
, mode
);
3463 if (choose_mult_variant (mode
, coeff
, &algorithm
, &variant
,
3466 op0
= convert_to_mode (mode
, op0
, this_optab
== umul_widen_optab
);
3467 return expand_mult_const (mode
, op0
, coeff
, target
,
3468 &algorithm
, variant
);
3471 return expand_binop (mode
, this_optab
, op0
, op1
, target
,
3472 unsignedp
, OPTAB_LIB_WIDEN
);
3475 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3476 replace division by D, and put the least significant N bits of the result
3477 in *MULTIPLIER_PTR and return the most significant bit.
3479 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3480 needed precision is in PRECISION (should be <= N).
3482 PRECISION should be as small as possible so this function can choose
3483 multiplier more freely.
3485 The rounded-up logarithm of D is placed in *lgup_ptr. A shift count that
3486 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3488 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3489 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
3491 unsigned HOST_WIDE_INT
3492 choose_multiplier (unsigned HOST_WIDE_INT d
, int n
, int precision
,
3493 unsigned HOST_WIDE_INT
*multiplier_ptr
,
3494 int *post_shift_ptr
, int *lgup_ptr
)
3496 int lgup
, post_shift
;
3499 /* lgup = ceil(log2(divisor)); */
3500 lgup
= ceil_log2 (d
);
3502 gcc_assert (lgup
<= n
);
3505 pow2
= n
+ lgup
- precision
;
3507 /* mlow = 2^(N + lgup)/d */
3508 wide_int val
= wi::set_bit_in_zero (pow
, HOST_BITS_PER_DOUBLE_INT
);
3509 wide_int mlow
= wi::udiv_trunc (val
, d
);
3511 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3512 val
|= wi::set_bit_in_zero (pow2
, HOST_BITS_PER_DOUBLE_INT
);
3513 wide_int mhigh
= wi::udiv_trunc (val
, d
);
3515 /* If precision == N, then mlow, mhigh exceed 2^N
3516 (but they do not exceed 2^(N+1)). */
3518 /* Reduce to lowest terms. */
3519 for (post_shift
= lgup
; post_shift
> 0; post_shift
--)
3521 unsigned HOST_WIDE_INT ml_lo
= wi::extract_uhwi (mlow
, 1,
3522 HOST_BITS_PER_WIDE_INT
);
3523 unsigned HOST_WIDE_INT mh_lo
= wi::extract_uhwi (mhigh
, 1,
3524 HOST_BITS_PER_WIDE_INT
);
3528 mlow
= wi::uhwi (ml_lo
, HOST_BITS_PER_DOUBLE_INT
);
3529 mhigh
= wi::uhwi (mh_lo
, HOST_BITS_PER_DOUBLE_INT
);
3532 *post_shift_ptr
= post_shift
;
3534 if (n
< HOST_BITS_PER_WIDE_INT
)
3536 unsigned HOST_WIDE_INT mask
= ((unsigned HOST_WIDE_INT
) 1 << n
) - 1;
3537 *multiplier_ptr
= mhigh
.to_uhwi () & mask
;
3538 return mhigh
.to_uhwi () >= mask
;
3542 *multiplier_ptr
= mhigh
.to_uhwi ();
3543 return wi::extract_uhwi (mhigh
, HOST_BITS_PER_WIDE_INT
, 1);
3547 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3548 congruent to 1 (mod 2**N). */
3550 static unsigned HOST_WIDE_INT
3551 invert_mod2n (unsigned HOST_WIDE_INT x
, int n
)
3553 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
3555 /* The algorithm notes that the choice y = x satisfies
3556 x*y == 1 mod 2^3, since x is assumed odd.
3557 Each iteration doubles the number of bits of significance in y. */
3559 unsigned HOST_WIDE_INT mask
;
3560 unsigned HOST_WIDE_INT y
= x
;
3563 mask
= (n
== HOST_BITS_PER_WIDE_INT
3564 ? ~(unsigned HOST_WIDE_INT
) 0
3565 : ((unsigned HOST_WIDE_INT
) 1 << n
) - 1);
3569 y
= y
* (2 - x
*y
) & mask
; /* Modulo 2^N */
3575 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3576 flavor of OP0 and OP1. ADJ_OPERAND is already the high half of the
3577 product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed product
3578 to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3581 The result is put in TARGET if that is convenient.
3583 MODE is the mode of operation. */
3586 expand_mult_highpart_adjust (machine_mode mode
, rtx adj_operand
, rtx op0
,
3587 rtx op1
, rtx target
, int unsignedp
)
3590 enum rtx_code adj_code
= unsignedp
? PLUS
: MINUS
;
3592 tem
= expand_shift (RSHIFT_EXPR
, mode
, op0
,
3593 GET_MODE_BITSIZE (mode
) - 1, NULL_RTX
, 0);
3594 tem
= expand_and (mode
, tem
, op1
, NULL_RTX
);
3596 = force_operand (gen_rtx_fmt_ee (adj_code
, mode
, adj_operand
, tem
),
3599 tem
= expand_shift (RSHIFT_EXPR
, mode
, op1
,
3600 GET_MODE_BITSIZE (mode
) - 1, NULL_RTX
, 0);
3601 tem
= expand_and (mode
, tem
, op0
, NULL_RTX
);
3602 target
= force_operand (gen_rtx_fmt_ee (adj_code
, mode
, adj_operand
, tem
),
3608 /* Subroutine of expmed_mult_highpart. Return the MODE high part of OP. */
3611 extract_high_half (machine_mode mode
, rtx op
)
3613 machine_mode wider_mode
;
3615 if (mode
== word_mode
)
3616 return gen_highpart (mode
, op
);
3618 gcc_assert (!SCALAR_FLOAT_MODE_P (mode
));
3620 wider_mode
= GET_MODE_WIDER_MODE (mode
);
3621 op
= expand_shift (RSHIFT_EXPR
, wider_mode
, op
,
3622 GET_MODE_BITSIZE (mode
), 0, 1);
3623 return convert_modes (mode
, wider_mode
, op
, 0);
3626 /* Like expmed_mult_highpart, but only consider using a multiplication
3627 optab. OP1 is an rtx for the constant operand. */
3630 expmed_mult_highpart_optab (machine_mode mode
, rtx op0
, rtx op1
,
3631 rtx target
, int unsignedp
, int max_cost
)
3633 rtx narrow_op1
= gen_int_mode (INTVAL (op1
), mode
);
3634 machine_mode wider_mode
;
3638 bool speed
= optimize_insn_for_speed_p ();
3640 gcc_assert (!SCALAR_FLOAT_MODE_P (mode
));
3642 wider_mode
= GET_MODE_WIDER_MODE (mode
);
3643 size
= GET_MODE_BITSIZE (mode
);
3645 /* Firstly, try using a multiplication insn that only generates the needed
3646 high part of the product, and in the sign flavor of unsignedp. */
3647 if (mul_highpart_cost (speed
, mode
) < max_cost
)
3649 moptab
= unsignedp
? umul_highpart_optab
: smul_highpart_optab
;
3650 tem
= expand_binop (mode
, moptab
, op0
, narrow_op1
, target
,
3651 unsignedp
, OPTAB_DIRECT
);
3656 /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3657 Need to adjust the result after the multiplication. */
3658 if (size
- 1 < BITS_PER_WORD
3659 && (mul_highpart_cost (speed
, mode
)
3660 + 2 * shift_cost (speed
, mode
, size
-1)
3661 + 4 * add_cost (speed
, mode
) < max_cost
))
3663 moptab
= unsignedp
? smul_highpart_optab
: umul_highpart_optab
;
3664 tem
= expand_binop (mode
, moptab
, op0
, narrow_op1
, target
,
3665 unsignedp
, OPTAB_DIRECT
);
3667 /* We used the wrong signedness. Adjust the result. */
3668 return expand_mult_highpart_adjust (mode
, tem
, op0
, narrow_op1
,
3672 /* Try widening multiplication. */
3673 moptab
= unsignedp
? umul_widen_optab
: smul_widen_optab
;
3674 if (widening_optab_handler (moptab
, wider_mode
, mode
) != CODE_FOR_nothing
3675 && mul_widen_cost (speed
, wider_mode
) < max_cost
)
3677 tem
= expand_binop (wider_mode
, moptab
, op0
, narrow_op1
, 0,
3678 unsignedp
, OPTAB_WIDEN
);
3680 return extract_high_half (mode
, tem
);
3683 /* Try widening the mode and perform a non-widening multiplication. */
3684 if (optab_handler (smul_optab
, wider_mode
) != CODE_FOR_nothing
3685 && size
- 1 < BITS_PER_WORD
3686 && (mul_cost (speed
, wider_mode
) + shift_cost (speed
, mode
, size
-1)
3692 /* We need to widen the operands, for example to ensure the
3693 constant multiplier is correctly sign or zero extended.
3694 Use a sequence to clean-up any instructions emitted by
3695 the conversions if things don't work out. */
3697 wop0
= convert_modes (wider_mode
, mode
, op0
, unsignedp
);
3698 wop1
= convert_modes (wider_mode
, mode
, op1
, unsignedp
);
3699 tem
= expand_binop (wider_mode
, smul_optab
, wop0
, wop1
, 0,
3700 unsignedp
, OPTAB_WIDEN
);
3701 insns
= get_insns ();
3707 return extract_high_half (mode
, tem
);
3711 /* Try widening multiplication of opposite signedness, and adjust. */
3712 moptab
= unsignedp
? smul_widen_optab
: umul_widen_optab
;
3713 if (widening_optab_handler (moptab
, wider_mode
, mode
) != CODE_FOR_nothing
3714 && size
- 1 < BITS_PER_WORD
3715 && (mul_widen_cost (speed
, wider_mode
)
3716 + 2 * shift_cost (speed
, mode
, size
-1)
3717 + 4 * add_cost (speed
, mode
) < max_cost
))
3719 tem
= expand_binop (wider_mode
, moptab
, op0
, narrow_op1
,
3720 NULL_RTX
, ! unsignedp
, OPTAB_WIDEN
);
3723 tem
= extract_high_half (mode
, tem
);
3724 /* We used the wrong signedness. Adjust the result. */
3725 return expand_mult_highpart_adjust (mode
, tem
, op0
, narrow_op1
,
3733 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3734 putting the high half of the result in TARGET if that is convenient,
3735 and return where the result is. If the operation can not be performed,
3738 MODE is the mode of operation and result.
3740 UNSIGNEDP nonzero means unsigned multiply.
3742 MAX_COST is the total allowed cost for the expanded RTL. */
3745 expmed_mult_highpart (machine_mode mode
, rtx op0
, rtx op1
,
3746 rtx target
, int unsignedp
, int max_cost
)
3748 machine_mode wider_mode
= GET_MODE_WIDER_MODE (mode
);
3749 unsigned HOST_WIDE_INT cnst1
;
3751 bool sign_adjust
= false;
3752 enum mult_variant variant
;
3753 struct algorithm alg
;
3755 bool speed
= optimize_insn_for_speed_p ();
3757 gcc_assert (!SCALAR_FLOAT_MODE_P (mode
));
3758 /* We can't support modes wider than HOST_BITS_PER_INT. */
3759 gcc_assert (HWI_COMPUTABLE_MODE_P (mode
));
3761 cnst1
= INTVAL (op1
) & GET_MODE_MASK (mode
);
3763 /* We can't optimize modes wider than BITS_PER_WORD.
3764 ??? We might be able to perform double-word arithmetic if
3765 mode == word_mode, however all the cost calculations in
3766 synth_mult etc. assume single-word operations. */
3767 if (GET_MODE_BITSIZE (wider_mode
) > BITS_PER_WORD
)
3768 return expmed_mult_highpart_optab (mode
, op0
, op1
, target
,
3769 unsignedp
, max_cost
);
3771 extra_cost
= shift_cost (speed
, mode
, GET_MODE_BITSIZE (mode
) - 1);
3773 /* Check whether we try to multiply by a negative constant. */
3774 if (!unsignedp
&& ((cnst1
>> (GET_MODE_BITSIZE (mode
) - 1)) & 1))
3777 extra_cost
+= add_cost (speed
, mode
);
3780 /* See whether shift/add multiplication is cheap enough. */
3781 if (choose_mult_variant (wider_mode
, cnst1
, &alg
, &variant
,
3782 max_cost
- extra_cost
))
3784 /* See whether the specialized multiplication optabs are
3785 cheaper than the shift/add version. */
3786 tem
= expmed_mult_highpart_optab (mode
, op0
, op1
, target
, unsignedp
,
3787 alg
.cost
.cost
+ extra_cost
);
3791 tem
= convert_to_mode (wider_mode
, op0
, unsignedp
);
3792 tem
= expand_mult_const (wider_mode
, tem
, cnst1
, 0, &alg
, variant
);
3793 tem
= extract_high_half (mode
, tem
);
3795 /* Adjust result for signedness. */
3797 tem
= force_operand (gen_rtx_MINUS (mode
, tem
, op0
), tem
);
3801 return expmed_mult_highpart_optab (mode
, op0
, op1
, target
,
3802 unsignedp
, max_cost
);
3806 /* Expand signed modulus of OP0 by a power of two D in mode MODE. */
3809 expand_smod_pow2 (machine_mode mode
, rtx op0
, HOST_WIDE_INT d
)
3811 rtx result
, temp
, shift
;
3812 rtx_code_label
*label
;
3814 int prec
= GET_MODE_PRECISION (mode
);
3816 logd
= floor_log2 (d
);
3817 result
= gen_reg_rtx (mode
);
3819 /* Avoid conditional branches when they're expensive. */
3820 if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3821 && optimize_insn_for_speed_p ())
3823 rtx signmask
= emit_store_flag (result
, LT
, op0
, const0_rtx
,
3827 HOST_WIDE_INT masklow
= ((HOST_WIDE_INT
) 1 << logd
) - 1;
3828 signmask
= force_reg (mode
, signmask
);
3829 shift
= GEN_INT (GET_MODE_BITSIZE (mode
) - logd
);
3831 /* Use the rtx_cost of a LSHIFTRT instruction to determine
3832 which instruction sequence to use. If logical right shifts
3833 are expensive the use 2 XORs, 2 SUBs and an AND, otherwise
3834 use a LSHIFTRT, 1 ADD, 1 SUB and an AND. */
3836 temp
= gen_rtx_LSHIFTRT (mode
, result
, shift
);
3837 if (optab_handler (lshr_optab
, mode
) == CODE_FOR_nothing
3838 || (set_src_cost (temp
, mode
, optimize_insn_for_speed_p ())
3839 > COSTS_N_INSNS (2)))
3841 temp
= expand_binop (mode
, xor_optab
, op0
, signmask
,
3842 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
3843 temp
= expand_binop (mode
, sub_optab
, temp
, signmask
,
3844 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
3845 temp
= expand_binop (mode
, and_optab
, temp
,
3846 gen_int_mode (masklow
, mode
),
3847 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
3848 temp
= expand_binop (mode
, xor_optab
, temp
, signmask
,
3849 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
3850 temp
= expand_binop (mode
, sub_optab
, temp
, signmask
,
3851 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
3855 signmask
= expand_binop (mode
, lshr_optab
, signmask
, shift
,
3856 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
3857 signmask
= force_reg (mode
, signmask
);
3859 temp
= expand_binop (mode
, add_optab
, op0
, signmask
,
3860 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
3861 temp
= expand_binop (mode
, and_optab
, temp
,
3862 gen_int_mode (masklow
, mode
),
3863 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
3864 temp
= expand_binop (mode
, sub_optab
, temp
, signmask
,
3865 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
3871 /* Mask contains the mode's signbit and the significant bits of the
3872 modulus. By including the signbit in the operation, many targets
3873 can avoid an explicit compare operation in the following comparison
3875 wide_int mask
= wi::mask (logd
, false, prec
);
3876 mask
= wi::set_bit (mask
, prec
- 1);
3878 temp
= expand_binop (mode
, and_optab
, op0
,
3879 immed_wide_int_const (mask
, mode
),
3880 result
, 1, OPTAB_LIB_WIDEN
);
3882 emit_move_insn (result
, temp
);
3884 label
= gen_label_rtx ();
3885 do_cmp_and_jump (result
, const0_rtx
, GE
, mode
, label
);
3887 temp
= expand_binop (mode
, sub_optab
, result
, const1_rtx
, result
,
3888 0, OPTAB_LIB_WIDEN
);
3890 mask
= wi::mask (logd
, true, prec
);
3891 temp
= expand_binop (mode
, ior_optab
, temp
,
3892 immed_wide_int_const (mask
, mode
),
3893 result
, 1, OPTAB_LIB_WIDEN
);
3894 temp
= expand_binop (mode
, add_optab
, temp
, const1_rtx
, result
,
3895 0, OPTAB_LIB_WIDEN
);
3897 emit_move_insn (result
, temp
);
3902 /* Expand signed division of OP0 by a power of two D in mode MODE.
3903 This routine is only called for positive values of D. */
3906 expand_sdiv_pow2 (machine_mode mode
, rtx op0
, HOST_WIDE_INT d
)
3909 rtx_code_label
*label
;
3912 logd
= floor_log2 (d
);
3915 && BRANCH_COST (optimize_insn_for_speed_p (),
3918 temp
= gen_reg_rtx (mode
);
3919 temp
= emit_store_flag (temp
, LT
, op0
, const0_rtx
, mode
, 0, 1);
3920 temp
= expand_binop (mode
, add_optab
, temp
, op0
, NULL_RTX
,
3921 0, OPTAB_LIB_WIDEN
);
3922 return expand_shift (RSHIFT_EXPR
, mode
, temp
, logd
, NULL_RTX
, 0);
3925 if (HAVE_conditional_move
3926 && BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2)
3931 temp2
= copy_to_mode_reg (mode
, op0
);
3932 temp
= expand_binop (mode
, add_optab
, temp2
, gen_int_mode (d
- 1, mode
),
3933 NULL_RTX
, 0, OPTAB_LIB_WIDEN
);
3934 temp
= force_reg (mode
, temp
);
3936 /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */
3937 temp2
= emit_conditional_move (temp2
, LT
, temp2
, const0_rtx
,
3938 mode
, temp
, temp2
, mode
, 0);
3941 rtx_insn
*seq
= get_insns ();
3944 return expand_shift (RSHIFT_EXPR
, mode
, temp2
, logd
, NULL_RTX
, 0);
3949 if (BRANCH_COST (optimize_insn_for_speed_p (),
3952 int ushift
= GET_MODE_BITSIZE (mode
) - logd
;
3954 temp
= gen_reg_rtx (mode
);
3955 temp
= emit_store_flag (temp
, LT
, op0
, const0_rtx
, mode
, 0, -1);
3956 if (GET_MODE_BITSIZE (mode
) >= BITS_PER_WORD
3957 || shift_cost (optimize_insn_for_speed_p (), mode
, ushift
)
3958 > COSTS_N_INSNS (1))
3959 temp
= expand_binop (mode
, and_optab
, temp
, gen_int_mode (d
- 1, mode
),
3960 NULL_RTX
, 0, OPTAB_LIB_WIDEN
);
3962 temp
= expand_shift (RSHIFT_EXPR
, mode
, temp
,
3963 ushift
, NULL_RTX
, 1);
3964 temp
= expand_binop (mode
, add_optab
, temp
, op0
, NULL_RTX
,
3965 0, OPTAB_LIB_WIDEN
);
3966 return expand_shift (RSHIFT_EXPR
, mode
, temp
, logd
, NULL_RTX
, 0);
3969 label
= gen_label_rtx ();
3970 temp
= copy_to_mode_reg (mode
, op0
);
3971 do_cmp_and_jump (temp
, const0_rtx
, GE
, mode
, label
);
3972 expand_inc (temp
, gen_int_mode (d
- 1, mode
));
3974 return expand_shift (RSHIFT_EXPR
, mode
, temp
, logd
, NULL_RTX
, 0);
3977 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3978 if that is convenient, and returning where the result is.
3979 You may request either the quotient or the remainder as the result;
3980 specify REM_FLAG nonzero to get the remainder.
3982 CODE is the expression code for which kind of division this is;
3983 it controls how rounding is done. MODE is the machine mode to use.
3984 UNSIGNEDP nonzero means do unsigned division. */
3986 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3987 and then correct it by or'ing in missing high bits
3988 if result of ANDI is nonzero.
3989 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3990 This could optimize to a bfexts instruction.
3991 But C doesn't use these operations, so their optimizations are
3993 /* ??? For modulo, we don't actually need the highpart of the first product,
3994 the low part will do nicely. And for small divisors, the second multiply
3995 can also be a low-part only multiply or even be completely left out.
3996 E.g. to calculate the remainder of a division by 3 with a 32 bit
3997 multiply, multiply with 0x55555556 and extract the upper two bits;
3998 the result is exact for inputs up to 0x1fffffff.
3999 The input range can be reduced by using cross-sum rules.
4000 For odd divisors >= 3, the following table gives right shift counts
4001 so that if a number is shifted by an integer multiple of the given
4002 amount, the remainder stays the same:
4003 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
4004 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
4005 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
4006 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
4007 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
4009 Cross-sum rules for even numbers can be derived by leaving as many bits
4010 to the right alone as the divisor has zeros to the right.
4011 E.g. if x is an unsigned 32 bit number:
4012 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
4016 expand_divmod (int rem_flag
, enum tree_code code
, machine_mode mode
,
4017 rtx op0
, rtx op1
, rtx target
, int unsignedp
)
4019 machine_mode compute_mode
;
4021 rtx quotient
= 0, remainder
= 0;
4025 optab optab1
, optab2
;
4026 int op1_is_constant
, op1_is_pow2
= 0;
4027 int max_cost
, extra_cost
;
4028 static HOST_WIDE_INT last_div_const
= 0;
4029 bool speed
= optimize_insn_for_speed_p ();
4031 op1_is_constant
= CONST_INT_P (op1
);
4032 if (op1_is_constant
)
4034 unsigned HOST_WIDE_INT ext_op1
= UINTVAL (op1
);
4036 ext_op1
&= GET_MODE_MASK (mode
);
4037 op1_is_pow2
= ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1
)
4038 || (! unsignedp
&& EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1
))));
4042 This is the structure of expand_divmod:
4044 First comes code to fix up the operands so we can perform the operations
4045 correctly and efficiently.
4047 Second comes a switch statement with code specific for each rounding mode.
4048 For some special operands this code emits all RTL for the desired
4049 operation, for other cases, it generates only a quotient and stores it in
4050 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
4051 to indicate that it has not done anything.
4053 Last comes code that finishes the operation. If QUOTIENT is set and
4054 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
4055 QUOTIENT is not set, it is computed using trunc rounding.
4057 We try to generate special code for division and remainder when OP1 is a
4058 constant. If |OP1| = 2**n we can use shifts and some other fast
4059 operations. For other values of OP1, we compute a carefully selected
4060 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
4063 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
4064 half of the product. Different strategies for generating the product are
4065 implemented in expmed_mult_highpart.
4067 If what we actually want is the remainder, we generate that by another
4068 by-constant multiplication and a subtraction. */
4070 /* We shouldn't be called with OP1 == const1_rtx, but some of the
4071 code below will malfunction if we are, so check here and handle
4072 the special case if so. */
4073 if (op1
== const1_rtx
)
4074 return rem_flag
? const0_rtx
: op0
;
4076 /* When dividing by -1, we could get an overflow.
4077 negv_optab can handle overflows. */
4078 if (! unsignedp
&& op1
== constm1_rtx
)
4082 return expand_unop (mode
, flag_trapv
&& GET_MODE_CLASS (mode
) == MODE_INT
4083 ? negv_optab
: neg_optab
, op0
, target
, 0);
4087 /* Don't use the function value register as a target
4088 since we have to read it as well as write it,
4089 and function-inlining gets confused by this. */
4090 && ((REG_P (target
) && REG_FUNCTION_VALUE_P (target
))
4091 /* Don't clobber an operand while doing a multi-step calculation. */
4092 || ((rem_flag
|| op1_is_constant
)
4093 && (reg_mentioned_p (target
, op0
)
4094 || (MEM_P (op0
) && MEM_P (target
))))
4095 || reg_mentioned_p (target
, op1
)
4096 || (MEM_P (op1
) && MEM_P (target
))))
4099 /* Get the mode in which to perform this computation. Normally it will
4100 be MODE, but sometimes we can't do the desired operation in MODE.
4101 If so, pick a wider mode in which we can do the operation. Convert
4102 to that mode at the start to avoid repeated conversions.
4104 First see what operations we need. These depend on the expression
4105 we are evaluating. (We assume that divxx3 insns exist under the
4106 same conditions that modxx3 insns and that these insns don't normally
4107 fail. If these assumptions are not correct, we may generate less
4108 efficient code in some cases.)
4110 Then see if we find a mode in which we can open-code that operation
4111 (either a division, modulus, or shift). Finally, check for the smallest
4112 mode for which we can do the operation with a library call. */
4114 /* We might want to refine this now that we have division-by-constant
4115 optimization. Since expmed_mult_highpart tries so many variants, it is
4116 not straightforward to generalize this. Maybe we should make an array
4117 of possible modes in init_expmed? Save this for GCC 2.7. */
4119 optab1
= ((op1_is_pow2
&& op1
!= const0_rtx
)
4120 ? (unsignedp
? lshr_optab
: ashr_optab
)
4121 : (unsignedp
? udiv_optab
: sdiv_optab
));
4122 optab2
= ((op1_is_pow2
&& op1
!= const0_rtx
)
4124 : (unsignedp
? udivmod_optab
: sdivmod_optab
));
4126 for (compute_mode
= mode
; compute_mode
!= VOIDmode
;
4127 compute_mode
= GET_MODE_WIDER_MODE (compute_mode
))
4128 if (optab_handler (optab1
, compute_mode
) != CODE_FOR_nothing
4129 || optab_handler (optab2
, compute_mode
) != CODE_FOR_nothing
)
4132 if (compute_mode
== VOIDmode
)
4133 for (compute_mode
= mode
; compute_mode
!= VOIDmode
;
4134 compute_mode
= GET_MODE_WIDER_MODE (compute_mode
))
4135 if (optab_libfunc (optab1
, compute_mode
)
4136 || optab_libfunc (optab2
, compute_mode
))
4139 /* If we still couldn't find a mode, use MODE, but expand_binop will
4141 if (compute_mode
== VOIDmode
)
4142 compute_mode
= mode
;
4144 if (target
&& GET_MODE (target
) == compute_mode
)
4147 tquotient
= gen_reg_rtx (compute_mode
);
4149 size
= GET_MODE_BITSIZE (compute_mode
);
4151 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
4152 (mode), and thereby get better code when OP1 is a constant. Do that
4153 later. It will require going over all usages of SIZE below. */
4154 size
= GET_MODE_BITSIZE (mode
);
4157 /* Only deduct something for a REM if the last divide done was
4158 for a different constant. Then set the constant of the last
4160 max_cost
= (unsignedp
4161 ? udiv_cost (speed
, compute_mode
)
4162 : sdiv_cost (speed
, compute_mode
));
4163 if (rem_flag
&& ! (last_div_const
!= 0 && op1_is_constant
4164 && INTVAL (op1
) == last_div_const
))
4165 max_cost
-= (mul_cost (speed
, compute_mode
)
4166 + add_cost (speed
, compute_mode
));
4168 last_div_const
= ! rem_flag
&& op1_is_constant
? INTVAL (op1
) : 0;
4170 /* Now convert to the best mode to use. */
4171 if (compute_mode
!= mode
)
4173 op0
= convert_modes (compute_mode
, mode
, op0
, unsignedp
);
4174 op1
= convert_modes (compute_mode
, mode
, op1
, unsignedp
);
4176 /* convert_modes may have placed op1 into a register, so we
4177 must recompute the following. */
4178 op1_is_constant
= CONST_INT_P (op1
);
4179 op1_is_pow2
= (op1_is_constant
4180 && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1
))
4182 && EXACT_POWER_OF_2_OR_ZERO_P (-UINTVAL (op1
))))));
4185 /* If one of the operands is a volatile MEM, copy it into a register. */
4187 if (MEM_P (op0
) && MEM_VOLATILE_P (op0
))
4188 op0
= force_reg (compute_mode
, op0
);
4189 if (MEM_P (op1
) && MEM_VOLATILE_P (op1
))
4190 op1
= force_reg (compute_mode
, op1
);
4192 /* If we need the remainder or if OP1 is constant, we need to
4193 put OP0 in a register in case it has any queued subexpressions. */
4194 if (rem_flag
|| op1_is_constant
)
4195 op0
= force_reg (compute_mode
, op0
);
4197 last
= get_last_insn ();
4199 /* Promote floor rounding to trunc rounding for unsigned operations. */
4202 if (code
== FLOOR_DIV_EXPR
)
4203 code
= TRUNC_DIV_EXPR
;
4204 if (code
== FLOOR_MOD_EXPR
)
4205 code
= TRUNC_MOD_EXPR
;
4206 if (code
== EXACT_DIV_EXPR
&& op1_is_pow2
)
4207 code
= TRUNC_DIV_EXPR
;
4210 if (op1
!= const0_rtx
)
4213 case TRUNC_MOD_EXPR
:
4214 case TRUNC_DIV_EXPR
:
4215 if (op1_is_constant
)
4219 unsigned HOST_WIDE_INT mh
, ml
;
4220 int pre_shift
, post_shift
;
4222 unsigned HOST_WIDE_INT d
= (INTVAL (op1
)
4223 & GET_MODE_MASK (compute_mode
));
4225 if (EXACT_POWER_OF_2_OR_ZERO_P (d
))
4227 pre_shift
= floor_log2 (d
);
4230 unsigned HOST_WIDE_INT mask
4231 = ((unsigned HOST_WIDE_INT
) 1 << pre_shift
) - 1;
4233 = expand_binop (compute_mode
, and_optab
, op0
,
4234 gen_int_mode (mask
, compute_mode
),
4238 return gen_lowpart (mode
, remainder
);
4240 quotient
= expand_shift (RSHIFT_EXPR
, compute_mode
, op0
,
4241 pre_shift
, tquotient
, 1);
4243 else if (size
<= HOST_BITS_PER_WIDE_INT
)
4245 if (d
>= ((unsigned HOST_WIDE_INT
) 1 << (size
- 1)))
4247 /* Most significant bit of divisor is set; emit an scc
4249 quotient
= emit_store_flag_force (tquotient
, GEU
, op0
, op1
,
4250 compute_mode
, 1, 1);
4254 /* Find a suitable multiplier and right shift count
4255 instead of multiplying with D. */
4257 mh
= choose_multiplier (d
, size
, size
,
4258 &ml
, &post_shift
, &dummy
);
4260 /* If the suggested multiplier is more than SIZE bits,
4261 we can do better for even divisors, using an
4262 initial right shift. */
4263 if (mh
!= 0 && (d
& 1) == 0)
4265 pre_shift
= floor_log2 (d
& -d
);
4266 mh
= choose_multiplier (d
>> pre_shift
, size
,
4268 &ml
, &post_shift
, &dummy
);
4278 if (post_shift
- 1 >= BITS_PER_WORD
)
4282 = (shift_cost (speed
, compute_mode
, post_shift
- 1)
4283 + shift_cost (speed
, compute_mode
, 1)
4284 + 2 * add_cost (speed
, compute_mode
));
4285 t1
= expmed_mult_highpart
4287 gen_int_mode (ml
, compute_mode
),
4288 NULL_RTX
, 1, max_cost
- extra_cost
);
4291 t2
= force_operand (gen_rtx_MINUS (compute_mode
,
4294 t3
= expand_shift (RSHIFT_EXPR
, compute_mode
,
4295 t2
, 1, NULL_RTX
, 1);
4296 t4
= force_operand (gen_rtx_PLUS (compute_mode
,
4299 quotient
= expand_shift
4300 (RSHIFT_EXPR
, compute_mode
, t4
,
4301 post_shift
- 1, tquotient
, 1);
4307 if (pre_shift
>= BITS_PER_WORD
4308 || post_shift
>= BITS_PER_WORD
)
4312 (RSHIFT_EXPR
, compute_mode
, op0
,
4313 pre_shift
, NULL_RTX
, 1);
4315 = (shift_cost (speed
, compute_mode
, pre_shift
)
4316 + shift_cost (speed
, compute_mode
, post_shift
));
4317 t2
= expmed_mult_highpart
4319 gen_int_mode (ml
, compute_mode
),
4320 NULL_RTX
, 1, max_cost
- extra_cost
);
4323 quotient
= expand_shift
4324 (RSHIFT_EXPR
, compute_mode
, t2
,
4325 post_shift
, tquotient
, 1);
4329 else /* Too wide mode to use tricky code */
4332 insn
= get_last_insn ();
4334 set_dst_reg_note (insn
, REG_EQUAL
,
4335 gen_rtx_UDIV (compute_mode
, op0
, op1
),
4338 else /* TRUNC_DIV, signed */
4340 unsigned HOST_WIDE_INT ml
;
4341 int lgup
, post_shift
;
4343 HOST_WIDE_INT d
= INTVAL (op1
);
4344 unsigned HOST_WIDE_INT abs_d
;
4346 /* Since d might be INT_MIN, we have to cast to
4347 unsigned HOST_WIDE_INT before negating to avoid
4348 undefined signed overflow. */
4350 ? (unsigned HOST_WIDE_INT
) d
4351 : - (unsigned HOST_WIDE_INT
) d
);
4353 /* n rem d = n rem -d */
4354 if (rem_flag
&& d
< 0)
4357 op1
= gen_int_mode (abs_d
, compute_mode
);
4363 quotient
= expand_unop (compute_mode
, neg_optab
, op0
,
4365 else if (HOST_BITS_PER_WIDE_INT
>= size
4366 && abs_d
== (unsigned HOST_WIDE_INT
) 1 << (size
- 1))
4368 /* This case is not handled correctly below. */
4369 quotient
= emit_store_flag (tquotient
, EQ
, op0
, op1
,
4370 compute_mode
, 1, 1);
4374 else if (EXACT_POWER_OF_2_OR_ZERO_P (d
)
4376 ? smod_pow2_cheap (speed
, compute_mode
)
4377 : sdiv_pow2_cheap (speed
, compute_mode
))
4378 /* We assume that cheap metric is true if the
4379 optab has an expander for this mode. */
4380 && ((optab_handler ((rem_flag
? smod_optab
4383 != CODE_FOR_nothing
)
4384 || (optab_handler (sdivmod_optab
,
4386 != CODE_FOR_nothing
)))
4388 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d
))
4392 remainder
= expand_smod_pow2 (compute_mode
, op0
, d
);
4394 return gen_lowpart (mode
, remainder
);
4397 if (sdiv_pow2_cheap (speed
, compute_mode
)
4398 && ((optab_handler (sdiv_optab
, compute_mode
)
4399 != CODE_FOR_nothing
)
4400 || (optab_handler (sdivmod_optab
, compute_mode
)
4401 != CODE_FOR_nothing
)))
4402 quotient
= expand_divmod (0, TRUNC_DIV_EXPR
,
4404 gen_int_mode (abs_d
,
4408 quotient
= expand_sdiv_pow2 (compute_mode
, op0
, abs_d
);
4410 /* We have computed OP0 / abs(OP1). If OP1 is negative,
4411 negate the quotient. */
4414 insn
= get_last_insn ();
4416 && abs_d
< ((unsigned HOST_WIDE_INT
) 1
4417 << (HOST_BITS_PER_WIDE_INT
- 1)))
4418 set_dst_reg_note (insn
, REG_EQUAL
,
4419 gen_rtx_DIV (compute_mode
, op0
,
4425 quotient
= expand_unop (compute_mode
, neg_optab
,
4426 quotient
, quotient
, 0);
4429 else if (size
<= HOST_BITS_PER_WIDE_INT
)
4431 choose_multiplier (abs_d
, size
, size
- 1,
4432 &ml
, &post_shift
, &lgup
);
4433 if (ml
< (unsigned HOST_WIDE_INT
) 1 << (size
- 1))
4437 if (post_shift
>= BITS_PER_WORD
4438 || size
- 1 >= BITS_PER_WORD
)
4441 extra_cost
= (shift_cost (speed
, compute_mode
, post_shift
)
4442 + shift_cost (speed
, compute_mode
, size
- 1)
4443 + add_cost (speed
, compute_mode
));
4444 t1
= expmed_mult_highpart
4445 (compute_mode
, op0
, gen_int_mode (ml
, compute_mode
),
4446 NULL_RTX
, 0, max_cost
- extra_cost
);
4450 (RSHIFT_EXPR
, compute_mode
, t1
,
4451 post_shift
, NULL_RTX
, 0);
4453 (RSHIFT_EXPR
, compute_mode
, op0
,
4454 size
- 1, NULL_RTX
, 0);
4457 = force_operand (gen_rtx_MINUS (compute_mode
,
4462 = force_operand (gen_rtx_MINUS (compute_mode
,
4470 if (post_shift
>= BITS_PER_WORD
4471 || size
- 1 >= BITS_PER_WORD
)
4474 ml
|= (~(unsigned HOST_WIDE_INT
) 0) << (size
- 1);
4475 mlr
= gen_int_mode (ml
, compute_mode
);
4476 extra_cost
= (shift_cost (speed
, compute_mode
, post_shift
)
4477 + shift_cost (speed
, compute_mode
, size
- 1)
4478 + 2 * add_cost (speed
, compute_mode
));
4479 t1
= expmed_mult_highpart (compute_mode
, op0
, mlr
,
4481 max_cost
- extra_cost
);
4484 t2
= force_operand (gen_rtx_PLUS (compute_mode
,
4488 (RSHIFT_EXPR
, compute_mode
, t2
,
4489 post_shift
, NULL_RTX
, 0);
4491 (RSHIFT_EXPR
, compute_mode
, op0
,
4492 size
- 1, NULL_RTX
, 0);
4495 = force_operand (gen_rtx_MINUS (compute_mode
,
4500 = force_operand (gen_rtx_MINUS (compute_mode
,
4505 else /* Too wide mode to use tricky code */
4508 insn
= get_last_insn ();
4510 set_dst_reg_note (insn
, REG_EQUAL
,
4511 gen_rtx_DIV (compute_mode
, op0
, op1
),
4517 delete_insns_since (last
);
4520 case FLOOR_DIV_EXPR
:
4521 case FLOOR_MOD_EXPR
:
4522 /* We will come here only for signed operations. */
4523 if (op1_is_constant
&& HOST_BITS_PER_WIDE_INT
>= size
)
4525 unsigned HOST_WIDE_INT mh
, ml
;
4526 int pre_shift
, lgup
, post_shift
;
4527 HOST_WIDE_INT d
= INTVAL (op1
);
4531 /* We could just as easily deal with negative constants here,
4532 but it does not seem worth the trouble for GCC 2.6. */
4533 if (EXACT_POWER_OF_2_OR_ZERO_P (d
))
4535 pre_shift
= floor_log2 (d
);
4538 unsigned HOST_WIDE_INT mask
4539 = ((unsigned HOST_WIDE_INT
) 1 << pre_shift
) - 1;
4540 remainder
= expand_binop
4541 (compute_mode
, and_optab
, op0
,
4542 gen_int_mode (mask
, compute_mode
),
4543 remainder
, 0, OPTAB_LIB_WIDEN
);
4545 return gen_lowpart (mode
, remainder
);
4547 quotient
= expand_shift
4548 (RSHIFT_EXPR
, compute_mode
, op0
,
4549 pre_shift
, tquotient
, 0);
4555 mh
= choose_multiplier (d
, size
, size
- 1,
4556 &ml
, &post_shift
, &lgup
);
4559 if (post_shift
< BITS_PER_WORD
4560 && size
- 1 < BITS_PER_WORD
)
4563 (RSHIFT_EXPR
, compute_mode
, op0
,
4564 size
- 1, NULL_RTX
, 0);
4565 t2
= expand_binop (compute_mode
, xor_optab
, op0
, t1
,
4566 NULL_RTX
, 0, OPTAB_WIDEN
);
4567 extra_cost
= (shift_cost (speed
, compute_mode
, post_shift
)
4568 + shift_cost (speed
, compute_mode
, size
- 1)
4569 + 2 * add_cost (speed
, compute_mode
));
4570 t3
= expmed_mult_highpart
4571 (compute_mode
, t2
, gen_int_mode (ml
, compute_mode
),
4572 NULL_RTX
, 1, max_cost
- extra_cost
);
4576 (RSHIFT_EXPR
, compute_mode
, t3
,
4577 post_shift
, NULL_RTX
, 1);
4578 quotient
= expand_binop (compute_mode
, xor_optab
,
4579 t4
, t1
, tquotient
, 0,
4587 rtx nsign
, t1
, t2
, t3
, t4
;
4588 t1
= force_operand (gen_rtx_PLUS (compute_mode
,
4589 op0
, constm1_rtx
), NULL_RTX
);
4590 t2
= expand_binop (compute_mode
, ior_optab
, op0
, t1
, NULL_RTX
,
4592 nsign
= expand_shift
4593 (RSHIFT_EXPR
, compute_mode
, t2
,
4594 size
- 1, NULL_RTX
, 0);
4595 t3
= force_operand (gen_rtx_MINUS (compute_mode
, t1
, nsign
),
4597 t4
= expand_divmod (0, TRUNC_DIV_EXPR
, compute_mode
, t3
, op1
,
4602 t5
= expand_unop (compute_mode
, one_cmpl_optab
, nsign
,
4604 quotient
= force_operand (gen_rtx_PLUS (compute_mode
,
4613 delete_insns_since (last
);
4615 /* Try using an instruction that produces both the quotient and
4616 remainder, using truncation. We can easily compensate the quotient
4617 or remainder to get floor rounding, once we have the remainder.
4618 Notice that we compute also the final remainder value here,
4619 and return the result right away. */
4620 if (target
== 0 || GET_MODE (target
) != compute_mode
)
4621 target
= gen_reg_rtx (compute_mode
);
4626 = REG_P (target
) ? target
: gen_reg_rtx (compute_mode
);
4627 quotient
= gen_reg_rtx (compute_mode
);
4632 = REG_P (target
) ? target
: gen_reg_rtx (compute_mode
);
4633 remainder
= gen_reg_rtx (compute_mode
);
4636 if (expand_twoval_binop (sdivmod_optab
, op0
, op1
,
4637 quotient
, remainder
, 0))
4639 /* This could be computed with a branch-less sequence.
4640 Save that for later. */
4642 rtx_code_label
*label
= gen_label_rtx ();
4643 do_cmp_and_jump (remainder
, const0_rtx
, EQ
, compute_mode
, label
);
4644 tem
= expand_binop (compute_mode
, xor_optab
, op0
, op1
,
4645 NULL_RTX
, 0, OPTAB_WIDEN
);
4646 do_cmp_and_jump (tem
, const0_rtx
, GE
, compute_mode
, label
);
4647 expand_dec (quotient
, const1_rtx
);
4648 expand_inc (remainder
, op1
);
4650 return gen_lowpart (mode
, rem_flag
? remainder
: quotient
);
4653 /* No luck with division elimination or divmod. Have to do it
4654 by conditionally adjusting op0 *and* the result. */
4656 rtx_code_label
*label1
, *label2
, *label3
, *label4
, *label5
;
4660 quotient
= gen_reg_rtx (compute_mode
);
4661 adjusted_op0
= copy_to_mode_reg (compute_mode
, op0
);
4662 label1
= gen_label_rtx ();
4663 label2
= gen_label_rtx ();
4664 label3
= gen_label_rtx ();
4665 label4
= gen_label_rtx ();
4666 label5
= gen_label_rtx ();
4667 do_cmp_and_jump (op1
, const0_rtx
, LT
, compute_mode
, label2
);
4668 do_cmp_and_jump (adjusted_op0
, const0_rtx
, LT
, compute_mode
, label1
);
4669 tem
= expand_binop (compute_mode
, sdiv_optab
, adjusted_op0
, op1
,
4670 quotient
, 0, OPTAB_LIB_WIDEN
);
4671 if (tem
!= quotient
)
4672 emit_move_insn (quotient
, tem
);
4673 emit_jump_insn (targetm
.gen_jump (label5
));
4675 emit_label (label1
);
4676 expand_inc (adjusted_op0
, const1_rtx
);
4677 emit_jump_insn (targetm
.gen_jump (label4
));
4679 emit_label (label2
);
4680 do_cmp_and_jump (adjusted_op0
, const0_rtx
, GT
, compute_mode
, label3
);
4681 tem
= expand_binop (compute_mode
, sdiv_optab
, adjusted_op0
, op1
,
4682 quotient
, 0, OPTAB_LIB_WIDEN
);
4683 if (tem
!= quotient
)
4684 emit_move_insn (quotient
, tem
);
4685 emit_jump_insn (targetm
.gen_jump (label5
));
4687 emit_label (label3
);
4688 expand_dec (adjusted_op0
, const1_rtx
);
4689 emit_label (label4
);
4690 tem
= expand_binop (compute_mode
, sdiv_optab
, adjusted_op0
, op1
,
4691 quotient
, 0, OPTAB_LIB_WIDEN
);
4692 if (tem
!= quotient
)
4693 emit_move_insn (quotient
, tem
);
4694 expand_dec (quotient
, const1_rtx
);
4695 emit_label (label5
);
4703 if (op1_is_constant
&& EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1
)))
4706 unsigned HOST_WIDE_INT d
= INTVAL (op1
);
4707 t1
= expand_shift (RSHIFT_EXPR
, compute_mode
, op0
,
4708 floor_log2 (d
), tquotient
, 1);
4709 t2
= expand_binop (compute_mode
, and_optab
, op0
,
4710 gen_int_mode (d
- 1, compute_mode
),
4711 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
4712 t3
= gen_reg_rtx (compute_mode
);
4713 t3
= emit_store_flag (t3
, NE
, t2
, const0_rtx
,
4714 compute_mode
, 1, 1);
4717 rtx_code_label
*lab
;
4718 lab
= gen_label_rtx ();
4719 do_cmp_and_jump (t2
, const0_rtx
, EQ
, compute_mode
, lab
);
4720 expand_inc (t1
, const1_rtx
);
4725 quotient
= force_operand (gen_rtx_PLUS (compute_mode
,
4731 /* Try using an instruction that produces both the quotient and
4732 remainder, using truncation. We can easily compensate the
4733 quotient or remainder to get ceiling rounding, once we have the
4734 remainder. Notice that we compute also the final remainder
4735 value here, and return the result right away. */
4736 if (target
== 0 || GET_MODE (target
) != compute_mode
)
4737 target
= gen_reg_rtx (compute_mode
);
4741 remainder
= (REG_P (target
)
4742 ? target
: gen_reg_rtx (compute_mode
));
4743 quotient
= gen_reg_rtx (compute_mode
);
4747 quotient
= (REG_P (target
)
4748 ? target
: gen_reg_rtx (compute_mode
));
4749 remainder
= gen_reg_rtx (compute_mode
);
4752 if (expand_twoval_binop (udivmod_optab
, op0
, op1
, quotient
,
4755 /* This could be computed with a branch-less sequence.
4756 Save that for later. */
4757 rtx_code_label
*label
= gen_label_rtx ();
4758 do_cmp_and_jump (remainder
, const0_rtx
, EQ
,
4759 compute_mode
, label
);
4760 expand_inc (quotient
, const1_rtx
);
4761 expand_dec (remainder
, op1
);
4763 return gen_lowpart (mode
, rem_flag
? remainder
: quotient
);
4766 /* No luck with division elimination or divmod. Have to do it
4767 by conditionally adjusting op0 *and* the result. */
4769 rtx_code_label
*label1
, *label2
;
4770 rtx adjusted_op0
, tem
;
4772 quotient
= gen_reg_rtx (compute_mode
);
4773 adjusted_op0
= copy_to_mode_reg (compute_mode
, op0
);
4774 label1
= gen_label_rtx ();
4775 label2
= gen_label_rtx ();
4776 do_cmp_and_jump (adjusted_op0
, const0_rtx
, NE
,
4777 compute_mode
, label1
);
4778 emit_move_insn (quotient
, const0_rtx
);
4779 emit_jump_insn (targetm
.gen_jump (label2
));
4781 emit_label (label1
);
4782 expand_dec (adjusted_op0
, const1_rtx
);
4783 tem
= expand_binop (compute_mode
, udiv_optab
, adjusted_op0
, op1
,
4784 quotient
, 1, OPTAB_LIB_WIDEN
);
4785 if (tem
!= quotient
)
4786 emit_move_insn (quotient
, tem
);
4787 expand_inc (quotient
, const1_rtx
);
4788 emit_label (label2
);
4793 if (op1_is_constant
&& EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1
))
4794 && INTVAL (op1
) >= 0)
4796 /* This is extremely similar to the code for the unsigned case
4797 above. For 2.7 we should merge these variants, but for
4798 2.6.1 I don't want to touch the code for unsigned since that
4799 get used in C. The signed case will only be used by other
4803 unsigned HOST_WIDE_INT d
= INTVAL (op1
);
4804 t1
= expand_shift (RSHIFT_EXPR
, compute_mode
, op0
,
4805 floor_log2 (d
), tquotient
, 0);
4806 t2
= expand_binop (compute_mode
, and_optab
, op0
,
4807 gen_int_mode (d
- 1, compute_mode
),
4808 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
4809 t3
= gen_reg_rtx (compute_mode
);
4810 t3
= emit_store_flag (t3
, NE
, t2
, const0_rtx
,
4811 compute_mode
, 1, 1);
4814 rtx_code_label
*lab
;
4815 lab
= gen_label_rtx ();
4816 do_cmp_and_jump (t2
, const0_rtx
, EQ
, compute_mode
, lab
);
4817 expand_inc (t1
, const1_rtx
);
4822 quotient
= force_operand (gen_rtx_PLUS (compute_mode
,
4828 /* Try using an instruction that produces both the quotient and
4829 remainder, using truncation. We can easily compensate the
4830 quotient or remainder to get ceiling rounding, once we have the
4831 remainder. Notice that we compute also the final remainder
4832 value here, and return the result right away. */
4833 if (target
== 0 || GET_MODE (target
) != compute_mode
)
4834 target
= gen_reg_rtx (compute_mode
);
4837 remainder
= (REG_P (target
)
4838 ? target
: gen_reg_rtx (compute_mode
));
4839 quotient
= gen_reg_rtx (compute_mode
);
4843 quotient
= (REG_P (target
)
4844 ? target
: gen_reg_rtx (compute_mode
));
4845 remainder
= gen_reg_rtx (compute_mode
);
4848 if (expand_twoval_binop (sdivmod_optab
, op0
, op1
, quotient
,
4851 /* This could be computed with a branch-less sequence.
4852 Save that for later. */
4854 rtx_code_label
*label
= gen_label_rtx ();
4855 do_cmp_and_jump (remainder
, const0_rtx
, EQ
,
4856 compute_mode
, label
);
4857 tem
= expand_binop (compute_mode
, xor_optab
, op0
, op1
,
4858 NULL_RTX
, 0, OPTAB_WIDEN
);
4859 do_cmp_and_jump (tem
, const0_rtx
, LT
, compute_mode
, label
);
4860 expand_inc (quotient
, const1_rtx
);
4861 expand_dec (remainder
, op1
);
4863 return gen_lowpart (mode
, rem_flag
? remainder
: quotient
);
4866 /* No luck with division elimination or divmod. Have to do it
4867 by conditionally adjusting op0 *and* the result. */
4869 rtx_code_label
*label1
, *label2
, *label3
, *label4
, *label5
;
4873 quotient
= gen_reg_rtx (compute_mode
);
4874 adjusted_op0
= copy_to_mode_reg (compute_mode
, op0
);
4875 label1
= gen_label_rtx ();
4876 label2
= gen_label_rtx ();
4877 label3
= gen_label_rtx ();
4878 label4
= gen_label_rtx ();
4879 label5
= gen_label_rtx ();
4880 do_cmp_and_jump (op1
, const0_rtx
, LT
, compute_mode
, label2
);
4881 do_cmp_and_jump (adjusted_op0
, const0_rtx
, GT
,
4882 compute_mode
, label1
);
4883 tem
= expand_binop (compute_mode
, sdiv_optab
, adjusted_op0
, op1
,
4884 quotient
, 0, OPTAB_LIB_WIDEN
);
4885 if (tem
!= quotient
)
4886 emit_move_insn (quotient
, tem
);
4887 emit_jump_insn (targetm
.gen_jump (label5
));
4889 emit_label (label1
);
4890 expand_dec (adjusted_op0
, const1_rtx
);
4891 emit_jump_insn (targetm
.gen_jump (label4
));
4893 emit_label (label2
);
4894 do_cmp_and_jump (adjusted_op0
, const0_rtx
, LT
,
4895 compute_mode
, label3
);
4896 tem
= expand_binop (compute_mode
, sdiv_optab
, adjusted_op0
, op1
,
4897 quotient
, 0, OPTAB_LIB_WIDEN
);
4898 if (tem
!= quotient
)
4899 emit_move_insn (quotient
, tem
);
4900 emit_jump_insn (targetm
.gen_jump (label5
));
4902 emit_label (label3
);
4903 expand_inc (adjusted_op0
, const1_rtx
);
4904 emit_label (label4
);
4905 tem
= expand_binop (compute_mode
, sdiv_optab
, adjusted_op0
, op1
,
4906 quotient
, 0, OPTAB_LIB_WIDEN
);
4907 if (tem
!= quotient
)
4908 emit_move_insn (quotient
, tem
);
4909 expand_inc (quotient
, const1_rtx
);
4910 emit_label (label5
);
4915 case EXACT_DIV_EXPR
:
4916 if (op1_is_constant
&& HOST_BITS_PER_WIDE_INT
>= size
)
4918 HOST_WIDE_INT d
= INTVAL (op1
);
4919 unsigned HOST_WIDE_INT ml
;
4923 pre_shift
= floor_log2 (d
& -d
);
4924 ml
= invert_mod2n (d
>> pre_shift
, size
);
4925 t1
= expand_shift (RSHIFT_EXPR
, compute_mode
, op0
,
4926 pre_shift
, NULL_RTX
, unsignedp
);
4927 quotient
= expand_mult (compute_mode
, t1
,
4928 gen_int_mode (ml
, compute_mode
),
4931 insn
= get_last_insn ();
4932 set_dst_reg_note (insn
, REG_EQUAL
,
4933 gen_rtx_fmt_ee (unsignedp
? UDIV
: DIV
,
4934 compute_mode
, op0
, op1
),
4939 case ROUND_DIV_EXPR
:
4940 case ROUND_MOD_EXPR
:
4944 rtx_code_label
*label
;
4945 label
= gen_label_rtx ();
4946 quotient
= gen_reg_rtx (compute_mode
);
4947 remainder
= gen_reg_rtx (compute_mode
);
4948 if (expand_twoval_binop (udivmod_optab
, op0
, op1
, quotient
, remainder
, 1) == 0)
4951 quotient
= expand_binop (compute_mode
, udiv_optab
, op0
, op1
,
4952 quotient
, 1, OPTAB_LIB_WIDEN
);
4953 tem
= expand_mult (compute_mode
, quotient
, op1
, NULL_RTX
, 1);
4954 remainder
= expand_binop (compute_mode
, sub_optab
, op0
, tem
,
4955 remainder
, 1, OPTAB_LIB_WIDEN
);
4957 tem
= plus_constant (compute_mode
, op1
, -1);
4958 tem
= expand_shift (RSHIFT_EXPR
, compute_mode
, tem
, 1, NULL_RTX
, 1);
4959 do_cmp_and_jump (remainder
, tem
, LEU
, compute_mode
, label
);
4960 expand_inc (quotient
, const1_rtx
);
4961 expand_dec (remainder
, op1
);
4966 rtx abs_rem
, abs_op1
, tem
, mask
;
4967 rtx_code_label
*label
;
4968 label
= gen_label_rtx ();
4969 quotient
= gen_reg_rtx (compute_mode
);
4970 remainder
= gen_reg_rtx (compute_mode
);
4971 if (expand_twoval_binop (sdivmod_optab
, op0
, op1
, quotient
, remainder
, 0) == 0)
4974 quotient
= expand_binop (compute_mode
, sdiv_optab
, op0
, op1
,
4975 quotient
, 0, OPTAB_LIB_WIDEN
);
4976 tem
= expand_mult (compute_mode
, quotient
, op1
, NULL_RTX
, 0);
4977 remainder
= expand_binop (compute_mode
, sub_optab
, op0
, tem
,
4978 remainder
, 0, OPTAB_LIB_WIDEN
);
4980 abs_rem
= expand_abs (compute_mode
, remainder
, NULL_RTX
, 1, 0);
4981 abs_op1
= expand_abs (compute_mode
, op1
, NULL_RTX
, 1, 0);
4982 tem
= expand_shift (LSHIFT_EXPR
, compute_mode
, abs_rem
,
4984 do_cmp_and_jump (tem
, abs_op1
, LTU
, compute_mode
, label
);
4985 tem
= expand_binop (compute_mode
, xor_optab
, op0
, op1
,
4986 NULL_RTX
, 0, OPTAB_WIDEN
);
4987 mask
= expand_shift (RSHIFT_EXPR
, compute_mode
, tem
,
4988 size
- 1, NULL_RTX
, 0);
4989 tem
= expand_binop (compute_mode
, xor_optab
, mask
, const1_rtx
,
4990 NULL_RTX
, 0, OPTAB_WIDEN
);
4991 tem
= expand_binop (compute_mode
, sub_optab
, tem
, mask
,
4992 NULL_RTX
, 0, OPTAB_WIDEN
);
4993 expand_inc (quotient
, tem
);
4994 tem
= expand_binop (compute_mode
, xor_optab
, mask
, op1
,
4995 NULL_RTX
, 0, OPTAB_WIDEN
);
4996 tem
= expand_binop (compute_mode
, sub_optab
, tem
, mask
,
4997 NULL_RTX
, 0, OPTAB_WIDEN
);
4998 expand_dec (remainder
, tem
);
5001 return gen_lowpart (mode
, rem_flag
? remainder
: quotient
);
5009 if (target
&& GET_MODE (target
) != compute_mode
)
5014 /* Try to produce the remainder without producing the quotient.
5015 If we seem to have a divmod pattern that does not require widening,
5016 don't try widening here. We should really have a WIDEN argument
5017 to expand_twoval_binop, since what we'd really like to do here is
5018 1) try a mod insn in compute_mode
5019 2) try a divmod insn in compute_mode
5020 3) try a div insn in compute_mode and multiply-subtract to get
5022 4) try the same things with widening allowed. */
5024 = sign_expand_binop (compute_mode
, umod_optab
, smod_optab
,
5027 ((optab_handler (optab2
, compute_mode
)
5028 != CODE_FOR_nothing
)
5029 ? OPTAB_DIRECT
: OPTAB_WIDEN
));
5032 /* No luck there. Can we do remainder and divide at once
5033 without a library call? */
5034 remainder
= gen_reg_rtx (compute_mode
);
5035 if (! expand_twoval_binop ((unsignedp
5039 NULL_RTX
, remainder
, unsignedp
))
5044 return gen_lowpart (mode
, remainder
);
5047 /* Produce the quotient. Try a quotient insn, but not a library call.
5048 If we have a divmod in this mode, use it in preference to widening
5049 the div (for this test we assume it will not fail). Note that optab2
5050 is set to the one of the two optabs that the call below will use. */
5052 = sign_expand_binop (compute_mode
, udiv_optab
, sdiv_optab
,
5053 op0
, op1
, rem_flag
? NULL_RTX
: target
,
5055 ((optab_handler (optab2
, compute_mode
)
5056 != CODE_FOR_nothing
)
5057 ? OPTAB_DIRECT
: OPTAB_WIDEN
));
5061 /* No luck there. Try a quotient-and-remainder insn,
5062 keeping the quotient alone. */
5063 quotient
= gen_reg_rtx (compute_mode
);
5064 if (! expand_twoval_binop (unsignedp
? udivmod_optab
: sdivmod_optab
,
5066 quotient
, NULL_RTX
, unsignedp
))
5070 /* Still no luck. If we are not computing the remainder,
5071 use a library call for the quotient. */
5072 quotient
= sign_expand_binop (compute_mode
,
5073 udiv_optab
, sdiv_optab
,
5075 unsignedp
, OPTAB_LIB_WIDEN
);
5082 if (target
&& GET_MODE (target
) != compute_mode
)
5087 /* No divide instruction either. Use library for remainder. */
5088 remainder
= sign_expand_binop (compute_mode
, umod_optab
, smod_optab
,
5090 unsignedp
, OPTAB_LIB_WIDEN
);
5091 /* No remainder function. Try a quotient-and-remainder
5092 function, keeping the remainder. */
5095 remainder
= gen_reg_rtx (compute_mode
);
5096 if (!expand_twoval_binop_libfunc
5097 (unsignedp
? udivmod_optab
: sdivmod_optab
,
5099 NULL_RTX
, remainder
,
5100 unsignedp
? UMOD
: MOD
))
5101 remainder
= NULL_RTX
;
5106 /* We divided. Now finish doing X - Y * (X / Y). */
5107 remainder
= expand_mult (compute_mode
, quotient
, op1
,
5108 NULL_RTX
, unsignedp
);
5109 remainder
= expand_binop (compute_mode
, sub_optab
, op0
,
5110 remainder
, target
, unsignedp
,
5115 return gen_lowpart (mode
, rem_flag
? remainder
: quotient
);
5118 /* Return a tree node with data type TYPE, describing the value of X.
5119 Usually this is an VAR_DECL, if there is no obvious better choice.
5120 X may be an expression, however we only support those expressions
5121 generated by loop.c. */
5124 make_tree (tree type
, rtx x
)
5128 switch (GET_CODE (x
))
5131 case CONST_WIDE_INT
:
5132 t
= wide_int_to_tree (type
, std::make_pair (x
, TYPE_MODE (type
)));
5136 STATIC_ASSERT (HOST_BITS_PER_WIDE_INT
* 2 <= MAX_BITSIZE_MODE_ANY_INT
);
5137 if (TARGET_SUPPORTS_WIDE_INT
== 0 && GET_MODE (x
) == VOIDmode
)
5138 t
= wide_int_to_tree (type
,
5139 wide_int::from_array (&CONST_DOUBLE_LOW (x
), 2,
5140 HOST_BITS_PER_WIDE_INT
* 2));
5142 t
= build_real (type
, *CONST_DOUBLE_REAL_VALUE (x
));
5148 int units
= CONST_VECTOR_NUNITS (x
);
5149 tree itype
= TREE_TYPE (type
);
5153 /* Build a tree with vector elements. */
5154 elts
= XALLOCAVEC (tree
, units
);
5155 for (i
= units
- 1; i
>= 0; --i
)
5157 rtx elt
= CONST_VECTOR_ELT (x
, i
);
5158 elts
[i
] = make_tree (itype
, elt
);
5161 return build_vector (type
, elts
);
5165 return fold_build2 (PLUS_EXPR
, type
, make_tree (type
, XEXP (x
, 0)),
5166 make_tree (type
, XEXP (x
, 1)));
5169 return fold_build2 (MINUS_EXPR
, type
, make_tree (type
, XEXP (x
, 0)),
5170 make_tree (type
, XEXP (x
, 1)));
5173 return fold_build1 (NEGATE_EXPR
, type
, make_tree (type
, XEXP (x
, 0)));
5176 return fold_build2 (MULT_EXPR
, type
, make_tree (type
, XEXP (x
, 0)),
5177 make_tree (type
, XEXP (x
, 1)));
5180 return fold_build2 (LSHIFT_EXPR
, type
, make_tree (type
, XEXP (x
, 0)),
5181 make_tree (type
, XEXP (x
, 1)));
5184 t
= unsigned_type_for (type
);
5185 return fold_convert (type
, build2 (RSHIFT_EXPR
, t
,
5186 make_tree (t
, XEXP (x
, 0)),
5187 make_tree (type
, XEXP (x
, 1))));
5190 t
= signed_type_for (type
);
5191 return fold_convert (type
, build2 (RSHIFT_EXPR
, t
,
5192 make_tree (t
, XEXP (x
, 0)),
5193 make_tree (type
, XEXP (x
, 1))));
5196 if (TREE_CODE (type
) != REAL_TYPE
)
5197 t
= signed_type_for (type
);
5201 return fold_convert (type
, build2 (TRUNC_DIV_EXPR
, t
,
5202 make_tree (t
, XEXP (x
, 0)),
5203 make_tree (t
, XEXP (x
, 1))));
5205 t
= unsigned_type_for (type
);
5206 return fold_convert (type
, build2 (TRUNC_DIV_EXPR
, t
,
5207 make_tree (t
, XEXP (x
, 0)),
5208 make_tree (t
, XEXP (x
, 1))));
5212 t
= lang_hooks
.types
.type_for_mode (GET_MODE (XEXP (x
, 0)),
5213 GET_CODE (x
) == ZERO_EXTEND
);
5214 return fold_convert (type
, make_tree (t
, XEXP (x
, 0)));
5217 return make_tree (type
, XEXP (x
, 0));
5220 t
= SYMBOL_REF_DECL (x
);
5222 return fold_convert (type
, build_fold_addr_expr (t
));
5223 /* else fall through. */
5226 t
= build_decl (RTL_LOCATION (x
), VAR_DECL
, NULL_TREE
, type
);
5228 /* If TYPE is a POINTER_TYPE, we might need to convert X from
5229 address mode to pointer mode. */
5230 if (POINTER_TYPE_P (type
))
5231 x
= convert_memory_address_addr_space
5232 (TYPE_MODE (type
), x
, TYPE_ADDR_SPACE (TREE_TYPE (type
)));
5234 /* Note that we do *not* use SET_DECL_RTL here, because we do not
5235 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
5236 t
->decl_with_rtl
.rtl
= x
;
5242 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5243 and returning TARGET.
5245 If TARGET is 0, a pseudo-register or constant is returned. */
5248 expand_and (machine_mode mode
, rtx op0
, rtx op1
, rtx target
)
5252 if (GET_MODE (op0
) == VOIDmode
&& GET_MODE (op1
) == VOIDmode
)
5253 tem
= simplify_binary_operation (AND
, mode
, op0
, op1
);
5255 tem
= expand_binop (mode
, and_optab
, op0
, op1
, target
, 0, OPTAB_LIB_WIDEN
);
5259 else if (tem
!= target
)
5260 emit_move_insn (target
, tem
);
5264 /* Helper function for emit_store_flag. */
5266 emit_cstore (rtx target
, enum insn_code icode
, enum rtx_code code
,
5267 machine_mode mode
, machine_mode compare_mode
,
5268 int unsignedp
, rtx x
, rtx y
, int normalizep
,
5269 machine_mode target_mode
)
5271 struct expand_operand ops
[4];
5272 rtx op0
, comparison
, subtarget
;
5274 machine_mode result_mode
= targetm
.cstore_mode (icode
);
5276 last
= get_last_insn ();
5277 x
= prepare_operand (icode
, x
, 2, mode
, compare_mode
, unsignedp
);
5278 y
= prepare_operand (icode
, y
, 3, mode
, compare_mode
, unsignedp
);
5281 delete_insns_since (last
);
5285 if (target_mode
== VOIDmode
)
5286 target_mode
= result_mode
;
5288 target
= gen_reg_rtx (target_mode
);
5290 comparison
= gen_rtx_fmt_ee (code
, result_mode
, x
, y
);
5292 create_output_operand (&ops
[0], optimize
? NULL_RTX
: target
, result_mode
);
5293 create_fixed_operand (&ops
[1], comparison
);
5294 create_fixed_operand (&ops
[2], x
);
5295 create_fixed_operand (&ops
[3], y
);
5296 if (!maybe_expand_insn (icode
, 4, ops
))
5298 delete_insns_since (last
);
5301 subtarget
= ops
[0].value
;
5303 /* If we are converting to a wider mode, first convert to
5304 TARGET_MODE, then normalize. This produces better combining
5305 opportunities on machines that have a SIGN_EXTRACT when we are
5306 testing a single bit. This mostly benefits the 68k.
5308 If STORE_FLAG_VALUE does not have the sign bit set when
5309 interpreted in MODE, we can do this conversion as unsigned, which
5310 is usually more efficient. */
5311 if (GET_MODE_SIZE (target_mode
) > GET_MODE_SIZE (result_mode
))
5313 convert_move (target
, subtarget
,
5314 val_signbit_known_clear_p (result_mode
,
5317 result_mode
= target_mode
;
5322 /* If we want to keep subexpressions around, don't reuse our last
5327 /* Now normalize to the proper value in MODE. Sometimes we don't
5328 have to do anything. */
5329 if (normalizep
== 0 || normalizep
== STORE_FLAG_VALUE
)
5331 /* STORE_FLAG_VALUE might be the most negative number, so write
5332 the comparison this way to avoid a compiler-time warning. */
5333 else if (- normalizep
== STORE_FLAG_VALUE
)
5334 op0
= expand_unop (result_mode
, neg_optab
, op0
, subtarget
, 0);
5336 /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5337 it hard to use a value of just the sign bit due to ANSI integer
5338 constant typing rules. */
5339 else if (val_signbit_known_set_p (result_mode
, STORE_FLAG_VALUE
))
5340 op0
= expand_shift (RSHIFT_EXPR
, result_mode
, op0
,
5341 GET_MODE_BITSIZE (result_mode
) - 1, subtarget
,
5345 gcc_assert (STORE_FLAG_VALUE
& 1);
5347 op0
= expand_and (result_mode
, op0
, const1_rtx
, subtarget
);
5348 if (normalizep
== -1)
5349 op0
= expand_unop (result_mode
, neg_optab
, op0
, op0
, 0);
5352 /* If we were converting to a smaller mode, do the conversion now. */
5353 if (target_mode
!= result_mode
)
5355 convert_move (target
, op0
, 0);
5363 /* A subroutine of emit_store_flag only including "tricks" that do not
5364 need a recursive call. These are kept separate to avoid infinite
5368 emit_store_flag_1 (rtx target
, enum rtx_code code
, rtx op0
, rtx op1
,
5369 machine_mode mode
, int unsignedp
, int normalizep
,
5370 machine_mode target_mode
)
5373 enum insn_code icode
;
5374 machine_mode compare_mode
;
5375 enum mode_class mclass
;
5376 enum rtx_code scode
;
5379 code
= unsigned_condition (code
);
5380 scode
= swap_condition (code
);
5382 /* If one operand is constant, make it the second one. Only do this
5383 if the other operand is not constant as well. */
5385 if (swap_commutative_operands_p (op0
, op1
))
5387 std::swap (op0
, op1
);
5388 code
= swap_condition (code
);
5391 if (mode
== VOIDmode
)
5392 mode
= GET_MODE (op0
);
5394 /* For some comparisons with 1 and -1, we can convert this to
5395 comparisons with zero. This will often produce more opportunities for
5396 store-flag insns. */
5401 if (op1
== const1_rtx
)
5402 op1
= const0_rtx
, code
= LE
;
5405 if (op1
== constm1_rtx
)
5406 op1
= const0_rtx
, code
= LT
;
5409 if (op1
== const1_rtx
)
5410 op1
= const0_rtx
, code
= GT
;
5413 if (op1
== constm1_rtx
)
5414 op1
= const0_rtx
, code
= GE
;
5417 if (op1
== const1_rtx
)
5418 op1
= const0_rtx
, code
= NE
;
5421 if (op1
== const1_rtx
)
5422 op1
= const0_rtx
, code
= EQ
;
5428 /* If we are comparing a double-word integer with zero or -1, we can
5429 convert the comparison into one involving a single word. */
5430 if (GET_MODE_BITSIZE (mode
) == BITS_PER_WORD
* 2
5431 && GET_MODE_CLASS (mode
) == MODE_INT
5432 && (!MEM_P (op0
) || ! MEM_VOLATILE_P (op0
)))
5435 if ((code
== EQ
|| code
== NE
)
5436 && (op1
== const0_rtx
|| op1
== constm1_rtx
))
5440 /* Do a logical OR or AND of the two words and compare the
5442 op00
= simplify_gen_subreg (word_mode
, op0
, mode
, 0);
5443 op01
= simplify_gen_subreg (word_mode
, op0
, mode
, UNITS_PER_WORD
);
5444 tem
= expand_binop (word_mode
,
5445 op1
== const0_rtx
? ior_optab
: and_optab
,
5446 op00
, op01
, NULL_RTX
, unsignedp
,
5450 tem
= emit_store_flag (NULL_RTX
, code
, tem
, op1
, word_mode
,
5451 unsignedp
, normalizep
);
5453 else if ((code
== LT
|| code
== GE
) && op1
== const0_rtx
)
5457 /* If testing the sign bit, can just test on high word. */
5458 op0h
= simplify_gen_subreg (word_mode
, op0
, mode
,
5459 subreg_highpart_offset (word_mode
,
5461 tem
= emit_store_flag (NULL_RTX
, code
, op0h
, op1
, word_mode
,
5462 unsignedp
, normalizep
);
5469 if (target_mode
== VOIDmode
|| GET_MODE (tem
) == target_mode
)
5472 target
= gen_reg_rtx (target_mode
);
5474 convert_move (target
, tem
,
5475 !val_signbit_known_set_p (word_mode
,
5476 (normalizep
? normalizep
5477 : STORE_FLAG_VALUE
)));
5482 /* If this is A < 0 or A >= 0, we can do this by taking the ones
5483 complement of A (for GE) and shifting the sign bit to the low bit. */
5484 if (op1
== const0_rtx
&& (code
== LT
|| code
== GE
)
5485 && GET_MODE_CLASS (mode
) == MODE_INT
5486 && (normalizep
|| STORE_FLAG_VALUE
== 1
5487 || val_signbit_p (mode
, STORE_FLAG_VALUE
)))
5494 /* If the result is to be wider than OP0, it is best to convert it
5495 first. If it is to be narrower, it is *incorrect* to convert it
5497 else if (GET_MODE_SIZE (target_mode
) > GET_MODE_SIZE (mode
))
5499 op0
= convert_modes (target_mode
, mode
, op0
, 0);
5503 if (target_mode
!= mode
)
5507 op0
= expand_unop (mode
, one_cmpl_optab
, op0
,
5508 ((STORE_FLAG_VALUE
== 1 || normalizep
)
5509 ? 0 : subtarget
), 0);
5511 if (STORE_FLAG_VALUE
== 1 || normalizep
)
5512 /* If we are supposed to produce a 0/1 value, we want to do
5513 a logical shift from the sign bit to the low-order bit; for
5514 a -1/0 value, we do an arithmetic shift. */
5515 op0
= expand_shift (RSHIFT_EXPR
, mode
, op0
,
5516 GET_MODE_BITSIZE (mode
) - 1,
5517 subtarget
, normalizep
!= -1);
5519 if (mode
!= target_mode
)
5520 op0
= convert_modes (target_mode
, mode
, op0
, 0);
5525 mclass
= GET_MODE_CLASS (mode
);
5526 for (compare_mode
= mode
; compare_mode
!= VOIDmode
;
5527 compare_mode
= GET_MODE_WIDER_MODE (compare_mode
))
5529 machine_mode optab_mode
= mclass
== MODE_CC
? CCmode
: compare_mode
;
5530 icode
= optab_handler (cstore_optab
, optab_mode
);
5531 if (icode
!= CODE_FOR_nothing
)
5533 do_pending_stack_adjust ();
5534 rtx tem
= emit_cstore (target
, icode
, code
, mode
, compare_mode
,
5535 unsignedp
, op0
, op1
, normalizep
, target_mode
);
5539 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
5541 tem
= emit_cstore (target
, icode
, scode
, mode
, compare_mode
,
5542 unsignedp
, op1
, op0
, normalizep
, target_mode
);
5553 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5554 and storing in TARGET. Normally return TARGET.
5555 Return 0 if that cannot be done.
5557 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
5558 it is VOIDmode, they cannot both be CONST_INT.
5560 UNSIGNEDP is for the case where we have to widen the operands
5561 to perform the operation. It says to use zero-extension.
5563 NORMALIZEP is 1 if we should convert the result to be either zero
5564 or one. Normalize is -1 if we should convert the result to be
5565 either zero or -1. If NORMALIZEP is zero, the result will be left
5566 "raw" out of the scc insn. */
5569 emit_store_flag (rtx target
, enum rtx_code code
, rtx op0
, rtx op1
,
5570 machine_mode mode
, int unsignedp
, int normalizep
)
5572 machine_mode target_mode
= target
? GET_MODE (target
) : VOIDmode
;
5573 enum rtx_code rcode
;
5578 /* If we compare constants, we shouldn't use a store-flag operation,
5579 but a constant load. We can get there via the vanilla route that
5580 usually generates a compare-branch sequence, but will in this case
5581 fold the comparison to a constant, and thus elide the branch. */
5582 if (CONSTANT_P (op0
) && CONSTANT_P (op1
))
5585 tem
= emit_store_flag_1 (target
, code
, op0
, op1
, mode
, unsignedp
, normalizep
,
5590 /* If we reached here, we can't do this with a scc insn, however there
5591 are some comparisons that can be done in other ways. Don't do any
5592 of these cases if branches are very cheap. */
5593 if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5596 /* See what we need to return. We can only return a 1, -1, or the
5599 if (normalizep
== 0)
5601 if (STORE_FLAG_VALUE
== 1 || STORE_FLAG_VALUE
== -1)
5602 normalizep
= STORE_FLAG_VALUE
;
5604 else if (val_signbit_p (mode
, STORE_FLAG_VALUE
))
5610 last
= get_last_insn ();
5612 /* If optimizing, use different pseudo registers for each insn, instead
5613 of reusing the same pseudo. This leads to better CSE, but slows
5614 down the compiler, since there are more pseudos */
5615 subtarget
= (!optimize
5616 && (target_mode
== mode
)) ? target
: NULL_RTX
;
5617 trueval
= GEN_INT (normalizep
? normalizep
: STORE_FLAG_VALUE
);
5619 /* For floating-point comparisons, try the reverse comparison or try
5620 changing the "orderedness" of the comparison. */
5621 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
5623 enum rtx_code first_code
;
5626 rcode
= reverse_condition_maybe_unordered (code
);
5627 if (can_compare_p (rcode
, mode
, ccp_store_flag
)
5628 && (code
== ORDERED
|| code
== UNORDERED
5629 || (! HONOR_NANS (mode
) && (code
== LTGT
|| code
== UNEQ
))
5630 || (! HONOR_SNANS (mode
) && (code
== EQ
|| code
== NE
))))
5632 int want_add
= ((STORE_FLAG_VALUE
== 1 && normalizep
== -1)
5633 || (STORE_FLAG_VALUE
== -1 && normalizep
== 1));
5635 /* For the reverse comparison, use either an addition or a XOR. */
5637 && rtx_cost (GEN_INT (normalizep
), mode
, PLUS
, 1,
5638 optimize_insn_for_speed_p ()) == 0)
5640 tem
= emit_store_flag_1 (subtarget
, rcode
, op0
, op1
, mode
, 0,
5641 STORE_FLAG_VALUE
, target_mode
);
5643 return expand_binop (target_mode
, add_optab
, tem
,
5644 gen_int_mode (normalizep
, target_mode
),
5645 target
, 0, OPTAB_WIDEN
);
5648 && rtx_cost (trueval
, mode
, XOR
, 1,
5649 optimize_insn_for_speed_p ()) == 0)
5651 tem
= emit_store_flag_1 (subtarget
, rcode
, op0
, op1
, mode
, 0,
5652 normalizep
, target_mode
);
5654 return expand_binop (target_mode
, xor_optab
, tem
, trueval
,
5655 target
, INTVAL (trueval
) >= 0, OPTAB_WIDEN
);
5659 delete_insns_since (last
);
5661 /* Cannot split ORDERED and UNORDERED, only try the above trick. */
5662 if (code
== ORDERED
|| code
== UNORDERED
)
5665 and_them
= split_comparison (code
, mode
, &first_code
, &code
);
5667 /* If there are no NaNs, the first comparison should always fall through.
5668 Effectively change the comparison to the other one. */
5669 if (!HONOR_NANS (mode
))
5671 gcc_assert (first_code
== (and_them
? ORDERED
: UNORDERED
));
5672 return emit_store_flag_1 (target
, code
, op0
, op1
, mode
, 0, normalizep
,
5676 if (!HAVE_conditional_move
)
5679 /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5680 conditional move. */
5681 tem
= emit_store_flag_1 (subtarget
, first_code
, op0
, op1
, mode
, 0,
5682 normalizep
, target_mode
);
5687 tem
= emit_conditional_move (target
, code
, op0
, op1
, mode
,
5688 tem
, const0_rtx
, GET_MODE (tem
), 0);
5690 tem
= emit_conditional_move (target
, code
, op0
, op1
, mode
,
5691 trueval
, tem
, GET_MODE (tem
), 0);
5694 delete_insns_since (last
);
5698 /* The remaining tricks only apply to integer comparisons. */
5700 if (GET_MODE_CLASS (mode
) != MODE_INT
)
5703 /* If this is an equality comparison of integers, we can try to exclusive-or
5704 (or subtract) the two operands and use a recursive call to try the
5705 comparison with zero. Don't do any of these cases if branches are
5708 if ((code
== EQ
|| code
== NE
) && op1
!= const0_rtx
)
5710 tem
= expand_binop (mode
, xor_optab
, op0
, op1
, subtarget
, 1,
5714 tem
= expand_binop (mode
, sub_optab
, op0
, op1
, subtarget
, 1,
5717 tem
= emit_store_flag (target
, code
, tem
, const0_rtx
,
5718 mode
, unsignedp
, normalizep
);
5722 delete_insns_since (last
);
5725 /* For integer comparisons, try the reverse comparison. However, for
5726 small X and if we'd have anyway to extend, implementing "X != 0"
5727 as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0". */
5728 rcode
= reverse_condition (code
);
5729 if (can_compare_p (rcode
, mode
, ccp_store_flag
)
5730 && ! (optab_handler (cstore_optab
, mode
) == CODE_FOR_nothing
5732 && GET_MODE_SIZE (mode
) < UNITS_PER_WORD
5733 && op1
== const0_rtx
))
5735 int want_add
= ((STORE_FLAG_VALUE
== 1 && normalizep
== -1)
5736 || (STORE_FLAG_VALUE
== -1 && normalizep
== 1));
5738 /* Again, for the reverse comparison, use either an addition or a XOR. */
5740 && rtx_cost (GEN_INT (normalizep
), mode
, PLUS
, 1,
5741 optimize_insn_for_speed_p ()) == 0)
5743 tem
= emit_store_flag_1 (subtarget
, rcode
, op0
, op1
, mode
, 0,
5744 STORE_FLAG_VALUE
, target_mode
);
5746 tem
= expand_binop (target_mode
, add_optab
, tem
,
5747 gen_int_mode (normalizep
, target_mode
),
5748 target
, 0, OPTAB_WIDEN
);
5751 && rtx_cost (trueval
, mode
, XOR
, 1,
5752 optimize_insn_for_speed_p ()) == 0)
5754 tem
= emit_store_flag_1 (subtarget
, rcode
, op0
, op1
, mode
, 0,
5755 normalizep
, target_mode
);
5757 tem
= expand_binop (target_mode
, xor_optab
, tem
, trueval
, target
,
5758 INTVAL (trueval
) >= 0, OPTAB_WIDEN
);
5763 delete_insns_since (last
);
5766 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5767 the constant zero. Reject all other comparisons at this point. Only
5768 do LE and GT if branches are expensive since they are expensive on
5769 2-operand machines. */
5771 if (op1
!= const0_rtx
5772 || (code
!= EQ
&& code
!= NE
5773 && (BRANCH_COST (optimize_insn_for_speed_p (),
5774 false) <= 1 || (code
!= LE
&& code
!= GT
))))
5777 /* Try to put the result of the comparison in the sign bit. Assume we can't
5778 do the necessary operation below. */
5782 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
5783 the sign bit set. */
5787 /* This is destructive, so SUBTARGET can't be OP0. */
5788 if (rtx_equal_p (subtarget
, op0
))
5791 tem
= expand_binop (mode
, sub_optab
, op0
, const1_rtx
, subtarget
, 0,
5794 tem
= expand_binop (mode
, ior_optab
, op0
, tem
, subtarget
, 0,
5798 /* To see if A > 0, compute (((signed) A) << BITS) - A, where BITS is the
5799 number of bits in the mode of OP0, minus one. */
5803 if (rtx_equal_p (subtarget
, op0
))
5806 tem
= expand_shift (RSHIFT_EXPR
, mode
, op0
,
5807 GET_MODE_BITSIZE (mode
) - 1,
5809 tem
= expand_binop (mode
, sub_optab
, tem
, op0
, subtarget
, 0,
5813 if (code
== EQ
|| code
== NE
)
5815 /* For EQ or NE, one way to do the comparison is to apply an operation
5816 that converts the operand into a positive number if it is nonzero
5817 or zero if it was originally zero. Then, for EQ, we subtract 1 and
5818 for NE we negate. This puts the result in the sign bit. Then we
5819 normalize with a shift, if needed.
5821 Two operations that can do the above actions are ABS and FFS, so try
5822 them. If that doesn't work, and MODE is smaller than a full word,
5823 we can use zero-extension to the wider mode (an unsigned conversion)
5824 as the operation. */
5826 /* Note that ABS doesn't yield a positive number for INT_MIN, but
5827 that is compensated by the subsequent overflow when subtracting
5830 if (optab_handler (abs_optab
, mode
) != CODE_FOR_nothing
)
5831 tem
= expand_unop (mode
, abs_optab
, op0
, subtarget
, 1);
5832 else if (optab_handler (ffs_optab
, mode
) != CODE_FOR_nothing
)
5833 tem
= expand_unop (mode
, ffs_optab
, op0
, subtarget
, 1);
5834 else if (GET_MODE_SIZE (mode
) < UNITS_PER_WORD
)
5836 tem
= convert_modes (word_mode
, mode
, op0
, 1);
5843 tem
= expand_binop (mode
, sub_optab
, tem
, const1_rtx
, subtarget
,
5846 tem
= expand_unop (mode
, neg_optab
, tem
, subtarget
, 0);
5849 /* If we couldn't do it that way, for NE we can "or" the two's complement
5850 of the value with itself. For EQ, we take the one's complement of
5851 that "or", which is an extra insn, so we only handle EQ if branches
5856 || BRANCH_COST (optimize_insn_for_speed_p (),
5859 if (rtx_equal_p (subtarget
, op0
))
5862 tem
= expand_unop (mode
, neg_optab
, op0
, subtarget
, 0);
5863 tem
= expand_binop (mode
, ior_optab
, tem
, op0
, subtarget
, 0,
5866 if (tem
&& code
== EQ
)
5867 tem
= expand_unop (mode
, one_cmpl_optab
, tem
, subtarget
, 0);
5871 if (tem
&& normalizep
)
5872 tem
= expand_shift (RSHIFT_EXPR
, mode
, tem
,
5873 GET_MODE_BITSIZE (mode
) - 1,
5874 subtarget
, normalizep
== 1);
5880 else if (GET_MODE (tem
) != target_mode
)
5882 convert_move (target
, tem
, 0);
5885 else if (!subtarget
)
5887 emit_move_insn (target
, tem
);
5892 delete_insns_since (last
);
5897 /* Like emit_store_flag, but always succeeds. */
5900 emit_store_flag_force (rtx target
, enum rtx_code code
, rtx op0
, rtx op1
,
5901 machine_mode mode
, int unsignedp
, int normalizep
)
5904 rtx_code_label
*label
;
5905 rtx trueval
, falseval
;
5907 /* First see if emit_store_flag can do the job. */
5908 tem
= emit_store_flag (target
, code
, op0
, op1
, mode
, unsignedp
, normalizep
);
5913 target
= gen_reg_rtx (word_mode
);
5915 /* If this failed, we have to do this with set/compare/jump/set code.
5916 For foo != 0, if foo is in OP0, just replace it with 1 if nonzero. */
5917 trueval
= normalizep
? GEN_INT (normalizep
) : const1_rtx
;
5919 && GET_MODE_CLASS (mode
) == MODE_INT
5922 && op1
== const0_rtx
)
5924 label
= gen_label_rtx ();
5925 do_compare_rtx_and_jump (target
, const0_rtx
, EQ
, unsignedp
, mode
,
5926 NULL_RTX
, NULL
, label
, -1);
5927 emit_move_insn (target
, trueval
);
5933 || reg_mentioned_p (target
, op0
) || reg_mentioned_p (target
, op1
))
5934 target
= gen_reg_rtx (GET_MODE (target
));
5936 /* Jump in the right direction if the target cannot implement CODE
5937 but can jump on its reverse condition. */
5938 falseval
= const0_rtx
;
5939 if (! can_compare_p (code
, mode
, ccp_jump
)
5940 && (! FLOAT_MODE_P (mode
)
5941 || code
== ORDERED
|| code
== UNORDERED
5942 || (! HONOR_NANS (mode
) && (code
== LTGT
|| code
== UNEQ
))
5943 || (! HONOR_SNANS (mode
) && (code
== EQ
|| code
== NE
))))
5945 enum rtx_code rcode
;
5946 if (FLOAT_MODE_P (mode
))
5947 rcode
= reverse_condition_maybe_unordered (code
);
5949 rcode
= reverse_condition (code
);
5951 /* Canonicalize to UNORDERED for the libcall. */
5952 if (can_compare_p (rcode
, mode
, ccp_jump
)
5953 || (code
== ORDERED
&& ! can_compare_p (ORDERED
, mode
, ccp_jump
)))
5956 trueval
= const0_rtx
;
5961 emit_move_insn (target
, trueval
);
5962 label
= gen_label_rtx ();
5963 do_compare_rtx_and_jump (op0
, op1
, code
, unsignedp
, mode
, NULL_RTX
, NULL
,
5966 emit_move_insn (target
, falseval
);
5972 /* Perform possibly multi-word comparison and conditional jump to LABEL
5973 if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE. This is
5974 now a thin wrapper around do_compare_rtx_and_jump. */
5977 do_cmp_and_jump (rtx arg1
, rtx arg2
, enum rtx_code op
, machine_mode mode
,
5978 rtx_code_label
*label
)
5980 int unsignedp
= (op
== LTU
|| op
== LEU
|| op
== GTU
|| op
== GEU
);
5981 do_compare_rtx_and_jump (arg1
, arg2
, op
, unsignedp
, mode
, NULL_RTX
,