1 /* GIMPLE store merging and byte swapping passes.
2 Copyright (C) 2009-2021 Free Software Foundation, Inc.
3 Contributed by ARM Ltd.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
12 GCC is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* The purpose of the store merging pass is to combine multiple memory stores
22 of constant values, values loaded from memory, bitwise operations on those,
23 or bit-field values, to consecutive locations, into fewer wider stores.
25 For example, if we have a sequence performing four byte stores to
26 consecutive memory locations:
31 we can transform this into a single 4-byte store if the target supports it:
32 [p] := imm1:imm2:imm3:imm4 concatenated according to endianness.
39 if there is no overlap can be transformed into a single 4-byte
40 load followed by single 4-byte store.
44 [p + 1B] := [q + 1B] ^ imm2;
45 [p + 2B] := [q + 2B] ^ imm3;
46 [p + 3B] := [q + 3B] ^ imm4;
47 if there is no overlap can be transformed into a single 4-byte
48 load, xored with imm1:imm2:imm3:imm4 and stored using a single 4-byte store.
52 [p:31] := val & 0x7FFFFFFF;
53 we can transform this into a single 4-byte store if the target supports it:
54 [p] := imm:(val & 0x7FFFFFFF) concatenated according to endianness.
56 The algorithm is applied to each basic block in three phases:
58 1) Scan through the basic block and record assignments to destinations
59 that can be expressed as a store to memory of a certain size at a certain
60 bit offset from base expressions we can handle. For bit-fields we also
61 record the surrounding bit region, i.e. bits that could be stored in
62 a read-modify-write operation when storing the bit-field. Record store
63 chains to different bases in a hash_map (m_stores) and make sure to
64 terminate such chains when appropriate (for example when the stored
65 values get used subsequently).
66 These stores can be a result of structure element initializers, array stores
67 etc. A store_immediate_info object is recorded for every such store.
68 Record as many such assignments to a single base as possible until a
69 statement that interferes with the store sequence is encountered.
70 Each store has up to 2 operands, which can be a either constant, a memory
71 load or an SSA name, from which the value to be stored can be computed.
72 At most one of the operands can be a constant. The operands are recorded
73 in store_operand_info struct.
75 2) Analyze the chains of stores recorded in phase 1) (i.e. the vector of
76 store_immediate_info objects) and coalesce contiguous stores into
77 merged_store_group objects. For bit-field stores, we don't need to
78 require the stores to be contiguous, just their surrounding bit regions
79 have to be contiguous. If the expression being stored is different
80 between adjacent stores, such as one store storing a constant and
81 following storing a value loaded from memory, or if the loaded memory
82 objects are not adjacent, a new merged_store_group is created as well.
84 For example, given the stores:
91 This phase would produce two merged_store_group objects, one recording the
92 two bytes stored in the memory region [p : p + 1] and another
93 recording the four bytes stored in the memory region [p + 3 : p + 6].
95 3) The merged_store_group objects produced in phase 2) are processed
96 to generate the sequence of wider stores that set the contiguous memory
97 regions to the sequence of bytes that correspond to it. This may emit
98 multiple stores per store group to handle contiguous stores that are not
99 of a size that is a power of 2. For example it can try to emit a 40-bit
100 store as a 32-bit store followed by an 8-bit store.
101 We try to emit as wide stores as we can while respecting STRICT_ALIGNMENT
102 or TARGET_SLOW_UNALIGNED_ACCESS settings.
104 Note on endianness and example:
105 Consider 2 contiguous 16-bit stores followed by 2 contiguous 8-bit stores:
111 The memory layout for little-endian (LE) and big-endian (BE) must be:
121 To merge these into a single 48-bit merged value 'val' in phase 2)
122 on little-endian we insert stores to higher (consecutive) bitpositions
123 into the most significant bits of the merged value.
124 The final merged value would be: 0xcdab56781234
126 For big-endian we insert stores to higher bitpositions into the least
127 significant bits of the merged value.
128 The final merged value would be: 0x12345678abcd
130 Then, in phase 3), we want to emit this 48-bit value as a 32-bit store
131 followed by a 16-bit store. Again, we must consider endianness when
132 breaking down the 48-bit value 'val' computed above.
133 For little endian we emit:
134 [p] (32-bit) := 0x56781234; // val & 0x0000ffffffff;
135 [p + 4B] (16-bit) := 0xcdab; // (val & 0xffff00000000) >> 32;
137 Whereas for big-endian we emit:
138 [p] (32-bit) := 0x12345678; // (val & 0xffffffff0000) >> 16;
139 [p + 4B] (16-bit) := 0xabcd; // val & 0x00000000ffff; */
143 #include "coretypes.h"
147 #include "builtins.h"
148 #include "fold-const.h"
149 #include "tree-pass.h"
151 #include "gimple-pretty-print.h"
153 #include "fold-const.h"
154 #include "print-tree.h"
155 #include "tree-hash-traits.h"
156 #include "gimple-iterator.h"
157 #include "gimplify.h"
158 #include "gimple-fold.h"
159 #include "stor-layout.h"
162 #include "cfgcleanup.h"
163 #include "tree-cfg.h"
167 #include "gimplify-me.h"
169 #include "expr.h" /* For get_bit_range. */
170 #include "optabs-tree.h"
172 #include "selftest.h"
174 /* The maximum size (in bits) of the stores this pass should generate. */
175 #define MAX_STORE_BITSIZE (BITS_PER_WORD)
176 #define MAX_STORE_BYTES (MAX_STORE_BITSIZE / BITS_PER_UNIT)
178 /* Limit to bound the number of aliasing checks for loads with the same
179 vuse as the corresponding store. */
180 #define MAX_STORE_ALIAS_CHECKS 64
186 /* Number of hand-written 16-bit nop / bswaps found. */
189 /* Number of hand-written 32-bit nop / bswaps found. */
192 /* Number of hand-written 64-bit nop / bswaps found. */
194 } nop_stats
, bswap_stats
;
196 /* A symbolic number structure is used to detect byte permutation and selection
197 patterns of a source. To achieve that, its field N contains an artificial
198 number consisting of BITS_PER_MARKER sized markers tracking where does each
199 byte come from in the source:
201 0 - target byte has the value 0
202 FF - target byte has an unknown value (eg. due to sign extension)
203 1..size - marker value is the byte index in the source (0 for lsb).
205 To detect permutations on memory sources (arrays and structures), a symbolic
206 number is also associated:
207 - a base address BASE_ADDR and an OFFSET giving the address of the source;
208 - a range which gives the difference between the highest and lowest accessed
209 memory location to make such a symbolic number;
210 - the address SRC of the source element of lowest address as a convenience
211 to easily get BASE_ADDR + offset + lowest bytepos;
212 - number of expressions N_OPS bitwise ored together to represent
213 approximate cost of the computation.
215 Note 1: the range is different from size as size reflects the size of the
216 type of the current expression. For instance, for an array char a[],
217 (short) a[0] | (short) a[3] would have a size of 2 but a range of 4 while
218 (short) a[0] | ((short) a[0] << 1) would still have a size of 2 but this
221 Note 2: for non-memory sources, range holds the same value as size.
223 Note 3: SRC points to the SSA_NAME in case of non-memory source. */
225 struct symbolic_number
{
230 poly_int64_pod bytepos
;
234 unsigned HOST_WIDE_INT range
;
238 #define BITS_PER_MARKER 8
239 #define MARKER_MASK ((1 << BITS_PER_MARKER) - 1)
240 #define MARKER_BYTE_UNKNOWN MARKER_MASK
241 #define HEAD_MARKER(n, size) \
242 ((n) & ((uint64_t) MARKER_MASK << (((size) - 1) * BITS_PER_MARKER)))
244 /* The number which the find_bswap_or_nop_1 result should match in
245 order to have a nop. The number is masked according to the size of
246 the symbolic number before using it. */
247 #define CMPNOP (sizeof (int64_t) < 8 ? 0 : \
248 (uint64_t)0x08070605 << 32 | 0x04030201)
250 /* The number which the find_bswap_or_nop_1 result should match in
251 order to have a byte swap. The number is masked according to the
252 size of the symbolic number before using it. */
253 #define CMPXCHG (sizeof (int64_t) < 8 ? 0 : \
254 (uint64_t)0x01020304 << 32 | 0x05060708)
256 /* Perform a SHIFT or ROTATE operation by COUNT bits on symbolic
257 number N. Return false if the requested operation is not permitted
258 on a symbolic number. */
261 do_shift_rotate (enum tree_code code
,
262 struct symbolic_number
*n
,
265 int i
, size
= TYPE_PRECISION (n
->type
) / BITS_PER_UNIT
;
266 unsigned head_marker
;
269 || count
>= TYPE_PRECISION (n
->type
)
270 || count
% BITS_PER_UNIT
!= 0)
272 count
= (count
/ BITS_PER_UNIT
) * BITS_PER_MARKER
;
274 /* Zero out the extra bits of N in order to avoid them being shifted
275 into the significant bits. */
276 if (size
< 64 / BITS_PER_MARKER
)
277 n
->n
&= ((uint64_t) 1 << (size
* BITS_PER_MARKER
)) - 1;
285 head_marker
= HEAD_MARKER (n
->n
, size
);
287 /* Arithmetic shift of signed type: result is dependent on the value. */
288 if (!TYPE_UNSIGNED (n
->type
) && head_marker
)
289 for (i
= 0; i
< count
/ BITS_PER_MARKER
; i
++)
290 n
->n
|= (uint64_t) MARKER_BYTE_UNKNOWN
291 << ((size
- 1 - i
) * BITS_PER_MARKER
);
294 n
->n
= (n
->n
<< count
) | (n
->n
>> ((size
* BITS_PER_MARKER
) - count
));
297 n
->n
= (n
->n
>> count
) | (n
->n
<< ((size
* BITS_PER_MARKER
) - count
));
302 /* Zero unused bits for size. */
303 if (size
< 64 / BITS_PER_MARKER
)
304 n
->n
&= ((uint64_t) 1 << (size
* BITS_PER_MARKER
)) - 1;
308 /* Perform sanity checking for the symbolic number N and the gimple
312 verify_symbolic_number_p (struct symbolic_number
*n
, gimple
*stmt
)
316 lhs_type
= TREE_TYPE (gimple_get_lhs (stmt
));
318 if (TREE_CODE (lhs_type
) != INTEGER_TYPE
319 && TREE_CODE (lhs_type
) != ENUMERAL_TYPE
)
322 if (TYPE_PRECISION (lhs_type
) != TYPE_PRECISION (n
->type
))
328 /* Initialize the symbolic number N for the bswap pass from the base element
329 SRC manipulated by the bitwise OR expression. */
332 init_symbolic_number (struct symbolic_number
*n
, tree src
)
336 if (!INTEGRAL_TYPE_P (TREE_TYPE (src
)) && !POINTER_TYPE_P (TREE_TYPE (src
)))
339 n
->base_addr
= n
->offset
= n
->alias_set
= n
->vuse
= NULL_TREE
;
342 /* Set up the symbolic number N by setting each byte to a value between 1 and
343 the byte size of rhs1. The highest order byte is set to n->size and the
344 lowest order byte to 1. */
345 n
->type
= TREE_TYPE (src
);
346 size
= TYPE_PRECISION (n
->type
);
347 if (size
% BITS_PER_UNIT
!= 0)
349 size
/= BITS_PER_UNIT
;
350 if (size
> 64 / BITS_PER_MARKER
)
356 if (size
< 64 / BITS_PER_MARKER
)
357 n
->n
&= ((uint64_t) 1 << (size
* BITS_PER_MARKER
)) - 1;
362 /* Check if STMT might be a byte swap or a nop from a memory source and returns
363 the answer. If so, REF is that memory source and the base of the memory area
364 accessed and the offset of the access from that base are recorded in N. */
367 find_bswap_or_nop_load (gimple
*stmt
, tree ref
, struct symbolic_number
*n
)
369 /* Leaf node is an array or component ref. Memorize its base and
370 offset from base to compare to other such leaf node. */
371 poly_int64 bitsize
, bitpos
, bytepos
;
373 int unsignedp
, reversep
, volatilep
;
374 tree offset
, base_addr
;
376 /* Not prepared to handle PDP endian. */
377 if (BYTES_BIG_ENDIAN
!= WORDS_BIG_ENDIAN
)
380 if (!gimple_assign_load_p (stmt
) || gimple_has_volatile_ops (stmt
))
383 base_addr
= get_inner_reference (ref
, &bitsize
, &bitpos
, &offset
, &mode
,
384 &unsignedp
, &reversep
, &volatilep
);
386 if (TREE_CODE (base_addr
) == TARGET_MEM_REF
)
387 /* Do not rewrite TARGET_MEM_REF. */
389 else if (TREE_CODE (base_addr
) == MEM_REF
)
391 poly_offset_int bit_offset
= 0;
392 tree off
= TREE_OPERAND (base_addr
, 1);
394 if (!integer_zerop (off
))
396 poly_offset_int boff
= mem_ref_offset (base_addr
);
397 boff
<<= LOG2_BITS_PER_UNIT
;
401 base_addr
= TREE_OPERAND (base_addr
, 0);
403 /* Avoid returning a negative bitpos as this may wreak havoc later. */
404 if (maybe_lt (bit_offset
, 0))
406 tree byte_offset
= wide_int_to_tree
407 (sizetype
, bits_to_bytes_round_down (bit_offset
));
408 bit_offset
= num_trailing_bits (bit_offset
);
410 offset
= size_binop (PLUS_EXPR
, offset
, byte_offset
);
412 offset
= byte_offset
;
415 bitpos
+= bit_offset
.force_shwi ();
418 base_addr
= build_fold_addr_expr (base_addr
);
420 if (!multiple_p (bitpos
, BITS_PER_UNIT
, &bytepos
))
422 if (!multiple_p (bitsize
, BITS_PER_UNIT
))
427 if (!init_symbolic_number (n
, ref
))
429 n
->base_addr
= base_addr
;
431 n
->bytepos
= bytepos
;
432 n
->alias_set
= reference_alias_ptr_type (ref
);
433 n
->vuse
= gimple_vuse (stmt
);
437 /* Compute the symbolic number N representing the result of a bitwise OR on 2
438 symbolic number N1 and N2 whose source statements are respectively
439 SOURCE_STMT1 and SOURCE_STMT2. */
442 perform_symbolic_merge (gimple
*source_stmt1
, struct symbolic_number
*n1
,
443 gimple
*source_stmt2
, struct symbolic_number
*n2
,
444 struct symbolic_number
*n
)
449 struct symbolic_number
*n_start
;
451 tree rhs1
= gimple_assign_rhs1 (source_stmt1
);
452 if (TREE_CODE (rhs1
) == BIT_FIELD_REF
453 && TREE_CODE (TREE_OPERAND (rhs1
, 0)) == SSA_NAME
)
454 rhs1
= TREE_OPERAND (rhs1
, 0);
455 tree rhs2
= gimple_assign_rhs1 (source_stmt2
);
456 if (TREE_CODE (rhs2
) == BIT_FIELD_REF
457 && TREE_CODE (TREE_OPERAND (rhs2
, 0)) == SSA_NAME
)
458 rhs2
= TREE_OPERAND (rhs2
, 0);
460 /* Sources are different, cancel bswap if they are not memory location with
461 the same base (array, structure, ...). */
465 HOST_WIDE_INT start1
, start2
, start_sub
, end_sub
, end1
, end2
, end
;
466 struct symbolic_number
*toinc_n_ptr
, *n_end
;
467 basic_block bb1
, bb2
;
469 if (!n1
->base_addr
|| !n2
->base_addr
470 || !operand_equal_p (n1
->base_addr
, n2
->base_addr
, 0))
473 if (!n1
->offset
!= !n2
->offset
474 || (n1
->offset
&& !operand_equal_p (n1
->offset
, n2
->offset
, 0)))
478 if (!(n2
->bytepos
- n1
->bytepos
).is_constant (&start2
))
484 start_sub
= start2
- start1
;
489 start_sub
= start1
- start2
;
492 bb1
= gimple_bb (source_stmt1
);
493 bb2
= gimple_bb (source_stmt2
);
494 if (dominated_by_p (CDI_DOMINATORS
, bb1
, bb2
))
495 source_stmt
= source_stmt1
;
497 source_stmt
= source_stmt2
;
499 /* Find the highest address at which a load is performed and
500 compute related info. */
501 end1
= start1
+ (n1
->range
- 1);
502 end2
= start2
+ (n2
->range
- 1);
506 end_sub
= end2
- end1
;
511 end_sub
= end1
- end2
;
513 n_end
= (end2
> end1
) ? n2
: n1
;
515 /* Find symbolic number whose lsb is the most significant. */
516 if (BYTES_BIG_ENDIAN
)
517 toinc_n_ptr
= (n_end
== n1
) ? n2
: n1
;
519 toinc_n_ptr
= (n_start
== n1
) ? n2
: n1
;
521 n
->range
= end
- MIN (start1
, start2
) + 1;
523 /* Check that the range of memory covered can be represented by
524 a symbolic number. */
525 if (n
->range
> 64 / BITS_PER_MARKER
)
528 /* Reinterpret byte marks in symbolic number holding the value of
529 bigger weight according to target endianness. */
530 inc
= BYTES_BIG_ENDIAN
? end_sub
: start_sub
;
531 size
= TYPE_PRECISION (n1
->type
) / BITS_PER_UNIT
;
532 for (i
= 0; i
< size
; i
++, inc
<<= BITS_PER_MARKER
)
535 = (toinc_n_ptr
->n
>> (i
* BITS_PER_MARKER
)) & MARKER_MASK
;
536 if (marker
&& marker
!= MARKER_BYTE_UNKNOWN
)
537 toinc_n_ptr
->n
+= inc
;
542 n
->range
= n1
->range
;
544 source_stmt
= source_stmt1
;
548 || alias_ptr_types_compatible_p (n1
->alias_set
, n2
->alias_set
))
549 n
->alias_set
= n1
->alias_set
;
551 n
->alias_set
= ptr_type_node
;
552 n
->vuse
= n_start
->vuse
;
553 n
->base_addr
= n_start
->base_addr
;
554 n
->offset
= n_start
->offset
;
555 n
->src
= n_start
->src
;
556 n
->bytepos
= n_start
->bytepos
;
557 n
->type
= n_start
->type
;
558 size
= TYPE_PRECISION (n
->type
) / BITS_PER_UNIT
;
560 for (i
= 0, mask
= MARKER_MASK
; i
< size
; i
++, mask
<<= BITS_PER_MARKER
)
562 uint64_t masked1
, masked2
;
564 masked1
= n1
->n
& mask
;
565 masked2
= n2
->n
& mask
;
566 if (masked1
&& masked2
&& masked1
!= masked2
)
569 n
->n
= n1
->n
| n2
->n
;
570 n
->n_ops
= n1
->n_ops
+ n2
->n_ops
;
575 /* find_bswap_or_nop_1 invokes itself recursively with N and tries to perform
576 the operation given by the rhs of STMT on the result. If the operation
577 could successfully be executed the function returns a gimple stmt whose
578 rhs's first tree is the expression of the source operand and NULL
582 find_bswap_or_nop_1 (gimple
*stmt
, struct symbolic_number
*n
, int limit
)
585 tree rhs1
, rhs2
= NULL
;
586 gimple
*rhs1_stmt
, *rhs2_stmt
, *source_stmt1
;
587 enum gimple_rhs_class rhs_class
;
589 if (!limit
|| !is_gimple_assign (stmt
))
592 rhs1
= gimple_assign_rhs1 (stmt
);
594 if (find_bswap_or_nop_load (stmt
, rhs1
, n
))
597 /* Handle BIT_FIELD_REF. */
598 if (TREE_CODE (rhs1
) == BIT_FIELD_REF
599 && TREE_CODE (TREE_OPERAND (rhs1
, 0)) == SSA_NAME
)
601 if (!tree_fits_uhwi_p (TREE_OPERAND (rhs1
, 1))
602 || !tree_fits_uhwi_p (TREE_OPERAND (rhs1
, 2)))
605 unsigned HOST_WIDE_INT bitsize
= tree_to_uhwi (TREE_OPERAND (rhs1
, 1));
606 unsigned HOST_WIDE_INT bitpos
= tree_to_uhwi (TREE_OPERAND (rhs1
, 2));
607 if (bitpos
% BITS_PER_UNIT
== 0
608 && bitsize
% BITS_PER_UNIT
== 0
609 && init_symbolic_number (n
, TREE_OPERAND (rhs1
, 0)))
611 /* Handle big-endian bit numbering in BIT_FIELD_REF. */
612 if (BYTES_BIG_ENDIAN
)
613 bitpos
= TYPE_PRECISION (n
->type
) - bitpos
- bitsize
;
616 if (!do_shift_rotate (RSHIFT_EXPR
, n
, bitpos
))
621 uint64_t tmp
= (1 << BITS_PER_UNIT
) - 1;
622 for (unsigned i
= 0; i
< bitsize
/ BITS_PER_UNIT
;
623 i
++, tmp
<<= BITS_PER_UNIT
)
624 mask
|= (uint64_t) MARKER_MASK
<< (i
* BITS_PER_MARKER
);
628 n
->type
= TREE_TYPE (rhs1
);
630 n
->range
= TYPE_PRECISION (n
->type
) / BITS_PER_UNIT
;
632 return verify_symbolic_number_p (n
, stmt
) ? stmt
: NULL
;
638 if (TREE_CODE (rhs1
) != SSA_NAME
)
641 code
= gimple_assign_rhs_code (stmt
);
642 rhs_class
= gimple_assign_rhs_class (stmt
);
643 rhs1_stmt
= SSA_NAME_DEF_STMT (rhs1
);
645 if (rhs_class
== GIMPLE_BINARY_RHS
)
646 rhs2
= gimple_assign_rhs2 (stmt
);
648 /* Handle unary rhs and binary rhs with integer constants as second
651 if (rhs_class
== GIMPLE_UNARY_RHS
652 || (rhs_class
== GIMPLE_BINARY_RHS
653 && TREE_CODE (rhs2
) == INTEGER_CST
))
655 if (code
!= BIT_AND_EXPR
656 && code
!= LSHIFT_EXPR
657 && code
!= RSHIFT_EXPR
658 && code
!= LROTATE_EXPR
659 && code
!= RROTATE_EXPR
660 && !CONVERT_EXPR_CODE_P (code
))
663 source_stmt1
= find_bswap_or_nop_1 (rhs1_stmt
, n
, limit
- 1);
665 /* If find_bswap_or_nop_1 returned NULL, STMT is a leaf node and
666 we have to initialize the symbolic number. */
669 if (gimple_assign_load_p (stmt
)
670 || !init_symbolic_number (n
, rhs1
))
679 int i
, size
= TYPE_PRECISION (n
->type
) / BITS_PER_UNIT
;
680 uint64_t val
= int_cst_value (rhs2
), mask
= 0;
681 uint64_t tmp
= (1 << BITS_PER_UNIT
) - 1;
683 /* Only constants masking full bytes are allowed. */
684 for (i
= 0; i
< size
; i
++, tmp
<<= BITS_PER_UNIT
)
685 if ((val
& tmp
) != 0 && (val
& tmp
) != tmp
)
688 mask
|= (uint64_t) MARKER_MASK
<< (i
* BITS_PER_MARKER
);
697 if (!do_shift_rotate (code
, n
, (int) TREE_INT_CST_LOW (rhs2
)))
702 int i
, type_size
, old_type_size
;
705 type
= TREE_TYPE (gimple_assign_lhs (stmt
));
706 type_size
= TYPE_PRECISION (type
);
707 if (type_size
% BITS_PER_UNIT
!= 0)
709 type_size
/= BITS_PER_UNIT
;
710 if (type_size
> 64 / BITS_PER_MARKER
)
713 /* Sign extension: result is dependent on the value. */
714 old_type_size
= TYPE_PRECISION (n
->type
) / BITS_PER_UNIT
;
715 if (!TYPE_UNSIGNED (n
->type
) && type_size
> old_type_size
716 && HEAD_MARKER (n
->n
, old_type_size
))
717 for (i
= 0; i
< type_size
- old_type_size
; i
++)
718 n
->n
|= (uint64_t) MARKER_BYTE_UNKNOWN
719 << ((type_size
- 1 - i
) * BITS_PER_MARKER
);
721 if (type_size
< 64 / BITS_PER_MARKER
)
723 /* If STMT casts to a smaller type mask out the bits not
724 belonging to the target type. */
725 n
->n
&= ((uint64_t) 1 << (type_size
* BITS_PER_MARKER
)) - 1;
729 n
->range
= type_size
;
735 return verify_symbolic_number_p (n
, stmt
) ? source_stmt1
: NULL
;
738 /* Handle binary rhs. */
740 if (rhs_class
== GIMPLE_BINARY_RHS
)
742 struct symbolic_number n1
, n2
;
743 gimple
*source_stmt
, *source_stmt2
;
745 if (code
!= BIT_IOR_EXPR
)
748 if (TREE_CODE (rhs2
) != SSA_NAME
)
751 rhs2_stmt
= SSA_NAME_DEF_STMT (rhs2
);
756 source_stmt1
= find_bswap_or_nop_1 (rhs1_stmt
, &n1
, limit
- 1);
761 source_stmt2
= find_bswap_or_nop_1 (rhs2_stmt
, &n2
, limit
- 1);
766 if (TYPE_PRECISION (n1
.type
) != TYPE_PRECISION (n2
.type
))
769 if (n1
.vuse
!= n2
.vuse
)
773 = perform_symbolic_merge (source_stmt1
, &n1
, source_stmt2
, &n2
, n
);
778 if (!verify_symbolic_number_p (n
, stmt
))
790 /* Helper for find_bswap_or_nop and try_coalesce_bswap to compute
791 *CMPXCHG, *CMPNOP and adjust *N. */
794 find_bswap_or_nop_finalize (struct symbolic_number
*n
, uint64_t *cmpxchg
,
795 uint64_t *cmpnop
, bool *cast64_to_32
)
800 /* The number which the find_bswap_or_nop_1 result should match in order
801 to have a full byte swap. The number is shifted to the right
802 according to the size of the symbolic number before using it. */
805 *cast64_to_32
= false;
807 /* Find real size of result (highest non-zero byte). */
809 for (tmpn
= n
->n
, rsize
= 0; tmpn
; tmpn
>>= BITS_PER_MARKER
, rsize
++);
813 /* Zero out the bits corresponding to untouched bytes in original gimple
815 if (n
->range
< (int) sizeof (int64_t))
817 mask
= ((uint64_t) 1 << (n
->range
* BITS_PER_MARKER
)) - 1;
818 if (n
->base_addr
== NULL
820 && int_size_in_bytes (TREE_TYPE (n
->src
)) == 8)
822 /* If all bytes in n->n are either 0 or in [5..8] range, this
823 might be a candidate for (unsigned) __builtin_bswap64 (src).
824 It is not worth it for (unsigned short) __builtin_bswap64 (src)
825 or (unsigned short) __builtin_bswap32 (src). */
826 *cast64_to_32
= true;
827 for (tmpn
= n
->n
; tmpn
; tmpn
>>= BITS_PER_MARKER
)
828 if ((tmpn
& MARKER_MASK
)
829 && ((tmpn
& MARKER_MASK
) <= 4 || (tmpn
& MARKER_MASK
) > 8))
831 *cast64_to_32
= false;
838 *cmpxchg
>>= (64 / BITS_PER_MARKER
- n
->range
) * BITS_PER_MARKER
;
842 /* Zero out the bits corresponding to unused bytes in the result of the
843 gimple expression. */
844 if (rsize
< n
->range
)
846 if (BYTES_BIG_ENDIAN
)
848 mask
= ((uint64_t) 1 << (rsize
* BITS_PER_MARKER
)) - 1;
850 *cmpnop
>>= (n
->range
- rsize
) * BITS_PER_MARKER
;
854 mask
= ((uint64_t) 1 << (rsize
* BITS_PER_MARKER
)) - 1;
855 *cmpxchg
>>= (n
->range
- rsize
) * BITS_PER_MARKER
;
863 n
->range
*= BITS_PER_UNIT
;
866 /* Check if STMT completes a bswap implementation or a read in a given
867 endianness consisting of ORs, SHIFTs and ANDs and sets *BSWAP
868 accordingly. It also sets N to represent the kind of operations
869 performed: size of the resulting expression and whether it works on
870 a memory source, and if so alias-set and vuse. At last, the
871 function returns a stmt whose rhs's first tree is the source
875 find_bswap_or_nop (gimple
*stmt
, struct symbolic_number
*n
, bool *bswap
,
876 bool *cast64_to_32
, uint64_t *mask
)
878 tree type_size
= TYPE_SIZE_UNIT (TREE_TYPE (gimple_get_lhs (stmt
)));
879 if (!tree_fits_uhwi_p (type_size
))
882 /* The last parameter determines the depth search limit. It usually
883 correlates directly to the number n of bytes to be touched. We
884 increase that number by 2 * (log2(n) + 1) here in order to also
885 cover signed -> unsigned conversions of the src operand as can be seen
886 in libgcc, and for initial shift/and operation of the src operand. */
887 int limit
= tree_to_uhwi (type_size
);
888 limit
+= 2 * (1 + (int) ceil_log2 ((unsigned HOST_WIDE_INT
) limit
));
889 gimple
*ins_stmt
= find_bswap_or_nop_1 (stmt
, n
, limit
);
893 if (gimple_assign_rhs_code (stmt
) != CONSTRUCTOR
894 || BYTES_BIG_ENDIAN
!= WORDS_BIG_ENDIAN
)
896 unsigned HOST_WIDE_INT sz
= tree_to_uhwi (type_size
) * BITS_PER_UNIT
;
897 if (sz
!= 16 && sz
!= 32 && sz
!= 64)
899 tree rhs
= gimple_assign_rhs1 (stmt
);
900 if (CONSTRUCTOR_NELTS (rhs
) == 0)
902 tree eltype
= TREE_TYPE (TREE_TYPE (rhs
));
903 unsigned HOST_WIDE_INT eltsz
904 = int_size_in_bytes (eltype
) * BITS_PER_UNIT
;
905 if (TYPE_PRECISION (eltype
) != eltsz
)
907 constructor_elt
*elt
;
909 tree type
= build_nonstandard_integer_type (sz
, 1);
910 FOR_EACH_VEC_SAFE_ELT (CONSTRUCTOR_ELTS (rhs
), i
, elt
)
912 if (TREE_CODE (elt
->value
) != SSA_NAME
913 || !INTEGRAL_TYPE_P (TREE_TYPE (elt
->value
)))
915 struct symbolic_number n1
;
917 = find_bswap_or_nop_1 (SSA_NAME_DEF_STMT (elt
->value
), &n1
,
925 n1
.range
= sz
/ BITS_PER_UNIT
;
929 ins_stmt
= source_stmt
;
934 if (n
->vuse
!= n1
.vuse
)
937 struct symbolic_number n0
= *n
;
939 if (!BYTES_BIG_ENDIAN
)
941 if (!do_shift_rotate (LSHIFT_EXPR
, &n1
, i
* eltsz
))
944 else if (!do_shift_rotate (LSHIFT_EXPR
, &n0
, eltsz
))
947 = perform_symbolic_merge (ins_stmt
, &n0
, source_stmt
, &n1
, n
);
955 uint64_t cmpxchg
, cmpnop
;
956 find_bswap_or_nop_finalize (n
, &cmpxchg
, &cmpnop
, cast64_to_32
);
958 /* A complete byte swap should make the symbolic number to start with
959 the largest digit in the highest order byte. Unchanged symbolic
960 number indicates a read with same endianness as target architecture. */
961 *mask
= ~(uint64_t) 0;
964 else if (n
->n
== cmpxchg
)
969 for (uint64_t msk
= MARKER_MASK
; msk
; msk
<<= BITS_PER_MARKER
)
970 if ((n
->n
& msk
) == 0)
972 else if ((n
->n
& msk
) == (cmpxchg
& msk
))
981 /* Useless bit manipulation performed by code. */
982 if (!n
->base_addr
&& n
->n
== cmpnop
&& n
->n_ops
== 1)
988 const pass_data pass_data_optimize_bswap
=
990 GIMPLE_PASS
, /* type */
992 OPTGROUP_NONE
, /* optinfo_flags */
994 PROP_ssa
, /* properties_required */
995 0, /* properties_provided */
996 0, /* properties_destroyed */
997 0, /* todo_flags_start */
998 0, /* todo_flags_finish */
1001 class pass_optimize_bswap
: public gimple_opt_pass
1004 pass_optimize_bswap (gcc::context
*ctxt
)
1005 : gimple_opt_pass (pass_data_optimize_bswap
, ctxt
)
1008 /* opt_pass methods: */
1009 virtual bool gate (function
*)
1011 return flag_expensive_optimizations
&& optimize
&& BITS_PER_UNIT
== 8;
1014 virtual unsigned int execute (function
*);
1016 }; // class pass_optimize_bswap
1018 /* Helper function for bswap_replace. Build VIEW_CONVERT_EXPR from
1019 VAL to TYPE. If VAL has different type size, emit a NOP_EXPR cast
1023 bswap_view_convert (gimple_stmt_iterator
*gsi
, tree type
, tree val
)
1025 gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (val
))
1026 || POINTER_TYPE_P (TREE_TYPE (val
)));
1027 if (TYPE_SIZE (type
) != TYPE_SIZE (TREE_TYPE (val
)))
1029 HOST_WIDE_INT prec
= TREE_INT_CST_LOW (TYPE_SIZE (type
));
1030 if (POINTER_TYPE_P (TREE_TYPE (val
)))
1033 = gimple_build_assign (make_ssa_name (pointer_sized_int_node
),
1035 gsi_insert_before (gsi
, g
, GSI_SAME_STMT
);
1036 val
= gimple_assign_lhs (g
);
1038 tree itype
= build_nonstandard_integer_type (prec
, 1);
1039 gimple
*g
= gimple_build_assign (make_ssa_name (itype
), NOP_EXPR
, val
);
1040 gsi_insert_before (gsi
, g
, GSI_SAME_STMT
);
1041 val
= gimple_assign_lhs (g
);
1043 return build1 (VIEW_CONVERT_EXPR
, type
, val
);
1046 /* Perform the bswap optimization: replace the expression computed in the rhs
1047 of gsi_stmt (GSI) (or if NULL add instead of replace) by an equivalent
1048 bswap, load or load + bswap expression.
1049 Which of these alternatives replace the rhs is given by N->base_addr (non
1050 null if a load is needed) and BSWAP. The type, VUSE and set-alias of the
1051 load to perform are also given in N while the builtin bswap invoke is given
1052 in FNDECL. Finally, if a load is involved, INS_STMT refers to one of the
1053 load statements involved to construct the rhs in gsi_stmt (GSI) and
1054 N->range gives the size of the rhs expression for maintaining some
1057 Note that if the replacement involve a load and if gsi_stmt (GSI) is
1058 non-NULL, that stmt is moved just after INS_STMT to do the load with the
1059 same VUSE which can lead to gsi_stmt (GSI) changing of basic block. */
1062 bswap_replace (gimple_stmt_iterator gsi
, gimple
*ins_stmt
, tree fndecl
,
1063 tree bswap_type
, tree load_type
, struct symbolic_number
*n
,
1064 bool bswap
, uint64_t mask
)
1066 tree src
, tmp
, tgt
= NULL_TREE
;
1067 gimple
*bswap_stmt
, *mask_stmt
= NULL
;
1068 tree_code conv_code
= NOP_EXPR
;
1070 gimple
*cur_stmt
= gsi_stmt (gsi
);
1074 tgt
= gimple_assign_lhs (cur_stmt
);
1075 if (gimple_assign_rhs_code (cur_stmt
) == CONSTRUCTOR
1077 && VECTOR_TYPE_P (TREE_TYPE (tgt
)))
1078 conv_code
= VIEW_CONVERT_EXPR
;
1081 /* Need to load the value from memory first. */
1084 gimple_stmt_iterator gsi_ins
= gsi
;
1086 gsi_ins
= gsi_for_stmt (ins_stmt
);
1087 tree addr_expr
, addr_tmp
, val_expr
, val_tmp
;
1088 tree load_offset_ptr
, aligned_load_type
;
1090 unsigned align
= get_object_alignment (src
);
1091 poly_int64 load_offset
= 0;
1095 basic_block ins_bb
= gimple_bb (ins_stmt
);
1096 basic_block cur_bb
= gimple_bb (cur_stmt
);
1097 if (!dominated_by_p (CDI_DOMINATORS
, cur_bb
, ins_bb
))
1100 /* Move cur_stmt just before one of the load of the original
1101 to ensure it has the same VUSE. See PR61517 for what could
1103 if (gimple_bb (cur_stmt
) != gimple_bb (ins_stmt
))
1104 reset_flow_sensitive_info (gimple_assign_lhs (cur_stmt
));
1105 gsi_move_before (&gsi
, &gsi_ins
);
1106 gsi
= gsi_for_stmt (cur_stmt
);
1111 /* Compute address to load from and cast according to the size
1113 addr_expr
= build_fold_addr_expr (src
);
1114 if (is_gimple_mem_ref_addr (addr_expr
))
1115 addr_tmp
= unshare_expr (addr_expr
);
1118 addr_tmp
= unshare_expr (n
->base_addr
);
1119 if (!is_gimple_mem_ref_addr (addr_tmp
))
1120 addr_tmp
= force_gimple_operand_gsi_1 (&gsi
, addr_tmp
,
1121 is_gimple_mem_ref_addr
,
1124 load_offset
= n
->bytepos
;
1128 = force_gimple_operand_gsi (&gsi
, unshare_expr (n
->offset
),
1129 true, NULL_TREE
, true,
1132 = gimple_build_assign (make_ssa_name (TREE_TYPE (addr_tmp
)),
1133 POINTER_PLUS_EXPR
, addr_tmp
, off
);
1134 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
1135 addr_tmp
= gimple_assign_lhs (stmt
);
1139 /* Perform the load. */
1140 aligned_load_type
= load_type
;
1141 if (align
< TYPE_ALIGN (load_type
))
1142 aligned_load_type
= build_aligned_type (load_type
, align
);
1143 load_offset_ptr
= build_int_cst (n
->alias_set
, load_offset
);
1144 val_expr
= fold_build2 (MEM_REF
, aligned_load_type
, addr_tmp
,
1150 nop_stats
.found_16bit
++;
1151 else if (n
->range
== 32)
1152 nop_stats
.found_32bit
++;
1155 gcc_assert (n
->range
== 64);
1156 nop_stats
.found_64bit
++;
1159 /* Convert the result of load if necessary. */
1160 if (tgt
&& !useless_type_conversion_p (TREE_TYPE (tgt
), load_type
))
1162 val_tmp
= make_temp_ssa_name (aligned_load_type
, NULL
,
1164 load_stmt
= gimple_build_assign (val_tmp
, val_expr
);
1165 gimple_set_vuse (load_stmt
, n
->vuse
);
1166 gsi_insert_before (&gsi
, load_stmt
, GSI_SAME_STMT
);
1167 if (conv_code
== VIEW_CONVERT_EXPR
)
1168 val_tmp
= bswap_view_convert (&gsi
, TREE_TYPE (tgt
), val_tmp
);
1169 gimple_assign_set_rhs_with_ops (&gsi
, conv_code
, val_tmp
);
1170 update_stmt (cur_stmt
);
1174 gimple_assign_set_rhs_with_ops (&gsi
, MEM_REF
, val_expr
);
1175 gimple_set_vuse (cur_stmt
, n
->vuse
);
1176 update_stmt (cur_stmt
);
1180 tgt
= make_ssa_name (load_type
);
1181 cur_stmt
= gimple_build_assign (tgt
, MEM_REF
, val_expr
);
1182 gimple_set_vuse (cur_stmt
, n
->vuse
);
1183 gsi_insert_before (&gsi
, cur_stmt
, GSI_SAME_STMT
);
1189 "%d bit load in target endianness found at: ",
1191 print_gimple_stmt (dump_file
, cur_stmt
, 0);
1197 val_tmp
= make_temp_ssa_name (aligned_load_type
, NULL
, "load_dst");
1198 load_stmt
= gimple_build_assign (val_tmp
, val_expr
);
1199 gimple_set_vuse (load_stmt
, n
->vuse
);
1200 gsi_insert_before (&gsi
, load_stmt
, GSI_SAME_STMT
);
1207 if (tgt
&& !useless_type_conversion_p (TREE_TYPE (tgt
), TREE_TYPE (src
)))
1209 if (!is_gimple_val (src
))
1211 if (conv_code
== VIEW_CONVERT_EXPR
)
1212 src
= bswap_view_convert (&gsi
, TREE_TYPE (tgt
), src
);
1213 g
= gimple_build_assign (tgt
, conv_code
, src
);
1216 g
= gimple_build_assign (tgt
, src
);
1220 nop_stats
.found_16bit
++;
1221 else if (n
->range
== 32)
1222 nop_stats
.found_32bit
++;
1225 gcc_assert (n
->range
== 64);
1226 nop_stats
.found_64bit
++;
1231 "%d bit reshuffle in target endianness found at: ",
1234 print_gimple_stmt (dump_file
, cur_stmt
, 0);
1237 print_generic_expr (dump_file
, tgt
, TDF_NONE
);
1238 fprintf (dump_file
, "\n");
1242 gsi_replace (&gsi
, g
, true);
1245 else if (TREE_CODE (src
) == BIT_FIELD_REF
)
1246 src
= TREE_OPERAND (src
, 0);
1249 bswap_stats
.found_16bit
++;
1250 else if (n
->range
== 32)
1251 bswap_stats
.found_32bit
++;
1254 gcc_assert (n
->range
== 64);
1255 bswap_stats
.found_64bit
++;
1260 /* Convert the src expression if necessary. */
1261 if (!useless_type_conversion_p (TREE_TYPE (tmp
), bswap_type
))
1263 gimple
*convert_stmt
;
1265 tmp
= make_temp_ssa_name (bswap_type
, NULL
, "bswapsrc");
1266 convert_stmt
= gimple_build_assign (tmp
, NOP_EXPR
, src
);
1267 gsi_insert_before (&gsi
, convert_stmt
, GSI_SAME_STMT
);
1270 /* Canonical form for 16 bit bswap is a rotate expression. Only 16bit values
1271 are considered as rotation of 2N bit values by N bits is generally not
1272 equivalent to a bswap. Consider for instance 0x01020304 r>> 16 which
1273 gives 0x03040102 while a bswap for that value is 0x04030201. */
1274 if (bswap
&& n
->range
== 16)
1276 tree count
= build_int_cst (NULL
, BITS_PER_UNIT
);
1277 src
= fold_build2 (LROTATE_EXPR
, bswap_type
, tmp
, count
);
1278 bswap_stmt
= gimple_build_assign (NULL
, src
);
1281 bswap_stmt
= gimple_build_call (fndecl
, 1, tmp
);
1283 if (tgt
== NULL_TREE
)
1284 tgt
= make_ssa_name (bswap_type
);
1287 if (mask
!= ~(uint64_t) 0)
1289 tree m
= build_int_cst (bswap_type
, mask
);
1290 tmp
= make_temp_ssa_name (bswap_type
, NULL
, "bswapdst");
1291 gimple_set_lhs (bswap_stmt
, tmp
);
1292 mask_stmt
= gimple_build_assign (tgt
, BIT_AND_EXPR
, tmp
, m
);
1296 /* Convert the result if necessary. */
1297 if (!useless_type_conversion_p (TREE_TYPE (tgt
), bswap_type
))
1299 gimple
*convert_stmt
;
1301 tmp
= make_temp_ssa_name (bswap_type
, NULL
, "bswapdst");
1303 if (conv_code
== VIEW_CONVERT_EXPR
)
1304 atmp
= bswap_view_convert (&gsi
, TREE_TYPE (tgt
), tmp
);
1305 convert_stmt
= gimple_build_assign (tgt
, conv_code
, atmp
);
1306 gsi_insert_after (&gsi
, convert_stmt
, GSI_SAME_STMT
);
1309 gimple_set_lhs (mask_stmt
? mask_stmt
: bswap_stmt
, tmp
);
1313 fprintf (dump_file
, "%d bit bswap implementation found at: ",
1316 print_gimple_stmt (dump_file
, cur_stmt
, 0);
1319 print_generic_expr (dump_file
, tgt
, TDF_NONE
);
1320 fprintf (dump_file
, "\n");
1327 gsi_insert_after (&gsi
, mask_stmt
, GSI_SAME_STMT
);
1328 gsi_insert_after (&gsi
, bswap_stmt
, GSI_SAME_STMT
);
1329 gsi_remove (&gsi
, true);
1333 gsi_insert_before (&gsi
, bswap_stmt
, GSI_SAME_STMT
);
1335 gsi_insert_before (&gsi
, mask_stmt
, GSI_SAME_STMT
);
1340 /* Try to optimize an assignment CUR_STMT with CONSTRUCTOR on the rhs
1341 using bswap optimizations. CDI_DOMINATORS need to be
1342 computed on entry. Return true if it has been optimized and
1343 TODO_update_ssa is needed. */
1346 maybe_optimize_vector_constructor (gimple
*cur_stmt
)
1348 tree fndecl
= NULL_TREE
, bswap_type
= NULL_TREE
, load_type
;
1349 struct symbolic_number n
;
1352 gcc_assert (is_gimple_assign (cur_stmt
)
1353 && gimple_assign_rhs_code (cur_stmt
) == CONSTRUCTOR
);
1355 tree rhs
= gimple_assign_rhs1 (cur_stmt
);
1356 if (!VECTOR_TYPE_P (TREE_TYPE (rhs
))
1357 || !INTEGRAL_TYPE_P (TREE_TYPE (TREE_TYPE (rhs
)))
1358 || gimple_assign_lhs (cur_stmt
) == NULL_TREE
)
1361 HOST_WIDE_INT sz
= int_size_in_bytes (TREE_TYPE (rhs
)) * BITS_PER_UNIT
;
1365 load_type
= bswap_type
= uint16_type_node
;
1368 if (builtin_decl_explicit_p (BUILT_IN_BSWAP32
)
1369 && optab_handler (bswap_optab
, SImode
) != CODE_FOR_nothing
)
1371 load_type
= uint32_type_node
;
1372 fndecl
= builtin_decl_explicit (BUILT_IN_BSWAP32
);
1373 bswap_type
= TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl
)));
1379 if (builtin_decl_explicit_p (BUILT_IN_BSWAP64
)
1380 && (optab_handler (bswap_optab
, DImode
) != CODE_FOR_nothing
1381 || (word_mode
== SImode
1382 && builtin_decl_explicit_p (BUILT_IN_BSWAP32
)
1383 && optab_handler (bswap_optab
, SImode
) != CODE_FOR_nothing
)))
1385 load_type
= uint64_type_node
;
1386 fndecl
= builtin_decl_explicit (BUILT_IN_BSWAP64
);
1387 bswap_type
= TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl
)));
1398 gimple
*ins_stmt
= find_bswap_or_nop (cur_stmt
, &n
, &bswap
,
1399 &cast64_to_32
, &mask
);
1401 || n
.range
!= (unsigned HOST_WIDE_INT
) sz
1403 || mask
!= ~(uint64_t) 0)
1406 if (bswap
&& !fndecl
&& n
.range
!= 16)
1409 memset (&nop_stats
, 0, sizeof (nop_stats
));
1410 memset (&bswap_stats
, 0, sizeof (bswap_stats
));
1411 return bswap_replace (gsi_for_stmt (cur_stmt
), ins_stmt
, fndecl
,
1412 bswap_type
, load_type
, &n
, bswap
, mask
) != NULL_TREE
;
1415 /* Find manual byte swap implementations as well as loads in a given
1416 endianness.  Byte swaps are turned into a bswap builtin invocation
1417 while endian loads are converted to a bswap builtin invocation or
1418 a simple load according to the target endianness.  */
1421 pass_optimize_bswap::execute (function
*fun
)
1424 bool bswap32_p
, bswap64_p
;
1425 bool changed
= false;
1426 tree bswap32_type
= NULL_TREE
, bswap64_type
= NULL_TREE
;
1428 bswap32_p
= (builtin_decl_explicit_p (BUILT_IN_BSWAP32
)
1429 && optab_handler (bswap_optab
, SImode
) != CODE_FOR_nothing
);
1430 bswap64_p
= (builtin_decl_explicit_p (BUILT_IN_BSWAP64
)
1431 && (optab_handler (bswap_optab
, DImode
) != CODE_FOR_nothing
1432 || (bswap32_p
&& word_mode
== SImode
)));
1434 /* Determine the argument type of the builtins. The code later on
1435 assumes that the return and argument type are the same. */
1438 tree fndecl
= builtin_decl_explicit (BUILT_IN_BSWAP32
);
1439 bswap32_type
= TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl
)));
1444 tree fndecl
= builtin_decl_explicit (BUILT_IN_BSWAP64
);
1445 bswap64_type
= TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl
)));
1448 memset (&nop_stats
, 0, sizeof (nop_stats
));
1449 memset (&bswap_stats
, 0, sizeof (bswap_stats
));
1450 calculate_dominance_info (CDI_DOMINATORS
);
1452 FOR_EACH_BB_FN (bb
, fun
)
1454 gimple_stmt_iterator gsi
;
1456 /* We do a reverse scan for bswap patterns to make sure we get the
1457 widest match. As bswap pattern matching doesn't handle previously
1458 inserted smaller bswap replacements as sub-patterns, the wider
1459 variant wouldn't be detected. */
1460 for (gsi
= gsi_last_bb (bb
); !gsi_end_p (gsi
);)
1462 gimple
*ins_stmt
, *cur_stmt
= gsi_stmt (gsi
);
1463 tree fndecl
= NULL_TREE
, bswap_type
= NULL_TREE
, load_type
;
1464 enum tree_code code
;
1465 struct symbolic_number n
;
1466 bool bswap
, cast64_to_32
;
1469 /* This gsi_prev (&gsi) is not part of the for loop because cur_stmt
1470 might be moved to a different basic block by bswap_replace and gsi
1471 must not point to it if that's the case.  Moving the gsi_prev
1472 there makes sure that gsi points to the statement previous to
1473 cur_stmt while still making sure that all statements are
1474 considered in this basic block. */
1477 if (!is_gimple_assign (cur_stmt
))
1480 code
= gimple_assign_rhs_code (cur_stmt
);
1485 if (!tree_fits_uhwi_p (gimple_assign_rhs2 (cur_stmt
))
1486 || tree_to_uhwi (gimple_assign_rhs2 (cur_stmt
))
1494 tree rhs
= gimple_assign_rhs1 (cur_stmt
);
1495 if (VECTOR_TYPE_P (TREE_TYPE (rhs
))
1496 && INTEGRAL_TYPE_P (TREE_TYPE (TREE_TYPE (rhs
))))
1504 ins_stmt
= find_bswap_or_nop (cur_stmt
, &n
, &bswap
,
1505 &cast64_to_32
, &mask
);
1513 /* Already in canonical form, nothing to do. */
1514 if (code
== LROTATE_EXPR
|| code
== RROTATE_EXPR
)
1516 load_type
= bswap_type
= uint16_type_node
;
1519 load_type
= uint32_type_node
;
1522 fndecl
= builtin_decl_explicit (BUILT_IN_BSWAP32
);
1523 bswap_type
= bswap32_type
;
1527 load_type
= uint64_type_node
;
1530 fndecl
= builtin_decl_explicit (BUILT_IN_BSWAP64
);
1531 bswap_type
= bswap64_type
;
1538 if (bswap
&& !fndecl
&& n
.range
!= 16)
1541 if (bswap_replace (gsi_for_stmt (cur_stmt
), ins_stmt
, fndecl
,
1542 bswap_type
, load_type
, &n
, bswap
, mask
))
1547 statistics_counter_event (fun
, "16-bit nop implementations found",
1548 nop_stats
.found_16bit
);
1549 statistics_counter_event (fun
, "32-bit nop implementations found",
1550 nop_stats
.found_32bit
);
1551 statistics_counter_event (fun
, "64-bit nop implementations found",
1552 nop_stats
.found_64bit
);
1553 statistics_counter_event (fun
, "16-bit bswap implementations found",
1554 bswap_stats
.found_16bit
);
1555 statistics_counter_event (fun
, "32-bit bswap implementations found",
1556 bswap_stats
.found_32bit
);
1557 statistics_counter_event (fun
, "64-bit bswap implementations found",
1558 bswap_stats
.found_64bit
);
1560 return (changed
? TODO_update_ssa
: 0);
1566 make_pass_optimize_bswap (gcc::context
*ctxt
)
1568 return new pass_optimize_bswap (ctxt
);
1573 /* Struct recording one operand for the store, which is either a constant,
1574 then VAL represents the constant and all the other fields are zero, or
1575 a memory load, then VAL represents the reference, BASE_ADDR is non-NULL
1576 and the other fields also reflect the memory load, or an SSA name, then
1577 VAL represents the SSA name and all the other fields are zero.  */
1579 class store_operand_info
1584 poly_uint64 bitsize
;
1586 poly_uint64 bitregion_start
;
1587 poly_uint64 bitregion_end
;
1590 store_operand_info ();
1593 store_operand_info::store_operand_info ()
1594 : val (NULL_TREE
), base_addr (NULL_TREE
), bitsize (0), bitpos (0),
1595 bitregion_start (0), bitregion_end (0), stmt (NULL
), bit_not_p (false)
1599 /* Struct recording the information about a single store of an immediate
1600 to memory. These are created in the first phase and coalesced into
1601 merged_store_group objects in the second phase. */
1603 class store_immediate_info
1606 unsigned HOST_WIDE_INT bitsize
;
1607 unsigned HOST_WIDE_INT bitpos
;
1608 unsigned HOST_WIDE_INT bitregion_start
;
1609 /* This is one past the last bit of the bit region. */
1610 unsigned HOST_WIDE_INT bitregion_end
;
1613 /* INTEGER_CST for constant store, STRING_CST for string store,
1614 MEM_REF for memory copy, BIT_*_EXPR for logical bitwise operation,
1615 BIT_INSERT_EXPR for bit insertion.
1616 LROTATE_EXPR if it can be only bswap optimized and
1617 ops are not really meaningful.
1618 NOP_EXPR if bswap optimization detected identity, ops
1619 are not meaningful. */
1620 enum tree_code rhs_code
;
1621 /* Two fields for bswap optimization purposes. */
1622 struct symbolic_number n
;
1624 /* True if BIT_{AND,IOR,XOR}_EXPR result is inverted before storing. */
1626 /* True if ops have been swapped and thus ops[1] represents
1627 rhs1 of BIT_{AND,IOR,XOR}_EXPR and ops[0] represents rhs2. */
1629 /* The index number of the landing pad, or 0 if there is none. */
1631 /* Operands. For BIT_*_EXPR rhs_code both operands are used, otherwise
1632 just the first one. */
1633 store_operand_info ops
[2];
1634 store_immediate_info (unsigned HOST_WIDE_INT
, unsigned HOST_WIDE_INT
,
1635 unsigned HOST_WIDE_INT
, unsigned HOST_WIDE_INT
,
1636 gimple
*, unsigned int, enum tree_code
,
1637 struct symbolic_number
&, gimple
*, bool, int,
1638 const store_operand_info
&,
1639 const store_operand_info
&);
1642 store_immediate_info::store_immediate_info (unsigned HOST_WIDE_INT bs
,
1643 unsigned HOST_WIDE_INT bp
,
1644 unsigned HOST_WIDE_INT brs
,
1645 unsigned HOST_WIDE_INT bre
,
1648 enum tree_code rhscode
,
1649 struct symbolic_number
&nr
,
1653 const store_operand_info
&op0r
,
1654 const store_operand_info
&op1r
)
1655 : bitsize (bs
), bitpos (bp
), bitregion_start (brs
), bitregion_end (bre
),
1656 stmt (st
), order (ord
), rhs_code (rhscode
), n (nr
),
1657 ins_stmt (ins_stmtp
), bit_not_p (bitnotp
), ops_swapped_p (false),
1658 lp_nr (nr2
), ops
{ op0r
, op1r
}
1662 /* Struct representing a group of stores to contiguous memory locations.
1663 These are produced by the second phase (coalescing) and consumed in the
1664 third phase that outputs the widened stores. */
1666 class merged_store_group
1669 unsigned HOST_WIDE_INT start
;
1670 unsigned HOST_WIDE_INT width
;
1671 unsigned HOST_WIDE_INT bitregion_start
;
1672 unsigned HOST_WIDE_INT bitregion_end
;
1673 /* The size of the allocated memory for val and mask. */
1674 unsigned HOST_WIDE_INT buf_size
;
1675 unsigned HOST_WIDE_INT align_base
;
1676 poly_uint64 load_align_base
[2];
1679 unsigned int load_align
[2];
1680 unsigned int first_order
;
1681 unsigned int last_order
;
1683 bool string_concatenation
;
1684 bool only_constants
;
1686 unsigned int first_nonmergeable_order
;
1689 auto_vec
<store_immediate_info
*> stores
;
1690 /* We record the first and last original statements in the sequence because
1691 we'll need their vuse/vdef and replacement position. It's easier to keep
1692 track of them separately as 'stores' is reordered by apply_stores. */
1696 unsigned char *mask
;
1698 merged_store_group (store_immediate_info
*);
1699 ~merged_store_group ();
1700 bool can_be_merged_into (store_immediate_info
*);
1701 void merge_into (store_immediate_info
*);
1702 void merge_overlapping (store_immediate_info
*);
1703 bool apply_stores ();
1705 void do_merge (store_immediate_info
*);
1708 /* Debug helper. Dump LEN elements of byte array PTR to FD in hex. */
1711 dump_char_array (FILE *fd
, unsigned char *ptr
, unsigned int len
)
1716 for (unsigned int i
= 0; i
< len
; i
++)
1717 fprintf (fd
, "%02x ", ptr
[i
]);
1721 /* Clear out LEN bits starting from bit START in the byte array
1722 PTR. This clears the bits to the *right* from START.
1723 START must be within [0, BITS_PER_UNIT) and counts starting from
1724 the least significant bit. */
1727 clear_bit_region_be (unsigned char *ptr
, unsigned int start
,
1732 /* Clear len bits to the right of start. */
1733 else if (len
<= start
+ 1)
1735 unsigned char mask
= (~(~0U << len
));
1736 mask
= mask
<< (start
+ 1U - len
);
1739 else if (start
!= BITS_PER_UNIT
- 1)
1741 clear_bit_region_be (ptr
, start
, (start
% BITS_PER_UNIT
) + 1);
1742 clear_bit_region_be (ptr
+ 1, BITS_PER_UNIT
- 1,
1743 len
- (start
% BITS_PER_UNIT
) - 1);
1745 else if (start
== BITS_PER_UNIT
- 1
1746 && len
> BITS_PER_UNIT
)
1748 unsigned int nbytes
= len
/ BITS_PER_UNIT
;
1749 memset (ptr
, 0, nbytes
);
1750 if (len
% BITS_PER_UNIT
!= 0)
1751 clear_bit_region_be (ptr
+ nbytes
, BITS_PER_UNIT
- 1,
1752 len
% BITS_PER_UNIT
);
1758 /* In the byte array PTR clear the bit region starting at bit
1759 START that is LEN bits wide.
1760 For regions spanning multiple bytes do this recursively until we reach
1761 zero LEN or a region contained within a single byte. */
1764 clear_bit_region (unsigned char *ptr
, unsigned int start
,
1767 /* Degenerate base case. */
1770 else if (start
>= BITS_PER_UNIT
)
1771 clear_bit_region (ptr
+ 1, start
- BITS_PER_UNIT
, len
);
1772 /* Second base case. */
1773 else if ((start
+ len
) <= BITS_PER_UNIT
)
1775 unsigned char mask
= (~0U) << (unsigned char) (BITS_PER_UNIT
- len
);
1776 mask
>>= BITS_PER_UNIT
- (start
+ len
);
1782 /* Clear most significant bits in a byte and proceed with the next byte. */
1783 else if (start
!= 0)
1785 clear_bit_region (ptr
, start
, BITS_PER_UNIT
- start
);
1786 clear_bit_region (ptr
+ 1, 0, len
- (BITS_PER_UNIT
- start
));
1788 /* Whole bytes need to be cleared. */
1789 else if (start
== 0 && len
> BITS_PER_UNIT
)
1791 unsigned int nbytes
= len
/ BITS_PER_UNIT
;
1792 /* We could recurse on each byte but we clear whole bytes, so a simple
1794 memset (ptr
, '\0', nbytes
);
1795 /* Clear the remaining sub-byte region if there is one. */
1796 if (len
% BITS_PER_UNIT
!= 0)
1797 clear_bit_region (ptr
+ nbytes
, 0, len
% BITS_PER_UNIT
);
1803 /* Write BITLEN bits of EXPR to the byte array PTR at
1804 bit position BITPOS. PTR should contain TOTAL_BYTES elements.
1805 Return true if the operation succeeded. */
1808 encode_tree_to_bitpos (tree expr
, unsigned char *ptr
, int bitlen
, int bitpos
,
1809 unsigned int total_bytes
)
1811 unsigned int first_byte
= bitpos
/ BITS_PER_UNIT
;
1812 bool sub_byte_op_p
= ((bitlen
% BITS_PER_UNIT
)
1813 || (bitpos
% BITS_PER_UNIT
)
1814 || !int_mode_for_size (bitlen
, 0).exists ());
1816 = (TREE_CODE (expr
) == CONSTRUCTOR
1817 && CONSTRUCTOR_NELTS (expr
) == 0
1818 && TYPE_SIZE_UNIT (TREE_TYPE (expr
))
1819 && tree_fits_uhwi_p (TYPE_SIZE_UNIT (TREE_TYPE (expr
))));
1823 if (first_byte
>= total_bytes
)
1825 total_bytes
-= first_byte
;
1828 unsigned HOST_WIDE_INT rhs_bytes
1829 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (expr
)));
1830 if (rhs_bytes
> total_bytes
)
1832 memset (ptr
+ first_byte
, '\0', rhs_bytes
);
1835 return native_encode_expr (expr
, ptr
+ first_byte
, total_bytes
) != 0;
1839 We are writing a non byte-sized quantity or at a position that is not
1841 |--------|--------|--------| ptr + first_byte
1843 xxx xxxxxxxx xxx< bp>
1846 First native_encode_expr EXPR into a temporary buffer and shift each
1847 byte in the buffer by 'bp' (carrying the bits over as necessary).
1848 |00000000|00xxxxxx|xxxxxxxx| << bp = |000xxxxx|xxxxxxxx|xxx00000|
1849 <------bitlen---->< bp>
1850 Then we clear the destination bits:
1851 |---00000|00000000|000-----| ptr + first_byte
1852 <-------bitlen--->< bp>
1854 Finally we ORR the bytes of the shifted EXPR into the cleared region:
1855 |---xxxxx||xxxxxxxx||xxx-----| ptr + first_byte.
1858 We are writing a non byte-sized quantity or at a position that is not
1860 ptr + first_byte |--------|--------|--------|
1862 <bp >xxx xxxxxxxx xxx
1865 First native_encode_expr EXPR into a temporary buffer and shift each
1866 byte in the buffer to the right by (carrying the bits over as necessary).
1867 We shift by as much as needed to align the most significant bit of EXPR
1869 |00xxxxxx|xxxxxxxx| >> 3 = |00000xxx|xxxxxxxx|xxxxx000|
1870 <---bitlen----> <bp ><-----bitlen----->
1871 Then we clear the destination bits:
1872 ptr + first_byte |-----000||00000000||00000---|
1873 <bp ><-------bitlen----->
1875 Finally we ORR the bytes of the shifted EXPR into the cleared region:
1876 ptr + first_byte |---xxxxx||xxxxxxxx||xxx-----|.
1877 The awkwardness comes from the fact that bitpos is counted from the
1878 most significant bit of a byte. */
1880 /* We must be dealing with fixed-size data at this point, since the
1881 total size is also fixed. */
1882 unsigned int byte_size
;
1885 unsigned HOST_WIDE_INT rhs_bytes
1886 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (expr
)));
1887 if (rhs_bytes
> total_bytes
)
1889 byte_size
= rhs_bytes
;
1893 fixed_size_mode mode
1894 = as_a
<fixed_size_mode
> (TYPE_MODE (TREE_TYPE (expr
)));
1897 ? tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (expr
)))
1898 : GET_MODE_SIZE (mode
);
1900 /* Allocate an extra byte so that we have space to shift into. */
1902 unsigned char *tmpbuf
= XALLOCAVEC (unsigned char, byte_size
);
1903 memset (tmpbuf
, '\0', byte_size
);
1904 /* The store detection code should only have allowed constants that are
1905 accepted by native_encode_expr or empty ctors. */
1907 && native_encode_expr (expr
, tmpbuf
, byte_size
- 1) == 0)
1910 /* The native_encode_expr machinery uses TYPE_MODE to determine how many
1911 bytes to write. This means it can write more than
1912 ROUND_UP (bitlen, BITS_PER_UNIT) / BITS_PER_UNIT bytes (for example
1913 write 8 bytes for a bitlen of 40). Skip the bytes that are not within
1914 bitlen and zero out the bits that are not relevant as well (that may
1915 contain a sign bit due to sign-extension). */
1916 unsigned int padding
1917 = byte_size
- ROUND_UP (bitlen
, BITS_PER_UNIT
) / BITS_PER_UNIT
- 1;
1918 /* On big-endian the padding is at the 'front' so just skip the initial
1920 if (BYTES_BIG_ENDIAN
)
1923 byte_size
-= padding
;
1925 if (bitlen
% BITS_PER_UNIT
!= 0)
1927 if (BYTES_BIG_ENDIAN
)
1928 clear_bit_region_be (tmpbuf
, BITS_PER_UNIT
- 1,
1929 BITS_PER_UNIT
- (bitlen
% BITS_PER_UNIT
));
1931 clear_bit_region (tmpbuf
, bitlen
,
1932 byte_size
* BITS_PER_UNIT
- bitlen
);
1934 /* Left shifting relies on the last byte being clear if bitlen is
1935 a multiple of BITS_PER_UNIT, which might not be clear if
1936 there are padding bytes. */
1937 else if (!BYTES_BIG_ENDIAN
)
1938 tmpbuf
[byte_size
- 1] = '\0';
1940 /* Clear the bit region in PTR where the bits from TMPBUF will be
1942 if (BYTES_BIG_ENDIAN
)
1943 clear_bit_region_be (ptr
+ first_byte
,
1944 BITS_PER_UNIT
- 1 - (bitpos
% BITS_PER_UNIT
), bitlen
);
1946 clear_bit_region (ptr
+ first_byte
, bitpos
% BITS_PER_UNIT
, bitlen
);
1949 int bitlen_mod
= bitlen
% BITS_PER_UNIT
;
1950 int bitpos_mod
= bitpos
% BITS_PER_UNIT
;
1952 bool skip_byte
= false;
1953 if (BYTES_BIG_ENDIAN
)
1955 /* BITPOS and BITLEN are exactly aligned and no shifting
1957 if (bitpos_mod
+ bitlen_mod
== BITS_PER_UNIT
1958 || (bitpos_mod
== 0 && bitlen_mod
== 0))
1960 /* |. . . . . . . .|
1962 We always shift right for BYTES_BIG_ENDIAN so shift the beginning
1963 of the value until it aligns with 'bp' in the next byte over. */
1964 else if (bitpos_mod
+ bitlen_mod
< BITS_PER_UNIT
)
1966 shift_amnt
= bitlen_mod
+ bitpos_mod
;
1967 skip_byte
= bitlen_mod
!= 0;
1969 /* |. . . . . . . .|
1972 Shift the value right within the same byte so it aligns with 'bp'. */
1974 shift_amnt
= bitlen_mod
+ bitpos_mod
- BITS_PER_UNIT
;
1977 shift_amnt
= bitpos
% BITS_PER_UNIT
;
1979 /* Create the shifted version of EXPR. */
1980 if (!BYTES_BIG_ENDIAN
)
1982 shift_bytes_in_array_left (tmpbuf
, byte_size
, shift_amnt
);
1983 if (shift_amnt
== 0)
1988 gcc_assert (BYTES_BIG_ENDIAN
);
1989 shift_bytes_in_array_right (tmpbuf
, byte_size
, shift_amnt
);
1990 /* If shifting right forced us to move into the next byte skip the now
1999 /* Insert the bits from TMPBUF. */
2000 for (unsigned int i
= 0; i
< byte_size
; i
++)
2001 ptr
[first_byte
+ i
] |= tmpbuf
[i
];
2006 /* Sorting function for store_immediate_info objects.
2007 Sorts them by bitposition. */
2010 sort_by_bitpos (const void *x
, const void *y
)
2012 store_immediate_info
*const *tmp
= (store_immediate_info
* const *) x
;
2013 store_immediate_info
*const *tmp2
= (store_immediate_info
* const *) y
;
2015 if ((*tmp
)->bitpos
< (*tmp2
)->bitpos
)
2017 else if ((*tmp
)->bitpos
> (*tmp2
)->bitpos
)
2020 /* If they are the same let's use the order which is guaranteed to
2022 return (*tmp
)->order
- (*tmp2
)->order
;
2025 /* Sorting function for store_immediate_info objects.
2026 Sorts them by the order field. */
2029 sort_by_order (const void *x
, const void *y
)
2031 store_immediate_info
*const *tmp
= (store_immediate_info
* const *) x
;
2032 store_immediate_info
*const *tmp2
= (store_immediate_info
* const *) y
;
2034 if ((*tmp
)->order
< (*tmp2
)->order
)
2036 else if ((*tmp
)->order
> (*tmp2
)->order
)
2042 /* Initialize a merged_store_group object from a store_immediate_info
2045 merged_store_group::merged_store_group (store_immediate_info
*info
)
2047 start
= info
->bitpos
;
2048 width
= info
->bitsize
;
2049 bitregion_start
= info
->bitregion_start
;
2050 bitregion_end
= info
->bitregion_end
;
2051 /* VAL has memory allocated for it in apply_stores once the group
2052 width has been finalized. */
2055 bit_insertion
= info
->rhs_code
== BIT_INSERT_EXPR
;
2056 string_concatenation
= info
->rhs_code
== STRING_CST
;
2057 only_constants
= info
->rhs_code
== INTEGER_CST
;
2059 first_nonmergeable_order
= ~0U;
2060 lp_nr
= info
->lp_nr
;
2061 unsigned HOST_WIDE_INT align_bitpos
= 0;
2062 get_object_alignment_1 (gimple_assign_lhs (info
->stmt
),
2063 &align
, &align_bitpos
);
2064 align_base
= start
- align_bitpos
;
2065 for (int i
= 0; i
< 2; ++i
)
2067 store_operand_info
&op
= info
->ops
[i
];
2068 if (op
.base_addr
== NULL_TREE
)
2071 load_align_base
[i
] = 0;
2075 get_object_alignment_1 (op
.val
, &load_align
[i
], &align_bitpos
);
2076 load_align_base
[i
] = op
.bitpos
- align_bitpos
;
2080 stores
.safe_push (info
);
2081 last_stmt
= info
->stmt
;
2082 last_order
= info
->order
;
2083 first_stmt
= last_stmt
;
2084 first_order
= last_order
;
2088 merged_store_group::~merged_store_group ()
2094 /* Return true if the store described by INFO can be merged into the group. */
2097 merged_store_group::can_be_merged_into (store_immediate_info
*info
)
2099 /* Do not merge bswap patterns. */
2100 if (info
->rhs_code
== LROTATE_EXPR
)
2103 if (info
->lp_nr
!= lp_nr
)
2106 /* The canonical case. */
2107 if (info
->rhs_code
== stores
[0]->rhs_code
)
2110 /* BIT_INSERT_EXPR is compatible with INTEGER_CST if no STRING_CST. */
2111 if (info
->rhs_code
== BIT_INSERT_EXPR
&& stores
[0]->rhs_code
== INTEGER_CST
)
2112 return !string_concatenation
;
2114 if (stores
[0]->rhs_code
== BIT_INSERT_EXPR
&& info
->rhs_code
== INTEGER_CST
)
2115 return !string_concatenation
;
2117 /* We can turn MEM_REF into BIT_INSERT_EXPR for bit-field stores, but do it
2118 only for small regions since this can generate a lot of instructions. */
2119 if (info
->rhs_code
== MEM_REF
2120 && (stores
[0]->rhs_code
== INTEGER_CST
2121 || stores
[0]->rhs_code
== BIT_INSERT_EXPR
)
2122 && info
->bitregion_start
== stores
[0]->bitregion_start
2123 && info
->bitregion_end
== stores
[0]->bitregion_end
2124 && info
->bitregion_end
- info
->bitregion_start
<= MAX_FIXED_MODE_SIZE
)
2125 return !string_concatenation
;
2127 if (stores
[0]->rhs_code
== MEM_REF
2128 && (info
->rhs_code
== INTEGER_CST
2129 || info
->rhs_code
== BIT_INSERT_EXPR
)
2130 && info
->bitregion_start
== stores
[0]->bitregion_start
2131 && info
->bitregion_end
== stores
[0]->bitregion_end
2132 && info
->bitregion_end
- info
->bitregion_start
<= MAX_FIXED_MODE_SIZE
)
2133 return !string_concatenation
;
2135 /* STRING_CST is compatible with INTEGER_CST if no BIT_INSERT_EXPR. */
2136 if (info
->rhs_code
== STRING_CST
2137 && stores
[0]->rhs_code
== INTEGER_CST
2138 && stores
[0]->bitsize
== CHAR_BIT
)
2139 return !bit_insertion
;
2141 if (stores
[0]->rhs_code
== STRING_CST
2142 && info
->rhs_code
== INTEGER_CST
2143 && info
->bitsize
== CHAR_BIT
)
2144 return !bit_insertion
;
2149 /* Helper method for merge_into and merge_overlapping to do
2153 merged_store_group::do_merge (store_immediate_info
*info
)
2155 bitregion_start
= MIN (bitregion_start
, info
->bitregion_start
);
2156 bitregion_end
= MAX (bitregion_end
, info
->bitregion_end
);
2158 unsigned int this_align
;
2159 unsigned HOST_WIDE_INT align_bitpos
= 0;
2160 get_object_alignment_1 (gimple_assign_lhs (info
->stmt
),
2161 &this_align
, &align_bitpos
);
2162 if (this_align
> align
)
2165 align_base
= info
->bitpos
- align_bitpos
;
2167 for (int i
= 0; i
< 2; ++i
)
2169 store_operand_info
&op
= info
->ops
[i
];
2173 get_object_alignment_1 (op
.val
, &this_align
, &align_bitpos
);
2174 if (this_align
> load_align
[i
])
2176 load_align
[i
] = this_align
;
2177 load_align_base
[i
] = op
.bitpos
- align_bitpos
;
2181 gimple
*stmt
= info
->stmt
;
2182 stores
.safe_push (info
);
2183 if (info
->order
> last_order
)
2185 last_order
= info
->order
;
2188 else if (info
->order
< first_order
)
2190 first_order
= info
->order
;
2194 if (info
->bitpos
!= start
+ width
)
2195 consecutive
= false;
2197 /* We need to use extraction if there is any bit-field. */
2198 if (info
->rhs_code
== BIT_INSERT_EXPR
)
2200 bit_insertion
= true;
2201 gcc_assert (!string_concatenation
);
2204 /* We want to use concatenation if there is any string. */
2205 if (info
->rhs_code
== STRING_CST
)
2207 string_concatenation
= true;
2208 gcc_assert (!bit_insertion
);
2211 /* But we cannot use it if we don't have consecutive stores. */
2213 string_concatenation
= false;
2215 if (info
->rhs_code
!= INTEGER_CST
)
2216 only_constants
= false;
2219 /* Merge a store recorded by INFO into this merged store.
2220 The store is not overlapping with the existing recorded
2224 merged_store_group::merge_into (store_immediate_info
*info
)
2228 /* Make sure we're inserting in the position we think we're inserting. */
2229 gcc_assert (info
->bitpos
>= start
+ width
2230 && info
->bitregion_start
<= bitregion_end
);
2232 width
= info
->bitpos
+ info
->bitsize
- start
;
2235 /* Merge a store described by INFO into this merged store.
2236 INFO overlaps in some way with the current store (i.e. it's not contiguous
2237 which is handled by merged_store_group::merge_into). */
2240 merged_store_group::merge_overlapping (store_immediate_info
*info
)
2244 /* If the store extends the size of the group, extend the width. */
2245 if (info
->bitpos
+ info
->bitsize
> start
+ width
)
2246 width
= info
->bitpos
+ info
->bitsize
- start
;
2249 /* Go through all the recorded stores in this group in program order and
2250 apply their values to the VAL byte array to create the final merged
2251 value. Return true if the operation succeeded. */
2254 merged_store_group::apply_stores ()
2256 store_immediate_info
*info
;
2259 /* Make sure we have more than one store in the group, otherwise we cannot
2261 if (bitregion_start
% BITS_PER_UNIT
!= 0
2262 || bitregion_end
% BITS_PER_UNIT
!= 0
2263 || stores
.length () == 1)
2266 buf_size
= (bitregion_end
- bitregion_start
) / BITS_PER_UNIT
;
2268 /* Really do string concatenation for large strings only. */
2269 if (buf_size
<= MOVE_MAX
)
2270 string_concatenation
= false;
2272 /* Create a power-of-2-sized buffer for native_encode_expr. */
2273 if (!string_concatenation
)
2274 buf_size
= 1 << ceil_log2 (buf_size
);
2276 val
= XNEWVEC (unsigned char, 2 * buf_size
);
2277 mask
= val
+ buf_size
;
2278 memset (val
, 0, buf_size
);
2279 memset (mask
, ~0U, buf_size
);
2281 stores
.qsort (sort_by_order
);
2283 FOR_EACH_VEC_ELT (stores
, i
, info
)
2285 unsigned int pos_in_buffer
= info
->bitpos
- bitregion_start
;
2287 if (info
->ops
[0].val
&& info
->ops
[0].base_addr
== NULL_TREE
)
2288 cst
= info
->ops
[0].val
;
2289 else if (info
->ops
[1].val
&& info
->ops
[1].base_addr
== NULL_TREE
)
2290 cst
= info
->ops
[1].val
;
2294 if (cst
&& info
->rhs_code
!= BIT_INSERT_EXPR
)
2295 ret
= encode_tree_to_bitpos (cst
, val
, info
->bitsize
, pos_in_buffer
,
2297 unsigned char *m
= mask
+ (pos_in_buffer
/ BITS_PER_UNIT
);
2298 if (BYTES_BIG_ENDIAN
)
2299 clear_bit_region_be (m
, (BITS_PER_UNIT
- 1
2300 - (pos_in_buffer
% BITS_PER_UNIT
)),
2303 clear_bit_region (m
, pos_in_buffer
% BITS_PER_UNIT
, info
->bitsize
);
2304 if (cst
&& dump_file
&& (dump_flags
& TDF_DETAILS
))
2308 fputs ("After writing ", dump_file
);
2309 print_generic_expr (dump_file
, cst
, TDF_NONE
);
2310 fprintf (dump_file
, " of size " HOST_WIDE_INT_PRINT_DEC
2311 " at position %d\n", info
->bitsize
, pos_in_buffer
);
2312 fputs (" the merged value contains ", dump_file
);
2313 dump_char_array (dump_file
, val
, buf_size
);
2314 fputs (" the merged mask contains ", dump_file
);
2315 dump_char_array (dump_file
, mask
, buf_size
);
2317 fputs (" bit insertion is required\n", dump_file
);
2318 if (string_concatenation
)
2319 fputs (" string concatenation is required\n", dump_file
);
2322 fprintf (dump_file
, "Failed to merge stores\n");
2327 stores
.qsort (sort_by_bitpos
);
2331 /* Structure describing the store chain. */
2333 class imm_store_chain_info
2336 /* Doubly-linked list that imposes an order on chain processing.
2337 PNXP (prev's next pointer) points to the head of a list, or to
2338 the next field in the previous chain in the list.
2339 See pass_store_merging::m_stores_head for more rationale. */
2340 imm_store_chain_info
*next
, **pnxp
;
2342 auto_vec
<store_immediate_info
*> m_store_info
;
2343 auto_vec
<merged_store_group
*> m_merged_store_groups
;
2345 imm_store_chain_info (imm_store_chain_info
*&inspt
, tree b_a
)
2346 : next (inspt
), pnxp (&inspt
), base_addr (b_a
)
2351 gcc_checking_assert (pnxp
== next
->pnxp
);
2355 ~imm_store_chain_info ()
2360 gcc_checking_assert (&next
== next
->pnxp
);
2364 bool terminate_and_process_chain ();
2365 bool try_coalesce_bswap (merged_store_group
*, unsigned int, unsigned int,
2367 bool coalesce_immediate_stores ();
2368 bool output_merged_store (merged_store_group
*);
2369 bool output_merged_stores ();
2372 const pass_data pass_data_tree_store_merging
= {
2373 GIMPLE_PASS
, /* type */
2374 "store-merging", /* name */
2375 OPTGROUP_NONE
, /* optinfo_flags */
2376 TV_GIMPLE_STORE_MERGING
, /* tv_id */
2377 PROP_ssa
, /* properties_required */
2378 0, /* properties_provided */
2379 0, /* properties_destroyed */
2380 0, /* todo_flags_start */
2381 TODO_update_ssa
, /* todo_flags_finish */
2384 class pass_store_merging
: public gimple_opt_pass
2387 pass_store_merging (gcc::context
*ctxt
)
2388 : gimple_opt_pass (pass_data_tree_store_merging
, ctxt
), m_stores_head (),
2389 m_n_chains (0), m_n_stores (0)
2393 /* Pass not supported for PDP-endian, nor for insane hosts or
2394 target character sizes where native_{encode,interpret}_expr
2395 doesn't work properly. */
2399 return flag_store_merging
2400 && BYTES_BIG_ENDIAN
== WORDS_BIG_ENDIAN
2402 && BITS_PER_UNIT
== 8;
2405 virtual unsigned int execute (function
*);
2408 hash_map
<tree_operand_hash
, class imm_store_chain_info
*> m_stores
;
2410 /* Form a doubly-linked stack of the elements of m_stores, so that
2411 we can iterate over them in a predictable way. Using this order
2412 avoids extraneous differences in the compiler output just because
2413 of tree pointer variations (e.g. different chains end up in
2414 different positions of m_stores, so they are handled in different
2415 orders, so they allocate or release SSA names in different
2416 orders, and when they get reused, subsequent passes end up
2417 getting different SSA names, which may ultimately change
2418 decisions when going out of SSA). */
2419 imm_store_chain_info
*m_stores_head
;
2421 /* The number of store chains currently tracked. */
2422 unsigned m_n_chains
;
2423 /* The number of stores currently tracked. */
2424 unsigned m_n_stores
;
2426 bool process_store (gimple
*);
2427 bool terminate_and_process_chain (imm_store_chain_info
*);
2428 bool terminate_all_aliasing_chains (imm_store_chain_info
**, gimple
*);
2429 bool terminate_and_process_all_chains ();
2430 }; // class pass_store_merging
2432 /* Terminate and process all recorded chains. Return true if any changes
2436 pass_store_merging::terminate_and_process_all_chains ()
2439 while (m_stores_head
)
2440 ret
|= terminate_and_process_chain (m_stores_head
);
2441 gcc_assert (m_stores
.is_empty ());
2445 /* Terminate all chains that are affected by the statement STMT.
2446 CHAIN_INFO is the chain we should ignore from the checks if
2447 non-NULL. Return true if any changes were made. */
2450 pass_store_merging::terminate_all_aliasing_chains (imm_store_chain_info
2456 /* If the statement doesn't touch memory it can't alias. */
2457 if (!gimple_vuse (stmt
))
2460 tree store_lhs
= gimple_store_p (stmt
) ? gimple_get_lhs (stmt
) : NULL_TREE
;
2461 ao_ref store_lhs_ref
;
2462 ao_ref_init (&store_lhs_ref
, store_lhs
);
2463 for (imm_store_chain_info
*next
= m_stores_head
, *cur
= next
; cur
; cur
= next
)
2467 /* We already checked all the stores in chain_info and terminated the
2468 chain if necessary. Skip it here. */
2469 if (chain_info
&& *chain_info
== cur
)
2472 store_immediate_info
*info
;
2474 FOR_EACH_VEC_ELT (cur
->m_store_info
, i
, info
)
2476 tree lhs
= gimple_assign_lhs (info
->stmt
);
2478 ao_ref_init (&lhs_ref
, lhs
);
2479 if (ref_maybe_used_by_stmt_p (stmt
, &lhs_ref
)
2480 || stmt_may_clobber_ref_p_1 (stmt
, &lhs_ref
)
2481 || (store_lhs
&& refs_may_alias_p_1 (&store_lhs_ref
,
2484 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2486 fprintf (dump_file
, "stmt causes chain termination:\n");
2487 print_gimple_stmt (dump_file
, stmt
, 0);
2489 ret
|= terminate_and_process_chain (cur
);
2498 /* Helper function. Terminate the recorded chain storing to base object
2499 BASE. Return true if the merging and output was successful. The m_stores
2500 entry is removed after the processing in any case. */
2503 pass_store_merging::terminate_and_process_chain (imm_store_chain_info
*chain_info
)
2505 m_n_stores
-= chain_info
->m_store_info
.length ();
2507 bool ret
= chain_info
->terminate_and_process_chain ();
2508 m_stores
.remove (chain_info
->base_addr
);
2513 /* Return true if stmts in between FIRST (inclusive) and LAST (exclusive)
2514 may clobber REF. FIRST and LAST must have non-NULL vdef. We want to
2515 be able to sink load of REF across stores between FIRST and LAST, up
2516 to right before LAST. */
2519 stmts_may_clobber_ref_p (gimple
*first
, gimple
*last
, tree ref
)
2522 ao_ref_init (&r
, ref
);
2523 unsigned int count
= 0;
2524 tree vop
= gimple_vdef (last
);
2527 /* Return true conservatively if the basic blocks are different. */
2528 if (gimple_bb (first
) != gimple_bb (last
))
2533 stmt
= SSA_NAME_DEF_STMT (vop
);
2534 if (stmt_may_clobber_ref_p_1 (stmt
, &r
))
2536 if (gimple_store_p (stmt
)
2537 && refs_anti_dependent_p (ref
, gimple_get_lhs (stmt
)))
2539 /* Avoid quadratic compile time by bounding the number of checks
2541 if (++count
> MAX_STORE_ALIAS_CHECKS
)
2543 vop
= gimple_vuse (stmt
);
2545 while (stmt
!= first
);
2550 /* Return true if INFO->ops[IDX] is mergeable with the
2551 corresponding loads already in MERGED_STORE group.
2552 BASE_ADDR is the base address of the whole store group. */
2555 compatible_load_p (merged_store_group
*merged_store
,
2556 store_immediate_info
*info
,
2557 tree base_addr
, int idx
)
2559 store_immediate_info
*infof
= merged_store
->stores
[0];
2560 if (!info
->ops
[idx
].base_addr
2561 || maybe_ne (info
->ops
[idx
].bitpos
- infof
->ops
[idx
].bitpos
,
2562 info
->bitpos
- infof
->bitpos
)
2563 || !operand_equal_p (info
->ops
[idx
].base_addr
,
2564 infof
->ops
[idx
].base_addr
, 0))
2567 store_immediate_info
*infol
= merged_store
->stores
.last ();
2568 tree load_vuse
= gimple_vuse (info
->ops
[idx
].stmt
);
2569 /* In this case all vuses should be the same, e.g.
2570 _1 = s.a; _2 = s.b; _3 = _1 | 1; t.a = _3; _4 = _2 | 2; t.b = _4;
2572 _1 = s.a; _2 = s.b; t.a = _1; t.b = _2;
2573 and we can emit the coalesced load next to any of those loads. */
2574 if (gimple_vuse (infof
->ops
[idx
].stmt
) == load_vuse
2575 && gimple_vuse (infol
->ops
[idx
].stmt
) == load_vuse
)
2578 /* Otherwise, at least for now require that the load has the same
2579 vuse as the store. See following examples. */
2580 if (gimple_vuse (info
->stmt
) != load_vuse
)
2583 if (gimple_vuse (infof
->stmt
) != gimple_vuse (infof
->ops
[idx
].stmt
)
2585 && gimple_vuse (infol
->stmt
) != gimple_vuse (infol
->ops
[idx
].stmt
)))
2588 /* If the load is from the same location as the store, already
2589 the construction of the immediate chain info guarantees no intervening
2590 stores, so no further checks are needed. Example:
2591 _1 = s.a; _2 = _1 & -7; s.a = _2; _3 = s.b; _4 = _3 & -7; s.b = _4; */
2592 if (known_eq (info
->ops
[idx
].bitpos
, info
->bitpos
)
2593 && operand_equal_p (info
->ops
[idx
].base_addr
, base_addr
, 0))
2596 /* Otherwise, we need to punt if any of the loads can be clobbered by any
2597 of the stores in the group, or any other stores in between those.
2598 Previous calls to compatible_load_p ensured that for all the
2599 merged_store->stores IDX loads, no stmts starting with
2600 merged_store->first_stmt and ending right before merged_store->last_stmt
2601 clobbers those loads. */
2602 gimple
*first
= merged_store
->first_stmt
;
2603 gimple
*last
= merged_store
->last_stmt
;
2604 /* The stores are sorted by increasing store bitpos, so if info->stmt store
2605 comes before the so far first load, we'll be changing
2606 merged_store->first_stmt. In that case we need to give up if
2607 any of the earlier processed loads clobber with the stmts in the new
2609 if (info
->order
< merged_store
->first_order
)
2611 for (store_immediate_info
*infoc
: merged_store
->stores
)
2612 if (stmts_may_clobber_ref_p (info
->stmt
, first
, infoc
->ops
[idx
].val
))
2616 /* Similarly, we could change merged_store->last_stmt, so ensure
2617 in that case no stmts in the new range clobber any of the earlier
2619 else if (info
->order
> merged_store
->last_order
)
2621 for (store_immediate_info
*infoc
: merged_store
->stores
)
2622 if (stmts_may_clobber_ref_p (last
, info
->stmt
, infoc
->ops
[idx
].val
))
2626 /* And finally, we'd be adding a new load to the set, ensure it isn't
2627 clobbered in the new range. */
2628 if (stmts_may_clobber_ref_p (first
, last
, info
->ops
[idx
].val
))
2631 /* Otherwise, we are looking for:
2632 _1 = s.a; _2 = _1 ^ 15; t.a = _2; _3 = s.b; _4 = _3 ^ 15; t.b = _4;
2634 _1 = s.a; t.a = _1; _2 = s.b; t.b = _2; */
2638 /* Add all refs loaded to compute VAL to REFS vector. */
2641 gather_bswap_load_refs (vec
<tree
> *refs
, tree val
)
2643 if (TREE_CODE (val
) != SSA_NAME
)
2646 gimple
*stmt
= SSA_NAME_DEF_STMT (val
);
2647 if (!is_gimple_assign (stmt
))
2650 if (gimple_assign_load_p (stmt
))
2652 refs
->safe_push (gimple_assign_rhs1 (stmt
));
2656 switch (gimple_assign_rhs_class (stmt
))
2658 case GIMPLE_BINARY_RHS
:
2659 gather_bswap_load_refs (refs
, gimple_assign_rhs2 (stmt
));
2661 case GIMPLE_UNARY_RHS
:
2662 gather_bswap_load_refs (refs
, gimple_assign_rhs1 (stmt
));
2669 /* Check if there are any stores in M_STORE_INFO after index I
2670 (where M_STORE_INFO must be sorted by sort_by_bitpos) that overlap
2671 a potential group ending with END that have their order
2672 smaller than LAST_ORDER. ALL_INTEGER_CST_P is true if
2673 all the stores already merged and the one under consideration
2674 have rhs_code of INTEGER_CST. Return true if there are no such stores.
2676 MEM[(long long int *)p_28] = 0;
2677 MEM[(long long int *)p_28 + 8B] = 0;
2678 MEM[(long long int *)p_28 + 16B] = 0;
2679 MEM[(long long int *)p_28 + 24B] = 0;
2681 MEM[(int *)p_28 + 8B] = _129;
2682 MEM[(int *)p_28].a = -1;
2684 MEM[(long long int *)p_28] = 0;
2685 MEM[(int *)p_28].a = -1;
2686 stmts in the current group and need to consider if it is safe to
2687 add MEM[(long long int *)p_28 + 8B] = 0; store into the same group.
2688 There is an overlap between that store and the MEM[(int *)p_28 + 8B] = _129;
2689 store though, so if we add the MEM[(long long int *)p_28 + 8B] = 0;
2690 into the group and merging of those 3 stores is successful, merged
2691 stmts will be emitted at the latest store from that group, i.e.
2692 LAST_ORDER, which is the MEM[(int *)p_28].a = -1; store.
2693 The MEM[(int *)p_28 + 8B] = _129; store that originally follows
2694 the MEM[(long long int *)p_28 + 8B] = 0; would now be before it,
2695 so we need to refuse merging MEM[(long long int *)p_28 + 8B] = 0;
2696 into the group. That way it will be its own store group and will
2697 not be touched. If ALL_INTEGER_CST_P and there are overlapping
2698 INTEGER_CST stores, those are mergeable using merge_overlapping,
2699 so don't return false for those.
2701 Similarly, check stores from FIRST_EARLIER (inclusive) to END_EARLIER
2702 (exclusive), whether they don't overlap the bitrange START to END
2703 and have order in between FIRST_ORDER and LAST_ORDER. This is to
2704 prevent merging in cases like:
2705 MEM <char[12]> [&b + 8B] = {};
2706 MEM[(short *) &b] = 5;
2708 MEM <long long unsigned int> [&b + 2B] = _5;
2709 MEM[(char *)&b + 16B] = 88;
2710 MEM[(int *)&b + 20B] = 1;
2711 The = {} store comes in sort_by_bitpos before the = 88 store, and can't
2712 be merged with it, because the = _5 store overlaps these and is in between
2713 them in sort_by_order ordering. If it was merged, the merged store would
2714 go after the = _5 store and thus change behavior. */
2717 check_no_overlap (const vec
<store_immediate_info
*> &m_store_info
,
2719 bool all_integer_cst_p
, unsigned int first_order
,
2720 unsigned int last_order
, unsigned HOST_WIDE_INT start
,
2721 unsigned HOST_WIDE_INT end
, unsigned int first_earlier
,
2722 unsigned end_earlier
)
2724 unsigned int len
= m_store_info
.length ();
2725 for (unsigned int j
= first_earlier
; j
< end_earlier
; j
++)
2727 store_immediate_info
*info
= m_store_info
[j
];
2728 if (info
->order
> first_order
2729 && info
->order
< last_order
2730 && info
->bitpos
+ info
->bitsize
> start
)
2733 for (++i
; i
< len
; ++i
)
2735 store_immediate_info
*info
= m_store_info
[i
];
2736 if (info
->bitpos
>= end
)
2738 if (info
->order
< last_order
2739 && (!all_integer_cst_p
|| info
->rhs_code
!= INTEGER_CST
))
2745 /* Return true if m_store_info[first] and at least one following store
2746 form a group which store try_size bitsize value which is byte swapped
2747 from a memory load or some value, or identity from some value.
2748 This uses the bswap pass APIs. */
2751 imm_store_chain_info::try_coalesce_bswap (merged_store_group
*merged_store
,
2753 unsigned int try_size
,
2754 unsigned int first_earlier
)
2756 unsigned int len
= m_store_info
.length (), last
= first
;
2757 unsigned HOST_WIDE_INT width
= m_store_info
[first
]->bitsize
;
2758 if (width
>= try_size
)
2760 for (unsigned int i
= first
+ 1; i
< len
; ++i
)
2762 if (m_store_info
[i
]->bitpos
!= m_store_info
[first
]->bitpos
+ width
2763 || m_store_info
[i
]->lp_nr
!= merged_store
->lp_nr
2764 || m_store_info
[i
]->ins_stmt
== NULL
)
2766 width
+= m_store_info
[i
]->bitsize
;
2767 if (width
>= try_size
)
2773 if (width
!= try_size
)
2776 bool allow_unaligned
2777 = !STRICT_ALIGNMENT
&& param_store_merging_allow_unaligned
;
2778 /* Punt if the combined store would not be aligned and we need alignment. */
2779 if (!allow_unaligned
)
2781 unsigned int align
= merged_store
->align
;
2782 unsigned HOST_WIDE_INT align_base
= merged_store
->align_base
;
2783 for (unsigned int i
= first
+ 1; i
<= last
; ++i
)
2785 unsigned int this_align
;
2786 unsigned HOST_WIDE_INT align_bitpos
= 0;
2787 get_object_alignment_1 (gimple_assign_lhs (m_store_info
[i
]->stmt
),
2788 &this_align
, &align_bitpos
);
2789 if (this_align
> align
)
2792 align_base
= m_store_info
[i
]->bitpos
- align_bitpos
;
2795 unsigned HOST_WIDE_INT align_bitpos
2796 = (m_store_info
[first
]->bitpos
- align_base
) & (align
- 1);
2798 align
= least_bit_hwi (align_bitpos
);
2799 if (align
< try_size
)
2806 case 16: type
= uint16_type_node
; break;
2807 case 32: type
= uint32_type_node
; break;
2808 case 64: type
= uint64_type_node
; break;
2809 default: gcc_unreachable ();
2811 struct symbolic_number n
;
2812 gimple
*ins_stmt
= NULL
;
2813 int vuse_store
= -1;
2814 unsigned int first_order
= merged_store
->first_order
;
2815 unsigned int last_order
= merged_store
->last_order
;
2816 gimple
*first_stmt
= merged_store
->first_stmt
;
2817 gimple
*last_stmt
= merged_store
->last_stmt
;
2818 unsigned HOST_WIDE_INT end
= merged_store
->start
+ merged_store
->width
;
2819 store_immediate_info
*infof
= m_store_info
[first
];
2821 for (unsigned int i
= first
; i
<= last
; ++i
)
2823 store_immediate_info
*info
= m_store_info
[i
];
2824 struct symbolic_number this_n
= info
->n
;
2826 if (!this_n
.base_addr
)
2827 this_n
.range
= try_size
/ BITS_PER_UNIT
;
2829 /* Update vuse in case it has changed by output_merged_stores. */
2830 this_n
.vuse
= gimple_vuse (info
->ins_stmt
);
2831 unsigned int bitpos
= info
->bitpos
- infof
->bitpos
;
2832 if (!do_shift_rotate (LSHIFT_EXPR
, &this_n
,
2834 ? try_size
- info
->bitsize
- bitpos
2837 if (this_n
.base_addr
&& vuse_store
)
2840 for (j
= first
; j
<= last
; ++j
)
2841 if (this_n
.vuse
== gimple_vuse (m_store_info
[j
]->stmt
))
2845 if (vuse_store
== 1)
2853 ins_stmt
= info
->ins_stmt
;
2857 if (n
.base_addr
&& n
.vuse
!= this_n
.vuse
)
2859 if (vuse_store
== 0)
2863 if (info
->order
> last_order
)
2865 last_order
= info
->order
;
2866 last_stmt
= info
->stmt
;
2868 else if (info
->order
< first_order
)
2870 first_order
= info
->order
;
2871 first_stmt
= info
->stmt
;
2873 end
= MAX (end
, info
->bitpos
+ info
->bitsize
);
2875 ins_stmt
= perform_symbolic_merge (ins_stmt
, &n
, info
->ins_stmt
,
2877 if (ins_stmt
== NULL
)
2882 uint64_t cmpxchg
, cmpnop
;
2884 find_bswap_or_nop_finalize (&n
, &cmpxchg
, &cmpnop
, &cast64_to_32
);
2886 /* A complete byte swap should make the symbolic number to start with
2887 the largest digit in the highest order byte. Unchanged symbolic
2888 number indicates a read with same endianness as target architecture. */
2889 if (n
.n
!= cmpnop
&& n
.n
!= cmpxchg
)
2896 if (n
.base_addr
== NULL_TREE
&& !is_gimple_val (n
.src
))
2899 if (!check_no_overlap (m_store_info
, last
, false, first_order
, last_order
,
2900 merged_store
->start
, end
, first_earlier
, first
))
2903 /* Don't handle memory copy this way if normal non-bswap processing
2904 would handle it too. */
2905 if (n
.n
== cmpnop
&& (unsigned) n
.n_ops
== last
- first
+ 1)
2908 for (i
= first
; i
<= last
; ++i
)
2909 if (m_store_info
[i
]->rhs_code
!= MEM_REF
)
2919 /* Will emit LROTATE_EXPR. */
2922 if (builtin_decl_explicit_p (BUILT_IN_BSWAP32
)
2923 && optab_handler (bswap_optab
, SImode
) != CODE_FOR_nothing
)
2927 if (builtin_decl_explicit_p (BUILT_IN_BSWAP64
)
2928 && optab_handler (bswap_optab
, DImode
) != CODE_FOR_nothing
)
2935 if (!allow_unaligned
&& n
.base_addr
)
2937 unsigned int align
= get_object_alignment (n
.src
);
2938 if (align
< try_size
)
2942 /* If each load has vuse of the corresponding store, need to verify
2943 the loads can be sunk right before the last store. */
2944 if (vuse_store
== 1)
2946 auto_vec
<tree
, 64> refs
;
2947 for (unsigned int i
= first
; i
<= last
; ++i
)
2948 gather_bswap_load_refs (&refs
,
2949 gimple_assign_rhs1 (m_store_info
[i
]->stmt
));
2951 for (tree ref
: refs
)
2952 if (stmts_may_clobber_ref_p (first_stmt
, last_stmt
, ref
))
2958 infof
->ins_stmt
= ins_stmt
;
2959 for (unsigned int i
= first
; i
<= last
; ++i
)
2961 m_store_info
[i
]->rhs_code
= n
.n
== cmpxchg
? LROTATE_EXPR
: NOP_EXPR
;
2962 m_store_info
[i
]->ops
[0].base_addr
= NULL_TREE
;
2963 m_store_info
[i
]->ops
[1].base_addr
= NULL_TREE
;
2965 merged_store
->merge_into (m_store_info
[i
]);
2971 /* Go through the candidate stores recorded in m_store_info and merge them
2972 into merged_store_group objects recorded into m_merged_store_groups
2973 representing the widened stores. Return true if coalescing was successful
2974 and the number of widened stores is fewer than the original number
2978 imm_store_chain_info::coalesce_immediate_stores ()
2980 /* Anything less can't be processed. */
2981 if (m_store_info
.length () < 2)
2984 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2985 fprintf (dump_file
, "Attempting to coalesce %u stores in chain\n",
2986 m_store_info
.length ());
2988 store_immediate_info
*info
;
2989 unsigned int i
, ignore
= 0;
2990 unsigned int first_earlier
= 0;
2991 unsigned int end_earlier
= 0;
2993 /* Order the stores by the bitposition they write to. */
2994 m_store_info
.qsort (sort_by_bitpos
);
2996 info
= m_store_info
[0];
2997 merged_store_group
*merged_store
= new merged_store_group (info
);
2998 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2999 fputs ("New store group\n", dump_file
);
3001 FOR_EACH_VEC_ELT (m_store_info
, i
, info
)
3003 unsigned HOST_WIDE_INT new_bitregion_start
, new_bitregion_end
;
3008 while (first_earlier
< end_earlier
3009 && (m_store_info
[first_earlier
]->bitpos
3010 + m_store_info
[first_earlier
]->bitsize
3011 <= merged_store
->start
))
3014 /* First try to handle group of stores like:
3019 using the bswap framework. */
3020 if (info
->bitpos
== merged_store
->start
+ merged_store
->width
3021 && merged_store
->stores
.length () == 1
3022 && merged_store
->stores
[0]->ins_stmt
!= NULL
3023 && info
->lp_nr
== merged_store
->lp_nr
3024 && info
->ins_stmt
!= NULL
)
3026 unsigned int try_size
;
3027 for (try_size
= 64; try_size
>= 16; try_size
>>= 1)
3028 if (try_coalesce_bswap (merged_store
, i
- 1, try_size
,
3034 ignore
= i
+ merged_store
->stores
.length () - 1;
3035 m_merged_store_groups
.safe_push (merged_store
);
3036 if (ignore
< m_store_info
.length ())
3038 merged_store
= new merged_store_group (m_store_info
[ignore
]);
3039 end_earlier
= ignore
;
3042 merged_store
= NULL
;
3048 = MIN (merged_store
->bitregion_start
, info
->bitregion_start
);
3050 = MAX (merged_store
->bitregion_end
, info
->bitregion_end
);
3052 if (info
->order
>= merged_store
->first_nonmergeable_order
3053 || (((new_bitregion_end
- new_bitregion_start
+ 1) / BITS_PER_UNIT
)
3054 > (unsigned) param_store_merging_max_size
))
3059 Overlapping stores. */
3060 else if (IN_RANGE (info
->bitpos
, merged_store
->start
,
3061 merged_store
->start
+ merged_store
->width
- 1)
3062 /* |---store 1---||---store 2---|
3063 Handle also the consecutive INTEGER_CST stores case here,
3064 as we have here the code to deal with overlaps. */
3065 || (info
->bitregion_start
<= merged_store
->bitregion_end
3066 && info
->rhs_code
== INTEGER_CST
3067 && merged_store
->only_constants
3068 && merged_store
->can_be_merged_into (info
)))
3070 /* Only allow overlapping stores of constants. */
3071 if (info
->rhs_code
== INTEGER_CST
3072 && merged_store
->only_constants
3073 && info
->lp_nr
== merged_store
->lp_nr
)
3075 unsigned int first_order
3076 = MIN (merged_store
->first_order
, info
->order
);
3077 unsigned int last_order
3078 = MAX (merged_store
->last_order
, info
->order
);
3079 unsigned HOST_WIDE_INT end
3080 = MAX (merged_store
->start
+ merged_store
->width
,
3081 info
->bitpos
+ info
->bitsize
);
3082 if (check_no_overlap (m_store_info
, i
, true, first_order
,
3083 last_order
, merged_store
->start
, end
,
3084 first_earlier
, end_earlier
))
3086 /* check_no_overlap call above made sure there are no
3087 overlapping stores with non-INTEGER_CST rhs_code
3088 in between the first and last of the stores we've
3089 just merged. If there are any INTEGER_CST rhs_code
3090 stores in between, we need to merge_overlapping them
3091 even if in the sort_by_bitpos order there are other
3092 overlapping stores in between. Keep those stores as is.
3094 MEM[(int *)p_28] = 0;
3095 MEM[(char *)p_28 + 3B] = 1;
3096 MEM[(char *)p_28 + 1B] = 2;
3097 MEM[(char *)p_28 + 2B] = MEM[(char *)p_28 + 6B];
3098 We can't merge the zero store with the store of two and
3099 not merge anything else, because the store of one is
3100 in the original order in between those two, but in
3101 store_by_bitpos order it comes after the last store that
3102 we can't merge with them. We can merge the first 3 stores
3103 and keep the last store as is though. */
3104 unsigned int len
= m_store_info
.length ();
3105 unsigned int try_order
= last_order
;
3106 unsigned int first_nonmergeable_order
;
3108 bool last_iter
= false;
3112 unsigned int max_order
= 0;
3113 unsigned int min_order
= first_order
;
3114 unsigned first_nonmergeable_int_order
= ~0U;
3115 unsigned HOST_WIDE_INT this_end
= end
;
3117 first_nonmergeable_order
= ~0U;
3118 for (unsigned int j
= i
+ 1; j
< len
; ++j
)
3120 store_immediate_info
*info2
= m_store_info
[j
];
3121 if (info2
->bitpos
>= this_end
)
3123 if (info2
->order
< try_order
)
3125 if (info2
->rhs_code
!= INTEGER_CST
3126 || info2
->lp_nr
!= merged_store
->lp_nr
)
3128 /* Normally check_no_overlap makes sure this
3129 doesn't happen, but if end grows below,
3130 then we need to process more stores than
3131 check_no_overlap verified. Example:
3132 MEM[(int *)p_5] = 0;
3133 MEM[(short *)p_5 + 3B] = 1;
3134 MEM[(char *)p_5 + 4B] = _9;
3135 MEM[(char *)p_5 + 2B] = 2; */
3140 min_order
= MIN (min_order
, info2
->order
);
3141 this_end
= MAX (this_end
,
3142 info2
->bitpos
+ info2
->bitsize
);
3144 else if (info2
->rhs_code
== INTEGER_CST
3145 && info2
->lp_nr
== merged_store
->lp_nr
3148 max_order
= MAX (max_order
, info2
->order
+ 1);
3149 first_nonmergeable_int_order
3150 = MIN (first_nonmergeable_int_order
,
3154 first_nonmergeable_order
3155 = MIN (first_nonmergeable_order
, info2
->order
);
3158 && !check_no_overlap (m_store_info
, len
- 1, true,
3159 min_order
, try_order
,
3160 merged_store
->start
, this_end
,
3161 first_earlier
, end_earlier
))
3165 if (last_order
== try_order
)
3167 /* If this failed, but only because we grew
3168 try_order, retry with the last working one,
3169 so that we merge at least something. */
3170 try_order
= last_order
;
3174 last_order
= try_order
;
3175 /* Retry with a larger try_order to see if we could
3176 merge some further INTEGER_CST stores. */
3178 && (first_nonmergeable_int_order
3179 < first_nonmergeable_order
))
3181 try_order
= MIN (max_order
,
3182 first_nonmergeable_order
);
3185 merged_store
->first_nonmergeable_order
);
3186 if (try_order
> last_order
&& ++attempts
< 16)
3189 first_nonmergeable_order
3190 = MIN (first_nonmergeable_order
,
3191 first_nonmergeable_int_order
);
3199 merged_store
->merge_overlapping (info
);
3201 merged_store
->first_nonmergeable_order
3202 = MIN (merged_store
->first_nonmergeable_order
,
3203 first_nonmergeable_order
);
3205 for (unsigned int j
= i
+ 1; j
<= k
; j
++)
3207 store_immediate_info
*info2
= m_store_info
[j
];
3208 gcc_assert (info2
->bitpos
< end
);
3209 if (info2
->order
< last_order
)
3211 gcc_assert (info2
->rhs_code
== INTEGER_CST
);
3213 merged_store
->merge_overlapping (info2
);
3215 /* Other stores are kept and not merged in any
3224 /* |---store 1---||---store 2---|
3225 This store is consecutive to the previous one.
3226 Merge it into the current store group. There can be gaps in between
3227 the stores, but there can't be gaps in between bitregions. */
3228 else if (info
->bitregion_start
<= merged_store
->bitregion_end
3229 && merged_store
->can_be_merged_into (info
))
3231 store_immediate_info
*infof
= merged_store
->stores
[0];
3233 /* All the rhs_code ops that take 2 operands are commutative,
3234 swap the operands if it could make the operands compatible. */
3235 if (infof
->ops
[0].base_addr
3236 && infof
->ops
[1].base_addr
3237 && info
->ops
[0].base_addr
3238 && info
->ops
[1].base_addr
3239 && known_eq (info
->ops
[1].bitpos
- infof
->ops
[0].bitpos
,
3240 info
->bitpos
- infof
->bitpos
)
3241 && operand_equal_p (info
->ops
[1].base_addr
,
3242 infof
->ops
[0].base_addr
, 0))
3244 std::swap (info
->ops
[0], info
->ops
[1]);
3245 info
->ops_swapped_p
= true;
3247 if (check_no_overlap (m_store_info
, i
, false,
3248 MIN (merged_store
->first_order
, info
->order
),
3249 MAX (merged_store
->last_order
, info
->order
),
3250 merged_store
->start
,
3251 MAX (merged_store
->start
+ merged_store
->width
,
3252 info
->bitpos
+ info
->bitsize
),
3253 first_earlier
, end_earlier
))
3255 /* Turn MEM_REF into BIT_INSERT_EXPR for bit-field stores. */
3256 if (info
->rhs_code
== MEM_REF
&& infof
->rhs_code
!= MEM_REF
)
3258 info
->rhs_code
= BIT_INSERT_EXPR
;
3259 info
->ops
[0].val
= gimple_assign_rhs1 (info
->stmt
);
3260 info
->ops
[0].base_addr
= NULL_TREE
;
3262 else if (infof
->rhs_code
== MEM_REF
&& info
->rhs_code
!= MEM_REF
)
3264 for (store_immediate_info
*infoj
: merged_store
->stores
)
3266 infoj
->rhs_code
= BIT_INSERT_EXPR
;
3267 infoj
->ops
[0].val
= gimple_assign_rhs1 (infoj
->stmt
);
3268 infoj
->ops
[0].base_addr
= NULL_TREE
;
3270 merged_store
->bit_insertion
= true;
3272 if ((infof
->ops
[0].base_addr
3273 ? compatible_load_p (merged_store
, info
, base_addr
, 0)
3274 : !info
->ops
[0].base_addr
)
3275 && (infof
->ops
[1].base_addr
3276 ? compatible_load_p (merged_store
, info
, base_addr
, 1)
3277 : !info
->ops
[1].base_addr
))
3279 merged_store
->merge_into (info
);
3285 /* |---store 1---| <gap> |---store 2---|.
3286 Gap between stores or the rhs not compatible. Start a new group. */
3288 /* Try to apply all the stores recorded for the group to determine
3289 the bitpattern they write and discard it if that fails.
3290 This will also reject single-store groups. */
3291 if (merged_store
->apply_stores ())
3292 m_merged_store_groups
.safe_push (merged_store
);
3294 delete merged_store
;
3296 merged_store
= new merged_store_group (info
);
3298 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3299 fputs ("New store group\n", dump_file
);
3302 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3304 fprintf (dump_file
, "Store %u:\nbitsize:" HOST_WIDE_INT_PRINT_DEC
3305 " bitpos:" HOST_WIDE_INT_PRINT_DEC
" val:",
3306 i
, info
->bitsize
, info
->bitpos
);
3307 print_generic_expr (dump_file
, gimple_assign_rhs1 (info
->stmt
));
3308 fputc ('\n', dump_file
);
3312 /* Record or discard the last store group. */
3315 if (merged_store
->apply_stores ())
3316 m_merged_store_groups
.safe_push (merged_store
);
3318 delete merged_store
;
3321 gcc_assert (m_merged_store_groups
.length () <= m_store_info
.length ());
3324 = !m_merged_store_groups
.is_empty ()
3325 && m_merged_store_groups
.length () < m_store_info
.length ();
3327 if (success
&& dump_file
)
3328 fprintf (dump_file
, "Coalescing successful!\nMerged into %u stores\n",
3329 m_merged_store_groups
.length ());
3334 /* Return the type to use for the merged stores or loads described by STMTS.
3335 This is needed to get the alias sets right. If IS_LOAD, look for rhs,
3336 otherwise lhs. Additionally set *CLIQUEP and *BASEP to MR_DEPENDENCE_*
3337 of the MEM_REFs if any. */
3340 get_alias_type_for_stmts (vec
<gimple
*> &stmts
, bool is_load
,
3341 unsigned short *cliquep
, unsigned short *basep
)
3345 tree type
= NULL_TREE
;
3346 tree ret
= NULL_TREE
;
3350 FOR_EACH_VEC_ELT (stmts
, i
, stmt
)
3352 tree ref
= is_load
? gimple_assign_rhs1 (stmt
)
3353 : gimple_assign_lhs (stmt
);
3354 tree type1
= reference_alias_ptr_type (ref
);
3355 tree base
= get_base_address (ref
);
3359 if (TREE_CODE (base
) == MEM_REF
)
3361 *cliquep
= MR_DEPENDENCE_CLIQUE (base
);
3362 *basep
= MR_DEPENDENCE_BASE (base
);
3367 if (!alias_ptr_types_compatible_p (type
, type1
))
3368 ret
= ptr_type_node
;
3369 if (TREE_CODE (base
) != MEM_REF
3370 || *cliquep
!= MR_DEPENDENCE_CLIQUE (base
)
3371 || *basep
!= MR_DEPENDENCE_BASE (base
))
3380 /* Return the location_t information we can find among the statements
3384 get_location_for_stmts (vec
<gimple
*> &stmts
)
3386 for (gimple
*stmt
: stmts
)
3387 if (gimple_has_location (stmt
))
3388 return gimple_location (stmt
);
3390 return UNKNOWN_LOCATION
;
3393 /* Used to decribe a store resulting from splitting a wide store in smaller
3394 regularly-sized stores in split_group. */
3399 unsigned HOST_WIDE_INT bytepos
;
3400 unsigned HOST_WIDE_INT size
;
3401 unsigned HOST_WIDE_INT align
;
3402 auto_vec
<store_immediate_info
*> orig_stores
;
3403 /* True if there is a single orig stmt covering the whole split store. */
3405 split_store (unsigned HOST_WIDE_INT
, unsigned HOST_WIDE_INT
,
3406 unsigned HOST_WIDE_INT
);
3409 /* Simple constructor. */
3411 split_store::split_store (unsigned HOST_WIDE_INT bp
,
3412 unsigned HOST_WIDE_INT sz
,
3413 unsigned HOST_WIDE_INT al
)
3414 : bytepos (bp
), size (sz
), align (al
), orig (false)
3416 orig_stores
.create (0);
3419 /* Record all stores in GROUP that write to the region starting at BITPOS and
3420 is of size BITSIZE.  Record infos for such statements in STORES if
3421 non-NULL.  The stores in GROUP must be sorted by bitposition.  Return INFO
3422 if there is exactly one original store in the range (in that case ignore
3423 clobber stmts, unless there are only clobber stmts).  */
/* NOTE(review): this region of the file is a garbled extraction -- each
   original source line is split across several physical lines and a number
   of lines are missing entirely.  Only comments are added below; no code
   token has been changed.  */
3425 static store_immediate_info
*
3426 find_constituent_stores (class merged_store_group
*group
,
3427 vec
<store_immediate_info
*> *stores
,
3428 unsigned int *first
,
3429 unsigned HOST_WIDE_INT bitpos
,
3430 unsigned HOST_WIDE_INT bitsize
)
/* RET is the single candidate store found so far; UPDATE_FIRST tracks
   whether *FIRST may still be advanced past already-scanned records
   (see the optimization comment below).  */
3432 store_immediate_info
*info
, *ret
= NULL
;
3434 bool second
= false;
3435 bool update_first
= true;
/* END is the first bit position past the queried region.  */
3436 unsigned HOST_WIDE_INT end
= bitpos
+ bitsize
;
3437 for (i
= *first
; group
->stores
.iterate (i
, &info
); ++i
)
3439 unsigned HOST_WIDE_INT stmt_start
= info
->bitpos
;
3440 unsigned HOST_WIDE_INT stmt_end
= stmt_start
+ info
->bitsize
;
/* Stores that end at or before BITPOS cannot intersect the region.  */
3441 if (stmt_end
<= bitpos
)
3443 /* BITPOS passed to this function never decreases from within the
3444 same split_group call, so optimize and don't scan info records
3445 which are known to end before or at BITPOS next time.
3446 Only do it if all stores before this one also pass this.  */
3452 update_first
= false;
3454 /* The stores in GROUP are ordered by bitposition so if we're past
3455 the region for this group return early.  */
3456 if (stmt_start
>= end
)
/* Clobber statements are recorded in STORES but treated specially when
   deciding whether the range has exactly one original store.  */
3459 if (gimple_clobber_p (info
->stmt
))
3462 stores
->safe_push (info
);
3469 stores
->safe_push (info
);
3470 if (ret
&& !gimple_clobber_p (ret
->stmt
))
3476 else if (ret
&& !gimple_clobber_p (ret
->stmt
))
3484 /* Return how many SSA_NAMEs used to compute value to store in the INFO
3485 store have multiple uses.  If any SSA_NAME has multiple uses, also
3486 count statements needed to compute it.  */
/* NOTE(review): garbled extraction -- lines are fragmented and several are
   missing (notably the switch case labels and the function's return type
   line); comments only, no code token changed.  */
3489 count_multiple_uses (store_immediate_info
*info
)
3491 gimple
*stmt
= info
->stmt
;
/* Dispatch on how the stored value is computed; each case adds the extra
   statements that would need to be re-emitted when an operand SSA_NAME
   has uses other than this store chain.  */
3493 switch (info
->rhs_code
)
3501 if (info
->bit_not_p
)
3503 if (!has_single_use (gimple_assign_rhs1 (stmt
)))
3504 ret
= 1; /* Fall through below to return
3505 the BIT_NOT_EXPR stmt and then
3506 BIT_{AND,IOR,XOR}_EXPR and anything it
3509 /* stmt is after this the BIT_NOT_EXPR.  */
3510 stmt
= SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt
));
3512 if (!has_single_use (gimple_assign_rhs1 (stmt
)))
3514 ret
+= 1 + info
->ops
[0].bit_not_p
;
3515 if (info
->ops
[1].base_addr
)
3516 ret
+= 1 + info
->ops
[1].bit_not_p
;
3519 stmt
= SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt
));
3520 /* stmt is now the BIT_*_EXPR.  */
3521 if (!has_single_use (gimple_assign_rhs1 (stmt
)))
3522 ret
+= 1 + info
->ops
[info
->ops_swapped_p
].bit_not_p
;
3523 else if (info
->ops
[info
->ops_swapped_p
].bit_not_p
)
3525 gimple
*stmt2
= SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt
));
3526 if (!has_single_use (gimple_assign_rhs1 (stmt2
)))
/* When ops[1] has no load, the operands cannot have been swapped;
   this is checked by the assert below.  */
3529 if (info
->ops
[1].base_addr
== NULL_TREE
)
3531 gcc_checking_assert (!info
->ops_swapped_p
);
3534 if (!has_single_use (gimple_assign_rhs2 (stmt
)))
3535 ret
+= 1 + info
->ops
[1 - info
->ops_swapped_p
].bit_not_p
;
3536 else if (info
->ops
[1 - info
->ops_swapped_p
].bit_not_p
)
3538 gimple
*stmt2
= SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt
));
3539 if (!has_single_use (gimple_assign_rhs1 (stmt2
)))
3544 if (!has_single_use (gimple_assign_rhs1 (stmt
)))
3545 return 1 + info
->ops
[0].bit_not_p
;
3546 else if (info
->ops
[0].bit_not_p
)
3548 stmt
= SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt
));
3549 if (!has_single_use (gimple_assign_rhs1 (stmt
)))
3553 case BIT_INSERT_EXPR
:
3554 return has_single_use (gimple_assign_rhs1 (stmt
)) ? 0 : 1;
/* NOTE(review): this whole function is a garbled extraction -- every
   original source line is split across several physical lines and many
   lines are missing entirely, so it is annotated in place only; no code
   token has been changed.  */
3560 /* Split a merged store described by GROUP by populating the SPLIT_STORES
3561 vector (if non-NULL) with split_store structs describing the byte offset
3562 (from the base), the bit size and alignment of each store as well as the
3563 original statements involved in each such split group.
3564 This is to separate the splitting strategy from the statement
3565 building/emission/linking done in output_merged_store.
3566 Return number of new stores.
3567 If ALLOW_UNALIGNED_STORE is false, then all stores must be aligned.
3568 If ALLOW_UNALIGNED_LOAD is false, then all loads must be aligned.
3569 BZERO_FIRST may be true only when the first store covers the whole group
3570 and clears it; if BZERO_FIRST is true, keep that first store in the set
3571 unmodified and emit further stores for the overrides only.
3572 If SPLIT_STORES is NULL, it is just a dry run to count number of
3576 split_group (merged_store_group
*group
, bool allow_unaligned_store
,
3577 bool allow_unaligned_load
, bool bzero_first
,
3578 vec
<split_store
*> *split_stores
,
3579 unsigned *total_orig
,
3580 unsigned *total_new
)
3582 unsigned HOST_WIDE_INT pos
= group
->bitregion_start
;
3583 unsigned HOST_WIDE_INT size
= group
->bitregion_end
- pos
;
3584 unsigned HOST_WIDE_INT bytepos
= pos
/ BITS_PER_UNIT
;
3585 unsigned HOST_WIDE_INT group_align
= group
->align
;
3586 unsigned HOST_WIDE_INT align_base
= group
->align_base
;
3587 unsigned HOST_WIDE_INT group_load_align
= group_align
;
3588 bool any_orig
= false;
3590 gcc_assert ((size
% BITS_PER_UNIT
== 0) && (pos
% BITS_PER_UNIT
== 0));
3592 /* For bswap framework using sets of stores, all the checking has been done
3593 earlier in try_coalesce_bswap and the result always needs to be emitted
3594 as a single store. Likewise for string concatenation, */
3595 if (group
->stores
[0]->rhs_code
== LROTATE_EXPR
3596 || group
->stores
[0]->rhs_code
== NOP_EXPR
3597 || group
->string_concatenation
)
3599 gcc_assert (!bzero_first
);
3602 /* Avoid the old/new stmt count heuristics. It should be
3603 always beneficial. */
3610 unsigned HOST_WIDE_INT align_bitpos
3611 = (group
->start
- align_base
) & (group_align
- 1);
3612 unsigned HOST_WIDE_INT align
= group_align
;
3614 align
= least_bit_hwi (align_bitpos
);
3615 bytepos
= group
->start
/ BITS_PER_UNIT
;
3617 = new split_store (bytepos
, group
->width
, align
);
3618 unsigned int first
= 0;
3619 find_constituent_stores (group
, &store
->orig_stores
,
3620 &first
, group
->start
, group
->width
);
3621 split_stores
->safe_push (store
);
/* General case: RET counts the split stores produced so far, TRY_POS is
   the byte position the main splitting loop below is currently at.  */
3627 unsigned int ret
= 0, first
= 0;
3628 unsigned HOST_WIDE_INT try_pos
= bytepos
;
3633 store_immediate_info
*info
= group
->stores
[0];
3636 total_orig
[0] = 1; /* The orig store. */
3637 info
= group
->stores
[0];
3638 if (info
->ops
[0].base_addr
)
3640 if (info
->ops
[1].base_addr
)
3642 switch (info
->rhs_code
)
3647 total_orig
[0]++; /* The orig BIT_*_EXPR stmt. */
3652 total_orig
[0] *= group
->stores
.length ();
3654 FOR_EACH_VEC_ELT (group
->stores
, i
, info
)
3656 total_new
[0] += count_multiple_uses (info
);
3657 total_orig
[0] += (info
->bit_not_p
3658 + info
->ops
[0].bit_not_p
3659 + info
->ops
[1].bit_not_p
);
/* When unaligned loads are not allowed, the loads must honor the
   strictest alignment among the group's load operands as well.  */
3663 if (!allow_unaligned_load
)
3664 for (int i
= 0; i
< 2; ++i
)
3665 if (group
->load_align
[i
])
3666 group_load_align
= MIN (group_load_align
, group
->load_align
[i
]);
3670 store_immediate_info
*gstore
;
3671 FOR_EACH_VEC_ELT (group
->stores
, first
, gstore
)
3672 if (!gimple_clobber_p (gstore
->stmt
))
3679 = new split_store (bytepos
, gstore
->bitsize
, align_base
);
3680 store
->orig_stores
.safe_push (gstore
)
;
3683 split_stores
->safe_push (store
);
3689 if ((allow_unaligned_store
|| group_align
<= BITS_PER_UNIT
)
3690 && (group
->mask
[try_pos
- bytepos
] == (unsigned char) ~0U
3691 || (bzero_first
&& group
->val
[try_pos
- bytepos
] == 0)))
3693 /* Skip padding bytes. */
3695 size
-= BITS_PER_UNIT
;
/* Attempt the widest store at TRY_POS, then shrink TRY_SIZE to honor
   the alignment requirements of stores (and loads, if needed).  */
3699 unsigned HOST_WIDE_INT try_bitpos
= try_pos
* BITS_PER_UNIT
;
3700 unsigned int try_size
= MAX_STORE_BITSIZE
, nonmasked
;
3701 unsigned HOST_WIDE_INT align_bitpos
3702 = (try_bitpos
- align_base
) & (group_align
- 1);
3703 unsigned HOST_WIDE_INT align
= group_align
;
3704 bool found_orig
= false;
3706 align
= least_bit_hwi (align_bitpos
);
3707 if (!allow_unaligned_store
)
3708 try_size
= MIN (try_size
, align
);
3709 if (!allow_unaligned_load
)
3711 /* If we can't do or don't want to do unaligned stores
3712 as well as loads, we need to take the loads into account
3714 unsigned HOST_WIDE_INT load_align
= group_load_align
;
3715 align_bitpos
= (try_bitpos
- align_base
) & (load_align
- 1);
3717 load_align
= least_bit_hwi (align_bitpos
);
3718 for (int i
= 0; i
< 2; ++i
)
3719 if (group
->load_align
[i
])
3722 = known_alignment (try_bitpos
3723 - group
->stores
[0]->bitpos
3724 + group
->stores
[0]->ops
[i
].bitpos
3725 - group
->load_align_base
[i
]);
3726 if (align_bitpos
& (group_load_align
- 1))
3728 unsigned HOST_WIDE_INT a
= least_bit_hwi (align_bitpos
);
3729 load_align
= MIN (load_align
, a
);
3732 try_size
= MIN (try_size
, load_align
);
3734 store_immediate_info
*info
3735 = find_constituent_stores (group
, NULL
, &first
, try_bitpos
, try_size
);
3736 if (info
&& !gimple_clobber_p (info
->stmt
))
3738 /* If there is just one original statement for the range, see if
3739 we can just reuse the original store which could be even larger
3741 unsigned HOST_WIDE_INT stmt_end
3742 = ROUND_UP (info
->bitpos
+ info
->bitsize
, BITS_PER_UNIT
);
3743 info
= find_constituent_stores (group
, NULL
, &first
, try_bitpos
,
3744 stmt_end
- try_bitpos
);
3745 if (info
&& info
->bitpos
>= try_bitpos
)
3747 store_immediate_info
*info2
= NULL
;
3748 unsigned int first_copy
= first
;
3749 if (info
->bitpos
> try_bitpos
3750 && stmt_end
- try_bitpos
<= try_size
)
3752 info2
= find_constituent_stores (group
, NULL
, &first_copy
,
3754 info
->bitpos
- try_bitpos
);
3755 gcc_assert (info2
== NULL
|| gimple_clobber_p (info2
->stmt
));
3757 if (info2
== NULL
&& stmt_end
- try_bitpos
< try_size
)
3759 info2
= find_constituent_stores (group
, NULL
, &first_copy
,
3761 (try_bitpos
+ try_size
)
3763 gcc_assert (info2
== NULL
|| gimple_clobber_p (info2
->stmt
));
3767 try_size
= stmt_end
- try_bitpos
;
3774 /* Approximate store bitsize for the case when there are no padding
3776 while (try_size
> size
)
3778 /* Now look for whole padding bytes at the end of that bitsize. */
3779 for (nonmasked
= try_size
/ BITS_PER_UNIT
; nonmasked
> 0; --nonmasked
)
3780 if (group
->mask
[try_pos
- bytepos
+ nonmasked
- 1]
3781 != (unsigned char) ~0U
3783 || group
->val
[try_pos
- bytepos
+ nonmasked
- 1] != 0))
3785 if (nonmasked
== 0 || (info
&& gimple_clobber_p (info
->stmt
)))
3787 /* If entire try_size range is padding, skip it. */
3788 try_pos
+= try_size
/ BITS_PER_UNIT
;
3792 /* Otherwise try to decrease try_size if second half, last 3 quarters
3793 etc. are padding. */
3794 nonmasked
*= BITS_PER_UNIT
;
3795 while (nonmasked
<= try_size
/ 2)
3797 if (!allow_unaligned_store
&& group_align
> BITS_PER_UNIT
)
3799 /* Now look for whole padding bytes at the start of that bitsize. */
3800 unsigned int try_bytesize
= try_size
/ BITS_PER_UNIT
, masked
;
3801 for (masked
= 0; masked
< try_bytesize
; ++masked
)
3802 if (group
->mask
[try_pos
- bytepos
+ masked
] != (unsigned char) ~0U
3804 || group
->val
[try_pos
- bytepos
+ masked
] != 0))
3806 masked
*= BITS_PER_UNIT
;
3807 gcc_assert (masked
< try_size
);
3808 if (masked
>= try_size
/ 2)
3810 while (masked
>= try_size
/ 2)
3813 try_pos
+= try_size
/ BITS_PER_UNIT
;
3817 /* Need to recompute the alignment, so just retry at the new
3829 = new split_store (try_pos
, try_size
, align
);
3830 info
= find_constituent_stores (group
, &store
->orig_stores
,
3831 &first
, try_bitpos
, try_size
);
3833 && !gimple_clobber_p (info
->stmt
)
3834 && info
->bitpos
>= try_bitpos
3835 && info
->bitpos
+ info
->bitsize
<= try_bitpos
+ try_size
3836 && (store
->orig_stores
.length () == 1
3838 || (info
->bitpos
== try_bitpos
3839 && (info
->bitpos
+ info
->bitsize
3840 == try_bitpos
+ try_size
))))
3845 split_stores
->safe_push (store
);
3848 try_pos
+= try_size
/ BITS_PER_UNIT
;
3856 /* If we are reusing some original stores and any of the
3857 original SSA_NAMEs had multiple uses, we need to subtract
3858 those now before we add the new ones. */
3859 if (total_new
[0] && any_orig
)
3861 FOR_EACH_VEC_ELT (*split_stores
, i
, store
)
3863 total_new
[0] -= count_multiple_uses (store
->orig_stores
[0]);
3865 total_new
[0] += ret
; /* The new store. */
3866 store_immediate_info
*info
= group
->stores
[0];
3867 if (info
->ops
[0].base_addr
)
3868 total_new
[0] += ret
;
3869 if (info
->ops
[1].base_addr
)
3870 total_new
[0] += ret
;
3871 switch (info
->rhs_code
)
3876 total_new
[0] += ret
; /* The new BIT_*_EXPR stmt. */
3881 FOR_EACH_VEC_ELT (*split_stores
, i
, store
)
3884 bool bit_not_p
[3] = { false, false, false };
3885 /* If all orig_stores have certain bit_not_p set, then
3886 we'd use a BIT_NOT_EXPR stmt and need to account for it.
3887 If some orig_stores have certain bit_not_p set, then
3888 we'd use a BIT_XOR_EXPR with a mask and need to account for
3890 FOR_EACH_VEC_ELT (store
->orig_stores
, j
, info
)
3892 if (info
->ops
[0].bit_not_p
)
3893 bit_not_p
[0] = true;
3894 if (info
->ops
[1].bit_not_p
)
3895 bit_not_p
[1] = true;
3896 if (info
->bit_not_p
)
3897 bit_not_p
[2] = true;
3899 total_new
[0] += bit_not_p
[0] + bit_not_p
[1] + bit_not_p
[2];
3907 /* Return the operation through which the operand IDX (if < 2) or
3908 result (IDX == 2) should be inverted.  If NOP_EXPR, no inversion
3909 is done, if BIT_NOT_EXPR, all bits are inverted, if BIT_XOR_EXPR,
3910 the bits should be xored with mask. */
/* NOTE(review): garbled extraction -- lines are fragmented and several
   are missing; annotated in place only, no code token changed.  */
3912 static enum tree_code
3913 invert_op (split_store
*split_store
, int idx
, tree int_type
, tree
&mask
)
3916 store_immediate_info
*info
;
3917 unsigned int cnt
= 0;
3918 bool any_paddings
= false;
/* First pass over the constituent stores: when every one of them has
   bit_not_p set and none is a padded (narrower-precision) bool-like
   store, a plain BIT_NOT_EXPR suffices (checked below).  */
3919 FOR_EACH_VEC_ELT (split_store
->orig_stores
, i
, info
)
3921 bool bit_not_p
= idx
< 2 ? info
->ops
[idx
].bit_not_p
: info
->bit_not_p
;
3925 tree lhs
= gimple_assign_lhs (info
->stmt
);
3926 if (INTEGRAL_TYPE_P (TREE_TYPE (lhs
))
3927 && TYPE_PRECISION (TREE_TYPE (lhs
)) < info
->bitsize
)
3928 any_paddings
= true;
3934 if (cnt
== split_store
->orig_stores
.length () && !any_paddings
)
3935 return BIT_NOT_EXPR
;
/* Otherwise build a byte buffer that starts all-ones and has the
   to-be-inverted regions cleared; MASK is produced from it and the
   caller xors with it (BIT_XOR_EXPR returned at the end).  */
3937 unsigned HOST_WIDE_INT try_bitpos
= split_store
->bytepos
* BITS_PER_UNIT
;
3938 unsigned buf_size
= split_store
->size
/ BITS_PER_UNIT
;
3940 = XALLOCAVEC (unsigned char, buf_size
);
3941 memset (buf
, ~0U, buf_size
);
3942 FOR_EACH_VEC_ELT (split_store
->orig_stores
, i
, info
)
3944 bool bit_not_p
= idx
< 2 ? info
->ops
[idx
].bit_not_p
: info
->bit_not_p
;
3947 /* Clear regions with bit_not_p and invert afterwards, rather than
3948 clear regions with !bit_not_p, so that gaps in between stores aren't
3950 unsigned HOST_WIDE_INT bitsize
= info
->bitsize
;
3951 unsigned HOST_WIDE_INT prec
= bitsize
;
3952 unsigned int pos_in_buffer
= 0;
3955 tree lhs
= gimple_assign_lhs (info
->stmt
);
3956 if (INTEGRAL_TYPE_P (TREE_TYPE (lhs
))
3957 && TYPE_PRECISION (TREE_TYPE (lhs
)) < bitsize
)
3958 prec
= TYPE_PRECISION (TREE_TYPE (lhs
));
3960 if (info
->bitpos
< try_bitpos
)
3962 gcc_assert (info
->bitpos
+ bitsize
> try_bitpos
);
3963 if (!BYTES_BIG_ENDIAN
)
3965 if (prec
<= try_bitpos
- info
->bitpos
)
3967 prec
-= try_bitpos
- info
->bitpos
;
3969 bitsize
-= try_bitpos
- info
->bitpos
;
3970 if (BYTES_BIG_ENDIAN
&& prec
> bitsize
)
3974 pos_in_buffer
= info
->bitpos
- try_bitpos
;
3977 /* If this is a bool inversion, invert just the least significant
3978 prec bits rather than all bits of it. */
3979 if (BYTES_BIG_ENDIAN
)
3981 pos_in_buffer
+= bitsize
- prec
;
3982 if (pos_in_buffer
>= split_store
->size
)
3987 if (pos_in_buffer
+ bitsize
> split_store
->size
)
3988 bitsize
= split_store
->size
- pos_in_buffer
;
/* Clear BITSIZE bits at POS_IN_BUFFER; on big-endian targets bit
   positions within a byte are counted from the most significant bit.  */
3989 unsigned char *p
= buf
+ (pos_in_buffer
/ BITS_PER_UNIT
);
3990 if (BYTES_BIG_ENDIAN
)
3991 clear_bit_region_be (p
, (BITS_PER_UNIT
- 1
3992 - (pos_in_buffer
% BITS_PER_UNIT
)), bitsize
);
3994 clear_bit_region (p
, pos_in_buffer
% BITS_PER_UNIT
, bitsize
);
3996 for (unsigned int i
= 0; i
< buf_size
; ++i
)
3998 mask
= native_interpret_expr (int_type
, buf
, buf_size
);
3999 return BIT_XOR_EXPR
;
4002 /* Given a merged store group GROUP output the widened version of it.
4003 The store chain is against the base object BASE.
4004 Try store sizes of at most MAX_STORE_BITSIZE bits wide and don't output
4005 unaligned stores for STRICT_ALIGNMENT targets or if it's too expensive.
4006 Make sure that the number of statements output is less than the number of
4007 original statements. If a better sequence is possible emit it and
4011 imm_store_chain_info::output_merged_store (merged_store_group
*group
)
4013 const unsigned HOST_WIDE_INT start_byte_pos
4014 = group
->bitregion_start
/ BITS_PER_UNIT
;
4015 unsigned int orig_num_stmts
= group
->stores
.length ();
4016 if (orig_num_stmts
< 2)
4019 bool allow_unaligned_store
4020 = !STRICT_ALIGNMENT
&& param_store_merging_allow_unaligned
;
4021 bool allow_unaligned_load
= allow_unaligned_store
;
4022 bool bzero_first
= false;
4023 store_immediate_info
*store
;
4024 unsigned int num_clobber_stmts
= 0;
4025 if (group
->stores
[0]->rhs_code
== INTEGER_CST
)
4028 FOR_EACH_VEC_ELT (group
->stores
, i
, store
)
4029 if (gimple_clobber_p (store
->stmt
))
4030 num_clobber_stmts
++;
4031 else if (TREE_CODE (gimple_assign_rhs1 (store
->stmt
)) == CONSTRUCTOR
4032 && CONSTRUCTOR_NELTS (gimple_assign_rhs1 (store
->stmt
)) == 0
4033 && group
->start
== store
->bitpos
4034 && group
->width
== store
->bitsize
4035 && (group
->start
% BITS_PER_UNIT
) == 0
4036 && (group
->width
% BITS_PER_UNIT
) == 0)
4043 FOR_EACH_VEC_ELT_FROM (group
->stores
, i
, store
, i
)
4044 if (gimple_clobber_p (store
->stmt
))
4045 num_clobber_stmts
++;
4046 if (num_clobber_stmts
== orig_num_stmts
)
4048 orig_num_stmts
-= num_clobber_stmts
;
4050 if (allow_unaligned_store
|| bzero_first
)
4052 /* If unaligned stores are allowed, see how many stores we'd emit
4053 for unaligned and how many stores we'd emit for aligned stores.
4054 Only use unaligned stores if it allows fewer stores than aligned.
4055 Similarly, if there is a whole region clear first, prefer expanding
4056 it together compared to expanding clear first followed by merged
4058 unsigned cnt
[4] = { ~0U, ~0U, ~0U, ~0U };
4060 for (int pass
= 0; pass
< 4; ++pass
)
4062 if (!allow_unaligned_store
&& (pass
& 1) != 0)
4064 if (!bzero_first
&& (pass
& 2) != 0)
4066 cnt
[pass
] = split_group (group
, (pass
& 1) != 0,
4067 allow_unaligned_load
, (pass
& 2) != 0,
4069 if (cnt
[pass
] < cnt
[pass_min
])
4072 if ((pass_min
& 1) == 0)
4073 allow_unaligned_store
= false;
4074 if ((pass_min
& 2) == 0)
4075 bzero_first
= false;
4078 auto_vec
<class split_store
*, 32> split_stores
;
4079 split_store
*split_store
;
4080 unsigned total_orig
, total_new
, i
;
4081 split_group (group
, allow_unaligned_store
, allow_unaligned_load
, bzero_first
,
4082 &split_stores
, &total_orig
, &total_new
);
4084 /* Determine if there is a clobber covering the whole group at the start,
4085 followed by proposed split stores that cover the whole group. In that
4086 case, prefer the transformation even if
4087 split_stores.length () == orig_num_stmts. */
4088 bool clobber_first
= false;
4089 if (num_clobber_stmts
4090 && gimple_clobber_p (group
->stores
[0]->stmt
)
4091 && group
->start
== group
->stores
[0]->bitpos
4092 && group
->width
== group
->stores
[0]->bitsize
4093 && (group
->start
% BITS_PER_UNIT
) == 0
4094 && (group
->width
% BITS_PER_UNIT
) == 0)
4096 clobber_first
= true;
4097 unsigned HOST_WIDE_INT pos
= group
->start
/ BITS_PER_UNIT
;
4098 FOR_EACH_VEC_ELT (split_stores
, i
, split_store
)
4099 if (split_store
->bytepos
!= pos
)
4101 clobber_first
= false;
4105 pos
+= split_store
->size
/ BITS_PER_UNIT
;
4106 if (pos
!= (group
->start
+ group
->width
) / BITS_PER_UNIT
)
4107 clobber_first
= false;
4110 if (split_stores
.length () >= orig_num_stmts
+ clobber_first
)
4113 /* We didn't manage to reduce the number of statements. Bail out. */
4114 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4115 fprintf (dump_file
, "Exceeded original number of stmts (%u)."
4116 " Not profitable to emit new sequence.\n",
4118 FOR_EACH_VEC_ELT (split_stores
, i
, split_store
)
4122 if (total_orig
<= total_new
)
4124 /* If number of estimated new statements is above estimated original
4125 statements, bail out too. */
4126 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4127 fprintf (dump_file
, "Estimated number of original stmts (%u)"
4128 " not larger than estimated number of new"
4130 total_orig
, total_new
);
4131 FOR_EACH_VEC_ELT (split_stores
, i
, split_store
)
4135 if (group
->stores
[0]->rhs_code
== INTEGER_CST
)
4137 bool all_orig
= true;
4138 FOR_EACH_VEC_ELT (split_stores
, i
, split_store
)
4139 if (!split_store
->orig
)
4146 unsigned int cnt
= split_stores
.length ();
4147 store_immediate_info
*store
;
4148 FOR_EACH_VEC_ELT (group
->stores
, i
, store
)
4149 if (gimple_clobber_p (store
->stmt
))
4151 /* Punt if we wouldn't make any real changes, i.e. keep all
4152 orig stmts + all clobbers. */
4153 if (cnt
== group
->stores
.length ())
4155 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4156 fprintf (dump_file
, "Exceeded original number of stmts (%u)."
4157 " Not profitable to emit new sequence.\n",
4159 FOR_EACH_VEC_ELT (split_stores
, i
, split_store
)
4166 gimple_stmt_iterator last_gsi
= gsi_for_stmt (group
->last_stmt
);
4167 gimple_seq seq
= NULL
;
4168 tree last_vdef
, new_vuse
;
4169 last_vdef
= gimple_vdef (group
->last_stmt
);
4170 new_vuse
= gimple_vuse (group
->last_stmt
);
4171 tree bswap_res
= NULL_TREE
;
4173 /* Clobbers are not removed. */
4174 if (gimple_clobber_p (group
->last_stmt
))
4176 new_vuse
= make_ssa_name (gimple_vop (cfun
), group
->last_stmt
);
4177 gimple_set_vdef (group
->last_stmt
, new_vuse
);
4180 if (group
->stores
[0]->rhs_code
== LROTATE_EXPR
4181 || group
->stores
[0]->rhs_code
== NOP_EXPR
)
4183 tree fndecl
= NULL_TREE
, bswap_type
= NULL_TREE
, load_type
;
4184 gimple
*ins_stmt
= group
->stores
[0]->ins_stmt
;
4185 struct symbolic_number
*n
= &group
->stores
[0]->n
;
4186 bool bswap
= group
->stores
[0]->rhs_code
== LROTATE_EXPR
;
4191 load_type
= bswap_type
= uint16_type_node
;
4194 load_type
= uint32_type_node
;
4197 fndecl
= builtin_decl_explicit (BUILT_IN_BSWAP32
);
4198 bswap_type
= TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl
)));
4202 load_type
= uint64_type_node
;
4205 fndecl
= builtin_decl_explicit (BUILT_IN_BSWAP64
);
4206 bswap_type
= TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl
)));
4213 /* If the loads have each vuse of the corresponding store,
4214 we've checked the aliasing already in try_coalesce_bswap and
4215 we want to sink the need load into seq. So need to use new_vuse
4219 if (n
->vuse
== NULL
)
4225 /* Update vuse in case it has changed by output_merged_stores. */
4226 n
->vuse
= gimple_vuse (ins_stmt
);
4228 bswap_res
= bswap_replace (gsi_start (seq
), ins_stmt
, fndecl
,
4229 bswap_type
, load_type
, n
, bswap
,
4231 gcc_assert (bswap_res
);
4234 gimple
*stmt
= NULL
;
4235 auto_vec
<gimple
*, 32> orig_stmts
;
4236 gimple_seq this_seq
;
4237 tree addr
= force_gimple_operand_1 (unshare_expr (base_addr
), &this_seq
,
4238 is_gimple_mem_ref_addr
, NULL_TREE
);
4239 gimple_seq_add_seq_without_update (&seq
, this_seq
);
4241 tree load_addr
[2] = { NULL_TREE
, NULL_TREE
};
4242 gimple_seq load_seq
[2] = { NULL
, NULL
};
4243 gimple_stmt_iterator load_gsi
[2] = { gsi_none (), gsi_none () };
4244 for (int j
= 0; j
< 2; ++j
)
4246 store_operand_info
&op
= group
->stores
[0]->ops
[j
];
4247 if (op
.base_addr
== NULL_TREE
)
4250 store_immediate_info
*infol
= group
->stores
.last ();
4251 if (gimple_vuse (op
.stmt
) == gimple_vuse (infol
->ops
[j
].stmt
))
4253 /* We can't pick the location randomly; while we've verified
4254 all the loads have the same vuse, they can be still in different
4255 basic blocks and we need to pick the one from the last bb:
4261 otherwise if we put the wider load at the q[0] load, we might
4262 segfault if q[1] is not mapped. */
4263 basic_block bb
= gimple_bb (op
.stmt
);
4264 gimple
*ostmt
= op
.stmt
;
4265 store_immediate_info
*info
;
4266 FOR_EACH_VEC_ELT (group
->stores
, i
, info
)
4268 gimple
*tstmt
= info
->ops
[j
].stmt
;
4269 basic_block tbb
= gimple_bb (tstmt
);
4270 if (dominated_by_p (CDI_DOMINATORS
, tbb
, bb
))
4276 load_gsi
[j
] = gsi_for_stmt (ostmt
);
4278 = force_gimple_operand_1 (unshare_expr (op
.base_addr
),
4279 &load_seq
[j
], is_gimple_mem_ref_addr
,
4282 else if (operand_equal_p (base_addr
, op
.base_addr
, 0))
4283 load_addr
[j
] = addr
;
4287 = force_gimple_operand_1 (unshare_expr (op
.base_addr
),
4288 &this_seq
, is_gimple_mem_ref_addr
,
4290 gimple_seq_add_seq_without_update (&seq
, this_seq
);
4294 FOR_EACH_VEC_ELT (split_stores
, i
, split_store
)
4296 const unsigned HOST_WIDE_INT try_size
= split_store
->size
;
4297 const unsigned HOST_WIDE_INT try_pos
= split_store
->bytepos
;
4298 const unsigned HOST_WIDE_INT try_bitpos
= try_pos
* BITS_PER_UNIT
;
4299 const unsigned HOST_WIDE_INT try_align
= split_store
->align
;
4300 const unsigned HOST_WIDE_INT try_offset
= try_pos
- start_byte_pos
;
4304 if (split_store
->orig
)
4306 /* If there is just a single non-clobber constituent store
4307 which covers the whole area, just reuse the lhs and rhs. */
4308 gimple
*orig_stmt
= NULL
;
4309 store_immediate_info
*store
;
4311 FOR_EACH_VEC_ELT (split_store
->orig_stores
, j
, store
)
4312 if (!gimple_clobber_p (store
->stmt
))
4314 orig_stmt
= store
->stmt
;
4317 dest
= gimple_assign_lhs (orig_stmt
);
4318 src
= gimple_assign_rhs1 (orig_stmt
);
4319 loc
= gimple_location (orig_stmt
);
4323 store_immediate_info
*info
;
4324 unsigned short clique
, base
;
4326 FOR_EACH_VEC_ELT (split_store
->orig_stores
, k
, info
)
4327 orig_stmts
.safe_push (info
->stmt
);
4329 = get_alias_type_for_stmts (orig_stmts
, false, &clique
, &base
);
4331 loc
= get_location_for_stmts (orig_stmts
);
4332 orig_stmts
.truncate (0);
4334 if (group
->string_concatenation
)
4336 = build_array_type_nelts (char_type_node
,
4337 try_size
/ BITS_PER_UNIT
);
4340 dest_type
= build_nonstandard_integer_type (try_size
, UNSIGNED
);
4341 dest_type
= build_aligned_type (dest_type
, try_align
);
4343 dest
= fold_build2 (MEM_REF
, dest_type
, addr
,
4344 build_int_cst (offset_type
, try_pos
));
4345 if (TREE_CODE (dest
) == MEM_REF
)
4347 MR_DEPENDENCE_CLIQUE (dest
) = clique
;
4348 MR_DEPENDENCE_BASE (dest
) = base
;
4352 if (bswap_res
|| group
->string_concatenation
)
4353 mask
= integer_zero_node
;
4355 mask
= native_interpret_expr (dest_type
,
4356 group
->mask
+ try_offset
,
4361 j
< 1 + (split_store
->orig_stores
[0]->ops
[1].val
!= NULL_TREE
);
4364 store_operand_info
&op
= split_store
->orig_stores
[0]->ops
[j
];
4367 else if (group
->string_concatenation
)
4369 ops
[j
] = build_string (try_size
/ BITS_PER_UNIT
,
4370 (const char *) group
->val
+ try_offset
);
4371 TREE_TYPE (ops
[j
]) = dest_type
;
4373 else if (op
.base_addr
)
4375 FOR_EACH_VEC_ELT (split_store
->orig_stores
, k
, info
)
4376 orig_stmts
.safe_push (info
->ops
[j
].stmt
);
4378 offset_type
= get_alias_type_for_stmts (orig_stmts
, true,
4380 location_t load_loc
= get_location_for_stmts (orig_stmts
);
4381 orig_stmts
.truncate (0);
4383 unsigned HOST_WIDE_INT load_align
= group
->load_align
[j
];
4384 unsigned HOST_WIDE_INT align_bitpos
4385 = known_alignment (try_bitpos
4386 - split_store
->orig_stores
[0]->bitpos
4388 if (align_bitpos
& (load_align
- 1))
4389 load_align
= least_bit_hwi (align_bitpos
);
4392 = build_nonstandard_integer_type (try_size
, UNSIGNED
);
4394 = build_aligned_type (load_int_type
, load_align
);
4396 poly_uint64 load_pos
4397 = exact_div (try_bitpos
4398 - split_store
->orig_stores
[0]->bitpos
4401 ops
[j
] = fold_build2 (MEM_REF
, load_int_type
, load_addr
[j
],
4402 build_int_cst (offset_type
, load_pos
));
4403 if (TREE_CODE (ops
[j
]) == MEM_REF
)
4405 MR_DEPENDENCE_CLIQUE (ops
[j
]) = clique
;
4406 MR_DEPENDENCE_BASE (ops
[j
]) = base
;
4408 if (!integer_zerop (mask
))
4410 /* The load might load some bits (that will be masked
4411 off later on) uninitialized, avoid -W*uninitialized
4412 warnings in that case. */
4413 suppress_warning (ops
[j
], OPT_Wuninitialized
);
4416 stmt
= gimple_build_assign (make_ssa_name (dest_type
), ops
[j
]);
4417 gimple_set_location (stmt
, load_loc
);
4418 if (gsi_bb (load_gsi
[j
]))
4420 gimple_set_vuse (stmt
, gimple_vuse (op
.stmt
));
4421 gimple_seq_add_stmt_without_update (&load_seq
[j
], stmt
);
4425 gimple_set_vuse (stmt
, new_vuse
);
4426 gimple_seq_add_stmt_without_update (&seq
, stmt
);
4428 ops
[j
] = gimple_assign_lhs (stmt
);
4430 enum tree_code inv_op
4431 = invert_op (split_store
, j
, dest_type
, xor_mask
);
4432 if (inv_op
!= NOP_EXPR
)
4434 stmt
= gimple_build_assign (make_ssa_name (dest_type
),
4435 inv_op
, ops
[j
], xor_mask
);
4436 gimple_set_location (stmt
, load_loc
);
4437 ops
[j
] = gimple_assign_lhs (stmt
);
4439 if (gsi_bb (load_gsi
[j
]))
4440 gimple_seq_add_stmt_without_update (&load_seq
[j
],
4443 gimple_seq_add_stmt_without_update (&seq
, stmt
);
4447 ops
[j
] = native_interpret_expr (dest_type
,
4448 group
->val
+ try_offset
,
4452 switch (split_store
->orig_stores
[0]->rhs_code
)
4457 FOR_EACH_VEC_ELT (split_store
->orig_stores
, k
, info
)
4459 tree rhs1
= gimple_assign_rhs1 (info
->stmt
);
4460 orig_stmts
.safe_push (SSA_NAME_DEF_STMT (rhs1
));
4463 bit_loc
= get_location_for_stmts (orig_stmts
);
4464 orig_stmts
.truncate (0);
4467 = gimple_build_assign (make_ssa_name (dest_type
),
4468 split_store
->orig_stores
[0]->rhs_code
,
4470 gimple_set_location (stmt
, bit_loc
);
4471 /* If there is just one load and there is a separate
4472 load_seq[0], emit the bitwise op right after it. */
4473 if (load_addr
[1] == NULL_TREE
&& gsi_bb (load_gsi
[0]))
4474 gimple_seq_add_stmt_without_update (&load_seq
[0], stmt
);
4475 /* Otherwise, if at least one load is in seq, we need to
4476 emit the bitwise op right before the store. If there
4477 are two loads and are emitted somewhere else, it would
4478 be better to emit the bitwise op as early as possible;
4479 we don't track where that would be possible right now
4482 gimple_seq_add_stmt_without_update (&seq
, stmt
);
4483 src
= gimple_assign_lhs (stmt
);
4485 enum tree_code inv_op
;
4486 inv_op
= invert_op (split_store
, 2, dest_type
, xor_mask
);
4487 if (inv_op
!= NOP_EXPR
)
4489 stmt
= gimple_build_assign (make_ssa_name (dest_type
),
4490 inv_op
, src
, xor_mask
);
4491 gimple_set_location (stmt
, bit_loc
);
4492 if (load_addr
[1] == NULL_TREE
&& gsi_bb (load_gsi
[0]))
4493 gimple_seq_add_stmt_without_update (&load_seq
[0], stmt
);
4495 gimple_seq_add_stmt_without_update (&seq
, stmt
);
4496 src
= gimple_assign_lhs (stmt
);
4502 if (!is_gimple_val (src
))
4504 stmt
= gimple_build_assign (make_ssa_name (TREE_TYPE (src
)),
4506 gimple_seq_add_stmt_without_update (&seq
, stmt
);
4507 src
= gimple_assign_lhs (stmt
);
4509 if (!useless_type_conversion_p (dest_type
, TREE_TYPE (src
)))
4511 stmt
= gimple_build_assign (make_ssa_name (dest_type
),
4513 gimple_seq_add_stmt_without_update (&seq
, stmt
);
4514 src
= gimple_assign_lhs (stmt
);
4516 inv_op
= invert_op (split_store
, 2, dest_type
, xor_mask
);
4517 if (inv_op
!= NOP_EXPR
)
4519 stmt
= gimple_build_assign (make_ssa_name (dest_type
),
4520 inv_op
, src
, xor_mask
);
4521 gimple_set_location (stmt
, loc
);
4522 gimple_seq_add_stmt_without_update (&seq
, stmt
);
4523 src
= gimple_assign_lhs (stmt
);
4531 /* If bit insertion is required, we use the source as an accumulator
4532 into which the successive bit-field values are manually inserted.
4533 FIXME: perhaps use BIT_INSERT_EXPR instead in some cases? */
4534 if (group
->bit_insertion
)
4535 FOR_EACH_VEC_ELT (split_store
->orig_stores
, k
, info
)
4536 if (info
->rhs_code
== BIT_INSERT_EXPR
4537 && info
->bitpos
< try_bitpos
+ try_size
4538 && info
->bitpos
+ info
->bitsize
> try_bitpos
)
4540 /* Mask, truncate, convert to final type, shift and ior into
4541 the accumulator. Note that every step can be a no-op. */
4542 const HOST_WIDE_INT start_gap
= info
->bitpos
- try_bitpos
;
4543 const HOST_WIDE_INT end_gap
4544 = (try_bitpos
+ try_size
) - (info
->bitpos
+ info
->bitsize
);
4545 tree tem
= info
->ops
[0].val
;
4546 if (!INTEGRAL_TYPE_P (TREE_TYPE (tem
)))
4548 const unsigned HOST_WIDE_INT size
4549 = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (tem
)));
4551 = build_nonstandard_integer_type (size
, UNSIGNED
);
4552 tem
= gimple_build (&seq
, loc
, VIEW_CONVERT_EXPR
,
4555 if (TYPE_PRECISION (TREE_TYPE (tem
)) <= info
->bitsize
)
4558 = build_nonstandard_integer_type (info
->bitsize
,
4560 tem
= gimple_convert (&seq
, loc
, bitfield_type
, tem
);
4562 else if ((BYTES_BIG_ENDIAN
? start_gap
: end_gap
) > 0)
4564 const unsigned HOST_WIDE_INT imask
4565 = (HOST_WIDE_INT_1U
<< info
->bitsize
) - 1;
4566 tem
= gimple_build (&seq
, loc
,
4567 BIT_AND_EXPR
, TREE_TYPE (tem
), tem
,
4568 build_int_cst (TREE_TYPE (tem
),
4571 const HOST_WIDE_INT shift
4572 = (BYTES_BIG_ENDIAN
? end_gap
: start_gap
);
4574 tem
= gimple_build (&seq
, loc
,
4575 RSHIFT_EXPR
, TREE_TYPE (tem
), tem
,
4576 build_int_cst (NULL_TREE
, -shift
));
4577 tem
= gimple_convert (&seq
, loc
, dest_type
, tem
);
4579 tem
= gimple_build (&seq
, loc
,
4580 LSHIFT_EXPR
, dest_type
, tem
,
4581 build_int_cst (NULL_TREE
, shift
));
4582 src
= gimple_build (&seq
, loc
,
4583 BIT_IOR_EXPR
, dest_type
, tem
, src
);
4586 if (!integer_zerop (mask
))
4588 tree tem
= make_ssa_name (dest_type
);
4589 tree load_src
= unshare_expr (dest
);
4590 /* The load might load some or all bits uninitialized,
4591 avoid -W*uninitialized warnings in that case.
4592 As optimization, it would be nice if all the bits are
4593 provably uninitialized (no stores at all yet or previous
4594 store a CLOBBER) we'd optimize away the load and replace
4596 suppress_warning (load_src
, OPT_Wuninitialized
);
4597 stmt
= gimple_build_assign (tem
, load_src
);
4598 gimple_set_location (stmt
, loc
);
4599 gimple_set_vuse (stmt
, new_vuse
);
4600 gimple_seq_add_stmt_without_update (&seq
, stmt
);
4602 /* FIXME: If there is a single chunk of zero bits in mask,
4603 perhaps use BIT_INSERT_EXPR instead? */
4604 stmt
= gimple_build_assign (make_ssa_name (dest_type
),
4605 BIT_AND_EXPR
, tem
, mask
);
4606 gimple_set_location (stmt
, loc
);
4607 gimple_seq_add_stmt_without_update (&seq
, stmt
);
4608 tem
= gimple_assign_lhs (stmt
);
4610 if (TREE_CODE (src
) == INTEGER_CST
)
4611 src
= wide_int_to_tree (dest_type
,
4612 wi::bit_and_not (wi::to_wide (src
),
4613 wi::to_wide (mask
)));
4617 = wide_int_to_tree (dest_type
,
4618 wi::bit_not (wi::to_wide (mask
)));
4619 stmt
= gimple_build_assign (make_ssa_name (dest_type
),
4620 BIT_AND_EXPR
, src
, nmask
);
4621 gimple_set_location (stmt
, loc
);
4622 gimple_seq_add_stmt_without_update (&seq
, stmt
);
4623 src
= gimple_assign_lhs (stmt
);
4625 stmt
= gimple_build_assign (make_ssa_name (dest_type
),
4626 BIT_IOR_EXPR
, tem
, src
);
4627 gimple_set_location (stmt
, loc
);
4628 gimple_seq_add_stmt_without_update (&seq
, stmt
);
4629 src
= gimple_assign_lhs (stmt
);
4633 stmt
= gimple_build_assign (dest
, src
);
4634 gimple_set_location (stmt
, loc
);
4635 gimple_set_vuse (stmt
, new_vuse
);
4636 gimple_seq_add_stmt_without_update (&seq
, stmt
);
4638 if (group
->lp_nr
&& stmt_could_throw_p (cfun
, stmt
))
4639 add_stmt_to_eh_lp (stmt
, group
->lp_nr
);
4642 if (i
< split_stores
.length () - 1)
4643 new_vdef
= make_ssa_name (gimple_vop (cfun
), stmt
);
4645 new_vdef
= last_vdef
;
4647 gimple_set_vdef (stmt
, new_vdef
);
4648 SSA_NAME_DEF_STMT (new_vdef
) = stmt
;
4649 new_vuse
= new_vdef
;
4652 FOR_EACH_VEC_ELT (split_stores
, i
, split_store
)
4659 "New sequence of %u stores to replace old one of %u stores\n",
4660 split_stores
.length (), orig_num_stmts
);
4661 if (dump_flags
& TDF_DETAILS
)
4662 print_gimple_seq (dump_file
, seq
, 0, TDF_VOPS
| TDF_MEMSYMS
);
4665 if (gimple_clobber_p (group
->last_stmt
))
4666 update_stmt (group
->last_stmt
);
4668 if (group
->lp_nr
> 0)
4670 /* We're going to insert a sequence of (potentially) throwing stores
4671 into an active EH region. This means that we're going to create
4672 new basic blocks with EH edges pointing to the post landing pad
4673 and, therefore, to have to update its PHI nodes, if any. For the
4674 virtual PHI node, we're going to use the VDEFs created above, but
4675 for the other nodes, we need to record the original reaching defs. */
4676 eh_landing_pad lp
= get_eh_landing_pad_from_number (group
->lp_nr
);
4677 basic_block lp_bb
= label_to_block (cfun
, lp
->post_landing_pad
);
4678 basic_block last_bb
= gimple_bb (group
->last_stmt
);
4679 edge last_edge
= find_edge (last_bb
, lp_bb
);
4680 auto_vec
<tree
, 16> last_defs
;
4682 for (gpi
= gsi_start_phis (lp_bb
); !gsi_end_p (gpi
); gsi_next (&gpi
))
4684 gphi
*phi
= gpi
.phi ();
4686 if (virtual_operand_p (gimple_phi_result (phi
)))
4687 last_def
= NULL_TREE
;
4689 last_def
= gimple_phi_arg_def (phi
, last_edge
->dest_idx
);
4690 last_defs
.safe_push (last_def
);
4693 /* Do the insertion. Then, if new basic blocks have been created in the
4694 process, rewind the chain of VDEFs create above to walk the new basic
4695 blocks and update the corresponding arguments of the PHI nodes. */
4696 update_modified_stmts (seq
);
4697 if (gimple_find_sub_bbs (seq
, &last_gsi
))
4698 while (last_vdef
!= gimple_vuse (group
->last_stmt
))
4700 gimple
*stmt
= SSA_NAME_DEF_STMT (last_vdef
);
4701 if (stmt_could_throw_p (cfun
, stmt
))
4703 edge new_edge
= find_edge (gimple_bb (stmt
), lp_bb
);
4705 for (gpi
= gsi_start_phis (lp_bb
), i
= 0;
4707 gsi_next (&gpi
), i
++)
4709 gphi
*phi
= gpi
.phi ();
4711 if (virtual_operand_p (gimple_phi_result (phi
)))
4712 new_def
= last_vdef
;
4714 new_def
= last_defs
[i
];
4715 add_phi_arg (phi
, new_def
, new_edge
, UNKNOWN_LOCATION
);
4718 last_vdef
= gimple_vuse (stmt
);
4722 gsi_insert_seq_after (&last_gsi
, seq
, GSI_SAME_STMT
);
4724 for (int j
= 0; j
< 2; ++j
)
4726 gsi_insert_seq_after (&load_gsi
[j
], load_seq
[j
], GSI_SAME_STMT
);
4731 /* Process the merged_store_group objects created in the coalescing phase.
4732 The stores are all against the base object BASE.
4733 Try to output the widened stores and delete the original statements if
4734 successful. Return true iff any changes were made. */
4737 imm_store_chain_info::output_merged_stores ()
4740 merged_store_group
*merged_store
;
4742 FOR_EACH_VEC_ELT (m_merged_store_groups
, i
, merged_store
)
4744 if (dbg_cnt (store_merging
)
4745 && output_merged_store (merged_store
))
4748 store_immediate_info
*store
;
4749 FOR_EACH_VEC_ELT (merged_store
->stores
, j
, store
)
4751 gimple
*stmt
= store
->stmt
;
4752 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
4753 /* Don't remove clobbers, they are still useful even if
4754 everything is overwritten afterwards. */
4755 if (gimple_clobber_p (stmt
))
4757 gsi_remove (&gsi
, true);
4759 remove_stmt_from_eh_lp (stmt
);
4760 if (stmt
!= merged_store
->last_stmt
)
4762 unlink_stmt_vdef (stmt
);
4763 release_defs (stmt
);
4769 if (ret
&& dump_file
)
4770 fprintf (dump_file
, "Merging successful!\n");
4775 /* Coalesce the store_immediate_info objects recorded against the base object
4776 BASE in the first phase and output them.
4777 Delete the allocated structures.
4778 Return true if any changes were made. */
4781 imm_store_chain_info::terminate_and_process_chain ()
4783 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4784 fprintf (dump_file
, "Terminating chain with %u stores\n",
4785 m_store_info
.length ());
4786 /* Process store chain. */
4788 if (m_store_info
.length () > 1)
4790 ret
= coalesce_immediate_stores ();
4792 ret
= output_merged_stores ();
4795 /* Delete all the entries we allocated ourselves. */
4796 store_immediate_info
*info
;
4798 FOR_EACH_VEC_ELT (m_store_info
, i
, info
)
4801 merged_store_group
*merged_info
;
4802 FOR_EACH_VEC_ELT (m_merged_store_groups
, i
, merged_info
)
4808 /* Return true iff LHS is a destination potentially interesting for
4809 store merging. In practice these are the codes that get_inner_reference
4813 lhs_valid_for_store_merging_p (tree lhs
)
4818 switch (TREE_CODE (lhs
))
4821 case ARRAY_RANGE_REF
:
4825 case VIEW_CONVERT_EXPR
:
4834 /* Return true if the tree RHS is a constant we want to consider
4835 during store merging. In practice accept all codes that
4836 native_encode_expr accepts. */
4839 rhs_valid_for_store_merging_p (tree rhs
)
4841 unsigned HOST_WIDE_INT size
;
4842 if (TREE_CODE (rhs
) == CONSTRUCTOR
4843 && CONSTRUCTOR_NELTS (rhs
) == 0
4844 && TYPE_SIZE_UNIT (TREE_TYPE (rhs
))
4845 && tree_fits_uhwi_p (TYPE_SIZE_UNIT (TREE_TYPE (rhs
))))
4847 return (GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (rhs
))).is_constant (&size
)
4848 && native_encode_expr (rhs
, NULL
, size
) != 0);
4851 /* Adjust *PBITPOS, *PBITREGION_START and *PBITREGION_END by BYTE_OFF bytes
4852 and return true on success or false on failure. */
4855 adjust_bit_pos (poly_offset_int byte_off
,
4856 poly_int64
*pbitpos
,
4857 poly_uint64
*pbitregion_start
,
4858 poly_uint64
*pbitregion_end
)
4860 poly_offset_int bit_off
= byte_off
<< LOG2_BITS_PER_UNIT
;
4861 bit_off
+= *pbitpos
;
4863 if (known_ge (bit_off
, 0) && bit_off
.to_shwi (pbitpos
))
4865 if (maybe_ne (*pbitregion_end
, 0U))
4867 bit_off
= byte_off
<< LOG2_BITS_PER_UNIT
;
4868 bit_off
+= *pbitregion_start
;
4869 if (bit_off
.to_uhwi (pbitregion_start
))
4871 bit_off
= byte_off
<< LOG2_BITS_PER_UNIT
;
4872 bit_off
+= *pbitregion_end
;
4873 if (!bit_off
.to_uhwi (pbitregion_end
))
4874 *pbitregion_end
= 0;
4877 *pbitregion_end
= 0;
4885 /* If MEM is a memory reference usable for store merging (either as
4886 store destination or for loads), return the non-NULL base_addr
4887 and set *PBITSIZE, *PBITPOS, *PBITREGION_START and *PBITREGION_END.
4888 Otherwise return NULL, *PBITPOS should be still valid even for that
4892 mem_valid_for_store_merging (tree mem
, poly_uint64
*pbitsize
,
4893 poly_uint64
*pbitpos
,
4894 poly_uint64
*pbitregion_start
,
4895 poly_uint64
*pbitregion_end
)
4897 poly_int64 bitsize
, bitpos
;
4898 poly_uint64 bitregion_start
= 0, bitregion_end
= 0;
4900 int unsignedp
= 0, reversep
= 0, volatilep
= 0;
4902 tree base_addr
= get_inner_reference (mem
, &bitsize
, &bitpos
, &offset
, &mode
,
4903 &unsignedp
, &reversep
, &volatilep
);
4904 *pbitsize
= bitsize
;
4905 if (known_eq (bitsize
, 0))
4908 if (TREE_CODE (mem
) == COMPONENT_REF
4909 && DECL_BIT_FIELD_TYPE (TREE_OPERAND (mem
, 1)))
4911 get_bit_range (&bitregion_start
, &bitregion_end
, mem
, &bitpos
, &offset
);
4912 if (maybe_ne (bitregion_end
, 0U))
4919 /* We do not want to rewrite TARGET_MEM_REFs. */
4920 if (TREE_CODE (base_addr
) == TARGET_MEM_REF
)
4922 /* In some cases get_inner_reference may return a
4923 MEM_REF [ptr + byteoffset]. For the purposes of this pass
4924 canonicalize the base_addr to MEM_REF [ptr] and take
4925 byteoffset into account in the bitpos. This occurs in
4926 PR 23684 and this way we can catch more chains. */
4927 else if (TREE_CODE (base_addr
) == MEM_REF
)
4929 if (!adjust_bit_pos (mem_ref_offset (base_addr
), &bitpos
,
4930 &bitregion_start
, &bitregion_end
))
4932 base_addr
= TREE_OPERAND (base_addr
, 0);
4934 /* get_inner_reference returns the base object, get at its
4938 if (maybe_lt (bitpos
, 0))
4940 base_addr
= build_fold_addr_expr (base_addr
);
4945 /* If the access is variable offset then a base decl has to be
4946 address-taken to be able to emit pointer-based stores to it.
4947 ??? We might be able to get away with re-using the original
4948 base up to the first variable part and then wrapping that inside
4950 tree base
= get_base_address (base_addr
);
4951 if (!base
|| (DECL_P (base
) && !TREE_ADDRESSABLE (base
)))
4954 /* Similarly to above for the base, remove constant from the offset. */
4955 if (TREE_CODE (offset
) == PLUS_EXPR
4956 && TREE_CODE (TREE_OPERAND (offset
, 1)) == INTEGER_CST
4957 && adjust_bit_pos (wi::to_poly_offset (TREE_OPERAND (offset
, 1)),
4958 &bitpos
, &bitregion_start
, &bitregion_end
))
4959 offset
= TREE_OPERAND (offset
, 0);
4961 base_addr
= build2 (POINTER_PLUS_EXPR
, TREE_TYPE (base_addr
),
4965 if (known_eq (bitregion_end
, 0U))
4967 bitregion_start
= round_down_to_byte_boundary (bitpos
);
4968 bitregion_end
= round_up_to_byte_boundary (bitpos
+ bitsize
);
4971 *pbitsize
= bitsize
;
4973 *pbitregion_start
= bitregion_start
;
4974 *pbitregion_end
= bitregion_end
;
4978 /* Return true if STMT is a load that can be used for store merging.
4979 In that case fill in *OP. BITSIZE, BITPOS, BITREGION_START and
4980 BITREGION_END are properties of the corresponding store. */
4983 handled_load (gimple
*stmt
, store_operand_info
*op
,
4984 poly_uint64 bitsize
, poly_uint64 bitpos
,
4985 poly_uint64 bitregion_start
, poly_uint64 bitregion_end
)
4987 if (!is_gimple_assign (stmt
))
4989 if (gimple_assign_rhs_code (stmt
) == BIT_NOT_EXPR
)
4991 tree rhs1
= gimple_assign_rhs1 (stmt
);
4992 if (TREE_CODE (rhs1
) == SSA_NAME
4993 && handled_load (SSA_NAME_DEF_STMT (rhs1
), op
, bitsize
, bitpos
,
4994 bitregion_start
, bitregion_end
))
4996 /* Don't allow _1 = load; _2 = ~1; _3 = ~_2; which should have
4997 been optimized earlier, but if allowed here, would confuse the
4998 multiple uses counting. */
5001 op
->bit_not_p
= !op
->bit_not_p
;
5006 if (gimple_vuse (stmt
)
5007 && gimple_assign_load_p (stmt
)
5008 && !stmt_can_throw_internal (cfun
, stmt
)
5009 && !gimple_has_volatile_ops (stmt
))
5011 tree mem
= gimple_assign_rhs1 (stmt
);
5013 = mem_valid_for_store_merging (mem
, &op
->bitsize
, &op
->bitpos
,
5014 &op
->bitregion_start
,
5015 &op
->bitregion_end
);
5016 if (op
->base_addr
!= NULL_TREE
5017 && known_eq (op
->bitsize
, bitsize
)
5018 && multiple_p (op
->bitpos
- bitpos
, BITS_PER_UNIT
)
5019 && known_ge (op
->bitpos
- op
->bitregion_start
,
5020 bitpos
- bitregion_start
)
5021 && known_ge (op
->bitregion_end
- op
->bitpos
,
5022 bitregion_end
- bitpos
))
5026 op
->bit_not_p
= false;
5033 /* Return the index number of the landing pad for STMT, if any. */
5036 lp_nr_for_store (gimple
*stmt
)
5038 if (!cfun
->can_throw_non_call_exceptions
|| !cfun
->eh
)
5041 if (!stmt_could_throw_p (cfun
, stmt
))
5044 return lookup_stmt_eh_lp (stmt
);
5047 /* Record the store STMT for store merging optimization if it can be
5048 optimized. Return true if any changes were made. */
5051 pass_store_merging::process_store (gimple
*stmt
)
5053 tree lhs
= gimple_assign_lhs (stmt
);
5054 tree rhs
= gimple_assign_rhs1 (stmt
);
5055 poly_uint64 bitsize
, bitpos
= 0;
5056 poly_uint64 bitregion_start
= 0, bitregion_end
= 0;
5058 = mem_valid_for_store_merging (lhs
, &bitsize
, &bitpos
,
5059 &bitregion_start
, &bitregion_end
);
5060 if (known_eq (bitsize
, 0U))
5063 bool invalid
= (base_addr
== NULL_TREE
5064 || (maybe_gt (bitsize
,
5065 (unsigned int) MAX_BITSIZE_MODE_ANY_INT
)
5066 && TREE_CODE (rhs
) != INTEGER_CST
5067 && (TREE_CODE (rhs
) != CONSTRUCTOR
5068 || CONSTRUCTOR_NELTS (rhs
) != 0)));
5069 enum tree_code rhs_code
= ERROR_MARK
;
5070 bool bit_not_p
= false;
5071 struct symbolic_number n
;
5072 gimple
*ins_stmt
= NULL
;
5073 store_operand_info ops
[2];
5076 else if (TREE_CODE (rhs
) == STRING_CST
)
5078 rhs_code
= STRING_CST
;
5081 else if (rhs_valid_for_store_merging_p (rhs
))
5083 rhs_code
= INTEGER_CST
;
5086 else if (TREE_CODE (rhs
) == SSA_NAME
)
5088 gimple
*def_stmt
= SSA_NAME_DEF_STMT (rhs
), *def_stmt1
, *def_stmt2
;
5089 if (!is_gimple_assign (def_stmt
))
5091 else if (handled_load (def_stmt
, &ops
[0], bitsize
, bitpos
,
5092 bitregion_start
, bitregion_end
))
5094 else if (gimple_assign_rhs_code (def_stmt
) == BIT_NOT_EXPR
)
5096 tree rhs1
= gimple_assign_rhs1 (def_stmt
);
5097 if (TREE_CODE (rhs1
) == SSA_NAME
5098 && is_gimple_assign (SSA_NAME_DEF_STMT (rhs1
)))
5101 def_stmt
= SSA_NAME_DEF_STMT (rhs1
);
5105 if (rhs_code
== ERROR_MARK
&& !invalid
)
5106 switch ((rhs_code
= gimple_assign_rhs_code (def_stmt
)))
5112 rhs1
= gimple_assign_rhs1 (def_stmt
);
5113 rhs2
= gimple_assign_rhs2 (def_stmt
);
5115 if (TREE_CODE (rhs1
) != SSA_NAME
)
5117 def_stmt1
= SSA_NAME_DEF_STMT (rhs1
);
5118 if (!is_gimple_assign (def_stmt1
)
5119 || !handled_load (def_stmt1
, &ops
[0], bitsize
, bitpos
,
5120 bitregion_start
, bitregion_end
))
5122 if (rhs_valid_for_store_merging_p (rhs2
))
5124 else if (TREE_CODE (rhs2
) != SSA_NAME
)
5128 def_stmt2
= SSA_NAME_DEF_STMT (rhs2
);
5129 if (!is_gimple_assign (def_stmt2
))
5131 else if (!handled_load (def_stmt2
, &ops
[1], bitsize
, bitpos
,
5132 bitregion_start
, bitregion_end
))
5142 unsigned HOST_WIDE_INT const_bitsize
;
5143 if (bitsize
.is_constant (&const_bitsize
)
5144 && (const_bitsize
% BITS_PER_UNIT
) == 0
5145 && const_bitsize
<= 64
5146 && multiple_p (bitpos
, BITS_PER_UNIT
))
5148 ins_stmt
= find_bswap_or_nop_1 (def_stmt
, &n
, 12);
5152 for (unsigned HOST_WIDE_INT i
= 0;
5154 i
+= BITS_PER_UNIT
, nn
>>= BITS_PER_MARKER
)
5155 if ((nn
& MARKER_MASK
) == 0
5156 || (nn
& MARKER_MASK
) == MARKER_BYTE_UNKNOWN
)
5165 rhs_code
= LROTATE_EXPR
;
5166 ops
[0].base_addr
= NULL_TREE
;
5167 ops
[1].base_addr
= NULL_TREE
;
5175 && bitsize
.is_constant (&const_bitsize
)
5176 && ((const_bitsize
% BITS_PER_UNIT
) != 0
5177 || !multiple_p (bitpos
, BITS_PER_UNIT
))
5178 && const_bitsize
<= MAX_FIXED_MODE_SIZE
)
5180 /* Bypass a conversion to the bit-field type. */
5182 && is_gimple_assign (def_stmt
)
5183 && CONVERT_EXPR_CODE_P (rhs_code
))
5185 tree rhs1
= gimple_assign_rhs1 (def_stmt
);
5186 if (TREE_CODE (rhs1
) == SSA_NAME
5187 && INTEGRAL_TYPE_P (TREE_TYPE (rhs1
)))
5190 rhs_code
= BIT_INSERT_EXPR
;
5193 ops
[0].base_addr
= NULL_TREE
;
5194 ops
[1].base_addr
= NULL_TREE
;
5201 unsigned HOST_WIDE_INT const_bitsize
, const_bitpos
;
5202 unsigned HOST_WIDE_INT const_bitregion_start
, const_bitregion_end
;
5204 || !bitsize
.is_constant (&const_bitsize
)
5205 || !bitpos
.is_constant (&const_bitpos
)
5206 || !bitregion_start
.is_constant (&const_bitregion_start
)
5207 || !bitregion_end
.is_constant (&const_bitregion_end
))
5208 return terminate_all_aliasing_chains (NULL
, stmt
);
5211 memset (&n
, 0, sizeof (n
));
5213 class imm_store_chain_info
**chain_info
= NULL
;
5216 chain_info
= m_stores
.get (base_addr
);
5218 store_immediate_info
*info
;
5221 unsigned int ord
= (*chain_info
)->m_store_info
.length ();
5222 info
= new store_immediate_info (const_bitsize
, const_bitpos
,
5223 const_bitregion_start
,
5224 const_bitregion_end
,
5225 stmt
, ord
, rhs_code
, n
, ins_stmt
,
5226 bit_not_p
, lp_nr_for_store (stmt
),
5228 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5230 fprintf (dump_file
, "Recording immediate store from stmt:\n");
5231 print_gimple_stmt (dump_file
, stmt
, 0);
5233 (*chain_info
)->m_store_info
.safe_push (info
);
5235 ret
|= terminate_all_aliasing_chains (chain_info
, stmt
);
5236 /* If we reach the limit of stores to merge in a chain terminate and
5237 process the chain now. */
5238 if ((*chain_info
)->m_store_info
.length ()
5239 == (unsigned int) param_max_stores_to_merge
)
5241 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5243 "Reached maximum number of statements to merge:\n");
5244 ret
|= terminate_and_process_chain (*chain_info
);
5249 /* Store aliases any existing chain? */
5250 ret
|= terminate_all_aliasing_chains (NULL
, stmt
);
5252 /* Start a new chain. */
5253 class imm_store_chain_info
*new_chain
5254 = new imm_store_chain_info (m_stores_head
, base_addr
);
5255 info
= new store_immediate_info (const_bitsize
, const_bitpos
,
5256 const_bitregion_start
,
5257 const_bitregion_end
,
5258 stmt
, 0, rhs_code
, n
, ins_stmt
,
5259 bit_not_p
, lp_nr_for_store (stmt
),
5261 new_chain
->m_store_info
.safe_push (info
);
5263 m_stores
.put (base_addr
, new_chain
);
5265 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5267 fprintf (dump_file
, "Starting active chain number %u with statement:\n",
5269 print_gimple_stmt (dump_file
, stmt
, 0);
5270 fprintf (dump_file
, "The base object is:\n");
5271 print_generic_expr (dump_file
, base_addr
);
5272 fprintf (dump_file
, "\n");
5276 /* Prune oldest chains so that after adding the chain or store above
5277 we're again within the limits set by the params. */
5278 if (m_n_chains
> (unsigned)param_max_store_chains_to_track
5279 || m_n_stores
> (unsigned)param_max_stores_to_track
)
5281 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5282 fprintf (dump_file
, "Too many chains (%u > %d) or stores (%u > %d), "
5283 "terminating oldest chain(s).\n", m_n_chains
,
5284 param_max_store_chains_to_track
, m_n_stores
,
5285 param_max_stores_to_track
);
5286 imm_store_chain_info
**e
= &m_stores_head
;
5288 unsigned n_stores
= 0;
5291 if (idx
>= (unsigned)param_max_store_chains_to_track
5292 || (n_stores
+ (*e
)->m_store_info
.length ()
5293 > (unsigned)param_max_stores_to_track
))
5294 ret
|= terminate_and_process_chain (*e
);
5297 n_stores
+= (*e
)->m_store_info
.length ();
5307 /* Return true if STMT is a store valid for store merging. */
5310 store_valid_for_store_merging_p (gimple
*stmt
)
5312 return gimple_assign_single_p (stmt
)
5313 && gimple_vdef (stmt
)
5314 && lhs_valid_for_store_merging_p (gimple_assign_lhs (stmt
))
5315 && (!gimple_has_volatile_ops (stmt
) || gimple_clobber_p (stmt
));
5318 enum basic_block_status
{ BB_INVALID
, BB_VALID
, BB_EXTENDED_VALID
};
5320 /* Return the status of basic block BB wrt store merging. */
5322 static enum basic_block_status
5323 get_status_for_store_merging (basic_block bb
)
5325 unsigned int num_statements
= 0;
5326 unsigned int num_constructors
= 0;
5327 gimple_stmt_iterator gsi
;
5330 for (gsi
= gsi_after_labels (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
5332 gimple
*stmt
= gsi_stmt (gsi
);
5334 if (is_gimple_debug (stmt
))
5337 if (store_valid_for_store_merging_p (stmt
) && ++num_statements
>= 2)
5340 if (is_gimple_assign (stmt
)
5341 && gimple_assign_rhs_code (stmt
) == CONSTRUCTOR
)
5343 tree rhs
= gimple_assign_rhs1 (stmt
);
5344 if (VECTOR_TYPE_P (TREE_TYPE (rhs
))
5345 && INTEGRAL_TYPE_P (TREE_TYPE (TREE_TYPE (rhs
)))
5346 && gimple_assign_lhs (stmt
) != NULL_TREE
)
5349 = int_size_in_bytes (TREE_TYPE (rhs
)) * BITS_PER_UNIT
;
5350 if (sz
== 16 || sz
== 32 || sz
== 64)
5352 num_constructors
= 1;
5359 if (num_statements
== 0 && num_constructors
== 0)
5362 if (cfun
->can_throw_non_call_exceptions
&& cfun
->eh
5363 && store_valid_for_store_merging_p (gimple_seq_last_stmt (bb_seq (bb
)))
5364 && (e
= find_fallthru_edge (bb
->succs
))
5365 && e
->dest
== bb
->next_bb
)
5366 return BB_EXTENDED_VALID
;
5368 return (num_statements
>= 2 || num_constructors
) ? BB_VALID
: BB_INVALID
;
5371 /* Entry point for the pass. Go over each basic block recording chains of
5372 immediate stores. Upon encountering a terminating statement (as defined
5373 by stmt_terminates_chain_p) process the recorded stores and emit the widened
5377 pass_store_merging::execute (function
*fun
)
5380 hash_set
<gimple
*> orig_stmts
;
5381 bool changed
= false, open_chains
= false;
5383 /* If the function can throw and catch non-call exceptions, we'll be trying
5384 to merge stores across different basic blocks so we need to first unsplit
5385 the EH edges in order to streamline the CFG of the function. */
5386 if (cfun
->can_throw_non_call_exceptions
&& cfun
->eh
)
5387 unsplit_eh_edges ();
5389 calculate_dominance_info (CDI_DOMINATORS
);
5391 FOR_EACH_BB_FN (bb
, fun
)
5393 const basic_block_status bb_status
= get_status_for_store_merging (bb
);
5394 gimple_stmt_iterator gsi
;
5396 if (open_chains
&& (bb_status
== BB_INVALID
|| !single_pred_p (bb
)))
5398 changed
|= terminate_and_process_all_chains ();
5399 open_chains
= false;
5402 if (bb_status
== BB_INVALID
)
5405 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5406 fprintf (dump_file
, "Processing basic block <%d>:\n", bb
->index
);
5408 for (gsi
= gsi_after_labels (bb
); !gsi_end_p (gsi
); )
5410 gimple
*stmt
= gsi_stmt (gsi
);
5413 if (is_gimple_debug (stmt
))
5416 if (gimple_has_volatile_ops (stmt
) && !gimple_clobber_p (stmt
))
5418 /* Terminate all chains. */
5419 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5420 fprintf (dump_file
, "Volatile access terminates "
5422 changed
|= terminate_and_process_all_chains ();
5423 open_chains
= false;
5427 if (is_gimple_assign (stmt
)
5428 && gimple_assign_rhs_code (stmt
) == CONSTRUCTOR
5429 && maybe_optimize_vector_constructor (stmt
))
5432 if (store_valid_for_store_merging_p (stmt
))
5433 changed
|= process_store (stmt
);
5435 changed
|= terminate_all_aliasing_chains (NULL
, stmt
);
5438 if (bb_status
== BB_EXTENDED_VALID
)
5442 changed
|= terminate_and_process_all_chains ();
5443 open_chains
= false;
5448 changed
|= terminate_and_process_all_chains ();
5450 /* If the function can throw and catch non-call exceptions and something
5451 changed during the pass, then the CFG has (very likely) changed too. */
5452 if (cfun
->can_throw_non_call_exceptions
&& cfun
->eh
&& changed
)
5454 free_dominance_info (CDI_DOMINATORS
);
5455 return TODO_cleanup_cfg
;
5463 /* Construct and return a store merging pass object. */
5466 make_pass_store_merging (gcc::context
*ctxt
)
5468 return new pass_store_merging (ctxt
);
5473 namespace selftest
{
5475 /* Selftests for store merging helpers. */
5477 /* Assert that all elements of the byte arrays X and Y, both of length N
5481 verify_array_eq (unsigned char *x
, unsigned char *y
, unsigned int n
)
5483 for (unsigned int i
= 0; i
< n
; i
++)
5487 fprintf (stderr
, "Arrays do not match. X:\n");
5488 dump_char_array (stderr
, x
, n
);
5489 fprintf (stderr
, "Y:\n");
5490 dump_char_array (stderr
, y
, n
);
5492 ASSERT_EQ (x
[i
], y
[i
]);
5496 /* Test shift_bytes_in_array_left and that it carries bits across between
5500 verify_shift_bytes_in_array_left (void)
5503 00011111 | 11100000. */
5504 unsigned char orig
[2] = { 0xe0, 0x1f };
5505 unsigned char in
[2];
5506 memcpy (in
, orig
, sizeof orig
);
5508 unsigned char expected
[2] = { 0x80, 0x7f };
5509 shift_bytes_in_array_left (in
, sizeof (in
), 2);
5510 verify_array_eq (in
, expected
, sizeof (in
));
5512 memcpy (in
, orig
, sizeof orig
);
5513 memcpy (expected
, orig
, sizeof orig
);
5514 /* Check that shifting by zero doesn't change anything. */
5515 shift_bytes_in_array_left (in
, sizeof (in
), 0);
5516 verify_array_eq (in
, expected
, sizeof (in
));
5520 /* Test shift_bytes_in_array_right and that it carries bits across between
5524 verify_shift_bytes_in_array_right (void)
5527 00011111 | 11100000. */
5528 unsigned char orig
[2] = { 0x1f, 0xe0};
5529 unsigned char in
[2];
5530 memcpy (in
, orig
, sizeof orig
);
5531 unsigned char expected
[2] = { 0x07, 0xf8};
5532 shift_bytes_in_array_right (in
, sizeof (in
), 2);
5533 verify_array_eq (in
, expected
, sizeof (in
));
5535 memcpy (in
, orig
, sizeof orig
);
5536 memcpy (expected
, orig
, sizeof orig
);
5537 /* Check that shifting by zero doesn't change anything. */
5538 shift_bytes_in_array_right (in
, sizeof (in
), 0);
5539 verify_array_eq (in
, expected
, sizeof (in
));
5542 /* Test clear_bit_region that it clears exactly the bits asked and
5546 verify_clear_bit_region (void)
5548 /* Start with all bits set and test clearing various patterns in them. */
5549 unsigned char orig
[3] = { 0xff, 0xff, 0xff};
5550 unsigned char in
[3];
5551 unsigned char expected
[3];
5552 memcpy (in
, orig
, sizeof in
);
5554 /* Check zeroing out all the bits. */
5555 clear_bit_region (in
, 0, 3 * BITS_PER_UNIT
);
5556 expected
[0] = expected
[1] = expected
[2] = 0;
5557 verify_array_eq (in
, expected
, sizeof in
);
5559 memcpy (in
, orig
, sizeof in
);
5560 /* Leave the first and last bits intact. */
5561 clear_bit_region (in
, 1, 3 * BITS_PER_UNIT
- 2);
5565 verify_array_eq (in
, expected
, sizeof in
);
5568 /* Test clear_bit_region_be that it clears exactly the bits asked and
5572 verify_clear_bit_region_be (void)
5574 /* Start with all bits set and test clearing various patterns in them. */
5575 unsigned char orig
[3] = { 0xff, 0xff, 0xff};
5576 unsigned char in
[3];
5577 unsigned char expected
[3];
5578 memcpy (in
, orig
, sizeof in
);
5580 /* Check zeroing out all the bits. */
5581 clear_bit_region_be (in
, BITS_PER_UNIT
- 1, 3 * BITS_PER_UNIT
);
5582 expected
[0] = expected
[1] = expected
[2] = 0;
5583 verify_array_eq (in
, expected
, sizeof in
);
5585 memcpy (in
, orig
, sizeof in
);
5586 /* Leave the first and last bits intact. */
5587 clear_bit_region_be (in
, BITS_PER_UNIT
- 2, 3 * BITS_PER_UNIT
- 2);
5591 verify_array_eq (in
, expected
, sizeof in
);
5595 /* Run all of the selftests within this file. */
5598 store_merging_c_tests (void)
5600 verify_shift_bytes_in_array_left ();
5601 verify_shift_bytes_in_array_right ();
5602 verify_clear_bit_region ();
5603 verify_clear_bit_region_be ();
5606 } // namespace selftest
5607 #endif /* CHECKING_P. */