/* GIMPLE store merging and byte swapping passes.
   Copyright (C) 2009-2020 Free Software Foundation, Inc.
   Contributed by ARM Ltd.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
/* The purpose of the store merging pass is to combine multiple memory stores
   of constant values, values loaded from memory, bitwise operations on those,
   or bit-field values, to consecutive locations, into fewer wider stores.

   For example, if we have a sequence performing four byte stores to
   consecutive memory locations:
   [p     ] := imm1;
   [p + 1B] := imm2;
   [p + 2B] := imm3;
   [p + 3B] := imm4;
   we can transform this into a single 4-byte store if the target supports it:
   [p] := imm1:imm2:imm3:imm4 concatenated according to endianness.

   Or:
   [p     ] := [q     ];
   [p + 1B] := [q + 1B];
   [p + 2B] := [q + 2B];
   [p + 3B] := [q + 3B];
   if there is no overlap can be transformed into a single 4-byte
   load followed by a single 4-byte store.

   Or:
   [p     ] := [q     ] ^ imm1;
   [p + 1B] := [q + 1B] ^ imm2;
   [p + 2B] := [q + 2B] ^ imm3;
   [p + 3B] := [q + 3B] ^ imm4;
   if there is no overlap can be transformed into a single 4-byte
   load, xored with imm1:imm2:imm3:imm4 and stored using a single 4-byte store.

   Or:
   [p:1 ] := imm;
   [p:31] := val & 0x7FFFFFFF;
   we can transform this into a single 4-byte store if the target supports it:
   [p] := imm:(val & 0x7FFFFFFF) concatenated according to endianness.

   The algorithm is applied to each basic block in three phases:

   1) Scan through the basic block and record assignments to destinations
   that can be expressed as a store to memory of a certain size at a certain
   bit offset from base expressions we can handle.  For bit-fields we also
   record the surrounding bit region, i.e. bits that could be stored in
   a read-modify-write operation when storing the bit-field.  Record store
   chains to different bases in a hash_map (m_stores) and make sure to
   terminate such chains when appropriate (for example when the stored
   values get used subsequently).
   These stores can be a result of structure element initializers, array stores
   etc.  A store_immediate_info object is recorded for every such store.
   Record as many such assignments to a single base as possible until a
   statement that interferes with the store sequence is encountered.
   Each store has up to 2 operands, which can be either a constant, a memory
   load or an SSA name, from which the value to be stored can be computed.
   At most one of the operands can be a constant.  The operands are recorded
   in the store_operand_info struct.

   2) Analyze the chains of stores recorded in phase 1) (i.e. the vector of
   store_immediate_info objects) and coalesce contiguous stores into
   merged_store_group objects.  For bit-field stores, we don't need to
   require the stores to be contiguous, just their surrounding bit regions
   have to be contiguous.  If the expression being stored is different
   between adjacent stores, such as one store storing a constant and the
   following one storing a value loaded from memory, or if the loaded memory
   objects are not adjacent, a new merged_store_group is created as well.

   For example, given the stores:
   [p     ] := 0;
   [p + 1B] := 1;
   [p + 3B] := 0;
   [p + 4B] := 1;
   [p + 5B] := 0;
   [p + 6B] := 0;
   This phase would produce two merged_store_group objects, one recording the
   two bytes stored in the memory region [p : p + 1] and another
   recording the four bytes stored in the memory region [p + 3 : p + 6].

   3) The merged_store_group objects produced in phase 2) are processed
   to generate the sequence of wider stores that set the contiguous memory
   regions to the sequence of bytes that correspond to it.  This may emit
   multiple stores per store group to handle contiguous stores that are not
   of a size that is a power of 2.  For example it can try to emit a 40-bit
   store as a 32-bit store followed by an 8-bit store.
   We try to emit as wide stores as we can while respecting STRICT_ALIGNMENT
   or TARGET_SLOW_UNALIGNED_ACCESS settings.

   Note on endianness and example:
   Consider 2 contiguous 16-bit stores followed by 2 contiguous 8-bit stores:
   [p     ] := 0x1234;
   [p + 2B] := 0x5678;
   [p + 4B] := 0xab;
   [p + 5B] := 0xcd;

   The memory layout for little-endian (LE) and big-endian (BE) must be:
  p |LE|BE|
  ---------
  0 |34|12|
  1 |12|34|
  2 |78|56|
  3 |56|78|
  4 |ab|ab|
  5 |cd|cd|

  To merge these into a single 48-bit merged value 'val' in phase 2)
  on little-endian we insert stores to higher (consecutive) bitpositions
  into the most significant bits of the merged value.
  The final merged value would be: 0xcdab56781234

  For big-endian we insert stores to higher bitpositions into the least
  significant bits of the merged value.
  The final merged value would be: 0x12345678abcd

  Then, in phase 3), we want to emit this 48-bit value as a 32-bit store
  followed by a 16-bit store.  Again, we must consider endianness when
  breaking down the 48-bit value 'val' computed above.
  For little endian we emit:
  [p]      (32-bit) := 0x56781234; //    val & 0x0000ffffffff;
  [p + 4B] (16-bit) := 0xcdab;     //   (val & 0xffff00000000) >> 32;

  Whereas for big-endian we emit:
  [p]      (32-bit) := 0x12345678; //   (val & 0xffffffff0000) >> 16;
  [p + 4B] (16-bit) := 0xabcd;     //    val & 0x00000000ffff;  */
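/* Illustrative sketch (added for clarity, not part of the original comment):
   a C source fragment of the kind the store merging pass rewrites.  The
   struct layout and function name below are made up for the example.

     struct s { unsigned char a, b, c, d; };

     void
     init (struct s *p)
     {
       p->a = 1;   // four adjacent 1-byte constant stores ...
       p->b = 2;
       p->c = 3;
       p->d = 4;   // ... become one 4-byte store of 0x04030201 (LE)
     }

   On a little-endian target that allows the wider access, phase 3 would
   emit a single 32-bit store of the concatenated constant.  */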
#include "coretypes.h"
#include "builtins.h"
#include "fold-const.h"
#include "tree-pass.h"
#include "gimple-pretty-print.h"
#include "fold-const.h"
#include "print-tree.h"
#include "tree-hash-traits.h"
#include "gimple-iterator.h"
#include "gimplify.h"
#include "gimple-fold.h"
#include "stor-layout.h"
#include "cfgcleanup.h"
#include "tree-cfg.h"
#include "gimplify-me.h"
#include "expr.h"	/* For get_bit_range.  */
#include "optabs-tree.h"
#include "selftest.h"
/* The maximum size (in bits) of the stores this pass should generate.  */
#define MAX_STORE_BITSIZE (BITS_PER_WORD)
#define MAX_STORE_BYTES (MAX_STORE_BITSIZE / BITS_PER_UNIT)

/* Limit to bound the number of aliasing checks for loads with the same
   vuse as the corresponding store.  */
#define MAX_STORE_ALIAS_CHECKS 64
struct
{
  /* Number of hand-written 16-bit nop / bswaps found.  */
  int found_16bit;

  /* Number of hand-written 32-bit nop / bswaps found.  */
  int found_32bit;

  /* Number of hand-written 64-bit nop / bswaps found.  */
  int found_64bit;
} nop_stats, bswap_stats;
/* A symbolic number structure is used to detect byte permutation and selection
   patterns of a source.  To achieve that, its field N contains an artificial
   number consisting of BITS_PER_MARKER sized markers tracking where each
   byte comes from in the source:

   0	   - target byte has the value 0
   FF	   - target byte has an unknown value (e.g. due to sign extension)
   1..size - marker value is the byte index in the source (0 for lsb).

   To detect permutations on memory sources (arrays and structures), a symbolic
   number is also associated:
   - a base address BASE_ADDR and an OFFSET giving the address of the source;
   - a range which gives the difference between the highest and lowest accessed
     memory location to make such a symbolic number;
   - the address SRC of the source element of lowest address as a convenience
     to easily get BASE_ADDR + offset + lowest bytepos;
   - number of expressions N_OPS bitwise ored together to represent
     approximate cost of the computation.

   Note 1: the range is different from size as size reflects the size of the
   type of the current expression.  For instance, for an array char a[],
   (short) a[0] | (short) a[3] would have a size of 2 but a range of 4 while
   (short) a[0] | ((short) a[0] << 1) would still have a size of 2 but this
   time a range of 1.

   Note 2: for non-memory sources, range holds the same value as size.

   Note 3: SRC points to the SSA_NAME in case of non-memory source.  */
struct symbolic_number {
  uint64_t n;
  tree type;
  tree base_addr;
  tree offset;
  poly_int64_pod bytepos;
  tree src;
  tree alias_set;
  tree vuse;
  unsigned HOST_WIDE_INT range;
  int n_ops;
};
#define BITS_PER_MARKER 8
#define MARKER_MASK ((1 << BITS_PER_MARKER) - 1)
#define MARKER_BYTE_UNKNOWN MARKER_MASK
#define HEAD_MARKER(n, size) \
  ((n) & ((uint64_t) MARKER_MASK << (((size) - 1) * BITS_PER_MARKER)))

/* The number which the find_bswap_or_nop_1 result should match in
   order to have a nop.  The number is masked according to the size of
   the symbolic number before using it.  */
#define CMPNOP (sizeof (int64_t) < 8 ? 0 : \
  (uint64_t)0x08070605 << 32 | 0x04030201)

/* The number which the find_bswap_or_nop_1 result should match in
   order to have a byte swap.  The number is masked according to the
   size of the symbolic number before using it.  */
#define CMPXCHG (sizeof (int64_t) < 8 ? 0 : \
  (uint64_t)0x01020304 << 32 | 0x05060708)
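/* Worked example (added for illustration, not from the original sources):
   for a plain 4-byte load, find_bswap_or_nop_1 computes the marker number
   0x04030201, i.e. the lsb of the result comes from source byte 1 and the
   msb from source byte 4.  That is what CMPNOP reduces to for a 4-byte
   value, so the expression is a native-endian (nop) read.  If instead the
   markers come out as 0x01020304, which is what CMPXCHG reduces to for a
   4-byte value, the expression reverses the bytes and is a byte swap, as
   produced for example by:

     uint32_t y = (x >> 24) | ((x >> 8) & 0xff00)
		  | ((x << 8) & 0xff0000) | (x << 24);
*/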
/* Perform a SHIFT or ROTATE operation by COUNT bits on symbolic
   number N.  Return false if the requested operation is not permitted
   on a symbolic number.  */

static bool
do_shift_rotate (enum tree_code code,
		 struct symbolic_number *n,
		 int count)
{
  int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
  unsigned head_marker;

  if (count < 0
      || count >= TYPE_PRECISION (n->type)
      || count % BITS_PER_UNIT != 0)
    return false;
  count = (count / BITS_PER_UNIT) * BITS_PER_MARKER;

  /* Zero out the extra bits of N in order to avoid them being shifted
     into the significant bits.  */
  if (size < 64 / BITS_PER_MARKER)
    n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;

  switch (code)
    {
    case LSHIFT_EXPR:
      n->n <<= count;
      break;
    case RSHIFT_EXPR:
      head_marker = HEAD_MARKER (n->n, size);
      n->n >>= count;
      /* Arithmetic shift of signed type: result is dependent on the value.  */
      if (!TYPE_UNSIGNED (n->type) && head_marker)
	for (i = 0; i < count / BITS_PER_MARKER; i++)
	  n->n |= (uint64_t) MARKER_BYTE_UNKNOWN
		  << ((size - 1 - i) * BITS_PER_MARKER);
      break;
    case LROTATE_EXPR:
      n->n = (n->n << count) | (n->n >> ((size * BITS_PER_MARKER) - count));
      break;
    case RROTATE_EXPR:
      n->n = (n->n >> count) | (n->n << ((size * BITS_PER_MARKER) - count));
      break;
    default:
      return false;
    }
  /* Zero unused bits for size.  */
  if (size < 64 / BITS_PER_MARKER)
    n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
  return true;
}
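/* Example of the marker arithmetic above (added for illustration): for a
   4-byte unsigned source the initial symbolic number is 0x04030201.  A right
   shift by 8 bits shifts the markers right by one BITS_PER_MARKER slot,
   giving 0x00040302: the low byte of the result now comes from source byte 2
   and the top byte is known to be zero.  For a signed source with a live
   head marker the top byte becomes MARKER_BYTE_UNKNOWN instead, i.e.
   0xff040302, because an arithmetic shift depends on the value.  */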
/* Perform sanity checking for the symbolic number N and the gimple
   statement STMT.  */

inline bool
verify_symbolic_number_p (struct symbolic_number *n, gimple *stmt)
{
  tree lhs_type;

  lhs_type = gimple_expr_type (stmt);

  if (TREE_CODE (lhs_type) != INTEGER_TYPE
      && TREE_CODE (lhs_type) != ENUMERAL_TYPE)
    return false;

  if (TYPE_PRECISION (lhs_type) != TYPE_PRECISION (n->type))
    return false;

  return true;
}
/* Initialize the symbolic number N for the bswap pass from the base element
   SRC manipulated by the bitwise OR expression.  */

static bool
init_symbolic_number (struct symbolic_number *n, tree src)
{
  int size;

  if (! INTEGRAL_TYPE_P (TREE_TYPE (src)))
    return false;

  n->base_addr = n->offset = n->alias_set = n->vuse = NULL_TREE;
  n->src = src;

  /* Set up the symbolic number N by setting each byte to a value between 1 and
     the byte size of rhs1.  The highest order byte is set to n->size and the
     lowest order byte to 1.  */
  n->type = TREE_TYPE (src);
  size = TYPE_PRECISION (n->type);
  if (size % BITS_PER_UNIT != 0)
    return false;
  size /= BITS_PER_UNIT;
  if (size > 64 / BITS_PER_MARKER)
    return false;
  n->range = size;
  n->n = CMPNOP;
  n->n_ops = 1;

  if (size < 64 / BITS_PER_MARKER)
    n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;

  return true;
}
/* Check if STMT might be a byte swap or a nop from a memory source and return
   the answer.  If so, REF is that memory source and the base of the memory area
   accessed and the offset of the access from that base are recorded in N.  */

bool
find_bswap_or_nop_load (gimple *stmt, tree ref, struct symbolic_number *n)
{
  /* Leaf node is an array or component ref.  Memorize its base and
     offset from base to compare to other such leaf node.  */
  poly_int64 bitsize, bitpos, bytepos;
  machine_mode mode;
  int unsignedp, reversep, volatilep;
  tree offset, base_addr;

  /* Not prepared to handle PDP endian.  */
  if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
    return false;

  if (!gimple_assign_load_p (stmt) || gimple_has_volatile_ops (stmt))
    return false;

  base_addr = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode,
				   &unsignedp, &reversep, &volatilep);

  if (TREE_CODE (base_addr) == TARGET_MEM_REF)
    /* Do not rewrite TARGET_MEM_REF.  */
    return false;
  else if (TREE_CODE (base_addr) == MEM_REF)
    {
      poly_offset_int bit_offset = 0;
      tree off = TREE_OPERAND (base_addr, 1);

      if (!integer_zerop (off))
	{
	  poly_offset_int boff = mem_ref_offset (base_addr);
	  boff <<= LOG2_BITS_PER_UNIT;
	  bit_offset += boff;
	}

      base_addr = TREE_OPERAND (base_addr, 0);

      /* Avoid returning a negative bitpos as this may wreak havoc later.  */
      if (maybe_lt (bit_offset, 0))
	{
	  tree byte_offset = wide_int_to_tree
	    (sizetype, bits_to_bytes_round_down (bit_offset));
	  bit_offset = num_trailing_bits (bit_offset);
	  if (offset)
	    offset = size_binop (PLUS_EXPR, offset, byte_offset);
	  else
	    offset = byte_offset;
	}

      bitpos += bit_offset.force_shwi ();
    }
  else
    base_addr = build_fold_addr_expr (base_addr);

  if (!multiple_p (bitpos, BITS_PER_UNIT, &bytepos))
    return false;
  if (!multiple_p (bitsize, BITS_PER_UNIT))
    return false;

  if (!init_symbolic_number (n, ref))
    return false;
  n->base_addr = base_addr;
  n->offset = offset;
  n->bytepos = bytepos;
  n->alias_set = reference_alias_ptr_type (ref);
  n->vuse = gimple_vuse (stmt);
  return true;
}
/* Compute the symbolic number N representing the result of a bitwise OR on 2
   symbolic numbers N1 and N2 whose source statements are respectively
   SOURCE_STMT1 and SOURCE_STMT2.  */

gimple *
perform_symbolic_merge (gimple *source_stmt1, struct symbolic_number *n1,
			gimple *source_stmt2, struct symbolic_number *n2,
			struct symbolic_number *n)
{
  int i, size;
  uint64_t mask;
  gimple *source_stmt;
  struct symbolic_number *n_start;

  tree rhs1 = gimple_assign_rhs1 (source_stmt1);
  if (TREE_CODE (rhs1) == BIT_FIELD_REF
      && TREE_CODE (TREE_OPERAND (rhs1, 0)) == SSA_NAME)
    rhs1 = TREE_OPERAND (rhs1, 0);
  tree rhs2 = gimple_assign_rhs1 (source_stmt2);
  if (TREE_CODE (rhs2) == BIT_FIELD_REF
      && TREE_CODE (TREE_OPERAND (rhs2, 0)) == SSA_NAME)
    rhs2 = TREE_OPERAND (rhs2, 0);

  /* Sources are different, cancel bswap if they are not memory locations with
     the same base (array, structure, ...).  */
  if (rhs1 != rhs2)
    {
      uint64_t inc;
      HOST_WIDE_INT start1, start2, start_sub, end_sub, end1, end2, end;
      struct symbolic_number *toinc_n_ptr, *n_end;
      basic_block bb1, bb2;

      if (!n1->base_addr || !n2->base_addr
	  || !operand_equal_p (n1->base_addr, n2->base_addr, 0))
	return NULL;

      if (!n1->offset != !n2->offset
	  || (n1->offset && !operand_equal_p (n1->offset, n2->offset, 0)))
	return NULL;

      start1 = 0;
      if (!(n2->bytepos - n1->bytepos).is_constant (&start2))
	return NULL;

      if (start1 < start2)
	{
	  n_start = n1;
	  start_sub = start2 - start1;
	}
      else
	{
	  n_start = n2;
	  start_sub = start1 - start2;
	}

      bb1 = gimple_bb (source_stmt1);
      bb2 = gimple_bb (source_stmt2);
      if (dominated_by_p (CDI_DOMINATORS, bb1, bb2))
	source_stmt = source_stmt1;
      else
	source_stmt = source_stmt2;

      /* Find the highest address at which a load is performed and
	 compute related info.  */
      end1 = start1 + (n1->range - 1);
      end2 = start2 + (n2->range - 1);
      if (end1 < end2)
	{
	  end = end2;
	  end_sub = end2 - end1;
	}
      else
	{
	  end = end1;
	  end_sub = end1 - end2;
	}
      n_end = (end2 > end1) ? n2 : n1;

      /* Find symbolic number whose lsb is the most significant.  */
      if (BYTES_BIG_ENDIAN)
	toinc_n_ptr = (n_end == n1) ? n2 : n1;
      else
	toinc_n_ptr = (n_start == n1) ? n2 : n1;

      n->range = end - MIN (start1, start2) + 1;

      /* Check that the range of memory covered can be represented by
	 a symbolic number.  */
      if (n->range > 64 / BITS_PER_MARKER)
	return NULL;

      /* Reinterpret byte marks in symbolic number holding the value of
	 bigger weight according to target endianness.  */
      inc = BYTES_BIG_ENDIAN ? end_sub : start_sub;
      size = TYPE_PRECISION (n1->type) / BITS_PER_UNIT;
      for (i = 0; i < size; i++, inc <<= BITS_PER_MARKER)
	{
	  unsigned marker
	    = (toinc_n_ptr->n >> (i * BITS_PER_MARKER)) & MARKER_MASK;
	  if (marker && marker != MARKER_BYTE_UNKNOWN)
	    toinc_n_ptr->n += inc;
	}
    }
  else
    {
      n->range = n1->range;
      n_start = n1;
      source_stmt = source_stmt1;
    }

  if (!n1->alias_set
      || alias_ptr_types_compatible_p (n1->alias_set, n2->alias_set))
    n->alias_set = n1->alias_set;
  else
    n->alias_set = ptr_type_node;
  n->vuse = n_start->vuse;
  n->base_addr = n_start->base_addr;
  n->offset = n_start->offset;
  n->src = n_start->src;
  n->bytepos = n_start->bytepos;
  n->type = n_start->type;
  size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;

  for (i = 0, mask = MARKER_MASK; i < size; i++, mask <<= BITS_PER_MARKER)
    {
      uint64_t masked1, masked2;

      masked1 = n1->n & mask;
      masked2 = n2->n & mask;
      if (masked1 && masked2 && masked1 != masked2)
	return NULL;
    }
  n->n = n1->n | n2->n;
  n->n_ops = n1->n_ops + n2->n_ops;

  return source_stmt;
}
/* find_bswap_or_nop_1 invokes itself recursively with N and tries to perform
   the operation given by the rhs of STMT on the result.  If the operation
   could successfully be executed the function returns a gimple stmt whose
   rhs's first tree is the expression of the source operand and NULL
   otherwise.  */

gimple *
find_bswap_or_nop_1 (gimple *stmt, struct symbolic_number *n, int limit)
{
  enum tree_code code;
  tree rhs1, rhs2 = NULL;
  gimple *rhs1_stmt, *rhs2_stmt, *source_stmt1;
  enum gimple_rhs_class rhs_class;

  if (!limit || !is_gimple_assign (stmt))
    return NULL;

  rhs1 = gimple_assign_rhs1 (stmt);

  if (find_bswap_or_nop_load (stmt, rhs1, n))
    return stmt;

  /* Handle BIT_FIELD_REF.  */
  if (TREE_CODE (rhs1) == BIT_FIELD_REF
      && TREE_CODE (TREE_OPERAND (rhs1, 0)) == SSA_NAME)
    {
      if (!tree_fits_uhwi_p (TREE_OPERAND (rhs1, 1))
	  || !tree_fits_uhwi_p (TREE_OPERAND (rhs1, 2)))
	return NULL;

      unsigned HOST_WIDE_INT bitsize = tree_to_uhwi (TREE_OPERAND (rhs1, 1));
      unsigned HOST_WIDE_INT bitpos = tree_to_uhwi (TREE_OPERAND (rhs1, 2));
      if (bitpos % BITS_PER_UNIT == 0
	  && bitsize % BITS_PER_UNIT == 0
	  && init_symbolic_number (n, TREE_OPERAND (rhs1, 0)))
	{
	  /* Handle big-endian bit numbering in BIT_FIELD_REF.  */
	  if (BYTES_BIG_ENDIAN)
	    bitpos = TYPE_PRECISION (n->type) - bitpos - bitsize;

	  /* Shift.  */
	  if (!do_shift_rotate (RSHIFT_EXPR, n, bitpos))
	    return NULL;

	  /* Mask.  */
	  uint64_t mask = 0;
	  uint64_t tmp = (1 << BITS_PER_UNIT) - 1;
	  for (unsigned i = 0; i < bitsize / BITS_PER_UNIT;
	       i++, tmp <<= BITS_PER_UNIT)
	    mask |= (uint64_t) MARKER_MASK << (i * BITS_PER_MARKER);
	  n->n &= mask;

	  /* Convert.  */
	  n->type = TREE_TYPE (rhs1);
	  if (!n->base_addr)
	    n->range = TYPE_PRECISION (n->type) / BITS_PER_UNIT;

	  return verify_symbolic_number_p (n, stmt) ? stmt : NULL;
	}

      return NULL;
    }

  if (TREE_CODE (rhs1) != SSA_NAME)
    return NULL;

  code = gimple_assign_rhs_code (stmt);
  rhs_class = gimple_assign_rhs_class (stmt);
  rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);

  if (rhs_class == GIMPLE_BINARY_RHS)
    rhs2 = gimple_assign_rhs2 (stmt);

  /* Handle unary rhs and binary rhs with integer constants as second
     operand.  */

  if (rhs_class == GIMPLE_UNARY_RHS
      || (rhs_class == GIMPLE_BINARY_RHS
	  && TREE_CODE (rhs2) == INTEGER_CST))
    {
      if (code != BIT_AND_EXPR
	  && code != LSHIFT_EXPR
	  && code != RSHIFT_EXPR
	  && code != LROTATE_EXPR
	  && code != RROTATE_EXPR
	  && !CONVERT_EXPR_CODE_P (code))
	return NULL;

      source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, n, limit - 1);

      /* If find_bswap_or_nop_1 returned NULL, STMT is a leaf node and
	 we have to initialize the symbolic number.  */
      if (!source_stmt1)
	{
	  if (gimple_assign_load_p (stmt)
	      || !init_symbolic_number (n, rhs1))
	    return NULL;
	  source_stmt1 = stmt;
	}

      switch (code)
	{
	case BIT_AND_EXPR:
	  {
	    int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
	    uint64_t val = int_cst_value (rhs2), mask = 0;
	    uint64_t tmp = (1 << BITS_PER_UNIT) - 1;

	    /* Only constants masking full bytes are allowed.  */
	    for (i = 0; i < size; i++, tmp <<= BITS_PER_UNIT)
	      if ((val & tmp) != 0 && (val & tmp) != tmp)
		return NULL;
	      else if (val & tmp)
		mask |= (uint64_t) MARKER_MASK << (i * BITS_PER_MARKER);

	    n->n &= mask;
	  }
	  break;
	case LSHIFT_EXPR:
	case RSHIFT_EXPR:
	case LROTATE_EXPR:
	case RROTATE_EXPR:
	  if (!do_shift_rotate (code, n, (int) TREE_INT_CST_LOW (rhs2)))
	    return NULL;
	  break;
	CASE_CONVERT:
	  {
	    int i, type_size, old_type_size;
	    tree type;

	    type = gimple_expr_type (stmt);
	    type_size = TYPE_PRECISION (type);
	    if (type_size % BITS_PER_UNIT != 0)
	      return NULL;
	    type_size /= BITS_PER_UNIT;
	    if (type_size > 64 / BITS_PER_MARKER)
	      return NULL;

	    /* Sign extension: result is dependent on the value.  */
	    old_type_size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
	    if (!TYPE_UNSIGNED (n->type) && type_size > old_type_size
		&& HEAD_MARKER (n->n, old_type_size))
	      for (i = 0; i < type_size - old_type_size; i++)
		n->n |= (uint64_t) MARKER_BYTE_UNKNOWN
			<< ((type_size - 1 - i) * BITS_PER_MARKER);

	    if (type_size < 64 / BITS_PER_MARKER)
	      {
		/* If STMT casts to a smaller type mask out the bits not
		   belonging to the target type.  */
		n->n &= ((uint64_t) 1 << (type_size * BITS_PER_MARKER)) - 1;
	      }
	    n->type = type;
	    if (!n->base_addr)
	      n->range = type_size;
	  }
	  break;
	default:
	  return NULL;
	};
      return verify_symbolic_number_p (n, stmt) ? source_stmt1 : NULL;
    }

  /* Handle binary rhs.  */

  if (rhs_class == GIMPLE_BINARY_RHS)
    {
      struct symbolic_number n1, n2;
      gimple *source_stmt, *source_stmt2;

      if (code != BIT_IOR_EXPR)
	return NULL;

      if (TREE_CODE (rhs2) != SSA_NAME)
	return NULL;

      rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);

      switch (code)
	{
	case BIT_IOR_EXPR:
	  source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, &n1, limit - 1);

	  if (!source_stmt1)
	    return NULL;

	  source_stmt2 = find_bswap_or_nop_1 (rhs2_stmt, &n2, limit - 1);

	  if (!source_stmt2)
	    return NULL;

	  if (TYPE_PRECISION (n1.type) != TYPE_PRECISION (n2.type))
	    return NULL;

	  if (n1.vuse != n2.vuse)
	    return NULL;

	  source_stmt
	    = perform_symbolic_merge (source_stmt1, &n1, source_stmt2, &n2, n);

	  if (!source_stmt)
	    return NULL;

	  if (!verify_symbolic_number_p (n, stmt))
	    return NULL;

	  break;
	default:
	  return NULL;
	}
      return source_stmt;
    }
  return NULL;
}
/* Helper for find_bswap_or_nop and try_coalesce_bswap to compute
   *CMPXCHG, *CMPNOP and adjust *N.  */

void
find_bswap_or_nop_finalize (struct symbolic_number *n, uint64_t *cmpxchg,
			    uint64_t *cmpnop)
{
  unsigned rsize;
  uint64_t tmpn, mask;

  /* The number which the find_bswap_or_nop_1 result should match in order
     to have a full byte swap.  The number is shifted to the right
     according to the size of the symbolic number before using it.  */
  *cmpxchg = CMPXCHG;
  *cmpnop = CMPNOP;

  /* Find real size of result (highest non-zero byte).  */
  if (n->base_addr)
    for (tmpn = n->n, rsize = 0; tmpn; tmpn >>= BITS_PER_MARKER, rsize++);
  else
    rsize = n->range;

  /* Zero out the bits corresponding to untouched bytes in original gimple
     expression.  */
  if (n->range < (int) sizeof (int64_t))
    {
      mask = ((uint64_t) 1 << (n->range * BITS_PER_MARKER)) - 1;
      *cmpxchg >>= (64 / BITS_PER_MARKER - n->range) * BITS_PER_MARKER;
      *cmpnop &= mask;
    }

  /* Zero out the bits corresponding to unused bytes in the result of the
     gimple expression.  */
  if (rsize < n->range)
    {
      if (BYTES_BIG_ENDIAN)
	{
	  mask = ((uint64_t) 1 << (rsize * BITS_PER_MARKER)) - 1;
	  *cmpxchg &= mask;
	  *cmpnop >>= (n->range - rsize) * BITS_PER_MARKER;
	}
      else
	{
	  mask = ((uint64_t) 1 << (rsize * BITS_PER_MARKER)) - 1;
	  *cmpxchg >>= (n->range - rsize) * BITS_PER_MARKER;
	  *cmpnop &= mask;
	}
      n->range = rsize;
    }

  n->range *= BITS_PER_UNIT;
}
/* Check if STMT completes a bswap implementation or a read in a given
   endianness consisting of ORs, SHIFTs and ANDs and sets *BSWAP
   accordingly.  It also sets N to represent the kind of operations
   performed: size of the resulting expression and whether it works on
   a memory source, and if so alias-set and vuse.  At last, the
   function returns a stmt whose rhs's first tree is the source
   expression.  */

gimple *
find_bswap_or_nop (gimple *stmt, struct symbolic_number *n, bool *bswap)
{
  /* The last parameter determines the depth search limit.  It usually
     correlates directly to the number n of bytes to be touched.  We
     increase that number by 2 * (log2(n) + 1) here in order to also
     cover signed -> unsigned conversions of the src operand as can be seen
     in libgcc, and for initial shift/and operation of the src operand.  */
  int limit = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (gimple_expr_type (stmt)));
  limit += 2 * (1 + (int) ceil_log2 ((unsigned HOST_WIDE_INT) limit));
  gimple *ins_stmt = find_bswap_or_nop_1 (stmt, n, limit);

  if (!ins_stmt)
    return NULL;

  uint64_t cmpxchg, cmpnop;
  find_bswap_or_nop_finalize (n, &cmpxchg, &cmpnop);

  /* A complete byte swap should make the symbolic number to start with
     the largest digit in the highest order byte.  Unchanged symbolic
     number indicates a read with same endianness as target architecture.  */
  if (n->n == cmpnop)
    *bswap = false;
  else if (n->n == cmpxchg)
    *bswap = true;
  else
    return NULL;

  /* Useless bit manipulation performed by code.  */
  if (!n->base_addr && n->n == cmpnop && n->n_ops == 1)
    return NULL;

  return ins_stmt;
}
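/* Illustrative example (not part of the original sources): a hand-written
   little-endian 32-bit read that find_bswap_or_nop classifies as a plain
   ("nop") load on little-endian targets and as a load + byte swap on
   big-endian targets:

     uint32_t
     read_le32 (const unsigned char *p)
     {
       return (uint32_t) p[0]
	      | ((uint32_t) p[1] << 8)
	      | ((uint32_t) p[2] << 16)
	      | ((uint32_t) p[3] << 24);
     }
*/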
const pass_data pass_data_optimize_bswap =
{
  GIMPLE_PASS, /* type */
  "bswap", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_ssa, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_optimize_bswap : public gimple_opt_pass
{
public:
  pass_optimize_bswap (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_optimize_bswap, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      return flag_expensive_optimizations && optimize && BITS_PER_UNIT == 8;
    }

  virtual unsigned int execute (function *);

}; // class pass_optimize_bswap
/* Perform the bswap optimization: replace the expression computed in the rhs
   of gsi_stmt (GSI) (or if NULL add instead of replace) by an equivalent
   bswap, load or load + bswap expression.
   Which of these alternatives replaces the rhs is given by N->base_addr (non
   null if a load is needed) and BSWAP.  The type, VUSE and alias-set of the
   load to perform are also given in N while the builtin bswap to invoke is
   given in FNDECL.  Finally, if a load is involved, INS_STMT refers to one of
   the load statements involved to construct the rhs in gsi_stmt (GSI) and
   N->range gives the size of the rhs expression for maintaining some
   statistics.

   Note that if the replacement involves a load and if gsi_stmt (GSI) is
   non-NULL, that stmt is moved just after INS_STMT to do the load with the
   same VUSE which can lead to gsi_stmt (GSI) changing basic block.  */
tree
bswap_replace (gimple_stmt_iterator gsi, gimple *ins_stmt, tree fndecl,
	       tree bswap_type, tree load_type, struct symbolic_number *n,
	       bool bswap)
{
  tree src, tmp, tgt = NULL_TREE;
  gimple *bswap_stmt;

  gimple *cur_stmt = gsi_stmt (gsi);
  src = n->src;
  if (cur_stmt)
    tgt = gimple_assign_lhs (cur_stmt);

  /* Need to load the value from memory first.  */
  if (n->base_addr)
    {
      gimple_stmt_iterator gsi_ins = gsi;
      if (ins_stmt)
	gsi_ins = gsi_for_stmt (ins_stmt);
      tree addr_expr, addr_tmp, val_expr, val_tmp;
      tree load_offset_ptr, aligned_load_type;
      gimple *load_stmt;
      unsigned align = get_object_alignment (src);
      poly_int64 load_offset = 0;

      if (cur_stmt)
	{
	  basic_block ins_bb = gimple_bb (ins_stmt);
	  basic_block cur_bb = gimple_bb (cur_stmt);
	  if (!dominated_by_p (CDI_DOMINATORS, cur_bb, ins_bb))
	    return NULL_TREE;

	  /* Move cur_stmt just before one of the load of the original
	     to ensure it has the same VUSE.  See PR61517 for what could
	     go wrong.  */
	  if (gimple_bb (cur_stmt) != gimple_bb (ins_stmt))
	    reset_flow_sensitive_info (gimple_assign_lhs (cur_stmt));
	  gsi_move_before (&gsi, &gsi_ins);
	  gsi = gsi_for_stmt (cur_stmt);
	}
      else
	gsi = gsi_ins;

      /* Compute address to load from and cast according to the size
	 of the load.  */
      addr_expr = build_fold_addr_expr (src);
      if (is_gimple_mem_ref_addr (addr_expr))
	addr_tmp = unshare_expr (addr_expr);
      else
	{
	  addr_tmp = unshare_expr (n->base_addr);
	  if (!is_gimple_mem_ref_addr (addr_tmp))
	    addr_tmp = force_gimple_operand_gsi_1 (&gsi, addr_tmp,
						   is_gimple_mem_ref_addr,
						   NULL_TREE, true,
						   GSI_SAME_STMT);
	  load_offset = n->bytepos;
	  if (n->offset)
	    {
	      tree off
		= force_gimple_operand_gsi (&gsi, unshare_expr (n->offset),
					    true, NULL_TREE, true,
					    GSI_SAME_STMT);
	      gimple *stmt
		= gimple_build_assign (make_ssa_name (TREE_TYPE (addr_tmp)),
				       POINTER_PLUS_EXPR, addr_tmp, off);
	      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
	      addr_tmp = gimple_assign_lhs (stmt);
	    }
	}

      /* Perform the load.  */
      aligned_load_type = load_type;
      if (align < TYPE_ALIGN (load_type))
	aligned_load_type = build_aligned_type (load_type, align);
      load_offset_ptr = build_int_cst (n->alias_set, load_offset);
      val_expr = fold_build2 (MEM_REF, aligned_load_type, addr_tmp,
			      load_offset_ptr);

      if (!bswap)
	{
	  if (n->range == 16)
	    nop_stats.found_16bit++;
	  else if (n->range == 32)
	    nop_stats.found_32bit++;
	  else
	    {
	      gcc_assert (n->range == 64);
	      nop_stats.found_64bit++;
	    }

	  /* Convert the result of load if necessary.  */
	  if (tgt && !useless_type_conversion_p (TREE_TYPE (tgt), load_type))
	    {
	      val_tmp = make_temp_ssa_name (aligned_load_type, NULL,
					    "load_dst");
	      load_stmt = gimple_build_assign (val_tmp, val_expr);
	      gimple_set_vuse (load_stmt, n->vuse);
	      gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT);
	      gimple_assign_set_rhs_with_ops (&gsi, NOP_EXPR, val_tmp);
	      update_stmt (cur_stmt);
	    }
	  else if (cur_stmt)
	    {
	      gimple_assign_set_rhs_with_ops (&gsi, MEM_REF, val_expr);
	      gimple_set_vuse (cur_stmt, n->vuse);
	      update_stmt (cur_stmt);
	    }
	  else
	    {
	      tgt = make_ssa_name (load_type);
	      cur_stmt = gimple_build_assign (tgt, MEM_REF, val_expr);
	      gimple_set_vuse (cur_stmt, n->vuse);
	      gsi_insert_before (&gsi, cur_stmt, GSI_SAME_STMT);
	    }

	  if (dump_file)
	    {
	      fprintf (dump_file,
		       "%d bit load in target endianness found at: ",
		       (int) n->range);
	      print_gimple_stmt (dump_file, cur_stmt, 0);
	    }
	  return tgt;
	}
      else
	{
	  val_tmp = make_temp_ssa_name (aligned_load_type, NULL, "load_dst");
	  load_stmt = gimple_build_assign (val_tmp, val_expr);
	  gimple_set_vuse (load_stmt, n->vuse);
	  gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT);
	}
      src = val_tmp;
    }
  else if (!bswap)
    {
      gimple *g = NULL;
      if (tgt && !useless_type_conversion_p (TREE_TYPE (tgt), TREE_TYPE (src)))
	{
	  if (!is_gimple_val (src))
	    return NULL_TREE;
	  g = gimple_build_assign (tgt, NOP_EXPR, src);
	}
      else if (cur_stmt)
	g = gimple_build_assign (tgt, src);
      else
	tgt = src;
      if (n->range == 16)
	nop_stats.found_16bit++;
      else if (n->range == 32)
	nop_stats.found_32bit++;
      else
	{
	  gcc_assert (n->range == 64);
	  nop_stats.found_64bit++;
	}
      if (dump_file)
	{
	  fprintf (dump_file,
		   "%d bit reshuffle in target endianness found at: ",
		   (int) n->range);
	  if (cur_stmt)
	    print_gimple_stmt (dump_file, cur_stmt, 0);
	  else
	    {
	      print_generic_expr (dump_file, tgt, TDF_NONE);
	      fprintf (dump_file, "\n");
	    }
	}
      if (cur_stmt)
	gsi_replace (&gsi, g, true);
      return tgt;
    }
  else if (TREE_CODE (src) == BIT_FIELD_REF)
    src = TREE_OPERAND (src, 0);

  if (n->range == 16)
    bswap_stats.found_16bit++;
  else if (n->range == 32)
    bswap_stats.found_32bit++;
  else
    {
      gcc_assert (n->range == 64);
      bswap_stats.found_64bit++;
    }

  tmp = src;

  /* Convert the src expression if necessary.  */
  if (!useless_type_conversion_p (TREE_TYPE (tmp), bswap_type))
    {
      gimple *convert_stmt;

      tmp = make_temp_ssa_name (bswap_type, NULL, "bswapsrc");
      convert_stmt = gimple_build_assign (tmp, NOP_EXPR, src);
      gsi_insert_before (&gsi, convert_stmt, GSI_SAME_STMT);
    }

  /* Canonical form for 16 bit bswap is a rotate expression.  Only 16bit values
     are considered as rotation of 2N bit values by N bits is generally not
     equivalent to a bswap.  Consider for instance 0x01020304 r>> 16 which
     gives 0x03040102 while a bswap for that value is 0x04030201.  */
  if (bswap && n->range == 16)
    {
      tree count = build_int_cst (NULL, BITS_PER_UNIT);
      src = fold_build2 (LROTATE_EXPR, bswap_type, tmp, count);
      bswap_stmt = gimple_build_assign (NULL, src);
    }
  else
    bswap_stmt = gimple_build_call (fndecl, 1, tmp);

  if (tgt == NULL_TREE)
    tgt = make_ssa_name (bswap_type);
  tmp = tgt;

  /* Convert the result if necessary.  */
  if (!useless_type_conversion_p (TREE_TYPE (tgt), bswap_type))
    {
      gimple *convert_stmt;

      tmp = make_temp_ssa_name (bswap_type, NULL, "bswapdst");
      convert_stmt = gimple_build_assign (tgt, NOP_EXPR, tmp);
      gsi_insert_after (&gsi, convert_stmt, GSI_SAME_STMT);
    }

  gimple_set_lhs (bswap_stmt, tmp);

  if (dump_file)
    {
      fprintf (dump_file, "%d bit bswap implementation found at: ",
	       (int) n->range);
      if (cur_stmt)
	print_gimple_stmt (dump_file, cur_stmt, 0);
      else
	{
	  print_generic_expr (dump_file, tgt, TDF_NONE);
	  fprintf (dump_file, "\n");
	}
    }

  if (cur_stmt)
    {
      gsi_insert_after (&gsi, bswap_stmt, GSI_SAME_STMT);
      gsi_remove (&gsi, true);
    }
  else
    gsi_insert_before (&gsi, bswap_stmt, GSI_SAME_STMT);
  return tgt;
}
/* Find manual byte swap implementations as well as loads in a given
   endianness.  Byte swaps are turned into a bswap builtin invocation
   while endian loads are converted to a bswap builtin invocation or
   a simple load according to the target endianness.  */
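/* Illustrative example (not part of the original sources): a hand-written
   byte swap of a register value that this pass rewrites into a call to
   __builtin_bswap32 (usually a single instruction) when the target provides
   a bswap optab:

     uint32_t
     my_bswap32 (uint32_t x)
     {
       return ((x & 0x000000ffU) << 24)
	      | ((x & 0x0000ff00U) << 8)
	      | ((x & 0x00ff0000U) >> 8)
	      | ((x & 0xff000000U) >> 24);
     }
*/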
unsigned int
pass_optimize_bswap::execute (function *fun)
{
  basic_block bb;
  bool bswap32_p, bswap64_p;
  bool changed = false;
  tree bswap32_type = NULL_TREE, bswap64_type = NULL_TREE;

  bswap32_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
	       && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing);
  bswap64_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP64)
	       && (optab_handler (bswap_optab, DImode) != CODE_FOR_nothing
		   || (bswap32_p && word_mode == SImode)));

  /* Determine the argument type of the builtins.  The code later on
     assumes that the return and argument type are the same.  */
  if (bswap32_p)
    {
      tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
      bswap32_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
    }

  if (bswap64_p)
    {
      tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
      bswap64_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
    }

  memset (&nop_stats, 0, sizeof (nop_stats));
  memset (&bswap_stats, 0, sizeof (bswap_stats));
  calculate_dominance_info (CDI_DOMINATORS);

  FOR_EACH_BB_FN (bb, fun)
    {
      gimple_stmt_iterator gsi;

      /* We do a reverse scan for bswap patterns to make sure we get the
	 widest match.  As bswap pattern matching doesn't handle previously
	 inserted smaller bswap replacements as sub-patterns, the wider
	 variant wouldn't be detected.  */
      for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi);)
	{
	  gimple *ins_stmt, *cur_stmt = gsi_stmt (gsi);
	  tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type;
	  enum tree_code code;
	  struct symbolic_number n;
	  bool bswap;

	  /* This gsi_prev (&gsi) is not part of the for loop because cur_stmt
	     might be moved to a different basic block by bswap_replace and gsi
	     must not point to it if that's the case.  Moving the gsi_prev
	     there makes sure that gsi points to the statement previous to
	     cur_stmt while still making sure that all statements are
	     considered in this basic block.  */
	  gsi_prev (&gsi);

	  if (!is_gimple_assign (cur_stmt))
	    continue;

	  code = gimple_assign_rhs_code (cur_stmt);
	  switch (code)
	    {
	    case LROTATE_EXPR:
	    case RROTATE_EXPR:
	      if (!tree_fits_uhwi_p (gimple_assign_rhs2 (cur_stmt))
		  || tree_to_uhwi (gimple_assign_rhs2 (cur_stmt))
		     % BITS_PER_UNIT)
		continue;
	      /* Fall through.  */
	    case BIT_IOR_EXPR:
	      break;
	    default:
	      continue;
	    }

	  ins_stmt = find_bswap_or_nop (cur_stmt, &n, &bswap);

	  if (!ins_stmt)
	    continue;

	  switch (n.range)
	    {
	    case 16:
	      /* Already in canonical form, nothing to do.  */
	      if (code == LROTATE_EXPR || code == RROTATE_EXPR)
		continue;
	      load_type = bswap_type = uint16_type_node;
	      break;
	    case 32:
	      load_type = uint32_type_node;
	      if (bswap32_p)
		{
		  fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
		  bswap_type = bswap32_type;
		}
	      break;
	    case 64:
	      load_type = uint64_type_node;
	      if (bswap64_p)
		{
		  fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
		  bswap_type = bswap64_type;
		}
	      break;
	    default:
	      continue;
	    }

	  if (bswap && !fndecl && n.range != 16)
	    continue;

	  if (bswap_replace (gsi_for_stmt (cur_stmt), ins_stmt, fndecl,
			     bswap_type, load_type, &n, bswap))
	    changed = true;
	}
    }

  statistics_counter_event (fun, "16-bit nop implementations found",
			    nop_stats.found_16bit);
  statistics_counter_event (fun, "32-bit nop implementations found",
			    nop_stats.found_32bit);
  statistics_counter_event (fun, "64-bit nop implementations found",
			    nop_stats.found_64bit);
  statistics_counter_event (fun, "16-bit bswap implementations found",
			    bswap_stats.found_16bit);
  statistics_counter_event (fun, "32-bit bswap implementations found",
			    bswap_stats.found_32bit);
  statistics_counter_event (fun, "64-bit bswap implementations found",
			    bswap_stats.found_64bit);

  return (changed ? TODO_update_ssa : 0);
}

gimple_opt_pass *
make_pass_optimize_bswap (gcc::context *ctxt)
{
  return new pass_optimize_bswap (ctxt);
}
/* Struct recording one operand for the store, which is either a constant,
   then VAL represents the constant and all the other fields are zero, or
   a memory load, then VAL represents the reference, BASE_ADDR is non-NULL
   and the other fields also reflect the memory load, or an SSA name, then
   VAL represents the SSA name and all the other fields are zero.  */
class store_operand_info
{
public:
  tree val;
  tree base_addr;
  poly_uint64 bitsize;
  poly_uint64 bitpos;
  poly_uint64 bitregion_start;
  poly_uint64 bitregion_end;
  gimple *stmt;
  bool bit_not_p;
  store_operand_info ();
};

store_operand_info::store_operand_info ()
  : val (NULL_TREE), base_addr (NULL_TREE), bitsize (0), bitpos (0),
    bitregion_start (0), bitregion_end (0), stmt (NULL), bit_not_p (false)
{
}
/* Struct recording the information about a single store of an immediate
   to memory.  These are created in the first phase and coalesced into
   merged_store_group objects in the second phase.  */

class store_immediate_info
{
public:
  unsigned HOST_WIDE_INT bitsize;
  unsigned HOST_WIDE_INT bitpos;
  unsigned HOST_WIDE_INT bitregion_start;
  /* This is one past the last bit of the bit region.  */
  unsigned HOST_WIDE_INT bitregion_end;
  gimple *stmt;
  unsigned int order;
  /* INTEGER_CST for constant store, STRING_CST for string store,
     MEM_REF for memory copy, BIT_*_EXPR for logical bitwise operation,
     BIT_INSERT_EXPR for bit insertion.
     LROTATE_EXPR if it can only be bswap optimized and
     ops are not really meaningful.
     NOP_EXPR if bswap optimization detected identity, ops
     are not meaningful.  */
  enum tree_code rhs_code;
  /* Two fields for bswap optimization purposes.  */
  struct symbolic_number n;
  gimple *ins_stmt;
  /* True if BIT_{AND,IOR,XOR}_EXPR result is inverted before storing.  */
  bool bit_not_p;
  /* True if ops have been swapped and thus ops[1] represents
     rhs1 of BIT_{AND,IOR,XOR}_EXPR and ops[0] represents rhs2.  */
  bool ops_swapped_p;
  /* The index number of the landing pad, or 0 if there is none.  */
  int lp_nr;
  /* Operands.  For BIT_*_EXPR rhs_code both operands are used, otherwise
     just the first one.  */
  store_operand_info ops[2];
  store_immediate_info (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
			unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
			gimple *, unsigned int, enum tree_code,
			struct symbolic_number &, gimple *, bool, int,
			const store_operand_info &,
			const store_operand_info &);
};

store_immediate_info::store_immediate_info (unsigned HOST_WIDE_INT bs,
					    unsigned HOST_WIDE_INT bp,
					    unsigned HOST_WIDE_INT brs,
					    unsigned HOST_WIDE_INT bre,
					    gimple *st,
					    unsigned int ord,
					    enum tree_code rhscode,
					    struct symbolic_number &nr,
					    gimple *ins_stmtp,
					    bool bitnotp,
					    int nr2,
					    const store_operand_info &op0r,
					    const store_operand_info &op1r)
  : bitsize (bs), bitpos (bp), bitregion_start (brs), bitregion_end (bre),
    stmt (st), order (ord), rhs_code (rhscode), n (nr),
    ins_stmt (ins_stmtp), bit_not_p (bitnotp), ops_swapped_p (false),
    lp_nr (nr2)
#if __cplusplus >= 201103L
    , ops { op0r, op1r }
{
}
#else
{
  ops[0] = op0r;
  ops[1] = op1r;
}
#endif
/* Struct representing a group of stores to contiguous memory locations.
   These are produced by the second phase (coalescing) and consumed in the
   third phase that outputs the widened stores.  */

class merged_store_group
{
public:
  unsigned HOST_WIDE_INT start;
  unsigned HOST_WIDE_INT width;
  unsigned HOST_WIDE_INT bitregion_start;
  unsigned HOST_WIDE_INT bitregion_end;
  /* The size of the allocated memory for val and mask.  */
  unsigned HOST_WIDE_INT buf_size;
  unsigned HOST_WIDE_INT align_base;
  poly_uint64 load_align_base[2];

  unsigned int align;
  unsigned int load_align[2];
  unsigned int first_order;
  unsigned int last_order;
  bool bit_insertion;
  bool string_concatenation;
  bool only_constants;
  unsigned int first_nonmergeable_order;
  int lp_nr;

  auto_vec<store_immediate_info *> stores;
  /* We record the first and last original statements in the sequence because
     we'll need their vuse/vdef and replacement position.  It's easier to keep
     track of them separately as 'stores' is reordered by apply_stores.  */
  gimple *last_stmt;
  gimple *first_stmt;
  unsigned char *val;
  unsigned char *mask;

  merged_store_group (store_immediate_info *);
  ~merged_store_group ();
  bool can_be_merged_into (store_immediate_info *);
  void merge_into (store_immediate_info *);
  void merge_overlapping (store_immediate_info *);
  bool apply_stores ();
private:
  void do_merge (store_immediate_info *);
};
/* Debug helper.  Dump LEN elements of byte array PTR to FD in hex.  */

static void
dump_char_array (FILE *fd, unsigned char *ptr, unsigned int len)
{
  if (!fd)
    return;

  for (unsigned int i = 0; i < len; i++)
    fprintf (fd, "%02x ", ptr[i]);
  fprintf (fd, "\n");
}
/* Clear out LEN bits starting from bit START in the byte array
   PTR.  This clears the bits to the *right* from START.
   START must be within [0, BITS_PER_UNIT) and counts starting from
   the least significant bit.  */

static void
clear_bit_region_be (unsigned char *ptr, unsigned int start,
		     unsigned int len)
{
  if (len == 0)
    return;
  /* Clear len bits to the right of start.  */
  else if (len <= start + 1)
    {
      unsigned char mask = (~(~0U << len));
      mask = mask << (start + 1U - len);
      ptr[0] &= ~mask;
    }
  else if (start != BITS_PER_UNIT - 1)
    {
      clear_bit_region_be (ptr, start, (start % BITS_PER_UNIT) + 1);
      clear_bit_region_be (ptr + 1, BITS_PER_UNIT - 1,
			   len - (start % BITS_PER_UNIT) - 1);
    }
  else if (start == BITS_PER_UNIT - 1
	   && len > BITS_PER_UNIT)
    {
      unsigned int nbytes = len / BITS_PER_UNIT;
      memset (ptr, 0, nbytes);
      if (len % BITS_PER_UNIT != 0)
	clear_bit_region_be (ptr + nbytes, BITS_PER_UNIT - 1,
			     len % BITS_PER_UNIT);
    }
  else
    gcc_unreachable ();
}
/* In the byte array PTR clear the bit region starting at bit
   START that is LEN bits wide.
   For regions spanning multiple bytes do this recursively until we reach
   zero LEN or a region contained within a single byte.  */

static void
clear_bit_region (unsigned char *ptr, unsigned int start,
		  unsigned int len)
{
  /* Degenerate base case.  */
  if (len == 0)
    return;
  else if (start >= BITS_PER_UNIT)
    clear_bit_region (ptr + 1, start - BITS_PER_UNIT, len);
  /* Second base case.  */
  else if ((start + len) <= BITS_PER_UNIT)
    {
      unsigned char mask = (~0U) << (unsigned char) (BITS_PER_UNIT - len);
      mask >>= BITS_PER_UNIT - (start + len);

      ptr[0] &= ~mask;

      return;
    }
  /* Clear most significant bits in a byte and proceed with the next byte.  */
  else if (start != 0)
    {
      clear_bit_region (ptr, start, BITS_PER_UNIT - start);
      clear_bit_region (ptr + 1, 0, len - (BITS_PER_UNIT - start));
    }
  /* Whole bytes need to be cleared.  */
  else if (start == 0 && len > BITS_PER_UNIT)
    {
      unsigned int nbytes = len / BITS_PER_UNIT;
      /* We could recurse on each byte but we clear whole bytes, so a simple
	 memset will do.  */
      memset (ptr, '\0', nbytes);
      /* Clear the remaining sub-byte region if there is one.  */
      if (len % BITS_PER_UNIT != 0)
	clear_bit_region (ptr + nbytes, 0, len % BITS_PER_UNIT);
    }
  else
    gcc_unreachable ();
}
/* Write BITLEN bits of EXPR to the byte array PTR at
   bit position BITPOS.  PTR should contain TOTAL_BYTES elements.
   Return true if the operation succeeded.  */

static bool
encode_tree_to_bitpos (tree expr, unsigned char *ptr, int bitlen, int bitpos,
		       unsigned int total_bytes)
{
  unsigned int first_byte = bitpos / BITS_PER_UNIT;
  bool sub_byte_op_p = ((bitlen % BITS_PER_UNIT)
			|| (bitpos % BITS_PER_UNIT)
			|| !int_mode_for_size (bitlen, 0).exists ());
  bool empty_ctor_p
    = (TREE_CODE (expr) == CONSTRUCTOR
       && CONSTRUCTOR_NELTS (expr) == 0
       && TYPE_SIZE_UNIT (TREE_TYPE (expr))
       && tree_fits_uhwi_p (TYPE_SIZE_UNIT (TREE_TYPE (expr))));

  if (!sub_byte_op_p)
    {
      if (first_byte >= total_bytes)
	return false;
      total_bytes -= first_byte;
      if (empty_ctor_p)
	{
	  unsigned HOST_WIDE_INT rhs_bytes
	    = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (expr)));
	  if (rhs_bytes > total_bytes)
	    return false;
	  memset (ptr + first_byte, '\0', rhs_bytes);
	  return true;
	}
      return native_encode_expr (expr, ptr + first_byte, total_bytes) != 0;
    }

  /* LITTLE-ENDIAN
     We are writing a non byte-sized quantity or at a position that is not
     at a byte boundary.
     |--------|--------|--------| ptr + first_byte
	   ^		  ^
	   xxx xxxxxxxx xxx< bp>
	   |______EXPR____|

     First native_encode_expr EXPR into a temporary buffer and shift each
     byte in the buffer by 'bp' (carrying the bits over as necessary).
     |00000000|00xxxxxx|xxxxxxxx| << bp = |000xxxxx|xxxxxxxx|xxx00000|
					  <------bitlen---->< bp>
     Then we clear the destination bits:
     |---00000|00000000|000-----| ptr + first_byte
     <-------bitlen--->< bp>

     Finally we ORR the bytes of the shifted EXPR into the cleared region:
     |---xxxxx|xxxxxxxx|xxx-----| ptr + first_byte.

     BIG-ENDIAN
     We are writing a non byte-sized quantity or at a position that is not
     at a byte boundary.
     ptr + first_byte |--------|--------|--------|
			     ^		    ^
			<bp >xxx xxxxxxxx xxx
			     |_____EXPR_____|

     First native_encode_expr EXPR into a temporary buffer and shift each
     byte in the buffer to the right by (carrying the bits over as necessary).
     We shift by as much as needed to align the most significant bit of EXPR
     with bitpos:
     |00xxxxxx|xxxxxxxx| >> 3 = |00000xxx|xxxxxxxx|xxxxx000|
	<---bitlen---->		 <bp ><-----bitlen----->
     Then we clear the destination bits:
     ptr + first_byte |-----000||00000000||00000---|
		       <bp ><-------bitlen----->

     Finally we ORR the bytes of the shifted EXPR into the cleared region:
     ptr + first_byte |---xxxxx||xxxxxxxx||xxx-----|.
     The awkwardness comes from the fact that bitpos is counted from the
     most significant bit of a byte.  */

  /* We must be dealing with fixed-size data at this point, since the
     total size is also fixed.  */
  unsigned int byte_size;
  if (empty_ctor_p)
    {
      unsigned HOST_WIDE_INT rhs_bytes
	= tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (expr)));
      if (rhs_bytes > total_bytes)
	return false;
      byte_size = rhs_bytes;
    }
  else
    {
      fixed_size_mode mode
	= as_a <fixed_size_mode> (TYPE_MODE (TREE_TYPE (expr)));
      byte_size
	= mode == BLKmode
	? tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (expr)))
	: GET_MODE_SIZE (mode);
    }
  /* Allocate an extra byte so that we have space to shift into.  */
  byte_size++;
  unsigned char *tmpbuf = XALLOCAVEC (unsigned char, byte_size);
  memset (tmpbuf, '\0', byte_size);
  /* The store detection code should only have allowed constants that are
     accepted by native_encode_expr or empty ctors.  */
  if (!empty_ctor_p
      && native_encode_expr (expr, tmpbuf, byte_size - 1) == 0)
    gcc_unreachable ();

  /* The native_encode_expr machinery uses TYPE_MODE to determine how many
     bytes to write.  This means it can write more than
     ROUND_UP (bitlen, BITS_PER_UNIT) / BITS_PER_UNIT bytes (for example
     write 8 bytes for a bitlen of 40).  Skip the bytes that are not within
     bitlen and zero out the bits that are not relevant as well (that may
     contain a sign bit due to sign-extension).  */
  unsigned int padding
    = byte_size - ROUND_UP (bitlen, BITS_PER_UNIT) / BITS_PER_UNIT - 1;
  /* On big-endian the padding is at the 'front' so just skip the initial
     bytes.  */
  if (BYTES_BIG_ENDIAN)
    tmpbuf += padding;

  byte_size -= padding;

  if (bitlen % BITS_PER_UNIT != 0)
    {
      if (BYTES_BIG_ENDIAN)
	clear_bit_region_be (tmpbuf, BITS_PER_UNIT - 1,
			     BITS_PER_UNIT - (bitlen % BITS_PER_UNIT));
      else
	clear_bit_region (tmpbuf, bitlen,
			  byte_size * BITS_PER_UNIT - bitlen);
    }
  /* Left shifting relies on the last byte being clear if bitlen is
     a multiple of BITS_PER_UNIT, which might not be clear if
     there are padding bytes.  */
  else if (!BYTES_BIG_ENDIAN)
    tmpbuf[byte_size - 1] = '\0';

  /* Clear the bit region in PTR where the bits from TMPBUF will be
     inserted into.  */
  if (BYTES_BIG_ENDIAN)
    clear_bit_region_be (ptr + first_byte,
			 BITS_PER_UNIT - 1 - (bitpos % BITS_PER_UNIT), bitlen);
  else
    clear_bit_region (ptr + first_byte, bitpos % BITS_PER_UNIT, bitlen);

  int shift_amnt;
  int bitlen_mod = bitlen % BITS_PER_UNIT;
  int bitpos_mod = bitpos % BITS_PER_UNIT;

  bool skip_byte = false;
  if (BYTES_BIG_ENDIAN)
    {
      /* BITPOS and BITLEN are exactly aligned and no shifting
	 is necessary.  */
      if (bitpos_mod + bitlen_mod == BITS_PER_UNIT
	  || (bitpos_mod == 0 && bitlen_mod == 0))
	shift_amnt = 0;
      /* |. . . . . . . .|
	  <bp >   <blen >.
	 We always shift right for BYTES_BIG_ENDIAN so shift the beginning
	 of the value until it aligns with 'bp' in the next byte over.  */
      else if (bitpos_mod + bitlen_mod < BITS_PER_UNIT)
	{
	  shift_amnt = bitlen_mod + bitpos_mod;
	  skip_byte = bitlen_mod != 0;
	}
      /* |. . . . . . . .|
	  <----bp--->
	    <---blen---->.
	 Shift the value right within the same byte so it aligns with 'bp'.  */
      else
	shift_amnt = bitlen_mod + bitpos_mod - BITS_PER_UNIT;
    }
  else
    shift_amnt = bitpos % BITS_PER_UNIT;

  /* Create the shifted version of EXPR.  */
  if (!BYTES_BIG_ENDIAN)
    {
      shift_bytes_in_array_left (tmpbuf, byte_size, shift_amnt);
      if (shift_amnt == 0)
	byte_size--;
    }
  else
    {
      gcc_assert (BYTES_BIG_ENDIAN);
      shift_bytes_in_array_right (tmpbuf, byte_size, shift_amnt);
      /* If shifting right forced us to move into the next byte skip the now
	 empty byte.  */
      if (skip_byte)
	{
	  tmpbuf++;
	  byte_size--;
	}
    }

  /* Insert the bits from TMPBUF.  */
  for (unsigned int i = 0; i < byte_size; i++)
    ptr[first_byte + i] |= tmpbuf[i];

  return true;
}
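/* Worked example (added for illustration): on a little-endian target,
   encoding the 8-bit constant 0xff at bit position 4 into a zeroed 2-byte
   buffer shifts the temporary buffer left by 4 bits and ORs it in, leaving
   the bytes f0 0f, i.e. bits 4..11 of the region are set.  */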
/* Sorting function for store_immediate_info objects.
   Sorts them by bitposition.  */

static int
sort_by_bitpos (const void *x, const void *y)
{
  store_immediate_info *const *tmp = (store_immediate_info * const *) x;
  store_immediate_info *const *tmp2 = (store_immediate_info * const *) y;

  if ((*tmp)->bitpos < (*tmp2)->bitpos)
    return -1;
  else if ((*tmp)->bitpos > (*tmp2)->bitpos)
    return 1;
  else
    /* If they are the same let's use the order which is guaranteed to
       be different.  */
    return (*tmp)->order - (*tmp2)->order;
}

/* Sorting function for store_immediate_info objects.
   Sorts them by the order field.  */

static int
sort_by_order (const void *x, const void *y)
{
  store_immediate_info *const *tmp = (store_immediate_info * const *) x;
  store_immediate_info *const *tmp2 = (store_immediate_info * const *) y;

  if ((*tmp)->order < (*tmp2)->order)
    return -1;
  else if ((*tmp)->order > (*tmp2)->order)
    return 1;

  gcc_unreachable ();
}
/* Initialize a merged_store_group object from a store_immediate_info
   object.  */

merged_store_group::merged_store_group (store_immediate_info *info)
{
  start = info->bitpos;
  width = info->bitsize;
  bitregion_start = info->bitregion_start;
  bitregion_end = info->bitregion_end;
  /* VAL has memory allocated for it in apply_stores once the group
     width has been finalized.  */
  val = NULL;
  mask = NULL;
  bit_insertion = info->rhs_code == BIT_INSERT_EXPR;
  string_concatenation = info->rhs_code == STRING_CST;
  only_constants = info->rhs_code == INTEGER_CST;
  first_nonmergeable_order = ~0U;
  lp_nr = info->lp_nr;
  unsigned HOST_WIDE_INT align_bitpos = 0;
  get_object_alignment_1 (gimple_assign_lhs (info->stmt),
			  &align, &align_bitpos);
  align_base = start - align_bitpos;
  for (int i = 0; i < 2; ++i)
    {
      store_operand_info &op = info->ops[i];
      if (op.base_addr == NULL_TREE)
	{
	  load_align[i] = 0;
	  load_align_base[i] = 0;
	}
      else
	{
	  get_object_alignment_1 (op.val, &load_align[i], &align_bitpos);
	  load_align_base[i] = op.bitpos - align_bitpos;
	}
    }
  stores.create (1);
  stores.safe_push (info);
  last_stmt = info->stmt;
  last_order = info->order;
  first_stmt = last_stmt;
  first_order = last_order;
  buf_size = 0;
}

merged_store_group::~merged_store_group ()
{
  if (val)
    XDELETEVEC (val);
}
/* Return true if the store described by INFO can be merged into the group.  */

bool
merged_store_group::can_be_merged_into (store_immediate_info *info)
{
  /* Do not merge bswap patterns.  */
  if (info->rhs_code == LROTATE_EXPR)
    return false;

  if (info->lp_nr != lp_nr)
    return false;

  /* The canonical case.  */
  if (info->rhs_code == stores[0]->rhs_code)
    return true;

  /* BIT_INSERT_EXPR is compatible with INTEGER_CST if no STRING_CST.  */
  if (info->rhs_code == BIT_INSERT_EXPR && stores[0]->rhs_code == INTEGER_CST)
    return !string_concatenation;

  if (stores[0]->rhs_code == BIT_INSERT_EXPR && info->rhs_code == INTEGER_CST)
    return !string_concatenation;

  /* We can turn MEM_REF into BIT_INSERT_EXPR for bit-field stores, but do it
     only for small regions since this can generate a lot of instructions.  */
  if (info->rhs_code == MEM_REF
      && (stores[0]->rhs_code == INTEGER_CST
	  || stores[0]->rhs_code == BIT_INSERT_EXPR)
      && info->bitregion_start == stores[0]->bitregion_start
      && info->bitregion_end == stores[0]->bitregion_end
      && info->bitregion_end - info->bitregion_start <= MAX_FIXED_MODE_SIZE)
    return !string_concatenation;

  if (stores[0]->rhs_code == MEM_REF
      && (info->rhs_code == INTEGER_CST
	  || info->rhs_code == BIT_INSERT_EXPR)
      && info->bitregion_start == stores[0]->bitregion_start
      && info->bitregion_end == stores[0]->bitregion_end
      && info->bitregion_end - info->bitregion_start <= MAX_FIXED_MODE_SIZE)
    return !string_concatenation;

  /* STRING_CST is compatible with INTEGER_CST if no BIT_INSERT_EXPR.  */
  if (info->rhs_code == STRING_CST
      && stores[0]->rhs_code == INTEGER_CST
      && stores[0]->bitsize == CHAR_BIT)
    return !bit_insertion;

  if (stores[0]->rhs_code == STRING_CST
      && info->rhs_code == INTEGER_CST
      && info->bitsize == CHAR_BIT)
    return !bit_insertion;

  return false;
}
/* Helper method for merge_into and merge_overlapping to do
   the common part.  */

void
merged_store_group::do_merge (store_immediate_info *info)
{
  bitregion_start = MIN (bitregion_start, info->bitregion_start);
  bitregion_end = MAX (bitregion_end, info->bitregion_end);

  unsigned int this_align;
  unsigned HOST_WIDE_INT align_bitpos = 0;
  get_object_alignment_1 (gimple_assign_lhs (info->stmt),
			  &this_align, &align_bitpos);
  if (this_align > align)
    {
      align = this_align;
      align_base = info->bitpos - align_bitpos;
    }
  for (int i = 0; i < 2; ++i)
    {
      store_operand_info &op = info->ops[i];
      if (!op.base_addr)
	continue;

      get_object_alignment_1 (op.val, &this_align, &align_bitpos);
      if (this_align > load_align[i])
	{
	  load_align[i] = this_align;
	  load_align_base[i] = op.bitpos - align_bitpos;
	}
    }

  gimple *stmt = info->stmt;
  stores.safe_push (info);
  if (info->order > last_order)
    {
      last_order = info->order;
      last_stmt = stmt;
    }
  else if (info->order < first_order)
    {
      first_order = info->order;
      first_stmt = stmt;
    }

  /* We need to use extraction if there is any bit-field.  */
  if (info->rhs_code == BIT_INSERT_EXPR)
    {
      bit_insertion = true;
      gcc_assert (!string_concatenation);
    }

  /* We need to use concatenation if there is any string.  */
  if (info->rhs_code == STRING_CST)
    {
      string_concatenation = true;
      gcc_assert (!bit_insertion);
    }

  if (info->rhs_code != INTEGER_CST)
    only_constants = false;
}
/* Merge a store recorded by INFO into this merged store.
   The store is not overlapping with the existing recorded
   stores.  */

void
merged_store_group::merge_into (store_immediate_info *info)
{
  /* Make sure we're inserting in the position we think we're inserting.  */
  gcc_assert (info->bitpos >= start + width
	      && info->bitregion_start <= bitregion_end);

  width = info->bitpos + info->bitsize - start;
  do_merge (info);
}
/* Merge a store described by INFO into this merged store.
   INFO overlaps in some way with the current store (i.e. it's not contiguous
   which is handled by merged_store_group::merge_into).  */

void
merged_store_group::merge_overlapping (store_immediate_info *info)
{
  /* If the store extends the size of the group, extend the width.  */
  if (info->bitpos + info->bitsize > start + width)
    width = info->bitpos + info->bitsize - start;

  do_merge (info);
}
/* Go through all the recorded stores in this group in program order and
   apply their values to the VAL byte array to create the final merged
   value.  Return true if the operation succeeded.  */

bool
merged_store_group::apply_stores ()
{
  store_immediate_info *info;
  unsigned int i;

  /* Make sure we have more than one store in the group, otherwise we cannot
     merge anything.  */
  if (bitregion_start % BITS_PER_UNIT != 0
      || bitregion_end % BITS_PER_UNIT != 0
      || stores.length () == 1)
    return false;

  buf_size = (bitregion_end - bitregion_start) / BITS_PER_UNIT;

  /* Really do string concatenation for large strings only.  */
  if (buf_size <= MOVE_MAX)
    string_concatenation = false;

  /* Create a power-of-2-sized buffer for native_encode_expr.  */
  if (!string_concatenation)
    buf_size = 1 << ceil_log2 (buf_size);

  val = XNEWVEC (unsigned char, 2 * buf_size);
  mask = val + buf_size;
  memset (val, 0, buf_size);
  memset (mask, ~0U, buf_size);

  stores.qsort (sort_by_order);

  FOR_EACH_VEC_ELT (stores, i, info)
    {
      unsigned int pos_in_buffer = info->bitpos - bitregion_start;
      tree cst;
      if (info->ops[0].val && info->ops[0].base_addr == NULL_TREE)
        cst = info->ops[0].val;
      else if (info->ops[1].val && info->ops[1].base_addr == NULL_TREE)
        cst = info->ops[1].val;
      else
        cst = NULL_TREE;
      bool ret = true;
      if (cst && info->rhs_code != BIT_INSERT_EXPR)
        ret = encode_tree_to_bitpos (cst, val, info->bitsize, pos_in_buffer,
                                     buf_size);
      unsigned char *m = mask + (pos_in_buffer / BITS_PER_UNIT);
      if (BYTES_BIG_ENDIAN)
        clear_bit_region_be (m, (BITS_PER_UNIT - 1
                                 - (pos_in_buffer % BITS_PER_UNIT)),
                             info->bitsize);
      else
        clear_bit_region (m, pos_in_buffer % BITS_PER_UNIT, info->bitsize);
      if (cst && dump_file && (dump_flags & TDF_DETAILS))
        {
          if (ret)
            {
              fputs ("After writing ", dump_file);
              print_generic_expr (dump_file, cst, TDF_NONE);
              fprintf (dump_file, " of size " HOST_WIDE_INT_PRINT_DEC
                       " at position %d\n", info->bitsize, pos_in_buffer);
              fputs ("  the merged value contains ", dump_file);
              dump_char_array (dump_file, val, buf_size);
              fputs ("  the merged mask contains  ", dump_file);
              dump_char_array (dump_file, mask, buf_size);
              if (bit_insertion)
                fputs ("  bit insertion is required\n", dump_file);
              if (string_concatenation)
                fputs ("  string concatenation is required\n", dump_file);
            }
          else
            fprintf (dump_file, "Failed to merge stores\n");
        }
      if (!ret)
        return false;
    }
  stores.qsort (sort_by_bitpos);
  return true;
}
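/* Illustrative sketch (assumed, not taken from the sources or testsuite):
   for a group of two constant byte stores
     [p]      := 0x12;
     [p + 1B] := 0x34;
   apply_stores encodes both constants into the VAL buffer and clears the
   covered bits in MASK, so on a little-endian target the merged buffers
   would be
     val:  12 34      mask: 00 00
   and the later phases can then emit a single 2-byte store of that value.  */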
/* Structure describing the store chain.  */

class imm_store_chain_info
{
public:
  /* Doubly-linked list that imposes an order on chain processing.
     PNXP (prev's next pointer) points to the head of a list, or to
     the next field in the previous chain in the list.
     See pass_store_merging::m_stores_head for more rationale.  */
  imm_store_chain_info *next, **pnxp;
  tree base_addr;
  auto_vec<store_immediate_info *> m_store_info;
  auto_vec<merged_store_group *> m_merged_store_groups;

  imm_store_chain_info (imm_store_chain_info *&inspt, tree b_a)
  : next (inspt), pnxp (&inspt), base_addr (b_a)
  {
    inspt = this;
    if (next)
      {
        gcc_checking_assert (pnxp == next->pnxp);
        next->pnxp = &next;
      }
  }
  ~imm_store_chain_info ()
  {
    *pnxp = next;
    if (next)
      {
        gcc_checking_assert (&next == next->pnxp);
        next->pnxp = pnxp;
      }
  }
  bool terminate_and_process_chain ();
  bool try_coalesce_bswap (merged_store_group *, unsigned int, unsigned int);
  bool coalesce_immediate_stores ();
  bool output_merged_store (merged_store_group *);
  bool output_merged_stores ();
};
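/* A minimal sketch of how the NEXT/PNXP links are meant to fit together
   (the member names are the real ones, the picture itself is illustrative):
     m_stores_head -> chain A -> chain B -> NULL
   A->pnxp points at m_stores_head and B->pnxp points at A->next, so
   unlinking a chain is simply  *pnxp = next;  followed by updating
   next->pnxp, which is exactly what the destructor above does.  */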
const pass_data pass_data_tree_store_merging = {
  GIMPLE_PASS,     /* type */
  "store-merging", /* name */
  OPTGROUP_NONE,   /* optinfo_flags */
  TV_GIMPLE_STORE_MERGING, /* tv_id */
  PROP_ssa,        /* properties_required */
  0,               /* properties_provided */
  0,               /* properties_destroyed */
  0,               /* todo_flags_start */
  TODO_update_ssa, /* todo_flags_finish */
};
class pass_store_merging : public gimple_opt_pass
{
public:
  pass_store_merging (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_tree_store_merging, ctxt), m_stores_head ()
  {
  }

  /* Pass not supported for PDP-endian, nor for insane hosts or
     target character sizes where native_{encode,interpret}_expr
     doesn't work properly.  */
  virtual bool
  gate (function *)
  {
    return flag_store_merging
           && BYTES_BIG_ENDIAN == WORDS_BIG_ENDIAN
           && CHAR_BIT == 8
           && BITS_PER_UNIT == 8;
  }

  virtual unsigned int execute (function *);

private:
  hash_map<tree_operand_hash, class imm_store_chain_info *> m_stores;

  /* Form a doubly-linked stack of the elements of m_stores, so that
     we can iterate over them in a predictable way.  Using this order
     avoids extraneous differences in the compiler output just because
     of tree pointer variations (e.g. different chains end up in
     different positions of m_stores, so they are handled in different
     orders, so they allocate or release SSA names in different
     orders, and when they get reused, subsequent passes end up
     getting different SSA names, which may ultimately change
     decisions when going out of SSA).  */
  imm_store_chain_info *m_stores_head;

  bool process_store (gimple *);
  bool terminate_and_process_chain (imm_store_chain_info *);
  bool terminate_all_aliasing_chains (imm_store_chain_info **, gimple *);
  bool terminate_and_process_all_chains ();
}; // class pass_store_merging
/* Terminate and process all recorded chains.  Return true if any changes
   were made.  */

bool
pass_store_merging::terminate_and_process_all_chains ()
{
  bool ret = false;
  while (m_stores_head)
    ret |= terminate_and_process_chain (m_stores_head);
  gcc_assert (m_stores.is_empty ());
  return ret;
}
/* Terminate all chains that are affected by the statement STMT.
   CHAIN_INFO is the chain we should ignore from the checks if
   non-NULL.  Return true if any changes were made.  */

bool
pass_store_merging::terminate_all_aliasing_chains (imm_store_chain_info
                                                     **chain_info,
                                                   gimple *stmt)
{
  bool ret = false;

  /* If the statement doesn't touch memory it can't alias.  */
  if (!gimple_vuse (stmt))
    return false;

  tree store_lhs = gimple_store_p (stmt) ? gimple_get_lhs (stmt) : NULL_TREE;
  ao_ref store_lhs_ref;
  ao_ref_init (&store_lhs_ref, store_lhs);
  for (imm_store_chain_info *next = m_stores_head, *cur = next; cur; cur = next)
    {
      next = cur->next;

      /* We already checked all the stores in chain_info and terminated the
         chain if necessary.  Skip it here.  */
      if (chain_info && *chain_info == cur)
        continue;

      store_immediate_info *info;
      unsigned int i;
      FOR_EACH_VEC_ELT (cur->m_store_info, i, info)
        {
          tree lhs = gimple_assign_lhs (info->stmt);
          ao_ref lhs_ref;
          ao_ref_init (&lhs_ref, lhs);
          if (ref_maybe_used_by_stmt_p (stmt, &lhs_ref)
              || stmt_may_clobber_ref_p_1 (stmt, &lhs_ref)
              || (store_lhs && refs_may_alias_p_1 (&store_lhs_ref,
                                                   &lhs_ref, false)))
            {
              if (dump_file && (dump_flags & TDF_DETAILS))
                {
                  fprintf (dump_file, "stmt causes chain termination:\n");
                  print_gimple_stmt (dump_file, stmt, 0);
                }
              ret |= terminate_and_process_chain (cur);
              break;
            }
        }
    }

  return ret;
}
/* Helper function.  Terminate the recorded chain storing to base object
   BASE.  Return true if the merging and output was successful.  The m_stores
   entry is removed after the processing in any case.  */

bool
pass_store_merging::terminate_and_process_chain (imm_store_chain_info *chain_info)
{
  bool ret = chain_info->terminate_and_process_chain ();
  m_stores.remove (chain_info->base_addr);
  delete chain_info;
  return ret;
}
/* Return true if stmts in between FIRST (inclusive) and LAST (exclusive)
   may clobber REF.  FIRST and LAST must have non-NULL vdef.  We want to
   be able to sink load of REF across stores between FIRST and LAST, up
   to right before LAST.  */

static bool
stmts_may_clobber_ref_p (gimple *first, gimple *last, tree ref)
{
  ao_ref r;
  ao_ref_init (&r, ref);
  unsigned int count = 0;
  tree vop = gimple_vdef (last);
  gimple *stmt;

  /* Return true conservatively if the basic blocks are different.  */
  if (gimple_bb (first) != gimple_bb (last))
    return true;

  do
    {
      stmt = SSA_NAME_DEF_STMT (vop);
      if (stmt_may_clobber_ref_p_1 (stmt, &r))
        return true;
      if (gimple_store_p (stmt)
          && refs_anti_dependent_p (ref, gimple_get_lhs (stmt)))
        return true;
      /* Avoid quadratic compile time by bounding the number of checks
         we perform.  */
      if (++count > MAX_STORE_ALIAS_CHECKS)
        return true;
      vop = gimple_vuse (stmt);
    }
  while (stmt != first);

  return false;
}
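/* Illustrative example (assumed): with
     t.a = _1;    // FIRST
     t.b = _2;
     u.c = _3;    // LAST
   stmts_may_clobber_ref_p (FIRST, LAST, s.x) walks the virtual def chain
   backwards from just before LAST to FIRST and returns false when none of
   the intermediate stores can clobber s.x, so a load of s.x may be sunk
   right before LAST.  */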
/* Return true if INFO->ops[IDX] is mergeable with the
   corresponding loads already in MERGED_STORE group.
   BASE_ADDR is the base address of the whole store group.  */

static bool
compatible_load_p (merged_store_group *merged_store,
                   store_immediate_info *info,
                   tree base_addr, int idx)
{
  store_immediate_info *infof = merged_store->stores[0];
  if (!info->ops[idx].base_addr
      || maybe_ne (info->ops[idx].bitpos - infof->ops[idx].bitpos,
                   info->bitpos - infof->bitpos)
      || !operand_equal_p (info->ops[idx].base_addr,
                           infof->ops[idx].base_addr, 0))
    return false;

  store_immediate_info *infol = merged_store->stores.last ();
  tree load_vuse = gimple_vuse (info->ops[idx].stmt);
  /* In this case all vuses should be the same, e.g.
     _1 = s.a; _2 = s.b; _3 = _1 | 1; t.a = _3; _4 = _2 | 2; t.b = _4;
     or
     _1 = s.a; _2 = s.b; t.a = _1; t.b = _2;
     and we can emit the coalesced load next to any of those loads.  */
  if (gimple_vuse (infof->ops[idx].stmt) == load_vuse
      && gimple_vuse (infol->ops[idx].stmt) == load_vuse)
    return true;

  /* Otherwise, at least for now require that the load has the same
     vuse as the store.  See following examples.  */
  if (gimple_vuse (info->stmt) != load_vuse)
    return false;

  if (gimple_vuse (infof->stmt) != gimple_vuse (infof->ops[idx].stmt)
      || (infof != infol
          && gimple_vuse (infol->stmt) != gimple_vuse (infol->ops[idx].stmt)))
    return false;

  /* If the load is from the same location as the store, already
     the construction of the immediate chain info guarantees no intervening
     stores, so no further checks are needed.  Example:
     _1 = s.a; _2 = _1 & -7; s.a = _2; _3 = s.b; _4 = _3 & -7; s.b = _4;  */
  if (known_eq (info->ops[idx].bitpos, info->bitpos)
      && operand_equal_p (info->ops[idx].base_addr, base_addr, 0))
    return true;

  /* Otherwise, we need to punt if any of the loads can be clobbered by any
     of the stores in the group, or any other stores in between those.
     Previous calls to compatible_load_p ensured that for all the
     merged_store->stores IDX loads, no stmts starting with
     merged_store->first_stmt and ending right before merged_store->last_stmt
     clobbers those loads.  */
  gimple *first = merged_store->first_stmt;
  gimple *last = merged_store->last_stmt;
  unsigned int i;
  store_immediate_info *infoc;
  /* The stores are sorted by increasing store bitpos, so if info->stmt store
     comes before the so far first load, we'll be changing
     merged_store->first_stmt.  In that case we need to give up if
     any of the earlier processed loads clobber with the stmts in the new
     range.  */
  if (info->order < merged_store->first_order)
    {
      FOR_EACH_VEC_ELT (merged_store->stores, i, infoc)
        if (stmts_may_clobber_ref_p (info->stmt, first, infoc->ops[idx].val))
          return false;
      first = info->stmt;
    }
  /* Similarly, we could change merged_store->last_stmt, so ensure
     in that case no stmts in the new range clobber any of the earlier
     processed loads.  */
  else if (info->order > merged_store->last_order)
    {
      FOR_EACH_VEC_ELT (merged_store->stores, i, infoc)
        if (stmts_may_clobber_ref_p (last, info->stmt, infoc->ops[idx].val))
          return false;
      last = info->stmt;
    }
  /* And finally, we'd be adding a new load to the set, ensure it isn't
     clobbered in the new range.  */
  if (stmts_may_clobber_ref_p (first, last, info->ops[idx].val))
    return false;

  /* Otherwise, we are looking for:
     _1 = s.a; _2 = _1 ^ 15; t.a = _2; _3 = s.b; _4 = _3 ^ 15; t.b = _4;
     or
     _1 = s.a; t.a = _1; _2 = s.b; t.b = _2;  */
  return true;
}
/* Add all refs loaded to compute VAL to REFS vector.  */

static void
gather_bswap_load_refs (vec<tree> *refs, tree val)
{
  if (TREE_CODE (val) != SSA_NAME)
    return;

  gimple *stmt = SSA_NAME_DEF_STMT (val);
  if (!is_gimple_assign (stmt))
    return;

  if (gimple_assign_load_p (stmt))
    {
      refs->safe_push (gimple_assign_rhs1 (stmt));
      return;
    }

  switch (gimple_assign_rhs_class (stmt))
    {
    case GIMPLE_BINARY_RHS:
      gather_bswap_load_refs (refs, gimple_assign_rhs2 (stmt));
      /* FALLTHRU */
    case GIMPLE_UNARY_RHS:
      gather_bswap_load_refs (refs, gimple_assign_rhs1 (stmt));
      break;
    default:
      gcc_unreachable ();
    }
}
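/* Illustrative example (assumed): for
     _1 = s.a;
     _2 = s.b;
     _3 = _1 | _2;
   gather_bswap_load_refs (&refs, _3) recurses through the binary RHS and
   pushes the two loaded references s.a and s.b into REFS, so their
   clobbering by intervening stores can later be checked.  */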
/* Check if there are any stores in M_STORE_INFO after index I
   (where M_STORE_INFO must be sorted by sort_by_bitpos) that overlap
   a potential group ending with END that have their order
   smaller than LAST_ORDER.  ALL_INTEGER_CST_P is true if
   all the stores already merged and the one under consideration
   have rhs_code of INTEGER_CST.  Return true if there are no such stores.
   Consider:
     MEM[(long long int *)p_28] = 0;
     MEM[(long long int *)p_28 + 8B] = 0;
     MEM[(long long int *)p_28 + 16B] = 0;
     MEM[(long long int *)p_28 + 24B] = 0;
     _129 = (int) _130;
     MEM[(int *)p_28 + 8B] = _129;
     MEM[(int *)p_28].a = -1;
   We already have
     MEM[(long long int *)p_28] = 0;
     MEM[(int *)p_28].a = -1;
   stmts in the current group and need to consider if it is safe to
   add MEM[(long long int *)p_28 + 8B] = 0; store into the same group.
   There is an overlap between that store and the MEM[(int *)p_28 + 8B] = _129;
   store though, so if we add the MEM[(long long int *)p_28 + 8B] = 0;
   into the group and merging of those 3 stores is successful, merged
   stmts will be emitted at the latest store from that group, i.e.
   LAST_ORDER, which is the MEM[(int *)p_28].a = -1; store.
   The MEM[(int *)p_28 + 8B] = _129; store that originally follows
   the MEM[(long long int *)p_28 + 8B] = 0; would now be before it,
   so we need to refuse merging MEM[(long long int *)p_28 + 8B] = 0;
   into the group.  That way it will be its own store group and will
   not be touched.  If ALL_INTEGER_CST_P and there are overlapping
   INTEGER_CST stores, those are mergeable using merge_overlapping,
   so don't return false for those.  */

static bool
check_no_overlap (vec<store_immediate_info *> m_store_info, unsigned int i,
                  bool all_integer_cst_p, unsigned int last_order,
                  unsigned HOST_WIDE_INT end)
{
  unsigned int len = m_store_info.length ();
  for (++i; i < len; ++i)
    {
      store_immediate_info *info = m_store_info[i];
      if (info->bitpos >= end)
        break;
      if (info->order < last_order
          && (!all_integer_cst_p || info->rhs_code != INTEGER_CST))
        return false;
    }
  return true;
}
/* Return true if m_store_info[first] and at least one following store
   form a group which store try_size bitsize value which is byte swapped
   from a memory load or some value, or identity from some value.
   This uses the bswap pass APIs.  */

bool
imm_store_chain_info::try_coalesce_bswap (merged_store_group *merged_store,
                                          unsigned int first,
                                          unsigned int try_size)
{
  unsigned int len = m_store_info.length (), last = first;
  unsigned HOST_WIDE_INT width = m_store_info[first]->bitsize;
  if (width >= try_size)
    return false;
  for (unsigned int i = first + 1; i < len; ++i)
    {
      if (m_store_info[i]->bitpos != m_store_info[first]->bitpos + width
          || m_store_info[i]->lp_nr != merged_store->lp_nr
          || m_store_info[i]->ins_stmt == NULL)
        return false;
      width += m_store_info[i]->bitsize;
      if (width >= try_size)
        {
          last = i;
          break;
        }
    }
  if (width != try_size)
    return false;

  bool allow_unaligned
    = !STRICT_ALIGNMENT && param_store_merging_allow_unaligned;
  /* Punt if the combined store would not be aligned and we need alignment.  */
  if (!allow_unaligned)
    {
      unsigned int align = merged_store->align;
      unsigned HOST_WIDE_INT align_base = merged_store->align_base;
      for (unsigned int i = first + 1; i <= last; ++i)
        {
          unsigned int this_align;
          unsigned HOST_WIDE_INT align_bitpos = 0;
          get_object_alignment_1 (gimple_assign_lhs (m_store_info[i]->stmt),
                                  &this_align, &align_bitpos);
          if (this_align > align)
            {
              align = this_align;
              align_base = m_store_info[i]->bitpos - align_bitpos;
            }
        }
      unsigned HOST_WIDE_INT align_bitpos
        = (m_store_info[first]->bitpos - align_base) & (align - 1);
      if (align_bitpos)
        align = least_bit_hwi (align_bitpos);
      if (align < try_size)
        return false;
    }

  tree type;
  switch (try_size)
    {
    case 16: type = uint16_type_node; break;
    case 32: type = uint32_type_node; break;
    case 64: type = uint64_type_node; break;
    default: gcc_unreachable ();
    }
  struct symbolic_number n;
  gimple *ins_stmt = NULL;
  int vuse_store = -1;
  unsigned int first_order = merged_store->first_order;
  unsigned int last_order = merged_store->last_order;
  gimple *first_stmt = merged_store->first_stmt;
  gimple *last_stmt = merged_store->last_stmt;
  unsigned HOST_WIDE_INT end = merged_store->start + merged_store->width;
  store_immediate_info *infof = m_store_info[first];

  for (unsigned int i = first; i <= last; ++i)
    {
      store_immediate_info *info = m_store_info[i];
      struct symbolic_number this_n = info->n;
      this_n.type = type;
      if (!this_n.base_addr)
        this_n.range = try_size / BITS_PER_UNIT;
      else
        /* Update vuse in case it has changed by output_merged_stores.  */
        this_n.vuse = gimple_vuse (info->ins_stmt);
      unsigned int bitpos = info->bitpos - infof->bitpos;
      if (!do_shift_rotate (LSHIFT_EXPR, &this_n,
                            BYTES_BIG_ENDIAN
                            ? try_size - info->bitsize - bitpos
                            : bitpos))
        return false;
      if (this_n.base_addr && vuse_store)
        {
          unsigned int j;
          for (j = first; j <= last; ++j)
            if (this_n.vuse == gimple_vuse (m_store_info[j]->stmt))
              break;
          if (j > last)
            {
              if (vuse_store == 1)
                return false;
              vuse_store = 0;
            }
        }
      if (i == first)
        {
          n = this_n;
          ins_stmt = info->ins_stmt;
        }
      else
        {
          if (n.base_addr && n.vuse != this_n.vuse)
            {
              if (vuse_store == 0)
                return false;
              vuse_store = 1;
            }
          if (info->order > last_order)
            {
              last_order = info->order;
              last_stmt = info->stmt;
            }
          else if (info->order < first_order)
            {
              first_order = info->order;
              first_stmt = info->stmt;
            }
          end = MAX (end, info->bitpos + info->bitsize);

          ins_stmt = perform_symbolic_merge (ins_stmt, &n, info->ins_stmt,
                                             &this_n, &n);
          if (ins_stmt == NULL)
            return false;
        }
    }

  uint64_t cmpxchg, cmpnop;
  find_bswap_or_nop_finalize (&n, &cmpxchg, &cmpnop);

  /* A complete byte swap should make the symbolic number to start with
     the largest digit in the highest order byte.  Unchanged symbolic
     number indicates a read with same endianness as target architecture.  */
  if (n.n != cmpnop && n.n != cmpxchg)
    return false;

  if (n.base_addr == NULL_TREE && !is_gimple_val (n.src))
    return false;

  if (!check_no_overlap (m_store_info, last, false, last_order, end))
    return false;

  /* Don't handle memory copy this way if normal non-bswap processing
     would handle it too.  */
  if (n.n == cmpnop && (unsigned) n.n_ops == last - first + 1)
    {
      unsigned int i;
      for (i = first; i <= last; ++i)
        if (m_store_info[i]->rhs_code != MEM_REF)
          break;
      if (i == last + 1)
        return false;
    }

  if (n.n == cmpxchg)
    switch (try_size)
      {
      case 16:
        /* Will emit LROTATE_EXPR.  */
        break;
      case 32:
        if (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
            && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing)
          break;
        return false;
      case 64:
        if (builtin_decl_explicit_p (BUILT_IN_BSWAP64)
            && optab_handler (bswap_optab, DImode) != CODE_FOR_nothing)
          break;
        return false;
      default:
        gcc_unreachable ();
      }

  if (!allow_unaligned && n.base_addr)
    {
      unsigned int align = get_object_alignment (n.src);
      if (align < try_size)
        return false;
    }

  /* If each load has vuse of the corresponding store, need to verify
     the loads can be sunk right before the last store.  */
  if (vuse_store == 1)
    {
      auto_vec<tree, 64> refs;
      for (unsigned int i = first; i <= last; ++i)
        gather_bswap_load_refs (&refs,
                                gimple_assign_rhs1 (m_store_info[i]->stmt));

      unsigned int i;
      tree ref;
      FOR_EACH_VEC_ELT (refs, i, ref)
        if (stmts_may_clobber_ref_p (first_stmt, last_stmt, ref))
          return false;
      n.vuse = NULL_TREE;
    }

  infof->n = n;
  infof->ins_stmt = ins_stmt;
  for (unsigned int i = first; i <= last; ++i)
    {
      m_store_info[i]->rhs_code = n.n == cmpxchg ? LROTATE_EXPR : NOP_EXPR;
      m_store_info[i]->ops[0].base_addr = NULL_TREE;
      m_store_info[i]->ops[1].base_addr = NULL_TREE;
      if (i != first)
        merged_store->merge_into (m_store_info[i]);
    }

  return true;
}
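/* Illustrative example (assumed, in the style of the bswap testcases): for
     p[0] = (unsigned char) (x >> 24);
     p[1] = (unsigned char) (x >> 16);
     p[2] = (unsigned char) (x >> 8);
     p[3] = (unsigned char) x;
   on a little-endian target try_coalesce_bswap with try_size == 32 merges
   the four symbolic numbers, finds n.n == cmpxchg and marks the stores with
   LROTATE_EXPR, so that output_merged_store can emit __builtin_bswap32 (x)
   stored by a single 4-byte store.  */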
/* Go through the candidate stores recorded in m_store_info and merge them
   into merged_store_group objects recorded into m_merged_store_groups
   representing the widened stores.  Return true if coalescing was successful
   and the number of widened stores is fewer than the original number
   of stores.  */

bool
imm_store_chain_info::coalesce_immediate_stores ()
{
  /* Anything less can't be processed.  */
  if (m_store_info.length () < 2)
    return false;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Attempting to coalesce %u stores in chain\n",
             m_store_info.length ());

  store_immediate_info *info;
  unsigned int i, ignore = 0;

  /* Order the stores by the bitposition they write to.  */
  m_store_info.qsort (sort_by_bitpos);

  info = m_store_info[0];
  merged_store_group *merged_store = new merged_store_group (info);
  if (dump_file && (dump_flags & TDF_DETAILS))
    fputs ("New store group\n", dump_file);

  FOR_EACH_VEC_ELT (m_store_info, i, info)
    {
      unsigned HOST_WIDE_INT new_bitregion_start, new_bitregion_end;

      if (i <= ignore)
        goto done;

      /* First try to handle group of stores like:
         p[0] = data >> 24;
         p[1] = data >> 16;
         p[2] = data >> 8;
         p[3] = data;
         using the bswap framework.  */
      if (info->bitpos == merged_store->start + merged_store->width
          && merged_store->stores.length () == 1
          && merged_store->stores[0]->ins_stmt != NULL
          && info->lp_nr == merged_store->lp_nr
          && info->ins_stmt != NULL)
        {
          unsigned int try_size;
          for (try_size = 64; try_size >= 16; try_size >>= 1)
            if (try_coalesce_bswap (merged_store, i - 1, try_size))
              break;

          if (try_size >= 16)
            {
              ignore = i + merged_store->stores.length () - 1;
              m_merged_store_groups.safe_push (merged_store);
              if (ignore < m_store_info.length ())
                merged_store = new merged_store_group (m_store_info[ignore]);
              else
                merged_store = NULL;
              goto done;
            }
        }

      new_bitregion_start
        = MIN (merged_store->bitregion_start, info->bitregion_start);
      new_bitregion_end
        = MAX (merged_store->bitregion_end, info->bitregion_end);

      if (info->order >= merged_store->first_nonmergeable_order
          || (((new_bitregion_end - new_bitregion_start + 1) / BITS_PER_UNIT)
              > (unsigned) param_store_merging_max_size))
        ;

      /* |---store 1---|
               |---store 2---|
         Overlapping stores.  */
      else if (IN_RANGE (info->bitpos, merged_store->start,
                         merged_store->start + merged_store->width - 1)
               /* |---store 1---||---store 2---|
                  Handle also the consecutive INTEGER_CST stores case here,
                  as we have here the code to deal with overlaps.  */
               || (info->bitregion_start <= merged_store->bitregion_end
                   && info->rhs_code == INTEGER_CST
                   && merged_store->only_constants
                   && merged_store->can_be_merged_into (info)))
        {
          /* Only allow overlapping stores of constants.  */
          if (info->rhs_code == INTEGER_CST
              && merged_store->only_constants
              && info->lp_nr == merged_store->lp_nr)
            {
              unsigned int last_order
                = MAX (merged_store->last_order, info->order);
              unsigned HOST_WIDE_INT end
                = MAX (merged_store->start + merged_store->width,
                       info->bitpos + info->bitsize);
              if (check_no_overlap (m_store_info, i, true, last_order, end))
                {
                  /* check_no_overlap call above made sure there are no
                     overlapping stores with non-INTEGER_CST rhs_code
                     in between the first and last of the stores we've
                     just merged.  If there are any INTEGER_CST rhs_code
                     stores in between, we need to merge_overlapping them
                     even if in the sort_by_bitpos order there are other
                     overlapping stores in between.  Keep those stores as is.
                     Example:
                        MEM[(int *)p_28] = 0;
                        MEM[(char *)p_28 + 3B] = 1;
                        MEM[(char *)p_28 + 1B] = 2;
                        MEM[(char *)p_28 + 2B] = MEM[(char *)p_28 + 6B];
                     We can't merge the zero store with the store of two and
                     not merge anything else, because the store of one is
                     in the original order in between those two, but in
                     store_by_bitpos order it comes after the last store that
                     we can't merge with them.  We can merge the first 3 stores
                     and keep the last store as is though.  */
                  unsigned int len = m_store_info.length ();
                  unsigned int try_order = last_order;
                  unsigned int first_nonmergeable_order;
                  unsigned int k;
                  bool last_iter = false;
                  int attempts = 0;
                  do
                    {
                      unsigned int max_order = 0;
                      unsigned first_nonmergeable_int_order = ~0U;
                      unsigned HOST_WIDE_INT this_end = end;
                      k = i;
                      first_nonmergeable_order = ~0U;
                      for (unsigned int j = i + 1; j < len; ++j)
                        {
                          store_immediate_info *info2 = m_store_info[j];
                          if (info2->bitpos >= this_end)
                            break;
                          if (info2->order < try_order)
                            {
                              if (info2->rhs_code != INTEGER_CST
                                  || info2->lp_nr != merged_store->lp_nr)
                                {
                                  /* Normally check_no_overlap makes sure this
                                     doesn't happen, but if end grows below,
                                     then we need to process more stores than
                                     check_no_overlap verified.  Example:
                                      MEM[(int *)p_5] = 0;
                                      MEM[(short *)p_5 + 3B] = 1;
                                      MEM[(char *)p_5 + 4B] = _9;
                                      MEM[(char *)p_5 + 2B] = 2;  */
                                  k = 0;
                                  break;
                                }
                              k = j;
                              this_end = MAX (this_end,
                                              info2->bitpos + info2->bitsize);
                            }
                          else if (info2->rhs_code == INTEGER_CST
                                   && info2->lp_nr == merged_store->lp_nr
                                   && !last_iter)
                            {
                              max_order = MAX (max_order, info2->order + 1);
                              first_nonmergeable_int_order
                                = MIN (first_nonmergeable_int_order,
                                       info2->order);
                            }
                          else
                            first_nonmergeable_order
                              = MIN (first_nonmergeable_order, info2->order);
                        }
                      if (k == 0)
                        {
                          if (last_order == try_order)
                            break;
                          /* If this failed, but only because we grew
                             try_order, retry with the last working one,
                             so that we merge at least something.  */
                          try_order = last_order;
                          last_iter = true;
                          continue;
                        }
                      last_order = try_order;
                      /* Retry with a larger try_order to see if we could
                         merge some further INTEGER_CST stores.  */
                      if (max_order
                          && (first_nonmergeable_int_order
                              < first_nonmergeable_order))
                        {
                          try_order = MIN (max_order,
                                           first_nonmergeable_order);
                          try_order
                            = MIN (try_order,
                                   merged_store->first_nonmergeable_order);
                          if (try_order > last_order && ++attempts < 16)
                            continue;
                        }
                      first_nonmergeable_order
                        = MIN (first_nonmergeable_order,
                               first_nonmergeable_int_order);
                      end = this_end;
                      break;
                    }
                  while (1);

                  if (k != 0)
                    {
                      merged_store->merge_overlapping (info);

                      merged_store->first_nonmergeable_order
                        = MIN (merged_store->first_nonmergeable_order,
                               first_nonmergeable_order);

                      for (unsigned int j = i + 1; j <= k; j++)
                        {
                          store_immediate_info *info2 = m_store_info[j];
                          gcc_assert (info2->bitpos < end);
                          if (info2->order < last_order)
                            {
                              gcc_assert (info2->rhs_code == INTEGER_CST);
                              if (info != info2)
                                merged_store->merge_overlapping (info2);
                            }
                          /* Other stores are kept and not merged in any
                             way.  */
                        }
                      ignore = k;
                      goto done;
                    }
                }
            }
        }
      /* |---store 1---||---store 2---|
         This store is consecutive to the previous one.
         Merge it into the current store group.  There can be gaps in between
         the stores, but there can't be gaps in between bitregions.  */
      else if (info->bitregion_start <= merged_store->bitregion_end
               && merged_store->can_be_merged_into (info))
        {
          store_immediate_info *infof = merged_store->stores[0];

          /* All the rhs_code ops that take 2 operands are commutative,
             swap the operands if it could make the operands compatible.  */
          if (infof->ops[0].base_addr
              && infof->ops[1].base_addr
              && info->ops[0].base_addr
              && info->ops[1].base_addr
              && known_eq (info->ops[1].bitpos - infof->ops[0].bitpos,
                           info->bitpos - infof->bitpos)
              && operand_equal_p (info->ops[1].base_addr,
                                  infof->ops[0].base_addr, 0))
            {
              std::swap (info->ops[0], info->ops[1]);
              info->ops_swapped_p = true;
            }
          if (check_no_overlap (m_store_info, i, false,
                                MAX (merged_store->last_order, info->order),
                                MAX (merged_store->start + merged_store->width,
                                     info->bitpos + info->bitsize)))
            {
              /* Turn MEM_REF into BIT_INSERT_EXPR for bit-field stores.  */
              if (info->rhs_code == MEM_REF && infof->rhs_code != MEM_REF)
                {
                  info->rhs_code = BIT_INSERT_EXPR;
                  info->ops[0].val = gimple_assign_rhs1 (info->stmt);
                  info->ops[0].base_addr = NULL_TREE;
                }
              else if (infof->rhs_code == MEM_REF && info->rhs_code != MEM_REF)
                {
                  store_immediate_info *infoj;
                  unsigned int j;
                  FOR_EACH_VEC_ELT (merged_store->stores, j, infoj)
                    {
                      infoj->rhs_code = BIT_INSERT_EXPR;
                      infoj->ops[0].val = gimple_assign_rhs1 (infoj->stmt);
                      infoj->ops[0].base_addr = NULL_TREE;
                    }
                  merged_store->bit_insertion = true;
                }
              if ((infof->ops[0].base_addr
                   ? compatible_load_p (merged_store, info, base_addr, 0)
                   : !info->ops[0].base_addr)
                  && (infof->ops[1].base_addr
                      ? compatible_load_p (merged_store, info, base_addr, 1)
                      : !info->ops[1].base_addr))
                {
                  merged_store->merge_into (info);
                  goto done;
                }
            }
        }

      /* |---store 1---| <gap> |---store 2---|.
         Gap between stores or the rhs not compatible.  Start a new group.  */

      /* Try to apply all the stores recorded for the group to determine
         the bitpattern they write and discard it if that fails.
         This will also reject single-store groups.  */
      if (merged_store->apply_stores ())
        m_merged_store_groups.safe_push (merged_store);
      else
        delete merged_store;

      merged_store = new merged_store_group (info);
      if (dump_file && (dump_flags & TDF_DETAILS))
        fputs ("New store group\n", dump_file);

    done:
      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Store %u:\nbitsize:" HOST_WIDE_INT_PRINT_DEC
                              " bitpos:" HOST_WIDE_INT_PRINT_DEC " val:",
                   i, info->bitsize, info->bitpos);
          print_generic_expr (dump_file, gimple_assign_rhs1 (info->stmt));
          fputc ('\n', dump_file);
        }
    }

  /* Record or discard the last store group.  */
  if (merged_store)
    {
      if (merged_store->apply_stores ())
        m_merged_store_groups.safe_push (merged_store);
      else
        delete merged_store;
    }

  gcc_assert (m_merged_store_groups.length () <= m_store_info.length ());

  bool success
    = !m_merged_store_groups.is_empty ()
      && m_merged_store_groups.length () < m_store_info.length ();

  if (success && dump_file)
    fprintf (dump_file, "Coalescing successful!\nMerged into %u stores\n",
             m_merged_store_groups.length ());

  return success;
}
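/* Illustrative example (assumed): for the sorted chain
     MEM[(char *)p_1]      = 1;
     MEM[(char *)p_1 + 1B] = 2;
     MEM[(char *)p_1 + 4B] = _5;
   the first two stores are adjacent constants and end up in one
   merged_store_group, while the gap before the third store starts a new
   group, which apply_stores then rejects as a single-store group.  */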
/* Return the type to use for the merged stores or loads described by STMTS.
   This is needed to get the alias sets right.  If IS_LOAD, look for rhs,
   otherwise lhs.  Additionally set *CLIQUEP and *BASEP to MR_DEPENDENCE_*
   of the MEM_REFs if any.  */

static tree
get_alias_type_for_stmts (vec<gimple *> &stmts, bool is_load,
                          unsigned short *cliquep, unsigned short *basep)
{
  gimple *stmt;
  unsigned int i;
  tree type = NULL_TREE;
  tree ret = NULL_TREE;
  *cliquep = 0;
  *basep = 0;

  FOR_EACH_VEC_ELT (stmts, i, stmt)
    {
      tree ref = is_load ? gimple_assign_rhs1 (stmt)
                         : gimple_assign_lhs (stmt);
      tree type1 = reference_alias_ptr_type (ref);
      tree base = get_base_address (ref);

      if (i == 0)
        {
          if (TREE_CODE (base) == MEM_REF)
            {
              *cliquep = MR_DEPENDENCE_CLIQUE (base);
              *basep = MR_DEPENDENCE_BASE (base);
            }
          ret = type = type1;
          continue;
        }
      if (!alias_ptr_types_compatible_p (type, type1))
        ret = ptr_type_node;
      if (TREE_CODE (base) != MEM_REF
          || *cliquep != MR_DEPENDENCE_CLIQUE (base)
          || *basep != MR_DEPENDENCE_BASE (base))
        {
          *cliquep = 0;
          *basep = 0;
        }
    }
  return ret;
}
/* Return the location_t information we can find among the statements
   in STMTS.  */

static location_t
get_location_for_stmts (vec<gimple *> &stmts)
{
  gimple *stmt;
  unsigned int i;

  FOR_EACH_VEC_ELT (stmts, i, stmt)
    if (gimple_has_location (stmt))
      return gimple_location (stmt);

  return UNKNOWN_LOCATION;
}
/* Used to describe a store resulting from splitting a wide store in smaller
   regularly-sized stores in split_group.  */

class split_store
{
public:
  unsigned HOST_WIDE_INT bytepos;
  unsigned HOST_WIDE_INT size;
  unsigned HOST_WIDE_INT align;
  auto_vec<store_immediate_info *> orig_stores;
  /* True if there is a single orig stmt covering the whole split store.  */
  bool orig;
  split_store (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
               unsigned HOST_WIDE_INT);
};

/* Simple constructor.  */

split_store::split_store (unsigned HOST_WIDE_INT bp,
                          unsigned HOST_WIDE_INT sz,
                          unsigned HOST_WIDE_INT al)
  : bytepos (bp), size (sz), align (al), orig (false)
{
  orig_stores.create (0);
}
/* Record all stores in GROUP that write to the region starting at BITPOS and
   is of size BITSIZE.  Record infos for such statements in STORES if
   non-NULL.  The stores in GROUP must be sorted by bitposition.  Return INFO
   if there is exactly one original store in the range (in that case ignore
   clobber stmts, unless there are only clobber stmts).  */

static store_immediate_info *
find_constituent_stores (class merged_store_group *group,
                         vec<store_immediate_info *> *stores,
                         unsigned int *first,
                         unsigned HOST_WIDE_INT bitpos,
                         unsigned HOST_WIDE_INT bitsize)
{
  store_immediate_info *info, *ret = NULL;
  unsigned int i;
  bool second = false;
  bool update_first = true;
  unsigned HOST_WIDE_INT end = bitpos + bitsize;
  for (i = *first; group->stores.iterate (i, &info); ++i)
    {
      unsigned HOST_WIDE_INT stmt_start = info->bitpos;
      unsigned HOST_WIDE_INT stmt_end = stmt_start + info->bitsize;
      if (stmt_end <= bitpos)
        {
          /* BITPOS passed to this function never decreases from within the
             same split_group call, so optimize and don't scan info records
             which are known to end before or at BITPOS next time.
             Only do it if all stores before this one also pass this.  */
          if (update_first)
            *first = i + 1;
          continue;
        }
      else
        update_first = false;

      /* The stores in GROUP are ordered by bitposition so if we're past
         the region for this group return early.  */
      if (stmt_start >= end)
        return ret;

      if (gimple_clobber_p (info->stmt))
        {
          if (stores)
            stores->safe_push (info);
          if (ret == NULL)
            ret = info;
          continue;
        }
      if (stores)
        {
          stores->safe_push (info);
          if (ret && !gimple_clobber_p (ret->stmt))
            {
              ret = NULL;
              second = true;
            }
        }
      else if (ret && !gimple_clobber_p (ret->stmt))
        return NULL;
      if (!second)
        ret = info;
    }
  return second ? NULL : ret;
}
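/* Illustrative use (assumed): split_group probes a candidate region with
     info = find_constituent_stores (group, NULL, &first, try_bitpos, 16);
   and only when exactly one original non-clobber store covers the region
   does it consider reusing that store unchanged (split_store::orig).  */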
/* Return how many SSA_NAMEs used to compute value to store in the INFO
   store have multiple uses.  If any SSA_NAME has multiple uses, also
   count statements needed to compute it.  */

static unsigned
count_multiple_uses (store_immediate_info *info)
{
  gimple *stmt = info->stmt;
  unsigned ret = 0;
  switch (info->rhs_code)
    {
    case INTEGER_CST:
    case STRING_CST:
      return 0;
    case BIT_AND_EXPR:
    case BIT_IOR_EXPR:
    case BIT_XOR_EXPR:
      if (info->bit_not_p)
        {
          if (!has_single_use (gimple_assign_rhs1 (stmt)))
            ret = 1; /* Fall through below to return
                        the BIT_NOT_EXPR stmt and then
                        BIT_{AND,IOR,XOR}_EXPR and anything it
                        uses.  */
          else
            /* stmt is after this the BIT_NOT_EXPR.  */
            stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
        }
      if (!has_single_use (gimple_assign_rhs1 (stmt)))
        {
          ret += 1 + info->ops[0].bit_not_p;
          if (info->ops[1].base_addr)
            ret += 1 + info->ops[1].bit_not_p;
          return ret + 1;
        }
      stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
      /* stmt is now the BIT_*_EXPR.  */
      if (!has_single_use (gimple_assign_rhs1 (stmt)))
        ret += 1 + info->ops[info->ops_swapped_p].bit_not_p;
      else if (info->ops[info->ops_swapped_p].bit_not_p)
        {
          gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
          if (!has_single_use (gimple_assign_rhs1 (stmt2)))
            ++ret;
        }
      if (info->ops[1].base_addr == NULL_TREE)
        {
          gcc_checking_assert (!info->ops_swapped_p);
          return ret;
        }
      if (!has_single_use (gimple_assign_rhs2 (stmt)))
        ret += 1 + info->ops[1 - info->ops_swapped_p].bit_not_p;
      else if (info->ops[1 - info->ops_swapped_p].bit_not_p)
        {
          gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
          if (!has_single_use (gimple_assign_rhs1 (stmt2)))
            ++ret;
        }
      return ret;
    case MEM_REF:
      if (!has_single_use (gimple_assign_rhs1 (stmt)))
        return 1 + info->ops[0].bit_not_p;
      else if (info->ops[0].bit_not_p)
        {
          stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
          if (!has_single_use (gimple_assign_rhs1 (stmt)))
            return 1;
        }
      return 0;
    case BIT_INSERT_EXPR:
      return has_single_use (gimple_assign_rhs1 (stmt)) ? 0 : 1;
    default:
      gcc_unreachable ();
    }
}
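/* Illustrative example (assumed): for
     _1 = s.a;
     _2 = ~_1;
     t.a = _2;
   with _1 also used elsewhere and _2 used only by the store,
   count_multiple_uses returns 1 in the MEM_REF case: the BIT_NOT_EXPR
   feeding the store has to be kept alive regardless of the merging.  */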
/* Split a merged store described by GROUP by populating the SPLIT_STORES
   vector (if non-NULL) with split_store structs describing the byte offset
   (from the base), the bit size and alignment of each store as well as the
   original statements involved in each such split group.
   This is to separate the splitting strategy from the statement
   building/emission/linking done in output_merged_store.
   Return number of new stores.
   If ALLOW_UNALIGNED_STORE is false, then all stores must be aligned.
   If ALLOW_UNALIGNED_LOAD is false, then all loads must be aligned.
   BZERO_FIRST may be true only when the first store covers the whole group
   and clears it; if BZERO_FIRST is true, keep that first store in the set
   unmodified and emit further stores for the overrides only.
   If SPLIT_STORES is NULL, it is just a dry run to count number of
   new stores.  */

static unsigned int
split_group (merged_store_group *group, bool allow_unaligned_store,
             bool allow_unaligned_load, bool bzero_first,
             vec<split_store *> *split_stores,
             unsigned *total_orig,
             unsigned *total_new)
{
  unsigned HOST_WIDE_INT pos = group->bitregion_start;
  unsigned HOST_WIDE_INT size = group->bitregion_end - pos;
  unsigned HOST_WIDE_INT bytepos = pos / BITS_PER_UNIT;
  unsigned HOST_WIDE_INT group_align = group->align;
  unsigned HOST_WIDE_INT align_base = group->align_base;
  unsigned HOST_WIDE_INT group_load_align = group_align;
  bool any_orig = false;

  gcc_assert ((size % BITS_PER_UNIT == 0) && (pos % BITS_PER_UNIT == 0));

  /* For bswap framework using sets of stores, all the checking has been done
     earlier in try_coalesce_bswap and the result always needs to be emitted
     as a single store.  Likewise for string concatenation.  */
  if (group->stores[0]->rhs_code == LROTATE_EXPR
      || group->stores[0]->rhs_code == NOP_EXPR
      || group->string_concatenation)
    {
      gcc_assert (!bzero_first);
      if (total_orig)
        {
          /* Avoid the old/new stmt count heuristics.  It should be
             always beneficial.  */
          total_new[0] = 1;
          total_orig[0] = 2;
        }

      if (split_stores)
        {
          unsigned HOST_WIDE_INT align_bitpos
            = (group->start - align_base) & (group_align - 1);
          unsigned HOST_WIDE_INT align = group_align;
          if (align_bitpos)
            align = least_bit_hwi (align_bitpos);
          bytepos = group->start / BITS_PER_UNIT;
          split_store *store
            = new split_store (bytepos, group->width, align);
          unsigned int first = 0;
          find_constituent_stores (group, &store->orig_stores,
                                   &first, group->start, group->width);
          split_stores->safe_push (store);
        }

      return 1;
    }

  unsigned int ret = 0, first = 0;
  unsigned HOST_WIDE_INT try_pos = bytepos;

  if (total_orig)
    {
      unsigned int i;
      store_immediate_info *info = group->stores[0];

      total_new[0] = 0;
      total_orig[0] = 1; /* The orig store.  */
      info = group->stores[0];
      if (info->ops[0].base_addr)
        total_orig[0]++;
      if (info->ops[1].base_addr)
        total_orig[0]++;
      switch (info->rhs_code)
        {
        case BIT_AND_EXPR:
        case BIT_IOR_EXPR:
        case BIT_XOR_EXPR:
          total_orig[0]++; /* The orig BIT_*_EXPR stmt.  */
          break;
        default:
          break;
        }
      total_orig[0] *= group->stores.length ();

      FOR_EACH_VEC_ELT (group->stores, i, info)
        {
          total_new[0] += count_multiple_uses (info);
          total_orig[0] += (info->bit_not_p
                            + info->ops[0].bit_not_p
                            + info->ops[1].bit_not_p);
        }
    }

  if (!allow_unaligned_load)
    for (int i = 0; i < 2; ++i)
      if (group->load_align[i])
        group_load_align = MIN (group_load_align, group->load_align[i]);

  if (bzero_first)
    {
      store_immediate_info *gstore;
      FOR_EACH_VEC_ELT (group->stores, first, gstore)
        if (!gimple_clobber_p (gstore->stmt))
          break;
      ++first;
      ret = 1;
      if (split_stores)
        {
          split_store *store
            = new split_store (bytepos, gstore->bitsize, align_base);
          store->orig_stores.safe_push (gstore);
          store->orig = true;
          any_orig = true;
          split_stores->safe_push (store);
        }
    }

  while (size > 0)
    {
      if ((allow_unaligned_store || group_align <= BITS_PER_UNIT)
          && (group->mask[try_pos - bytepos] == (unsigned char) ~0U
              || (bzero_first && group->val[try_pos - bytepos] == 0)))
        {
          /* Skip padding bytes.  */
          ++try_pos;
          size -= BITS_PER_UNIT;
          continue;
        }

      unsigned HOST_WIDE_INT try_bitpos = try_pos * BITS_PER_UNIT;
      unsigned int try_size = MAX_STORE_BITSIZE, nonmasked;
      unsigned HOST_WIDE_INT align_bitpos
        = (try_bitpos - align_base) & (group_align - 1);
      unsigned HOST_WIDE_INT align = group_align;
      bool found_orig = false;
      if (align_bitpos)
        align = least_bit_hwi (align_bitpos);
      if (!allow_unaligned_store)
        try_size = MIN (try_size, align);
      if (!allow_unaligned_load)
        {
          /* If we can't do or don't want to do unaligned stores
             as well as loads, we need to take the loads into account
             as well.  */
          unsigned HOST_WIDE_INT load_align = group_load_align;
          align_bitpos = (try_bitpos - align_base) & (load_align - 1);
          if (align_bitpos)
            load_align = least_bit_hwi (align_bitpos);
          for (int i = 0; i < 2; ++i)
            if (group->load_align[i])
              {
                align_bitpos
                  = known_alignment (try_bitpos
                                     - group->stores[0]->bitpos
                                     + group->stores[0]->ops[i].bitpos
                                     - group->load_align_base[i]);
                if (align_bitpos & (group_load_align - 1))
                  {
                    unsigned HOST_WIDE_INT a = least_bit_hwi (align_bitpos);
                    load_align = MIN (load_align, a);
                  }
              }
          try_size = MIN (try_size, load_align);
        }
      store_immediate_info *info
        = find_constituent_stores (group, NULL, &first, try_bitpos, try_size);
      if (info && !gimple_clobber_p (info->stmt))
        {
          /* If there is just one original statement for the range, see if
             we can just reuse the original store which could be even larger
             than try_size.  */
          unsigned HOST_WIDE_INT stmt_end
            = ROUND_UP (info->bitpos + info->bitsize, BITS_PER_UNIT);
          info = find_constituent_stores (group, NULL, &first, try_bitpos,
                                          stmt_end - try_bitpos);
          if (info && info->bitpos >= try_bitpos)
            {
              store_immediate_info *info2 = NULL;
              unsigned int first_copy = first;
              if (info->bitpos > try_bitpos
                  && stmt_end - try_bitpos <= try_size)
                {
                  info2 = find_constituent_stores (group, NULL, &first_copy,
                                                   try_bitpos,
                                                   info->bitpos - try_bitpos);
                  gcc_assert (info2 == NULL || gimple_clobber_p (info2->stmt));
                }
              if (info2 == NULL && stmt_end - try_bitpos < try_size)
                {
                  info2 = find_constituent_stores (group, NULL, &first_copy,
                                                   stmt_end,
                                                   (try_bitpos + try_size)
                                                   - stmt_end);
                  gcc_assert (info2 == NULL || gimple_clobber_p (info2->stmt));
                }
              if (info2 == NULL)
                {
                  try_size = stmt_end - try_bitpos;
                  found_orig = true;
                  goto found;
                }
            }
        }

      /* Approximate store bitsize for the case when there are no padding
         bits.  */
      while (try_size > size)
        try_size /= 2;
      /* Now look for whole padding bytes at the end of that bitsize.  */
      for (nonmasked = try_size / BITS_PER_UNIT; nonmasked > 0; --nonmasked)
        if (group->mask[try_pos - bytepos + nonmasked - 1]
            != (unsigned char) ~0U
            && (!bzero_first
                || group->val[try_pos - bytepos + nonmasked - 1] != 0))
          break;
      if (nonmasked == 0 || (info && gimple_clobber_p (info->stmt)))
        {
          /* If entire try_size range is padding, skip it.  */
          try_pos += try_size / BITS_PER_UNIT;
          size -= try_size;
          continue;
        }
      /* Otherwise try to decrease try_size if second half, last 3 quarters
         etc. are padding.  */
      nonmasked *= BITS_PER_UNIT;
      while (nonmasked <= try_size / 2)
        try_size /= 2;
      if (!allow_unaligned_store && group_align > BITS_PER_UNIT)
        {
          /* Now look for whole padding bytes at the start of that bitsize.  */
          unsigned int try_bytesize = try_size / BITS_PER_UNIT, masked;
          for (masked = 0; masked < try_bytesize; ++masked)
            if (group->mask[try_pos - bytepos + masked] != (unsigned char) ~0U
                && (!bzero_first
                    || group->val[try_pos - bytepos + masked] != 0))
              break;
          masked *= BITS_PER_UNIT;
          gcc_assert (masked < try_size);
          if (masked >= try_size / 2)
            {
              while (masked >= try_size / 2)
                {
                  try_size /= 2;
                  try_pos += try_size / BITS_PER_UNIT;
                  size -= try_size;
                  masked -= try_size;
                }
              /* Need to recompute the alignment, so just retry at the new
                 position.  */
              continue;
            }
        }

    found:
      ++ret;

      if (split_stores)
        {
          split_store *store
            = new split_store (try_pos, try_size, align);
          info = find_constituent_stores (group, &store->orig_stores,
                                          &first, try_bitpos, try_size);
          if (info
              && !gimple_clobber_p (info->stmt)
              && info->bitpos >= try_bitpos
              && info->bitpos + info->bitsize <= try_bitpos + try_size
              && (store->orig_stores.length () == 1
                  || found_orig
                  || (info->bitpos == try_bitpos
                      && (info->bitpos + info->bitsize
                          == try_bitpos + try_size))))
            {
              store->orig = true;
              any_orig = true;
            }
          split_stores->safe_push (store);
        }

      try_pos += try_size / BITS_PER_UNIT;
      size -= try_size;
    }

  if (total_orig)
    {
      unsigned int i;
      split_store *store;
      /* If we are reusing some original stores and any of the
         original SSA_NAMEs had multiple uses, we need to subtract
         those now before we add the new ones.  */
      if (total_new[0] && any_orig)
        {
          FOR_EACH_VEC_ELT (*split_stores, i, store)
            if (store->orig)
              total_new[0] -= count_multiple_uses (store->orig_stores[0]);
        }
      total_new[0] += ret; /* The new store.  */
      store_immediate_info *info = group->stores[0];
      if (info->ops[0].base_addr)
        total_new[0] += ret;
      if (info->ops[1].base_addr)
        total_new[0] += ret;
      switch (info->rhs_code)
        {
        case BIT_AND_EXPR:
        case BIT_IOR_EXPR:
        case BIT_XOR_EXPR:
          total_new[0] += ret; /* The new BIT_*_EXPR stmt.  */
          break;
        default:
          break;
        }
      FOR_EACH_VEC_ELT (*split_stores, i, store)
        {
          unsigned int j;
          bool bit_not_p[3] = { false, false, false };
          /* If all orig_stores have certain bit_not_p set, then
             we'd use a BIT_NOT_EXPR stmt and need to account for it.
             If some orig_stores have certain bit_not_p set, then
             we'd use a BIT_XOR_EXPR with a mask and need to account for
             it.  */
          FOR_EACH_VEC_ELT (store->orig_stores, j, info)
            {
              if (info->ops[0].bit_not_p)
                bit_not_p[0] = true;
              if (info->ops[1].bit_not_p)
                bit_not_p[1] = true;
              if (info->bit_not_p)
                bit_not_p[2] = true;
            }
          total_new[0] += bit_not_p[0] + bit_not_p[1] + bit_not_p[2];
        }
    }

  return ret;
}
/* Return the operation through which the operand IDX (if < 2) or
   result (IDX == 2) should be inverted.  If NOP_EXPR, no inversion
   is done, if BIT_NOT_EXPR, all bits are inverted, if BIT_XOR_EXPR,
   the bits should be xored with mask.  */

static enum tree_code
invert_op (split_store *split_store, int idx, tree int_type, tree &mask)
{
  unsigned int i;
  store_immediate_info *info;
  unsigned int cnt = 0;
  bool any_paddings = false;
  FOR_EACH_VEC_ELT (split_store->orig_stores, i, info)
    {
      bool bit_not_p = idx < 2 ? info->ops[idx].bit_not_p : info->bit_not_p;
      if (bit_not_p)
        {
          ++cnt;
          tree lhs = gimple_assign_lhs (info->stmt);
          if (INTEGRAL_TYPE_P (TREE_TYPE (lhs))
              && TYPE_PRECISION (TREE_TYPE (lhs)) < info->bitsize)
            any_paddings = true;
        }
    }
  mask = NULL_TREE;
  if (cnt == 0)
    return NOP_EXPR;
  if (cnt == split_store->orig_stores.length () && !any_paddings)
    return BIT_NOT_EXPR;

  unsigned HOST_WIDE_INT try_bitpos = split_store->bytepos * BITS_PER_UNIT;
  unsigned buf_size = split_store->size / BITS_PER_UNIT;
  unsigned char *buf
    = XALLOCAVEC (unsigned char, buf_size);
  memset (buf, ~0U, buf_size);
  FOR_EACH_VEC_ELT (split_store->orig_stores, i, info)
    {
      bool bit_not_p = idx < 2 ? info->ops[idx].bit_not_p : info->bit_not_p;
      if (!bit_not_p)
        continue;
      /* Clear regions with bit_not_p and invert afterwards, rather than
         clear regions with !bit_not_p, so that gaps in between stores aren't
         set in the mask.  */
      unsigned HOST_WIDE_INT bitsize = info->bitsize;
      unsigned HOST_WIDE_INT prec = bitsize;
      unsigned int pos_in_buffer = 0;
      if (any_paddings)
        {
          tree lhs = gimple_assign_lhs (info->stmt);
          if (INTEGRAL_TYPE_P (TREE_TYPE (lhs))
              && TYPE_PRECISION (TREE_TYPE (lhs)) < bitsize)
            prec = TYPE_PRECISION (TREE_TYPE (lhs));
        }
      if (info->bitpos < try_bitpos)
        {
          gcc_assert (info->bitpos + bitsize > try_bitpos);
          if (!BYTES_BIG_ENDIAN)
            {
              if (prec <= try_bitpos - info->bitpos)
                continue;
              prec -= try_bitpos - info->bitpos;
            }
          bitsize -= try_bitpos - info->bitpos;
          if (BYTES_BIG_ENDIAN && prec > bitsize)
            prec = bitsize;
        }
      else
        pos_in_buffer = info->bitpos - try_bitpos;
      if (prec < bitsize)
        {
          /* If this is a bool inversion, invert just the least significant
             prec bits rather than all bits of it.  */
          if (BYTES_BIG_ENDIAN)
            {
              pos_in_buffer += bitsize - prec;
              if (pos_in_buffer >= split_store->size)
                continue;
            }
          bitsize = prec;
        }
      if (pos_in_buffer + bitsize > split_store->size)
        bitsize = split_store->size - pos_in_buffer;
      unsigned char *p = buf + (pos_in_buffer / BITS_PER_UNIT);
      if (BYTES_BIG_ENDIAN)
        clear_bit_region_be (p, (BITS_PER_UNIT - 1
                                 - (pos_in_buffer % BITS_PER_UNIT)), bitsize);
      else
        clear_bit_region (p, pos_in_buffer % BITS_PER_UNIT, bitsize);
    }
  for (unsigned int i = 0; i < buf_size; ++i)
    buf[i] = ~buf[i];
  mask = native_interpret_expr (int_type, buf, buf_size);
  return BIT_XOR_EXPR;
}
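/* Illustrative example (assumed): if only the first of two merged byte
   stores had its value negated with ~, invert_op cannot return BIT_NOT_EXPR
   for the whole 16-bit split store; instead it builds MASK = 0x00ff (on a
   little-endian target) covering just that byte and returns BIT_XOR_EXPR,
   since x ^ 0xff == ~x on the selected bits while leaving the rest alone.  */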
3695 /* Given a merged store group GROUP output the widened version of it.
3696 The store chain is against the base object BASE.
3697 Try store sizes of at most MAX_STORE_BITSIZE bits wide and don't output
3698 unaligned stores for STRICT_ALIGNMENT targets or if it's too expensive.
3699 Make sure that the number of statements output is less than the number of
3700 original statements. If a better sequence is possible emit it and
3704 imm_store_chain_info::output_merged_store (merged_store_group
*group
)
3706 const unsigned HOST_WIDE_INT start_byte_pos
3707 = group
->bitregion_start
/ BITS_PER_UNIT
;
3708 unsigned int orig_num_stmts
= group
->stores
.length ();
3709 if (orig_num_stmts
< 2)
3712 bool allow_unaligned_store
3713 = !STRICT_ALIGNMENT
&& param_store_merging_allow_unaligned
;
3714 bool allow_unaligned_load
= allow_unaligned_store
;
3715 bool bzero_first
= false;
3716 store_immediate_info
*store
;
3717 unsigned int num_clobber_stmts
= 0;
3718 if (group
->stores
[0]->rhs_code
== INTEGER_CST
)
3721 FOR_EACH_VEC_ELT (group
->stores
, i
, store
)
3722 if (gimple_clobber_p (store
->stmt
))
3723 num_clobber_stmts
++;
3724 else if (TREE_CODE (gimple_assign_rhs1 (store
->stmt
)) == CONSTRUCTOR
3725 && CONSTRUCTOR_NELTS (gimple_assign_rhs1 (store
->stmt
)) == 0
3726 && group
->start
== store
->bitpos
3727 && group
->width
== store
->bitsize
3728 && (group
->start
% BITS_PER_UNIT
) == 0
3729 && (group
->width
% BITS_PER_UNIT
) == 0)
3736 FOR_EACH_VEC_ELT_FROM (group
->stores
, i
, store
, i
)
3737 if (gimple_clobber_p (store
->stmt
))
3738 num_clobber_stmts
++;
3739 if (num_clobber_stmts
== orig_num_stmts
)
3741 orig_num_stmts
-= num_clobber_stmts
;
3743 if (allow_unaligned_store
|| bzero_first
)
3745 /* If unaligned stores are allowed, see how many stores we'd emit
3746 for unaligned and how many stores we'd emit for aligned stores.
3747 Only use unaligned stores if it allows fewer stores than aligned.
3748 Similarly, if there is a whole region clear first, prefer expanding
3749 it together compared to expanding clear first followed by merged
3751 unsigned cnt
[4] = { ~0, ~0, ~0, ~0 };
3753 for (int pass
= 0; pass
< 4; ++pass
)
3755 if (!allow_unaligned_store
&& (pass
& 1) != 0)
3757 if (!bzero_first
&& (pass
& 2) != 0)
3759 cnt
[pass
] = split_group (group
, (pass
& 1) != 0,
3760 allow_unaligned_load
, (pass
& 2) != 0,
3762 if (cnt
[pass
] < cnt
[pass_min
])
3765 if ((pass_min
& 1) == 0)
3766 allow_unaligned_store
= false;
3767 if ((pass_min
& 2) == 0)
3768 bzero_first
= false;
3771 auto_vec
<class split_store
*, 32> split_stores
;
3772 split_store
*split_store
;
3773 unsigned total_orig
, total_new
, i
;
3774 split_group (group
, allow_unaligned_store
, allow_unaligned_load
, bzero_first
,
3775 &split_stores
, &total_orig
, &total_new
);
3777 /* Determine if there is a clobber covering the whole group at the start,
3778 followed by proposed split stores that cover the whole group. In that
3779 case, prefer the transformation even if
3780 split_stores.length () == orig_num_stmts. */
3781 bool clobber_first
= false;
3782 if (num_clobber_stmts
3783 && gimple_clobber_p (group
->stores
[0]->stmt
)
3784 && group
->start
== group
->stores
[0]->bitpos
3785 && group
->width
== group
->stores
[0]->bitsize
3786 && (group
->start
% BITS_PER_UNIT
) == 0
3787 && (group
->width
% BITS_PER_UNIT
) == 0)
3789 clobber_first
= true;
3790 unsigned HOST_WIDE_INT pos
= group
->start
/ BITS_PER_UNIT
;
3791 FOR_EACH_VEC_ELT (split_stores
, i
, split_store
)
3792 if (split_store
->bytepos
!= pos
)
3794 clobber_first
= false;
3798 pos
+= split_store
->size
/ BITS_PER_UNIT
;
3799 if (pos
!= (group
->start
+ group
->width
) / BITS_PER_UNIT
)
3800 clobber_first
= false;
3803 if (split_stores
.length () >= orig_num_stmts
+ clobber_first
)
3806 /* We didn't manage to reduce the number of statements. Bail out. */
3807 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3808 fprintf (dump_file
, "Exceeded original number of stmts (%u)."
3809 " Not profitable to emit new sequence.\n",
3811 FOR_EACH_VEC_ELT (split_stores
, i
, split_store
)
3815 if (total_orig
<= total_new
)
3817 /* If number of estimated new statements is above estimated original
3818 statements, bail out too. */
3819 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3820 fprintf (dump_file
, "Estimated number of original stmts (%u)"
3821 " not larger than estimated number of new"
3823 total_orig
, total_new
);
3824 FOR_EACH_VEC_ELT (split_stores
, i
, split_store
)
3828 if (group
->stores
[0]->rhs_code
== INTEGER_CST
)
3830 bool all_orig
= true;
3831 FOR_EACH_VEC_ELT (split_stores
, i
, split_store
)
3832 if (!split_store
->orig
)
3839 unsigned int cnt
= split_stores
.length ();
3840 store_immediate_info
*store
;
3841 FOR_EACH_VEC_ELT (group
->stores
, i
, store
)
3842 if (gimple_clobber_p (store
->stmt
))
3844 /* Punt if we wouldn't make any real changes, i.e. keep all
3845 orig stmts + all clobbers. */
3846 if (cnt
== group
->stores
.length ())
3848 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3849 fprintf (dump_file
, "Exceeded original number of stmts (%u)."
3850 " Not profitable to emit new sequence.\n",
3852 FOR_EACH_VEC_ELT (split_stores
, i
, split_store
)
3859 gimple_stmt_iterator last_gsi
= gsi_for_stmt (group
->last_stmt
);
3860 gimple_seq seq
= NULL
;
3861 tree last_vdef
, new_vuse
;
3862 last_vdef
= gimple_vdef (group
->last_stmt
);
3863 new_vuse
= gimple_vuse (group
->last_stmt
);
3864 tree bswap_res
= NULL_TREE
;
3866 /* Clobbers are not removed. */
3867 if (gimple_clobber_p (group
->last_stmt
))
3869 new_vuse
= make_ssa_name (gimple_vop (cfun
), group
->last_stmt
);
3870 gimple_set_vdef (group
->last_stmt
, new_vuse
);
3873 if (group
->stores
[0]->rhs_code
== LROTATE_EXPR
3874 || group
->stores
[0]->rhs_code
== NOP_EXPR
)
3876 tree fndecl
= NULL_TREE
, bswap_type
= NULL_TREE
, load_type
;
3877 gimple
*ins_stmt
= group
->stores
[0]->ins_stmt
;
3878 struct symbolic_number
*n
= &group
->stores
[0]->n
;
3879 bool bswap
= group
->stores
[0]->rhs_code
== LROTATE_EXPR
;
3884 load_type
= bswap_type
= uint16_type_node
;
3887 load_type
= uint32_type_node
;
3890 fndecl
= builtin_decl_explicit (BUILT_IN_BSWAP32
);
3891 bswap_type
= TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl
)));
3895 load_type
= uint64_type_node
;
3898 fndecl
= builtin_decl_explicit (BUILT_IN_BSWAP64
);
3899 bswap_type
= TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl
)));
3906 /* If the loads have each vuse of the corresponding store,
3907 we've checked the aliasing already in try_coalesce_bswap and
3908 we want to sink the need load into seq. So need to use new_vuse
3912 if (n
->vuse
== NULL
)
3918 /* Update vuse in case it has changed by output_merged_stores. */
3919 n
->vuse
= gimple_vuse (ins_stmt
);
3921 bswap_res
= bswap_replace (gsi_start (seq
), ins_stmt
, fndecl
,
3922 bswap_type
, load_type
, n
, bswap
);
3923 gcc_assert (bswap_res
);
3926 gimple
*stmt
= NULL
;
3927 auto_vec
<gimple
*, 32> orig_stmts
;
3928 gimple_seq this_seq
;
3929 tree addr
= force_gimple_operand_1 (unshare_expr (base_addr
), &this_seq
,
3930 is_gimple_mem_ref_addr
, NULL_TREE
);
3931 gimple_seq_add_seq_without_update (&seq
, this_seq
);
3933 tree load_addr
[2] = { NULL_TREE
, NULL_TREE
};
3934 gimple_seq load_seq
[2] = { NULL
, NULL
};
3935 gimple_stmt_iterator load_gsi
[2] = { gsi_none (), gsi_none () };
3936 for (int j
= 0; j
< 2; ++j
)
3938 store_operand_info
&op
= group
->stores
[0]->ops
[j
];
3939 if (op
.base_addr
== NULL_TREE
)
3942 store_immediate_info
*infol
= group
->stores
.last ();
3943 if (gimple_vuse (op
.stmt
) == gimple_vuse (infol
->ops
[j
].stmt
))
3945 /* We can't pick the location randomly; while we've verified
3946 all the loads have the same vuse, they can be still in different
3947 basic blocks and we need to pick the one from the last bb:
3953 otherwise if we put the wider load at the q[0] load, we might
3954 segfault if q[1] is not mapped. */
3955 basic_block bb
= gimple_bb (op
.stmt
);
3956 gimple
*ostmt
= op
.stmt
;
3957 store_immediate_info
*info
;
3958 FOR_EACH_VEC_ELT (group
->stores
, i
, info
)
3960 gimple
*tstmt
= info
->ops
[j
].stmt
;
3961 basic_block tbb
= gimple_bb (tstmt
);
3962 if (dominated_by_p (CDI_DOMINATORS
, tbb
, bb
))
3968 load_gsi
[j
] = gsi_for_stmt (ostmt
);
3970 = force_gimple_operand_1 (unshare_expr (op
.base_addr
),
3971 &load_seq
[j
], is_gimple_mem_ref_addr
,
3974 else if (operand_equal_p (base_addr
, op
.base_addr
, 0))
3975 load_addr
[j
] = addr
;
3979 = force_gimple_operand_1 (unshare_expr (op
.base_addr
),
3980 &this_seq
, is_gimple_mem_ref_addr
,
3982 gimple_seq_add_seq_without_update (&seq
, this_seq
);
3986 FOR_EACH_VEC_ELT (split_stores
, i
, split_store
)
3988 const unsigned HOST_WIDE_INT try_size
= split_store
->size
;
3989 const unsigned HOST_WIDE_INT try_pos
= split_store
->bytepos
;
3990 const unsigned HOST_WIDE_INT try_bitpos
= try_pos
* BITS_PER_UNIT
;
3991 const unsigned HOST_WIDE_INT try_align
= split_store
->align
;
3992 const unsigned HOST_WIDE_INT try_offset
= try_pos
- start_byte_pos
;
3996 if (split_store
->orig
)
3998 /* If there is just a single non-clobber constituent store
3999 which covers the whole area, just reuse the lhs and rhs. */
4000 gimple
*orig_stmt
= NULL
;
4001 store_immediate_info
*store
;
4003 FOR_EACH_VEC_ELT (split_store
->orig_stores
, j
, store
)
4004 if (!gimple_clobber_p (store
->stmt
))
4006 orig_stmt
= store
->stmt
;
4009 dest
= gimple_assign_lhs (orig_stmt
);
4010 src
= gimple_assign_rhs1 (orig_stmt
);
4011 loc
= gimple_location (orig_stmt
);
4015 store_immediate_info
*info
;
4016 unsigned short clique
, base
;
4018 FOR_EACH_VEC_ELT (split_store
->orig_stores
, k
, info
)
4019 orig_stmts
.safe_push (info
->stmt
);
4021 = get_alias_type_for_stmts (orig_stmts
, false, &clique
, &base
);
4023 loc
= get_location_for_stmts (orig_stmts
);
4024 orig_stmts
.truncate (0);
4026 if (group
->string_concatenation
)
4028 = build_array_type_nelts (char_type_node
,
4029 try_size
/ BITS_PER_UNIT
);
4032 dest_type
= build_nonstandard_integer_type (try_size
, UNSIGNED
);
4033 dest_type
= build_aligned_type (dest_type
, try_align
);
4035 dest
= fold_build2 (MEM_REF
, dest_type
, addr
,
4036 build_int_cst (offset_type
, try_pos
));
4037 if (TREE_CODE (dest
) == MEM_REF
)
4039 MR_DEPENDENCE_CLIQUE (dest
) = clique
;
4040 MR_DEPENDENCE_BASE (dest
) = base
;
  if (bswap_res || group->string_concatenation)
    mask = integer_zero_node;
    mask = native_interpret_expr (dest_type,
                                  group->mask + try_offset,
       j < 1 + (split_store->orig_stores[0]->ops[1].val != NULL_TREE);
    store_operand_info &op = split_store->orig_stores[0]->ops[j];
    else if (group->string_concatenation)
      ops[j] = build_string (try_size / BITS_PER_UNIT,
                             (const char *) group->val + try_offset);
      TREE_TYPE (ops[j]) = dest_type;
    else if (op.base_addr)
      FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
        orig_stmts.safe_push (info->ops[j].stmt);
      offset_type = get_alias_type_for_stmts (orig_stmts, true,
      location_t load_loc = get_location_for_stmts (orig_stmts);
      orig_stmts.truncate (0);
      unsigned HOST_WIDE_INT load_align = group->load_align[j];
      unsigned HOST_WIDE_INT align_bitpos
        = known_alignment (try_bitpos
                           - split_store->orig_stores[0]->bitpos
      if (align_bitpos & (load_align - 1))
        load_align = least_bit_hwi (align_bitpos);
        = build_nonstandard_integer_type (try_size, UNSIGNED);
        = build_aligned_type (load_int_type, load_align);
      poly_uint64 load_pos
        = exact_div (try_bitpos
                     - split_store->orig_stores[0]->bitpos
      ops[j] = fold_build2 (MEM_REF, load_int_type, load_addr[j],
                            build_int_cst (offset_type, load_pos));
      if (TREE_CODE (ops[j]) == MEM_REF)
        MR_DEPENDENCE_CLIQUE (ops[j]) = clique;
        MR_DEPENDENCE_BASE (ops[j]) = base;
      if (!integer_zerop (mask))
        /* The load might load some bits (that will be masked off
           later on) uninitialized, avoid -W*uninitialized
           warnings in that case.  */
        TREE_NO_WARNING (ops[j]) = 1;
      stmt = gimple_build_assign (make_ssa_name (dest_type), ops[j]);
      gimple_set_location (stmt, load_loc);
      if (gsi_bb (load_gsi[j]))
        gimple_set_vuse (stmt, gimple_vuse (op.stmt));
        gimple_seq_add_stmt_without_update (&load_seq[j], stmt);
        gimple_set_vuse (stmt, new_vuse);
        gimple_seq_add_stmt_without_update (&seq, stmt);
      ops[j] = gimple_assign_lhs (stmt);
      enum tree_code inv_op
        = invert_op (split_store, j, dest_type, xor_mask);
      if (inv_op != NOP_EXPR)
        stmt = gimple_build_assign (make_ssa_name (dest_type),
                                    inv_op, ops[j], xor_mask);
        gimple_set_location (stmt, load_loc);
        ops[j] = gimple_assign_lhs (stmt);
        if (gsi_bb (load_gsi[j]))
          gimple_seq_add_stmt_without_update (&load_seq[j],
          gimple_seq_add_stmt_without_update (&seq, stmt);
      ops[j] = native_interpret_expr (dest_type,
                                      group->val + try_offset,
  switch (split_store->orig_stores[0]->rhs_code)
    FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
      tree rhs1 = gimple_assign_rhs1 (info->stmt);
      orig_stmts.safe_push (SSA_NAME_DEF_STMT (rhs1));
    bit_loc = get_location_for_stmts (orig_stmts);
    orig_stmts.truncate (0);
    = gimple_build_assign (make_ssa_name (dest_type),
                           split_store->orig_stores[0]->rhs_code,
    gimple_set_location (stmt, bit_loc);
    /* If there is just one load and there is a separate
       load_seq[0], emit the bitwise op right after it.  */
    if (load_addr[1] == NULL_TREE && gsi_bb (load_gsi[0]))
      gimple_seq_add_stmt_without_update (&load_seq[0], stmt);
    /* Otherwise, if at least one load is in seq, we need to
       emit the bitwise op right before the store.  If there
       are two loads and they are emitted somewhere else, it would
       be better to emit the bitwise op as early as possible;
       we don't track where that would be possible right now
       ...  */
      gimple_seq_add_stmt_without_update (&seq, stmt);
    src = gimple_assign_lhs (stmt);
    enum tree_code inv_op;
    inv_op = invert_op (split_store, 2, dest_type, xor_mask);
    if (inv_op != NOP_EXPR)
      stmt = gimple_build_assign (make_ssa_name (dest_type),
                                  inv_op, src, xor_mask);
      gimple_set_location (stmt, bit_loc);
      if (load_addr[1] == NULL_TREE && gsi_bb (load_gsi[0]))
        gimple_seq_add_stmt_without_update (&load_seq[0], stmt);
        gimple_seq_add_stmt_without_update (&seq, stmt);
      src = gimple_assign_lhs (stmt);
  if (!is_gimple_val (src))
    stmt = gimple_build_assign (make_ssa_name (TREE_TYPE (src)),
    gimple_seq_add_stmt_without_update (&seq, stmt);
    src = gimple_assign_lhs (stmt);
  if (!useless_type_conversion_p (dest_type, TREE_TYPE (src)))
    stmt = gimple_build_assign (make_ssa_name (dest_type),
    gimple_seq_add_stmt_without_update (&seq, stmt);
    src = gimple_assign_lhs (stmt);
  inv_op = invert_op (split_store, 2, dest_type, xor_mask);
  if (inv_op != NOP_EXPR)
    stmt = gimple_build_assign (make_ssa_name (dest_type),
                                inv_op, src, xor_mask);
    gimple_set_location (stmt, loc);
    gimple_seq_add_stmt_without_update (&seq, stmt);
    src = gimple_assign_lhs (stmt);

  /* If bit insertion is required, we use the source as an accumulator
     into which the successive bit-field values are manually inserted.
     FIXME: perhaps use BIT_INSERT_EXPR instead in some cases?  */
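  /* A purely illustrative sketch of the little-endian case: for a 5-bit
     field stored at bit 8 of a 32-bit destination (start_gap == 8,
     end_gap == 19), the value is narrowed with AND 0x1f if it is wider
     than 5 bits, converted to the 32-bit accumulator type, shifted left
     by start_gap == 8 and IORed into SRC; any step that would be a
     no-op is skipped.  */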
  if (group->bit_insertion)
    FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
      if (info->rhs_code == BIT_INSERT_EXPR
          && info->bitpos < try_bitpos + try_size
          && info->bitpos + info->bitsize > try_bitpos)
        /* Mask, truncate, convert to final type, shift and ior into
           the accumulator.  Note that every step can be a no-op.  */
        const HOST_WIDE_INT start_gap = info->bitpos - try_bitpos;
        const HOST_WIDE_INT end_gap
          = (try_bitpos + try_size) - (info->bitpos + info->bitsize);
        tree tem = info->ops[0].val;
        if (!INTEGRAL_TYPE_P (TREE_TYPE (tem)))
          const unsigned HOST_WIDE_INT size
            = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (tem)));
            = build_nonstandard_integer_type (size, UNSIGNED);
          tem = gimple_build (&seq, loc, VIEW_CONVERT_EXPR,
        if (TYPE_PRECISION (TREE_TYPE (tem)) <= info->bitsize)
            = build_nonstandard_integer_type (info->bitsize,
          tem = gimple_convert (&seq, loc, bitfield_type, tem);
        else if ((BYTES_BIG_ENDIAN ? start_gap : end_gap) > 0)
          const unsigned HOST_WIDE_INT imask
            = (HOST_WIDE_INT_1U << info->bitsize) - 1;
          tem = gimple_build (&seq, loc,
                              BIT_AND_EXPR, TREE_TYPE (tem), tem,
                              build_int_cst (TREE_TYPE (tem),
        const HOST_WIDE_INT shift
          = (BYTES_BIG_ENDIAN ? end_gap : start_gap);
          tem = gimple_build (&seq, loc,
                              RSHIFT_EXPR, TREE_TYPE (tem), tem,
                              build_int_cst (NULL_TREE, -shift));
        tem = gimple_convert (&seq, loc, dest_type, tem);
          tem = gimple_build (&seq, loc,
                              LSHIFT_EXPR, dest_type, tem,
                              build_int_cst (NULL_TREE, shift));
        src = gimple_build (&seq, loc,
                            BIT_IOR_EXPR, dest_type, tem, src);
  if (!integer_zerop (mask))
    tree tem = make_ssa_name (dest_type);
    tree load_src = unshare_expr (dest);
    /* The load might load some or all bits uninitialized,
       avoid -W*uninitialized warnings in that case.
       As optimization, it would be nice if all the bits are
       provably uninitialized (no stores at all yet or previous
       store a CLOBBER) we'd optimize away the load and replace
       ...  */
    TREE_NO_WARNING (load_src) = 1;
    stmt = gimple_build_assign (tem, load_src);
    gimple_set_location (stmt, loc);
    gimple_set_vuse (stmt, new_vuse);
    gimple_seq_add_stmt_without_update (&seq, stmt);
    /* FIXME: If there is a single chunk of zero bits in mask,
       perhaps use BIT_INSERT_EXPR instead?  */
    stmt = gimple_build_assign (make_ssa_name (dest_type),
                                BIT_AND_EXPR, tem, mask);
    gimple_set_location (stmt, loc);
    gimple_seq_add_stmt_without_update (&seq, stmt);
    tem = gimple_assign_lhs (stmt);
    if (TREE_CODE (src) == INTEGER_CST)
      src = wide_int_to_tree (dest_type,
                              wi::bit_and_not (wi::to_wide (src),
                                               wi::to_wide (mask)));
        = wide_int_to_tree (dest_type,
                            wi::bit_not (wi::to_wide (mask)));
      stmt = gimple_build_assign (make_ssa_name (dest_type),
                                  BIT_AND_EXPR, src, nmask);
      gimple_set_location (stmt, loc);
      gimple_seq_add_stmt_without_update (&seq, stmt);
      src = gimple_assign_lhs (stmt);
    stmt = gimple_build_assign (make_ssa_name (dest_type),
                                BIT_IOR_EXPR, tem, src);
    gimple_set_location (stmt, loc);
    gimple_seq_add_stmt_without_update (&seq, stmt);
    src = gimple_assign_lhs (stmt);
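    /* Taken together, the statements built above implement a
       read-modify-write of the bytes the merged stores do not cover;
       roughly (illustration only):

         _1 = *dest;
         _2 = _1 & MASK;     (keep the bits that must stay untouched)
         _3 = src & ~MASK;   (the newly stored bits, folded if src is constant)
         src = _2 | _3;

       before SRC is finally stored to DEST below.  */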
  stmt = gimple_build_assign (dest, src);
  gimple_set_location (stmt, loc);
  gimple_set_vuse (stmt, new_vuse);
  gimple_seq_add_stmt_without_update (&seq, stmt);
  if (group->lp_nr && stmt_could_throw_p (cfun, stmt))
    add_stmt_to_eh_lp (stmt, group->lp_nr);
  if (i < split_stores.length () - 1)
    new_vdef = make_ssa_name (gimple_vop (cfun), stmt);
    new_vdef = last_vdef;
  gimple_set_vdef (stmt, new_vdef);
  SSA_NAME_DEF_STMT (new_vdef) = stmt;
  new_vuse = new_vdef;
FOR_EACH_VEC_ELT (split_stores, i, split_store)
           "New sequence of %u stores to replace old one of %u stores\n",
           split_stores.length (), orig_num_stmts);
  if (dump_flags & TDF_DETAILS)
    print_gimple_seq (dump_file, seq, 0, TDF_VOPS | TDF_MEMSYMS);
if (gimple_clobber_p (group->last_stmt))
  update_stmt (group->last_stmt);
if (group->lp_nr > 0)
  /* We're going to insert a sequence of (potentially) throwing stores
     into an active EH region.  This means that we're going to create
     new basic blocks with EH edges pointing to the post landing pad
     and, therefore, to have to update its PHI nodes, if any.  For the
     virtual PHI node, we're going to use the VDEFs created above, but
     for the other nodes, we need to record the original reaching defs.  */
  eh_landing_pad lp = get_eh_landing_pad_from_number (group->lp_nr);
  basic_block lp_bb = label_to_block (cfun, lp->post_landing_pad);
  basic_block last_bb = gimple_bb (group->last_stmt);
  edge last_edge = find_edge (last_bb, lp_bb);
  auto_vec<tree, 16> last_defs;
  for (gpi = gsi_start_phis (lp_bb); !gsi_end_p (gpi); gsi_next (&gpi))
    gphi *phi = gpi.phi ();
    if (virtual_operand_p (gimple_phi_result (phi)))
      last_def = NULL_TREE;
      last_def = gimple_phi_arg_def (phi, last_edge->dest_idx);
    last_defs.safe_push (last_def);
  /* Do the insertion.  Then, if new basic blocks have been created in the
     process, rewind the chain of VDEFs created above to walk the new basic
     blocks and update the corresponding arguments of the PHI nodes.  */
  update_modified_stmts (seq);
  if (gimple_find_sub_bbs (seq, &last_gsi))
    while (last_vdef != gimple_vuse (group->last_stmt))
      gimple *stmt = SSA_NAME_DEF_STMT (last_vdef);
      if (stmt_could_throw_p (cfun, stmt))
        edge new_edge = find_edge (gimple_bb (stmt), lp_bb);
        for (gpi = gsi_start_phis (lp_bb), i = 0;
             gsi_next (&gpi), i++)
          gphi *phi = gpi.phi ();
          if (virtual_operand_p (gimple_phi_result (phi)))
            new_def = last_vdef;
            new_def = last_defs[i];
          add_phi_arg (phi, new_def, new_edge, UNKNOWN_LOCATION);
      last_vdef = gimple_vuse (stmt);
  gsi_insert_seq_after (&last_gsi, seq, GSI_SAME_STMT);
for (int j = 0; j < 2; ++j)
    gsi_insert_seq_after (&load_gsi[j], load_seq[j], GSI_SAME_STMT);
/* Process the merged_store_group objects created in the coalescing phase.
   The stores are all against the base object BASE.
   Try to output the widened stores and delete the original statements if
   successful.  Return true iff any changes were made.  */

imm_store_chain_info::output_merged_stores ()
  merged_store_group *merged_store;
  FOR_EACH_VEC_ELT (m_merged_store_groups, i, merged_store)
    if (dbg_cnt (store_merging)
        && output_merged_store (merged_store))
      store_immediate_info *store;
      FOR_EACH_VEC_ELT (merged_store->stores, j, store)
        gimple *stmt = store->stmt;
        gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
        /* Don't remove clobbers, they are still useful even if
           everything is overwritten afterwards.  */
        if (gimple_clobber_p (stmt))
        gsi_remove (&gsi, true);
        remove_stmt_from_eh_lp (stmt);
        if (stmt != merged_store->last_stmt)
          unlink_stmt_vdef (stmt);
          release_defs (stmt);
  if (ret && dump_file)
    fprintf (dump_file, "Merging successful!\n");
/* Coalesce the store_immediate_info objects recorded against the base object
   BASE in the first phase and output them.
   Delete the allocated structures.
   Return true if any changes were made.  */

imm_store_chain_info::terminate_and_process_chain ()
  /* Process store chain.  */
  if (m_store_info.length () > 1)
    ret = coalesce_immediate_stores ();
    ret = output_merged_stores ();
  /* Delete all the entries we allocated ourselves.  */
  store_immediate_info *info;
  FOR_EACH_VEC_ELT (m_store_info, i, info)
  merged_store_group *merged_info;
  FOR_EACH_VEC_ELT (m_merged_store_groups, i, merged_info)

/* Return true iff LHS is a destination potentially interesting for
   store merging.  In practice these are the codes that get_inner_reference
   handles.  */
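/* Illustrative, non-exhaustive examples: destinations like s.a, a[i].b,
   *p, or a VIEW_CONVERT_EXPR wrapped around such a reference are the
   kind of thing get_inner_reference can decompose into a base object
   plus a bit position, and hence the kind of LHS accepted here.  */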
lhs_valid_for_store_merging_p (tree lhs)
  switch (TREE_CODE (lhs))
    case ARRAY_RANGE_REF:
    case VIEW_CONVERT_EXPR:

/* Return true if the tree RHS is a constant we want to consider
   during store merging.  In practice accept all codes that
   native_encode_expr accepts.  */

rhs_valid_for_store_merging_p (tree rhs)
  unsigned HOST_WIDE_INT size;
  if (TREE_CODE (rhs) == CONSTRUCTOR
      && CONSTRUCTOR_NELTS (rhs) == 0
      && TYPE_SIZE_UNIT (TREE_TYPE (rhs))
      && tree_fits_uhwi_p (TYPE_SIZE_UNIT (TREE_TYPE (rhs))))
  return (GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (rhs))).is_constant (&size)
          && native_encode_expr (rhs, NULL, size) != 0);

/* Adjust *PBITPOS, *PBITREGION_START and *PBITREGION_END by BYTE_OFF bytes
   and return true on success or false on failure.  */
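/* For instance (illustration only), with BYTE_OFF == 4 and *PBITPOS == 3
   the result is *PBITPOS == 4 * BITS_PER_UNIT + 3 == 35 on the usual
   8-bit-byte targets, and a nonzero bit region is shifted by the same
   32 bits.  */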
adjust_bit_pos (poly_offset_int byte_off,
                poly_int64 *pbitpos,
                poly_uint64 *pbitregion_start,
                poly_uint64 *pbitregion_end)
  poly_offset_int bit_off = byte_off << LOG2_BITS_PER_UNIT;
  bit_off += *pbitpos;
  if (known_ge (bit_off, 0) && bit_off.to_shwi (pbitpos))
    if (maybe_ne (*pbitregion_end, 0U))
      bit_off = byte_off << LOG2_BITS_PER_UNIT;
      bit_off += *pbitregion_start;
      if (bit_off.to_uhwi (pbitregion_start))
        bit_off = byte_off << LOG2_BITS_PER_UNIT;
        bit_off += *pbitregion_end;
        if (!bit_off.to_uhwi (pbitregion_end))
          *pbitregion_end = 0;
      *pbitregion_end = 0;

/* If MEM is a memory reference usable for store merging (either as
   store destination or for loads), return the non-NULL base_addr
   and set *PBITSIZE, *PBITPOS, *PBITREGION_START and *PBITREGION_END.
   Otherwise return NULL, *PBITPOS should be still valid even for that
   case.  */

mem_valid_for_store_merging (tree mem, poly_uint64 *pbitsize,
                             poly_uint64 *pbitpos,
                             poly_uint64 *pbitregion_start,
                             poly_uint64 *pbitregion_end)
  poly_int64 bitsize, bitpos;
  poly_uint64 bitregion_start = 0, bitregion_end = 0;
  int unsignedp = 0, reversep = 0, volatilep = 0;
  tree base_addr = get_inner_reference (mem, &bitsize, &bitpos, &offset, &mode,
                                        &unsignedp, &reversep, &volatilep);
  *pbitsize = bitsize;
  if (known_eq (bitsize, 0))
  if (TREE_CODE (mem) == COMPONENT_REF
      && DECL_BIT_FIELD_TYPE (TREE_OPERAND (mem, 1)))
    get_bit_range (&bitregion_start, &bitregion_end, mem, &bitpos, &offset);
    if (maybe_ne (bitregion_end, 0U))
  /* We do not want to rewrite TARGET_MEM_REFs.  */
  if (TREE_CODE (base_addr) == TARGET_MEM_REF)
  /* In some cases get_inner_reference may return a
     MEM_REF [ptr + byteoffset].  For the purposes of this pass
     canonicalize the base_addr to MEM_REF [ptr] and take
     byteoffset into account in the bitpos.  This occurs in
     PR 23684 and this way we can catch more chains.  */
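  /* Illustrative example: an access that get_inner_reference decomposes
     as MEM_REF [ptr + 16] with bit position 8 is rewritten below so that
     the base becomes the bare pointer and the 16-byte offset is folded
     into the bit position, i.e. bitpos becomes 16 * BITS_PER_UNIT + 8.
     Stores whose bases only differ in such constant offsets then share
     one chain.  */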
  else if (TREE_CODE (base_addr) == MEM_REF)
    if (!adjust_bit_pos (mem_ref_offset (base_addr), &bitpos,
                         &bitregion_start, &bitregion_end))
    base_addr = TREE_OPERAND (base_addr, 0);
  /* get_inner_reference returns the base object, get at its
     address.  */
    if (maybe_lt (bitpos, 0))
    base_addr = build_fold_addr_expr (base_addr);
  /* If the access is variable offset then a base decl has to be
     address-taken to be able to emit pointer-based stores to it.
     ??? We might be able to get away with re-using the original
     base up to the first variable part and then wrapping that inside
     ...  */
  tree base = get_base_address (base_addr);
  if (!base || (DECL_P (base) && !TREE_ADDRESSABLE (base)))
  /* Similarly to above for the base, remove constant from the offset.  */
  if (TREE_CODE (offset) == PLUS_EXPR
      && TREE_CODE (TREE_OPERAND (offset, 1)) == INTEGER_CST
      && adjust_bit_pos (wi::to_poly_offset (TREE_OPERAND (offset, 1)),
                         &bitpos, &bitregion_start, &bitregion_end))
    offset = TREE_OPERAND (offset, 0);
  base_addr = build2 (POINTER_PLUS_EXPR, TREE_TYPE (base_addr),
  if (known_eq (bitregion_end, 0U))
    bitregion_start = round_down_to_byte_boundary (bitpos);
    bitregion_end = round_up_to_byte_boundary (bitpos + bitsize);
  *pbitsize = bitsize;
  *pbitregion_start = bitregion_start;
  *pbitregion_end = bitregion_end;
/* Return true if STMT is a load that can be used for store merging.
   In that case fill in *OP.  BITSIZE, BITPOS, BITREGION_START and
   BITREGION_END are properties of the corresponding store.  */

handled_load (gimple *stmt, store_operand_info *op,
              poly_uint64 bitsize, poly_uint64 bitpos,
              poly_uint64 bitregion_start, poly_uint64 bitregion_end)
  if (!is_gimple_assign (stmt))
  if (gimple_assign_rhs_code (stmt) == BIT_NOT_EXPR)
    tree rhs1 = gimple_assign_rhs1 (stmt);
    if (TREE_CODE (rhs1) == SSA_NAME
        && handled_load (SSA_NAME_DEF_STMT (rhs1), op, bitsize, bitpos,
                         bitregion_start, bitregion_end))
      /* Don't allow _1 = load; _2 = ~_1; _3 = ~_2; which should have
         been optimized earlier, but if allowed here, would confuse the
         multiple uses counting.  */
      op->bit_not_p = !op->bit_not_p;
  if (gimple_vuse (stmt)
      && gimple_assign_load_p (stmt)
      && !stmt_can_throw_internal (cfun, stmt)
      && !gimple_has_volatile_ops (stmt))
    tree mem = gimple_assign_rhs1 (stmt);
      = mem_valid_for_store_merging (mem, &op->bitsize, &op->bitpos,
                                     &op->bitregion_start,
                                     &op->bitregion_end);
    if (op->base_addr != NULL_TREE
        && known_eq (op->bitsize, bitsize)
        && multiple_p (op->bitpos - bitpos, BITS_PER_UNIT)
        && known_ge (op->bitpos - op->bitregion_start,
                     bitpos - bitregion_start)
        && known_ge (op->bitregion_end - op->bitpos,
                     bitregion_end - bitpos))
      op->bit_not_p = false;

/* Return the index number of the landing pad for STMT, if any.  */

lp_nr_for_store (gimple *stmt)
  if (!cfun->can_throw_non_call_exceptions || !cfun->eh)
  if (!stmt_could_throw_p (cfun, stmt))
  return lookup_stmt_eh_lp (stmt);
/* Record the store STMT for store merging optimization if it can be
   optimized.  Return true if any changes were made.  */

pass_store_merging::process_store (gimple *stmt)
  tree lhs = gimple_assign_lhs (stmt);
  tree rhs = gimple_assign_rhs1 (stmt);
  poly_uint64 bitsize, bitpos = 0;
  poly_uint64 bitregion_start = 0, bitregion_end = 0;
    = mem_valid_for_store_merging (lhs, &bitsize, &bitpos,
                                   &bitregion_start, &bitregion_end);
  if (known_eq (bitsize, 0U))
  bool invalid = (base_addr == NULL_TREE
                  || (maybe_gt (bitsize,
                                (unsigned int) MAX_BITSIZE_MODE_ANY_INT)
                      && TREE_CODE (rhs) != INTEGER_CST
                      && (TREE_CODE (rhs) != CONSTRUCTOR
                          || CONSTRUCTOR_NELTS (rhs) != 0)));
  enum tree_code rhs_code = ERROR_MARK;
  bool bit_not_p = false;
  struct symbolic_number n;
  gimple *ins_stmt = NULL;
  store_operand_info ops[2];
  else if (TREE_CODE (rhs) == STRING_CST)
    rhs_code = STRING_CST;
  else if (rhs_valid_for_store_merging_p (rhs))
    rhs_code = INTEGER_CST;
  else if (TREE_CODE (rhs) == SSA_NAME)
    gimple *def_stmt = SSA_NAME_DEF_STMT (rhs), *def_stmt1, *def_stmt2;
    if (!is_gimple_assign (def_stmt))
    else if (handled_load (def_stmt, &ops[0], bitsize, bitpos,
                           bitregion_start, bitregion_end))
    else if (gimple_assign_rhs_code (def_stmt) == BIT_NOT_EXPR)
      tree rhs1 = gimple_assign_rhs1 (def_stmt);
      if (TREE_CODE (rhs1) == SSA_NAME
          && is_gimple_assign (SSA_NAME_DEF_STMT (rhs1)))
        def_stmt = SSA_NAME_DEF_STMT (rhs1);
    if (rhs_code == ERROR_MARK && !invalid)
      switch ((rhs_code = gimple_assign_rhs_code (def_stmt)))
        rhs1 = gimple_assign_rhs1 (def_stmt);
        rhs2 = gimple_assign_rhs2 (def_stmt);
        if (TREE_CODE (rhs1) != SSA_NAME)
        def_stmt1 = SSA_NAME_DEF_STMT (rhs1);
        if (!is_gimple_assign (def_stmt1)
            || !handled_load (def_stmt1, &ops[0], bitsize, bitpos,
                              bitregion_start, bitregion_end))
        if (rhs_valid_for_store_merging_p (rhs2))
        else if (TREE_CODE (rhs2) != SSA_NAME)
        def_stmt2 = SSA_NAME_DEF_STMT (rhs2);
        if (!is_gimple_assign (def_stmt2))
        else if (!handled_load (def_stmt2, &ops[1], bitsize, bitpos,
                                bitregion_start, bitregion_end))
    unsigned HOST_WIDE_INT const_bitsize;
    if (bitsize.is_constant (&const_bitsize)
        && (const_bitsize % BITS_PER_UNIT) == 0
        && const_bitsize <= 64
        && multiple_p (bitpos, BITS_PER_UNIT))
      ins_stmt = find_bswap_or_nop_1 (def_stmt, &n, 12);
      for (unsigned HOST_WIDE_INT i = 0;
           i += BITS_PER_UNIT, nn >>= BITS_PER_MARKER)
        if ((nn & MARKER_MASK) == 0
            || (nn & MARKER_MASK) == MARKER_BYTE_UNKNOWN)
      rhs_code = LROTATE_EXPR;
      ops[0].base_addr = NULL_TREE;
      ops[1].base_addr = NULL_TREE;
      && bitsize.is_constant (&const_bitsize)
      && ((const_bitsize % BITS_PER_UNIT) != 0
          || !multiple_p (bitpos, BITS_PER_UNIT))
      && const_bitsize <= MAX_FIXED_MODE_SIZE)
      /* Bypass a conversion to the bit-field type.  */
          && is_gimple_assign (def_stmt)
          && CONVERT_EXPR_CODE_P (rhs_code))
        tree rhs1 = gimple_assign_rhs1 (def_stmt);
        if (TREE_CODE (rhs1) == SSA_NAME
            && INTEGRAL_TYPE_P (TREE_TYPE (rhs1)))
      rhs_code = BIT_INSERT_EXPR;
      ops[0].base_addr = NULL_TREE;
      ops[1].base_addr = NULL_TREE;
  unsigned HOST_WIDE_INT const_bitsize, const_bitpos;
  unsigned HOST_WIDE_INT const_bitregion_start, const_bitregion_end;
      || !bitsize.is_constant (&const_bitsize)
      || !bitpos.is_constant (&const_bitpos)
      || !bitregion_start.is_constant (&const_bitregion_start)
      || !bitregion_end.is_constant (&const_bitregion_end))
    return terminate_all_aliasing_chains (NULL, stmt);
  memset (&n, 0, sizeof (n));
  class imm_store_chain_info **chain_info = NULL;
  chain_info = m_stores.get (base_addr);
  store_immediate_info *info;
    unsigned int ord = (*chain_info)->m_store_info.length ();
    info = new store_immediate_info (const_bitsize, const_bitpos,
                                     const_bitregion_start,
                                     const_bitregion_end,
                                     stmt, ord, rhs_code, n, ins_stmt,
                                     bit_not_p, lp_nr_for_store (stmt),
    if (dump_file && (dump_flags & TDF_DETAILS))
      fprintf (dump_file, "Recording immediate store from stmt:\n");
      print_gimple_stmt (dump_file, stmt, 0);
    (*chain_info)->m_store_info.safe_push (info);
    ret |= terminate_all_aliasing_chains (chain_info, stmt);
    /* If we reach the limit of stores to merge in a chain, terminate and
       process the chain now.  */
    if ((*chain_info)->m_store_info.length ()
        == (unsigned int) param_max_stores_to_merge)
      if (dump_file && (dump_flags & TDF_DETAILS))
         "Reached maximum number of statements to merge:\n");
      ret |= terminate_and_process_chain (*chain_info);
  /* Store aliases any existing chain?  */
  ret |= terminate_all_aliasing_chains (NULL, stmt);
  /* Start a new chain.  */
  class imm_store_chain_info *new_chain
    = new imm_store_chain_info (m_stores_head, base_addr);
  info = new store_immediate_info (const_bitsize, const_bitpos,
                                   const_bitregion_start,
                                   const_bitregion_end,
                                   stmt, 0, rhs_code, n, ins_stmt,
                                   bit_not_p, lp_nr_for_store (stmt),
  new_chain->m_store_info.safe_push (info);
  m_stores.put (base_addr, new_chain);
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Starting new chain with statement:\n");
    print_gimple_stmt (dump_file, stmt, 0);
    fprintf (dump_file, "The base object is:\n");
    print_generic_expr (dump_file, base_addr);
    fprintf (dump_file, "\n");
/* Return true if STMT is a store valid for store merging.  */

store_valid_for_store_merging_p (gimple *stmt)
  return gimple_assign_single_p (stmt)
         && gimple_vdef (stmt)
         && lhs_valid_for_store_merging_p (gimple_assign_lhs (stmt))
         && (!gimple_has_volatile_ops (stmt) || gimple_clobber_p (stmt));

enum basic_block_status { BB_INVALID, BB_VALID, BB_EXTENDED_VALID };

/* Return the status of basic block BB wrt store merging.  */

static enum basic_block_status
get_status_for_store_merging (basic_block bb)
  unsigned int num_statements = 0;
  gimple_stmt_iterator gsi;
  for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    gimple *stmt = gsi_stmt (gsi);
    if (is_gimple_debug (stmt))
    if (store_valid_for_store_merging_p (stmt) && ++num_statements >= 2)
  if (num_statements == 0)
  if (cfun->can_throw_non_call_exceptions && cfun->eh
      && store_valid_for_store_merging_p (gimple_seq_last_stmt (bb_seq (bb)))
      && (e = find_fallthru_edge (bb->succs))
      && e->dest == bb->next_bb)
    return BB_EXTENDED_VALID;
  return num_statements >= 2 ? BB_VALID : BB_INVALID;
/* Entry point for the pass.  Go over each basic block recording chains of
   immediate stores.  Upon encountering a terminating statement (as defined
   by stmt_terminates_chain_p) process the recorded stores and emit the widened
   ...  */

pass_store_merging::execute (function *fun)
  hash_set<gimple *> orig_stmts;
  bool changed = false, open_chains = false;
  /* If the function can throw and catch non-call exceptions, we'll be trying
     to merge stores across different basic blocks so we need to first unsplit
     the EH edges in order to streamline the CFG of the function.  */
  if (cfun->can_throw_non_call_exceptions && cfun->eh)
    unsplit_eh_edges ();
  calculate_dominance_info (CDI_DOMINATORS);
  FOR_EACH_BB_FN (bb, fun)
    const basic_block_status bb_status = get_status_for_store_merging (bb);
    gimple_stmt_iterator gsi;
    if (open_chains && (bb_status == BB_INVALID || !single_pred_p (bb)))
      changed |= terminate_and_process_all_chains ();
      open_chains = false;
    if (bb_status == BB_INVALID)
    if (dump_file && (dump_flags & TDF_DETAILS))
      fprintf (dump_file, "Processing basic block <%d>:\n", bb->index);
    for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
      gimple *stmt = gsi_stmt (gsi);
      if (is_gimple_debug (stmt))
      if (gimple_has_volatile_ops (stmt) && !gimple_clobber_p (stmt))
        /* Terminate all chains.  */
        if (dump_file && (dump_flags & TDF_DETAILS))
          fprintf (dump_file, "Volatile access terminates "
        changed |= terminate_and_process_all_chains ();
        open_chains = false;
      if (store_valid_for_store_merging_p (stmt))
        changed |= process_store (stmt);
        changed |= terminate_all_aliasing_chains (NULL, stmt);
    if (bb_status == BB_EXTENDED_VALID)
      changed |= terminate_and_process_all_chains ();
      open_chains = false;
  changed |= terminate_and_process_all_chains ();
  /* If the function can throw and catch non-call exceptions and something
     changed during the pass, then the CFG has (very likely) changed too.  */
  if (cfun->can_throw_non_call_exceptions && cfun->eh && changed)
    free_dominance_info (CDI_DOMINATORS);
    return TODO_cleanup_cfg;

/* Construct and return a store merging pass object.  */

make_pass_store_merging (gcc::context *ctxt)
  return new pass_store_merging (ctxt);
namespace selftest {

/* Selftests for store merging helpers.  */

/* Assert that all elements of the byte arrays X and Y, both of length N,
   are equal.  */

verify_array_eq (unsigned char *x, unsigned char *y, unsigned int n)
  for (unsigned int i = 0; i < n; i++)
      fprintf (stderr, "Arrays do not match. X:\n");
      dump_char_array (stderr, x, n);
      fprintf (stderr, "Y:\n");
      dump_char_array (stderr, y, n);
    ASSERT_EQ (x[i], y[i]);

/* Test shift_bytes_in_array_left and that it carries bits across between
   bytes correctly.  */

verify_shift_bytes_in_array_left (void)
     00011111 | 11100000.  */
  unsigned char orig[2] = { 0xe0, 0x1f };
  unsigned char in[2];
  memcpy (in, orig, sizeof orig);
  unsigned char expected[2] = { 0x80, 0x7f };
  shift_bytes_in_array_left (in, sizeof (in), 2);
  verify_array_eq (in, expected, sizeof (in));
  memcpy (in, orig, sizeof orig);
  memcpy (expected, orig, sizeof orig);
  /* Check that shifting by zero doesn't change anything.  */
  shift_bytes_in_array_left (in, sizeof (in), 0);
  verify_array_eq (in, expected, sizeof (in));

/* Test shift_bytes_in_array_right and that it carries bits across between
   bytes correctly.  */

verify_shift_bytes_in_array_right (void)
     00011111 | 11100000.  */
  unsigned char orig[2] = { 0x1f, 0xe0 };
  unsigned char in[2];
  memcpy (in, orig, sizeof orig);
  unsigned char expected[2] = { 0x07, 0xf8 };
  shift_bytes_in_array_right (in, sizeof (in), 2);
  verify_array_eq (in, expected, sizeof (in));
  memcpy (in, orig, sizeof orig);
  memcpy (expected, orig, sizeof orig);
  /* Check that shifting by zero doesn't change anything.  */
  shift_bytes_in_array_right (in, sizeof (in), 0);
  verify_array_eq (in, expected, sizeof (in));

/* Test clear_bit_region that it clears exactly the bits asked and
   nothing else.  */

verify_clear_bit_region (void)
  /* Start with all bits set and test clearing various patterns in them.  */
  unsigned char orig[3] = { 0xff, 0xff, 0xff };
  unsigned char in[3];
  unsigned char expected[3];
  memcpy (in, orig, sizeof in);
  /* Check zeroing out all the bits.  */
  clear_bit_region (in, 0, 3 * BITS_PER_UNIT);
  expected[0] = expected[1] = expected[2] = 0;
  verify_array_eq (in, expected, sizeof in);
  memcpy (in, orig, sizeof in);
  /* Leave the first and last bits intact.  */
  clear_bit_region (in, 1, 3 * BITS_PER_UNIT - 2);
  verify_array_eq (in, expected, sizeof in);

/* Test clear_bit_region_be that it clears exactly the bits asked and
   nothing else.  */

verify_clear_bit_region_be (void)
  /* Start with all bits set and test clearing various patterns in them.  */
  unsigned char orig[3] = { 0xff, 0xff, 0xff };
  unsigned char in[3];
  unsigned char expected[3];
  memcpy (in, orig, sizeof in);
  /* Check zeroing out all the bits.  */
  clear_bit_region_be (in, BITS_PER_UNIT - 1, 3 * BITS_PER_UNIT);
  expected[0] = expected[1] = expected[2] = 0;
  verify_array_eq (in, expected, sizeof in);
  memcpy (in, orig, sizeof in);
  /* Leave the first and last bits intact.  */
  clear_bit_region_be (in, BITS_PER_UNIT - 2, 3 * BITS_PER_UNIT - 2);
  verify_array_eq (in, expected, sizeof in);

/* Run all of the selftests within this file.  */

store_merging_c_tests (void)
  verify_shift_bytes_in_array_left ();
  verify_shift_bytes_in_array_right ();
  verify_clear_bit_region ();
  verify_clear_bit_region_be ();

} // namespace selftest

#endif /* CHECKING_P.  */