/* GIMPLE store merging and byte swapping passes.
   Copyright (C) 2009-2022 Free Software Foundation, Inc.
   Contributed by ARM Ltd.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
/* The purpose of the store merging pass is to combine multiple memory stores
   of constant values, values loaded from memory, bitwise operations on those,
   or bit-field values, to consecutive locations, into fewer wider stores.

   For example, if we have a sequence performing four byte stores to
   consecutive memory locations:
   [p     ] := imm1;
   [p + 1B] := imm2;
   [p + 2B] := imm3;
   [p + 3B] := imm4;
   we can transform this into a single 4-byte store if the target supports it:
   [p] := imm1:imm2:imm3:imm4 concatenated according to endianness.

   Or:
   [p     ] := [q     ];
   [p + 1B] := [q + 1B];
   [p + 2B] := [q + 2B];
   [p + 3B] := [q + 3B];
   if there is no overlap can be transformed into a single 4-byte
   load followed by a single 4-byte store.

   Or:
   [p     ] := [q     ] ^ imm1;
   [p + 1B] := [q + 1B] ^ imm2;
   [p + 2B] := [q + 2B] ^ imm3;
   [p + 3B] := [q + 3B] ^ imm4;
   if there is no overlap can be transformed into a single 4-byte
   load, xored with imm1:imm2:imm3:imm4 and stored using a single 4-byte store.

   Or:
   [p:1 ] := imm;
   [p:31] := val & 0x7FFFFFFF;
   we can transform this into a single 4-byte store if the target supports it:
   [p] := imm:(val & 0x7FFFFFFF) concatenated according to endianness.
   The algorithm is applied to each basic block in three phases:

   1) Scan through the basic block and record assignments to destinations
   that can be expressed as a store to memory of a certain size at a certain
   bit offset from base expressions we can handle.  For bit-fields we also
   record the surrounding bit region, i.e. bits that could be stored in
   a read-modify-write operation when storing the bit-field.  Record store
   chains to different bases in a hash_map (m_stores) and make sure to
   terminate such chains when appropriate (for example when the stored
   values get used subsequently).
   These stores can be a result of structure element initializers, array stores
   etc.  A store_immediate_info object is recorded for every such store.
   Record as many such assignments to a single base as possible until a
   statement that interferes with the store sequence is encountered.
   Each store has up to 2 operands, which can be either a constant, a memory
   load or an SSA name, from which the value to be stored can be computed.
   At most one of the operands can be a constant.  The operands are recorded
   in a store_operand_info struct.
   2) Analyze the chains of stores recorded in phase 1) (i.e. the vector of
   store_immediate_info objects) and coalesce contiguous stores into
   merged_store_group objects.  For bit-field stores we don't require the
   stores themselves to be contiguous, only their surrounding bit regions
   have to be contiguous.  If the expression being stored is different
   between adjacent stores, such as one store storing a constant and the
   following one storing a value loaded from memory, or if the loaded memory
   objects are not adjacent, a new merged_store_group is created as well.

   For example, given the stores:
   [p     ] := imm1;
   [p + 1B] := imm2;
   [p + 3B] := imm3;
   [p + 4B] := imm4;
   [p + 5B] := imm5;
   [p + 6B] := imm6;
   this phase would produce two merged_store_group objects, one recording the
   two bytes stored in the memory region [p : p + 1] and another
   recording the four bytes stored in the memory region [p + 3 : p + 6].
   3) The merged_store_group objects produced in phase 2) are processed
   to generate the sequence of wider stores that set each contiguous memory
   region to the sequence of bytes that corresponds to it.  This may emit
   multiple stores per store group to handle contiguous stores that are not
   of a size that is a power of 2.  For example it can try to emit a 40-bit
   store as a 32-bit store followed by an 8-bit store.
   We try to emit stores as wide as we can while respecting STRICT_ALIGNMENT
   and TARGET_SLOW_UNALIGNED_ACCESS settings.
   Note on endianness and example:
   Consider 2 contiguous 16-bit stores followed by 2 contiguous 8-bit stores:
   [p     ] := 0x1234;
   [p + 2B] := 0x5678;
   [p + 4B] := 0xab;
   [p + 5B] := 0xcd;

   The memory layout for little-endian (LE) and big-endian (BE) must be:
   p |LE|BE|
   ---------
   0 |34|12|
   1 |12|34|
   2 |78|56|
   3 |56|78|
   4 |ab|ab|
   5 |cd|cd|

   To merge these into a single 48-bit merged value 'val' in phase 2)
   on little-endian we insert stores to higher (consecutive) bitpositions
   into the most significant bits of the merged value.
   The final merged value would be: 0xcdab56781234

   For big-endian we insert stores to higher bitpositions into the least
   significant bits of the merged value.
   The final merged value would be: 0x12345678abcd

   Then, in phase 3), we want to emit this 48-bit value as a 32-bit store
   followed by a 16-bit store.  Again, we must consider endianness when
   breaking down the 48-bit value 'val' computed above.
   For little-endian we emit:
   [p]      (32-bit) := 0x56781234; // val & 0x0000ffffffff;
   [p + 4B] (16-bit) := 0xcdab;     // (val & 0xffff00000000) >> 32;
   whereas for big-endian we emit:
   [p]      (32-bit) := 0x12345678; // (val & 0xffffffff0000) >> 16;
   [p + 4B] (16-bit) := 0xabcd;     // val & 0x00000000ffff;  */
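
/* As an illustrative sketch of the source-level pattern targeted by store
   merging (the function name and constants below are made up for this
   example, not taken from this file), consider:

     void
     set_bytes (unsigned char *p)
     {
       p[0] = 0x12;
       p[1] = 0x34;
       p[2] = 0x56;
       p[3] = 0x78;
     }

   Assuming the target supports a (possibly unaligned) 4-byte store, the four
   byte stores can be replaced by a single 32-bit store of 0x78563412 on a
   little-endian target or 0x12345678 on a big-endian one.  */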
143 #include "coretypes.h"
147 #include "builtins.h"
148 #include "fold-const.h"
149 #include "tree-pass.h"
151 #include "gimple-pretty-print.h"
153 #include "fold-const.h"
154 #include "print-tree.h"
155 #include "tree-hash-traits.h"
156 #include "gimple-iterator.h"
157 #include "gimplify.h"
158 #include "gimple-fold.h"
159 #include "stor-layout.h"
162 #include "cfgcleanup.h"
163 #include "tree-cfg.h"
167 #include "gimplify-me.h"
169 #include "expr.h" /* For get_bit_range. */
170 #include "optabs-tree.h"
172 #include "selftest.h"
174 /* The maximum size (in bits) of the stores this pass should generate. */
175 #define MAX_STORE_BITSIZE (BITS_PER_WORD)
176 #define MAX_STORE_BYTES (MAX_STORE_BITSIZE / BITS_PER_UNIT)
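/* For example, on a target with 64-bit words this limits the generated
   stores to 64 bits, i.e. MAX_STORE_BYTES == 8.  */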
178 /* Limit to bound the number of aliasing checks for loads with the same
179 vuse as the corresponding store. */
180 #define MAX_STORE_ALIAS_CHECKS 64
/* Statistics recorded by the bswap pass.  */
static struct
{
  /* Number of hand-written 16-bit nop / bswaps found.  */
  int found_16bit;
  /* Number of hand-written 32-bit nop / bswaps found.  */
  int found_32bit;
  /* Number of hand-written 64-bit nop / bswaps found.  */
  int found_64bit;
} nop_stats, bswap_stats;
/* A symbolic number structure is used to detect byte permutation and selection
   patterns of a source.  To achieve that, its field N contains an artificial
   number consisting of BITS_PER_MARKER sized markers tracking where each
   byte comes from in the source:

   0	   - target byte has the value 0
   FF	   - target byte has an unknown value (e.g. due to sign extension)
   1..size - marker value is the byte index in the source (0 for lsb).

   To detect permutations on memory sources (arrays and structures), a symbolic
   number is also associated with:
   - a base address BASE_ADDR and an OFFSET giving the address of the source;
   - a range which gives the difference between the highest and lowest accessed
     memory location to make such a symbolic number;
   - the address SRC of the source element of lowest address as a convenience
     to easily get BASE_ADDR + offset + lowest bytepos;
   - the number of expressions N_OPS bitwise ored together to represent
     an approximate cost of the computation.

   Note 1: the range is different from size as size reflects the size of the
   type of the current expression.  For instance, for an array char a[],
   (short) a[0] | (short) a[3] would have a size of 2 but a range of 4 while
   (short) a[0] | ((short) a[0] << 1) would still have a size of 2 but this
   time a range of 1 since there is only one memory location accessed.

   Note 2: for non-memory sources, range holds the same value as size.

   Note 3: SRC points to the SSA_NAME in case of non-memory source.  */
struct symbolic_number {
  uint64_t n;
  tree type;
  tree base_addr;
  tree offset;
  poly_int64_pod bytepos;
  tree src;
  tree alias_set;
  tree vuse;
  unsigned HOST_WIDE_INT range;
  int n_ops;
};
238 #define BITS_PER_MARKER 8
239 #define MARKER_MASK ((1 << BITS_PER_MARKER) - 1)
240 #define MARKER_BYTE_UNKNOWN MARKER_MASK
241 #define HEAD_MARKER(n, size) \
242 ((n) & ((uint64_t) MARKER_MASK << (((size) - 1) * BITS_PER_MARKER)))
244 /* The number which the find_bswap_or_nop_1 result should match in
245 order to have a nop. The number is masked according to the size of
246 the symbolic number before using it. */
247 #define CMPNOP (sizeof (int64_t) < 8 ? 0 : \
248 (uint64_t)0x08070605 << 32 | 0x04030201)
250 /* The number which the find_bswap_or_nop_1 result should match in
251 order to have a byte swap. The number is masked according to the
252 size of the symbolic number before using it. */
253 #define CMPXCHG (sizeof (int64_t) < 8 ? 0 : \
254 (uint64_t)0x01020304 << 32 | 0x05060708)
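
/* As an illustrative example, for a 4-byte symbolic number the adjusted
   values are 0x04030201 for CMPNOP (an identity, i.e. a load in target
   endianness) and 0x01020304 for CMPXCHG (a full byte swap).  */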
256 /* Perform a SHIFT or ROTATE operation by COUNT bits on symbolic
257 number N. Return false if the requested operation is not permitted
258 on a symbolic number. */
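
/* As an illustrative example, for a 4-byte unsigned value with symbolic
   number 0x04030201, a right shift by 8 bits yields 0x00040302 and a left
   rotate by 8 bits yields 0x03020104.  */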
static bool
do_shift_rotate (enum tree_code code,
		 struct symbolic_number *n,
		 int count)
{
  int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
  uint64_t head_marker;

  if (count < 0
      || count >= TYPE_PRECISION (n->type)
      || count % BITS_PER_UNIT != 0)
    return false;
  count = (count / BITS_PER_UNIT) * BITS_PER_MARKER;

  /* Zero out the extra bits of N in order to avoid them being shifted
     into the significant bits.  */
  if (size < 64 / BITS_PER_MARKER)
    n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;

  switch (code)
    {
    case LSHIFT_EXPR:
      n->n <<= count;
      break;
    case RSHIFT_EXPR:
      head_marker = HEAD_MARKER (n->n, size);
      n->n >>= count;
      /* Arithmetic shift of signed type: result is dependent on the value.  */
      if (!TYPE_UNSIGNED (n->type) && head_marker)
	for (i = 0; i < count / BITS_PER_MARKER; i++)
	  n->n |= (uint64_t) MARKER_BYTE_UNKNOWN
		  << ((size - 1 - i) * BITS_PER_MARKER);
      break;
    case LROTATE_EXPR:
      n->n = (n->n << count) | (n->n >> ((size * BITS_PER_MARKER) - count));
      break;
    case RROTATE_EXPR:
      n->n = (n->n >> count) | (n->n << ((size * BITS_PER_MARKER) - count));
      break;
    default:
      return false;
    }
  /* Zero unused bits for size.  */
  if (size < 64 / BITS_PER_MARKER)
    n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
  return true;
}
/* Perform sanity checking for the symbolic number N and the gimple
   statement STMT.  */

inline bool
verify_symbolic_number_p (struct symbolic_number *n, gimple *stmt)
{
  tree lhs_type;

  lhs_type = TREE_TYPE (gimple_get_lhs (stmt));

  if (TREE_CODE (lhs_type) != INTEGER_TYPE
      && TREE_CODE (lhs_type) != ENUMERAL_TYPE)
    return false;

  if (TYPE_PRECISION (lhs_type) != TYPE_PRECISION (n->type))
    return false;

  return true;
}
/* Initialize the symbolic number N for the bswap pass from the base element
   SRC manipulated by the bitwise OR expression.  */

static bool
init_symbolic_number (struct symbolic_number *n, tree src)
{
  int size;

  if (!INTEGRAL_TYPE_P (TREE_TYPE (src)) && !POINTER_TYPE_P (TREE_TYPE (src)))
    return false;

  n->base_addr = n->offset = n->alias_set = n->vuse = NULL_TREE;
  n->src = src;

  /* Set up the symbolic number N by setting each byte to a value between 1 and
     the byte size of rhs1.  The highest order byte is set to n->size and the
     lowest order byte to 1.  */
  n->type = TREE_TYPE (src);
  size = TYPE_PRECISION (n->type);
  if (size % BITS_PER_UNIT != 0)
    return false;
  size /= BITS_PER_UNIT;
  if (size > 64 / BITS_PER_MARKER)
    return false;
  n->range = size;
  n->n = CMPNOP;
  n->n_ops = 1;

  if (size < 64 / BITS_PER_MARKER)
    n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;

  return true;
}
/* Check if STMT might be a byte swap or a nop from a memory source and return
   the answer.  If so, REF is that memory source and the base of the memory
   area accessed and the offset of the access from that base are recorded
   in N.  */
367 find_bswap_or_nop_load (gimple
*stmt
, tree ref
, struct symbolic_number
*n
)
369 /* Leaf node is an array or component ref. Memorize its base and
370 offset from base to compare to other such leaf node. */
371 poly_int64 bitsize
, bitpos
, bytepos
;
373 int unsignedp
, reversep
, volatilep
;
374 tree offset
, base_addr
;
376 /* Not prepared to handle PDP endian. */
377 if (BYTES_BIG_ENDIAN
!= WORDS_BIG_ENDIAN
)
380 if (!gimple_assign_load_p (stmt
) || gimple_has_volatile_ops (stmt
))
383 base_addr
= get_inner_reference (ref
, &bitsize
, &bitpos
, &offset
, &mode
,
384 &unsignedp
, &reversep
, &volatilep
);
386 if (TREE_CODE (base_addr
) == TARGET_MEM_REF
)
387 /* Do not rewrite TARGET_MEM_REF. */
389 else if (TREE_CODE (base_addr
) == MEM_REF
)
391 poly_offset_int bit_offset
= 0;
392 tree off
= TREE_OPERAND (base_addr
, 1);
394 if (!integer_zerop (off
))
396 poly_offset_int boff
= mem_ref_offset (base_addr
);
397 boff
<<= LOG2_BITS_PER_UNIT
;
401 base_addr
= TREE_OPERAND (base_addr
, 0);
403 /* Avoid returning a negative bitpos as this may wreak havoc later. */
404 if (maybe_lt (bit_offset
, 0))
406 tree byte_offset
= wide_int_to_tree
407 (sizetype
, bits_to_bytes_round_down (bit_offset
));
408 bit_offset
= num_trailing_bits (bit_offset
);
410 offset
= size_binop (PLUS_EXPR
, offset
, byte_offset
);
412 offset
= byte_offset
;
415 bitpos
+= bit_offset
.force_shwi ();
418 base_addr
= build_fold_addr_expr (base_addr
);
420 if (!multiple_p (bitpos
, BITS_PER_UNIT
, &bytepos
))
422 if (!multiple_p (bitsize
, BITS_PER_UNIT
))
427 if (!init_symbolic_number (n
, ref
))
429 n
->base_addr
= base_addr
;
431 n
->bytepos
= bytepos
;
432 n
->alias_set
= reference_alias_ptr_type (ref
);
433 n
->vuse
= gimple_vuse (stmt
);
/* Compute the symbolic number N representing the result of a bitwise OR,
   bitwise XOR or plus on the two symbolic numbers N1 and N2 whose source
   statements are respectively SOURCE_STMT1 and SOURCE_STMT2.  CODE is the
   operation.  */
442 perform_symbolic_merge (gimple
*source_stmt1
, struct symbolic_number
*n1
,
443 gimple
*source_stmt2
, struct symbolic_number
*n2
,
444 struct symbolic_number
*n
, enum tree_code code
)
449 struct symbolic_number
*n_start
;
451 tree rhs1
= gimple_assign_rhs1 (source_stmt1
);
452 if (TREE_CODE (rhs1
) == BIT_FIELD_REF
453 && TREE_CODE (TREE_OPERAND (rhs1
, 0)) == SSA_NAME
)
454 rhs1
= TREE_OPERAND (rhs1
, 0);
455 tree rhs2
= gimple_assign_rhs1 (source_stmt2
);
456 if (TREE_CODE (rhs2
) == BIT_FIELD_REF
457 && TREE_CODE (TREE_OPERAND (rhs2
, 0)) == SSA_NAME
)
458 rhs2
= TREE_OPERAND (rhs2
, 0);
460 /* Sources are different, cancel bswap if they are not memory location with
461 the same base (array, structure, ...). */
465 HOST_WIDE_INT start1
, start2
, start_sub
, end_sub
, end1
, end2
, end
;
466 struct symbolic_number
*toinc_n_ptr
, *n_end
;
467 basic_block bb1
, bb2
;
469 if (!n1
->base_addr
|| !n2
->base_addr
470 || !operand_equal_p (n1
->base_addr
, n2
->base_addr
, 0))
473 if (!n1
->offset
!= !n2
->offset
474 || (n1
->offset
&& !operand_equal_p (n1
->offset
, n2
->offset
, 0)))
478 if (!(n2
->bytepos
- n1
->bytepos
).is_constant (&start2
))
484 start_sub
= start2
- start1
;
489 start_sub
= start1
- start2
;
492 bb1
= gimple_bb (source_stmt1
);
493 bb2
= gimple_bb (source_stmt2
);
494 if (dominated_by_p (CDI_DOMINATORS
, bb1
, bb2
))
495 source_stmt
= source_stmt1
;
497 source_stmt
= source_stmt2
;
499 /* Find the highest address at which a load is performed and
500 compute related info. */
501 end1
= start1
+ (n1
->range
- 1);
502 end2
= start2
+ (n2
->range
- 1);
506 end_sub
= end2
- end1
;
511 end_sub
= end1
- end2
;
513 n_end
= (end2
> end1
) ? n2
: n1
;
515 /* Find symbolic number whose lsb is the most significant. */
516 if (BYTES_BIG_ENDIAN
)
517 toinc_n_ptr
= (n_end
== n1
) ? n2
: n1
;
519 toinc_n_ptr
= (n_start
== n1
) ? n2
: n1
;
521 n
->range
= end
- MIN (start1
, start2
) + 1;
523 /* Check that the range of memory covered can be represented by
524 a symbolic number. */
525 if (n
->range
> 64 / BITS_PER_MARKER
)
528 /* Reinterpret byte marks in symbolic number holding the value of
529 bigger weight according to target endianness. */
530 inc
= BYTES_BIG_ENDIAN
? end_sub
: start_sub
;
531 size
= TYPE_PRECISION (n1
->type
) / BITS_PER_UNIT
;
532 for (i
= 0; i
< size
; i
++, inc
<<= BITS_PER_MARKER
)
535 = (toinc_n_ptr
->n
>> (i
* BITS_PER_MARKER
)) & MARKER_MASK
;
536 if (marker
&& marker
!= MARKER_BYTE_UNKNOWN
)
537 toinc_n_ptr
->n
+= inc
;
542 n
->range
= n1
->range
;
544 source_stmt
= source_stmt1
;
548 || alias_ptr_types_compatible_p (n1
->alias_set
, n2
->alias_set
))
549 n
->alias_set
= n1
->alias_set
;
551 n
->alias_set
= ptr_type_node
;
552 n
->vuse
= n_start
->vuse
;
553 n
->base_addr
= n_start
->base_addr
;
554 n
->offset
= n_start
->offset
;
555 n
->src
= n_start
->src
;
556 n
->bytepos
= n_start
->bytepos
;
557 n
->type
= n_start
->type
;
558 size
= TYPE_PRECISION (n
->type
) / BITS_PER_UNIT
;
559 uint64_t res_n
= n1
->n
| n2
->n
;
561 for (i
= 0, mask
= MARKER_MASK
; i
< size
; i
++, mask
<<= BITS_PER_MARKER
)
563 uint64_t masked1
, masked2
;
565 masked1
= n1
->n
& mask
;
566 masked2
= n2
->n
& mask
;
567 /* If at least one byte is 0, all of 0 | x == 0 ^ x == 0 + x == x. */
568 if (masked1
&& masked2
)
570 /* + can carry into upper bits, just punt. */
571 if (code
== PLUS_EXPR
)
573 /* x | x is still x. */
574 if (code
== BIT_IOR_EXPR
&& masked1
== masked2
)
576 if (code
== BIT_XOR_EXPR
)
578 /* x ^ x is 0, but MARKER_BYTE_UNKNOWN stands for
579 unknown values and unknown ^ unknown is unknown. */
580 if (masked1
== masked2
581 && masked1
!= ((uint64_t) MARKER_BYTE_UNKNOWN
582 << i
* BITS_PER_MARKER
))
588 /* Otherwise set the byte to unknown, it might still be
594 n
->n_ops
= n1
->n_ops
+ n2
->n_ops
;
/* find_bswap_or_nop_1 invokes itself recursively with N and tries to perform
   the operation given by the rhs of STMT on the result.  If the operation
   could successfully be executed the function returns a gimple stmt whose
   rhs's first tree is the expression of the source operand and NULL
   otherwise.  */
606 find_bswap_or_nop_1 (gimple
*stmt
, struct symbolic_number
*n
, int limit
)
609 tree rhs1
, rhs2
= NULL
;
610 gimple
*rhs1_stmt
, *rhs2_stmt
, *source_stmt1
;
611 enum gimple_rhs_class rhs_class
;
613 if (!limit
|| !is_gimple_assign (stmt
))
616 rhs1
= gimple_assign_rhs1 (stmt
);
618 if (find_bswap_or_nop_load (stmt
, rhs1
, n
))
621 /* Handle BIT_FIELD_REF. */
622 if (TREE_CODE (rhs1
) == BIT_FIELD_REF
623 && TREE_CODE (TREE_OPERAND (rhs1
, 0)) == SSA_NAME
)
625 if (!tree_fits_uhwi_p (TREE_OPERAND (rhs1
, 1))
626 || !tree_fits_uhwi_p (TREE_OPERAND (rhs1
, 2)))
629 unsigned HOST_WIDE_INT bitsize
= tree_to_uhwi (TREE_OPERAND (rhs1
, 1));
630 unsigned HOST_WIDE_INT bitpos
= tree_to_uhwi (TREE_OPERAND (rhs1
, 2));
631 if (bitpos
% BITS_PER_UNIT
== 0
632 && bitsize
% BITS_PER_UNIT
== 0
633 && init_symbolic_number (n
, TREE_OPERAND (rhs1
, 0)))
635 /* Handle big-endian bit numbering in BIT_FIELD_REF. */
636 if (BYTES_BIG_ENDIAN
)
637 bitpos
= TYPE_PRECISION (n
->type
) - bitpos
- bitsize
;
640 if (!do_shift_rotate (RSHIFT_EXPR
, n
, bitpos
))
645 uint64_t tmp
= (1 << BITS_PER_UNIT
) - 1;
646 for (unsigned i
= 0; i
< bitsize
/ BITS_PER_UNIT
;
647 i
++, tmp
<<= BITS_PER_UNIT
)
648 mask
|= (uint64_t) MARKER_MASK
<< (i
* BITS_PER_MARKER
);
652 n
->type
= TREE_TYPE (rhs1
);
654 n
->range
= TYPE_PRECISION (n
->type
) / BITS_PER_UNIT
;
656 return verify_symbolic_number_p (n
, stmt
) ? stmt
: NULL
;
662 if (TREE_CODE (rhs1
) != SSA_NAME
)
665 code
= gimple_assign_rhs_code (stmt
);
666 rhs_class
= gimple_assign_rhs_class (stmt
);
667 rhs1_stmt
= SSA_NAME_DEF_STMT (rhs1
);
669 if (rhs_class
== GIMPLE_BINARY_RHS
)
670 rhs2
= gimple_assign_rhs2 (stmt
);
672 /* Handle unary rhs and binary rhs with integer constants as second
675 if (rhs_class
== GIMPLE_UNARY_RHS
676 || (rhs_class
== GIMPLE_BINARY_RHS
677 && TREE_CODE (rhs2
) == INTEGER_CST
))
679 if (code
!= BIT_AND_EXPR
680 && code
!= LSHIFT_EXPR
681 && code
!= RSHIFT_EXPR
682 && code
!= LROTATE_EXPR
683 && code
!= RROTATE_EXPR
684 && !CONVERT_EXPR_CODE_P (code
))
687 source_stmt1
= find_bswap_or_nop_1 (rhs1_stmt
, n
, limit
- 1);
689 /* If find_bswap_or_nop_1 returned NULL, STMT is a leaf node and
690 we have to initialize the symbolic number. */
693 if (gimple_assign_load_p (stmt
)
694 || !init_symbolic_number (n
, rhs1
))
703 int i
, size
= TYPE_PRECISION (n
->type
) / BITS_PER_UNIT
;
704 uint64_t val
= int_cst_value (rhs2
), mask
= 0;
705 uint64_t tmp
= (1 << BITS_PER_UNIT
) - 1;
707 /* Only constants masking full bytes are allowed. */
708 for (i
= 0; i
< size
; i
++, tmp
<<= BITS_PER_UNIT
)
709 if ((val
& tmp
) != 0 && (val
& tmp
) != tmp
)
712 mask
|= (uint64_t) MARKER_MASK
<< (i
* BITS_PER_MARKER
);
721 if (!do_shift_rotate (code
, n
, (int) TREE_INT_CST_LOW (rhs2
)))
726 int i
, type_size
, old_type_size
;
729 type
= TREE_TYPE (gimple_assign_lhs (stmt
));
730 type_size
= TYPE_PRECISION (type
);
731 if (type_size
% BITS_PER_UNIT
!= 0)
733 type_size
/= BITS_PER_UNIT
;
734 if (type_size
> 64 / BITS_PER_MARKER
)
737 /* Sign extension: result is dependent on the value. */
738 old_type_size
= TYPE_PRECISION (n
->type
) / BITS_PER_UNIT
;
739 if (!TYPE_UNSIGNED (n
->type
) && type_size
> old_type_size
740 && HEAD_MARKER (n
->n
, old_type_size
))
741 for (i
= 0; i
< type_size
- old_type_size
; i
++)
742 n
->n
|= (uint64_t) MARKER_BYTE_UNKNOWN
743 << ((type_size
- 1 - i
) * BITS_PER_MARKER
);
745 if (type_size
< 64 / BITS_PER_MARKER
)
747 /* If STMT casts to a smaller type mask out the bits not
748 belonging to the target type. */
749 n
->n
&= ((uint64_t) 1 << (type_size
* BITS_PER_MARKER
)) - 1;
753 n
->range
= type_size
;
759 return verify_symbolic_number_p (n
, stmt
) ? source_stmt1
: NULL
;
762 /* Handle binary rhs. */
764 if (rhs_class
== GIMPLE_BINARY_RHS
)
766 struct symbolic_number n1
, n2
;
767 gimple
*source_stmt
, *source_stmt2
;
769 if (!rhs2
|| TREE_CODE (rhs2
) != SSA_NAME
)
772 rhs2_stmt
= SSA_NAME_DEF_STMT (rhs2
);
779 source_stmt1
= find_bswap_or_nop_1 (rhs1_stmt
, &n1
, limit
- 1);
784 source_stmt2
= find_bswap_or_nop_1 (rhs2_stmt
, &n2
, limit
- 1);
789 if (TYPE_PRECISION (n1
.type
) != TYPE_PRECISION (n2
.type
))
792 if (n1
.vuse
!= n2
.vuse
)
796 = perform_symbolic_merge (source_stmt1
, &n1
, source_stmt2
, &n2
, n
,
802 if (!verify_symbolic_number_p (n
, stmt
))
814 /* Helper for find_bswap_or_nop and try_coalesce_bswap to compute
815 *CMPXCHG, *CMPNOP and adjust *N. */
818 find_bswap_or_nop_finalize (struct symbolic_number
*n
, uint64_t *cmpxchg
,
819 uint64_t *cmpnop
, bool *cast64_to_32
)
824 /* The number which the find_bswap_or_nop_1 result should match in order
825 to have a full byte swap. The number is shifted to the right
826 according to the size of the symbolic number before using it. */
829 *cast64_to_32
= false;
831 /* Find real size of result (highest non-zero byte). */
833 for (tmpn
= n
->n
, rsize
= 0; tmpn
; tmpn
>>= BITS_PER_MARKER
, rsize
++);
837 /* Zero out the bits corresponding to untouched bytes in original gimple
839 if (n
->range
< (int) sizeof (int64_t))
841 mask
= ((uint64_t) 1 << (n
->range
* BITS_PER_MARKER
)) - 1;
842 if (n
->base_addr
== NULL
844 && int_size_in_bytes (TREE_TYPE (n
->src
)) == 8)
846 /* If all bytes in n->n are either 0 or in [5..8] range, this
847 might be a candidate for (unsigned) __builtin_bswap64 (src).
848 It is not worth it for (unsigned short) __builtin_bswap64 (src)
849 or (unsigned short) __builtin_bswap32 (src). */
850 *cast64_to_32
= true;
851 for (tmpn
= n
->n
; tmpn
; tmpn
>>= BITS_PER_MARKER
)
852 if ((tmpn
& MARKER_MASK
)
853 && ((tmpn
& MARKER_MASK
) <= 4 || (tmpn
& MARKER_MASK
) > 8))
855 *cast64_to_32
= false;
862 *cmpxchg
>>= (64 / BITS_PER_MARKER
- n
->range
) * BITS_PER_MARKER
;
866 /* Zero out the bits corresponding to unused bytes in the result of the
867 gimple expression. */
868 if (rsize
< n
->range
)
870 if (BYTES_BIG_ENDIAN
)
872 mask
= ((uint64_t) 1 << (rsize
* BITS_PER_MARKER
)) - 1;
874 if (n
->range
- rsize
== sizeof (int64_t))
877 *cmpnop
>>= (n
->range
- rsize
) * BITS_PER_MARKER
;
881 mask
= ((uint64_t) 1 << (rsize
* BITS_PER_MARKER
)) - 1;
882 if (n
->range
- rsize
== sizeof (int64_t))
885 *cmpxchg
>>= (n
->range
- rsize
) * BITS_PER_MARKER
;
893 n
->range
*= BITS_PER_UNIT
;
/* Check if STMT completes a bswap implementation or a read in a given
   endianness consisting of ORs, SHIFTs and ANDs and sets *BSWAP
   accordingly.  It also sets N to represent the kind of operations
   performed: size of the resulting expression and whether it works on
   a memory source, and if so alias-set and vuse.  At last, the
   function returns a stmt whose rhs's first tree is the source
   expression.  */
905 find_bswap_or_nop (gimple
*stmt
, struct symbolic_number
*n
, bool *bswap
,
906 bool *cast64_to_32
, uint64_t *mask
)
908 tree type_size
= TYPE_SIZE_UNIT (TREE_TYPE (gimple_get_lhs (stmt
)));
909 if (!tree_fits_uhwi_p (type_size
))
912 /* The last parameter determines the depth search limit. It usually
913 correlates directly to the number n of bytes to be touched. We
914 increase that number by 2 * (log2(n) + 1) here in order to also
915 cover signed -> unsigned conversions of the src operand as can be seen
916 in libgcc, and for initial shift/and operation of the src operand. */
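  /* As an illustrative example, for an 8-byte result the computation below
     gives limit = 8 + 2 * (1 + 3) = 16.  */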
917 int limit
= tree_to_uhwi (type_size
);
918 limit
+= 2 * (1 + (int) ceil_log2 ((unsigned HOST_WIDE_INT
) limit
));
919 gimple
*ins_stmt
= find_bswap_or_nop_1 (stmt
, n
, limit
);
923 if (gimple_assign_rhs_code (stmt
) != CONSTRUCTOR
924 || BYTES_BIG_ENDIAN
!= WORDS_BIG_ENDIAN
)
926 unsigned HOST_WIDE_INT sz
= tree_to_uhwi (type_size
) * BITS_PER_UNIT
;
927 if (sz
!= 16 && sz
!= 32 && sz
!= 64)
929 tree rhs
= gimple_assign_rhs1 (stmt
);
930 if (CONSTRUCTOR_NELTS (rhs
) == 0)
932 tree eltype
= TREE_TYPE (TREE_TYPE (rhs
));
933 unsigned HOST_WIDE_INT eltsz
934 = int_size_in_bytes (eltype
) * BITS_PER_UNIT
;
935 if (TYPE_PRECISION (eltype
) != eltsz
)
937 constructor_elt
*elt
;
939 tree type
= build_nonstandard_integer_type (sz
, 1);
940 FOR_EACH_VEC_SAFE_ELT (CONSTRUCTOR_ELTS (rhs
), i
, elt
)
942 if (TREE_CODE (elt
->value
) != SSA_NAME
943 || !INTEGRAL_TYPE_P (TREE_TYPE (elt
->value
)))
945 struct symbolic_number n1
;
947 = find_bswap_or_nop_1 (SSA_NAME_DEF_STMT (elt
->value
), &n1
,
955 n1
.range
= sz
/ BITS_PER_UNIT
;
959 ins_stmt
= source_stmt
;
964 if (n
->vuse
!= n1
.vuse
)
967 struct symbolic_number n0
= *n
;
969 if (!BYTES_BIG_ENDIAN
)
971 if (!do_shift_rotate (LSHIFT_EXPR
, &n1
, i
* eltsz
))
974 else if (!do_shift_rotate (LSHIFT_EXPR
, &n0
, eltsz
))
977 = perform_symbolic_merge (ins_stmt
, &n0
, source_stmt
, &n1
, n
,
986 uint64_t cmpxchg
, cmpnop
;
987 find_bswap_or_nop_finalize (n
, &cmpxchg
, &cmpnop
, cast64_to_32
);
989 /* A complete byte swap should make the symbolic number to start with
990 the largest digit in the highest order byte. Unchanged symbolic
991 number indicates a read with same endianness as target architecture. */
992 *mask
= ~(uint64_t) 0;
995 else if (n
->n
== cmpxchg
)
1000 for (uint64_t msk
= MARKER_MASK
; msk
; msk
<<= BITS_PER_MARKER
)
1001 if ((n
->n
& msk
) == 0)
1003 else if ((n
->n
& msk
) == (cmpxchg
& msk
))
1012 /* Useless bit manipulation performed by code. */
1013 if (!n
->base_addr
&& n
->n
== cmpnop
&& n
->n_ops
== 1)
1019 const pass_data pass_data_optimize_bswap
=
1021 GIMPLE_PASS
, /* type */
1023 OPTGROUP_NONE
, /* optinfo_flags */
1024 TV_NONE
, /* tv_id */
1025 PROP_ssa
, /* properties_required */
1026 0, /* properties_provided */
1027 0, /* properties_destroyed */
1028 0, /* todo_flags_start */
1029 0, /* todo_flags_finish */
1032 class pass_optimize_bswap
: public gimple_opt_pass
1035 pass_optimize_bswap (gcc::context
*ctxt
)
1036 : gimple_opt_pass (pass_data_optimize_bswap
, ctxt
)
1039 /* opt_pass methods: */
1040 bool gate (function
*) final override
1042 return flag_expensive_optimizations
&& optimize
&& BITS_PER_UNIT
== 8;
1045 unsigned int execute (function
*) final override
;
1047 }; // class pass_optimize_bswap
1049 /* Helper function for bswap_replace. Build VIEW_CONVERT_EXPR from
1050 VAL to TYPE. If VAL has different type size, emit a NOP_EXPR cast
1054 bswap_view_convert (gimple_stmt_iterator
*gsi
, tree type
, tree val
,
1057 gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (val
))
1058 || POINTER_TYPE_P (TREE_TYPE (val
)));
1059 if (TYPE_SIZE (type
) != TYPE_SIZE (TREE_TYPE (val
)))
1061 HOST_WIDE_INT prec
= TREE_INT_CST_LOW (TYPE_SIZE (type
));
1062 if (POINTER_TYPE_P (TREE_TYPE (val
)))
1065 = gimple_build_assign (make_ssa_name (pointer_sized_int_node
),
1068 gsi_insert_before (gsi
, g
, GSI_SAME_STMT
);
1070 gsi_insert_after (gsi
, g
, GSI_NEW_STMT
);
1071 val
= gimple_assign_lhs (g
);
1073 tree itype
= build_nonstandard_integer_type (prec
, 1);
1074 gimple
*g
= gimple_build_assign (make_ssa_name (itype
), NOP_EXPR
, val
);
1076 gsi_insert_before (gsi
, g
, GSI_SAME_STMT
);
1078 gsi_insert_after (gsi
, g
, GSI_NEW_STMT
);
1079 val
= gimple_assign_lhs (g
);
1081 return build1 (VIEW_CONVERT_EXPR
, type
, val
);
/* Perform the bswap optimization: replace the expression computed in the rhs
   of gsi_stmt (GSI) (or if NULL add instead of replace) by an equivalent
   bswap, load or load + bswap expression.
   Which of these alternatives replaces the rhs is given by N->base_addr (non
   null if a load is needed) and BSWAP.  The type, VUSE and alias set of the
   load to perform are also given in N while the builtin bswap to invoke is
   given in FNDECL.  Finally, if a load is involved, INS_STMT refers to one of
   the load statements involved to construct the rhs in gsi_stmt (GSI) and
   N->range gives the size of the rhs expression for maintaining some
   statistics.

   Note that if the replacement involves a load and if gsi_stmt (GSI) is
   non-NULL, that stmt is moved just after INS_STMT to do the load with the
   same VUSE which can lead to gsi_stmt (GSI) changing basic block.  */
1100 bswap_replace (gimple_stmt_iterator gsi
, gimple
*ins_stmt
, tree fndecl
,
1101 tree bswap_type
, tree load_type
, struct symbolic_number
*n
,
1102 bool bswap
, uint64_t mask
)
1104 tree src
, tmp
, tgt
= NULL_TREE
;
1105 gimple
*bswap_stmt
, *mask_stmt
= NULL
;
1106 tree_code conv_code
= NOP_EXPR
;
1108 gimple
*cur_stmt
= gsi_stmt (gsi
);
1112 tgt
= gimple_assign_lhs (cur_stmt
);
1113 if (gimple_assign_rhs_code (cur_stmt
) == CONSTRUCTOR
1115 && VECTOR_TYPE_P (TREE_TYPE (tgt
)))
1116 conv_code
= VIEW_CONVERT_EXPR
;
1119 /* Need to load the value from memory first. */
1122 gimple_stmt_iterator gsi_ins
= gsi
;
1124 gsi_ins
= gsi_for_stmt (ins_stmt
);
1125 tree addr_expr
, addr_tmp
, val_expr
, val_tmp
;
1126 tree load_offset_ptr
, aligned_load_type
;
1128 unsigned align
= get_object_alignment (src
);
1129 poly_int64 load_offset
= 0;
1133 basic_block ins_bb
= gimple_bb (ins_stmt
);
1134 basic_block cur_bb
= gimple_bb (cur_stmt
);
1135 if (!dominated_by_p (CDI_DOMINATORS
, cur_bb
, ins_bb
))
1138 /* Move cur_stmt just before one of the load of the original
1139 to ensure it has the same VUSE. See PR61517 for what could
1141 if (gimple_bb (cur_stmt
) != gimple_bb (ins_stmt
))
1142 reset_flow_sensitive_info (gimple_assign_lhs (cur_stmt
));
1143 gsi_move_before (&gsi
, &gsi_ins
);
1144 gsi
= gsi_for_stmt (cur_stmt
);
1149 /* Compute address to load from and cast according to the size
1151 addr_expr
= build_fold_addr_expr (src
);
1152 if (is_gimple_mem_ref_addr (addr_expr
))
1153 addr_tmp
= unshare_expr (addr_expr
);
1156 addr_tmp
= unshare_expr (n
->base_addr
);
1157 if (!is_gimple_mem_ref_addr (addr_tmp
))
1158 addr_tmp
= force_gimple_operand_gsi_1 (&gsi
, addr_tmp
,
1159 is_gimple_mem_ref_addr
,
1162 load_offset
= n
->bytepos
;
1166 = force_gimple_operand_gsi (&gsi
, unshare_expr (n
->offset
),
1167 true, NULL_TREE
, true,
1170 = gimple_build_assign (make_ssa_name (TREE_TYPE (addr_tmp
)),
1171 POINTER_PLUS_EXPR
, addr_tmp
, off
);
1172 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
1173 addr_tmp
= gimple_assign_lhs (stmt
);
1177 /* Perform the load. */
1178 aligned_load_type
= load_type
;
1179 if (align
< TYPE_ALIGN (load_type
))
1180 aligned_load_type
= build_aligned_type (load_type
, align
);
1181 load_offset_ptr
= build_int_cst (n
->alias_set
, load_offset
);
1182 val_expr
= fold_build2 (MEM_REF
, aligned_load_type
, addr_tmp
,
1188 nop_stats
.found_16bit
++;
1189 else if (n
->range
== 32)
1190 nop_stats
.found_32bit
++;
1193 gcc_assert (n
->range
== 64);
1194 nop_stats
.found_64bit
++;
1197 /* Convert the result of load if necessary. */
1198 if (tgt
&& !useless_type_conversion_p (TREE_TYPE (tgt
), load_type
))
1200 val_tmp
= make_temp_ssa_name (aligned_load_type
, NULL
,
1202 load_stmt
= gimple_build_assign (val_tmp
, val_expr
);
1203 gimple_set_vuse (load_stmt
, n
->vuse
);
1204 gsi_insert_before (&gsi
, load_stmt
, GSI_SAME_STMT
);
1205 if (conv_code
== VIEW_CONVERT_EXPR
)
1206 val_tmp
= bswap_view_convert (&gsi
, TREE_TYPE (tgt
), val_tmp
,
1208 gimple_assign_set_rhs_with_ops (&gsi
, conv_code
, val_tmp
);
1209 update_stmt (cur_stmt
);
1213 gimple_assign_set_rhs_with_ops (&gsi
, MEM_REF
, val_expr
);
1214 gimple_set_vuse (cur_stmt
, n
->vuse
);
1215 update_stmt (cur_stmt
);
1219 tgt
= make_ssa_name (load_type
);
1220 cur_stmt
= gimple_build_assign (tgt
, MEM_REF
, val_expr
);
1221 gimple_set_vuse (cur_stmt
, n
->vuse
);
1222 gsi_insert_before (&gsi
, cur_stmt
, GSI_SAME_STMT
);
1228 "%d bit load in target endianness found at: ",
1230 print_gimple_stmt (dump_file
, cur_stmt
, 0);
1236 val_tmp
= make_temp_ssa_name (aligned_load_type
, NULL
, "load_dst");
1237 load_stmt
= gimple_build_assign (val_tmp
, val_expr
);
1238 gimple_set_vuse (load_stmt
, n
->vuse
);
1239 gsi_insert_before (&gsi
, load_stmt
, GSI_SAME_STMT
);
1246 if (tgt
&& !useless_type_conversion_p (TREE_TYPE (tgt
), TREE_TYPE (src
)))
1248 if (!is_gimple_val (src
))
1250 if (conv_code
== VIEW_CONVERT_EXPR
)
1251 src
= bswap_view_convert (&gsi
, TREE_TYPE (tgt
), src
, true);
1252 g
= gimple_build_assign (tgt
, conv_code
, src
);
1255 g
= gimple_build_assign (tgt
, src
);
1259 nop_stats
.found_16bit
++;
1260 else if (n
->range
== 32)
1261 nop_stats
.found_32bit
++;
1264 gcc_assert (n
->range
== 64);
1265 nop_stats
.found_64bit
++;
1270 "%d bit reshuffle in target endianness found at: ",
1273 print_gimple_stmt (dump_file
, cur_stmt
, 0);
1276 print_generic_expr (dump_file
, tgt
, TDF_NONE
);
1277 fprintf (dump_file
, "\n");
1281 gsi_replace (&gsi
, g
, true);
1284 else if (TREE_CODE (src
) == BIT_FIELD_REF
)
1285 src
= TREE_OPERAND (src
, 0);
1288 bswap_stats
.found_16bit
++;
1289 else if (n
->range
== 32)
1290 bswap_stats
.found_32bit
++;
1293 gcc_assert (n
->range
== 64);
1294 bswap_stats
.found_64bit
++;
1299 /* Convert the src expression if necessary. */
1300 if (!useless_type_conversion_p (TREE_TYPE (tmp
), bswap_type
))
1302 gimple
*convert_stmt
;
1304 tmp
= make_temp_ssa_name (bswap_type
, NULL
, "bswapsrc");
1305 convert_stmt
= gimple_build_assign (tmp
, NOP_EXPR
, src
);
1306 gsi_insert_before (&gsi
, convert_stmt
, GSI_SAME_STMT
);
  /* Canonical form for 16 bit bswap is a rotate expression.  Only 16-bit
     values are considered since a rotation of a 2N-bit value by N bits is
     generally not equivalent to a bswap.  Consider for instance
     0x01020304 r>> 16 which gives 0x03040102 while a bswap for that value
     is 0x04030201.  */
1313 if (bswap
&& n
->range
== 16)
1315 tree count
= build_int_cst (NULL
, BITS_PER_UNIT
);
1316 src
= fold_build2 (LROTATE_EXPR
, bswap_type
, tmp
, count
);
1317 bswap_stmt
= gimple_build_assign (NULL
, src
);
1320 bswap_stmt
= gimple_build_call (fndecl
, 1, tmp
);
1322 if (tgt
== NULL_TREE
)
1323 tgt
= make_ssa_name (bswap_type
);
1326 if (mask
!= ~(uint64_t) 0)
1328 tree m
= build_int_cst (bswap_type
, mask
);
1329 tmp
= make_temp_ssa_name (bswap_type
, NULL
, "bswapdst");
1330 gimple_set_lhs (bswap_stmt
, tmp
);
1331 mask_stmt
= gimple_build_assign (tgt
, BIT_AND_EXPR
, tmp
, m
);
1335 /* Convert the result if necessary. */
1336 if (!useless_type_conversion_p (TREE_TYPE (tgt
), bswap_type
))
1338 tmp
= make_temp_ssa_name (bswap_type
, NULL
, "bswapdst");
1340 gimple_stmt_iterator gsi2
= gsi
;
1341 if (conv_code
== VIEW_CONVERT_EXPR
)
1342 atmp
= bswap_view_convert (&gsi2
, TREE_TYPE (tgt
), tmp
, false);
1343 gimple
*convert_stmt
= gimple_build_assign (tgt
, conv_code
, atmp
);
1344 gsi_insert_after (&gsi2
, convert_stmt
, GSI_SAME_STMT
);
1347 gimple_set_lhs (mask_stmt
? mask_stmt
: bswap_stmt
, tmp
);
1351 fprintf (dump_file
, "%d bit bswap implementation found at: ",
1354 print_gimple_stmt (dump_file
, cur_stmt
, 0);
1357 print_generic_expr (dump_file
, tgt
, TDF_NONE
);
1358 fprintf (dump_file
, "\n");
1365 gsi_insert_after (&gsi
, mask_stmt
, GSI_SAME_STMT
);
1366 gsi_insert_after (&gsi
, bswap_stmt
, GSI_SAME_STMT
);
1367 gsi_remove (&gsi
, true);
1371 gsi_insert_before (&gsi
, bswap_stmt
, GSI_SAME_STMT
);
1373 gsi_insert_before (&gsi
, mask_stmt
, GSI_SAME_STMT
);
/* Try to optimize an assignment CUR_STMT with CONSTRUCTOR on the rhs
   using bswap optimizations.  CDI_DOMINATORS needs to be
   computed on entry.  Return true if it has been optimized and
   TODO_update_ssa is needed.  */
1384 maybe_optimize_vector_constructor (gimple
*cur_stmt
)
1386 tree fndecl
= NULL_TREE
, bswap_type
= NULL_TREE
, load_type
;
1387 struct symbolic_number n
;
1390 gcc_assert (is_gimple_assign (cur_stmt
)
1391 && gimple_assign_rhs_code (cur_stmt
) == CONSTRUCTOR
);
1393 tree rhs
= gimple_assign_rhs1 (cur_stmt
);
1394 if (!VECTOR_TYPE_P (TREE_TYPE (rhs
))
1395 || !INTEGRAL_TYPE_P (TREE_TYPE (TREE_TYPE (rhs
)))
1396 || gimple_assign_lhs (cur_stmt
) == NULL_TREE
)
1399 HOST_WIDE_INT sz
= int_size_in_bytes (TREE_TYPE (rhs
)) * BITS_PER_UNIT
;
1403 load_type
= bswap_type
= uint16_type_node
;
1406 if (builtin_decl_explicit_p (BUILT_IN_BSWAP32
)
1407 && optab_handler (bswap_optab
, SImode
) != CODE_FOR_nothing
)
1409 load_type
= uint32_type_node
;
1410 fndecl
= builtin_decl_explicit (BUILT_IN_BSWAP32
);
1411 bswap_type
= TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl
)));
1417 if (builtin_decl_explicit_p (BUILT_IN_BSWAP64
)
1418 && (optab_handler (bswap_optab
, DImode
) != CODE_FOR_nothing
1419 || (word_mode
== SImode
1420 && builtin_decl_explicit_p (BUILT_IN_BSWAP32
)
1421 && optab_handler (bswap_optab
, SImode
) != CODE_FOR_nothing
)))
1423 load_type
= uint64_type_node
;
1424 fndecl
= builtin_decl_explicit (BUILT_IN_BSWAP64
);
1425 bswap_type
= TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl
)));
1436 gimple
*ins_stmt
= find_bswap_or_nop (cur_stmt
, &n
, &bswap
,
1437 &cast64_to_32
, &mask
);
1439 || n
.range
!= (unsigned HOST_WIDE_INT
) sz
1441 || mask
!= ~(uint64_t) 0)
1444 if (bswap
&& !fndecl
&& n
.range
!= 16)
1447 memset (&nop_stats
, 0, sizeof (nop_stats
));
1448 memset (&bswap_stats
, 0, sizeof (bswap_stats
));
1449 return bswap_replace (gsi_for_stmt (cur_stmt
), ins_stmt
, fndecl
,
1450 bswap_type
, load_type
, &n
, bswap
, mask
) != NULL_TREE
;
/* Find manual byte swap implementations as well as load in a given
   endianness.  Byte swaps are turned into a bswap builtin invocation
   while endian loads are converted to bswap builtin invocation or
   simple load according to the target endianness.  */
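
/* As an illustrative sketch of the user-level pattern this pass recognizes
   (the function below is a made-up example, not part of this file), a
   hand-written 32-bit byte swap such as:

     static uint32_t
     swap32 (uint32_t x)
     {
       return (x >> 24)
	      | ((x >> 8) & 0x0000ff00)
	      | ((x << 8) & 0x00ff0000)
	      | (x << 24);
     }

   is rewritten into a call to __builtin_bswap32, assuming the target
   provides a suitable bswap pattern.  */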
1459 pass_optimize_bswap::execute (function
*fun
)
1462 bool bswap32_p
, bswap64_p
;
1463 bool changed
= false;
1464 tree bswap32_type
= NULL_TREE
, bswap64_type
= NULL_TREE
;
1466 bswap32_p
= (builtin_decl_explicit_p (BUILT_IN_BSWAP32
)
1467 && optab_handler (bswap_optab
, SImode
) != CODE_FOR_nothing
);
1468 bswap64_p
= (builtin_decl_explicit_p (BUILT_IN_BSWAP64
)
1469 && (optab_handler (bswap_optab
, DImode
) != CODE_FOR_nothing
1470 || (bswap32_p
&& word_mode
== SImode
)));
1472 /* Determine the argument type of the builtins. The code later on
1473 assumes that the return and argument type are the same. */
1476 tree fndecl
= builtin_decl_explicit (BUILT_IN_BSWAP32
);
1477 bswap32_type
= TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl
)));
1482 tree fndecl
= builtin_decl_explicit (BUILT_IN_BSWAP64
);
1483 bswap64_type
= TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl
)));
1486 memset (&nop_stats
, 0, sizeof (nop_stats
));
1487 memset (&bswap_stats
, 0, sizeof (bswap_stats
));
1488 calculate_dominance_info (CDI_DOMINATORS
);
1490 FOR_EACH_BB_FN (bb
, fun
)
1492 gimple_stmt_iterator gsi
;
1494 /* We do a reverse scan for bswap patterns to make sure we get the
1495 widest match. As bswap pattern matching doesn't handle previously
1496 inserted smaller bswap replacements as sub-patterns, the wider
1497 variant wouldn't be detected. */
1498 for (gsi
= gsi_last_bb (bb
); !gsi_end_p (gsi
);)
1500 gimple
*ins_stmt
, *cur_stmt
= gsi_stmt (gsi
);
1501 tree fndecl
= NULL_TREE
, bswap_type
= NULL_TREE
, load_type
;
1502 enum tree_code code
;
1503 struct symbolic_number n
;
1504 bool bswap
, cast64_to_32
;
	  /* This gsi_prev (&gsi) is not part of the for loop because cur_stmt
	     might be moved to a different basic block by bswap_replace and gsi
	     must not point to it if that's the case.  Moving the gsi_prev
	     there makes sure that gsi points to the statement previous to
	     cur_stmt while still making sure that all statements are
	     considered in this basic block.  */
1515 if (!is_gimple_assign (cur_stmt
))
1518 code
= gimple_assign_rhs_code (cur_stmt
);
1523 if (!tree_fits_uhwi_p (gimple_assign_rhs2 (cur_stmt
))
1524 || tree_to_uhwi (gimple_assign_rhs2 (cur_stmt
))
1534 tree rhs
= gimple_assign_rhs1 (cur_stmt
);
1535 if (VECTOR_TYPE_P (TREE_TYPE (rhs
))
1536 && INTEGRAL_TYPE_P (TREE_TYPE (TREE_TYPE (rhs
))))
1544 ins_stmt
= find_bswap_or_nop (cur_stmt
, &n
, &bswap
,
1545 &cast64_to_32
, &mask
);
1553 /* Already in canonical form, nothing to do. */
1554 if (code
== LROTATE_EXPR
|| code
== RROTATE_EXPR
)
1556 load_type
= bswap_type
= uint16_type_node
;
1559 load_type
= uint32_type_node
;
1562 fndecl
= builtin_decl_explicit (BUILT_IN_BSWAP32
);
1563 bswap_type
= bswap32_type
;
1567 load_type
= uint64_type_node
;
1570 fndecl
= builtin_decl_explicit (BUILT_IN_BSWAP64
);
1571 bswap_type
= bswap64_type
;
1578 if (bswap
&& !fndecl
&& n
.range
!= 16)
1581 if (bswap_replace (gsi_for_stmt (cur_stmt
), ins_stmt
, fndecl
,
1582 bswap_type
, load_type
, &n
, bswap
, mask
))
1587 statistics_counter_event (fun
, "16-bit nop implementations found",
1588 nop_stats
.found_16bit
);
1589 statistics_counter_event (fun
, "32-bit nop implementations found",
1590 nop_stats
.found_32bit
);
1591 statistics_counter_event (fun
, "64-bit nop implementations found",
1592 nop_stats
.found_64bit
);
1593 statistics_counter_event (fun
, "16-bit bswap implementations found",
1594 bswap_stats
.found_16bit
);
1595 statistics_counter_event (fun
, "32-bit bswap implementations found",
1596 bswap_stats
.found_32bit
);
1597 statistics_counter_event (fun
, "64-bit bswap implementations found",
1598 bswap_stats
.found_64bit
);
1600 return (changed
? TODO_update_ssa
: 0);
1606 make_pass_optimize_bswap (gcc::context
*ctxt
)
1608 return new pass_optimize_bswap (ctxt
);
/* Struct recording one operand for the store, which is either a constant,
   then VAL represents the constant and all the other fields are zero, or
   a memory load, then VAL represents the reference, BASE_ADDR is non-NULL
   and the other fields also reflect the memory load, or an SSA name, then
   VAL represents the SSA name and all the other fields are zero.  */
1619 class store_operand_info
1624 poly_uint64 bitsize
;
1626 poly_uint64 bitregion_start
;
1627 poly_uint64 bitregion_end
;
1630 store_operand_info ();
1633 store_operand_info::store_operand_info ()
1634 : val (NULL_TREE
), base_addr (NULL_TREE
), bitsize (0), bitpos (0),
1635 bitregion_start (0), bitregion_end (0), stmt (NULL
), bit_not_p (false)
1639 /* Struct recording the information about a single store of an immediate
1640 to memory. These are created in the first phase and coalesced into
1641 merged_store_group objects in the second phase. */
1643 class store_immediate_info
1646 unsigned HOST_WIDE_INT bitsize
;
1647 unsigned HOST_WIDE_INT bitpos
;
1648 unsigned HOST_WIDE_INT bitregion_start
;
1649 /* This is one past the last bit of the bit region. */
1650 unsigned HOST_WIDE_INT bitregion_end
;
1653 /* INTEGER_CST for constant store, STRING_CST for string store,
1654 MEM_REF for memory copy, BIT_*_EXPR for logical bitwise operation,
1655 BIT_INSERT_EXPR for bit insertion.
1656 LROTATE_EXPR if it can be only bswap optimized and
1657 ops are not really meaningful.
1658 NOP_EXPR if bswap optimization detected identity, ops
1659 are not meaningful. */
1660 enum tree_code rhs_code
;
1661 /* Two fields for bswap optimization purposes. */
1662 struct symbolic_number n
;
1664 /* True if BIT_{AND,IOR,XOR}_EXPR result is inverted before storing. */
1666 /* True if ops have been swapped and thus ops[1] represents
1667 rhs1 of BIT_{AND,IOR,XOR}_EXPR and ops[0] represents rhs2. */
1669 /* The index number of the landing pad, or 0 if there is none. */
1671 /* Operands. For BIT_*_EXPR rhs_code both operands are used, otherwise
1672 just the first one. */
1673 store_operand_info ops
[2];
1674 store_immediate_info (unsigned HOST_WIDE_INT
, unsigned HOST_WIDE_INT
,
1675 unsigned HOST_WIDE_INT
, unsigned HOST_WIDE_INT
,
1676 gimple
*, unsigned int, enum tree_code
,
1677 struct symbolic_number
&, gimple
*, bool, int,
1678 const store_operand_info
&,
1679 const store_operand_info
&);
1682 store_immediate_info::store_immediate_info (unsigned HOST_WIDE_INT bs
,
1683 unsigned HOST_WIDE_INT bp
,
1684 unsigned HOST_WIDE_INT brs
,
1685 unsigned HOST_WIDE_INT bre
,
1688 enum tree_code rhscode
,
1689 struct symbolic_number
&nr
,
1693 const store_operand_info
&op0r
,
1694 const store_operand_info
&op1r
)
1695 : bitsize (bs
), bitpos (bp
), bitregion_start (brs
), bitregion_end (bre
),
1696 stmt (st
), order (ord
), rhs_code (rhscode
), n (nr
),
1697 ins_stmt (ins_stmtp
), bit_not_p (bitnotp
), ops_swapped_p (false),
1698 lp_nr (nr2
), ops
{ op0r
, op1r
}
1702 /* Struct representing a group of stores to contiguous memory locations.
1703 These are produced by the second phase (coalescing) and consumed in the
1704 third phase that outputs the widened stores. */
1706 class merged_store_group
1709 unsigned HOST_WIDE_INT start
;
1710 unsigned HOST_WIDE_INT width
;
1711 unsigned HOST_WIDE_INT bitregion_start
;
1712 unsigned HOST_WIDE_INT bitregion_end
;
1713 /* The size of the allocated memory for val and mask. */
1714 unsigned HOST_WIDE_INT buf_size
;
1715 unsigned HOST_WIDE_INT align_base
;
1716 poly_uint64 load_align_base
[2];
1719 unsigned int load_align
[2];
1720 unsigned int first_order
;
1721 unsigned int last_order
;
1723 bool string_concatenation
;
1724 bool only_constants
;
1726 unsigned int first_nonmergeable_order
;
1729 auto_vec
<store_immediate_info
*> stores
;
1730 /* We record the first and last original statements in the sequence because
1731 we'll need their vuse/vdef and replacement position. It's easier to keep
1732 track of them separately as 'stores' is reordered by apply_stores. */
1736 unsigned char *mask
;
1738 merged_store_group (store_immediate_info
*);
1739 ~merged_store_group ();
1740 bool can_be_merged_into (store_immediate_info
*);
1741 void merge_into (store_immediate_info
*);
1742 void merge_overlapping (store_immediate_info
*);
1743 bool apply_stores ();
1745 void do_merge (store_immediate_info
*);
1748 /* Debug helper. Dump LEN elements of byte array PTR to FD in hex. */
1751 dump_char_array (FILE *fd
, unsigned char *ptr
, unsigned int len
)
1756 for (unsigned int i
= 0; i
< len
; i
++)
1757 fprintf (fd
, "%02x ", ptr
[i
]);
1761 /* Clear out LEN bits starting from bit START in the byte array
1762 PTR. This clears the bits to the *right* from START.
1763 START must be within [0, BITS_PER_UNIT) and counts starting from
1764 the least significant bit. */
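
/* As an illustrative example, with BITS_PER_UNIT == 8,
   clear_bit_region_be (ptr, 6, 3) clears bits 6, 5 and 4 of *PTR,
   e.g. turning 0xff into 0x8f.  */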
1767 clear_bit_region_be (unsigned char *ptr
, unsigned int start
,
1772 /* Clear len bits to the right of start. */
1773 else if (len
<= start
+ 1)
1775 unsigned char mask
= (~(~0U << len
));
1776 mask
= mask
<< (start
+ 1U - len
);
1779 else if (start
!= BITS_PER_UNIT
- 1)
1781 clear_bit_region_be (ptr
, start
, (start
% BITS_PER_UNIT
) + 1);
1782 clear_bit_region_be (ptr
+ 1, BITS_PER_UNIT
- 1,
1783 len
- (start
% BITS_PER_UNIT
) - 1);
1785 else if (start
== BITS_PER_UNIT
- 1
1786 && len
> BITS_PER_UNIT
)
1788 unsigned int nbytes
= len
/ BITS_PER_UNIT
;
1789 memset (ptr
, 0, nbytes
);
1790 if (len
% BITS_PER_UNIT
!= 0)
1791 clear_bit_region_be (ptr
+ nbytes
, BITS_PER_UNIT
- 1,
1792 len
% BITS_PER_UNIT
);
/* In the byte array PTR clear the bit region starting at bit
   START that is LEN bits wide.
   For regions spanning multiple bytes do this recursively until we reach
   zero LEN or a region contained within a single byte.  */
1804 clear_bit_region (unsigned char *ptr
, unsigned int start
,
1807 /* Degenerate base case. */
1810 else if (start
>= BITS_PER_UNIT
)
1811 clear_bit_region (ptr
+ 1, start
- BITS_PER_UNIT
, len
);
1812 /* Second base case. */
1813 else if ((start
+ len
) <= BITS_PER_UNIT
)
1815 unsigned char mask
= (~0U) << (unsigned char) (BITS_PER_UNIT
- len
);
1816 mask
>>= BITS_PER_UNIT
- (start
+ len
);
1822 /* Clear most significant bits in a byte and proceed with the next byte. */
1823 else if (start
!= 0)
1825 clear_bit_region (ptr
, start
, BITS_PER_UNIT
- start
);
1826 clear_bit_region (ptr
+ 1, 0, len
- (BITS_PER_UNIT
- start
));
1828 /* Whole bytes need to be cleared. */
1829 else if (start
== 0 && len
> BITS_PER_UNIT
)
1831 unsigned int nbytes
= len
/ BITS_PER_UNIT
;
1832 /* We could recurse on each byte but we clear whole bytes, so a simple
1834 memset (ptr
, '\0', nbytes
);
1835 /* Clear the remaining sub-byte region if there is one. */
1836 if (len
% BITS_PER_UNIT
!= 0)
1837 clear_bit_region (ptr
+ nbytes
, 0, len
% BITS_PER_UNIT
);
1843 /* Write BITLEN bits of EXPR to the byte array PTR at
1844 bit position BITPOS. PTR should contain TOTAL_BYTES elements.
1845 Return true if the operation succeeded. */
1848 encode_tree_to_bitpos (tree expr
, unsigned char *ptr
, int bitlen
, int bitpos
,
1849 unsigned int total_bytes
)
1851 unsigned int first_byte
= bitpos
/ BITS_PER_UNIT
;
1852 bool sub_byte_op_p
= ((bitlen
% BITS_PER_UNIT
)
1853 || (bitpos
% BITS_PER_UNIT
)
1854 || !int_mode_for_size (bitlen
, 0).exists ());
1856 = (TREE_CODE (expr
) == CONSTRUCTOR
1857 && CONSTRUCTOR_NELTS (expr
) == 0
1858 && TYPE_SIZE_UNIT (TREE_TYPE (expr
))
1859 && tree_fits_uhwi_p (TYPE_SIZE_UNIT (TREE_TYPE (expr
))));
1863 if (first_byte
>= total_bytes
)
1865 total_bytes
-= first_byte
;
1868 unsigned HOST_WIDE_INT rhs_bytes
1869 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (expr
)));
1870 if (rhs_bytes
> total_bytes
)
1872 memset (ptr
+ first_byte
, '\0', rhs_bytes
);
1875 return native_encode_expr (expr
, ptr
+ first_byte
, total_bytes
) != 0;
1879 We are writing a non byte-sized quantity or at a position that is not
1881 |--------|--------|--------| ptr + first_byte
1883 xxx xxxxxxxx xxx< bp>
1886 First native_encode_expr EXPR into a temporary buffer and shift each
1887 byte in the buffer by 'bp' (carrying the bits over as necessary).
1888 |00000000|00xxxxxx|xxxxxxxx| << bp = |000xxxxx|xxxxxxxx|xxx00000|
1889 <------bitlen---->< bp>
1890 Then we clear the destination bits:
1891 |---00000|00000000|000-----| ptr + first_byte
1892 <-------bitlen--->< bp>
1894 Finally we ORR the bytes of the shifted EXPR into the cleared region:
1895 |---xxxxx||xxxxxxxx||xxx-----| ptr + first_byte.
1898 We are writing a non byte-sized quantity or at a position that is not
1900 ptr + first_byte |--------|--------|--------|
1902 <bp >xxx xxxxxxxx xxx
1905 First native_encode_expr EXPR into a temporary buffer and shift each
1906 byte in the buffer to the right by (carrying the bits over as necessary).
1907 We shift by as much as needed to align the most significant bit of EXPR
1909 |00xxxxxx|xxxxxxxx| >> 3 = |00000xxx|xxxxxxxx|xxxxx000|
1910 <---bitlen----> <bp ><-----bitlen----->
1911 Then we clear the destination bits:
1912 ptr + first_byte |-----000||00000000||00000---|
1913 <bp ><-------bitlen----->
1915 Finally we ORR the bytes of the shifted EXPR into the cleared region:
1916 ptr + first_byte |---xxxxx||xxxxxxxx||xxx-----|.
1917 The awkwardness comes from the fact that bitpos is counted from the
1918 most significant bit of a byte. */
1920 /* We must be dealing with fixed-size data at this point, since the
1921 total size is also fixed. */
1922 unsigned int byte_size
;
1925 unsigned HOST_WIDE_INT rhs_bytes
1926 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (expr
)));
1927 if (rhs_bytes
> total_bytes
)
1929 byte_size
= rhs_bytes
;
1933 fixed_size_mode mode
1934 = as_a
<fixed_size_mode
> (TYPE_MODE (TREE_TYPE (expr
)));
1937 ? tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (expr
)))
1938 : GET_MODE_SIZE (mode
);
1940 /* Allocate an extra byte so that we have space to shift into. */
1942 unsigned char *tmpbuf
= XALLOCAVEC (unsigned char, byte_size
);
1943 memset (tmpbuf
, '\0', byte_size
);
1944 /* The store detection code should only have allowed constants that are
1945 accepted by native_encode_expr or empty ctors. */
1947 && native_encode_expr (expr
, tmpbuf
, byte_size
- 1) == 0)
1950 /* The native_encode_expr machinery uses TYPE_MODE to determine how many
1951 bytes to write. This means it can write more than
1952 ROUND_UP (bitlen, BITS_PER_UNIT) / BITS_PER_UNIT bytes (for example
1953 write 8 bytes for a bitlen of 40). Skip the bytes that are not within
1954 bitlen and zero out the bits that are not relevant as well (that may
1955 contain a sign bit due to sign-extension). */
1956 unsigned int padding
1957 = byte_size
- ROUND_UP (bitlen
, BITS_PER_UNIT
) / BITS_PER_UNIT
- 1;
1958 /* On big-endian the padding is at the 'front' so just skip the initial
1960 if (BYTES_BIG_ENDIAN
)
1963 byte_size
-= padding
;
1965 if (bitlen
% BITS_PER_UNIT
!= 0)
1967 if (BYTES_BIG_ENDIAN
)
1968 clear_bit_region_be (tmpbuf
, BITS_PER_UNIT
- 1,
1969 BITS_PER_UNIT
- (bitlen
% BITS_PER_UNIT
));
1971 clear_bit_region (tmpbuf
, bitlen
,
1972 byte_size
* BITS_PER_UNIT
- bitlen
);
1974 /* Left shifting relies on the last byte being clear if bitlen is
1975 a multiple of BITS_PER_UNIT, which might not be clear if
1976 there are padding bytes. */
1977 else if (!BYTES_BIG_ENDIAN
)
1978 tmpbuf
[byte_size
- 1] = '\0';
1980 /* Clear the bit region in PTR where the bits from TMPBUF will be
1982 if (BYTES_BIG_ENDIAN
)
1983 clear_bit_region_be (ptr
+ first_byte
,
1984 BITS_PER_UNIT
- 1 - (bitpos
% BITS_PER_UNIT
), bitlen
);
1986 clear_bit_region (ptr
+ first_byte
, bitpos
% BITS_PER_UNIT
, bitlen
);
1989 int bitlen_mod
= bitlen
% BITS_PER_UNIT
;
1990 int bitpos_mod
= bitpos
% BITS_PER_UNIT
;
1992 bool skip_byte
= false;
1993 if (BYTES_BIG_ENDIAN
)
1995 /* BITPOS and BITLEN are exactly aligned and no shifting
1997 if (bitpos_mod
+ bitlen_mod
== BITS_PER_UNIT
1998 || (bitpos_mod
== 0 && bitlen_mod
== 0))
2000 /* |. . . . . . . .|
2002 We always shift right for BYTES_BIG_ENDIAN so shift the beginning
2003 of the value until it aligns with 'bp' in the next byte over. */
2004 else if (bitpos_mod
+ bitlen_mod
< BITS_PER_UNIT
)
2006 shift_amnt
= bitlen_mod
+ bitpos_mod
;
2007 skip_byte
= bitlen_mod
!= 0;
2009 /* |. . . . . . . .|
2012 Shift the value right within the same byte so it aligns with 'bp'. */
2014 shift_amnt
= bitlen_mod
+ bitpos_mod
- BITS_PER_UNIT
;
2017 shift_amnt
= bitpos
% BITS_PER_UNIT
;
2019 /* Create the shifted version of EXPR. */
2020 if (!BYTES_BIG_ENDIAN
)
2022 shift_bytes_in_array_left (tmpbuf
, byte_size
, shift_amnt
);
2023 if (shift_amnt
== 0)
2028 gcc_assert (BYTES_BIG_ENDIAN
);
2029 shift_bytes_in_array_right (tmpbuf
, byte_size
, shift_amnt
);
2030 /* If shifting right forced us to move into the next byte skip the now
2039 /* Insert the bits from TMPBUF. */
2040 for (unsigned int i
= 0; i
< byte_size
; i
++)
2041 ptr
[first_byte
+ i
] |= tmpbuf
[i
];
2046 /* Sorting function for store_immediate_info objects.
2047 Sorts them by bitposition. */
2050 sort_by_bitpos (const void *x
, const void *y
)
2052 store_immediate_info
*const *tmp
= (store_immediate_info
* const *) x
;
2053 store_immediate_info
*const *tmp2
= (store_immediate_info
* const *) y
;
2055 if ((*tmp
)->bitpos
< (*tmp2
)->bitpos
)
2057 else if ((*tmp
)->bitpos
> (*tmp2
)->bitpos
)
  /* If they are the same let's use the order which is guaranteed to
     be different.  */
  return (*tmp)->order - (*tmp2)->order;
2065 /* Sorting function for store_immediate_info objects.
2066 Sorts them by the order field. */
2069 sort_by_order (const void *x
, const void *y
)
2071 store_immediate_info
*const *tmp
= (store_immediate_info
* const *) x
;
2072 store_immediate_info
*const *tmp2
= (store_immediate_info
* const *) y
;
2074 if ((*tmp
)->order
< (*tmp2
)->order
)
2076 else if ((*tmp
)->order
> (*tmp2
)->order
)
/* Initialize a merged_store_group object from a store_immediate_info
   object.  */

merged_store_group::merged_store_group (store_immediate_info *info)
{
  start = info->bitpos;
  width = info->bitsize;
  bitregion_start = info->bitregion_start;
  bitregion_end = info->bitregion_end;
  /* VAL has memory allocated for it in apply_stores once the group
     width has been finalized.  */
  val = NULL;
  mask = NULL;
  bit_insertion = info->rhs_code == BIT_INSERT_EXPR;
  string_concatenation = info->rhs_code == STRING_CST;
  only_constants = info->rhs_code == INTEGER_CST;
  consecutive = true;
  first_nonmergeable_order = ~0U;
  lp_nr = info->lp_nr;
  unsigned HOST_WIDE_INT align_bitpos = 0;
  get_object_alignment_1 (gimple_assign_lhs (info->stmt),
			  &align, &align_bitpos);
  align_base = start - align_bitpos;
  for (int i = 0; i < 2; ++i)
    {
      store_operand_info &op = info->ops[i];
      if (op.base_addr == NULL_TREE)
	{
	  load_align[i] = 0;
	  load_align_base[i] = 0;
	}
      else
	{
	  get_object_alignment_1 (op.val, &load_align[i], &align_bitpos);
	  load_align_base[i] = op.bitpos - align_bitpos;
	}
    }
  stores.create (1);
  stores.safe_push (info);
  last_stmt = info->stmt;
  last_order = info->order;
  first_stmt = last_stmt;
  first_order = last_order;
  buf_size = 0;
}
merged_store_group::~merged_store_group ()
{
  if (val)
    XDELETEVEC (val);
}
/* Return true if the store described by INFO can be merged into the group.  */

bool
merged_store_group::can_be_merged_into (store_immediate_info *info)
{
  /* Do not merge bswap patterns.  */
  if (info->rhs_code == LROTATE_EXPR)
    return false;

  if (info->lp_nr != lp_nr)
    return false;

  /* The canonical case.  */
  if (info->rhs_code == stores[0]->rhs_code)
    return true;

  /* BIT_INSERT_EXPR is compatible with INTEGER_CST if no STRING_CST.  */
  if (info->rhs_code == BIT_INSERT_EXPR && stores[0]->rhs_code == INTEGER_CST)
    return !string_concatenation;

  if (stores[0]->rhs_code == BIT_INSERT_EXPR && info->rhs_code == INTEGER_CST)
    return !string_concatenation;

  /* We can turn MEM_REF into BIT_INSERT_EXPR for bit-field stores, but do it
     only for small regions since this can generate a lot of instructions.  */
  if (info->rhs_code == MEM_REF
      && (stores[0]->rhs_code == INTEGER_CST
	  || stores[0]->rhs_code == BIT_INSERT_EXPR)
      && info->bitregion_start == stores[0]->bitregion_start
      && info->bitregion_end == stores[0]->bitregion_end
      && info->bitregion_end - info->bitregion_start <= MAX_FIXED_MODE_SIZE)
    return !string_concatenation;

  if (stores[0]->rhs_code == MEM_REF
      && (info->rhs_code == INTEGER_CST
	  || info->rhs_code == BIT_INSERT_EXPR)
      && info->bitregion_start == stores[0]->bitregion_start
      && info->bitregion_end == stores[0]->bitregion_end
      && info->bitregion_end - info->bitregion_start <= MAX_FIXED_MODE_SIZE)
    return !string_concatenation;

  /* STRING_CST is compatible with INTEGER_CST if no BIT_INSERT_EXPR.  */
  if (info->rhs_code == STRING_CST
      && stores[0]->rhs_code == INTEGER_CST
      && stores[0]->bitsize == CHAR_BIT)
    return !bit_insertion;

  if (stores[0]->rhs_code == STRING_CST
      && info->rhs_code == INTEGER_CST
      && info->bitsize == CHAR_BIT)
    return !bit_insertion;

  return false;
}
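/* Illustrative example of the compatibility rules above: a byte store
   of an integer constant followed by a bit-field store of an SSA name
   (rhs_code INTEGER_CST then BIT_INSERT_EXPR) can share a group as
   long as no string concatenation is involved, whereas a char-array
   store from a STRING_CST cannot be grouped with a bit-field store,
   because string concatenation and bit insertion are mutually
   exclusive within one group.  */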
/* Helper method for merge_into and merge_overlapping to do
   the common part.  */

void
merged_store_group::do_merge (store_immediate_info *info)
{
  bitregion_start = MIN (bitregion_start, info->bitregion_start);
  bitregion_end = MAX (bitregion_end, info->bitregion_end);

  unsigned int this_align;
  unsigned HOST_WIDE_INT align_bitpos = 0;
  get_object_alignment_1 (gimple_assign_lhs (info->stmt),
			  &this_align, &align_bitpos);
  if (this_align > align)
    {
      align = this_align;
      align_base = info->bitpos - align_bitpos;
    }
  for (int i = 0; i < 2; ++i)
    {
      store_operand_info &op = info->ops[i];
      if (!op.base_addr)
	continue;

      get_object_alignment_1 (op.val, &this_align, &align_bitpos);
      if (this_align > load_align[i])
	{
	  load_align[i] = this_align;
	  load_align_base[i] = op.bitpos - align_bitpos;
	}
    }

  gimple *stmt = info->stmt;
  stores.safe_push (info);
  if (info->order > last_order)
    {
      last_order = info->order;
      last_stmt = stmt;
    }
  else if (info->order < first_order)
    {
      first_order = info->order;
      first_stmt = stmt;
    }

  if (info->bitpos != start + width)
    consecutive = false;

  /* We need to use extraction if there is any bit-field.  */
  if (info->rhs_code == BIT_INSERT_EXPR)
    {
      bit_insertion = true;
      gcc_assert (!string_concatenation);
    }

  /* We want to use concatenation if there is any string.  */
  if (info->rhs_code == STRING_CST)
    {
      string_concatenation = true;
      gcc_assert (!bit_insertion);
    }

  /* But we cannot use it if we don't have consecutive stores.  */
  if (!consecutive)
    string_concatenation = false;

  if (info->rhs_code != INTEGER_CST)
    only_constants = false;
}
/* Merge a store recorded by INFO into this merged store.
   The store is not overlapping with the existing recorded
   stores.  */

void
merged_store_group::merge_into (store_immediate_info *info)
{
  do_merge (info);

  /* Make sure we're inserting in the position we think we're inserting.  */
  gcc_assert (info->bitpos >= start + width
	      && info->bitregion_start <= bitregion_end);

  width = info->bitpos + info->bitsize - start;
}
/* Merge a store described by INFO into this merged store.
   INFO overlaps in some way with the current store (i.e. it's not contiguous
   which is handled by merged_store_group::merge_into).  */

void
merged_store_group::merge_overlapping (store_immediate_info *info)
{
  do_merge (info);

  /* If the store extends the size of the group, extend the width.  */
  if (info->bitpos + info->bitsize > start + width)
    width = info->bitpos + info->bitsize - start;
}
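/* Worked example (illustrative): for a group with start = 0 and
   width = 32, merge_into of a consecutive 16-bit store at bitpos 32
   yields width = 32 + 16 - 0 = 48, while merge_overlapping of a
   16-bit store at bitpos 24 only extends the width to
   24 + 16 - 0 = 40, because the first 8 bits of that store overlap
   the existing group.  */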
/* Go through all the recorded stores in this group in program order and
   apply their values to the VAL byte array to create the final merged
   value.  Return true if the operation succeeded.  */

bool
merged_store_group::apply_stores ()
{
  store_immediate_info *info;
  unsigned int i;

  /* Make sure we have more than one store in the group, otherwise we cannot
     merge anything.  */
  if (bitregion_start % BITS_PER_UNIT != 0
      || bitregion_end % BITS_PER_UNIT != 0
      || stores.length () == 1)
    return false;

  buf_size = (bitregion_end - bitregion_start) / BITS_PER_UNIT;

  /* Really do string concatenation for large strings only.  */
  if (buf_size <= MOVE_MAX)
    string_concatenation = false;

  /* Create a power-of-2-sized buffer for native_encode_expr.  */
  if (!string_concatenation)
    buf_size = 1 << ceil_log2 (buf_size);

  val = XNEWVEC (unsigned char, 2 * buf_size);
  mask = val + buf_size;
  memset (val, 0, buf_size);
  memset (mask, ~0U, buf_size);

  stores.qsort (sort_by_order);

  FOR_EACH_VEC_ELT (stores, i, info)
    {
      unsigned int pos_in_buffer = info->bitpos - bitregion_start;
      tree cst;
      if (info->ops[0].val && info->ops[0].base_addr == NULL_TREE)
	cst = info->ops[0].val;
      else if (info->ops[1].val && info->ops[1].base_addr == NULL_TREE)
	cst = info->ops[1].val;
      else
	cst = NULL_TREE;
      bool ret = true;
      if (cst && info->rhs_code != BIT_INSERT_EXPR)
	ret = encode_tree_to_bitpos (cst, val, info->bitsize, pos_in_buffer,
				     buf_size);
      unsigned char *m = mask + (pos_in_buffer / BITS_PER_UNIT);
      if (BYTES_BIG_ENDIAN)
	clear_bit_region_be (m, (BITS_PER_UNIT - 1
				 - (pos_in_buffer % BITS_PER_UNIT)),
			     info->bitsize);
      else
	clear_bit_region (m, pos_in_buffer % BITS_PER_UNIT, info->bitsize);
      if (cst && dump_file && (dump_flags & TDF_DETAILS))
	{
	  if (ret)
	    {
	      fputs ("After writing ", dump_file);
	      print_generic_expr (dump_file, cst, TDF_NONE);
	      fprintf (dump_file, " of size " HOST_WIDE_INT_PRINT_DEC
		       " at position %d\n", info->bitsize, pos_in_buffer);
	      fputs (" the merged value contains ", dump_file);
	      dump_char_array (dump_file, val, buf_size);
	      fputs (" the merged mask contains ", dump_file);
	      dump_char_array (dump_file, mask, buf_size);
	      if (bit_insertion)
		fputs (" bit insertion is required\n", dump_file);
	      if (string_concatenation)
		fputs (" string concatenation is required\n", dump_file);
	    }
	  else
	    fprintf (dump_file, "Failed to merge stores\n");
	}
      if (!ret)
	return false;
    }

  stores.qsort (sort_by_bitpos);
  return true;
}
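/* Worked example (illustrative): a little-endian group holding
     MEM[(char *)p_1] = 3;   -- whole byte 0
     p_1->f = 5;             -- 4-bit bit-field in byte 1
   ends up with val = { 0x03, 0x05 } and mask = { 0x00, 0xf0 }: mask
   bits that remain set mark bits never written by any store in the
   group, which the merged store must preserve (or handle via bit
   insertion).  */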
/* Structure describing the store chain.  */

class imm_store_chain_info
{
public:
  /* Doubly-linked list that imposes an order on chain processing.
     PNXP (prev's next pointer) points to the head of a list, or to
     the next field in the previous chain in the list.
     See pass_store_merging::m_stores_head for more rationale.  */
  imm_store_chain_info *next, **pnxp;
  tree base_addr;
  auto_vec<store_immediate_info *> m_store_info;
  auto_vec<merged_store_group *> m_merged_store_groups;

  imm_store_chain_info (imm_store_chain_info *&inspt, tree b_a)
  : next (inspt), pnxp (&inspt), base_addr (b_a)
  {
    inspt = this;
    if (next)
      {
	gcc_checking_assert (pnxp == next->pnxp);
	next->pnxp = &next;
      }
  }
  ~imm_store_chain_info ()
  {
    *pnxp = next;
    if (next)
      {
	gcc_checking_assert (&next == next->pnxp);
	next->pnxp = pnxp;
      }
  }
  bool terminate_and_process_chain ();
  bool try_coalesce_bswap (merged_store_group *, unsigned int, unsigned int,
			   unsigned int);
  bool coalesce_immediate_stores ();
  bool output_merged_store (merged_store_group *);
  bool output_merged_stores ();
};
const pass_data pass_data_tree_store_merging = {
  GIMPLE_PASS,     /* type */
  "store-merging", /* name */
  OPTGROUP_NONE,   /* optinfo_flags */
  TV_GIMPLE_STORE_MERGING, /* tv_id */
  PROP_ssa,	/* properties_required */
  0,		/* properties_provided */
  0,		/* properties_destroyed */
  0,		/* todo_flags_start */
  TODO_update_ssa, /* todo_flags_finish */
};
class pass_store_merging : public gimple_opt_pass
{
public:
  pass_store_merging (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_tree_store_merging, ctxt), m_stores_head (),
      m_n_chains (0), m_n_stores (0)
  {
  }

  /* Pass not supported for PDP-endian, nor for insane hosts or
     target character sizes where native_{encode,interpret}_expr
     doesn't work properly.  */
  bool
  gate (function *) final override
  {
    return flag_store_merging
	   && BYTES_BIG_ENDIAN == WORDS_BIG_ENDIAN
	   && CHAR_BIT == 8
	   && BITS_PER_UNIT == 8;
  }

  unsigned int execute (function *) final override;

private:
  hash_map<tree_operand_hash, class imm_store_chain_info *> m_stores;

  /* Form a doubly-linked stack of the elements of m_stores, so that
     we can iterate over them in a predictable way.  Using this order
     avoids extraneous differences in the compiler output just because
     of tree pointer variations (e.g. different chains end up in
     different positions of m_stores, so they are handled in different
     orders, so they allocate or release SSA names in different
     orders, and when they get reused, subsequent passes end up
     getting different SSA names, which may ultimately change
     decisions when going out of SSA).  */
  imm_store_chain_info *m_stores_head;

  /* The number of store chains currently tracked.  */
  unsigned m_n_chains;
  /* The number of stores currently tracked.  */
  unsigned m_n_stores;

  bool process_store (gimple *);
  bool terminate_and_process_chain (imm_store_chain_info *);
  bool terminate_all_aliasing_chains (imm_store_chain_info **, gimple *);
  bool terminate_and_process_all_chains ();
}; // class pass_store_merging
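/* Note (illustrative): flag_store_merging is controlled by the
   -fstore-merging option, which is on by default at typical
   optimization levels such as -O2.  The BYTES_BIG_ENDIAN ==
   WORDS_BIG_ENDIAN check rules out PDP-endian word orders, for which
   the flat byte-array view used by this pass would not match the
   in-memory layout.  */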
/* Terminate and process all recorded chains.  Return true if any changes
   were made.  */

bool
pass_store_merging::terminate_and_process_all_chains ()
{
  bool ret = false;
  while (m_stores_head)
    ret |= terminate_and_process_chain (m_stores_head);
  gcc_assert (m_stores.is_empty ());
  return ret;
}
/* Terminate all chains that are affected by the statement STMT.
   CHAIN_INFO is the chain we should ignore from the checks if
   non-NULL.  Return true if any changes were made.  */

bool
pass_store_merging::terminate_all_aliasing_chains (imm_store_chain_info
						     **chain_info,
						   gimple *stmt)
{
  bool ret = false;

  /* If the statement doesn't touch memory it can't alias.  */
  if (!gimple_vuse (stmt))
    return false;

  tree store_lhs = gimple_store_p (stmt) ? gimple_get_lhs (stmt) : NULL_TREE;
  ao_ref store_lhs_ref;
  ao_ref_init (&store_lhs_ref, store_lhs);
  for (imm_store_chain_info *next = m_stores_head, *cur = next; cur; cur = next)
    {
      next = cur->next;

      /* We already checked all the stores in chain_info and terminated the
	 chain if necessary.  Skip it here.  */
      if (chain_info && *chain_info == cur)
	continue;

      store_immediate_info *info;
      unsigned int i;
      FOR_EACH_VEC_ELT (cur->m_store_info, i, info)
	{
	  tree lhs = gimple_assign_lhs (info->stmt);
	  ao_ref lhs_ref;
	  ao_ref_init (&lhs_ref, lhs);
	  if (ref_maybe_used_by_stmt_p (stmt, &lhs_ref)
	      || stmt_may_clobber_ref_p_1 (stmt, &lhs_ref)
	      || (store_lhs && refs_may_alias_p_1 (&store_lhs_ref,
						   &lhs_ref, false)))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		{
		  fprintf (dump_file, "stmt causes chain termination:\n");
		  print_gimple_stmt (dump_file, stmt, 0);
		}
	      ret |= terminate_and_process_chain (cur);
	      break;
	    }
	}
    }

  return ret;
}
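/* Illustrative example: while recording the chain
     p_1->a = 1;
     p_1->b = 2;
   a later statement such as a call foo (p_1) that may read or clobber
   the stored bytes causes the chain to be terminated (and emitted if
   profitable) before that statement is processed further.  */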
/* Helper function.  Terminate the recorded chain storing to base object
   BASE.  Return true if the merging and output was successful.  The m_stores
   entry is removed after the processing in any case.  */

bool
pass_store_merging::terminate_and_process_chain (imm_store_chain_info *chain_info)
{
  m_n_stores -= chain_info->m_store_info.length ();
  m_n_chains--;
  bool ret = chain_info->terminate_and_process_chain ();
  m_stores.remove (chain_info->base_addr);
  delete chain_info;
  return ret;
}
/* Return true if stmts in between FIRST (inclusive) and LAST (exclusive)
   may clobber REF.  FIRST and LAST must have non-NULL vdef.  We want to
   be able to sink load of REF across stores between FIRST and LAST, up
   to right before LAST.  */

static bool
stmts_may_clobber_ref_p (gimple *first, gimple *last, tree ref)
{
  ao_ref r;
  ao_ref_init (&r, ref);
  unsigned int count = 0;
  tree vop = gimple_vdef (last);
  gimple *stmt;

  /* Return true conservatively if the basic blocks are different.  */
  if (gimple_bb (first) != gimple_bb (last))
    return true;

  do
    {
      stmt = SSA_NAME_DEF_STMT (vop);
      if (stmt_may_clobber_ref_p_1 (stmt, &r))
	return true;
      if (gimple_store_p (stmt)
	  && refs_anti_dependent_p (ref, gimple_get_lhs (stmt)))
	return true;
      /* Avoid quadratic compile time by bounding the number of checks
	 we perform.  */
      if (++count > MAX_STORE_ALIAS_CHECKS)
	return true;
      vop = gimple_vuse (stmt);
    }
  while (stmt != first);

  return false;
}
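/* Illustrative example: with the sequence
     t.a = 1;   <- FIRST
     s.b = 2;
     t.b = 3;   <- LAST
   asking whether the reference s.b may be clobbered walks the virtual
   def chain backwards from LAST towards FIRST and answers true
   because of the intervening store to s.b, so a load of s.b could not
   be sunk to just before LAST.  */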
2590 /* Return true if INFO->ops[IDX] is mergeable with the
2591 corresponding loads already in MERGED_STORE group.
2592 BASE_ADDR is the base address of the whole store group. */
2595 compatible_load_p (merged_store_group
*merged_store
,
2596 store_immediate_info
*info
,
2597 tree base_addr
, int idx
)
2599 store_immediate_info
*infof
= merged_store
->stores
[0];
2600 if (!info
->ops
[idx
].base_addr
2601 || maybe_ne (info
->ops
[idx
].bitpos
- infof
->ops
[idx
].bitpos
,
2602 info
->bitpos
- infof
->bitpos
)
2603 || !operand_equal_p (info
->ops
[idx
].base_addr
,
2604 infof
->ops
[idx
].base_addr
, 0))
2607 store_immediate_info
*infol
= merged_store
->stores
.last ();
2608 tree load_vuse
= gimple_vuse (info
->ops
[idx
].stmt
);
2609 /* In this case all vuses should be the same, e.g.
2610 _1 = s.a; _2 = s.b; _3 = _1 | 1; t.a = _3; _4 = _2 | 2; t.b = _4;
2612 _1 = s.a; _2 = s.b; t.a = _1; t.b = _2;
2613 and we can emit the coalesced load next to any of those loads. */
2614 if (gimple_vuse (infof
->ops
[idx
].stmt
) == load_vuse
2615 && gimple_vuse (infol
->ops
[idx
].stmt
) == load_vuse
)
2618 /* Otherwise, at least for now require that the load has the same
2619 vuse as the store. See following examples. */
2620 if (gimple_vuse (info
->stmt
) != load_vuse
)
2623 if (gimple_vuse (infof
->stmt
) != gimple_vuse (infof
->ops
[idx
].stmt
)
2625 && gimple_vuse (infol
->stmt
) != gimple_vuse (infol
->ops
[idx
].stmt
)))
2628 /* If the load is from the same location as the store, already
2629 the construction of the immediate chain info guarantees no intervening
2630 stores, so no further checks are needed. Example:
2631 _1 = s.a; _2 = _1 & -7; s.a = _2; _3 = s.b; _4 = _3 & -7; s.b = _4; */
2632 if (known_eq (info
->ops
[idx
].bitpos
, info
->bitpos
)
2633 && operand_equal_p (info
->ops
[idx
].base_addr
, base_addr
, 0))
2636 /* Otherwise, we need to punt if any of the loads can be clobbered by any
2637 of the stores in the group, or any other stores in between those.
2638 Previous calls to compatible_load_p ensured that for all the
2639 merged_store->stores IDX loads, no stmts starting with
2640 merged_store->first_stmt and ending right before merged_store->last_stmt
2641 clobbers those loads. */
2642 gimple
*first
= merged_store
->first_stmt
;
2643 gimple
*last
= merged_store
->last_stmt
;
2644 /* The stores are sorted by increasing store bitpos, so if info->stmt store
2645 comes before the so far first load, we'll be changing
2646 merged_store->first_stmt. In that case we need to give up if
2647 any of the earlier processed loads clobber with the stmts in the new
2649 if (info
->order
< merged_store
->first_order
)
2651 for (store_immediate_info
*infoc
: merged_store
->stores
)
2652 if (stmts_may_clobber_ref_p (info
->stmt
, first
, infoc
->ops
[idx
].val
))
2656 /* Similarly, we could change merged_store->last_stmt, so ensure
2657 in that case no stmts in the new range clobber any of the earlier
2659 else if (info
->order
> merged_store
->last_order
)
2661 for (store_immediate_info
*infoc
: merged_store
->stores
)
2662 if (stmts_may_clobber_ref_p (last
, info
->stmt
, infoc
->ops
[idx
].val
))
2666 /* And finally, we'd be adding a new load to the set, ensure it isn't
2667 clobbered in the new range. */
2668 if (stmts_may_clobber_ref_p (first
, last
, info
->ops
[idx
].val
))
2671 /* Otherwise, we are looking for:
2672 _1 = s.a; _2 = _1 ^ 15; t.a = _2; _3 = s.b; _4 = _3 ^ 15; t.b = _4;
2674 _1 = s.a; t.a = _1; _2 = s.b; t.b = _2; */
/* Add all refs loaded to compute VAL to REFS vector.  */

static void
gather_bswap_load_refs (vec<tree> *refs, tree val)
{
  if (TREE_CODE (val) != SSA_NAME)
    return;

  gimple *stmt = SSA_NAME_DEF_STMT (val);
  if (!is_gimple_assign (stmt))
    return;

  if (gimple_assign_load_p (stmt))
    {
      refs->safe_push (gimple_assign_rhs1 (stmt));
      return;
    }

  switch (gimple_assign_rhs_class (stmt))
    {
    case GIMPLE_BINARY_RHS:
      gather_bswap_load_refs (refs, gimple_assign_rhs2 (stmt));
      /* FALLTHRU */
    case GIMPLE_UNARY_RHS:
      gather_bswap_load_refs (refs, gimple_assign_rhs1 (stmt));
      break;
    default:
      break;
    }
}
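/* Illustrative example: for
     _1 = MEM[(char *)s_5];
     _2 = MEM[(char *)s_5 + 1B];
     _3 = _1 | _2;
   calling gather_bswap_load_refs on _3 recurses through the binary
   rhs (and falls through to the first operand) so that both memory
   references end up pushed into REFS.  */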
2709 /* Check if there are any stores in M_STORE_INFO after index I
2710 (where M_STORE_INFO must be sorted by sort_by_bitpos) that overlap
2711 a potential group ending with END that have their order
2712 smaller than LAST_ORDER. ALL_INTEGER_CST_P is true if
2713 all the stores already merged and the one under consideration
2714 have rhs_code of INTEGER_CST. Return true if there are no such stores.
2716 MEM[(long long int *)p_28] = 0;
2717 MEM[(long long int *)p_28 + 8B] = 0;
2718 MEM[(long long int *)p_28 + 16B] = 0;
2719 MEM[(long long int *)p_28 + 24B] = 0;
2721 MEM[(int *)p_28 + 8B] = _129;
2722 MEM[(int *)p_28].a = -1;
2724 MEM[(long long int *)p_28] = 0;
2725 MEM[(int *)p_28].a = -1;
2726 stmts in the current group and need to consider if it is safe to
2727 add MEM[(long long int *)p_28 + 8B] = 0; store into the same group.
2728 There is an overlap between that store and the MEM[(int *)p_28 + 8B] = _129;
2729 store though, so if we add the MEM[(long long int *)p_28 + 8B] = 0;
2730 into the group and merging of those 3 stores is successful, merged
2731 stmts will be emitted at the latest store from that group, i.e.
2732 LAST_ORDER, which is the MEM[(int *)p_28].a = -1; store.
2733 The MEM[(int *)p_28 + 8B] = _129; store that originally follows
2734 the MEM[(long long int *)p_28 + 8B] = 0; would now be before it,
2735 so we need to refuse merging MEM[(long long int *)p_28 + 8B] = 0;
2736 into the group. That way it will be its own store group and will
2737 not be touched. If ALL_INTEGER_CST_P and there are overlapping
2738 INTEGER_CST stores, those are mergeable using merge_overlapping,
2739 so don't return false for those.
2741 Similarly, check stores from FIRST_EARLIER (inclusive) to END_EARLIER
2742 (exclusive), whether they don't overlap the bitrange START to END
2743 and have order in between FIRST_ORDER and LAST_ORDER. This is to
2744 prevent merging in cases like:
2745 MEM <char[12]> [&b + 8B] = {};
2746 MEM[(short *) &b] = 5;
2748 MEM <long long unsigned int> [&b + 2B] = _5;
2749 MEM[(char *)&b + 16B] = 88;
2750 MEM[(int *)&b + 20B] = 1;
2751 The = {} store comes in sort_by_bitpos before the = 88 store, and can't
2752 be merged with it, because the = _5 store overlaps these and is in between
2753 them in sort_by_order ordering. If it was merged, the merged store would
2754 go after the = _5 store and thus change behavior. */
static bool
check_no_overlap (const vec<store_immediate_info *> &m_store_info,
		  unsigned int i,
		  bool all_integer_cst_p, unsigned int first_order,
		  unsigned int last_order, unsigned HOST_WIDE_INT start,
		  unsigned HOST_WIDE_INT end, unsigned int first_earlier,
		  unsigned end_earlier)
{
  unsigned int len = m_store_info.length ();
  for (unsigned int j = first_earlier; j < end_earlier; j++)
    {
      store_immediate_info *info = m_store_info[j];
      if (info->order > first_order
	  && info->order < last_order
	  && info->bitpos + info->bitsize > start)
	return false;
    }
  for (++i; i < len; ++i)
    {
      store_immediate_info *info = m_store_info[i];
      if (info->bitpos >= end)
	break;
      if (info->order < last_order
	  && (!all_integer_cst_p || info->rhs_code != INTEGER_CST))
	return false;
    }
  return true;
}
2785 /* Return true if m_store_info[first] and at least one following store
2786 form a group which store try_size bitsize value which is byte swapped
2787 from a memory load or some value, or identity from some value.
2788 This uses the bswap pass APIs. */
2791 imm_store_chain_info::try_coalesce_bswap (merged_store_group
*merged_store
,
2793 unsigned int try_size
,
2794 unsigned int first_earlier
)
2796 unsigned int len
= m_store_info
.length (), last
= first
;
2797 unsigned HOST_WIDE_INT width
= m_store_info
[first
]->bitsize
;
2798 if (width
>= try_size
)
2800 for (unsigned int i
= first
+ 1; i
< len
; ++i
)
2802 if (m_store_info
[i
]->bitpos
!= m_store_info
[first
]->bitpos
+ width
2803 || m_store_info
[i
]->lp_nr
!= merged_store
->lp_nr
2804 || m_store_info
[i
]->ins_stmt
== NULL
)
2806 width
+= m_store_info
[i
]->bitsize
;
2807 if (width
>= try_size
)
2813 if (width
!= try_size
)
2816 bool allow_unaligned
2817 = !STRICT_ALIGNMENT
&& param_store_merging_allow_unaligned
;
2818 /* Punt if the combined store would not be aligned and we need alignment. */
2819 if (!allow_unaligned
)
2821 unsigned int align
= merged_store
->align
;
2822 unsigned HOST_WIDE_INT align_base
= merged_store
->align_base
;
2823 for (unsigned int i
= first
+ 1; i
<= last
; ++i
)
2825 unsigned int this_align
;
2826 unsigned HOST_WIDE_INT align_bitpos
= 0;
2827 get_object_alignment_1 (gimple_assign_lhs (m_store_info
[i
]->stmt
),
2828 &this_align
, &align_bitpos
);
2829 if (this_align
> align
)
2832 align_base
= m_store_info
[i
]->bitpos
- align_bitpos
;
2835 unsigned HOST_WIDE_INT align_bitpos
2836 = (m_store_info
[first
]->bitpos
- align_base
) & (align
- 1);
2838 align
= least_bit_hwi (align_bitpos
);
2839 if (align
< try_size
)
2846 case 16: type
= uint16_type_node
; break;
2847 case 32: type
= uint32_type_node
; break;
2848 case 64: type
= uint64_type_node
; break;
2849 default: gcc_unreachable ();
2851 struct symbolic_number n
;
2852 gimple
*ins_stmt
= NULL
;
2853 int vuse_store
= -1;
2854 unsigned int first_order
= merged_store
->first_order
;
2855 unsigned int last_order
= merged_store
->last_order
;
2856 gimple
*first_stmt
= merged_store
->first_stmt
;
2857 gimple
*last_stmt
= merged_store
->last_stmt
;
2858 unsigned HOST_WIDE_INT end
= merged_store
->start
+ merged_store
->width
;
2859 store_immediate_info
*infof
= m_store_info
[first
];
2861 for (unsigned int i
= first
; i
<= last
; ++i
)
2863 store_immediate_info
*info
= m_store_info
[i
];
2864 struct symbolic_number this_n
= info
->n
;
2866 if (!this_n
.base_addr
)
2867 this_n
.range
= try_size
/ BITS_PER_UNIT
;
2869 /* Update vuse in case it has changed by output_merged_stores. */
2870 this_n
.vuse
= gimple_vuse (info
->ins_stmt
);
2871 unsigned int bitpos
= info
->bitpos
- infof
->bitpos
;
2872 if (!do_shift_rotate (LSHIFT_EXPR
, &this_n
,
2874 ? try_size
- info
->bitsize
- bitpos
2877 if (this_n
.base_addr
&& vuse_store
)
2880 for (j
= first
; j
<= last
; ++j
)
2881 if (this_n
.vuse
== gimple_vuse (m_store_info
[j
]->stmt
))
2885 if (vuse_store
== 1)
2893 ins_stmt
= info
->ins_stmt
;
2897 if (n
.base_addr
&& n
.vuse
!= this_n
.vuse
)
2899 if (vuse_store
== 0)
2903 if (info
->order
> last_order
)
2905 last_order
= info
->order
;
2906 last_stmt
= info
->stmt
;
2908 else if (info
->order
< first_order
)
2910 first_order
= info
->order
;
2911 first_stmt
= info
->stmt
;
2913 end
= MAX (end
, info
->bitpos
+ info
->bitsize
);
2915 ins_stmt
= perform_symbolic_merge (ins_stmt
, &n
, info
->ins_stmt
,
2916 &this_n
, &n
, BIT_IOR_EXPR
);
2917 if (ins_stmt
== NULL
)
2922 uint64_t cmpxchg
, cmpnop
;
2924 find_bswap_or_nop_finalize (&n
, &cmpxchg
, &cmpnop
, &cast64_to_32
);
2926 /* A complete byte swap should make the symbolic number to start with
2927 the largest digit in the highest order byte. Unchanged symbolic
2928 number indicates a read with same endianness as target architecture. */
2929 if (n
.n
!= cmpnop
&& n
.n
!= cmpxchg
)
2936 if (n
.base_addr
== NULL_TREE
&& !is_gimple_val (n
.src
))
2939 if (!check_no_overlap (m_store_info
, last
, false, first_order
, last_order
,
2940 merged_store
->start
, end
, first_earlier
, first
))
2943 /* Don't handle memory copy this way if normal non-bswap processing
2944 would handle it too. */
2945 if (n
.n
== cmpnop
&& (unsigned) n
.n_ops
== last
- first
+ 1)
2948 for (i
= first
; i
<= last
; ++i
)
2949 if (m_store_info
[i
]->rhs_code
!= MEM_REF
)
2959 /* Will emit LROTATE_EXPR. */
2962 if (builtin_decl_explicit_p (BUILT_IN_BSWAP32
)
2963 && optab_handler (bswap_optab
, SImode
) != CODE_FOR_nothing
)
2967 if (builtin_decl_explicit_p (BUILT_IN_BSWAP64
)
2968 && optab_handler (bswap_optab
, DImode
) != CODE_FOR_nothing
)
2975 if (!allow_unaligned
&& n
.base_addr
)
2977 unsigned int align
= get_object_alignment (n
.src
);
2978 if (align
< try_size
)
2982 /* If each load has vuse of the corresponding store, need to verify
2983 the loads can be sunk right before the last store. */
2984 if (vuse_store
== 1)
2986 auto_vec
<tree
, 64> refs
;
2987 for (unsigned int i
= first
; i
<= last
; ++i
)
2988 gather_bswap_load_refs (&refs
,
2989 gimple_assign_rhs1 (m_store_info
[i
]->stmt
));
2991 for (tree ref
: refs
)
2992 if (stmts_may_clobber_ref_p (first_stmt
, last_stmt
, ref
))
2998 infof
->ins_stmt
= ins_stmt
;
2999 for (unsigned int i
= first
; i
<= last
; ++i
)
3001 m_store_info
[i
]->rhs_code
= n
.n
== cmpxchg
? LROTATE_EXPR
: NOP_EXPR
;
3002 m_store_info
[i
]->ops
[0].base_addr
= NULL_TREE
;
3003 m_store_info
[i
]->ops
[1].base_addr
= NULL_TREE
;
3005 merged_store
->merge_into (m_store_info
[i
]);
3011 /* Go through the candidate stores recorded in m_store_info and merge them
3012 into merged_store_group objects recorded into m_merged_store_groups
3013 representing the widened stores. Return true if coalescing was successful
3014 and the number of widened stores is fewer than the original number
3018 imm_store_chain_info::coalesce_immediate_stores ()
3020 /* Anything less can't be processed. */
3021 if (m_store_info
.length () < 2)
3024 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3025 fprintf (dump_file
, "Attempting to coalesce %u stores in chain\n",
3026 m_store_info
.length ());
3028 store_immediate_info
*info
;
3029 unsigned int i
, ignore
= 0;
3030 unsigned int first_earlier
= 0;
3031 unsigned int end_earlier
= 0;
3033 /* Order the stores by the bitposition they write to. */
3034 m_store_info
.qsort (sort_by_bitpos
);
3036 info
= m_store_info
[0];
3037 merged_store_group
*merged_store
= new merged_store_group (info
);
3038 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3039 fputs ("New store group\n", dump_file
);
3041 FOR_EACH_VEC_ELT (m_store_info
, i
, info
)
3043 unsigned HOST_WIDE_INT new_bitregion_start
, new_bitregion_end
;
3048 while (first_earlier
< end_earlier
3049 && (m_store_info
[first_earlier
]->bitpos
3050 + m_store_info
[first_earlier
]->bitsize
3051 <= merged_store
->start
))
3054 /* First try to handle group of stores like:
3059 using the bswap framework. */
3060 if (info
->bitpos
== merged_store
->start
+ merged_store
->width
3061 && merged_store
->stores
.length () == 1
3062 && merged_store
->stores
[0]->ins_stmt
!= NULL
3063 && info
->lp_nr
== merged_store
->lp_nr
3064 && info
->ins_stmt
!= NULL
)
3066 unsigned int try_size
;
3067 for (try_size
= 64; try_size
>= 16; try_size
>>= 1)
3068 if (try_coalesce_bswap (merged_store
, i
- 1, try_size
,
3074 ignore
= i
+ merged_store
->stores
.length () - 1;
3075 m_merged_store_groups
.safe_push (merged_store
);
3076 if (ignore
< m_store_info
.length ())
3078 merged_store
= new merged_store_group (m_store_info
[ignore
]);
3079 end_earlier
= ignore
;
3082 merged_store
= NULL
;
3088 = MIN (merged_store
->bitregion_start
, info
->bitregion_start
);
3090 = MAX (merged_store
->bitregion_end
, info
->bitregion_end
);
3092 if (info
->order
>= merged_store
->first_nonmergeable_order
3093 || (((new_bitregion_end
- new_bitregion_start
+ 1) / BITS_PER_UNIT
)
3094 > (unsigned) param_store_merging_max_size
))
3099 Overlapping stores. */
3100 else if (IN_RANGE (info
->bitpos
, merged_store
->start
,
3101 merged_store
->start
+ merged_store
->width
- 1)
3102 /* |---store 1---||---store 2---|
3103 Handle also the consecutive INTEGER_CST stores case here,
3104 as we have here the code to deal with overlaps. */
3105 || (info
->bitregion_start
<= merged_store
->bitregion_end
3106 && info
->rhs_code
== INTEGER_CST
3107 && merged_store
->only_constants
3108 && merged_store
->can_be_merged_into (info
)))
3110 /* Only allow overlapping stores of constants. */
3111 if (info
->rhs_code
== INTEGER_CST
3112 && merged_store
->only_constants
3113 && info
->lp_nr
== merged_store
->lp_nr
)
3115 unsigned int first_order
3116 = MIN (merged_store
->first_order
, info
->order
);
3117 unsigned int last_order
3118 = MAX (merged_store
->last_order
, info
->order
);
3119 unsigned HOST_WIDE_INT end
3120 = MAX (merged_store
->start
+ merged_store
->width
,
3121 info
->bitpos
+ info
->bitsize
);
3122 if (check_no_overlap (m_store_info
, i
, true, first_order
,
3123 last_order
, merged_store
->start
, end
,
3124 first_earlier
, end_earlier
))
3126 /* check_no_overlap call above made sure there are no
3127 overlapping stores with non-INTEGER_CST rhs_code
3128 in between the first and last of the stores we've
3129 just merged. If there are any INTEGER_CST rhs_code
3130 stores in between, we need to merge_overlapping them
3131 even if in the sort_by_bitpos order there are other
3132 overlapping stores in between. Keep those stores as is.
3134 MEM[(int *)p_28] = 0;
3135 MEM[(char *)p_28 + 3B] = 1;
3136 MEM[(char *)p_28 + 1B] = 2;
3137 MEM[(char *)p_28 + 2B] = MEM[(char *)p_28 + 6B];
3138 We can't merge the zero store with the store of two and
3139 not merge anything else, because the store of one is
3140 in the original order in between those two, but in
3141 store_by_bitpos order it comes after the last store that
3142 we can't merge with them. We can merge the first 3 stores
3143 and keep the last store as is though. */
3144 unsigned int len
= m_store_info
.length ();
3145 unsigned int try_order
= last_order
;
3146 unsigned int first_nonmergeable_order
;
3148 bool last_iter
= false;
3152 unsigned int max_order
= 0;
3153 unsigned int min_order
= first_order
;
3154 unsigned first_nonmergeable_int_order
= ~0U;
3155 unsigned HOST_WIDE_INT this_end
= end
;
3157 first_nonmergeable_order
= ~0U;
3158 for (unsigned int j
= i
+ 1; j
< len
; ++j
)
3160 store_immediate_info
*info2
= m_store_info
[j
];
3161 if (info2
->bitpos
>= this_end
)
3163 if (info2
->order
< try_order
)
3165 if (info2
->rhs_code
!= INTEGER_CST
3166 || info2
->lp_nr
!= merged_store
->lp_nr
)
3168 /* Normally check_no_overlap makes sure this
3169 doesn't happen, but if end grows below,
3170 then we need to process more stores than
3171 check_no_overlap verified. Example:
3172 MEM[(int *)p_5] = 0;
3173 MEM[(short *)p_5 + 3B] = 1;
3174 MEM[(char *)p_5 + 4B] = _9;
3175 MEM[(char *)p_5 + 2B] = 2; */
3180 min_order
= MIN (min_order
, info2
->order
);
3181 this_end
= MAX (this_end
,
3182 info2
->bitpos
+ info2
->bitsize
);
3184 else if (info2
->rhs_code
== INTEGER_CST
3185 && info2
->lp_nr
== merged_store
->lp_nr
3188 max_order
= MAX (max_order
, info2
->order
+ 1);
3189 first_nonmergeable_int_order
3190 = MIN (first_nonmergeable_int_order
,
3194 first_nonmergeable_order
3195 = MIN (first_nonmergeable_order
, info2
->order
);
3198 && !check_no_overlap (m_store_info
, len
- 1, true,
3199 min_order
, try_order
,
3200 merged_store
->start
, this_end
,
3201 first_earlier
, end_earlier
))
3205 if (last_order
== try_order
)
3207 /* If this failed, but only because we grew
3208 try_order, retry with the last working one,
3209 so that we merge at least something. */
3210 try_order
= last_order
;
3214 last_order
= try_order
;
3215 /* Retry with a larger try_order to see if we could
3216 merge some further INTEGER_CST stores. */
3218 && (first_nonmergeable_int_order
3219 < first_nonmergeable_order
))
3221 try_order
= MIN (max_order
,
3222 first_nonmergeable_order
);
3225 merged_store
->first_nonmergeable_order
);
3226 if (try_order
> last_order
&& ++attempts
< 16)
3229 first_nonmergeable_order
3230 = MIN (first_nonmergeable_order
,
3231 first_nonmergeable_int_order
);
3239 merged_store
->merge_overlapping (info
);
3241 merged_store
->first_nonmergeable_order
3242 = MIN (merged_store
->first_nonmergeable_order
,
3243 first_nonmergeable_order
);
3245 for (unsigned int j
= i
+ 1; j
<= k
; j
++)
3247 store_immediate_info
*info2
= m_store_info
[j
];
3248 gcc_assert (info2
->bitpos
< end
);
3249 if (info2
->order
< last_order
)
3251 gcc_assert (info2
->rhs_code
== INTEGER_CST
);
3253 merged_store
->merge_overlapping (info2
);
3255 /* Other stores are kept and not merged in any
3264 /* |---store 1---||---store 2---|
3265 This store is consecutive to the previous one.
3266 Merge it into the current store group. There can be gaps in between
3267 the stores, but there can't be gaps in between bitregions. */
3268 else if (info
->bitregion_start
<= merged_store
->bitregion_end
3269 && merged_store
->can_be_merged_into (info
))
3271 store_immediate_info
*infof
= merged_store
->stores
[0];
3273 /* All the rhs_code ops that take 2 operands are commutative,
3274 swap the operands if it could make the operands compatible. */
3275 if (infof
->ops
[0].base_addr
3276 && infof
->ops
[1].base_addr
3277 && info
->ops
[0].base_addr
3278 && info
->ops
[1].base_addr
3279 && known_eq (info
->ops
[1].bitpos
- infof
->ops
[0].bitpos
,
3280 info
->bitpos
- infof
->bitpos
)
3281 && operand_equal_p (info
->ops
[1].base_addr
,
3282 infof
->ops
[0].base_addr
, 0))
3284 std::swap (info
->ops
[0], info
->ops
[1]);
3285 info
->ops_swapped_p
= true;
3287 if (check_no_overlap (m_store_info
, i
, false,
3288 MIN (merged_store
->first_order
, info
->order
),
3289 MAX (merged_store
->last_order
, info
->order
),
3290 merged_store
->start
,
3291 MAX (merged_store
->start
+ merged_store
->width
,
3292 info
->bitpos
+ info
->bitsize
),
3293 first_earlier
, end_earlier
))
3295 /* Turn MEM_REF into BIT_INSERT_EXPR for bit-field stores. */
3296 if (info
->rhs_code
== MEM_REF
&& infof
->rhs_code
!= MEM_REF
)
3298 info
->rhs_code
= BIT_INSERT_EXPR
;
3299 info
->ops
[0].val
= gimple_assign_rhs1 (info
->stmt
);
3300 info
->ops
[0].base_addr
= NULL_TREE
;
3302 else if (infof
->rhs_code
== MEM_REF
&& info
->rhs_code
!= MEM_REF
)
3304 for (store_immediate_info
*infoj
: merged_store
->stores
)
3306 infoj
->rhs_code
= BIT_INSERT_EXPR
;
3307 infoj
->ops
[0].val
= gimple_assign_rhs1 (infoj
->stmt
);
3308 infoj
->ops
[0].base_addr
= NULL_TREE
;
3310 merged_store
->bit_insertion
= true;
3312 if ((infof
->ops
[0].base_addr
3313 ? compatible_load_p (merged_store
, info
, base_addr
, 0)
3314 : !info
->ops
[0].base_addr
)
3315 && (infof
->ops
[1].base_addr
3316 ? compatible_load_p (merged_store
, info
, base_addr
, 1)
3317 : !info
->ops
[1].base_addr
))
3319 merged_store
->merge_into (info
);
3325 /* |---store 1---| <gap> |---store 2---|.
3326 Gap between stores or the rhs not compatible. Start a new group. */
3328 /* Try to apply all the stores recorded for the group to determine
3329 the bitpattern they write and discard it if that fails.
3330 This will also reject single-store groups. */
3331 if (merged_store
->apply_stores ())
3332 m_merged_store_groups
.safe_push (merged_store
);
3334 delete merged_store
;
3336 merged_store
= new merged_store_group (info
);
3338 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3339 fputs ("New store group\n", dump_file
);
3342 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3344 fprintf (dump_file
, "Store %u:\nbitsize:" HOST_WIDE_INT_PRINT_DEC
3345 " bitpos:" HOST_WIDE_INT_PRINT_DEC
" val:",
3346 i
, info
->bitsize
, info
->bitpos
);
3347 print_generic_expr (dump_file
, gimple_assign_rhs1 (info
->stmt
));
3348 fputc ('\n', dump_file
);
3352 /* Record or discard the last store group. */
3355 if (merged_store
->apply_stores ())
3356 m_merged_store_groups
.safe_push (merged_store
);
3358 delete merged_store
;
3361 gcc_assert (m_merged_store_groups
.length () <= m_store_info
.length ());
3364 = !m_merged_store_groups
.is_empty ()
3365 && m_merged_store_groups
.length () < m_store_info
.length ();
3367 if (success
&& dump_file
)
3368 fprintf (dump_file
, "Coalescing successful!\nMerged into %u stores\n",
3369 m_merged_store_groups
.length ());
/* Return the type to use for the merged stores or loads described by STMTS.
   This is needed to get the alias sets right.  If IS_LOAD, look for rhs,
   otherwise lhs.  Additionally set *CLIQUEP and *BASEP to MR_DEPENDENCE_*
   of the MEM_REFs if any.  */

static tree
get_alias_type_for_stmts (vec<gimple *> &stmts, bool is_load,
			  unsigned short *cliquep, unsigned short *basep)
{
  gimple *stmt;
  unsigned int i;
  tree type = NULL_TREE;
  tree ret = NULL_TREE;
  *cliquep = 0;
  *basep = 0;

  FOR_EACH_VEC_ELT (stmts, i, stmt)
    {
      tree ref = is_load ? gimple_assign_rhs1 (stmt)
			 : gimple_assign_lhs (stmt);
      tree type1 = reference_alias_ptr_type (ref);
      tree base = get_base_address (ref);

      if (i == 0)
	{
	  if (TREE_CODE (base) == MEM_REF)
	    {
	      *cliquep = MR_DEPENDENCE_CLIQUE (base);
	      *basep = MR_DEPENDENCE_BASE (base);
	    }
	  ret = type = type1;
	  continue;
	}
      if (!alias_ptr_types_compatible_p (type, type1))
	ret = ptr_type_node;
      if (TREE_CODE (base) != MEM_REF
	  || *cliquep != MR_DEPENDENCE_CLIQUE (base)
	  || *basep != MR_DEPENDENCE_BASE (base))
	{
	  *cliquep = 0;
	  *basep = 0;
	}
    }
  return ret;
}
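/* Illustrative example: if one constituent store writes through
   'int *' and another through 'unsigned int *', the alias pointer
   types are compatible and the first type is kept for the merged
   access; if one writes through 'int *' and another through
   'float *', the merged access falls back to ptr_type_node so that it
   conservatively conflicts with both original accesses.  */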
/* Return the location_t information we can find among the statements
   in STMTS.  */

static location_t
get_location_for_stmts (vec<gimple *> &stmts)
{
  for (gimple *stmt : stmts)
    if (gimple_has_location (stmt))
      return gimple_location (stmt);

  return UNKNOWN_LOCATION;
}
/* Used to describe a store resulting from splitting a wide store in smaller
   regularly-sized stores in split_group.  */

class split_store
{
public:
  unsigned HOST_WIDE_INT bytepos;
  unsigned HOST_WIDE_INT size;
  unsigned HOST_WIDE_INT align;
  auto_vec<store_immediate_info *> orig_stores;
  /* True if there is a single orig stmt covering the whole split store.  */
  bool orig;
  split_store (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
	       unsigned HOST_WIDE_INT);
};
/* Simple constructor.  */

split_store::split_store (unsigned HOST_WIDE_INT bp,
			  unsigned HOST_WIDE_INT sz,
			  unsigned HOST_WIDE_INT al)
  : bytepos (bp), size (sz), align (al), orig (false)
{
  orig_stores.create (0);
}
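/* Illustrative example (assuming a 64-bit MAX_STORE_BITSIZE): a
   16-byte group starting at an 8-byte-aligned position is typically
   described by two split_store records of 8 bytes each, while the
   same group starting one byte past such a boundary, with unaligned
   stores disallowed, is split as 1 + 2 + 4 + 8 + 1 bytes, following
   the alignment available at each successive position.  */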
3459 /* Record all stores in GROUP that write to the region starting at BITPOS and
3460 is of size BITSIZE. Record infos for such statements in STORES if
3461 non-NULL. The stores in GROUP must be sorted by bitposition. Return INFO
3462 if there is exactly one original store in the range (in that case ignore
3463 clobber stmts, unless there are only clobber stmts). */
3465 static store_immediate_info
*
3466 find_constituent_stores (class merged_store_group
*group
,
3467 vec
<store_immediate_info
*> *stores
,
3468 unsigned int *first
,
3469 unsigned HOST_WIDE_INT bitpos
,
3470 unsigned HOST_WIDE_INT bitsize
)
3472 store_immediate_info
*info
, *ret
= NULL
;
3474 bool second
= false;
3475 bool update_first
= true;
3476 unsigned HOST_WIDE_INT end
= bitpos
+ bitsize
;
3477 for (i
= *first
; group
->stores
.iterate (i
, &info
); ++i
)
3479 unsigned HOST_WIDE_INT stmt_start
= info
->bitpos
;
3480 unsigned HOST_WIDE_INT stmt_end
= stmt_start
+ info
->bitsize
;
3481 if (stmt_end
<= bitpos
)
3483 /* BITPOS passed to this function never decreases from within the
3484 same split_group call, so optimize and don't scan info records
3485 which are known to end before or at BITPOS next time.
3486 Only do it if all stores before this one also pass this. */
3492 update_first
= false;
3494 /* The stores in GROUP are ordered by bitposition so if we're past
3495 the region for this group return early. */
3496 if (stmt_start
>= end
)
3499 if (gimple_clobber_p (info
->stmt
))
3502 stores
->safe_push (info
);
3509 stores
->safe_push (info
);
3510 if (ret
&& !gimple_clobber_p (ret
->stmt
))
3516 else if (ret
&& !gimple_clobber_p (ret
->stmt
))
3524 /* Return how many SSA_NAMEs used to compute value to store in the INFO
3525 store have multiple uses. If any SSA_NAME has multiple uses, also
3526 count statements needed to compute it. */
3529 count_multiple_uses (store_immediate_info
*info
)
3531 gimple
*stmt
= info
->stmt
;
3533 switch (info
->rhs_code
)
3541 if (info
->bit_not_p
)
3543 if (!has_single_use (gimple_assign_rhs1 (stmt
)))
3544 ret
= 1; /* Fall through below to return
3545 the BIT_NOT_EXPR stmt and then
3546 BIT_{AND,IOR,XOR}_EXPR and anything it
3549 /* stmt is after this the BIT_NOT_EXPR. */
3550 stmt
= SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt
));
3552 if (!has_single_use (gimple_assign_rhs1 (stmt
)))
3554 ret
+= 1 + info
->ops
[0].bit_not_p
;
3555 if (info
->ops
[1].base_addr
)
3556 ret
+= 1 + info
->ops
[1].bit_not_p
;
3559 stmt
= SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt
));
3560 /* stmt is now the BIT_*_EXPR. */
3561 if (!has_single_use (gimple_assign_rhs1 (stmt
)))
3562 ret
+= 1 + info
->ops
[info
->ops_swapped_p
].bit_not_p
;
3563 else if (info
->ops
[info
->ops_swapped_p
].bit_not_p
)
3565 gimple
*stmt2
= SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt
));
3566 if (!has_single_use (gimple_assign_rhs1 (stmt2
)))
3569 if (info
->ops
[1].base_addr
== NULL_TREE
)
3571 gcc_checking_assert (!info
->ops_swapped_p
);
3574 if (!has_single_use (gimple_assign_rhs2 (stmt
)))
3575 ret
+= 1 + info
->ops
[1 - info
->ops_swapped_p
].bit_not_p
;
3576 else if (info
->ops
[1 - info
->ops_swapped_p
].bit_not_p
)
3578 gimple
*stmt2
= SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt
));
3579 if (!has_single_use (gimple_assign_rhs1 (stmt2
)))
3584 if (!has_single_use (gimple_assign_rhs1 (stmt
)))
3585 return 1 + info
->ops
[0].bit_not_p
;
3586 else if (info
->ops
[0].bit_not_p
)
3588 stmt
= SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt
));
3589 if (!has_single_use (gimple_assign_rhs1 (stmt
)))
3593 case BIT_INSERT_EXPR
:
3594 return has_single_use (gimple_assign_rhs1 (stmt
)) ? 0 : 1;
3600 /* Split a merged store described by GROUP by populating the SPLIT_STORES
3601 vector (if non-NULL) with split_store structs describing the byte offset
3602 (from the base), the bit size and alignment of each store as well as the
3603 original statements involved in each such split group.
3604 This is to separate the splitting strategy from the statement
3605 building/emission/linking done in output_merged_store.
3606 Return number of new stores.
3607 If ALLOW_UNALIGNED_STORE is false, then all stores must be aligned.
3608 If ALLOW_UNALIGNED_LOAD is false, then all loads must be aligned.
3609 BZERO_FIRST may be true only when the first store covers the whole group
3610 and clears it; if BZERO_FIRST is true, keep that first store in the set
3611 unmodified and emit further stores for the overrides only.
3612 If SPLIT_STORES is NULL, it is just a dry run to count number of
3616 split_group (merged_store_group
*group
, bool allow_unaligned_store
,
3617 bool allow_unaligned_load
, bool bzero_first
,
3618 vec
<split_store
*> *split_stores
,
3619 unsigned *total_orig
,
3620 unsigned *total_new
)
3622 unsigned HOST_WIDE_INT pos
= group
->bitregion_start
;
3623 unsigned HOST_WIDE_INT size
= group
->bitregion_end
- pos
;
3624 unsigned HOST_WIDE_INT bytepos
= pos
/ BITS_PER_UNIT
;
3625 unsigned HOST_WIDE_INT group_align
= group
->align
;
3626 unsigned HOST_WIDE_INT align_base
= group
->align_base
;
3627 unsigned HOST_WIDE_INT group_load_align
= group_align
;
3628 bool any_orig
= false;
3630 gcc_assert ((size
% BITS_PER_UNIT
== 0) && (pos
% BITS_PER_UNIT
== 0));
3632 /* For bswap framework using sets of stores, all the checking has been done
3633 earlier in try_coalesce_bswap and the result always needs to be emitted
as a single store.  Likewise for string concatenation.  */
3635 if (group
->stores
[0]->rhs_code
== LROTATE_EXPR
3636 || group
->stores
[0]->rhs_code
== NOP_EXPR
3637 || group
->string_concatenation
)
3639 gcc_assert (!bzero_first
);
3642 /* Avoid the old/new stmt count heuristics. It should be
3643 always beneficial. */
3650 unsigned HOST_WIDE_INT align_bitpos
3651 = (group
->start
- align_base
) & (group_align
- 1);
3652 unsigned HOST_WIDE_INT align
= group_align
;
3654 align
= least_bit_hwi (align_bitpos
);
3655 bytepos
= group
->start
/ BITS_PER_UNIT
;
3657 = new split_store (bytepos
, group
->width
, align
);
3658 unsigned int first
= 0;
3659 find_constituent_stores (group
, &store
->orig_stores
,
3660 &first
, group
->start
, group
->width
);
3661 split_stores
->safe_push (store
);
3667 unsigned int ret
= 0, first
= 0;
3668 unsigned HOST_WIDE_INT try_pos
= bytepos
;
3673 store_immediate_info
*info
= group
->stores
[0];
3676 total_orig
[0] = 1; /* The orig store. */
3677 info
= group
->stores
[0];
3678 if (info
->ops
[0].base_addr
)
3680 if (info
->ops
[1].base_addr
)
3682 switch (info
->rhs_code
)
3687 total_orig
[0]++; /* The orig BIT_*_EXPR stmt. */
3692 total_orig
[0] *= group
->stores
.length ();
3694 FOR_EACH_VEC_ELT (group
->stores
, i
, info
)
3696 total_new
[0] += count_multiple_uses (info
);
3697 total_orig
[0] += (info
->bit_not_p
3698 + info
->ops
[0].bit_not_p
3699 + info
->ops
[1].bit_not_p
);
3703 if (!allow_unaligned_load
)
3704 for (int i
= 0; i
< 2; ++i
)
3705 if (group
->load_align
[i
])
3706 group_load_align
= MIN (group_load_align
, group
->load_align
[i
]);
3710 store_immediate_info
*gstore
;
3711 FOR_EACH_VEC_ELT (group
->stores
, first
, gstore
)
3712 if (!gimple_clobber_p (gstore
->stmt
))
3719 = new split_store (bytepos
, gstore
->bitsize
, align_base
);
3720 store
->orig_stores
.safe_push (gstore
);
3723 split_stores
->safe_push (store
);
3729 if ((allow_unaligned_store
|| group_align
<= BITS_PER_UNIT
)
3730 && (group
->mask
[try_pos
- bytepos
] == (unsigned char) ~0U
3731 || (bzero_first
&& group
->val
[try_pos
- bytepos
] == 0)))
3733 /* Skip padding bytes. */
3735 size
-= BITS_PER_UNIT
;
3739 unsigned HOST_WIDE_INT try_bitpos
= try_pos
* BITS_PER_UNIT
;
3740 unsigned int try_size
= MAX_STORE_BITSIZE
, nonmasked
;
3741 unsigned HOST_WIDE_INT align_bitpos
3742 = (try_bitpos
- align_base
) & (group_align
- 1);
3743 unsigned HOST_WIDE_INT align
= group_align
;
3744 bool found_orig
= false;
3746 align
= least_bit_hwi (align_bitpos
);
3747 if (!allow_unaligned_store
)
3748 try_size
= MIN (try_size
, align
);
3749 if (!allow_unaligned_load
)
3751 /* If we can't do or don't want to do unaligned stores
3752 as well as loads, we need to take the loads into account
3754 unsigned HOST_WIDE_INT load_align
= group_load_align
;
3755 align_bitpos
= (try_bitpos
- align_base
) & (load_align
- 1);
3757 load_align
= least_bit_hwi (align_bitpos
);
3758 for (int i
= 0; i
< 2; ++i
)
3759 if (group
->load_align
[i
])
3762 = known_alignment (try_bitpos
3763 - group
->stores
[0]->bitpos
3764 + group
->stores
[0]->ops
[i
].bitpos
3765 - group
->load_align_base
[i
]);
3766 if (align_bitpos
& (group_load_align
- 1))
3768 unsigned HOST_WIDE_INT a
= least_bit_hwi (align_bitpos
);
3769 load_align
= MIN (load_align
, a
);
3772 try_size
= MIN (try_size
, load_align
);
3774 store_immediate_info
*info
3775 = find_constituent_stores (group
, NULL
, &first
, try_bitpos
, try_size
);
3776 if (info
&& !gimple_clobber_p (info
->stmt
))
3778 /* If there is just one original statement for the range, see if
3779 we can just reuse the original store which could be even larger
3781 unsigned HOST_WIDE_INT stmt_end
3782 = ROUND_UP (info
->bitpos
+ info
->bitsize
, BITS_PER_UNIT
);
3783 info
= find_constituent_stores (group
, NULL
, &first
, try_bitpos
,
3784 stmt_end
- try_bitpos
);
3785 if (info
&& info
->bitpos
>= try_bitpos
)
3787 store_immediate_info
*info2
= NULL
;
3788 unsigned int first_copy
= first
;
3789 if (info
->bitpos
> try_bitpos
3790 && stmt_end
- try_bitpos
<= try_size
)
3792 info2
= find_constituent_stores (group
, NULL
, &first_copy
,
3794 info
->bitpos
- try_bitpos
);
3795 gcc_assert (info2
== NULL
|| gimple_clobber_p (info2
->stmt
));
3797 if (info2
== NULL
&& stmt_end
- try_bitpos
< try_size
)
3799 info2
= find_constituent_stores (group
, NULL
, &first_copy
,
3801 (try_bitpos
+ try_size
)
3803 gcc_assert (info2
== NULL
|| gimple_clobber_p (info2
->stmt
));
3807 try_size
= stmt_end
- try_bitpos
;
3814 /* Approximate store bitsize for the case when there are no padding
3816 while (try_size
> size
)
3818 /* Now look for whole padding bytes at the end of that bitsize. */
3819 for (nonmasked
= try_size
/ BITS_PER_UNIT
; nonmasked
> 0; --nonmasked
)
3820 if (group
->mask
[try_pos
- bytepos
+ nonmasked
- 1]
3821 != (unsigned char) ~0U
3823 || group
->val
[try_pos
- bytepos
+ nonmasked
- 1] != 0))
3825 if (nonmasked
== 0 || (info
&& gimple_clobber_p (info
->stmt
)))
3827 /* If entire try_size range is padding, skip it. */
3828 try_pos
+= try_size
/ BITS_PER_UNIT
;
3832 /* Otherwise try to decrease try_size if second half, last 3 quarters
3833 etc. are padding. */
3834 nonmasked
*= BITS_PER_UNIT
;
3835 while (nonmasked
<= try_size
/ 2)
3837 if (!allow_unaligned_store
&& group_align
> BITS_PER_UNIT
)
3839 /* Now look for whole padding bytes at the start of that bitsize. */
3840 unsigned int try_bytesize
= try_size
/ BITS_PER_UNIT
, masked
;
3841 for (masked
= 0; masked
< try_bytesize
; ++masked
)
3842 if (group
->mask
[try_pos
- bytepos
+ masked
] != (unsigned char) ~0U
3844 || group
->val
[try_pos
- bytepos
+ masked
] != 0))
3846 masked
*= BITS_PER_UNIT
;
3847 gcc_assert (masked
< try_size
);
3848 if (masked
>= try_size
/ 2)
3850 while (masked
>= try_size
/ 2)
3853 try_pos
+= try_size
/ BITS_PER_UNIT
;
3857 /* Need to recompute the alignment, so just retry at the new
3869 = new split_store (try_pos
, try_size
, align
);
3870 info
= find_constituent_stores (group
, &store
->orig_stores
,
3871 &first
, try_bitpos
, try_size
);
3873 && !gimple_clobber_p (info
->stmt
)
3874 && info
->bitpos
>= try_bitpos
3875 && info
->bitpos
+ info
->bitsize
<= try_bitpos
+ try_size
3876 && (store
->orig_stores
.length () == 1
3878 || (info
->bitpos
== try_bitpos
3879 && (info
->bitpos
+ info
->bitsize
3880 == try_bitpos
+ try_size
))))
3885 split_stores
->safe_push (store
);
3888 try_pos
+= try_size
/ BITS_PER_UNIT
;
3896 /* If we are reusing some original stores and any of the
3897 original SSA_NAMEs had multiple uses, we need to subtract
3898 those now before we add the new ones. */
3899 if (total_new
[0] && any_orig
)
3901 FOR_EACH_VEC_ELT (*split_stores
, i
, store
)
3903 total_new
[0] -= count_multiple_uses (store
->orig_stores
[0]);
3905 total_new
[0] += ret
; /* The new store. */
3906 store_immediate_info
*info
= group
->stores
[0];
3907 if (info
->ops
[0].base_addr
)
3908 total_new
[0] += ret
;
3909 if (info
->ops
[1].base_addr
)
3910 total_new
[0] += ret
;
3911 switch (info
->rhs_code
)
3916 total_new
[0] += ret
; /* The new BIT_*_EXPR stmt. */
3921 FOR_EACH_VEC_ELT (*split_stores
, i
, store
)
3924 bool bit_not_p
[3] = { false, false, false };
3925 /* If all orig_stores have certain bit_not_p set, then
3926 we'd use a BIT_NOT_EXPR stmt and need to account for it.
3927 If some orig_stores have certain bit_not_p set, then
3928 we'd use a BIT_XOR_EXPR with a mask and need to account for
3930 FOR_EACH_VEC_ELT (store
->orig_stores
, j
, info
)
3932 if (info
->ops
[0].bit_not_p
)
3933 bit_not_p
[0] = true;
3934 if (info
->ops
[1].bit_not_p
)
3935 bit_not_p
[1] = true;
3936 if (info
->bit_not_p
)
3937 bit_not_p
[2] = true;
3939 total_new
[0] += bit_not_p
[0] + bit_not_p
[1] + bit_not_p
[2];
3947 /* Return the operation through which the operand IDX (if < 2) or
3948 result (IDX == 2) should be inverted. If NOP_EXPR, no inversion
3949 is done, if BIT_NOT_EXPR, all bits are inverted, if BIT_XOR_EXPR,
3950 the bits should be xored with mask. */
3952 static enum tree_code
3953 invert_op (split_store
*split_store
, int idx
, tree int_type
, tree
&mask
)
3956 store_immediate_info
*info
;
3957 unsigned int cnt
= 0;
3958 bool any_paddings
= false;
3959 FOR_EACH_VEC_ELT (split_store
->orig_stores
, i
, info
)
3961 bool bit_not_p
= idx
< 2 ? info
->ops
[idx
].bit_not_p
: info
->bit_not_p
;
3965 tree lhs
= gimple_assign_lhs (info
->stmt
);
3966 if (INTEGRAL_TYPE_P (TREE_TYPE (lhs
))
3967 && TYPE_PRECISION (TREE_TYPE (lhs
)) < info
->bitsize
)
3968 any_paddings
= true;
3974 if (cnt
== split_store
->orig_stores
.length () && !any_paddings
)
3975 return BIT_NOT_EXPR
;
3977 unsigned HOST_WIDE_INT try_bitpos
= split_store
->bytepos
* BITS_PER_UNIT
;
3978 unsigned buf_size
= split_store
->size
/ BITS_PER_UNIT
;
3980 = XALLOCAVEC (unsigned char, buf_size
);
3981 memset (buf
, ~0U, buf_size
);
3982 FOR_EACH_VEC_ELT (split_store
->orig_stores
, i
, info
)
3984 bool bit_not_p
= idx
< 2 ? info
->ops
[idx
].bit_not_p
: info
->bit_not_p
;
3987 /* Clear regions with bit_not_p and invert afterwards, rather than
3988 clear regions with !bit_not_p, so that gaps in between stores aren't
3990 unsigned HOST_WIDE_INT bitsize
= info
->bitsize
;
3991 unsigned HOST_WIDE_INT prec
= bitsize
;
3992 unsigned int pos_in_buffer
= 0;
3995 tree lhs
= gimple_assign_lhs (info
->stmt
);
3996 if (INTEGRAL_TYPE_P (TREE_TYPE (lhs
))
3997 && TYPE_PRECISION (TREE_TYPE (lhs
)) < bitsize
)
3998 prec
= TYPE_PRECISION (TREE_TYPE (lhs
));
4000 if (info
->bitpos
< try_bitpos
)
4002 gcc_assert (info
->bitpos
+ bitsize
> try_bitpos
);
4003 if (!BYTES_BIG_ENDIAN
)
4005 if (prec
<= try_bitpos
- info
->bitpos
)
4007 prec
-= try_bitpos
- info
->bitpos
;
4009 bitsize
-= try_bitpos
- info
->bitpos
;
4010 if (BYTES_BIG_ENDIAN
&& prec
> bitsize
)
4014 pos_in_buffer
= info
->bitpos
- try_bitpos
;
4017 /* If this is a bool inversion, invert just the least significant
4018 prec bits rather than all bits of it. */
4019 if (BYTES_BIG_ENDIAN
)
4021 pos_in_buffer
+= bitsize
- prec
;
4022 if (pos_in_buffer
>= split_store
->size
)
4027 if (pos_in_buffer
+ bitsize
> split_store
->size
)
4028 bitsize
= split_store
->size
- pos_in_buffer
;
4029 unsigned char *p
= buf
+ (pos_in_buffer
/ BITS_PER_UNIT
);
4030 if (BYTES_BIG_ENDIAN
)
4031 clear_bit_region_be (p
, (BITS_PER_UNIT
- 1
4032 - (pos_in_buffer
% BITS_PER_UNIT
)), bitsize
);
4034 clear_bit_region (p
, pos_in_buffer
% BITS_PER_UNIT
, bitsize
);
4036 for (unsigned int i
= 0; i
< buf_size
; ++i
)
4038 mask
= native_interpret_expr (int_type
, buf
, buf_size
);
4039 return BIT_XOR_EXPR
;
/* Given a merged store group GROUP output the widened version of it.
   The store chain is against the base object BASE.
   Try store sizes of at most MAX_STORE_BITSIZE bits wide and don't output
   unaligned stores for STRICT_ALIGNMENT targets or if it's too expensive.
   Make sure that the number of statements output is less than the number of
   original statements.  If a better sequence is possible emit it and
   return true.  */

imm_store_chain_info::output_merged_store (merged_store_group *group)
  const unsigned HOST_WIDE_INT start_byte_pos
    = group->bitregion_start / BITS_PER_UNIT;
  unsigned int orig_num_stmts = group->stores.length ();
  if (orig_num_stmts < 2)
  bool allow_unaligned_store
    = !STRICT_ALIGNMENT && param_store_merging_allow_unaligned;
  bool allow_unaligned_load = allow_unaligned_store;
  bool bzero_first = false;
  store_immediate_info *store;
  unsigned int num_clobber_stmts = 0;
  if (group->stores[0]->rhs_code == INTEGER_CST)
      FOR_EACH_VEC_ELT (group->stores, i, store)
	if (gimple_clobber_p (store->stmt))
	  num_clobber_stmts++;
	else if (TREE_CODE (gimple_assign_rhs1 (store->stmt)) == CONSTRUCTOR
		 && CONSTRUCTOR_NELTS (gimple_assign_rhs1 (store->stmt)) == 0
		 && group->start == store->bitpos
		 && group->width == store->bitsize
		 && (group->start % BITS_PER_UNIT) == 0
		 && (group->width % BITS_PER_UNIT) == 0)
      FOR_EACH_VEC_ELT_FROM (group->stores, i, store, i)
	if (gimple_clobber_p (store->stmt))
	  num_clobber_stmts++;
      if (num_clobber_stmts == orig_num_stmts)
      orig_num_stmts -= num_clobber_stmts;
  if (allow_unaligned_store || bzero_first)
      /* If unaligned stores are allowed, see how many stores we'd emit
	 for unaligned and how many stores we'd emit for aligned stores.
	 Only use unaligned stores if it allows fewer stores than aligned.
	 Similarly, if there is a whole region clear first, prefer expanding
	 it together compared to expanding clear first followed by merged
	 further stores.  */
      unsigned cnt[4] = { ~0U, ~0U, ~0U, ~0U };
      for (int pass = 0; pass < 4; ++pass)
	  if (!allow_unaligned_store && (pass & 1) != 0)
	  if (!bzero_first && (pass & 2) != 0)
	  cnt[pass] = split_group (group, (pass & 1) != 0,
				   allow_unaligned_load, (pass & 2) != 0,
	  if (cnt[pass] < cnt[pass_min])
      if ((pass_min & 1) == 0)
	allow_unaligned_store = false;
      if ((pass_min & 2) == 0)
	bzero_first = false;
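/* The four passes above enumerate the combinations of the two boolean knobs
   (bit 0: use unaligned stores, bit 1: emit the whole-region clear first),
   ask split_group for the statement count of each combination and keep the
   cheapest one; a knob whose bit is clear in the winning pass is then
   switched off.  A stripped-down sketch of this selection pattern, with a
   hypothetical cost callback standing in for split_group:

     static unsigned
     pick_cheapest_config (unsigned (*cost) (bool unaligned, bool bzero),
                           bool *allow_unaligned, bool *bzero_first)
     {
       unsigned cnt[4] = { ~0U, ~0U, ~0U, ~0U };
       unsigned pass_min = 0;
       for (int pass = 0; pass < 4; ++pass)
         {
           if (!*allow_unaligned && (pass & 1) != 0)
             continue;                  // unaligned variants not allowed
           if (!*bzero_first && (pass & 2) != 0)
             continue;                  // no leading clear available
           cnt[pass] = cost ((pass & 1) != 0, (pass & 2) != 0);
           if (cnt[pass] < cnt[pass_min])
             pass_min = pass;
         }
       if ((pass_min & 1) == 0)
         *allow_unaligned = false;
       if ((pass_min & 2) == 0)
         *bzero_first = false;
       return cnt[pass_min];
     }  */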
  auto_vec<class split_store *, 32> split_stores;
  split_store *split_store;
  unsigned total_orig, total_new, i;
  split_group (group, allow_unaligned_store, allow_unaligned_load, bzero_first,
	       &split_stores, &total_orig, &total_new);

  /* Determine if there is a clobber covering the whole group at the start,
     followed by proposed split stores that cover the whole group.  In that
     case, prefer the transformation even if
     split_stores.length () == orig_num_stmts.  */
  bool clobber_first = false;
  if (num_clobber_stmts
      && gimple_clobber_p (group->stores[0]->stmt)
      && group->start == group->stores[0]->bitpos
      && group->width == group->stores[0]->bitsize
      && (group->start % BITS_PER_UNIT) == 0
      && (group->width % BITS_PER_UNIT) == 0)
      clobber_first = true;
      unsigned HOST_WIDE_INT pos = group->start / BITS_PER_UNIT;
      FOR_EACH_VEC_ELT (split_stores, i, split_store)
	  if (split_store->bytepos != pos)
	      clobber_first = false;
	  pos += split_store->size / BITS_PER_UNIT;
      if (pos != (group->start + group->width) / BITS_PER_UNIT)
	clobber_first = false;
  if (split_stores.length () >= orig_num_stmts + clobber_first)
      /* We didn't manage to reduce the number of statements.  Bail out.  */
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Exceeded original number of stmts (%u)."
			    " Not profitable to emit new sequence.\n",
      FOR_EACH_VEC_ELT (split_stores, i, split_store)
  if (total_orig <= total_new)
      /* If number of estimated new statements is above estimated original
	 statements, bail out too.  */
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Estimated number of original stmts (%u)"
			    " not larger than estimated number of new"
		 total_orig, total_new);
      FOR_EACH_VEC_ELT (split_stores, i, split_store)
  if (group->stores[0]->rhs_code == INTEGER_CST)
      bool all_orig = true;
      FOR_EACH_VEC_ELT (split_stores, i, split_store)
	if (!split_store->orig)
	  unsigned int cnt = split_stores.length ();
	  store_immediate_info *store;
	  FOR_EACH_VEC_ELT (group->stores, i, store)
	    if (gimple_clobber_p (store->stmt))
	  /* Punt if we wouldn't make any real changes, i.e. keep all
	     orig stmts + all clobbers.  */
	  if (cnt == group->stores.length ())
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "Exceeded original number of stmts (%u)."
				    " Not profitable to emit new sequence.\n",
	      FOR_EACH_VEC_ELT (split_stores, i, split_store)

  gimple_stmt_iterator last_gsi = gsi_for_stmt (group->last_stmt);
  gimple_seq seq = NULL;
  tree last_vdef, new_vuse;
  last_vdef = gimple_vdef (group->last_stmt);
  new_vuse = gimple_vuse (group->last_stmt);
  tree bswap_res = NULL_TREE;

  /* Clobbers are not removed.  */
  if (gimple_clobber_p (group->last_stmt))
      new_vuse = make_ssa_name (gimple_vop (cfun), group->last_stmt);
      gimple_set_vdef (group->last_stmt, new_vuse);
  if (group->stores[0]->rhs_code == LROTATE_EXPR
      || group->stores[0]->rhs_code == NOP_EXPR)
      tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type;
      gimple *ins_stmt = group->stores[0]->ins_stmt;
      struct symbolic_number *n = &group->stores[0]->n;
      bool bswap = group->stores[0]->rhs_code == LROTATE_EXPR;
	  load_type = bswap_type = uint16_type_node;
	  load_type = uint32_type_node;
	      fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
	      bswap_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
	  load_type = uint64_type_node;
	      fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
	      bswap_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
      /* If the loads have each vuse of the corresponding store,
	 we've checked the aliasing already in try_coalesce_bswap and
	 we want to sink the need load into seq.  So need to use new_vuse
	 on the load.  */
      if (n->vuse == NULL)
	  /* Update vuse in case it has changed by output_merged_stores.  */
	  n->vuse = gimple_vuse (ins_stmt);
      bswap_res = bswap_replace (gsi_start (seq), ins_stmt, fndecl,
				 bswap_type, load_type, n, bswap,
      gcc_assert (bswap_res);
  gimple *stmt = NULL;
  auto_vec<gimple *, 32> orig_stmts;
  gimple_seq this_seq;
  tree addr = force_gimple_operand_1 (unshare_expr (base_addr), &this_seq,
				      is_gimple_mem_ref_addr, NULL_TREE);
  gimple_seq_add_seq_without_update (&seq, this_seq);

  tree load_addr[2] = { NULL_TREE, NULL_TREE };
  gimple_seq load_seq[2] = { NULL, NULL };
  gimple_stmt_iterator load_gsi[2] = { gsi_none (), gsi_none () };
  for (int j = 0; j < 2; ++j)
      store_operand_info &op = group->stores[0]->ops[j];
      if (op.base_addr == NULL_TREE)
      store_immediate_info *infol = group->stores.last ();
      if (gimple_vuse (op.stmt) == gimple_vuse (infol->ops[j].stmt))
	  /* We can't pick the location randomly; while we've verified
	     all the loads have the same vuse, they can be still in different
	     basic blocks and we need to pick the one from the last bb:
	     otherwise if we put the wider load at the q[0] load, we might
	     segfault if q[1] is not mapped.  */
	  basic_block bb = gimple_bb (op.stmt);
	  gimple *ostmt = op.stmt;
	  store_immediate_info *info;
	  FOR_EACH_VEC_ELT (group->stores, i, info)
	      gimple *tstmt = info->ops[j].stmt;
	      basic_block tbb = gimple_bb (tstmt);
	      if (dominated_by_p (CDI_DOMINATORS, tbb, bb))
	  load_gsi[j] = gsi_for_stmt (ostmt);
	    = force_gimple_operand_1 (unshare_expr (op.base_addr),
				      &load_seq[j], is_gimple_mem_ref_addr,
      else if (operand_equal_p (base_addr, op.base_addr, 0))
	load_addr[j] = addr;
	    = force_gimple_operand_1 (unshare_expr (op.base_addr),
				      &this_seq, is_gimple_mem_ref_addr,
	  gimple_seq_add_seq_without_update (&seq, this_seq);
  FOR_EACH_VEC_ELT (split_stores, i, split_store)
      const unsigned HOST_WIDE_INT try_size = split_store->size;
      const unsigned HOST_WIDE_INT try_pos = split_store->bytepos;
      const unsigned HOST_WIDE_INT try_bitpos = try_pos * BITS_PER_UNIT;
      const unsigned HOST_WIDE_INT try_align = split_store->align;
      const unsigned HOST_WIDE_INT try_offset = try_pos - start_byte_pos;
      if (split_store->orig)
	  /* If there is just a single non-clobber constituent store
	     which covers the whole area, just reuse the lhs and rhs.  */
	  gimple *orig_stmt = NULL;
	  store_immediate_info *store;
	  FOR_EACH_VEC_ELT (split_store->orig_stores, j, store)
	    if (!gimple_clobber_p (store->stmt))
		orig_stmt = store->stmt;
	  dest = gimple_assign_lhs (orig_stmt);
	  src = gimple_assign_rhs1 (orig_stmt);
	  loc = gimple_location (orig_stmt);
	  store_immediate_info *info;
	  unsigned short clique, base;
	  FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
	    orig_stmts.safe_push (info->stmt);
	    = get_alias_type_for_stmts (orig_stmts, false, &clique, &base);
	  loc = get_location_for_stmts (orig_stmts);
	  orig_stmts.truncate (0);
	  if (group->string_concatenation)
	      = build_array_type_nelts (char_type_node,
					try_size / BITS_PER_UNIT);
	      dest_type = build_nonstandard_integer_type (try_size, UNSIGNED);
	      dest_type = build_aligned_type (dest_type, try_align);
	  dest = fold_build2 (MEM_REF, dest_type, addr,
			      build_int_cst (offset_type, try_pos));
	  if (TREE_CODE (dest) == MEM_REF)
	      MR_DEPENDENCE_CLIQUE (dest) = clique;
	      MR_DEPENDENCE_BASE (dest) = base;
	  if (bswap_res || group->string_concatenation)
	    mask = integer_zero_node;
	    mask = native_interpret_expr (dest_type,
					  group->mask + try_offset,
	       j < 1 + (split_store->orig_stores[0]->ops[1].val != NULL_TREE);
	      store_operand_info &op = split_store->orig_stores[0]->ops[j];
	      else if (group->string_concatenation)
		  ops[j] = build_string (try_size / BITS_PER_UNIT,
					 (const char *) group->val + try_offset);
		  TREE_TYPE (ops[j]) = dest_type;
	      else if (op.base_addr)
		  FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
		    orig_stmts.safe_push (info->ops[j].stmt);
		  offset_type = get_alias_type_for_stmts (orig_stmts, true,
		  location_t load_loc = get_location_for_stmts (orig_stmts);
		  orig_stmts.truncate (0);
		  unsigned HOST_WIDE_INT load_align = group->load_align[j];
		  unsigned HOST_WIDE_INT align_bitpos
		    = known_alignment (try_bitpos
				       - split_store->orig_stores[0]->bitpos
		  if (align_bitpos & (load_align - 1))
		    load_align = least_bit_hwi (align_bitpos);
		    = build_nonstandard_integer_type (try_size, UNSIGNED);
		    = build_aligned_type (load_int_type, load_align);
		  poly_uint64 load_pos
		    = exact_div (try_bitpos
				 - split_store->orig_stores[0]->bitpos
		  ops[j] = fold_build2 (MEM_REF, load_int_type, load_addr[j],
					build_int_cst (offset_type, load_pos));
		  if (TREE_CODE (ops[j]) == MEM_REF)
		      MR_DEPENDENCE_CLIQUE (ops[j]) = clique;
		      MR_DEPENDENCE_BASE (ops[j]) = base;
		  if (!integer_zerop (mask))
		      /* The load might load some bits (that will be masked
			 off later on) uninitialized, avoid -W*uninitialized
			 warnings in that case.  */
		      suppress_warning (ops[j], OPT_Wuninitialized);
		  stmt = gimple_build_assign (make_ssa_name (dest_type), ops[j]);
		  gimple_set_location (stmt, load_loc);
		  if (gsi_bb (load_gsi[j]))
		      gimple_set_vuse (stmt, gimple_vuse (op.stmt));
		      gimple_seq_add_stmt_without_update (&load_seq[j], stmt);
		      gimple_set_vuse (stmt, new_vuse);
		      gimple_seq_add_stmt_without_update (&seq, stmt);
		  ops[j] = gimple_assign_lhs (stmt);
		  enum tree_code inv_op
		    = invert_op (split_store, j, dest_type, xor_mask);
		  if (inv_op != NOP_EXPR)
		      stmt = gimple_build_assign (make_ssa_name (dest_type),
						  inv_op, ops[j], xor_mask);
		      gimple_set_location (stmt, load_loc);
		      ops[j] = gimple_assign_lhs (stmt);
		      if (gsi_bb (load_gsi[j]))
			gimple_seq_add_stmt_without_update (&load_seq[j],
			gimple_seq_add_stmt_without_update (&seq, stmt);
		ops[j] = native_interpret_expr (dest_type,
						group->val + try_offset,
	  switch (split_store->orig_stores[0]->rhs_code)
	      FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
		  tree rhs1 = gimple_assign_rhs1 (info->stmt);
		  orig_stmts.safe_push (SSA_NAME_DEF_STMT (rhs1));
	      bit_loc = get_location_for_stmts (orig_stmts);
	      orig_stmts.truncate (0);
		= gimple_build_assign (make_ssa_name (dest_type),
				       split_store->orig_stores[0]->rhs_code,
	      gimple_set_location (stmt, bit_loc);
	      /* If there is just one load and there is a separate
		 load_seq[0], emit the bitwise op right after it.  */
	      if (load_addr[1] == NULL_TREE && gsi_bb (load_gsi[0]))
		gimple_seq_add_stmt_without_update (&load_seq[0], stmt);
	      /* Otherwise, if at least one load is in seq, we need to
		 emit the bitwise op right before the store.  If there
		 are two loads and are emitted somewhere else, it would
		 be better to emit the bitwise op as early as possible;
		 we don't track where that would be possible right now.  */
		gimple_seq_add_stmt_without_update (&seq, stmt);
	      src = gimple_assign_lhs (stmt);
	      enum tree_code inv_op;
	      inv_op = invert_op (split_store, 2, dest_type, xor_mask);
	      if (inv_op != NOP_EXPR)
		  stmt = gimple_build_assign (make_ssa_name (dest_type),
					      inv_op, src, xor_mask);
		  gimple_set_location (stmt, bit_loc);
		  if (load_addr[1] == NULL_TREE && gsi_bb (load_gsi[0]))
		    gimple_seq_add_stmt_without_update (&load_seq[0], stmt);
		    gimple_seq_add_stmt_without_update (&seq, stmt);
		  src = gimple_assign_lhs (stmt);
	      if (!is_gimple_val (src))
		  stmt = gimple_build_assign (make_ssa_name (TREE_TYPE (src)),
		  gimple_seq_add_stmt_without_update (&seq, stmt);
		  src = gimple_assign_lhs (stmt);
	      if (!useless_type_conversion_p (dest_type, TREE_TYPE (src)))
		  stmt = gimple_build_assign (make_ssa_name (dest_type),
		  gimple_seq_add_stmt_without_update (&seq, stmt);
		  src = gimple_assign_lhs (stmt);
	      inv_op = invert_op (split_store, 2, dest_type, xor_mask);
	      if (inv_op != NOP_EXPR)
		  stmt = gimple_build_assign (make_ssa_name (dest_type),
					      inv_op, src, xor_mask);
		  gimple_set_location (stmt, loc);
		  gimple_seq_add_stmt_without_update (&seq, stmt);
		  src = gimple_assign_lhs (stmt);
      /* If bit insertion is required, we use the source as an accumulator
	 into which the successive bit-field values are manually inserted.
	 FIXME: perhaps use BIT_INSERT_EXPR instead in some cases?  */
      if (group->bit_insertion)
	FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
	  if (info->rhs_code == BIT_INSERT_EXPR
	      && info->bitpos < try_bitpos + try_size
	      && info->bitpos + info->bitsize > try_bitpos)
	      /* Mask, truncate, convert to final type, shift and ior into
		 the accumulator.  Note that every step can be a no-op.  */
	      const HOST_WIDE_INT start_gap = info->bitpos - try_bitpos;
	      const HOST_WIDE_INT end_gap
		= (try_bitpos + try_size) - (info->bitpos + info->bitsize);
	      tree tem = info->ops[0].val;
	      if (!INTEGRAL_TYPE_P (TREE_TYPE (tem)))
		  const unsigned HOST_WIDE_INT size
		    = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (tem)));
		    = build_nonstandard_integer_type (size, UNSIGNED);
		  tem = gimple_build (&seq, loc, VIEW_CONVERT_EXPR,
	      if (TYPE_PRECISION (TREE_TYPE (tem)) <= info->bitsize)
		    = build_nonstandard_integer_type (info->bitsize,
		  tem = gimple_convert (&seq, loc, bitfield_type, tem);
	      else if ((BYTES_BIG_ENDIAN ? start_gap : end_gap) > 0)
		  const unsigned HOST_WIDE_INT imask
		    = (HOST_WIDE_INT_1U << info->bitsize) - 1;
		  tem = gimple_build (&seq, loc,
				      BIT_AND_EXPR, TREE_TYPE (tem), tem,
				      build_int_cst (TREE_TYPE (tem),
	      const HOST_WIDE_INT shift
		= (BYTES_BIG_ENDIAN ? end_gap : start_gap);
		tem = gimple_build (&seq, loc,
				    RSHIFT_EXPR, TREE_TYPE (tem), tem,
				    build_int_cst (NULL_TREE, -shift));
	      tem = gimple_convert (&seq, loc, dest_type, tem);
		tem = gimple_build (&seq, loc,
				    LSHIFT_EXPR, dest_type, tem,
				    build_int_cst (NULL_TREE, shift));
	      src = gimple_build (&seq, loc,
				  BIT_IOR_EXPR, dest_type, tem, src);
      if (!integer_zerop (mask))
	  tree tem = make_ssa_name (dest_type);
	  tree load_src = unshare_expr (dest);
	  /* The load might load some or all bits uninitialized,
	     avoid -W*uninitialized warnings in that case.
	     As optimization, it would be nice if all the bits are
	     provably uninitialized (no stores at all yet or previous
	     store a CLOBBER) we'd optimize away the load and replace it.  */
	  suppress_warning (load_src, OPT_Wuninitialized);
	  stmt = gimple_build_assign (tem, load_src);
	  gimple_set_location (stmt, loc);
	  gimple_set_vuse (stmt, new_vuse);
	  gimple_seq_add_stmt_without_update (&seq, stmt);

	  /* FIXME: If there is a single chunk of zero bits in mask,
	     perhaps use BIT_INSERT_EXPR instead?  */
	  stmt = gimple_build_assign (make_ssa_name (dest_type),
				      BIT_AND_EXPR, tem, mask);
	  gimple_set_location (stmt, loc);
	  gimple_seq_add_stmt_without_update (&seq, stmt);
	  tem = gimple_assign_lhs (stmt);

	  if (TREE_CODE (src) == INTEGER_CST)
	    src = wide_int_to_tree (dest_type,
				    wi::bit_and_not (wi::to_wide (src),
						     wi::to_wide (mask)));
		= wide_int_to_tree (dest_type,
				    wi::bit_not (wi::to_wide (mask)));
	      stmt = gimple_build_assign (make_ssa_name (dest_type),
					  BIT_AND_EXPR, src, nmask);
	      gimple_set_location (stmt, loc);
	      gimple_seq_add_stmt_without_update (&seq, stmt);
	      src = gimple_assign_lhs (stmt);
	  stmt = gimple_build_assign (make_ssa_name (dest_type),
				      BIT_IOR_EXPR, tem, src);
	  gimple_set_location (stmt, loc);
	  gimple_seq_add_stmt_without_update (&seq, stmt);
	  src = gimple_assign_lhs (stmt);
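/* When the group contains bit-field pieces, MASK is non-zero and the block
   above emits a read-modify-write: load the destination word, keep the bits
   selected by MASK (the bits not covered by any store), clear those same
   bits in the new value and IOR the two halves before the single wide store.
   At the source level the emitted sequence corresponds roughly to the
   following sketch (uint32_t and memcpy are illustrative choices, not what
   the pass literally emits):

     #include <stdint.h>
     #include <string.h>

     static void
     rmw_store (unsigned char *p, uint32_t val, uint32_t keep_mask)
     {
       uint32_t old, merged;
       memcpy (&old, p, sizeof old);        // load of the destination
       merged = (old & keep_mask)           // preserve uncovered bits
                | (val & ~keep_mask);       // new bits, gaps cleared
       memcpy (p, &merged, sizeof merged);  // single wide store
     }  */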
      stmt = gimple_build_assign (dest, src);
      gimple_set_location (stmt, loc);
      gimple_set_vuse (stmt, new_vuse);
      gimple_seq_add_stmt_without_update (&seq, stmt);

      if (group->lp_nr && stmt_could_throw_p (cfun, stmt))
	add_stmt_to_eh_lp (stmt, group->lp_nr);

      if (i < split_stores.length () - 1)
	new_vdef = make_ssa_name (gimple_vop (cfun), stmt);
	new_vdef = last_vdef;

      gimple_set_vdef (stmt, new_vdef);
      SSA_NAME_DEF_STMT (new_vdef) = stmt;
      new_vuse = new_vdef;

  FOR_EACH_VEC_ELT (split_stores, i, split_store)

	       "New sequence of %u stores to replace old one of %u stores\n",
	       split_stores.length (), orig_num_stmts);
      if (dump_flags & TDF_DETAILS)
	print_gimple_seq (dump_file, seq, 0, TDF_VOPS | TDF_MEMSYMS);

  if (gimple_clobber_p (group->last_stmt))
    update_stmt (group->last_stmt);

  if (group->lp_nr > 0)
      /* We're going to insert a sequence of (potentially) throwing stores
	 into an active EH region.  This means that we're going to create
	 new basic blocks with EH edges pointing to the post landing pad
	 and, therefore, to have to update its PHI nodes, if any.  For the
	 virtual PHI node, we're going to use the VDEFs created above, but
	 for the other nodes, we need to record the original reaching defs.  */
      eh_landing_pad lp = get_eh_landing_pad_from_number (group->lp_nr);
      basic_block lp_bb = label_to_block (cfun, lp->post_landing_pad);
      basic_block last_bb = gimple_bb (group->last_stmt);
      edge last_edge = find_edge (last_bb, lp_bb);
      auto_vec<tree, 16> last_defs;
      for (gpi = gsi_start_phis (lp_bb); !gsi_end_p (gpi); gsi_next (&gpi))
	  gphi *phi = gpi.phi ();
	  if (virtual_operand_p (gimple_phi_result (phi)))
	    last_def = NULL_TREE;
	    last_def = gimple_phi_arg_def (phi, last_edge->dest_idx);
	  last_defs.safe_push (last_def);

      /* Do the insertion.  Then, if new basic blocks have been created in the
	 process, rewind the chain of VDEFs created above to walk the new basic
	 blocks and update the corresponding arguments of the PHI nodes.  */
      update_modified_stmts (seq);
      if (gimple_find_sub_bbs (seq, &last_gsi))
	while (last_vdef != gimple_vuse (group->last_stmt))
	    gimple *stmt = SSA_NAME_DEF_STMT (last_vdef);
	    if (stmt_could_throw_p (cfun, stmt))
		edge new_edge = find_edge (gimple_bb (stmt), lp_bb);
		for (gpi = gsi_start_phis (lp_bb), i = 0;
		     gsi_next (&gpi), i++)
		    gphi *phi = gpi.phi ();
		    if (virtual_operand_p (gimple_phi_result (phi)))
		      new_def = last_vdef;
		      new_def = last_defs[i];
		    add_phi_arg (phi, new_def, new_edge, UNKNOWN_LOCATION);
	    last_vdef = gimple_vuse (stmt);
    gsi_insert_seq_after (&last_gsi, seq, GSI_SAME_STMT);
  for (int j = 0; j < 2; ++j)
      gsi_insert_seq_after (&load_gsi[j], load_seq[j], GSI_SAME_STMT);
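/* For groups handled through the LROTATE_EXPR/NOP_EXPR path above, the whole
   series of narrow stores collapses into one wide value plus, for the
   byte-swap case, a __builtin_bswap* call feeding a single store.  On a
   little-endian target the transformation corresponds roughly to the
   following sketch (a hand-written illustration, not pass output):

     #include <stdint.h>
     #include <string.h>

     // Before: four narrow stores writing X's bytes in big-endian order.
     static void
     store_be32_bytewise (unsigned char *p, uint32_t x)
     {
       p[0] = x >> 24; p[1] = x >> 16; p[2] = x >> 8; p[3] = x;
     }

     // After: one byte swap feeding a single 4-byte store.
     static void
     store_be32_merged (unsigned char *p, uint32_t x)
     {
       uint32_t v = __builtin_bswap32 (x);
       memcpy (p, &v, sizeof v);
     }  */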
/* Process the merged_store_group objects created in the coalescing phase.
   The stores are all against the base object BASE.
   Try to output the widened stores and delete the original statements if
   successful.  Return true iff any changes were made.  */

imm_store_chain_info::output_merged_stores ()
  merged_store_group *merged_store;
  FOR_EACH_VEC_ELT (m_merged_store_groups, i, merged_store)
    if (dbg_cnt (store_merging)
	&& output_merged_store (merged_store))
	store_immediate_info *store;
	FOR_EACH_VEC_ELT (merged_store->stores, j, store)
	    gimple *stmt = store->stmt;
	    gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
	    /* Don't remove clobbers, they are still useful even if
	       everything is overwritten afterwards.  */
	    if (gimple_clobber_p (stmt))
	    gsi_remove (&gsi, true);
	      remove_stmt_from_eh_lp (stmt);
	    if (stmt != merged_store->last_stmt)
		unlink_stmt_vdef (stmt);
		release_defs (stmt);
  if (ret && dump_file)
    fprintf (dump_file, "Merging successful!\n");
/* Coalesce the store_immediate_info objects recorded against the base object
   BASE in the first phase and output them.
   Delete the allocated structures.
   Return true if any changes were made.  */

imm_store_chain_info::terminate_and_process_chain ()
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Terminating chain with %u stores\n",
	     m_store_info.length ());
  /* Process store chain.  */
  if (m_store_info.length () > 1)
      ret = coalesce_immediate_stores ();
	ret = output_merged_stores ();

  /* Delete all the entries we allocated ourselves.  */
  store_immediate_info *info;
  FOR_EACH_VEC_ELT (m_store_info, i, info)
  merged_store_group *merged_info;
  FOR_EACH_VEC_ELT (m_merged_store_groups, i, merged_info)
/* Return true iff LHS is a destination potentially interesting for
   store merging.  In practice these are the codes that get_inner_reference
   can handle.  */

lhs_valid_for_store_merging_p (tree lhs)
  switch (TREE_CODE (lhs))
    case ARRAY_RANGE_REF:
    case VIEW_CONVERT_EXPR:

/* Return true if the tree RHS is a constant we want to consider
   during store merging.  In practice accept all codes that
   native_encode_expr accepts.  */

rhs_valid_for_store_merging_p (tree rhs)
  unsigned HOST_WIDE_INT size;
  if (TREE_CODE (rhs) == CONSTRUCTOR
      && CONSTRUCTOR_NELTS (rhs) == 0
      && TYPE_SIZE_UNIT (TREE_TYPE (rhs))
      && tree_fits_uhwi_p (TYPE_SIZE_UNIT (TREE_TYPE (rhs))))
  return (GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (rhs))).is_constant (&size)
	  && native_encode_expr (rhs, NULL, size) != 0);
/* Adjust *PBITPOS, *PBITREGION_START and *PBITREGION_END by BYTE_OFF bytes
   and return true on success or false on failure.  */

adjust_bit_pos (poly_offset_int byte_off,
		poly_int64 *pbitpos,
		poly_uint64 *pbitregion_start,
		poly_uint64 *pbitregion_end)
  poly_offset_int bit_off = byte_off << LOG2_BITS_PER_UNIT;
  bit_off += *pbitpos;

  if (known_ge (bit_off, 0) && bit_off.to_shwi (pbitpos))
      if (maybe_ne (*pbitregion_end, 0U))
	  bit_off = byte_off << LOG2_BITS_PER_UNIT;
	  bit_off += *pbitregion_start;
	  if (bit_off.to_uhwi (pbitregion_start))
	      bit_off = byte_off << LOG2_BITS_PER_UNIT;
	      bit_off += *pbitregion_end;
	      if (!bit_off.to_uhwi (pbitregion_end))
		*pbitregion_end = 0;
	    *pbitregion_end = 0;
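/* In effect adjust_bit_pos multiplies the byte offset by BITS_PER_UNIT and
   adds it to the bit position and, when one was recorded, to both ends of
   the bit region, giving up if the sums no longer fit.  A plain-integer
   sketch of the arithmetic, assuming 8-bit units and leaving out the
   poly_int overflow handling:

     static bool
     adjust_bit_pos_sketch (long byte_off, long *bitpos,
                            unsigned long *bitregion_start,
                            unsigned long *bitregion_end)
     {
       long bit_off = byte_off * 8;       // byte_off << LOG2_BITS_PER_UNIT
       if (*bitpos + bit_off < 0)
         return false;                    // would precede the base object
       *bitpos += bit_off;
       if (*bitregion_end != 0)           // a bit region was recorded
         {
           *bitregion_start += bit_off;
           *bitregion_end += bit_off;
         }
       return true;
     }  */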
/* If MEM is a memory reference usable for store merging (either as
   store destination or for loads), return the non-NULL base_addr
   and set *PBITSIZE, *PBITPOS, *PBITREGION_START and *PBITREGION_END.
   Otherwise return NULL, *PBITPOS should be still valid even for that
   case.  */

mem_valid_for_store_merging (tree mem, poly_uint64 *pbitsize,
			     poly_uint64 *pbitpos,
			     poly_uint64 *pbitregion_start,
			     poly_uint64 *pbitregion_end)
  poly_int64 bitsize, bitpos;
  poly_uint64 bitregion_start = 0, bitregion_end = 0;
  int unsignedp = 0, reversep = 0, volatilep = 0;
  tree base_addr = get_inner_reference (mem, &bitsize, &bitpos, &offset, &mode,
					&unsignedp, &reversep, &volatilep);
  *pbitsize = bitsize;
  if (known_le (bitsize, 0))

  if (TREE_CODE (mem) == COMPONENT_REF
      && DECL_BIT_FIELD_TYPE (TREE_OPERAND (mem, 1)))
      get_bit_range (&bitregion_start, &bitregion_end, mem, &bitpos, &offset);
      if (maybe_ne (bitregion_end, 0U))

  /* We do not want to rewrite TARGET_MEM_REFs.  */
  if (TREE_CODE (base_addr) == TARGET_MEM_REF)
  /* In some cases get_inner_reference may return a
     MEM_REF [ptr + byteoffset].  For the purposes of this pass
     canonicalize the base_addr to MEM_REF [ptr] and take
     byteoffset into account in the bitpos.  This occurs in
     PR 23684 and this way we can catch more chains.  */
  else if (TREE_CODE (base_addr) == MEM_REF)
      if (!adjust_bit_pos (mem_ref_offset (base_addr), &bitpos,
			   &bitregion_start, &bitregion_end))
      base_addr = TREE_OPERAND (base_addr, 0);
  /* get_inner_reference returns the base object, get at its address.  */
      if (maybe_lt (bitpos, 0))
      base_addr = build_fold_addr_expr (base_addr);

  /* If the access is variable offset then a base decl has to be
     address-taken to be able to emit pointer-based stores to it.
     ??? We might be able to get away with re-using the original
     base up to the first variable part and then wrapping that inside  */
  tree base = get_base_address (base_addr);
  if (!base || (DECL_P (base) && !TREE_ADDRESSABLE (base)))

  /* Similarly to above for the base, remove constant from the offset.  */
  if (TREE_CODE (offset) == PLUS_EXPR
      && TREE_CODE (TREE_OPERAND (offset, 1)) == INTEGER_CST
      && adjust_bit_pos (wi::to_poly_offset (TREE_OPERAND (offset, 1)),
			 &bitpos, &bitregion_start, &bitregion_end))
    offset = TREE_OPERAND (offset, 0);

  base_addr = build2 (POINTER_PLUS_EXPR, TREE_TYPE (base_addr),

  if (known_eq (bitregion_end, 0U))
      bitregion_start = round_down_to_byte_boundary (bitpos);
      bitregion_end = round_up_to_byte_boundary (bitpos + bitsize);
  *pbitsize = bitsize;
  *pbitregion_start = bitregion_start;
  *pbitregion_end = bitregion_end;
/* Return true if STMT is a load that can be used for store merging.
   In that case fill in *OP.  BITSIZE, BITPOS, BITREGION_START and
   BITREGION_END are properties of the corresponding store.  */

handled_load (gimple *stmt, store_operand_info *op,
	      poly_uint64 bitsize, poly_uint64 bitpos,
	      poly_uint64 bitregion_start, poly_uint64 bitregion_end)
  if (!is_gimple_assign (stmt))
  if (gimple_assign_rhs_code (stmt) == BIT_NOT_EXPR)
      tree rhs1 = gimple_assign_rhs1 (stmt);
      if (TREE_CODE (rhs1) == SSA_NAME
	  && handled_load (SSA_NAME_DEF_STMT (rhs1), op, bitsize, bitpos,
			   bitregion_start, bitregion_end))
	  /* Don't allow _1 = load; _2 = ~_1; _3 = ~_2; which should have
	     been optimized earlier, but if allowed here, would confuse the
	     multiple uses counting.  */
	  op->bit_not_p = !op->bit_not_p;
  if (gimple_vuse (stmt)
      && gimple_assign_load_p (stmt)
      && !stmt_can_throw_internal (cfun, stmt)
      && !gimple_has_volatile_ops (stmt))
      tree mem = gimple_assign_rhs1 (stmt);
	= mem_valid_for_store_merging (mem, &op->bitsize, &op->bitpos,
				       &op->bitregion_start,
				       &op->bitregion_end);
      if (op->base_addr != NULL_TREE
	  && known_eq (op->bitsize, bitsize)
	  && multiple_p (op->bitpos - bitpos, BITS_PER_UNIT)
	  && known_ge (op->bitpos - op->bitregion_start,
		       bitpos - bitregion_start)
	  && known_ge (op->bitregion_end - op->bitpos,
		       bitregion_end - bitpos))
	  op->bit_not_p = false;
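/* In source terms, handled_load lets a store operand be a plain non-volatile
   load (optionally under a single BIT_NOT_EXPR, which just toggles
   op->bit_not_p) whose accessed bits lie within the store's bit region at a
   byte-aligned distance.  An illustrative candidate (a made-up example, not
   from the testsuite) where bit_not_p gets recorded, so the merged wide copy
   is later combined with an inverting XOR mask built by invert_op:

     void copy4_not (unsigned char *p, const unsigned char *q)
     {
       p[0] = ~q[0]; p[1] = ~q[1]; p[2] = ~q[2]; p[3] = ~q[3];
     }  */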
/* Return the index number of the landing pad for STMT, if any.  */

lp_nr_for_store (gimple *stmt)
  if (!cfun->can_throw_non_call_exceptions || !cfun->eh)
  if (!stmt_could_throw_p (cfun, stmt))
  return lookup_stmt_eh_lp (stmt);
/* Record the store STMT for store merging optimization if it can be
   optimized.  Return true if any changes were made.  */

pass_store_merging::process_store (gimple *stmt)
  tree lhs = gimple_assign_lhs (stmt);
  tree rhs = gimple_assign_rhs1 (stmt);
  poly_uint64 bitsize, bitpos = 0;
  poly_uint64 bitregion_start = 0, bitregion_end = 0;
    = mem_valid_for_store_merging (lhs, &bitsize, &bitpos,
				   &bitregion_start, &bitregion_end);
  if (known_eq (bitsize, 0U))

  bool invalid = (base_addr == NULL_TREE
		  || (maybe_gt (bitsize,
				(unsigned int) MAX_BITSIZE_MODE_ANY_INT)
		      && TREE_CODE (rhs) != INTEGER_CST
		      && (TREE_CODE (rhs) != CONSTRUCTOR
			  || CONSTRUCTOR_NELTS (rhs) != 0)));
  enum tree_code rhs_code = ERROR_MARK;
  bool bit_not_p = false;
  struct symbolic_number n;
  gimple *ins_stmt = NULL;
  store_operand_info ops[2];
  else if (TREE_CODE (rhs) == STRING_CST)
      rhs_code = STRING_CST;
  else if (rhs_valid_for_store_merging_p (rhs))
      rhs_code = INTEGER_CST;
  else if (TREE_CODE (rhs) == SSA_NAME)
      gimple *def_stmt = SSA_NAME_DEF_STMT (rhs), *def_stmt1, *def_stmt2;
      if (!is_gimple_assign (def_stmt))
      else if (handled_load (def_stmt, &ops[0], bitsize, bitpos,
			     bitregion_start, bitregion_end))
      else if (gimple_assign_rhs_code (def_stmt) == BIT_NOT_EXPR)
	  tree rhs1 = gimple_assign_rhs1 (def_stmt);
	  if (TREE_CODE (rhs1) == SSA_NAME
	      && is_gimple_assign (SSA_NAME_DEF_STMT (rhs1)))
	      def_stmt = SSA_NAME_DEF_STMT (rhs1);
      if (rhs_code == ERROR_MARK && !invalid)
	switch ((rhs_code = gimple_assign_rhs_code (def_stmt)))
	    rhs1 = gimple_assign_rhs1 (def_stmt);
	    rhs2 = gimple_assign_rhs2 (def_stmt);
	    if (TREE_CODE (rhs1) != SSA_NAME)
	    def_stmt1 = SSA_NAME_DEF_STMT (rhs1);
	    if (!is_gimple_assign (def_stmt1)
		|| !handled_load (def_stmt1, &ops[0], bitsize, bitpos,
				  bitregion_start, bitregion_end))
	    if (rhs_valid_for_store_merging_p (rhs2))
	    else if (TREE_CODE (rhs2) != SSA_NAME)
		def_stmt2 = SSA_NAME_DEF_STMT (rhs2);
		if (!is_gimple_assign (def_stmt2))
		else if (!handled_load (def_stmt2, &ops[1], bitsize, bitpos,
					bitregion_start, bitregion_end))

      unsigned HOST_WIDE_INT const_bitsize;
      if (bitsize.is_constant (&const_bitsize)
	  && (const_bitsize % BITS_PER_UNIT) == 0
	  && const_bitsize <= 64
	  && multiple_p (bitpos, BITS_PER_UNIT))
	  ins_stmt = find_bswap_or_nop_1 (def_stmt, &n, 12);
	      for (unsigned HOST_WIDE_INT i = 0;
		   i += BITS_PER_UNIT, nn >>= BITS_PER_MARKER)
		if ((nn & MARKER_MASK) == 0
		    || (nn & MARKER_MASK) == MARKER_BYTE_UNKNOWN)
	      rhs_code = LROTATE_EXPR;
	      ops[0].base_addr = NULL_TREE;
	      ops[1].base_addr = NULL_TREE;

      && bitsize.is_constant (&const_bitsize)
      && ((const_bitsize % BITS_PER_UNIT) != 0
	  || !multiple_p (bitpos, BITS_PER_UNIT))
      && const_bitsize <= MAX_FIXED_MODE_SIZE)
      /* Bypass a conversion to the bit-field type.  */
	  && is_gimple_assign (def_stmt)
	  && CONVERT_EXPR_CODE_P (rhs_code))
	  tree rhs1 = gimple_assign_rhs1 (def_stmt);
	  if (TREE_CODE (rhs1) == SSA_NAME
	      && INTEGRAL_TYPE_P (TREE_TYPE (rhs1)))
      rhs_code = BIT_INSERT_EXPR;
      ops[0].base_addr = NULL_TREE;
      ops[1].base_addr = NULL_TREE;
  unsigned HOST_WIDE_INT const_bitsize, const_bitpos;
  unsigned HOST_WIDE_INT const_bitregion_start, const_bitregion_end;
      || !bitsize.is_constant (&const_bitsize)
      || !bitpos.is_constant (&const_bitpos)
      || !bitregion_start.is_constant (&const_bitregion_start)
      || !bitregion_end.is_constant (&const_bitregion_end))
    return terminate_all_aliasing_chains (NULL, stmt);

  memset (&n, 0, sizeof (n));

  class imm_store_chain_info **chain_info = NULL;
    chain_info = m_stores.get (base_addr);

  store_immediate_info *info;
      unsigned int ord = (*chain_info)->m_store_info.length ();
      info = new store_immediate_info (const_bitsize, const_bitpos,
				       const_bitregion_start,
				       const_bitregion_end,
				       stmt, ord, rhs_code, n, ins_stmt,
				       bit_not_p, lp_nr_for_store (stmt),
      if (dump_file && (dump_flags & TDF_DETAILS))
	  fprintf (dump_file, "Recording immediate store from stmt:\n");
	  print_gimple_stmt (dump_file, stmt, 0);
      (*chain_info)->m_store_info.safe_push (info);
      ret |= terminate_all_aliasing_chains (chain_info, stmt);
      /* If we reach the limit of stores to merge in a chain terminate and
	 process the chain now.  */
      if ((*chain_info)->m_store_info.length ()
	  == (unsigned int) param_max_stores_to_merge)
	  if (dump_file && (dump_flags & TDF_DETAILS))
		     "Reached maximum number of statements to merge:\n");
	  ret |= terminate_and_process_chain (*chain_info);

  /* Store aliases any existing chain?  */
  ret |= terminate_all_aliasing_chains (NULL, stmt);
  /* Start a new chain.  */
  class imm_store_chain_info *new_chain
    = new imm_store_chain_info (m_stores_head, base_addr);
  info = new store_immediate_info (const_bitsize, const_bitpos,
				   const_bitregion_start,
				   const_bitregion_end,
				   stmt, 0, rhs_code, n, ins_stmt,
				   bit_not_p, lp_nr_for_store (stmt),
  new_chain->m_store_info.safe_push (info);
  m_stores.put (base_addr, new_chain);
  if (dump_file && (dump_flags & TDF_DETAILS))
      fprintf (dump_file, "Starting active chain number %u with statement:\n",
      print_gimple_stmt (dump_file, stmt, 0);
      fprintf (dump_file, "The base object is:\n");
      print_generic_expr (dump_file, base_addr);
      fprintf (dump_file, "\n");

  /* Prune oldest chains so that after adding the chain or store above
     we're again within the limits set by the params.  */
  if (m_n_chains > (unsigned)param_max_store_chains_to_track
      || m_n_stores > (unsigned)param_max_stores_to_track)
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Too many chains (%u > %d) or stores (%u > %d), "
		 "terminating oldest chain(s).\n", m_n_chains,
		 param_max_store_chains_to_track, m_n_stores,
		 param_max_stores_to_track);
      imm_store_chain_info **e = &m_stores_head;
      unsigned n_stores = 0;
	  if (idx >= (unsigned)param_max_store_chains_to_track
	      || (n_stores + (*e)->m_store_info.length ()
		  > (unsigned)param_max_stores_to_track))
	    ret |= terminate_and_process_chain (*e);
	    n_stores += (*e)->m_store_info.length ();
5348 store_valid_for_store_merging_p (gimple
*stmt
)
5350 return gimple_assign_single_p (stmt
)
5351 && gimple_vdef (stmt
)
5352 && lhs_valid_for_store_merging_p (gimple_assign_lhs (stmt
))
5353 && (!gimple_has_volatile_ops (stmt
) || gimple_clobber_p (stmt
));
5356 enum basic_block_status
{ BB_INVALID
, BB_VALID
, BB_EXTENDED_VALID
};
5358 /* Return the status of basic block BB wrt store merging. */
5360 static enum basic_block_status
5361 get_status_for_store_merging (basic_block bb
)
5363 unsigned int num_statements
= 0;
5364 unsigned int num_constructors
= 0;
5365 gimple_stmt_iterator gsi
;
5367 gimple
*last_stmt
= NULL
;
5369 for (gsi
= gsi_after_labels (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
5371 gimple
*stmt
= gsi_stmt (gsi
);
5373 if (is_gimple_debug (stmt
))
5378 if (store_valid_for_store_merging_p (stmt
) && ++num_statements
>= 2)
5381 if (is_gimple_assign (stmt
)
5382 && gimple_assign_rhs_code (stmt
) == CONSTRUCTOR
)
5384 tree rhs
= gimple_assign_rhs1 (stmt
);
5385 if (VECTOR_TYPE_P (TREE_TYPE (rhs
))
5386 && INTEGRAL_TYPE_P (TREE_TYPE (TREE_TYPE (rhs
)))
5387 && gimple_assign_lhs (stmt
) != NULL_TREE
)
5390 = int_size_in_bytes (TREE_TYPE (rhs
)) * BITS_PER_UNIT
;
5391 if (sz
== 16 || sz
== 32 || sz
== 64)
5393 num_constructors
= 1;
5400 if (num_statements
== 0 && num_constructors
== 0)
5403 if (cfun
->can_throw_non_call_exceptions
&& cfun
->eh
5404 && store_valid_for_store_merging_p (last_stmt
)
5405 && (e
= find_fallthru_edge (bb
->succs
))
5406 && e
->dest
== bb
->next_bb
)
5407 return BB_EXTENDED_VALID
;
5409 return (num_statements
>= 2 || num_constructors
) ? BB_VALID
: BB_INVALID
;
/* Entry point for the pass.  Go over each basic block recording chains of
   immediate stores.  Upon encountering a terminating statement (as defined
   by stmt_terminates_chain_p) process the recorded stores and emit the widened
   variants.  */

pass_store_merging::execute (function *fun)
  hash_set<gimple *> orig_stmts;
  bool changed = false, open_chains = false;

  /* If the function can throw and catch non-call exceptions, we'll be trying
     to merge stores across different basic blocks so we need to first unsplit
     the EH edges in order to streamline the CFG of the function.  */
  if (cfun->can_throw_non_call_exceptions && cfun->eh)
    unsplit_eh_edges ();

  calculate_dominance_info (CDI_DOMINATORS);

  FOR_EACH_BB_FN (bb, fun)
      const basic_block_status bb_status = get_status_for_store_merging (bb);
      gimple_stmt_iterator gsi;

      if (open_chains && (bb_status == BB_INVALID || !single_pred_p (bb)))
	  changed |= terminate_and_process_all_chains ();
	  open_chains = false;
      if (bb_status == BB_INVALID)
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Processing basic block <%d>:\n", bb->index);

      for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); )
	  gimple *stmt = gsi_stmt (gsi);
	  if (is_gimple_debug (stmt))
	  if (gimple_has_volatile_ops (stmt) && !gimple_clobber_p (stmt))
	      /* Terminate all chains.  */
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "Volatile access terminates "
	      changed |= terminate_and_process_all_chains ();
	      open_chains = false;
	  if (is_gimple_assign (stmt)
	      && gimple_assign_rhs_code (stmt) == CONSTRUCTOR
	      && maybe_optimize_vector_constructor (stmt))
	  if (store_valid_for_store_merging_p (stmt))
	    changed |= process_store (stmt);
	    changed |= terminate_all_aliasing_chains (NULL, stmt);
      if (bb_status == BB_EXTENDED_VALID)
	  changed |= terminate_and_process_all_chains ();
	  open_chains = false;
  changed |= terminate_and_process_all_chains ();

  /* If the function can throw and catch non-call exceptions and something
     changed during the pass, then the CFG has (very likely) changed too.  */
  if (cfun->can_throw_non_call_exceptions && cfun->eh && changed)
      free_dominance_info (CDI_DOMINATORS);
      return TODO_cleanup_cfg;

/* Construct and return a store merging pass object.  */

make_pass_store_merging (gcc::context *ctxt)
  return new pass_store_merging (ctxt);
namespace selftest {

/* Selftests for store merging helpers.  */

/* Assert that all elements of the byte arrays X and Y, both of length N,
   are equal.  */

verify_array_eq (unsigned char *x, unsigned char *y, unsigned int n)
  for (unsigned int i = 0; i < n; i++)
	  fprintf (stderr, "Arrays do not match.  X:\n");
	  dump_char_array (stderr, x, n);
	  fprintf (stderr, "Y:\n");
	  dump_char_array (stderr, y, n);
      ASSERT_EQ (x[i], y[i]);
/* Test shift_bytes_in_array_left and that it carries bits across between
   bytes correctly.  */

verify_shift_bytes_in_array_left (void)
  /* 00011111 | 11100000.  */
  unsigned char orig[2] = { 0xe0, 0x1f };
  unsigned char in[2];
  memcpy (in, orig, sizeof orig);

  unsigned char expected[2] = { 0x80, 0x7f };
  shift_bytes_in_array_left (in, sizeof (in), 2);
  verify_array_eq (in, expected, sizeof (in));

  memcpy (in, orig, sizeof orig);
  memcpy (expected, orig, sizeof orig);
  /* Check that shifting by zero doesn't change anything.  */
  shift_bytes_in_array_left (in, sizeof (in), 0);
  verify_array_eq (in, expected, sizeof (in));
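/* The test shifts the two-byte buffer { 0xe0, 0x1f } (the 16-bit value
   0x1fe0 when byte 0 is least significant) left by 2 bits and expects
   { 0x80, 0x7f } (0x7f80), so the top bits of each byte must carry into
   the next one.  A minimal byte-at-a-time sketch of those semantics,
   assuming a shift amount below BITS_PER_UNIT (an illustration only, not
   the implementation used by GCC):

     static void
     shift_bytes_left_sketch (unsigned char *buf, unsigned n, unsigned amnt)
     {
       unsigned carry = 0;
       if (amnt == 0)
         return;
       for (unsigned i = 0; i < n; i++)
         {
           unsigned next_carry = buf[i] >> (8 - amnt);
           buf[i] = (unsigned char) ((buf[i] << amnt) | carry);
           carry = next_carry;
         }
     }  */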
/* Test shift_bytes_in_array_right and that it carries bits across between
   bytes correctly.  */

verify_shift_bytes_in_array_right (void)
  /* 00011111 | 11100000.  */
  unsigned char orig[2] = { 0x1f, 0xe0 };
  unsigned char in[2];
  memcpy (in, orig, sizeof orig);
  unsigned char expected[2] = { 0x07, 0xf8 };
  shift_bytes_in_array_right (in, sizeof (in), 2);
  verify_array_eq (in, expected, sizeof (in));

  memcpy (in, orig, sizeof orig);
  memcpy (expected, orig, sizeof orig);
  /* Check that shifting by zero doesn't change anything.  */
  shift_bytes_in_array_right (in, sizeof (in), 0);
  verify_array_eq (in, expected, sizeof (in));
/* Test clear_bit_region that it clears exactly the bits asked and
   nothing more.  */

verify_clear_bit_region (void)
  /* Start with all bits set and test clearing various patterns in them.  */
  unsigned char orig[3] = { 0xff, 0xff, 0xff };
  unsigned char in[3];
  unsigned char expected[3];
  memcpy (in, orig, sizeof in);

  /* Check zeroing out all the bits.  */
  clear_bit_region (in, 0, 3 * BITS_PER_UNIT);
  expected[0] = expected[1] = expected[2] = 0;
  verify_array_eq (in, expected, sizeof in);

  memcpy (in, orig, sizeof in);
  /* Leave the first and last bits intact.  */
  clear_bit_region (in, 1, 3 * BITS_PER_UNIT - 2);
  verify_array_eq (in, expected, sizeof in);
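/* The checks above pin down the expected semantics: clearing all
   3 * BITS_PER_UNIT bits zeroes the buffer, while clearing 22 bits starting
   at bit 1 leaves only the very first and very last bit set.  A simple
   sketch with the same little-endian bit numbering (bit 0 is the least
   significant bit of BUF[0]); an illustration of the expected behaviour,
   not GCC's implementation:

     static void
     clear_bits_sketch (unsigned char *buf, unsigned start, unsigned len)
     {
       for (unsigned bit = start; bit < start + len; bit++)
         buf[bit / 8] &= (unsigned char) ~(1u << (bit % 8));
     }  */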
/* Test clear_bit_region_be that it clears exactly the bits asked and
   nothing more.  */

verify_clear_bit_region_be (void)
  /* Start with all bits set and test clearing various patterns in them.  */
  unsigned char orig[3] = { 0xff, 0xff, 0xff };
  unsigned char in[3];
  unsigned char expected[3];
  memcpy (in, orig, sizeof in);

  /* Check zeroing out all the bits.  */
  clear_bit_region_be (in, BITS_PER_UNIT - 1, 3 * BITS_PER_UNIT);
  expected[0] = expected[1] = expected[2] = 0;
  verify_array_eq (in, expected, sizeof in);

  memcpy (in, orig, sizeof in);
  /* Leave the first and last bits intact.  */
  clear_bit_region_be (in, BITS_PER_UNIT - 2, 3 * BITS_PER_UNIT - 2);
  verify_array_eq (in, expected, sizeof in);

/* Run all of the selftests within this file.  */

store_merging_cc_tests (void)
  verify_shift_bytes_in_array_left ();
  verify_shift_bytes_in_array_right ();
  verify_clear_bit_region ();
  verify_clear_bit_region_be ();

} // namespace selftest

#endif /* CHECKING_P.  */