1 /* GIMPLE store merging and byte swapping passes.
2 Copyright (C) 2009-2021 Free Software Foundation, Inc.
3 Contributed by ARM Ltd.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
12 GCC is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* The purpose of the store merging pass is to combine multiple memory stores
22 of constant values, values loaded from memory, bitwise operations on those,
23 or bit-field values, to consecutive locations, into fewer wider stores.
25 For example, if we have a sequence peforming four byte stores to
26 consecutive memory locations:
31 we can transform this into a single 4-byte store if the target supports it:
32 [p] := imm1:imm2:imm3:imm4 concatenated according to endianness.
39 if there is no overlap can be transformed into a single 4-byte
40 load followed by single 4-byte store.
44 [p + 1B] := [q + 1B] ^ imm2;
45 [p + 2B] := [q + 2B] ^ imm3;
46 [p + 3B] := [q + 3B] ^ imm4;
47 if there is no overlap can be transformed into a single 4-byte
48 load, xored with imm1:imm2:imm3:imm4 and stored using a single 4-byte store.
52 [p:31] := val & 0x7FFFFFFF;
53 we can transform this into a single 4-byte store if the target supports it:
54 [p] := imm:(val & 0x7FFFFFFF) concatenated according to endianness.
56 The algorithm is applied to each basic block in three phases:
58 1) Scan through the basic block and record assignments to destinations
59 that can be expressed as a store to memory of a certain size at a certain
60 bit offset from base expressions we can handle. For bit-fields we also
61 record the surrounding bit region, i.e. bits that could be stored in
62 a read-modify-write operation when storing the bit-field. Record store
63 chains to different bases in a hash_map (m_stores) and make sure to
64 terminate such chains when appropriate (for example when the stored
65 values get used subsequently).
66 These stores can be a result of structure element initializers, array stores
67 etc. A store_immediate_info object is recorded for every such store.
68 Record as many such assignments to a single base as possible until a
69 statement that interferes with the store sequence is encountered.
70 Each store has up to 2 operands, which can be a either constant, a memory
71 load or an SSA name, from which the value to be stored can be computed.
72 At most one of the operands can be a constant. The operands are recorded
73 in store_operand_info struct.
75 2) Analyze the chains of stores recorded in phase 1) (i.e. the vector of
76 store_immediate_info objects) and coalesce contiguous stores into
77 merged_store_group objects. For bit-field stores, we don't need to
78 require the stores to be contiguous, just their surrounding bit regions
79 have to be contiguous. If the expression being stored is different
80 between adjacent stores, such as one store storing a constant and
81 following storing a value loaded from memory, or if the loaded memory
82 objects are not adjacent, a new merged_store_group is created as well.
84 For example, given the stores:
91 This phase would produce two merged_store_group objects, one recording the
92 two bytes stored in the memory region [p : p + 1] and another
93 recording the four bytes stored in the memory region [p + 3 : p + 6].
95 3) The merged_store_group objects produced in phase 2) are processed
96 to generate the sequence of wider stores that set the contiguous memory
97 regions to the sequence of bytes that correspond to it. This may emit
98 multiple stores per store group to handle contiguous stores that are not
99 of a size that is a power of 2. For example it can try to emit a 40-bit
100 store as a 32-bit store followed by an 8-bit store.
101 We try to emit as wide stores as we can while respecting STRICT_ALIGNMENT
102 or TARGET_SLOW_UNALIGNED_ACCESS settings.
104 Note on endianness and example:
105 Consider 2 contiguous 16-bit stores followed by 2 contiguous 8-bit stores:
111 The memory layout for little-endian (LE) and big-endian (BE) must be:
121 To merge these into a single 48-bit merged value 'val' in phase 2)
122 on little-endian we insert stores to higher (consecutive) bitpositions
123 into the most significant bits of the merged value.
124 The final merged value would be: 0xcdab56781234
126 For big-endian we insert stores to higher bitpositions into the least
127 significant bits of the merged value.
128 The final merged value would be: 0x12345678abcd
130 Then, in phase 3), we want to emit this 48-bit value as a 32-bit store
131 followed by a 16-bit store. Again, we must consider endianness when
132 breaking down the 48-bit value 'val' computed above.
133 For little endian we emit:
134 [p] (32-bit) := 0x56781234; // val & 0x0000ffffffff;
135 [p + 4B] (16-bit) := 0xcdab; // (val & 0xffff00000000) >> 32;
137 Whereas for big-endian we emit:
138 [p] (32-bit) := 0x12345678; // (val & 0xffffffff0000) >> 16;
139 [p + 4B] (16-bit) := 0xabcd; // val & 0x00000000ffff; */
143 #include "coretypes.h"
147 #include "builtins.h"
148 #include "fold-const.h"
149 #include "tree-pass.h"
151 #include "gimple-pretty-print.h"
153 #include "fold-const.h"
154 #include "print-tree.h"
155 #include "tree-hash-traits.h"
156 #include "gimple-iterator.h"
157 #include "gimplify.h"
158 #include "gimple-fold.h"
159 #include "stor-layout.h"
162 #include "cfgcleanup.h"
163 #include "tree-cfg.h"
167 #include "gimplify-me.h"
169 #include "expr.h" /* For get_bit_range. */
170 #include "optabs-tree.h"
172 #include "selftest.h"
174 /* The maximum size (in bits) of the stores this pass should generate. */
175 #define MAX_STORE_BITSIZE (BITS_PER_WORD)
176 #define MAX_STORE_BYTES (MAX_STORE_BITSIZE / BITS_PER_UNIT)
178 /* Limit to bound the number of aliasing checks for loads with the same
179 vuse as the corresponding store. */
180 #define MAX_STORE_ALIAS_CHECKS 64
186 /* Number of hand-written 16-bit nop / bswaps found. */
189 /* Number of hand-written 32-bit nop / bswaps found. */
192 /* Number of hand-written 64-bit nop / bswaps found. */
194 } nop_stats
, bswap_stats
;
196 /* A symbolic number structure is used to detect byte permutation and selection
197 patterns of a source. To achieve that, its field N contains an artificial
198 number consisting of BITS_PER_MARKER sized markers tracking where does each
199 byte come from in the source:
201 0 - target byte has the value 0
202 FF - target byte has an unknown value (eg. due to sign extension)
203 1..size - marker value is the byte index in the source (0 for lsb).
205 To detect permutations on memory sources (arrays and structures), a symbolic
206 number is also associated:
207 - a base address BASE_ADDR and an OFFSET giving the address of the source;
208 - a range which gives the difference between the highest and lowest accessed
209 memory location to make such a symbolic number;
210 - the address SRC of the source element of lowest address as a convenience
211 to easily get BASE_ADDR + offset + lowest bytepos;
212 - number of expressions N_OPS bitwise ored together to represent
213 approximate cost of the computation.
215 Note 1: the range is different from size as size reflects the size of the
216 type of the current expression. For instance, for an array char a[],
217 (short) a[0] | (short) a[3] would have a size of 2 but a range of 4 while
218 (short) a[0] | ((short) a[0] << 1) would still have a size of 2 but this
221 Note 2: for non-memory sources, range holds the same value as size.
223 Note 3: SRC points to the SSA_NAME in case of non-memory source. */
225 struct symbolic_number
{
230 poly_int64_pod bytepos
;
234 unsigned HOST_WIDE_INT range
;
238 #define BITS_PER_MARKER 8
239 #define MARKER_MASK ((1 << BITS_PER_MARKER) - 1)
240 #define MARKER_BYTE_UNKNOWN MARKER_MASK
241 #define HEAD_MARKER(n, size) \
242 ((n) & ((uint64_t) MARKER_MASK << (((size) - 1) * BITS_PER_MARKER)))
244 /* The number which the find_bswap_or_nop_1 result should match in
245 order to have a nop. The number is masked according to the size of
246 the symbolic number before using it. */
247 #define CMPNOP (sizeof (int64_t) < 8 ? 0 : \
248 (uint64_t)0x08070605 << 32 | 0x04030201)
250 /* The number which the find_bswap_or_nop_1 result should match in
251 order to have a byte swap. The number is masked according to the
252 size of the symbolic number before using it. */
253 #define CMPXCHG (sizeof (int64_t) < 8 ? 0 : \
254 (uint64_t)0x01020304 << 32 | 0x05060708)
256 /* Perform a SHIFT or ROTATE operation by COUNT bits on symbolic
257 number N. Return false if the requested operation is not permitted
258 on a symbolic number. */
261 do_shift_rotate (enum tree_code code
,
262 struct symbolic_number
*n
,
265 int i
, size
= TYPE_PRECISION (n
->type
) / BITS_PER_UNIT
;
266 unsigned head_marker
;
269 || count
>= TYPE_PRECISION (n
->type
)
270 || count
% BITS_PER_UNIT
!= 0)
272 count
= (count
/ BITS_PER_UNIT
) * BITS_PER_MARKER
;
274 /* Zero out the extra bits of N in order to avoid them being shifted
275 into the significant bits. */
276 if (size
< 64 / BITS_PER_MARKER
)
277 n
->n
&= ((uint64_t) 1 << (size
* BITS_PER_MARKER
)) - 1;
285 head_marker
= HEAD_MARKER (n
->n
, size
);
287 /* Arithmetic shift of signed type: result is dependent on the value. */
288 if (!TYPE_UNSIGNED (n
->type
) && head_marker
)
289 for (i
= 0; i
< count
/ BITS_PER_MARKER
; i
++)
290 n
->n
|= (uint64_t) MARKER_BYTE_UNKNOWN
291 << ((size
- 1 - i
) * BITS_PER_MARKER
);
294 n
->n
= (n
->n
<< count
) | (n
->n
>> ((size
* BITS_PER_MARKER
) - count
));
297 n
->n
= (n
->n
>> count
) | (n
->n
<< ((size
* BITS_PER_MARKER
) - count
));
302 /* Zero unused bits for size. */
303 if (size
< 64 / BITS_PER_MARKER
)
304 n
->n
&= ((uint64_t) 1 << (size
* BITS_PER_MARKER
)) - 1;
308 /* Perform sanity checking for the symbolic number N and the gimple
312 verify_symbolic_number_p (struct symbolic_number
*n
, gimple
*stmt
)
316 lhs_type
= TREE_TYPE (gimple_get_lhs (stmt
));
318 if (TREE_CODE (lhs_type
) != INTEGER_TYPE
319 && TREE_CODE (lhs_type
) != ENUMERAL_TYPE
)
322 if (TYPE_PRECISION (lhs_type
) != TYPE_PRECISION (n
->type
))
328 /* Initialize the symbolic number N for the bswap pass from the base element
329 SRC manipulated by the bitwise OR expression. */
332 init_symbolic_number (struct symbolic_number
*n
, tree src
)
336 if (!INTEGRAL_TYPE_P (TREE_TYPE (src
)) && !POINTER_TYPE_P (TREE_TYPE (src
)))
339 n
->base_addr
= n
->offset
= n
->alias_set
= n
->vuse
= NULL_TREE
;
342 /* Set up the symbolic number N by setting each byte to a value between 1 and
343 the byte size of rhs1. The highest order byte is set to n->size and the
344 lowest order byte to 1. */
345 n
->type
= TREE_TYPE (src
);
346 size
= TYPE_PRECISION (n
->type
);
347 if (size
% BITS_PER_UNIT
!= 0)
349 size
/= BITS_PER_UNIT
;
350 if (size
> 64 / BITS_PER_MARKER
)
356 if (size
< 64 / BITS_PER_MARKER
)
357 n
->n
&= ((uint64_t) 1 << (size
* BITS_PER_MARKER
)) - 1;
362 /* Check if STMT might be a byte swap or a nop from a memory source and returns
363 the answer. If so, REF is that memory source and the base of the memory area
364 accessed and the offset of the access from that base are recorded in N. */
367 find_bswap_or_nop_load (gimple
*stmt
, tree ref
, struct symbolic_number
*n
)
369 /* Leaf node is an array or component ref. Memorize its base and
370 offset from base to compare to other such leaf node. */
371 poly_int64 bitsize
, bitpos
, bytepos
;
373 int unsignedp
, reversep
, volatilep
;
374 tree offset
, base_addr
;
376 /* Not prepared to handle PDP endian. */
377 if (BYTES_BIG_ENDIAN
!= WORDS_BIG_ENDIAN
)
380 if (!gimple_assign_load_p (stmt
) || gimple_has_volatile_ops (stmt
))
383 base_addr
= get_inner_reference (ref
, &bitsize
, &bitpos
, &offset
, &mode
,
384 &unsignedp
, &reversep
, &volatilep
);
386 if (TREE_CODE (base_addr
) == TARGET_MEM_REF
)
387 /* Do not rewrite TARGET_MEM_REF. */
389 else if (TREE_CODE (base_addr
) == MEM_REF
)
391 poly_offset_int bit_offset
= 0;
392 tree off
= TREE_OPERAND (base_addr
, 1);
394 if (!integer_zerop (off
))
396 poly_offset_int boff
= mem_ref_offset (base_addr
);
397 boff
<<= LOG2_BITS_PER_UNIT
;
401 base_addr
= TREE_OPERAND (base_addr
, 0);
403 /* Avoid returning a negative bitpos as this may wreak havoc later. */
404 if (maybe_lt (bit_offset
, 0))
406 tree byte_offset
= wide_int_to_tree
407 (sizetype
, bits_to_bytes_round_down (bit_offset
));
408 bit_offset
= num_trailing_bits (bit_offset
);
410 offset
= size_binop (PLUS_EXPR
, offset
, byte_offset
);
412 offset
= byte_offset
;
415 bitpos
+= bit_offset
.force_shwi ();
418 base_addr
= build_fold_addr_expr (base_addr
);
420 if (!multiple_p (bitpos
, BITS_PER_UNIT
, &bytepos
))
422 if (!multiple_p (bitsize
, BITS_PER_UNIT
))
427 if (!init_symbolic_number (n
, ref
))
429 n
->base_addr
= base_addr
;
431 n
->bytepos
= bytepos
;
432 n
->alias_set
= reference_alias_ptr_type (ref
);
433 n
->vuse
= gimple_vuse (stmt
);
437 /* Compute the symbolic number N representing the result of a bitwise OR on 2
438 symbolic number N1 and N2 whose source statements are respectively
439 SOURCE_STMT1 and SOURCE_STMT2. */
442 perform_symbolic_merge (gimple
*source_stmt1
, struct symbolic_number
*n1
,
443 gimple
*source_stmt2
, struct symbolic_number
*n2
,
444 struct symbolic_number
*n
)
449 struct symbolic_number
*n_start
;
451 tree rhs1
= gimple_assign_rhs1 (source_stmt1
);
452 if (TREE_CODE (rhs1
) == BIT_FIELD_REF
453 && TREE_CODE (TREE_OPERAND (rhs1
, 0)) == SSA_NAME
)
454 rhs1
= TREE_OPERAND (rhs1
, 0);
455 tree rhs2
= gimple_assign_rhs1 (source_stmt2
);
456 if (TREE_CODE (rhs2
) == BIT_FIELD_REF
457 && TREE_CODE (TREE_OPERAND (rhs2
, 0)) == SSA_NAME
)
458 rhs2
= TREE_OPERAND (rhs2
, 0);
460 /* Sources are different, cancel bswap if they are not memory location with
461 the same base (array, structure, ...). */
465 HOST_WIDE_INT start1
, start2
, start_sub
, end_sub
, end1
, end2
, end
;
466 struct symbolic_number
*toinc_n_ptr
, *n_end
;
467 basic_block bb1
, bb2
;
469 if (!n1
->base_addr
|| !n2
->base_addr
470 || !operand_equal_p (n1
->base_addr
, n2
->base_addr
, 0))
473 if (!n1
->offset
!= !n2
->offset
474 || (n1
->offset
&& !operand_equal_p (n1
->offset
, n2
->offset
, 0)))
478 if (!(n2
->bytepos
- n1
->bytepos
).is_constant (&start2
))
484 start_sub
= start2
- start1
;
489 start_sub
= start1
- start2
;
492 bb1
= gimple_bb (source_stmt1
);
493 bb2
= gimple_bb (source_stmt2
);
494 if (dominated_by_p (CDI_DOMINATORS
, bb1
, bb2
))
495 source_stmt
= source_stmt1
;
497 source_stmt
= source_stmt2
;
499 /* Find the highest address at which a load is performed and
500 compute related info. */
501 end1
= start1
+ (n1
->range
- 1);
502 end2
= start2
+ (n2
->range
- 1);
506 end_sub
= end2
- end1
;
511 end_sub
= end1
- end2
;
513 n_end
= (end2
> end1
) ? n2
: n1
;
515 /* Find symbolic number whose lsb is the most significant. */
516 if (BYTES_BIG_ENDIAN
)
517 toinc_n_ptr
= (n_end
== n1
) ? n2
: n1
;
519 toinc_n_ptr
= (n_start
== n1
) ? n2
: n1
;
521 n
->range
= end
- MIN (start1
, start2
) + 1;
523 /* Check that the range of memory covered can be represented by
524 a symbolic number. */
525 if (n
->range
> 64 / BITS_PER_MARKER
)
528 /* Reinterpret byte marks in symbolic number holding the value of
529 bigger weight according to target endianness. */
530 inc
= BYTES_BIG_ENDIAN
? end_sub
: start_sub
;
531 size
= TYPE_PRECISION (n1
->type
) / BITS_PER_UNIT
;
532 for (i
= 0; i
< size
; i
++, inc
<<= BITS_PER_MARKER
)
535 = (toinc_n_ptr
->n
>> (i
* BITS_PER_MARKER
)) & MARKER_MASK
;
536 if (marker
&& marker
!= MARKER_BYTE_UNKNOWN
)
537 toinc_n_ptr
->n
+= inc
;
542 n
->range
= n1
->range
;
544 source_stmt
= source_stmt1
;
548 || alias_ptr_types_compatible_p (n1
->alias_set
, n2
->alias_set
))
549 n
->alias_set
= n1
->alias_set
;
551 n
->alias_set
= ptr_type_node
;
552 n
->vuse
= n_start
->vuse
;
553 n
->base_addr
= n_start
->base_addr
;
554 n
->offset
= n_start
->offset
;
555 n
->src
= n_start
->src
;
556 n
->bytepos
= n_start
->bytepos
;
557 n
->type
= n_start
->type
;
558 size
= TYPE_PRECISION (n
->type
) / BITS_PER_UNIT
;
560 for (i
= 0, mask
= MARKER_MASK
; i
< size
; i
++, mask
<<= BITS_PER_MARKER
)
562 uint64_t masked1
, masked2
;
564 masked1
= n1
->n
& mask
;
565 masked2
= n2
->n
& mask
;
566 if (masked1
&& masked2
&& masked1
!= masked2
)
569 n
->n
= n1
->n
| n2
->n
;
570 n
->n_ops
= n1
->n_ops
+ n2
->n_ops
;
575 /* find_bswap_or_nop_1 invokes itself recursively with N and tries to perform
576 the operation given by the rhs of STMT on the result. If the operation
577 could successfully be executed the function returns a gimple stmt whose
578 rhs's first tree is the expression of the source operand and NULL
582 find_bswap_or_nop_1 (gimple
*stmt
, struct symbolic_number
*n
, int limit
)
585 tree rhs1
, rhs2
= NULL
;
586 gimple
*rhs1_stmt
, *rhs2_stmt
, *source_stmt1
;
587 enum gimple_rhs_class rhs_class
;
589 if (!limit
|| !is_gimple_assign (stmt
))
592 rhs1
= gimple_assign_rhs1 (stmt
);
594 if (find_bswap_or_nop_load (stmt
, rhs1
, n
))
597 /* Handle BIT_FIELD_REF. */
598 if (TREE_CODE (rhs1
) == BIT_FIELD_REF
599 && TREE_CODE (TREE_OPERAND (rhs1
, 0)) == SSA_NAME
)
601 if (!tree_fits_uhwi_p (TREE_OPERAND (rhs1
, 1))
602 || !tree_fits_uhwi_p (TREE_OPERAND (rhs1
, 2)))
605 unsigned HOST_WIDE_INT bitsize
= tree_to_uhwi (TREE_OPERAND (rhs1
, 1));
606 unsigned HOST_WIDE_INT bitpos
= tree_to_uhwi (TREE_OPERAND (rhs1
, 2));
607 if (bitpos
% BITS_PER_UNIT
== 0
608 && bitsize
% BITS_PER_UNIT
== 0
609 && init_symbolic_number (n
, TREE_OPERAND (rhs1
, 0)))
611 /* Handle big-endian bit numbering in BIT_FIELD_REF. */
612 if (BYTES_BIG_ENDIAN
)
613 bitpos
= TYPE_PRECISION (n
->type
) - bitpos
- bitsize
;
616 if (!do_shift_rotate (RSHIFT_EXPR
, n
, bitpos
))
621 uint64_t tmp
= (1 << BITS_PER_UNIT
) - 1;
622 for (unsigned i
= 0; i
< bitsize
/ BITS_PER_UNIT
;
623 i
++, tmp
<<= BITS_PER_UNIT
)
624 mask
|= (uint64_t) MARKER_MASK
<< (i
* BITS_PER_MARKER
);
628 n
->type
= TREE_TYPE (rhs1
);
630 n
->range
= TYPE_PRECISION (n
->type
) / BITS_PER_UNIT
;
632 return verify_symbolic_number_p (n
, stmt
) ? stmt
: NULL
;
638 if (TREE_CODE (rhs1
) != SSA_NAME
)
641 code
= gimple_assign_rhs_code (stmt
);
642 rhs_class
= gimple_assign_rhs_class (stmt
);
643 rhs1_stmt
= SSA_NAME_DEF_STMT (rhs1
);
645 if (rhs_class
== GIMPLE_BINARY_RHS
)
646 rhs2
= gimple_assign_rhs2 (stmt
);
648 /* Handle unary rhs and binary rhs with integer constants as second
651 if (rhs_class
== GIMPLE_UNARY_RHS
652 || (rhs_class
== GIMPLE_BINARY_RHS
653 && TREE_CODE (rhs2
) == INTEGER_CST
))
655 if (code
!= BIT_AND_EXPR
656 && code
!= LSHIFT_EXPR
657 && code
!= RSHIFT_EXPR
658 && code
!= LROTATE_EXPR
659 && code
!= RROTATE_EXPR
660 && !CONVERT_EXPR_CODE_P (code
))
663 source_stmt1
= find_bswap_or_nop_1 (rhs1_stmt
, n
, limit
- 1);
665 /* If find_bswap_or_nop_1 returned NULL, STMT is a leaf node and
666 we have to initialize the symbolic number. */
669 if (gimple_assign_load_p (stmt
)
670 || !init_symbolic_number (n
, rhs1
))
679 int i
, size
= TYPE_PRECISION (n
->type
) / BITS_PER_UNIT
;
680 uint64_t val
= int_cst_value (rhs2
), mask
= 0;
681 uint64_t tmp
= (1 << BITS_PER_UNIT
) - 1;
683 /* Only constants masking full bytes are allowed. */
684 for (i
= 0; i
< size
; i
++, tmp
<<= BITS_PER_UNIT
)
685 if ((val
& tmp
) != 0 && (val
& tmp
) != tmp
)
688 mask
|= (uint64_t) MARKER_MASK
<< (i
* BITS_PER_MARKER
);
697 if (!do_shift_rotate (code
, n
, (int) TREE_INT_CST_LOW (rhs2
)))
702 int i
, type_size
, old_type_size
;
705 type
= TREE_TYPE (gimple_assign_lhs (stmt
));
706 type_size
= TYPE_PRECISION (type
);
707 if (type_size
% BITS_PER_UNIT
!= 0)
709 type_size
/= BITS_PER_UNIT
;
710 if (type_size
> 64 / BITS_PER_MARKER
)
713 /* Sign extension: result is dependent on the value. */
714 old_type_size
= TYPE_PRECISION (n
->type
) / BITS_PER_UNIT
;
715 if (!TYPE_UNSIGNED (n
->type
) && type_size
> old_type_size
716 && HEAD_MARKER (n
->n
, old_type_size
))
717 for (i
= 0; i
< type_size
- old_type_size
; i
++)
718 n
->n
|= (uint64_t) MARKER_BYTE_UNKNOWN
719 << ((type_size
- 1 - i
) * BITS_PER_MARKER
);
721 if (type_size
< 64 / BITS_PER_MARKER
)
723 /* If STMT casts to a smaller type mask out the bits not
724 belonging to the target type. */
725 n
->n
&= ((uint64_t) 1 << (type_size
* BITS_PER_MARKER
)) - 1;
729 n
->range
= type_size
;
735 return verify_symbolic_number_p (n
, stmt
) ? source_stmt1
: NULL
;
738 /* Handle binary rhs. */
740 if (rhs_class
== GIMPLE_BINARY_RHS
)
742 struct symbolic_number n1
, n2
;
743 gimple
*source_stmt
, *source_stmt2
;
745 if (code
!= BIT_IOR_EXPR
)
748 if (TREE_CODE (rhs2
) != SSA_NAME
)
751 rhs2_stmt
= SSA_NAME_DEF_STMT (rhs2
);
756 source_stmt1
= find_bswap_or_nop_1 (rhs1_stmt
, &n1
, limit
- 1);
761 source_stmt2
= find_bswap_or_nop_1 (rhs2_stmt
, &n2
, limit
- 1);
766 if (TYPE_PRECISION (n1
.type
) != TYPE_PRECISION (n2
.type
))
769 if (n1
.vuse
!= n2
.vuse
)
773 = perform_symbolic_merge (source_stmt1
, &n1
, source_stmt2
, &n2
, n
);
778 if (!verify_symbolic_number_p (n
, stmt
))
790 /* Helper for find_bswap_or_nop and try_coalesce_bswap to compute
791 *CMPXCHG, *CMPNOP and adjust *N. */
794 find_bswap_or_nop_finalize (struct symbolic_number
*n
, uint64_t *cmpxchg
,
795 uint64_t *cmpnop
, bool *cast64_to_32
)
800 /* The number which the find_bswap_or_nop_1 result should match in order
801 to have a full byte swap. The number is shifted to the right
802 according to the size of the symbolic number before using it. */
805 *cast64_to_32
= false;
807 /* Find real size of result (highest non-zero byte). */
809 for (tmpn
= n
->n
, rsize
= 0; tmpn
; tmpn
>>= BITS_PER_MARKER
, rsize
++);
813 /* Zero out the bits corresponding to untouched bytes in original gimple
815 if (n
->range
< (int) sizeof (int64_t))
817 mask
= ((uint64_t) 1 << (n
->range
* BITS_PER_MARKER
)) - 1;
818 if (n
->base_addr
== NULL
820 && int_size_in_bytes (TREE_TYPE (n
->src
)) == 8)
822 /* If all bytes in n->n are either 0 or in [5..8] range, this
823 might be a candidate for (unsigned) __builtin_bswap64 (src).
824 It is not worth it for (unsigned short) __builtin_bswap64 (src)
825 or (unsigned short) __builtin_bswap32 (src). */
826 *cast64_to_32
= true;
827 for (tmpn
= n
->n
; tmpn
; tmpn
>>= BITS_PER_MARKER
)
828 if ((tmpn
& MARKER_MASK
)
829 && ((tmpn
& MARKER_MASK
) <= 4 || (tmpn
& MARKER_MASK
) > 8))
831 *cast64_to_32
= false;
838 *cmpxchg
>>= (64 / BITS_PER_MARKER
- n
->range
) * BITS_PER_MARKER
;
842 /* Zero out the bits corresponding to unused bytes in the result of the
843 gimple expression. */
844 if (rsize
< n
->range
)
846 if (BYTES_BIG_ENDIAN
)
848 mask
= ((uint64_t) 1 << (rsize
* BITS_PER_MARKER
)) - 1;
850 *cmpnop
>>= (n
->range
- rsize
) * BITS_PER_MARKER
;
854 mask
= ((uint64_t) 1 << (rsize
* BITS_PER_MARKER
)) - 1;
855 *cmpxchg
>>= (n
->range
- rsize
) * BITS_PER_MARKER
;
863 n
->range
*= BITS_PER_UNIT
;
866 /* Check if STMT completes a bswap implementation or a read in a given
867 endianness consisting of ORs, SHIFTs and ANDs and sets *BSWAP
868 accordingly. It also sets N to represent the kind of operations
869 performed: size of the resulting expression and whether it works on
870 a memory source, and if so alias-set and vuse. At last, the
871 function returns a stmt whose rhs's first tree is the source
875 find_bswap_or_nop (gimple
*stmt
, struct symbolic_number
*n
, bool *bswap
,
876 bool *cast64_to_32
, uint64_t *mask
)
878 tree type_size
= TYPE_SIZE_UNIT (TREE_TYPE (gimple_get_lhs (stmt
)));
879 if (!tree_fits_uhwi_p (type_size
))
882 /* The last parameter determines the depth search limit. It usually
883 correlates directly to the number n of bytes to be touched. We
884 increase that number by 2 * (log2(n) + 1) here in order to also
885 cover signed -> unsigned conversions of the src operand as can be seen
886 in libgcc, and for initial shift/and operation of the src operand. */
887 int limit
= tree_to_uhwi (type_size
);
888 limit
+= 2 * (1 + (int) ceil_log2 ((unsigned HOST_WIDE_INT
) limit
));
889 gimple
*ins_stmt
= find_bswap_or_nop_1 (stmt
, n
, limit
);
893 if (gimple_assign_rhs_code (stmt
) != CONSTRUCTOR
894 || BYTES_BIG_ENDIAN
!= WORDS_BIG_ENDIAN
)
896 unsigned HOST_WIDE_INT sz
= tree_to_uhwi (type_size
) * BITS_PER_UNIT
;
897 if (sz
!= 16 && sz
!= 32 && sz
!= 64)
899 tree rhs
= gimple_assign_rhs1 (stmt
);
900 if (CONSTRUCTOR_NELTS (rhs
) == 0)
902 tree eltype
= TREE_TYPE (TREE_TYPE (rhs
));
903 unsigned HOST_WIDE_INT eltsz
904 = int_size_in_bytes (eltype
) * BITS_PER_UNIT
;
905 if (TYPE_PRECISION (eltype
) != eltsz
)
907 constructor_elt
*elt
;
909 tree type
= build_nonstandard_integer_type (sz
, 1);
910 FOR_EACH_VEC_SAFE_ELT (CONSTRUCTOR_ELTS (rhs
), i
, elt
)
912 if (TREE_CODE (elt
->value
) != SSA_NAME
913 || !INTEGRAL_TYPE_P (TREE_TYPE (elt
->value
)))
915 struct symbolic_number n1
;
917 = find_bswap_or_nop_1 (SSA_NAME_DEF_STMT (elt
->value
), &n1
,
925 n1
.range
= sz
/ BITS_PER_UNIT
;
929 ins_stmt
= source_stmt
;
934 if (n
->vuse
!= n1
.vuse
)
937 struct symbolic_number n0
= *n
;
939 if (!BYTES_BIG_ENDIAN
)
941 if (!do_shift_rotate (LSHIFT_EXPR
, &n1
, i
* eltsz
))
944 else if (!do_shift_rotate (LSHIFT_EXPR
, &n0
, eltsz
))
947 = perform_symbolic_merge (ins_stmt
, &n0
, source_stmt
, &n1
, n
);
955 uint64_t cmpxchg
, cmpnop
;
956 find_bswap_or_nop_finalize (n
, &cmpxchg
, &cmpnop
, cast64_to_32
);
958 /* A complete byte swap should make the symbolic number to start with
959 the largest digit in the highest order byte. Unchanged symbolic
960 number indicates a read with same endianness as target architecture. */
961 *mask
= ~(uint64_t) 0;
964 else if (n
->n
== cmpxchg
)
969 for (uint64_t msk
= MARKER_MASK
; msk
; msk
<<= BITS_PER_MARKER
)
970 if ((n
->n
& msk
) == 0)
972 else if ((n
->n
& msk
) == (cmpxchg
& msk
))
981 /* Useless bit manipulation performed by code. */
982 if (!n
->base_addr
&& n
->n
== cmpnop
&& n
->n_ops
== 1)
988 const pass_data pass_data_optimize_bswap
=
990 GIMPLE_PASS
, /* type */
992 OPTGROUP_NONE
, /* optinfo_flags */
994 PROP_ssa
, /* properties_required */
995 0, /* properties_provided */
996 0, /* properties_destroyed */
997 0, /* todo_flags_start */
998 0, /* todo_flags_finish */
1001 class pass_optimize_bswap
: public gimple_opt_pass
1004 pass_optimize_bswap (gcc::context
*ctxt
)
1005 : gimple_opt_pass (pass_data_optimize_bswap
, ctxt
)
1008 /* opt_pass methods: */
1009 virtual bool gate (function
*)
1011 return flag_expensive_optimizations
&& optimize
&& BITS_PER_UNIT
== 8;
1014 virtual unsigned int execute (function
*);
1016 }; // class pass_optimize_bswap
1018 /* Helper function for bswap_replace. Build VIEW_CONVERT_EXPR from
1019 VAL to TYPE. If VAL has different type size, emit a NOP_EXPR cast
1023 bswap_view_convert (gimple_stmt_iterator
*gsi
, tree type
, tree val
,
1026 gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (val
))
1027 || POINTER_TYPE_P (TREE_TYPE (val
)));
1028 if (TYPE_SIZE (type
) != TYPE_SIZE (TREE_TYPE (val
)))
1030 HOST_WIDE_INT prec
= TREE_INT_CST_LOW (TYPE_SIZE (type
));
1031 if (POINTER_TYPE_P (TREE_TYPE (val
)))
1034 = gimple_build_assign (make_ssa_name (pointer_sized_int_node
),
1037 gsi_insert_before (gsi
, g
, GSI_SAME_STMT
);
1039 gsi_insert_after (gsi
, g
, GSI_NEW_STMT
);
1040 val
= gimple_assign_lhs (g
);
1042 tree itype
= build_nonstandard_integer_type (prec
, 1);
1043 gimple
*g
= gimple_build_assign (make_ssa_name (itype
), NOP_EXPR
, val
);
1045 gsi_insert_before (gsi
, g
, GSI_SAME_STMT
);
1047 gsi_insert_after (gsi
, g
, GSI_NEW_STMT
);
1048 val
= gimple_assign_lhs (g
);
1050 return build1 (VIEW_CONVERT_EXPR
, type
, val
);
1053 /* Perform the bswap optimization: replace the expression computed in the rhs
1054 of gsi_stmt (GSI) (or if NULL add instead of replace) by an equivalent
1055 bswap, load or load + bswap expression.
1056 Which of these alternatives replace the rhs is given by N->base_addr (non
1057 null if a load is needed) and BSWAP. The type, VUSE and set-alias of the
1058 load to perform are also given in N while the builtin bswap invoke is given
1059 in FNDEL. Finally, if a load is involved, INS_STMT refers to one of the
1060 load statements involved to construct the rhs in gsi_stmt (GSI) and
1061 N->range gives the size of the rhs expression for maintaining some
1064 Note that if the replacement involve a load and if gsi_stmt (GSI) is
1065 non-NULL, that stmt is moved just after INS_STMT to do the load with the
1066 same VUSE which can lead to gsi_stmt (GSI) changing of basic block. */
1069 bswap_replace (gimple_stmt_iterator gsi
, gimple
*ins_stmt
, tree fndecl
,
1070 tree bswap_type
, tree load_type
, struct symbolic_number
*n
,
1071 bool bswap
, uint64_t mask
)
1073 tree src
, tmp
, tgt
= NULL_TREE
;
1074 gimple
*bswap_stmt
, *mask_stmt
= NULL
;
1075 tree_code conv_code
= NOP_EXPR
;
1077 gimple
*cur_stmt
= gsi_stmt (gsi
);
1081 tgt
= gimple_assign_lhs (cur_stmt
);
1082 if (gimple_assign_rhs_code (cur_stmt
) == CONSTRUCTOR
1084 && VECTOR_TYPE_P (TREE_TYPE (tgt
)))
1085 conv_code
= VIEW_CONVERT_EXPR
;
1088 /* Need to load the value from memory first. */
1091 gimple_stmt_iterator gsi_ins
= gsi
;
1093 gsi_ins
= gsi_for_stmt (ins_stmt
);
1094 tree addr_expr
, addr_tmp
, val_expr
, val_tmp
;
1095 tree load_offset_ptr
, aligned_load_type
;
1097 unsigned align
= get_object_alignment (src
);
1098 poly_int64 load_offset
= 0;
1102 basic_block ins_bb
= gimple_bb (ins_stmt
);
1103 basic_block cur_bb
= gimple_bb (cur_stmt
);
1104 if (!dominated_by_p (CDI_DOMINATORS
, cur_bb
, ins_bb
))
1107 /* Move cur_stmt just before one of the load of the original
1108 to ensure it has the same VUSE. See PR61517 for what could
1110 if (gimple_bb (cur_stmt
) != gimple_bb (ins_stmt
))
1111 reset_flow_sensitive_info (gimple_assign_lhs (cur_stmt
));
1112 gsi_move_before (&gsi
, &gsi_ins
);
1113 gsi
= gsi_for_stmt (cur_stmt
);
1118 /* Compute address to load from and cast according to the size
1120 addr_expr
= build_fold_addr_expr (src
);
1121 if (is_gimple_mem_ref_addr (addr_expr
))
1122 addr_tmp
= unshare_expr (addr_expr
);
1125 addr_tmp
= unshare_expr (n
->base_addr
);
1126 if (!is_gimple_mem_ref_addr (addr_tmp
))
1127 addr_tmp
= force_gimple_operand_gsi_1 (&gsi
, addr_tmp
,
1128 is_gimple_mem_ref_addr
,
1131 load_offset
= n
->bytepos
;
1135 = force_gimple_operand_gsi (&gsi
, unshare_expr (n
->offset
),
1136 true, NULL_TREE
, true,
1139 = gimple_build_assign (make_ssa_name (TREE_TYPE (addr_tmp
)),
1140 POINTER_PLUS_EXPR
, addr_tmp
, off
);
1141 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
1142 addr_tmp
= gimple_assign_lhs (stmt
);
1146 /* Perform the load. */
1147 aligned_load_type
= load_type
;
1148 if (align
< TYPE_ALIGN (load_type
))
1149 aligned_load_type
= build_aligned_type (load_type
, align
);
1150 load_offset_ptr
= build_int_cst (n
->alias_set
, load_offset
);
1151 val_expr
= fold_build2 (MEM_REF
, aligned_load_type
, addr_tmp
,
1157 nop_stats
.found_16bit
++;
1158 else if (n
->range
== 32)
1159 nop_stats
.found_32bit
++;
1162 gcc_assert (n
->range
== 64);
1163 nop_stats
.found_64bit
++;
1166 /* Convert the result of load if necessary. */
1167 if (tgt
&& !useless_type_conversion_p (TREE_TYPE (tgt
), load_type
))
1169 val_tmp
= make_temp_ssa_name (aligned_load_type
, NULL
,
1171 load_stmt
= gimple_build_assign (val_tmp
, val_expr
);
1172 gimple_set_vuse (load_stmt
, n
->vuse
);
1173 gsi_insert_before (&gsi
, load_stmt
, GSI_SAME_STMT
);
1174 if (conv_code
== VIEW_CONVERT_EXPR
)
1175 val_tmp
= bswap_view_convert (&gsi
, TREE_TYPE (tgt
), val_tmp
,
1177 gimple_assign_set_rhs_with_ops (&gsi
, conv_code
, val_tmp
);
1178 update_stmt (cur_stmt
);
1182 gimple_assign_set_rhs_with_ops (&gsi
, MEM_REF
, val_expr
);
1183 gimple_set_vuse (cur_stmt
, n
->vuse
);
1184 update_stmt (cur_stmt
);
1188 tgt
= make_ssa_name (load_type
);
1189 cur_stmt
= gimple_build_assign (tgt
, MEM_REF
, val_expr
);
1190 gimple_set_vuse (cur_stmt
, n
->vuse
);
1191 gsi_insert_before (&gsi
, cur_stmt
, GSI_SAME_STMT
);
1197 "%d bit load in target endianness found at: ",
1199 print_gimple_stmt (dump_file
, cur_stmt
, 0);
1205 val_tmp
= make_temp_ssa_name (aligned_load_type
, NULL
, "load_dst");
1206 load_stmt
= gimple_build_assign (val_tmp
, val_expr
);
1207 gimple_set_vuse (load_stmt
, n
->vuse
);
1208 gsi_insert_before (&gsi
, load_stmt
, GSI_SAME_STMT
);
1215 if (tgt
&& !useless_type_conversion_p (TREE_TYPE (tgt
), TREE_TYPE (src
)))
1217 if (!is_gimple_val (src
))
1219 if (conv_code
== VIEW_CONVERT_EXPR
)
1220 src
= bswap_view_convert (&gsi
, TREE_TYPE (tgt
), src
, true);
1221 g
= gimple_build_assign (tgt
, conv_code
, src
);
1224 g
= gimple_build_assign (tgt
, src
);
1228 nop_stats
.found_16bit
++;
1229 else if (n
->range
== 32)
1230 nop_stats
.found_32bit
++;
1233 gcc_assert (n
->range
== 64);
1234 nop_stats
.found_64bit
++;
1239 "%d bit reshuffle in target endianness found at: ",
1242 print_gimple_stmt (dump_file
, cur_stmt
, 0);
1245 print_generic_expr (dump_file
, tgt
, TDF_NONE
);
1246 fprintf (dump_file
, "\n");
1250 gsi_replace (&gsi
, g
, true);
1253 else if (TREE_CODE (src
) == BIT_FIELD_REF
)
1254 src
= TREE_OPERAND (src
, 0);
1257 bswap_stats
.found_16bit
++;
1258 else if (n
->range
== 32)
1259 bswap_stats
.found_32bit
++;
1262 gcc_assert (n
->range
== 64);
1263 bswap_stats
.found_64bit
++;
1268 /* Convert the src expression if necessary. */
1269 if (!useless_type_conversion_p (TREE_TYPE (tmp
), bswap_type
))
1271 gimple
*convert_stmt
;
1273 tmp
= make_temp_ssa_name (bswap_type
, NULL
, "bswapsrc");
1274 convert_stmt
= gimple_build_assign (tmp
, NOP_EXPR
, src
);
1275 gsi_insert_before (&gsi
, convert_stmt
, GSI_SAME_STMT
);
1278 /* Canonical form for 16 bit bswap is a rotate expression. Only 16bit values
1279 are considered as rotation of 2N bit values by N bits is generally not
1280 equivalent to a bswap. Consider for instance 0x01020304 r>> 16 which
1281 gives 0x03040102 while a bswap for that value is 0x04030201. */
1282 if (bswap
&& n
->range
== 16)
1284 tree count
= build_int_cst (NULL
, BITS_PER_UNIT
);
1285 src
= fold_build2 (LROTATE_EXPR
, bswap_type
, tmp
, count
);
1286 bswap_stmt
= gimple_build_assign (NULL
, src
);
1289 bswap_stmt
= gimple_build_call (fndecl
, 1, tmp
);
1291 if (tgt
== NULL_TREE
)
1292 tgt
= make_ssa_name (bswap_type
);
1295 if (mask
!= ~(uint64_t) 0)
1297 tree m
= build_int_cst (bswap_type
, mask
);
1298 tmp
= make_temp_ssa_name (bswap_type
, NULL
, "bswapdst");
1299 gimple_set_lhs (bswap_stmt
, tmp
);
1300 mask_stmt
= gimple_build_assign (tgt
, BIT_AND_EXPR
, tmp
, m
);
1304 /* Convert the result if necessary. */
1305 if (!useless_type_conversion_p (TREE_TYPE (tgt
), bswap_type
))
1307 tmp
= make_temp_ssa_name (bswap_type
, NULL
, "bswapdst");
1309 gimple_stmt_iterator gsi2
= gsi
;
1310 if (conv_code
== VIEW_CONVERT_EXPR
)
1311 atmp
= bswap_view_convert (&gsi2
, TREE_TYPE (tgt
), tmp
, false);
1312 gimple
*convert_stmt
= gimple_build_assign (tgt
, conv_code
, atmp
);
1313 gsi_insert_after (&gsi2
, convert_stmt
, GSI_SAME_STMT
);
1316 gimple_set_lhs (mask_stmt
? mask_stmt
: bswap_stmt
, tmp
);
1320 fprintf (dump_file
, "%d bit bswap implementation found at: ",
1323 print_gimple_stmt (dump_file
, cur_stmt
, 0);
1326 print_generic_expr (dump_file
, tgt
, TDF_NONE
);
1327 fprintf (dump_file
, "\n");
1334 gsi_insert_after (&gsi
, mask_stmt
, GSI_SAME_STMT
);
1335 gsi_insert_after (&gsi
, bswap_stmt
, GSI_SAME_STMT
);
1336 gsi_remove (&gsi
, true);
1340 gsi_insert_before (&gsi
, bswap_stmt
, GSI_SAME_STMT
);
1342 gsi_insert_before (&gsi
, mask_stmt
, GSI_SAME_STMT
);
1347 /* Try to optimize an assignment CUR_STMT with CONSTRUCTOR on the rhs
1348 using bswap optimizations. CDI_DOMINATORS need to be
1349 computed on entry. Return true if it has been optimized and
1350 TODO_update_ssa is needed. */
1353 maybe_optimize_vector_constructor (gimple
*cur_stmt
)
1355 tree fndecl
= NULL_TREE
, bswap_type
= NULL_TREE
, load_type
;
1356 struct symbolic_number n
;
1359 gcc_assert (is_gimple_assign (cur_stmt
)
1360 && gimple_assign_rhs_code (cur_stmt
) == CONSTRUCTOR
);
1362 tree rhs
= gimple_assign_rhs1 (cur_stmt
);
1363 if (!VECTOR_TYPE_P (TREE_TYPE (rhs
))
1364 || !INTEGRAL_TYPE_P (TREE_TYPE (TREE_TYPE (rhs
)))
1365 || gimple_assign_lhs (cur_stmt
) == NULL_TREE
)
1368 HOST_WIDE_INT sz
= int_size_in_bytes (TREE_TYPE (rhs
)) * BITS_PER_UNIT
;
1372 load_type
= bswap_type
= uint16_type_node
;
1375 if (builtin_decl_explicit_p (BUILT_IN_BSWAP32
)
1376 && optab_handler (bswap_optab
, SImode
) != CODE_FOR_nothing
)
1378 load_type
= uint32_type_node
;
1379 fndecl
= builtin_decl_explicit (BUILT_IN_BSWAP32
);
1380 bswap_type
= TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl
)));
1386 if (builtin_decl_explicit_p (BUILT_IN_BSWAP64
)
1387 && (optab_handler (bswap_optab
, DImode
) != CODE_FOR_nothing
1388 || (word_mode
== SImode
1389 && builtin_decl_explicit_p (BUILT_IN_BSWAP32
)
1390 && optab_handler (bswap_optab
, SImode
) != CODE_FOR_nothing
)))
1392 load_type
= uint64_type_node
;
1393 fndecl
= builtin_decl_explicit (BUILT_IN_BSWAP64
);
1394 bswap_type
= TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl
)));
1405 gimple
*ins_stmt
= find_bswap_or_nop (cur_stmt
, &n
, &bswap
,
1406 &cast64_to_32
, &mask
);
1408 || n
.range
!= (unsigned HOST_WIDE_INT
) sz
1410 || mask
!= ~(uint64_t) 0)
1413 if (bswap
&& !fndecl
&& n
.range
!= 16)
1416 memset (&nop_stats
, 0, sizeof (nop_stats
));
1417 memset (&bswap_stats
, 0, sizeof (bswap_stats
));
1418 return bswap_replace (gsi_for_stmt (cur_stmt
), ins_stmt
, fndecl
,
1419 bswap_type
, load_type
, &n
, bswap
, mask
) != NULL_TREE
;
1422 /* Find manual byte swap implementations as well as load in a given
1423 endianness. Byte swaps are turned into a bswap builtin invokation
1424 while endian loads are converted to bswap builtin invokation or
1425 simple load according to the target endianness. */
1428 pass_optimize_bswap::execute (function
*fun
)
1431 bool bswap32_p
, bswap64_p
;
1432 bool changed
= false;
1433 tree bswap32_type
= NULL_TREE
, bswap64_type
= NULL_TREE
;
1435 bswap32_p
= (builtin_decl_explicit_p (BUILT_IN_BSWAP32
)
1436 && optab_handler (bswap_optab
, SImode
) != CODE_FOR_nothing
);
1437 bswap64_p
= (builtin_decl_explicit_p (BUILT_IN_BSWAP64
)
1438 && (optab_handler (bswap_optab
, DImode
) != CODE_FOR_nothing
1439 || (bswap32_p
&& word_mode
== SImode
)));
1441 /* Determine the argument type of the builtins. The code later on
1442 assumes that the return and argument type are the same. */
1445 tree fndecl
= builtin_decl_explicit (BUILT_IN_BSWAP32
);
1446 bswap32_type
= TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl
)));
1451 tree fndecl
= builtin_decl_explicit (BUILT_IN_BSWAP64
);
1452 bswap64_type
= TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl
)));
1455 memset (&nop_stats
, 0, sizeof (nop_stats
));
1456 memset (&bswap_stats
, 0, sizeof (bswap_stats
));
1457 calculate_dominance_info (CDI_DOMINATORS
);
1459 FOR_EACH_BB_FN (bb
, fun
)
1461 gimple_stmt_iterator gsi
;
1463 /* We do a reverse scan for bswap patterns to make sure we get the
1464 widest match. As bswap pattern matching doesn't handle previously
1465 inserted smaller bswap replacements as sub-patterns, the wider
1466 variant wouldn't be detected. */
1467 for (gsi
= gsi_last_bb (bb
); !gsi_end_p (gsi
);)
1469 gimple
*ins_stmt
, *cur_stmt
= gsi_stmt (gsi
);
1470 tree fndecl
= NULL_TREE
, bswap_type
= NULL_TREE
, load_type
;
1471 enum tree_code code
;
1472 struct symbolic_number n
;
1473 bool bswap
, cast64_to_32
;
1476 /* This gsi_prev (&gsi) is not part of the for loop because cur_stmt
1477 might be moved to a different basic block by bswap_replace and gsi
1478 must not points to it if that's the case. Moving the gsi_prev
1479 there make sure that gsi points to the statement previous to
1480 cur_stmt while still making sure that all statements are
1481 considered in this basic block. */
1484 if (!is_gimple_assign (cur_stmt
))
1487 code
= gimple_assign_rhs_code (cur_stmt
);
1492 if (!tree_fits_uhwi_p (gimple_assign_rhs2 (cur_stmt
))
1493 || tree_to_uhwi (gimple_assign_rhs2 (cur_stmt
))
1501 tree rhs
= gimple_assign_rhs1 (cur_stmt
);
1502 if (VECTOR_TYPE_P (TREE_TYPE (rhs
))
1503 && INTEGRAL_TYPE_P (TREE_TYPE (TREE_TYPE (rhs
))))
1511 ins_stmt
= find_bswap_or_nop (cur_stmt
, &n
, &bswap
,
1512 &cast64_to_32
, &mask
);
1520 /* Already in canonical form, nothing to do. */
1521 if (code
== LROTATE_EXPR
|| code
== RROTATE_EXPR
)
1523 load_type
= bswap_type
= uint16_type_node
;
1526 load_type
= uint32_type_node
;
1529 fndecl
= builtin_decl_explicit (BUILT_IN_BSWAP32
);
1530 bswap_type
= bswap32_type
;
1534 load_type
= uint64_type_node
;
1537 fndecl
= builtin_decl_explicit (BUILT_IN_BSWAP64
);
1538 bswap_type
= bswap64_type
;
1545 if (bswap
&& !fndecl
&& n
.range
!= 16)
1548 if (bswap_replace (gsi_for_stmt (cur_stmt
), ins_stmt
, fndecl
,
1549 bswap_type
, load_type
, &n
, bswap
, mask
))
1554 statistics_counter_event (fun
, "16-bit nop implementations found",
1555 nop_stats
.found_16bit
);
1556 statistics_counter_event (fun
, "32-bit nop implementations found",
1557 nop_stats
.found_32bit
);
1558 statistics_counter_event (fun
, "64-bit nop implementations found",
1559 nop_stats
.found_64bit
);
1560 statistics_counter_event (fun
, "16-bit bswap implementations found",
1561 bswap_stats
.found_16bit
);
1562 statistics_counter_event (fun
, "32-bit bswap implementations found",
1563 bswap_stats
.found_32bit
);
1564 statistics_counter_event (fun
, "64-bit bswap implementations found",
1565 bswap_stats
.found_64bit
);
1567 return (changed
? TODO_update_ssa
: 0);
1573 make_pass_optimize_bswap (gcc::context
*ctxt
)
1575 return new pass_optimize_bswap (ctxt
);
1580 /* Struct recording one operand for the store, which is either a constant,
1581 then VAL represents the constant and all the other fields are zero, or
1582 a memory load, then VAL represents the reference, BASE_ADDR is non-NULL
1583 and the other fields also reflect the memory load, or an SSA name, then
1584 VAL represents the SSA name and all the other fields are zero, */
1586 class store_operand_info
1591 poly_uint64 bitsize
;
1593 poly_uint64 bitregion_start
;
1594 poly_uint64 bitregion_end
;
1597 store_operand_info ();
1600 store_operand_info::store_operand_info ()
1601 : val (NULL_TREE
), base_addr (NULL_TREE
), bitsize (0), bitpos (0),
1602 bitregion_start (0), bitregion_end (0), stmt (NULL
), bit_not_p (false)
1606 /* Struct recording the information about a single store of an immediate
1607 to memory. These are created in the first phase and coalesced into
1608 merged_store_group objects in the second phase. */
1610 class store_immediate_info
1613 unsigned HOST_WIDE_INT bitsize
;
1614 unsigned HOST_WIDE_INT bitpos
;
1615 unsigned HOST_WIDE_INT bitregion_start
;
1616 /* This is one past the last bit of the bit region. */
1617 unsigned HOST_WIDE_INT bitregion_end
;
1620 /* INTEGER_CST for constant store, STRING_CST for string store,
1621 MEM_REF for memory copy, BIT_*_EXPR for logical bitwise operation,
1622 BIT_INSERT_EXPR for bit insertion.
1623 LROTATE_EXPR if it can be only bswap optimized and
1624 ops are not really meaningful.
1625 NOP_EXPR if bswap optimization detected identity, ops
1626 are not meaningful. */
1627 enum tree_code rhs_code
;
1628 /* Two fields for bswap optimization purposes. */
1629 struct symbolic_number n
;
1631 /* True if BIT_{AND,IOR,XOR}_EXPR result is inverted before storing. */
1633 /* True if ops have been swapped and thus ops[1] represents
1634 rhs1 of BIT_{AND,IOR,XOR}_EXPR and ops[0] represents rhs2. */
1636 /* The index number of the landing pad, or 0 if there is none. */
1638 /* Operands. For BIT_*_EXPR rhs_code both operands are used, otherwise
1639 just the first one. */
1640 store_operand_info ops
[2];
1641 store_immediate_info (unsigned HOST_WIDE_INT
, unsigned HOST_WIDE_INT
,
1642 unsigned HOST_WIDE_INT
, unsigned HOST_WIDE_INT
,
1643 gimple
*, unsigned int, enum tree_code
,
1644 struct symbolic_number
&, gimple
*, bool, int,
1645 const store_operand_info
&,
1646 const store_operand_info
&);
1649 store_immediate_info::store_immediate_info (unsigned HOST_WIDE_INT bs
,
1650 unsigned HOST_WIDE_INT bp
,
1651 unsigned HOST_WIDE_INT brs
,
1652 unsigned HOST_WIDE_INT bre
,
1655 enum tree_code rhscode
,
1656 struct symbolic_number
&nr
,
1660 const store_operand_info
&op0r
,
1661 const store_operand_info
&op1r
)
1662 : bitsize (bs
), bitpos (bp
), bitregion_start (brs
), bitregion_end (bre
),
1663 stmt (st
), order (ord
), rhs_code (rhscode
), n (nr
),
1664 ins_stmt (ins_stmtp
), bit_not_p (bitnotp
), ops_swapped_p (false),
1665 lp_nr (nr2
), ops
{ op0r
, op1r
}
1669 /* Struct representing a group of stores to contiguous memory locations.
1670 These are produced by the second phase (coalescing) and consumed in the
1671 third phase that outputs the widened stores. */
1673 class merged_store_group
1676 unsigned HOST_WIDE_INT start
;
1677 unsigned HOST_WIDE_INT width
;
1678 unsigned HOST_WIDE_INT bitregion_start
;
1679 unsigned HOST_WIDE_INT bitregion_end
;
1680 /* The size of the allocated memory for val and mask. */
1681 unsigned HOST_WIDE_INT buf_size
;
1682 unsigned HOST_WIDE_INT align_base
;
1683 poly_uint64 load_align_base
[2];
1686 unsigned int load_align
[2];
1687 unsigned int first_order
;
1688 unsigned int last_order
;
1690 bool string_concatenation
;
1691 bool only_constants
;
1693 unsigned int first_nonmergeable_order
;
1696 auto_vec
<store_immediate_info
*> stores
;
1697 /* We record the first and last original statements in the sequence because
1698 we'll need their vuse/vdef and replacement position. It's easier to keep
1699 track of them separately as 'stores' is reordered by apply_stores. */
1703 unsigned char *mask
;
1705 merged_store_group (store_immediate_info
*);
1706 ~merged_store_group ();
1707 bool can_be_merged_into (store_immediate_info
*);
1708 void merge_into (store_immediate_info
*);
1709 void merge_overlapping (store_immediate_info
*);
1710 bool apply_stores ();
1712 void do_merge (store_immediate_info
*);
1715 /* Debug helper. Dump LEN elements of byte array PTR to FD in hex. */
1718 dump_char_array (FILE *fd
, unsigned char *ptr
, unsigned int len
)
1723 for (unsigned int i
= 0; i
< len
; i
++)
1724 fprintf (fd
, "%02x ", ptr
[i
]);
1728 /* Clear out LEN bits starting from bit START in the byte array
1729 PTR. This clears the bits to the *right* from START.
1730 START must be within [0, BITS_PER_UNIT) and counts starting from
1731 the least significant bit. */
1734 clear_bit_region_be (unsigned char *ptr
, unsigned int start
,
1739 /* Clear len bits to the right of start. */
1740 else if (len
<= start
+ 1)
1742 unsigned char mask
= (~(~0U << len
));
1743 mask
= mask
<< (start
+ 1U - len
);
1746 else if (start
!= BITS_PER_UNIT
- 1)
1748 clear_bit_region_be (ptr
, start
, (start
% BITS_PER_UNIT
) + 1);
1749 clear_bit_region_be (ptr
+ 1, BITS_PER_UNIT
- 1,
1750 len
- (start
% BITS_PER_UNIT
) - 1);
1752 else if (start
== BITS_PER_UNIT
- 1
1753 && len
> BITS_PER_UNIT
)
1755 unsigned int nbytes
= len
/ BITS_PER_UNIT
;
1756 memset (ptr
, 0, nbytes
);
1757 if (len
% BITS_PER_UNIT
!= 0)
1758 clear_bit_region_be (ptr
+ nbytes
, BITS_PER_UNIT
- 1,
1759 len
% BITS_PER_UNIT
);
1765 /* In the byte array PTR clear the bit region starting at bit
1766 START and is LEN bits wide.
1767 For regions spanning multiple bytes do this recursively until we reach
1768 zero LEN or a region contained within a single byte. */
1771 clear_bit_region (unsigned char *ptr
, unsigned int start
,
1774 /* Degenerate base case. */
1777 else if (start
>= BITS_PER_UNIT
)
1778 clear_bit_region (ptr
+ 1, start
- BITS_PER_UNIT
, len
);
1779 /* Second base case. */
1780 else if ((start
+ len
) <= BITS_PER_UNIT
)
1782 unsigned char mask
= (~0U) << (unsigned char) (BITS_PER_UNIT
- len
);
1783 mask
>>= BITS_PER_UNIT
- (start
+ len
);
1789 /* Clear most significant bits in a byte and proceed with the next byte. */
1790 else if (start
!= 0)
1792 clear_bit_region (ptr
, start
, BITS_PER_UNIT
- start
);
1793 clear_bit_region (ptr
+ 1, 0, len
- (BITS_PER_UNIT
- start
));
1795 /* Whole bytes need to be cleared. */
1796 else if (start
== 0 && len
> BITS_PER_UNIT
)
1798 unsigned int nbytes
= len
/ BITS_PER_UNIT
;
1799 /* We could recurse on each byte but we clear whole bytes, so a simple
1801 memset (ptr
, '\0', nbytes
);
1802 /* Clear the remaining sub-byte region if there is one. */
1803 if (len
% BITS_PER_UNIT
!= 0)
1804 clear_bit_region (ptr
+ nbytes
, 0, len
% BITS_PER_UNIT
);
1810 /* Write BITLEN bits of EXPR to the byte array PTR at
1811 bit position BITPOS. PTR should contain TOTAL_BYTES elements.
1812 Return true if the operation succeeded. */
1815 encode_tree_to_bitpos (tree expr
, unsigned char *ptr
, int bitlen
, int bitpos
,
1816 unsigned int total_bytes
)
1818 unsigned int first_byte
= bitpos
/ BITS_PER_UNIT
;
1819 bool sub_byte_op_p
= ((bitlen
% BITS_PER_UNIT
)
1820 || (bitpos
% BITS_PER_UNIT
)
1821 || !int_mode_for_size (bitlen
, 0).exists ());
1823 = (TREE_CODE (expr
) == CONSTRUCTOR
1824 && CONSTRUCTOR_NELTS (expr
) == 0
1825 && TYPE_SIZE_UNIT (TREE_TYPE (expr
))
1826 && tree_fits_uhwi_p (TYPE_SIZE_UNIT (TREE_TYPE (expr
))));
1830 if (first_byte
>= total_bytes
)
1832 total_bytes
-= first_byte
;
1835 unsigned HOST_WIDE_INT rhs_bytes
1836 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (expr
)));
1837 if (rhs_bytes
> total_bytes
)
1839 memset (ptr
+ first_byte
, '\0', rhs_bytes
);
1842 return native_encode_expr (expr
, ptr
+ first_byte
, total_bytes
) != 0;
1846 We are writing a non byte-sized quantity or at a position that is not
1848 |--------|--------|--------| ptr + first_byte
1850 xxx xxxxxxxx xxx< bp>
1853 First native_encode_expr EXPR into a temporary buffer and shift each
1854 byte in the buffer by 'bp' (carrying the bits over as necessary).
1855 |00000000|00xxxxxx|xxxxxxxx| << bp = |000xxxxx|xxxxxxxx|xxx00000|
1856 <------bitlen---->< bp>
1857 Then we clear the destination bits:
1858 |---00000|00000000|000-----| ptr + first_byte
1859 <-------bitlen--->< bp>
1861 Finally we ORR the bytes of the shifted EXPR into the cleared region:
1862 |---xxxxx||xxxxxxxx||xxx-----| ptr + first_byte.
1865 We are writing a non byte-sized quantity or at a position that is not
1867 ptr + first_byte |--------|--------|--------|
1869 <bp >xxx xxxxxxxx xxx
1872 First native_encode_expr EXPR into a temporary buffer and shift each
1873 byte in the buffer to the right by (carrying the bits over as necessary).
1874 We shift by as much as needed to align the most significant bit of EXPR
1876 |00xxxxxx|xxxxxxxx| >> 3 = |00000xxx|xxxxxxxx|xxxxx000|
1877 <---bitlen----> <bp ><-----bitlen----->
1878 Then we clear the destination bits:
1879 ptr + first_byte |-----000||00000000||00000---|
1880 <bp ><-------bitlen----->
1882 Finally we ORR the bytes of the shifted EXPR into the cleared region:
1883 ptr + first_byte |---xxxxx||xxxxxxxx||xxx-----|.
1884 The awkwardness comes from the fact that bitpos is counted from the
1885 most significant bit of a byte. */
1887 /* We must be dealing with fixed-size data at this point, since the
1888 total size is also fixed. */
1889 unsigned int byte_size
;
1892 unsigned HOST_WIDE_INT rhs_bytes
1893 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (expr
)));
1894 if (rhs_bytes
> total_bytes
)
1896 byte_size
= rhs_bytes
;
1900 fixed_size_mode mode
1901 = as_a
<fixed_size_mode
> (TYPE_MODE (TREE_TYPE (expr
)));
1904 ? tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (expr
)))
1905 : GET_MODE_SIZE (mode
);
1907 /* Allocate an extra byte so that we have space to shift into. */
1909 unsigned char *tmpbuf
= XALLOCAVEC (unsigned char, byte_size
);
1910 memset (tmpbuf
, '\0', byte_size
);
1911 /* The store detection code should only have allowed constants that are
1912 accepted by native_encode_expr or empty ctors. */
1914 && native_encode_expr (expr
, tmpbuf
, byte_size
- 1) == 0)
1917 /* The native_encode_expr machinery uses TYPE_MODE to determine how many
1918 bytes to write. This means it can write more than
1919 ROUND_UP (bitlen, BITS_PER_UNIT) / BITS_PER_UNIT bytes (for example
1920 write 8 bytes for a bitlen of 40). Skip the bytes that are not within
1921 bitlen and zero out the bits that are not relevant as well (that may
1922 contain a sign bit due to sign-extension). */
1923 unsigned int padding
1924 = byte_size
- ROUND_UP (bitlen
, BITS_PER_UNIT
) / BITS_PER_UNIT
- 1;
1925 /* On big-endian the padding is at the 'front' so just skip the initial
1927 if (BYTES_BIG_ENDIAN
)
1930 byte_size
-= padding
;
1932 if (bitlen
% BITS_PER_UNIT
!= 0)
1934 if (BYTES_BIG_ENDIAN
)
1935 clear_bit_region_be (tmpbuf
, BITS_PER_UNIT
- 1,
1936 BITS_PER_UNIT
- (bitlen
% BITS_PER_UNIT
));
1938 clear_bit_region (tmpbuf
, bitlen
,
1939 byte_size
* BITS_PER_UNIT
- bitlen
);
1941 /* Left shifting relies on the last byte being clear if bitlen is
1942 a multiple of BITS_PER_UNIT, which might not be clear if
1943 there are padding bytes. */
1944 else if (!BYTES_BIG_ENDIAN
)
1945 tmpbuf
[byte_size
- 1] = '\0';
1947 /* Clear the bit region in PTR where the bits from TMPBUF will be
1949 if (BYTES_BIG_ENDIAN
)
1950 clear_bit_region_be (ptr
+ first_byte
,
1951 BITS_PER_UNIT
- 1 - (bitpos
% BITS_PER_UNIT
), bitlen
);
1953 clear_bit_region (ptr
+ first_byte
, bitpos
% BITS_PER_UNIT
, bitlen
);
1956 int bitlen_mod
= bitlen
% BITS_PER_UNIT
;
1957 int bitpos_mod
= bitpos
% BITS_PER_UNIT
;
1959 bool skip_byte
= false;
1960 if (BYTES_BIG_ENDIAN
)
1962 /* BITPOS and BITLEN are exactly aligned and no shifting
1964 if (bitpos_mod
+ bitlen_mod
== BITS_PER_UNIT
1965 || (bitpos_mod
== 0 && bitlen_mod
== 0))
1967 /* |. . . . . . . .|
1969 We always shift right for BYTES_BIG_ENDIAN so shift the beginning
1970 of the value until it aligns with 'bp' in the next byte over. */
1971 else if (bitpos_mod
+ bitlen_mod
< BITS_PER_UNIT
)
1973 shift_amnt
= bitlen_mod
+ bitpos_mod
;
1974 skip_byte
= bitlen_mod
!= 0;
1976 /* |. . . . . . . .|
1979 Shift the value right within the same byte so it aligns with 'bp'. */
1981 shift_amnt
= bitlen_mod
+ bitpos_mod
- BITS_PER_UNIT
;
1984 shift_amnt
= bitpos
% BITS_PER_UNIT
;
1986 /* Create the shifted version of EXPR. */
1987 if (!BYTES_BIG_ENDIAN
)
1989 shift_bytes_in_array_left (tmpbuf
, byte_size
, shift_amnt
);
1990 if (shift_amnt
== 0)
1995 gcc_assert (BYTES_BIG_ENDIAN
);
1996 shift_bytes_in_array_right (tmpbuf
, byte_size
, shift_amnt
);
1997 /* If shifting right forced us to move into the next byte skip the now
2006 /* Insert the bits from TMPBUF. */
2007 for (unsigned int i
= 0; i
< byte_size
; i
++)
2008 ptr
[first_byte
+ i
] |= tmpbuf
[i
];
2013 /* Sorting function for store_immediate_info objects.
2014 Sorts them by bitposition. */
2017 sort_by_bitpos (const void *x
, const void *y
)
2019 store_immediate_info
*const *tmp
= (store_immediate_info
* const *) x
;
2020 store_immediate_info
*const *tmp2
= (store_immediate_info
* const *) y
;
2022 if ((*tmp
)->bitpos
< (*tmp2
)->bitpos
)
2024 else if ((*tmp
)->bitpos
> (*tmp2
)->bitpos
)
2027 /* If they are the same let's use the order which is guaranteed to
2029 return (*tmp
)->order
- (*tmp2
)->order
;
2032 /* Sorting function for store_immediate_info objects.
2033 Sorts them by the order field. */
2036 sort_by_order (const void *x
, const void *y
)
2038 store_immediate_info
*const *tmp
= (store_immediate_info
* const *) x
;
2039 store_immediate_info
*const *tmp2
= (store_immediate_info
* const *) y
;
2041 if ((*tmp
)->order
< (*tmp2
)->order
)
2043 else if ((*tmp
)->order
> (*tmp2
)->order
)
2049 /* Initialize a merged_store_group object from a store_immediate_info
2052 merged_store_group::merged_store_group (store_immediate_info
*info
)
2054 start
= info
->bitpos
;
2055 width
= info
->bitsize
;
2056 bitregion_start
= info
->bitregion_start
;
2057 bitregion_end
= info
->bitregion_end
;
2058 /* VAL has memory allocated for it in apply_stores once the group
2059 width has been finalized. */
2062 bit_insertion
= info
->rhs_code
== BIT_INSERT_EXPR
;
2063 string_concatenation
= info
->rhs_code
== STRING_CST
;
2064 only_constants
= info
->rhs_code
== INTEGER_CST
;
2066 first_nonmergeable_order
= ~0U;
2067 lp_nr
= info
->lp_nr
;
2068 unsigned HOST_WIDE_INT align_bitpos
= 0;
2069 get_object_alignment_1 (gimple_assign_lhs (info
->stmt
),
2070 &align
, &align_bitpos
);
2071 align_base
= start
- align_bitpos
;
2072 for (int i
= 0; i
< 2; ++i
)
2074 store_operand_info
&op
= info
->ops
[i
];
2075 if (op
.base_addr
== NULL_TREE
)
2078 load_align_base
[i
] = 0;
2082 get_object_alignment_1 (op
.val
, &load_align
[i
], &align_bitpos
);
2083 load_align_base
[i
] = op
.bitpos
- align_bitpos
;
2087 stores
.safe_push (info
);
2088 last_stmt
= info
->stmt
;
2089 last_order
= info
->order
;
2090 first_stmt
= last_stmt
;
2091 first_order
= last_order
;
2095 merged_store_group::~merged_store_group ()
2101 /* Return true if the store described by INFO can be merged into the group. */
2104 merged_store_group::can_be_merged_into (store_immediate_info
*info
)
2106 /* Do not merge bswap patterns. */
2107 if (info
->rhs_code
== LROTATE_EXPR
)
2110 if (info
->lp_nr
!= lp_nr
)
2113 /* The canonical case. */
2114 if (info
->rhs_code
== stores
[0]->rhs_code
)
2117 /* BIT_INSERT_EXPR is compatible with INTEGER_CST if no STRING_CST. */
2118 if (info
->rhs_code
== BIT_INSERT_EXPR
&& stores
[0]->rhs_code
== INTEGER_CST
)
2119 return !string_concatenation
;
2121 if (stores
[0]->rhs_code
== BIT_INSERT_EXPR
&& info
->rhs_code
== INTEGER_CST
)
2122 return !string_concatenation
;
2124 /* We can turn MEM_REF into BIT_INSERT_EXPR for bit-field stores, but do it
2125 only for small regions since this can generate a lot of instructions. */
2126 if (info
->rhs_code
== MEM_REF
2127 && (stores
[0]->rhs_code
== INTEGER_CST
2128 || stores
[0]->rhs_code
== BIT_INSERT_EXPR
)
2129 && info
->bitregion_start
== stores
[0]->bitregion_start
2130 && info
->bitregion_end
== stores
[0]->bitregion_end
2131 && info
->bitregion_end
- info
->bitregion_start
<= MAX_FIXED_MODE_SIZE
)
2132 return !string_concatenation
;
2134 if (stores
[0]->rhs_code
== MEM_REF
2135 && (info
->rhs_code
== INTEGER_CST
2136 || info
->rhs_code
== BIT_INSERT_EXPR
)
2137 && info
->bitregion_start
== stores
[0]->bitregion_start
2138 && info
->bitregion_end
== stores
[0]->bitregion_end
2139 && info
->bitregion_end
- info
->bitregion_start
<= MAX_FIXED_MODE_SIZE
)
2140 return !string_concatenation
;
2142 /* STRING_CST is compatible with INTEGER_CST if no BIT_INSERT_EXPR. */
2143 if (info
->rhs_code
== STRING_CST
2144 && stores
[0]->rhs_code
== INTEGER_CST
2145 && stores
[0]->bitsize
== CHAR_BIT
)
2146 return !bit_insertion
;
2148 if (stores
[0]->rhs_code
== STRING_CST
2149 && info
->rhs_code
== INTEGER_CST
2150 && info
->bitsize
== CHAR_BIT
)
2151 return !bit_insertion
;
2156 /* Helper method for merge_into and merge_overlapping to do
2160 merged_store_group::do_merge (store_immediate_info
*info
)
2162 bitregion_start
= MIN (bitregion_start
, info
->bitregion_start
);
2163 bitregion_end
= MAX (bitregion_end
, info
->bitregion_end
);
2165 unsigned int this_align
;
2166 unsigned HOST_WIDE_INT align_bitpos
= 0;
2167 get_object_alignment_1 (gimple_assign_lhs (info
->stmt
),
2168 &this_align
, &align_bitpos
);
2169 if (this_align
> align
)
2172 align_base
= info
->bitpos
- align_bitpos
;
2174 for (int i
= 0; i
< 2; ++i
)
2176 store_operand_info
&op
= info
->ops
[i
];
2180 get_object_alignment_1 (op
.val
, &this_align
, &align_bitpos
);
2181 if (this_align
> load_align
[i
])
2183 load_align
[i
] = this_align
;
2184 load_align_base
[i
] = op
.bitpos
- align_bitpos
;
2188 gimple
*stmt
= info
->stmt
;
2189 stores
.safe_push (info
);
2190 if (info
->order
> last_order
)
2192 last_order
= info
->order
;
2195 else if (info
->order
< first_order
)
2197 first_order
= info
->order
;
2201 if (info
->bitpos
!= start
+ width
)
2202 consecutive
= false;
2204 /* We need to use extraction if there is any bit-field. */
2205 if (info
->rhs_code
== BIT_INSERT_EXPR
)
2207 bit_insertion
= true;
2208 gcc_assert (!string_concatenation
);
2211 /* We want to use concatenation if there is any string. */
2212 if (info
->rhs_code
== STRING_CST
)
2214 string_concatenation
= true;
2215 gcc_assert (!bit_insertion
);
2218 /* But we cannot use it if we don't have consecutive stores. */
2220 string_concatenation
= false;
2222 if (info
->rhs_code
!= INTEGER_CST
)
2223 only_constants
= false;
2226 /* Merge a store recorded by INFO into this merged store.
2227 The store is not overlapping with the existing recorded
2231 merged_store_group::merge_into (store_immediate_info
*info
)
2235 /* Make sure we're inserting in the position we think we're inserting. */
2236 gcc_assert (info
->bitpos
>= start
+ width
2237 && info
->bitregion_start
<= bitregion_end
);
2239 width
= info
->bitpos
+ info
->bitsize
- start
;
2242 /* Merge a store described by INFO into this merged store.
2243 INFO overlaps in some way with the current store (i.e. it's not contiguous
2244 which is handled by merged_store_group::merge_into). */
2247 merged_store_group::merge_overlapping (store_immediate_info
*info
)
2251 /* If the store extends the size of the group, extend the width. */
2252 if (info
->bitpos
+ info
->bitsize
> start
+ width
)
2253 width
= info
->bitpos
+ info
->bitsize
- start
;
2256 /* Go through all the recorded stores in this group in program order and
2257 apply their values to the VAL byte array to create the final merged
2258 value. Return true if the operation succeeded. */
2261 merged_store_group::apply_stores ()
2263 store_immediate_info
*info
;
2266 /* Make sure we have more than one store in the group, otherwise we cannot
2268 if (bitregion_start
% BITS_PER_UNIT
!= 0
2269 || bitregion_end
% BITS_PER_UNIT
!= 0
2270 || stores
.length () == 1)
2273 buf_size
= (bitregion_end
- bitregion_start
) / BITS_PER_UNIT
;
2275 /* Really do string concatenation for large strings only. */
2276 if (buf_size
<= MOVE_MAX
)
2277 string_concatenation
= false;
2279 /* Create a power-of-2-sized buffer for native_encode_expr. */
2280 if (!string_concatenation
)
2281 buf_size
= 1 << ceil_log2 (buf_size
);
2283 val
= XNEWVEC (unsigned char, 2 * buf_size
);
2284 mask
= val
+ buf_size
;
2285 memset (val
, 0, buf_size
);
2286 memset (mask
, ~0U, buf_size
);
2288 stores
.qsort (sort_by_order
);
2290 FOR_EACH_VEC_ELT (stores
, i
, info
)
2292 unsigned int pos_in_buffer
= info
->bitpos
- bitregion_start
;
2294 if (info
->ops
[0].val
&& info
->ops
[0].base_addr
== NULL_TREE
)
2295 cst
= info
->ops
[0].val
;
2296 else if (info
->ops
[1].val
&& info
->ops
[1].base_addr
== NULL_TREE
)
2297 cst
= info
->ops
[1].val
;
2301 if (cst
&& info
->rhs_code
!= BIT_INSERT_EXPR
)
2302 ret
= encode_tree_to_bitpos (cst
, val
, info
->bitsize
, pos_in_buffer
,
2304 unsigned char *m
= mask
+ (pos_in_buffer
/ BITS_PER_UNIT
);
2305 if (BYTES_BIG_ENDIAN
)
2306 clear_bit_region_be (m
, (BITS_PER_UNIT
- 1
2307 - (pos_in_buffer
% BITS_PER_UNIT
)),
2310 clear_bit_region (m
, pos_in_buffer
% BITS_PER_UNIT
, info
->bitsize
);
2311 if (cst
&& dump_file
&& (dump_flags
& TDF_DETAILS
))
2315 fputs ("After writing ", dump_file
);
2316 print_generic_expr (dump_file
, cst
, TDF_NONE
);
2317 fprintf (dump_file
, " of size " HOST_WIDE_INT_PRINT_DEC
2318 " at position %d\n", info
->bitsize
, pos_in_buffer
);
2319 fputs (" the merged value contains ", dump_file
);
2320 dump_char_array (dump_file
, val
, buf_size
);
2321 fputs (" the merged mask contains ", dump_file
);
2322 dump_char_array (dump_file
, mask
, buf_size
);
2324 fputs (" bit insertion is required\n", dump_file
);
2325 if (string_concatenation
)
2326 fputs (" string concatenation is required\n", dump_file
);
2329 fprintf (dump_file
, "Failed to merge stores\n");
2334 stores
.qsort (sort_by_bitpos
);
2338 /* Structure describing the store chain. */
2340 class imm_store_chain_info
2343 /* Doubly-linked list that imposes an order on chain processing.
2344 PNXP (prev's next pointer) points to the head of a list, or to
2345 the next field in the previous chain in the list.
2346 See pass_store_merging::m_stores_head for more rationale. */
2347 imm_store_chain_info
*next
, **pnxp
;
2349 auto_vec
<store_immediate_info
*> m_store_info
;
2350 auto_vec
<merged_store_group
*> m_merged_store_groups
;
2352 imm_store_chain_info (imm_store_chain_info
*&inspt
, tree b_a
)
2353 : next (inspt
), pnxp (&inspt
), base_addr (b_a
)
2358 gcc_checking_assert (pnxp
== next
->pnxp
);
2362 ~imm_store_chain_info ()
2367 gcc_checking_assert (&next
== next
->pnxp
);
2371 bool terminate_and_process_chain ();
2372 bool try_coalesce_bswap (merged_store_group
*, unsigned int, unsigned int,
2374 bool coalesce_immediate_stores ();
2375 bool output_merged_store (merged_store_group
*);
2376 bool output_merged_stores ();
2379 const pass_data pass_data_tree_store_merging
= {
2380 GIMPLE_PASS
, /* type */
2381 "store-merging", /* name */
2382 OPTGROUP_NONE
, /* optinfo_flags */
2383 TV_GIMPLE_STORE_MERGING
, /* tv_id */
2384 PROP_ssa
, /* properties_required */
2385 0, /* properties_provided */
2386 0, /* properties_destroyed */
2387 0, /* todo_flags_start */
2388 TODO_update_ssa
, /* todo_flags_finish */
2391 class pass_store_merging
: public gimple_opt_pass
2394 pass_store_merging (gcc::context
*ctxt
)
2395 : gimple_opt_pass (pass_data_tree_store_merging
, ctxt
), m_stores_head (),
2396 m_n_chains (0), m_n_stores (0)
2400 /* Pass not supported for PDP-endian, nor for insane hosts or
2401 target character sizes where native_{encode,interpret}_expr
2402 doesn't work properly. */
2406 return flag_store_merging
2407 && BYTES_BIG_ENDIAN
== WORDS_BIG_ENDIAN
2409 && BITS_PER_UNIT
== 8;
2412 virtual unsigned int execute (function
*);
2415 hash_map
<tree_operand_hash
, class imm_store_chain_info
*> m_stores
;
2417 /* Form a doubly-linked stack of the elements of m_stores, so that
2418 we can iterate over them in a predictable way. Using this order
2419 avoids extraneous differences in the compiler output just because
2420 of tree pointer variations (e.g. different chains end up in
2421 different positions of m_stores, so they are handled in different
2422 orders, so they allocate or release SSA names in different
2423 orders, and when they get reused, subsequent passes end up
2424 getting different SSA names, which may ultimately change
2425 decisions when going out of SSA). */
2426 imm_store_chain_info
*m_stores_head
;
2428 /* The number of store chains currently tracked. */
2429 unsigned m_n_chains
;
2430 /* The number of stores currently tracked. */
2431 unsigned m_n_stores
;
2433 bool process_store (gimple
*);
2434 bool terminate_and_process_chain (imm_store_chain_info
*);
2435 bool terminate_all_aliasing_chains (imm_store_chain_info
**, gimple
*);
2436 bool terminate_and_process_all_chains ();
2437 }; // class pass_store_merging
2439 /* Terminate and process all recorded chains. Return true if any changes
2443 pass_store_merging::terminate_and_process_all_chains ()
2446 while (m_stores_head
)
2447 ret
|= terminate_and_process_chain (m_stores_head
);
2448 gcc_assert (m_stores
.is_empty ());
2452 /* Terminate all chains that are affected by the statement STMT.
2453 CHAIN_INFO is the chain we should ignore from the checks if
2454 non-NULL. Return true if any changes were made. */
2457 pass_store_merging::terminate_all_aliasing_chains (imm_store_chain_info
2463 /* If the statement doesn't touch memory it can't alias. */
2464 if (!gimple_vuse (stmt
))
2467 tree store_lhs
= gimple_store_p (stmt
) ? gimple_get_lhs (stmt
) : NULL_TREE
;
2468 ao_ref store_lhs_ref
;
2469 ao_ref_init (&store_lhs_ref
, store_lhs
);
2470 for (imm_store_chain_info
*next
= m_stores_head
, *cur
= next
; cur
; cur
= next
)
2474 /* We already checked all the stores in chain_info and terminated the
2475 chain if necessary. Skip it here. */
2476 if (chain_info
&& *chain_info
== cur
)
2479 store_immediate_info
*info
;
2481 FOR_EACH_VEC_ELT (cur
->m_store_info
, i
, info
)
2483 tree lhs
= gimple_assign_lhs (info
->stmt
);
2485 ao_ref_init (&lhs_ref
, lhs
);
2486 if (ref_maybe_used_by_stmt_p (stmt
, &lhs_ref
)
2487 || stmt_may_clobber_ref_p_1 (stmt
, &lhs_ref
)
2488 || (store_lhs
&& refs_may_alias_p_1 (&store_lhs_ref
,
2491 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2493 fprintf (dump_file
, "stmt causes chain termination:\n");
2494 print_gimple_stmt (dump_file
, stmt
, 0);
2496 ret
|= terminate_and_process_chain (cur
);
2505 /* Helper function. Terminate the recorded chain storing to base object
2506 BASE. Return true if the merging and output was successful. The m_stores
2507 entry is removed after the processing in any case. */
2510 pass_store_merging::terminate_and_process_chain (imm_store_chain_info
*chain_info
)
2512 m_n_stores
-= chain_info
->m_store_info
.length ();
2514 bool ret
= chain_info
->terminate_and_process_chain ();
2515 m_stores
.remove (chain_info
->base_addr
);
2520 /* Return true if stmts in between FIRST (inclusive) and LAST (exclusive)
2521 may clobber REF. FIRST and LAST must have non-NULL vdef. We want to
2522 be able to sink load of REF across stores between FIRST and LAST, up
2523 to right before LAST. */
2526 stmts_may_clobber_ref_p (gimple
*first
, gimple
*last
, tree ref
)
2529 ao_ref_init (&r
, ref
);
2530 unsigned int count
= 0;
2531 tree vop
= gimple_vdef (last
);
2534 /* Return true conservatively if the basic blocks are different. */
2535 if (gimple_bb (first
) != gimple_bb (last
))
2540 stmt
= SSA_NAME_DEF_STMT (vop
);
2541 if (stmt_may_clobber_ref_p_1 (stmt
, &r
))
2543 if (gimple_store_p (stmt
)
2544 && refs_anti_dependent_p (ref
, gimple_get_lhs (stmt
)))
2546 /* Avoid quadratic compile time by bounding the number of checks
2548 if (++count
> MAX_STORE_ALIAS_CHECKS
)
2550 vop
= gimple_vuse (stmt
);
2552 while (stmt
!= first
);
2557 /* Return true if INFO->ops[IDX] is mergeable with the
2558 corresponding loads already in MERGED_STORE group.
2559 BASE_ADDR is the base address of the whole store group. */
2562 compatible_load_p (merged_store_group
*merged_store
,
2563 store_immediate_info
*info
,
2564 tree base_addr
, int idx
)
2566 store_immediate_info
*infof
= merged_store
->stores
[0];
2567 if (!info
->ops
[idx
].base_addr
2568 || maybe_ne (info
->ops
[idx
].bitpos
- infof
->ops
[idx
].bitpos
,
2569 info
->bitpos
- infof
->bitpos
)
2570 || !operand_equal_p (info
->ops
[idx
].base_addr
,
2571 infof
->ops
[idx
].base_addr
, 0))
2574 store_immediate_info
*infol
= merged_store
->stores
.last ();
2575 tree load_vuse
= gimple_vuse (info
->ops
[idx
].stmt
);
2576 /* In this case all vuses should be the same, e.g.
2577 _1 = s.a; _2 = s.b; _3 = _1 | 1; t.a = _3; _4 = _2 | 2; t.b = _4;
2579 _1 = s.a; _2 = s.b; t.a = _1; t.b = _2;
2580 and we can emit the coalesced load next to any of those loads. */
2581 if (gimple_vuse (infof
->ops
[idx
].stmt
) == load_vuse
2582 && gimple_vuse (infol
->ops
[idx
].stmt
) == load_vuse
)
2585 /* Otherwise, at least for now require that the load has the same
2586 vuse as the store. See following examples. */
2587 if (gimple_vuse (info
->stmt
) != load_vuse
)
2590 if (gimple_vuse (infof
->stmt
) != gimple_vuse (infof
->ops
[idx
].stmt
)
2592 && gimple_vuse (infol
->stmt
) != gimple_vuse (infol
->ops
[idx
].stmt
)))
2595 /* If the load is from the same location as the store, already
2596 the construction of the immediate chain info guarantees no intervening
2597 stores, so no further checks are needed. Example:
2598 _1 = s.a; _2 = _1 & -7; s.a = _2; _3 = s.b; _4 = _3 & -7; s.b = _4; */
2599 if (known_eq (info
->ops
[idx
].bitpos
, info
->bitpos
)
2600 && operand_equal_p (info
->ops
[idx
].base_addr
, base_addr
, 0))
2603 /* Otherwise, we need to punt if any of the loads can be clobbered by any
2604 of the stores in the group, or any other stores in between those.
2605 Previous calls to compatible_load_p ensured that for all the
2606 merged_store->stores IDX loads, no stmts starting with
2607 merged_store->first_stmt and ending right before merged_store->last_stmt
2608 clobbers those loads. */
2609 gimple
*first
= merged_store
->first_stmt
;
2610 gimple
*last
= merged_store
->last_stmt
;
2611 /* The stores are sorted by increasing store bitpos, so if info->stmt store
2612 comes before the so far first load, we'll be changing
2613 merged_store->first_stmt. In that case we need to give up if
2614 any of the earlier processed loads clobber with the stmts in the new
2616 if (info
->order
< merged_store
->first_order
)
2618 for (store_immediate_info
*infoc
: merged_store
->stores
)
2619 if (stmts_may_clobber_ref_p (info
->stmt
, first
, infoc
->ops
[idx
].val
))
2623 /* Similarly, we could change merged_store->last_stmt, so ensure
2624 in that case no stmts in the new range clobber any of the earlier
2626 else if (info
->order
> merged_store
->last_order
)
2628 for (store_immediate_info
*infoc
: merged_store
->stores
)
2629 if (stmts_may_clobber_ref_p (last
, info
->stmt
, infoc
->ops
[idx
].val
))
2633 /* And finally, we'd be adding a new load to the set, ensure it isn't
2634 clobbered in the new range. */
2635 if (stmts_may_clobber_ref_p (first
, last
, info
->ops
[idx
].val
))
2638 /* Otherwise, we are looking for:
2639 _1 = s.a; _2 = _1 ^ 15; t.a = _2; _3 = s.b; _4 = _3 ^ 15; t.b = _4;
2641 _1 = s.a; t.a = _1; _2 = s.b; t.b = _2; */
2645 /* Add all refs loaded to compute VAL to REFS vector. */
2648 gather_bswap_load_refs (vec
<tree
> *refs
, tree val
)
2650 if (TREE_CODE (val
) != SSA_NAME
)
2653 gimple
*stmt
= SSA_NAME_DEF_STMT (val
);
2654 if (!is_gimple_assign (stmt
))
2657 if (gimple_assign_load_p (stmt
))
2659 refs
->safe_push (gimple_assign_rhs1 (stmt
));
2663 switch (gimple_assign_rhs_class (stmt
))
2665 case GIMPLE_BINARY_RHS
:
2666 gather_bswap_load_refs (refs
, gimple_assign_rhs2 (stmt
));
2668 case GIMPLE_UNARY_RHS
:
2669 gather_bswap_load_refs (refs
, gimple_assign_rhs1 (stmt
));
2676 /* Check if there are any stores in M_STORE_INFO after index I
2677 (where M_STORE_INFO must be sorted by sort_by_bitpos) that overlap
2678 a potential group ending with END that have their order
2679 smaller than LAST_ORDER. ALL_INTEGER_CST_P is true if
2680 all the stores already merged and the one under consideration
2681 have rhs_code of INTEGER_CST. Return true if there are no such stores.
2683 MEM[(long long int *)p_28] = 0;
2684 MEM[(long long int *)p_28 + 8B] = 0;
2685 MEM[(long long int *)p_28 + 16B] = 0;
2686 MEM[(long long int *)p_28 + 24B] = 0;
2688 MEM[(int *)p_28 + 8B] = _129;
2689 MEM[(int *)p_28].a = -1;
2691 MEM[(long long int *)p_28] = 0;
2692 MEM[(int *)p_28].a = -1;
2693 stmts in the current group and need to consider if it is safe to
2694 add MEM[(long long int *)p_28 + 8B] = 0; store into the same group.
2695 There is an overlap between that store and the MEM[(int *)p_28 + 8B] = _129;
2696 store though, so if we add the MEM[(long long int *)p_28 + 8B] = 0;
2697 into the group and merging of those 3 stores is successful, merged
2698 stmts will be emitted at the latest store from that group, i.e.
2699 LAST_ORDER, which is the MEM[(int *)p_28].a = -1; store.
2700 The MEM[(int *)p_28 + 8B] = _129; store that originally follows
2701 the MEM[(long long int *)p_28 + 8B] = 0; would now be before it,
2702 so we need to refuse merging MEM[(long long int *)p_28 + 8B] = 0;
2703 into the group. That way it will be its own store group and will
2704 not be touched. If ALL_INTEGER_CST_P and there are overlapping
2705 INTEGER_CST stores, those are mergeable using merge_overlapping,
2706 so don't return false for those.
2708 Similarly, check stores from FIRST_EARLIER (inclusive) to END_EARLIER
2709 (exclusive), whether they don't overlap the bitrange START to END
2710 and have order in between FIRST_ORDER and LAST_ORDER. This is to
2711 prevent merging in cases like:
2712 MEM <char[12]> [&b + 8B] = {};
2713 MEM[(short *) &b] = 5;
2715 MEM <long long unsigned int> [&b + 2B] = _5;
2716 MEM[(char *)&b + 16B] = 88;
2717 MEM[(int *)&b + 20B] = 1;
2718 The = {} store comes in sort_by_bitpos before the = 88 store, and can't
2719 be merged with it, because the = _5 store overlaps these and is in between
2720 them in sort_by_order ordering. If it was merged, the merged store would
2721 go after the = _5 store and thus change behavior. */
2724 check_no_overlap (const vec
<store_immediate_info
*> &m_store_info
,
2726 bool all_integer_cst_p
, unsigned int first_order
,
2727 unsigned int last_order
, unsigned HOST_WIDE_INT start
,
2728 unsigned HOST_WIDE_INT end
, unsigned int first_earlier
,
2729 unsigned end_earlier
)
2731 unsigned int len
= m_store_info
.length ();
2732 for (unsigned int j
= first_earlier
; j
< end_earlier
; j
++)
2734 store_immediate_info
*info
= m_store_info
[j
];
2735 if (info
->order
> first_order
2736 && info
->order
< last_order
2737 && info
->bitpos
+ info
->bitsize
> start
)
2740 for (++i
; i
< len
; ++i
)
2742 store_immediate_info
*info
= m_store_info
[i
];
2743 if (info
->bitpos
>= end
)
2745 if (info
->order
< last_order
2746 && (!all_integer_cst_p
|| info
->rhs_code
!= INTEGER_CST
))
2752 /* Return true if m_store_info[first] and at least one following store
2753 form a group which store try_size bitsize value which is byte swapped
2754 from a memory load or some value, or identity from some value.
2755 This uses the bswap pass APIs. */
2758 imm_store_chain_info::try_coalesce_bswap (merged_store_group
*merged_store
,
2760 unsigned int try_size
,
2761 unsigned int first_earlier
)
2763 unsigned int len
= m_store_info
.length (), last
= first
;
2764 unsigned HOST_WIDE_INT width
= m_store_info
[first
]->bitsize
;
2765 if (width
>= try_size
)
2767 for (unsigned int i
= first
+ 1; i
< len
; ++i
)
2769 if (m_store_info
[i
]->bitpos
!= m_store_info
[first
]->bitpos
+ width
2770 || m_store_info
[i
]->lp_nr
!= merged_store
->lp_nr
2771 || m_store_info
[i
]->ins_stmt
== NULL
)
2773 width
+= m_store_info
[i
]->bitsize
;
2774 if (width
>= try_size
)
2780 if (width
!= try_size
)
2783 bool allow_unaligned
2784 = !STRICT_ALIGNMENT
&& param_store_merging_allow_unaligned
;
2785 /* Punt if the combined store would not be aligned and we need alignment. */
2786 if (!allow_unaligned
)
2788 unsigned int align
= merged_store
->align
;
2789 unsigned HOST_WIDE_INT align_base
= merged_store
->align_base
;
2790 for (unsigned int i
= first
+ 1; i
<= last
; ++i
)
2792 unsigned int this_align
;
2793 unsigned HOST_WIDE_INT align_bitpos
= 0;
2794 get_object_alignment_1 (gimple_assign_lhs (m_store_info
[i
]->stmt
),
2795 &this_align
, &align_bitpos
);
2796 if (this_align
> align
)
2799 align_base
= m_store_info
[i
]->bitpos
- align_bitpos
;
2802 unsigned HOST_WIDE_INT align_bitpos
2803 = (m_store_info
[first
]->bitpos
- align_base
) & (align
- 1);
2805 align
= least_bit_hwi (align_bitpos
);
2806 if (align
< try_size
)
2813 case 16: type
= uint16_type_node
; break;
2814 case 32: type
= uint32_type_node
; break;
2815 case 64: type
= uint64_type_node
; break;
2816 default: gcc_unreachable ();
2818 struct symbolic_number n
;
2819 gimple
*ins_stmt
= NULL
;
2820 int vuse_store
= -1;
2821 unsigned int first_order
= merged_store
->first_order
;
2822 unsigned int last_order
= merged_store
->last_order
;
2823 gimple
*first_stmt
= merged_store
->first_stmt
;
2824 gimple
*last_stmt
= merged_store
->last_stmt
;
2825 unsigned HOST_WIDE_INT end
= merged_store
->start
+ merged_store
->width
;
2826 store_immediate_info
*infof
= m_store_info
[first
];
2828 for (unsigned int i
= first
; i
<= last
; ++i
)
2830 store_immediate_info
*info
= m_store_info
[i
];
2831 struct symbolic_number this_n
= info
->n
;
2833 if (!this_n
.base_addr
)
2834 this_n
.range
= try_size
/ BITS_PER_UNIT
;
2836 /* Update vuse in case it has changed by output_merged_stores. */
2837 this_n
.vuse
= gimple_vuse (info
->ins_stmt
);
2838 unsigned int bitpos
= info
->bitpos
- infof
->bitpos
;
2839 if (!do_shift_rotate (LSHIFT_EXPR
, &this_n
,
2841 ? try_size
- info
->bitsize
- bitpos
2844 if (this_n
.base_addr
&& vuse_store
)
2847 for (j
= first
; j
<= last
; ++j
)
2848 if (this_n
.vuse
== gimple_vuse (m_store_info
[j
]->stmt
))
2852 if (vuse_store
== 1)
2860 ins_stmt
= info
->ins_stmt
;
2864 if (n
.base_addr
&& n
.vuse
!= this_n
.vuse
)
2866 if (vuse_store
== 0)
2870 if (info
->order
> last_order
)
2872 last_order
= info
->order
;
2873 last_stmt
= info
->stmt
;
2875 else if (info
->order
< first_order
)
2877 first_order
= info
->order
;
2878 first_stmt
= info
->stmt
;
2880 end
= MAX (end
, info
->bitpos
+ info
->bitsize
);
2882 ins_stmt
= perform_symbolic_merge (ins_stmt
, &n
, info
->ins_stmt
,
2884 if (ins_stmt
== NULL
)
2889 uint64_t cmpxchg
, cmpnop
;
2891 find_bswap_or_nop_finalize (&n
, &cmpxchg
, &cmpnop
, &cast64_to_32
);
2893 /* A complete byte swap should make the symbolic number to start with
2894 the largest digit in the highest order byte. Unchanged symbolic
2895 number indicates a read with same endianness as target architecture. */
2896 if (n
.n
!= cmpnop
&& n
.n
!= cmpxchg
)
2903 if (n
.base_addr
== NULL_TREE
&& !is_gimple_val (n
.src
))
2906 if (!check_no_overlap (m_store_info
, last
, false, first_order
, last_order
,
2907 merged_store
->start
, end
, first_earlier
, first
))
2910 /* Don't handle memory copy this way if normal non-bswap processing
2911 would handle it too. */
2912 if (n
.n
== cmpnop
&& (unsigned) n
.n_ops
== last
- first
+ 1)
2915 for (i
= first
; i
<= last
; ++i
)
2916 if (m_store_info
[i
]->rhs_code
!= MEM_REF
)
2926 /* Will emit LROTATE_EXPR. */
2929 if (builtin_decl_explicit_p (BUILT_IN_BSWAP32
)
2930 && optab_handler (bswap_optab
, SImode
) != CODE_FOR_nothing
)
2934 if (builtin_decl_explicit_p (BUILT_IN_BSWAP64
)
2935 && optab_handler (bswap_optab
, DImode
) != CODE_FOR_nothing
)
2942 if (!allow_unaligned
&& n
.base_addr
)
2944 unsigned int align
= get_object_alignment (n
.src
);
2945 if (align
< try_size
)
2949 /* If each load has vuse of the corresponding store, need to verify
2950 the loads can be sunk right before the last store. */
2951 if (vuse_store
== 1)
2953 auto_vec
<tree
, 64> refs
;
2954 for (unsigned int i
= first
; i
<= last
; ++i
)
2955 gather_bswap_load_refs (&refs
,
2956 gimple_assign_rhs1 (m_store_info
[i
]->stmt
));
2958 for (tree ref
: refs
)
2959 if (stmts_may_clobber_ref_p (first_stmt
, last_stmt
, ref
))
2965 infof
->ins_stmt
= ins_stmt
;
2966 for (unsigned int i
= first
; i
<= last
; ++i
)
2968 m_store_info
[i
]->rhs_code
= n
.n
== cmpxchg
? LROTATE_EXPR
: NOP_EXPR
;
2969 m_store_info
[i
]->ops
[0].base_addr
= NULL_TREE
;
2970 m_store_info
[i
]->ops
[1].base_addr
= NULL_TREE
;
2972 merged_store
->merge_into (m_store_info
[i
]);
2978 /* Go through the candidate stores recorded in m_store_info and merge them
2979 into merged_store_group objects recorded into m_merged_store_groups
2980 representing the widened stores. Return true if coalescing was successful
2981 and the number of widened stores is fewer than the original number
2985 imm_store_chain_info::coalesce_immediate_stores ()
2987 /* Anything less can't be processed. */
2988 if (m_store_info
.length () < 2)
2991 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2992 fprintf (dump_file
, "Attempting to coalesce %u stores in chain\n",
2993 m_store_info
.length ());
2995 store_immediate_info
*info
;
2996 unsigned int i
, ignore
= 0;
2997 unsigned int first_earlier
= 0;
2998 unsigned int end_earlier
= 0;
3000 /* Order the stores by the bitposition they write to. */
3001 m_store_info
.qsort (sort_by_bitpos
);
3003 info
= m_store_info
[0];
3004 merged_store_group
*merged_store
= new merged_store_group (info
);
3005 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3006 fputs ("New store group\n", dump_file
);
3008 FOR_EACH_VEC_ELT (m_store_info
, i
, info
)
3010 unsigned HOST_WIDE_INT new_bitregion_start
, new_bitregion_end
;
3015 while (first_earlier
< end_earlier
3016 && (m_store_info
[first_earlier
]->bitpos
3017 + m_store_info
[first_earlier
]->bitsize
3018 <= merged_store
->start
))
3021 /* First try to handle group of stores like:
3026 using the bswap framework. */
3027 if (info
->bitpos
== merged_store
->start
+ merged_store
->width
3028 && merged_store
->stores
.length () == 1
3029 && merged_store
->stores
[0]->ins_stmt
!= NULL
3030 && info
->lp_nr
== merged_store
->lp_nr
3031 && info
->ins_stmt
!= NULL
)
3033 unsigned int try_size
;
3034 for (try_size
= 64; try_size
>= 16; try_size
>>= 1)
3035 if (try_coalesce_bswap (merged_store
, i
- 1, try_size
,
3041 ignore
= i
+ merged_store
->stores
.length () - 1;
3042 m_merged_store_groups
.safe_push (merged_store
);
3043 if (ignore
< m_store_info
.length ())
3045 merged_store
= new merged_store_group (m_store_info
[ignore
]);
3046 end_earlier
= ignore
;
3049 merged_store
= NULL
;
3055 = MIN (merged_store
->bitregion_start
, info
->bitregion_start
);
3057 = MAX (merged_store
->bitregion_end
, info
->bitregion_end
);
3059 if (info
->order
>= merged_store
->first_nonmergeable_order
3060 || (((new_bitregion_end
- new_bitregion_start
+ 1) / BITS_PER_UNIT
)
3061 > (unsigned) param_store_merging_max_size
))
3066 Overlapping stores. */
3067 else if (IN_RANGE (info
->bitpos
, merged_store
->start
,
3068 merged_store
->start
+ merged_store
->width
- 1)
3069 /* |---store 1---||---store 2---|
3070 Handle also the consecutive INTEGER_CST stores case here,
3071 as we have here the code to deal with overlaps. */
3072 || (info
->bitregion_start
<= merged_store
->bitregion_end
3073 && info
->rhs_code
== INTEGER_CST
3074 && merged_store
->only_constants
3075 && merged_store
->can_be_merged_into (info
)))
3077 /* Only allow overlapping stores of constants. */
3078 if (info
->rhs_code
== INTEGER_CST
3079 && merged_store
->only_constants
3080 && info
->lp_nr
== merged_store
->lp_nr
)
3082 unsigned int first_order
3083 = MIN (merged_store
->first_order
, info
->order
);
3084 unsigned int last_order
3085 = MAX (merged_store
->last_order
, info
->order
);
3086 unsigned HOST_WIDE_INT end
3087 = MAX (merged_store
->start
+ merged_store
->width
,
3088 info
->bitpos
+ info
->bitsize
);
3089 if (check_no_overlap (m_store_info
, i
, true, first_order
,
3090 last_order
, merged_store
->start
, end
,
3091 first_earlier
, end_earlier
))
3093 /* check_no_overlap call above made sure there are no
3094 overlapping stores with non-INTEGER_CST rhs_code
3095 in between the first and last of the stores we've
3096 just merged. If there are any INTEGER_CST rhs_code
3097 stores in between, we need to merge_overlapping them
3098 even if in the sort_by_bitpos order there are other
3099 overlapping stores in between. Keep those stores as is.
3101 MEM[(int *)p_28] = 0;
3102 MEM[(char *)p_28 + 3B] = 1;
3103 MEM[(char *)p_28 + 1B] = 2;
3104 MEM[(char *)p_28 + 2B] = MEM[(char *)p_28 + 6B];
3105 We can't merge the zero store with the store of two and
3106 not merge anything else, because the store of one is
3107 in the original order in between those two, but in
3108 store_by_bitpos order it comes after the last store that
3109 we can't merge with them. We can merge the first 3 stores
3110 and keep the last store as is though. */
3111 unsigned int len
= m_store_info
.length ();
3112 unsigned int try_order
= last_order
;
3113 unsigned int first_nonmergeable_order
;
3115 bool last_iter
= false;
3119 unsigned int max_order
= 0;
3120 unsigned int min_order
= first_order
;
3121 unsigned first_nonmergeable_int_order
= ~0U;
3122 unsigned HOST_WIDE_INT this_end
= end
;
3124 first_nonmergeable_order
= ~0U;
3125 for (unsigned int j
= i
+ 1; j
< len
; ++j
)
3127 store_immediate_info
*info2
= m_store_info
[j
];
3128 if (info2
->bitpos
>= this_end
)
3130 if (info2
->order
< try_order
)
3132 if (info2
->rhs_code
!= INTEGER_CST
3133 || info2
->lp_nr
!= merged_store
->lp_nr
)
3135 /* Normally check_no_overlap makes sure this
3136 doesn't happen, but if end grows below,
3137 then we need to process more stores than
3138 check_no_overlap verified. Example:
3139 MEM[(int *)p_5] = 0;
3140 MEM[(short *)p_5 + 3B] = 1;
3141 MEM[(char *)p_5 + 4B] = _9;
3142 MEM[(char *)p_5 + 2B] = 2; */
3147 min_order
= MIN (min_order
, info2
->order
);
3148 this_end
= MAX (this_end
,
3149 info2
->bitpos
+ info2
->bitsize
);
3151 else if (info2
->rhs_code
== INTEGER_CST
3152 && info2
->lp_nr
== merged_store
->lp_nr
3155 max_order
= MAX (max_order
, info2
->order
+ 1);
3156 first_nonmergeable_int_order
3157 = MIN (first_nonmergeable_int_order
,
3161 first_nonmergeable_order
3162 = MIN (first_nonmergeable_order
, info2
->order
);
3165 && !check_no_overlap (m_store_info
, len
- 1, true,
3166 min_order
, try_order
,
3167 merged_store
->start
, this_end
,
3168 first_earlier
, end_earlier
))
3172 if (last_order
== try_order
)
3174 /* If this failed, but only because we grew
3175 try_order, retry with the last working one,
3176 so that we merge at least something. */
3177 try_order
= last_order
;
3181 last_order
= try_order
;
3182 /* Retry with a larger try_order to see if we could
3183 merge some further INTEGER_CST stores. */
3185 && (first_nonmergeable_int_order
3186 < first_nonmergeable_order
))
3188 try_order
= MIN (max_order
,
3189 first_nonmergeable_order
);
3192 merged_store
->first_nonmergeable_order
);
3193 if (try_order
> last_order
&& ++attempts
< 16)
3196 first_nonmergeable_order
3197 = MIN (first_nonmergeable_order
,
3198 first_nonmergeable_int_order
);
3206 merged_store
->merge_overlapping (info
);
3208 merged_store
->first_nonmergeable_order
3209 = MIN (merged_store
->first_nonmergeable_order
,
3210 first_nonmergeable_order
);
3212 for (unsigned int j
= i
+ 1; j
<= k
; j
++)
3214 store_immediate_info
*info2
= m_store_info
[j
];
3215 gcc_assert (info2
->bitpos
< end
);
3216 if (info2
->order
< last_order
)
3218 gcc_assert (info2
->rhs_code
== INTEGER_CST
);
3220 merged_store
->merge_overlapping (info2
);
3222 /* Other stores are kept and not merged in any
3231 /* |---store 1---||---store 2---|
3232 This store is consecutive to the previous one.
3233 Merge it into the current store group. There can be gaps in between
3234 the stores, but there can't be gaps in between bitregions. */
3235 else if (info
->bitregion_start
<= merged_store
->bitregion_end
3236 && merged_store
->can_be_merged_into (info
))
3238 store_immediate_info
*infof
= merged_store
->stores
[0];
3240 /* All the rhs_code ops that take 2 operands are commutative,
3241 swap the operands if it could make the operands compatible. */
3242 if (infof
->ops
[0].base_addr
3243 && infof
->ops
[1].base_addr
3244 && info
->ops
[0].base_addr
3245 && info
->ops
[1].base_addr
3246 && known_eq (info
->ops
[1].bitpos
- infof
->ops
[0].bitpos
,
3247 info
->bitpos
- infof
->bitpos
)
3248 && operand_equal_p (info
->ops
[1].base_addr
,
3249 infof
->ops
[0].base_addr
, 0))
3251 std::swap (info
->ops
[0], info
->ops
[1]);
3252 info
->ops_swapped_p
= true;
3254 if (check_no_overlap (m_store_info
, i
, false,
3255 MIN (merged_store
->first_order
, info
->order
),
3256 MAX (merged_store
->last_order
, info
->order
),
3257 merged_store
->start
,
3258 MAX (merged_store
->start
+ merged_store
->width
,
3259 info
->bitpos
+ info
->bitsize
),
3260 first_earlier
, end_earlier
))
3262 /* Turn MEM_REF into BIT_INSERT_EXPR for bit-field stores. */
3263 if (info
->rhs_code
== MEM_REF
&& infof
->rhs_code
!= MEM_REF
)
3265 info
->rhs_code
= BIT_INSERT_EXPR
;
3266 info
->ops
[0].val
= gimple_assign_rhs1 (info
->stmt
);
3267 info
->ops
[0].base_addr
= NULL_TREE
;
3269 else if (infof
->rhs_code
== MEM_REF
&& info
->rhs_code
!= MEM_REF
)
3271 for (store_immediate_info
*infoj
: merged_store
->stores
)
3273 infoj
->rhs_code
= BIT_INSERT_EXPR
;
3274 infoj
->ops
[0].val
= gimple_assign_rhs1 (infoj
->stmt
);
3275 infoj
->ops
[0].base_addr
= NULL_TREE
;
3277 merged_store
->bit_insertion
= true;
3279 if ((infof
->ops
[0].base_addr
3280 ? compatible_load_p (merged_store
, info
, base_addr
, 0)
3281 : !info
->ops
[0].base_addr
)
3282 && (infof
->ops
[1].base_addr
3283 ? compatible_load_p (merged_store
, info
, base_addr
, 1)
3284 : !info
->ops
[1].base_addr
))
3286 merged_store
->merge_into (info
);
3292 /* |---store 1---| <gap> |---store 2---|.
3293 Gap between stores or the rhs not compatible. Start a new group. */
3295 /* Try to apply all the stores recorded for the group to determine
3296 the bitpattern they write and discard it if that fails.
3297 This will also reject single-store groups. */
3298 if (merged_store
->apply_stores ())
3299 m_merged_store_groups
.safe_push (merged_store
);
3301 delete merged_store
;
3303 merged_store
= new merged_store_group (info
);
3305 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3306 fputs ("New store group\n", dump_file
);
3309 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3311 fprintf (dump_file
, "Store %u:\nbitsize:" HOST_WIDE_INT_PRINT_DEC
3312 " bitpos:" HOST_WIDE_INT_PRINT_DEC
" val:",
3313 i
, info
->bitsize
, info
->bitpos
);
3314 print_generic_expr (dump_file
, gimple_assign_rhs1 (info
->stmt
));
3315 fputc ('\n', dump_file
);
3319 /* Record or discard the last store group. */
3322 if (merged_store
->apply_stores ())
3323 m_merged_store_groups
.safe_push (merged_store
);
3325 delete merged_store
;
3328 gcc_assert (m_merged_store_groups
.length () <= m_store_info
.length ());
3331 = !m_merged_store_groups
.is_empty ()
3332 && m_merged_store_groups
.length () < m_store_info
.length ();
3334 if (success
&& dump_file
)
3335 fprintf (dump_file
, "Coalescing successful!\nMerged into %u stores\n",
3336 m_merged_store_groups
.length ());
3341 /* Return the type to use for the merged stores or loads described by STMTS.
3342 This is needed to get the alias sets right. If IS_LOAD, look for rhs,
3343 otherwise lhs. Additionally set *CLIQUEP and *BASEP to MR_DEPENDENCE_*
3344 of the MEM_REFs if any. */
3347 get_alias_type_for_stmts (vec
<gimple
*> &stmts
, bool is_load
,
3348 unsigned short *cliquep
, unsigned short *basep
)
3352 tree type
= NULL_TREE
;
3353 tree ret
= NULL_TREE
;
3357 FOR_EACH_VEC_ELT (stmts
, i
, stmt
)
3359 tree ref
= is_load
? gimple_assign_rhs1 (stmt
)
3360 : gimple_assign_lhs (stmt
);
3361 tree type1
= reference_alias_ptr_type (ref
);
3362 tree base
= get_base_address (ref
);
3366 if (TREE_CODE (base
) == MEM_REF
)
3368 *cliquep
= MR_DEPENDENCE_CLIQUE (base
);
3369 *basep
= MR_DEPENDENCE_BASE (base
);
3374 if (!alias_ptr_types_compatible_p (type
, type1
))
3375 ret
= ptr_type_node
;
3376 if (TREE_CODE (base
) != MEM_REF
3377 || *cliquep
!= MR_DEPENDENCE_CLIQUE (base
)
3378 || *basep
!= MR_DEPENDENCE_BASE (base
))
3387 /* Return the location_t information we can find among the statements
3391 get_location_for_stmts (vec
<gimple
*> &stmts
)
3393 for (gimple
*stmt
: stmts
)
3394 if (gimple_has_location (stmt
))
3395 return gimple_location (stmt
);
3397 return UNKNOWN_LOCATION
;
3400 /* Used to decribe a store resulting from splitting a wide store in smaller
3401 regularly-sized stores in split_group. */
3406 unsigned HOST_WIDE_INT bytepos
;
3407 unsigned HOST_WIDE_INT size
;
3408 unsigned HOST_WIDE_INT align
;
3409 auto_vec
<store_immediate_info
*> orig_stores
;
3410 /* True if there is a single orig stmt covering the whole split store. */
3412 split_store (unsigned HOST_WIDE_INT
, unsigned HOST_WIDE_INT
,
3413 unsigned HOST_WIDE_INT
);
3416 /* Simple constructor. */
3418 split_store::split_store (unsigned HOST_WIDE_INT bp
,
3419 unsigned HOST_WIDE_INT sz
,
3420 unsigned HOST_WIDE_INT al
)
3421 : bytepos (bp
), size (sz
), align (al
), orig (false)
3423 orig_stores
.create (0);
3426 /* Record all stores in GROUP that write to the region starting at BITPOS and
3427 is of size BITSIZE. Record infos for such statements in STORES if
3428 non-NULL. The stores in GROUP must be sorted by bitposition. Return INFO
3429 if there is exactly one original store in the range (in that case ignore
3430 clobber stmts, unless there are only clobber stmts). */
3432 static store_immediate_info
*
3433 find_constituent_stores (class merged_store_group
*group
,
3434 vec
<store_immediate_info
*> *stores
,
3435 unsigned int *first
,
3436 unsigned HOST_WIDE_INT bitpos
,
3437 unsigned HOST_WIDE_INT bitsize
)
3439 store_immediate_info
*info
, *ret
= NULL
;
3441 bool second
= false;
3442 bool update_first
= true;
3443 unsigned HOST_WIDE_INT end
= bitpos
+ bitsize
;
3444 for (i
= *first
; group
->stores
.iterate (i
, &info
); ++i
)
3446 unsigned HOST_WIDE_INT stmt_start
= info
->bitpos
;
3447 unsigned HOST_WIDE_INT stmt_end
= stmt_start
+ info
->bitsize
;
3448 if (stmt_end
<= bitpos
)
3450 /* BITPOS passed to this function never decreases from within the
3451 same split_group call, so optimize and don't scan info records
3452 which are known to end before or at BITPOS next time.
3453 Only do it if all stores before this one also pass this. */
3459 update_first
= false;
3461 /* The stores in GROUP are ordered by bitposition so if we're past
3462 the region for this group return early. */
3463 if (stmt_start
>= end
)
3466 if (gimple_clobber_p (info
->stmt
))
3469 stores
->safe_push (info
);
3476 stores
->safe_push (info
);
3477 if (ret
&& !gimple_clobber_p (ret
->stmt
))
3483 else if (ret
&& !gimple_clobber_p (ret
->stmt
))
3491 /* Return how many SSA_NAMEs used to compute value to store in the INFO
3492 store have multiple uses. If any SSA_NAME has multiple uses, also
3493 count statements needed to compute it. */
3496 count_multiple_uses (store_immediate_info
*info
)
3498 gimple
*stmt
= info
->stmt
;
3500 switch (info
->rhs_code
)
3508 if (info
->bit_not_p
)
3510 if (!has_single_use (gimple_assign_rhs1 (stmt
)))
3511 ret
= 1; /* Fall through below to return
3512 the BIT_NOT_EXPR stmt and then
3513 BIT_{AND,IOR,XOR}_EXPR and anything it
3516 /* stmt is after this the BIT_NOT_EXPR. */
3517 stmt
= SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt
));
3519 if (!has_single_use (gimple_assign_rhs1 (stmt
)))
3521 ret
+= 1 + info
->ops
[0].bit_not_p
;
3522 if (info
->ops
[1].base_addr
)
3523 ret
+= 1 + info
->ops
[1].bit_not_p
;
3526 stmt
= SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt
));
3527 /* stmt is now the BIT_*_EXPR. */
3528 if (!has_single_use (gimple_assign_rhs1 (stmt
)))
3529 ret
+= 1 + info
->ops
[info
->ops_swapped_p
].bit_not_p
;
3530 else if (info
->ops
[info
->ops_swapped_p
].bit_not_p
)
3532 gimple
*stmt2
= SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt
));
3533 if (!has_single_use (gimple_assign_rhs1 (stmt2
)))
3536 if (info
->ops
[1].base_addr
== NULL_TREE
)
3538 gcc_checking_assert (!info
->ops_swapped_p
);
3541 if (!has_single_use (gimple_assign_rhs2 (stmt
)))
3542 ret
+= 1 + info
->ops
[1 - info
->ops_swapped_p
].bit_not_p
;
3543 else if (info
->ops
[1 - info
->ops_swapped_p
].bit_not_p
)
3545 gimple
*stmt2
= SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt
));
3546 if (!has_single_use (gimple_assign_rhs1 (stmt2
)))
3551 if (!has_single_use (gimple_assign_rhs1 (stmt
)))
3552 return 1 + info
->ops
[0].bit_not_p
;
3553 else if (info
->ops
[0].bit_not_p
)
3555 stmt
= SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt
));
3556 if (!has_single_use (gimple_assign_rhs1 (stmt
)))
3560 case BIT_INSERT_EXPR
:
3561 return has_single_use (gimple_assign_rhs1 (stmt
)) ? 0 : 1;
3567 /* Split a merged store described by GROUP by populating the SPLIT_STORES
3568 vector (if non-NULL) with split_store structs describing the byte offset
3569 (from the base), the bit size and alignment of each store as well as the
3570 original statements involved in each such split group.
3571 This is to separate the splitting strategy from the statement
3572 building/emission/linking done in output_merged_store.
3573 Return number of new stores.
3574 If ALLOW_UNALIGNED_STORE is false, then all stores must be aligned.
3575 If ALLOW_UNALIGNED_LOAD is false, then all loads must be aligned.
3576 BZERO_FIRST may be true only when the first store covers the whole group
3577 and clears it; if BZERO_FIRST is true, keep that first store in the set
3578 unmodified and emit further stores for the overrides only.
3579 If SPLIT_STORES is NULL, it is just a dry run to count number of
3583 split_group (merged_store_group
*group
, bool allow_unaligned_store
,
3584 bool allow_unaligned_load
, bool bzero_first
,
3585 vec
<split_store
*> *split_stores
,
3586 unsigned *total_orig
,
3587 unsigned *total_new
)
3589 unsigned HOST_WIDE_INT pos
= group
->bitregion_start
;
3590 unsigned HOST_WIDE_INT size
= group
->bitregion_end
- pos
;
3591 unsigned HOST_WIDE_INT bytepos
= pos
/ BITS_PER_UNIT
;
3592 unsigned HOST_WIDE_INT group_align
= group
->align
;
3593 unsigned HOST_WIDE_INT align_base
= group
->align_base
;
3594 unsigned HOST_WIDE_INT group_load_align
= group_align
;
3595 bool any_orig
= false;
3597 gcc_assert ((size
% BITS_PER_UNIT
== 0) && (pos
% BITS_PER_UNIT
== 0));
3599 /* For bswap framework using sets of stores, all the checking has been done
3600 earlier in try_coalesce_bswap and the result always needs to be emitted
3601 as a single store. Likewise for string concatenation, */
3602 if (group
->stores
[0]->rhs_code
== LROTATE_EXPR
3603 || group
->stores
[0]->rhs_code
== NOP_EXPR
3604 || group
->string_concatenation
)
3606 gcc_assert (!bzero_first
);
3609 /* Avoid the old/new stmt count heuristics. It should be
3610 always beneficial. */
3617 unsigned HOST_WIDE_INT align_bitpos
3618 = (group
->start
- align_base
) & (group_align
- 1);
3619 unsigned HOST_WIDE_INT align
= group_align
;
3621 align
= least_bit_hwi (align_bitpos
);
3622 bytepos
= group
->start
/ BITS_PER_UNIT
;
3624 = new split_store (bytepos
, group
->width
, align
);
3625 unsigned int first
= 0;
3626 find_constituent_stores (group
, &store
->orig_stores
,
3627 &first
, group
->start
, group
->width
);
3628 split_stores
->safe_push (store
);
3634 unsigned int ret
= 0, first
= 0;
3635 unsigned HOST_WIDE_INT try_pos
= bytepos
;
3640 store_immediate_info
*info
= group
->stores
[0];
3643 total_orig
[0] = 1; /* The orig store. */
3644 info
= group
->stores
[0];
3645 if (info
->ops
[0].base_addr
)
3647 if (info
->ops
[1].base_addr
)
3649 switch (info
->rhs_code
)
3654 total_orig
[0]++; /* The orig BIT_*_EXPR stmt. */
3659 total_orig
[0] *= group
->stores
.length ();
3661 FOR_EACH_VEC_ELT (group
->stores
, i
, info
)
3663 total_new
[0] += count_multiple_uses (info
);
3664 total_orig
[0] += (info
->bit_not_p
3665 + info
->ops
[0].bit_not_p
3666 + info
->ops
[1].bit_not_p
);
3670 if (!allow_unaligned_load
)
3671 for (int i
= 0; i
< 2; ++i
)
3672 if (group
->load_align
[i
])
3673 group_load_align
= MIN (group_load_align
, group
->load_align
[i
]);
3677 store_immediate_info
*gstore
;
3678 FOR_EACH_VEC_ELT (group
->stores
, first
, gstore
)
3679 if (!gimple_clobber_p (gstore
->stmt
))
3686 = new split_store (bytepos
, gstore
->bitsize
, align_base
);
3687 store
->orig_stores
.safe_push (gstore
);
3690 split_stores
->safe_push (store
);
3696 if ((allow_unaligned_store
|| group_align
<= BITS_PER_UNIT
)
3697 && (group
->mask
[try_pos
- bytepos
] == (unsigned char) ~0U
3698 || (bzero_first
&& group
->val
[try_pos
- bytepos
] == 0)))
3700 /* Skip padding bytes. */
3702 size
-= BITS_PER_UNIT
;
3706 unsigned HOST_WIDE_INT try_bitpos
= try_pos
* BITS_PER_UNIT
;
3707 unsigned int try_size
= MAX_STORE_BITSIZE
, nonmasked
;
3708 unsigned HOST_WIDE_INT align_bitpos
3709 = (try_bitpos
- align_base
) & (group_align
- 1);
3710 unsigned HOST_WIDE_INT align
= group_align
;
3711 bool found_orig
= false;
3713 align
= least_bit_hwi (align_bitpos
);
3714 if (!allow_unaligned_store
)
3715 try_size
= MIN (try_size
, align
);
3716 if (!allow_unaligned_load
)
3718 /* If we can't do or don't want to do unaligned stores
3719 as well as loads, we need to take the loads into account
3721 unsigned HOST_WIDE_INT load_align
= group_load_align
;
3722 align_bitpos
= (try_bitpos
- align_base
) & (load_align
- 1);
3724 load_align
= least_bit_hwi (align_bitpos
);
3725 for (int i
= 0; i
< 2; ++i
)
3726 if (group
->load_align
[i
])
3729 = known_alignment (try_bitpos
3730 - group
->stores
[0]->bitpos
3731 + group
->stores
[0]->ops
[i
].bitpos
3732 - group
->load_align_base
[i
]);
3733 if (align_bitpos
& (group_load_align
- 1))
3735 unsigned HOST_WIDE_INT a
= least_bit_hwi (align_bitpos
);
3736 load_align
= MIN (load_align
, a
);
3739 try_size
= MIN (try_size
, load_align
);
3741 store_immediate_info
*info
3742 = find_constituent_stores (group
, NULL
, &first
, try_bitpos
, try_size
);
3743 if (info
&& !gimple_clobber_p (info
->stmt
))
3745 /* If there is just one original statement for the range, see if
3746 we can just reuse the original store which could be even larger
3748 unsigned HOST_WIDE_INT stmt_end
3749 = ROUND_UP (info
->bitpos
+ info
->bitsize
, BITS_PER_UNIT
);
3750 info
= find_constituent_stores (group
, NULL
, &first
, try_bitpos
,
3751 stmt_end
- try_bitpos
);
3752 if (info
&& info
->bitpos
>= try_bitpos
)
3754 store_immediate_info
*info2
= NULL
;
3755 unsigned int first_copy
= first
;
3756 if (info
->bitpos
> try_bitpos
3757 && stmt_end
- try_bitpos
<= try_size
)
3759 info2
= find_constituent_stores (group
, NULL
, &first_copy
,
3761 info
->bitpos
- try_bitpos
);
3762 gcc_assert (info2
== NULL
|| gimple_clobber_p (info2
->stmt
));
3764 if (info2
== NULL
&& stmt_end
- try_bitpos
< try_size
)
3766 info2
= find_constituent_stores (group
, NULL
, &first_copy
,
3768 (try_bitpos
+ try_size
)
3770 gcc_assert (info2
== NULL
|| gimple_clobber_p (info2
->stmt
));
3774 try_size
= stmt_end
- try_bitpos
;
3781 /* Approximate store bitsize for the case when there are no padding
3783 while (try_size
> size
)
3785 /* Now look for whole padding bytes at the end of that bitsize. */
3786 for (nonmasked
= try_size
/ BITS_PER_UNIT
; nonmasked
> 0; --nonmasked
)
3787 if (group
->mask
[try_pos
- bytepos
+ nonmasked
- 1]
3788 != (unsigned char) ~0U
3790 || group
->val
[try_pos
- bytepos
+ nonmasked
- 1] != 0))
3792 if (nonmasked
== 0 || (info
&& gimple_clobber_p (info
->stmt
)))
3794 /* If entire try_size range is padding, skip it. */
3795 try_pos
+= try_size
/ BITS_PER_UNIT
;
3799 /* Otherwise try to decrease try_size if second half, last 3 quarters
3800 etc. are padding. */
3801 nonmasked
*= BITS_PER_UNIT
;
3802 while (nonmasked
<= try_size
/ 2)
3804 if (!allow_unaligned_store
&& group_align
> BITS_PER_UNIT
)
3806 /* Now look for whole padding bytes at the start of that bitsize. */
3807 unsigned int try_bytesize
= try_size
/ BITS_PER_UNIT
, masked
;
3808 for (masked
= 0; masked
< try_bytesize
; ++masked
)
3809 if (group
->mask
[try_pos
- bytepos
+ masked
] != (unsigned char) ~0U
3811 || group
->val
[try_pos
- bytepos
+ masked
] != 0))
3813 masked
*= BITS_PER_UNIT
;
3814 gcc_assert (masked
< try_size
);
3815 if (masked
>= try_size
/ 2)
3817 while (masked
>= try_size
/ 2)
3820 try_pos
+= try_size
/ BITS_PER_UNIT
;
3824 /* Need to recompute the alignment, so just retry at the new
3836 = new split_store (try_pos
, try_size
, align
);
3837 info
= find_constituent_stores (group
, &store
->orig_stores
,
3838 &first
, try_bitpos
, try_size
);
3840 && !gimple_clobber_p (info
->stmt
)
3841 && info
->bitpos
>= try_bitpos
3842 && info
->bitpos
+ info
->bitsize
<= try_bitpos
+ try_size
3843 && (store
->orig_stores
.length () == 1
3845 || (info
->bitpos
== try_bitpos
3846 && (info
->bitpos
+ info
->bitsize
3847 == try_bitpos
+ try_size
))))
3852 split_stores
->safe_push (store
);
3855 try_pos
+= try_size
/ BITS_PER_UNIT
;
3863 /* If we are reusing some original stores and any of the
3864 original SSA_NAMEs had multiple uses, we need to subtract
3865 those now before we add the new ones. */
3866 if (total_new
[0] && any_orig
)
3868 FOR_EACH_VEC_ELT (*split_stores
, i
, store
)
3870 total_new
[0] -= count_multiple_uses (store
->orig_stores
[0]);
3872 total_new
[0] += ret
; /* The new store. */
3873 store_immediate_info
*info
= group
->stores
[0];
3874 if (info
->ops
[0].base_addr
)
3875 total_new
[0] += ret
;
3876 if (info
->ops
[1].base_addr
)
3877 total_new
[0] += ret
;
3878 switch (info
->rhs_code
)
3883 total_new
[0] += ret
; /* The new BIT_*_EXPR stmt. */
3888 FOR_EACH_VEC_ELT (*split_stores
, i
, store
)
3891 bool bit_not_p
[3] = { false, false, false };
3892 /* If all orig_stores have certain bit_not_p set, then
3893 we'd use a BIT_NOT_EXPR stmt and need to account for it.
3894 If some orig_stores have certain bit_not_p set, then
3895 we'd use a BIT_XOR_EXPR with a mask and need to account for
3897 FOR_EACH_VEC_ELT (store
->orig_stores
, j
, info
)
3899 if (info
->ops
[0].bit_not_p
)
3900 bit_not_p
[0] = true;
3901 if (info
->ops
[1].bit_not_p
)
3902 bit_not_p
[1] = true;
3903 if (info
->bit_not_p
)
3904 bit_not_p
[2] = true;
3906 total_new
[0] += bit_not_p
[0] + bit_not_p
[1] + bit_not_p
[2];
3914 /* Return the operation through which the operand IDX (if < 2) or
3915 result (IDX == 2) should be inverted. If NOP_EXPR, no inversion
3916 is done, if BIT_NOT_EXPR, all bits are inverted, if BIT_XOR_EXPR,
3917 the bits should be xored with mask. */
3919 static enum tree_code
3920 invert_op (split_store
*split_store
, int idx
, tree int_type
, tree
&mask
)
3923 store_immediate_info
*info
;
3924 unsigned int cnt
= 0;
3925 bool any_paddings
= false;
3926 FOR_EACH_VEC_ELT (split_store
->orig_stores
, i
, info
)
3928 bool bit_not_p
= idx
< 2 ? info
->ops
[idx
].bit_not_p
: info
->bit_not_p
;
3932 tree lhs
= gimple_assign_lhs (info
->stmt
);
3933 if (INTEGRAL_TYPE_P (TREE_TYPE (lhs
))
3934 && TYPE_PRECISION (TREE_TYPE (lhs
)) < info
->bitsize
)
3935 any_paddings
= true;
3941 if (cnt
== split_store
->orig_stores
.length () && !any_paddings
)
3942 return BIT_NOT_EXPR
;
3944 unsigned HOST_WIDE_INT try_bitpos
= split_store
->bytepos
* BITS_PER_UNIT
;
3945 unsigned buf_size
= split_store
->size
/ BITS_PER_UNIT
;
3947 = XALLOCAVEC (unsigned char, buf_size
);
3948 memset (buf
, ~0U, buf_size
);
3949 FOR_EACH_VEC_ELT (split_store
->orig_stores
, i
, info
)
3951 bool bit_not_p
= idx
< 2 ? info
->ops
[idx
].bit_not_p
: info
->bit_not_p
;
3954 /* Clear regions with bit_not_p and invert afterwards, rather than
3955 clear regions with !bit_not_p, so that gaps in between stores aren't
3957 unsigned HOST_WIDE_INT bitsize
= info
->bitsize
;
3958 unsigned HOST_WIDE_INT prec
= bitsize
;
3959 unsigned int pos_in_buffer
= 0;
3962 tree lhs
= gimple_assign_lhs (info
->stmt
);
3963 if (INTEGRAL_TYPE_P (TREE_TYPE (lhs
))
3964 && TYPE_PRECISION (TREE_TYPE (lhs
)) < bitsize
)
3965 prec
= TYPE_PRECISION (TREE_TYPE (lhs
));
3967 if (info
->bitpos
< try_bitpos
)
3969 gcc_assert (info
->bitpos
+ bitsize
> try_bitpos
);
3970 if (!BYTES_BIG_ENDIAN
)
3972 if (prec
<= try_bitpos
- info
->bitpos
)
3974 prec
-= try_bitpos
- info
->bitpos
;
3976 bitsize
-= try_bitpos
- info
->bitpos
;
3977 if (BYTES_BIG_ENDIAN
&& prec
> bitsize
)
3981 pos_in_buffer
= info
->bitpos
- try_bitpos
;
3984 /* If this is a bool inversion, invert just the least significant
3985 prec bits rather than all bits of it. */
3986 if (BYTES_BIG_ENDIAN
)
3988 pos_in_buffer
+= bitsize
- prec
;
3989 if (pos_in_buffer
>= split_store
->size
)
3994 if (pos_in_buffer
+ bitsize
> split_store
->size
)
3995 bitsize
= split_store
->size
- pos_in_buffer
;
3996 unsigned char *p
= buf
+ (pos_in_buffer
/ BITS_PER_UNIT
);
3997 if (BYTES_BIG_ENDIAN
)
3998 clear_bit_region_be (p
, (BITS_PER_UNIT
- 1
3999 - (pos_in_buffer
% BITS_PER_UNIT
)), bitsize
);
4001 clear_bit_region (p
, pos_in_buffer
% BITS_PER_UNIT
, bitsize
);
4003 for (unsigned int i
= 0; i
< buf_size
; ++i
)
4005 mask
= native_interpret_expr (int_type
, buf
, buf_size
);
4006 return BIT_XOR_EXPR
;
4009 /* Given a merged store group GROUP output the widened version of it.
4010 The store chain is against the base object BASE.
4011 Try store sizes of at most MAX_STORE_BITSIZE bits wide and don't output
4012 unaligned stores for STRICT_ALIGNMENT targets or if it's too expensive.
4013 Make sure that the number of statements output is less than the number of
4014 original statements. If a better sequence is possible emit it and
4018 imm_store_chain_info::output_merged_store (merged_store_group
*group
)
4020 const unsigned HOST_WIDE_INT start_byte_pos
4021 = group
->bitregion_start
/ BITS_PER_UNIT
;
4022 unsigned int orig_num_stmts
= group
->stores
.length ();
4023 if (orig_num_stmts
< 2)
4026 bool allow_unaligned_store
4027 = !STRICT_ALIGNMENT
&& param_store_merging_allow_unaligned
;
4028 bool allow_unaligned_load
= allow_unaligned_store
;
4029 bool bzero_first
= false;
4030 store_immediate_info
*store
;
4031 unsigned int num_clobber_stmts
= 0;
4032 if (group
->stores
[0]->rhs_code
== INTEGER_CST
)
4035 FOR_EACH_VEC_ELT (group
->stores
, i
, store
)
4036 if (gimple_clobber_p (store
->stmt
))
4037 num_clobber_stmts
++;
4038 else if (TREE_CODE (gimple_assign_rhs1 (store
->stmt
)) == CONSTRUCTOR
4039 && CONSTRUCTOR_NELTS (gimple_assign_rhs1 (store
->stmt
)) == 0
4040 && group
->start
== store
->bitpos
4041 && group
->width
== store
->bitsize
4042 && (group
->start
% BITS_PER_UNIT
) == 0
4043 && (group
->width
% BITS_PER_UNIT
) == 0)
4050 FOR_EACH_VEC_ELT_FROM (group
->stores
, i
, store
, i
)
4051 if (gimple_clobber_p (store
->stmt
))
4052 num_clobber_stmts
++;
4053 if (num_clobber_stmts
== orig_num_stmts
)
4055 orig_num_stmts
-= num_clobber_stmts
;
4057 if (allow_unaligned_store
|| bzero_first
)
4059 /* If unaligned stores are allowed, see how many stores we'd emit
4060 for unaligned and how many stores we'd emit for aligned stores.
4061 Only use unaligned stores if it allows fewer stores than aligned.
4062 Similarly, if there is a whole region clear first, prefer expanding
4063 it together compared to expanding clear first followed by merged
4065 unsigned cnt
[4] = { ~0U, ~0U, ~0U, ~0U };
4067 for (int pass
= 0; pass
< 4; ++pass
)
4069 if (!allow_unaligned_store
&& (pass
& 1) != 0)
4071 if (!bzero_first
&& (pass
& 2) != 0)
4073 cnt
[pass
] = split_group (group
, (pass
& 1) != 0,
4074 allow_unaligned_load
, (pass
& 2) != 0,
4076 if (cnt
[pass
] < cnt
[pass_min
])
4079 if ((pass_min
& 1) == 0)
4080 allow_unaligned_store
= false;
4081 if ((pass_min
& 2) == 0)
4082 bzero_first
= false;
4085 auto_vec
<class split_store
*, 32> split_stores
;
4086 split_store
*split_store
;
4087 unsigned total_orig
, total_new
, i
;
4088 split_group (group
, allow_unaligned_store
, allow_unaligned_load
, bzero_first
,
4089 &split_stores
, &total_orig
, &total_new
);
4091 /* Determine if there is a clobber covering the whole group at the start,
4092 followed by proposed split stores that cover the whole group. In that
4093 case, prefer the transformation even if
4094 split_stores.length () == orig_num_stmts. */
4095 bool clobber_first
= false;
4096 if (num_clobber_stmts
4097 && gimple_clobber_p (group
->stores
[0]->stmt
)
4098 && group
->start
== group
->stores
[0]->bitpos
4099 && group
->width
== group
->stores
[0]->bitsize
4100 && (group
->start
% BITS_PER_UNIT
) == 0
4101 && (group
->width
% BITS_PER_UNIT
) == 0)
4103 clobber_first
= true;
4104 unsigned HOST_WIDE_INT pos
= group
->start
/ BITS_PER_UNIT
;
4105 FOR_EACH_VEC_ELT (split_stores
, i
, split_store
)
4106 if (split_store
->bytepos
!= pos
)
4108 clobber_first
= false;
4112 pos
+= split_store
->size
/ BITS_PER_UNIT
;
4113 if (pos
!= (group
->start
+ group
->width
) / BITS_PER_UNIT
)
4114 clobber_first
= false;
4117 if (split_stores
.length () >= orig_num_stmts
+ clobber_first
)
4120 /* We didn't manage to reduce the number of statements. Bail out. */
4121 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4122 fprintf (dump_file
, "Exceeded original number of stmts (%u)."
4123 " Not profitable to emit new sequence.\n",
4125 FOR_EACH_VEC_ELT (split_stores
, i
, split_store
)
4129 if (total_orig
<= total_new
)
4131 /* If number of estimated new statements is above estimated original
4132 statements, bail out too. */
4133 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4134 fprintf (dump_file
, "Estimated number of original stmts (%u)"
4135 " not larger than estimated number of new"
4137 total_orig
, total_new
);
4138 FOR_EACH_VEC_ELT (split_stores
, i
, split_store
)
4142 if (group
->stores
[0]->rhs_code
== INTEGER_CST
)
4144 bool all_orig
= true;
4145 FOR_EACH_VEC_ELT (split_stores
, i
, split_store
)
4146 if (!split_store
->orig
)
4153 unsigned int cnt
= split_stores
.length ();
4154 store_immediate_info
*store
;
4155 FOR_EACH_VEC_ELT (group
->stores
, i
, store
)
4156 if (gimple_clobber_p (store
->stmt
))
4158 /* Punt if we wouldn't make any real changes, i.e. keep all
4159 orig stmts + all clobbers. */
4160 if (cnt
== group
->stores
.length ())
4162 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4163 fprintf (dump_file
, "Exceeded original number of stmts (%u)."
4164 " Not profitable to emit new sequence.\n",
4166 FOR_EACH_VEC_ELT (split_stores
, i
, split_store
)
4173 gimple_stmt_iterator last_gsi
= gsi_for_stmt (group
->last_stmt
);
4174 gimple_seq seq
= NULL
;
4175 tree last_vdef
, new_vuse
;
4176 last_vdef
= gimple_vdef (group
->last_stmt
);
4177 new_vuse
= gimple_vuse (group
->last_stmt
);
4178 tree bswap_res
= NULL_TREE
;
4180 /* Clobbers are not removed. */
4181 if (gimple_clobber_p (group
->last_stmt
))
4183 new_vuse
= make_ssa_name (gimple_vop (cfun
), group
->last_stmt
);
4184 gimple_set_vdef (group
->last_stmt
, new_vuse
);
4187 if (group
->stores
[0]->rhs_code
== LROTATE_EXPR
4188 || group
->stores
[0]->rhs_code
== NOP_EXPR
)
4190 tree fndecl
= NULL_TREE
, bswap_type
= NULL_TREE
, load_type
;
4191 gimple
*ins_stmt
= group
->stores
[0]->ins_stmt
;
4192 struct symbolic_number
*n
= &group
->stores
[0]->n
;
4193 bool bswap
= group
->stores
[0]->rhs_code
== LROTATE_EXPR
;
4198 load_type
= bswap_type
= uint16_type_node
;
4201 load_type
= uint32_type_node
;
4204 fndecl
= builtin_decl_explicit (BUILT_IN_BSWAP32
);
4205 bswap_type
= TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl
)));
4209 load_type
= uint64_type_node
;
4212 fndecl
= builtin_decl_explicit (BUILT_IN_BSWAP64
);
4213 bswap_type
= TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl
)));
4220 /* If the loads have each vuse of the corresponding store,
4221 we've checked the aliasing already in try_coalesce_bswap and
4222 we want to sink the need load into seq. So need to use new_vuse
4226 if (n
->vuse
== NULL
)
4232 /* Update vuse in case it has changed by output_merged_stores. */
4233 n
->vuse
= gimple_vuse (ins_stmt
);
4235 bswap_res
= bswap_replace (gsi_start (seq
), ins_stmt
, fndecl
,
4236 bswap_type
, load_type
, n
, bswap
,
4238 gcc_assert (bswap_res
);
4241 gimple
*stmt
= NULL
;
4242 auto_vec
<gimple
*, 32> orig_stmts
;
4243 gimple_seq this_seq
;
4244 tree addr
= force_gimple_operand_1 (unshare_expr (base_addr
), &this_seq
,
4245 is_gimple_mem_ref_addr
, NULL_TREE
);
4246 gimple_seq_add_seq_without_update (&seq
, this_seq
);
4248 tree load_addr
[2] = { NULL_TREE
, NULL_TREE
};
4249 gimple_seq load_seq
[2] = { NULL
, NULL
};
4250 gimple_stmt_iterator load_gsi
[2] = { gsi_none (), gsi_none () };
4251 for (int j
= 0; j
< 2; ++j
)
4253 store_operand_info
&op
= group
->stores
[0]->ops
[j
];
4254 if (op
.base_addr
== NULL_TREE
)
4257 store_immediate_info
*infol
= group
->stores
.last ();
4258 if (gimple_vuse (op
.stmt
) == gimple_vuse (infol
->ops
[j
].stmt
))
4260 /* We can't pick the location randomly; while we've verified
4261 all the loads have the same vuse, they can be still in different
4262 basic blocks and we need to pick the one from the last bb:
4268 otherwise if we put the wider load at the q[0] load, we might
4269 segfault if q[1] is not mapped. */
4270 basic_block bb
= gimple_bb (op
.stmt
);
4271 gimple
*ostmt
= op
.stmt
;
4272 store_immediate_info
*info
;
4273 FOR_EACH_VEC_ELT (group
->stores
, i
, info
)
4275 gimple
*tstmt
= info
->ops
[j
].stmt
;
4276 basic_block tbb
= gimple_bb (tstmt
);
4277 if (dominated_by_p (CDI_DOMINATORS
, tbb
, bb
))
4283 load_gsi
[j
] = gsi_for_stmt (ostmt
);
4285 = force_gimple_operand_1 (unshare_expr (op
.base_addr
),
4286 &load_seq
[j
], is_gimple_mem_ref_addr
,
4289 else if (operand_equal_p (base_addr
, op
.base_addr
, 0))
4290 load_addr
[j
] = addr
;
4294 = force_gimple_operand_1 (unshare_expr (op
.base_addr
),
4295 &this_seq
, is_gimple_mem_ref_addr
,
4297 gimple_seq_add_seq_without_update (&seq
, this_seq
);
4301 FOR_EACH_VEC_ELT (split_stores
, i
, split_store
)
4303 const unsigned HOST_WIDE_INT try_size
= split_store
->size
;
4304 const unsigned HOST_WIDE_INT try_pos
= split_store
->bytepos
;
4305 const unsigned HOST_WIDE_INT try_bitpos
= try_pos
* BITS_PER_UNIT
;
4306 const unsigned HOST_WIDE_INT try_align
= split_store
->align
;
4307 const unsigned HOST_WIDE_INT try_offset
= try_pos
- start_byte_pos
;
4311 if (split_store
->orig
)
4313 /* If there is just a single non-clobber constituent store
4314 which covers the whole area, just reuse the lhs and rhs. */
4315 gimple
*orig_stmt
= NULL
;
4316 store_immediate_info
*store
;
4318 FOR_EACH_VEC_ELT (split_store
->orig_stores
, j
, store
)
4319 if (!gimple_clobber_p (store
->stmt
))
4321 orig_stmt
= store
->stmt
;
4324 dest
= gimple_assign_lhs (orig_stmt
);
4325 src
= gimple_assign_rhs1 (orig_stmt
);
4326 loc
= gimple_location (orig_stmt
);
4330 store_immediate_info
*info
;
4331 unsigned short clique
, base
;
4333 FOR_EACH_VEC_ELT (split_store
->orig_stores
, k
, info
)
4334 orig_stmts
.safe_push (info
->stmt
);
4336 = get_alias_type_for_stmts (orig_stmts
, false, &clique
, &base
);
4338 loc
= get_location_for_stmts (orig_stmts
);
4339 orig_stmts
.truncate (0);
4341 if (group
->string_concatenation
)
4343 = build_array_type_nelts (char_type_node
,
4344 try_size
/ BITS_PER_UNIT
);
4347 dest_type
= build_nonstandard_integer_type (try_size
, UNSIGNED
);
4348 dest_type
= build_aligned_type (dest_type
, try_align
);
4350 dest
= fold_build2 (MEM_REF
, dest_type
, addr
,
4351 build_int_cst (offset_type
, try_pos
));
4352 if (TREE_CODE (dest
) == MEM_REF
)
4354 MR_DEPENDENCE_CLIQUE (dest
) = clique
;
4355 MR_DEPENDENCE_BASE (dest
) = base
;
4359 if (bswap_res
|| group
->string_concatenation
)
4360 mask
= integer_zero_node
;
4362 mask
= native_interpret_expr (dest_type
,
4363 group
->mask
+ try_offset
,
4368 j
< 1 + (split_store
->orig_stores
[0]->ops
[1].val
!= NULL_TREE
);
4371 store_operand_info
&op
= split_store
->orig_stores
[0]->ops
[j
];
4374 else if (group
->string_concatenation
)
4376 ops
[j
] = build_string (try_size
/ BITS_PER_UNIT
,
4377 (const char *) group
->val
+ try_offset
);
4378 TREE_TYPE (ops
[j
]) = dest_type
;
4380 else if (op
.base_addr
)
4382 FOR_EACH_VEC_ELT (split_store
->orig_stores
, k
, info
)
4383 orig_stmts
.safe_push (info
->ops
[j
].stmt
);
4385 offset_type
= get_alias_type_for_stmts (orig_stmts
, true,
4387 location_t load_loc
= get_location_for_stmts (orig_stmts
);
4388 orig_stmts
.truncate (0);
4390 unsigned HOST_WIDE_INT load_align
= group
->load_align
[j
];
4391 unsigned HOST_WIDE_INT align_bitpos
4392 = known_alignment (try_bitpos
4393 - split_store
->orig_stores
[0]->bitpos
4395 if (align_bitpos
& (load_align
- 1))
4396 load_align
= least_bit_hwi (align_bitpos
);
4399 = build_nonstandard_integer_type (try_size
, UNSIGNED
);
4401 = build_aligned_type (load_int_type
, load_align
);
4403 poly_uint64 load_pos
4404 = exact_div (try_bitpos
4405 - split_store
->orig_stores
[0]->bitpos
4408 ops
[j
] = fold_build2 (MEM_REF
, load_int_type
, load_addr
[j
],
4409 build_int_cst (offset_type
, load_pos
));
4410 if (TREE_CODE (ops
[j
]) == MEM_REF
)
4412 MR_DEPENDENCE_CLIQUE (ops
[j
]) = clique
;
4413 MR_DEPENDENCE_BASE (ops
[j
]) = base
;
4415 if (!integer_zerop (mask
))
4417 /* The load might load some bits (that will be masked
4418 off later on) uninitialized, avoid -W*uninitialized
4419 warnings in that case. */
4420 suppress_warning (ops
[j
], OPT_Wuninitialized
);
4423 stmt
= gimple_build_assign (make_ssa_name (dest_type
), ops
[j
]);
4424 gimple_set_location (stmt
, load_loc
);
4425 if (gsi_bb (load_gsi
[j
]))
4427 gimple_set_vuse (stmt
, gimple_vuse (op
.stmt
));
4428 gimple_seq_add_stmt_without_update (&load_seq
[j
], stmt
);
4432 gimple_set_vuse (stmt
, new_vuse
);
4433 gimple_seq_add_stmt_without_update (&seq
, stmt
);
4435 ops
[j
] = gimple_assign_lhs (stmt
);
4437 enum tree_code inv_op
4438 = invert_op (split_store
, j
, dest_type
, xor_mask
);
4439 if (inv_op
!= NOP_EXPR
)
4441 stmt
= gimple_build_assign (make_ssa_name (dest_type
),
4442 inv_op
, ops
[j
], xor_mask
);
4443 gimple_set_location (stmt
, load_loc
);
4444 ops
[j
] = gimple_assign_lhs (stmt
);
4446 if (gsi_bb (load_gsi
[j
]))
4447 gimple_seq_add_stmt_without_update (&load_seq
[j
],
4450 gimple_seq_add_stmt_without_update (&seq
, stmt
);
4454 ops
[j
] = native_interpret_expr (dest_type
,
4455 group
->val
+ try_offset
,
4459 switch (split_store
->orig_stores
[0]->rhs_code
)
4464 FOR_EACH_VEC_ELT (split_store
->orig_stores
, k
, info
)
4466 tree rhs1
= gimple_assign_rhs1 (info
->stmt
);
4467 orig_stmts
.safe_push (SSA_NAME_DEF_STMT (rhs1
));
4470 bit_loc
= get_location_for_stmts (orig_stmts
);
4471 orig_stmts
.truncate (0);
4474 = gimple_build_assign (make_ssa_name (dest_type
),
4475 split_store
->orig_stores
[0]->rhs_code
,
4477 gimple_set_location (stmt
, bit_loc
);
4478 /* If there is just one load and there is a separate
4479 load_seq[0], emit the bitwise op right after it. */
4480 if (load_addr
[1] == NULL_TREE
&& gsi_bb (load_gsi
[0]))
4481 gimple_seq_add_stmt_without_update (&load_seq
[0], stmt
);
4482 /* Otherwise, if at least one load is in seq, we need to
4483 emit the bitwise op right before the store. If there
4484 are two loads and are emitted somewhere else, it would
4485 be better to emit the bitwise op as early as possible;
4486 we don't track where that would be possible right now
4489 gimple_seq_add_stmt_without_update (&seq
, stmt
);
4490 src
= gimple_assign_lhs (stmt
);
4492 enum tree_code inv_op
;
4493 inv_op
= invert_op (split_store
, 2, dest_type
, xor_mask
);
4494 if (inv_op
!= NOP_EXPR
)
4496 stmt
= gimple_build_assign (make_ssa_name (dest_type
),
4497 inv_op
, src
, xor_mask
);
4498 gimple_set_location (stmt
, bit_loc
);
4499 if (load_addr
[1] == NULL_TREE
&& gsi_bb (load_gsi
[0]))
4500 gimple_seq_add_stmt_without_update (&load_seq
[0], stmt
);
4502 gimple_seq_add_stmt_without_update (&seq
, stmt
);
4503 src
= gimple_assign_lhs (stmt
);
4509 if (!is_gimple_val (src
))
4511 stmt
= gimple_build_assign (make_ssa_name (TREE_TYPE (src
)),
4513 gimple_seq_add_stmt_without_update (&seq
, stmt
);
4514 src
= gimple_assign_lhs (stmt
);
4516 if (!useless_type_conversion_p (dest_type
, TREE_TYPE (src
)))
4518 stmt
= gimple_build_assign (make_ssa_name (dest_type
),
4520 gimple_seq_add_stmt_without_update (&seq
, stmt
);
4521 src
= gimple_assign_lhs (stmt
);
4523 inv_op
= invert_op (split_store
, 2, dest_type
, xor_mask
);
4524 if (inv_op
!= NOP_EXPR
)
4526 stmt
= gimple_build_assign (make_ssa_name (dest_type
),
4527 inv_op
, src
, xor_mask
);
4528 gimple_set_location (stmt
, loc
);
4529 gimple_seq_add_stmt_without_update (&seq
, stmt
);
4530 src
= gimple_assign_lhs (stmt
);
4538 /* If bit insertion is required, we use the source as an accumulator
4539 into which the successive bit-field values are manually inserted.
4540 FIXME: perhaps use BIT_INSERT_EXPR instead in some cases? */
4541 if (group
->bit_insertion
)
4542 FOR_EACH_VEC_ELT (split_store
->orig_stores
, k
, info
)
4543 if (info
->rhs_code
== BIT_INSERT_EXPR
4544 && info
->bitpos
< try_bitpos
+ try_size
4545 && info
->bitpos
+ info
->bitsize
> try_bitpos
)
4547 /* Mask, truncate, convert to final type, shift and ior into
4548 the accumulator. Note that every step can be a no-op. */
4549 const HOST_WIDE_INT start_gap
= info
->bitpos
- try_bitpos
;
4550 const HOST_WIDE_INT end_gap
4551 = (try_bitpos
+ try_size
) - (info
->bitpos
+ info
->bitsize
);
4552 tree tem
= info
->ops
[0].val
;
4553 if (!INTEGRAL_TYPE_P (TREE_TYPE (tem
)))
4555 const unsigned HOST_WIDE_INT size
4556 = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (tem
)));
4558 = build_nonstandard_integer_type (size
, UNSIGNED
);
4559 tem
= gimple_build (&seq
, loc
, VIEW_CONVERT_EXPR
,
4562 if (TYPE_PRECISION (TREE_TYPE (tem
)) <= info
->bitsize
)
4565 = build_nonstandard_integer_type (info
->bitsize
,
4567 tem
= gimple_convert (&seq
, loc
, bitfield_type
, tem
);
4569 else if ((BYTES_BIG_ENDIAN
? start_gap
: end_gap
) > 0)
4571 const unsigned HOST_WIDE_INT imask
4572 = (HOST_WIDE_INT_1U
<< info
->bitsize
) - 1;
4573 tem
= gimple_build (&seq
, loc
,
4574 BIT_AND_EXPR
, TREE_TYPE (tem
), tem
,
4575 build_int_cst (TREE_TYPE (tem
),
4578 const HOST_WIDE_INT shift
4579 = (BYTES_BIG_ENDIAN
? end_gap
: start_gap
);
4581 tem
= gimple_build (&seq
, loc
,
4582 RSHIFT_EXPR
, TREE_TYPE (tem
), tem
,
4583 build_int_cst (NULL_TREE
, -shift
));
4584 tem
= gimple_convert (&seq
, loc
, dest_type
, tem
);
4586 tem
= gimple_build (&seq
, loc
,
4587 LSHIFT_EXPR
, dest_type
, tem
,
4588 build_int_cst (NULL_TREE
, shift
));
4589 src
= gimple_build (&seq
, loc
,
4590 BIT_IOR_EXPR
, dest_type
, tem
, src
);
4593 if (!integer_zerop (mask
))
4595 tree tem
= make_ssa_name (dest_type
);
4596 tree load_src
= unshare_expr (dest
);
4597 /* The load might load some or all bits uninitialized,
4598 avoid -W*uninitialized warnings in that case.
4599 As optimization, it would be nice if all the bits are
4600 provably uninitialized (no stores at all yet or previous
4601 store a CLOBBER) we'd optimize away the load and replace
4603 suppress_warning (load_src
, OPT_Wuninitialized
);
4604 stmt
= gimple_build_assign (tem
, load_src
);
4605 gimple_set_location (stmt
, loc
);
4606 gimple_set_vuse (stmt
, new_vuse
);
4607 gimple_seq_add_stmt_without_update (&seq
, stmt
);
4609 /* FIXME: If there is a single chunk of zero bits in mask,
4610 perhaps use BIT_INSERT_EXPR instead? */
4611 stmt
= gimple_build_assign (make_ssa_name (dest_type
),
4612 BIT_AND_EXPR
, tem
, mask
);
4613 gimple_set_location (stmt
, loc
);
4614 gimple_seq_add_stmt_without_update (&seq
, stmt
);
4615 tem
= gimple_assign_lhs (stmt
);
4617 if (TREE_CODE (src
) == INTEGER_CST
)
4618 src
= wide_int_to_tree (dest_type
,
4619 wi::bit_and_not (wi::to_wide (src
),
4620 wi::to_wide (mask
)));
4624 = wide_int_to_tree (dest_type
,
4625 wi::bit_not (wi::to_wide (mask
)));
4626 stmt
= gimple_build_assign (make_ssa_name (dest_type
),
4627 BIT_AND_EXPR
, src
, nmask
);
4628 gimple_set_location (stmt
, loc
);
4629 gimple_seq_add_stmt_without_update (&seq
, stmt
);
4630 src
= gimple_assign_lhs (stmt
);
4632 stmt
= gimple_build_assign (make_ssa_name (dest_type
),
4633 BIT_IOR_EXPR
, tem
, src
);
4634 gimple_set_location (stmt
, loc
);
4635 gimple_seq_add_stmt_without_update (&seq
, stmt
);
4636 src
= gimple_assign_lhs (stmt
);
4640 stmt
= gimple_build_assign (dest
, src
);
4641 gimple_set_location (stmt
, loc
);
4642 gimple_set_vuse (stmt
, new_vuse
);
4643 gimple_seq_add_stmt_without_update (&seq
, stmt
);
4645 if (group
->lp_nr
&& stmt_could_throw_p (cfun
, stmt
))
4646 add_stmt_to_eh_lp (stmt
, group
->lp_nr
);
4649 if (i
< split_stores
.length () - 1)
4650 new_vdef
= make_ssa_name (gimple_vop (cfun
), stmt
);
4652 new_vdef
= last_vdef
;
4654 gimple_set_vdef (stmt
, new_vdef
);
4655 SSA_NAME_DEF_STMT (new_vdef
) = stmt
;
4656 new_vuse
= new_vdef
;
4659 FOR_EACH_VEC_ELT (split_stores
, i
, split_store
)
4666 "New sequence of %u stores to replace old one of %u stores\n",
4667 split_stores
.length (), orig_num_stmts
);
4668 if (dump_flags
& TDF_DETAILS
)
4669 print_gimple_seq (dump_file
, seq
, 0, TDF_VOPS
| TDF_MEMSYMS
);
4672 if (gimple_clobber_p (group
->last_stmt
))
4673 update_stmt (group
->last_stmt
);
4675 if (group
->lp_nr
> 0)
4677 /* We're going to insert a sequence of (potentially) throwing stores
4678 into an active EH region. This means that we're going to create
4679 new basic blocks with EH edges pointing to the post landing pad
4680 and, therefore, to have to update its PHI nodes, if any. For the
4681 virtual PHI node, we're going to use the VDEFs created above, but
4682 for the other nodes, we need to record the original reaching defs. */
4683 eh_landing_pad lp
= get_eh_landing_pad_from_number (group
->lp_nr
);
4684 basic_block lp_bb
= label_to_block (cfun
, lp
->post_landing_pad
);
4685 basic_block last_bb
= gimple_bb (group
->last_stmt
);
4686 edge last_edge
= find_edge (last_bb
, lp_bb
);
4687 auto_vec
<tree
, 16> last_defs
;
4689 for (gpi
= gsi_start_phis (lp_bb
); !gsi_end_p (gpi
); gsi_next (&gpi
))
4691 gphi
*phi
= gpi
.phi ();
4693 if (virtual_operand_p (gimple_phi_result (phi
)))
4694 last_def
= NULL_TREE
;
4696 last_def
= gimple_phi_arg_def (phi
, last_edge
->dest_idx
);
4697 last_defs
.safe_push (last_def
);
4700 /* Do the insertion. Then, if new basic blocks have been created in the
4701 process, rewind the chain of VDEFs create above to walk the new basic
4702 blocks and update the corresponding arguments of the PHI nodes. */
4703 update_modified_stmts (seq
);
4704 if (gimple_find_sub_bbs (seq
, &last_gsi
))
4705 while (last_vdef
!= gimple_vuse (group
->last_stmt
))
4707 gimple
*stmt
= SSA_NAME_DEF_STMT (last_vdef
);
4708 if (stmt_could_throw_p (cfun
, stmt
))
4710 edge new_edge
= find_edge (gimple_bb (stmt
), lp_bb
);
4712 for (gpi
= gsi_start_phis (lp_bb
), i
= 0;
4714 gsi_next (&gpi
), i
++)
4716 gphi
*phi
= gpi
.phi ();
4718 if (virtual_operand_p (gimple_phi_result (phi
)))
4719 new_def
= last_vdef
;
4721 new_def
= last_defs
[i
];
4722 add_phi_arg (phi
, new_def
, new_edge
, UNKNOWN_LOCATION
);
4725 last_vdef
= gimple_vuse (stmt
);
4729 gsi_insert_seq_after (&last_gsi
, seq
, GSI_SAME_STMT
);
4731 for (int j
= 0; j
< 2; ++j
)
4733 gsi_insert_seq_after (&load_gsi
[j
], load_seq
[j
], GSI_SAME_STMT
);
4738 /* Process the merged_store_group objects created in the coalescing phase.
4739 The stores are all against the base object BASE.
4740 Try to output the widened stores and delete the original statements if
4741 successful. Return true iff any changes were made. */
4744 imm_store_chain_info::output_merged_stores ()
4747 merged_store_group
*merged_store
;
4749 FOR_EACH_VEC_ELT (m_merged_store_groups
, i
, merged_store
)
4751 if (dbg_cnt (store_merging
)
4752 && output_merged_store (merged_store
))
4755 store_immediate_info
*store
;
4756 FOR_EACH_VEC_ELT (merged_store
->stores
, j
, store
)
4758 gimple
*stmt
= store
->stmt
;
4759 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
4760 /* Don't remove clobbers, they are still useful even if
4761 everything is overwritten afterwards. */
4762 if (gimple_clobber_p (stmt
))
4764 gsi_remove (&gsi
, true);
4766 remove_stmt_from_eh_lp (stmt
);
4767 if (stmt
!= merged_store
->last_stmt
)
4769 unlink_stmt_vdef (stmt
);
4770 release_defs (stmt
);
4776 if (ret
&& dump_file
)
4777 fprintf (dump_file
, "Merging successful!\n");
4782 /* Coalesce the store_immediate_info objects recorded against the base object
4783 BASE in the first phase and output them.
4784 Delete the allocated structures.
4785 Return true if any changes were made. */
4788 imm_store_chain_info::terminate_and_process_chain ()
4790 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4791 fprintf (dump_file
, "Terminating chain with %u stores\n",
4792 m_store_info
.length ());
4793 /* Process store chain. */
4795 if (m_store_info
.length () > 1)
4797 ret
= coalesce_immediate_stores ();
4799 ret
= output_merged_stores ();
4802 /* Delete all the entries we allocated ourselves. */
4803 store_immediate_info
*info
;
4805 FOR_EACH_VEC_ELT (m_store_info
, i
, info
)
4808 merged_store_group
*merged_info
;
4809 FOR_EACH_VEC_ELT (m_merged_store_groups
, i
, merged_info
)
4815 /* Return true iff LHS is a destination potentially interesting for
4816 store merging. In practice these are the codes that get_inner_reference
4820 lhs_valid_for_store_merging_p (tree lhs
)
4825 switch (TREE_CODE (lhs
))
4828 case ARRAY_RANGE_REF
:
4832 case VIEW_CONVERT_EXPR
:
4841 /* Return true if the tree RHS is a constant we want to consider
4842 during store merging. In practice accept all codes that
4843 native_encode_expr accepts. */
4846 rhs_valid_for_store_merging_p (tree rhs
)
4848 unsigned HOST_WIDE_INT size
;
4849 if (TREE_CODE (rhs
) == CONSTRUCTOR
4850 && CONSTRUCTOR_NELTS (rhs
) == 0
4851 && TYPE_SIZE_UNIT (TREE_TYPE (rhs
))
4852 && tree_fits_uhwi_p (TYPE_SIZE_UNIT (TREE_TYPE (rhs
))))
4854 return (GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (rhs
))).is_constant (&size
)
4855 && native_encode_expr (rhs
, NULL
, size
) != 0);
4858 /* Adjust *PBITPOS, *PBITREGION_START and *PBITREGION_END by BYTE_OFF bytes
4859 and return true on success or false on failure. */
4862 adjust_bit_pos (poly_offset_int byte_off
,
4863 poly_int64
*pbitpos
,
4864 poly_uint64
*pbitregion_start
,
4865 poly_uint64
*pbitregion_end
)
4867 poly_offset_int bit_off
= byte_off
<< LOG2_BITS_PER_UNIT
;
4868 bit_off
+= *pbitpos
;
4870 if (known_ge (bit_off
, 0) && bit_off
.to_shwi (pbitpos
))
4872 if (maybe_ne (*pbitregion_end
, 0U))
4874 bit_off
= byte_off
<< LOG2_BITS_PER_UNIT
;
4875 bit_off
+= *pbitregion_start
;
4876 if (bit_off
.to_uhwi (pbitregion_start
))
4878 bit_off
= byte_off
<< LOG2_BITS_PER_UNIT
;
4879 bit_off
+= *pbitregion_end
;
4880 if (!bit_off
.to_uhwi (pbitregion_end
))
4881 *pbitregion_end
= 0;
4884 *pbitregion_end
= 0;
4892 /* If MEM is a memory reference usable for store merging (either as
4893 store destination or for loads), return the non-NULL base_addr
4894 and set *PBITSIZE, *PBITPOS, *PBITREGION_START and *PBITREGION_END.
4895 Otherwise return NULL, *PBITPOS should be still valid even for that
4899 mem_valid_for_store_merging (tree mem
, poly_uint64
*pbitsize
,
4900 poly_uint64
*pbitpos
,
4901 poly_uint64
*pbitregion_start
,
4902 poly_uint64
*pbitregion_end
)
4904 poly_int64 bitsize
, bitpos
;
4905 poly_uint64 bitregion_start
= 0, bitregion_end
= 0;
4907 int unsignedp
= 0, reversep
= 0, volatilep
= 0;
4909 tree base_addr
= get_inner_reference (mem
, &bitsize
, &bitpos
, &offset
, &mode
,
4910 &unsignedp
, &reversep
, &volatilep
);
4911 *pbitsize
= bitsize
;
4912 if (known_eq (bitsize
, 0))
4915 if (TREE_CODE (mem
) == COMPONENT_REF
4916 && DECL_BIT_FIELD_TYPE (TREE_OPERAND (mem
, 1)))
4918 get_bit_range (&bitregion_start
, &bitregion_end
, mem
, &bitpos
, &offset
);
4919 if (maybe_ne (bitregion_end
, 0U))
4926 /* We do not want to rewrite TARGET_MEM_REFs. */
4927 if (TREE_CODE (base_addr
) == TARGET_MEM_REF
)
4929 /* In some cases get_inner_reference may return a
4930 MEM_REF [ptr + byteoffset]. For the purposes of this pass
4931 canonicalize the base_addr to MEM_REF [ptr] and take
4932 byteoffset into account in the bitpos. This occurs in
4933 PR 23684 and this way we can catch more chains. */
4934 else if (TREE_CODE (base_addr
) == MEM_REF
)
4936 if (!adjust_bit_pos (mem_ref_offset (base_addr
), &bitpos
,
4937 &bitregion_start
, &bitregion_end
))
4939 base_addr
= TREE_OPERAND (base_addr
, 0);
4941 /* get_inner_reference returns the base object, get at its
4945 if (maybe_lt (bitpos
, 0))
4947 base_addr
= build_fold_addr_expr (base_addr
);
4952 /* If the access is variable offset then a base decl has to be
4953 address-taken to be able to emit pointer-based stores to it.
4954 ??? We might be able to get away with re-using the original
4955 base up to the first variable part and then wrapping that inside
4957 tree base
= get_base_address (base_addr
);
4958 if (!base
|| (DECL_P (base
) && !TREE_ADDRESSABLE (base
)))
4961 /* Similarly to above for the base, remove constant from the offset. */
4962 if (TREE_CODE (offset
) == PLUS_EXPR
4963 && TREE_CODE (TREE_OPERAND (offset
, 1)) == INTEGER_CST
4964 && adjust_bit_pos (wi::to_poly_offset (TREE_OPERAND (offset
, 1)),
4965 &bitpos
, &bitregion_start
, &bitregion_end
))
4966 offset
= TREE_OPERAND (offset
, 0);
4968 base_addr
= build2 (POINTER_PLUS_EXPR
, TREE_TYPE (base_addr
),
4972 if (known_eq (bitregion_end
, 0U))
4974 bitregion_start
= round_down_to_byte_boundary (bitpos
);
4975 bitregion_end
= round_up_to_byte_boundary (bitpos
+ bitsize
);
4978 *pbitsize
= bitsize
;
4980 *pbitregion_start
= bitregion_start
;
4981 *pbitregion_end
= bitregion_end
;
4985 /* Return true if STMT is a load that can be used for store merging.
4986 In that case fill in *OP. BITSIZE, BITPOS, BITREGION_START and
4987 BITREGION_END are properties of the corresponding store. */
4990 handled_load (gimple
*stmt
, store_operand_info
*op
,
4991 poly_uint64 bitsize
, poly_uint64 bitpos
,
4992 poly_uint64 bitregion_start
, poly_uint64 bitregion_end
)
4994 if (!is_gimple_assign (stmt
))
4996 if (gimple_assign_rhs_code (stmt
) == BIT_NOT_EXPR
)
4998 tree rhs1
= gimple_assign_rhs1 (stmt
);
4999 if (TREE_CODE (rhs1
) == SSA_NAME
5000 && handled_load (SSA_NAME_DEF_STMT (rhs1
), op
, bitsize
, bitpos
,
5001 bitregion_start
, bitregion_end
))
5003 /* Don't allow _1 = load; _2 = ~1; _3 = ~_2; which should have
5004 been optimized earlier, but if allowed here, would confuse the
5005 multiple uses counting. */
5008 op
->bit_not_p
= !op
->bit_not_p
;
5013 if (gimple_vuse (stmt
)
5014 && gimple_assign_load_p (stmt
)
5015 && !stmt_can_throw_internal (cfun
, stmt
)
5016 && !gimple_has_volatile_ops (stmt
))
5018 tree mem
= gimple_assign_rhs1 (stmt
);
5020 = mem_valid_for_store_merging (mem
, &op
->bitsize
, &op
->bitpos
,
5021 &op
->bitregion_start
,
5022 &op
->bitregion_end
);
5023 if (op
->base_addr
!= NULL_TREE
5024 && known_eq (op
->bitsize
, bitsize
)
5025 && multiple_p (op
->bitpos
- bitpos
, BITS_PER_UNIT
)
5026 && known_ge (op
->bitpos
- op
->bitregion_start
,
5027 bitpos
- bitregion_start
)
5028 && known_ge (op
->bitregion_end
- op
->bitpos
,
5029 bitregion_end
- bitpos
))
5033 op
->bit_not_p
= false;
5040 /* Return the index number of the landing pad for STMT, if any. */
5043 lp_nr_for_store (gimple
*stmt
)
5045 if (!cfun
->can_throw_non_call_exceptions
|| !cfun
->eh
)
5048 if (!stmt_could_throw_p (cfun
, stmt
))
5051 return lookup_stmt_eh_lp (stmt
);
5054 /* Record the store STMT for store merging optimization if it can be
5055 optimized. Return true if any changes were made. */
5058 pass_store_merging::process_store (gimple
*stmt
)
5060 tree lhs
= gimple_assign_lhs (stmt
);
5061 tree rhs
= gimple_assign_rhs1 (stmt
);
5062 poly_uint64 bitsize
, bitpos
= 0;
5063 poly_uint64 bitregion_start
= 0, bitregion_end
= 0;
5065 = mem_valid_for_store_merging (lhs
, &bitsize
, &bitpos
,
5066 &bitregion_start
, &bitregion_end
);
5067 if (known_eq (bitsize
, 0U))
5070 bool invalid
= (base_addr
== NULL_TREE
5071 || (maybe_gt (bitsize
,
5072 (unsigned int) MAX_BITSIZE_MODE_ANY_INT
)
5073 && TREE_CODE (rhs
) != INTEGER_CST
5074 && (TREE_CODE (rhs
) != CONSTRUCTOR
5075 || CONSTRUCTOR_NELTS (rhs
) != 0)));
5076 enum tree_code rhs_code
= ERROR_MARK
;
5077 bool bit_not_p
= false;
5078 struct symbolic_number n
;
5079 gimple
*ins_stmt
= NULL
;
5080 store_operand_info ops
[2];
5083 else if (TREE_CODE (rhs
) == STRING_CST
)
5085 rhs_code
= STRING_CST
;
5088 else if (rhs_valid_for_store_merging_p (rhs
))
5090 rhs_code
= INTEGER_CST
;
5093 else if (TREE_CODE (rhs
) == SSA_NAME
)
5095 gimple
*def_stmt
= SSA_NAME_DEF_STMT (rhs
), *def_stmt1
, *def_stmt2
;
5096 if (!is_gimple_assign (def_stmt
))
5098 else if (handled_load (def_stmt
, &ops
[0], bitsize
, bitpos
,
5099 bitregion_start
, bitregion_end
))
5101 else if (gimple_assign_rhs_code (def_stmt
) == BIT_NOT_EXPR
)
5103 tree rhs1
= gimple_assign_rhs1 (def_stmt
);
5104 if (TREE_CODE (rhs1
) == SSA_NAME
5105 && is_gimple_assign (SSA_NAME_DEF_STMT (rhs1
)))
5108 def_stmt
= SSA_NAME_DEF_STMT (rhs1
);
5112 if (rhs_code
== ERROR_MARK
&& !invalid
)
5113 switch ((rhs_code
= gimple_assign_rhs_code (def_stmt
)))
5119 rhs1
= gimple_assign_rhs1 (def_stmt
);
5120 rhs2
= gimple_assign_rhs2 (def_stmt
);
5122 if (TREE_CODE (rhs1
) != SSA_NAME
)
5124 def_stmt1
= SSA_NAME_DEF_STMT (rhs1
);
5125 if (!is_gimple_assign (def_stmt1
)
5126 || !handled_load (def_stmt1
, &ops
[0], bitsize
, bitpos
,
5127 bitregion_start
, bitregion_end
))
5129 if (rhs_valid_for_store_merging_p (rhs2
))
5131 else if (TREE_CODE (rhs2
) != SSA_NAME
)
5135 def_stmt2
= SSA_NAME_DEF_STMT (rhs2
);
5136 if (!is_gimple_assign (def_stmt2
))
5138 else if (!handled_load (def_stmt2
, &ops
[1], bitsize
, bitpos
,
5139 bitregion_start
, bitregion_end
))
5149 unsigned HOST_WIDE_INT const_bitsize
;
5150 if (bitsize
.is_constant (&const_bitsize
)
5151 && (const_bitsize
% BITS_PER_UNIT
) == 0
5152 && const_bitsize
<= 64
5153 && multiple_p (bitpos
, BITS_PER_UNIT
))
5155 ins_stmt
= find_bswap_or_nop_1 (def_stmt
, &n
, 12);
5159 for (unsigned HOST_WIDE_INT i
= 0;
5161 i
+= BITS_PER_UNIT
, nn
>>= BITS_PER_MARKER
)
5162 if ((nn
& MARKER_MASK
) == 0
5163 || (nn
& MARKER_MASK
) == MARKER_BYTE_UNKNOWN
)
5172 rhs_code
= LROTATE_EXPR
;
5173 ops
[0].base_addr
= NULL_TREE
;
5174 ops
[1].base_addr
= NULL_TREE
;
5182 && bitsize
.is_constant (&const_bitsize
)
5183 && ((const_bitsize
% BITS_PER_UNIT
) != 0
5184 || !multiple_p (bitpos
, BITS_PER_UNIT
))
5185 && const_bitsize
<= MAX_FIXED_MODE_SIZE
)
5187 /* Bypass a conversion to the bit-field type. */
5189 && is_gimple_assign (def_stmt
)
5190 && CONVERT_EXPR_CODE_P (rhs_code
))
5192 tree rhs1
= gimple_assign_rhs1 (def_stmt
);
5193 if (TREE_CODE (rhs1
) == SSA_NAME
5194 && INTEGRAL_TYPE_P (TREE_TYPE (rhs1
)))
5197 rhs_code
= BIT_INSERT_EXPR
;
5200 ops
[0].base_addr
= NULL_TREE
;
5201 ops
[1].base_addr
= NULL_TREE
;
5208 unsigned HOST_WIDE_INT const_bitsize
, const_bitpos
;
5209 unsigned HOST_WIDE_INT const_bitregion_start
, const_bitregion_end
;
5211 || !bitsize
.is_constant (&const_bitsize
)
5212 || !bitpos
.is_constant (&const_bitpos
)
5213 || !bitregion_start
.is_constant (&const_bitregion_start
)
5214 || !bitregion_end
.is_constant (&const_bitregion_end
))
5215 return terminate_all_aliasing_chains (NULL
, stmt
);
5218 memset (&n
, 0, sizeof (n
));
5220 class imm_store_chain_info
**chain_info
= NULL
;
5223 chain_info
= m_stores
.get (base_addr
);
5225 store_immediate_info
*info
;
5228 unsigned int ord
= (*chain_info
)->m_store_info
.length ();
5229 info
= new store_immediate_info (const_bitsize
, const_bitpos
,
5230 const_bitregion_start
,
5231 const_bitregion_end
,
5232 stmt
, ord
, rhs_code
, n
, ins_stmt
,
5233 bit_not_p
, lp_nr_for_store (stmt
),
5235 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5237 fprintf (dump_file
, "Recording immediate store from stmt:\n");
5238 print_gimple_stmt (dump_file
, stmt
, 0);
5240 (*chain_info
)->m_store_info
.safe_push (info
);
5242 ret
|= terminate_all_aliasing_chains (chain_info
, stmt
);
5243 /* If we reach the limit of stores to merge in a chain terminate and
5244 process the chain now. */
5245 if ((*chain_info
)->m_store_info
.length ()
5246 == (unsigned int) param_max_stores_to_merge
)
5248 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5250 "Reached maximum number of statements to merge:\n");
5251 ret
|= terminate_and_process_chain (*chain_info
);
5256 /* Store aliases any existing chain? */
5257 ret
|= terminate_all_aliasing_chains (NULL
, stmt
);
5259 /* Start a new chain. */
5260 class imm_store_chain_info
*new_chain
5261 = new imm_store_chain_info (m_stores_head
, base_addr
);
5262 info
= new store_immediate_info (const_bitsize
, const_bitpos
,
5263 const_bitregion_start
,
5264 const_bitregion_end
,
5265 stmt
, 0, rhs_code
, n
, ins_stmt
,
5266 bit_not_p
, lp_nr_for_store (stmt
),
5268 new_chain
->m_store_info
.safe_push (info
);
5270 m_stores
.put (base_addr
, new_chain
);
5272 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5274 fprintf (dump_file
, "Starting active chain number %u with statement:\n",
5276 print_gimple_stmt (dump_file
, stmt
, 0);
5277 fprintf (dump_file
, "The base object is:\n");
5278 print_generic_expr (dump_file
, base_addr
);
5279 fprintf (dump_file
, "\n");
5283 /* Prune oldest chains so that after adding the chain or store above
5284 we're again within the limits set by the params. */
5285 if (m_n_chains
> (unsigned)param_max_store_chains_to_track
5286 || m_n_stores
> (unsigned)param_max_stores_to_track
)
5288 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5289 fprintf (dump_file
, "Too many chains (%u > %d) or stores (%u > %d), "
5290 "terminating oldest chain(s).\n", m_n_chains
,
5291 param_max_store_chains_to_track
, m_n_stores
,
5292 param_max_stores_to_track
);
5293 imm_store_chain_info
**e
= &m_stores_head
;
5295 unsigned n_stores
= 0;
5298 if (idx
>= (unsigned)param_max_store_chains_to_track
5299 || (n_stores
+ (*e
)->m_store_info
.length ()
5300 > (unsigned)param_max_stores_to_track
))
5301 ret
|= terminate_and_process_chain (*e
);
5304 n_stores
+= (*e
)->m_store_info
.length ();
5314 /* Return true if STMT is a store valid for store merging. */
5317 store_valid_for_store_merging_p (gimple
*stmt
)
5319 return gimple_assign_single_p (stmt
)
5320 && gimple_vdef (stmt
)
5321 && lhs_valid_for_store_merging_p (gimple_assign_lhs (stmt
))
5322 && (!gimple_has_volatile_ops (stmt
) || gimple_clobber_p (stmt
));
5325 enum basic_block_status
{ BB_INVALID
, BB_VALID
, BB_EXTENDED_VALID
};
5327 /* Return the status of basic block BB wrt store merging. */
5329 static enum basic_block_status
5330 get_status_for_store_merging (basic_block bb
)
5332 unsigned int num_statements
= 0;
5333 unsigned int num_constructors
= 0;
5334 gimple_stmt_iterator gsi
;
5337 for (gsi
= gsi_after_labels (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
5339 gimple
*stmt
= gsi_stmt (gsi
);
5341 if (is_gimple_debug (stmt
))
5344 if (store_valid_for_store_merging_p (stmt
) && ++num_statements
>= 2)
5347 if (is_gimple_assign (stmt
)
5348 && gimple_assign_rhs_code (stmt
) == CONSTRUCTOR
)
5350 tree rhs
= gimple_assign_rhs1 (stmt
);
5351 if (VECTOR_TYPE_P (TREE_TYPE (rhs
))
5352 && INTEGRAL_TYPE_P (TREE_TYPE (TREE_TYPE (rhs
)))
5353 && gimple_assign_lhs (stmt
) != NULL_TREE
)
5356 = int_size_in_bytes (TREE_TYPE (rhs
)) * BITS_PER_UNIT
;
5357 if (sz
== 16 || sz
== 32 || sz
== 64)
5359 num_constructors
= 1;
5366 if (num_statements
== 0 && num_constructors
== 0)
5369 if (cfun
->can_throw_non_call_exceptions
&& cfun
->eh
5370 && store_valid_for_store_merging_p (gimple_seq_last_stmt (bb_seq (bb
)))
5371 && (e
= find_fallthru_edge (bb
->succs
))
5372 && e
->dest
== bb
->next_bb
)
5373 return BB_EXTENDED_VALID
;
5375 return (num_statements
>= 2 || num_constructors
) ? BB_VALID
: BB_INVALID
;
5378 /* Entry point for the pass. Go over each basic block recording chains of
5379 immediate stores. Upon encountering a terminating statement (as defined
5380 by stmt_terminates_chain_p) process the recorded stores and emit the widened
5384 pass_store_merging::execute (function
*fun
)
5387 hash_set
<gimple
*> orig_stmts
;
5388 bool changed
= false, open_chains
= false;
5390 /* If the function can throw and catch non-call exceptions, we'll be trying
5391 to merge stores across different basic blocks so we need to first unsplit
5392 the EH edges in order to streamline the CFG of the function. */
5393 if (cfun
->can_throw_non_call_exceptions
&& cfun
->eh
)
5394 unsplit_eh_edges ();
5396 calculate_dominance_info (CDI_DOMINATORS
);
5398 FOR_EACH_BB_FN (bb
, fun
)
5400 const basic_block_status bb_status
= get_status_for_store_merging (bb
);
5401 gimple_stmt_iterator gsi
;
5403 if (open_chains
&& (bb_status
== BB_INVALID
|| !single_pred_p (bb
)))
5405 changed
|= terminate_and_process_all_chains ();
5406 open_chains
= false;
5409 if (bb_status
== BB_INVALID
)
5412 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5413 fprintf (dump_file
, "Processing basic block <%d>:\n", bb
->index
);
5415 for (gsi
= gsi_after_labels (bb
); !gsi_end_p (gsi
); )
5417 gimple
*stmt
= gsi_stmt (gsi
);
5420 if (is_gimple_debug (stmt
))
5423 if (gimple_has_volatile_ops (stmt
) && !gimple_clobber_p (stmt
))
5425 /* Terminate all chains. */
5426 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5427 fprintf (dump_file
, "Volatile access terminates "
5429 changed
|= terminate_and_process_all_chains ();
5430 open_chains
= false;
5434 if (is_gimple_assign (stmt
)
5435 && gimple_assign_rhs_code (stmt
) == CONSTRUCTOR
5436 && maybe_optimize_vector_constructor (stmt
))
5439 if (store_valid_for_store_merging_p (stmt
))
5440 changed
|= process_store (stmt
);
5442 changed
|= terminate_all_aliasing_chains (NULL
, stmt
);
5445 if (bb_status
== BB_EXTENDED_VALID
)
5449 changed
|= terminate_and_process_all_chains ();
5450 open_chains
= false;
5455 changed
|= terminate_and_process_all_chains ();
5457 /* If the function can throw and catch non-call exceptions and something
5458 changed during the pass, then the CFG has (very likely) changed too. */
5459 if (cfun
->can_throw_non_call_exceptions
&& cfun
->eh
&& changed
)
5461 free_dominance_info (CDI_DOMINATORS
);
5462 return TODO_cleanup_cfg
;
5470 /* Construct and return a store merging pass object. */
5473 make_pass_store_merging (gcc::context
*ctxt
)
5475 return new pass_store_merging (ctxt
);
5480 namespace selftest
{
5482 /* Selftests for store merging helpers. */
5484 /* Assert that all elements of the byte arrays X and Y, both of length N
5488 verify_array_eq (unsigned char *x
, unsigned char *y
, unsigned int n
)
5490 for (unsigned int i
= 0; i
< n
; i
++)
5494 fprintf (stderr
, "Arrays do not match. X:\n");
5495 dump_char_array (stderr
, x
, n
);
5496 fprintf (stderr
, "Y:\n");
5497 dump_char_array (stderr
, y
, n
);
5499 ASSERT_EQ (x
[i
], y
[i
]);
5503 /* Test shift_bytes_in_array_left and that it carries bits across between
5507 verify_shift_bytes_in_array_left (void)
5510 00011111 | 11100000. */
5511 unsigned char orig
[2] = { 0xe0, 0x1f };
5512 unsigned char in
[2];
5513 memcpy (in
, orig
, sizeof orig
);
5515 unsigned char expected
[2] = { 0x80, 0x7f };
5516 shift_bytes_in_array_left (in
, sizeof (in
), 2);
5517 verify_array_eq (in
, expected
, sizeof (in
));
5519 memcpy (in
, orig
, sizeof orig
);
5520 memcpy (expected
, orig
, sizeof orig
);
5521 /* Check that shifting by zero doesn't change anything. */
5522 shift_bytes_in_array_left (in
, sizeof (in
), 0);
5523 verify_array_eq (in
, expected
, sizeof (in
));
5527 /* Test shift_bytes_in_array_right and that it carries bits across between
5531 verify_shift_bytes_in_array_right (void)
5534 00011111 | 11100000. */
5535 unsigned char orig
[2] = { 0x1f, 0xe0};
5536 unsigned char in
[2];
5537 memcpy (in
, orig
, sizeof orig
);
5538 unsigned char expected
[2] = { 0x07, 0xf8};
5539 shift_bytes_in_array_right (in
, sizeof (in
), 2);
5540 verify_array_eq (in
, expected
, sizeof (in
));
5542 memcpy (in
, orig
, sizeof orig
);
5543 memcpy (expected
, orig
, sizeof orig
);
5544 /* Check that shifting by zero doesn't change anything. */
5545 shift_bytes_in_array_right (in
, sizeof (in
), 0);
5546 verify_array_eq (in
, expected
, sizeof (in
));
5549 /* Test clear_bit_region that it clears exactly the bits asked and
5553 verify_clear_bit_region (void)
5555 /* Start with all bits set and test clearing various patterns in them. */
5556 unsigned char orig
[3] = { 0xff, 0xff, 0xff};
5557 unsigned char in
[3];
5558 unsigned char expected
[3];
5559 memcpy (in
, orig
, sizeof in
);
5561 /* Check zeroing out all the bits. */
5562 clear_bit_region (in
, 0, 3 * BITS_PER_UNIT
);
5563 expected
[0] = expected
[1] = expected
[2] = 0;
5564 verify_array_eq (in
, expected
, sizeof in
);
5566 memcpy (in
, orig
, sizeof in
);
5567 /* Leave the first and last bits intact. */
5568 clear_bit_region (in
, 1, 3 * BITS_PER_UNIT
- 2);
5572 verify_array_eq (in
, expected
, sizeof in
);
5575 /* Test clear_bit_region_be that it clears exactly the bits asked and
5579 verify_clear_bit_region_be (void)
5581 /* Start with all bits set and test clearing various patterns in them. */
5582 unsigned char orig
[3] = { 0xff, 0xff, 0xff};
5583 unsigned char in
[3];
5584 unsigned char expected
[3];
5585 memcpy (in
, orig
, sizeof in
);
5587 /* Check zeroing out all the bits. */
5588 clear_bit_region_be (in
, BITS_PER_UNIT
- 1, 3 * BITS_PER_UNIT
);
5589 expected
[0] = expected
[1] = expected
[2] = 0;
5590 verify_array_eq (in
, expected
, sizeof in
);
5592 memcpy (in
, orig
, sizeof in
);
5593 /* Leave the first and last bits intact. */
5594 clear_bit_region_be (in
, BITS_PER_UNIT
- 2, 3 * BITS_PER_UNIT
- 2);
5598 verify_array_eq (in
, expected
, sizeof in
);
5602 /* Run all of the selftests within this file. */
5605 store_merging_c_tests (void)
5607 verify_shift_bytes_in_array_left ();
5608 verify_shift_bytes_in_array_right ();
5609 verify_clear_bit_region ();
5610 verify_clear_bit_region_be ();
5613 } // namespace selftest
5614 #endif /* CHECKING_P. */