1 /* GIMPLE store merging and byte swapping passes.
2 Copyright (C) 2009-2020 Free Software Foundation, Inc.
3 Contributed by ARM Ltd.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* The purpose of the store merging pass is to combine multiple memory stores
22 of constant values, values loaded from memory, bitwise operations on those,
23 or bit-field values, to consecutive locations, into fewer wider stores.
25 For example, if we have a sequence performing four byte stores to
26 consecutive memory locations:
27 [p ] := imm1;
28 [p + 1B] := imm2;
29 [p + 2B] := imm3;
30 [p + 3B] := imm4;
31 we can transform this into a single 4-byte store if the target supports it:
32 [p] := imm1:imm2:imm3:imm4 concatenated according to endianness.
34 Or:
35 [p ] := [q ];
36 [p + 1B] := [q + 1B];
37 [p + 2B] := [q + 2B];
38 [p + 3B] := [q + 3B];
39 this can, if there is no overlap, be transformed into a single 4-byte
40 load followed by a single 4-byte store.
42 Or:
43 [p ] := [q ] ^ imm1;
44 [p + 1B] := [q + 1B] ^ imm2;
45 [p + 2B] := [q + 2B] ^ imm3;
46 [p + 3B] := [q + 3B] ^ imm4;
47 this can, if there is no overlap, be transformed into a single 4-byte
48 load, xored with imm1:imm2:imm3:imm4, and stored using a single 4-byte store.
50 Or:
51 [p:1 ] := imm;
52 [p:31] := val & 0x7FFFFFFF;
53 we can transform this into a single 4-byte store if the target supports it:
54 [p] := imm:(val & 0x7FFFFFFF) concatenated according to endianness.
56 The algorithm is applied to each basic block in three phases:
58 1) Scan through the basic block and record assignments to destinations
59 that can be expressed as a store to memory of a certain size at a certain
60 bit offset from base expressions we can handle. For bit-fields we also
61 record the surrounding bit region, i.e. bits that could be stored in
62 a read-modify-write operation when storing the bit-field. Record store
63 chains to different bases in a hash_map (m_stores) and make sure to
64 terminate such chains when appropriate (for example when the stored
65 values get used subsequently).
66 These stores can be a result of structure element initializers, array stores
67 etc. A store_immediate_info object is recorded for every such store.
68 Record as many such assignments to a single base as possible until a
69 statement that interferes with the store sequence is encountered.
70 Each store has up to 2 operands, which can be either a constant, a memory
71 load or an SSA name, from which the value to be stored can be computed.
72 At most one of the operands can be a constant. The operands are recorded
73 in store_operand_info struct.
75 2) Analyze the chains of stores recorded in phase 1) (i.e. the vector of
76 store_immediate_info objects) and coalesce contiguous stores into
77 merged_store_group objects. For bit-field stores, we don't need to
78 require the stores to be contiguous, just their surrounding bit regions
79 have to be contiguous. If the expression being stored is different
80 between adjacent stores, such as one store storing a constant and
81 the following one storing a value loaded from memory, or if the loaded memory
82 objects are not adjacent, a new merged_store_group is created as well.
84 For example, given the stores:
85 [p ] := 0;
86 [p + 1B] := 1;
87 [p + 3B] := 0;
88 [p + 4B] := 1;
89 [p + 5B] := 0;
90 [p + 6B] := 0;
91 This phase would produce two merged_store_group objects, one recording the
92 two bytes stored in the memory region [p : p + 1] and another
93 recording the four bytes stored in the memory region [p + 3 : p + 6].
95 3) The merged_store_group objects produced in phase 2) are processed
96 to generate the sequence of wider stores that set the contiguous memory
97 regions to the sequence of bytes that correspond to it. This may emit
98 multiple stores per store group to handle contiguous stores that are not
99 of a size that is a power of 2. For example it can try to emit a 40-bit
100 store as a 32-bit store followed by an 8-bit store.
101 We try to emit as wide stores as we can while respecting STRICT_ALIGNMENT
102 or TARGET_SLOW_UNALIGNED_ACCESS settings.
104 Note on endianness and example:
105 Consider 2 contiguous 16-bit stores followed by 2 contiguous 8-bit stores:
106 [p ] := 0x1234;
107 [p + 2B] := 0x5678;
108 [p + 4B] := 0xab;
109 [p + 5B] := 0xcd;
111 The memory layout for little-endian (LE) and big-endian (BE) must be:
112 p |LE|BE|
113 ---------
114 0 |34|12|
115 1 |12|34|
116 2 |78|56|
117 3 |56|78|
118 4 |ab|ab|
119 5 |cd|cd|
121 To merge these into a single 48-bit merged value 'val' in phase 2)
122 on little-endian we insert stores to higher (consecutive) bitpositions
123 into the most significant bits of the merged value.
124 The final merged value would be: 0xcdab56781234
126 For big-endian we insert stores to higher bitpositions into the least
127 significant bits of the merged value.
128 The final merged value would be: 0x12345678abcd
130 Then, in phase 3), we want to emit this 48-bit value as a 32-bit store
131 followed by a 16-bit store. Again, we must consider endianness when
132 breaking down the 48-bit value 'val' computed above.
133 For little endian we emit:
134 [p] (32-bit) := 0x56781234; // val & 0x0000ffffffff;
135 [p + 4B] (16-bit) := 0xcdab; // (val & 0xffff00000000) >> 32;
137 Whereas for big-endian we emit:
138 [p] (32-bit) := 0x12345678; // (val & 0xffffffff0000) >> 16;
139 [p + 4B] (16-bit) := 0xabcd; // val & 0x00000000ffff; */
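/* Illustration only (an added sketch, not part of the pass): a minimal C
   fragment of the kind phase 1) records and phase 3) rewrites.  The type
   and function names are hypothetical.

     struct packet { unsigned char tag, flags, len, crc; };

     void
     init_packet (struct packet *p)
     {
       p->tag = 1;
       p->flags = 2;
       p->len = 3;
       p->crc = 4;
     }

   The four single-byte constant stores are contiguous and share the base
   expression *p, so on a target that allows it they can be emitted as one
   4-byte store of 0x04030201 (little-endian) or 0x01020304 (big-endian).  */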
141 #include "config.h"
142 #include "system.h"
143 #include "coretypes.h"
144 #include "backend.h"
145 #include "tree.h"
146 #include "gimple.h"
147 #include "builtins.h"
148 #include "fold-const.h"
149 #include "tree-pass.h"
150 #include "ssa.h"
151 #include "gimple-pretty-print.h"
152 #include "alias.h"
153 #include "fold-const.h"
154 #include "print-tree.h"
155 #include "tree-hash-traits.h"
156 #include "gimple-iterator.h"
157 #include "gimplify.h"
158 #include "gimple-fold.h"
159 #include "stor-layout.h"
160 #include "timevar.h"
161 #include "cfganal.h"
162 #include "cfgcleanup.h"
163 #include "tree-cfg.h"
164 #include "except.h"
165 #include "tree-eh.h"
166 #include "target.h"
167 #include "gimplify-me.h"
168 #include "rtl.h"
169 #include "expr.h" /* For get_bit_range. */
170 #include "optabs-tree.h"
171 #include "dbgcnt.h"
172 #include "selftest.h"
174 /* The maximum size (in bits) of the stores this pass should generate. */
175 #define MAX_STORE_BITSIZE (BITS_PER_WORD)
176 #define MAX_STORE_BYTES (MAX_STORE_BITSIZE / BITS_PER_UNIT)
178 /* Limit to bound the number of aliasing checks for loads with the same
179 vuse as the corresponding store. */
180 #define MAX_STORE_ALIAS_CHECKS 64
182 namespace {
184 struct bswap_stat
186 /* Number of hand-written 16-bit nop / bswaps found. */
187 int found_16bit;
189 /* Number of hand-written 32-bit nop / bswaps found. */
190 int found_32bit;
192 /* Number of hand-written 64-bit nop / bswaps found. */
193 int found_64bit;
194 } nop_stats, bswap_stats;
196 /* A symbolic number structure is used to detect byte permutation and selection
197 patterns of a source. To achieve that, its field N contains an artificial
198 number consisting of BITS_PER_MARKER sized markers tracking where each
199 byte comes from in the source:
201 0 - target byte has the value 0
202 FF - target byte has an unknown value (eg. due to sign extension)
203 1..size - marker value is the byte index in the source (0 for lsb).
205 To detect permutations on memory sources (arrays and structures), a symbolic
206 number is also associated:
207 - a base address BASE_ADDR and an OFFSET giving the address of the source;
208 - a range which gives the difference between the highest and lowest accessed
209 memory location to make such a symbolic number;
210 - the address SRC of the source element of lowest address as a convenience
211 to easily get BASE_ADDR + offset + lowest bytepos;
212 - the number of expressions N_OPS bitwise ored together, representing the
213 approximate cost of the computation.
215 Note 1: the range is different from size as size reflects the size of the
216 type of the current expression. For instance, for an array char a[],
217 (short) a[0] | (short) a[3] would have a size of 2 but a range of 4 while
218 (short) a[0] | ((short) a[0] << 1) would still have a size of 2 but this
219 time a range of 1.
221 Note 2: for non-memory sources, range holds the same value as size.
223 Note 3: SRC points to the SSA_NAME in case of non-memory source. */
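/* Worked illustration (added here, not normative): for a 4-byte source,
   the initial symbolic number is 0x04030201, i.e. marker 1 (the source
   lsb) in the least significant target byte up to marker 4 in the most
   significant one; this is the masked CMPNOP value defined below.  If the
   expression shuffles the bytes so that the symbolic number becomes
   0x01020304 (the masked CMPXCHG value), every target byte holds the
   mirror-image source byte, which is exactly a 32-bit byte swap.  */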
225 struct symbolic_number {
226 uint64_t n;
227 tree type;
228 tree base_addr;
229 tree offset;
230 poly_int64_pod bytepos;
231 tree src;
232 tree alias_set;
233 tree vuse;
234 unsigned HOST_WIDE_INT range;
235 int n_ops;
238 #define BITS_PER_MARKER 8
239 #define MARKER_MASK ((1 << BITS_PER_MARKER) - 1)
240 #define MARKER_BYTE_UNKNOWN MARKER_MASK
241 #define HEAD_MARKER(n, size) \
242 ((n) & ((uint64_t) MARKER_MASK << (((size) - 1) * BITS_PER_MARKER)))
244 /* The number which the find_bswap_or_nop_1 result should match in
245 order to have a nop. The number is masked according to the size of
246 the symbolic number before using it. */
247 #define CMPNOP (sizeof (int64_t) < 8 ? 0 : \
248 (uint64_t)0x08070605 << 32 | 0x04030201)
250 /* The number which the find_bswap_or_nop_1 result should match in
251 order to have a byte swap. The number is masked according to the
252 size of the symbolic number before using it. */
253 #define CMPXCHG (sizeof (int64_t) < 8 ? 0 : \
254 (uint64_t)0x01020304 << 32 | 0x05060708)
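/* Illustration only (added sketch, not part of the pass): a hand-written
   32-bit byte swap of the shape that find_bswap_or_nop_1 can analyze and
   that the bswap pass can replace with a __builtin_bswap32 call.  The
   function name is hypothetical.

     static unsigned int
     my_bswap32 (unsigned int x)
     {
       return ((x & 0x000000ffu) << 24)
              | ((x & 0x0000ff00u) << 8)
              | ((x & 0x00ff0000u) >> 8)
              | ((x & 0xff000000u) >> 24);
     }

   Each masked sub-expression keeps a single byte marker and the shifts move
   it to its swapped position; ORing the four pieces yields the symbolic
   number 0x01020304, which matches CMPXCHG for a 4-byte value.  */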
256 /* Perform a SHIFT or ROTATE operation by COUNT bits on symbolic
257 number N. Return false if the requested operation is not permitted
258 on a symbolic number. */
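/* Worked illustration (added, not normative): with a 4-byte symbolic number
   n->n == 0x04030201, an LSHIFT_EXPR by 8 bits moves every marker up one
   byte and gives 0x03020100, the low 0 recording that the target lsb is now
   known to be zero.  An RSHIFT_EXPR by 8 on an unsigned type gives
   0x00040302, whereas on a signed type with a nonzero head marker the
   vacated top byte becomes MARKER_BYTE_UNKNOWN (0xff) instead, giving
   0xff040302.  */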
260 inline bool
261 do_shift_rotate (enum tree_code code,
262 struct symbolic_number *n,
263 int count)
265 int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
266 unsigned head_marker;
268 if (count < 0
269 || count >= TYPE_PRECISION (n->type)
270 || count % BITS_PER_UNIT != 0)
271 return false;
272 count = (count / BITS_PER_UNIT) * BITS_PER_MARKER;
274 /* Zero out the extra bits of N in order to avoid them being shifted
275 into the significant bits. */
276 if (size < 64 / BITS_PER_MARKER)
277 n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
279 switch (code)
281 case LSHIFT_EXPR:
282 n->n <<= count;
283 break;
284 case RSHIFT_EXPR:
285 head_marker = HEAD_MARKER (n->n, size);
286 n->n >>= count;
287 /* Arithmetic shift of signed type: result is dependent on the value. */
288 if (!TYPE_UNSIGNED (n->type) && head_marker)
289 for (i = 0; i < count / BITS_PER_MARKER; i++)
290 n->n |= (uint64_t) MARKER_BYTE_UNKNOWN
291 << ((size - 1 - i) * BITS_PER_MARKER);
292 break;
293 case LROTATE_EXPR:
294 n->n = (n->n << count) | (n->n >> ((size * BITS_PER_MARKER) - count));
295 break;
296 case RROTATE_EXPR:
297 n->n = (n->n >> count) | (n->n << ((size * BITS_PER_MARKER) - count));
298 break;
299 default:
300 return false;
302 /* Zero unused bits for size. */
303 if (size < 64 / BITS_PER_MARKER)
304 n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
305 return true;
308 /* Perform sanity checking for the symbolic number N and the gimple
309 statement STMT. */
311 inline bool
312 verify_symbolic_number_p (struct symbolic_number *n, gimple *stmt)
314 tree lhs_type;
316 lhs_type = gimple_expr_type (stmt);
318 if (TREE_CODE (lhs_type) != INTEGER_TYPE
319 && TREE_CODE (lhs_type) != ENUMERAL_TYPE)
320 return false;
322 if (TYPE_PRECISION (lhs_type) != TYPE_PRECISION (n->type))
323 return false;
325 return true;
328 /* Initialize the symbolic number N for the bswap pass from the base element
329 SRC manipulated by the bitwise OR expression. */
331 bool
332 init_symbolic_number (struct symbolic_number *n, tree src)
334 int size;
336 if (! INTEGRAL_TYPE_P (TREE_TYPE (src)))
337 return false;
339 n->base_addr = n->offset = n->alias_set = n->vuse = NULL_TREE;
340 n->src = src;
342 /* Set up the symbolic number N by setting each byte to a value between 1 and
343 the byte size of rhs1. The highest order byte is set to n->size and the
344 lowest order byte to 1. */
345 n->type = TREE_TYPE (src);
346 size = TYPE_PRECISION (n->type);
347 if (size % BITS_PER_UNIT != 0)
348 return false;
349 size /= BITS_PER_UNIT;
350 if (size > 64 / BITS_PER_MARKER)
351 return false;
352 n->range = size;
353 n->n = CMPNOP;
354 n->n_ops = 1;
356 if (size < 64 / BITS_PER_MARKER)
357 n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
359 return true;
362 /* Check if STMT might be a byte swap or a nop from a memory source and return
363 the answer. If so, REF is that memory source and the base of the memory area
364 accessed and the offset of the access from that base are recorded in N. */
366 bool
367 find_bswap_or_nop_load (gimple *stmt, tree ref, struct symbolic_number *n)
369 /* Leaf node is an array or component ref. Memorize its base and
370 offset from base to compare to other such leaf node. */
371 poly_int64 bitsize, bitpos, bytepos;
372 machine_mode mode;
373 int unsignedp, reversep, volatilep;
374 tree offset, base_addr;
376 /* Not prepared to handle PDP endian. */
377 if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
378 return false;
380 if (!gimple_assign_load_p (stmt) || gimple_has_volatile_ops (stmt))
381 return false;
383 base_addr = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode,
384 &unsignedp, &reversep, &volatilep);
386 if (TREE_CODE (base_addr) == TARGET_MEM_REF)
387 /* Do not rewrite TARGET_MEM_REF. */
388 return false;
389 else if (TREE_CODE (base_addr) == MEM_REF)
391 poly_offset_int bit_offset = 0;
392 tree off = TREE_OPERAND (base_addr, 1);
394 if (!integer_zerop (off))
396 poly_offset_int boff = mem_ref_offset (base_addr);
397 boff <<= LOG2_BITS_PER_UNIT;
398 bit_offset += boff;
401 base_addr = TREE_OPERAND (base_addr, 0);
403 /* Avoid returning a negative bitpos as this may wreak havoc later. */
404 if (maybe_lt (bit_offset, 0))
406 tree byte_offset = wide_int_to_tree
407 (sizetype, bits_to_bytes_round_down (bit_offset));
408 bit_offset = num_trailing_bits (bit_offset);
409 if (offset)
410 offset = size_binop (PLUS_EXPR, offset, byte_offset);
411 else
412 offset = byte_offset;
415 bitpos += bit_offset.force_shwi ();
417 else
418 base_addr = build_fold_addr_expr (base_addr);
420 if (!multiple_p (bitpos, BITS_PER_UNIT, &bytepos))
421 return false;
422 if (!multiple_p (bitsize, BITS_PER_UNIT))
423 return false;
424 if (reversep)
425 return false;
427 if (!init_symbolic_number (n, ref))
428 return false;
429 n->base_addr = base_addr;
430 n->offset = offset;
431 n->bytepos = bytepos;
432 n->alias_set = reference_alias_ptr_type (ref);
433 n->vuse = gimple_vuse (stmt);
434 return true;
437 /* Compute the symbolic number N representing the result of a bitwise OR on the
438 2 symbolic numbers N1 and N2 whose source statements are respectively
439 SOURCE_STMT1 and SOURCE_STMT2. */
441 gimple *
442 perform_symbolic_merge (gimple *source_stmt1, struct symbolic_number *n1,
443 gimple *source_stmt2, struct symbolic_number *n2,
444 struct symbolic_number *n)
446 int i, size;
447 uint64_t mask;
448 gimple *source_stmt;
449 struct symbolic_number *n_start;
451 tree rhs1 = gimple_assign_rhs1 (source_stmt1);
452 if (TREE_CODE (rhs1) == BIT_FIELD_REF
453 && TREE_CODE (TREE_OPERAND (rhs1, 0)) == SSA_NAME)
454 rhs1 = TREE_OPERAND (rhs1, 0);
455 tree rhs2 = gimple_assign_rhs1 (source_stmt2);
456 if (TREE_CODE (rhs2) == BIT_FIELD_REF
457 && TREE_CODE (TREE_OPERAND (rhs2, 0)) == SSA_NAME)
458 rhs2 = TREE_OPERAND (rhs2, 0);
460 /* Sources are different, cancel bswap if they are not memory locations with
461 the same base (array, structure, ...). */
462 if (rhs1 != rhs2)
464 uint64_t inc;
465 HOST_WIDE_INT start1, start2, start_sub, end_sub, end1, end2, end;
466 struct symbolic_number *toinc_n_ptr, *n_end;
467 basic_block bb1, bb2;
469 if (!n1->base_addr || !n2->base_addr
470 || !operand_equal_p (n1->base_addr, n2->base_addr, 0))
471 return NULL;
473 if (!n1->offset != !n2->offset
474 || (n1->offset && !operand_equal_p (n1->offset, n2->offset, 0)))
475 return NULL;
477 start1 = 0;
478 if (!(n2->bytepos - n1->bytepos).is_constant (&start2))
479 return NULL;
481 if (start1 < start2)
483 n_start = n1;
484 start_sub = start2 - start1;
486 else
488 n_start = n2;
489 start_sub = start1 - start2;
492 bb1 = gimple_bb (source_stmt1);
493 bb2 = gimple_bb (source_stmt2);
494 if (dominated_by_p (CDI_DOMINATORS, bb1, bb2))
495 source_stmt = source_stmt1;
496 else
497 source_stmt = source_stmt2;
499 /* Find the highest address at which a load is performed and
500 compute related info. */
501 end1 = start1 + (n1->range - 1);
502 end2 = start2 + (n2->range - 1);
503 if (end1 < end2)
505 end = end2;
506 end_sub = end2 - end1;
508 else
510 end = end1;
511 end_sub = end1 - end2;
513 n_end = (end2 > end1) ? n2 : n1;
515 /* Find symbolic number whose lsb is the most significant. */
516 if (BYTES_BIG_ENDIAN)
517 toinc_n_ptr = (n_end == n1) ? n2 : n1;
518 else
519 toinc_n_ptr = (n_start == n1) ? n2 : n1;
521 n->range = end - MIN (start1, start2) + 1;
523 /* Check that the range of memory covered can be represented by
524 a symbolic number. */
525 if (n->range > 64 / BITS_PER_MARKER)
526 return NULL;
528 /* Reinterpret byte marks in symbolic number holding the value of
529 bigger weight according to target endianness. */
530 inc = BYTES_BIG_ENDIAN ? end_sub : start_sub;
531 size = TYPE_PRECISION (n1->type) / BITS_PER_UNIT;
532 for (i = 0; i < size; i++, inc <<= BITS_PER_MARKER)
534 unsigned marker
535 = (toinc_n_ptr->n >> (i * BITS_PER_MARKER)) & MARKER_MASK;
536 if (marker && marker != MARKER_BYTE_UNKNOWN)
537 toinc_n_ptr->n += inc;
540 else
542 n->range = n1->range;
543 n_start = n1;
544 source_stmt = source_stmt1;
547 if (!n1->alias_set
548 || alias_ptr_types_compatible_p (n1->alias_set, n2->alias_set))
549 n->alias_set = n1->alias_set;
550 else
551 n->alias_set = ptr_type_node;
552 n->vuse = n_start->vuse;
553 n->base_addr = n_start->base_addr;
554 n->offset = n_start->offset;
555 n->src = n_start->src;
556 n->bytepos = n_start->bytepos;
557 n->type = n_start->type;
558 size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
560 for (i = 0, mask = MARKER_MASK; i < size; i++, mask <<= BITS_PER_MARKER)
562 uint64_t masked1, masked2;
564 masked1 = n1->n & mask;
565 masked2 = n2->n & mask;
566 if (masked1 && masked2 && masked1 != masked2)
567 return NULL;
569 n->n = n1->n | n2->n;
570 n->n_ops = n1->n_ops + n2->n_ops;
572 return source_stmt;
575 /* find_bswap_or_nop_1 invokes itself recursively with N and tries to perform
576 the operation given by the rhs of STMT on the result. If the operation
577 could successfully be executed the function returns a gimple stmt whose
578 rhs's first tree is the expression of the source operand and NULL
579 otherwise. */
581 gimple *
582 find_bswap_or_nop_1 (gimple *stmt, struct symbolic_number *n, int limit)
584 enum tree_code code;
585 tree rhs1, rhs2 = NULL;
586 gimple *rhs1_stmt, *rhs2_stmt, *source_stmt1;
587 enum gimple_rhs_class rhs_class;
589 if (!limit || !is_gimple_assign (stmt))
590 return NULL;
592 rhs1 = gimple_assign_rhs1 (stmt);
594 if (find_bswap_or_nop_load (stmt, rhs1, n))
595 return stmt;
597 /* Handle BIT_FIELD_REF. */
598 if (TREE_CODE (rhs1) == BIT_FIELD_REF
599 && TREE_CODE (TREE_OPERAND (rhs1, 0)) == SSA_NAME)
601 if (!tree_fits_uhwi_p (TREE_OPERAND (rhs1, 1))
602 || !tree_fits_uhwi_p (TREE_OPERAND (rhs1, 2)))
603 return NULL;
605 unsigned HOST_WIDE_INT bitsize = tree_to_uhwi (TREE_OPERAND (rhs1, 1));
606 unsigned HOST_WIDE_INT bitpos = tree_to_uhwi (TREE_OPERAND (rhs1, 2));
607 if (bitpos % BITS_PER_UNIT == 0
608 && bitsize % BITS_PER_UNIT == 0
609 && init_symbolic_number (n, TREE_OPERAND (rhs1, 0)))
611 /* Handle big-endian bit numbering in BIT_FIELD_REF. */
612 if (BYTES_BIG_ENDIAN)
613 bitpos = TYPE_PRECISION (n->type) - bitpos - bitsize;
615 /* Shift. */
616 if (!do_shift_rotate (RSHIFT_EXPR, n, bitpos))
617 return NULL;
619 /* Mask. */
620 uint64_t mask = 0;
621 uint64_t tmp = (1 << BITS_PER_UNIT) - 1;
622 for (unsigned i = 0; i < bitsize / BITS_PER_UNIT;
623 i++, tmp <<= BITS_PER_UNIT)
624 mask |= (uint64_t) MARKER_MASK << (i * BITS_PER_MARKER);
625 n->n &= mask;
627 /* Convert. */
628 n->type = TREE_TYPE (rhs1);
629 if (!n->base_addr)
630 n->range = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
632 return verify_symbolic_number_p (n, stmt) ? stmt : NULL;
635 return NULL;
638 if (TREE_CODE (rhs1) != SSA_NAME)
639 return NULL;
641 code = gimple_assign_rhs_code (stmt);
642 rhs_class = gimple_assign_rhs_class (stmt);
643 rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
645 if (rhs_class == GIMPLE_BINARY_RHS)
646 rhs2 = gimple_assign_rhs2 (stmt);
648 /* Handle unary rhs and binary rhs with integer constants as second
649 operand. */
651 if (rhs_class == GIMPLE_UNARY_RHS
652 || (rhs_class == GIMPLE_BINARY_RHS
653 && TREE_CODE (rhs2) == INTEGER_CST))
655 if (code != BIT_AND_EXPR
656 && code != LSHIFT_EXPR
657 && code != RSHIFT_EXPR
658 && code != LROTATE_EXPR
659 && code != RROTATE_EXPR
660 && !CONVERT_EXPR_CODE_P (code))
661 return NULL;
663 source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, n, limit - 1);
665 /* If find_bswap_or_nop_1 returned NULL, STMT is a leaf node and
666 we have to initialize the symbolic number. */
667 if (!source_stmt1)
669 if (gimple_assign_load_p (stmt)
670 || !init_symbolic_number (n, rhs1))
671 return NULL;
672 source_stmt1 = stmt;
675 switch (code)
677 case BIT_AND_EXPR:
679 int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
680 uint64_t val = int_cst_value (rhs2), mask = 0;
681 uint64_t tmp = (1 << BITS_PER_UNIT) - 1;
683 /* Only constants masking full bytes are allowed. */
684 for (i = 0; i < size; i++, tmp <<= BITS_PER_UNIT)
685 if ((val & tmp) != 0 && (val & tmp) != tmp)
686 return NULL;
687 else if (val & tmp)
688 mask |= (uint64_t) MARKER_MASK << (i * BITS_PER_MARKER);
690 n->n &= mask;
692 break;
693 case LSHIFT_EXPR:
694 case RSHIFT_EXPR:
695 case LROTATE_EXPR:
696 case RROTATE_EXPR:
697 if (!do_shift_rotate (code, n, (int) TREE_INT_CST_LOW (rhs2)))
698 return NULL;
699 break;
700 CASE_CONVERT:
702 int i, type_size, old_type_size;
703 tree type;
705 type = gimple_expr_type (stmt);
706 type_size = TYPE_PRECISION (type);
707 if (type_size % BITS_PER_UNIT != 0)
708 return NULL;
709 type_size /= BITS_PER_UNIT;
710 if (type_size > 64 / BITS_PER_MARKER)
711 return NULL;
713 /* Sign extension: result is dependent on the value. */
714 old_type_size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
715 if (!TYPE_UNSIGNED (n->type) && type_size > old_type_size
716 && HEAD_MARKER (n->n, old_type_size))
717 for (i = 0; i < type_size - old_type_size; i++)
718 n->n |= (uint64_t) MARKER_BYTE_UNKNOWN
719 << ((type_size - 1 - i) * BITS_PER_MARKER);
721 if (type_size < 64 / BITS_PER_MARKER)
723 /* If STMT casts to a smaller type mask out the bits not
724 belonging to the target type. */
725 n->n &= ((uint64_t) 1 << (type_size * BITS_PER_MARKER)) - 1;
727 n->type = type;
728 if (!n->base_addr)
729 n->range = type_size;
731 break;
732 default:
733 return NULL;
735 return verify_symbolic_number_p (n, stmt) ? source_stmt1 : NULL;
738 /* Handle binary rhs. */
740 if (rhs_class == GIMPLE_BINARY_RHS)
742 struct symbolic_number n1, n2;
743 gimple *source_stmt, *source_stmt2;
745 if (code != BIT_IOR_EXPR)
746 return NULL;
748 if (TREE_CODE (rhs2) != SSA_NAME)
749 return NULL;
751 rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
753 switch (code)
755 case BIT_IOR_EXPR:
756 source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, &n1, limit - 1);
758 if (!source_stmt1)
759 return NULL;
761 source_stmt2 = find_bswap_or_nop_1 (rhs2_stmt, &n2, limit - 1);
763 if (!source_stmt2)
764 return NULL;
766 if (TYPE_PRECISION (n1.type) != TYPE_PRECISION (n2.type))
767 return NULL;
769 if (n1.vuse != n2.vuse)
770 return NULL;
772 source_stmt
773 = perform_symbolic_merge (source_stmt1, &n1, source_stmt2, &n2, n);
775 if (!source_stmt)
776 return NULL;
778 if (!verify_symbolic_number_p (n, stmt))
779 return NULL;
781 break;
782 default:
783 return NULL;
785 return source_stmt;
787 return NULL;
790 /* Helper for find_bswap_or_nop and try_coalesce_bswap to compute
791 *CMPXCHG, *CMPNOP and adjust *N. */
793 void
794 find_bswap_or_nop_finalize (struct symbolic_number *n, uint64_t *cmpxchg,
795 uint64_t *cmpnop)
797 unsigned rsize;
798 uint64_t tmpn, mask;
800 /* The number which the find_bswap_or_nop_1 result should match in order
801 to have a full byte swap. The number is shifted to the right
802 according to the size of the symbolic number before using it. */
803 *cmpxchg = CMPXCHG;
804 *cmpnop = CMPNOP;
806 /* Find real size of result (highest non-zero byte). */
807 if (n->base_addr)
808 for (tmpn = n->n, rsize = 0; tmpn; tmpn >>= BITS_PER_MARKER, rsize++);
809 else
810 rsize = n->range;
812 /* Zero out the bits corresponding to untouched bytes in original gimple
813 expression. */
814 if (n->range < (int) sizeof (int64_t))
816 mask = ((uint64_t) 1 << (n->range * BITS_PER_MARKER)) - 1;
817 *cmpxchg >>= (64 / BITS_PER_MARKER - n->range) * BITS_PER_MARKER;
818 *cmpnop &= mask;
821 /* Zero out the bits corresponding to unused bytes in the result of the
822 gimple expression. */
823 if (rsize < n->range)
825 if (BYTES_BIG_ENDIAN)
827 mask = ((uint64_t) 1 << (rsize * BITS_PER_MARKER)) - 1;
828 *cmpxchg &= mask;
829 *cmpnop >>= (n->range - rsize) * BITS_PER_MARKER;
831 else
833 mask = ((uint64_t) 1 << (rsize * BITS_PER_MARKER)) - 1;
834 *cmpxchg >>= (n->range - rsize) * BITS_PER_MARKER;
835 *cmpnop &= mask;
837 n->range = rsize;
840 n->range *= BITS_PER_UNIT;
843 /* Check if STMT completes a bswap implementation or a read in a given
844 endianness consisting of ORs, SHIFTs and ANDs and sets *BSWAP
845 accordingly. It also sets N to represent the kind of operations
846 performed: size of the resulting expression and whether it works on
847 a memory source, and if so alias-set and vuse. At last, the
848 function returns a stmt whose rhs's first tree is the source
849 expression. */
851 gimple *
852 find_bswap_or_nop (gimple *stmt, struct symbolic_number *n, bool *bswap)
854 /* The last parameter determines the search depth limit. It usually
855 correlates directly to the number n of bytes to be touched. We
856 increase that number by 2 * (log2(n) + 1) here in order to also
857 cover signed -> unsigned conversions of the src operand as can be seen
858 in libgcc, and for initial shift/and operation of the src operand. */
859 int limit = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (gimple_expr_type (stmt)));
860 limit += 2 * (1 + (int) ceil_log2 ((unsigned HOST_WIDE_INT) limit));
861 gimple *ins_stmt = find_bswap_or_nop_1 (stmt, n, limit);
863 if (!ins_stmt)
864 return NULL;
866 uint64_t cmpxchg, cmpnop;
867 find_bswap_or_nop_finalize (n, &cmpxchg, &cmpnop);
869 /* A complete byte swap should make the symbolic number start with
870 the largest digit in the highest order byte. An unchanged symbolic
871 number indicates a read with the same endianness as the target architecture. */
872 if (n->n == cmpnop)
873 *bswap = false;
874 else if (n->n == cmpxchg)
875 *bswap = true;
876 else
877 return NULL;
879 /* Useless bit manipulation performed by code. */
880 if (!n->base_addr && n->n == cmpnop && n->n_ops == 1)
881 return NULL;
883 return ins_stmt;
886 const pass_data pass_data_optimize_bswap =
888 GIMPLE_PASS, /* type */
889 "bswap", /* name */
890 OPTGROUP_NONE, /* optinfo_flags */
891 TV_NONE, /* tv_id */
892 PROP_ssa, /* properties_required */
893 0, /* properties_provided */
894 0, /* properties_destroyed */
895 0, /* todo_flags_start */
896 0, /* todo_flags_finish */
899 class pass_optimize_bswap : public gimple_opt_pass
901 public:
902 pass_optimize_bswap (gcc::context *ctxt)
903 : gimple_opt_pass (pass_data_optimize_bswap, ctxt)
906 /* opt_pass methods: */
907 virtual bool gate (function *)
909 return flag_expensive_optimizations && optimize && BITS_PER_UNIT == 8;
912 virtual unsigned int execute (function *);
914 }; // class pass_optimize_bswap
916 /* Perform the bswap optimization: replace the expression computed in the rhs
917 of gsi_stmt (GSI) (or if NULL add instead of replace) by an equivalent
918 bswap, load or load + bswap expression.
919 Which of these alternatives replaces the rhs is given by N->base_addr (non
920 null if a load is needed) and BSWAP. The type, VUSE and alias-set of the
921 load to perform are also given in N while the builtin bswap invocation is
922 given in FNDECL. Finally, if a load is involved, INS_STMT refers to one of the
923 load statements involved to construct the rhs in gsi_stmt (GSI) and
924 N->range gives the size of the rhs expression for maintaining some
925 statistics.
927 Note that if the replacement involves a load and if gsi_stmt (GSI) is
928 non-NULL, that stmt is moved just after INS_STMT to do the load with the
929 same VUSE, which can lead to gsi_stmt (GSI) changing basic block. */
931 tree
932 bswap_replace (gimple_stmt_iterator gsi, gimple *ins_stmt, tree fndecl,
933 tree bswap_type, tree load_type, struct symbolic_number *n,
934 bool bswap)
936 tree src, tmp, tgt = NULL_TREE;
937 gimple *bswap_stmt;
939 gimple *cur_stmt = gsi_stmt (gsi);
940 src = n->src;
941 if (cur_stmt)
942 tgt = gimple_assign_lhs (cur_stmt);
944 /* Need to load the value from memory first. */
945 if (n->base_addr)
947 gimple_stmt_iterator gsi_ins = gsi;
948 if (ins_stmt)
949 gsi_ins = gsi_for_stmt (ins_stmt);
950 tree addr_expr, addr_tmp, val_expr, val_tmp;
951 tree load_offset_ptr, aligned_load_type;
952 gimple *load_stmt;
953 unsigned align = get_object_alignment (src);
954 poly_int64 load_offset = 0;
956 if (cur_stmt)
958 basic_block ins_bb = gimple_bb (ins_stmt);
959 basic_block cur_bb = gimple_bb (cur_stmt);
960 if (!dominated_by_p (CDI_DOMINATORS, cur_bb, ins_bb))
961 return NULL_TREE;
963 /* Move cur_stmt just before one of the loads of the original
964 to ensure it has the same VUSE. See PR61517 for what could
965 go wrong. */
966 if (gimple_bb (cur_stmt) != gimple_bb (ins_stmt))
967 reset_flow_sensitive_info (gimple_assign_lhs (cur_stmt));
968 gsi_move_before (&gsi, &gsi_ins);
969 gsi = gsi_for_stmt (cur_stmt);
971 else
972 gsi = gsi_ins;
974 /* Compute address to load from and cast according to the size
975 of the load. */
976 addr_expr = build_fold_addr_expr (src);
977 if (is_gimple_mem_ref_addr (addr_expr))
978 addr_tmp = unshare_expr (addr_expr);
979 else
981 addr_tmp = unshare_expr (n->base_addr);
982 if (!is_gimple_mem_ref_addr (addr_tmp))
983 addr_tmp = force_gimple_operand_gsi_1 (&gsi, addr_tmp,
984 is_gimple_mem_ref_addr,
985 NULL_TREE, true,
986 GSI_SAME_STMT);
987 load_offset = n->bytepos;
988 if (n->offset)
990 tree off
991 = force_gimple_operand_gsi (&gsi, unshare_expr (n->offset),
992 true, NULL_TREE, true,
993 GSI_SAME_STMT);
994 gimple *stmt
995 = gimple_build_assign (make_ssa_name (TREE_TYPE (addr_tmp)),
996 POINTER_PLUS_EXPR, addr_tmp, off);
997 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
998 addr_tmp = gimple_assign_lhs (stmt);
1002 /* Perform the load. */
1003 aligned_load_type = load_type;
1004 if (align < TYPE_ALIGN (load_type))
1005 aligned_load_type = build_aligned_type (load_type, align);
1006 load_offset_ptr = build_int_cst (n->alias_set, load_offset);
1007 val_expr = fold_build2 (MEM_REF, aligned_load_type, addr_tmp,
1008 load_offset_ptr);
1010 if (!bswap)
1012 if (n->range == 16)
1013 nop_stats.found_16bit++;
1014 else if (n->range == 32)
1015 nop_stats.found_32bit++;
1016 else
1018 gcc_assert (n->range == 64);
1019 nop_stats.found_64bit++;
1022 /* Convert the result of load if necessary. */
1023 if (tgt && !useless_type_conversion_p (TREE_TYPE (tgt), load_type))
1025 val_tmp = make_temp_ssa_name (aligned_load_type, NULL,
1026 "load_dst");
1027 load_stmt = gimple_build_assign (val_tmp, val_expr);
1028 gimple_set_vuse (load_stmt, n->vuse);
1029 gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT);
1030 gimple_assign_set_rhs_with_ops (&gsi, NOP_EXPR, val_tmp);
1031 update_stmt (cur_stmt);
1033 else if (cur_stmt)
1035 gimple_assign_set_rhs_with_ops (&gsi, MEM_REF, val_expr);
1036 gimple_set_vuse (cur_stmt, n->vuse);
1037 update_stmt (cur_stmt);
1039 else
1041 tgt = make_ssa_name (load_type);
1042 cur_stmt = gimple_build_assign (tgt, MEM_REF, val_expr);
1043 gimple_set_vuse (cur_stmt, n->vuse);
1044 gsi_insert_before (&gsi, cur_stmt, GSI_SAME_STMT);
1047 if (dump_file)
1049 fprintf (dump_file,
1050 "%d bit load in target endianness found at: ",
1051 (int) n->range);
1052 print_gimple_stmt (dump_file, cur_stmt, 0);
1054 return tgt;
1056 else
1058 val_tmp = make_temp_ssa_name (aligned_load_type, NULL, "load_dst");
1059 load_stmt = gimple_build_assign (val_tmp, val_expr);
1060 gimple_set_vuse (load_stmt, n->vuse);
1061 gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT);
1063 src = val_tmp;
1065 else if (!bswap)
1067 gimple *g = NULL;
1068 if (tgt && !useless_type_conversion_p (TREE_TYPE (tgt), TREE_TYPE (src)))
1070 if (!is_gimple_val (src))
1071 return NULL_TREE;
1072 g = gimple_build_assign (tgt, NOP_EXPR, src);
1074 else if (cur_stmt)
1075 g = gimple_build_assign (tgt, src);
1076 else
1077 tgt = src;
1078 if (n->range == 16)
1079 nop_stats.found_16bit++;
1080 else if (n->range == 32)
1081 nop_stats.found_32bit++;
1082 else
1084 gcc_assert (n->range == 64);
1085 nop_stats.found_64bit++;
1087 if (dump_file)
1089 fprintf (dump_file,
1090 "%d bit reshuffle in target endianness found at: ",
1091 (int) n->range);
1092 if (cur_stmt)
1093 print_gimple_stmt (dump_file, cur_stmt, 0);
1094 else
1096 print_generic_expr (dump_file, tgt, TDF_NONE);
1097 fprintf (dump_file, "\n");
1100 if (cur_stmt)
1101 gsi_replace (&gsi, g, true);
1102 return tgt;
1104 else if (TREE_CODE (src) == BIT_FIELD_REF)
1105 src = TREE_OPERAND (src, 0);
1107 if (n->range == 16)
1108 bswap_stats.found_16bit++;
1109 else if (n->range == 32)
1110 bswap_stats.found_32bit++;
1111 else
1113 gcc_assert (n->range == 64);
1114 bswap_stats.found_64bit++;
1117 tmp = src;
1119 /* Convert the src expression if necessary. */
1120 if (!useless_type_conversion_p (TREE_TYPE (tmp), bswap_type))
1122 gimple *convert_stmt;
1124 tmp = make_temp_ssa_name (bswap_type, NULL, "bswapsrc");
1125 convert_stmt = gimple_build_assign (tmp, NOP_EXPR, src);
1126 gsi_insert_before (&gsi, convert_stmt, GSI_SAME_STMT);
1129 /* The canonical form for a 16-bit bswap is a rotate expression. Only 16-bit
1130 values are handled this way because rotation of 2N-bit values by N bits is
1131 generally not equivalent to a bswap. Consider for instance 0x01020304 r>> 16,
1132 which gives 0x03040102, while a bswap of that value is 0x04030201. */
1133 if (bswap && n->range == 16)
1135 tree count = build_int_cst (NULL, BITS_PER_UNIT);
1136 src = fold_build2 (LROTATE_EXPR, bswap_type, tmp, count);
1137 bswap_stmt = gimple_build_assign (NULL, src);
1139 else
1140 bswap_stmt = gimple_build_call (fndecl, 1, tmp);
1142 if (tgt == NULL_TREE)
1143 tgt = make_ssa_name (bswap_type);
1144 tmp = tgt;
1146 /* Convert the result if necessary. */
1147 if (!useless_type_conversion_p (TREE_TYPE (tgt), bswap_type))
1149 gimple *convert_stmt;
1151 tmp = make_temp_ssa_name (bswap_type, NULL, "bswapdst");
1152 convert_stmt = gimple_build_assign (tgt, NOP_EXPR, tmp);
1153 gsi_insert_after (&gsi, convert_stmt, GSI_SAME_STMT);
1156 gimple_set_lhs (bswap_stmt, tmp);
1158 if (dump_file)
1160 fprintf (dump_file, "%d bit bswap implementation found at: ",
1161 (int) n->range);
1162 if (cur_stmt)
1163 print_gimple_stmt (dump_file, cur_stmt, 0);
1164 else
1166 print_generic_expr (dump_file, tgt, TDF_NONE);
1167 fprintf (dump_file, "\n");
1171 if (cur_stmt)
1173 gsi_insert_after (&gsi, bswap_stmt, GSI_SAME_STMT);
1174 gsi_remove (&gsi, true);
1176 else
1177 gsi_insert_before (&gsi, bswap_stmt, GSI_SAME_STMT);
1178 return tgt;
1181 /* Find manual byte swap implementations as well as loads in a given
1182 endianness. Byte swaps are turned into a bswap builtin invocation,
1183 while endian loads are converted to a bswap builtin invocation or a
1184 simple load according to the target endianness. */
1186 unsigned int
1187 pass_optimize_bswap::execute (function *fun)
1189 basic_block bb;
1190 bool bswap32_p, bswap64_p;
1191 bool changed = false;
1192 tree bswap32_type = NULL_TREE, bswap64_type = NULL_TREE;
1194 bswap32_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
1195 && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing);
1196 bswap64_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP64)
1197 && (optab_handler (bswap_optab, DImode) != CODE_FOR_nothing
1198 || (bswap32_p && word_mode == SImode)));
1200 /* Determine the argument type of the builtins. The code later on
1201 assumes that the return and argument type are the same. */
1202 if (bswap32_p)
1204 tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
1205 bswap32_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
1208 if (bswap64_p)
1210 tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
1211 bswap64_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
1214 memset (&nop_stats, 0, sizeof (nop_stats));
1215 memset (&bswap_stats, 0, sizeof (bswap_stats));
1216 calculate_dominance_info (CDI_DOMINATORS);
1218 FOR_EACH_BB_FN (bb, fun)
1220 gimple_stmt_iterator gsi;
1222 /* We do a reverse scan for bswap patterns to make sure we get the
1223 widest match. As bswap pattern matching doesn't handle previously
1224 inserted smaller bswap replacements as sub-patterns, the wider
1225 variant wouldn't be detected. */
1226 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi);)
1228 gimple *ins_stmt, *cur_stmt = gsi_stmt (gsi);
1229 tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type;
1230 enum tree_code code;
1231 struct symbolic_number n;
1232 bool bswap;
1234 /* This gsi_prev (&gsi) is not part of the for loop because cur_stmt
1235 might be moved to a different basic block by bswap_replace and gsi
1236 must not point to it in that case. Doing the gsi_prev
1237 here makes sure that gsi points to the statement previous to
1238 cur_stmt while still making sure that all statements in this
1239 basic block are considered. */
1240 gsi_prev (&gsi);
1242 if (!is_gimple_assign (cur_stmt))
1243 continue;
1245 code = gimple_assign_rhs_code (cur_stmt);
1246 switch (code)
1248 case LROTATE_EXPR:
1249 case RROTATE_EXPR:
1250 if (!tree_fits_uhwi_p (gimple_assign_rhs2 (cur_stmt))
1251 || tree_to_uhwi (gimple_assign_rhs2 (cur_stmt))
1252 % BITS_PER_UNIT)
1253 continue;
1254 /* Fall through. */
1255 case BIT_IOR_EXPR:
1256 break;
1257 default:
1258 continue;
1261 ins_stmt = find_bswap_or_nop (cur_stmt, &n, &bswap);
1263 if (!ins_stmt)
1264 continue;
1266 switch (n.range)
1268 case 16:
1269 /* Already in canonical form, nothing to do. */
1270 if (code == LROTATE_EXPR || code == RROTATE_EXPR)
1271 continue;
1272 load_type = bswap_type = uint16_type_node;
1273 break;
1274 case 32:
1275 load_type = uint32_type_node;
1276 if (bswap32_p)
1278 fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
1279 bswap_type = bswap32_type;
1281 break;
1282 case 64:
1283 load_type = uint64_type_node;
1284 if (bswap64_p)
1286 fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
1287 bswap_type = bswap64_type;
1289 break;
1290 default:
1291 continue;
1294 if (bswap && !fndecl && n.range != 16)
1295 continue;
1297 if (bswap_replace (gsi_for_stmt (cur_stmt), ins_stmt, fndecl,
1298 bswap_type, load_type, &n, bswap))
1299 changed = true;
1303 statistics_counter_event (fun, "16-bit nop implementations found",
1304 nop_stats.found_16bit);
1305 statistics_counter_event (fun, "32-bit nop implementations found",
1306 nop_stats.found_32bit);
1307 statistics_counter_event (fun, "64-bit nop implementations found",
1308 nop_stats.found_64bit);
1309 statistics_counter_event (fun, "16-bit bswap implementations found",
1310 bswap_stats.found_16bit);
1311 statistics_counter_event (fun, "32-bit bswap implementations found",
1312 bswap_stats.found_32bit);
1313 statistics_counter_event (fun, "64-bit bswap implementations found",
1314 bswap_stats.found_64bit);
1316 return (changed ? TODO_update_ssa : 0);
1319 } // anon namespace
1321 gimple_opt_pass *
1322 make_pass_optimize_bswap (gcc::context *ctxt)
1324 return new pass_optimize_bswap (ctxt);
1327 namespace {
1329 /* Struct recording one operand for the store, which is either a constant,
1330 then VAL represents the constant and all the other fields are zero, or
1331 a memory load, then VAL represents the reference, BASE_ADDR is non-NULL
1332 and the other fields also reflect the memory load, or an SSA name, then
1333 VAL represents the SSA name and all the other fields are zero. */
1335 class store_operand_info
1337 public:
1338 tree val;
1339 tree base_addr;
1340 poly_uint64 bitsize;
1341 poly_uint64 bitpos;
1342 poly_uint64 bitregion_start;
1343 poly_uint64 bitregion_end;
1344 gimple *stmt;
1345 bool bit_not_p;
1346 store_operand_info ();
1349 store_operand_info::store_operand_info ()
1350 : val (NULL_TREE), base_addr (NULL_TREE), bitsize (0), bitpos (0),
1351 bitregion_start (0), bitregion_end (0), stmt (NULL), bit_not_p (false)
1355 /* Struct recording the information about a single store of an immediate
1356 to memory. These are created in the first phase and coalesced into
1357 merged_store_group objects in the second phase. */
1359 class store_immediate_info
1361 public:
1362 unsigned HOST_WIDE_INT bitsize;
1363 unsigned HOST_WIDE_INT bitpos;
1364 unsigned HOST_WIDE_INT bitregion_start;
1365 /* This is one past the last bit of the bit region. */
1366 unsigned HOST_WIDE_INT bitregion_end;
1367 gimple *stmt;
1368 unsigned int order;
1369 /* INTEGER_CST for constant store, STRING_CST for string store,
1370 MEM_REF for memory copy, BIT_*_EXPR for logical bitwise operation,
1371 BIT_INSERT_EXPR for bit insertion.
1372 LROTATE_EXPR if it can be only bswap optimized and
1373 ops are not really meaningful.
1374 NOP_EXPR if bswap optimization detected identity, ops
1375 are not meaningful. */
1376 enum tree_code rhs_code;
1377 /* Two fields for bswap optimization purposes. */
1378 struct symbolic_number n;
1379 gimple *ins_stmt;
1380 /* True if BIT_{AND,IOR,XOR}_EXPR result is inverted before storing. */
1381 bool bit_not_p;
1382 /* True if ops have been swapped and thus ops[1] represents
1383 rhs1 of BIT_{AND,IOR,XOR}_EXPR and ops[0] represents rhs2. */
1384 bool ops_swapped_p;
1385 /* The index number of the landing pad, or 0 if there is none. */
1386 int lp_nr;
1387 /* Operands. For BIT_*_EXPR rhs_code both operands are used, otherwise
1388 just the first one. */
1389 store_operand_info ops[2];
1390 store_immediate_info (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
1391 unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
1392 gimple *, unsigned int, enum tree_code,
1393 struct symbolic_number &, gimple *, bool, int,
1394 const store_operand_info &,
1395 const store_operand_info &);
1398 store_immediate_info::store_immediate_info (unsigned HOST_WIDE_INT bs,
1399 unsigned HOST_WIDE_INT bp,
1400 unsigned HOST_WIDE_INT brs,
1401 unsigned HOST_WIDE_INT bre,
1402 gimple *st,
1403 unsigned int ord,
1404 enum tree_code rhscode,
1405 struct symbolic_number &nr,
1406 gimple *ins_stmtp,
1407 bool bitnotp,
1408 int nr2,
1409 const store_operand_info &op0r,
1410 const store_operand_info &op1r)
1411 : bitsize (bs), bitpos (bp), bitregion_start (brs), bitregion_end (bre),
1412 stmt (st), order (ord), rhs_code (rhscode), n (nr),
1413 ins_stmt (ins_stmtp), bit_not_p (bitnotp), ops_swapped_p (false),
1414 lp_nr (nr2)
1415 #if __cplusplus >= 201103L
1416 , ops { op0r, op1r }
1419 #else
1421 ops[0] = op0r;
1422 ops[1] = op1r;
1424 #endif
1426 /* Struct representing a group of stores to contiguous memory locations.
1427 These are produced by the second phase (coalescing) and consumed in the
1428 third phase that outputs the widened stores. */
1430 class merged_store_group
1432 public:
1433 unsigned HOST_WIDE_INT start;
1434 unsigned HOST_WIDE_INT width;
1435 unsigned HOST_WIDE_INT bitregion_start;
1436 unsigned HOST_WIDE_INT bitregion_end;
1437 /* The size of the allocated memory for val and mask. */
1438 unsigned HOST_WIDE_INT buf_size;
1439 unsigned HOST_WIDE_INT align_base;
1440 poly_uint64 load_align_base[2];
1442 unsigned int align;
1443 unsigned int load_align[2];
1444 unsigned int first_order;
1445 unsigned int last_order;
1446 bool bit_insertion;
1447 bool string_concatenation;
1448 bool only_constants;
1449 unsigned int first_nonmergeable_order;
1450 int lp_nr;
1452 auto_vec<store_immediate_info *> stores;
1453 /* We record the first and last original statements in the sequence because
1454 we'll need their vuse/vdef and replacement position. It's easier to keep
1455 track of them separately as 'stores' is reordered by apply_stores. */
1456 gimple *last_stmt;
1457 gimple *first_stmt;
1458 unsigned char *val;
1459 unsigned char *mask;
1461 merged_store_group (store_immediate_info *);
1462 ~merged_store_group ();
1463 bool can_be_merged_into (store_immediate_info *);
1464 void merge_into (store_immediate_info *);
1465 void merge_overlapping (store_immediate_info *);
1466 bool apply_stores ();
1467 private:
1468 void do_merge (store_immediate_info *);
1471 /* Debug helper. Dump LEN elements of byte array PTR to FD in hex. */
1473 static void
1474 dump_char_array (FILE *fd, unsigned char *ptr, unsigned int len)
1476 if (!fd)
1477 return;
1479 for (unsigned int i = 0; i < len; i++)
1480 fprintf (fd, "%02x ", ptr[i]);
1481 fprintf (fd, "\n");
1484 /* Clear out LEN bits starting from bit START in the byte array
1485 PTR. This clears the bits to the *right* from START.
1486 START must be within [0, BITS_PER_UNIT) and counts starting from
1487 the least significant bit. */
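/* Usage sketch (illustration only, not part of the pass), assuming
   BITS_PER_UNIT == 8:

     unsigned char buf[2] = { 0xff, 0xff };
     clear_bit_region_be (buf, 5, 8);

   clears bits 5..0 of buf[0] and then the two most significant bits of
   buf[1], leaving buf[0] == 0xc0 and buf[1] == 0x3f.  */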
1489 static void
1490 clear_bit_region_be (unsigned char *ptr, unsigned int start,
1491 unsigned int len)
1493 if (len == 0)
1494 return;
1495 /* Clear len bits to the right of start. */
1496 else if (len <= start + 1)
1498 unsigned char mask = (~(~0U << len));
1499 mask = mask << (start + 1U - len);
1500 ptr[0] &= ~mask;
1502 else if (start != BITS_PER_UNIT - 1)
1504 clear_bit_region_be (ptr, start, (start % BITS_PER_UNIT) + 1);
1505 clear_bit_region_be (ptr + 1, BITS_PER_UNIT - 1,
1506 len - (start % BITS_PER_UNIT) - 1);
1508 else if (start == BITS_PER_UNIT - 1
1509 && len > BITS_PER_UNIT)
1511 unsigned int nbytes = len / BITS_PER_UNIT;
1512 memset (ptr, 0, nbytes);
1513 if (len % BITS_PER_UNIT != 0)
1514 clear_bit_region_be (ptr + nbytes, BITS_PER_UNIT - 1,
1515 len % BITS_PER_UNIT);
1517 else
1518 gcc_unreachable ();
1521 /* In the byte array PTR clear the bit region starting at bit
1522 START that is LEN bits wide.
1523 For regions spanning multiple bytes do this recursively until we reach
1524 zero LEN or a region contained within a single byte. */
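/* Usage sketch (illustration only, not part of the pass), assuming
   BITS_PER_UNIT == 8:

     unsigned char buf[2] = { 0xff, 0xff };
     clear_bit_region (buf, 6, 4);

   clears bits 6..7 of buf[0] and bits 0..1 of buf[1], leaving
   buf[0] == 0x3f and buf[1] == 0xfc.  */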
1526 static void
1527 clear_bit_region (unsigned char *ptr, unsigned int start,
1528 unsigned int len)
1530 /* Degenerate base case. */
1531 if (len == 0)
1532 return;
1533 else if (start >= BITS_PER_UNIT)
1534 clear_bit_region (ptr + 1, start - BITS_PER_UNIT, len);
1535 /* Second base case. */
1536 else if ((start + len) <= BITS_PER_UNIT)
1538 unsigned char mask = (~0U) << (unsigned char) (BITS_PER_UNIT - len);
1539 mask >>= BITS_PER_UNIT - (start + len);
1541 ptr[0] &= ~mask;
1543 return;
1545 /* Clear most significant bits in a byte and proceed with the next byte. */
1546 else if (start != 0)
1548 clear_bit_region (ptr, start, BITS_PER_UNIT - start);
1549 clear_bit_region (ptr + 1, 0, len - (BITS_PER_UNIT - start));
1551 /* Whole bytes need to be cleared. */
1552 else if (start == 0 && len > BITS_PER_UNIT)
1554 unsigned int nbytes = len / BITS_PER_UNIT;
1555 /* We could recurse on each byte but we clear whole bytes, so a simple
1556 memset will do. */
1557 memset (ptr, '\0', nbytes);
1558 /* Clear the remaining sub-byte region if there is one. */
1559 if (len % BITS_PER_UNIT != 0)
1560 clear_bit_region (ptr + nbytes, 0, len % BITS_PER_UNIT);
1562 else
1563 gcc_unreachable ();
1566 /* Write BITLEN bits of EXPR to the byte array PTR at
1567 bit position BITPOS. PTR should contain TOTAL_BYTES elements.
1568 Return true if the operation succeeded. */
1570 static bool
1571 encode_tree_to_bitpos (tree expr, unsigned char *ptr, int bitlen, int bitpos,
1572 unsigned int total_bytes)
1574 unsigned int first_byte = bitpos / BITS_PER_UNIT;
1575 bool sub_byte_op_p = ((bitlen % BITS_PER_UNIT)
1576 || (bitpos % BITS_PER_UNIT)
1577 || !int_mode_for_size (bitlen, 0).exists ());
1578 bool empty_ctor_p
1579 = (TREE_CODE (expr) == CONSTRUCTOR
1580 && CONSTRUCTOR_NELTS (expr) == 0
1581 && TYPE_SIZE_UNIT (TREE_TYPE (expr))
1582 && tree_fits_uhwi_p (TYPE_SIZE_UNIT (TREE_TYPE (expr))));
1584 if (!sub_byte_op_p)
1586 if (first_byte >= total_bytes)
1587 return false;
1588 total_bytes -= first_byte;
1589 if (empty_ctor_p)
1591 unsigned HOST_WIDE_INT rhs_bytes
1592 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (expr)));
1593 if (rhs_bytes > total_bytes)
1594 return false;
1595 memset (ptr + first_byte, '\0', rhs_bytes);
1596 return true;
1598 return native_encode_expr (expr, ptr + first_byte, total_bytes) != 0;
1601 /* LITTLE-ENDIAN
1602 We are writing a non byte-sized quantity or at a position that is not
1603 at a byte boundary.
1604 |--------|--------|--------| ptr + first_byte
1606 xxx xxxxxxxx xxx< bp>
1607 |______EXPR____|
1609 First native_encode_expr EXPR into a temporary buffer and shift each
1610 byte in the buffer by 'bp' (carrying the bits over as necessary).
1611 |00000000|00xxxxxx|xxxxxxxx| << bp = |000xxxxx|xxxxxxxx|xxx00000|
1612 <------bitlen---->< bp>
1613 Then we clear the destination bits:
1614 |---00000|00000000|000-----| ptr + first_byte
1615 <-------bitlen--->< bp>
1617 Finally we ORR the bytes of the shifted EXPR into the cleared region:
1618 |---xxxxx||xxxxxxxx||xxx-----| ptr + first_byte.
1620 BIG-ENDIAN
1621 We are writing a non byte-sized quantity or at a position that is not
1622 at a byte boundary.
1623 ptr + first_byte |--------|--------|--------|
1625 <bp >xxx xxxxxxxx xxx
1626 |_____EXPR_____|
1628 First native_encode_expr EXPR into a temporary buffer and shift each
1629 byte in the buffer to the right by (carrying the bits over as necessary).
1630 We shift by as much as needed to align the most significant bit of EXPR
1631 with bitpos:
1632 |00xxxxxx|xxxxxxxx| >> 3 = |00000xxx|xxxxxxxx|xxxxx000|
1633 <---bitlen----> <bp ><-----bitlen----->
1634 Then we clear the destination bits:
1635 ptr + first_byte |-----000||00000000||00000---|
1636 <bp ><-------bitlen----->
1638 Finally we ORR the bytes of the shifted EXPR into the cleared region:
1639 ptr + first_byte |---xxxxx||xxxxxxxx||xxx-----|.
1640 The awkwardness comes from the fact that bitpos is counted from the
1641 most significant bit of a byte. */
1643 /* We must be dealing with fixed-size data at this point, since the
1644 total size is also fixed. */
1645 unsigned int byte_size;
1646 if (empty_ctor_p)
1648 unsigned HOST_WIDE_INT rhs_bytes
1649 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (expr)));
1650 if (rhs_bytes > total_bytes)
1651 return false;
1652 byte_size = rhs_bytes;
1654 else
1656 fixed_size_mode mode
1657 = as_a <fixed_size_mode> (TYPE_MODE (TREE_TYPE (expr)));
1658 byte_size
1659 = mode == BLKmode
1660 ? tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (expr)))
1661 : GET_MODE_SIZE (mode);
1663 /* Allocate an extra byte so that we have space to shift into. */
1664 byte_size++;
1665 unsigned char *tmpbuf = XALLOCAVEC (unsigned char, byte_size);
1666 memset (tmpbuf, '\0', byte_size);
1667 /* The store detection code should only have allowed constants that are
1668 accepted by native_encode_expr or empty ctors. */
1669 if (!empty_ctor_p
1670 && native_encode_expr (expr, tmpbuf, byte_size - 1) == 0)
1671 gcc_unreachable ();
1673 /* The native_encode_expr machinery uses TYPE_MODE to determine how many
1674 bytes to write. This means it can write more than
1675 ROUND_UP (bitlen, BITS_PER_UNIT) / BITS_PER_UNIT bytes (for example
1676 write 8 bytes for a bitlen of 40). Skip the bytes that are not within
1677 bitlen and zero out the bits that are not relevant as well (that may
1678 contain a sign bit due to sign-extension). */
1679 unsigned int padding
1680 = byte_size - ROUND_UP (bitlen, BITS_PER_UNIT) / BITS_PER_UNIT - 1;
1681 /* On big-endian the padding is at the 'front' so just skip the initial
1682 bytes. */
1683 if (BYTES_BIG_ENDIAN)
1684 tmpbuf += padding;
1686 byte_size -= padding;
1688 if (bitlen % BITS_PER_UNIT != 0)
1690 if (BYTES_BIG_ENDIAN)
1691 clear_bit_region_be (tmpbuf, BITS_PER_UNIT - 1,
1692 BITS_PER_UNIT - (bitlen % BITS_PER_UNIT));
1693 else
1694 clear_bit_region (tmpbuf, bitlen,
1695 byte_size * BITS_PER_UNIT - bitlen);
1697 /* Left shifting relies on the last byte being clear if bitlen is
1698 a multiple of BITS_PER_UNIT, which might not be clear if
1699 there are padding bytes. */
1700 else if (!BYTES_BIG_ENDIAN)
1701 tmpbuf[byte_size - 1] = '\0';
1703 /* Clear the bit region in PTR where the bits from TMPBUF will be
1704 inserted into. */
1705 if (BYTES_BIG_ENDIAN)
1706 clear_bit_region_be (ptr + first_byte,
1707 BITS_PER_UNIT - 1 - (bitpos % BITS_PER_UNIT), bitlen);
1708 else
1709 clear_bit_region (ptr + first_byte, bitpos % BITS_PER_UNIT, bitlen);
1711 int shift_amnt;
1712 int bitlen_mod = bitlen % BITS_PER_UNIT;
1713 int bitpos_mod = bitpos % BITS_PER_UNIT;
1715 bool skip_byte = false;
1716 if (BYTES_BIG_ENDIAN)
1718 /* BITPOS and BITLEN are exactly aligned and no shifting
1719 is necessary. */
1720 if (bitpos_mod + bitlen_mod == BITS_PER_UNIT
1721 || (bitpos_mod == 0 && bitlen_mod == 0))
1722 shift_amnt = 0;
1723 /* |. . . . . . . .|
1724 <bp > <blen >.
1725 We always shift right for BYTES_BIG_ENDIAN so shift the beginning
1726 of the value until it aligns with 'bp' in the next byte over. */
1727 else if (bitpos_mod + bitlen_mod < BITS_PER_UNIT)
1729 shift_amnt = bitlen_mod + bitpos_mod;
1730 skip_byte = bitlen_mod != 0;
1732 /* |. . . . . . . .|
1733 <----bp--->
1734 <---blen---->.
1735 Shift the value right within the same byte so it aligns with 'bp'. */
1736 else
1737 shift_amnt = bitlen_mod + bitpos_mod - BITS_PER_UNIT;
1739 else
1740 shift_amnt = bitpos % BITS_PER_UNIT;
1742 /* Create the shifted version of EXPR. */
1743 if (!BYTES_BIG_ENDIAN)
1745 shift_bytes_in_array_left (tmpbuf, byte_size, shift_amnt);
1746 if (shift_amnt == 0)
1747 byte_size--;
1749 else
1751 gcc_assert (BYTES_BIG_ENDIAN);
1752 shift_bytes_in_array_right (tmpbuf, byte_size, shift_amnt);
1753 /* If shifting right forced us to move into the next byte skip the now
1754 empty byte. */
1755 if (skip_byte)
1757 tmpbuf++;
1758 byte_size--;
1762 /* Insert the bits from TMPBUF. */
1763 for (unsigned int i = 0; i < byte_size; i++)
1764 ptr[first_byte + i] |= tmpbuf[i];
1766 return true;
1769 /* Sorting function for store_immediate_info objects.
1770 Sorts them by bitposition. */
1772 static int
1773 sort_by_bitpos (const void *x, const void *y)
1775 store_immediate_info *const *tmp = (store_immediate_info * const *) x;
1776 store_immediate_info *const *tmp2 = (store_immediate_info * const *) y;
1778 if ((*tmp)->bitpos < (*tmp2)->bitpos)
1779 return -1;
1780 else if ((*tmp)->bitpos > (*tmp2)->bitpos)
1781 return 1;
1782 else
1783 /* If they are the same let's use the order which is guaranteed to
1784 be different. */
1785 return (*tmp)->order - (*tmp2)->order;
1788 /* Sorting function for store_immediate_info objects.
1789 Sorts them by the order field. */
1791 static int
1792 sort_by_order (const void *x, const void *y)
1794 store_immediate_info *const *tmp = (store_immediate_info * const *) x;
1795 store_immediate_info *const *tmp2 = (store_immediate_info * const *) y;
1797 if ((*tmp)->order < (*tmp2)->order)
1798 return -1;
1799 else if ((*tmp)->order > (*tmp2)->order)
1800 return 1;
1802 gcc_unreachable ();
1805 /* Initialize a merged_store_group object from a store_immediate_info
1806 object. */
1808 merged_store_group::merged_store_group (store_immediate_info *info)
1810 start = info->bitpos;
1811 width = info->bitsize;
1812 bitregion_start = info->bitregion_start;
1813 bitregion_end = info->bitregion_end;
1814 /* VAL has memory allocated for it in apply_stores once the group
1815 width has been finalized. */
1816 val = NULL;
1817 mask = NULL;
1818 bit_insertion = info->rhs_code == BIT_INSERT_EXPR;
1819 string_concatenation = info->rhs_code == STRING_CST;
1820 only_constants = info->rhs_code == INTEGER_CST;
1821 first_nonmergeable_order = ~0U;
1822 lp_nr = info->lp_nr;
1823 unsigned HOST_WIDE_INT align_bitpos = 0;
1824 get_object_alignment_1 (gimple_assign_lhs (info->stmt),
1825 &align, &align_bitpos);
1826 align_base = start - align_bitpos;
1827 for (int i = 0; i < 2; ++i)
1829 store_operand_info &op = info->ops[i];
1830 if (op.base_addr == NULL_TREE)
1832 load_align[i] = 0;
1833 load_align_base[i] = 0;
1835 else
1837 get_object_alignment_1 (op.val, &load_align[i], &align_bitpos);
1838 load_align_base[i] = op.bitpos - align_bitpos;
1841 stores.create (1);
1842 stores.safe_push (info);
1843 last_stmt = info->stmt;
1844 last_order = info->order;
1845 first_stmt = last_stmt;
1846 first_order = last_order;
1847 buf_size = 0;
1850 merged_store_group::~merged_store_group ()
1852 if (val)
1853 XDELETEVEC (val);
1856 /* Return true if the store described by INFO can be merged into the group. */
1858 bool
1859 merged_store_group::can_be_merged_into (store_immediate_info *info)
1861 /* Do not merge bswap patterns. */
1862 if (info->rhs_code == LROTATE_EXPR)
1863 return false;
1865 if (info->lp_nr != lp_nr)
1866 return false;
1868 /* The canonical case. */
1869 if (info->rhs_code == stores[0]->rhs_code)
1870 return true;
1872 /* BIT_INSERT_EXPR is compatible with INTEGER_CST if no STRING_CST. */
1873 if (info->rhs_code == BIT_INSERT_EXPR && stores[0]->rhs_code == INTEGER_CST)
1874 return !string_concatenation;
1876 if (stores[0]->rhs_code == BIT_INSERT_EXPR && info->rhs_code == INTEGER_CST)
1877 return !string_concatenation;
1879 /* We can turn MEM_REF into BIT_INSERT_EXPR for bit-field stores, but do it
1880 only for small regions since this can generate a lot of instructions. */
1881 if (info->rhs_code == MEM_REF
1882 && (stores[0]->rhs_code == INTEGER_CST
1883 || stores[0]->rhs_code == BIT_INSERT_EXPR)
1884 && info->bitregion_start == stores[0]->bitregion_start
1885 && info->bitregion_end == stores[0]->bitregion_end
1886 && info->bitregion_end - info->bitregion_start <= MAX_FIXED_MODE_SIZE)
1887 return !string_concatenation;
1889 if (stores[0]->rhs_code == MEM_REF
1890 && (info->rhs_code == INTEGER_CST
1891 || info->rhs_code == BIT_INSERT_EXPR)
1892 && info->bitregion_start == stores[0]->bitregion_start
1893 && info->bitregion_end == stores[0]->bitregion_end
1894 && info->bitregion_end - info->bitregion_start <= MAX_FIXED_MODE_SIZE)
1895 return !string_concatenation;
1897 /* STRING_CST is compatible with INTEGER_CST if no BIT_INSERT_EXPR. */
1898 if (info->rhs_code == STRING_CST
1899 && stores[0]->rhs_code == INTEGER_CST
1900 && stores[0]->bitsize == CHAR_BIT)
1901 return !bit_insertion;
1903 if (stores[0]->rhs_code == STRING_CST
1904 && info->rhs_code == INTEGER_CST
1905 && info->bitsize == CHAR_BIT)
1906 return !bit_insertion;
1908 return false;
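/* Editor's note: a simplified, self-contained summary (not part of this
   pass) of the compatibility rules implemented above, restricted to a toy
   subset of rhs codes.  The real code additionally checks landing pads,
   requires matching bit regions bounded by MAX_FIXED_MODE_SIZE for the
   MEM_REF case and byte-sized constants for the STRING_CST case; the
   example_* names are made up for illustration.  */

#include <stdbool.h>

enum example_rhs
{
  EXAMPLE_INT_CST,     /* INTEGER_CST */
  EXAMPLE_MEM_REF,     /* MEM_REF, i.e. a copy from memory */
  EXAMPLE_BIT_INSERT,  /* BIT_INSERT_EXPR, i.e. a bit-field store */
  EXAMPLE_STRING_CST,  /* STRING_CST */
  EXAMPLE_BSWAP        /* LROTATE_EXPR/bswap pattern */
};

static bool
example_can_merge (enum example_rhs group_first, enum example_rhs candidate,
                   bool group_uses_bit_insertion, bool group_uses_strings)
{
  /* Never merge bswap patterns.  */
  if (candidate == EXAMPLE_BSWAP)
    return false;
  /* The canonical case: the same kind of store.  */
  if (candidate == group_first)
    return true;
  /* Constants mix with bit-field stores unless strings are involved.  */
  if ((candidate == EXAMPLE_BIT_INSERT && group_first == EXAMPLE_INT_CST)
      || (candidate == EXAMPLE_INT_CST && group_first == EXAMPLE_BIT_INSERT))
    return !group_uses_strings;
  /* A memory copy can be turned into a bit-field insertion and merged with
     constants or bit-field stores; the real code only allows this for
     small, identical bit regions.  */
  if ((candidate == EXAMPLE_MEM_REF
       && (group_first == EXAMPLE_INT_CST
           || group_first == EXAMPLE_BIT_INSERT))
      || (group_first == EXAMPLE_MEM_REF
          && (candidate == EXAMPLE_INT_CST
              || candidate == EXAMPLE_BIT_INSERT)))
    return !group_uses_strings;
  /* Byte-sized constants mix with strings unless bit insertion is used.  */
  if ((candidate == EXAMPLE_STRING_CST && group_first == EXAMPLE_INT_CST)
      || (candidate == EXAMPLE_INT_CST && group_first == EXAMPLE_STRING_CST))
    return !group_uses_bit_insertion;
  return false;
}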
1911 /* Helper method for merge_into and merge_overlapping to do
1912 the common part. */
1914 void
1915 merged_store_group::do_merge (store_immediate_info *info)
1917 bitregion_start = MIN (bitregion_start, info->bitregion_start);
1918 bitregion_end = MAX (bitregion_end, info->bitregion_end);
1920 unsigned int this_align;
1921 unsigned HOST_WIDE_INT align_bitpos = 0;
1922 get_object_alignment_1 (gimple_assign_lhs (info->stmt),
1923 &this_align, &align_bitpos);
1924 if (this_align > align)
1926 align = this_align;
1927 align_base = info->bitpos - align_bitpos;
1929 for (int i = 0; i < 2; ++i)
1931 store_operand_info &op = info->ops[i];
1932 if (!op.base_addr)
1933 continue;
1935 get_object_alignment_1 (op.val, &this_align, &align_bitpos);
1936 if (this_align > load_align[i])
1938 load_align[i] = this_align;
1939 load_align_base[i] = op.bitpos - align_bitpos;
1943 gimple *stmt = info->stmt;
1944 stores.safe_push (info);
1945 if (info->order > last_order)
1947 last_order = info->order;
1948 last_stmt = stmt;
1950 else if (info->order < first_order)
1952 first_order = info->order;
1953 first_stmt = stmt;
1956 /* We need to use bit insertion if there is any bit-field. */
1957 if (info->rhs_code == BIT_INSERT_EXPR)
1959 bit_insertion = true;
1960 gcc_assert (!string_concatenation);
1963 /* We need to use concatenation if there is any string. */
1964 if (info->rhs_code == STRING_CST)
1966 string_concatenation = true;
1967 gcc_assert (!bit_insertion);
1970 if (info->rhs_code != INTEGER_CST)
1971 only_constants = false;
1974 /* Merge a store recorded by INFO into this merged store.
1975 The store is not overlapping with the existing recorded
1976 stores. */
1978 void
1979 merged_store_group::merge_into (store_immediate_info *info)
1981 /* Make sure we're inserting in the position we think we're inserting. */
1982 gcc_assert (info->bitpos >= start + width
1983 && info->bitregion_start <= bitregion_end);
1985 width = info->bitpos + info->bitsize - start;
1986 do_merge (info);
1989 /* Merge a store described by INFO into this merged store.
1990 INFO overlaps in some way with the current store (i.e. it's not the
1991 contiguous case, which is handled by merged_store_group::merge_into). */
1993 void
1994 merged_store_group::merge_overlapping (store_immediate_info *info)
1996 /* If the store extends the size of the group, extend the width. */
1997 if (info->bitpos + info->bitsize > start + width)
1998 width = info->bitpos + info->bitsize - start;
2000 do_merge (info);
2003 /* Go through all the recorded stores in this group in program order and
2004 apply their values to the VAL byte array to create the final merged
2005 value. Return true if the operation succeeded. */
2007 bool
2008 merged_store_group::apply_stores ()
2010 store_immediate_info *info;
2011 unsigned int i;
2013 /* Make sure we have more than one store in the group, otherwise we cannot
2014 merge anything. */
2015 if (bitregion_start % BITS_PER_UNIT != 0
2016 || bitregion_end % BITS_PER_UNIT != 0
2017 || stores.length () == 1)
2018 return false;
2020 buf_size = (bitregion_end - bitregion_start) / BITS_PER_UNIT;
2022 /* Really do string concatenation for large strings only. */
2023 if (buf_size <= MOVE_MAX)
2024 string_concatenation = false;
2026 /* Create a power-of-2-sized buffer for native_encode_expr. */
2027 if (!string_concatenation)
2028 buf_size = 1 << ceil_log2 (buf_size);
2030 val = XNEWVEC (unsigned char, 2 * buf_size);
2031 mask = val + buf_size;
2032 memset (val, 0, buf_size);
2033 memset (mask, ~0U, buf_size);
2035 stores.qsort (sort_by_order);
2037 FOR_EACH_VEC_ELT (stores, i, info)
2039 unsigned int pos_in_buffer = info->bitpos - bitregion_start;
2040 tree cst;
2041 if (info->ops[0].val && info->ops[0].base_addr == NULL_TREE)
2042 cst = info->ops[0].val;
2043 else if (info->ops[1].val && info->ops[1].base_addr == NULL_TREE)
2044 cst = info->ops[1].val;
2045 else
2046 cst = NULL_TREE;
2047 bool ret = true;
2048 if (cst && info->rhs_code != BIT_INSERT_EXPR)
2049 ret = encode_tree_to_bitpos (cst, val, info->bitsize, pos_in_buffer,
2050 buf_size);
2051 unsigned char *m = mask + (pos_in_buffer / BITS_PER_UNIT);
2052 if (BYTES_BIG_ENDIAN)
2053 clear_bit_region_be (m, (BITS_PER_UNIT - 1
2054 - (pos_in_buffer % BITS_PER_UNIT)),
2055 info->bitsize);
2056 else
2057 clear_bit_region (m, pos_in_buffer % BITS_PER_UNIT, info->bitsize);
2058 if (cst && dump_file && (dump_flags & TDF_DETAILS))
2060 if (ret)
2062 fputs ("After writing ", dump_file);
2063 print_generic_expr (dump_file, cst, TDF_NONE);
2064 fprintf (dump_file, " of size " HOST_WIDE_INT_PRINT_DEC
2065 " at position %d\n", info->bitsize, pos_in_buffer);
2066 fputs (" the merged value contains ", dump_file);
2067 dump_char_array (dump_file, val, buf_size);
2068 fputs (" the merged mask contains ", dump_file);
2069 dump_char_array (dump_file, mask, buf_size);
2070 if (bit_insertion)
2071 fputs (" bit insertion is required\n", dump_file);
2072 if (string_concatenation)
2073 fputs (" string concatenation is required\n", dump_file);
2075 else
2076 fprintf (dump_file, "Failed to merge stores\n");
2078 if (!ret)
2079 return false;
2081 stores.qsort (sort_by_bitpos);
2082 return true;
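/* Editor's note: a minimal, self-contained illustration (not part of this
   pass) of the VAL/MASK buffers built above.  Each store ORs its encoded
   bytes into VAL and clears the bits it writes in MASK, so after all stores
   are applied, bytes whose mask is still all-ones were never written and
   correspond to padding that must be preserved.  Little-endian byte stores
   are assumed; the example_* name is made up.  */

#include <assert.h>
#include <string.h>

static void
example_apply_stores_usage (void)
{
  unsigned char val[4], mask[4];
  memset (val, 0, sizeof val);
  memset (mask, 0xff, sizeof mask);

  /* A byte store of 0x12 at byte offset 0 and one of 0x34 at offset 2.  */
  val[0] = 0x12; mask[0] = 0;
  val[2] = 0x34; mask[2] = 0;

  /* Bytes 1 and 3 keep an all-ones mask: they were never written, so the
     widened store must not change them (or must reuse their old value).  */
  assert (mask[1] == 0xff && mask[3] == 0xff);
  assert (val[0] == 0x12 && val[2] == 0x34);
}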
2085 /* Structure describing the store chain. */
2087 class imm_store_chain_info
2089 public:
2090 /* Doubly-linked list that imposes an order on chain processing.
2091 PNXP (prev's next pointer) points to the head of a list, or to
2092 the next field in the previous chain in the list.
2093 See pass_store_merging::m_stores_head for more rationale. */
2094 imm_store_chain_info *next, **pnxp;
2095 tree base_addr;
2096 auto_vec<store_immediate_info *> m_store_info;
2097 auto_vec<merged_store_group *> m_merged_store_groups;
2099 imm_store_chain_info (imm_store_chain_info *&inspt, tree b_a)
2100 : next (inspt), pnxp (&inspt), base_addr (b_a)
2102 inspt = this;
2103 if (next)
2105 gcc_checking_assert (pnxp == next->pnxp);
2106 next->pnxp = &next;
2109 ~imm_store_chain_info ()
2111 *pnxp = next;
2112 if (next)
2114 gcc_checking_assert (&next == next->pnxp);
2115 next->pnxp = pnxp;
2118 bool terminate_and_process_chain ();
2119 bool try_coalesce_bswap (merged_store_group *, unsigned int, unsigned int);
2120 bool coalesce_immediate_stores ();
2121 bool output_merged_store (merged_store_group *);
2122 bool output_merged_stores ();
2125 const pass_data pass_data_tree_store_merging = {
2126 GIMPLE_PASS, /* type */
2127 "store-merging", /* name */
2128 OPTGROUP_NONE, /* optinfo_flags */
2129 TV_GIMPLE_STORE_MERGING, /* tv_id */
2130 PROP_ssa, /* properties_required */
2131 0, /* properties_provided */
2132 0, /* properties_destroyed */
2133 0, /* todo_flags_start */
2134 TODO_update_ssa, /* todo_flags_finish */
2137 class pass_store_merging : public gimple_opt_pass
2139 public:
2140 pass_store_merging (gcc::context *ctxt)
2141 : gimple_opt_pass (pass_data_tree_store_merging, ctxt), m_stores_head ()
2145 /* Pass not supported for PDP-endian, nor for insane hosts or
2146 target character sizes where native_{encode,interpret}_expr
2147 doesn't work properly. */
2148 virtual bool
2149 gate (function *)
2151 return flag_store_merging
2152 && BYTES_BIG_ENDIAN == WORDS_BIG_ENDIAN
2153 && CHAR_BIT == 8
2154 && BITS_PER_UNIT == 8;
2157 virtual unsigned int execute (function *);
2159 private:
2160 hash_map<tree_operand_hash, class imm_store_chain_info *> m_stores;
2162 /* Form a doubly-linked stack of the elements of m_stores, so that
2163 we can iterate over them in a predictable way. Using this order
2164 avoids extraneous differences in the compiler output just because
2165 of tree pointer variations (e.g. different chains end up in
2166 different positions of m_stores, so they are handled in different
2167 orders, so they allocate or release SSA names in different
2168 orders, and when they get reused, subsequent passes end up
2169 getting different SSA names, which may ultimately change
2170 decisions when going out of SSA). */
2171 imm_store_chain_info *m_stores_head;
2173 bool process_store (gimple *);
2174 bool terminate_and_process_chain (imm_store_chain_info *);
2175 bool terminate_all_aliasing_chains (imm_store_chain_info **, gimple *);
2176 bool terminate_and_process_all_chains ();
2177 }; // class pass_store_merging
2179 /* Terminate and process all recorded chains. Return true if any changes
2180 were made. */
2182 bool
2183 pass_store_merging::terminate_and_process_all_chains ()
2185 bool ret = false;
2186 while (m_stores_head)
2187 ret |= terminate_and_process_chain (m_stores_head);
2188 gcc_assert (m_stores.is_empty ());
2189 return ret;
2192 /* Terminate all chains that are affected by the statement STMT.
2193 CHAIN_INFO is the chain we should ignore from the checks if
2194 non-NULL. Return true if any changes were made. */
2196 bool
2197 pass_store_merging::terminate_all_aliasing_chains (imm_store_chain_info
2198 **chain_info,
2199 gimple *stmt)
2201 bool ret = false;
2203 /* If the statement doesn't touch memory it can't alias. */
2204 if (!gimple_vuse (stmt))
2205 return false;
2207 tree store_lhs = gimple_store_p (stmt) ? gimple_get_lhs (stmt) : NULL_TREE;
2208 ao_ref store_lhs_ref;
2209 ao_ref_init (&store_lhs_ref, store_lhs);
2210 for (imm_store_chain_info *next = m_stores_head, *cur = next; cur; cur = next)
2212 next = cur->next;
2214 /* We already checked all the stores in chain_info and terminated the
2215 chain if necessary. Skip it here. */
2216 if (chain_info && *chain_info == cur)
2217 continue;
2219 store_immediate_info *info;
2220 unsigned int i;
2221 FOR_EACH_VEC_ELT (cur->m_store_info, i, info)
2223 tree lhs = gimple_assign_lhs (info->stmt);
2224 ao_ref lhs_ref;
2225 ao_ref_init (&lhs_ref, lhs);
2226 if (ref_maybe_used_by_stmt_p (stmt, &lhs_ref)
2227 || stmt_may_clobber_ref_p_1 (stmt, &lhs_ref)
2228 || (store_lhs && refs_may_alias_p_1 (&store_lhs_ref,
2229 &lhs_ref, false)))
2231 if (dump_file && (dump_flags & TDF_DETAILS))
2233 fprintf (dump_file, "stmt causes chain termination:\n");
2234 print_gimple_stmt (dump_file, stmt, 0);
2236 ret |= terminate_and_process_chain (cur);
2237 break;
2242 return ret;
2245 /* Helper function. Terminate the recorded chain storing to base object
2246 BASE. Return true if the merging and output was successful. The m_stores
2247 entry is removed after the processing in any case. */
2249 bool
2250 pass_store_merging::terminate_and_process_chain (imm_store_chain_info *chain_info)
2252 bool ret = chain_info->terminate_and_process_chain ();
2253 m_stores.remove (chain_info->base_addr);
2254 delete chain_info;
2255 return ret;
2258 /* Return true if stmts in between FIRST (inclusive) and LAST (exclusive)
2259 may clobber REF. FIRST and LAST must have non-NULL vdef. We want to
2260 be able to sink the load of REF across stores between FIRST and LAST, up
2261 to right before LAST. */
2263 bool
2264 stmts_may_clobber_ref_p (gimple *first, gimple *last, tree ref)
2266 ao_ref r;
2267 ao_ref_init (&r, ref);
2268 unsigned int count = 0;
2269 tree vop = gimple_vdef (last);
2270 gimple *stmt;
2272 /* Return true conservatively if the basic blocks are different. */
2273 if (gimple_bb (first) != gimple_bb (last))
2274 return true;
2278 stmt = SSA_NAME_DEF_STMT (vop);
2279 if (stmt_may_clobber_ref_p_1 (stmt, &r))
2280 return true;
2281 if (gimple_store_p (stmt)
2282 && refs_anti_dependent_p (ref, gimple_get_lhs (stmt)))
2283 return true;
2284 /* Avoid quadratic compile time by bounding the number of checks
2285 we perform. */
2286 if (++count > MAX_STORE_ALIAS_CHECKS)
2287 return true;
2288 vop = gimple_vuse (stmt);
2290 while (stmt != first);
2292 return false;
2295 /* Return true if INFO->ops[IDX] is mergeable with the
2296 corresponding loads already in MERGED_STORE group.
2297 BASE_ADDR is the base address of the whole store group. */
2299 bool
2300 compatible_load_p (merged_store_group *merged_store,
2301 store_immediate_info *info,
2302 tree base_addr, int idx)
2304 store_immediate_info *infof = merged_store->stores[0];
2305 if (!info->ops[idx].base_addr
2306 || maybe_ne (info->ops[idx].bitpos - infof->ops[idx].bitpos,
2307 info->bitpos - infof->bitpos)
2308 || !operand_equal_p (info->ops[idx].base_addr,
2309 infof->ops[idx].base_addr, 0))
2310 return false;
2312 store_immediate_info *infol = merged_store->stores.last ();
2313 tree load_vuse = gimple_vuse (info->ops[idx].stmt);
2314 /* In this case all vuses should be the same, e.g.
2315 _1 = s.a; _2 = s.b; _3 = _1 | 1; t.a = _3; _4 = _2 | 2; t.b = _4;
2316 or
2317 _1 = s.a; _2 = s.b; t.a = _1; t.b = _2;
2318 and we can emit the coalesced load next to any of those loads. */
2319 if (gimple_vuse (infof->ops[idx].stmt) == load_vuse
2320 && gimple_vuse (infol->ops[idx].stmt) == load_vuse)
2321 return true;
2323 /* Otherwise, at least for now require that the load has the same
2324 vuse as the store. See following examples. */
2325 if (gimple_vuse (info->stmt) != load_vuse)
2326 return false;
2328 if (gimple_vuse (infof->stmt) != gimple_vuse (infof->ops[idx].stmt)
2329 || (infof != infol
2330 && gimple_vuse (infol->stmt) != gimple_vuse (infol->ops[idx].stmt)))
2331 return false;
2333 /* If the load is from the same location as the store, already
2334 the construction of the immediate chain info guarantees no intervening
2335 stores, so no further checks are needed. Example:
2336 _1 = s.a; _2 = _1 & -7; s.a = _2; _3 = s.b; _4 = _3 & -7; s.b = _4; */
2337 if (known_eq (info->ops[idx].bitpos, info->bitpos)
2338 && operand_equal_p (info->ops[idx].base_addr, base_addr, 0))
2339 return true;
2341 /* Otherwise, we need to punt if any of the loads can be clobbered by any
2342 of the stores in the group, or any other stores in between those.
2343 Previous calls to compatible_load_p ensured that for all the
2344 merged_store->stores IDX loads, no stmts starting with
2345 merged_store->first_stmt and ending right before merged_store->last_stmt
2346 clobbers those loads. */
2347 gimple *first = merged_store->first_stmt;
2348 gimple *last = merged_store->last_stmt;
2349 unsigned int i;
2350 store_immediate_info *infoc;
2351 /* The stores are sorted by increasing store bitpos, so if info->stmt store
2352 comes before the so far first load, we'll be changing
2353 merged_store->first_stmt. In that case we need to give up if
2354 the stmts in the new range clobber any of the earlier processed
2355 loads. */
2356 if (info->order < merged_store->first_order)
2358 FOR_EACH_VEC_ELT (merged_store->stores, i, infoc)
2359 if (stmts_may_clobber_ref_p (info->stmt, first, infoc->ops[idx].val))
2360 return false;
2361 first = info->stmt;
2363 /* Similarly, we could change merged_store->last_stmt, so ensure
2364 in that case no stmts in the new range clobber any of the earlier
2365 processed loads. */
2366 else if (info->order > merged_store->last_order)
2368 FOR_EACH_VEC_ELT (merged_store->stores, i, infoc)
2369 if (stmts_may_clobber_ref_p (last, info->stmt, infoc->ops[idx].val))
2370 return false;
2371 last = info->stmt;
2373 /* And finally, we'd be adding a new load to the set, ensure it isn't
2374 clobbered in the new range. */
2375 if (stmts_may_clobber_ref_p (first, last, info->ops[idx].val))
2376 return false;
2378 /* Otherwise, we are looking for:
2379 _1 = s.a; _2 = _1 ^ 15; t.a = _2; _3 = s.b; _4 = _3 ^ 15; t.b = _4;
2380 or
2381 _1 = s.a; t.a = _1; _2 = s.b; t.b = _2; */
2382 return true;
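/* Editor's note: a small, self-contained sketch (not part of this pass) of
   the positional part of the check above: for a memory-copy style group,
   every load must sit at the same constant bit offset from its store as the
   first load/store pair does, e.g. for [p] = [q]; [p + 1B] = [q + 1B]; the
   offset q - p is identical for both pairs.  The example_* names are made
   up and the vuse/clobber checks are omitted.  */

#include <assert.h>
#include <stdbool.h>

struct example_copy { unsigned long long store_bitpos, load_bitpos; };

static bool
example_loads_compatible (struct example_copy first,
                          struct example_copy later)
{
  return (later.load_bitpos - first.load_bitpos
          == later.store_bitpos - first.store_bitpos);
}

static void
example_loads_compatible_usage (void)
{
  struct example_copy a = { 0, 128 }, b = { 8, 136 }, c = { 16, 200 };
  assert (example_loads_compatible (a, b));   /* same +128-bit offset */
  assert (!example_loads_compatible (a, c));  /* different offset */
}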
2385 /* Add all refs loaded to compute VAL to REFS vector. */
2387 void
2388 gather_bswap_load_refs (vec<tree> *refs, tree val)
2390 if (TREE_CODE (val) != SSA_NAME)
2391 return;
2393 gimple *stmt = SSA_NAME_DEF_STMT (val);
2394 if (!is_gimple_assign (stmt))
2395 return;
2397 if (gimple_assign_load_p (stmt))
2399 refs->safe_push (gimple_assign_rhs1 (stmt));
2400 return;
2403 switch (gimple_assign_rhs_class (stmt))
2405 case GIMPLE_BINARY_RHS:
2406 gather_bswap_load_refs (refs, gimple_assign_rhs2 (stmt));
2407 /* FALLTHRU */
2408 case GIMPLE_UNARY_RHS:
2409 gather_bswap_load_refs (refs, gimple_assign_rhs1 (stmt));
2410 break;
2411 default:
2412 gcc_unreachable ();
2416 /* Check if there are any stores in M_STORE_INFO after index I
2417 (where M_STORE_INFO must be sorted by sort_by_bitpos) that overlap
2418 a potential group ending at END and have their order
2419 smaller than LAST_ORDER. ALL_INTEGER_CST_P is true if
2420 all the stores already merged and the one under consideration
2421 have rhs_code of INTEGER_CST. Return true if there are no such stores.
2422 Consider:
2423 MEM[(long long int *)p_28] = 0;
2424 MEM[(long long int *)p_28 + 8B] = 0;
2425 MEM[(long long int *)p_28 + 16B] = 0;
2426 MEM[(long long int *)p_28 + 24B] = 0;
2427 _129 = (int) _130;
2428 MEM[(int *)p_28 + 8B] = _129;
2429 MEM[(int *)p_28].a = -1;
2430 We already have
2431 MEM[(long long int *)p_28] = 0;
2432 MEM[(int *)p_28].a = -1;
2433 stmts in the current group and need to consider if it is safe to
2434 add MEM[(long long int *)p_28 + 8B] = 0; store into the same group.
2435 There is an overlap between that store and the MEM[(int *)p_28 + 8B] = _129;
2436 store though, so if we add the MEM[(long long int *)p_28 + 8B] = 0;
2437 into the group and merging of those 3 stores is successful, merged
2438 stmts will be emitted at the latest store from that group, i.e.
2439 LAST_ORDER, which is the MEM[(int *)p_28].a = -1; store.
2440 The MEM[(int *)p_28 + 8B] = _129; store that originally follows
2441 the MEM[(long long int *)p_28 + 8B] = 0; would now be before it,
2442 so we need to refuse merging MEM[(long long int *)p_28 + 8B] = 0;
2443 into the group. That way it will be its own store group and will
2444 not be touched. If ALL_INTEGER_CST_P and there are overlapping
2445 INTEGER_CST stores, those are mergeable using merge_overlapping,
2446 so don't return false for those. */
2448 static bool
2449 check_no_overlap (vec<store_immediate_info *> m_store_info, unsigned int i,
2450 bool all_integer_cst_p, unsigned int last_order,
2451 unsigned HOST_WIDE_INT end)
2453 unsigned int len = m_store_info.length ();
2454 for (++i; i < len; ++i)
2456 store_immediate_info *info = m_store_info[i];
2457 if (info->bitpos >= end)
2458 break;
2459 if (info->order < last_order
2460 && (!all_integer_cst_p || info->rhs_code != INTEGER_CST))
2461 return false;
2463 return true;
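/* Editor's note: a self-contained sketch (not part of this pass) of the
   overlap check above over a toy array sorted by bit position.  A later
   store that starts before END but was originally ordered before LAST_ORDER
   would be reordered across the merged store, so it blocks the merge unless
   everything involved is an INTEGER_CST store (which the caller can still
   handle via merge_overlapping).  The example_* names are made up.  */

#include <stdbool.h>

struct example_rec
{
  unsigned long long bitpos;
  unsigned int order;
  bool is_integer_cst;
};

static bool
example_check_no_overlap (const struct example_rec *stores, unsigned int len,
                          unsigned int i, bool all_integer_cst_p,
                          unsigned int last_order, unsigned long long end)
{
  for (++i; i < len; ++i)
    {
      if (stores[i].bitpos >= end)
        break;
      if (stores[i].order < last_order
          && (!all_integer_cst_p || !stores[i].is_integer_cst))
        return false;
    }
  return true;
}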
2466 /* Return true if m_store_info[first] and at least one following store
2467 form a group which stores a try_size-bit value that is byte swapped
2468 from a memory load or from some value, or is the identity of some value.
2469 This uses the bswap pass APIs. */
2471 bool
2472 imm_store_chain_info::try_coalesce_bswap (merged_store_group *merged_store,
2473 unsigned int first,
2474 unsigned int try_size)
2476 unsigned int len = m_store_info.length (), last = first;
2477 unsigned HOST_WIDE_INT width = m_store_info[first]->bitsize;
2478 if (width >= try_size)
2479 return false;
2480 for (unsigned int i = first + 1; i < len; ++i)
2482 if (m_store_info[i]->bitpos != m_store_info[first]->bitpos + width
2483 || m_store_info[i]->lp_nr != merged_store->lp_nr
2484 || m_store_info[i]->ins_stmt == NULL)
2485 return false;
2486 width += m_store_info[i]->bitsize;
2487 if (width >= try_size)
2489 last = i;
2490 break;
2493 if (width != try_size)
2494 return false;
2496 bool allow_unaligned
2497 = !STRICT_ALIGNMENT && param_store_merging_allow_unaligned;
2498 /* Punt if the combined store would not be aligned and we need alignment. */
2499 if (!allow_unaligned)
2501 unsigned int align = merged_store->align;
2502 unsigned HOST_WIDE_INT align_base = merged_store->align_base;
2503 for (unsigned int i = first + 1; i <= last; ++i)
2505 unsigned int this_align;
2506 unsigned HOST_WIDE_INT align_bitpos = 0;
2507 get_object_alignment_1 (gimple_assign_lhs (m_store_info[i]->stmt),
2508 &this_align, &align_bitpos);
2509 if (this_align > align)
2511 align = this_align;
2512 align_base = m_store_info[i]->bitpos - align_bitpos;
2515 unsigned HOST_WIDE_INT align_bitpos
2516 = (m_store_info[first]->bitpos - align_base) & (align - 1);
2517 if (align_bitpos)
2518 align = least_bit_hwi (align_bitpos);
2519 if (align < try_size)
2520 return false;
2523 tree type;
2524 switch (try_size)
2526 case 16: type = uint16_type_node; break;
2527 case 32: type = uint32_type_node; break;
2528 case 64: type = uint64_type_node; break;
2529 default: gcc_unreachable ();
2531 struct symbolic_number n;
2532 gimple *ins_stmt = NULL;
2533 int vuse_store = -1;
2534 unsigned int first_order = merged_store->first_order;
2535 unsigned int last_order = merged_store->last_order;
2536 gimple *first_stmt = merged_store->first_stmt;
2537 gimple *last_stmt = merged_store->last_stmt;
2538 unsigned HOST_WIDE_INT end = merged_store->start + merged_store->width;
2539 store_immediate_info *infof = m_store_info[first];
2541 for (unsigned int i = first; i <= last; ++i)
2543 store_immediate_info *info = m_store_info[i];
2544 struct symbolic_number this_n = info->n;
2545 this_n.type = type;
2546 if (!this_n.base_addr)
2547 this_n.range = try_size / BITS_PER_UNIT;
2548 else
2549 /* Update vuse in case it has been changed by output_merged_stores. */
2550 this_n.vuse = gimple_vuse (info->ins_stmt);
2551 unsigned int bitpos = info->bitpos - infof->bitpos;
2552 if (!do_shift_rotate (LSHIFT_EXPR, &this_n,
2553 BYTES_BIG_ENDIAN
2554 ? try_size - info->bitsize - bitpos
2555 : bitpos))
2556 return false;
2557 if (this_n.base_addr && vuse_store)
2559 unsigned int j;
2560 for (j = first; j <= last; ++j)
2561 if (this_n.vuse == gimple_vuse (m_store_info[j]->stmt))
2562 break;
2563 if (j > last)
2565 if (vuse_store == 1)
2566 return false;
2567 vuse_store = 0;
2570 if (i == first)
2572 n = this_n;
2573 ins_stmt = info->ins_stmt;
2575 else
2577 if (n.base_addr && n.vuse != this_n.vuse)
2579 if (vuse_store == 0)
2580 return false;
2581 vuse_store = 1;
2583 if (info->order > last_order)
2585 last_order = info->order;
2586 last_stmt = info->stmt;
2588 else if (info->order < first_order)
2590 first_order = info->order;
2591 first_stmt = info->stmt;
2593 end = MAX (end, info->bitpos + info->bitsize);
2595 ins_stmt = perform_symbolic_merge (ins_stmt, &n, info->ins_stmt,
2596 &this_n, &n);
2597 if (ins_stmt == NULL)
2598 return false;
2602 uint64_t cmpxchg, cmpnop;
2603 find_bswap_or_nop_finalize (&n, &cmpxchg, &cmpnop);
2605 /* A complete byte swap should make the symbolic number start with
2606 the largest digit in the highest-order byte. An unchanged symbolic
2607 number indicates a read with the same endianness as the target. */
2608 if (n.n != cmpnop && n.n != cmpxchg)
2609 return false;
2611 if (n.base_addr == NULL_TREE && !is_gimple_val (n.src))
2612 return false;
2614 if (!check_no_overlap (m_store_info, last, false, last_order, end))
2615 return false;
2617 /* Don't handle memory copy this way if normal non-bswap processing
2618 would handle it too. */
2619 if (n.n == cmpnop && (unsigned) n.n_ops == last - first + 1)
2621 unsigned int i;
2622 for (i = first; i <= last; ++i)
2623 if (m_store_info[i]->rhs_code != MEM_REF)
2624 break;
2625 if (i == last + 1)
2626 return false;
2629 if (n.n == cmpxchg)
2630 switch (try_size)
2632 case 16:
2633 /* Will emit LROTATE_EXPR. */
2634 break;
2635 case 32:
2636 if (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
2637 && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing)
2638 break;
2639 return false;
2640 case 64:
2641 if (builtin_decl_explicit_p (BUILT_IN_BSWAP64)
2642 && optab_handler (bswap_optab, DImode) != CODE_FOR_nothing)
2643 break;
2644 return false;
2645 default:
2646 gcc_unreachable ();
2649 if (!allow_unaligned && n.base_addr)
2651 unsigned int align = get_object_alignment (n.src);
2652 if (align < try_size)
2653 return false;
2656 /* If each load has vuse of the corresponding store, need to verify
2657 the loads can be sunk right before the last store. */
2658 if (vuse_store == 1)
2660 auto_vec<tree, 64> refs;
2661 for (unsigned int i = first; i <= last; ++i)
2662 gather_bswap_load_refs (&refs,
2663 gimple_assign_rhs1 (m_store_info[i]->stmt));
2665 unsigned int i;
2666 tree ref;
2667 FOR_EACH_VEC_ELT (refs, i, ref)
2668 if (stmts_may_clobber_ref_p (first_stmt, last_stmt, ref))
2669 return false;
2670 n.vuse = NULL_TREE;
2673 infof->n = n;
2674 infof->ins_stmt = ins_stmt;
2675 for (unsigned int i = first; i <= last; ++i)
2677 m_store_info[i]->rhs_code = n.n == cmpxchg ? LROTATE_EXPR : NOP_EXPR;
2678 m_store_info[i]->ops[0].base_addr = NULL_TREE;
2679 m_store_info[i]->ops[1].base_addr = NULL_TREE;
2680 if (i != first)
2681 merged_store->merge_into (m_store_info[i]);
2684 return true;
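/* Editor's note: a self-contained demo (not part of this pass) of the store
   pattern the bswap framework recognizes (see also the example further
   below in coalesce_immediate_stores).  Writing the four shifted bytes of X
   one by one stores X in big-endian byte order; on a little-endian host --
   an assumption of this demo -- that equals a single 4-byte store of
   __builtin_bswap32 (X).  */

#include <assert.h>
#include <stdint.h>
#include <string.h>

static void
example_bswap_store_demo (uint32_t x)
{
  unsigned char a[4], b[4];

  /* The original four narrow stores.  */
  a[0] = x >> 24;
  a[1] = x >> 16;
  a[2] = x >> 8;
  a[3] = x;

  /* The merged form: one wide store of the byte-swapped value.  */
  uint32_t swapped = __builtin_bswap32 (x);
  memcpy (b, &swapped, sizeof b);

  assert (memcmp (a, b, sizeof a) == 0);
}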
2687 /* Go through the candidate stores recorded in m_store_info and merge them
2688 into merged_store_group objects recorded into m_merged_store_groups
2689 representing the widened stores. Return true if coalescing was successful
2690 and the number of widened stores is fewer than the original number
2691 of stores. */
2693 bool
2694 imm_store_chain_info::coalesce_immediate_stores ()
2696 /* Anything less can't be processed. */
2697 if (m_store_info.length () < 2)
2698 return false;
2700 if (dump_file && (dump_flags & TDF_DETAILS))
2701 fprintf (dump_file, "Attempting to coalesce %u stores in chain\n",
2702 m_store_info.length ());
2704 store_immediate_info *info;
2705 unsigned int i, ignore = 0;
2707 /* Order the stores by the bitposition they write to. */
2708 m_store_info.qsort (sort_by_bitpos);
2710 info = m_store_info[0];
2711 merged_store_group *merged_store = new merged_store_group (info);
2712 if (dump_file && (dump_flags & TDF_DETAILS))
2713 fputs ("New store group\n", dump_file);
2715 FOR_EACH_VEC_ELT (m_store_info, i, info)
2717 unsigned HOST_WIDE_INT new_bitregion_start, new_bitregion_end;
2719 if (i <= ignore)
2720 goto done;
2722 /* First try to handle group of stores like:
2723 p[0] = data >> 24;
2724 p[1] = data >> 16;
2725 p[2] = data >> 8;
2726 p[3] = data;
2727 using the bswap framework. */
2728 if (info->bitpos == merged_store->start + merged_store->width
2729 && merged_store->stores.length () == 1
2730 && merged_store->stores[0]->ins_stmt != NULL
2731 && info->lp_nr == merged_store->lp_nr
2732 && info->ins_stmt != NULL)
2734 unsigned int try_size;
2735 for (try_size = 64; try_size >= 16; try_size >>= 1)
2736 if (try_coalesce_bswap (merged_store, i - 1, try_size))
2737 break;
2739 if (try_size >= 16)
2741 ignore = i + merged_store->stores.length () - 1;
2742 m_merged_store_groups.safe_push (merged_store);
2743 if (ignore < m_store_info.length ())
2744 merged_store = new merged_store_group (m_store_info[ignore]);
2745 else
2746 merged_store = NULL;
2747 goto done;
2751 new_bitregion_start
2752 = MIN (merged_store->bitregion_start, info->bitregion_start);
2753 new_bitregion_end
2754 = MAX (merged_store->bitregion_end, info->bitregion_end);
2756 if (info->order >= merged_store->first_nonmergeable_order
2757 || (((new_bitregion_end - new_bitregion_start + 1) / BITS_PER_UNIT)
2758 > (unsigned) param_store_merging_max_size))
2761 /* |---store 1---|
2762 |---store 2---|
2763 Overlapping stores. */
2764 else if (IN_RANGE (info->bitpos, merged_store->start,
2765 merged_store->start + merged_store->width - 1)
2766 /* |---store 1---||---store 2---|
2767 Handle also the consecutive INTEGER_CST stores case here,
2768 as we have here the code to deal with overlaps. */
2769 || (info->bitregion_start <= merged_store->bitregion_end
2770 && info->rhs_code == INTEGER_CST
2771 && merged_store->only_constants
2772 && merged_store->can_be_merged_into (info)))
2774 /* Only allow overlapping stores of constants. */
2775 if (info->rhs_code == INTEGER_CST
2776 && merged_store->only_constants
2777 && info->lp_nr == merged_store->lp_nr)
2779 unsigned int last_order
2780 = MAX (merged_store->last_order, info->order);
2781 unsigned HOST_WIDE_INT end
2782 = MAX (merged_store->start + merged_store->width,
2783 info->bitpos + info->bitsize);
2784 if (check_no_overlap (m_store_info, i, true, last_order, end))
2786 /* check_no_overlap call above made sure there are no
2787 overlapping stores with non-INTEGER_CST rhs_code
2788 in between the first and last of the stores we've
2789 just merged. If there are any INTEGER_CST rhs_code
2790 stores in between, we need to merge_overlapping them
2791 even if in the sort_by_bitpos order there are other
2792 overlapping stores in between. Keep those stores as is.
2793 Example:
2794 MEM[(int *)p_28] = 0;
2795 MEM[(char *)p_28 + 3B] = 1;
2796 MEM[(char *)p_28 + 1B] = 2;
2797 MEM[(char *)p_28 + 2B] = MEM[(char *)p_28 + 6B];
2798 We can't merge the zero store with the store of two and
2799 not merge anything else, because the store of one is
2800 in the original order in between those two, but in
2801 sort_by_bitpos order it comes after the last store that
2802 we can't merge with them. We can merge the first 3 stores
2803 and keep the last store as is though. */
2804 unsigned int len = m_store_info.length ();
2805 unsigned int try_order = last_order;
2806 unsigned int first_nonmergeable_order;
2807 unsigned int k;
2808 bool last_iter = false;
2809 int attempts = 0;
2812 unsigned int max_order = 0;
2813 unsigned first_nonmergeable_int_order = ~0U;
2814 unsigned HOST_WIDE_INT this_end = end;
2815 k = i;
2816 first_nonmergeable_order = ~0U;
2817 for (unsigned int j = i + 1; j < len; ++j)
2819 store_immediate_info *info2 = m_store_info[j];
2820 if (info2->bitpos >= this_end)
2821 break;
2822 if (info2->order < try_order)
2824 if (info2->rhs_code != INTEGER_CST
2825 || info2->lp_nr != merged_store->lp_nr)
2827 /* Normally check_no_overlap makes sure this
2828 doesn't happen, but if end grows below,
2829 then we need to process more stores than
2830 check_no_overlap verified. Example:
2831 MEM[(int *)p_5] = 0;
2832 MEM[(short *)p_5 + 3B] = 1;
2833 MEM[(char *)p_5 + 4B] = _9;
2834 MEM[(char *)p_5 + 2B] = 2; */
2835 k = 0;
2836 break;
2838 k = j;
2839 this_end = MAX (this_end,
2840 info2->bitpos + info2->bitsize);
2842 else if (info2->rhs_code == INTEGER_CST
2843 && info2->lp_nr == merged_store->lp_nr
2844 && !last_iter)
2846 max_order = MAX (max_order, info2->order + 1);
2847 first_nonmergeable_int_order
2848 = MIN (first_nonmergeable_int_order,
2849 info2->order);
2851 else
2852 first_nonmergeable_order
2853 = MIN (first_nonmergeable_order, info2->order);
2855 if (k == 0)
2857 if (last_order == try_order)
2858 break;
2859 /* If this failed, but only because we grew
2860 try_order, retry with the last working one,
2861 so that we merge at least something. */
2862 try_order = last_order;
2863 last_iter = true;
2864 continue;
2866 last_order = try_order;
2867 /* Retry with a larger try_order to see if we could
2868 merge some further INTEGER_CST stores. */
2869 if (max_order
2870 && (first_nonmergeable_int_order
2871 < first_nonmergeable_order))
2873 try_order = MIN (max_order,
2874 first_nonmergeable_order);
2875 try_order
2876 = MIN (try_order,
2877 merged_store->first_nonmergeable_order);
2878 if (try_order > last_order && ++attempts < 16)
2879 continue;
2881 first_nonmergeable_order
2882 = MIN (first_nonmergeable_order,
2883 first_nonmergeable_int_order);
2884 end = this_end;
2885 break;
2887 while (1);
2889 if (k != 0)
2891 merged_store->merge_overlapping (info);
2893 merged_store->first_nonmergeable_order
2894 = MIN (merged_store->first_nonmergeable_order,
2895 first_nonmergeable_order);
2897 for (unsigned int j = i + 1; j <= k; j++)
2899 store_immediate_info *info2 = m_store_info[j];
2900 gcc_assert (info2->bitpos < end);
2901 if (info2->order < last_order)
2903 gcc_assert (info2->rhs_code == INTEGER_CST);
2904 if (info != info2)
2905 merged_store->merge_overlapping (info2);
2907 /* Other stores are kept and not merged in any
2908 way. */
2910 ignore = k;
2911 goto done;
2916 /* |---store 1---||---store 2---|
2917 This store is consecutive to the previous one.
2918 Merge it into the current store group. There can be gaps in between
2919 the stores, but there can't be gaps in between bitregions. */
2920 else if (info->bitregion_start <= merged_store->bitregion_end
2921 && merged_store->can_be_merged_into (info))
2923 store_immediate_info *infof = merged_store->stores[0];
2925 /* All the rhs_code ops that take 2 operands are commutative;
2926 swap the operands if that could make them compatible. */
2927 if (infof->ops[0].base_addr
2928 && infof->ops[1].base_addr
2929 && info->ops[0].base_addr
2930 && info->ops[1].base_addr
2931 && known_eq (info->ops[1].bitpos - infof->ops[0].bitpos,
2932 info->bitpos - infof->bitpos)
2933 && operand_equal_p (info->ops[1].base_addr,
2934 infof->ops[0].base_addr, 0))
2936 std::swap (info->ops[0], info->ops[1]);
2937 info->ops_swapped_p = true;
2939 if (check_no_overlap (m_store_info, i, false,
2940 MAX (merged_store->last_order, info->order),
2941 MAX (merged_store->start + merged_store->width,
2942 info->bitpos + info->bitsize)))
2944 /* Turn MEM_REF into BIT_INSERT_EXPR for bit-field stores. */
2945 if (info->rhs_code == MEM_REF && infof->rhs_code != MEM_REF)
2947 info->rhs_code = BIT_INSERT_EXPR;
2948 info->ops[0].val = gimple_assign_rhs1 (info->stmt);
2949 info->ops[0].base_addr = NULL_TREE;
2951 else if (infof->rhs_code == MEM_REF && info->rhs_code != MEM_REF)
2953 store_immediate_info *infoj;
2954 unsigned int j;
2955 FOR_EACH_VEC_ELT (merged_store->stores, j, infoj)
2957 infoj->rhs_code = BIT_INSERT_EXPR;
2958 infoj->ops[0].val = gimple_assign_rhs1 (infoj->stmt);
2959 infoj->ops[0].base_addr = NULL_TREE;
2961 merged_store->bit_insertion = true;
2963 if ((infof->ops[0].base_addr
2964 ? compatible_load_p (merged_store, info, base_addr, 0)
2965 : !info->ops[0].base_addr)
2966 && (infof->ops[1].base_addr
2967 ? compatible_load_p (merged_store, info, base_addr, 1)
2968 : !info->ops[1].base_addr))
2970 merged_store->merge_into (info);
2971 goto done;
2976 /* |---store 1---| <gap> |---store 2---|.
2977 Gap between stores or the rhs not compatible. Start a new group. */
2979 /* Try to apply all the stores recorded for the group to determine
2980 the bitpattern they write and discard it if that fails.
2981 This will also reject single-store groups. */
2982 if (merged_store->apply_stores ())
2983 m_merged_store_groups.safe_push (merged_store);
2984 else
2985 delete merged_store;
2987 merged_store = new merged_store_group (info);
2988 if (dump_file && (dump_flags & TDF_DETAILS))
2989 fputs ("New store group\n", dump_file);
2991 done:
2992 if (dump_file && (dump_flags & TDF_DETAILS))
2994 fprintf (dump_file, "Store %u:\nbitsize:" HOST_WIDE_INT_PRINT_DEC
2995 " bitpos:" HOST_WIDE_INT_PRINT_DEC " val:",
2996 i, info->bitsize, info->bitpos);
2997 print_generic_expr (dump_file, gimple_assign_rhs1 (info->stmt));
2998 fputc ('\n', dump_file);
3002 /* Record or discard the last store group. */
3003 if (merged_store)
3005 if (merged_store->apply_stores ())
3006 m_merged_store_groups.safe_push (merged_store);
3007 else
3008 delete merged_store;
3011 gcc_assert (m_merged_store_groups.length () <= m_store_info.length ());
3013 bool success
3014 = !m_merged_store_groups.is_empty ()
3015 && m_merged_store_groups.length () < m_store_info.length ();
3017 if (success && dump_file)
3018 fprintf (dump_file, "Coalescing successful!\nMerged into %u stores\n",
3019 m_merged_store_groups.length ());
3021 return success;
3024 /* Return the type to use for the merged stores or loads described by STMTS.
3025 This is needed to get the alias sets right. If IS_LOAD, look for rhs,
3026 otherwise lhs. Additionally set *CLIQUEP and *BASEP to MR_DEPENDENCE_*
3027 of the MEM_REFs if any. */
3029 static tree
3030 get_alias_type_for_stmts (vec<gimple *> &stmts, bool is_load,
3031 unsigned short *cliquep, unsigned short *basep)
3033 gimple *stmt;
3034 unsigned int i;
3035 tree type = NULL_TREE;
3036 tree ret = NULL_TREE;
3037 *cliquep = 0;
3038 *basep = 0;
3040 FOR_EACH_VEC_ELT (stmts, i, stmt)
3042 tree ref = is_load ? gimple_assign_rhs1 (stmt)
3043 : gimple_assign_lhs (stmt);
3044 tree type1 = reference_alias_ptr_type (ref);
3045 tree base = get_base_address (ref);
3047 if (i == 0)
3049 if (TREE_CODE (base) == MEM_REF)
3051 *cliquep = MR_DEPENDENCE_CLIQUE (base);
3052 *basep = MR_DEPENDENCE_BASE (base);
3054 ret = type = type1;
3055 continue;
3057 if (!alias_ptr_types_compatible_p (type, type1))
3058 ret = ptr_type_node;
3059 if (TREE_CODE (base) != MEM_REF
3060 || *cliquep != MR_DEPENDENCE_CLIQUE (base)
3061 || *basep != MR_DEPENDENCE_BASE (base))
3063 *cliquep = 0;
3064 *basep = 0;
3067 return ret;
3070 /* Return the location_t information we can find among the statements
3071 in STMTS. */
3073 static location_t
3074 get_location_for_stmts (vec<gimple *> &stmts)
3076 gimple *stmt;
3077 unsigned int i;
3079 FOR_EACH_VEC_ELT (stmts, i, stmt)
3080 if (gimple_has_location (stmt))
3081 return gimple_location (stmt);
3083 return UNKNOWN_LOCATION;
3086 /* Used to describe a store resulting from splitting a wide store into
3087 smaller regularly-sized stores in split_group. */
3089 class split_store
3091 public:
3092 unsigned HOST_WIDE_INT bytepos;
3093 unsigned HOST_WIDE_INT size;
3094 unsigned HOST_WIDE_INT align;
3095 auto_vec<store_immediate_info *> orig_stores;
3096 /* True if there is a single orig stmt covering the whole split store. */
3097 bool orig;
3098 split_store (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
3099 unsigned HOST_WIDE_INT);
3102 /* Simple constructor. */
3104 split_store::split_store (unsigned HOST_WIDE_INT bp,
3105 unsigned HOST_WIDE_INT sz,
3106 unsigned HOST_WIDE_INT al)
3107 : bytepos (bp), size (sz), align (al), orig (false)
3109 orig_stores.create (0);
3112 /* Record all stores in GROUP that write to the region starting at BITPOS
3113 and of size BITSIZE. Record infos for such statements in STORES if
3114 non-NULL. The stores in GROUP must be sorted by bitposition. Return INFO
3115 if there is exactly one original store in the range (in that case ignore
3116 clobber stmts, unless there are only clobber stmts). */
3118 static store_immediate_info *
3119 find_constituent_stores (class merged_store_group *group,
3120 vec<store_immediate_info *> *stores,
3121 unsigned int *first,
3122 unsigned HOST_WIDE_INT bitpos,
3123 unsigned HOST_WIDE_INT bitsize)
3125 store_immediate_info *info, *ret = NULL;
3126 unsigned int i;
3127 bool second = false;
3128 bool update_first = true;
3129 unsigned HOST_WIDE_INT end = bitpos + bitsize;
3130 for (i = *first; group->stores.iterate (i, &info); ++i)
3132 unsigned HOST_WIDE_INT stmt_start = info->bitpos;
3133 unsigned HOST_WIDE_INT stmt_end = stmt_start + info->bitsize;
3134 if (stmt_end <= bitpos)
3136 /* BITPOS passed to this function never decreases from within the
3137 same split_group call, so optimize and don't scan info records
3138 which are known to end before or at BITPOS next time.
3139 Only do it if all stores before this one also pass this. */
3140 if (update_first)
3141 *first = i + 1;
3142 continue;
3144 else
3145 update_first = false;
3147 /* The stores in GROUP are ordered by bitposition so if we're past
3148 the region for this group return early. */
3149 if (stmt_start >= end)
3150 return ret;
3152 if (gimple_clobber_p (info->stmt))
3154 if (stores)
3155 stores->safe_push (info);
3156 if (ret == NULL)
3157 ret = info;
3158 continue;
3160 if (stores)
3162 stores->safe_push (info);
3163 if (ret && !gimple_clobber_p (ret->stmt))
3165 ret = NULL;
3166 second = true;
3169 else if (ret && !gimple_clobber_p (ret->stmt))
3170 return NULL;
3171 if (!second)
3172 ret = info;
3174 return ret;
3177 /* Return how many SSA_NAMEs used to compute the value to store in the INFO
3178 store have multiple uses. If any SSA_NAME has multiple uses, also
3179 count statements needed to compute it. */
3181 static unsigned
3182 count_multiple_uses (store_immediate_info *info)
3184 gimple *stmt = info->stmt;
3185 unsigned ret = 0;
3186 switch (info->rhs_code)
3188 case INTEGER_CST:
3189 case STRING_CST:
3190 return 0;
3191 case BIT_AND_EXPR:
3192 case BIT_IOR_EXPR:
3193 case BIT_XOR_EXPR:
3194 if (info->bit_not_p)
3196 if (!has_single_use (gimple_assign_rhs1 (stmt)))
3197 ret = 1; /* Fall through below to return
3198 the BIT_NOT_EXPR stmt and then
3199 BIT_{AND,IOR,XOR}_EXPR and anything it
3200 uses. */
3201 else
3202 /* stmt is after this the BIT_NOT_EXPR. */
3203 stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3205 if (!has_single_use (gimple_assign_rhs1 (stmt)))
3207 ret += 1 + info->ops[0].bit_not_p;
3208 if (info->ops[1].base_addr)
3209 ret += 1 + info->ops[1].bit_not_p;
3210 return ret + 1;
3212 stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3213 /* stmt is now the BIT_*_EXPR. */
3214 if (!has_single_use (gimple_assign_rhs1 (stmt)))
3215 ret += 1 + info->ops[info->ops_swapped_p].bit_not_p;
3216 else if (info->ops[info->ops_swapped_p].bit_not_p)
3218 gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3219 if (!has_single_use (gimple_assign_rhs1 (stmt2)))
3220 ++ret;
3222 if (info->ops[1].base_addr == NULL_TREE)
3224 gcc_checking_assert (!info->ops_swapped_p);
3225 return ret;
3227 if (!has_single_use (gimple_assign_rhs2 (stmt)))
3228 ret += 1 + info->ops[1 - info->ops_swapped_p].bit_not_p;
3229 else if (info->ops[1 - info->ops_swapped_p].bit_not_p)
3231 gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3232 if (!has_single_use (gimple_assign_rhs1 (stmt2)))
3233 ++ret;
3235 return ret;
3236 case MEM_REF:
3237 if (!has_single_use (gimple_assign_rhs1 (stmt)))
3238 return 1 + info->ops[0].bit_not_p;
3239 else if (info->ops[0].bit_not_p)
3241 stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3242 if (!has_single_use (gimple_assign_rhs1 (stmt)))
3243 return 1;
3245 return 0;
3246 case BIT_INSERT_EXPR:
3247 return has_single_use (gimple_assign_rhs1 (stmt)) ? 0 : 1;
3248 default:
3249 gcc_unreachable ();
3253 /* Split a merged store described by GROUP by populating the SPLIT_STORES
3254 vector (if non-NULL) with split_store structs describing the byte offset
3255 (from the base), the bit size and alignment of each store as well as the
3256 original statements involved in each such split group.
3257 This is to separate the splitting strategy from the statement
3258 building/emission/linking done in output_merged_store.
3259 Return the number of new stores.
3260 If ALLOW_UNALIGNED_STORE is false, then all stores must be aligned.
3261 If ALLOW_UNALIGNED_LOAD is false, then all loads must be aligned.
3262 BZERO_FIRST may be true only when the first store covers the whole group
3263 and clears it; if BZERO_FIRST is true, keep that first store in the set
3264 unmodified and emit further stores for the overrides only.
3265 If SPLIT_STORES is NULL, it is just a dry run to count number of
3266 new stores. */
3268 static unsigned int
3269 split_group (merged_store_group *group, bool allow_unaligned_store,
3270 bool allow_unaligned_load, bool bzero_first,
3271 vec<split_store *> *split_stores,
3272 unsigned *total_orig,
3273 unsigned *total_new)
3275 unsigned HOST_WIDE_INT pos = group->bitregion_start;
3276 unsigned HOST_WIDE_INT size = group->bitregion_end - pos;
3277 unsigned HOST_WIDE_INT bytepos = pos / BITS_PER_UNIT;
3278 unsigned HOST_WIDE_INT group_align = group->align;
3279 unsigned HOST_WIDE_INT align_base = group->align_base;
3280 unsigned HOST_WIDE_INT group_load_align = group_align;
3281 bool any_orig = false;
3283 gcc_assert ((size % BITS_PER_UNIT == 0) && (pos % BITS_PER_UNIT == 0));
3285 /* For bswap framework using sets of stores, all the checking has been done
3286 earlier in try_coalesce_bswap and the result always needs to be emitted
3287 as a single store. Likewise for string concatenation. */
3288 if (group->stores[0]->rhs_code == LROTATE_EXPR
3289 || group->stores[0]->rhs_code == NOP_EXPR
3290 || group->string_concatenation)
3292 gcc_assert (!bzero_first);
3293 if (total_orig)
3295 /* Avoid the old/new stmt count heuristics. It should always
3296 be beneficial. */
3297 total_new[0] = 1;
3298 total_orig[0] = 2;
3301 if (split_stores)
3303 unsigned HOST_WIDE_INT align_bitpos
3304 = (group->start - align_base) & (group_align - 1);
3305 unsigned HOST_WIDE_INT align = group_align;
3306 if (align_bitpos)
3307 align = least_bit_hwi (align_bitpos);
3308 bytepos = group->start / BITS_PER_UNIT;
3309 split_store *store
3310 = new split_store (bytepos, group->width, align);
3311 unsigned int first = 0;
3312 find_constituent_stores (group, &store->orig_stores,
3313 &first, group->start, group->width);
3314 split_stores->safe_push (store);
3317 return 1;
3320 unsigned int ret = 0, first = 0;
3321 unsigned HOST_WIDE_INT try_pos = bytepos;
3323 if (total_orig)
3325 unsigned int i;
3326 store_immediate_info *info = group->stores[0];
3328 total_new[0] = 0;
3329 total_orig[0] = 1; /* The orig store. */
3330 info = group->stores[0];
3331 if (info->ops[0].base_addr)
3332 total_orig[0]++;
3333 if (info->ops[1].base_addr)
3334 total_orig[0]++;
3335 switch (info->rhs_code)
3337 case BIT_AND_EXPR:
3338 case BIT_IOR_EXPR:
3339 case BIT_XOR_EXPR:
3340 total_orig[0]++; /* The orig BIT_*_EXPR stmt. */
3341 break;
3342 default:
3343 break;
3345 total_orig[0] *= group->stores.length ();
3347 FOR_EACH_VEC_ELT (group->stores, i, info)
3349 total_new[0] += count_multiple_uses (info);
3350 total_orig[0] += (info->bit_not_p
3351 + info->ops[0].bit_not_p
3352 + info->ops[1].bit_not_p);
3356 if (!allow_unaligned_load)
3357 for (int i = 0; i < 2; ++i)
3358 if (group->load_align[i])
3359 group_load_align = MIN (group_load_align, group->load_align[i]);
3361 if (bzero_first)
3363 store_immediate_info *gstore;
3364 FOR_EACH_VEC_ELT (group->stores, first, gstore)
3365 if (!gimple_clobber_p (gstore->stmt))
3366 break;
3367 ++first;
3368 ret = 1;
3369 if (split_stores)
3371 split_store *store
3372 = new split_store (bytepos, gstore->bitsize, align_base);
3373 store->orig_stores.safe_push (gstore);
3374 store->orig = true;
3375 any_orig = true;
3376 split_stores->safe_push (store);
3380 while (size > 0)
3382 if ((allow_unaligned_store || group_align <= BITS_PER_UNIT)
3383 && (group->mask[try_pos - bytepos] == (unsigned char) ~0U
3384 || (bzero_first && group->val[try_pos - bytepos] == 0)))
3386 /* Skip padding bytes. */
3387 ++try_pos;
3388 size -= BITS_PER_UNIT;
3389 continue;
3392 unsigned HOST_WIDE_INT try_bitpos = try_pos * BITS_PER_UNIT;
3393 unsigned int try_size = MAX_STORE_BITSIZE, nonmasked;
3394 unsigned HOST_WIDE_INT align_bitpos
3395 = (try_bitpos - align_base) & (group_align - 1);
3396 unsigned HOST_WIDE_INT align = group_align;
3397 bool found_orig = false;
3398 if (align_bitpos)
3399 align = least_bit_hwi (align_bitpos);
3400 if (!allow_unaligned_store)
3401 try_size = MIN (try_size, align);
3402 if (!allow_unaligned_load)
3404 /* If we can't do or don't want to do unaligned stores
3405 as well as loads, we need to take the loads into account
3406 as well. */
3407 unsigned HOST_WIDE_INT load_align = group_load_align;
3408 align_bitpos = (try_bitpos - align_base) & (load_align - 1);
3409 if (align_bitpos)
3410 load_align = least_bit_hwi (align_bitpos);
3411 for (int i = 0; i < 2; ++i)
3412 if (group->load_align[i])
3414 align_bitpos
3415 = known_alignment (try_bitpos
3416 - group->stores[0]->bitpos
3417 + group->stores[0]->ops[i].bitpos
3418 - group->load_align_base[i]);
3419 if (align_bitpos & (group_load_align - 1))
3421 unsigned HOST_WIDE_INT a = least_bit_hwi (align_bitpos);
3422 load_align = MIN (load_align, a);
3425 try_size = MIN (try_size, load_align);
3427 store_immediate_info *info
3428 = find_constituent_stores (group, NULL, &first, try_bitpos, try_size);
3429 if (info && !gimple_clobber_p (info->stmt))
3431 /* If there is just one original statement for the range, see if
3432 we can just reuse the original store which could be even larger
3433 than try_size. */
3434 unsigned HOST_WIDE_INT stmt_end
3435 = ROUND_UP (info->bitpos + info->bitsize, BITS_PER_UNIT);
3436 info = find_constituent_stores (group, NULL, &first, try_bitpos,
3437 stmt_end - try_bitpos);
3438 if (info && info->bitpos >= try_bitpos)
3440 store_immediate_info *info2 = NULL;
3441 unsigned int first_copy = first;
3442 if (info->bitpos > try_bitpos
3443 && stmt_end - try_bitpos <= try_size)
3445 info2 = find_constituent_stores (group, NULL, &first_copy,
3446 try_bitpos,
3447 info->bitpos - try_bitpos);
3448 gcc_assert (info2 == NULL || gimple_clobber_p (info2->stmt));
3450 if (info2 == NULL && stmt_end - try_bitpos < try_size)
3452 info2 = find_constituent_stores (group, NULL, &first_copy,
3453 stmt_end,
3454 (try_bitpos + try_size)
3455 - stmt_end);
3456 gcc_assert (info2 == NULL || gimple_clobber_p (info2->stmt));
3458 if (info2 == NULL)
3460 try_size = stmt_end - try_bitpos;
3461 found_orig = true;
3462 goto found;
3467 /* Approximate store bitsize for the case when there are no padding
3468 bits. */
3469 while (try_size > size)
3470 try_size /= 2;
3471 /* Now look for whole padding bytes at the end of that bitsize. */
3472 for (nonmasked = try_size / BITS_PER_UNIT; nonmasked > 0; --nonmasked)
3473 if (group->mask[try_pos - bytepos + nonmasked - 1]
3474 != (unsigned char) ~0U
3475 && (!bzero_first
3476 || group->val[try_pos - bytepos + nonmasked - 1] != 0))
3477 break;
3478 if (nonmasked == 0 || (info && gimple_clobber_p (info->stmt)))
3480 /* If entire try_size range is padding, skip it. */
3481 try_pos += try_size / BITS_PER_UNIT;
3482 size -= try_size;
3483 continue;
3485 /* Otherwise try to decrease try_size if second half, last 3 quarters
3486 etc. are padding. */
3487 nonmasked *= BITS_PER_UNIT;
3488 while (nonmasked <= try_size / 2)
3489 try_size /= 2;
3490 if (!allow_unaligned_store && group_align > BITS_PER_UNIT)
3492 /* Now look for whole padding bytes at the start of that bitsize. */
3493 unsigned int try_bytesize = try_size / BITS_PER_UNIT, masked;
3494 for (masked = 0; masked < try_bytesize; ++masked)
3495 if (group->mask[try_pos - bytepos + masked] != (unsigned char) ~0U
3496 && (!bzero_first
3497 || group->val[try_pos - bytepos + masked] != 0))
3498 break;
3499 masked *= BITS_PER_UNIT;
3500 gcc_assert (masked < try_size);
3501 if (masked >= try_size / 2)
3503 while (masked >= try_size / 2)
3505 try_size /= 2;
3506 try_pos += try_size / BITS_PER_UNIT;
3507 size -= try_size;
3508 masked -= try_size;
3510 /* Need to recompute the alignment, so just retry at the new
3511 position. */
3512 continue;
3516 found:
3517 ++ret;
3519 if (split_stores)
3521 split_store *store
3522 = new split_store (try_pos, try_size, align);
3523 info = find_constituent_stores (group, &store->orig_stores,
3524 &first, try_bitpos, try_size);
3525 if (info
3526 && !gimple_clobber_p (info->stmt)
3527 && info->bitpos >= try_bitpos
3528 && info->bitpos + info->bitsize <= try_bitpos + try_size
3529 && (store->orig_stores.length () == 1
3530 || found_orig
3531 || (info->bitpos == try_bitpos
3532 && (info->bitpos + info->bitsize
3533 == try_bitpos + try_size))))
3535 store->orig = true;
3536 any_orig = true;
3538 split_stores->safe_push (store);
3541 try_pos += try_size / BITS_PER_UNIT;
3542 size -= try_size;
3545 if (total_orig)
3547 unsigned int i;
3548 split_store *store;
3549 /* If we are reusing some original stores and any of the
3550 original SSA_NAMEs had multiple uses, we need to subtract
3551 those now before we add the new ones. */
3552 if (total_new[0] && any_orig)
3554 FOR_EACH_VEC_ELT (*split_stores, i, store)
3555 if (store->orig)
3556 total_new[0] -= count_multiple_uses (store->orig_stores[0]);
3558 total_new[0] += ret; /* The new store. */
3559 store_immediate_info *info = group->stores[0];
3560 if (info->ops[0].base_addr)
3561 total_new[0] += ret;
3562 if (info->ops[1].base_addr)
3563 total_new[0] += ret;
3564 switch (info->rhs_code)
3566 case BIT_AND_EXPR:
3567 case BIT_IOR_EXPR:
3568 case BIT_XOR_EXPR:
3569 total_new[0] += ret; /* The new BIT_*_EXPR stmt. */
3570 break;
3571 default:
3572 break;
3574 FOR_EACH_VEC_ELT (*split_stores, i, store)
3576 unsigned int j;
3577 bool bit_not_p[3] = { false, false, false };
3578 /* If all orig_stores have certain bit_not_p set, then
3579 we'd use a BIT_NOT_EXPR stmt and need to account for it.
3580 If some orig_stores have certain bit_not_p set, then
3581 we'd use a BIT_XOR_EXPR with a mask and need to account for
3582 it. */
3583 FOR_EACH_VEC_ELT (store->orig_stores, j, info)
3585 if (info->ops[0].bit_not_p)
3586 bit_not_p[0] = true;
3587 if (info->ops[1].bit_not_p)
3588 bit_not_p[1] = true;
3589 if (info->bit_not_p)
3590 bit_not_p[2] = true;
3592 total_new[0] += bit_not_p[0] + bit_not_p[1] + bit_not_p[2];
3597 return ret;
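/* Editor's note: a much-simplified, self-contained sketch (not part of this
   pass) of the greedy splitting strategy above: walk the region and at each
   position emit the widest power-of-two-sized store that still fits, capped
   by the word size and by the natural alignment of the current position.
   Padding skipping, reuse of original stores and load alignment are all
   ignored; EXAMPLE_MAX_STORE_BYTES stands in for MAX_STORE_BITSIZE / 8.  */

#include <assert.h>

#define EXAMPLE_MAX_STORE_BYTES 8

/* Return the number of stores needed to cover SIZE bytes starting at byte
   offset POS using only naturally aligned power-of-two-sized stores.  */
static unsigned int
example_split_count (unsigned long long pos, unsigned long long size)
{
  unsigned int count = 0;
  while (size > 0)
    {
      unsigned long long try_size = EXAMPLE_MAX_STORE_BYTES;
      while (try_size > size || pos % try_size != 0)
        try_size /= 2;
      pos += try_size;
      size -= try_size;
      ++count;
    }
  return count;
}

static void
example_split_usage (void)
{
  /* A 15-byte region starting at offset 1 splits into stores of 1, 2, 4
     and 8 bytes at offsets 1, 2, 4 and 8 respectively.  */
  assert (example_split_count (1, 15) == 4);
  /* An aligned 16-byte region needs only two 8-byte stores.  */
  assert (example_split_count (0, 16) == 2);
}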
3600 /* Return the operation through which the operand IDX (if < 2) or
3601 result (IDX == 2) should be inverted. If NOP_EXPR, no inversion
3602 is done; if BIT_NOT_EXPR, all bits are inverted; if BIT_XOR_EXPR,
3603 the bits should be xored with MASK. */
3605 static enum tree_code
3606 invert_op (split_store *split_store, int idx, tree int_type, tree &mask)
3608 unsigned int i;
3609 store_immediate_info *info;
3610 unsigned int cnt = 0;
3611 bool any_paddings = false;
3612 FOR_EACH_VEC_ELT (split_store->orig_stores, i, info)
3614 bool bit_not_p = idx < 2 ? info->ops[idx].bit_not_p : info->bit_not_p;
3615 if (bit_not_p)
3617 ++cnt;
3618 tree lhs = gimple_assign_lhs (info->stmt);
3619 if (INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3620 && TYPE_PRECISION (TREE_TYPE (lhs)) < info->bitsize)
3621 any_paddings = true;
3624 mask = NULL_TREE;
3625 if (cnt == 0)
3626 return NOP_EXPR;
3627 if (cnt == split_store->orig_stores.length () && !any_paddings)
3628 return BIT_NOT_EXPR;
3630 unsigned HOST_WIDE_INT try_bitpos = split_store->bytepos * BITS_PER_UNIT;
3631 unsigned buf_size = split_store->size / BITS_PER_UNIT;
3632 unsigned char *buf
3633 = XALLOCAVEC (unsigned char, buf_size);
3634 memset (buf, ~0U, buf_size);
3635 FOR_EACH_VEC_ELT (split_store->orig_stores, i, info)
3637 bool bit_not_p = idx < 2 ? info->ops[idx].bit_not_p : info->bit_not_p;
3638 if (!bit_not_p)
3639 continue;
3640 /* Clear regions with bit_not_p and invert afterwards, rather than
3641 clear regions with !bit_not_p, so that gaps in between stores aren't
3642 set in the mask. */
3643 unsigned HOST_WIDE_INT bitsize = info->bitsize;
3644 unsigned HOST_WIDE_INT prec = bitsize;
3645 unsigned int pos_in_buffer = 0;
3646 if (any_paddings)
3648 tree lhs = gimple_assign_lhs (info->stmt);
3649 if (INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3650 && TYPE_PRECISION (TREE_TYPE (lhs)) < bitsize)
3651 prec = TYPE_PRECISION (TREE_TYPE (lhs));
3653 if (info->bitpos < try_bitpos)
3655 gcc_assert (info->bitpos + bitsize > try_bitpos);
3656 if (!BYTES_BIG_ENDIAN)
3658 if (prec <= try_bitpos - info->bitpos)
3659 continue;
3660 prec -= try_bitpos - info->bitpos;
3662 bitsize -= try_bitpos - info->bitpos;
3663 if (BYTES_BIG_ENDIAN && prec > bitsize)
3664 prec = bitsize;
3666 else
3667 pos_in_buffer = info->bitpos - try_bitpos;
3668 if (prec < bitsize)
3670 /* If this is a bool inversion, invert just the least significant
3671 prec bits rather than all bits of it. */
3672 if (BYTES_BIG_ENDIAN)
3674 pos_in_buffer += bitsize - prec;
3675 if (pos_in_buffer >= split_store->size)
3676 continue;
3678 bitsize = prec;
3680 if (pos_in_buffer + bitsize > split_store->size)
3681 bitsize = split_store->size - pos_in_buffer;
3682 unsigned char *p = buf + (pos_in_buffer / BITS_PER_UNIT);
3683 if (BYTES_BIG_ENDIAN)
3684 clear_bit_region_be (p, (BITS_PER_UNIT - 1
3685 - (pos_in_buffer % BITS_PER_UNIT)), bitsize);
3686 else
3687 clear_bit_region (p, pos_in_buffer % BITS_PER_UNIT, bitsize);
3689 for (unsigned int i = 0; i < buf_size; ++i)
3690 buf[i] = ~buf[i];
3691 mask = native_interpret_expr (int_type, buf, buf_size);
3692 return BIT_XOR_EXPR;
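/* Editor's illustrative example (assumes 8-bit bytes and little endian):
   for a 16-bit split store built from two byte-sized stores where only
   the first byte (at the lower address) had bit_not_p set, cnt == 1 != 2,
   so instead of a full BIT_NOT_EXPR the buffer above ends up with only
   that byte's bits set, i.e. MASK == 0x00ff, and the caller emits
     val ^ 0x00ff
   to invert just those bits.  */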
3695 /* Given a merged store group GROUP output the widened version of it.
3696 The store chain is against the base object BASE.
3697 Try store sizes of at most MAX_STORE_BITSIZE bits wide and don't output
3698 unaligned stores for STRICT_ALIGNMENT targets or if it's too expensive.
3699 Make sure that the number of statements output is less than the number of
3700 original statements. If a better sequence is possible emit it and
3701 return true. */
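/* Editor's illustrative example (a sketch, not a real testcase): given
   the recorded group
     p->a = 1;   8 bits at bit offset 0
     p->b = 2;   8 bits at bit offset 8
     p->c = 3;   16 bits at bit offset 16
   this function would try to emit a single 32-bit store of the constant
   0x00030201 (little endian), provided the target allows it and the new
   sequence is estimated to be cheaper than the three original stores.  */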
3703 bool
3704 imm_store_chain_info::output_merged_store (merged_store_group *group)
3706 const unsigned HOST_WIDE_INT start_byte_pos
3707 = group->bitregion_start / BITS_PER_UNIT;
3708 unsigned int orig_num_stmts = group->stores.length ();
3709 if (orig_num_stmts < 2)
3710 return false;
3712 bool allow_unaligned_store
3713 = !STRICT_ALIGNMENT && param_store_merging_allow_unaligned;
3714 bool allow_unaligned_load = allow_unaligned_store;
3715 bool bzero_first = false;
3716 store_immediate_info *store;
3717 unsigned int num_clobber_stmts = 0;
3718 if (group->stores[0]->rhs_code == INTEGER_CST)
3720 unsigned int i;
3721 FOR_EACH_VEC_ELT (group->stores, i, store)
3722 if (gimple_clobber_p (store->stmt))
3723 num_clobber_stmts++;
3724 else if (TREE_CODE (gimple_assign_rhs1 (store->stmt)) == CONSTRUCTOR
3725 && CONSTRUCTOR_NELTS (gimple_assign_rhs1 (store->stmt)) == 0
3726 && group->start == store->bitpos
3727 && group->width == store->bitsize
3728 && (group->start % BITS_PER_UNIT) == 0
3729 && (group->width % BITS_PER_UNIT) == 0)
3731 bzero_first = true;
3732 break;
3734 else
3735 break;
3736 FOR_EACH_VEC_ELT_FROM (group->stores, i, store, i)
3737 if (gimple_clobber_p (store->stmt))
3738 num_clobber_stmts++;
3739 if (num_clobber_stmts == orig_num_stmts)
3740 return false;
3741 orig_num_stmts -= num_clobber_stmts;
3743 if (allow_unaligned_store || bzero_first)
3745 /* If unaligned stores are allowed, see how many stores we'd emit
3746 for unaligned and how many stores we'd emit for aligned stores.
3747 Only use unaligned stores if that results in fewer stores than aligned.
3748 Similarly, if the whole region is cleared first, prefer expanding
3749 everything together rather than emitting the clear first followed
3750 by further merged stores. */
3751 unsigned cnt[4] = { ~0, ~0, ~0, ~0 };
3752 int pass_min = 0;
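/* Editor's note: the four passes encode the two knobs in the low bits of
   PASS -- bit 0 selects unaligned stores, bit 1 selects clearing the
   whole region first -- so pass 0 is aligned without the clear, pass 1
   unaligned, pass 2 aligned after the clear and pass 3 both.  */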
3753 for (int pass = 0; pass < 4; ++pass)
3755 if (!allow_unaligned_store && (pass & 1) != 0)
3756 continue;
3757 if (!bzero_first && (pass & 2) != 0)
3758 continue;
3759 cnt[pass] = split_group (group, (pass & 1) != 0,
3760 allow_unaligned_load, (pass & 2) != 0,
3761 NULL, NULL, NULL);
3762 if (cnt[pass] < cnt[pass_min])
3763 pass_min = pass;
3765 if ((pass_min & 1) == 0)
3766 allow_unaligned_store = false;
3767 if ((pass_min & 2) == 0)
3768 bzero_first = false;
3771 auto_vec<class split_store *, 32> split_stores;
3772 split_store *split_store;
3773 unsigned total_orig, total_new, i;
3774 split_group (group, allow_unaligned_store, allow_unaligned_load, bzero_first,
3775 &split_stores, &total_orig, &total_new);
3777 /* Determine if there is a clobber covering the whole group at the start,
3778 followed by proposed split stores that cover the whole group. In that
3779 case, prefer the transformation even if
3780 split_stores.length () == orig_num_stmts. */
3781 bool clobber_first = false;
3782 if (num_clobber_stmts
3783 && gimple_clobber_p (group->stores[0]->stmt)
3784 && group->start == group->stores[0]->bitpos
3785 && group->width == group->stores[0]->bitsize
3786 && (group->start % BITS_PER_UNIT) == 0
3787 && (group->width % BITS_PER_UNIT) == 0)
3789 clobber_first = true;
3790 unsigned HOST_WIDE_INT pos = group->start / BITS_PER_UNIT;
3791 FOR_EACH_VEC_ELT (split_stores, i, split_store)
3792 if (split_store->bytepos != pos)
3794 clobber_first = false;
3795 break;
3797 else
3798 pos += split_store->size / BITS_PER_UNIT;
3799 if (pos != (group->start + group->width) / BITS_PER_UNIT)
3800 clobber_first = false;
3803 if (split_stores.length () >= orig_num_stmts + clobber_first)
3806 /* We didn't manage to reduce the number of statements. Bail out. */
3807 if (dump_file && (dump_flags & TDF_DETAILS))
3808 fprintf (dump_file, "Exceeded original number of stmts (%u)."
3809 " Not profitable to emit new sequence.\n",
3810 orig_num_stmts);
3811 FOR_EACH_VEC_ELT (split_stores, i, split_store)
3812 delete split_store;
3813 return false;
3815 if (total_orig <= total_new)
3817 /* If the estimated number of new statements is not below the estimated
3818 number of original statements, bail out too. */
3819 if (dump_file && (dump_flags & TDF_DETAILS))
3820 fprintf (dump_file, "Estimated number of original stmts (%u)"
3821 " not larger than estimated number of new"
3822 " stmts (%u).\n",
3823 total_orig, total_new);
3824 FOR_EACH_VEC_ELT (split_stores, i, split_store)
3825 delete split_store;
3826 return false;
3828 if (group->stores[0]->rhs_code == INTEGER_CST)
3830 bool all_orig = true;
3831 FOR_EACH_VEC_ELT (split_stores, i, split_store)
3832 if (!split_store->orig)
3834 all_orig = false;
3835 break;
3837 if (all_orig)
3839 unsigned int cnt = split_stores.length ();
3840 store_immediate_info *store;
3841 FOR_EACH_VEC_ELT (group->stores, i, store)
3842 if (gimple_clobber_p (store->stmt))
3843 ++cnt;
3844 /* Punt if we wouldn't make any real changes, i.e. keep all
3845 orig stmts + all clobbers. */
3846 if (cnt == group->stores.length ())
3848 if (dump_file && (dump_flags & TDF_DETAILS))
3849 fprintf (dump_file, "Exceeded original number of stmts (%u)."
3850 " Not profitable to emit new sequence.\n",
3851 orig_num_stmts);
3852 FOR_EACH_VEC_ELT (split_stores, i, split_store)
3853 delete split_store;
3854 return false;
3859 gimple_stmt_iterator last_gsi = gsi_for_stmt (group->last_stmt);
3860 gimple_seq seq = NULL;
3861 tree last_vdef, new_vuse;
3862 last_vdef = gimple_vdef (group->last_stmt);
3863 new_vuse = gimple_vuse (group->last_stmt);
3864 tree bswap_res = NULL_TREE;
3866 /* Clobbers are not removed. */
3867 if (gimple_clobber_p (group->last_stmt))
3869 new_vuse = make_ssa_name (gimple_vop (cfun), group->last_stmt);
3870 gimple_set_vdef (group->last_stmt, new_vuse);
3873 if (group->stores[0]->rhs_code == LROTATE_EXPR
3874 || group->stores[0]->rhs_code == NOP_EXPR)
3876 tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type;
3877 gimple *ins_stmt = group->stores[0]->ins_stmt;
3878 struct symbolic_number *n = &group->stores[0]->n;
3879 bool bswap = group->stores[0]->rhs_code == LROTATE_EXPR;
3881 switch (n->range)
3883 case 16:
3884 load_type = bswap_type = uint16_type_node;
3885 break;
3886 case 32:
3887 load_type = uint32_type_node;
3888 if (bswap)
3890 fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
3891 bswap_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
3893 break;
3894 case 64:
3895 load_type = uint64_type_node;
3896 if (bswap)
3898 fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
3899 bswap_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
3901 break;
3902 default:
3903 gcc_unreachable ();
3906 /* If each load has the vuse of the corresponding store,
3907 we've checked the aliasing already in try_coalesce_bswap and
3908 we want to sink the new load into seq. So we need to use new_vuse
3909 on the load. */
3910 if (n->base_addr)
3912 if (n->vuse == NULL)
3914 n->vuse = new_vuse;
3915 ins_stmt = NULL;
3917 else
3918 /* Update the vuse in case it has been changed by output_merged_stores. */
3919 n->vuse = gimple_vuse (ins_stmt);
3921 bswap_res = bswap_replace (gsi_start (seq), ins_stmt, fndecl,
3922 bswap_type, load_type, n, bswap);
3923 gcc_assert (bswap_res);
3926 gimple *stmt = NULL;
3927 auto_vec<gimple *, 32> orig_stmts;
3928 gimple_seq this_seq;
3929 tree addr = force_gimple_operand_1 (unshare_expr (base_addr), &this_seq,
3930 is_gimple_mem_ref_addr, NULL_TREE);
3931 gimple_seq_add_seq_without_update (&seq, this_seq);
3933 tree load_addr[2] = { NULL_TREE, NULL_TREE };
3934 gimple_seq load_seq[2] = { NULL, NULL };
3935 gimple_stmt_iterator load_gsi[2] = { gsi_none (), gsi_none () };
3936 for (int j = 0; j < 2; ++j)
3938 store_operand_info &op = group->stores[0]->ops[j];
3939 if (op.base_addr == NULL_TREE)
3940 continue;
3942 store_immediate_info *infol = group->stores.last ();
3943 if (gimple_vuse (op.stmt) == gimple_vuse (infol->ops[j].stmt))
3945 /* We can't pick the location randomly; while we've verified
3946 all the loads have the same vuse, they can still be in different
3947 basic blocks and we need to pick the one from the last bb:
3948 int x = q[0];
3949 if (x == N) return;
3950 int y = q[1];
3951 p[0] = x;
3952 p[1] = y;
3953 otherwise if we put the wider load at the q[0] load, we might
3954 segfault if q[1] is not mapped. */
3955 basic_block bb = gimple_bb (op.stmt);
3956 gimple *ostmt = op.stmt;
3957 store_immediate_info *info;
3958 FOR_EACH_VEC_ELT (group->stores, i, info)
3960 gimple *tstmt = info->ops[j].stmt;
3961 basic_block tbb = gimple_bb (tstmt);
3962 if (dominated_by_p (CDI_DOMINATORS, tbb, bb))
3964 ostmt = tstmt;
3965 bb = tbb;
3968 load_gsi[j] = gsi_for_stmt (ostmt);
3969 load_addr[j]
3970 = force_gimple_operand_1 (unshare_expr (op.base_addr),
3971 &load_seq[j], is_gimple_mem_ref_addr,
3972 NULL_TREE);
3974 else if (operand_equal_p (base_addr, op.base_addr, 0))
3975 load_addr[j] = addr;
3976 else
3978 load_addr[j]
3979 = force_gimple_operand_1 (unshare_expr (op.base_addr),
3980 &this_seq, is_gimple_mem_ref_addr,
3981 NULL_TREE);
3982 gimple_seq_add_seq_without_update (&seq, this_seq);
3986 FOR_EACH_VEC_ELT (split_stores, i, split_store)
3988 const unsigned HOST_WIDE_INT try_size = split_store->size;
3989 const unsigned HOST_WIDE_INT try_pos = split_store->bytepos;
3990 const unsigned HOST_WIDE_INT try_bitpos = try_pos * BITS_PER_UNIT;
3991 const unsigned HOST_WIDE_INT try_align = split_store->align;
3992 const unsigned HOST_WIDE_INT try_offset = try_pos - start_byte_pos;
3993 tree dest, src;
3994 location_t loc;
3996 if (split_store->orig)
3998 /* If there is just a single non-clobber constituent store
3999 which covers the whole area, just reuse the lhs and rhs. */
4000 gimple *orig_stmt = NULL;
4001 store_immediate_info *store;
4002 unsigned int j;
4003 FOR_EACH_VEC_ELT (split_store->orig_stores, j, store)
4004 if (!gimple_clobber_p (store->stmt))
4006 orig_stmt = store->stmt;
4007 break;
4009 dest = gimple_assign_lhs (orig_stmt);
4010 src = gimple_assign_rhs1 (orig_stmt);
4011 loc = gimple_location (orig_stmt);
4013 else
4015 store_immediate_info *info;
4016 unsigned short clique, base;
4017 unsigned int k;
4018 FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
4019 orig_stmts.safe_push (info->stmt);
4020 tree offset_type
4021 = get_alias_type_for_stmts (orig_stmts, false, &clique, &base);
4022 tree dest_type;
4023 loc = get_location_for_stmts (orig_stmts);
4024 orig_stmts.truncate (0);
4026 if (group->string_concatenation)
4027 dest_type
4028 = build_array_type_nelts (char_type_node,
4029 try_size / BITS_PER_UNIT);
4030 else
4032 dest_type = build_nonstandard_integer_type (try_size, UNSIGNED);
4033 dest_type = build_aligned_type (dest_type, try_align);
4035 dest = fold_build2 (MEM_REF, dest_type, addr,
4036 build_int_cst (offset_type, try_pos));
4037 if (TREE_CODE (dest) == MEM_REF)
4039 MR_DEPENDENCE_CLIQUE (dest) = clique;
4040 MR_DEPENDENCE_BASE (dest) = base;
4043 tree mask;
4044 if (bswap_res || group->string_concatenation)
4045 mask = integer_zero_node;
4046 else
4047 mask = native_interpret_expr (dest_type,
4048 group->mask + try_offset,
4049 group->buf_size);
4051 tree ops[2];
4052 for (int j = 0;
4053 j < 1 + (split_store->orig_stores[0]->ops[1].val != NULL_TREE);
4054 ++j)
4056 store_operand_info &op = split_store->orig_stores[0]->ops[j];
4057 if (bswap_res)
4058 ops[j] = bswap_res;
4059 else if (group->string_concatenation)
4061 ops[j] = build_string (try_size / BITS_PER_UNIT,
4062 (const char *) group->val + try_offset);
4063 TREE_TYPE (ops[j]) = dest_type;
4065 else if (op.base_addr)
4067 FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
4068 orig_stmts.safe_push (info->ops[j].stmt);
4070 offset_type = get_alias_type_for_stmts (orig_stmts, true,
4071 &clique, &base);
4072 location_t load_loc = get_location_for_stmts (orig_stmts);
4073 orig_stmts.truncate (0);
4075 unsigned HOST_WIDE_INT load_align = group->load_align[j];
4076 unsigned HOST_WIDE_INT align_bitpos
4077 = known_alignment (try_bitpos
4078 - split_store->orig_stores[0]->bitpos
4079 + op.bitpos);
4080 if (align_bitpos & (load_align - 1))
4081 load_align = least_bit_hwi (align_bitpos);
4083 tree load_int_type
4084 = build_nonstandard_integer_type (try_size, UNSIGNED);
4085 load_int_type
4086 = build_aligned_type (load_int_type, load_align);
4088 poly_uint64 load_pos
4089 = exact_div (try_bitpos
4090 - split_store->orig_stores[0]->bitpos
4091 + op.bitpos,
4092 BITS_PER_UNIT);
4093 ops[j] = fold_build2 (MEM_REF, load_int_type, load_addr[j],
4094 build_int_cst (offset_type, load_pos));
4095 if (TREE_CODE (ops[j]) == MEM_REF)
4097 MR_DEPENDENCE_CLIQUE (ops[j]) = clique;
4098 MR_DEPENDENCE_BASE (ops[j]) = base;
4100 if (!integer_zerop (mask))
4101 /* The load might load some bits (that will be masked off
4102 later on) uninitialized, avoid -W*uninitialized
4103 warnings in that case. */
4104 TREE_NO_WARNING (ops[j]) = 1;
4106 stmt = gimple_build_assign (make_ssa_name (dest_type), ops[j]);
4107 gimple_set_location (stmt, load_loc);
4108 if (gsi_bb (load_gsi[j]))
4110 gimple_set_vuse (stmt, gimple_vuse (op.stmt));
4111 gimple_seq_add_stmt_without_update (&load_seq[j], stmt);
4113 else
4115 gimple_set_vuse (stmt, new_vuse);
4116 gimple_seq_add_stmt_without_update (&seq, stmt);
4118 ops[j] = gimple_assign_lhs (stmt);
4119 tree xor_mask;
4120 enum tree_code inv_op
4121 = invert_op (split_store, j, dest_type, xor_mask);
4122 if (inv_op != NOP_EXPR)
4124 stmt = gimple_build_assign (make_ssa_name (dest_type),
4125 inv_op, ops[j], xor_mask);
4126 gimple_set_location (stmt, load_loc);
4127 ops[j] = gimple_assign_lhs (stmt);
4129 if (gsi_bb (load_gsi[j]))
4130 gimple_seq_add_stmt_without_update (&load_seq[j],
4131 stmt);
4132 else
4133 gimple_seq_add_stmt_without_update (&seq, stmt);
4136 else
4137 ops[j] = native_interpret_expr (dest_type,
4138 group->val + try_offset,
4139 group->buf_size);
4142 switch (split_store->orig_stores[0]->rhs_code)
4144 case BIT_AND_EXPR:
4145 case BIT_IOR_EXPR:
4146 case BIT_XOR_EXPR:
4147 FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
4149 tree rhs1 = gimple_assign_rhs1 (info->stmt);
4150 orig_stmts.safe_push (SSA_NAME_DEF_STMT (rhs1));
4152 location_t bit_loc;
4153 bit_loc = get_location_for_stmts (orig_stmts);
4154 orig_stmts.truncate (0);
4156 stmt
4157 = gimple_build_assign (make_ssa_name (dest_type),
4158 split_store->orig_stores[0]->rhs_code,
4159 ops[0], ops[1]);
4160 gimple_set_location (stmt, bit_loc);
4161 /* If there is just one load and there is a separate
4162 load_seq[0], emit the bitwise op right after it. */
4163 if (load_addr[1] == NULL_TREE && gsi_bb (load_gsi[0]))
4164 gimple_seq_add_stmt_without_update (&load_seq[0], stmt);
4165 /* Otherwise, if at least one load is in seq, we need to
4166 emit the bitwise op right before the store. If there
4167 are two loads and they are emitted somewhere else, it would
4168 be better to emit the bitwise op as early as possible;
4169 we don't track where that would be possible right now
4170 though. */
4171 else
4172 gimple_seq_add_stmt_without_update (&seq, stmt);
4173 src = gimple_assign_lhs (stmt);
4174 tree xor_mask;
4175 enum tree_code inv_op;
4176 inv_op = invert_op (split_store, 2, dest_type, xor_mask);
4177 if (inv_op != NOP_EXPR)
4179 stmt = gimple_build_assign (make_ssa_name (dest_type),
4180 inv_op, src, xor_mask);
4181 gimple_set_location (stmt, bit_loc);
4182 if (load_addr[1] == NULL_TREE && gsi_bb (load_gsi[0]))
4183 gimple_seq_add_stmt_without_update (&load_seq[0], stmt);
4184 else
4185 gimple_seq_add_stmt_without_update (&seq, stmt);
4186 src = gimple_assign_lhs (stmt);
4188 break;
4189 case LROTATE_EXPR:
4190 case NOP_EXPR:
4191 src = ops[0];
4192 if (!is_gimple_val (src))
4194 stmt = gimple_build_assign (make_ssa_name (TREE_TYPE (src)),
4195 src);
4196 gimple_seq_add_stmt_without_update (&seq, stmt);
4197 src = gimple_assign_lhs (stmt);
4199 if (!useless_type_conversion_p (dest_type, TREE_TYPE (src)))
4201 stmt = gimple_build_assign (make_ssa_name (dest_type),
4202 NOP_EXPR, src);
4203 gimple_seq_add_stmt_without_update (&seq, stmt);
4204 src = gimple_assign_lhs (stmt);
4206 inv_op = invert_op (split_store, 2, dest_type, xor_mask);
4207 if (inv_op != NOP_EXPR)
4209 stmt = gimple_build_assign (make_ssa_name (dest_type),
4210 inv_op, src, xor_mask);
4211 gimple_set_location (stmt, loc);
4212 gimple_seq_add_stmt_without_update (&seq, stmt);
4213 src = gimple_assign_lhs (stmt);
4215 break;
4216 default:
4217 src = ops[0];
4218 break;
4221 /* If bit insertion is required, we use the source as an accumulator
4222 into which the successive bit-field values are manually inserted.
4223 FIXME: perhaps use BIT_INSERT_EXPR instead in some cases? */
4224 if (group->bit_insertion)
4225 FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
4226 if (info->rhs_code == BIT_INSERT_EXPR
4227 && info->bitpos < try_bitpos + try_size
4228 && info->bitpos + info->bitsize > try_bitpos)
4230 /* Mask, truncate, convert to final type, shift and ior into
4231 the accumulator. Note that every step can be a no-op. */
4232 const HOST_WIDE_INT start_gap = info->bitpos - try_bitpos;
4233 const HOST_WIDE_INT end_gap
4234 = (try_bitpos + try_size) - (info->bitpos + info->bitsize);
4235 tree tem = info->ops[0].val;
4236 if (!INTEGRAL_TYPE_P (TREE_TYPE (tem)))
4238 const unsigned HOST_WIDE_INT size
4239 = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (tem)));
4240 tree integer_type
4241 = build_nonstandard_integer_type (size, UNSIGNED);
4242 tem = gimple_build (&seq, loc, VIEW_CONVERT_EXPR,
4243 integer_type, tem);
4245 if (TYPE_PRECISION (TREE_TYPE (tem)) <= info->bitsize)
4247 tree bitfield_type
4248 = build_nonstandard_integer_type (info->bitsize,
4249 UNSIGNED);
4250 tem = gimple_convert (&seq, loc, bitfield_type, tem);
4252 else if ((BYTES_BIG_ENDIAN ? start_gap : end_gap) > 0)
4254 const unsigned HOST_WIDE_INT imask
4255 = (HOST_WIDE_INT_1U << info->bitsize) - 1;
4256 tem = gimple_build (&seq, loc,
4257 BIT_AND_EXPR, TREE_TYPE (tem), tem,
4258 build_int_cst (TREE_TYPE (tem),
4259 imask));
4261 const HOST_WIDE_INT shift
4262 = (BYTES_BIG_ENDIAN ? end_gap : start_gap);
4263 if (shift < 0)
4264 tem = gimple_build (&seq, loc,
4265 RSHIFT_EXPR, TREE_TYPE (tem), tem,
4266 build_int_cst (NULL_TREE, -shift));
4267 tem = gimple_convert (&seq, loc, dest_type, tem);
4268 if (shift > 0)
4269 tem = gimple_build (&seq, loc,
4270 LSHIFT_EXPR, dest_type, tem,
4271 build_int_cst (NULL_TREE, shift));
4272 src = gimple_build (&seq, loc,
4273 BIT_IOR_EXPR, dest_type, tem, src);
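/* Editor's worked example (little endian, 8-bit bytes, illustrative
   only): inserting a 3-bit field whose bitpos is try_bitpos + 4 into a
   16-bit accumulator gives start_gap == 4 and end_gap == 9, so TEM
   (assuming its type is wider than 3 bits) is masked with 0x7, converted
   to the 16-bit dest_type, shifted left by 4 and ior-ed into SRC.  */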
4276 if (!integer_zerop (mask))
4278 tree tem = make_ssa_name (dest_type);
4279 tree load_src = unshare_expr (dest);
4280 /* The load might load some or all bits uninitialized,
4281 avoid -W*uninitialized warnings in that case.
4282 As an optimization, if all the bits are provably
4283 uninitialized (no stores at all yet, or the previous store
4284 was a CLOBBER), it would be nice to optimize away the load
4285 and replace it e.g. with 0. */
4286 TREE_NO_WARNING (load_src) = 1;
4287 stmt = gimple_build_assign (tem, load_src);
4288 gimple_set_location (stmt, loc);
4289 gimple_set_vuse (stmt, new_vuse);
4290 gimple_seq_add_stmt_without_update (&seq, stmt);
4292 /* FIXME: If there is a single chunk of zero bits in mask,
4293 perhaps use BIT_INSERT_EXPR instead? */
4294 stmt = gimple_build_assign (make_ssa_name (dest_type),
4295 BIT_AND_EXPR, tem, mask);
4296 gimple_set_location (stmt, loc);
4297 gimple_seq_add_stmt_without_update (&seq, stmt);
4298 tem = gimple_assign_lhs (stmt);
4300 if (TREE_CODE (src) == INTEGER_CST)
4301 src = wide_int_to_tree (dest_type,
4302 wi::bit_and_not (wi::to_wide (src),
4303 wi::to_wide (mask)));
4304 else
4306 tree nmask
4307 = wide_int_to_tree (dest_type,
4308 wi::bit_not (wi::to_wide (mask)));
4309 stmt = gimple_build_assign (make_ssa_name (dest_type),
4310 BIT_AND_EXPR, src, nmask);
4311 gimple_set_location (stmt, loc);
4312 gimple_seq_add_stmt_without_update (&seq, stmt);
4313 src = gimple_assign_lhs (stmt);
4315 stmt = gimple_build_assign (make_ssa_name (dest_type),
4316 BIT_IOR_EXPR, tem, src);
4317 gimple_set_location (stmt, loc);
4318 gimple_seq_add_stmt_without_update (&seq, stmt);
4319 src = gimple_assign_lhs (stmt);
4323 stmt = gimple_build_assign (dest, src);
4324 gimple_set_location (stmt, loc);
4325 gimple_set_vuse (stmt, new_vuse);
4326 gimple_seq_add_stmt_without_update (&seq, stmt);
4328 if (group->lp_nr && stmt_could_throw_p (cfun, stmt))
4329 add_stmt_to_eh_lp (stmt, group->lp_nr);
4331 tree new_vdef;
4332 if (i < split_stores.length () - 1)
4333 new_vdef = make_ssa_name (gimple_vop (cfun), stmt);
4334 else
4335 new_vdef = last_vdef;
4337 gimple_set_vdef (stmt, new_vdef);
4338 SSA_NAME_DEF_STMT (new_vdef) = stmt;
4339 new_vuse = new_vdef;
4342 FOR_EACH_VEC_ELT (split_stores, i, split_store)
4343 delete split_store;
4345 gcc_assert (seq);
4346 if (dump_file)
4348 fprintf (dump_file,
4349 "New sequence of %u stores to replace old one of %u stores\n",
4350 split_stores.length (), orig_num_stmts);
4351 if (dump_flags & TDF_DETAILS)
4352 print_gimple_seq (dump_file, seq, 0, TDF_VOPS | TDF_MEMSYMS);
4355 if (gimple_clobber_p (group->last_stmt))
4356 update_stmt (group->last_stmt);
4358 if (group->lp_nr > 0)
4360 /* We're going to insert a sequence of (potentially) throwing stores
4361 into an active EH region. This means that we're going to create
4362 new basic blocks with EH edges pointing to the post landing pad
4363 and, therefore, have to update its PHI nodes, if any. For the
4364 virtual PHI node, we're going to use the VDEFs created above, but
4365 for the other nodes, we need to record the original reaching defs. */
4366 eh_landing_pad lp = get_eh_landing_pad_from_number (group->lp_nr);
4367 basic_block lp_bb = label_to_block (cfun, lp->post_landing_pad);
4368 basic_block last_bb = gimple_bb (group->last_stmt);
4369 edge last_edge = find_edge (last_bb, lp_bb);
4370 auto_vec<tree, 16> last_defs;
4371 gphi_iterator gpi;
4372 for (gpi = gsi_start_phis (lp_bb); !gsi_end_p (gpi); gsi_next (&gpi))
4374 gphi *phi = gpi.phi ();
4375 tree last_def;
4376 if (virtual_operand_p (gimple_phi_result (phi)))
4377 last_def = NULL_TREE;
4378 else
4379 last_def = gimple_phi_arg_def (phi, last_edge->dest_idx);
4380 last_defs.safe_push (last_def);
4383 /* Do the insertion. Then, if new basic blocks have been created in the
4384 process, rewind the chain of VDEFs created above to walk the new basic
4385 blocks and update the corresponding arguments of the PHI nodes. */
4386 update_modified_stmts (seq);
4387 if (gimple_find_sub_bbs (seq, &last_gsi))
4388 while (last_vdef != gimple_vuse (group->last_stmt))
4390 gimple *stmt = SSA_NAME_DEF_STMT (last_vdef);
4391 if (stmt_could_throw_p (cfun, stmt))
4393 edge new_edge = find_edge (gimple_bb (stmt), lp_bb);
4394 unsigned int i;
4395 for (gpi = gsi_start_phis (lp_bb), i = 0;
4396 !gsi_end_p (gpi);
4397 gsi_next (&gpi), i++)
4399 gphi *phi = gpi.phi ();
4400 tree new_def;
4401 if (virtual_operand_p (gimple_phi_result (phi)))
4402 new_def = last_vdef;
4403 else
4404 new_def = last_defs[i];
4405 add_phi_arg (phi, new_def, new_edge, UNKNOWN_LOCATION);
4408 last_vdef = gimple_vuse (stmt);
4411 else
4412 gsi_insert_seq_after (&last_gsi, seq, GSI_SAME_STMT);
4414 for (int j = 0; j < 2; ++j)
4415 if (load_seq[j])
4416 gsi_insert_seq_after (&load_gsi[j], load_seq[j], GSI_SAME_STMT);
4418 return true;
4421 /* Process the merged_store_group objects created in the coalescing phase.
4422 The stores are all against the base object BASE.
4423 Try to output the widened stores and delete the original statements if
4424 successful. Return true iff any changes were made. */
4426 bool
4427 imm_store_chain_info::output_merged_stores ()
4429 unsigned int i;
4430 merged_store_group *merged_store;
4431 bool ret = false;
4432 FOR_EACH_VEC_ELT (m_merged_store_groups, i, merged_store)
4434 if (dbg_cnt (store_merging)
4435 && output_merged_store (merged_store))
4437 unsigned int j;
4438 store_immediate_info *store;
4439 FOR_EACH_VEC_ELT (merged_store->stores, j, store)
4441 gimple *stmt = store->stmt;
4442 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4443 /* Don't remove clobbers, they are still useful even if
4444 everything is overwritten afterwards. */
4445 if (gimple_clobber_p (stmt))
4446 continue;
4447 gsi_remove (&gsi, true);
4448 if (store->lp_nr)
4449 remove_stmt_from_eh_lp (stmt);
4450 if (stmt != merged_store->last_stmt)
4452 unlink_stmt_vdef (stmt);
4453 release_defs (stmt);
4456 ret = true;
4459 if (ret && dump_file)
4460 fprintf (dump_file, "Merging successful!\n");
4462 return ret;
4465 /* Coalesce the store_immediate_info objects recorded against the base object
4466 BASE in the first phase and output them.
4467 Delete the allocated structures.
4468 Return true if any changes were made. */
4470 bool
4471 imm_store_chain_info::terminate_and_process_chain ()
4473 /* Process store chain. */
4474 bool ret = false;
4475 if (m_store_info.length () > 1)
4477 ret = coalesce_immediate_stores ();
4478 if (ret)
4479 ret = output_merged_stores ();
4482 /* Delete all the entries we allocated ourselves. */
4483 store_immediate_info *info;
4484 unsigned int i;
4485 FOR_EACH_VEC_ELT (m_store_info, i, info)
4486 delete info;
4488 merged_store_group *merged_info;
4489 FOR_EACH_VEC_ELT (m_merged_store_groups, i, merged_info)
4490 delete merged_info;
4492 return ret;
4495 /* Return true iff LHS is a destination potentially interesting for
4496 store merging. In practice these are the codes that get_inner_reference
4497 can process. */
4499 static bool
4500 lhs_valid_for_store_merging_p (tree lhs)
4502 if (DECL_P (lhs))
4503 return true;
4505 switch (TREE_CODE (lhs))
4507 case ARRAY_REF:
4508 case ARRAY_RANGE_REF:
4509 case BIT_FIELD_REF:
4510 case COMPONENT_REF:
4511 case MEM_REF:
4512 case VIEW_CONVERT_EXPR:
4513 return true;
4514 default:
4515 return false;
4518 gcc_unreachable ();
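/* Editor's examples (illustrative only): destinations such as  x.a ,
   a[i] ,  MEM[(int *)p + 4]  or a bare DECL are accepted above, while
   any other reference code falls through to the default case and is
   rejected.  */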
4521 /* Return true if the tree RHS is a constant we want to consider
4522 during store merging. In practice accept all codes that
4523 native_encode_expr accepts. */
4525 static bool
4526 rhs_valid_for_store_merging_p (tree rhs)
4528 unsigned HOST_WIDE_INT size;
4529 if (TREE_CODE (rhs) == CONSTRUCTOR
4530 && CONSTRUCTOR_NELTS (rhs) == 0
4531 && TYPE_SIZE_UNIT (TREE_TYPE (rhs))
4532 && tree_fits_uhwi_p (TYPE_SIZE_UNIT (TREE_TYPE (rhs))))
4533 return true;
4534 return (GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (rhs))).is_constant (&size)
4535 && native_encode_expr (rhs, NULL, size) != 0);
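/* Editor's examples (illustrative only): an INTEGER_CST such as 42, a
   REAL_CST, or the empty CONSTRUCTOR of
     struct S s = {};
   all satisfy this predicate; the empty CONSTRUCTOR case is what lets a
   whole-object zeroing store participate in a chain.  */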
4538 /* Adjust *PBITPOS, *PBITREGION_START and *PBITREGION_END by BYTE_OFF bytes
4539 and return true on success or false on failure. */
4541 static bool
4542 adjust_bit_pos (poly_offset_int byte_off,
4543 poly_int64 *pbitpos,
4544 poly_uint64 *pbitregion_start,
4545 poly_uint64 *pbitregion_end)
4547 poly_offset_int bit_off = byte_off << LOG2_BITS_PER_UNIT;
4548 bit_off += *pbitpos;
4550 if (known_ge (bit_off, 0) && bit_off.to_shwi (pbitpos))
4552 if (maybe_ne (*pbitregion_end, 0U))
4554 bit_off = byte_off << LOG2_BITS_PER_UNIT;
4555 bit_off += *pbitregion_start;
4556 if (bit_off.to_uhwi (pbitregion_start))
4558 bit_off = byte_off << LOG2_BITS_PER_UNIT;
4559 bit_off += *pbitregion_end;
4560 if (!bit_off.to_uhwi (pbitregion_end))
4561 *pbitregion_end = 0;
4563 else
4564 *pbitregion_end = 0;
4566 return true;
4568 else
4569 return false;
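/* Editor's arithmetic example (a sketch): with BYTE_OFF == 4 and
   *PBITPOS == 3 the adjusted bit position becomes
     4 * BITS_PER_UNIT + 3,
   i.e. 35 on an 8-bit-byte target, and the bit region bounds, when
   present, are shifted by the same amount.  */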
4572 /* If MEM is a memory reference usable for store merging (either as
4573 store destination or for loads), return the non-NULL base_addr
4574 and set *PBITSIZE, *PBITPOS, *PBITREGION_START and *PBITREGION_END.
4575 Otherwise return NULL; *PBITSIZE is still valid even in that
4576 case. */
4578 static tree
4579 mem_valid_for_store_merging (tree mem, poly_uint64 *pbitsize,
4580 poly_uint64 *pbitpos,
4581 poly_uint64 *pbitregion_start,
4582 poly_uint64 *pbitregion_end)
4584 poly_int64 bitsize, bitpos;
4585 poly_uint64 bitregion_start = 0, bitregion_end = 0;
4586 machine_mode mode;
4587 int unsignedp = 0, reversep = 0, volatilep = 0;
4588 tree offset;
4589 tree base_addr = get_inner_reference (mem, &bitsize, &bitpos, &offset, &mode,
4590 &unsignedp, &reversep, &volatilep);
4591 *pbitsize = bitsize;
4592 if (known_eq (bitsize, 0))
4593 return NULL_TREE;
4595 if (TREE_CODE (mem) == COMPONENT_REF
4596 && DECL_BIT_FIELD_TYPE (TREE_OPERAND (mem, 1)))
4598 get_bit_range (&bitregion_start, &bitregion_end, mem, &bitpos, &offset);
4599 if (maybe_ne (bitregion_end, 0U))
4600 bitregion_end += 1;
4603 if (reversep)
4604 return NULL_TREE;
4606 /* We do not want to rewrite TARGET_MEM_REFs. */
4607 if (TREE_CODE (base_addr) == TARGET_MEM_REF)
4608 return NULL_TREE;
4609 /* In some cases get_inner_reference may return a
4610 MEM_REF [ptr + byteoffset]. For the purposes of this pass
4611 canonicalize the base_addr to MEM_REF [ptr] and take
4612 byteoffset into account in the bitpos. This occurs in
4613 PR 23684 and this way we can catch more chains. */
4614 else if (TREE_CODE (base_addr) == MEM_REF)
4616 if (!adjust_bit_pos (mem_ref_offset (base_addr), &bitpos,
4617 &bitregion_start, &bitregion_end))
4618 return NULL_TREE;
4619 base_addr = TREE_OPERAND (base_addr, 0);
4621 /* get_inner_reference returns the base object, get at its
4622 address now. */
4623 else
4625 if (maybe_lt (bitpos, 0))
4626 return NULL_TREE;
4627 base_addr = build_fold_addr_expr (base_addr);
4630 if (offset)
4632 /* If the access has a variable offset then a base decl has to be
4633 address-taken to be able to emit pointer-based stores to it.
4634 ??? We might be able to get away with re-using the original
4635 base up to the first variable part and then wrapping that inside
4636 a BIT_FIELD_REF. */
4637 tree base = get_base_address (base_addr);
4638 if (!base || (DECL_P (base) && !TREE_ADDRESSABLE (base)))
4639 return NULL_TREE;
4641 /* Similarly to above for the base, remove the constant from the offset. */
4642 if (TREE_CODE (offset) == PLUS_EXPR
4643 && TREE_CODE (TREE_OPERAND (offset, 1)) == INTEGER_CST
4644 && adjust_bit_pos (wi::to_poly_offset (TREE_OPERAND (offset, 1)),
4645 &bitpos, &bitregion_start, &bitregion_end))
4646 offset = TREE_OPERAND (offset, 0);
4648 base_addr = build2 (POINTER_PLUS_EXPR, TREE_TYPE (base_addr),
4649 base_addr, offset);
4652 if (known_eq (bitregion_end, 0U))
4654 bitregion_start = round_down_to_byte_boundary (bitpos);
4655 bitregion_end = round_up_to_byte_boundary (bitpos + bitsize);
4658 *pbitsize = bitsize;
4659 *pbitpos = bitpos;
4660 *pbitregion_start = bitregion_start;
4661 *pbitregion_end = bitregion_end;
4662 return base_addr;
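/* Editor's illustrative example: for a destination like
   MEM[(int *)p + 4], get_inner_reference returns a MEM_REF base; the
   code above canonicalizes BASE_ADDR to the pointer  p  and folds the
   4-byte offset into *PBITPOS, so stores through p + 4 and p + 8 end up
   keyed to the same chain.  */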
4665 /* Return true if STMT is a load that can be used for store merging.
4666 In that case fill in *OP. BITSIZE, BITPOS, BITREGION_START and
4667 BITREGION_END are properties of the corresponding store. */
4669 static bool
4670 handled_load (gimple *stmt, store_operand_info *op,
4671 poly_uint64 bitsize, poly_uint64 bitpos,
4672 poly_uint64 bitregion_start, poly_uint64 bitregion_end)
4674 if (!is_gimple_assign (stmt))
4675 return false;
4676 if (gimple_assign_rhs_code (stmt) == BIT_NOT_EXPR)
4678 tree rhs1 = gimple_assign_rhs1 (stmt);
4679 if (TREE_CODE (rhs1) == SSA_NAME
4680 && handled_load (SSA_NAME_DEF_STMT (rhs1), op, bitsize, bitpos,
4681 bitregion_start, bitregion_end))
4683 /* Don't allow _1 = load; _2 = ~_1; _3 = ~_2; which should have
4684 been optimized earlier, but if allowed here, would confuse the
4685 multiple uses counting. */
4686 if (op->bit_not_p)
4687 return false;
4688 op->bit_not_p = !op->bit_not_p;
4689 return true;
4691 return false;
4693 if (gimple_vuse (stmt)
4694 && gimple_assign_load_p (stmt)
4695 && !stmt_can_throw_internal (cfun, stmt)
4696 && !gimple_has_volatile_ops (stmt))
4698 tree mem = gimple_assign_rhs1 (stmt);
4699 op->base_addr
4700 = mem_valid_for_store_merging (mem, &op->bitsize, &op->bitpos,
4701 &op->bitregion_start,
4702 &op->bitregion_end);
4703 if (op->base_addr != NULL_TREE
4704 && known_eq (op->bitsize, bitsize)
4705 && multiple_p (op->bitpos - bitpos, BITS_PER_UNIT)
4706 && known_ge (op->bitpos - op->bitregion_start,
4707 bitpos - bitregion_start)
4708 && known_ge (op->bitregion_end - op->bitpos,
4709 bitregion_end - bitpos))
4711 op->stmt = stmt;
4712 op->val = mem;
4713 op->bit_not_p = false;
4714 return true;
4717 return false;
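/* Editor's illustrative example: for
     _1 = *q;
     _2 = ~_1;
     *p = _2;
   handled_load is called on the BIT_NOT_EXPR definition of _2, recurses
   into the load of *q and returns with OP->bit_not_p set, which later
   allows the group to be emitted as one wide load, an inversion and one
   wide store.  */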
4720 /* Return the index number of the landing pad for STMT, if any. */
4722 static int
4723 lp_nr_for_store (gimple *stmt)
4725 if (!cfun->can_throw_non_call_exceptions || !cfun->eh)
4726 return 0;
4728 if (!stmt_could_throw_p (cfun, stmt))
4729 return 0;
4731 return lookup_stmt_eh_lp (stmt);
4734 /* Record the store STMT for store merging optimization if it can be
4735 optimized. Return true if any changes were made. */
4737 bool
4738 pass_store_merging::process_store (gimple *stmt)
4740 tree lhs = gimple_assign_lhs (stmt);
4741 tree rhs = gimple_assign_rhs1 (stmt);
4742 poly_uint64 bitsize, bitpos = 0;
4743 poly_uint64 bitregion_start = 0, bitregion_end = 0;
4744 tree base_addr
4745 = mem_valid_for_store_merging (lhs, &bitsize, &bitpos,
4746 &bitregion_start, &bitregion_end);
4747 if (known_eq (bitsize, 0U))
4748 return false;
4750 bool invalid = (base_addr == NULL_TREE
4751 || (maybe_gt (bitsize,
4752 (unsigned int) MAX_BITSIZE_MODE_ANY_INT)
4753 && TREE_CODE (rhs) != INTEGER_CST
4754 && (TREE_CODE (rhs) != CONSTRUCTOR
4755 || CONSTRUCTOR_NELTS (rhs) != 0)));
4756 enum tree_code rhs_code = ERROR_MARK;
4757 bool bit_not_p = false;
4758 struct symbolic_number n;
4759 gimple *ins_stmt = NULL;
4760 store_operand_info ops[2];
4761 if (invalid)
4763 else if (TREE_CODE (rhs) == STRING_CST)
4765 rhs_code = STRING_CST;
4766 ops[0].val = rhs;
4768 else if (rhs_valid_for_store_merging_p (rhs))
4770 rhs_code = INTEGER_CST;
4771 ops[0].val = rhs;
4773 else if (TREE_CODE (rhs) == SSA_NAME)
4775 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs), *def_stmt1, *def_stmt2;
4776 if (!is_gimple_assign (def_stmt))
4777 invalid = true;
4778 else if (handled_load (def_stmt, &ops[0], bitsize, bitpos,
4779 bitregion_start, bitregion_end))
4780 rhs_code = MEM_REF;
4781 else if (gimple_assign_rhs_code (def_stmt) == BIT_NOT_EXPR)
4783 tree rhs1 = gimple_assign_rhs1 (def_stmt);
4784 if (TREE_CODE (rhs1) == SSA_NAME
4785 && is_gimple_assign (SSA_NAME_DEF_STMT (rhs1)))
4787 bit_not_p = true;
4788 def_stmt = SSA_NAME_DEF_STMT (rhs1);
4792 if (rhs_code == ERROR_MARK && !invalid)
4793 switch ((rhs_code = gimple_assign_rhs_code (def_stmt)))
4795 case BIT_AND_EXPR:
4796 case BIT_IOR_EXPR:
4797 case BIT_XOR_EXPR:
4798 tree rhs1, rhs2;
4799 rhs1 = gimple_assign_rhs1 (def_stmt);
4800 rhs2 = gimple_assign_rhs2 (def_stmt);
4801 invalid = true;
4802 if (TREE_CODE (rhs1) != SSA_NAME)
4803 break;
4804 def_stmt1 = SSA_NAME_DEF_STMT (rhs1);
4805 if (!is_gimple_assign (def_stmt1)
4806 || !handled_load (def_stmt1, &ops[0], bitsize, bitpos,
4807 bitregion_start, bitregion_end))
4808 break;
4809 if (rhs_valid_for_store_merging_p (rhs2))
4810 ops[1].val = rhs2;
4811 else if (TREE_CODE (rhs2) != SSA_NAME)
4812 break;
4813 else
4815 def_stmt2 = SSA_NAME_DEF_STMT (rhs2);
4816 if (!is_gimple_assign (def_stmt2))
4817 break;
4818 else if (!handled_load (def_stmt2, &ops[1], bitsize, bitpos,
4819 bitregion_start, bitregion_end))
4820 break;
4822 invalid = false;
4823 break;
4824 default:
4825 invalid = true;
4826 break;
4829 unsigned HOST_WIDE_INT const_bitsize;
4830 if (bitsize.is_constant (&const_bitsize)
4831 && (const_bitsize % BITS_PER_UNIT) == 0
4832 && const_bitsize <= 64
4833 && multiple_p (bitpos, BITS_PER_UNIT))
4835 ins_stmt = find_bswap_or_nop_1 (def_stmt, &n, 12);
4836 if (ins_stmt)
4838 uint64_t nn = n.n;
4839 for (unsigned HOST_WIDE_INT i = 0;
4840 i < const_bitsize;
4841 i += BITS_PER_UNIT, nn >>= BITS_PER_MARKER)
4842 if ((nn & MARKER_MASK) == 0
4843 || (nn & MARKER_MASK) == MARKER_BYTE_UNKNOWN)
4845 ins_stmt = NULL;
4846 break;
4848 if (ins_stmt)
4850 if (invalid)
4852 rhs_code = LROTATE_EXPR;
4853 ops[0].base_addr = NULL_TREE;
4854 ops[1].base_addr = NULL_TREE;
4856 invalid = false;
4861 if (invalid
4862 && bitsize.is_constant (&const_bitsize)
4863 && ((const_bitsize % BITS_PER_UNIT) != 0
4864 || !multiple_p (bitpos, BITS_PER_UNIT))
4865 && const_bitsize <= MAX_FIXED_MODE_SIZE)
4867 /* Bypass a conversion to the bit-field type. */
4868 if (!bit_not_p
4869 && is_gimple_assign (def_stmt)
4870 && CONVERT_EXPR_CODE_P (rhs_code))
4872 tree rhs1 = gimple_assign_rhs1 (def_stmt);
4873 if (TREE_CODE (rhs1) == SSA_NAME
4874 && INTEGRAL_TYPE_P (TREE_TYPE (rhs1)))
4875 rhs = rhs1;
4877 rhs_code = BIT_INSERT_EXPR;
4878 bit_not_p = false;
4879 ops[0].val = rhs;
4880 ops[0].base_addr = NULL_TREE;
4881 ops[1].base_addr = NULL_TREE;
4882 invalid = false;
4885 else
4886 invalid = true;
4888 unsigned HOST_WIDE_INT const_bitsize, const_bitpos;
4889 unsigned HOST_WIDE_INT const_bitregion_start, const_bitregion_end;
4890 if (invalid
4891 || !bitsize.is_constant (&const_bitsize)
4892 || !bitpos.is_constant (&const_bitpos)
4893 || !bitregion_start.is_constant (&const_bitregion_start)
4894 || !bitregion_end.is_constant (&const_bitregion_end))
4895 return terminate_all_aliasing_chains (NULL, stmt);
4897 if (!ins_stmt)
4898 memset (&n, 0, sizeof (n));
4900 class imm_store_chain_info **chain_info = NULL;
4901 bool ret = false;
4902 if (base_addr)
4903 chain_info = m_stores.get (base_addr);
4905 store_immediate_info *info;
4906 if (chain_info)
4908 unsigned int ord = (*chain_info)->m_store_info.length ();
4909 info = new store_immediate_info (const_bitsize, const_bitpos,
4910 const_bitregion_start,
4911 const_bitregion_end,
4912 stmt, ord, rhs_code, n, ins_stmt,
4913 bit_not_p, lp_nr_for_store (stmt),
4914 ops[0], ops[1]);
4915 if (dump_file && (dump_flags & TDF_DETAILS))
4917 fprintf (dump_file, "Recording immediate store from stmt:\n");
4918 print_gimple_stmt (dump_file, stmt, 0);
4920 (*chain_info)->m_store_info.safe_push (info);
4921 ret |= terminate_all_aliasing_chains (chain_info, stmt);
4922 /* If we reach the limit of stores to merge in a chain, terminate and
4923 process the chain now. */
4924 if ((*chain_info)->m_store_info.length ()
4925 == (unsigned int) param_max_stores_to_merge)
4927 if (dump_file && (dump_flags & TDF_DETAILS))
4928 fprintf (dump_file,
4929 "Reached maximum number of statements to merge:\n");
4930 ret |= terminate_and_process_chain (*chain_info);
4932 return ret;
4935 /* Store aliases any existing chain? */
4936 ret |= terminate_all_aliasing_chains (NULL, stmt);
4937 /* Start a new chain. */
4938 class imm_store_chain_info *new_chain
4939 = new imm_store_chain_info (m_stores_head, base_addr);
4940 info = new store_immediate_info (const_bitsize, const_bitpos,
4941 const_bitregion_start,
4942 const_bitregion_end,
4943 stmt, 0, rhs_code, n, ins_stmt,
4944 bit_not_p, lp_nr_for_store (stmt),
4945 ops[0], ops[1]);
4946 new_chain->m_store_info.safe_push (info);
4947 m_stores.put (base_addr, new_chain);
4948 if (dump_file && (dump_flags & TDF_DETAILS))
4950 fprintf (dump_file, "Starting new chain with statement:\n");
4951 print_gimple_stmt (dump_file, stmt, 0);
4952 fprintf (dump_file, "The base object is:\n");
4953 print_generic_expr (dump_file, base_addr);
4954 fprintf (dump_file, "\n");
4956 return ret;
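/* Editor's illustrative example (a sketch): for the sequence
     _1 = *q;
     _2 = _1 & 0xf0;
     *p = _2;
   process_store records a store_immediate_info with rhs_code
   BIT_AND_EXPR, ops[0] describing the load from *q and ops[1].val
   holding the constant 0xf0, and pushes it onto the chain keyed by the
   base address of *p.  */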
4959 /* Return true if STMT is a store valid for store merging. */
4961 static bool
4962 store_valid_for_store_merging_p (gimple *stmt)
4964 return gimple_assign_single_p (stmt)
4965 && gimple_vdef (stmt)
4966 && lhs_valid_for_store_merging_p (gimple_assign_lhs (stmt))
4967 && (!gimple_has_volatile_ops (stmt) || gimple_clobber_p (stmt));
4970 enum basic_block_status { BB_INVALID, BB_VALID, BB_EXTENDED_VALID };
4972 /* Return the status of basic block BB wrt store merging. */
4974 static enum basic_block_status
4975 get_status_for_store_merging (basic_block bb)
4977 unsigned int num_statements = 0;
4978 gimple_stmt_iterator gsi;
4979 edge e;
4981 for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4983 gimple *stmt = gsi_stmt (gsi);
4985 if (is_gimple_debug (stmt))
4986 continue;
4988 if (store_valid_for_store_merging_p (stmt) && ++num_statements >= 2)
4989 break;
4992 if (num_statements == 0)
4993 return BB_INVALID;
4995 if (cfun->can_throw_non_call_exceptions && cfun->eh
4996 && store_valid_for_store_merging_p (gimple_seq_last_stmt (bb_seq (bb)))
4997 && (e = find_fallthru_edge (bb->succs))
4998 && e->dest == bb->next_bb)
4999 return BB_EXTENDED_VALID;
5001 return num_statements >= 2 ? BB_VALID : BB_INVALID;
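/* Editor's illustrative example: with -fnon-call-exceptions, a block
   whose last statement is a valid store and which falls through to the
   next block is reported as BB_EXTENDED_VALID, letting the caller keep
   its open chains alive across the block boundary instead of flushing
   them.  */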
5004 /* Entry point for the pass. Go over each basic block recording chains of
5005 immediate stores. Upon encountering a terminating statement (as defined
5006 by stmt_terminates_chain_p) process the recorded stores and emit the widened
5007 variants. */
5009 unsigned int
5010 pass_store_merging::execute (function *fun)
5012 basic_block bb;
5013 hash_set<gimple *> orig_stmts;
5014 bool changed = false, open_chains = false;
5016 /* If the function can throw and catch non-call exceptions, we'll be trying
5017 to merge stores across different basic blocks so we need to first unsplit
5018 the EH edges in order to streamline the CFG of the function. */
5019 if (cfun->can_throw_non_call_exceptions && cfun->eh)
5020 unsplit_eh_edges ();
5022 calculate_dominance_info (CDI_DOMINATORS);
5024 FOR_EACH_BB_FN (bb, fun)
5026 const basic_block_status bb_status = get_status_for_store_merging (bb);
5027 gimple_stmt_iterator gsi;
5029 if (open_chains && (bb_status == BB_INVALID || !single_pred_p (bb)))
5031 changed |= terminate_and_process_all_chains ();
5032 open_chains = false;
5035 if (bb_status == BB_INVALID)
5036 continue;
5038 if (dump_file && (dump_flags & TDF_DETAILS))
5039 fprintf (dump_file, "Processing basic block <%d>:\n", bb->index);
5041 for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5043 gimple *stmt = gsi_stmt (gsi);
5045 if (is_gimple_debug (stmt))
5046 continue;
5048 if (gimple_has_volatile_ops (stmt) && !gimple_clobber_p (stmt))
5050 /* Terminate all chains. */
5051 if (dump_file && (dump_flags & TDF_DETAILS))
5052 fprintf (dump_file, "Volatile access terminates "
5053 "all chains\n");
5054 changed |= terminate_and_process_all_chains ();
5055 open_chains = false;
5056 continue;
5059 if (store_valid_for_store_merging_p (stmt))
5060 changed |= process_store (stmt);
5061 else
5062 changed |= terminate_all_aliasing_chains (NULL, stmt);
5065 if (bb_status == BB_EXTENDED_VALID)
5066 open_chains = true;
5067 else
5069 changed |= terminate_and_process_all_chains ();
5070 open_chains = false;
5074 if (open_chains)
5075 changed |= terminate_and_process_all_chains ();
5077 /* If the function can throw and catch non-call exceptions and something
5078 changed during the pass, then the CFG has (very likely) changed too. */
5079 if (cfun->can_throw_non_call_exceptions && cfun->eh && changed)
5081 free_dominance_info (CDI_DOMINATORS);
5082 return TODO_cleanup_cfg;
5085 return 0;
5088 } // anon namespace
5090 /* Construct and return a store merging pass object. */
5092 gimple_opt_pass *
5093 make_pass_store_merging (gcc::context *ctxt)
5095 return new pass_store_merging (ctxt);
5098 #if CHECKING_P
5100 namespace selftest {
5102 /* Selftests for store merging helpers. */
5104 /* Assert that all elements of the byte arrays X and Y, both of length N,
5105 are equal. */
5107 static void
5108 verify_array_eq (unsigned char *x, unsigned char *y, unsigned int n)
5110 for (unsigned int i = 0; i < n; i++)
5112 if (x[i] != y[i])
5114 fprintf (stderr, "Arrays do not match. X:\n");
5115 dump_char_array (stderr, x, n);
5116 fprintf (stderr, "Y:\n");
5117 dump_char_array (stderr, y, n);
5119 ASSERT_EQ (x[i], y[i]);
5123 /* Test shift_bytes_in_array_left and check that it carries bits across
5124 bytes correctly. */
5126 static void
5127 verify_shift_bytes_in_array_left (void)
5129 /* byte 1 | byte 0
5130 00011111 | 11100000. */
5131 unsigned char orig[2] = { 0xe0, 0x1f };
5132 unsigned char in[2];
5133 memcpy (in, orig, sizeof orig);
5135 unsigned char expected[2] = { 0x80, 0x7f };
5136 shift_bytes_in_array_left (in, sizeof (in), 2);
5137 verify_array_eq (in, expected, sizeof (in));
5139 memcpy (in, orig, sizeof orig);
5140 memcpy (expected, orig, sizeof orig);
5141 /* Check that shifting by zero doesn't change anything. */
5142 shift_bytes_in_array_left (in, sizeof (in), 0);
5143 verify_array_eq (in, expected, sizeof (in));
5147 /* Test shift_bytes_in_array_right and check that it carries bits across
5148 bytes correctly. */
5150 static void
5151 verify_shift_bytes_in_array_right (void)
5153 /* byte 1 | byte 0
5154 11100000 | 00011111. */
5155 unsigned char orig[2] = { 0x1f, 0xe0};
5156 unsigned char in[2];
5157 memcpy (in, orig, sizeof orig);
5158 unsigned char expected[2] = { 0x07, 0xf8};
5159 shift_bytes_in_array_right (in, sizeof (in), 2);
5160 verify_array_eq (in, expected, sizeof (in));
5162 memcpy (in, orig, sizeof orig);
5163 memcpy (expected, orig, sizeof orig);
5164 /* Check that shifting by zero doesn't change anything. */
5165 shift_bytes_in_array_right (in, sizeof (in), 0);
5166 verify_array_eq (in, expected, sizeof (in));
5169 /* Test that clear_bit_region clears exactly the bits asked for and
5170 nothing more. */
5172 static void
5173 verify_clear_bit_region (void)
5175 /* Start with all bits set and test clearing various patterns in them. */
5176 unsigned char orig[3] = { 0xff, 0xff, 0xff};
5177 unsigned char in[3];
5178 unsigned char expected[3];
5179 memcpy (in, orig, sizeof in);
5181 /* Check zeroing out all the bits. */
5182 clear_bit_region (in, 0, 3 * BITS_PER_UNIT);
5183 expected[0] = expected[1] = expected[2] = 0;
5184 verify_array_eq (in, expected, sizeof in);
5186 memcpy (in, orig, sizeof in);
5187 /* Leave the first and last bits intact. */
5188 clear_bit_region (in, 1, 3 * BITS_PER_UNIT - 2);
5189 expected[0] = 0x1;
5190 expected[1] = 0;
5191 expected[2] = 0x80;
5192 verify_array_eq (in, expected, sizeof in);
5195 /* Test that clear_bit_region_be clears exactly the bits asked for and
5196 nothing more. */
5198 static void
5199 verify_clear_bit_region_be (void)
5201 /* Start with all bits set and test clearing various patterns in them. */
5202 unsigned char orig[3] = { 0xff, 0xff, 0xff};
5203 unsigned char in[3];
5204 unsigned char expected[3];
5205 memcpy (in, orig, sizeof in);
5207 /* Check zeroing out all the bits. */
5208 clear_bit_region_be (in, BITS_PER_UNIT - 1, 3 * BITS_PER_UNIT);
5209 expected[0] = expected[1] = expected[2] = 0;
5210 verify_array_eq (in, expected, sizeof in);
5212 memcpy (in, orig, sizeof in);
5213 /* Leave the first and last bits intact. */
5214 clear_bit_region_be (in, BITS_PER_UNIT - 2, 3 * BITS_PER_UNIT - 2);
5215 expected[0] = 0x80;
5216 expected[1] = 0;
5217 expected[2] = 0x1;
5218 verify_array_eq (in, expected, sizeof in);
5222 /* Run all of the selftests within this file. */
5224 void
5225 store_merging_c_tests (void)
5227 verify_shift_bytes_in_array_left ();
5228 verify_shift_bytes_in_array_right ();
5229 verify_clear_bit_region ();
5230 verify_clear_bit_region_be ();
5233 } // namespace selftest
5234 #endif /* CHECKING_P. */