gcov: make profile merging smarter
[official-gcc.git] / gcc / gimple-ssa-store-merging.c
blob4efa200428ac18b3d7a95bc505d0613484fbc009
1 /* GIMPLE store merging and byte swapping passes.
2 Copyright (C) 2009-2021 Free Software Foundation, Inc.
3 Contributed by ARM Ltd.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* The purpose of the store merging pass is to combine multiple memory stores
22 of constant values, values loaded from memory, bitwise operations on those,
23 or bit-field values, to consecutive locations, into fewer wider stores.
25 For example, if we have a sequence peforming four byte stores to
26 consecutive memory locations:
27 [p ] := imm1;
28 [p + 1B] := imm2;
29 [p + 2B] := imm3;
30 [p + 3B] := imm4;
31 we can transform this into a single 4-byte store if the target supports it:
32 [p] := imm1:imm2:imm3:imm4 concatenated according to endianness.
34 Or:
35 [p ] := [q ];
36 [p + 1B] := [q + 1B];
37 [p + 2B] := [q + 2B];
38 [p + 3B] := [q + 3B];
39 if there is no overlap can be transformed into a single 4-byte
40 load followed by single 4-byte store.
42 Or:
43 [p ] := [q ] ^ imm1;
44 [p + 1B] := [q + 1B] ^ imm2;
45 [p + 2B] := [q + 2B] ^ imm3;
46 [p + 3B] := [q + 3B] ^ imm4;
47 if there is no overlap can be transformed into a single 4-byte
48 load, xored with imm1:imm2:imm3:imm4 and stored using a single 4-byte store.
50 Or:
51 [p:1 ] := imm;
52 [p:31] := val & 0x7FFFFFFF;
53 we can transform this into a single 4-byte store if the target supports it:
54 [p] := imm:(val & 0x7FFFFFFF) concatenated according to endianness.
56 The algorithm is applied to each basic block in three phases:
58 1) Scan through the basic block and record assignments to destinations
59 that can be expressed as a store to memory of a certain size at a certain
60 bit offset from base expressions we can handle. For bit-fields we also
61 record the surrounding bit region, i.e. bits that could be stored in
62 a read-modify-write operation when storing the bit-field. Record store
63 chains to different bases in a hash_map (m_stores) and make sure to
64 terminate such chains when appropriate (for example when the stored
65 values get used subsequently).
66 These stores can be a result of structure element initializers, array stores
67 etc. A store_immediate_info object is recorded for every such store.
68 Record as many such assignments to a single base as possible until a
69 statement that interferes with the store sequence is encountered.
70 Each store has up to 2 operands, which can be a either constant, a memory
71 load or an SSA name, from which the value to be stored can be computed.
72 At most one of the operands can be a constant. The operands are recorded
73 in store_operand_info struct.
75 2) Analyze the chains of stores recorded in phase 1) (i.e. the vector of
76 store_immediate_info objects) and coalesce contiguous stores into
77 merged_store_group objects. For bit-field stores, we don't need to
78 require the stores to be contiguous, just their surrounding bit regions
79 have to be contiguous. If the expression being stored is different
80 between adjacent stores, such as one store storing a constant and
81 following storing a value loaded from memory, or if the loaded memory
82 objects are not adjacent, a new merged_store_group is created as well.
84 For example, given the stores:
85 [p ] := 0;
86 [p + 1B] := 1;
87 [p + 3B] := 0;
88 [p + 4B] := 1;
89 [p + 5B] := 0;
90 [p + 6B] := 0;
91 This phase would produce two merged_store_group objects, one recording the
92 two bytes stored in the memory region [p : p + 1] and another
93 recording the four bytes stored in the memory region [p + 3 : p + 6].
95 3) The merged_store_group objects produced in phase 2) are processed
96 to generate the sequence of wider stores that set the contiguous memory
97 regions to the sequence of bytes that correspond to it. This may emit
98 multiple stores per store group to handle contiguous stores that are not
99 of a size that is a power of 2. For example it can try to emit a 40-bit
100 store as a 32-bit store followed by an 8-bit store.
101 We try to emit as wide stores as we can while respecting STRICT_ALIGNMENT
102 or TARGET_SLOW_UNALIGNED_ACCESS settings.
104 Note on endianness and example:
105 Consider 2 contiguous 16-bit stores followed by 2 contiguous 8-bit stores:
106 [p ] := 0x1234;
107 [p + 2B] := 0x5678;
108 [p + 4B] := 0xab;
109 [p + 5B] := 0xcd;
111 The memory layout for little-endian (LE) and big-endian (BE) must be:
112 p |LE|BE|
113 ---------
114 0 |34|12|
115 1 |12|34|
116 2 |78|56|
117 3 |56|78|
118 4 |ab|ab|
119 5 |cd|cd|
121 To merge these into a single 48-bit merged value 'val' in phase 2)
122 on little-endian we insert stores to higher (consecutive) bitpositions
123 into the most significant bits of the merged value.
124 The final merged value would be: 0xcdab56781234
126 For big-endian we insert stores to higher bitpositions into the least
127 significant bits of the merged value.
128 The final merged value would be: 0x12345678abcd
130 Then, in phase 3), we want to emit this 48-bit value as a 32-bit store
131 followed by a 16-bit store. Again, we must consider endianness when
132 breaking down the 48-bit value 'val' computed above.
133 For little endian we emit:
134 [p] (32-bit) := 0x56781234; // val & 0x0000ffffffff;
135 [p + 4B] (16-bit) := 0xcdab; // (val & 0xffff00000000) >> 32;
137 Whereas for big-endian we emit:
138 [p] (32-bit) := 0x12345678; // (val & 0xffffffff0000) >> 16;
139 [p + 4B] (16-bit) := 0xabcd; // val & 0x00000000ffff; */
141 #include "config.h"
142 #include "system.h"
143 #include "coretypes.h"
144 #include "backend.h"
145 #include "tree.h"
146 #include "gimple.h"
147 #include "builtins.h"
148 #include "fold-const.h"
149 #include "tree-pass.h"
150 #include "ssa.h"
151 #include "gimple-pretty-print.h"
152 #include "alias.h"
153 #include "fold-const.h"
154 #include "print-tree.h"
155 #include "tree-hash-traits.h"
156 #include "gimple-iterator.h"
157 #include "gimplify.h"
158 #include "gimple-fold.h"
159 #include "stor-layout.h"
160 #include "timevar.h"
161 #include "cfganal.h"
162 #include "cfgcleanup.h"
163 #include "tree-cfg.h"
164 #include "except.h"
165 #include "tree-eh.h"
166 #include "target.h"
167 #include "gimplify-me.h"
168 #include "rtl.h"
169 #include "expr.h" /* For get_bit_range. */
170 #include "optabs-tree.h"
171 #include "dbgcnt.h"
172 #include "selftest.h"
174 /* The maximum size (in bits) of the stores this pass should generate. */
175 #define MAX_STORE_BITSIZE (BITS_PER_WORD)
176 #define MAX_STORE_BYTES (MAX_STORE_BITSIZE / BITS_PER_UNIT)
178 /* Limit to bound the number of aliasing checks for loads with the same
179 vuse as the corresponding store. */
180 #define MAX_STORE_ALIAS_CHECKS 64
182 namespace {
184 struct bswap_stat
186 /* Number of hand-written 16-bit nop / bswaps found. */
187 int found_16bit;
189 /* Number of hand-written 32-bit nop / bswaps found. */
190 int found_32bit;
192 /* Number of hand-written 64-bit nop / bswaps found. */
193 int found_64bit;
194 } nop_stats, bswap_stats;
196 /* A symbolic number structure is used to detect byte permutation and selection
197 patterns of a source. To achieve that, its field N contains an artificial
198 number consisting of BITS_PER_MARKER sized markers tracking where does each
199 byte come from in the source:
201 0 - target byte has the value 0
202 FF - target byte has an unknown value (eg. due to sign extension)
203 1..size - marker value is the byte index in the source (0 for lsb).
205 To detect permutations on memory sources (arrays and structures), a symbolic
206 number is also associated:
207 - a base address BASE_ADDR and an OFFSET giving the address of the source;
208 - a range which gives the difference between the highest and lowest accessed
209 memory location to make such a symbolic number;
210 - the address SRC of the source element of lowest address as a convenience
211 to easily get BASE_ADDR + offset + lowest bytepos;
212 - number of expressions N_OPS bitwise ored together to represent
213 approximate cost of the computation.
215 Note 1: the range is different from size as size reflects the size of the
216 type of the current expression. For instance, for an array char a[],
217 (short) a[0] | (short) a[3] would have a size of 2 but a range of 4 while
218 (short) a[0] | ((short) a[0] << 1) would still have a size of 2 but this
219 time a range of 1.
221 Note 2: for non-memory sources, range holds the same value as size.
223 Note 3: SRC points to the SSA_NAME in case of non-memory source. */
225 struct symbolic_number {
226 uint64_t n;
227 tree type;
228 tree base_addr;
229 tree offset;
230 poly_int64_pod bytepos;
231 tree src;
232 tree alias_set;
233 tree vuse;
234 unsigned HOST_WIDE_INT range;
235 int n_ops;
238 #define BITS_PER_MARKER 8
239 #define MARKER_MASK ((1 << BITS_PER_MARKER) - 1)
240 #define MARKER_BYTE_UNKNOWN MARKER_MASK
241 #define HEAD_MARKER(n, size) \
242 ((n) & ((uint64_t) MARKER_MASK << (((size) - 1) * BITS_PER_MARKER)))
244 /* The number which the find_bswap_or_nop_1 result should match in
245 order to have a nop. The number is masked according to the size of
246 the symbolic number before using it. */
247 #define CMPNOP (sizeof (int64_t) < 8 ? 0 : \
248 (uint64_t)0x08070605 << 32 | 0x04030201)
250 /* The number which the find_bswap_or_nop_1 result should match in
251 order to have a byte swap. The number is masked according to the
252 size of the symbolic number before using it. */
253 #define CMPXCHG (sizeof (int64_t) < 8 ? 0 : \
254 (uint64_t)0x01020304 << 32 | 0x05060708)
256 /* Perform a SHIFT or ROTATE operation by COUNT bits on symbolic
257 number N. Return false if the requested operation is not permitted
258 on a symbolic number. */
260 inline bool
261 do_shift_rotate (enum tree_code code,
262 struct symbolic_number *n,
263 int count)
265 int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
266 unsigned head_marker;
268 if (count < 0
269 || count >= TYPE_PRECISION (n->type)
270 || count % BITS_PER_UNIT != 0)
271 return false;
272 count = (count / BITS_PER_UNIT) * BITS_PER_MARKER;
274 /* Zero out the extra bits of N in order to avoid them being shifted
275 into the significant bits. */
276 if (size < 64 / BITS_PER_MARKER)
277 n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
279 switch (code)
281 case LSHIFT_EXPR:
282 n->n <<= count;
283 break;
284 case RSHIFT_EXPR:
285 head_marker = HEAD_MARKER (n->n, size);
286 n->n >>= count;
287 /* Arithmetic shift of signed type: result is dependent on the value. */
288 if (!TYPE_UNSIGNED (n->type) && head_marker)
289 for (i = 0; i < count / BITS_PER_MARKER; i++)
290 n->n |= (uint64_t) MARKER_BYTE_UNKNOWN
291 << ((size - 1 - i) * BITS_PER_MARKER);
292 break;
293 case LROTATE_EXPR:
294 n->n = (n->n << count) | (n->n >> ((size * BITS_PER_MARKER) - count));
295 break;
296 case RROTATE_EXPR:
297 n->n = (n->n >> count) | (n->n << ((size * BITS_PER_MARKER) - count));
298 break;
299 default:
300 return false;
302 /* Zero unused bits for size. */
303 if (size < 64 / BITS_PER_MARKER)
304 n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
305 return true;
308 /* Perform sanity checking for the symbolic number N and the gimple
309 statement STMT. */
311 inline bool
312 verify_symbolic_number_p (struct symbolic_number *n, gimple *stmt)
314 tree lhs_type;
316 lhs_type = TREE_TYPE (gimple_get_lhs (stmt));
318 if (TREE_CODE (lhs_type) != INTEGER_TYPE
319 && TREE_CODE (lhs_type) != ENUMERAL_TYPE)
320 return false;
322 if (TYPE_PRECISION (lhs_type) != TYPE_PRECISION (n->type))
323 return false;
325 return true;
328 /* Initialize the symbolic number N for the bswap pass from the base element
329 SRC manipulated by the bitwise OR expression. */
331 bool
332 init_symbolic_number (struct symbolic_number *n, tree src)
334 int size;
336 if (!INTEGRAL_TYPE_P (TREE_TYPE (src)) && !POINTER_TYPE_P (TREE_TYPE (src)))
337 return false;
339 n->base_addr = n->offset = n->alias_set = n->vuse = NULL_TREE;
340 n->src = src;
342 /* Set up the symbolic number N by setting each byte to a value between 1 and
343 the byte size of rhs1. The highest order byte is set to n->size and the
344 lowest order byte to 1. */
345 n->type = TREE_TYPE (src);
346 size = TYPE_PRECISION (n->type);
347 if (size % BITS_PER_UNIT != 0)
348 return false;
349 size /= BITS_PER_UNIT;
350 if (size > 64 / BITS_PER_MARKER)
351 return false;
352 n->range = size;
353 n->n = CMPNOP;
354 n->n_ops = 1;
356 if (size < 64 / BITS_PER_MARKER)
357 n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
359 return true;
362 /* Check if STMT might be a byte swap or a nop from a memory source and returns
363 the answer. If so, REF is that memory source and the base of the memory area
364 accessed and the offset of the access from that base are recorded in N. */
366 bool
367 find_bswap_or_nop_load (gimple *stmt, tree ref, struct symbolic_number *n)
369 /* Leaf node is an array or component ref. Memorize its base and
370 offset from base to compare to other such leaf node. */
371 poly_int64 bitsize, bitpos, bytepos;
372 machine_mode mode;
373 int unsignedp, reversep, volatilep;
374 tree offset, base_addr;
376 /* Not prepared to handle PDP endian. */
377 if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
378 return false;
380 if (!gimple_assign_load_p (stmt) || gimple_has_volatile_ops (stmt))
381 return false;
383 base_addr = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode,
384 &unsignedp, &reversep, &volatilep);
386 if (TREE_CODE (base_addr) == TARGET_MEM_REF)
387 /* Do not rewrite TARGET_MEM_REF. */
388 return false;
389 else if (TREE_CODE (base_addr) == MEM_REF)
391 poly_offset_int bit_offset = 0;
392 tree off = TREE_OPERAND (base_addr, 1);
394 if (!integer_zerop (off))
396 poly_offset_int boff = mem_ref_offset (base_addr);
397 boff <<= LOG2_BITS_PER_UNIT;
398 bit_offset += boff;
401 base_addr = TREE_OPERAND (base_addr, 0);
403 /* Avoid returning a negative bitpos as this may wreak havoc later. */
404 if (maybe_lt (bit_offset, 0))
406 tree byte_offset = wide_int_to_tree
407 (sizetype, bits_to_bytes_round_down (bit_offset));
408 bit_offset = num_trailing_bits (bit_offset);
409 if (offset)
410 offset = size_binop (PLUS_EXPR, offset, byte_offset);
411 else
412 offset = byte_offset;
415 bitpos += bit_offset.force_shwi ();
417 else
418 base_addr = build_fold_addr_expr (base_addr);
420 if (!multiple_p (bitpos, BITS_PER_UNIT, &bytepos))
421 return false;
422 if (!multiple_p (bitsize, BITS_PER_UNIT))
423 return false;
424 if (reversep)
425 return false;
427 if (!init_symbolic_number (n, ref))
428 return false;
429 n->base_addr = base_addr;
430 n->offset = offset;
431 n->bytepos = bytepos;
432 n->alias_set = reference_alias_ptr_type (ref);
433 n->vuse = gimple_vuse (stmt);
434 return true;
437 /* Compute the symbolic number N representing the result of a bitwise OR on 2
438 symbolic number N1 and N2 whose source statements are respectively
439 SOURCE_STMT1 and SOURCE_STMT2. */
441 gimple *
442 perform_symbolic_merge (gimple *source_stmt1, struct symbolic_number *n1,
443 gimple *source_stmt2, struct symbolic_number *n2,
444 struct symbolic_number *n)
446 int i, size;
447 uint64_t mask;
448 gimple *source_stmt;
449 struct symbolic_number *n_start;
451 tree rhs1 = gimple_assign_rhs1 (source_stmt1);
452 if (TREE_CODE (rhs1) == BIT_FIELD_REF
453 && TREE_CODE (TREE_OPERAND (rhs1, 0)) == SSA_NAME)
454 rhs1 = TREE_OPERAND (rhs1, 0);
455 tree rhs2 = gimple_assign_rhs1 (source_stmt2);
456 if (TREE_CODE (rhs2) == BIT_FIELD_REF
457 && TREE_CODE (TREE_OPERAND (rhs2, 0)) == SSA_NAME)
458 rhs2 = TREE_OPERAND (rhs2, 0);
460 /* Sources are different, cancel bswap if they are not memory location with
461 the same base (array, structure, ...). */
462 if (rhs1 != rhs2)
464 uint64_t inc;
465 HOST_WIDE_INT start1, start2, start_sub, end_sub, end1, end2, end;
466 struct symbolic_number *toinc_n_ptr, *n_end;
467 basic_block bb1, bb2;
469 if (!n1->base_addr || !n2->base_addr
470 || !operand_equal_p (n1->base_addr, n2->base_addr, 0))
471 return NULL;
473 if (!n1->offset != !n2->offset
474 || (n1->offset && !operand_equal_p (n1->offset, n2->offset, 0)))
475 return NULL;
477 start1 = 0;
478 if (!(n2->bytepos - n1->bytepos).is_constant (&start2))
479 return NULL;
481 if (start1 < start2)
483 n_start = n1;
484 start_sub = start2 - start1;
486 else
488 n_start = n2;
489 start_sub = start1 - start2;
492 bb1 = gimple_bb (source_stmt1);
493 bb2 = gimple_bb (source_stmt2);
494 if (dominated_by_p (CDI_DOMINATORS, bb1, bb2))
495 source_stmt = source_stmt1;
496 else
497 source_stmt = source_stmt2;
499 /* Find the highest address at which a load is performed and
500 compute related info. */
501 end1 = start1 + (n1->range - 1);
502 end2 = start2 + (n2->range - 1);
503 if (end1 < end2)
505 end = end2;
506 end_sub = end2 - end1;
508 else
510 end = end1;
511 end_sub = end1 - end2;
513 n_end = (end2 > end1) ? n2 : n1;
515 /* Find symbolic number whose lsb is the most significant. */
516 if (BYTES_BIG_ENDIAN)
517 toinc_n_ptr = (n_end == n1) ? n2 : n1;
518 else
519 toinc_n_ptr = (n_start == n1) ? n2 : n1;
521 n->range = end - MIN (start1, start2) + 1;
523 /* Check that the range of memory covered can be represented by
524 a symbolic number. */
525 if (n->range > 64 / BITS_PER_MARKER)
526 return NULL;
528 /* Reinterpret byte marks in symbolic number holding the value of
529 bigger weight according to target endianness. */
530 inc = BYTES_BIG_ENDIAN ? end_sub : start_sub;
531 size = TYPE_PRECISION (n1->type) / BITS_PER_UNIT;
532 for (i = 0; i < size; i++, inc <<= BITS_PER_MARKER)
534 unsigned marker
535 = (toinc_n_ptr->n >> (i * BITS_PER_MARKER)) & MARKER_MASK;
536 if (marker && marker != MARKER_BYTE_UNKNOWN)
537 toinc_n_ptr->n += inc;
540 else
542 n->range = n1->range;
543 n_start = n1;
544 source_stmt = source_stmt1;
547 if (!n1->alias_set
548 || alias_ptr_types_compatible_p (n1->alias_set, n2->alias_set))
549 n->alias_set = n1->alias_set;
550 else
551 n->alias_set = ptr_type_node;
552 n->vuse = n_start->vuse;
553 n->base_addr = n_start->base_addr;
554 n->offset = n_start->offset;
555 n->src = n_start->src;
556 n->bytepos = n_start->bytepos;
557 n->type = n_start->type;
558 size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
560 for (i = 0, mask = MARKER_MASK; i < size; i++, mask <<= BITS_PER_MARKER)
562 uint64_t masked1, masked2;
564 masked1 = n1->n & mask;
565 masked2 = n2->n & mask;
566 if (masked1 && masked2 && masked1 != masked2)
567 return NULL;
569 n->n = n1->n | n2->n;
570 n->n_ops = n1->n_ops + n2->n_ops;
572 return source_stmt;
575 /* find_bswap_or_nop_1 invokes itself recursively with N and tries to perform
576 the operation given by the rhs of STMT on the result. If the operation
577 could successfully be executed the function returns a gimple stmt whose
578 rhs's first tree is the expression of the source operand and NULL
579 otherwise. */
581 gimple *
582 find_bswap_or_nop_1 (gimple *stmt, struct symbolic_number *n, int limit)
584 enum tree_code code;
585 tree rhs1, rhs2 = NULL;
586 gimple *rhs1_stmt, *rhs2_stmt, *source_stmt1;
587 enum gimple_rhs_class rhs_class;
589 if (!limit || !is_gimple_assign (stmt))
590 return NULL;
592 rhs1 = gimple_assign_rhs1 (stmt);
594 if (find_bswap_or_nop_load (stmt, rhs1, n))
595 return stmt;
597 /* Handle BIT_FIELD_REF. */
598 if (TREE_CODE (rhs1) == BIT_FIELD_REF
599 && TREE_CODE (TREE_OPERAND (rhs1, 0)) == SSA_NAME)
601 if (!tree_fits_uhwi_p (TREE_OPERAND (rhs1, 1))
602 || !tree_fits_uhwi_p (TREE_OPERAND (rhs1, 2)))
603 return NULL;
605 unsigned HOST_WIDE_INT bitsize = tree_to_uhwi (TREE_OPERAND (rhs1, 1));
606 unsigned HOST_WIDE_INT bitpos = tree_to_uhwi (TREE_OPERAND (rhs1, 2));
607 if (bitpos % BITS_PER_UNIT == 0
608 && bitsize % BITS_PER_UNIT == 0
609 && init_symbolic_number (n, TREE_OPERAND (rhs1, 0)))
611 /* Handle big-endian bit numbering in BIT_FIELD_REF. */
612 if (BYTES_BIG_ENDIAN)
613 bitpos = TYPE_PRECISION (n->type) - bitpos - bitsize;
615 /* Shift. */
616 if (!do_shift_rotate (RSHIFT_EXPR, n, bitpos))
617 return NULL;
619 /* Mask. */
620 uint64_t mask = 0;
621 uint64_t tmp = (1 << BITS_PER_UNIT) - 1;
622 for (unsigned i = 0; i < bitsize / BITS_PER_UNIT;
623 i++, tmp <<= BITS_PER_UNIT)
624 mask |= (uint64_t) MARKER_MASK << (i * BITS_PER_MARKER);
625 n->n &= mask;
627 /* Convert. */
628 n->type = TREE_TYPE (rhs1);
629 if (!n->base_addr)
630 n->range = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
632 return verify_symbolic_number_p (n, stmt) ? stmt : NULL;
635 return NULL;
638 if (TREE_CODE (rhs1) != SSA_NAME)
639 return NULL;
641 code = gimple_assign_rhs_code (stmt);
642 rhs_class = gimple_assign_rhs_class (stmt);
643 rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
645 if (rhs_class == GIMPLE_BINARY_RHS)
646 rhs2 = gimple_assign_rhs2 (stmt);
648 /* Handle unary rhs and binary rhs with integer constants as second
649 operand. */
651 if (rhs_class == GIMPLE_UNARY_RHS
652 || (rhs_class == GIMPLE_BINARY_RHS
653 && TREE_CODE (rhs2) == INTEGER_CST))
655 if (code != BIT_AND_EXPR
656 && code != LSHIFT_EXPR
657 && code != RSHIFT_EXPR
658 && code != LROTATE_EXPR
659 && code != RROTATE_EXPR
660 && !CONVERT_EXPR_CODE_P (code))
661 return NULL;
663 source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, n, limit - 1);
665 /* If find_bswap_or_nop_1 returned NULL, STMT is a leaf node and
666 we have to initialize the symbolic number. */
667 if (!source_stmt1)
669 if (gimple_assign_load_p (stmt)
670 || !init_symbolic_number (n, rhs1))
671 return NULL;
672 source_stmt1 = stmt;
675 switch (code)
677 case BIT_AND_EXPR:
679 int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
680 uint64_t val = int_cst_value (rhs2), mask = 0;
681 uint64_t tmp = (1 << BITS_PER_UNIT) - 1;
683 /* Only constants masking full bytes are allowed. */
684 for (i = 0; i < size; i++, tmp <<= BITS_PER_UNIT)
685 if ((val & tmp) != 0 && (val & tmp) != tmp)
686 return NULL;
687 else if (val & tmp)
688 mask |= (uint64_t) MARKER_MASK << (i * BITS_PER_MARKER);
690 n->n &= mask;
692 break;
693 case LSHIFT_EXPR:
694 case RSHIFT_EXPR:
695 case LROTATE_EXPR:
696 case RROTATE_EXPR:
697 if (!do_shift_rotate (code, n, (int) TREE_INT_CST_LOW (rhs2)))
698 return NULL;
699 break;
700 CASE_CONVERT:
702 int i, type_size, old_type_size;
703 tree type;
705 type = TREE_TYPE (gimple_assign_lhs (stmt));
706 type_size = TYPE_PRECISION (type);
707 if (type_size % BITS_PER_UNIT != 0)
708 return NULL;
709 type_size /= BITS_PER_UNIT;
710 if (type_size > 64 / BITS_PER_MARKER)
711 return NULL;
713 /* Sign extension: result is dependent on the value. */
714 old_type_size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
715 if (!TYPE_UNSIGNED (n->type) && type_size > old_type_size
716 && HEAD_MARKER (n->n, old_type_size))
717 for (i = 0; i < type_size - old_type_size; i++)
718 n->n |= (uint64_t) MARKER_BYTE_UNKNOWN
719 << ((type_size - 1 - i) * BITS_PER_MARKER);
721 if (type_size < 64 / BITS_PER_MARKER)
723 /* If STMT casts to a smaller type mask out the bits not
724 belonging to the target type. */
725 n->n &= ((uint64_t) 1 << (type_size * BITS_PER_MARKER)) - 1;
727 n->type = type;
728 if (!n->base_addr)
729 n->range = type_size;
731 break;
732 default:
733 return NULL;
735 return verify_symbolic_number_p (n, stmt) ? source_stmt1 : NULL;
738 /* Handle binary rhs. */
740 if (rhs_class == GIMPLE_BINARY_RHS)
742 struct symbolic_number n1, n2;
743 gimple *source_stmt, *source_stmt2;
745 if (code != BIT_IOR_EXPR)
746 return NULL;
748 if (TREE_CODE (rhs2) != SSA_NAME)
749 return NULL;
751 rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
753 switch (code)
755 case BIT_IOR_EXPR:
756 source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, &n1, limit - 1);
758 if (!source_stmt1)
759 return NULL;
761 source_stmt2 = find_bswap_or_nop_1 (rhs2_stmt, &n2, limit - 1);
763 if (!source_stmt2)
764 return NULL;
766 if (TYPE_PRECISION (n1.type) != TYPE_PRECISION (n2.type))
767 return NULL;
769 if (n1.vuse != n2.vuse)
770 return NULL;
772 source_stmt
773 = perform_symbolic_merge (source_stmt1, &n1, source_stmt2, &n2, n);
775 if (!source_stmt)
776 return NULL;
778 if (!verify_symbolic_number_p (n, stmt))
779 return NULL;
781 break;
782 default:
783 return NULL;
785 return source_stmt;
787 return NULL;
790 /* Helper for find_bswap_or_nop and try_coalesce_bswap to compute
791 *CMPXCHG, *CMPNOP and adjust *N. */
793 void
794 find_bswap_or_nop_finalize (struct symbolic_number *n, uint64_t *cmpxchg,
795 uint64_t *cmpnop, bool *cast64_to_32)
797 unsigned rsize;
798 uint64_t tmpn, mask;
800 /* The number which the find_bswap_or_nop_1 result should match in order
801 to have a full byte swap. The number is shifted to the right
802 according to the size of the symbolic number before using it. */
803 *cmpxchg = CMPXCHG;
804 *cmpnop = CMPNOP;
805 *cast64_to_32 = false;
807 /* Find real size of result (highest non-zero byte). */
808 if (n->base_addr)
809 for (tmpn = n->n, rsize = 0; tmpn; tmpn >>= BITS_PER_MARKER, rsize++);
810 else
811 rsize = n->range;
813 /* Zero out the bits corresponding to untouched bytes in original gimple
814 expression. */
815 if (n->range < (int) sizeof (int64_t))
817 mask = ((uint64_t) 1 << (n->range * BITS_PER_MARKER)) - 1;
818 if (n->base_addr == NULL
819 && n->range == 4
820 && int_size_in_bytes (TREE_TYPE (n->src)) == 8)
822 /* If all bytes in n->n are either 0 or in [5..8] range, this
823 might be a candidate for (unsigned) __builtin_bswap64 (src).
824 It is not worth it for (unsigned short) __builtin_bswap64 (src)
825 or (unsigned short) __builtin_bswap32 (src). */
826 *cast64_to_32 = true;
827 for (tmpn = n->n; tmpn; tmpn >>= BITS_PER_MARKER)
828 if ((tmpn & MARKER_MASK)
829 && ((tmpn & MARKER_MASK) <= 4 || (tmpn & MARKER_MASK) > 8))
831 *cast64_to_32 = false;
832 break;
835 if (*cast64_to_32)
836 *cmpxchg &= mask;
837 else
838 *cmpxchg >>= (64 / BITS_PER_MARKER - n->range) * BITS_PER_MARKER;
839 *cmpnop &= mask;
842 /* Zero out the bits corresponding to unused bytes in the result of the
843 gimple expression. */
844 if (rsize < n->range)
846 if (BYTES_BIG_ENDIAN)
848 mask = ((uint64_t) 1 << (rsize * BITS_PER_MARKER)) - 1;
849 *cmpxchg &= mask;
850 *cmpnop >>= (n->range - rsize) * BITS_PER_MARKER;
852 else
854 mask = ((uint64_t) 1 << (rsize * BITS_PER_MARKER)) - 1;
855 *cmpxchg >>= (n->range - rsize) * BITS_PER_MARKER;
856 *cmpnop &= mask;
858 n->range = rsize;
861 if (*cast64_to_32)
862 n->range = 8;
863 n->range *= BITS_PER_UNIT;
866 /* Check if STMT completes a bswap implementation or a read in a given
867 endianness consisting of ORs, SHIFTs and ANDs and sets *BSWAP
868 accordingly. It also sets N to represent the kind of operations
869 performed: size of the resulting expression and whether it works on
870 a memory source, and if so alias-set and vuse. At last, the
871 function returns a stmt whose rhs's first tree is the source
872 expression. */
874 gimple *
875 find_bswap_or_nop (gimple *stmt, struct symbolic_number *n, bool *bswap,
876 bool *cast64_to_32, uint64_t *mask)
878 tree type_size = TYPE_SIZE_UNIT (TREE_TYPE (gimple_get_lhs (stmt)));
879 if (!tree_fits_uhwi_p (type_size))
880 return NULL;
882 /* The last parameter determines the depth search limit. It usually
883 correlates directly to the number n of bytes to be touched. We
884 increase that number by 2 * (log2(n) + 1) here in order to also
885 cover signed -> unsigned conversions of the src operand as can be seen
886 in libgcc, and for initial shift/and operation of the src operand. */
887 int limit = tree_to_uhwi (type_size);
888 limit += 2 * (1 + (int) ceil_log2 ((unsigned HOST_WIDE_INT) limit));
889 gimple *ins_stmt = find_bswap_or_nop_1 (stmt, n, limit);
891 if (!ins_stmt)
893 if (gimple_assign_rhs_code (stmt) != CONSTRUCTOR
894 || BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
895 return NULL;
896 unsigned HOST_WIDE_INT sz = tree_to_uhwi (type_size) * BITS_PER_UNIT;
897 if (sz != 16 && sz != 32 && sz != 64)
898 return NULL;
899 tree rhs = gimple_assign_rhs1 (stmt);
900 if (CONSTRUCTOR_NELTS (rhs) == 0)
901 return NULL;
902 tree eltype = TREE_TYPE (TREE_TYPE (rhs));
903 unsigned HOST_WIDE_INT eltsz
904 = int_size_in_bytes (eltype) * BITS_PER_UNIT;
905 if (TYPE_PRECISION (eltype) != eltsz)
906 return NULL;
907 constructor_elt *elt;
908 unsigned int i;
909 tree type = build_nonstandard_integer_type (sz, 1);
910 FOR_EACH_VEC_SAFE_ELT (CONSTRUCTOR_ELTS (rhs), i, elt)
912 if (TREE_CODE (elt->value) != SSA_NAME
913 || !INTEGRAL_TYPE_P (TREE_TYPE (elt->value)))
914 return NULL;
915 struct symbolic_number n1;
916 gimple *source_stmt
917 = find_bswap_or_nop_1 (SSA_NAME_DEF_STMT (elt->value), &n1,
918 limit - 1);
920 if (!source_stmt)
921 return NULL;
923 n1.type = type;
924 if (!n1.base_addr)
925 n1.range = sz / BITS_PER_UNIT;
927 if (i == 0)
929 ins_stmt = source_stmt;
930 *n = n1;
932 else
934 if (n->vuse != n1.vuse)
935 return NULL;
937 struct symbolic_number n0 = *n;
939 if (!BYTES_BIG_ENDIAN)
941 if (!do_shift_rotate (LSHIFT_EXPR, &n1, i * eltsz))
942 return NULL;
944 else if (!do_shift_rotate (LSHIFT_EXPR, &n0, eltsz))
945 return NULL;
946 ins_stmt
947 = perform_symbolic_merge (ins_stmt, &n0, source_stmt, &n1, n);
949 if (!ins_stmt)
950 return NULL;
955 uint64_t cmpxchg, cmpnop;
956 find_bswap_or_nop_finalize (n, &cmpxchg, &cmpnop, cast64_to_32);
958 /* A complete byte swap should make the symbolic number to start with
959 the largest digit in the highest order byte. Unchanged symbolic
960 number indicates a read with same endianness as target architecture. */
961 *mask = ~(uint64_t) 0;
962 if (n->n == cmpnop)
963 *bswap = false;
964 else if (n->n == cmpxchg)
965 *bswap = true;
966 else
968 int set = 0;
969 for (uint64_t msk = MARKER_MASK; msk; msk <<= BITS_PER_MARKER)
970 if ((n->n & msk) == 0)
971 *mask &= ~msk;
972 else if ((n->n & msk) == (cmpxchg & msk))
973 set++;
974 else
975 return NULL;
976 if (set < 2)
977 return NULL;
978 *bswap = true;
981 /* Useless bit manipulation performed by code. */
982 if (!n->base_addr && n->n == cmpnop && n->n_ops == 1)
983 return NULL;
985 return ins_stmt;
988 const pass_data pass_data_optimize_bswap =
990 GIMPLE_PASS, /* type */
991 "bswap", /* name */
992 OPTGROUP_NONE, /* optinfo_flags */
993 TV_NONE, /* tv_id */
994 PROP_ssa, /* properties_required */
995 0, /* properties_provided */
996 0, /* properties_destroyed */
997 0, /* todo_flags_start */
998 0, /* todo_flags_finish */
1001 class pass_optimize_bswap : public gimple_opt_pass
1003 public:
1004 pass_optimize_bswap (gcc::context *ctxt)
1005 : gimple_opt_pass (pass_data_optimize_bswap, ctxt)
1008 /* opt_pass methods: */
1009 virtual bool gate (function *)
1011 return flag_expensive_optimizations && optimize && BITS_PER_UNIT == 8;
1014 virtual unsigned int execute (function *);
1016 }; // class pass_optimize_bswap
1018 /* Helper function for bswap_replace. Build VIEW_CONVERT_EXPR from
1019 VAL to TYPE. If VAL has different type size, emit a NOP_EXPR cast
1020 first. */
1022 static tree
1023 bswap_view_convert (gimple_stmt_iterator *gsi, tree type, tree val,
1024 bool before)
1026 gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (val))
1027 || POINTER_TYPE_P (TREE_TYPE (val)));
1028 if (TYPE_SIZE (type) != TYPE_SIZE (TREE_TYPE (val)))
1030 HOST_WIDE_INT prec = TREE_INT_CST_LOW (TYPE_SIZE (type));
1031 if (POINTER_TYPE_P (TREE_TYPE (val)))
1033 gimple *g
1034 = gimple_build_assign (make_ssa_name (pointer_sized_int_node),
1035 NOP_EXPR, val);
1036 if (before)
1037 gsi_insert_before (gsi, g, GSI_SAME_STMT);
1038 else
1039 gsi_insert_after (gsi, g, GSI_NEW_STMT);
1040 val = gimple_assign_lhs (g);
1042 tree itype = build_nonstandard_integer_type (prec, 1);
1043 gimple *g = gimple_build_assign (make_ssa_name (itype), NOP_EXPR, val);
1044 if (before)
1045 gsi_insert_before (gsi, g, GSI_SAME_STMT);
1046 else
1047 gsi_insert_after (gsi, g, GSI_NEW_STMT);
1048 val = gimple_assign_lhs (g);
1050 return build1 (VIEW_CONVERT_EXPR, type, val);
1053 /* Perform the bswap optimization: replace the expression computed in the rhs
1054 of gsi_stmt (GSI) (or if NULL add instead of replace) by an equivalent
1055 bswap, load or load + bswap expression.
1056 Which of these alternatives replace the rhs is given by N->base_addr (non
1057 null if a load is needed) and BSWAP. The type, VUSE and set-alias of the
1058 load to perform are also given in N while the builtin bswap invoke is given
1059 in FNDEL. Finally, if a load is involved, INS_STMT refers to one of the
1060 load statements involved to construct the rhs in gsi_stmt (GSI) and
1061 N->range gives the size of the rhs expression for maintaining some
1062 statistics.
1064 Note that if the replacement involve a load and if gsi_stmt (GSI) is
1065 non-NULL, that stmt is moved just after INS_STMT to do the load with the
1066 same VUSE which can lead to gsi_stmt (GSI) changing of basic block. */
1068 tree
1069 bswap_replace (gimple_stmt_iterator gsi, gimple *ins_stmt, tree fndecl,
1070 tree bswap_type, tree load_type, struct symbolic_number *n,
1071 bool bswap, uint64_t mask)
1073 tree src, tmp, tgt = NULL_TREE;
1074 gimple *bswap_stmt, *mask_stmt = NULL;
1075 tree_code conv_code = NOP_EXPR;
1077 gimple *cur_stmt = gsi_stmt (gsi);
1078 src = n->src;
1079 if (cur_stmt)
1081 tgt = gimple_assign_lhs (cur_stmt);
1082 if (gimple_assign_rhs_code (cur_stmt) == CONSTRUCTOR
1083 && tgt
1084 && VECTOR_TYPE_P (TREE_TYPE (tgt)))
1085 conv_code = VIEW_CONVERT_EXPR;
1088 /* Need to load the value from memory first. */
1089 if (n->base_addr)
1091 gimple_stmt_iterator gsi_ins = gsi;
1092 if (ins_stmt)
1093 gsi_ins = gsi_for_stmt (ins_stmt);
1094 tree addr_expr, addr_tmp, val_expr, val_tmp;
1095 tree load_offset_ptr, aligned_load_type;
1096 gimple *load_stmt;
1097 unsigned align = get_object_alignment (src);
1098 poly_int64 load_offset = 0;
1100 if (cur_stmt)
1102 basic_block ins_bb = gimple_bb (ins_stmt);
1103 basic_block cur_bb = gimple_bb (cur_stmt);
1104 if (!dominated_by_p (CDI_DOMINATORS, cur_bb, ins_bb))
1105 return NULL_TREE;
1107 /* Move cur_stmt just before one of the load of the original
1108 to ensure it has the same VUSE. See PR61517 for what could
1109 go wrong. */
1110 if (gimple_bb (cur_stmt) != gimple_bb (ins_stmt))
1111 reset_flow_sensitive_info (gimple_assign_lhs (cur_stmt));
1112 gsi_move_before (&gsi, &gsi_ins);
1113 gsi = gsi_for_stmt (cur_stmt);
1115 else
1116 gsi = gsi_ins;
1118 /* Compute address to load from and cast according to the size
1119 of the load. */
1120 addr_expr = build_fold_addr_expr (src);
1121 if (is_gimple_mem_ref_addr (addr_expr))
1122 addr_tmp = unshare_expr (addr_expr);
1123 else
1125 addr_tmp = unshare_expr (n->base_addr);
1126 if (!is_gimple_mem_ref_addr (addr_tmp))
1127 addr_tmp = force_gimple_operand_gsi_1 (&gsi, addr_tmp,
1128 is_gimple_mem_ref_addr,
1129 NULL_TREE, true,
1130 GSI_SAME_STMT);
1131 load_offset = n->bytepos;
1132 if (n->offset)
1134 tree off
1135 = force_gimple_operand_gsi (&gsi, unshare_expr (n->offset),
1136 true, NULL_TREE, true,
1137 GSI_SAME_STMT);
1138 gimple *stmt
1139 = gimple_build_assign (make_ssa_name (TREE_TYPE (addr_tmp)),
1140 POINTER_PLUS_EXPR, addr_tmp, off);
1141 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1142 addr_tmp = gimple_assign_lhs (stmt);
1146 /* Perform the load. */
1147 aligned_load_type = load_type;
1148 if (align < TYPE_ALIGN (load_type))
1149 aligned_load_type = build_aligned_type (load_type, align);
1150 load_offset_ptr = build_int_cst (n->alias_set, load_offset);
1151 val_expr = fold_build2 (MEM_REF, aligned_load_type, addr_tmp,
1152 load_offset_ptr);
1154 if (!bswap)
1156 if (n->range == 16)
1157 nop_stats.found_16bit++;
1158 else if (n->range == 32)
1159 nop_stats.found_32bit++;
1160 else
1162 gcc_assert (n->range == 64);
1163 nop_stats.found_64bit++;
1166 /* Convert the result of load if necessary. */
1167 if (tgt && !useless_type_conversion_p (TREE_TYPE (tgt), load_type))
1169 val_tmp = make_temp_ssa_name (aligned_load_type, NULL,
1170 "load_dst");
1171 load_stmt = gimple_build_assign (val_tmp, val_expr);
1172 gimple_set_vuse (load_stmt, n->vuse);
1173 gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT);
1174 if (conv_code == VIEW_CONVERT_EXPR)
1175 val_tmp = bswap_view_convert (&gsi, TREE_TYPE (tgt), val_tmp,
1176 true);
1177 gimple_assign_set_rhs_with_ops (&gsi, conv_code, val_tmp);
1178 update_stmt (cur_stmt);
1180 else if (cur_stmt)
1182 gimple_assign_set_rhs_with_ops (&gsi, MEM_REF, val_expr);
1183 gimple_set_vuse (cur_stmt, n->vuse);
1184 update_stmt (cur_stmt);
1186 else
1188 tgt = make_ssa_name (load_type);
1189 cur_stmt = gimple_build_assign (tgt, MEM_REF, val_expr);
1190 gimple_set_vuse (cur_stmt, n->vuse);
1191 gsi_insert_before (&gsi, cur_stmt, GSI_SAME_STMT);
1194 if (dump_file)
1196 fprintf (dump_file,
1197 "%d bit load in target endianness found at: ",
1198 (int) n->range);
1199 print_gimple_stmt (dump_file, cur_stmt, 0);
1201 return tgt;
1203 else
1205 val_tmp = make_temp_ssa_name (aligned_load_type, NULL, "load_dst");
1206 load_stmt = gimple_build_assign (val_tmp, val_expr);
1207 gimple_set_vuse (load_stmt, n->vuse);
1208 gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT);
1210 src = val_tmp;
1212 else if (!bswap)
1214 gimple *g = NULL;
1215 if (tgt && !useless_type_conversion_p (TREE_TYPE (tgt), TREE_TYPE (src)))
1217 if (!is_gimple_val (src))
1218 return NULL_TREE;
1219 if (conv_code == VIEW_CONVERT_EXPR)
1220 src = bswap_view_convert (&gsi, TREE_TYPE (tgt), src, true);
1221 g = gimple_build_assign (tgt, conv_code, src);
1223 else if (cur_stmt)
1224 g = gimple_build_assign (tgt, src);
1225 else
1226 tgt = src;
1227 if (n->range == 16)
1228 nop_stats.found_16bit++;
1229 else if (n->range == 32)
1230 nop_stats.found_32bit++;
1231 else
1233 gcc_assert (n->range == 64);
1234 nop_stats.found_64bit++;
1236 if (dump_file)
1238 fprintf (dump_file,
1239 "%d bit reshuffle in target endianness found at: ",
1240 (int) n->range);
1241 if (cur_stmt)
1242 print_gimple_stmt (dump_file, cur_stmt, 0);
1243 else
1245 print_generic_expr (dump_file, tgt, TDF_NONE);
1246 fprintf (dump_file, "\n");
1249 if (cur_stmt)
1250 gsi_replace (&gsi, g, true);
1251 return tgt;
1253 else if (TREE_CODE (src) == BIT_FIELD_REF)
1254 src = TREE_OPERAND (src, 0);
1256 if (n->range == 16)
1257 bswap_stats.found_16bit++;
1258 else if (n->range == 32)
1259 bswap_stats.found_32bit++;
1260 else
1262 gcc_assert (n->range == 64);
1263 bswap_stats.found_64bit++;
1266 tmp = src;
1268 /* Convert the src expression if necessary. */
1269 if (!useless_type_conversion_p (TREE_TYPE (tmp), bswap_type))
1271 gimple *convert_stmt;
1273 tmp = make_temp_ssa_name (bswap_type, NULL, "bswapsrc");
1274 convert_stmt = gimple_build_assign (tmp, NOP_EXPR, src);
1275 gsi_insert_before (&gsi, convert_stmt, GSI_SAME_STMT);
1278 /* Canonical form for 16 bit bswap is a rotate expression. Only 16bit values
1279 are considered as rotation of 2N bit values by N bits is generally not
1280 equivalent to a bswap. Consider for instance 0x01020304 r>> 16 which
1281 gives 0x03040102 while a bswap for that value is 0x04030201. */
1282 if (bswap && n->range == 16)
1284 tree count = build_int_cst (NULL, BITS_PER_UNIT);
1285 src = fold_build2 (LROTATE_EXPR, bswap_type, tmp, count);
1286 bswap_stmt = gimple_build_assign (NULL, src);
1288 else
1289 bswap_stmt = gimple_build_call (fndecl, 1, tmp);
1291 if (tgt == NULL_TREE)
1292 tgt = make_ssa_name (bswap_type);
1293 tmp = tgt;
1295 if (mask != ~(uint64_t) 0)
1297 tree m = build_int_cst (bswap_type, mask);
1298 tmp = make_temp_ssa_name (bswap_type, NULL, "bswapdst");
1299 gimple_set_lhs (bswap_stmt, tmp);
1300 mask_stmt = gimple_build_assign (tgt, BIT_AND_EXPR, tmp, m);
1301 tmp = tgt;
1304 /* Convert the result if necessary. */
1305 if (!useless_type_conversion_p (TREE_TYPE (tgt), bswap_type))
1307 tmp = make_temp_ssa_name (bswap_type, NULL, "bswapdst");
1308 tree atmp = tmp;
1309 gimple_stmt_iterator gsi2 = gsi;
1310 if (conv_code == VIEW_CONVERT_EXPR)
1311 atmp = bswap_view_convert (&gsi2, TREE_TYPE (tgt), tmp, false);
1312 gimple *convert_stmt = gimple_build_assign (tgt, conv_code, atmp);
1313 gsi_insert_after (&gsi2, convert_stmt, GSI_SAME_STMT);
1316 gimple_set_lhs (mask_stmt ? mask_stmt : bswap_stmt, tmp);
1318 if (dump_file)
1320 fprintf (dump_file, "%d bit bswap implementation found at: ",
1321 (int) n->range);
1322 if (cur_stmt)
1323 print_gimple_stmt (dump_file, cur_stmt, 0);
1324 else
1326 print_generic_expr (dump_file, tgt, TDF_NONE);
1327 fprintf (dump_file, "\n");
1331 if (cur_stmt)
1333 if (mask_stmt)
1334 gsi_insert_after (&gsi, mask_stmt, GSI_SAME_STMT);
1335 gsi_insert_after (&gsi, bswap_stmt, GSI_SAME_STMT);
1336 gsi_remove (&gsi, true);
1338 else
1340 gsi_insert_before (&gsi, bswap_stmt, GSI_SAME_STMT);
1341 if (mask_stmt)
1342 gsi_insert_before (&gsi, mask_stmt, GSI_SAME_STMT);
1344 return tgt;
1347 /* Try to optimize an assignment CUR_STMT with CONSTRUCTOR on the rhs
1348 using bswap optimizations. CDI_DOMINATORS need to be
1349 computed on entry. Return true if it has been optimized and
1350 TODO_update_ssa is needed. */
1352 static bool
1353 maybe_optimize_vector_constructor (gimple *cur_stmt)
1355 tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type;
1356 struct symbolic_number n;
1357 bool bswap;
1359 gcc_assert (is_gimple_assign (cur_stmt)
1360 && gimple_assign_rhs_code (cur_stmt) == CONSTRUCTOR);
1362 tree rhs = gimple_assign_rhs1 (cur_stmt);
1363 if (!VECTOR_TYPE_P (TREE_TYPE (rhs))
1364 || !INTEGRAL_TYPE_P (TREE_TYPE (TREE_TYPE (rhs)))
1365 || gimple_assign_lhs (cur_stmt) == NULL_TREE)
1366 return false;
1368 HOST_WIDE_INT sz = int_size_in_bytes (TREE_TYPE (rhs)) * BITS_PER_UNIT;
1369 switch (sz)
1371 case 16:
1372 load_type = bswap_type = uint16_type_node;
1373 break;
1374 case 32:
1375 if (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
1376 && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing)
1378 load_type = uint32_type_node;
1379 fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
1380 bswap_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
1382 else
1383 return false;
1384 break;
1385 case 64:
1386 if (builtin_decl_explicit_p (BUILT_IN_BSWAP64)
1387 && (optab_handler (bswap_optab, DImode) != CODE_FOR_nothing
1388 || (word_mode == SImode
1389 && builtin_decl_explicit_p (BUILT_IN_BSWAP32)
1390 && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing)))
1392 load_type = uint64_type_node;
1393 fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
1394 bswap_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
1396 else
1397 return false;
1398 break;
1399 default:
1400 return false;
1403 bool cast64_to_32;
1404 uint64_t mask;
1405 gimple *ins_stmt = find_bswap_or_nop (cur_stmt, &n, &bswap,
1406 &cast64_to_32, &mask);
1407 if (!ins_stmt
1408 || n.range != (unsigned HOST_WIDE_INT) sz
1409 || cast64_to_32
1410 || mask != ~(uint64_t) 0)
1411 return false;
1413 if (bswap && !fndecl && n.range != 16)
1414 return false;
1416 memset (&nop_stats, 0, sizeof (nop_stats));
1417 memset (&bswap_stats, 0, sizeof (bswap_stats));
1418 return bswap_replace (gsi_for_stmt (cur_stmt), ins_stmt, fndecl,
1419 bswap_type, load_type, &n, bswap, mask) != NULL_TREE;
1422 /* Find manual byte swap implementations as well as load in a given
1423 endianness. Byte swaps are turned into a bswap builtin invokation
1424 while endian loads are converted to bswap builtin invokation or
1425 simple load according to the target endianness. */
1427 unsigned int
1428 pass_optimize_bswap::execute (function *fun)
1430 basic_block bb;
1431 bool bswap32_p, bswap64_p;
1432 bool changed = false;
1433 tree bswap32_type = NULL_TREE, bswap64_type = NULL_TREE;
1435 bswap32_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
1436 && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing);
1437 bswap64_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP64)
1438 && (optab_handler (bswap_optab, DImode) != CODE_FOR_nothing
1439 || (bswap32_p && word_mode == SImode)));
1441 /* Determine the argument type of the builtins. The code later on
1442 assumes that the return and argument type are the same. */
1443 if (bswap32_p)
1445 tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
1446 bswap32_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
1449 if (bswap64_p)
1451 tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
1452 bswap64_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
1455 memset (&nop_stats, 0, sizeof (nop_stats));
1456 memset (&bswap_stats, 0, sizeof (bswap_stats));
1457 calculate_dominance_info (CDI_DOMINATORS);
1459 FOR_EACH_BB_FN (bb, fun)
1461 gimple_stmt_iterator gsi;
1463 /* We do a reverse scan for bswap patterns to make sure we get the
1464 widest match. As bswap pattern matching doesn't handle previously
1465 inserted smaller bswap replacements as sub-patterns, the wider
1466 variant wouldn't be detected. */
1467 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi);)
1469 gimple *ins_stmt, *cur_stmt = gsi_stmt (gsi);
1470 tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type;
1471 enum tree_code code;
1472 struct symbolic_number n;
1473 bool bswap, cast64_to_32;
1474 uint64_t mask;
1476 /* This gsi_prev (&gsi) is not part of the for loop because cur_stmt
1477 might be moved to a different basic block by bswap_replace and gsi
1478 must not points to it if that's the case. Moving the gsi_prev
1479 there make sure that gsi points to the statement previous to
1480 cur_stmt while still making sure that all statements are
1481 considered in this basic block. */
1482 gsi_prev (&gsi);
1484 if (!is_gimple_assign (cur_stmt))
1485 continue;
1487 code = gimple_assign_rhs_code (cur_stmt);
1488 switch (code)
1490 case LROTATE_EXPR:
1491 case RROTATE_EXPR:
1492 if (!tree_fits_uhwi_p (gimple_assign_rhs2 (cur_stmt))
1493 || tree_to_uhwi (gimple_assign_rhs2 (cur_stmt))
1494 % BITS_PER_UNIT)
1495 continue;
1496 /* Fall through. */
1497 case BIT_IOR_EXPR:
1498 break;
1499 case CONSTRUCTOR:
1501 tree rhs = gimple_assign_rhs1 (cur_stmt);
1502 if (VECTOR_TYPE_P (TREE_TYPE (rhs))
1503 && INTEGRAL_TYPE_P (TREE_TYPE (TREE_TYPE (rhs))))
1504 break;
1506 continue;
1507 default:
1508 continue;
1511 ins_stmt = find_bswap_or_nop (cur_stmt, &n, &bswap,
1512 &cast64_to_32, &mask);
1514 if (!ins_stmt)
1515 continue;
1517 switch (n.range)
1519 case 16:
1520 /* Already in canonical form, nothing to do. */
1521 if (code == LROTATE_EXPR || code == RROTATE_EXPR)
1522 continue;
1523 load_type = bswap_type = uint16_type_node;
1524 break;
1525 case 32:
1526 load_type = uint32_type_node;
1527 if (bswap32_p)
1529 fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
1530 bswap_type = bswap32_type;
1532 break;
1533 case 64:
1534 load_type = uint64_type_node;
1535 if (bswap64_p)
1537 fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
1538 bswap_type = bswap64_type;
1540 break;
1541 default:
1542 continue;
1545 if (bswap && !fndecl && n.range != 16)
1546 continue;
1548 if (bswap_replace (gsi_for_stmt (cur_stmt), ins_stmt, fndecl,
1549 bswap_type, load_type, &n, bswap, mask))
1550 changed = true;
1554 statistics_counter_event (fun, "16-bit nop implementations found",
1555 nop_stats.found_16bit);
1556 statistics_counter_event (fun, "32-bit nop implementations found",
1557 nop_stats.found_32bit);
1558 statistics_counter_event (fun, "64-bit nop implementations found",
1559 nop_stats.found_64bit);
1560 statistics_counter_event (fun, "16-bit bswap implementations found",
1561 bswap_stats.found_16bit);
1562 statistics_counter_event (fun, "32-bit bswap implementations found",
1563 bswap_stats.found_32bit);
1564 statistics_counter_event (fun, "64-bit bswap implementations found",
1565 bswap_stats.found_64bit);
1567 return (changed ? TODO_update_ssa : 0);
1570 } // anon namespace
1572 gimple_opt_pass *
1573 make_pass_optimize_bswap (gcc::context *ctxt)
1575 return new pass_optimize_bswap (ctxt);
1578 namespace {
1580 /* Struct recording one operand for the store, which is either a constant,
1581 then VAL represents the constant and all the other fields are zero, or
1582 a memory load, then VAL represents the reference, BASE_ADDR is non-NULL
1583 and the other fields also reflect the memory load, or an SSA name, then
1584 VAL represents the SSA name and all the other fields are zero, */
1586 class store_operand_info
1588 public:
1589 tree val;
1590 tree base_addr;
1591 poly_uint64 bitsize;
1592 poly_uint64 bitpos;
1593 poly_uint64 bitregion_start;
1594 poly_uint64 bitregion_end;
1595 gimple *stmt;
1596 bool bit_not_p;
1597 store_operand_info ();
1600 store_operand_info::store_operand_info ()
1601 : val (NULL_TREE), base_addr (NULL_TREE), bitsize (0), bitpos (0),
1602 bitregion_start (0), bitregion_end (0), stmt (NULL), bit_not_p (false)
1606 /* Struct recording the information about a single store of an immediate
1607 to memory. These are created in the first phase and coalesced into
1608 merged_store_group objects in the second phase. */
1610 class store_immediate_info
1612 public:
1613 unsigned HOST_WIDE_INT bitsize;
1614 unsigned HOST_WIDE_INT bitpos;
1615 unsigned HOST_WIDE_INT bitregion_start;
1616 /* This is one past the last bit of the bit region. */
1617 unsigned HOST_WIDE_INT bitregion_end;
1618 gimple *stmt;
1619 unsigned int order;
1620 /* INTEGER_CST for constant store, STRING_CST for string store,
1621 MEM_REF for memory copy, BIT_*_EXPR for logical bitwise operation,
1622 BIT_INSERT_EXPR for bit insertion.
1623 LROTATE_EXPR if it can be only bswap optimized and
1624 ops are not really meaningful.
1625 NOP_EXPR if bswap optimization detected identity, ops
1626 are not meaningful. */
1627 enum tree_code rhs_code;
1628 /* Two fields for bswap optimization purposes. */
1629 struct symbolic_number n;
1630 gimple *ins_stmt;
1631 /* True if BIT_{AND,IOR,XOR}_EXPR result is inverted before storing. */
1632 bool bit_not_p;
1633 /* True if ops have been swapped and thus ops[1] represents
1634 rhs1 of BIT_{AND,IOR,XOR}_EXPR and ops[0] represents rhs2. */
1635 bool ops_swapped_p;
1636 /* The index number of the landing pad, or 0 if there is none. */
1637 int lp_nr;
1638 /* Operands. For BIT_*_EXPR rhs_code both operands are used, otherwise
1639 just the first one. */
1640 store_operand_info ops[2];
1641 store_immediate_info (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
1642 unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
1643 gimple *, unsigned int, enum tree_code,
1644 struct symbolic_number &, gimple *, bool, int,
1645 const store_operand_info &,
1646 const store_operand_info &);
1649 store_immediate_info::store_immediate_info (unsigned HOST_WIDE_INT bs,
1650 unsigned HOST_WIDE_INT bp,
1651 unsigned HOST_WIDE_INT brs,
1652 unsigned HOST_WIDE_INT bre,
1653 gimple *st,
1654 unsigned int ord,
1655 enum tree_code rhscode,
1656 struct symbolic_number &nr,
1657 gimple *ins_stmtp,
1658 bool bitnotp,
1659 int nr2,
1660 const store_operand_info &op0r,
1661 const store_operand_info &op1r)
1662 : bitsize (bs), bitpos (bp), bitregion_start (brs), bitregion_end (bre),
1663 stmt (st), order (ord), rhs_code (rhscode), n (nr),
1664 ins_stmt (ins_stmtp), bit_not_p (bitnotp), ops_swapped_p (false),
1665 lp_nr (nr2), ops { op0r, op1r }
1669 /* Struct representing a group of stores to contiguous memory locations.
1670 These are produced by the second phase (coalescing) and consumed in the
1671 third phase that outputs the widened stores. */
1673 class merged_store_group
1675 public:
1676 unsigned HOST_WIDE_INT start;
1677 unsigned HOST_WIDE_INT width;
1678 unsigned HOST_WIDE_INT bitregion_start;
1679 unsigned HOST_WIDE_INT bitregion_end;
1680 /* The size of the allocated memory for val and mask. */
1681 unsigned HOST_WIDE_INT buf_size;
1682 unsigned HOST_WIDE_INT align_base;
1683 poly_uint64 load_align_base[2];
1685 unsigned int align;
1686 unsigned int load_align[2];
1687 unsigned int first_order;
1688 unsigned int last_order;
1689 bool bit_insertion;
1690 bool string_concatenation;
1691 bool only_constants;
1692 bool consecutive;
1693 unsigned int first_nonmergeable_order;
1694 int lp_nr;
1696 auto_vec<store_immediate_info *> stores;
1697 /* We record the first and last original statements in the sequence because
1698 we'll need their vuse/vdef and replacement position. It's easier to keep
1699 track of them separately as 'stores' is reordered by apply_stores. */
1700 gimple *last_stmt;
1701 gimple *first_stmt;
1702 unsigned char *val;
1703 unsigned char *mask;
1705 merged_store_group (store_immediate_info *);
1706 ~merged_store_group ();
1707 bool can_be_merged_into (store_immediate_info *);
1708 void merge_into (store_immediate_info *);
1709 void merge_overlapping (store_immediate_info *);
1710 bool apply_stores ();
1711 private:
1712 void do_merge (store_immediate_info *);
1715 /* Debug helper. Dump LEN elements of byte array PTR to FD in hex. */
1717 static void
1718 dump_char_array (FILE *fd, unsigned char *ptr, unsigned int len)
1720 if (!fd)
1721 return;
1723 for (unsigned int i = 0; i < len; i++)
1724 fprintf (fd, "%02x ", ptr[i]);
1725 fprintf (fd, "\n");
1728 /* Clear out LEN bits starting from bit START in the byte array
1729 PTR. This clears the bits to the *right* from START.
1730 START must be within [0, BITS_PER_UNIT) and counts starting from
1731 the least significant bit. */
1733 static void
1734 clear_bit_region_be (unsigned char *ptr, unsigned int start,
1735 unsigned int len)
1737 if (len == 0)
1738 return;
1739 /* Clear len bits to the right of start. */
1740 else if (len <= start + 1)
1742 unsigned char mask = (~(~0U << len));
1743 mask = mask << (start + 1U - len);
1744 ptr[0] &= ~mask;
1746 else if (start != BITS_PER_UNIT - 1)
1748 clear_bit_region_be (ptr, start, (start % BITS_PER_UNIT) + 1);
1749 clear_bit_region_be (ptr + 1, BITS_PER_UNIT - 1,
1750 len - (start % BITS_PER_UNIT) - 1);
1752 else if (start == BITS_PER_UNIT - 1
1753 && len > BITS_PER_UNIT)
1755 unsigned int nbytes = len / BITS_PER_UNIT;
1756 memset (ptr, 0, nbytes);
1757 if (len % BITS_PER_UNIT != 0)
1758 clear_bit_region_be (ptr + nbytes, BITS_PER_UNIT - 1,
1759 len % BITS_PER_UNIT);
1761 else
1762 gcc_unreachable ();
1765 /* In the byte array PTR clear the bit region starting at bit
1766 START and is LEN bits wide.
1767 For regions spanning multiple bytes do this recursively until we reach
1768 zero LEN or a region contained within a single byte. */
1770 static void
1771 clear_bit_region (unsigned char *ptr, unsigned int start,
1772 unsigned int len)
1774 /* Degenerate base case. */
1775 if (len == 0)
1776 return;
1777 else if (start >= BITS_PER_UNIT)
1778 clear_bit_region (ptr + 1, start - BITS_PER_UNIT, len);
1779 /* Second base case. */
1780 else if ((start + len) <= BITS_PER_UNIT)
1782 unsigned char mask = (~0U) << (unsigned char) (BITS_PER_UNIT - len);
1783 mask >>= BITS_PER_UNIT - (start + len);
1785 ptr[0] &= ~mask;
1787 return;
1789 /* Clear most significant bits in a byte and proceed with the next byte. */
1790 else if (start != 0)
1792 clear_bit_region (ptr, start, BITS_PER_UNIT - start);
1793 clear_bit_region (ptr + 1, 0, len - (BITS_PER_UNIT - start));
1795 /* Whole bytes need to be cleared. */
1796 else if (start == 0 && len > BITS_PER_UNIT)
1798 unsigned int nbytes = len / BITS_PER_UNIT;
1799 /* We could recurse on each byte but we clear whole bytes, so a simple
1800 memset will do. */
1801 memset (ptr, '\0', nbytes);
1802 /* Clear the remaining sub-byte region if there is one. */
1803 if (len % BITS_PER_UNIT != 0)
1804 clear_bit_region (ptr + nbytes, 0, len % BITS_PER_UNIT);
1806 else
1807 gcc_unreachable ();
1810 /* Write BITLEN bits of EXPR to the byte array PTR at
1811 bit position BITPOS. PTR should contain TOTAL_BYTES elements.
1812 Return true if the operation succeeded. */
1814 static bool
1815 encode_tree_to_bitpos (tree expr, unsigned char *ptr, int bitlen, int bitpos,
1816 unsigned int total_bytes)
1818 unsigned int first_byte = bitpos / BITS_PER_UNIT;
1819 bool sub_byte_op_p = ((bitlen % BITS_PER_UNIT)
1820 || (bitpos % BITS_PER_UNIT)
1821 || !int_mode_for_size (bitlen, 0).exists ());
1822 bool empty_ctor_p
1823 = (TREE_CODE (expr) == CONSTRUCTOR
1824 && CONSTRUCTOR_NELTS (expr) == 0
1825 && TYPE_SIZE_UNIT (TREE_TYPE (expr))
1826 && tree_fits_uhwi_p (TYPE_SIZE_UNIT (TREE_TYPE (expr))));
1828 if (!sub_byte_op_p)
1830 if (first_byte >= total_bytes)
1831 return false;
1832 total_bytes -= first_byte;
1833 if (empty_ctor_p)
1835 unsigned HOST_WIDE_INT rhs_bytes
1836 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (expr)));
1837 if (rhs_bytes > total_bytes)
1838 return false;
1839 memset (ptr + first_byte, '\0', rhs_bytes);
1840 return true;
1842 return native_encode_expr (expr, ptr + first_byte, total_bytes) != 0;
1845 /* LITTLE-ENDIAN
1846 We are writing a non byte-sized quantity or at a position that is not
1847 at a byte boundary.
1848 |--------|--------|--------| ptr + first_byte
1850 xxx xxxxxxxx xxx< bp>
1851 |______EXPR____|
1853 First native_encode_expr EXPR into a temporary buffer and shift each
1854 byte in the buffer by 'bp' (carrying the bits over as necessary).
1855 |00000000|00xxxxxx|xxxxxxxx| << bp = |000xxxxx|xxxxxxxx|xxx00000|
1856 <------bitlen---->< bp>
1857 Then we clear the destination bits:
1858 |---00000|00000000|000-----| ptr + first_byte
1859 <-------bitlen--->< bp>
1861 Finally we ORR the bytes of the shifted EXPR into the cleared region:
1862 |---xxxxx||xxxxxxxx||xxx-----| ptr + first_byte.
1864 BIG-ENDIAN
1865 We are writing a non byte-sized quantity or at a position that is not
1866 at a byte boundary.
1867 ptr + first_byte |--------|--------|--------|
1869 <bp >xxx xxxxxxxx xxx
1870 |_____EXPR_____|
1872 First native_encode_expr EXPR into a temporary buffer and shift each
1873 byte in the buffer to the right by (carrying the bits over as necessary).
1874 We shift by as much as needed to align the most significant bit of EXPR
1875 with bitpos:
1876 |00xxxxxx|xxxxxxxx| >> 3 = |00000xxx|xxxxxxxx|xxxxx000|
1877 <---bitlen----> <bp ><-----bitlen----->
1878 Then we clear the destination bits:
1879 ptr + first_byte |-----000||00000000||00000---|
1880 <bp ><-------bitlen----->
1882 Finally we ORR the bytes of the shifted EXPR into the cleared region:
1883 ptr + first_byte |---xxxxx||xxxxxxxx||xxx-----|.
1884 The awkwardness comes from the fact that bitpos is counted from the
1885 most significant bit of a byte. */
1887 /* We must be dealing with fixed-size data at this point, since the
1888 total size is also fixed. */
1889 unsigned int byte_size;
1890 if (empty_ctor_p)
1892 unsigned HOST_WIDE_INT rhs_bytes
1893 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (expr)));
1894 if (rhs_bytes > total_bytes)
1895 return false;
1896 byte_size = rhs_bytes;
1898 else
1900 fixed_size_mode mode
1901 = as_a <fixed_size_mode> (TYPE_MODE (TREE_TYPE (expr)));
1902 byte_size
1903 = mode == BLKmode
1904 ? tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (expr)))
1905 : GET_MODE_SIZE (mode);
1907 /* Allocate an extra byte so that we have space to shift into. */
1908 byte_size++;
1909 unsigned char *tmpbuf = XALLOCAVEC (unsigned char, byte_size);
1910 memset (tmpbuf, '\0', byte_size);
1911 /* The store detection code should only have allowed constants that are
1912 accepted by native_encode_expr or empty ctors. */
1913 if (!empty_ctor_p
1914 && native_encode_expr (expr, tmpbuf, byte_size - 1) == 0)
1915 gcc_unreachable ();
1917 /* The native_encode_expr machinery uses TYPE_MODE to determine how many
1918 bytes to write. This means it can write more than
1919 ROUND_UP (bitlen, BITS_PER_UNIT) / BITS_PER_UNIT bytes (for example
1920 write 8 bytes for a bitlen of 40). Skip the bytes that are not within
1921 bitlen and zero out the bits that are not relevant as well (that may
1922 contain a sign bit due to sign-extension). */
1923 unsigned int padding
1924 = byte_size - ROUND_UP (bitlen, BITS_PER_UNIT) / BITS_PER_UNIT - 1;
1925 /* On big-endian the padding is at the 'front' so just skip the initial
1926 bytes. */
1927 if (BYTES_BIG_ENDIAN)
1928 tmpbuf += padding;
1930 byte_size -= padding;
1932 if (bitlen % BITS_PER_UNIT != 0)
1934 if (BYTES_BIG_ENDIAN)
1935 clear_bit_region_be (tmpbuf, BITS_PER_UNIT - 1,
1936 BITS_PER_UNIT - (bitlen % BITS_PER_UNIT));
1937 else
1938 clear_bit_region (tmpbuf, bitlen,
1939 byte_size * BITS_PER_UNIT - bitlen);
1941 /* Left shifting relies on the last byte being clear if bitlen is
1942 a multiple of BITS_PER_UNIT, which might not be clear if
1943 there are padding bytes. */
1944 else if (!BYTES_BIG_ENDIAN)
1945 tmpbuf[byte_size - 1] = '\0';
1947 /* Clear the bit region in PTR where the bits from TMPBUF will be
1948 inserted into. */
1949 if (BYTES_BIG_ENDIAN)
1950 clear_bit_region_be (ptr + first_byte,
1951 BITS_PER_UNIT - 1 - (bitpos % BITS_PER_UNIT), bitlen);
1952 else
1953 clear_bit_region (ptr + first_byte, bitpos % BITS_PER_UNIT, bitlen);
1955 int shift_amnt;
1956 int bitlen_mod = bitlen % BITS_PER_UNIT;
1957 int bitpos_mod = bitpos % BITS_PER_UNIT;
1959 bool skip_byte = false;
1960 if (BYTES_BIG_ENDIAN)
1962 /* BITPOS and BITLEN are exactly aligned and no shifting
1963 is necessary. */
1964 if (bitpos_mod + bitlen_mod == BITS_PER_UNIT
1965 || (bitpos_mod == 0 && bitlen_mod == 0))
1966 shift_amnt = 0;
1967 /* |. . . . . . . .|
1968 <bp > <blen >.
1969 We always shift right for BYTES_BIG_ENDIAN so shift the beginning
1970 of the value until it aligns with 'bp' in the next byte over. */
1971 else if (bitpos_mod + bitlen_mod < BITS_PER_UNIT)
1973 shift_amnt = bitlen_mod + bitpos_mod;
1974 skip_byte = bitlen_mod != 0;
1976 /* |. . . . . . . .|
1977 <----bp--->
1978 <---blen---->.
1979 Shift the value right within the same byte so it aligns with 'bp'. */
1980 else
1981 shift_amnt = bitlen_mod + bitpos_mod - BITS_PER_UNIT;
1983 else
1984 shift_amnt = bitpos % BITS_PER_UNIT;
1986 /* Create the shifted version of EXPR. */
1987 if (!BYTES_BIG_ENDIAN)
1989 shift_bytes_in_array_left (tmpbuf, byte_size, shift_amnt);
1990 if (shift_amnt == 0)
1991 byte_size--;
1993 else
1995 gcc_assert (BYTES_BIG_ENDIAN);
1996 shift_bytes_in_array_right (tmpbuf, byte_size, shift_amnt);
1997 /* If shifting right forced us to move into the next byte skip the now
1998 empty byte. */
1999 if (skip_byte)
2001 tmpbuf++;
2002 byte_size--;
2006 /* Insert the bits from TMPBUF. */
2007 for (unsigned int i = 0; i < byte_size; i++)
2008 ptr[first_byte + i] |= tmpbuf[i];
2010 return true;
2013 /* Sorting function for store_immediate_info objects.
2014 Sorts them by bitposition. */
2016 static int
2017 sort_by_bitpos (const void *x, const void *y)
2019 store_immediate_info *const *tmp = (store_immediate_info * const *) x;
2020 store_immediate_info *const *tmp2 = (store_immediate_info * const *) y;
2022 if ((*tmp)->bitpos < (*tmp2)->bitpos)
2023 return -1;
2024 else if ((*tmp)->bitpos > (*tmp2)->bitpos)
2025 return 1;
2026 else
2027 /* If they are the same let's use the order which is guaranteed to
2028 be different. */
2029 return (*tmp)->order - (*tmp2)->order;
2032 /* Sorting function for store_immediate_info objects.
2033 Sorts them by the order field. */
2035 static int
2036 sort_by_order (const void *x, const void *y)
2038 store_immediate_info *const *tmp = (store_immediate_info * const *) x;
2039 store_immediate_info *const *tmp2 = (store_immediate_info * const *) y;
2041 if ((*tmp)->order < (*tmp2)->order)
2042 return -1;
2043 else if ((*tmp)->order > (*tmp2)->order)
2044 return 1;
2046 gcc_unreachable ();
2049 /* Initialize a merged_store_group object from a store_immediate_info
2050 object. */
2052 merged_store_group::merged_store_group (store_immediate_info *info)
2054 start = info->bitpos;
2055 width = info->bitsize;
2056 bitregion_start = info->bitregion_start;
2057 bitregion_end = info->bitregion_end;
2058 /* VAL has memory allocated for it in apply_stores once the group
2059 width has been finalized. */
2060 val = NULL;
2061 mask = NULL;
2062 bit_insertion = info->rhs_code == BIT_INSERT_EXPR;
2063 string_concatenation = info->rhs_code == STRING_CST;
2064 only_constants = info->rhs_code == INTEGER_CST;
2065 consecutive = true;
2066 first_nonmergeable_order = ~0U;
2067 lp_nr = info->lp_nr;
2068 unsigned HOST_WIDE_INT align_bitpos = 0;
2069 get_object_alignment_1 (gimple_assign_lhs (info->stmt),
2070 &align, &align_bitpos);
2071 align_base = start - align_bitpos;
2072 for (int i = 0; i < 2; ++i)
2074 store_operand_info &op = info->ops[i];
2075 if (op.base_addr == NULL_TREE)
2077 load_align[i] = 0;
2078 load_align_base[i] = 0;
2080 else
2082 get_object_alignment_1 (op.val, &load_align[i], &align_bitpos);
2083 load_align_base[i] = op.bitpos - align_bitpos;
2086 stores.create (1);
2087 stores.safe_push (info);
2088 last_stmt = info->stmt;
2089 last_order = info->order;
2090 first_stmt = last_stmt;
2091 first_order = last_order;
2092 buf_size = 0;
2095 merged_store_group::~merged_store_group ()
2097 if (val)
2098 XDELETEVEC (val);
2101 /* Return true if the store described by INFO can be merged into the group. */
2103 bool
2104 merged_store_group::can_be_merged_into (store_immediate_info *info)
2106 /* Do not merge bswap patterns. */
2107 if (info->rhs_code == LROTATE_EXPR)
2108 return false;
2110 if (info->lp_nr != lp_nr)
2111 return false;
2113 /* The canonical case. */
2114 if (info->rhs_code == stores[0]->rhs_code)
2115 return true;
2117 /* BIT_INSERT_EXPR is compatible with INTEGER_CST if no STRING_CST. */
2118 if (info->rhs_code == BIT_INSERT_EXPR && stores[0]->rhs_code == INTEGER_CST)
2119 return !string_concatenation;
2121 if (stores[0]->rhs_code == BIT_INSERT_EXPR && info->rhs_code == INTEGER_CST)
2122 return !string_concatenation;
2124 /* We can turn MEM_REF into BIT_INSERT_EXPR for bit-field stores, but do it
2125 only for small regions since this can generate a lot of instructions. */
2126 if (info->rhs_code == MEM_REF
2127 && (stores[0]->rhs_code == INTEGER_CST
2128 || stores[0]->rhs_code == BIT_INSERT_EXPR)
2129 && info->bitregion_start == stores[0]->bitregion_start
2130 && info->bitregion_end == stores[0]->bitregion_end
2131 && info->bitregion_end - info->bitregion_start <= MAX_FIXED_MODE_SIZE)
2132 return !string_concatenation;
2134 if (stores[0]->rhs_code == MEM_REF
2135 && (info->rhs_code == INTEGER_CST
2136 || info->rhs_code == BIT_INSERT_EXPR)
2137 && info->bitregion_start == stores[0]->bitregion_start
2138 && info->bitregion_end == stores[0]->bitregion_end
2139 && info->bitregion_end - info->bitregion_start <= MAX_FIXED_MODE_SIZE)
2140 return !string_concatenation;
2142 /* STRING_CST is compatible with INTEGER_CST if no BIT_INSERT_EXPR. */
2143 if (info->rhs_code == STRING_CST
2144 && stores[0]->rhs_code == INTEGER_CST
2145 && stores[0]->bitsize == CHAR_BIT)
2146 return !bit_insertion;
2148 if (stores[0]->rhs_code == STRING_CST
2149 && info->rhs_code == INTEGER_CST
2150 && info->bitsize == CHAR_BIT)
2151 return !bit_insertion;
2153 return false;
2156 /* Helper method for merge_into and merge_overlapping to do
2157 the common part. */
2159 void
2160 merged_store_group::do_merge (store_immediate_info *info)
2162 bitregion_start = MIN (bitregion_start, info->bitregion_start);
2163 bitregion_end = MAX (bitregion_end, info->bitregion_end);
2165 unsigned int this_align;
2166 unsigned HOST_WIDE_INT align_bitpos = 0;
2167 get_object_alignment_1 (gimple_assign_lhs (info->stmt),
2168 &this_align, &align_bitpos);
2169 if (this_align > align)
2171 align = this_align;
2172 align_base = info->bitpos - align_bitpos;
2174 for (int i = 0; i < 2; ++i)
2176 store_operand_info &op = info->ops[i];
2177 if (!op.base_addr)
2178 continue;
2180 get_object_alignment_1 (op.val, &this_align, &align_bitpos);
2181 if (this_align > load_align[i])
2183 load_align[i] = this_align;
2184 load_align_base[i] = op.bitpos - align_bitpos;
2188 gimple *stmt = info->stmt;
2189 stores.safe_push (info);
2190 if (info->order > last_order)
2192 last_order = info->order;
2193 last_stmt = stmt;
2195 else if (info->order < first_order)
2197 first_order = info->order;
2198 first_stmt = stmt;
2201 if (info->bitpos != start + width)
2202 consecutive = false;
2204 /* We need to use extraction if there is any bit-field. */
2205 if (info->rhs_code == BIT_INSERT_EXPR)
2207 bit_insertion = true;
2208 gcc_assert (!string_concatenation);
2211 /* We want to use concatenation if there is any string. */
2212 if (info->rhs_code == STRING_CST)
2214 string_concatenation = true;
2215 gcc_assert (!bit_insertion);
2218 /* But we cannot use it if we don't have consecutive stores. */
2219 if (!consecutive)
2220 string_concatenation = false;
2222 if (info->rhs_code != INTEGER_CST)
2223 only_constants = false;
2226 /* Merge a store recorded by INFO into this merged store.
2227 The store is not overlapping with the existing recorded
2228 stores. */
2230 void
2231 merged_store_group::merge_into (store_immediate_info *info)
2233 do_merge (info);
2235 /* Make sure we're inserting in the position we think we're inserting. */
2236 gcc_assert (info->bitpos >= start + width
2237 && info->bitregion_start <= bitregion_end);
2239 width = info->bitpos + info->bitsize - start;
2242 /* Merge a store described by INFO into this merged store.
2243 INFO overlaps in some way with the current store (i.e. it's not contiguous
2244 which is handled by merged_store_group::merge_into). */
2246 void
2247 merged_store_group::merge_overlapping (store_immediate_info *info)
2249 do_merge (info);
2251 /* If the store extends the size of the group, extend the width. */
2252 if (info->bitpos + info->bitsize > start + width)
2253 width = info->bitpos + info->bitsize - start;
2256 /* Go through all the recorded stores in this group in program order and
2257 apply their values to the VAL byte array to create the final merged
2258 value. Return true if the operation succeeded. */
2260 bool
2261 merged_store_group::apply_stores ()
2263 store_immediate_info *info;
2264 unsigned int i;
2266 /* Make sure we have more than one store in the group, otherwise we cannot
2267 merge anything. */
2268 if (bitregion_start % BITS_PER_UNIT != 0
2269 || bitregion_end % BITS_PER_UNIT != 0
2270 || stores.length () == 1)
2271 return false;
2273 buf_size = (bitregion_end - bitregion_start) / BITS_PER_UNIT;
2275 /* Really do string concatenation for large strings only. */
2276 if (buf_size <= MOVE_MAX)
2277 string_concatenation = false;
2279 /* Create a power-of-2-sized buffer for native_encode_expr. */
2280 if (!string_concatenation)
2281 buf_size = 1 << ceil_log2 (buf_size);
2283 val = XNEWVEC (unsigned char, 2 * buf_size);
2284 mask = val + buf_size;
2285 memset (val, 0, buf_size);
2286 memset (mask, ~0U, buf_size);
2288 stores.qsort (sort_by_order);
2290 FOR_EACH_VEC_ELT (stores, i, info)
2292 unsigned int pos_in_buffer = info->bitpos - bitregion_start;
2293 tree cst;
2294 if (info->ops[0].val && info->ops[0].base_addr == NULL_TREE)
2295 cst = info->ops[0].val;
2296 else if (info->ops[1].val && info->ops[1].base_addr == NULL_TREE)
2297 cst = info->ops[1].val;
2298 else
2299 cst = NULL_TREE;
2300 bool ret = true;
2301 if (cst && info->rhs_code != BIT_INSERT_EXPR)
2302 ret = encode_tree_to_bitpos (cst, val, info->bitsize, pos_in_buffer,
2303 buf_size);
2304 unsigned char *m = mask + (pos_in_buffer / BITS_PER_UNIT);
2305 if (BYTES_BIG_ENDIAN)
2306 clear_bit_region_be (m, (BITS_PER_UNIT - 1
2307 - (pos_in_buffer % BITS_PER_UNIT)),
2308 info->bitsize);
2309 else
2310 clear_bit_region (m, pos_in_buffer % BITS_PER_UNIT, info->bitsize);
2311 if (cst && dump_file && (dump_flags & TDF_DETAILS))
2313 if (ret)
2315 fputs ("After writing ", dump_file);
2316 print_generic_expr (dump_file, cst, TDF_NONE);
2317 fprintf (dump_file, " of size " HOST_WIDE_INT_PRINT_DEC
2318 " at position %d\n", info->bitsize, pos_in_buffer);
2319 fputs (" the merged value contains ", dump_file);
2320 dump_char_array (dump_file, val, buf_size);
2321 fputs (" the merged mask contains ", dump_file);
2322 dump_char_array (dump_file, mask, buf_size);
2323 if (bit_insertion)
2324 fputs (" bit insertion is required\n", dump_file);
2325 if (string_concatenation)
2326 fputs (" string concatenation is required\n", dump_file);
2328 else
2329 fprintf (dump_file, "Failed to merge stores\n");
2331 if (!ret)
2332 return false;
2334 stores.qsort (sort_by_bitpos);
2335 return true;
2338 /* Structure describing the store chain. */
2340 class imm_store_chain_info
2342 public:
2343 /* Doubly-linked list that imposes an order on chain processing.
2344 PNXP (prev's next pointer) points to the head of a list, or to
2345 the next field in the previous chain in the list.
2346 See pass_store_merging::m_stores_head for more rationale. */
2347 imm_store_chain_info *next, **pnxp;
2348 tree base_addr;
2349 auto_vec<store_immediate_info *> m_store_info;
2350 auto_vec<merged_store_group *> m_merged_store_groups;
2352 imm_store_chain_info (imm_store_chain_info *&inspt, tree b_a)
2353 : next (inspt), pnxp (&inspt), base_addr (b_a)
2355 inspt = this;
2356 if (next)
2358 gcc_checking_assert (pnxp == next->pnxp);
2359 next->pnxp = &next;
2362 ~imm_store_chain_info ()
2364 *pnxp = next;
2365 if (next)
2367 gcc_checking_assert (&next == next->pnxp);
2368 next->pnxp = pnxp;
2371 bool terminate_and_process_chain ();
2372 bool try_coalesce_bswap (merged_store_group *, unsigned int, unsigned int,
2373 unsigned int);
2374 bool coalesce_immediate_stores ();
2375 bool output_merged_store (merged_store_group *);
2376 bool output_merged_stores ();
2379 const pass_data pass_data_tree_store_merging = {
2380 GIMPLE_PASS, /* type */
2381 "store-merging", /* name */
2382 OPTGROUP_NONE, /* optinfo_flags */
2383 TV_GIMPLE_STORE_MERGING, /* tv_id */
2384 PROP_ssa, /* properties_required */
2385 0, /* properties_provided */
2386 0, /* properties_destroyed */
2387 0, /* todo_flags_start */
2388 TODO_update_ssa, /* todo_flags_finish */
2391 class pass_store_merging : public gimple_opt_pass
2393 public:
2394 pass_store_merging (gcc::context *ctxt)
2395 : gimple_opt_pass (pass_data_tree_store_merging, ctxt), m_stores_head (),
2396 m_n_chains (0), m_n_stores (0)
2400 /* Pass not supported for PDP-endian, nor for insane hosts or
2401 target character sizes where native_{encode,interpret}_expr
2402 doesn't work properly. */
2403 virtual bool
2404 gate (function *)
2406 return flag_store_merging
2407 && BYTES_BIG_ENDIAN == WORDS_BIG_ENDIAN
2408 && CHAR_BIT == 8
2409 && BITS_PER_UNIT == 8;
2412 virtual unsigned int execute (function *);
2414 private:
2415 hash_map<tree_operand_hash, class imm_store_chain_info *> m_stores;
2417 /* Form a doubly-linked stack of the elements of m_stores, so that
2418 we can iterate over them in a predictable way. Using this order
2419 avoids extraneous differences in the compiler output just because
2420 of tree pointer variations (e.g. different chains end up in
2421 different positions of m_stores, so they are handled in different
2422 orders, so they allocate or release SSA names in different
2423 orders, and when they get reused, subsequent passes end up
2424 getting different SSA names, which may ultimately change
2425 decisions when going out of SSA). */
2426 imm_store_chain_info *m_stores_head;
2428 /* The number of store chains currently tracked. */
2429 unsigned m_n_chains;
2430 /* The number of stores currently tracked. */
2431 unsigned m_n_stores;
2433 bool process_store (gimple *);
2434 bool terminate_and_process_chain (imm_store_chain_info *);
2435 bool terminate_all_aliasing_chains (imm_store_chain_info **, gimple *);
2436 bool terminate_and_process_all_chains ();
2437 }; // class pass_store_merging
2439 /* Terminate and process all recorded chains. Return true if any changes
2440 were made. */
2442 bool
2443 pass_store_merging::terminate_and_process_all_chains ()
2445 bool ret = false;
2446 while (m_stores_head)
2447 ret |= terminate_and_process_chain (m_stores_head);
2448 gcc_assert (m_stores.is_empty ());
2449 return ret;
2452 /* Terminate all chains that are affected by the statement STMT.
2453 CHAIN_INFO is the chain we should ignore from the checks if
2454 non-NULL. Return true if any changes were made. */
2456 bool
2457 pass_store_merging::terminate_all_aliasing_chains (imm_store_chain_info
2458 **chain_info,
2459 gimple *stmt)
2461 bool ret = false;
2463 /* If the statement doesn't touch memory it can't alias. */
2464 if (!gimple_vuse (stmt))
2465 return false;
2467 tree store_lhs = gimple_store_p (stmt) ? gimple_get_lhs (stmt) : NULL_TREE;
2468 ao_ref store_lhs_ref;
2469 ao_ref_init (&store_lhs_ref, store_lhs);
2470 for (imm_store_chain_info *next = m_stores_head, *cur = next; cur; cur = next)
2472 next = cur->next;
2474 /* We already checked all the stores in chain_info and terminated the
2475 chain if necessary. Skip it here. */
2476 if (chain_info && *chain_info == cur)
2477 continue;
2479 store_immediate_info *info;
2480 unsigned int i;
2481 FOR_EACH_VEC_ELT (cur->m_store_info, i, info)
2483 tree lhs = gimple_assign_lhs (info->stmt);
2484 ao_ref lhs_ref;
2485 ao_ref_init (&lhs_ref, lhs);
2486 if (ref_maybe_used_by_stmt_p (stmt, &lhs_ref)
2487 || stmt_may_clobber_ref_p_1 (stmt, &lhs_ref)
2488 || (store_lhs && refs_may_alias_p_1 (&store_lhs_ref,
2489 &lhs_ref, false)))
2491 if (dump_file && (dump_flags & TDF_DETAILS))
2493 fprintf (dump_file, "stmt causes chain termination:\n");
2494 print_gimple_stmt (dump_file, stmt, 0);
2496 ret |= terminate_and_process_chain (cur);
2497 break;
2502 return ret;
2505 /* Helper function. Terminate the recorded chain storing to base object
2506 BASE. Return true if the merging and output was successful. The m_stores
2507 entry is removed after the processing in any case. */
2509 bool
2510 pass_store_merging::terminate_and_process_chain (imm_store_chain_info *chain_info)
2512 m_n_stores -= chain_info->m_store_info.length ();
2513 m_n_chains--;
2514 bool ret = chain_info->terminate_and_process_chain ();
2515 m_stores.remove (chain_info->base_addr);
2516 delete chain_info;
2517 return ret;
2520 /* Return true if stmts in between FIRST (inclusive) and LAST (exclusive)
2521 may clobber REF. FIRST and LAST must have non-NULL vdef. We want to
2522 be able to sink load of REF across stores between FIRST and LAST, up
2523 to right before LAST. */
2525 bool
2526 stmts_may_clobber_ref_p (gimple *first, gimple *last, tree ref)
2528 ao_ref r;
2529 ao_ref_init (&r, ref);
2530 unsigned int count = 0;
2531 tree vop = gimple_vdef (last);
2532 gimple *stmt;
2534 /* Return true conservatively if the basic blocks are different. */
2535 if (gimple_bb (first) != gimple_bb (last))
2536 return true;
2540 stmt = SSA_NAME_DEF_STMT (vop);
2541 if (stmt_may_clobber_ref_p_1 (stmt, &r))
2542 return true;
2543 if (gimple_store_p (stmt)
2544 && refs_anti_dependent_p (ref, gimple_get_lhs (stmt)))
2545 return true;
2546 /* Avoid quadratic compile time by bounding the number of checks
2547 we perform. */
2548 if (++count > MAX_STORE_ALIAS_CHECKS)
2549 return true;
2550 vop = gimple_vuse (stmt);
2552 while (stmt != first);
2554 return false;
2557 /* Return true if INFO->ops[IDX] is mergeable with the
2558 corresponding loads already in MERGED_STORE group.
2559 BASE_ADDR is the base address of the whole store group. */
2561 bool
2562 compatible_load_p (merged_store_group *merged_store,
2563 store_immediate_info *info,
2564 tree base_addr, int idx)
2566 store_immediate_info *infof = merged_store->stores[0];
2567 if (!info->ops[idx].base_addr
2568 || maybe_ne (info->ops[idx].bitpos - infof->ops[idx].bitpos,
2569 info->bitpos - infof->bitpos)
2570 || !operand_equal_p (info->ops[idx].base_addr,
2571 infof->ops[idx].base_addr, 0))
2572 return false;
2574 store_immediate_info *infol = merged_store->stores.last ();
2575 tree load_vuse = gimple_vuse (info->ops[idx].stmt);
2576 /* In this case all vuses should be the same, e.g.
2577 _1 = s.a; _2 = s.b; _3 = _1 | 1; t.a = _3; _4 = _2 | 2; t.b = _4;
2579 _1 = s.a; _2 = s.b; t.a = _1; t.b = _2;
2580 and we can emit the coalesced load next to any of those loads. */
2581 if (gimple_vuse (infof->ops[idx].stmt) == load_vuse
2582 && gimple_vuse (infol->ops[idx].stmt) == load_vuse)
2583 return true;
2585 /* Otherwise, at least for now require that the load has the same
2586 vuse as the store. See following examples. */
2587 if (gimple_vuse (info->stmt) != load_vuse)
2588 return false;
2590 if (gimple_vuse (infof->stmt) != gimple_vuse (infof->ops[idx].stmt)
2591 || (infof != infol
2592 && gimple_vuse (infol->stmt) != gimple_vuse (infol->ops[idx].stmt)))
2593 return false;
2595 /* If the load is from the same location as the store, already
2596 the construction of the immediate chain info guarantees no intervening
2597 stores, so no further checks are needed. Example:
2598 _1 = s.a; _2 = _1 & -7; s.a = _2; _3 = s.b; _4 = _3 & -7; s.b = _4; */
2599 if (known_eq (info->ops[idx].bitpos, info->bitpos)
2600 && operand_equal_p (info->ops[idx].base_addr, base_addr, 0))
2601 return true;
2603 /* Otherwise, we need to punt if any of the loads can be clobbered by any
2604 of the stores in the group, or any other stores in between those.
2605 Previous calls to compatible_load_p ensured that for all the
2606 merged_store->stores IDX loads, no stmts starting with
2607 merged_store->first_stmt and ending right before merged_store->last_stmt
2608 clobbers those loads. */
2609 gimple *first = merged_store->first_stmt;
2610 gimple *last = merged_store->last_stmt;
2611 /* The stores are sorted by increasing store bitpos, so if info->stmt store
2612 comes before the so far first load, we'll be changing
2613 merged_store->first_stmt. In that case we need to give up if
2614 any of the earlier processed loads clobber with the stmts in the new
2615 range. */
2616 if (info->order < merged_store->first_order)
2618 for (store_immediate_info *infoc : merged_store->stores)
2619 if (stmts_may_clobber_ref_p (info->stmt, first, infoc->ops[idx].val))
2620 return false;
2621 first = info->stmt;
2623 /* Similarly, we could change merged_store->last_stmt, so ensure
2624 in that case no stmts in the new range clobber any of the earlier
2625 processed loads. */
2626 else if (info->order > merged_store->last_order)
2628 for (store_immediate_info *infoc : merged_store->stores)
2629 if (stmts_may_clobber_ref_p (last, info->stmt, infoc->ops[idx].val))
2630 return false;
2631 last = info->stmt;
2633 /* And finally, we'd be adding a new load to the set, ensure it isn't
2634 clobbered in the new range. */
2635 if (stmts_may_clobber_ref_p (first, last, info->ops[idx].val))
2636 return false;
2638 /* Otherwise, we are looking for:
2639 _1 = s.a; _2 = _1 ^ 15; t.a = _2; _3 = s.b; _4 = _3 ^ 15; t.b = _4;
2641 _1 = s.a; t.a = _1; _2 = s.b; t.b = _2; */
2642 return true;
2645 /* Add all refs loaded to compute VAL to REFS vector. */
2647 void
2648 gather_bswap_load_refs (vec<tree> *refs, tree val)
2650 if (TREE_CODE (val) != SSA_NAME)
2651 return;
2653 gimple *stmt = SSA_NAME_DEF_STMT (val);
2654 if (!is_gimple_assign (stmt))
2655 return;
2657 if (gimple_assign_load_p (stmt))
2659 refs->safe_push (gimple_assign_rhs1 (stmt));
2660 return;
2663 switch (gimple_assign_rhs_class (stmt))
2665 case GIMPLE_BINARY_RHS:
2666 gather_bswap_load_refs (refs, gimple_assign_rhs2 (stmt));
2667 /* FALLTHRU */
2668 case GIMPLE_UNARY_RHS:
2669 gather_bswap_load_refs (refs, gimple_assign_rhs1 (stmt));
2670 break;
2671 default:
2672 gcc_unreachable ();
2676 /* Check if there are any stores in M_STORE_INFO after index I
2677 (where M_STORE_INFO must be sorted by sort_by_bitpos) that overlap
2678 a potential group ending with END that have their order
2679 smaller than LAST_ORDER. ALL_INTEGER_CST_P is true if
2680 all the stores already merged and the one under consideration
2681 have rhs_code of INTEGER_CST. Return true if there are no such stores.
2682 Consider:
2683 MEM[(long long int *)p_28] = 0;
2684 MEM[(long long int *)p_28 + 8B] = 0;
2685 MEM[(long long int *)p_28 + 16B] = 0;
2686 MEM[(long long int *)p_28 + 24B] = 0;
2687 _129 = (int) _130;
2688 MEM[(int *)p_28 + 8B] = _129;
2689 MEM[(int *)p_28].a = -1;
2690 We already have
2691 MEM[(long long int *)p_28] = 0;
2692 MEM[(int *)p_28].a = -1;
2693 stmts in the current group and need to consider if it is safe to
2694 add MEM[(long long int *)p_28 + 8B] = 0; store into the same group.
2695 There is an overlap between that store and the MEM[(int *)p_28 + 8B] = _129;
2696 store though, so if we add the MEM[(long long int *)p_28 + 8B] = 0;
2697 into the group and merging of those 3 stores is successful, merged
2698 stmts will be emitted at the latest store from that group, i.e.
2699 LAST_ORDER, which is the MEM[(int *)p_28].a = -1; store.
2700 The MEM[(int *)p_28 + 8B] = _129; store that originally follows
2701 the MEM[(long long int *)p_28 + 8B] = 0; would now be before it,
2702 so we need to refuse merging MEM[(long long int *)p_28 + 8B] = 0;
2703 into the group. That way it will be its own store group and will
2704 not be touched. If ALL_INTEGER_CST_P and there are overlapping
2705 INTEGER_CST stores, those are mergeable using merge_overlapping,
2706 so don't return false for those.
2708 Similarly, check stores from FIRST_EARLIER (inclusive) to END_EARLIER
2709 (exclusive), whether they don't overlap the bitrange START to END
2710 and have order in between FIRST_ORDER and LAST_ORDER. This is to
2711 prevent merging in cases like:
2712 MEM <char[12]> [&b + 8B] = {};
2713 MEM[(short *) &b] = 5;
2714 _5 = *x_4(D);
2715 MEM <long long unsigned int> [&b + 2B] = _5;
2716 MEM[(char *)&b + 16B] = 88;
2717 MEM[(int *)&b + 20B] = 1;
2718 The = {} store comes in sort_by_bitpos before the = 88 store, and can't
2719 be merged with it, because the = _5 store overlaps these and is in between
2720 them in sort_by_order ordering. If it was merged, the merged store would
2721 go after the = _5 store and thus change behavior. */
2723 static bool
2724 check_no_overlap (const vec<store_immediate_info *> &m_store_info,
2725 unsigned int i,
2726 bool all_integer_cst_p, unsigned int first_order,
2727 unsigned int last_order, unsigned HOST_WIDE_INT start,
2728 unsigned HOST_WIDE_INT end, unsigned int first_earlier,
2729 unsigned end_earlier)
2731 unsigned int len = m_store_info.length ();
2732 for (unsigned int j = first_earlier; j < end_earlier; j++)
2734 store_immediate_info *info = m_store_info[j];
2735 if (info->order > first_order
2736 && info->order < last_order
2737 && info->bitpos + info->bitsize > start)
2738 return false;
2740 for (++i; i < len; ++i)
2742 store_immediate_info *info = m_store_info[i];
2743 if (info->bitpos >= end)
2744 break;
2745 if (info->order < last_order
2746 && (!all_integer_cst_p || info->rhs_code != INTEGER_CST))
2747 return false;
2749 return true;
2752 /* Return true if m_store_info[first] and at least one following store
2753 form a group which store try_size bitsize value which is byte swapped
2754 from a memory load or some value, or identity from some value.
2755 This uses the bswap pass APIs. */
2757 bool
2758 imm_store_chain_info::try_coalesce_bswap (merged_store_group *merged_store,
2759 unsigned int first,
2760 unsigned int try_size,
2761 unsigned int first_earlier)
2763 unsigned int len = m_store_info.length (), last = first;
2764 unsigned HOST_WIDE_INT width = m_store_info[first]->bitsize;
2765 if (width >= try_size)
2766 return false;
2767 for (unsigned int i = first + 1; i < len; ++i)
2769 if (m_store_info[i]->bitpos != m_store_info[first]->bitpos + width
2770 || m_store_info[i]->lp_nr != merged_store->lp_nr
2771 || m_store_info[i]->ins_stmt == NULL)
2772 return false;
2773 width += m_store_info[i]->bitsize;
2774 if (width >= try_size)
2776 last = i;
2777 break;
2780 if (width != try_size)
2781 return false;
2783 bool allow_unaligned
2784 = !STRICT_ALIGNMENT && param_store_merging_allow_unaligned;
2785 /* Punt if the combined store would not be aligned and we need alignment. */
2786 if (!allow_unaligned)
2788 unsigned int align = merged_store->align;
2789 unsigned HOST_WIDE_INT align_base = merged_store->align_base;
2790 for (unsigned int i = first + 1; i <= last; ++i)
2792 unsigned int this_align;
2793 unsigned HOST_WIDE_INT align_bitpos = 0;
2794 get_object_alignment_1 (gimple_assign_lhs (m_store_info[i]->stmt),
2795 &this_align, &align_bitpos);
2796 if (this_align > align)
2798 align = this_align;
2799 align_base = m_store_info[i]->bitpos - align_bitpos;
2802 unsigned HOST_WIDE_INT align_bitpos
2803 = (m_store_info[first]->bitpos - align_base) & (align - 1);
2804 if (align_bitpos)
2805 align = least_bit_hwi (align_bitpos);
2806 if (align < try_size)
2807 return false;
2810 tree type;
2811 switch (try_size)
2813 case 16: type = uint16_type_node; break;
2814 case 32: type = uint32_type_node; break;
2815 case 64: type = uint64_type_node; break;
2816 default: gcc_unreachable ();
2818 struct symbolic_number n;
2819 gimple *ins_stmt = NULL;
2820 int vuse_store = -1;
2821 unsigned int first_order = merged_store->first_order;
2822 unsigned int last_order = merged_store->last_order;
2823 gimple *first_stmt = merged_store->first_stmt;
2824 gimple *last_stmt = merged_store->last_stmt;
2825 unsigned HOST_WIDE_INT end = merged_store->start + merged_store->width;
2826 store_immediate_info *infof = m_store_info[first];
2828 for (unsigned int i = first; i <= last; ++i)
2830 store_immediate_info *info = m_store_info[i];
2831 struct symbolic_number this_n = info->n;
2832 this_n.type = type;
2833 if (!this_n.base_addr)
2834 this_n.range = try_size / BITS_PER_UNIT;
2835 else
2836 /* Update vuse in case it has changed by output_merged_stores. */
2837 this_n.vuse = gimple_vuse (info->ins_stmt);
2838 unsigned int bitpos = info->bitpos - infof->bitpos;
2839 if (!do_shift_rotate (LSHIFT_EXPR, &this_n,
2840 BYTES_BIG_ENDIAN
2841 ? try_size - info->bitsize - bitpos
2842 : bitpos))
2843 return false;
2844 if (this_n.base_addr && vuse_store)
2846 unsigned int j;
2847 for (j = first; j <= last; ++j)
2848 if (this_n.vuse == gimple_vuse (m_store_info[j]->stmt))
2849 break;
2850 if (j > last)
2852 if (vuse_store == 1)
2853 return false;
2854 vuse_store = 0;
2857 if (i == first)
2859 n = this_n;
2860 ins_stmt = info->ins_stmt;
2862 else
2864 if (n.base_addr && n.vuse != this_n.vuse)
2866 if (vuse_store == 0)
2867 return false;
2868 vuse_store = 1;
2870 if (info->order > last_order)
2872 last_order = info->order;
2873 last_stmt = info->stmt;
2875 else if (info->order < first_order)
2877 first_order = info->order;
2878 first_stmt = info->stmt;
2880 end = MAX (end, info->bitpos + info->bitsize);
2882 ins_stmt = perform_symbolic_merge (ins_stmt, &n, info->ins_stmt,
2883 &this_n, &n);
2884 if (ins_stmt == NULL)
2885 return false;
2889 uint64_t cmpxchg, cmpnop;
2890 bool cast64_to_32;
2891 find_bswap_or_nop_finalize (&n, &cmpxchg, &cmpnop, &cast64_to_32);
2893 /* A complete byte swap should make the symbolic number to start with
2894 the largest digit in the highest order byte. Unchanged symbolic
2895 number indicates a read with same endianness as target architecture. */
2896 if (n.n != cmpnop && n.n != cmpxchg)
2897 return false;
2899 /* For now. */
2900 if (cast64_to_32)
2901 return false;
2903 if (n.base_addr == NULL_TREE && !is_gimple_val (n.src))
2904 return false;
2906 if (!check_no_overlap (m_store_info, last, false, first_order, last_order,
2907 merged_store->start, end, first_earlier, first))
2908 return false;
2910 /* Don't handle memory copy this way if normal non-bswap processing
2911 would handle it too. */
2912 if (n.n == cmpnop && (unsigned) n.n_ops == last - first + 1)
2914 unsigned int i;
2915 for (i = first; i <= last; ++i)
2916 if (m_store_info[i]->rhs_code != MEM_REF)
2917 break;
2918 if (i == last + 1)
2919 return false;
2922 if (n.n == cmpxchg)
2923 switch (try_size)
2925 case 16:
2926 /* Will emit LROTATE_EXPR. */
2927 break;
2928 case 32:
2929 if (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
2930 && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing)
2931 break;
2932 return false;
2933 case 64:
2934 if (builtin_decl_explicit_p (BUILT_IN_BSWAP64)
2935 && optab_handler (bswap_optab, DImode) != CODE_FOR_nothing)
2936 break;
2937 return false;
2938 default:
2939 gcc_unreachable ();
2942 if (!allow_unaligned && n.base_addr)
2944 unsigned int align = get_object_alignment (n.src);
2945 if (align < try_size)
2946 return false;
2949 /* If each load has vuse of the corresponding store, need to verify
2950 the loads can be sunk right before the last store. */
2951 if (vuse_store == 1)
2953 auto_vec<tree, 64> refs;
2954 for (unsigned int i = first; i <= last; ++i)
2955 gather_bswap_load_refs (&refs,
2956 gimple_assign_rhs1 (m_store_info[i]->stmt));
2958 for (tree ref : refs)
2959 if (stmts_may_clobber_ref_p (first_stmt, last_stmt, ref))
2960 return false;
2961 n.vuse = NULL_TREE;
2964 infof->n = n;
2965 infof->ins_stmt = ins_stmt;
2966 for (unsigned int i = first; i <= last; ++i)
2968 m_store_info[i]->rhs_code = n.n == cmpxchg ? LROTATE_EXPR : NOP_EXPR;
2969 m_store_info[i]->ops[0].base_addr = NULL_TREE;
2970 m_store_info[i]->ops[1].base_addr = NULL_TREE;
2971 if (i != first)
2972 merged_store->merge_into (m_store_info[i]);
2975 return true;
2978 /* Go through the candidate stores recorded in m_store_info and merge them
2979 into merged_store_group objects recorded into m_merged_store_groups
2980 representing the widened stores. Return true if coalescing was successful
2981 and the number of widened stores is fewer than the original number
2982 of stores. */
2984 bool
2985 imm_store_chain_info::coalesce_immediate_stores ()
2987 /* Anything less can't be processed. */
2988 if (m_store_info.length () < 2)
2989 return false;
2991 if (dump_file && (dump_flags & TDF_DETAILS))
2992 fprintf (dump_file, "Attempting to coalesce %u stores in chain\n",
2993 m_store_info.length ());
2995 store_immediate_info *info;
2996 unsigned int i, ignore = 0;
2997 unsigned int first_earlier = 0;
2998 unsigned int end_earlier = 0;
3000 /* Order the stores by the bitposition they write to. */
3001 m_store_info.qsort (sort_by_bitpos);
3003 info = m_store_info[0];
3004 merged_store_group *merged_store = new merged_store_group (info);
3005 if (dump_file && (dump_flags & TDF_DETAILS))
3006 fputs ("New store group\n", dump_file);
3008 FOR_EACH_VEC_ELT (m_store_info, i, info)
3010 unsigned HOST_WIDE_INT new_bitregion_start, new_bitregion_end;
3012 if (i <= ignore)
3013 goto done;
3015 while (first_earlier < end_earlier
3016 && (m_store_info[first_earlier]->bitpos
3017 + m_store_info[first_earlier]->bitsize
3018 <= merged_store->start))
3019 first_earlier++;
3021 /* First try to handle group of stores like:
3022 p[0] = data >> 24;
3023 p[1] = data >> 16;
3024 p[2] = data >> 8;
3025 p[3] = data;
3026 using the bswap framework. */
3027 if (info->bitpos == merged_store->start + merged_store->width
3028 && merged_store->stores.length () == 1
3029 && merged_store->stores[0]->ins_stmt != NULL
3030 && info->lp_nr == merged_store->lp_nr
3031 && info->ins_stmt != NULL)
3033 unsigned int try_size;
3034 for (try_size = 64; try_size >= 16; try_size >>= 1)
3035 if (try_coalesce_bswap (merged_store, i - 1, try_size,
3036 first_earlier))
3037 break;
3039 if (try_size >= 16)
3041 ignore = i + merged_store->stores.length () - 1;
3042 m_merged_store_groups.safe_push (merged_store);
3043 if (ignore < m_store_info.length ())
3045 merged_store = new merged_store_group (m_store_info[ignore]);
3046 end_earlier = ignore;
3048 else
3049 merged_store = NULL;
3050 goto done;
3054 new_bitregion_start
3055 = MIN (merged_store->bitregion_start, info->bitregion_start);
3056 new_bitregion_end
3057 = MAX (merged_store->bitregion_end, info->bitregion_end);
3059 if (info->order >= merged_store->first_nonmergeable_order
3060 || (((new_bitregion_end - new_bitregion_start + 1) / BITS_PER_UNIT)
3061 > (unsigned) param_store_merging_max_size))
3064 /* |---store 1---|
3065 |---store 2---|
3066 Overlapping stores. */
3067 else if (IN_RANGE (info->bitpos, merged_store->start,
3068 merged_store->start + merged_store->width - 1)
3069 /* |---store 1---||---store 2---|
3070 Handle also the consecutive INTEGER_CST stores case here,
3071 as we have here the code to deal with overlaps. */
3072 || (info->bitregion_start <= merged_store->bitregion_end
3073 && info->rhs_code == INTEGER_CST
3074 && merged_store->only_constants
3075 && merged_store->can_be_merged_into (info)))
3077 /* Only allow overlapping stores of constants. */
3078 if (info->rhs_code == INTEGER_CST
3079 && merged_store->only_constants
3080 && info->lp_nr == merged_store->lp_nr)
3082 unsigned int first_order
3083 = MIN (merged_store->first_order, info->order);
3084 unsigned int last_order
3085 = MAX (merged_store->last_order, info->order);
3086 unsigned HOST_WIDE_INT end
3087 = MAX (merged_store->start + merged_store->width,
3088 info->bitpos + info->bitsize);
3089 if (check_no_overlap (m_store_info, i, true, first_order,
3090 last_order, merged_store->start, end,
3091 first_earlier, end_earlier))
3093 /* check_no_overlap call above made sure there are no
3094 overlapping stores with non-INTEGER_CST rhs_code
3095 in between the first and last of the stores we've
3096 just merged. If there are any INTEGER_CST rhs_code
3097 stores in between, we need to merge_overlapping them
3098 even if in the sort_by_bitpos order there are other
3099 overlapping stores in between. Keep those stores as is.
3100 Example:
3101 MEM[(int *)p_28] = 0;
3102 MEM[(char *)p_28 + 3B] = 1;
3103 MEM[(char *)p_28 + 1B] = 2;
3104 MEM[(char *)p_28 + 2B] = MEM[(char *)p_28 + 6B];
3105 We can't merge the zero store with the store of two and
3106 not merge anything else, because the store of one is
3107 in the original order in between those two, but in
3108 store_by_bitpos order it comes after the last store that
3109 we can't merge with them. We can merge the first 3 stores
3110 and keep the last store as is though. */
3111 unsigned int len = m_store_info.length ();
3112 unsigned int try_order = last_order;
3113 unsigned int first_nonmergeable_order;
3114 unsigned int k;
3115 bool last_iter = false;
3116 int attempts = 0;
3119 unsigned int max_order = 0;
3120 unsigned int min_order = first_order;
3121 unsigned first_nonmergeable_int_order = ~0U;
3122 unsigned HOST_WIDE_INT this_end = end;
3123 k = i;
3124 first_nonmergeable_order = ~0U;
3125 for (unsigned int j = i + 1; j < len; ++j)
3127 store_immediate_info *info2 = m_store_info[j];
3128 if (info2->bitpos >= this_end)
3129 break;
3130 if (info2->order < try_order)
3132 if (info2->rhs_code != INTEGER_CST
3133 || info2->lp_nr != merged_store->lp_nr)
3135 /* Normally check_no_overlap makes sure this
3136 doesn't happen, but if end grows below,
3137 then we need to process more stores than
3138 check_no_overlap verified. Example:
3139 MEM[(int *)p_5] = 0;
3140 MEM[(short *)p_5 + 3B] = 1;
3141 MEM[(char *)p_5 + 4B] = _9;
3142 MEM[(char *)p_5 + 2B] = 2; */
3143 k = 0;
3144 break;
3146 k = j;
3147 min_order = MIN (min_order, info2->order);
3148 this_end = MAX (this_end,
3149 info2->bitpos + info2->bitsize);
3151 else if (info2->rhs_code == INTEGER_CST
3152 && info2->lp_nr == merged_store->lp_nr
3153 && !last_iter)
3155 max_order = MAX (max_order, info2->order + 1);
3156 first_nonmergeable_int_order
3157 = MIN (first_nonmergeable_int_order,
3158 info2->order);
3160 else
3161 first_nonmergeable_order
3162 = MIN (first_nonmergeable_order, info2->order);
3164 if (k > i
3165 && !check_no_overlap (m_store_info, len - 1, true,
3166 min_order, try_order,
3167 merged_store->start, this_end,
3168 first_earlier, end_earlier))
3169 k = 0;
3170 if (k == 0)
3172 if (last_order == try_order)
3173 break;
3174 /* If this failed, but only because we grew
3175 try_order, retry with the last working one,
3176 so that we merge at least something. */
3177 try_order = last_order;
3178 last_iter = true;
3179 continue;
3181 last_order = try_order;
3182 /* Retry with a larger try_order to see if we could
3183 merge some further INTEGER_CST stores. */
3184 if (max_order
3185 && (first_nonmergeable_int_order
3186 < first_nonmergeable_order))
3188 try_order = MIN (max_order,
3189 first_nonmergeable_order);
3190 try_order
3191 = MIN (try_order,
3192 merged_store->first_nonmergeable_order);
3193 if (try_order > last_order && ++attempts < 16)
3194 continue;
3196 first_nonmergeable_order
3197 = MIN (first_nonmergeable_order,
3198 first_nonmergeable_int_order);
3199 end = this_end;
3200 break;
3202 while (1);
3204 if (k != 0)
3206 merged_store->merge_overlapping (info);
3208 merged_store->first_nonmergeable_order
3209 = MIN (merged_store->first_nonmergeable_order,
3210 first_nonmergeable_order);
3212 for (unsigned int j = i + 1; j <= k; j++)
3214 store_immediate_info *info2 = m_store_info[j];
3215 gcc_assert (info2->bitpos < end);
3216 if (info2->order < last_order)
3218 gcc_assert (info2->rhs_code == INTEGER_CST);
3219 if (info != info2)
3220 merged_store->merge_overlapping (info2);
3222 /* Other stores are kept and not merged in any
3223 way. */
3225 ignore = k;
3226 goto done;
3231 /* |---store 1---||---store 2---|
3232 This store is consecutive to the previous one.
3233 Merge it into the current store group. There can be gaps in between
3234 the stores, but there can't be gaps in between bitregions. */
3235 else if (info->bitregion_start <= merged_store->bitregion_end
3236 && merged_store->can_be_merged_into (info))
3238 store_immediate_info *infof = merged_store->stores[0];
3240 /* All the rhs_code ops that take 2 operands are commutative,
3241 swap the operands if it could make the operands compatible. */
3242 if (infof->ops[0].base_addr
3243 && infof->ops[1].base_addr
3244 && info->ops[0].base_addr
3245 && info->ops[1].base_addr
3246 && known_eq (info->ops[1].bitpos - infof->ops[0].bitpos,
3247 info->bitpos - infof->bitpos)
3248 && operand_equal_p (info->ops[1].base_addr,
3249 infof->ops[0].base_addr, 0))
3251 std::swap (info->ops[0], info->ops[1]);
3252 info->ops_swapped_p = true;
3254 if (check_no_overlap (m_store_info, i, false,
3255 MIN (merged_store->first_order, info->order),
3256 MAX (merged_store->last_order, info->order),
3257 merged_store->start,
3258 MAX (merged_store->start + merged_store->width,
3259 info->bitpos + info->bitsize),
3260 first_earlier, end_earlier))
3262 /* Turn MEM_REF into BIT_INSERT_EXPR for bit-field stores. */
3263 if (info->rhs_code == MEM_REF && infof->rhs_code != MEM_REF)
3265 info->rhs_code = BIT_INSERT_EXPR;
3266 info->ops[0].val = gimple_assign_rhs1 (info->stmt);
3267 info->ops[0].base_addr = NULL_TREE;
3269 else if (infof->rhs_code == MEM_REF && info->rhs_code != MEM_REF)
3271 for (store_immediate_info *infoj : merged_store->stores)
3273 infoj->rhs_code = BIT_INSERT_EXPR;
3274 infoj->ops[0].val = gimple_assign_rhs1 (infoj->stmt);
3275 infoj->ops[0].base_addr = NULL_TREE;
3277 merged_store->bit_insertion = true;
3279 if ((infof->ops[0].base_addr
3280 ? compatible_load_p (merged_store, info, base_addr, 0)
3281 : !info->ops[0].base_addr)
3282 && (infof->ops[1].base_addr
3283 ? compatible_load_p (merged_store, info, base_addr, 1)
3284 : !info->ops[1].base_addr))
3286 merged_store->merge_into (info);
3287 goto done;
3292 /* |---store 1---| <gap> |---store 2---|.
3293 Gap between stores or the rhs not compatible. Start a new group. */
3295 /* Try to apply all the stores recorded for the group to determine
3296 the bitpattern they write and discard it if that fails.
3297 This will also reject single-store groups. */
3298 if (merged_store->apply_stores ())
3299 m_merged_store_groups.safe_push (merged_store);
3300 else
3301 delete merged_store;
3303 merged_store = new merged_store_group (info);
3304 end_earlier = i;
3305 if (dump_file && (dump_flags & TDF_DETAILS))
3306 fputs ("New store group\n", dump_file);
3308 done:
3309 if (dump_file && (dump_flags & TDF_DETAILS))
3311 fprintf (dump_file, "Store %u:\nbitsize:" HOST_WIDE_INT_PRINT_DEC
3312 " bitpos:" HOST_WIDE_INT_PRINT_DEC " val:",
3313 i, info->bitsize, info->bitpos);
3314 print_generic_expr (dump_file, gimple_assign_rhs1 (info->stmt));
3315 fputc ('\n', dump_file);
3319 /* Record or discard the last store group. */
3320 if (merged_store)
3322 if (merged_store->apply_stores ())
3323 m_merged_store_groups.safe_push (merged_store);
3324 else
3325 delete merged_store;
3328 gcc_assert (m_merged_store_groups.length () <= m_store_info.length ());
3330 bool success
3331 = !m_merged_store_groups.is_empty ()
3332 && m_merged_store_groups.length () < m_store_info.length ();
3334 if (success && dump_file)
3335 fprintf (dump_file, "Coalescing successful!\nMerged into %u stores\n",
3336 m_merged_store_groups.length ());
3338 return success;
3341 /* Return the type to use for the merged stores or loads described by STMTS.
3342 This is needed to get the alias sets right. If IS_LOAD, look for rhs,
3343 otherwise lhs. Additionally set *CLIQUEP and *BASEP to MR_DEPENDENCE_*
3344 of the MEM_REFs if any. */
3346 static tree
3347 get_alias_type_for_stmts (vec<gimple *> &stmts, bool is_load,
3348 unsigned short *cliquep, unsigned short *basep)
3350 gimple *stmt;
3351 unsigned int i;
3352 tree type = NULL_TREE;
3353 tree ret = NULL_TREE;
3354 *cliquep = 0;
3355 *basep = 0;
3357 FOR_EACH_VEC_ELT (stmts, i, stmt)
3359 tree ref = is_load ? gimple_assign_rhs1 (stmt)
3360 : gimple_assign_lhs (stmt);
3361 tree type1 = reference_alias_ptr_type (ref);
3362 tree base = get_base_address (ref);
3364 if (i == 0)
3366 if (TREE_CODE (base) == MEM_REF)
3368 *cliquep = MR_DEPENDENCE_CLIQUE (base);
3369 *basep = MR_DEPENDENCE_BASE (base);
3371 ret = type = type1;
3372 continue;
3374 if (!alias_ptr_types_compatible_p (type, type1))
3375 ret = ptr_type_node;
3376 if (TREE_CODE (base) != MEM_REF
3377 || *cliquep != MR_DEPENDENCE_CLIQUE (base)
3378 || *basep != MR_DEPENDENCE_BASE (base))
3380 *cliquep = 0;
3381 *basep = 0;
3384 return ret;
3387 /* Return the location_t information we can find among the statements
3388 in STMTS. */
3390 static location_t
3391 get_location_for_stmts (vec<gimple *> &stmts)
3393 for (gimple *stmt : stmts)
3394 if (gimple_has_location (stmt))
3395 return gimple_location (stmt);
3397 return UNKNOWN_LOCATION;
3400 /* Used to decribe a store resulting from splitting a wide store in smaller
3401 regularly-sized stores in split_group. */
3403 class split_store
3405 public:
3406 unsigned HOST_WIDE_INT bytepos;
3407 unsigned HOST_WIDE_INT size;
3408 unsigned HOST_WIDE_INT align;
3409 auto_vec<store_immediate_info *> orig_stores;
3410 /* True if there is a single orig stmt covering the whole split store. */
3411 bool orig;
3412 split_store (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
3413 unsigned HOST_WIDE_INT);
3416 /* Simple constructor. */
3418 split_store::split_store (unsigned HOST_WIDE_INT bp,
3419 unsigned HOST_WIDE_INT sz,
3420 unsigned HOST_WIDE_INT al)
3421 : bytepos (bp), size (sz), align (al), orig (false)
3423 orig_stores.create (0);
3426 /* Record all stores in GROUP that write to the region starting at BITPOS and
3427 is of size BITSIZE. Record infos for such statements in STORES if
3428 non-NULL. The stores in GROUP must be sorted by bitposition. Return INFO
3429 if there is exactly one original store in the range (in that case ignore
3430 clobber stmts, unless there are only clobber stmts). */
3432 static store_immediate_info *
3433 find_constituent_stores (class merged_store_group *group,
3434 vec<store_immediate_info *> *stores,
3435 unsigned int *first,
3436 unsigned HOST_WIDE_INT bitpos,
3437 unsigned HOST_WIDE_INT bitsize)
3439 store_immediate_info *info, *ret = NULL;
3440 unsigned int i;
3441 bool second = false;
3442 bool update_first = true;
3443 unsigned HOST_WIDE_INT end = bitpos + bitsize;
3444 for (i = *first; group->stores.iterate (i, &info); ++i)
3446 unsigned HOST_WIDE_INT stmt_start = info->bitpos;
3447 unsigned HOST_WIDE_INT stmt_end = stmt_start + info->bitsize;
3448 if (stmt_end <= bitpos)
3450 /* BITPOS passed to this function never decreases from within the
3451 same split_group call, so optimize and don't scan info records
3452 which are known to end before or at BITPOS next time.
3453 Only do it if all stores before this one also pass this. */
3454 if (update_first)
3455 *first = i + 1;
3456 continue;
3458 else
3459 update_first = false;
3461 /* The stores in GROUP are ordered by bitposition so if we're past
3462 the region for this group return early. */
3463 if (stmt_start >= end)
3464 return ret;
3466 if (gimple_clobber_p (info->stmt))
3468 if (stores)
3469 stores->safe_push (info);
3470 if (ret == NULL)
3471 ret = info;
3472 continue;
3474 if (stores)
3476 stores->safe_push (info);
3477 if (ret && !gimple_clobber_p (ret->stmt))
3479 ret = NULL;
3480 second = true;
3483 else if (ret && !gimple_clobber_p (ret->stmt))
3484 return NULL;
3485 if (!second)
3486 ret = info;
3488 return ret;
3491 /* Return how many SSA_NAMEs used to compute value to store in the INFO
3492 store have multiple uses. If any SSA_NAME has multiple uses, also
3493 count statements needed to compute it. */
3495 static unsigned
3496 count_multiple_uses (store_immediate_info *info)
3498 gimple *stmt = info->stmt;
3499 unsigned ret = 0;
3500 switch (info->rhs_code)
3502 case INTEGER_CST:
3503 case STRING_CST:
3504 return 0;
3505 case BIT_AND_EXPR:
3506 case BIT_IOR_EXPR:
3507 case BIT_XOR_EXPR:
3508 if (info->bit_not_p)
3510 if (!has_single_use (gimple_assign_rhs1 (stmt)))
3511 ret = 1; /* Fall through below to return
3512 the BIT_NOT_EXPR stmt and then
3513 BIT_{AND,IOR,XOR}_EXPR and anything it
3514 uses. */
3515 else
3516 /* stmt is after this the BIT_NOT_EXPR. */
3517 stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3519 if (!has_single_use (gimple_assign_rhs1 (stmt)))
3521 ret += 1 + info->ops[0].bit_not_p;
3522 if (info->ops[1].base_addr)
3523 ret += 1 + info->ops[1].bit_not_p;
3524 return ret + 1;
3526 stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3527 /* stmt is now the BIT_*_EXPR. */
3528 if (!has_single_use (gimple_assign_rhs1 (stmt)))
3529 ret += 1 + info->ops[info->ops_swapped_p].bit_not_p;
3530 else if (info->ops[info->ops_swapped_p].bit_not_p)
3532 gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3533 if (!has_single_use (gimple_assign_rhs1 (stmt2)))
3534 ++ret;
3536 if (info->ops[1].base_addr == NULL_TREE)
3538 gcc_checking_assert (!info->ops_swapped_p);
3539 return ret;
3541 if (!has_single_use (gimple_assign_rhs2 (stmt)))
3542 ret += 1 + info->ops[1 - info->ops_swapped_p].bit_not_p;
3543 else if (info->ops[1 - info->ops_swapped_p].bit_not_p)
3545 gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3546 if (!has_single_use (gimple_assign_rhs1 (stmt2)))
3547 ++ret;
3549 return ret;
3550 case MEM_REF:
3551 if (!has_single_use (gimple_assign_rhs1 (stmt)))
3552 return 1 + info->ops[0].bit_not_p;
3553 else if (info->ops[0].bit_not_p)
3555 stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3556 if (!has_single_use (gimple_assign_rhs1 (stmt)))
3557 return 1;
3559 return 0;
3560 case BIT_INSERT_EXPR:
3561 return has_single_use (gimple_assign_rhs1 (stmt)) ? 0 : 1;
3562 default:
3563 gcc_unreachable ();
3567 /* Split a merged store described by GROUP by populating the SPLIT_STORES
3568 vector (if non-NULL) with split_store structs describing the byte offset
3569 (from the base), the bit size and alignment of each store as well as the
3570 original statements involved in each such split group.
3571 This is to separate the splitting strategy from the statement
3572 building/emission/linking done in output_merged_store.
3573 Return number of new stores.
3574 If ALLOW_UNALIGNED_STORE is false, then all stores must be aligned.
3575 If ALLOW_UNALIGNED_LOAD is false, then all loads must be aligned.
3576 BZERO_FIRST may be true only when the first store covers the whole group
3577 and clears it; if BZERO_FIRST is true, keep that first store in the set
3578 unmodified and emit further stores for the overrides only.
3579 If SPLIT_STORES is NULL, it is just a dry run to count number of
3580 new stores. */
3582 static unsigned int
3583 split_group (merged_store_group *group, bool allow_unaligned_store,
3584 bool allow_unaligned_load, bool bzero_first,
3585 vec<split_store *> *split_stores,
3586 unsigned *total_orig,
3587 unsigned *total_new)
3589 unsigned HOST_WIDE_INT pos = group->bitregion_start;
3590 unsigned HOST_WIDE_INT size = group->bitregion_end - pos;
3591 unsigned HOST_WIDE_INT bytepos = pos / BITS_PER_UNIT;
3592 unsigned HOST_WIDE_INT group_align = group->align;
3593 unsigned HOST_WIDE_INT align_base = group->align_base;
3594 unsigned HOST_WIDE_INT group_load_align = group_align;
3595 bool any_orig = false;
3597 gcc_assert ((size % BITS_PER_UNIT == 0) && (pos % BITS_PER_UNIT == 0));
3599 /* For bswap framework using sets of stores, all the checking has been done
3600 earlier in try_coalesce_bswap and the result always needs to be emitted
3601 as a single store. Likewise for string concatenation, */
3602 if (group->stores[0]->rhs_code == LROTATE_EXPR
3603 || group->stores[0]->rhs_code == NOP_EXPR
3604 || group->string_concatenation)
3606 gcc_assert (!bzero_first);
3607 if (total_orig)
3609 /* Avoid the old/new stmt count heuristics. It should be
3610 always beneficial. */
3611 total_new[0] = 1;
3612 total_orig[0] = 2;
3615 if (split_stores)
3617 unsigned HOST_WIDE_INT align_bitpos
3618 = (group->start - align_base) & (group_align - 1);
3619 unsigned HOST_WIDE_INT align = group_align;
3620 if (align_bitpos)
3621 align = least_bit_hwi (align_bitpos);
3622 bytepos = group->start / BITS_PER_UNIT;
3623 split_store *store
3624 = new split_store (bytepos, group->width, align);
3625 unsigned int first = 0;
3626 find_constituent_stores (group, &store->orig_stores,
3627 &first, group->start, group->width);
3628 split_stores->safe_push (store);
3631 return 1;
3634 unsigned int ret = 0, first = 0;
3635 unsigned HOST_WIDE_INT try_pos = bytepos;
3637 if (total_orig)
3639 unsigned int i;
3640 store_immediate_info *info = group->stores[0];
3642 total_new[0] = 0;
3643 total_orig[0] = 1; /* The orig store. */
3644 info = group->stores[0];
3645 if (info->ops[0].base_addr)
3646 total_orig[0]++;
3647 if (info->ops[1].base_addr)
3648 total_orig[0]++;
3649 switch (info->rhs_code)
3651 case BIT_AND_EXPR:
3652 case BIT_IOR_EXPR:
3653 case BIT_XOR_EXPR:
3654 total_orig[0]++; /* The orig BIT_*_EXPR stmt. */
3655 break;
3656 default:
3657 break;
3659 total_orig[0] *= group->stores.length ();
3661 FOR_EACH_VEC_ELT (group->stores, i, info)
3663 total_new[0] += count_multiple_uses (info);
3664 total_orig[0] += (info->bit_not_p
3665 + info->ops[0].bit_not_p
3666 + info->ops[1].bit_not_p);
3670 if (!allow_unaligned_load)
3671 for (int i = 0; i < 2; ++i)
3672 if (group->load_align[i])
3673 group_load_align = MIN (group_load_align, group->load_align[i]);
3675 if (bzero_first)
3677 store_immediate_info *gstore;
3678 FOR_EACH_VEC_ELT (group->stores, first, gstore)
3679 if (!gimple_clobber_p (gstore->stmt))
3680 break;
3681 ++first;
3682 ret = 1;
3683 if (split_stores)
3685 split_store *store
3686 = new split_store (bytepos, gstore->bitsize, align_base);
3687 store->orig_stores.safe_push (gstore);
3688 store->orig = true;
3689 any_orig = true;
3690 split_stores->safe_push (store);
3694 while (size > 0)
3696 if ((allow_unaligned_store || group_align <= BITS_PER_UNIT)
3697 && (group->mask[try_pos - bytepos] == (unsigned char) ~0U
3698 || (bzero_first && group->val[try_pos - bytepos] == 0)))
3700 /* Skip padding bytes. */
3701 ++try_pos;
3702 size -= BITS_PER_UNIT;
3703 continue;
3706 unsigned HOST_WIDE_INT try_bitpos = try_pos * BITS_PER_UNIT;
3707 unsigned int try_size = MAX_STORE_BITSIZE, nonmasked;
3708 unsigned HOST_WIDE_INT align_bitpos
3709 = (try_bitpos - align_base) & (group_align - 1);
3710 unsigned HOST_WIDE_INT align = group_align;
3711 bool found_orig = false;
3712 if (align_bitpos)
3713 align = least_bit_hwi (align_bitpos);
3714 if (!allow_unaligned_store)
3715 try_size = MIN (try_size, align);
3716 if (!allow_unaligned_load)
3718 /* If we can't do or don't want to do unaligned stores
3719 as well as loads, we need to take the loads into account
3720 as well. */
3721 unsigned HOST_WIDE_INT load_align = group_load_align;
3722 align_bitpos = (try_bitpos - align_base) & (load_align - 1);
3723 if (align_bitpos)
3724 load_align = least_bit_hwi (align_bitpos);
3725 for (int i = 0; i < 2; ++i)
3726 if (group->load_align[i])
3728 align_bitpos
3729 = known_alignment (try_bitpos
3730 - group->stores[0]->bitpos
3731 + group->stores[0]->ops[i].bitpos
3732 - group->load_align_base[i]);
3733 if (align_bitpos & (group_load_align - 1))
3735 unsigned HOST_WIDE_INT a = least_bit_hwi (align_bitpos);
3736 load_align = MIN (load_align, a);
3739 try_size = MIN (try_size, load_align);
3741 store_immediate_info *info
3742 = find_constituent_stores (group, NULL, &first, try_bitpos, try_size);
3743 if (info && !gimple_clobber_p (info->stmt))
3745 /* If there is just one original statement for the range, see if
3746 we can just reuse the original store which could be even larger
3747 than try_size. */
3748 unsigned HOST_WIDE_INT stmt_end
3749 = ROUND_UP (info->bitpos + info->bitsize, BITS_PER_UNIT);
3750 info = find_constituent_stores (group, NULL, &first, try_bitpos,
3751 stmt_end - try_bitpos);
3752 if (info && info->bitpos >= try_bitpos)
3754 store_immediate_info *info2 = NULL;
3755 unsigned int first_copy = first;
3756 if (info->bitpos > try_bitpos
3757 && stmt_end - try_bitpos <= try_size)
3759 info2 = find_constituent_stores (group, NULL, &first_copy,
3760 try_bitpos,
3761 info->bitpos - try_bitpos);
3762 gcc_assert (info2 == NULL || gimple_clobber_p (info2->stmt));
3764 if (info2 == NULL && stmt_end - try_bitpos < try_size)
3766 info2 = find_constituent_stores (group, NULL, &first_copy,
3767 stmt_end,
3768 (try_bitpos + try_size)
3769 - stmt_end);
3770 gcc_assert (info2 == NULL || gimple_clobber_p (info2->stmt));
3772 if (info2 == NULL)
3774 try_size = stmt_end - try_bitpos;
3775 found_orig = true;
3776 goto found;
3781 /* Approximate store bitsize for the case when there are no padding
3782 bits. */
3783 while (try_size > size)
3784 try_size /= 2;
3785 /* Now look for whole padding bytes at the end of that bitsize. */
3786 for (nonmasked = try_size / BITS_PER_UNIT; nonmasked > 0; --nonmasked)
3787 if (group->mask[try_pos - bytepos + nonmasked - 1]
3788 != (unsigned char) ~0U
3789 && (!bzero_first
3790 || group->val[try_pos - bytepos + nonmasked - 1] != 0))
3791 break;
3792 if (nonmasked == 0 || (info && gimple_clobber_p (info->stmt)))
3794 /* If entire try_size range is padding, skip it. */
3795 try_pos += try_size / BITS_PER_UNIT;
3796 size -= try_size;
3797 continue;
3799 /* Otherwise try to decrease try_size if second half, last 3 quarters
3800 etc. are padding. */
3801 nonmasked *= BITS_PER_UNIT;
3802 while (nonmasked <= try_size / 2)
3803 try_size /= 2;
3804 if (!allow_unaligned_store && group_align > BITS_PER_UNIT)
3806 /* Now look for whole padding bytes at the start of that bitsize. */
3807 unsigned int try_bytesize = try_size / BITS_PER_UNIT, masked;
3808 for (masked = 0; masked < try_bytesize; ++masked)
3809 if (group->mask[try_pos - bytepos + masked] != (unsigned char) ~0U
3810 && (!bzero_first
3811 || group->val[try_pos - bytepos + masked] != 0))
3812 break;
3813 masked *= BITS_PER_UNIT;
3814 gcc_assert (masked < try_size);
3815 if (masked >= try_size / 2)
3817 while (masked >= try_size / 2)
3819 try_size /= 2;
3820 try_pos += try_size / BITS_PER_UNIT;
3821 size -= try_size;
3822 masked -= try_size;
3824 /* Need to recompute the alignment, so just retry at the new
3825 position. */
3826 continue;
3830 found:
3831 ++ret;
3833 if (split_stores)
3835 split_store *store
3836 = new split_store (try_pos, try_size, align);
3837 info = find_constituent_stores (group, &store->orig_stores,
3838 &first, try_bitpos, try_size);
3839 if (info
3840 && !gimple_clobber_p (info->stmt)
3841 && info->bitpos >= try_bitpos
3842 && info->bitpos + info->bitsize <= try_bitpos + try_size
3843 && (store->orig_stores.length () == 1
3844 || found_orig
3845 || (info->bitpos == try_bitpos
3846 && (info->bitpos + info->bitsize
3847 == try_bitpos + try_size))))
3849 store->orig = true;
3850 any_orig = true;
3852 split_stores->safe_push (store);
3855 try_pos += try_size / BITS_PER_UNIT;
3856 size -= try_size;
3859 if (total_orig)
3861 unsigned int i;
3862 split_store *store;
3863 /* If we are reusing some original stores and any of the
3864 original SSA_NAMEs had multiple uses, we need to subtract
3865 those now before we add the new ones. */
3866 if (total_new[0] && any_orig)
3868 FOR_EACH_VEC_ELT (*split_stores, i, store)
3869 if (store->orig)
3870 total_new[0] -= count_multiple_uses (store->orig_stores[0]);
3872 total_new[0] += ret; /* The new store. */
3873 store_immediate_info *info = group->stores[0];
3874 if (info->ops[0].base_addr)
3875 total_new[0] += ret;
3876 if (info->ops[1].base_addr)
3877 total_new[0] += ret;
3878 switch (info->rhs_code)
3880 case BIT_AND_EXPR:
3881 case BIT_IOR_EXPR:
3882 case BIT_XOR_EXPR:
3883 total_new[0] += ret; /* The new BIT_*_EXPR stmt. */
3884 break;
3885 default:
3886 break;
3888 FOR_EACH_VEC_ELT (*split_stores, i, store)
3890 unsigned int j;
3891 bool bit_not_p[3] = { false, false, false };
3892 /* If all orig_stores have certain bit_not_p set, then
3893 we'd use a BIT_NOT_EXPR stmt and need to account for it.
3894 If some orig_stores have certain bit_not_p set, then
3895 we'd use a BIT_XOR_EXPR with a mask and need to account for
3896 it. */
3897 FOR_EACH_VEC_ELT (store->orig_stores, j, info)
3899 if (info->ops[0].bit_not_p)
3900 bit_not_p[0] = true;
3901 if (info->ops[1].bit_not_p)
3902 bit_not_p[1] = true;
3903 if (info->bit_not_p)
3904 bit_not_p[2] = true;
3906 total_new[0] += bit_not_p[0] + bit_not_p[1] + bit_not_p[2];
3911 return ret;
3914 /* Return the operation through which the operand IDX (if < 2) or
3915 result (IDX == 2) should be inverted. If NOP_EXPR, no inversion
3916 is done, if BIT_NOT_EXPR, all bits are inverted, if BIT_XOR_EXPR,
3917 the bits should be xored with mask. */
3919 static enum tree_code
3920 invert_op (split_store *split_store, int idx, tree int_type, tree &mask)
3922 unsigned int i;
3923 store_immediate_info *info;
3924 unsigned int cnt = 0;
3925 bool any_paddings = false;
3926 FOR_EACH_VEC_ELT (split_store->orig_stores, i, info)
3928 bool bit_not_p = idx < 2 ? info->ops[idx].bit_not_p : info->bit_not_p;
3929 if (bit_not_p)
3931 ++cnt;
3932 tree lhs = gimple_assign_lhs (info->stmt);
3933 if (INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3934 && TYPE_PRECISION (TREE_TYPE (lhs)) < info->bitsize)
3935 any_paddings = true;
3938 mask = NULL_TREE;
3939 if (cnt == 0)
3940 return NOP_EXPR;
3941 if (cnt == split_store->orig_stores.length () && !any_paddings)
3942 return BIT_NOT_EXPR;
3944 unsigned HOST_WIDE_INT try_bitpos = split_store->bytepos * BITS_PER_UNIT;
3945 unsigned buf_size = split_store->size / BITS_PER_UNIT;
3946 unsigned char *buf
3947 = XALLOCAVEC (unsigned char, buf_size);
3948 memset (buf, ~0U, buf_size);
3949 FOR_EACH_VEC_ELT (split_store->orig_stores, i, info)
3951 bool bit_not_p = idx < 2 ? info->ops[idx].bit_not_p : info->bit_not_p;
3952 if (!bit_not_p)
3953 continue;
3954 /* Clear regions with bit_not_p and invert afterwards, rather than
3955 clear regions with !bit_not_p, so that gaps in between stores aren't
3956 set in the mask. */
3957 unsigned HOST_WIDE_INT bitsize = info->bitsize;
3958 unsigned HOST_WIDE_INT prec = bitsize;
3959 unsigned int pos_in_buffer = 0;
3960 if (any_paddings)
3962 tree lhs = gimple_assign_lhs (info->stmt);
3963 if (INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3964 && TYPE_PRECISION (TREE_TYPE (lhs)) < bitsize)
3965 prec = TYPE_PRECISION (TREE_TYPE (lhs));
3967 if (info->bitpos < try_bitpos)
3969 gcc_assert (info->bitpos + bitsize > try_bitpos);
3970 if (!BYTES_BIG_ENDIAN)
3972 if (prec <= try_bitpos - info->bitpos)
3973 continue;
3974 prec -= try_bitpos - info->bitpos;
3976 bitsize -= try_bitpos - info->bitpos;
3977 if (BYTES_BIG_ENDIAN && prec > bitsize)
3978 prec = bitsize;
3980 else
3981 pos_in_buffer = info->bitpos - try_bitpos;
3982 if (prec < bitsize)
3984 /* If this is a bool inversion, invert just the least significant
3985 prec bits rather than all bits of it. */
3986 if (BYTES_BIG_ENDIAN)
3988 pos_in_buffer += bitsize - prec;
3989 if (pos_in_buffer >= split_store->size)
3990 continue;
3992 bitsize = prec;
3994 if (pos_in_buffer + bitsize > split_store->size)
3995 bitsize = split_store->size - pos_in_buffer;
3996 unsigned char *p = buf + (pos_in_buffer / BITS_PER_UNIT);
3997 if (BYTES_BIG_ENDIAN)
3998 clear_bit_region_be (p, (BITS_PER_UNIT - 1
3999 - (pos_in_buffer % BITS_PER_UNIT)), bitsize);
4000 else
4001 clear_bit_region (p, pos_in_buffer % BITS_PER_UNIT, bitsize);
4003 for (unsigned int i = 0; i < buf_size; ++i)
4004 buf[i] = ~buf[i];
4005 mask = native_interpret_expr (int_type, buf, buf_size);
4006 return BIT_XOR_EXPR;
4009 /* Given a merged store group GROUP output the widened version of it.
4010 The store chain is against the base object BASE.
4011 Try store sizes of at most MAX_STORE_BITSIZE bits wide and don't output
4012 unaligned stores for STRICT_ALIGNMENT targets or if it's too expensive.
4013 Make sure that the number of statements output is less than the number of
4014 original statements. If a better sequence is possible emit it and
4015 return true. */
4017 bool
4018 imm_store_chain_info::output_merged_store (merged_store_group *group)
4020 const unsigned HOST_WIDE_INT start_byte_pos
4021 = group->bitregion_start / BITS_PER_UNIT;
4022 unsigned int orig_num_stmts = group->stores.length ();
4023 if (orig_num_stmts < 2)
4024 return false;
4026 bool allow_unaligned_store
4027 = !STRICT_ALIGNMENT && param_store_merging_allow_unaligned;
4028 bool allow_unaligned_load = allow_unaligned_store;
4029 bool bzero_first = false;
4030 store_immediate_info *store;
4031 unsigned int num_clobber_stmts = 0;
4032 if (group->stores[0]->rhs_code == INTEGER_CST)
4034 unsigned int i;
4035 FOR_EACH_VEC_ELT (group->stores, i, store)
4036 if (gimple_clobber_p (store->stmt))
4037 num_clobber_stmts++;
4038 else if (TREE_CODE (gimple_assign_rhs1 (store->stmt)) == CONSTRUCTOR
4039 && CONSTRUCTOR_NELTS (gimple_assign_rhs1 (store->stmt)) == 0
4040 && group->start == store->bitpos
4041 && group->width == store->bitsize
4042 && (group->start % BITS_PER_UNIT) == 0
4043 && (group->width % BITS_PER_UNIT) == 0)
4045 bzero_first = true;
4046 break;
4048 else
4049 break;
4050 FOR_EACH_VEC_ELT_FROM (group->stores, i, store, i)
4051 if (gimple_clobber_p (store->stmt))
4052 num_clobber_stmts++;
4053 if (num_clobber_stmts == orig_num_stmts)
4054 return false;
4055 orig_num_stmts -= num_clobber_stmts;
4057 if (allow_unaligned_store || bzero_first)
4059 /* If unaligned stores are allowed, see how many stores we'd emit
4060 for unaligned and how many stores we'd emit for aligned stores.
4061 Only use unaligned stores if it allows fewer stores than aligned.
4062 Similarly, if there is a whole region clear first, prefer expanding
4063 it together compared to expanding clear first followed by merged
4064 further stores. */
4065 unsigned cnt[4] = { ~0U, ~0U, ~0U, ~0U };
4066 int pass_min = 0;
4067 for (int pass = 0; pass < 4; ++pass)
4069 if (!allow_unaligned_store && (pass & 1) != 0)
4070 continue;
4071 if (!bzero_first && (pass & 2) != 0)
4072 continue;
4073 cnt[pass] = split_group (group, (pass & 1) != 0,
4074 allow_unaligned_load, (pass & 2) != 0,
4075 NULL, NULL, NULL);
4076 if (cnt[pass] < cnt[pass_min])
4077 pass_min = pass;
4079 if ((pass_min & 1) == 0)
4080 allow_unaligned_store = false;
4081 if ((pass_min & 2) == 0)
4082 bzero_first = false;
4085 auto_vec<class split_store *, 32> split_stores;
4086 split_store *split_store;
4087 unsigned total_orig, total_new, i;
4088 split_group (group, allow_unaligned_store, allow_unaligned_load, bzero_first,
4089 &split_stores, &total_orig, &total_new);
4091 /* Determine if there is a clobber covering the whole group at the start,
4092 followed by proposed split stores that cover the whole group. In that
4093 case, prefer the transformation even if
4094 split_stores.length () == orig_num_stmts. */
4095 bool clobber_first = false;
4096 if (num_clobber_stmts
4097 && gimple_clobber_p (group->stores[0]->stmt)
4098 && group->start == group->stores[0]->bitpos
4099 && group->width == group->stores[0]->bitsize
4100 && (group->start % BITS_PER_UNIT) == 0
4101 && (group->width % BITS_PER_UNIT) == 0)
4103 clobber_first = true;
4104 unsigned HOST_WIDE_INT pos = group->start / BITS_PER_UNIT;
4105 FOR_EACH_VEC_ELT (split_stores, i, split_store)
4106 if (split_store->bytepos != pos)
4108 clobber_first = false;
4109 break;
4111 else
4112 pos += split_store->size / BITS_PER_UNIT;
4113 if (pos != (group->start + group->width) / BITS_PER_UNIT)
4114 clobber_first = false;
4117 if (split_stores.length () >= orig_num_stmts + clobber_first)
4120 /* We didn't manage to reduce the number of statements. Bail out. */
4121 if (dump_file && (dump_flags & TDF_DETAILS))
4122 fprintf (dump_file, "Exceeded original number of stmts (%u)."
4123 " Not profitable to emit new sequence.\n",
4124 orig_num_stmts);
4125 FOR_EACH_VEC_ELT (split_stores, i, split_store)
4126 delete split_store;
4127 return false;
4129 if (total_orig <= total_new)
4131 /* If number of estimated new statements is above estimated original
4132 statements, bail out too. */
4133 if (dump_file && (dump_flags & TDF_DETAILS))
4134 fprintf (dump_file, "Estimated number of original stmts (%u)"
4135 " not larger than estimated number of new"
4136 " stmts (%u).\n",
4137 total_orig, total_new);
4138 FOR_EACH_VEC_ELT (split_stores, i, split_store)
4139 delete split_store;
4140 return false;
4142 if (group->stores[0]->rhs_code == INTEGER_CST)
4144 bool all_orig = true;
4145 FOR_EACH_VEC_ELT (split_stores, i, split_store)
4146 if (!split_store->orig)
4148 all_orig = false;
4149 break;
4151 if (all_orig)
4153 unsigned int cnt = split_stores.length ();
4154 store_immediate_info *store;
4155 FOR_EACH_VEC_ELT (group->stores, i, store)
4156 if (gimple_clobber_p (store->stmt))
4157 ++cnt;
4158 /* Punt if we wouldn't make any real changes, i.e. keep all
4159 orig stmts + all clobbers. */
4160 if (cnt == group->stores.length ())
4162 if (dump_file && (dump_flags & TDF_DETAILS))
4163 fprintf (dump_file, "Exceeded original number of stmts (%u)."
4164 " Not profitable to emit new sequence.\n",
4165 orig_num_stmts);
4166 FOR_EACH_VEC_ELT (split_stores, i, split_store)
4167 delete split_store;
4168 return false;
4173 gimple_stmt_iterator last_gsi = gsi_for_stmt (group->last_stmt);
4174 gimple_seq seq = NULL;
4175 tree last_vdef, new_vuse;
4176 last_vdef = gimple_vdef (group->last_stmt);
4177 new_vuse = gimple_vuse (group->last_stmt);
4178 tree bswap_res = NULL_TREE;
4180 /* Clobbers are not removed. */
4181 if (gimple_clobber_p (group->last_stmt))
4183 new_vuse = make_ssa_name (gimple_vop (cfun), group->last_stmt);
4184 gimple_set_vdef (group->last_stmt, new_vuse);
4187 if (group->stores[0]->rhs_code == LROTATE_EXPR
4188 || group->stores[0]->rhs_code == NOP_EXPR)
4190 tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type;
4191 gimple *ins_stmt = group->stores[0]->ins_stmt;
4192 struct symbolic_number *n = &group->stores[0]->n;
4193 bool bswap = group->stores[0]->rhs_code == LROTATE_EXPR;
4195 switch (n->range)
4197 case 16:
4198 load_type = bswap_type = uint16_type_node;
4199 break;
4200 case 32:
4201 load_type = uint32_type_node;
4202 if (bswap)
4204 fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
4205 bswap_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
4207 break;
4208 case 64:
4209 load_type = uint64_type_node;
4210 if (bswap)
4212 fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
4213 bswap_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
4215 break;
4216 default:
4217 gcc_unreachable ();
4220 /* If the loads have each vuse of the corresponding store,
4221 we've checked the aliasing already in try_coalesce_bswap and
4222 we want to sink the need load into seq. So need to use new_vuse
4223 on the load. */
4224 if (n->base_addr)
4226 if (n->vuse == NULL)
4228 n->vuse = new_vuse;
4229 ins_stmt = NULL;
4231 else
4232 /* Update vuse in case it has changed by output_merged_stores. */
4233 n->vuse = gimple_vuse (ins_stmt);
4235 bswap_res = bswap_replace (gsi_start (seq), ins_stmt, fndecl,
4236 bswap_type, load_type, n, bswap,
4237 ~(uint64_t) 0);
4238 gcc_assert (bswap_res);
4241 gimple *stmt = NULL;
4242 auto_vec<gimple *, 32> orig_stmts;
4243 gimple_seq this_seq;
4244 tree addr = force_gimple_operand_1 (unshare_expr (base_addr), &this_seq,
4245 is_gimple_mem_ref_addr, NULL_TREE);
4246 gimple_seq_add_seq_without_update (&seq, this_seq);
4248 tree load_addr[2] = { NULL_TREE, NULL_TREE };
4249 gimple_seq load_seq[2] = { NULL, NULL };
4250 gimple_stmt_iterator load_gsi[2] = { gsi_none (), gsi_none () };
4251 for (int j = 0; j < 2; ++j)
4253 store_operand_info &op = group->stores[0]->ops[j];
4254 if (op.base_addr == NULL_TREE)
4255 continue;
4257 store_immediate_info *infol = group->stores.last ();
4258 if (gimple_vuse (op.stmt) == gimple_vuse (infol->ops[j].stmt))
4260 /* We can't pick the location randomly; while we've verified
4261 all the loads have the same vuse, they can be still in different
4262 basic blocks and we need to pick the one from the last bb:
4263 int x = q[0];
4264 if (x == N) return;
4265 int y = q[1];
4266 p[0] = x;
4267 p[1] = y;
4268 otherwise if we put the wider load at the q[0] load, we might
4269 segfault if q[1] is not mapped. */
4270 basic_block bb = gimple_bb (op.stmt);
4271 gimple *ostmt = op.stmt;
4272 store_immediate_info *info;
4273 FOR_EACH_VEC_ELT (group->stores, i, info)
4275 gimple *tstmt = info->ops[j].stmt;
4276 basic_block tbb = gimple_bb (tstmt);
4277 if (dominated_by_p (CDI_DOMINATORS, tbb, bb))
4279 ostmt = tstmt;
4280 bb = tbb;
4283 load_gsi[j] = gsi_for_stmt (ostmt);
4284 load_addr[j]
4285 = force_gimple_operand_1 (unshare_expr (op.base_addr),
4286 &load_seq[j], is_gimple_mem_ref_addr,
4287 NULL_TREE);
4289 else if (operand_equal_p (base_addr, op.base_addr, 0))
4290 load_addr[j] = addr;
4291 else
4293 load_addr[j]
4294 = force_gimple_operand_1 (unshare_expr (op.base_addr),
4295 &this_seq, is_gimple_mem_ref_addr,
4296 NULL_TREE);
4297 gimple_seq_add_seq_without_update (&seq, this_seq);
4301 FOR_EACH_VEC_ELT (split_stores, i, split_store)
4303 const unsigned HOST_WIDE_INT try_size = split_store->size;
4304 const unsigned HOST_WIDE_INT try_pos = split_store->bytepos;
4305 const unsigned HOST_WIDE_INT try_bitpos = try_pos * BITS_PER_UNIT;
4306 const unsigned HOST_WIDE_INT try_align = split_store->align;
4307 const unsigned HOST_WIDE_INT try_offset = try_pos - start_byte_pos;
4308 tree dest, src;
4309 location_t loc;
4311 if (split_store->orig)
4313 /* If there is just a single non-clobber constituent store
4314 which covers the whole area, just reuse the lhs and rhs. */
4315 gimple *orig_stmt = NULL;
4316 store_immediate_info *store;
4317 unsigned int j;
4318 FOR_EACH_VEC_ELT (split_store->orig_stores, j, store)
4319 if (!gimple_clobber_p (store->stmt))
4321 orig_stmt = store->stmt;
4322 break;
4324 dest = gimple_assign_lhs (orig_stmt);
4325 src = gimple_assign_rhs1 (orig_stmt);
4326 loc = gimple_location (orig_stmt);
4328 else
4330 store_immediate_info *info;
4331 unsigned short clique, base;
4332 unsigned int k;
4333 FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
4334 orig_stmts.safe_push (info->stmt);
4335 tree offset_type
4336 = get_alias_type_for_stmts (orig_stmts, false, &clique, &base);
4337 tree dest_type;
4338 loc = get_location_for_stmts (orig_stmts);
4339 orig_stmts.truncate (0);
4341 if (group->string_concatenation)
4342 dest_type
4343 = build_array_type_nelts (char_type_node,
4344 try_size / BITS_PER_UNIT);
4345 else
4347 dest_type = build_nonstandard_integer_type (try_size, UNSIGNED);
4348 dest_type = build_aligned_type (dest_type, try_align);
4350 dest = fold_build2 (MEM_REF, dest_type, addr,
4351 build_int_cst (offset_type, try_pos));
4352 if (TREE_CODE (dest) == MEM_REF)
4354 MR_DEPENDENCE_CLIQUE (dest) = clique;
4355 MR_DEPENDENCE_BASE (dest) = base;
4358 tree mask;
4359 if (bswap_res || group->string_concatenation)
4360 mask = integer_zero_node;
4361 else
4362 mask = native_interpret_expr (dest_type,
4363 group->mask + try_offset,
4364 group->buf_size);
4366 tree ops[2];
4367 for (int j = 0;
4368 j < 1 + (split_store->orig_stores[0]->ops[1].val != NULL_TREE);
4369 ++j)
4371 store_operand_info &op = split_store->orig_stores[0]->ops[j];
4372 if (bswap_res)
4373 ops[j] = bswap_res;
4374 else if (group->string_concatenation)
4376 ops[j] = build_string (try_size / BITS_PER_UNIT,
4377 (const char *) group->val + try_offset);
4378 TREE_TYPE (ops[j]) = dest_type;
4380 else if (op.base_addr)
4382 FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
4383 orig_stmts.safe_push (info->ops[j].stmt);
4385 offset_type = get_alias_type_for_stmts (orig_stmts, true,
4386 &clique, &base);
4387 location_t load_loc = get_location_for_stmts (orig_stmts);
4388 orig_stmts.truncate (0);
4390 unsigned HOST_WIDE_INT load_align = group->load_align[j];
4391 unsigned HOST_WIDE_INT align_bitpos
4392 = known_alignment (try_bitpos
4393 - split_store->orig_stores[0]->bitpos
4394 + op.bitpos);
4395 if (align_bitpos & (load_align - 1))
4396 load_align = least_bit_hwi (align_bitpos);
4398 tree load_int_type
4399 = build_nonstandard_integer_type (try_size, UNSIGNED);
4400 load_int_type
4401 = build_aligned_type (load_int_type, load_align);
4403 poly_uint64 load_pos
4404 = exact_div (try_bitpos
4405 - split_store->orig_stores[0]->bitpos
4406 + op.bitpos,
4407 BITS_PER_UNIT);
4408 ops[j] = fold_build2 (MEM_REF, load_int_type, load_addr[j],
4409 build_int_cst (offset_type, load_pos));
4410 if (TREE_CODE (ops[j]) == MEM_REF)
4412 MR_DEPENDENCE_CLIQUE (ops[j]) = clique;
4413 MR_DEPENDENCE_BASE (ops[j]) = base;
4415 if (!integer_zerop (mask))
4417 /* The load might load some bits (that will be masked
4418 off later on) uninitialized, avoid -W*uninitialized
4419 warnings in that case. */
4420 suppress_warning (ops[j], OPT_Wuninitialized);
4423 stmt = gimple_build_assign (make_ssa_name (dest_type), ops[j]);
4424 gimple_set_location (stmt, load_loc);
4425 if (gsi_bb (load_gsi[j]))
4427 gimple_set_vuse (stmt, gimple_vuse (op.stmt));
4428 gimple_seq_add_stmt_without_update (&load_seq[j], stmt);
4430 else
4432 gimple_set_vuse (stmt, new_vuse);
4433 gimple_seq_add_stmt_without_update (&seq, stmt);
4435 ops[j] = gimple_assign_lhs (stmt);
4436 tree xor_mask;
4437 enum tree_code inv_op
4438 = invert_op (split_store, j, dest_type, xor_mask);
4439 if (inv_op != NOP_EXPR)
4441 stmt = gimple_build_assign (make_ssa_name (dest_type),
4442 inv_op, ops[j], xor_mask);
4443 gimple_set_location (stmt, load_loc);
4444 ops[j] = gimple_assign_lhs (stmt);
4446 if (gsi_bb (load_gsi[j]))
4447 gimple_seq_add_stmt_without_update (&load_seq[j],
4448 stmt);
4449 else
4450 gimple_seq_add_stmt_without_update (&seq, stmt);
4453 else
4454 ops[j] = native_interpret_expr (dest_type,
4455 group->val + try_offset,
4456 group->buf_size);
4459 switch (split_store->orig_stores[0]->rhs_code)
4461 case BIT_AND_EXPR:
4462 case BIT_IOR_EXPR:
4463 case BIT_XOR_EXPR:
4464 FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
4466 tree rhs1 = gimple_assign_rhs1 (info->stmt);
4467 orig_stmts.safe_push (SSA_NAME_DEF_STMT (rhs1));
4469 location_t bit_loc;
4470 bit_loc = get_location_for_stmts (orig_stmts);
4471 orig_stmts.truncate (0);
4473 stmt
4474 = gimple_build_assign (make_ssa_name (dest_type),
4475 split_store->orig_stores[0]->rhs_code,
4476 ops[0], ops[1]);
4477 gimple_set_location (stmt, bit_loc);
4478 /* If there is just one load and there is a separate
4479 load_seq[0], emit the bitwise op right after it. */
4480 if (load_addr[1] == NULL_TREE && gsi_bb (load_gsi[0]))
4481 gimple_seq_add_stmt_without_update (&load_seq[0], stmt);
4482 /* Otherwise, if at least one load is in seq, we need to
4483 emit the bitwise op right before the store. If there
4484 are two loads and are emitted somewhere else, it would
4485 be better to emit the bitwise op as early as possible;
4486 we don't track where that would be possible right now
4487 though. */
4488 else
4489 gimple_seq_add_stmt_without_update (&seq, stmt);
4490 src = gimple_assign_lhs (stmt);
4491 tree xor_mask;
4492 enum tree_code inv_op;
4493 inv_op = invert_op (split_store, 2, dest_type, xor_mask);
4494 if (inv_op != NOP_EXPR)
4496 stmt = gimple_build_assign (make_ssa_name (dest_type),
4497 inv_op, src, xor_mask);
4498 gimple_set_location (stmt, bit_loc);
4499 if (load_addr[1] == NULL_TREE && gsi_bb (load_gsi[0]))
4500 gimple_seq_add_stmt_without_update (&load_seq[0], stmt);
4501 else
4502 gimple_seq_add_stmt_without_update (&seq, stmt);
4503 src = gimple_assign_lhs (stmt);
4505 break;
4506 case LROTATE_EXPR:
4507 case NOP_EXPR:
4508 src = ops[0];
4509 if (!is_gimple_val (src))
4511 stmt = gimple_build_assign (make_ssa_name (TREE_TYPE (src)),
4512 src);
4513 gimple_seq_add_stmt_without_update (&seq, stmt);
4514 src = gimple_assign_lhs (stmt);
4516 if (!useless_type_conversion_p (dest_type, TREE_TYPE (src)))
4518 stmt = gimple_build_assign (make_ssa_name (dest_type),
4519 NOP_EXPR, src);
4520 gimple_seq_add_stmt_without_update (&seq, stmt);
4521 src = gimple_assign_lhs (stmt);
4523 inv_op = invert_op (split_store, 2, dest_type, xor_mask);
4524 if (inv_op != NOP_EXPR)
4526 stmt = gimple_build_assign (make_ssa_name (dest_type),
4527 inv_op, src, xor_mask);
4528 gimple_set_location (stmt, loc);
4529 gimple_seq_add_stmt_without_update (&seq, stmt);
4530 src = gimple_assign_lhs (stmt);
4532 break;
4533 default:
4534 src = ops[0];
4535 break;
4538 /* If bit insertion is required, we use the source as an accumulator
4539 into which the successive bit-field values are manually inserted.
4540 FIXME: perhaps use BIT_INSERT_EXPR instead in some cases? */
4541 if (group->bit_insertion)
4542 FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
4543 if (info->rhs_code == BIT_INSERT_EXPR
4544 && info->bitpos < try_bitpos + try_size
4545 && info->bitpos + info->bitsize > try_bitpos)
4547 /* Mask, truncate, convert to final type, shift and ior into
4548 the accumulator. Note that every step can be a no-op. */
4549 const HOST_WIDE_INT start_gap = info->bitpos - try_bitpos;
4550 const HOST_WIDE_INT end_gap
4551 = (try_bitpos + try_size) - (info->bitpos + info->bitsize);
4552 tree tem = info->ops[0].val;
4553 if (!INTEGRAL_TYPE_P (TREE_TYPE (tem)))
4555 const unsigned HOST_WIDE_INT size
4556 = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (tem)));
4557 tree integer_type
4558 = build_nonstandard_integer_type (size, UNSIGNED);
4559 tem = gimple_build (&seq, loc, VIEW_CONVERT_EXPR,
4560 integer_type, tem);
4562 if (TYPE_PRECISION (TREE_TYPE (tem)) <= info->bitsize)
4564 tree bitfield_type
4565 = build_nonstandard_integer_type (info->bitsize,
4566 UNSIGNED);
4567 tem = gimple_convert (&seq, loc, bitfield_type, tem);
4569 else if ((BYTES_BIG_ENDIAN ? start_gap : end_gap) > 0)
4571 const unsigned HOST_WIDE_INT imask
4572 = (HOST_WIDE_INT_1U << info->bitsize) - 1;
4573 tem = gimple_build (&seq, loc,
4574 BIT_AND_EXPR, TREE_TYPE (tem), tem,
4575 build_int_cst (TREE_TYPE (tem),
4576 imask));
4578 const HOST_WIDE_INT shift
4579 = (BYTES_BIG_ENDIAN ? end_gap : start_gap);
4580 if (shift < 0)
4581 tem = gimple_build (&seq, loc,
4582 RSHIFT_EXPR, TREE_TYPE (tem), tem,
4583 build_int_cst (NULL_TREE, -shift));
4584 tem = gimple_convert (&seq, loc, dest_type, tem);
4585 if (shift > 0)
4586 tem = gimple_build (&seq, loc,
4587 LSHIFT_EXPR, dest_type, tem,
4588 build_int_cst (NULL_TREE, shift));
4589 src = gimple_build (&seq, loc,
4590 BIT_IOR_EXPR, dest_type, tem, src);
4593 if (!integer_zerop (mask))
4595 tree tem = make_ssa_name (dest_type);
4596 tree load_src = unshare_expr (dest);
4597 /* The load might load some or all bits uninitialized,
4598 avoid -W*uninitialized warnings in that case.
4599 As optimization, it would be nice if all the bits are
4600 provably uninitialized (no stores at all yet or previous
4601 store a CLOBBER) we'd optimize away the load and replace
4602 it e.g. with 0. */
4603 suppress_warning (load_src, OPT_Wuninitialized);
4604 stmt = gimple_build_assign (tem, load_src);
4605 gimple_set_location (stmt, loc);
4606 gimple_set_vuse (stmt, new_vuse);
4607 gimple_seq_add_stmt_without_update (&seq, stmt);
4609 /* FIXME: If there is a single chunk of zero bits in mask,
4610 perhaps use BIT_INSERT_EXPR instead? */
4611 stmt = gimple_build_assign (make_ssa_name (dest_type),
4612 BIT_AND_EXPR, tem, mask);
4613 gimple_set_location (stmt, loc);
4614 gimple_seq_add_stmt_without_update (&seq, stmt);
4615 tem = gimple_assign_lhs (stmt);
4617 if (TREE_CODE (src) == INTEGER_CST)
4618 src = wide_int_to_tree (dest_type,
4619 wi::bit_and_not (wi::to_wide (src),
4620 wi::to_wide (mask)));
4621 else
4623 tree nmask
4624 = wide_int_to_tree (dest_type,
4625 wi::bit_not (wi::to_wide (mask)));
4626 stmt = gimple_build_assign (make_ssa_name (dest_type),
4627 BIT_AND_EXPR, src, nmask);
4628 gimple_set_location (stmt, loc);
4629 gimple_seq_add_stmt_without_update (&seq, stmt);
4630 src = gimple_assign_lhs (stmt);
4632 stmt = gimple_build_assign (make_ssa_name (dest_type),
4633 BIT_IOR_EXPR, tem, src);
4634 gimple_set_location (stmt, loc);
4635 gimple_seq_add_stmt_without_update (&seq, stmt);
4636 src = gimple_assign_lhs (stmt);
4640 stmt = gimple_build_assign (dest, src);
4641 gimple_set_location (stmt, loc);
4642 gimple_set_vuse (stmt, new_vuse);
4643 gimple_seq_add_stmt_without_update (&seq, stmt);
4645 if (group->lp_nr && stmt_could_throw_p (cfun, stmt))
4646 add_stmt_to_eh_lp (stmt, group->lp_nr);
4648 tree new_vdef;
4649 if (i < split_stores.length () - 1)
4650 new_vdef = make_ssa_name (gimple_vop (cfun), stmt);
4651 else
4652 new_vdef = last_vdef;
4654 gimple_set_vdef (stmt, new_vdef);
4655 SSA_NAME_DEF_STMT (new_vdef) = stmt;
4656 new_vuse = new_vdef;
4659 FOR_EACH_VEC_ELT (split_stores, i, split_store)
4660 delete split_store;
4662 gcc_assert (seq);
4663 if (dump_file)
4665 fprintf (dump_file,
4666 "New sequence of %u stores to replace old one of %u stores\n",
4667 split_stores.length (), orig_num_stmts);
4668 if (dump_flags & TDF_DETAILS)
4669 print_gimple_seq (dump_file, seq, 0, TDF_VOPS | TDF_MEMSYMS);
4672 if (gimple_clobber_p (group->last_stmt))
4673 update_stmt (group->last_stmt);
4675 if (group->lp_nr > 0)
4677 /* We're going to insert a sequence of (potentially) throwing stores
4678 into an active EH region. This means that we're going to create
4679 new basic blocks with EH edges pointing to the post landing pad
4680 and, therefore, to have to update its PHI nodes, if any. For the
4681 virtual PHI node, we're going to use the VDEFs created above, but
4682 for the other nodes, we need to record the original reaching defs. */
4683 eh_landing_pad lp = get_eh_landing_pad_from_number (group->lp_nr);
4684 basic_block lp_bb = label_to_block (cfun, lp->post_landing_pad);
4685 basic_block last_bb = gimple_bb (group->last_stmt);
4686 edge last_edge = find_edge (last_bb, lp_bb);
4687 auto_vec<tree, 16> last_defs;
4688 gphi_iterator gpi;
4689 for (gpi = gsi_start_phis (lp_bb); !gsi_end_p (gpi); gsi_next (&gpi))
4691 gphi *phi = gpi.phi ();
4692 tree last_def;
4693 if (virtual_operand_p (gimple_phi_result (phi)))
4694 last_def = NULL_TREE;
4695 else
4696 last_def = gimple_phi_arg_def (phi, last_edge->dest_idx);
4697 last_defs.safe_push (last_def);
4700 /* Do the insertion. Then, if new basic blocks have been created in the
4701 process, rewind the chain of VDEFs create above to walk the new basic
4702 blocks and update the corresponding arguments of the PHI nodes. */
4703 update_modified_stmts (seq);
4704 if (gimple_find_sub_bbs (seq, &last_gsi))
4705 while (last_vdef != gimple_vuse (group->last_stmt))
4707 gimple *stmt = SSA_NAME_DEF_STMT (last_vdef);
4708 if (stmt_could_throw_p (cfun, stmt))
4710 edge new_edge = find_edge (gimple_bb (stmt), lp_bb);
4711 unsigned int i;
4712 for (gpi = gsi_start_phis (lp_bb), i = 0;
4713 !gsi_end_p (gpi);
4714 gsi_next (&gpi), i++)
4716 gphi *phi = gpi.phi ();
4717 tree new_def;
4718 if (virtual_operand_p (gimple_phi_result (phi)))
4719 new_def = last_vdef;
4720 else
4721 new_def = last_defs[i];
4722 add_phi_arg (phi, new_def, new_edge, UNKNOWN_LOCATION);
4725 last_vdef = gimple_vuse (stmt);
4728 else
4729 gsi_insert_seq_after (&last_gsi, seq, GSI_SAME_STMT);
4731 for (int j = 0; j < 2; ++j)
4732 if (load_seq[j])
4733 gsi_insert_seq_after (&load_gsi[j], load_seq[j], GSI_SAME_STMT);
4735 return true;
4738 /* Process the merged_store_group objects created in the coalescing phase.
4739 The stores are all against the base object BASE.
4740 Try to output the widened stores and delete the original statements if
4741 successful. Return true iff any changes were made. */
4743 bool
4744 imm_store_chain_info::output_merged_stores ()
4746 unsigned int i;
4747 merged_store_group *merged_store;
4748 bool ret = false;
4749 FOR_EACH_VEC_ELT (m_merged_store_groups, i, merged_store)
4751 if (dbg_cnt (store_merging)
4752 && output_merged_store (merged_store))
4754 unsigned int j;
4755 store_immediate_info *store;
4756 FOR_EACH_VEC_ELT (merged_store->stores, j, store)
4758 gimple *stmt = store->stmt;
4759 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4760 /* Don't remove clobbers, they are still useful even if
4761 everything is overwritten afterwards. */
4762 if (gimple_clobber_p (stmt))
4763 continue;
4764 gsi_remove (&gsi, true);
4765 if (store->lp_nr)
4766 remove_stmt_from_eh_lp (stmt);
4767 if (stmt != merged_store->last_stmt)
4769 unlink_stmt_vdef (stmt);
4770 release_defs (stmt);
4773 ret = true;
4776 if (ret && dump_file)
4777 fprintf (dump_file, "Merging successful!\n");
4779 return ret;
4782 /* Coalesce the store_immediate_info objects recorded against the base object
4783 BASE in the first phase and output them.
4784 Delete the allocated structures.
4785 Return true if any changes were made. */
4787 bool
4788 imm_store_chain_info::terminate_and_process_chain ()
4790 if (dump_file && (dump_flags & TDF_DETAILS))
4791 fprintf (dump_file, "Terminating chain with %u stores\n",
4792 m_store_info.length ());
4793 /* Process store chain. */
4794 bool ret = false;
4795 if (m_store_info.length () > 1)
4797 ret = coalesce_immediate_stores ();
4798 if (ret)
4799 ret = output_merged_stores ();
4802 /* Delete all the entries we allocated ourselves. */
4803 store_immediate_info *info;
4804 unsigned int i;
4805 FOR_EACH_VEC_ELT (m_store_info, i, info)
4806 delete info;
4808 merged_store_group *merged_info;
4809 FOR_EACH_VEC_ELT (m_merged_store_groups, i, merged_info)
4810 delete merged_info;
4812 return ret;
4815 /* Return true iff LHS is a destination potentially interesting for
4816 store merging. In practice these are the codes that get_inner_reference
4817 can process. */
4819 static bool
4820 lhs_valid_for_store_merging_p (tree lhs)
4822 if (DECL_P (lhs))
4823 return true;
4825 switch (TREE_CODE (lhs))
4827 case ARRAY_REF:
4828 case ARRAY_RANGE_REF:
4829 case BIT_FIELD_REF:
4830 case COMPONENT_REF:
4831 case MEM_REF:
4832 case VIEW_CONVERT_EXPR:
4833 return true;
4834 default:
4835 return false;
4838 gcc_unreachable ();
4841 /* Return true if the tree RHS is a constant we want to consider
4842 during store merging. In practice accept all codes that
4843 native_encode_expr accepts. */
4845 static bool
4846 rhs_valid_for_store_merging_p (tree rhs)
4848 unsigned HOST_WIDE_INT size;
4849 if (TREE_CODE (rhs) == CONSTRUCTOR
4850 && CONSTRUCTOR_NELTS (rhs) == 0
4851 && TYPE_SIZE_UNIT (TREE_TYPE (rhs))
4852 && tree_fits_uhwi_p (TYPE_SIZE_UNIT (TREE_TYPE (rhs))))
4853 return true;
4854 return (GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (rhs))).is_constant (&size)
4855 && native_encode_expr (rhs, NULL, size) != 0);
4858 /* Adjust *PBITPOS, *PBITREGION_START and *PBITREGION_END by BYTE_OFF bytes
4859 and return true on success or false on failure. */
4861 static bool
4862 adjust_bit_pos (poly_offset_int byte_off,
4863 poly_int64 *pbitpos,
4864 poly_uint64 *pbitregion_start,
4865 poly_uint64 *pbitregion_end)
4867 poly_offset_int bit_off = byte_off << LOG2_BITS_PER_UNIT;
4868 bit_off += *pbitpos;
4870 if (known_ge (bit_off, 0) && bit_off.to_shwi (pbitpos))
4872 if (maybe_ne (*pbitregion_end, 0U))
4874 bit_off = byte_off << LOG2_BITS_PER_UNIT;
4875 bit_off += *pbitregion_start;
4876 if (bit_off.to_uhwi (pbitregion_start))
4878 bit_off = byte_off << LOG2_BITS_PER_UNIT;
4879 bit_off += *pbitregion_end;
4880 if (!bit_off.to_uhwi (pbitregion_end))
4881 *pbitregion_end = 0;
4883 else
4884 *pbitregion_end = 0;
4886 return true;
4888 else
4889 return false;
4892 /* If MEM is a memory reference usable for store merging (either as
4893 store destination or for loads), return the non-NULL base_addr
4894 and set *PBITSIZE, *PBITPOS, *PBITREGION_START and *PBITREGION_END.
4895 Otherwise return NULL, *PBITPOS should be still valid even for that
4896 case. */
4898 static tree
4899 mem_valid_for_store_merging (tree mem, poly_uint64 *pbitsize,
4900 poly_uint64 *pbitpos,
4901 poly_uint64 *pbitregion_start,
4902 poly_uint64 *pbitregion_end)
4904 poly_int64 bitsize, bitpos;
4905 poly_uint64 bitregion_start = 0, bitregion_end = 0;
4906 machine_mode mode;
4907 int unsignedp = 0, reversep = 0, volatilep = 0;
4908 tree offset;
4909 tree base_addr = get_inner_reference (mem, &bitsize, &bitpos, &offset, &mode,
4910 &unsignedp, &reversep, &volatilep);
4911 *pbitsize = bitsize;
4912 if (known_eq (bitsize, 0))
4913 return NULL_TREE;
4915 if (TREE_CODE (mem) == COMPONENT_REF
4916 && DECL_BIT_FIELD_TYPE (TREE_OPERAND (mem, 1)))
4918 get_bit_range (&bitregion_start, &bitregion_end, mem, &bitpos, &offset);
4919 if (maybe_ne (bitregion_end, 0U))
4920 bitregion_end += 1;
4923 if (reversep)
4924 return NULL_TREE;
4926 /* We do not want to rewrite TARGET_MEM_REFs. */
4927 if (TREE_CODE (base_addr) == TARGET_MEM_REF)
4928 return NULL_TREE;
4929 /* In some cases get_inner_reference may return a
4930 MEM_REF [ptr + byteoffset]. For the purposes of this pass
4931 canonicalize the base_addr to MEM_REF [ptr] and take
4932 byteoffset into account in the bitpos. This occurs in
4933 PR 23684 and this way we can catch more chains. */
4934 else if (TREE_CODE (base_addr) == MEM_REF)
4936 if (!adjust_bit_pos (mem_ref_offset (base_addr), &bitpos,
4937 &bitregion_start, &bitregion_end))
4938 return NULL_TREE;
4939 base_addr = TREE_OPERAND (base_addr, 0);
4941 /* get_inner_reference returns the base object, get at its
4942 address now. */
4943 else
4945 if (maybe_lt (bitpos, 0))
4946 return NULL_TREE;
4947 base_addr = build_fold_addr_expr (base_addr);
4950 if (offset)
4952 /* If the access is variable offset then a base decl has to be
4953 address-taken to be able to emit pointer-based stores to it.
4954 ??? We might be able to get away with re-using the original
4955 base up to the first variable part and then wrapping that inside
4956 a BIT_FIELD_REF. */
4957 tree base = get_base_address (base_addr);
4958 if (!base || (DECL_P (base) && !TREE_ADDRESSABLE (base)))
4959 return NULL_TREE;
4961 /* Similarly to above for the base, remove constant from the offset. */
4962 if (TREE_CODE (offset) == PLUS_EXPR
4963 && TREE_CODE (TREE_OPERAND (offset, 1)) == INTEGER_CST
4964 && adjust_bit_pos (wi::to_poly_offset (TREE_OPERAND (offset, 1)),
4965 &bitpos, &bitregion_start, &bitregion_end))
4966 offset = TREE_OPERAND (offset, 0);
4968 base_addr = build2 (POINTER_PLUS_EXPR, TREE_TYPE (base_addr),
4969 base_addr, offset);
4972 if (known_eq (bitregion_end, 0U))
4974 bitregion_start = round_down_to_byte_boundary (bitpos);
4975 bitregion_end = round_up_to_byte_boundary (bitpos + bitsize);
4978 *pbitsize = bitsize;
4979 *pbitpos = bitpos;
4980 *pbitregion_start = bitregion_start;
4981 *pbitregion_end = bitregion_end;
4982 return base_addr;
4985 /* Return true if STMT is a load that can be used for store merging.
4986 In that case fill in *OP. BITSIZE, BITPOS, BITREGION_START and
4987 BITREGION_END are properties of the corresponding store. */
4989 static bool
4990 handled_load (gimple *stmt, store_operand_info *op,
4991 poly_uint64 bitsize, poly_uint64 bitpos,
4992 poly_uint64 bitregion_start, poly_uint64 bitregion_end)
4994 if (!is_gimple_assign (stmt))
4995 return false;
4996 if (gimple_assign_rhs_code (stmt) == BIT_NOT_EXPR)
4998 tree rhs1 = gimple_assign_rhs1 (stmt);
4999 if (TREE_CODE (rhs1) == SSA_NAME
5000 && handled_load (SSA_NAME_DEF_STMT (rhs1), op, bitsize, bitpos,
5001 bitregion_start, bitregion_end))
5003 /* Don't allow _1 = load; _2 = ~1; _3 = ~_2; which should have
5004 been optimized earlier, but if allowed here, would confuse the
5005 multiple uses counting. */
5006 if (op->bit_not_p)
5007 return false;
5008 op->bit_not_p = !op->bit_not_p;
5009 return true;
5011 return false;
5013 if (gimple_vuse (stmt)
5014 && gimple_assign_load_p (stmt)
5015 && !stmt_can_throw_internal (cfun, stmt)
5016 && !gimple_has_volatile_ops (stmt))
5018 tree mem = gimple_assign_rhs1 (stmt);
5019 op->base_addr
5020 = mem_valid_for_store_merging (mem, &op->bitsize, &op->bitpos,
5021 &op->bitregion_start,
5022 &op->bitregion_end);
5023 if (op->base_addr != NULL_TREE
5024 && known_eq (op->bitsize, bitsize)
5025 && multiple_p (op->bitpos - bitpos, BITS_PER_UNIT)
5026 && known_ge (op->bitpos - op->bitregion_start,
5027 bitpos - bitregion_start)
5028 && known_ge (op->bitregion_end - op->bitpos,
5029 bitregion_end - bitpos))
5031 op->stmt = stmt;
5032 op->val = mem;
5033 op->bit_not_p = false;
5034 return true;
5037 return false;
5040 /* Return the index number of the landing pad for STMT, if any. */
5042 static int
5043 lp_nr_for_store (gimple *stmt)
5045 if (!cfun->can_throw_non_call_exceptions || !cfun->eh)
5046 return 0;
5048 if (!stmt_could_throw_p (cfun, stmt))
5049 return 0;
5051 return lookup_stmt_eh_lp (stmt);
5054 /* Record the store STMT for store merging optimization if it can be
5055 optimized. Return true if any changes were made. */
5057 bool
5058 pass_store_merging::process_store (gimple *stmt)
5060 tree lhs = gimple_assign_lhs (stmt);
5061 tree rhs = gimple_assign_rhs1 (stmt);
5062 poly_uint64 bitsize, bitpos = 0;
5063 poly_uint64 bitregion_start = 0, bitregion_end = 0;
5064 tree base_addr
5065 = mem_valid_for_store_merging (lhs, &bitsize, &bitpos,
5066 &bitregion_start, &bitregion_end);
5067 if (known_eq (bitsize, 0U))
5068 return false;
5070 bool invalid = (base_addr == NULL_TREE
5071 || (maybe_gt (bitsize,
5072 (unsigned int) MAX_BITSIZE_MODE_ANY_INT)
5073 && TREE_CODE (rhs) != INTEGER_CST
5074 && (TREE_CODE (rhs) != CONSTRUCTOR
5075 || CONSTRUCTOR_NELTS (rhs) != 0)));
5076 enum tree_code rhs_code = ERROR_MARK;
5077 bool bit_not_p = false;
5078 struct symbolic_number n;
5079 gimple *ins_stmt = NULL;
5080 store_operand_info ops[2];
5081 if (invalid)
5083 else if (TREE_CODE (rhs) == STRING_CST)
5085 rhs_code = STRING_CST;
5086 ops[0].val = rhs;
5088 else if (rhs_valid_for_store_merging_p (rhs))
5090 rhs_code = INTEGER_CST;
5091 ops[0].val = rhs;
5093 else if (TREE_CODE (rhs) == SSA_NAME)
5095 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs), *def_stmt1, *def_stmt2;
5096 if (!is_gimple_assign (def_stmt))
5097 invalid = true;
5098 else if (handled_load (def_stmt, &ops[0], bitsize, bitpos,
5099 bitregion_start, bitregion_end))
5100 rhs_code = MEM_REF;
5101 else if (gimple_assign_rhs_code (def_stmt) == BIT_NOT_EXPR)
5103 tree rhs1 = gimple_assign_rhs1 (def_stmt);
5104 if (TREE_CODE (rhs1) == SSA_NAME
5105 && is_gimple_assign (SSA_NAME_DEF_STMT (rhs1)))
5107 bit_not_p = true;
5108 def_stmt = SSA_NAME_DEF_STMT (rhs1);
5112 if (rhs_code == ERROR_MARK && !invalid)
5113 switch ((rhs_code = gimple_assign_rhs_code (def_stmt)))
5115 case BIT_AND_EXPR:
5116 case BIT_IOR_EXPR:
5117 case BIT_XOR_EXPR:
5118 tree rhs1, rhs2;
5119 rhs1 = gimple_assign_rhs1 (def_stmt);
5120 rhs2 = gimple_assign_rhs2 (def_stmt);
5121 invalid = true;
5122 if (TREE_CODE (rhs1) != SSA_NAME)
5123 break;
5124 def_stmt1 = SSA_NAME_DEF_STMT (rhs1);
5125 if (!is_gimple_assign (def_stmt1)
5126 || !handled_load (def_stmt1, &ops[0], bitsize, bitpos,
5127 bitregion_start, bitregion_end))
5128 break;
5129 if (rhs_valid_for_store_merging_p (rhs2))
5130 ops[1].val = rhs2;
5131 else if (TREE_CODE (rhs2) != SSA_NAME)
5132 break;
5133 else
5135 def_stmt2 = SSA_NAME_DEF_STMT (rhs2);
5136 if (!is_gimple_assign (def_stmt2))
5137 break;
5138 else if (!handled_load (def_stmt2, &ops[1], bitsize, bitpos,
5139 bitregion_start, bitregion_end))
5140 break;
5142 invalid = false;
5143 break;
5144 default:
5145 invalid = true;
5146 break;
5149 unsigned HOST_WIDE_INT const_bitsize;
5150 if (bitsize.is_constant (&const_bitsize)
5151 && (const_bitsize % BITS_PER_UNIT) == 0
5152 && const_bitsize <= 64
5153 && multiple_p (bitpos, BITS_PER_UNIT))
5155 ins_stmt = find_bswap_or_nop_1 (def_stmt, &n, 12);
5156 if (ins_stmt)
5158 uint64_t nn = n.n;
5159 for (unsigned HOST_WIDE_INT i = 0;
5160 i < const_bitsize;
5161 i += BITS_PER_UNIT, nn >>= BITS_PER_MARKER)
5162 if ((nn & MARKER_MASK) == 0
5163 || (nn & MARKER_MASK) == MARKER_BYTE_UNKNOWN)
5165 ins_stmt = NULL;
5166 break;
5168 if (ins_stmt)
5170 if (invalid)
5172 rhs_code = LROTATE_EXPR;
5173 ops[0].base_addr = NULL_TREE;
5174 ops[1].base_addr = NULL_TREE;
5176 invalid = false;
5181 if (invalid
5182 && bitsize.is_constant (&const_bitsize)
5183 && ((const_bitsize % BITS_PER_UNIT) != 0
5184 || !multiple_p (bitpos, BITS_PER_UNIT))
5185 && const_bitsize <= MAX_FIXED_MODE_SIZE)
5187 /* Bypass a conversion to the bit-field type. */
5188 if (!bit_not_p
5189 && is_gimple_assign (def_stmt)
5190 && CONVERT_EXPR_CODE_P (rhs_code))
5192 tree rhs1 = gimple_assign_rhs1 (def_stmt);
5193 if (TREE_CODE (rhs1) == SSA_NAME
5194 && INTEGRAL_TYPE_P (TREE_TYPE (rhs1)))
5195 rhs = rhs1;
5197 rhs_code = BIT_INSERT_EXPR;
5198 bit_not_p = false;
5199 ops[0].val = rhs;
5200 ops[0].base_addr = NULL_TREE;
5201 ops[1].base_addr = NULL_TREE;
5202 invalid = false;
5205 else
5206 invalid = true;
5208 unsigned HOST_WIDE_INT const_bitsize, const_bitpos;
5209 unsigned HOST_WIDE_INT const_bitregion_start, const_bitregion_end;
5210 if (invalid
5211 || !bitsize.is_constant (&const_bitsize)
5212 || !bitpos.is_constant (&const_bitpos)
5213 || !bitregion_start.is_constant (&const_bitregion_start)
5214 || !bitregion_end.is_constant (&const_bitregion_end))
5215 return terminate_all_aliasing_chains (NULL, stmt);
5217 if (!ins_stmt)
5218 memset (&n, 0, sizeof (n));
5220 class imm_store_chain_info **chain_info = NULL;
5221 bool ret = false;
5222 if (base_addr)
5223 chain_info = m_stores.get (base_addr);
5225 store_immediate_info *info;
5226 if (chain_info)
5228 unsigned int ord = (*chain_info)->m_store_info.length ();
5229 info = new store_immediate_info (const_bitsize, const_bitpos,
5230 const_bitregion_start,
5231 const_bitregion_end,
5232 stmt, ord, rhs_code, n, ins_stmt,
5233 bit_not_p, lp_nr_for_store (stmt),
5234 ops[0], ops[1]);
5235 if (dump_file && (dump_flags & TDF_DETAILS))
5237 fprintf (dump_file, "Recording immediate store from stmt:\n");
5238 print_gimple_stmt (dump_file, stmt, 0);
5240 (*chain_info)->m_store_info.safe_push (info);
5241 m_n_stores++;
5242 ret |= terminate_all_aliasing_chains (chain_info, stmt);
5243 /* If we reach the limit of stores to merge in a chain terminate and
5244 process the chain now. */
5245 if ((*chain_info)->m_store_info.length ()
5246 == (unsigned int) param_max_stores_to_merge)
5248 if (dump_file && (dump_flags & TDF_DETAILS))
5249 fprintf (dump_file,
5250 "Reached maximum number of statements to merge:\n");
5251 ret |= terminate_and_process_chain (*chain_info);
5254 else
5256 /* Store aliases any existing chain? */
5257 ret |= terminate_all_aliasing_chains (NULL, stmt);
5259 /* Start a new chain. */
5260 class imm_store_chain_info *new_chain
5261 = new imm_store_chain_info (m_stores_head, base_addr);
5262 info = new store_immediate_info (const_bitsize, const_bitpos,
5263 const_bitregion_start,
5264 const_bitregion_end,
5265 stmt, 0, rhs_code, n, ins_stmt,
5266 bit_not_p, lp_nr_for_store (stmt),
5267 ops[0], ops[1]);
5268 new_chain->m_store_info.safe_push (info);
5269 m_n_stores++;
5270 m_stores.put (base_addr, new_chain);
5271 m_n_chains++;
5272 if (dump_file && (dump_flags & TDF_DETAILS))
5274 fprintf (dump_file, "Starting active chain number %u with statement:\n",
5275 m_n_chains);
5276 print_gimple_stmt (dump_file, stmt, 0);
5277 fprintf (dump_file, "The base object is:\n");
5278 print_generic_expr (dump_file, base_addr);
5279 fprintf (dump_file, "\n");
5283 /* Prune oldest chains so that after adding the chain or store above
5284 we're again within the limits set by the params. */
5285 if (m_n_chains > (unsigned)param_max_store_chains_to_track
5286 || m_n_stores > (unsigned)param_max_stores_to_track)
5288 if (dump_file && (dump_flags & TDF_DETAILS))
5289 fprintf (dump_file, "Too many chains (%u > %d) or stores (%u > %d), "
5290 "terminating oldest chain(s).\n", m_n_chains,
5291 param_max_store_chains_to_track, m_n_stores,
5292 param_max_stores_to_track);
5293 imm_store_chain_info **e = &m_stores_head;
5294 unsigned idx = 0;
5295 unsigned n_stores = 0;
5296 while (*e)
5298 if (idx >= (unsigned)param_max_store_chains_to_track
5299 || (n_stores + (*e)->m_store_info.length ()
5300 > (unsigned)param_max_stores_to_track))
5301 ret |= terminate_and_process_chain (*e);
5302 else
5304 n_stores += (*e)->m_store_info.length ();
5305 e = &(*e)->next;
5306 ++idx;
5311 return ret;
5314 /* Return true if STMT is a store valid for store merging. */
5316 static bool
5317 store_valid_for_store_merging_p (gimple *stmt)
5319 return gimple_assign_single_p (stmt)
5320 && gimple_vdef (stmt)
5321 && lhs_valid_for_store_merging_p (gimple_assign_lhs (stmt))
5322 && (!gimple_has_volatile_ops (stmt) || gimple_clobber_p (stmt));
5325 enum basic_block_status { BB_INVALID, BB_VALID, BB_EXTENDED_VALID };
5327 /* Return the status of basic block BB wrt store merging. */
5329 static enum basic_block_status
5330 get_status_for_store_merging (basic_block bb)
5332 unsigned int num_statements = 0;
5333 unsigned int num_constructors = 0;
5334 gimple_stmt_iterator gsi;
5335 edge e;
5337 for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5339 gimple *stmt = gsi_stmt (gsi);
5341 if (is_gimple_debug (stmt))
5342 continue;
5344 if (store_valid_for_store_merging_p (stmt) && ++num_statements >= 2)
5345 break;
5347 if (is_gimple_assign (stmt)
5348 && gimple_assign_rhs_code (stmt) == CONSTRUCTOR)
5350 tree rhs = gimple_assign_rhs1 (stmt);
5351 if (VECTOR_TYPE_P (TREE_TYPE (rhs))
5352 && INTEGRAL_TYPE_P (TREE_TYPE (TREE_TYPE (rhs)))
5353 && gimple_assign_lhs (stmt) != NULL_TREE)
5355 HOST_WIDE_INT sz
5356 = int_size_in_bytes (TREE_TYPE (rhs)) * BITS_PER_UNIT;
5357 if (sz == 16 || sz == 32 || sz == 64)
5359 num_constructors = 1;
5360 break;
5366 if (num_statements == 0 && num_constructors == 0)
5367 return BB_INVALID;
5369 if (cfun->can_throw_non_call_exceptions && cfun->eh
5370 && store_valid_for_store_merging_p (gimple_seq_last_stmt (bb_seq (bb)))
5371 && (e = find_fallthru_edge (bb->succs))
5372 && e->dest == bb->next_bb)
5373 return BB_EXTENDED_VALID;
5375 return (num_statements >= 2 || num_constructors) ? BB_VALID : BB_INVALID;
5378 /* Entry point for the pass. Go over each basic block recording chains of
5379 immediate stores. Upon encountering a terminating statement (as defined
5380 by stmt_terminates_chain_p) process the recorded stores and emit the widened
5381 variants. */
5383 unsigned int
5384 pass_store_merging::execute (function *fun)
5386 basic_block bb;
5387 hash_set<gimple *> orig_stmts;
5388 bool changed = false, open_chains = false;
5390 /* If the function can throw and catch non-call exceptions, we'll be trying
5391 to merge stores across different basic blocks so we need to first unsplit
5392 the EH edges in order to streamline the CFG of the function. */
5393 if (cfun->can_throw_non_call_exceptions && cfun->eh)
5394 unsplit_eh_edges ();
5396 calculate_dominance_info (CDI_DOMINATORS);
5398 FOR_EACH_BB_FN (bb, fun)
5400 const basic_block_status bb_status = get_status_for_store_merging (bb);
5401 gimple_stmt_iterator gsi;
5403 if (open_chains && (bb_status == BB_INVALID || !single_pred_p (bb)))
5405 changed |= terminate_and_process_all_chains ();
5406 open_chains = false;
5409 if (bb_status == BB_INVALID)
5410 continue;
5412 if (dump_file && (dump_flags & TDF_DETAILS))
5413 fprintf (dump_file, "Processing basic block <%d>:\n", bb->index);
5415 for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); )
5417 gimple *stmt = gsi_stmt (gsi);
5418 gsi_next (&gsi);
5420 if (is_gimple_debug (stmt))
5421 continue;
5423 if (gimple_has_volatile_ops (stmt) && !gimple_clobber_p (stmt))
5425 /* Terminate all chains. */
5426 if (dump_file && (dump_flags & TDF_DETAILS))
5427 fprintf (dump_file, "Volatile access terminates "
5428 "all chains\n");
5429 changed |= terminate_and_process_all_chains ();
5430 open_chains = false;
5431 continue;
5434 if (is_gimple_assign (stmt)
5435 && gimple_assign_rhs_code (stmt) == CONSTRUCTOR
5436 && maybe_optimize_vector_constructor (stmt))
5437 continue;
5439 if (store_valid_for_store_merging_p (stmt))
5440 changed |= process_store (stmt);
5441 else
5442 changed |= terminate_all_aliasing_chains (NULL, stmt);
5445 if (bb_status == BB_EXTENDED_VALID)
5446 open_chains = true;
5447 else
5449 changed |= terminate_and_process_all_chains ();
5450 open_chains = false;
5454 if (open_chains)
5455 changed |= terminate_and_process_all_chains ();
5457 /* If the function can throw and catch non-call exceptions and something
5458 changed during the pass, then the CFG has (very likely) changed too. */
5459 if (cfun->can_throw_non_call_exceptions && cfun->eh && changed)
5461 free_dominance_info (CDI_DOMINATORS);
5462 return TODO_cleanup_cfg;
5465 return 0;
5468 } // anon namespace
5470 /* Construct and return a store merging pass object. */
5472 gimple_opt_pass *
5473 make_pass_store_merging (gcc::context *ctxt)
5475 return new pass_store_merging (ctxt);
5478 #if CHECKING_P
5480 namespace selftest {
5482 /* Selftests for store merging helpers. */
5484 /* Assert that all elements of the byte arrays X and Y, both of length N
5485 are equal. */
5487 static void
5488 verify_array_eq (unsigned char *x, unsigned char *y, unsigned int n)
5490 for (unsigned int i = 0; i < n; i++)
5492 if (x[i] != y[i])
5494 fprintf (stderr, "Arrays do not match. X:\n");
5495 dump_char_array (stderr, x, n);
5496 fprintf (stderr, "Y:\n");
5497 dump_char_array (stderr, y, n);
5499 ASSERT_EQ (x[i], y[i]);
5503 /* Test shift_bytes_in_array_left and that it carries bits across between
5504 bytes correctly. */
5506 static void
5507 verify_shift_bytes_in_array_left (void)
5509 /* byte 1 | byte 0
5510 00011111 | 11100000. */
5511 unsigned char orig[2] = { 0xe0, 0x1f };
5512 unsigned char in[2];
5513 memcpy (in, orig, sizeof orig);
5515 unsigned char expected[2] = { 0x80, 0x7f };
5516 shift_bytes_in_array_left (in, sizeof (in), 2);
5517 verify_array_eq (in, expected, sizeof (in));
5519 memcpy (in, orig, sizeof orig);
5520 memcpy (expected, orig, sizeof orig);
5521 /* Check that shifting by zero doesn't change anything. */
5522 shift_bytes_in_array_left (in, sizeof (in), 0);
5523 verify_array_eq (in, expected, sizeof (in));
5527 /* Test shift_bytes_in_array_right and that it carries bits across between
5528 bytes correctly. */
5530 static void
5531 verify_shift_bytes_in_array_right (void)
5533 /* byte 1 | byte 0
5534 00011111 | 11100000. */
5535 unsigned char orig[2] = { 0x1f, 0xe0};
5536 unsigned char in[2];
5537 memcpy (in, orig, sizeof orig);
5538 unsigned char expected[2] = { 0x07, 0xf8};
5539 shift_bytes_in_array_right (in, sizeof (in), 2);
5540 verify_array_eq (in, expected, sizeof (in));
5542 memcpy (in, orig, sizeof orig);
5543 memcpy (expected, orig, sizeof orig);
5544 /* Check that shifting by zero doesn't change anything. */
5545 shift_bytes_in_array_right (in, sizeof (in), 0);
5546 verify_array_eq (in, expected, sizeof (in));
5549 /* Test clear_bit_region that it clears exactly the bits asked and
5550 nothing more. */
5552 static void
5553 verify_clear_bit_region (void)
5555 /* Start with all bits set and test clearing various patterns in them. */
5556 unsigned char orig[3] = { 0xff, 0xff, 0xff};
5557 unsigned char in[3];
5558 unsigned char expected[3];
5559 memcpy (in, orig, sizeof in);
5561 /* Check zeroing out all the bits. */
5562 clear_bit_region (in, 0, 3 * BITS_PER_UNIT);
5563 expected[0] = expected[1] = expected[2] = 0;
5564 verify_array_eq (in, expected, sizeof in);
5566 memcpy (in, orig, sizeof in);
5567 /* Leave the first and last bits intact. */
5568 clear_bit_region (in, 1, 3 * BITS_PER_UNIT - 2);
5569 expected[0] = 0x1;
5570 expected[1] = 0;
5571 expected[2] = 0x80;
5572 verify_array_eq (in, expected, sizeof in);
5575 /* Test clear_bit_region_be that it clears exactly the bits asked and
5576 nothing more. */
5578 static void
5579 verify_clear_bit_region_be (void)
5581 /* Start with all bits set and test clearing various patterns in them. */
5582 unsigned char orig[3] = { 0xff, 0xff, 0xff};
5583 unsigned char in[3];
5584 unsigned char expected[3];
5585 memcpy (in, orig, sizeof in);
5587 /* Check zeroing out all the bits. */
5588 clear_bit_region_be (in, BITS_PER_UNIT - 1, 3 * BITS_PER_UNIT);
5589 expected[0] = expected[1] = expected[2] = 0;
5590 verify_array_eq (in, expected, sizeof in);
5592 memcpy (in, orig, sizeof in);
5593 /* Leave the first and last bits intact. */
5594 clear_bit_region_be (in, BITS_PER_UNIT - 2, 3 * BITS_PER_UNIT - 2);
5595 expected[0] = 0x80;
5596 expected[1] = 0;
5597 expected[2] = 0x1;
5598 verify_array_eq (in, expected, sizeof in);
5602 /* Run all of the selftests within this file. */
5604 void
5605 store_merging_c_tests (void)
5607 verify_shift_bytes_in_array_left ();
5608 verify_shift_bytes_in_array_right ();
5609 verify_clear_bit_region ();
5610 verify_clear_bit_region_be ();
5613 } // namespace selftest
5614 #endif /* CHECKING_P. */