gcc/gimple-ssa-store-merging.c
1 /* GIMPLE store merging and byte swapping passes.
2 Copyright (C) 2009-2021 Free Software Foundation, Inc.
3 Contributed by ARM Ltd.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* The purpose of the store merging pass is to combine multiple memory stores
22 of constant values, values loaded from memory, bitwise operations on those,
23 or bit-field values, to consecutive locations, into fewer wider stores.
25 For example, if we have a sequence performing four byte stores to
26 consecutive memory locations:
27 [p ] := imm1;
28 [p + 1B] := imm2;
29 [p + 2B] := imm3;
30 [p + 3B] := imm4;
31 we can transform this into a single 4-byte store if the target supports it:
32 [p] := imm1:imm2:imm3:imm4 concatenated according to endianness.
34 Or:
35 [p ] := [q ];
36 [p + 1B] := [q + 1B];
37 [p + 2B] := [q + 2B];
38 [p + 3B] := [q + 3B];
39 if there is no overlap, this can be transformed into a single 4-byte
40 load followed by a single 4-byte store.
42 Or:
43 [p ] := [q ] ^ imm1;
44 [p + 1B] := [q + 1B] ^ imm2;
45 [p + 2B] := [q + 2B] ^ imm3;
46 [p + 3B] := [q + 3B] ^ imm4;
47 if there is no overlap, this can be transformed into a single 4-byte
48 load, xored with imm1:imm2:imm3:imm4 and stored using a single 4-byte store.
50 Or:
51 [p:1 ] := imm;
52 [p:31] := val & 0x7FFFFFFF;
53 we can transform this into a single 4-byte store if the target supports it:
54 [p] := imm:(val & 0x7FFFFFFF) concatenated according to endianness.
56 The algorithm is applied to each basic block in three phases:
58 1) Scan through the basic block and record assignments to destinations
59 that can be expressed as a store to memory of a certain size at a certain
60 bit offset from base expressions we can handle. For bit-fields we also
61 record the surrounding bit region, i.e. bits that could be stored in
62 a read-modify-write operation when storing the bit-field. Record store
63 chains to different bases in a hash_map (m_stores) and make sure to
64 terminate such chains when appropriate (for example when the stored
65 values get used subsequently).
66 These stores can be a result of structure element initializers, array stores
67 etc. A store_immediate_info object is recorded for every such store.
68 Record as many such assignments to a single base as possible until a
69 statement that interferes with the store sequence is encountered.
70 Each store has up to 2 operands, which can be either a constant, a memory
71 load or an SSA name, from which the value to be stored can be computed.
72 At most one of the operands can be a constant. The operands are recorded
73 in store_operand_info struct.
75 2) Analyze the chains of stores recorded in phase 1) (i.e. the vector of
76 store_immediate_info objects) and coalesce contiguous stores into
77 merged_store_group objects. For bit-field stores, we don't need to
78 require the stores to be contiguous, just their surrounding bit regions
79 have to be contiguous. If the expression being stored is different
80 between adjacent stores, such as one store storing a constant and
81 the following one storing a value loaded from memory, or if the loaded memory
82 objects are not adjacent, a new merged_store_group is created as well.
84 For example, given the stores:
85 [p ] := 0;
86 [p + 1B] := 1;
87 [p + 3B] := 0;
88 [p + 4B] := 1;
89 [p + 5B] := 0;
90 [p + 6B] := 0;
91 This phase would produce two merged_store_group objects, one recording the
92 two bytes stored in the memory region [p : p + 1] and another
93 recording the four bytes stored in the memory region [p + 3 : p + 6].
95 3) The merged_store_group objects produced in phase 2) are processed
96 to generate the sequence of wider stores that set the contiguous memory
97 regions to the sequence of bytes that corresponds to them. This may emit
98 multiple stores per store group to handle contiguous stores that are not
99 of a size that is a power of 2. For example it can try to emit a 40-bit
100 store as a 32-bit store followed by an 8-bit store.
101 We try to emit stores as wide as we can while respecting STRICT_ALIGNMENT
102 or TARGET_SLOW_UNALIGNED_ACCESS settings.
104 Note on endianness and example:
105 Consider 2 contiguous 16-bit stores followed by 2 contiguous 8-bit stores:
106 [p ] := 0x1234;
107 [p + 2B] := 0x5678;
108 [p + 4B] := 0xab;
109 [p + 5B] := 0xcd;
111 The memory layout for little-endian (LE) and big-endian (BE) must be:
112 p |LE|BE|
113 ---------
114 0 |34|12|
115 1 |12|34|
116 2 |78|56|
117 3 |56|78|
118 4 |ab|ab|
119 5 |cd|cd|
121 To merge these into a single 48-bit merged value 'val' in phase 2)
122 on little-endian we insert stores to higher (consecutive) bitpositions
123 into the most significant bits of the merged value.
124 The final merged value would be: 0xcdab56781234
126 For big-endian we insert stores to higher bitpositions into the least
127 significant bits of the merged value.
128 The final merged value would be: 0x12345678abcd
130 Then, in phase 3), we want to emit this 48-bit value as a 32-bit store
131 followed by a 16-bit store. Again, we must consider endianness when
132 breaking down the 48-bit value 'val' computed above.
133 For little endian we emit:
134 [p] (32-bit) := 0x56781234; // val & 0x0000ffffffff;
135 [p + 4B] (16-bit) := 0xcdab; // (val & 0xffff00000000) >> 32;
137 Whereas for big-endian we emit:
138 [p] (32-bit) := 0x12345678; // (val & 0xffffffff0000) >> 16;
139 [p + 4B] (16-bit) := 0xabcd; // val & 0x00000000ffff; */
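/* For illustration (a hypothetical example added by the editor, not part of
   the original sources): a typical C sequence that this pass can merge,
   assuming the target supports a 32-bit store at the required alignment:

     struct s { unsigned char a, b, c, d; };

     void
     f (struct s *p)
     {
       p->a = 1;
       p->b = 2;
       p->c = 3;
       p->d = 4;
     }

   The four single-byte stores are then typically replaced by one 32-bit
   store of 0x04030201 (little-endian) or 0x01020304 (big-endian).  */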
141 #include "config.h"
142 #include "system.h"
143 #include "coretypes.h"
144 #include "backend.h"
145 #include "tree.h"
146 #include "gimple.h"
147 #include "builtins.h"
148 #include "fold-const.h"
149 #include "tree-pass.h"
150 #include "ssa.h"
151 #include "gimple-pretty-print.h"
152 #include "alias.h"
153 #include "fold-const.h"
154 #include "print-tree.h"
155 #include "tree-hash-traits.h"
156 #include "gimple-iterator.h"
157 #include "gimplify.h"
158 #include "gimple-fold.h"
159 #include "stor-layout.h"
160 #include "timevar.h"
161 #include "cfganal.h"
162 #include "cfgcleanup.h"
163 #include "tree-cfg.h"
164 #include "except.h"
165 #include "tree-eh.h"
166 #include "target.h"
167 #include "gimplify-me.h"
168 #include "rtl.h"
169 #include "expr.h" /* For get_bit_range. */
170 #include "optabs-tree.h"
171 #include "dbgcnt.h"
172 #include "selftest.h"
174 /* The maximum size (in bits) of the stores this pass should generate. */
175 #define MAX_STORE_BITSIZE (BITS_PER_WORD)
176 #define MAX_STORE_BYTES (MAX_STORE_BITSIZE / BITS_PER_UNIT)
178 /* Limit to bound the number of aliasing checks for loads with the same
179 vuse as the corresponding store. */
180 #define MAX_STORE_ALIAS_CHECKS 64
182 namespace {
184 struct bswap_stat
186 /* Number of hand-written 16-bit nop / bswaps found. */
187 int found_16bit;
189 /* Number of hand-written 32-bit nop / bswaps found. */
190 int found_32bit;
192 /* Number of hand-written 64-bit nop / bswaps found. */
193 int found_64bit;
194 } nop_stats, bswap_stats;
196 /* A symbolic number structure is used to detect byte permutation and selection
197 patterns of a source. To achieve that, its field N contains an artificial
198 number consisting of BITS_PER_MARKER sized markers tracking where each
199 byte comes from in the source:
201 0 - target byte has the value 0
202 FF - target byte has an unknown value (eg. due to sign extension)
203 1..size - marker value is the byte index in the source (0 for lsb).
205 To detect permutations on memory sources (arrays and structures), a symbolic
206 number is also associated with:
207 - a base address BASE_ADDR and an OFFSET giving the address of the source;
208 - a range which gives the difference between the highest and lowest accessed
209 memory location to make such a symbolic number;
210 - the address SRC of the source element of lowest address as a convenience
211 to easily get BASE_ADDR + offset + lowest bytepos;
212 - number of expressions N_OPS bitwise ored together to represent
213 approximate cost of the computation.
215 Note 1: the range is different from size as size reflects the size of the
216 type of the current expression. For instance, for an array char a[],
217 (short) a[0] | (short) a[3] would have a size of 2 but a range of 4 while
218 (short) a[0] | ((short) a[0] << 1) would still have a size of 2 but this
219 time a range of 1.
221 Note 2: for non-memory sources, range holds the same value as size.
223 Note 3: SRC points to the SSA_NAME in case of non-memory source. */
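/* For illustration (a hypothetical example, not from the original sources):
   for a 32-bit unsigned SSA name SRC, init_symbolic_number below sets
   n->n = 0x04030201, i.e. the least significant result byte comes from
   source byte 1 and the most significant from source byte 4. After
   "SRC >> 16" the symbolic number becomes 0x00000403: the two low result
   bytes come from source bytes 3 and 4 and the two high result bytes are
   known to be zero.  */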
225 struct symbolic_number {
226 uint64_t n;
227 tree type;
228 tree base_addr;
229 tree offset;
230 poly_int64_pod bytepos;
231 tree src;
232 tree alias_set;
233 tree vuse;
234 unsigned HOST_WIDE_INT range;
235 int n_ops;
238 #define BITS_PER_MARKER 8
239 #define MARKER_MASK ((1 << BITS_PER_MARKER) - 1)
240 #define MARKER_BYTE_UNKNOWN MARKER_MASK
241 #define HEAD_MARKER(n, size) \
242 ((n) & ((uint64_t) MARKER_MASK << (((size) - 1) * BITS_PER_MARKER)))
244 /* The number which the find_bswap_or_nop_1 result should match in
245 order to have a nop. The number is masked according to the size of
246 the symbolic number before using it. */
247 #define CMPNOP (sizeof (int64_t) < 8 ? 0 : \
248 (uint64_t)0x08070605 << 32 | 0x04030201)
250 /* The number which the find_bswap_or_nop_1 result should match in
251 order to have a byte swap. The number is masked according to the
252 size of the symbolic number before using it. */
253 #define CMPXCHG (sizeof (int64_t) < 8 ? 0 : \
254 (uint64_t)0x01020304 << 32 | 0x05060708)
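/* For illustration (a hypothetical example, not from the original sources):
   for a 4-byte symbolic number, CMPNOP masked to 32 bits is 0x04030201
   (the identity permutation) and CMPXCHG shifted right by 32 bits is
   0x01020304 (the full byte swap); find_bswap_or_nop_finalize performs
   exactly these adjustments before the comparison in find_bswap_or_nop.  */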
256 /* Perform a SHIFT or ROTATE operation by COUNT bits on symbolic
257 number N. Return false if the requested operation is not permitted
258 on a symbolic number. */
260 inline bool
261 do_shift_rotate (enum tree_code code,
262 struct symbolic_number *n,
263 int count)
265 int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
266 unsigned head_marker;
268 if (count < 0
269 || count >= TYPE_PRECISION (n->type)
270 || count % BITS_PER_UNIT != 0)
271 return false;
272 count = (count / BITS_PER_UNIT) * BITS_PER_MARKER;
274 /* Zero out the extra bits of N in order to avoid them being shifted
275 into the significant bits. */
276 if (size < 64 / BITS_PER_MARKER)
277 n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
279 switch (code)
281 case LSHIFT_EXPR:
282 n->n <<= count;
283 break;
284 case RSHIFT_EXPR:
285 head_marker = HEAD_MARKER (n->n, size);
286 n->n >>= count;
287 /* Arithmetic shift of signed type: result is dependent on the value. */
288 if (!TYPE_UNSIGNED (n->type) && head_marker)
289 for (i = 0; i < count / BITS_PER_MARKER; i++)
290 n->n |= (uint64_t) MARKER_BYTE_UNKNOWN
291 << ((size - 1 - i) * BITS_PER_MARKER);
292 break;
293 case LROTATE_EXPR:
294 n->n = (n->n << count) | (n->n >> ((size * BITS_PER_MARKER) - count));
295 break;
296 case RROTATE_EXPR:
297 n->n = (n->n >> count) | (n->n << ((size * BITS_PER_MARKER) - count));
298 break;
299 default:
300 return false;
302 /* Zero unused bits for size. */
303 if (size < 64 / BITS_PER_MARKER)
304 n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
305 return true;
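/* For illustration (a hypothetical example, not from the original sources):
   with a 4-byte symbolic number 0x04030201, do_shift_rotate (LSHIFT_EXPR,
   n, 8) yields 0x03020100: the low marker becomes 0 (that result byte is
   known to be zero) and the marker of the most significant source byte is
   shifted out, matching the effect of "x << 8" on the value itself.  */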
308 /* Perform sanity checking for the symbolic number N and the gimple
309 statement STMT. */
311 inline bool
312 verify_symbolic_number_p (struct symbolic_number *n, gimple *stmt)
314 tree lhs_type;
316 lhs_type = TREE_TYPE (gimple_get_lhs (stmt));
318 if (TREE_CODE (lhs_type) != INTEGER_TYPE
319 && TREE_CODE (lhs_type) != ENUMERAL_TYPE)
320 return false;
322 if (TYPE_PRECISION (lhs_type) != TYPE_PRECISION (n->type))
323 return false;
325 return true;
328 /* Initialize the symbolic number N for the bswap pass from the base element
329 SRC manipulated by the bitwise OR expression. */
331 bool
332 init_symbolic_number (struct symbolic_number *n, tree src)
334 int size;
336 if (!INTEGRAL_TYPE_P (TREE_TYPE (src)) && !POINTER_TYPE_P (TREE_TYPE (src)))
337 return false;
339 n->base_addr = n->offset = n->alias_set = n->vuse = NULL_TREE;
340 n->src = src;
342 /* Set up the symbolic number N by setting each byte to a value between 1 and
343 the byte size of SRC. The highest order byte is set to the byte size and the
344 lowest order byte to 1. */
345 n->type = TREE_TYPE (src);
346 size = TYPE_PRECISION (n->type);
347 if (size % BITS_PER_UNIT != 0)
348 return false;
349 size /= BITS_PER_UNIT;
350 if (size > 64 / BITS_PER_MARKER)
351 return false;
352 n->range = size;
353 n->n = CMPNOP;
354 n->n_ops = 1;
356 if (size < 64 / BITS_PER_MARKER)
357 n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
359 return true;
362 /* Check if STMT might be a byte swap or a nop from a memory source and return
363 the answer. If so, REF is that memory source; the base of the memory area
364 accessed and the offset of the access from that base are recorded in N. */
366 bool
367 find_bswap_or_nop_load (gimple *stmt, tree ref, struct symbolic_number *n)
369 /* Leaf node is an array or component ref. Memorize its base and
370 offset from base to compare to other such leaf nodes. */
371 poly_int64 bitsize, bitpos, bytepos;
372 machine_mode mode;
373 int unsignedp, reversep, volatilep;
374 tree offset, base_addr;
376 /* Not prepared to handle PDP endian. */
377 if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
378 return false;
380 if (!gimple_assign_load_p (stmt) || gimple_has_volatile_ops (stmt))
381 return false;
383 base_addr = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode,
384 &unsignedp, &reversep, &volatilep);
386 if (TREE_CODE (base_addr) == TARGET_MEM_REF)
387 /* Do not rewrite TARGET_MEM_REF. */
388 return false;
389 else if (TREE_CODE (base_addr) == MEM_REF)
391 poly_offset_int bit_offset = 0;
392 tree off = TREE_OPERAND (base_addr, 1);
394 if (!integer_zerop (off))
396 poly_offset_int boff = mem_ref_offset (base_addr);
397 boff <<= LOG2_BITS_PER_UNIT;
398 bit_offset += boff;
401 base_addr = TREE_OPERAND (base_addr, 0);
403 /* Avoid returning a negative bitpos as this may wreak havoc later. */
404 if (maybe_lt (bit_offset, 0))
406 tree byte_offset = wide_int_to_tree
407 (sizetype, bits_to_bytes_round_down (bit_offset));
408 bit_offset = num_trailing_bits (bit_offset);
409 if (offset)
410 offset = size_binop (PLUS_EXPR, offset, byte_offset);
411 else
412 offset = byte_offset;
415 bitpos += bit_offset.force_shwi ();
417 else
418 base_addr = build_fold_addr_expr (base_addr);
420 if (!multiple_p (bitpos, BITS_PER_UNIT, &bytepos))
421 return false;
422 if (!multiple_p (bitsize, BITS_PER_UNIT))
423 return false;
424 if (reversep)
425 return false;
427 if (!init_symbolic_number (n, ref))
428 return false;
429 n->base_addr = base_addr;
430 n->offset = offset;
431 n->bytepos = bytepos;
432 n->alias_set = reference_alias_ptr_type (ref);
433 n->vuse = gimple_vuse (stmt);
434 return true;
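/* For illustration (a hypothetical example, not from the original sources):
   for a load such as "tmp = a[2]" with "unsigned char a[8]", the symbolic
   number records base_addr = &a, bytepos = 2, range = 1 and n = 0x01,
   together with the alias pointer type of the reference and the VUSE of
   the load statement.  */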
437 /* Compute the symbolic number N representing the result of a bitwise OR on 2
438 symbolic numbers N1 and N2 whose source statements are respectively
439 SOURCE_STMT1 and SOURCE_STMT2. */
441 gimple *
442 perform_symbolic_merge (gimple *source_stmt1, struct symbolic_number *n1,
443 gimple *source_stmt2, struct symbolic_number *n2,
444 struct symbolic_number *n)
446 int i, size;
447 uint64_t mask;
448 gimple *source_stmt;
449 struct symbolic_number *n_start;
451 tree rhs1 = gimple_assign_rhs1 (source_stmt1);
452 if (TREE_CODE (rhs1) == BIT_FIELD_REF
453 && TREE_CODE (TREE_OPERAND (rhs1, 0)) == SSA_NAME)
454 rhs1 = TREE_OPERAND (rhs1, 0);
455 tree rhs2 = gimple_assign_rhs1 (source_stmt2);
456 if (TREE_CODE (rhs2) == BIT_FIELD_REF
457 && TREE_CODE (TREE_OPERAND (rhs2, 0)) == SSA_NAME)
458 rhs2 = TREE_OPERAND (rhs2, 0);
460 /* Sources are different, cancel bswap if they are not memory locations with
461 the same base (array, structure, ...). */
462 if (rhs1 != rhs2)
464 uint64_t inc;
465 HOST_WIDE_INT start1, start2, start_sub, end_sub, end1, end2, end;
466 struct symbolic_number *toinc_n_ptr, *n_end;
467 basic_block bb1, bb2;
469 if (!n1->base_addr || !n2->base_addr
470 || !operand_equal_p (n1->base_addr, n2->base_addr, 0))
471 return NULL;
473 if (!n1->offset != !n2->offset
474 || (n1->offset && !operand_equal_p (n1->offset, n2->offset, 0)))
475 return NULL;
477 start1 = 0;
478 if (!(n2->bytepos - n1->bytepos).is_constant (&start2))
479 return NULL;
481 if (start1 < start2)
483 n_start = n1;
484 start_sub = start2 - start1;
486 else
488 n_start = n2;
489 start_sub = start1 - start2;
492 bb1 = gimple_bb (source_stmt1);
493 bb2 = gimple_bb (source_stmt2);
494 if (dominated_by_p (CDI_DOMINATORS, bb1, bb2))
495 source_stmt = source_stmt1;
496 else
497 source_stmt = source_stmt2;
499 /* Find the highest address at which a load is performed and
500 compute related info. */
501 end1 = start1 + (n1->range - 1);
502 end2 = start2 + (n2->range - 1);
503 if (end1 < end2)
505 end = end2;
506 end_sub = end2 - end1;
508 else
510 end = end1;
511 end_sub = end1 - end2;
513 n_end = (end2 > end1) ? n2 : n1;
515 /* Find symbolic number whose lsb is the most significant. */
516 if (BYTES_BIG_ENDIAN)
517 toinc_n_ptr = (n_end == n1) ? n2 : n1;
518 else
519 toinc_n_ptr = (n_start == n1) ? n2 : n1;
521 n->range = end - MIN (start1, start2) + 1;
523 /* Check that the range of memory covered can be represented by
524 a symbolic number. */
525 if (n->range > 64 / BITS_PER_MARKER)
526 return NULL;
528 /* Reinterpret byte marks in symbolic number holding the value of
529 bigger weight according to target endianness. */
530 inc = BYTES_BIG_ENDIAN ? end_sub : start_sub;
531 size = TYPE_PRECISION (n1->type) / BITS_PER_UNIT;
532 for (i = 0; i < size; i++, inc <<= BITS_PER_MARKER)
534 unsigned marker
535 = (toinc_n_ptr->n >> (i * BITS_PER_MARKER)) & MARKER_MASK;
536 if (marker && marker != MARKER_BYTE_UNKNOWN)
537 toinc_n_ptr->n += inc;
540 else
542 n->range = n1->range;
543 n_start = n1;
544 source_stmt = source_stmt1;
547 if (!n1->alias_set
548 || alias_ptr_types_compatible_p (n1->alias_set, n2->alias_set))
549 n->alias_set = n1->alias_set;
550 else
551 n->alias_set = ptr_type_node;
552 n->vuse = n_start->vuse;
553 n->base_addr = n_start->base_addr;
554 n->offset = n_start->offset;
555 n->src = n_start->src;
556 n->bytepos = n_start->bytepos;
557 n->type = n_start->type;
558 size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
560 for (i = 0, mask = MARKER_MASK; i < size; i++, mask <<= BITS_PER_MARKER)
562 uint64_t masked1, masked2;
564 masked1 = n1->n & mask;
565 masked2 = n2->n & mask;
566 if (masked1 && masked2 && masked1 != masked2)
567 return NULL;
569 n->n = n1->n | n2->n;
570 n->n_ops = n1->n_ops + n2->n_ops;
572 return source_stmt;
575 /* find_bswap_or_nop_1 invokes itself recursively with N and tries to perform
576 the operation given by the rhs of STMT on the result. If the operation
577 can be executed successfully, the function returns a gimple stmt whose
578 rhs's first tree is the expression of the source operand, and NULL
579 otherwise. */
581 gimple *
582 find_bswap_or_nop_1 (gimple *stmt, struct symbolic_number *n, int limit)
584 enum tree_code code;
585 tree rhs1, rhs2 = NULL;
586 gimple *rhs1_stmt, *rhs2_stmt, *source_stmt1;
587 enum gimple_rhs_class rhs_class;
589 if (!limit || !is_gimple_assign (stmt))
590 return NULL;
592 rhs1 = gimple_assign_rhs1 (stmt);
594 if (find_bswap_or_nop_load (stmt, rhs1, n))
595 return stmt;
597 /* Handle BIT_FIELD_REF. */
598 if (TREE_CODE (rhs1) == BIT_FIELD_REF
599 && TREE_CODE (TREE_OPERAND (rhs1, 0)) == SSA_NAME)
601 if (!tree_fits_uhwi_p (TREE_OPERAND (rhs1, 1))
602 || !tree_fits_uhwi_p (TREE_OPERAND (rhs1, 2)))
603 return NULL;
605 unsigned HOST_WIDE_INT bitsize = tree_to_uhwi (TREE_OPERAND (rhs1, 1));
606 unsigned HOST_WIDE_INT bitpos = tree_to_uhwi (TREE_OPERAND (rhs1, 2));
607 if (bitpos % BITS_PER_UNIT == 0
608 && bitsize % BITS_PER_UNIT == 0
609 && init_symbolic_number (n, TREE_OPERAND (rhs1, 0)))
611 /* Handle big-endian bit numbering in BIT_FIELD_REF. */
612 if (BYTES_BIG_ENDIAN)
613 bitpos = TYPE_PRECISION (n->type) - bitpos - bitsize;
615 /* Shift. */
616 if (!do_shift_rotate (RSHIFT_EXPR, n, bitpos))
617 return NULL;
619 /* Mask. */
620 uint64_t mask = 0;
621 uint64_t tmp = (1 << BITS_PER_UNIT) - 1;
622 for (unsigned i = 0; i < bitsize / BITS_PER_UNIT;
623 i++, tmp <<= BITS_PER_UNIT)
624 mask |= (uint64_t) MARKER_MASK << (i * BITS_PER_MARKER);
625 n->n &= mask;
627 /* Convert. */
628 n->type = TREE_TYPE (rhs1);
629 if (!n->base_addr)
630 n->range = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
632 return verify_symbolic_number_p (n, stmt) ? stmt : NULL;
635 return NULL;
638 if (TREE_CODE (rhs1) != SSA_NAME)
639 return NULL;
641 code = gimple_assign_rhs_code (stmt);
642 rhs_class = gimple_assign_rhs_class (stmt);
643 rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
645 if (rhs_class == GIMPLE_BINARY_RHS)
646 rhs2 = gimple_assign_rhs2 (stmt);
648 /* Handle unary rhs and binary rhs with integer constants as second
649 operand. */
651 if (rhs_class == GIMPLE_UNARY_RHS
652 || (rhs_class == GIMPLE_BINARY_RHS
653 && TREE_CODE (rhs2) == INTEGER_CST))
655 if (code != BIT_AND_EXPR
656 && code != LSHIFT_EXPR
657 && code != RSHIFT_EXPR
658 && code != LROTATE_EXPR
659 && code != RROTATE_EXPR
660 && !CONVERT_EXPR_CODE_P (code))
661 return NULL;
663 source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, n, limit - 1);
665 /* If find_bswap_or_nop_1 returned NULL, STMT is a leaf node and
666 we have to initialize the symbolic number. */
667 if (!source_stmt1)
669 if (gimple_assign_load_p (stmt)
670 || !init_symbolic_number (n, rhs1))
671 return NULL;
672 source_stmt1 = stmt;
675 switch (code)
677 case BIT_AND_EXPR:
679 int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
680 uint64_t val = int_cst_value (rhs2), mask = 0;
681 uint64_t tmp = (1 << BITS_PER_UNIT) - 1;
683 /* Only constants masking full bytes are allowed. */
684 for (i = 0; i < size; i++, tmp <<= BITS_PER_UNIT)
685 if ((val & tmp) != 0 && (val & tmp) != tmp)
686 return NULL;
687 else if (val & tmp)
688 mask |= (uint64_t) MARKER_MASK << (i * BITS_PER_MARKER);
690 n->n &= mask;
692 break;
693 case LSHIFT_EXPR:
694 case RSHIFT_EXPR:
695 case LROTATE_EXPR:
696 case RROTATE_EXPR:
697 if (!do_shift_rotate (code, n, (int) TREE_INT_CST_LOW (rhs2)))
698 return NULL;
699 break;
700 CASE_CONVERT:
702 int i, type_size, old_type_size;
703 tree type;
705 type = TREE_TYPE (gimple_assign_lhs (stmt));
706 type_size = TYPE_PRECISION (type);
707 if (type_size % BITS_PER_UNIT != 0)
708 return NULL;
709 type_size /= BITS_PER_UNIT;
710 if (type_size > 64 / BITS_PER_MARKER)
711 return NULL;
713 /* Sign extension: result is dependent on the value. */
714 old_type_size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
715 if (!TYPE_UNSIGNED (n->type) && type_size > old_type_size
716 && HEAD_MARKER (n->n, old_type_size))
717 for (i = 0; i < type_size - old_type_size; i++)
718 n->n |= (uint64_t) MARKER_BYTE_UNKNOWN
719 << ((type_size - 1 - i) * BITS_PER_MARKER);
721 if (type_size < 64 / BITS_PER_MARKER)
723 /* If STMT casts to a smaller type mask out the bits not
724 belonging to the target type. */
725 n->n &= ((uint64_t) 1 << (type_size * BITS_PER_MARKER)) - 1;
727 n->type = type;
728 if (!n->base_addr)
729 n->range = type_size;
731 break;
732 default:
733 return NULL;
735 return verify_symbolic_number_p (n, stmt) ? source_stmt1 : NULL;
738 /* Handle binary rhs. */
740 if (rhs_class == GIMPLE_BINARY_RHS)
742 struct symbolic_number n1, n2;
743 gimple *source_stmt, *source_stmt2;
745 if (code != BIT_IOR_EXPR)
746 return NULL;
748 if (TREE_CODE (rhs2) != SSA_NAME)
749 return NULL;
751 rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
753 switch (code)
755 case BIT_IOR_EXPR:
756 source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, &n1, limit - 1);
758 if (!source_stmt1)
759 return NULL;
761 source_stmt2 = find_bswap_or_nop_1 (rhs2_stmt, &n2, limit - 1);
763 if (!source_stmt2)
764 return NULL;
766 if (TYPE_PRECISION (n1.type) != TYPE_PRECISION (n2.type))
767 return NULL;
769 if (n1.vuse != n2.vuse)
770 return NULL;
772 source_stmt
773 = perform_symbolic_merge (source_stmt1, &n1, source_stmt2, &n2, n);
775 if (!source_stmt)
776 return NULL;
778 if (!verify_symbolic_number_p (n, stmt))
779 return NULL;
781 break;
782 default:
783 return NULL;
785 return source_stmt;
787 return NULL;
790 /* Helper for find_bswap_or_nop and try_coalesce_bswap to compute
791 *CMPXCHG, *CMPNOP and adjust *N. */
793 void
794 find_bswap_or_nop_finalize (struct symbolic_number *n, uint64_t *cmpxchg,
795 uint64_t *cmpnop, bool *cast64_to_32)
797 unsigned rsize;
798 uint64_t tmpn, mask;
800 /* The number which the find_bswap_or_nop_1 result should match in order
801 to have a full byte swap. The number is shifted to the right
802 according to the size of the symbolic number before using it. */
803 *cmpxchg = CMPXCHG;
804 *cmpnop = CMPNOP;
805 *cast64_to_32 = false;
807 /* Find real size of result (highest non-zero byte). */
808 if (n->base_addr)
809 for (tmpn = n->n, rsize = 0; tmpn; tmpn >>= BITS_PER_MARKER, rsize++);
810 else
811 rsize = n->range;
813 /* Zero out the bits corresponding to untouched bytes in original gimple
814 expression. */
815 if (n->range < (int) sizeof (int64_t))
817 mask = ((uint64_t) 1 << (n->range * BITS_PER_MARKER)) - 1;
818 if (n->base_addr == NULL
819 && n->range == 4
820 && int_size_in_bytes (TREE_TYPE (n->src)) == 8)
822 /* If all bytes in n->n are either 0 or in [5..8] range, this
823 might be a candidate for (unsigned) __builtin_bswap64 (src).
824 It is not worth it for (unsigned short) __builtin_bswap64 (src)
825 or (unsigned short) __builtin_bswap32 (src). */
826 *cast64_to_32 = true;
827 for (tmpn = n->n; tmpn; tmpn >>= BITS_PER_MARKER)
828 if ((tmpn & MARKER_MASK)
829 && ((tmpn & MARKER_MASK) <= 4 || (tmpn & MARKER_MASK) > 8))
831 *cast64_to_32 = false;
832 break;
835 if (*cast64_to_32)
836 *cmpxchg &= mask;
837 else
838 *cmpxchg >>= (64 / BITS_PER_MARKER - n->range) * BITS_PER_MARKER;
839 *cmpnop &= mask;
842 /* Zero out the bits corresponding to unused bytes in the result of the
843 gimple expression. */
844 if (rsize < n->range)
846 if (BYTES_BIG_ENDIAN)
848 mask = ((uint64_t) 1 << (rsize * BITS_PER_MARKER)) - 1;
849 *cmpxchg &= mask;
850 *cmpnop >>= (n->range - rsize) * BITS_PER_MARKER;
852 else
854 mask = ((uint64_t) 1 << (rsize * BITS_PER_MARKER)) - 1;
855 *cmpxchg >>= (n->range - rsize) * BITS_PER_MARKER;
856 *cmpnop &= mask;
858 n->range = rsize;
861 if (*cast64_to_32)
862 n->range = 8;
863 n->range *= BITS_PER_UNIT;
866 /* Check if STMT completes a bswap implementation or a read in a given
867 endianness consisting of ORs, SHIFTs and ANDs and sets *BSWAP
868 accordingly. It also sets N to represent the kind of operations
869 performed: size of the resulting expression and whether it works on
870 a memory source, and if so its alias-set and vuse. Finally, the
871 function returns a stmt whose rhs's first tree is the source
872 expression. */
874 gimple *
875 find_bswap_or_nop (gimple *stmt, struct symbolic_number *n, bool *bswap,
876 bool *cast64_to_32, uint64_t *mask)
878 tree type_size = TYPE_SIZE_UNIT (TREE_TYPE (gimple_get_lhs (stmt)));
879 if (!tree_fits_uhwi_p (type_size))
880 return NULL;
882 /* The last parameter determines the search depth limit. It usually
883 correlates directly to the number n of bytes to be touched. We
884 increase that number by 2 * (log2(n) + 1) here in order to also
885 cover signed -> unsigned conversions of the src operand as can be seen
886 in libgcc, and for initial shift/and operation of the src operand. */
887 int limit = tree_to_uhwi (type_size);
888 limit += 2 * (1 + (int) ceil_log2 ((unsigned HOST_WIDE_INT) limit));
889 gimple *ins_stmt = find_bswap_or_nop_1 (stmt, n, limit);
891 if (!ins_stmt)
893 if (gimple_assign_rhs_code (stmt) != CONSTRUCTOR
894 || BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
895 return NULL;
896 unsigned HOST_WIDE_INT sz = tree_to_uhwi (type_size) * BITS_PER_UNIT;
897 if (sz != 16 && sz != 32 && sz != 64)
898 return NULL;
899 tree rhs = gimple_assign_rhs1 (stmt);
900 if (CONSTRUCTOR_NELTS (rhs) == 0)
901 return NULL;
902 tree eltype = TREE_TYPE (TREE_TYPE (rhs));
903 unsigned HOST_WIDE_INT eltsz
904 = int_size_in_bytes (eltype) * BITS_PER_UNIT;
905 if (TYPE_PRECISION (eltype) != eltsz)
906 return NULL;
907 constructor_elt *elt;
908 unsigned int i;
909 tree type = build_nonstandard_integer_type (sz, 1);
910 FOR_EACH_VEC_SAFE_ELT (CONSTRUCTOR_ELTS (rhs), i, elt)
912 if (TREE_CODE (elt->value) != SSA_NAME
913 || !INTEGRAL_TYPE_P (TREE_TYPE (elt->value)))
914 return NULL;
915 struct symbolic_number n1;
916 gimple *source_stmt
917 = find_bswap_or_nop_1 (SSA_NAME_DEF_STMT (elt->value), &n1,
918 limit - 1);
920 if (!source_stmt)
921 return NULL;
923 n1.type = type;
924 if (!n1.base_addr)
925 n1.range = sz / BITS_PER_UNIT;
927 if (i == 0)
929 ins_stmt = source_stmt;
930 *n = n1;
932 else
934 if (n->vuse != n1.vuse)
935 return NULL;
937 struct symbolic_number n0 = *n;
939 if (!BYTES_BIG_ENDIAN)
941 if (!do_shift_rotate (LSHIFT_EXPR, &n1, i * eltsz))
942 return NULL;
944 else if (!do_shift_rotate (LSHIFT_EXPR, &n0, eltsz))
945 return NULL;
946 ins_stmt
947 = perform_symbolic_merge (ins_stmt, &n0, source_stmt, &n1, n);
949 if (!ins_stmt)
950 return NULL;
955 uint64_t cmpxchg, cmpnop;
956 find_bswap_or_nop_finalize (n, &cmpxchg, &cmpnop, cast64_to_32);
958 /* A complete byte swap should make the symbolic number start with
959 the largest digit in the highest order byte. An unchanged symbolic
960 number indicates a read with the same endianness as the target architecture. */
961 *mask = ~(uint64_t) 0;
962 if (n->n == cmpnop)
963 *bswap = false;
964 else if (n->n == cmpxchg)
965 *bswap = true;
966 else
968 int set = 0;
969 for (uint64_t msk = MARKER_MASK; msk; msk <<= BITS_PER_MARKER)
970 if ((n->n & msk) == 0)
971 *mask &= ~msk;
972 else if ((n->n & msk) == (cmpxchg & msk))
973 set++;
974 else
975 return NULL;
976 if (set < 2)
977 return NULL;
978 *bswap = true;
981 /* Useless bit manipulation performed by code. */
982 if (!n->base_addr && n->n == cmpnop && n->n_ops == 1)
983 return NULL;
985 return ins_stmt;
988 const pass_data pass_data_optimize_bswap =
990 GIMPLE_PASS, /* type */
991 "bswap", /* name */
992 OPTGROUP_NONE, /* optinfo_flags */
993 TV_NONE, /* tv_id */
994 PROP_ssa, /* properties_required */
995 0, /* properties_provided */
996 0, /* properties_destroyed */
997 0, /* todo_flags_start */
998 0, /* todo_flags_finish */
1001 class pass_optimize_bswap : public gimple_opt_pass
1003 public:
1004 pass_optimize_bswap (gcc::context *ctxt)
1005 : gimple_opt_pass (pass_data_optimize_bswap, ctxt)
1008 /* opt_pass methods: */
1009 virtual bool gate (function *)
1011 return flag_expensive_optimizations && optimize && BITS_PER_UNIT == 8;
1014 virtual unsigned int execute (function *);
1016 }; // class pass_optimize_bswap
1018 /* Helper function for bswap_replace. Build VIEW_CONVERT_EXPR from
1019 VAL to TYPE. If VAL has a different type size, emit a NOP_EXPR cast
1020 first. */
1022 static tree
1023 bswap_view_convert (gimple_stmt_iterator *gsi, tree type, tree val)
1025 gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (val))
1026 || POINTER_TYPE_P (TREE_TYPE (val)));
1027 if (TYPE_SIZE (type) != TYPE_SIZE (TREE_TYPE (val)))
1029 HOST_WIDE_INT prec = TREE_INT_CST_LOW (TYPE_SIZE (type));
1030 if (POINTER_TYPE_P (TREE_TYPE (val)))
1032 gimple *g
1033 = gimple_build_assign (make_ssa_name (pointer_sized_int_node),
1034 NOP_EXPR, val);
1035 gsi_insert_before (gsi, g, GSI_SAME_STMT);
1036 val = gimple_assign_lhs (g);
1038 tree itype = build_nonstandard_integer_type (prec, 1);
1039 gimple *g = gimple_build_assign (make_ssa_name (itype), NOP_EXPR, val);
1040 gsi_insert_before (gsi, g, GSI_SAME_STMT);
1041 val = gimple_assign_lhs (g);
1043 return build1 (VIEW_CONVERT_EXPR, type, val);
1046 /* Perform the bswap optimization: replace the expression computed in the rhs
1047 of gsi_stmt (GSI) (or if NULL add instead of replace) by an equivalent
1048 bswap, load or load + bswap expression.
1049 Which of these alternatives replaces the rhs is given by N->base_addr (non
1050 null if a load is needed) and BSWAP. The type, VUSE and alias-set of the
1051 load to perform are also given in N, while the builtin bswap to invoke is
1052 given by FNDECL. Finally, if a load is involved, INS_STMT refers to one of the
1053 load statements involved to construct the rhs in gsi_stmt (GSI) and
1054 N->range gives the size of the rhs expression for maintaining some
1055 statistics.
1057 Note that if the replacement involves a load and if gsi_stmt (GSI) is
1058 non-NULL, that stmt is moved just after INS_STMT to do the load with the
1059 same VUSE, which can lead to gsi_stmt (GSI) changing basic block. */
1061 tree
1062 bswap_replace (gimple_stmt_iterator gsi, gimple *ins_stmt, tree fndecl,
1063 tree bswap_type, tree load_type, struct symbolic_number *n,
1064 bool bswap, uint64_t mask)
1066 tree src, tmp, tgt = NULL_TREE;
1067 gimple *bswap_stmt, *mask_stmt = NULL;
1068 tree_code conv_code = NOP_EXPR;
1070 gimple *cur_stmt = gsi_stmt (gsi);
1071 src = n->src;
1072 if (cur_stmt)
1074 tgt = gimple_assign_lhs (cur_stmt);
1075 if (gimple_assign_rhs_code (cur_stmt) == CONSTRUCTOR
1076 && tgt
1077 && VECTOR_TYPE_P (TREE_TYPE (tgt)))
1078 conv_code = VIEW_CONVERT_EXPR;
1081 /* Need to load the value from memory first. */
1082 if (n->base_addr)
1084 gimple_stmt_iterator gsi_ins = gsi;
1085 if (ins_stmt)
1086 gsi_ins = gsi_for_stmt (ins_stmt);
1087 tree addr_expr, addr_tmp, val_expr, val_tmp;
1088 tree load_offset_ptr, aligned_load_type;
1089 gimple *load_stmt;
1090 unsigned align = get_object_alignment (src);
1091 poly_int64 load_offset = 0;
1093 if (cur_stmt)
1095 basic_block ins_bb = gimple_bb (ins_stmt);
1096 basic_block cur_bb = gimple_bb (cur_stmt);
1097 if (!dominated_by_p (CDI_DOMINATORS, cur_bb, ins_bb))
1098 return NULL_TREE;
1100 /* Move cur_stmt just before one of the loads of the original
1101 to ensure it has the same VUSE. See PR61517 for what could
1102 go wrong. */
1103 if (gimple_bb (cur_stmt) != gimple_bb (ins_stmt))
1104 reset_flow_sensitive_info (gimple_assign_lhs (cur_stmt));
1105 gsi_move_before (&gsi, &gsi_ins);
1106 gsi = gsi_for_stmt (cur_stmt);
1108 else
1109 gsi = gsi_ins;
1111 /* Compute address to load from and cast according to the size
1112 of the load. */
1113 addr_expr = build_fold_addr_expr (src);
1114 if (is_gimple_mem_ref_addr (addr_expr))
1115 addr_tmp = unshare_expr (addr_expr);
1116 else
1118 addr_tmp = unshare_expr (n->base_addr);
1119 if (!is_gimple_mem_ref_addr (addr_tmp))
1120 addr_tmp = force_gimple_operand_gsi_1 (&gsi, addr_tmp,
1121 is_gimple_mem_ref_addr,
1122 NULL_TREE, true,
1123 GSI_SAME_STMT);
1124 load_offset = n->bytepos;
1125 if (n->offset)
1127 tree off
1128 = force_gimple_operand_gsi (&gsi, unshare_expr (n->offset),
1129 true, NULL_TREE, true,
1130 GSI_SAME_STMT);
1131 gimple *stmt
1132 = gimple_build_assign (make_ssa_name (TREE_TYPE (addr_tmp)),
1133 POINTER_PLUS_EXPR, addr_tmp, off);
1134 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1135 addr_tmp = gimple_assign_lhs (stmt);
1139 /* Perform the load. */
1140 aligned_load_type = load_type;
1141 if (align < TYPE_ALIGN (load_type))
1142 aligned_load_type = build_aligned_type (load_type, align);
1143 load_offset_ptr = build_int_cst (n->alias_set, load_offset);
1144 val_expr = fold_build2 (MEM_REF, aligned_load_type, addr_tmp,
1145 load_offset_ptr);
1147 if (!bswap)
1149 if (n->range == 16)
1150 nop_stats.found_16bit++;
1151 else if (n->range == 32)
1152 nop_stats.found_32bit++;
1153 else
1155 gcc_assert (n->range == 64);
1156 nop_stats.found_64bit++;
1159 /* Convert the result of the load if necessary. */
1160 if (tgt && !useless_type_conversion_p (TREE_TYPE (tgt), load_type))
1162 val_tmp = make_temp_ssa_name (aligned_load_type, NULL,
1163 "load_dst");
1164 load_stmt = gimple_build_assign (val_tmp, val_expr);
1165 gimple_set_vuse (load_stmt, n->vuse);
1166 gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT);
1167 if (conv_code == VIEW_CONVERT_EXPR)
1168 val_tmp = bswap_view_convert (&gsi, TREE_TYPE (tgt), val_tmp);
1169 gimple_assign_set_rhs_with_ops (&gsi, conv_code, val_tmp);
1170 update_stmt (cur_stmt);
1172 else if (cur_stmt)
1174 gimple_assign_set_rhs_with_ops (&gsi, MEM_REF, val_expr);
1175 gimple_set_vuse (cur_stmt, n->vuse);
1176 update_stmt (cur_stmt);
1178 else
1180 tgt = make_ssa_name (load_type);
1181 cur_stmt = gimple_build_assign (tgt, MEM_REF, val_expr);
1182 gimple_set_vuse (cur_stmt, n->vuse);
1183 gsi_insert_before (&gsi, cur_stmt, GSI_SAME_STMT);
1186 if (dump_file)
1188 fprintf (dump_file,
1189 "%d bit load in target endianness found at: ",
1190 (int) n->range);
1191 print_gimple_stmt (dump_file, cur_stmt, 0);
1193 return tgt;
1195 else
1197 val_tmp = make_temp_ssa_name (aligned_load_type, NULL, "load_dst");
1198 load_stmt = gimple_build_assign (val_tmp, val_expr);
1199 gimple_set_vuse (load_stmt, n->vuse);
1200 gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT);
1202 src = val_tmp;
1204 else if (!bswap)
1206 gimple *g = NULL;
1207 if (tgt && !useless_type_conversion_p (TREE_TYPE (tgt), TREE_TYPE (src)))
1209 if (!is_gimple_val (src))
1210 return NULL_TREE;
1211 if (conv_code == VIEW_CONVERT_EXPR)
1212 src = bswap_view_convert (&gsi, TREE_TYPE (tgt), src);
1213 g = gimple_build_assign (tgt, conv_code, src);
1215 else if (cur_stmt)
1216 g = gimple_build_assign (tgt, src);
1217 else
1218 tgt = src;
1219 if (n->range == 16)
1220 nop_stats.found_16bit++;
1221 else if (n->range == 32)
1222 nop_stats.found_32bit++;
1223 else
1225 gcc_assert (n->range == 64);
1226 nop_stats.found_64bit++;
1228 if (dump_file)
1230 fprintf (dump_file,
1231 "%d bit reshuffle in target endianness found at: ",
1232 (int) n->range);
1233 if (cur_stmt)
1234 print_gimple_stmt (dump_file, cur_stmt, 0);
1235 else
1237 print_generic_expr (dump_file, tgt, TDF_NONE);
1238 fprintf (dump_file, "\n");
1241 if (cur_stmt)
1242 gsi_replace (&gsi, g, true);
1243 return tgt;
1245 else if (TREE_CODE (src) == BIT_FIELD_REF)
1246 src = TREE_OPERAND (src, 0);
1248 if (n->range == 16)
1249 bswap_stats.found_16bit++;
1250 else if (n->range == 32)
1251 bswap_stats.found_32bit++;
1252 else
1254 gcc_assert (n->range == 64);
1255 bswap_stats.found_64bit++;
1258 tmp = src;
1260 /* Convert the src expression if necessary. */
1261 if (!useless_type_conversion_p (TREE_TYPE (tmp), bswap_type))
1263 gimple *convert_stmt;
1265 tmp = make_temp_ssa_name (bswap_type, NULL, "bswapsrc");
1266 convert_stmt = gimple_build_assign (tmp, NOP_EXPR, src);
1267 gsi_insert_before (&gsi, convert_stmt, GSI_SAME_STMT);
1270 /* Canonical form for a 16-bit bswap is a rotate expression. Only 16-bit values
1271 are handled this way, as rotation of 2N-bit values by N bits is generally not
1272 equivalent to a bswap. Consider for instance 0x01020304 r>> 16, which
1273 gives 0x03040102 while a bswap for that value is 0x04030201. */
1274 if (bswap && n->range == 16)
1276 tree count = build_int_cst (NULL, BITS_PER_UNIT);
1277 src = fold_build2 (LROTATE_EXPR, bswap_type, tmp, count);
1278 bswap_stmt = gimple_build_assign (NULL, src);
1280 else
1281 bswap_stmt = gimple_build_call (fndecl, 1, tmp);
1283 if (tgt == NULL_TREE)
1284 tgt = make_ssa_name (bswap_type);
1285 tmp = tgt;
1287 if (mask != ~(uint64_t) 0)
1289 tree m = build_int_cst (bswap_type, mask);
1290 tmp = make_temp_ssa_name (bswap_type, NULL, "bswapdst");
1291 gimple_set_lhs (bswap_stmt, tmp);
1292 mask_stmt = gimple_build_assign (tgt, BIT_AND_EXPR, tmp, m);
1293 tmp = tgt;
1296 /* Convert the result if necessary. */
1297 if (!useless_type_conversion_p (TREE_TYPE (tgt), bswap_type))
1299 gimple *convert_stmt;
1301 tmp = make_temp_ssa_name (bswap_type, NULL, "bswapdst");
1302 tree atmp = tmp;
1303 if (conv_code == VIEW_CONVERT_EXPR)
1304 atmp = bswap_view_convert (&gsi, TREE_TYPE (tgt), tmp);
1305 convert_stmt = gimple_build_assign (tgt, conv_code, atmp);
1306 gsi_insert_after (&gsi, convert_stmt, GSI_SAME_STMT);
1309 gimple_set_lhs (mask_stmt ? mask_stmt : bswap_stmt, tmp);
1311 if (dump_file)
1313 fprintf (dump_file, "%d bit bswap implementation found at: ",
1314 (int) n->range);
1315 if (cur_stmt)
1316 print_gimple_stmt (dump_file, cur_stmt, 0);
1317 else
1319 print_generic_expr (dump_file, tgt, TDF_NONE);
1320 fprintf (dump_file, "\n");
1324 if (cur_stmt)
1326 if (mask_stmt)
1327 gsi_insert_after (&gsi, mask_stmt, GSI_SAME_STMT);
1328 gsi_insert_after (&gsi, bswap_stmt, GSI_SAME_STMT);
1329 gsi_remove (&gsi, true);
1331 else
1333 gsi_insert_before (&gsi, bswap_stmt, GSI_SAME_STMT);
1334 if (mask_stmt)
1335 gsi_insert_before (&gsi, mask_stmt, GSI_SAME_STMT);
1337 return tgt;
1340 /* Try to optimize an assignment CUR_STMT with CONSTRUCTOR on the rhs
1341 using bswap optimizations. CDI_DOMINATORS needs to be
1342 computed on entry. Return true if it has been optimized and
1343 TODO_update_ssa is needed. */
1345 static bool
1346 maybe_optimize_vector_constructor (gimple *cur_stmt)
1348 tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type;
1349 struct symbolic_number n;
1350 bool bswap;
1352 gcc_assert (is_gimple_assign (cur_stmt)
1353 && gimple_assign_rhs_code (cur_stmt) == CONSTRUCTOR);
1355 tree rhs = gimple_assign_rhs1 (cur_stmt);
1356 if (!VECTOR_TYPE_P (TREE_TYPE (rhs))
1357 || !INTEGRAL_TYPE_P (TREE_TYPE (TREE_TYPE (rhs)))
1358 || gimple_assign_lhs (cur_stmt) == NULL_TREE)
1359 return false;
1361 HOST_WIDE_INT sz = int_size_in_bytes (TREE_TYPE (rhs)) * BITS_PER_UNIT;
1362 switch (sz)
1364 case 16:
1365 load_type = bswap_type = uint16_type_node;
1366 break;
1367 case 32:
1368 if (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
1369 && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing)
1371 load_type = uint32_type_node;
1372 fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
1373 bswap_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
1375 else
1376 return false;
1377 break;
1378 case 64:
1379 if (builtin_decl_explicit_p (BUILT_IN_BSWAP64)
1380 && (optab_handler (bswap_optab, DImode) != CODE_FOR_nothing
1381 || (word_mode == SImode
1382 && builtin_decl_explicit_p (BUILT_IN_BSWAP32)
1383 && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing)))
1385 load_type = uint64_type_node;
1386 fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
1387 bswap_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
1389 else
1390 return false;
1391 break;
1392 default:
1393 return false;
1396 bool cast64_to_32;
1397 uint64_t mask;
1398 gimple *ins_stmt = find_bswap_or_nop (cur_stmt, &n, &bswap,
1399 &cast64_to_32, &mask);
1400 if (!ins_stmt
1401 || n.range != (unsigned HOST_WIDE_INT) sz
1402 || cast64_to_32
1403 || mask != ~(uint64_t) 0)
1404 return false;
1406 if (bswap && !fndecl && n.range != 16)
1407 return false;
1409 memset (&nop_stats, 0, sizeof (nop_stats));
1410 memset (&bswap_stats, 0, sizeof (bswap_stats));
1411 return bswap_replace (gsi_for_stmt (cur_stmt), ins_stmt, fndecl,
1412 bswap_type, load_type, &n, bswap, mask) != NULL_TREE;
1415 /* Find manual byte swap implementations as well as loads in a given
1416 endianness. Byte swaps are turned into a bswap builtin invocation,
1417 while endian loads are converted to a bswap builtin invocation or a
1418 simple load according to the target endianness. */
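/* For illustration (a hypothetical example, not from the original sources):
   a typical hand-written 32-bit byte swap that this pass recognizes and,
   when the target has a suitable bswap optab, replaces with a call to
   __builtin_bswap32:

     unsigned int
     swap32 (unsigned int x)
     {
       return (x >> 24)
              | ((x >> 8) & 0x0000ff00)
              | ((x << 8) & 0x00ff0000)
              | (x << 24);
     }
*/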
1420 unsigned int
1421 pass_optimize_bswap::execute (function *fun)
1423 basic_block bb;
1424 bool bswap32_p, bswap64_p;
1425 bool changed = false;
1426 tree bswap32_type = NULL_TREE, bswap64_type = NULL_TREE;
1428 bswap32_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
1429 && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing);
1430 bswap64_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP64)
1431 && (optab_handler (bswap_optab, DImode) != CODE_FOR_nothing
1432 || (bswap32_p && word_mode == SImode)));
1434 /* Determine the argument type of the builtins. The code later on
1435 assumes that the return and argument type are the same. */
1436 if (bswap32_p)
1438 tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
1439 bswap32_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
1442 if (bswap64_p)
1444 tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
1445 bswap64_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
1448 memset (&nop_stats, 0, sizeof (nop_stats));
1449 memset (&bswap_stats, 0, sizeof (bswap_stats));
1450 calculate_dominance_info (CDI_DOMINATORS);
1452 FOR_EACH_BB_FN (bb, fun)
1454 gimple_stmt_iterator gsi;
1456 /* We do a reverse scan for bswap patterns to make sure we get the
1457 widest match. As bswap pattern matching doesn't handle previously
1458 inserted smaller bswap replacements as sub-patterns, the wider
1459 variant wouldn't be detected. */
1460 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi);)
1462 gimple *ins_stmt, *cur_stmt = gsi_stmt (gsi);
1463 tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type;
1464 enum tree_code code;
1465 struct symbolic_number n;
1466 bool bswap, cast64_to_32;
1467 uint64_t mask;
1469 /* This gsi_prev (&gsi) is not part of the for loop because cur_stmt
1470 might be moved to a different basic block by bswap_replace and gsi
1471 must not point to it if that's the case. Moving the gsi_prev
1472 there makes sure that gsi points to the statement previous to
1473 cur_stmt while still making sure that all statements are
1474 considered in this basic block. */
1475 gsi_prev (&gsi);
1477 if (!is_gimple_assign (cur_stmt))
1478 continue;
1480 code = gimple_assign_rhs_code (cur_stmt);
1481 switch (code)
1483 case LROTATE_EXPR:
1484 case RROTATE_EXPR:
1485 if (!tree_fits_uhwi_p (gimple_assign_rhs2 (cur_stmt))
1486 || tree_to_uhwi (gimple_assign_rhs2 (cur_stmt))
1487 % BITS_PER_UNIT)
1488 continue;
1489 /* Fall through. */
1490 case BIT_IOR_EXPR:
1491 break;
1492 case CONSTRUCTOR:
1494 tree rhs = gimple_assign_rhs1 (cur_stmt);
1495 if (VECTOR_TYPE_P (TREE_TYPE (rhs))
1496 && INTEGRAL_TYPE_P (TREE_TYPE (TREE_TYPE (rhs))))
1497 break;
1499 continue;
1500 default:
1501 continue;
1504 ins_stmt = find_bswap_or_nop (cur_stmt, &n, &bswap,
1505 &cast64_to_32, &mask);
1507 if (!ins_stmt)
1508 continue;
1510 switch (n.range)
1512 case 16:
1513 /* Already in canonical form, nothing to do. */
1514 if (code == LROTATE_EXPR || code == RROTATE_EXPR)
1515 continue;
1516 load_type = bswap_type = uint16_type_node;
1517 break;
1518 case 32:
1519 load_type = uint32_type_node;
1520 if (bswap32_p)
1522 fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
1523 bswap_type = bswap32_type;
1525 break;
1526 case 64:
1527 load_type = uint64_type_node;
1528 if (bswap64_p)
1530 fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
1531 bswap_type = bswap64_type;
1533 break;
1534 default:
1535 continue;
1538 if (bswap && !fndecl && n.range != 16)
1539 continue;
1541 if (bswap_replace (gsi_for_stmt (cur_stmt), ins_stmt, fndecl,
1542 bswap_type, load_type, &n, bswap, mask))
1543 changed = true;
1547 statistics_counter_event (fun, "16-bit nop implementations found",
1548 nop_stats.found_16bit);
1549 statistics_counter_event (fun, "32-bit nop implementations found",
1550 nop_stats.found_32bit);
1551 statistics_counter_event (fun, "64-bit nop implementations found",
1552 nop_stats.found_64bit);
1553 statistics_counter_event (fun, "16-bit bswap implementations found",
1554 bswap_stats.found_16bit);
1555 statistics_counter_event (fun, "32-bit bswap implementations found",
1556 bswap_stats.found_32bit);
1557 statistics_counter_event (fun, "64-bit bswap implementations found",
1558 bswap_stats.found_64bit);
1560 return (changed ? TODO_update_ssa : 0);
1563 } // anon namespace
1565 gimple_opt_pass *
1566 make_pass_optimize_bswap (gcc::context *ctxt)
1568 return new pass_optimize_bswap (ctxt);
1571 namespace {
1573 /* Struct recording one operand for the store, which is either a constant,
1574 then VAL represents the constant and all the other fields are zero, or
1575 a memory load, then VAL represents the reference, BASE_ADDR is non-NULL
1576 and the other fields also reflect the memory load, or an SSA name, then
1577 VAL represents the SSA name and all the other fields are zero. */
1579 class store_operand_info
1581 public:
1582 tree val;
1583 tree base_addr;
1584 poly_uint64 bitsize;
1585 poly_uint64 bitpos;
1586 poly_uint64 bitregion_start;
1587 poly_uint64 bitregion_end;
1588 gimple *stmt;
1589 bool bit_not_p;
1590 store_operand_info ();
1593 store_operand_info::store_operand_info ()
1594 : val (NULL_TREE), base_addr (NULL_TREE), bitsize (0), bitpos (0),
1595 bitregion_start (0), bitregion_end (0), stmt (NULL), bit_not_p (false)
1599 /* Struct recording the information about a single store of an immediate
1600 to memory. These are created in the first phase and coalesced into
1601 merged_store_group objects in the second phase. */
1603 class store_immediate_info
1605 public:
1606 unsigned HOST_WIDE_INT bitsize;
1607 unsigned HOST_WIDE_INT bitpos;
1608 unsigned HOST_WIDE_INT bitregion_start;
1609 /* This is one past the last bit of the bit region. */
1610 unsigned HOST_WIDE_INT bitregion_end;
1611 gimple *stmt;
1612 unsigned int order;
1613 /* INTEGER_CST for constant store, STRING_CST for string store,
1614 MEM_REF for memory copy, BIT_*_EXPR for logical bitwise operation,
1615 BIT_INSERT_EXPR for bit insertion.
1616 LROTATE_EXPR if it can be only bswap optimized and
1617 ops are not really meaningful.
1618 NOP_EXPR if bswap optimization detected identity, ops
1619 are not meaningful. */
1620 enum tree_code rhs_code;
1621 /* Two fields for bswap optimization purposes. */
1622 struct symbolic_number n;
1623 gimple *ins_stmt;
1624 /* True if BIT_{AND,IOR,XOR}_EXPR result is inverted before storing. */
1625 bool bit_not_p;
1626 /* True if ops have been swapped and thus ops[1] represents
1627 rhs1 of BIT_{AND,IOR,XOR}_EXPR and ops[0] represents rhs2. */
1628 bool ops_swapped_p;
1629 /* The index number of the landing pad, or 0 if there is none. */
1630 int lp_nr;
1631 /* Operands. For BIT_*_EXPR rhs_code both operands are used, otherwise
1632 just the first one. */
1633 store_operand_info ops[2];
1634 store_immediate_info (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
1635 unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
1636 gimple *, unsigned int, enum tree_code,
1637 struct symbolic_number &, gimple *, bool, int,
1638 const store_operand_info &,
1639 const store_operand_info &);
1642 store_immediate_info::store_immediate_info (unsigned HOST_WIDE_INT bs,
1643 unsigned HOST_WIDE_INT bp,
1644 unsigned HOST_WIDE_INT brs,
1645 unsigned HOST_WIDE_INT bre,
1646 gimple *st,
1647 unsigned int ord,
1648 enum tree_code rhscode,
1649 struct symbolic_number &nr,
1650 gimple *ins_stmtp,
1651 bool bitnotp,
1652 int nr2,
1653 const store_operand_info &op0r,
1654 const store_operand_info &op1r)
1655 : bitsize (bs), bitpos (bp), bitregion_start (brs), bitregion_end (bre),
1656 stmt (st), order (ord), rhs_code (rhscode), n (nr),
1657 ins_stmt (ins_stmtp), bit_not_p (bitnotp), ops_swapped_p (false),
1658 lp_nr (nr2), ops { op0r, op1r }
1662 /* Struct representing a group of stores to contiguous memory locations.
1663 These are produced by the second phase (coalescing) and consumed in the
1664 third phase that outputs the widened stores. */
1666 class merged_store_group
1668 public:
1669 unsigned HOST_WIDE_INT start;
1670 unsigned HOST_WIDE_INT width;
1671 unsigned HOST_WIDE_INT bitregion_start;
1672 unsigned HOST_WIDE_INT bitregion_end;
1673 /* The size of the allocated memory for val and mask. */
1674 unsigned HOST_WIDE_INT buf_size;
1675 unsigned HOST_WIDE_INT align_base;
1676 poly_uint64 load_align_base[2];
1678 unsigned int align;
1679 unsigned int load_align[2];
1680 unsigned int first_order;
1681 unsigned int last_order;
1682 bool bit_insertion;
1683 bool string_concatenation;
1684 bool only_constants;
1685 bool consecutive;
1686 unsigned int first_nonmergeable_order;
1687 int lp_nr;
1689 auto_vec<store_immediate_info *> stores;
1690 /* We record the first and last original statements in the sequence because
1691 we'll need their vuse/vdef and replacement position. It's easier to keep
1692 track of them separately as 'stores' is reordered by apply_stores. */
1693 gimple *last_stmt;
1694 gimple *first_stmt;
1695 unsigned char *val;
1696 unsigned char *mask;
1698 merged_store_group (store_immediate_info *);
1699 ~merged_store_group ();
1700 bool can_be_merged_into (store_immediate_info *);
1701 void merge_into (store_immediate_info *);
1702 void merge_overlapping (store_immediate_info *);
1703 bool apply_stores ();
1704 private:
1705 void do_merge (store_immediate_info *);
1708 /* Debug helper. Dump LEN elements of byte array PTR to FD in hex. */
1710 static void
1711 dump_char_array (FILE *fd, unsigned char *ptr, unsigned int len)
1713 if (!fd)
1714 return;
1716 for (unsigned int i = 0; i < len; i++)
1717 fprintf (fd, "%02x ", ptr[i]);
1718 fprintf (fd, "\n");
1721 /* Clear out LEN bits starting from bit START in the byte array
1722 PTR. This clears the bits to the *right* from START.
1723 START must be within [0, BITS_PER_UNIT) and counts starting from
1724 the least significant bit. */
1726 static void
1727 clear_bit_region_be (unsigned char *ptr, unsigned int start,
1728 unsigned int len)
1730 if (len == 0)
1731 return;
1732 /* Clear len bits to the right of start. */
1733 else if (len <= start + 1)
1735 unsigned char mask = (~(~0U << len));
1736 mask = mask << (start + 1U - len);
1737 ptr[0] &= ~mask;
1739 else if (start != BITS_PER_UNIT - 1)
1741 clear_bit_region_be (ptr, start, (start % BITS_PER_UNIT) + 1);
1742 clear_bit_region_be (ptr + 1, BITS_PER_UNIT - 1,
1743 len - (start % BITS_PER_UNIT) - 1);
1745 else if (start == BITS_PER_UNIT - 1
1746 && len > BITS_PER_UNIT)
1748 unsigned int nbytes = len / BITS_PER_UNIT;
1749 memset (ptr, 0, nbytes);
1750 if (len % BITS_PER_UNIT != 0)
1751 clear_bit_region_be (ptr + nbytes, BITS_PER_UNIT - 1,
1752 len % BITS_PER_UNIT);
1754 else
1755 gcc_unreachable ();
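/* For illustration (a hypothetical example, not from the original sources):
   with BITS_PER_UNIT == 8, clear_bit_region_be (ptr, 7, 4) computes
   mask = 0x0f << (7 + 1 - 4) = 0xf0 and clears the four most significant
   bits of ptr[0], i.e. bits 7 down to 4.  */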
1758 /* In the byte array PTR clear the bit region starting at bit
1759 START and LEN bits wide.
1760 For regions spanning multiple bytes do this recursively until we reach
1761 zero LEN or a region contained within a single byte. */
1763 static void
1764 clear_bit_region (unsigned char *ptr, unsigned int start,
1765 unsigned int len)
1767 /* Degenerate base case. */
1768 if (len == 0)
1769 return;
1770 else if (start >= BITS_PER_UNIT)
1771 clear_bit_region (ptr + 1, start - BITS_PER_UNIT, len);
1772 /* Second base case. */
1773 else if ((start + len) <= BITS_PER_UNIT)
1775 unsigned char mask = (~0U) << (unsigned char) (BITS_PER_UNIT - len);
1776 mask >>= BITS_PER_UNIT - (start + len);
1778 ptr[0] &= ~mask;
1780 return;
1782 /* Clear most significant bits in a byte and proceed with the next byte. */
1783 else if (start != 0)
1785 clear_bit_region (ptr, start, BITS_PER_UNIT - start);
1786 clear_bit_region (ptr + 1, 0, len - (BITS_PER_UNIT - start));
1788 /* Whole bytes need to be cleared. */
1789 else if (start == 0 && len > BITS_PER_UNIT)
1791 unsigned int nbytes = len / BITS_PER_UNIT;
1792 /* We could recurse on each byte but we clear whole bytes, so a simple
1793 memset will do. */
1794 memset (ptr, '\0', nbytes);
1795 /* Clear the remaining sub-byte region if there is one. */
1796 if (len % BITS_PER_UNIT != 0)
1797 clear_bit_region (ptr + nbytes, 0, len % BITS_PER_UNIT);
1799 else
1800 gcc_unreachable ();
1803 /* Write BITLEN bits of EXPR to the byte array PTR at
1804 bit position BITPOS. PTR should contain TOTAL_BYTES elements.
1805 Return true if the operation succeeded. */
1807 static bool
1808 encode_tree_to_bitpos (tree expr, unsigned char *ptr, int bitlen, int bitpos,
1809 unsigned int total_bytes)
1811 unsigned int first_byte = bitpos / BITS_PER_UNIT;
1812 bool sub_byte_op_p = ((bitlen % BITS_PER_UNIT)
1813 || (bitpos % BITS_PER_UNIT)
1814 || !int_mode_for_size (bitlen, 0).exists ());
1815 bool empty_ctor_p
1816 = (TREE_CODE (expr) == CONSTRUCTOR
1817 && CONSTRUCTOR_NELTS (expr) == 0
1818 && TYPE_SIZE_UNIT (TREE_TYPE (expr))
1819 && tree_fits_uhwi_p (TYPE_SIZE_UNIT (TREE_TYPE (expr))));
1821 if (!sub_byte_op_p)
1823 if (first_byte >= total_bytes)
1824 return false;
1825 total_bytes -= first_byte;
1826 if (empty_ctor_p)
1828 unsigned HOST_WIDE_INT rhs_bytes
1829 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (expr)));
1830 if (rhs_bytes > total_bytes)
1831 return false;
1832 memset (ptr + first_byte, '\0', rhs_bytes);
1833 return true;
1835 return native_encode_expr (expr, ptr + first_byte, total_bytes) != 0;
1838 /* LITTLE-ENDIAN
1839 We are writing a non byte-sized quantity or at a position that is not
1840 at a byte boundary.
1841 |--------|--------|--------| ptr + first_byte
1843 xxx xxxxxxxx xxx< bp>
1844 |______EXPR____|
1846 First native_encode_expr EXPR into a temporary buffer and shift each
1847 byte in the buffer by 'bp' (carrying the bits over as necessary).
1848 |00000000|00xxxxxx|xxxxxxxx| << bp = |000xxxxx|xxxxxxxx|xxx00000|
1849 <------bitlen---->< bp>
1850 Then we clear the destination bits:
1851 |---00000|00000000|000-----| ptr + first_byte
1852 <-------bitlen--->< bp>
1854 Finally we OR the bytes of the shifted EXPR into the cleared region:
1855 |---xxxxx||xxxxxxxx||xxx-----| ptr + first_byte.
1857 BIG-ENDIAN
1858 We are writing a non byte-sized quantity or at a position that is not
1859 at a byte boundary.
1860 ptr + first_byte |--------|--------|--------|
1862 <bp >xxx xxxxxxxx xxx
1863 |_____EXPR_____|
1865 First native_encode_expr EXPR into a temporary buffer and shift each
1866 byte in the buffer to the right (carrying the bits over as necessary).
1867 We shift by as much as needed to align the most significant bit of EXPR
1868 with bitpos:
1869 |00xxxxxx|xxxxxxxx| >> 3 = |00000xxx|xxxxxxxx|xxxxx000|
1870 <---bitlen----> <bp ><-----bitlen----->
1871 Then we clear the destination bits:
1872 ptr + first_byte |-----000||00000000||00000---|
1873 <bp ><-------bitlen----->
1875 Finally we OR the bytes of the shifted EXPR into the cleared region:
1876 ptr + first_byte |---xxxxx||xxxxxxxx||xxx-----|.
1877 The awkwardness comes from the fact that bitpos is counted from the
1878 most significant bit of a byte. */
1880 /* We must be dealing with fixed-size data at this point, since the
1881 total size is also fixed. */
1882 unsigned int byte_size;
1883 if (empty_ctor_p)
1885 unsigned HOST_WIDE_INT rhs_bytes
1886 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (expr)));
1887 if (rhs_bytes > total_bytes)
1888 return false;
1889 byte_size = rhs_bytes;
1891 else
1893 fixed_size_mode mode
1894 = as_a <fixed_size_mode> (TYPE_MODE (TREE_TYPE (expr)));
1895 byte_size
1896 = mode == BLKmode
1897 ? tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (expr)))
1898 : GET_MODE_SIZE (mode);
1900 /* Allocate an extra byte so that we have space to shift into. */
1901 byte_size++;
1902 unsigned char *tmpbuf = XALLOCAVEC (unsigned char, byte_size);
1903 memset (tmpbuf, '\0', byte_size);
1904 /* The store detection code should only have allowed constants that are
1905 accepted by native_encode_expr or empty ctors. */
1906 if (!empty_ctor_p
1907 && native_encode_expr (expr, tmpbuf, byte_size - 1) == 0)
1908 gcc_unreachable ();
1910 /* The native_encode_expr machinery uses TYPE_MODE to determine how many
1911 bytes to write. This means it can write more than
1912 ROUND_UP (bitlen, BITS_PER_UNIT) / BITS_PER_UNIT bytes (for example
1913 write 8 bytes for a bitlen of 40). Skip the bytes that are not within
1914 bitlen and zero out the bits that are not relevant as well (that may
1915 contain a sign bit due to sign-extension). */
1916 unsigned int padding
1917 = byte_size - ROUND_UP (bitlen, BITS_PER_UNIT) / BITS_PER_UNIT - 1;
1918 /* On big-endian the padding is at the 'front' so just skip the initial
1919 bytes. */
1920 if (BYTES_BIG_ENDIAN)
1921 tmpbuf += padding;
1923 byte_size -= padding;
1925 if (bitlen % BITS_PER_UNIT != 0)
1927 if (BYTES_BIG_ENDIAN)
1928 clear_bit_region_be (tmpbuf, BITS_PER_UNIT - 1,
1929 BITS_PER_UNIT - (bitlen % BITS_PER_UNIT));
1930 else
1931 clear_bit_region (tmpbuf, bitlen,
1932 byte_size * BITS_PER_UNIT - bitlen);
1934 /* Left shifting relies on the last byte being clear if bitlen is
1935 a multiple of BITS_PER_UNIT, which might not be the case if
1936 there are padding bytes. */
1937 else if (!BYTES_BIG_ENDIAN)
1938 tmpbuf[byte_size - 1] = '\0';
1940 /* Clear the bit region in PTR where the bits from TMPBUF will be
1941 inserted into. */
1942 if (BYTES_BIG_ENDIAN)
1943 clear_bit_region_be (ptr + first_byte,
1944 BITS_PER_UNIT - 1 - (bitpos % BITS_PER_UNIT), bitlen);
1945 else
1946 clear_bit_region (ptr + first_byte, bitpos % BITS_PER_UNIT, bitlen);
1948 int shift_amnt;
1949 int bitlen_mod = bitlen % BITS_PER_UNIT;
1950 int bitpos_mod = bitpos % BITS_PER_UNIT;
1952 bool skip_byte = false;
1953 if (BYTES_BIG_ENDIAN)
1955 /* BITPOS and BITLEN are exactly aligned and no shifting
1956 is necessary. */
1957 if (bitpos_mod + bitlen_mod == BITS_PER_UNIT
1958 || (bitpos_mod == 0 && bitlen_mod == 0))
1959 shift_amnt = 0;
1960 /* |. . . . . . . .|
1961 <bp > <blen >.
1962 We always shift right for BYTES_BIG_ENDIAN so shift the beginning
1963 of the value until it aligns with 'bp' in the next byte over. */
1964 else if (bitpos_mod + bitlen_mod < BITS_PER_UNIT)
1966 shift_amnt = bitlen_mod + bitpos_mod;
1967 skip_byte = bitlen_mod != 0;
1969 /* |. . . . . . . .|
1970 <----bp--->
1971 <---blen---->.
1972 Shift the value right within the same byte so it aligns with 'bp'. */
1973 else
1974 shift_amnt = bitlen_mod + bitpos_mod - BITS_PER_UNIT;
1976 else
1977 shift_amnt = bitpos % BITS_PER_UNIT;
1979 /* Create the shifted version of EXPR. */
1980 if (!BYTES_BIG_ENDIAN)
1982 shift_bytes_in_array_left (tmpbuf, byte_size, shift_amnt);
1983 if (shift_amnt == 0)
1984 byte_size--;
1986 else
1988 gcc_assert (BYTES_BIG_ENDIAN);
1989 shift_bytes_in_array_right (tmpbuf, byte_size, shift_amnt);
1990 /* If shifting right forced us to move into the next byte skip the now
1991 empty byte. */
1992 if (skip_byte)
1994 tmpbuf++;
1995 byte_size--;
1999 /* Insert the bits from TMPBUF. */
2000 for (unsigned int i = 0; i < byte_size; i++)
2001 ptr[first_byte + i] |= tmpbuf[i];
2003 return true;
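/* Illustrative sketch (not part of the pass): on little-endian targets the
   insertion above boils down to shift, clear, then OR.  A stand-alone
   analogue for a value that fits in a single byte (the helper name and the
   bitpos + bitlen <= 8 restriction are assumptions of this sketch):

     static void
     insert_bits_le (unsigned char *ptr, unsigned int val,
                     unsigned int bitpos, unsigned int bitlen)
     {
       unsigned int mask = ((1U << bitlen) - 1) << bitpos;
       unsigned int shifted = (val << bitpos) & mask;
       ptr[0] = (ptr[0] & ~mask) | shifted;  /* Clear destination, then OR.  */
     }

   e.g. with ptr[0] == 0xff, insert_bits_le (ptr, 0x5, 2, 3) yields 0xf7.  */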
2006 /* Sorting function for store_immediate_info objects.
2007 Sorts them by bitposition. */
2009 static int
2010 sort_by_bitpos (const void *x, const void *y)
2012 store_immediate_info *const *tmp = (store_immediate_info * const *) x;
2013 store_immediate_info *const *tmp2 = (store_immediate_info * const *) y;
2015 if ((*tmp)->bitpos < (*tmp2)->bitpos)
2016 return -1;
2017 else if ((*tmp)->bitpos > (*tmp2)->bitpos)
2018 return 1;
2019 else
2020 /* If they are the same, use the order field, which is guaranteed to
2021 be different. */
2022 return (*tmp)->order - (*tmp2)->order;
2025 /* Sorting function for store_immediate_info objects.
2026 Sorts them by the order field. */
2028 static int
2029 sort_by_order (const void *x, const void *y)
2031 store_immediate_info *const *tmp = (store_immediate_info * const *) x;
2032 store_immediate_info *const *tmp2 = (store_immediate_info * const *) y;
2034 if ((*tmp)->order < (*tmp2)->order)
2035 return -1;
2036 else if ((*tmp)->order > (*tmp2)->order)
2037 return 1;
2039 gcc_unreachable ();
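/* Illustrative sketch (not part of the pass): sort_by_bitpos falls back to
   the statement order as a tie-breaker so that qsort gives a deterministic
   result.  A stand-alone analogue over plain structs (names are
   hypothetical):

     #include <stdlib.h>

     struct rec { unsigned long bitpos; unsigned int order; };

     static int
     cmp_rec (const void *x, const void *y)
     {
       const struct rec *a = (const struct rec *) x;
       const struct rec *b = (const struct rec *) y;
       if (a->bitpos != b->bitpos)
         return a->bitpos < b->bitpos ? -1 : 1;
       return (int) a->order - (int) b->order;  /* Unique tie-breaker.  */
     }

   used as qsort (recs, n, sizeof (struct rec), cmp_rec).  */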
2042 /* Initialize a merged_store_group object from a store_immediate_info
2043 object. */
2045 merged_store_group::merged_store_group (store_immediate_info *info)
2047 start = info->bitpos;
2048 width = info->bitsize;
2049 bitregion_start = info->bitregion_start;
2050 bitregion_end = info->bitregion_end;
2051 /* VAL has memory allocated for it in apply_stores once the group
2052 width has been finalized. */
2053 val = NULL;
2054 mask = NULL;
2055 bit_insertion = info->rhs_code == BIT_INSERT_EXPR;
2056 string_concatenation = info->rhs_code == STRING_CST;
2057 only_constants = info->rhs_code == INTEGER_CST;
2058 consecutive = true;
2059 first_nonmergeable_order = ~0U;
2060 lp_nr = info->lp_nr;
2061 unsigned HOST_WIDE_INT align_bitpos = 0;
2062 get_object_alignment_1 (gimple_assign_lhs (info->stmt),
2063 &align, &align_bitpos);
2064 align_base = start - align_bitpos;
2065 for (int i = 0; i < 2; ++i)
2067 store_operand_info &op = info->ops[i];
2068 if (op.base_addr == NULL_TREE)
2070 load_align[i] = 0;
2071 load_align_base[i] = 0;
2073 else
2075 get_object_alignment_1 (op.val, &load_align[i], &align_bitpos);
2076 load_align_base[i] = op.bitpos - align_bitpos;
2079 stores.create (1);
2080 stores.safe_push (info);
2081 last_stmt = info->stmt;
2082 last_order = info->order;
2083 first_stmt = last_stmt;
2084 first_order = last_order;
2085 buf_size = 0;
2088 merged_store_group::~merged_store_group ()
2090 if (val)
2091 XDELETEVEC (val);
2094 /* Return true if the store described by INFO can be merged into the group. */
2096 bool
2097 merged_store_group::can_be_merged_into (store_immediate_info *info)
2099 /* Do not merge bswap patterns. */
2100 if (info->rhs_code == LROTATE_EXPR)
2101 return false;
2103 if (info->lp_nr != lp_nr)
2104 return false;
2106 /* The canonical case. */
2107 if (info->rhs_code == stores[0]->rhs_code)
2108 return true;
2110 /* BIT_INSERT_EXPR is compatible with INTEGER_CST if no STRING_CST. */
2111 if (info->rhs_code == BIT_INSERT_EXPR && stores[0]->rhs_code == INTEGER_CST)
2112 return !string_concatenation;
2114 if (stores[0]->rhs_code == BIT_INSERT_EXPR && info->rhs_code == INTEGER_CST)
2115 return !string_concatenation;
2117 /* We can turn MEM_REF into BIT_INSERT_EXPR for bit-field stores, but do it
2118 only for small regions since this can generate a lot of instructions. */
2119 if (info->rhs_code == MEM_REF
2120 && (stores[0]->rhs_code == INTEGER_CST
2121 || stores[0]->rhs_code == BIT_INSERT_EXPR)
2122 && info->bitregion_start == stores[0]->bitregion_start
2123 && info->bitregion_end == stores[0]->bitregion_end
2124 && info->bitregion_end - info->bitregion_start <= MAX_FIXED_MODE_SIZE)
2125 return !string_concatenation;
2127 if (stores[0]->rhs_code == MEM_REF
2128 && (info->rhs_code == INTEGER_CST
2129 || info->rhs_code == BIT_INSERT_EXPR)
2130 && info->bitregion_start == stores[0]->bitregion_start
2131 && info->bitregion_end == stores[0]->bitregion_end
2132 && info->bitregion_end - info->bitregion_start <= MAX_FIXED_MODE_SIZE)
2133 return !string_concatenation;
2135 /* STRING_CST is compatible with INTEGER_CST if no BIT_INSERT_EXPR. */
2136 if (info->rhs_code == STRING_CST
2137 && stores[0]->rhs_code == INTEGER_CST
2138 && stores[0]->bitsize == CHAR_BIT)
2139 return !bit_insertion;
2141 if (stores[0]->rhs_code == STRING_CST
2142 && info->rhs_code == INTEGER_CST
2143 && info->bitsize == CHAR_BIT)
2144 return !bit_insertion;
2146 return false;
2149 /* Helper method for merge_into and merge_overlapping to do
2150 the common part. */
2152 void
2153 merged_store_group::do_merge (store_immediate_info *info)
2155 bitregion_start = MIN (bitregion_start, info->bitregion_start);
2156 bitregion_end = MAX (bitregion_end, info->bitregion_end);
2158 unsigned int this_align;
2159 unsigned HOST_WIDE_INT align_bitpos = 0;
2160 get_object_alignment_1 (gimple_assign_lhs (info->stmt),
2161 &this_align, &align_bitpos);
2162 if (this_align > align)
2164 align = this_align;
2165 align_base = info->bitpos - align_bitpos;
2167 for (int i = 0; i < 2; ++i)
2169 store_operand_info &op = info->ops[i];
2170 if (!op.base_addr)
2171 continue;
2173 get_object_alignment_1 (op.val, &this_align, &align_bitpos);
2174 if (this_align > load_align[i])
2176 load_align[i] = this_align;
2177 load_align_base[i] = op.bitpos - align_bitpos;
2181 gimple *stmt = info->stmt;
2182 stores.safe_push (info);
2183 if (info->order > last_order)
2185 last_order = info->order;
2186 last_stmt = stmt;
2188 else if (info->order < first_order)
2190 first_order = info->order;
2191 first_stmt = stmt;
2194 if (info->bitpos != start + width)
2195 consecutive = false;
2197 /* We need to use bit insertion if there is any bit-field store. */
2198 if (info->rhs_code == BIT_INSERT_EXPR)
2200 bit_insertion = true;
2201 gcc_assert (!string_concatenation);
2204 /* We want to use concatenation if there is any string. */
2205 if (info->rhs_code == STRING_CST)
2207 string_concatenation = true;
2208 gcc_assert (!bit_insertion);
2211 /* But we cannot use it if we don't have consecutive stores. */
2212 if (!consecutive)
2213 string_concatenation = false;
2215 if (info->rhs_code != INTEGER_CST)
2216 only_constants = false;
2219 /* Merge a store recorded by INFO into this merged store.
2220 The store is not overlapping with the existing recorded
2221 stores. */
2223 void
2224 merged_store_group::merge_into (store_immediate_info *info)
2226 do_merge (info);
2228 /* Make sure we're inserting in the position we think we're inserting. */
2229 gcc_assert (info->bitpos >= start + width
2230 && info->bitregion_start <= bitregion_end);
2232 width = info->bitpos + info->bitsize - start;
2235 /* Merge a store described by INFO into this merged store.
2236 INFO overlaps in some way with the current store (i.e. it's not contiguous
2237 which is handled by merged_store_group::merge_into). */
2239 void
2240 merged_store_group::merge_overlapping (store_immediate_info *info)
2242 do_merge (info);
2244 /* If the store extends the size of the group, extend the width. */
2245 if (info->bitpos + info->bitsize > start + width)
2246 width = info->bitpos + info->bitsize - start;
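/* Worked example (not from the sources) of the width bookkeeping above:
   for a group with start == 32 and width == 16, merge_into of a store at
   bitpos 48 with bitsize 8 sets width = 48 + 8 - 32 = 24, whereas
   merge_overlapping of a store at bitpos 40 with bitsize 4 leaves width
   at 16 because 40 + 4 does not extend past 32 + 16.  */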
2249 /* Go through all the recorded stores in this group in program order and
2250 apply their values to the VAL byte array to create the final merged
2251 value. Return true if the operation succeeded. */
2253 bool
2254 merged_store_group::apply_stores ()
2256 store_immediate_info *info;
2257 unsigned int i;
2259 /* Make sure we have more than one store in the group, otherwise we cannot
2260 merge anything. */
2261 if (bitregion_start % BITS_PER_UNIT != 0
2262 || bitregion_end % BITS_PER_UNIT != 0
2263 || stores.length () == 1)
2264 return false;
2266 buf_size = (bitregion_end - bitregion_start) / BITS_PER_UNIT;
2268 /* Really do string concatenation for large strings only. */
2269 if (buf_size <= MOVE_MAX)
2270 string_concatenation = false;
2272 /* Create a power-of-2-sized buffer for native_encode_expr. */
2273 if (!string_concatenation)
2274 buf_size = 1 << ceil_log2 (buf_size);
2276 val = XNEWVEC (unsigned char, 2 * buf_size);
2277 mask = val + buf_size;
2278 memset (val, 0, buf_size);
2279 memset (mask, ~0U, buf_size);
2281 stores.qsort (sort_by_order);
2283 FOR_EACH_VEC_ELT (stores, i, info)
2285 unsigned int pos_in_buffer = info->bitpos - bitregion_start;
2286 tree cst;
2287 if (info->ops[0].val && info->ops[0].base_addr == NULL_TREE)
2288 cst = info->ops[0].val;
2289 else if (info->ops[1].val && info->ops[1].base_addr == NULL_TREE)
2290 cst = info->ops[1].val;
2291 else
2292 cst = NULL_TREE;
2293 bool ret = true;
2294 if (cst && info->rhs_code != BIT_INSERT_EXPR)
2295 ret = encode_tree_to_bitpos (cst, val, info->bitsize, pos_in_buffer,
2296 buf_size);
2297 unsigned char *m = mask + (pos_in_buffer / BITS_PER_UNIT);
2298 if (BYTES_BIG_ENDIAN)
2299 clear_bit_region_be (m, (BITS_PER_UNIT - 1
2300 - (pos_in_buffer % BITS_PER_UNIT)),
2301 info->bitsize);
2302 else
2303 clear_bit_region (m, pos_in_buffer % BITS_PER_UNIT, info->bitsize);
2304 if (cst && dump_file && (dump_flags & TDF_DETAILS))
2306 if (ret)
2308 fputs ("After writing ", dump_file);
2309 print_generic_expr (dump_file, cst, TDF_NONE);
2310 fprintf (dump_file, " of size " HOST_WIDE_INT_PRINT_DEC
2311 " at position %d\n", info->bitsize, pos_in_buffer);
2312 fputs (" the merged value contains ", dump_file);
2313 dump_char_array (dump_file, val, buf_size);
2314 fputs (" the merged mask contains ", dump_file);
2315 dump_char_array (dump_file, mask, buf_size);
2316 if (bit_insertion)
2317 fputs (" bit insertion is required\n", dump_file);
2318 if (string_concatenation)
2319 fputs (" string concatenation is required\n", dump_file);
2321 else
2322 fprintf (dump_file, "Failed to merge stores\n");
2324 if (!ret)
2325 return false;
2327 stores.qsort (sort_by_bitpos);
2328 return true;
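/* Illustrative sketch (not part of the pass): for a 4-byte bit region covered
   by two single-byte constant stores, say 0x12 at byte 0 and 0x34 at byte 2,
   apply_stores produces

     val  == { 0x12, 0x00, 0x34, 0x00 }
     mask == { 0x00, 0xff, 0x00, 0xff }

   i.e. mask keeps a set bit for every destination bit that no recorded store
   writes, which is how the output phase knows which original bits must be
   preserved.  */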
2331 /* Structure describing the store chain. */
2333 class imm_store_chain_info
2335 public:
2336 /* Doubly-linked list that imposes an order on chain processing.
2337 PNXP (prev's next pointer) points to the head of a list, or to
2338 the next field in the previous chain in the list.
2339 See pass_store_merging::m_stores_head for more rationale. */
2340 imm_store_chain_info *next, **pnxp;
2341 tree base_addr;
2342 auto_vec<store_immediate_info *> m_store_info;
2343 auto_vec<merged_store_group *> m_merged_store_groups;
2345 imm_store_chain_info (imm_store_chain_info *&inspt, tree b_a)
2346 : next (inspt), pnxp (&inspt), base_addr (b_a)
2348 inspt = this;
2349 if (next)
2351 gcc_checking_assert (pnxp == next->pnxp);
2352 next->pnxp = &next;
2355 ~imm_store_chain_info ()
2357 *pnxp = next;
2358 if (next)
2360 gcc_checking_assert (&next == next->pnxp);
2361 next->pnxp = pnxp;
2364 bool terminate_and_process_chain ();
2365 bool try_coalesce_bswap (merged_store_group *, unsigned int, unsigned int,
2366 unsigned int);
2367 bool coalesce_immediate_stores ();
2368 bool output_merged_store (merged_store_group *);
2369 bool output_merged_stores ();
2372 const pass_data pass_data_tree_store_merging = {
2373 GIMPLE_PASS, /* type */
2374 "store-merging", /* name */
2375 OPTGROUP_NONE, /* optinfo_flags */
2376 TV_GIMPLE_STORE_MERGING, /* tv_id */
2377 PROP_ssa, /* properties_required */
2378 0, /* properties_provided */
2379 0, /* properties_destroyed */
2380 0, /* todo_flags_start */
2381 TODO_update_ssa, /* todo_flags_finish */
2384 class pass_store_merging : public gimple_opt_pass
2386 public:
2387 pass_store_merging (gcc::context *ctxt)
2388 : gimple_opt_pass (pass_data_tree_store_merging, ctxt), m_stores_head (),
2389 m_n_chains (0), m_n_stores (0)
2393 /* Pass not supported for PDP-endian, nor for insane hosts or
2394 target character sizes where native_{encode,interpret}_expr
2395 doesn't work properly. */
2396 virtual bool
2397 gate (function *)
2399 return flag_store_merging
2400 && BYTES_BIG_ENDIAN == WORDS_BIG_ENDIAN
2401 && CHAR_BIT == 8
2402 && BITS_PER_UNIT == 8;
2405 virtual unsigned int execute (function *);
2407 private:
2408 hash_map<tree_operand_hash, class imm_store_chain_info *> m_stores;
2410 /* Form a doubly-linked stack of the elements of m_stores, so that
2411 we can iterate over them in a predictable way. Using this order
2412 avoids extraneous differences in the compiler output just because
2413 of tree pointer variations (e.g. different chains end up in
2414 different positions of m_stores, so they are handled in different
2415 orders, so they allocate or release SSA names in different
2416 orders, and when they get reused, subsequent passes end up
2417 getting different SSA names, which may ultimately change
2418 decisions when going out of SSA). */
2419 imm_store_chain_info *m_stores_head;
2421 /* The number of store chains currently tracked. */
2422 unsigned m_n_chains;
2423 /* The number of stores currently tracked. */
2424 unsigned m_n_stores;
2426 bool process_store (gimple *);
2427 bool terminate_and_process_chain (imm_store_chain_info *);
2428 bool terminate_all_aliasing_chains (imm_store_chain_info **, gimple *);
2429 bool terminate_and_process_all_chains ();
2430 }; // class pass_store_merging
2432 /* Terminate and process all recorded chains. Return true if any changes
2433 were made. */
2435 bool
2436 pass_store_merging::terminate_and_process_all_chains ()
2438 bool ret = false;
2439 while (m_stores_head)
2440 ret |= terminate_and_process_chain (m_stores_head);
2441 gcc_assert (m_stores.is_empty ());
2442 return ret;
2445 /* Terminate all chains that are affected by the statement STMT.
2446 CHAIN_INFO is the chain we should ignore from the checks if
2447 non-NULL. Return true if any changes were made. */
2449 bool
2450 pass_store_merging::terminate_all_aliasing_chains (imm_store_chain_info
2451 **chain_info,
2452 gimple *stmt)
2454 bool ret = false;
2456 /* If the statement doesn't touch memory it can't alias. */
2457 if (!gimple_vuse (stmt))
2458 return false;
2460 tree store_lhs = gimple_store_p (stmt) ? gimple_get_lhs (stmt) : NULL_TREE;
2461 ao_ref store_lhs_ref;
2462 ao_ref_init (&store_lhs_ref, store_lhs);
2463 for (imm_store_chain_info *next = m_stores_head, *cur = next; cur; cur = next)
2465 next = cur->next;
2467 /* We already checked all the stores in chain_info and terminated the
2468 chain if necessary. Skip it here. */
2469 if (chain_info && *chain_info == cur)
2470 continue;
2472 store_immediate_info *info;
2473 unsigned int i;
2474 FOR_EACH_VEC_ELT (cur->m_store_info, i, info)
2476 tree lhs = gimple_assign_lhs (info->stmt);
2477 ao_ref lhs_ref;
2478 ao_ref_init (&lhs_ref, lhs);
2479 if (ref_maybe_used_by_stmt_p (stmt, &lhs_ref)
2480 || stmt_may_clobber_ref_p_1 (stmt, &lhs_ref)
2481 || (store_lhs && refs_may_alias_p_1 (&store_lhs_ref,
2482 &lhs_ref, false)))
2484 if (dump_file && (dump_flags & TDF_DETAILS))
2486 fprintf (dump_file, "stmt causes chain termination:\n");
2487 print_gimple_stmt (dump_file, stmt, 0);
2489 ret |= terminate_and_process_chain (cur);
2490 break;
2495 return ret;
2498 /* Helper function. Terminate the recorded chain storing to base object
2499 BASE. Return true if the merging and output was successful. The m_stores
2500 entry is removed after the processing in any case. */
2502 bool
2503 pass_store_merging::terminate_and_process_chain (imm_store_chain_info *chain_info)
2505 m_n_stores -= chain_info->m_store_info.length ();
2506 m_n_chains--;
2507 bool ret = chain_info->terminate_and_process_chain ();
2508 m_stores.remove (chain_info->base_addr);
2509 delete chain_info;
2510 return ret;
2513 /* Return true if stmts in between FIRST (inclusive) and LAST (exclusive)
2514 may clobber REF. FIRST and LAST must have non-NULL vdef. We want to
2515 be able to sink load of REF across stores between FIRST and LAST, up
2516 to right before LAST. */
2518 bool
2519 stmts_may_clobber_ref_p (gimple *first, gimple *last, tree ref)
2521 ao_ref r;
2522 ao_ref_init (&r, ref);
2523 unsigned int count = 0;
2524 tree vop = gimple_vdef (last);
2525 gimple *stmt;
2527 /* Return true conservatively if the basic blocks are different. */
2528 if (gimple_bb (first) != gimple_bb (last))
2529 return true;
2533 stmt = SSA_NAME_DEF_STMT (vop);
2534 if (stmt_may_clobber_ref_p_1 (stmt, &r))
2535 return true;
2536 if (gimple_store_p (stmt)
2537 && refs_anti_dependent_p (ref, gimple_get_lhs (stmt)))
2538 return true;
2539 /* Avoid quadratic compile time by bounding the number of checks
2540 we perform. */
2541 if (++count > MAX_STORE_ALIAS_CHECKS)
2542 return true;
2543 vop = gimple_vuse (stmt);
2545 while (stmt != first);
2547 return false;
2550 /* Return true if INFO->ops[IDX] is mergeable with the
2551 corresponding loads already in MERGED_STORE group.
2552 BASE_ADDR is the base address of the whole store group. */
2554 bool
2555 compatible_load_p (merged_store_group *merged_store,
2556 store_immediate_info *info,
2557 tree base_addr, int idx)
2559 store_immediate_info *infof = merged_store->stores[0];
2560 if (!info->ops[idx].base_addr
2561 || maybe_ne (info->ops[idx].bitpos - infof->ops[idx].bitpos,
2562 info->bitpos - infof->bitpos)
2563 || !operand_equal_p (info->ops[idx].base_addr,
2564 infof->ops[idx].base_addr, 0))
2565 return false;
2567 store_immediate_info *infol = merged_store->stores.last ();
2568 tree load_vuse = gimple_vuse (info->ops[idx].stmt);
2569 /* In this case all vuses should be the same, e.g.
2570 _1 = s.a; _2 = s.b; _3 = _1 | 1; t.a = _3; _4 = _2 | 2; t.b = _4;
2572 _1 = s.a; _2 = s.b; t.a = _1; t.b = _2;
2573 and we can emit the coalesced load next to any of those loads. */
2574 if (gimple_vuse (infof->ops[idx].stmt) == load_vuse
2575 && gimple_vuse (infol->ops[idx].stmt) == load_vuse)
2576 return true;
2578 /* Otherwise, at least for now require that the load has the same
2579 vuse as the store. See following examples. */
2580 if (gimple_vuse (info->stmt) != load_vuse)
2581 return false;
2583 if (gimple_vuse (infof->stmt) != gimple_vuse (infof->ops[idx].stmt)
2584 || (infof != infol
2585 && gimple_vuse (infol->stmt) != gimple_vuse (infol->ops[idx].stmt)))
2586 return false;
2588 /* If the load is from the same location as the store, already
2589 the construction of the immediate chain info guarantees no intervening
2590 stores, so no further checks are needed. Example:
2591 _1 = s.a; _2 = _1 & -7; s.a = _2; _3 = s.b; _4 = _3 & -7; s.b = _4; */
2592 if (known_eq (info->ops[idx].bitpos, info->bitpos)
2593 && operand_equal_p (info->ops[idx].base_addr, base_addr, 0))
2594 return true;
2596 /* Otherwise, we need to punt if any of the loads can be clobbered by any
2597 of the stores in the group, or any other stores in between those.
2598 Previous calls to compatible_load_p ensured that for all the
2599 merged_store->stores IDX loads, no stmts starting with
2600 merged_store->first_stmt and ending right before merged_store->last_stmt
2601 clobbers those loads. */
2602 gimple *first = merged_store->first_stmt;
2603 gimple *last = merged_store->last_stmt;
2604 /* The stores are sorted by increasing store bitpos, so if the info->stmt
2605 store comes before the first load seen so far, we'll be changing
2606 merged_store->first_stmt. In that case we need to give up if
2607 any of the previously processed loads could be clobbered by the stmts
2608 in the new range. */
2609 if (info->order < merged_store->first_order)
2611 for (store_immediate_info *infoc : merged_store->stores)
2612 if (stmts_may_clobber_ref_p (info->stmt, first, infoc->ops[idx].val))
2613 return false;
2614 first = info->stmt;
2616 /* Similarly, we could change merged_store->last_stmt, so ensure
2617 in that case no stmts in the new range clobber any of the earlier
2618 processed loads. */
2619 else if (info->order > merged_store->last_order)
2621 for (store_immediate_info *infoc : merged_store->stores)
2622 if (stmts_may_clobber_ref_p (last, info->stmt, infoc->ops[idx].val))
2623 return false;
2624 last = info->stmt;
2626 /* And finally, we'd be adding a new load to the set, ensure it isn't
2627 clobbered in the new range. */
2628 if (stmts_may_clobber_ref_p (first, last, info->ops[idx].val))
2629 return false;
2631 /* Otherwise, we are looking for:
2632 _1 = s.a; _2 = _1 ^ 15; t.a = _2; _3 = s.b; _4 = _3 ^ 15; t.b = _4;
2634 _1 = s.a; t.a = _1; _2 = s.b; t.b = _2; */
2635 return true;
2638 /* Add all refs loaded to compute VAL to REFS vector. */
2640 void
2641 gather_bswap_load_refs (vec<tree> *refs, tree val)
2643 if (TREE_CODE (val) != SSA_NAME)
2644 return;
2646 gimple *stmt = SSA_NAME_DEF_STMT (val);
2647 if (!is_gimple_assign (stmt))
2648 return;
2650 if (gimple_assign_load_p (stmt))
2652 refs->safe_push (gimple_assign_rhs1 (stmt));
2653 return;
2656 switch (gimple_assign_rhs_class (stmt))
2658 case GIMPLE_BINARY_RHS:
2659 gather_bswap_load_refs (refs, gimple_assign_rhs2 (stmt));
2660 /* FALLTHRU */
2661 case GIMPLE_UNARY_RHS:
2662 gather_bswap_load_refs (refs, gimple_assign_rhs1 (stmt));
2663 break;
2664 default:
2665 gcc_unreachable ();
2669 /* Check if there are any stores in M_STORE_INFO after index I
2670 (where M_STORE_INFO must be sorted by sort_by_bitpos) that overlap
2671 a potential group ending at END and that have their order
2672 smaller than LAST_ORDER. ALL_INTEGER_CST_P is true if
2673 all the stores already merged and the one under consideration
2674 have rhs_code of INTEGER_CST. Return true if there are no such stores.
2675 Consider:
2676 MEM[(long long int *)p_28] = 0;
2677 MEM[(long long int *)p_28 + 8B] = 0;
2678 MEM[(long long int *)p_28 + 16B] = 0;
2679 MEM[(long long int *)p_28 + 24B] = 0;
2680 _129 = (int) _130;
2681 MEM[(int *)p_28 + 8B] = _129;
2682 MEM[(int *)p_28].a = -1;
2683 We already have
2684 MEM[(long long int *)p_28] = 0;
2685 MEM[(int *)p_28].a = -1;
2686 stmts in the current group and need to consider if it is safe to
2687 add MEM[(long long int *)p_28 + 8B] = 0; store into the same group.
2688 There is an overlap between that store and the MEM[(int *)p_28 + 8B] = _129;
2689 store though, so if we add the MEM[(long long int *)p_28 + 8B] = 0;
2690 into the group and merging of those 3 stores is successful, merged
2691 stmts will be emitted at the latest store from that group, i.e.
2692 LAST_ORDER, which is the MEM[(int *)p_28].a = -1; store.
2693 The MEM[(int *)p_28 + 8B] = _129; store that originally follows
2694 the MEM[(long long int *)p_28 + 8B] = 0; would now be before it,
2695 so we need to refuse merging MEM[(long long int *)p_28 + 8B] = 0;
2696 into the group. That way it will be its own store group and will
2697 not be touched. If ALL_INTEGER_CST_P and there are overlapping
2698 INTEGER_CST stores, those are mergeable using merge_overlapping,
2699 so don't return false for those.
2701 Similarly, check stores from FIRST_EARLIER (inclusive) to END_EARLIER
2702 (exclusive), whether they don't overlap the bitrange START to END
2703 and have order in between FIRST_ORDER and LAST_ORDER. This is to
2704 prevent merging in cases like:
2705 MEM <char[12]> [&b + 8B] = {};
2706 MEM[(short *) &b] = 5;
2707 _5 = *x_4(D);
2708 MEM <long long unsigned int> [&b + 2B] = _5;
2709 MEM[(char *)&b + 16B] = 88;
2710 MEM[(int *)&b + 20B] = 1;
2711 The = {} store comes in sort_by_bitpos before the = 88 store, and can't
2712 be merged with it, because the = _5 store overlaps these and is in between
2713 them in sort_by_order ordering. If it was merged, the merged store would
2714 go after the = _5 store and thus change behavior. */
2716 static bool
2717 check_no_overlap (const vec<store_immediate_info *> &m_store_info,
2718 unsigned int i,
2719 bool all_integer_cst_p, unsigned int first_order,
2720 unsigned int last_order, unsigned HOST_WIDE_INT start,
2721 unsigned HOST_WIDE_INT end, unsigned int first_earlier,
2722 unsigned end_earlier)
2724 unsigned int len = m_store_info.length ();
2725 for (unsigned int j = first_earlier; j < end_earlier; j++)
2727 store_immediate_info *info = m_store_info[j];
2728 if (info->order > first_order
2729 && info->order < last_order
2730 && info->bitpos + info->bitsize > start)
2731 return false;
2733 for (++i; i < len; ++i)
2735 store_immediate_info *info = m_store_info[i];
2736 if (info->bitpos >= end)
2737 break;
2738 if (info->order < last_order
2739 && (!all_integer_cst_p || info->rhs_code != INTEGER_CST))
2740 return false;
2742 return true;
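/* Illustrative sketch (not part of the pass): the core test above rejects a
   store that overlaps the candidate bit range [START, END) while being
   ordered before the group's last statement.  A stand-alone analogue (the
   struct and function names are hypothetical):

     struct sstore { unsigned long bitpos, bitsize; unsigned int order; };

     static int
     overlaps_and_precedes (const struct sstore *s, unsigned long start,
                            unsigned long end, unsigned int last_order)
     {
       return s->bitpos < end
              && s->bitpos + s->bitsize > start
              && s->order < last_order;
     }

   (INTEGER_CST stores are exempted when ALL_INTEGER_CST_P.)  */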
2745 /* Return true if m_store_info[first] and at least one following store
2746 form a group that stores a TRY_SIZE-bit value which is byte swapped
2747 from a memory load or from some value, or is the identity of some value.
2748 This uses the bswap pass APIs. */
2750 bool
2751 imm_store_chain_info::try_coalesce_bswap (merged_store_group *merged_store,
2752 unsigned int first,
2753 unsigned int try_size,
2754 unsigned int first_earlier)
2756 unsigned int len = m_store_info.length (), last = first;
2757 unsigned HOST_WIDE_INT width = m_store_info[first]->bitsize;
2758 if (width >= try_size)
2759 return false;
2760 for (unsigned int i = first + 1; i < len; ++i)
2762 if (m_store_info[i]->bitpos != m_store_info[first]->bitpos + width
2763 || m_store_info[i]->lp_nr != merged_store->lp_nr
2764 || m_store_info[i]->ins_stmt == NULL)
2765 return false;
2766 width += m_store_info[i]->bitsize;
2767 if (width >= try_size)
2769 last = i;
2770 break;
2773 if (width != try_size)
2774 return false;
2776 bool allow_unaligned
2777 = !STRICT_ALIGNMENT && param_store_merging_allow_unaligned;
2778 /* Punt if the combined store would not be aligned and we need alignment. */
2779 if (!allow_unaligned)
2781 unsigned int align = merged_store->align;
2782 unsigned HOST_WIDE_INT align_base = merged_store->align_base;
2783 for (unsigned int i = first + 1; i <= last; ++i)
2785 unsigned int this_align;
2786 unsigned HOST_WIDE_INT align_bitpos = 0;
2787 get_object_alignment_1 (gimple_assign_lhs (m_store_info[i]->stmt),
2788 &this_align, &align_bitpos);
2789 if (this_align > align)
2791 align = this_align;
2792 align_base = m_store_info[i]->bitpos - align_bitpos;
2795 unsigned HOST_WIDE_INT align_bitpos
2796 = (m_store_info[first]->bitpos - align_base) & (align - 1);
2797 if (align_bitpos)
2798 align = least_bit_hwi (align_bitpos);
2799 if (align < try_size)
2800 return false;
2803 tree type;
2804 switch (try_size)
2806 case 16: type = uint16_type_node; break;
2807 case 32: type = uint32_type_node; break;
2808 case 64: type = uint64_type_node; break;
2809 default: gcc_unreachable ();
2811 struct symbolic_number n;
2812 gimple *ins_stmt = NULL;
2813 int vuse_store = -1;
2814 unsigned int first_order = merged_store->first_order;
2815 unsigned int last_order = merged_store->last_order;
2816 gimple *first_stmt = merged_store->first_stmt;
2817 gimple *last_stmt = merged_store->last_stmt;
2818 unsigned HOST_WIDE_INT end = merged_store->start + merged_store->width;
2819 store_immediate_info *infof = m_store_info[first];
2821 for (unsigned int i = first; i <= last; ++i)
2823 store_immediate_info *info = m_store_info[i];
2824 struct symbolic_number this_n = info->n;
2825 this_n.type = type;
2826 if (!this_n.base_addr)
2827 this_n.range = try_size / BITS_PER_UNIT;
2828 else
2829 /* Update vuse in case it has been changed by output_merged_stores. */
2830 this_n.vuse = gimple_vuse (info->ins_stmt);
2831 unsigned int bitpos = info->bitpos - infof->bitpos;
2832 if (!do_shift_rotate (LSHIFT_EXPR, &this_n,
2833 BYTES_BIG_ENDIAN
2834 ? try_size - info->bitsize - bitpos
2835 : bitpos))
2836 return false;
2837 if (this_n.base_addr && vuse_store)
2839 unsigned int j;
2840 for (j = first; j <= last; ++j)
2841 if (this_n.vuse == gimple_vuse (m_store_info[j]->stmt))
2842 break;
2843 if (j > last)
2845 if (vuse_store == 1)
2846 return false;
2847 vuse_store = 0;
2850 if (i == first)
2852 n = this_n;
2853 ins_stmt = info->ins_stmt;
2855 else
2857 if (n.base_addr && n.vuse != this_n.vuse)
2859 if (vuse_store == 0)
2860 return false;
2861 vuse_store = 1;
2863 if (info->order > last_order)
2865 last_order = info->order;
2866 last_stmt = info->stmt;
2868 else if (info->order < first_order)
2870 first_order = info->order;
2871 first_stmt = info->stmt;
2873 end = MAX (end, info->bitpos + info->bitsize);
2875 ins_stmt = perform_symbolic_merge (ins_stmt, &n, info->ins_stmt,
2876 &this_n, &n);
2877 if (ins_stmt == NULL)
2878 return false;
2882 uint64_t cmpxchg, cmpnop;
2883 bool cast64_to_32;
2884 find_bswap_or_nop_finalize (&n, &cmpxchg, &cmpnop, &cast64_to_32);
2886 /* A complete byte swap should make the symbolic number start with
2887 the largest digit in the highest order byte. An unchanged symbolic
2888 number indicates a read with the same endianness as the target architecture. */
2889 if (n.n != cmpnop && n.n != cmpxchg)
2890 return false;
2892 /* For now. */
2893 if (cast64_to_32)
2894 return false;
2896 if (n.base_addr == NULL_TREE && !is_gimple_val (n.src))
2897 return false;
2899 if (!check_no_overlap (m_store_info, last, false, first_order, last_order,
2900 merged_store->start, end, first_earlier, first))
2901 return false;
2903 /* Don't handle memory copy this way if normal non-bswap processing
2904 would handle it too. */
2905 if (n.n == cmpnop && (unsigned) n.n_ops == last - first + 1)
2907 unsigned int i;
2908 for (i = first; i <= last; ++i)
2909 if (m_store_info[i]->rhs_code != MEM_REF)
2910 break;
2911 if (i == last + 1)
2912 return false;
2915 if (n.n == cmpxchg)
2916 switch (try_size)
2918 case 16:
2919 /* Will emit LROTATE_EXPR. */
2920 break;
2921 case 32:
2922 if (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
2923 && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing)
2924 break;
2925 return false;
2926 case 64:
2927 if (builtin_decl_explicit_p (BUILT_IN_BSWAP64)
2928 && optab_handler (bswap_optab, DImode) != CODE_FOR_nothing)
2929 break;
2930 return false;
2931 default:
2932 gcc_unreachable ();
2935 if (!allow_unaligned && n.base_addr)
2937 unsigned int align = get_object_alignment (n.src);
2938 if (align < try_size)
2939 return false;
2942 /* If each load has the vuse of the corresponding store, we need to verify
2943 that the loads can be sunk right before the last store. */
2944 if (vuse_store == 1)
2946 auto_vec<tree, 64> refs;
2947 for (unsigned int i = first; i <= last; ++i)
2948 gather_bswap_load_refs (&refs,
2949 gimple_assign_rhs1 (m_store_info[i]->stmt));
2951 for (tree ref : refs)
2952 if (stmts_may_clobber_ref_p (first_stmt, last_stmt, ref))
2953 return false;
2954 n.vuse = NULL_TREE;
2957 infof->n = n;
2958 infof->ins_stmt = ins_stmt;
2959 for (unsigned int i = first; i <= last; ++i)
2961 m_store_info[i]->rhs_code = n.n == cmpxchg ? LROTATE_EXPR : NOP_EXPR;
2962 m_store_info[i]->ops[0].base_addr = NULL_TREE;
2963 m_store_info[i]->ops[1].base_addr = NULL_TREE;
2964 if (i != first)
2965 merged_store->merge_into (m_store_info[i]);
2968 return true;
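/* Illustrative sketch (not part of the pass): the kind of source whose byte
   stores try_coalesce_bswap is meant to recognize, assuming a little-endian
   target with a 32-bit bswap optab:

     void
     store_be32 (unsigned char *p, unsigned int data)
     {
       p[0] = data >> 24;
       p[1] = data >> 16;
       p[2] = data >> 8;
       p[3] = data;
     }

   which can then be emitted as a single 4-byte store of
   __builtin_bswap32 (data).  */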
2971 /* Go through the candidate stores recorded in m_store_info and merge them
2972 into merged_store_group objects recorded into m_merged_store_groups
2973 representing the widened stores. Return true if coalescing was successful
2974 and the number of widened stores is fewer than the original number
2975 of stores. */
2977 bool
2978 imm_store_chain_info::coalesce_immediate_stores ()
2980 /* Anything less can't be processed. */
2981 if (m_store_info.length () < 2)
2982 return false;
2984 if (dump_file && (dump_flags & TDF_DETAILS))
2985 fprintf (dump_file, "Attempting to coalesce %u stores in chain\n",
2986 m_store_info.length ());
2988 store_immediate_info *info;
2989 unsigned int i, ignore = 0;
2990 unsigned int first_earlier = 0;
2991 unsigned int end_earlier = 0;
2993 /* Order the stores by the bitposition they write to. */
2994 m_store_info.qsort (sort_by_bitpos);
2996 info = m_store_info[0];
2997 merged_store_group *merged_store = new merged_store_group (info);
2998 if (dump_file && (dump_flags & TDF_DETAILS))
2999 fputs ("New store group\n", dump_file);
3001 FOR_EACH_VEC_ELT (m_store_info, i, info)
3003 unsigned HOST_WIDE_INT new_bitregion_start, new_bitregion_end;
3005 if (i <= ignore)
3006 goto done;
3008 while (first_earlier < end_earlier
3009 && (m_store_info[first_earlier]->bitpos
3010 + m_store_info[first_earlier]->bitsize
3011 <= merged_store->start))
3012 first_earlier++;
3014 /* First try to handle group of stores like:
3015 p[0] = data >> 24;
3016 p[1] = data >> 16;
3017 p[2] = data >> 8;
3018 p[3] = data;
3019 using the bswap framework. */
3020 if (info->bitpos == merged_store->start + merged_store->width
3021 && merged_store->stores.length () == 1
3022 && merged_store->stores[0]->ins_stmt != NULL
3023 && info->lp_nr == merged_store->lp_nr
3024 && info->ins_stmt != NULL)
3026 unsigned int try_size;
3027 for (try_size = 64; try_size >= 16; try_size >>= 1)
3028 if (try_coalesce_bswap (merged_store, i - 1, try_size,
3029 first_earlier))
3030 break;
3032 if (try_size >= 16)
3034 ignore = i + merged_store->stores.length () - 1;
3035 m_merged_store_groups.safe_push (merged_store);
3036 if (ignore < m_store_info.length ())
3038 merged_store = new merged_store_group (m_store_info[ignore]);
3039 end_earlier = ignore;
3041 else
3042 merged_store = NULL;
3043 goto done;
3047 new_bitregion_start
3048 = MIN (merged_store->bitregion_start, info->bitregion_start);
3049 new_bitregion_end
3050 = MAX (merged_store->bitregion_end, info->bitregion_end);
3052 if (info->order >= merged_store->first_nonmergeable_order
3053 || (((new_bitregion_end - new_bitregion_start + 1) / BITS_PER_UNIT)
3054 > (unsigned) param_store_merging_max_size))
3057 /* |---store 1---|
3058 |---store 2---|
3059 Overlapping stores. */
3060 else if (IN_RANGE (info->bitpos, merged_store->start,
3061 merged_store->start + merged_store->width - 1)
3062 /* |---store 1---||---store 2---|
3063 Handle also the consecutive INTEGER_CST stores case here,
3064 as we have here the code to deal with overlaps. */
3065 || (info->bitregion_start <= merged_store->bitregion_end
3066 && info->rhs_code == INTEGER_CST
3067 && merged_store->only_constants
3068 && merged_store->can_be_merged_into (info)))
3070 /* Only allow overlapping stores of constants. */
3071 if (info->rhs_code == INTEGER_CST
3072 && merged_store->only_constants
3073 && info->lp_nr == merged_store->lp_nr)
3075 unsigned int first_order
3076 = MIN (merged_store->first_order, info->order);
3077 unsigned int last_order
3078 = MAX (merged_store->last_order, info->order);
3079 unsigned HOST_WIDE_INT end
3080 = MAX (merged_store->start + merged_store->width,
3081 info->bitpos + info->bitsize);
3082 if (check_no_overlap (m_store_info, i, true, first_order,
3083 last_order, merged_store->start, end,
3084 first_earlier, end_earlier))
3086 /* check_no_overlap call above made sure there are no
3087 overlapping stores with non-INTEGER_CST rhs_code
3088 in between the first and last of the stores we've
3089 just merged. If there are any INTEGER_CST rhs_code
3090 stores in between, we need to merge_overlapping them
3091 even if in the sort_by_bitpos order there are other
3092 overlapping stores in between. Keep those stores as is.
3093 Example:
3094 MEM[(int *)p_28] = 0;
3095 MEM[(char *)p_28 + 3B] = 1;
3096 MEM[(char *)p_28 + 1B] = 2;
3097 MEM[(char *)p_28 + 2B] = MEM[(char *)p_28 + 6B];
3098 We can't merge the zero store with the store of two and
3099 not merge anything else, because the store of one is
3100 in the original order in between those two, but in
3101 sort_by_bitpos order it comes after the last store that
3102 we can't merge with them. We can merge the first 3 stores
3103 and keep the last store as is though. */
3104 unsigned int len = m_store_info.length ();
3105 unsigned int try_order = last_order;
3106 unsigned int first_nonmergeable_order;
3107 unsigned int k;
3108 bool last_iter = false;
3109 int attempts = 0;
3112 unsigned int max_order = 0;
3113 unsigned int min_order = first_order;
3114 unsigned first_nonmergeable_int_order = ~0U;
3115 unsigned HOST_WIDE_INT this_end = end;
3116 k = i;
3117 first_nonmergeable_order = ~0U;
3118 for (unsigned int j = i + 1; j < len; ++j)
3120 store_immediate_info *info2 = m_store_info[j];
3121 if (info2->bitpos >= this_end)
3122 break;
3123 if (info2->order < try_order)
3125 if (info2->rhs_code != INTEGER_CST
3126 || info2->lp_nr != merged_store->lp_nr)
3128 /* Normally check_no_overlap makes sure this
3129 doesn't happen, but if end grows below,
3130 then we need to process more stores than
3131 check_no_overlap verified. Example:
3132 MEM[(int *)p_5] = 0;
3133 MEM[(short *)p_5 + 3B] = 1;
3134 MEM[(char *)p_5 + 4B] = _9;
3135 MEM[(char *)p_5 + 2B] = 2; */
3136 k = 0;
3137 break;
3139 k = j;
3140 min_order = MIN (min_order, info2->order);
3141 this_end = MAX (this_end,
3142 info2->bitpos + info2->bitsize);
3144 else if (info2->rhs_code == INTEGER_CST
3145 && info2->lp_nr == merged_store->lp_nr
3146 && !last_iter)
3148 max_order = MAX (max_order, info2->order + 1);
3149 first_nonmergeable_int_order
3150 = MIN (first_nonmergeable_int_order,
3151 info2->order);
3153 else
3154 first_nonmergeable_order
3155 = MIN (first_nonmergeable_order, info2->order);
3157 if (k > i
3158 && !check_no_overlap (m_store_info, len - 1, true,
3159 min_order, try_order,
3160 merged_store->start, this_end,
3161 first_earlier, end_earlier))
3162 k = 0;
3163 if (k == 0)
3165 if (last_order == try_order)
3166 break;
3167 /* If this failed, but only because we grew
3168 try_order, retry with the last working one,
3169 so that we merge at least something. */
3170 try_order = last_order;
3171 last_iter = true;
3172 continue;
3174 last_order = try_order;
3175 /* Retry with a larger try_order to see if we could
3176 merge some further INTEGER_CST stores. */
3177 if (max_order
3178 && (first_nonmergeable_int_order
3179 < first_nonmergeable_order))
3181 try_order = MIN (max_order,
3182 first_nonmergeable_order);
3183 try_order
3184 = MIN (try_order,
3185 merged_store->first_nonmergeable_order);
3186 if (try_order > last_order && ++attempts < 16)
3187 continue;
3189 first_nonmergeable_order
3190 = MIN (first_nonmergeable_order,
3191 first_nonmergeable_int_order);
3192 end = this_end;
3193 break;
3195 while (1);
3197 if (k != 0)
3199 merged_store->merge_overlapping (info);
3201 merged_store->first_nonmergeable_order
3202 = MIN (merged_store->first_nonmergeable_order,
3203 first_nonmergeable_order);
3205 for (unsigned int j = i + 1; j <= k; j++)
3207 store_immediate_info *info2 = m_store_info[j];
3208 gcc_assert (info2->bitpos < end);
3209 if (info2->order < last_order)
3211 gcc_assert (info2->rhs_code == INTEGER_CST);
3212 if (info != info2)
3213 merged_store->merge_overlapping (info2);
3215 /* Other stores are kept and not merged in any
3216 way. */
3218 ignore = k;
3219 goto done;
3224 /* |---store 1---||---store 2---|
3225 This store is consecutive to the previous one.
3226 Merge it into the current store group. There can be gaps in between
3227 the stores, but there can't be gaps in between bitregions. */
3228 else if (info->bitregion_start <= merged_store->bitregion_end
3229 && merged_store->can_be_merged_into (info))
3231 store_immediate_info *infof = merged_store->stores[0];
3233 /* All the rhs_code ops that take 2 operands are commutative;
3234 swap the operands if that could make them compatible. */
3235 if (infof->ops[0].base_addr
3236 && infof->ops[1].base_addr
3237 && info->ops[0].base_addr
3238 && info->ops[1].base_addr
3239 && known_eq (info->ops[1].bitpos - infof->ops[0].bitpos,
3240 info->bitpos - infof->bitpos)
3241 && operand_equal_p (info->ops[1].base_addr,
3242 infof->ops[0].base_addr, 0))
3244 std::swap (info->ops[0], info->ops[1]);
3245 info->ops_swapped_p = true;
3247 if (check_no_overlap (m_store_info, i, false,
3248 MIN (merged_store->first_order, info->order),
3249 MAX (merged_store->last_order, info->order),
3250 merged_store->start,
3251 MAX (merged_store->start + merged_store->width,
3252 info->bitpos + info->bitsize),
3253 first_earlier, end_earlier))
3255 /* Turn MEM_REF into BIT_INSERT_EXPR for bit-field stores. */
3256 if (info->rhs_code == MEM_REF && infof->rhs_code != MEM_REF)
3258 info->rhs_code = BIT_INSERT_EXPR;
3259 info->ops[0].val = gimple_assign_rhs1 (info->stmt);
3260 info->ops[0].base_addr = NULL_TREE;
3262 else if (infof->rhs_code == MEM_REF && info->rhs_code != MEM_REF)
3264 for (store_immediate_info *infoj : merged_store->stores)
3266 infoj->rhs_code = BIT_INSERT_EXPR;
3267 infoj->ops[0].val = gimple_assign_rhs1 (infoj->stmt);
3268 infoj->ops[0].base_addr = NULL_TREE;
3270 merged_store->bit_insertion = true;
3272 if ((infof->ops[0].base_addr
3273 ? compatible_load_p (merged_store, info, base_addr, 0)
3274 : !info->ops[0].base_addr)
3275 && (infof->ops[1].base_addr
3276 ? compatible_load_p (merged_store, info, base_addr, 1)
3277 : !info->ops[1].base_addr))
3279 merged_store->merge_into (info);
3280 goto done;
3285 /* |---store 1---| <gap> |---store 2---|.
3286 Gap between stores or the rhs not compatible. Start a new group. */
3288 /* Try to apply all the stores recorded for the group to determine
3289 the bitpattern they write and discard it if that fails.
3290 This will also reject single-store groups. */
3291 if (merged_store->apply_stores ())
3292 m_merged_store_groups.safe_push (merged_store);
3293 else
3294 delete merged_store;
3296 merged_store = new merged_store_group (info);
3297 end_earlier = i;
3298 if (dump_file && (dump_flags & TDF_DETAILS))
3299 fputs ("New store group\n", dump_file);
3301 done:
3302 if (dump_file && (dump_flags & TDF_DETAILS))
3304 fprintf (dump_file, "Store %u:\nbitsize:" HOST_WIDE_INT_PRINT_DEC
3305 " bitpos:" HOST_WIDE_INT_PRINT_DEC " val:",
3306 i, info->bitsize, info->bitpos);
3307 print_generic_expr (dump_file, gimple_assign_rhs1 (info->stmt));
3308 fputc ('\n', dump_file);
3312 /* Record or discard the last store group. */
3313 if (merged_store)
3315 if (merged_store->apply_stores ())
3316 m_merged_store_groups.safe_push (merged_store);
3317 else
3318 delete merged_store;
3321 gcc_assert (m_merged_store_groups.length () <= m_store_info.length ());
3323 bool success
3324 = !m_merged_store_groups.is_empty ()
3325 && m_merged_store_groups.length () < m_store_info.length ();
3327 if (success && dump_file)
3328 fprintf (dump_file, "Coalescing successful!\nMerged into %u stores\n",
3329 m_merged_store_groups.length ());
3331 return success;
3334 /* Return the type to use for the merged stores or loads described by STMTS.
3335 This is needed to get the alias sets right. If IS_LOAD, look for rhs,
3336 otherwise lhs. Additionally set *CLIQUEP and *BASEP to MR_DEPENDENCE_*
3337 of the MEM_REFs if any. */
3339 static tree
3340 get_alias_type_for_stmts (vec<gimple *> &stmts, bool is_load,
3341 unsigned short *cliquep, unsigned short *basep)
3343 gimple *stmt;
3344 unsigned int i;
3345 tree type = NULL_TREE;
3346 tree ret = NULL_TREE;
3347 *cliquep = 0;
3348 *basep = 0;
3350 FOR_EACH_VEC_ELT (stmts, i, stmt)
3352 tree ref = is_load ? gimple_assign_rhs1 (stmt)
3353 : gimple_assign_lhs (stmt);
3354 tree type1 = reference_alias_ptr_type (ref);
3355 tree base = get_base_address (ref);
3357 if (i == 0)
3359 if (TREE_CODE (base) == MEM_REF)
3361 *cliquep = MR_DEPENDENCE_CLIQUE (base);
3362 *basep = MR_DEPENDENCE_BASE (base);
3364 ret = type = type1;
3365 continue;
3367 if (!alias_ptr_types_compatible_p (type, type1))
3368 ret = ptr_type_node;
3369 if (TREE_CODE (base) != MEM_REF
3370 || *cliquep != MR_DEPENDENCE_CLIQUE (base)
3371 || *basep != MR_DEPENDENCE_BASE (base))
3373 *cliquep = 0;
3374 *basep = 0;
3377 return ret;
3380 /* Return the location_t information we can find among the statements
3381 in STMTS. */
3383 static location_t
3384 get_location_for_stmts (vec<gimple *> &stmts)
3386 for (gimple *stmt : stmts)
3387 if (gimple_has_location (stmt))
3388 return gimple_location (stmt);
3390 return UNKNOWN_LOCATION;
3393 /* Used to describe a store resulting from splitting a wide store into smaller
3394 regularly-sized stores in split_group. */
3396 class split_store
3398 public:
3399 unsigned HOST_WIDE_INT bytepos;
3400 unsigned HOST_WIDE_INT size;
3401 unsigned HOST_WIDE_INT align;
3402 auto_vec<store_immediate_info *> orig_stores;
3403 /* True if there is a single orig stmt covering the whole split store. */
3404 bool orig;
3405 split_store (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
3406 unsigned HOST_WIDE_INT);
3409 /* Simple constructor. */
3411 split_store::split_store (unsigned HOST_WIDE_INT bp,
3412 unsigned HOST_WIDE_INT sz,
3413 unsigned HOST_WIDE_INT al)
3414 : bytepos (bp), size (sz), align (al), orig (false)
3416 orig_stores.create (0);
3419 /* Record all stores in GROUP that write to the region that starts at BITPOS
3420 and is BITSIZE bits wide. Record infos for such statements in STORES if
3421 non-NULL. The stores in GROUP must be sorted by bitposition. Return INFO
3422 if there is exactly one original store in the range (in that case ignore
3423 clobber stmts, unless there are only clobber stmts). */
3425 static store_immediate_info *
3426 find_constituent_stores (class merged_store_group *group,
3427 vec<store_immediate_info *> *stores,
3428 unsigned int *first,
3429 unsigned HOST_WIDE_INT bitpos,
3430 unsigned HOST_WIDE_INT bitsize)
3432 store_immediate_info *info, *ret = NULL;
3433 unsigned int i;
3434 bool second = false;
3435 bool update_first = true;
3436 unsigned HOST_WIDE_INT end = bitpos + bitsize;
3437 for (i = *first; group->stores.iterate (i, &info); ++i)
3439 unsigned HOST_WIDE_INT stmt_start = info->bitpos;
3440 unsigned HOST_WIDE_INT stmt_end = stmt_start + info->bitsize;
3441 if (stmt_end <= bitpos)
3443 /* BITPOS passed to this function never decreases from within the
3444 same split_group call, so optimize and don't scan info records
3445 which are known to end before or at BITPOS next time.
3446 Only do it if all stores before this one also pass this. */
3447 if (update_first)
3448 *first = i + 1;
3449 continue;
3451 else
3452 update_first = false;
3454 /* The stores in GROUP are ordered by bitposition so if we're past
3455 the region for this group return early. */
3456 if (stmt_start >= end)
3457 return ret;
3459 if (gimple_clobber_p (info->stmt))
3461 if (stores)
3462 stores->safe_push (info);
3463 if (ret == NULL)
3464 ret = info;
3465 continue;
3467 if (stores)
3469 stores->safe_push (info);
3470 if (ret && !gimple_clobber_p (ret->stmt))
3472 ret = NULL;
3473 second = true;
3476 else if (ret && !gimple_clobber_p (ret->stmt))
3477 return NULL;
3478 if (!second)
3479 ret = info;
3481 return ret;
3484 /* Return how many SSA_NAMEs used to compute the value to store in the INFO
3485 store have multiple uses. If any SSA_NAME has multiple uses, also
3486 count statements needed to compute it. */
3488 static unsigned
3489 count_multiple_uses (store_immediate_info *info)
3491 gimple *stmt = info->stmt;
3492 unsigned ret = 0;
3493 switch (info->rhs_code)
3495 case INTEGER_CST:
3496 case STRING_CST:
3497 return 0;
3498 case BIT_AND_EXPR:
3499 case BIT_IOR_EXPR:
3500 case BIT_XOR_EXPR:
3501 if (info->bit_not_p)
3503 if (!has_single_use (gimple_assign_rhs1 (stmt)))
3504 ret = 1; /* Fall through below to return
3505 the BIT_NOT_EXPR stmt and then
3506 BIT_{AND,IOR,XOR}_EXPR and anything it
3507 uses. */
3508 else
3509 /* stmt is after this the BIT_NOT_EXPR. */
3510 stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3512 if (!has_single_use (gimple_assign_rhs1 (stmt)))
3514 ret += 1 + info->ops[0].bit_not_p;
3515 if (info->ops[1].base_addr)
3516 ret += 1 + info->ops[1].bit_not_p;
3517 return ret + 1;
3519 stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3520 /* stmt is now the BIT_*_EXPR. */
3521 if (!has_single_use (gimple_assign_rhs1 (stmt)))
3522 ret += 1 + info->ops[info->ops_swapped_p].bit_not_p;
3523 else if (info->ops[info->ops_swapped_p].bit_not_p)
3525 gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3526 if (!has_single_use (gimple_assign_rhs1 (stmt2)))
3527 ++ret;
3529 if (info->ops[1].base_addr == NULL_TREE)
3531 gcc_checking_assert (!info->ops_swapped_p);
3532 return ret;
3534 if (!has_single_use (gimple_assign_rhs2 (stmt)))
3535 ret += 1 + info->ops[1 - info->ops_swapped_p].bit_not_p;
3536 else if (info->ops[1 - info->ops_swapped_p].bit_not_p)
3538 gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3539 if (!has_single_use (gimple_assign_rhs1 (stmt2)))
3540 ++ret;
3542 return ret;
3543 case MEM_REF:
3544 if (!has_single_use (gimple_assign_rhs1 (stmt)))
3545 return 1 + info->ops[0].bit_not_p;
3546 else if (info->ops[0].bit_not_p)
3548 stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3549 if (!has_single_use (gimple_assign_rhs1 (stmt)))
3550 return 1;
3552 return 0;
3553 case BIT_INSERT_EXPR:
3554 return has_single_use (gimple_assign_rhs1 (stmt)) ? 0 : 1;
3555 default:
3556 gcc_unreachable ();
3560 /* Split a merged store described by GROUP by populating the SPLIT_STORES
3561 vector (if non-NULL) with split_store structs describing the byte offset
3562 (from the base), the bit size and alignment of each store as well as the
3563 original statements involved in each such split group.
3564 This is to separate the splitting strategy from the statement
3565 building/emission/linking done in output_merged_store.
3566 Return number of new stores.
3567 If ALLOW_UNALIGNED_STORE is false, then all stores must be aligned.
3568 If ALLOW_UNALIGNED_LOAD is false, then all loads must be aligned.
3569 BZERO_FIRST may be true only when the first store covers the whole group
3570 and clears it; if BZERO_FIRST is true, keep that first store in the set
3571 unmodified and emit further stores for the overrides only.
3572 If SPLIT_STORES is NULL, it is just a dry run to count number of
3573 new stores. */
3575 static unsigned int
3576 split_group (merged_store_group *group, bool allow_unaligned_store,
3577 bool allow_unaligned_load, bool bzero_first,
3578 vec<split_store *> *split_stores,
3579 unsigned *total_orig,
3580 unsigned *total_new)
3582 unsigned HOST_WIDE_INT pos = group->bitregion_start;
3583 unsigned HOST_WIDE_INT size = group->bitregion_end - pos;
3584 unsigned HOST_WIDE_INT bytepos = pos / BITS_PER_UNIT;
3585 unsigned HOST_WIDE_INT group_align = group->align;
3586 unsigned HOST_WIDE_INT align_base = group->align_base;
3587 unsigned HOST_WIDE_INT group_load_align = group_align;
3588 bool any_orig = false;
3590 gcc_assert ((size % BITS_PER_UNIT == 0) && (pos % BITS_PER_UNIT == 0));
3592 /* For the bswap framework using sets of stores, all the checking has been
3593 done earlier in try_coalesce_bswap and the result always needs to be
3594 emitted as a single store. Likewise for string concatenation. */
3595 if (group->stores[0]->rhs_code == LROTATE_EXPR
3596 || group->stores[0]->rhs_code == NOP_EXPR
3597 || group->string_concatenation)
3599 gcc_assert (!bzero_first);
3600 if (total_orig)
3602 /* Avoid the old/new stmt count heuristics. It should always
3603 be beneficial. */
3604 total_new[0] = 1;
3605 total_orig[0] = 2;
3608 if (split_stores)
3610 unsigned HOST_WIDE_INT align_bitpos
3611 = (group->start - align_base) & (group_align - 1);
3612 unsigned HOST_WIDE_INT align = group_align;
3613 if (align_bitpos)
3614 align = least_bit_hwi (align_bitpos);
3615 bytepos = group->start / BITS_PER_UNIT;
3616 split_store *store
3617 = new split_store (bytepos, group->width, align);
3618 unsigned int first = 0;
3619 find_constituent_stores (group, &store->orig_stores,
3620 &first, group->start, group->width);
3621 split_stores->safe_push (store);
3624 return 1;
3627 unsigned int ret = 0, first = 0;
3628 unsigned HOST_WIDE_INT try_pos = bytepos;
3630 if (total_orig)
3632 unsigned int i;
3633 store_immediate_info *info = group->stores[0];
3635 total_new[0] = 0;
3636 total_orig[0] = 1; /* The orig store. */
3637 info = group->stores[0];
3638 if (info->ops[0].base_addr)
3639 total_orig[0]++;
3640 if (info->ops[1].base_addr)
3641 total_orig[0]++;
3642 switch (info->rhs_code)
3644 case BIT_AND_EXPR:
3645 case BIT_IOR_EXPR:
3646 case BIT_XOR_EXPR:
3647 total_orig[0]++; /* The orig BIT_*_EXPR stmt. */
3648 break;
3649 default:
3650 break;
3652 total_orig[0] *= group->stores.length ();
3654 FOR_EACH_VEC_ELT (group->stores, i, info)
3656 total_new[0] += count_multiple_uses (info);
3657 total_orig[0] += (info->bit_not_p
3658 + info->ops[0].bit_not_p
3659 + info->ops[1].bit_not_p);
3663 if (!allow_unaligned_load)
3664 for (int i = 0; i < 2; ++i)
3665 if (group->load_align[i])
3666 group_load_align = MIN (group_load_align, group->load_align[i]);
3668 if (bzero_first)
3670 store_immediate_info *gstore;
3671 FOR_EACH_VEC_ELT (group->stores, first, gstore)
3672 if (!gimple_clobber_p (gstore->stmt))
3673 break;
3674 ++first;
3675 ret = 1;
3676 if (split_stores)
3678 split_store *store
3679 = new split_store (bytepos, gstore->bitsize, align_base);
3680 store->orig_stores.safe_push (gstore);
3681 store->orig = true;
3682 any_orig = true;
3683 split_stores->safe_push (store);
3687 while (size > 0)
3689 if ((allow_unaligned_store || group_align <= BITS_PER_UNIT)
3690 && (group->mask[try_pos - bytepos] == (unsigned char) ~0U
3691 || (bzero_first && group->val[try_pos - bytepos] == 0)))
3693 /* Skip padding bytes. */
3694 ++try_pos;
3695 size -= BITS_PER_UNIT;
3696 continue;
3699 unsigned HOST_WIDE_INT try_bitpos = try_pos * BITS_PER_UNIT;
3700 unsigned int try_size = MAX_STORE_BITSIZE, nonmasked;
3701 unsigned HOST_WIDE_INT align_bitpos
3702 = (try_bitpos - align_base) & (group_align - 1);
3703 unsigned HOST_WIDE_INT align = group_align;
3704 bool found_orig = false;
3705 if (align_bitpos)
3706 align = least_bit_hwi (align_bitpos);
3707 if (!allow_unaligned_store)
3708 try_size = MIN (try_size, align);
3709 if (!allow_unaligned_load)
3711 /* If we can't do or don't want to do unaligned stores
3712 as well as loads, we need to take the loads into account
3713 as well. */
3714 unsigned HOST_WIDE_INT load_align = group_load_align;
3715 align_bitpos = (try_bitpos - align_base) & (load_align - 1);
3716 if (align_bitpos)
3717 load_align = least_bit_hwi (align_bitpos);
3718 for (int i = 0; i < 2; ++i)
3719 if (group->load_align[i])
3721 align_bitpos
3722 = known_alignment (try_bitpos
3723 - group->stores[0]->bitpos
3724 + group->stores[0]->ops[i].bitpos
3725 - group->load_align_base[i]);
3726 if (align_bitpos & (group_load_align - 1))
3728 unsigned HOST_WIDE_INT a = least_bit_hwi (align_bitpos);
3729 load_align = MIN (load_align, a);
3732 try_size = MIN (try_size, load_align);
3734 store_immediate_info *info
3735 = find_constituent_stores (group, NULL, &first, try_bitpos, try_size);
3736 if (info && !gimple_clobber_p (info->stmt))
3738 /* If there is just one original statement for the range, see if
3739 we can just reuse the original store which could be even larger
3740 than try_size. */
3741 unsigned HOST_WIDE_INT stmt_end
3742 = ROUND_UP (info->bitpos + info->bitsize, BITS_PER_UNIT);
3743 info = find_constituent_stores (group, NULL, &first, try_bitpos,
3744 stmt_end - try_bitpos);
3745 if (info && info->bitpos >= try_bitpos)
3747 store_immediate_info *info2 = NULL;
3748 unsigned int first_copy = first;
3749 if (info->bitpos > try_bitpos
3750 && stmt_end - try_bitpos <= try_size)
3752 info2 = find_constituent_stores (group, NULL, &first_copy,
3753 try_bitpos,
3754 info->bitpos - try_bitpos);
3755 gcc_assert (info2 == NULL || gimple_clobber_p (info2->stmt));
3757 if (info2 == NULL && stmt_end - try_bitpos < try_size)
3759 info2 = find_constituent_stores (group, NULL, &first_copy,
3760 stmt_end,
3761 (try_bitpos + try_size)
3762 - stmt_end);
3763 gcc_assert (info2 == NULL || gimple_clobber_p (info2->stmt));
3765 if (info2 == NULL)
3767 try_size = stmt_end - try_bitpos;
3768 found_orig = true;
3769 goto found;
3774 /* Approximate store bitsize for the case when there are no padding
3775 bits. */
3776 while (try_size > size)
3777 try_size /= 2;
3778 /* Now look for whole padding bytes at the end of that bitsize. */
3779 for (nonmasked = try_size / BITS_PER_UNIT; nonmasked > 0; --nonmasked)
3780 if (group->mask[try_pos - bytepos + nonmasked - 1]
3781 != (unsigned char) ~0U
3782 && (!bzero_first
3783 || group->val[try_pos - bytepos + nonmasked - 1] != 0))
3784 break;
3785 if (nonmasked == 0 || (info && gimple_clobber_p (info->stmt)))
3787 /* If entire try_size range is padding, skip it. */
3788 try_pos += try_size / BITS_PER_UNIT;
3789 size -= try_size;
3790 continue;
3792 /* Otherwise try to decrease try_size if the second half, last 3 quarters,
3793 etc. are padding. */
3794 nonmasked *= BITS_PER_UNIT;
3795 while (nonmasked <= try_size / 2)
3796 try_size /= 2;
3797 if (!allow_unaligned_store && group_align > BITS_PER_UNIT)
3799 /* Now look for whole padding bytes at the start of that bitsize. */
3800 unsigned int try_bytesize = try_size / BITS_PER_UNIT, masked;
3801 for (masked = 0; masked < try_bytesize; ++masked)
3802 if (group->mask[try_pos - bytepos + masked] != (unsigned char) ~0U
3803 && (!bzero_first
3804 || group->val[try_pos - bytepos + masked] != 0))
3805 break;
3806 masked *= BITS_PER_UNIT;
3807 gcc_assert (masked < try_size);
3808 if (masked >= try_size / 2)
3810 while (masked >= try_size / 2)
3812 try_size /= 2;
3813 try_pos += try_size / BITS_PER_UNIT;
3814 size -= try_size;
3815 masked -= try_size;
3817 /* Need to recompute the alignment, so just retry at the new
3818 position. */
3819 continue;
3823 found:
3824 ++ret;
3826 if (split_stores)
3828 split_store *store
3829 = new split_store (try_pos, try_size, align);
3830 info = find_constituent_stores (group, &store->orig_stores,
3831 &first, try_bitpos, try_size);
3832 if (info
3833 && !gimple_clobber_p (info->stmt)
3834 && info->bitpos >= try_bitpos
3835 && info->bitpos + info->bitsize <= try_bitpos + try_size
3836 && (store->orig_stores.length () == 1
3837 || found_orig
3838 || (info->bitpos == try_bitpos
3839 && (info->bitpos + info->bitsize
3840 == try_bitpos + try_size))))
3842 store->orig = true;
3843 any_orig = true;
3845 split_stores->safe_push (store);
3848 try_pos += try_size / BITS_PER_UNIT;
3849 size -= try_size;
3852 if (total_orig)
3854 unsigned int i;
3855 split_store *store;
3856 /* If we are reusing some original stores and any of the
3857 original SSA_NAMEs had multiple uses, we need to subtract
3858 those now before we add the new ones. */
3859 if (total_new[0] && any_orig)
3861 FOR_EACH_VEC_ELT (*split_stores, i, store)
3862 if (store->orig)
3863 total_new[0] -= count_multiple_uses (store->orig_stores[0]);
3865 total_new[0] += ret; /* The new store. */
3866 store_immediate_info *info = group->stores[0];
3867 if (info->ops[0].base_addr)
3868 total_new[0] += ret;
3869 if (info->ops[1].base_addr)
3870 total_new[0] += ret;
3871 switch (info->rhs_code)
3873 case BIT_AND_EXPR:
3874 case BIT_IOR_EXPR:
3875 case BIT_XOR_EXPR:
3876 total_new[0] += ret; /* The new BIT_*_EXPR stmt. */
3877 break;
3878 default:
3879 break;
3881 FOR_EACH_VEC_ELT (*split_stores, i, store)
3883 unsigned int j;
3884 bool bit_not_p[3] = { false, false, false };
3885 /* If all orig_stores have certain bit_not_p set, then
3886 we'd use a BIT_NOT_EXPR stmt and need to account for it.
3887 If some orig_stores have certain bit_not_p set, then
3888 we'd use a BIT_XOR_EXPR with a mask and need to account for
3889 it. */
3890 FOR_EACH_VEC_ELT (store->orig_stores, j, info)
3892 if (info->ops[0].bit_not_p)
3893 bit_not_p[0] = true;
3894 if (info->ops[1].bit_not_p)
3895 bit_not_p[1] = true;
3896 if (info->bit_not_p)
3897 bit_not_p[2] = true;
3899 total_new[0] += bit_not_p[0] + bit_not_p[1] + bit_not_p[2];
3904 return ret;
3907 /* Return the operation through which the operand IDX (if < 2) or
3908 result (IDX == 2) should be inverted. If NOP_EXPR, no inversion
3909 is done; if BIT_NOT_EXPR, all bits are inverted; if BIT_XOR_EXPR,
3910 the bits should be XORed with MASK. */
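/* A small example (illustrative only): consider a 4-byte split store made
   up of a plain 1-byte store, a 2-byte store whose value was loaded and
   bit-inverted (its bit_not_p is set), and another plain 1-byte store.
   Inverting the whole 4-byte value would be wrong, so BIT_XOR_EXPR is
   returned with a mask whose bytes are { 0x00, 0xff, 0xff, 0x00 };
   XOR-ing with it inverts exactly the bits that came from the inverted
   operand.  Only if every constituent store had bit_not_p set (and no
   padding) would BIT_NOT_EXPR be returned instead.  */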
3912 static enum tree_code
3913 invert_op (split_store *split_store, int idx, tree int_type, tree &mask)
3915 unsigned int i;
3916 store_immediate_info *info;
3917 unsigned int cnt = 0;
3918 bool any_paddings = false;
3919 FOR_EACH_VEC_ELT (split_store->orig_stores, i, info)
3921 bool bit_not_p = idx < 2 ? info->ops[idx].bit_not_p : info->bit_not_p;
3922 if (bit_not_p)
3924 ++cnt;
3925 tree lhs = gimple_assign_lhs (info->stmt);
3926 if (INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3927 && TYPE_PRECISION (TREE_TYPE (lhs)) < info->bitsize)
3928 any_paddings = true;
3931 mask = NULL_TREE;
3932 if (cnt == 0)
3933 return NOP_EXPR;
3934 if (cnt == split_store->orig_stores.length () && !any_paddings)
3935 return BIT_NOT_EXPR;
3937 unsigned HOST_WIDE_INT try_bitpos = split_store->bytepos * BITS_PER_UNIT;
3938 unsigned buf_size = split_store->size / BITS_PER_UNIT;
3939 unsigned char *buf
3940 = XALLOCAVEC (unsigned char, buf_size);
3941 memset (buf, ~0U, buf_size);
3942 FOR_EACH_VEC_ELT (split_store->orig_stores, i, info)
3944 bool bit_not_p = idx < 2 ? info->ops[idx].bit_not_p : info->bit_not_p;
3945 if (!bit_not_p)
3946 continue;
3947 /* Clear regions with bit_not_p and invert afterwards, rather than
3948 clear regions with !bit_not_p, so that gaps in between stores aren't
3949 set in the mask. */
3950 unsigned HOST_WIDE_INT bitsize = info->bitsize;
3951 unsigned HOST_WIDE_INT prec = bitsize;
3952 unsigned int pos_in_buffer = 0;
3953 if (any_paddings)
3955 tree lhs = gimple_assign_lhs (info->stmt);
3956 if (INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3957 && TYPE_PRECISION (TREE_TYPE (lhs)) < bitsize)
3958 prec = TYPE_PRECISION (TREE_TYPE (lhs));
3960 if (info->bitpos < try_bitpos)
3962 gcc_assert (info->bitpos + bitsize > try_bitpos);
3963 if (!BYTES_BIG_ENDIAN)
3965 if (prec <= try_bitpos - info->bitpos)
3966 continue;
3967 prec -= try_bitpos - info->bitpos;
3969 bitsize -= try_bitpos - info->bitpos;
3970 if (BYTES_BIG_ENDIAN && prec > bitsize)
3971 prec = bitsize;
3973 else
3974 pos_in_buffer = info->bitpos - try_bitpos;
3975 if (prec < bitsize)
3977 /* If this is a bool inversion, invert just the least significant
3978 prec bits rather than all bits of it. */
3979 if (BYTES_BIG_ENDIAN)
3981 pos_in_buffer += bitsize - prec;
3982 if (pos_in_buffer >= split_store->size)
3983 continue;
3985 bitsize = prec;
3987 if (pos_in_buffer + bitsize > split_store->size)
3988 bitsize = split_store->size - pos_in_buffer;
3989 unsigned char *p = buf + (pos_in_buffer / BITS_PER_UNIT);
3990 if (BYTES_BIG_ENDIAN)
3991 clear_bit_region_be (p, (BITS_PER_UNIT - 1
3992 - (pos_in_buffer % BITS_PER_UNIT)), bitsize);
3993 else
3994 clear_bit_region (p, pos_in_buffer % BITS_PER_UNIT, bitsize);
3996 for (unsigned int i = 0; i < buf_size; ++i)
3997 buf[i] = ~buf[i];
3998 mask = native_interpret_expr (int_type, buf, buf_size);
3999 return BIT_XOR_EXPR;
4002 /* Given a merged store group GROUP output the widened version of it.
4003 The store chain is against the base object BASE.
4004 Try store sizes of at most MAX_STORE_BITSIZE bits wide and don't output
4005 unaligned stores for STRICT_ALIGNMENT targets or if it's too expensive.
4006 Make sure that the number of statements output is less than the number of
4007 original statements. If a better sequence is possible, emit it and
4008 return true. */
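/* Sketch of the emitted sequence for a bit-field group (names are
   illustrative, not taken from a real testcase).  For two adjacent
   bit-field stores s.a = 1; s.b = 2; living in one 4-byte word, the
   merged output is a read-modify-write of that word:
     _1 = load of the destination word
     _2 = _1 & MASK     (MASK is zero over the stored bit positions)
     _3 = _2 | VAL      (VAL concatenates the stored values)
     store _3
   so the two original stores become one wide store plus the masking
   statements; the result is kept only when the split_group heuristics
   above say it is cheaper than the original sequence.  */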
4010 bool
4011 imm_store_chain_info::output_merged_store (merged_store_group *group)
4013 const unsigned HOST_WIDE_INT start_byte_pos
4014 = group->bitregion_start / BITS_PER_UNIT;
4015 unsigned int orig_num_stmts = group->stores.length ();
4016 if (orig_num_stmts < 2)
4017 return false;
4019 bool allow_unaligned_store
4020 = !STRICT_ALIGNMENT && param_store_merging_allow_unaligned;
4021 bool allow_unaligned_load = allow_unaligned_store;
4022 bool bzero_first = false;
4023 store_immediate_info *store;
4024 unsigned int num_clobber_stmts = 0;
4025 if (group->stores[0]->rhs_code == INTEGER_CST)
4027 unsigned int i;
4028 FOR_EACH_VEC_ELT (group->stores, i, store)
4029 if (gimple_clobber_p (store->stmt))
4030 num_clobber_stmts++;
4031 else if (TREE_CODE (gimple_assign_rhs1 (store->stmt)) == CONSTRUCTOR
4032 && CONSTRUCTOR_NELTS (gimple_assign_rhs1 (store->stmt)) == 0
4033 && group->start == store->bitpos
4034 && group->width == store->bitsize
4035 && (group->start % BITS_PER_UNIT) == 0
4036 && (group->width % BITS_PER_UNIT) == 0)
4038 bzero_first = true;
4039 break;
4041 else
4042 break;
4043 FOR_EACH_VEC_ELT_FROM (group->stores, i, store, i)
4044 if (gimple_clobber_p (store->stmt))
4045 num_clobber_stmts++;
4046 if (num_clobber_stmts == orig_num_stmts)
4047 return false;
4048 orig_num_stmts -= num_clobber_stmts;
4050 if (allow_unaligned_store || bzero_first)
4052 /* If unaligned stores are allowed, see how many stores we'd emit
4053 for unaligned and how many stores we'd emit for aligned stores.
4054 Only use unaligned stores if it allows fewer stores than aligned.
4055 Similarly, if the whole region is cleared first, compare expanding
4056 everything together against emitting the clear first followed by merged
4057 stores for the rest, and pick whichever needs fewer stores. */
4058 unsigned cnt[4] = { ~0U, ~0U, ~0U, ~0U };
4059 int pass_min = 0;
4060 for (int pass = 0; pass < 4; ++pass)
4062 if (!allow_unaligned_store && (pass & 1) != 0)
4063 continue;
4064 if (!bzero_first && (pass & 2) != 0)
4065 continue;
4066 cnt[pass] = split_group (group, (pass & 1) != 0,
4067 allow_unaligned_load, (pass & 2) != 0,
4068 NULL, NULL, NULL);
4069 if (cnt[pass] < cnt[pass_min])
4070 pass_min = pass;
4072 if ((pass_min & 1) == 0)
4073 allow_unaligned_store = false;
4074 if ((pass_min & 2) == 0)
4075 bzero_first = false;
4078 auto_vec<class split_store *, 32> split_stores;
4079 split_store *split_store;
4080 unsigned total_orig, total_new, i;
4081 split_group (group, allow_unaligned_store, allow_unaligned_load, bzero_first,
4082 &split_stores, &total_orig, &total_new);
4084 /* Determine if there is a clobber covering the whole group at the start,
4085 followed by proposed split stores that cover the whole group. In that
4086 case, prefer the transformation even if
4087 split_stores.length () == orig_num_stmts. */
4088 bool clobber_first = false;
4089 if (num_clobber_stmts
4090 && gimple_clobber_p (group->stores[0]->stmt)
4091 && group->start == group->stores[0]->bitpos
4092 && group->width == group->stores[0]->bitsize
4093 && (group->start % BITS_PER_UNIT) == 0
4094 && (group->width % BITS_PER_UNIT) == 0)
4096 clobber_first = true;
4097 unsigned HOST_WIDE_INT pos = group->start / BITS_PER_UNIT;
4098 FOR_EACH_VEC_ELT (split_stores, i, split_store)
4099 if (split_store->bytepos != pos)
4101 clobber_first = false;
4102 break;
4104 else
4105 pos += split_store->size / BITS_PER_UNIT;
4106 if (pos != (group->start + group->width) / BITS_PER_UNIT)
4107 clobber_first = false;
4110 if (split_stores.length () >= orig_num_stmts + clobber_first)
4113 /* We didn't manage to reduce the number of statements. Bail out. */
4114 if (dump_file && (dump_flags & TDF_DETAILS))
4115 fprintf (dump_file, "Exceeded original number of stmts (%u)."
4116 " Not profitable to emit new sequence.\n",
4117 orig_num_stmts);
4118 FOR_EACH_VEC_ELT (split_stores, i, split_store)
4119 delete split_store;
4120 return false;
4122 if (total_orig <= total_new)
4124 /* If the estimated number of new statements is not smaller than the
4125 estimated number of original statements, bail out too. */
4126 if (dump_file && (dump_flags & TDF_DETAILS))
4127 fprintf (dump_file, "Estimated number of original stmts (%u)"
4128 " not larger than estimated number of new"
4129 " stmts (%u).\n",
4130 total_orig, total_new);
4131 FOR_EACH_VEC_ELT (split_stores, i, split_store)
4132 delete split_store;
4133 return false;
4135 if (group->stores[0]->rhs_code == INTEGER_CST)
4137 bool all_orig = true;
4138 FOR_EACH_VEC_ELT (split_stores, i, split_store)
4139 if (!split_store->orig)
4141 all_orig = false;
4142 break;
4144 if (all_orig)
4146 unsigned int cnt = split_stores.length ();
4147 store_immediate_info *store;
4148 FOR_EACH_VEC_ELT (group->stores, i, store)
4149 if (gimple_clobber_p (store->stmt))
4150 ++cnt;
4151 /* Punt if we wouldn't make any real changes, i.e. keep all
4152 orig stmts + all clobbers. */
4153 if (cnt == group->stores.length ())
4155 if (dump_file && (dump_flags & TDF_DETAILS))
4156 fprintf (dump_file, "Exceeded original number of stmts (%u)."
4157 " Not profitable to emit new sequence.\n",
4158 orig_num_stmts);
4159 FOR_EACH_VEC_ELT (split_stores, i, split_store)
4160 delete split_store;
4161 return false;
4166 gimple_stmt_iterator last_gsi = gsi_for_stmt (group->last_stmt);
4167 gimple_seq seq = NULL;
4168 tree last_vdef, new_vuse;
4169 last_vdef = gimple_vdef (group->last_stmt);
4170 new_vuse = gimple_vuse (group->last_stmt);
4171 tree bswap_res = NULL_TREE;
4173 /* Clobbers are not removed. */
4174 if (gimple_clobber_p (group->last_stmt))
4176 new_vuse = make_ssa_name (gimple_vop (cfun), group->last_stmt);
4177 gimple_set_vdef (group->last_stmt, new_vuse);
4180 if (group->stores[0]->rhs_code == LROTATE_EXPR
4181 || group->stores[0]->rhs_code == NOP_EXPR)
4183 tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type;
4184 gimple *ins_stmt = group->stores[0]->ins_stmt;
4185 struct symbolic_number *n = &group->stores[0]->n;
4186 bool bswap = group->stores[0]->rhs_code == LROTATE_EXPR;
4188 switch (n->range)
4190 case 16:
4191 load_type = bswap_type = uint16_type_node;
4192 break;
4193 case 32:
4194 load_type = uint32_type_node;
4195 if (bswap)
4197 fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
4198 bswap_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
4200 break;
4201 case 64:
4202 load_type = uint64_type_node;
4203 if (bswap)
4205 fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
4206 bswap_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
4208 break;
4209 default:
4210 gcc_unreachable ();
4213 /* If each load has the vuse of the corresponding store,
4214 we've checked the aliasing already in try_coalesce_bswap and
4215 we want to sink the needed load into seq. So we need to use
4216 new_vuse on the load. */
4217 if (n->base_addr)
4219 if (n->vuse == NULL)
4221 n->vuse = new_vuse;
4222 ins_stmt = NULL;
4224 else
4225 /* Update vuse in case it has been changed by output_merged_stores. */
4226 n->vuse = gimple_vuse (ins_stmt);
4228 bswap_res = bswap_replace (gsi_start (seq), ins_stmt, fndecl,
4229 bswap_type, load_type, n, bswap,
4230 ~(uint64_t) 0);
4231 gcc_assert (bswap_res);
4234 gimple *stmt = NULL;
4235 auto_vec<gimple *, 32> orig_stmts;
4236 gimple_seq this_seq;
4237 tree addr = force_gimple_operand_1 (unshare_expr (base_addr), &this_seq,
4238 is_gimple_mem_ref_addr, NULL_TREE);
4239 gimple_seq_add_seq_without_update (&seq, this_seq);
4241 tree load_addr[2] = { NULL_TREE, NULL_TREE };
4242 gimple_seq load_seq[2] = { NULL, NULL };
4243 gimple_stmt_iterator load_gsi[2] = { gsi_none (), gsi_none () };
4244 for (int j = 0; j < 2; ++j)
4246 store_operand_info &op = group->stores[0]->ops[j];
4247 if (op.base_addr == NULL_TREE)
4248 continue;
4250 store_immediate_info *infol = group->stores.last ();
4251 if (gimple_vuse (op.stmt) == gimple_vuse (infol->ops[j].stmt))
4253 /* We can't pick the location randomly; while we've verified
4254 all the loads have the same vuse, they can still be in different
4255 basic blocks and we need to pick the one from the last bb:
4256 int x = q[0];
4257 if (x == N) return;
4258 int y = q[1];
4259 p[0] = x;
4260 p[1] = y;
4261 otherwise if we put the wider load at the q[0] load, we might
4262 segfault if q[1] is not mapped. */
4263 basic_block bb = gimple_bb (op.stmt);
4264 gimple *ostmt = op.stmt;
4265 store_immediate_info *info;
4266 FOR_EACH_VEC_ELT (group->stores, i, info)
4268 gimple *tstmt = info->ops[j].stmt;
4269 basic_block tbb = gimple_bb (tstmt);
4270 if (dominated_by_p (CDI_DOMINATORS, tbb, bb))
4272 ostmt = tstmt;
4273 bb = tbb;
4276 load_gsi[j] = gsi_for_stmt (ostmt);
4277 load_addr[j]
4278 = force_gimple_operand_1 (unshare_expr (op.base_addr),
4279 &load_seq[j], is_gimple_mem_ref_addr,
4280 NULL_TREE);
4282 else if (operand_equal_p (base_addr, op.base_addr, 0))
4283 load_addr[j] = addr;
4284 else
4286 load_addr[j]
4287 = force_gimple_operand_1 (unshare_expr (op.base_addr),
4288 &this_seq, is_gimple_mem_ref_addr,
4289 NULL_TREE);
4290 gimple_seq_add_seq_without_update (&seq, this_seq);
4294 FOR_EACH_VEC_ELT (split_stores, i, split_store)
4296 const unsigned HOST_WIDE_INT try_size = split_store->size;
4297 const unsigned HOST_WIDE_INT try_pos = split_store->bytepos;
4298 const unsigned HOST_WIDE_INT try_bitpos = try_pos * BITS_PER_UNIT;
4299 const unsigned HOST_WIDE_INT try_align = split_store->align;
4300 const unsigned HOST_WIDE_INT try_offset = try_pos - start_byte_pos;
4301 tree dest, src;
4302 location_t loc;
4304 if (split_store->orig)
4306 /* If there is just a single non-clobber constituent store
4307 which covers the whole area, just reuse the lhs and rhs. */
4308 gimple *orig_stmt = NULL;
4309 store_immediate_info *store;
4310 unsigned int j;
4311 FOR_EACH_VEC_ELT (split_store->orig_stores, j, store)
4312 if (!gimple_clobber_p (store->stmt))
4314 orig_stmt = store->stmt;
4315 break;
4317 dest = gimple_assign_lhs (orig_stmt);
4318 src = gimple_assign_rhs1 (orig_stmt);
4319 loc = gimple_location (orig_stmt);
4321 else
4323 store_immediate_info *info;
4324 unsigned short clique, base;
4325 unsigned int k;
4326 FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
4327 orig_stmts.safe_push (info->stmt);
4328 tree offset_type
4329 = get_alias_type_for_stmts (orig_stmts, false, &clique, &base);
4330 tree dest_type;
4331 loc = get_location_for_stmts (orig_stmts);
4332 orig_stmts.truncate (0);
4334 if (group->string_concatenation)
4335 dest_type
4336 = build_array_type_nelts (char_type_node,
4337 try_size / BITS_PER_UNIT);
4338 else
4340 dest_type = build_nonstandard_integer_type (try_size, UNSIGNED);
4341 dest_type = build_aligned_type (dest_type, try_align);
4343 dest = fold_build2 (MEM_REF, dest_type, addr,
4344 build_int_cst (offset_type, try_pos));
4345 if (TREE_CODE (dest) == MEM_REF)
4347 MR_DEPENDENCE_CLIQUE (dest) = clique;
4348 MR_DEPENDENCE_BASE (dest) = base;
4351 tree mask;
4352 if (bswap_res || group->string_concatenation)
4353 mask = integer_zero_node;
4354 else
4355 mask = native_interpret_expr (dest_type,
4356 group->mask + try_offset,
4357 group->buf_size);
4359 tree ops[2];
4360 for (int j = 0;
4361 j < 1 + (split_store->orig_stores[0]->ops[1].val != NULL_TREE);
4362 ++j)
4364 store_operand_info &op = split_store->orig_stores[0]->ops[j];
4365 if (bswap_res)
4366 ops[j] = bswap_res;
4367 else if (group->string_concatenation)
4369 ops[j] = build_string (try_size / BITS_PER_UNIT,
4370 (const char *) group->val + try_offset);
4371 TREE_TYPE (ops[j]) = dest_type;
4373 else if (op.base_addr)
4375 FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
4376 orig_stmts.safe_push (info->ops[j].stmt);
4378 offset_type = get_alias_type_for_stmts (orig_stmts, true,
4379 &clique, &base);
4380 location_t load_loc = get_location_for_stmts (orig_stmts);
4381 orig_stmts.truncate (0);
4383 unsigned HOST_WIDE_INT load_align = group->load_align[j];
4384 unsigned HOST_WIDE_INT align_bitpos
4385 = known_alignment (try_bitpos
4386 - split_store->orig_stores[0]->bitpos
4387 + op.bitpos);
4388 if (align_bitpos & (load_align - 1))
4389 load_align = least_bit_hwi (align_bitpos);
4391 tree load_int_type
4392 = build_nonstandard_integer_type (try_size, UNSIGNED);
4393 load_int_type
4394 = build_aligned_type (load_int_type, load_align);
4396 poly_uint64 load_pos
4397 = exact_div (try_bitpos
4398 - split_store->orig_stores[0]->bitpos
4399 + op.bitpos,
4400 BITS_PER_UNIT);
4401 ops[j] = fold_build2 (MEM_REF, load_int_type, load_addr[j],
4402 build_int_cst (offset_type, load_pos));
4403 if (TREE_CODE (ops[j]) == MEM_REF)
4405 MR_DEPENDENCE_CLIQUE (ops[j]) = clique;
4406 MR_DEPENDENCE_BASE (ops[j]) = base;
4408 if (!integer_zerop (mask))
4410 /* The load might load some bits (that will be masked
4411 off later on) uninitialized, avoid -W*uninitialized
4412 warnings in that case. */
4413 suppress_warning (ops[j], OPT_Wuninitialized);
4416 stmt = gimple_build_assign (make_ssa_name (dest_type), ops[j]);
4417 gimple_set_location (stmt, load_loc);
4418 if (gsi_bb (load_gsi[j]))
4420 gimple_set_vuse (stmt, gimple_vuse (op.stmt));
4421 gimple_seq_add_stmt_without_update (&load_seq[j], stmt);
4423 else
4425 gimple_set_vuse (stmt, new_vuse);
4426 gimple_seq_add_stmt_without_update (&seq, stmt);
4428 ops[j] = gimple_assign_lhs (stmt);
4429 tree xor_mask;
4430 enum tree_code inv_op
4431 = invert_op (split_store, j, dest_type, xor_mask);
4432 if (inv_op != NOP_EXPR)
4434 stmt = gimple_build_assign (make_ssa_name (dest_type),
4435 inv_op, ops[j], xor_mask);
4436 gimple_set_location (stmt, load_loc);
4437 ops[j] = gimple_assign_lhs (stmt);
4439 if (gsi_bb (load_gsi[j]))
4440 gimple_seq_add_stmt_without_update (&load_seq[j],
4441 stmt);
4442 else
4443 gimple_seq_add_stmt_without_update (&seq, stmt);
4446 else
4447 ops[j] = native_interpret_expr (dest_type,
4448 group->val + try_offset,
4449 group->buf_size);
4452 switch (split_store->orig_stores[0]->rhs_code)
4454 case BIT_AND_EXPR:
4455 case BIT_IOR_EXPR:
4456 case BIT_XOR_EXPR:
4457 FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
4459 tree rhs1 = gimple_assign_rhs1 (info->stmt);
4460 orig_stmts.safe_push (SSA_NAME_DEF_STMT (rhs1));
4462 location_t bit_loc;
4463 bit_loc = get_location_for_stmts (orig_stmts);
4464 orig_stmts.truncate (0);
4466 stmt
4467 = gimple_build_assign (make_ssa_name (dest_type),
4468 split_store->orig_stores[0]->rhs_code,
4469 ops[0], ops[1]);
4470 gimple_set_location (stmt, bit_loc);
4471 /* If there is just one load and there is a separate
4472 load_seq[0], emit the bitwise op right after it. */
4473 if (load_addr[1] == NULL_TREE && gsi_bb (load_gsi[0]))
4474 gimple_seq_add_stmt_without_update (&load_seq[0], stmt);
4475 /* Otherwise, if at least one load is in seq, we need to
4476 emit the bitwise op right before the store. If there
4477 are two loads and they are emitted somewhere else, it would
4478 be better to emit the bitwise op as early as possible;
4479 we don't track where that would be possible right now
4480 though. */
4481 else
4482 gimple_seq_add_stmt_without_update (&seq, stmt);
4483 src = gimple_assign_lhs (stmt);
4484 tree xor_mask;
4485 enum tree_code inv_op;
4486 inv_op = invert_op (split_store, 2, dest_type, xor_mask);
4487 if (inv_op != NOP_EXPR)
4489 stmt = gimple_build_assign (make_ssa_name (dest_type),
4490 inv_op, src, xor_mask);
4491 gimple_set_location (stmt, bit_loc);
4492 if (load_addr[1] == NULL_TREE && gsi_bb (load_gsi[0]))
4493 gimple_seq_add_stmt_without_update (&load_seq[0], stmt);
4494 else
4495 gimple_seq_add_stmt_without_update (&seq, stmt);
4496 src = gimple_assign_lhs (stmt);
4498 break;
4499 case LROTATE_EXPR:
4500 case NOP_EXPR:
4501 src = ops[0];
4502 if (!is_gimple_val (src))
4504 stmt = gimple_build_assign (make_ssa_name (TREE_TYPE (src)),
4505 src);
4506 gimple_seq_add_stmt_without_update (&seq, stmt);
4507 src = gimple_assign_lhs (stmt);
4509 if (!useless_type_conversion_p (dest_type, TREE_TYPE (src)))
4511 stmt = gimple_build_assign (make_ssa_name (dest_type),
4512 NOP_EXPR, src);
4513 gimple_seq_add_stmt_without_update (&seq, stmt);
4514 src = gimple_assign_lhs (stmt);
4516 inv_op = invert_op (split_store, 2, dest_type, xor_mask);
4517 if (inv_op != NOP_EXPR)
4519 stmt = gimple_build_assign (make_ssa_name (dest_type),
4520 inv_op, src, xor_mask);
4521 gimple_set_location (stmt, loc);
4522 gimple_seq_add_stmt_without_update (&seq, stmt);
4523 src = gimple_assign_lhs (stmt);
4525 break;
4526 default:
4527 src = ops[0];
4528 break;
4531 /* If bit insertion is required, we use the source as an accumulator
4532 into which the successive bit-field values are manually inserted.
4533 FIXME: perhaps use BIT_INSERT_EXPR instead in some cases? */
4534 if (group->bit_insertion)
4535 FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
4536 if (info->rhs_code == BIT_INSERT_EXPR
4537 && info->bitpos < try_bitpos + try_size
4538 && info->bitpos + info->bitsize > try_bitpos)
4540 /* Mask, truncate, convert to final type, shift and ior into
4541 the accumulator. Note that every step can be a no-op. */
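/* E.g. (little-endian sketch) to insert a 5-bit value V at bit 3 of
   the chunk: tem = V & 0x1f; tem is converted to dest_type;
   tem = tem << 3; src = src | tem.  On big-endian targets the
   shift count comes from the gap at the other end instead.  */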
4542 const HOST_WIDE_INT start_gap = info->bitpos - try_bitpos;
4543 const HOST_WIDE_INT end_gap
4544 = (try_bitpos + try_size) - (info->bitpos + info->bitsize);
4545 tree tem = info->ops[0].val;
4546 if (!INTEGRAL_TYPE_P (TREE_TYPE (tem)))
4548 const unsigned HOST_WIDE_INT size
4549 = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (tem)));
4550 tree integer_type
4551 = build_nonstandard_integer_type (size, UNSIGNED);
4552 tem = gimple_build (&seq, loc, VIEW_CONVERT_EXPR,
4553 integer_type, tem);
4555 if (TYPE_PRECISION (TREE_TYPE (tem)) <= info->bitsize)
4557 tree bitfield_type
4558 = build_nonstandard_integer_type (info->bitsize,
4559 UNSIGNED);
4560 tem = gimple_convert (&seq, loc, bitfield_type, tem);
4562 else if ((BYTES_BIG_ENDIAN ? start_gap : end_gap) > 0)
4564 const unsigned HOST_WIDE_INT imask
4565 = (HOST_WIDE_INT_1U << info->bitsize) - 1;
4566 tem = gimple_build (&seq, loc,
4567 BIT_AND_EXPR, TREE_TYPE (tem), tem,
4568 build_int_cst (TREE_TYPE (tem),
4569 imask));
4571 const HOST_WIDE_INT shift
4572 = (BYTES_BIG_ENDIAN ? end_gap : start_gap);
4573 if (shift < 0)
4574 tem = gimple_build (&seq, loc,
4575 RSHIFT_EXPR, TREE_TYPE (tem), tem,
4576 build_int_cst (NULL_TREE, -shift));
4577 tem = gimple_convert (&seq, loc, dest_type, tem);
4578 if (shift > 0)
4579 tem = gimple_build (&seq, loc,
4580 LSHIFT_EXPR, dest_type, tem,
4581 build_int_cst (NULL_TREE, shift));
4582 src = gimple_build (&seq, loc,
4583 BIT_IOR_EXPR, dest_type, tem, src);
4586 if (!integer_zerop (mask))
4588 tree tem = make_ssa_name (dest_type);
4589 tree load_src = unshare_expr (dest);
4590 /* The load might load some or all bits uninitialized,
4591 avoid -W*uninitialized warnings in that case.
4592 As an optimization, if all the bits were provably
4593 uninitialized (no stores at all yet, or the previous store
4594 was a CLOBBER), we could optimize away the load and replace
4595 it e.g. with 0. */
4596 suppress_warning (load_src, OPT_Wuninitialized);
4597 stmt = gimple_build_assign (tem, load_src);
4598 gimple_set_location (stmt, loc);
4599 gimple_set_vuse (stmt, new_vuse);
4600 gimple_seq_add_stmt_without_update (&seq, stmt);
4602 /* FIXME: If there is a single chunk of zero bits in mask,
4603 perhaps use BIT_INSERT_EXPR instead? */
4604 stmt = gimple_build_assign (make_ssa_name (dest_type),
4605 BIT_AND_EXPR, tem, mask);
4606 gimple_set_location (stmt, loc);
4607 gimple_seq_add_stmt_without_update (&seq, stmt);
4608 tem = gimple_assign_lhs (stmt);
4610 if (TREE_CODE (src) == INTEGER_CST)
4611 src = wide_int_to_tree (dest_type,
4612 wi::bit_and_not (wi::to_wide (src),
4613 wi::to_wide (mask)));
4614 else
4616 tree nmask
4617 = wide_int_to_tree (dest_type,
4618 wi::bit_not (wi::to_wide (mask)));
4619 stmt = gimple_build_assign (make_ssa_name (dest_type),
4620 BIT_AND_EXPR, src, nmask);
4621 gimple_set_location (stmt, loc);
4622 gimple_seq_add_stmt_without_update (&seq, stmt);
4623 src = gimple_assign_lhs (stmt);
4625 stmt = gimple_build_assign (make_ssa_name (dest_type),
4626 BIT_IOR_EXPR, tem, src);
4627 gimple_set_location (stmt, loc);
4628 gimple_seq_add_stmt_without_update (&seq, stmt);
4629 src = gimple_assign_lhs (stmt);
4633 stmt = gimple_build_assign (dest, src);
4634 gimple_set_location (stmt, loc);
4635 gimple_set_vuse (stmt, new_vuse);
4636 gimple_seq_add_stmt_without_update (&seq, stmt);
4638 if (group->lp_nr && stmt_could_throw_p (cfun, stmt))
4639 add_stmt_to_eh_lp (stmt, group->lp_nr);
4641 tree new_vdef;
4642 if (i < split_stores.length () - 1)
4643 new_vdef = make_ssa_name (gimple_vop (cfun), stmt);
4644 else
4645 new_vdef = last_vdef;
4647 gimple_set_vdef (stmt, new_vdef);
4648 SSA_NAME_DEF_STMT (new_vdef) = stmt;
4649 new_vuse = new_vdef;
4652 FOR_EACH_VEC_ELT (split_stores, i, split_store)
4653 delete split_store;
4655 gcc_assert (seq);
4656 if (dump_file)
4658 fprintf (dump_file,
4659 "New sequence of %u stores to replace old one of %u stores\n",
4660 split_stores.length (), orig_num_stmts);
4661 if (dump_flags & TDF_DETAILS)
4662 print_gimple_seq (dump_file, seq, 0, TDF_VOPS | TDF_MEMSYMS);
4665 if (gimple_clobber_p (group->last_stmt))
4666 update_stmt (group->last_stmt);
4668 if (group->lp_nr > 0)
4670 /* We're going to insert a sequence of (potentially) throwing stores
4671 into an active EH region. This means that we're going to create
4672 new basic blocks with EH edges pointing to the post landing pad
4673 and, therefore, to have to update its PHI nodes, if any. For the
4674 virtual PHI node, we're going to use the VDEFs created above, but
4675 for the other nodes, we need to record the original reaching defs. */
4676 eh_landing_pad lp = get_eh_landing_pad_from_number (group->lp_nr);
4677 basic_block lp_bb = label_to_block (cfun, lp->post_landing_pad);
4678 basic_block last_bb = gimple_bb (group->last_stmt);
4679 edge last_edge = find_edge (last_bb, lp_bb);
4680 auto_vec<tree, 16> last_defs;
4681 gphi_iterator gpi;
4682 for (gpi = gsi_start_phis (lp_bb); !gsi_end_p (gpi); gsi_next (&gpi))
4684 gphi *phi = gpi.phi ();
4685 tree last_def;
4686 if (virtual_operand_p (gimple_phi_result (phi)))
4687 last_def = NULL_TREE;
4688 else
4689 last_def = gimple_phi_arg_def (phi, last_edge->dest_idx);
4690 last_defs.safe_push (last_def);
4693 /* Do the insertion. Then, if new basic blocks have been created in the
4694 process, rewind the chain of VDEFs created above to walk the new basic
4695 blocks and update the corresponding arguments of the PHI nodes. */
4696 update_modified_stmts (seq);
4697 if (gimple_find_sub_bbs (seq, &last_gsi))
4698 while (last_vdef != gimple_vuse (group->last_stmt))
4700 gimple *stmt = SSA_NAME_DEF_STMT (last_vdef);
4701 if (stmt_could_throw_p (cfun, stmt))
4703 edge new_edge = find_edge (gimple_bb (stmt), lp_bb);
4704 unsigned int i;
4705 for (gpi = gsi_start_phis (lp_bb), i = 0;
4706 !gsi_end_p (gpi);
4707 gsi_next (&gpi), i++)
4709 gphi *phi = gpi.phi ();
4710 tree new_def;
4711 if (virtual_operand_p (gimple_phi_result (phi)))
4712 new_def = last_vdef;
4713 else
4714 new_def = last_defs[i];
4715 add_phi_arg (phi, new_def, new_edge, UNKNOWN_LOCATION);
4718 last_vdef = gimple_vuse (stmt);
4721 else
4722 gsi_insert_seq_after (&last_gsi, seq, GSI_SAME_STMT);
4724 for (int j = 0; j < 2; ++j)
4725 if (load_seq[j])
4726 gsi_insert_seq_after (&load_gsi[j], load_seq[j], GSI_SAME_STMT);
4728 return true;
4731 /* Process the merged_store_group objects created in the coalescing phase.
4732 The stores are all against the base object BASE.
4733 Try to output the widened stores and delete the original statements if
4734 successful. Return true iff any changes were made. */
4736 bool
4737 imm_store_chain_info::output_merged_stores ()
4739 unsigned int i;
4740 merged_store_group *merged_store;
4741 bool ret = false;
4742 FOR_EACH_VEC_ELT (m_merged_store_groups, i, merged_store)
4744 if (dbg_cnt (store_merging)
4745 && output_merged_store (merged_store))
4747 unsigned int j;
4748 store_immediate_info *store;
4749 FOR_EACH_VEC_ELT (merged_store->stores, j, store)
4751 gimple *stmt = store->stmt;
4752 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4753 /* Don't remove clobbers, they are still useful even if
4754 everything is overwritten afterwards. */
4755 if (gimple_clobber_p (stmt))
4756 continue;
4757 gsi_remove (&gsi, true);
4758 if (store->lp_nr)
4759 remove_stmt_from_eh_lp (stmt);
4760 if (stmt != merged_store->last_stmt)
4762 unlink_stmt_vdef (stmt);
4763 release_defs (stmt);
4766 ret = true;
4769 if (ret && dump_file)
4770 fprintf (dump_file, "Merging successful!\n");
4772 return ret;
4775 /* Coalesce the store_immediate_info objects recorded against the base object
4776 BASE in the first phase and output them.
4777 Delete the allocated structures.
4778 Return true if any changes were made. */
4780 bool
4781 imm_store_chain_info::terminate_and_process_chain ()
4783 if (dump_file && (dump_flags & TDF_DETAILS))
4784 fprintf (dump_file, "Terminating chain with %u stores\n",
4785 m_store_info.length ());
4786 /* Process store chain. */
4787 bool ret = false;
4788 if (m_store_info.length () > 1)
4790 ret = coalesce_immediate_stores ();
4791 if (ret)
4792 ret = output_merged_stores ();
4795 /* Delete all the entries we allocated ourselves. */
4796 store_immediate_info *info;
4797 unsigned int i;
4798 FOR_EACH_VEC_ELT (m_store_info, i, info)
4799 delete info;
4801 merged_store_group *merged_info;
4802 FOR_EACH_VEC_ELT (m_merged_store_groups, i, merged_info)
4803 delete merged_info;
4805 return ret;
4808 /* Return true iff LHS is a destination potentially interesting for
4809 store merging. In practice these are the codes that get_inner_reference
4810 can process. */
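/* E.g. destinations such as s.f (COMPONENT_REF), a[i] (ARRAY_REF),
   *p (MEM_REF), plain decls and bit-field references are accepted;
   anything get_inner_reference cannot decompose is not.  */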
4812 static bool
4813 lhs_valid_for_store_merging_p (tree lhs)
4815 if (DECL_P (lhs))
4816 return true;
4818 switch (TREE_CODE (lhs))
4820 case ARRAY_REF:
4821 case ARRAY_RANGE_REF:
4822 case BIT_FIELD_REF:
4823 case COMPONENT_REF:
4824 case MEM_REF:
4825 case VIEW_CONVERT_EXPR:
4826 return true;
4827 default:
4828 return false;
4831 gcc_unreachable ();
4834 /* Return true if the tree RHS is a constant we want to consider
4835 during store merging. In practice accept all codes that
4836 native_encode_expr accepts. */
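/* E.g. integer and floating-point constants qualify (anything
   native_encode_expr can turn into bytes with a constant-size mode),
   and so does the empty CONSTRUCTOR of an aggregate zero-initializer
   such as "struct S s = {};", which later enables the bzero_first
   handling in split_group.  */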
4838 static bool
4839 rhs_valid_for_store_merging_p (tree rhs)
4841 unsigned HOST_WIDE_INT size;
4842 if (TREE_CODE (rhs) == CONSTRUCTOR
4843 && CONSTRUCTOR_NELTS (rhs) == 0
4844 && TYPE_SIZE_UNIT (TREE_TYPE (rhs))
4845 && tree_fits_uhwi_p (TYPE_SIZE_UNIT (TREE_TYPE (rhs))))
4846 return true;
4847 return (GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (rhs))).is_constant (&size)
4848 && native_encode_expr (rhs, NULL, size) != 0);
4851 /* Adjust *PBITPOS, *PBITREGION_START and *PBITREGION_END by BYTE_OFF bytes
4852 and return true on success or false on failure. */
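/* For instance, with BYTE_OFF == 2 every recorded position is biased by
   16 bits: a store at bit 8 of MEM_REF [p + 2] becomes bit 24 relative
   to p, and the bit region bounds are shifted the same way.  The
   function returns false if the adjusted bit position would become
   negative or does not fit in a signed HOST_WIDE_INT.  */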
4854 static bool
4855 adjust_bit_pos (poly_offset_int byte_off,
4856 poly_int64 *pbitpos,
4857 poly_uint64 *pbitregion_start,
4858 poly_uint64 *pbitregion_end)
4860 poly_offset_int bit_off = byte_off << LOG2_BITS_PER_UNIT;
4861 bit_off += *pbitpos;
4863 if (known_ge (bit_off, 0) && bit_off.to_shwi (pbitpos))
4865 if (maybe_ne (*pbitregion_end, 0U))
4867 bit_off = byte_off << LOG2_BITS_PER_UNIT;
4868 bit_off += *pbitregion_start;
4869 if (bit_off.to_uhwi (pbitregion_start))
4871 bit_off = byte_off << LOG2_BITS_PER_UNIT;
4872 bit_off += *pbitregion_end;
4873 if (!bit_off.to_uhwi (pbitregion_end))
4874 *pbitregion_end = 0;
4876 else
4877 *pbitregion_end = 0;
4879 return true;
4881 else
4882 return false;
4885 /* If MEM is a memory reference usable for store merging (either as
4886 store destination or for loads), return the non-NULL base_addr
4887 and set *PBITSIZE, *PBITPOS, *PBITREGION_START and *PBITREGION_END.
4888 Otherwise return NULL; *PBITPOS should still be valid even in that
4889 case. */
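/* Illustrative example: for a destination like MEM[(int *)p_1 + 4B] the
   returned base is the pointer p_1 with *PBITPOS biased by 32 bits, so
   stores through p_1 + 4B and p_1 + 8B land in the same chain keyed on
   p_1.  For a destination rooted at a decl, e.g. s.f, the base becomes
   &s and *PBITPOS the bit position of f within s.  */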
4891 static tree
4892 mem_valid_for_store_merging (tree mem, poly_uint64 *pbitsize,
4893 poly_uint64 *pbitpos,
4894 poly_uint64 *pbitregion_start,
4895 poly_uint64 *pbitregion_end)
4897 poly_int64 bitsize, bitpos;
4898 poly_uint64 bitregion_start = 0, bitregion_end = 0;
4899 machine_mode mode;
4900 int unsignedp = 0, reversep = 0, volatilep = 0;
4901 tree offset;
4902 tree base_addr = get_inner_reference (mem, &bitsize, &bitpos, &offset, &mode,
4903 &unsignedp, &reversep, &volatilep);
4904 *pbitsize = bitsize;
4905 if (known_eq (bitsize, 0))
4906 return NULL_TREE;
4908 if (TREE_CODE (mem) == COMPONENT_REF
4909 && DECL_BIT_FIELD_TYPE (TREE_OPERAND (mem, 1)))
4911 get_bit_range (&bitregion_start, &bitregion_end, mem, &bitpos, &offset);
4912 if (maybe_ne (bitregion_end, 0U))
4913 bitregion_end += 1;
4916 if (reversep)
4917 return NULL_TREE;
4919 /* We do not want to rewrite TARGET_MEM_REFs. */
4920 if (TREE_CODE (base_addr) == TARGET_MEM_REF)
4921 return NULL_TREE;
4922 /* In some cases get_inner_reference may return a
4923 MEM_REF [ptr + byteoffset]. For the purposes of this pass
4924 canonicalize the base_addr to MEM_REF [ptr] and take
4925 byteoffset into account in the bitpos. This occurs in
4926 PR 23684 and this way we can catch more chains. */
4927 else if (TREE_CODE (base_addr) == MEM_REF)
4929 if (!adjust_bit_pos (mem_ref_offset (base_addr), &bitpos,
4930 &bitregion_start, &bitregion_end))
4931 return NULL_TREE;
4932 base_addr = TREE_OPERAND (base_addr, 0);
4934 /* get_inner_reference returns the base object, get at its
4935 address now. */
4936 else
4938 if (maybe_lt (bitpos, 0))
4939 return NULL_TREE;
4940 base_addr = build_fold_addr_expr (base_addr);
4943 if (offset)
4945 /* If the access is variable offset then a base decl has to be
4946 address-taken to be able to emit pointer-based stores to it.
4947 ??? We might be able to get away with re-using the original
4948 base up to the first variable part and then wrapping that inside
4949 a BIT_FIELD_REF. */
4950 tree base = get_base_address (base_addr);
4951 if (!base || (DECL_P (base) && !TREE_ADDRESSABLE (base)))
4952 return NULL_TREE;
4954 /* Similarly to above for the base, remove constant from the offset. */
4955 if (TREE_CODE (offset) == PLUS_EXPR
4956 && TREE_CODE (TREE_OPERAND (offset, 1)) == INTEGER_CST
4957 && adjust_bit_pos (wi::to_poly_offset (TREE_OPERAND (offset, 1)),
4958 &bitpos, &bitregion_start, &bitregion_end))
4959 offset = TREE_OPERAND (offset, 0);
4961 base_addr = build2 (POINTER_PLUS_EXPR, TREE_TYPE (base_addr),
4962 base_addr, offset);
4965 if (known_eq (bitregion_end, 0U))
4967 bitregion_start = round_down_to_byte_boundary (bitpos);
4968 bitregion_end = round_up_to_byte_boundary (bitpos + bitsize);
4971 *pbitsize = bitsize;
4972 *pbitpos = bitpos;
4973 *pbitregion_start = bitregion_start;
4974 *pbitregion_end = bitregion_end;
4975 return base_addr;
4978 /* Return true if STMT is a load that can be used for store merging.
4979 In that case fill in *OP. BITSIZE, BITPOS, BITREGION_START and
4980 BITREGION_END are properties of the corresponding store. */
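/* E.g. given
     _1 = *q_5;
     *p_7 = _1;
   the load qualifies and *OP records q_5 as the load base together with
   its bit position and size.  The bit-inverted variant
     _1 = *q_5;  _2 = ~_1;  *p_7 = _2;
   is also accepted with OP->bit_not_p set, but a double inversion is
   rejected so the multiple-uses accounting stays simple.  */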
4982 static bool
4983 handled_load (gimple *stmt, store_operand_info *op,
4984 poly_uint64 bitsize, poly_uint64 bitpos,
4985 poly_uint64 bitregion_start, poly_uint64 bitregion_end)
4987 if (!is_gimple_assign (stmt))
4988 return false;
4989 if (gimple_assign_rhs_code (stmt) == BIT_NOT_EXPR)
4991 tree rhs1 = gimple_assign_rhs1 (stmt);
4992 if (TREE_CODE (rhs1) == SSA_NAME
4993 && handled_load (SSA_NAME_DEF_STMT (rhs1), op, bitsize, bitpos,
4994 bitregion_start, bitregion_end))
4996 /* Don't allow _1 = load; _2 = ~_1; _3 = ~_2; which should have
4997 been optimized earlier, but if allowed here, would confuse the
4998 multiple uses counting. */
4999 if (op->bit_not_p)
5000 return false;
5001 op->bit_not_p = !op->bit_not_p;
5002 return true;
5004 return false;
5006 if (gimple_vuse (stmt)
5007 && gimple_assign_load_p (stmt)
5008 && !stmt_can_throw_internal (cfun, stmt)
5009 && !gimple_has_volatile_ops (stmt))
5011 tree mem = gimple_assign_rhs1 (stmt);
5012 op->base_addr
5013 = mem_valid_for_store_merging (mem, &op->bitsize, &op->bitpos,
5014 &op->bitregion_start,
5015 &op->bitregion_end);
5016 if (op->base_addr != NULL_TREE
5017 && known_eq (op->bitsize, bitsize)
5018 && multiple_p (op->bitpos - bitpos, BITS_PER_UNIT)
5019 && known_ge (op->bitpos - op->bitregion_start,
5020 bitpos - bitregion_start)
5021 && known_ge (op->bitregion_end - op->bitpos,
5022 bitregion_end - bitpos))
5024 op->stmt = stmt;
5025 op->val = mem;
5026 op->bit_not_p = false;
5027 return true;
5030 return false;
5033 /* Return the index number of the landing pad for STMT, if any. */
5035 static int
5036 lp_nr_for_store (gimple *stmt)
5038 if (!cfun->can_throw_non_call_exceptions || !cfun->eh)
5039 return 0;
5041 if (!stmt_could_throw_p (cfun, stmt))
5042 return 0;
5044 return lookup_stmt_eh_lp (stmt);
5047 /* Record the store STMT for store merging optimization if it can be
5048 optimized. Return true if any changes were made. */
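/* The right-hand sides recognized below are, roughly (sketch only, see
   the code for the exact conditions):
     *p = CST;                          rhs_code INTEGER_CST
     *p = "str";                        rhs_code STRING_CST
     *p = _1;   where _1 = *q;          rhs_code MEM_REF
     *p = _2;   where _2 = _1 OP c or _1 OP _3, OP one of &, |, ^ and
                _1, _3 are loads        rhs_code BIT_{AND,IOR,XOR}_EXPR
     s.bitfld = _1;                     rhs_code BIT_INSERT_EXPR
   plus byte-swap/rotate patterns detected via find_bswap_or_nop_1 and
   recorded with rhs_code LROTATE_EXPR.  Anything else terminates the
   aliasing chains.  */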
5050 bool
5051 pass_store_merging::process_store (gimple *stmt)
5053 tree lhs = gimple_assign_lhs (stmt);
5054 tree rhs = gimple_assign_rhs1 (stmt);
5055 poly_uint64 bitsize, bitpos = 0;
5056 poly_uint64 bitregion_start = 0, bitregion_end = 0;
5057 tree base_addr
5058 = mem_valid_for_store_merging (lhs, &bitsize, &bitpos,
5059 &bitregion_start, &bitregion_end);
5060 if (known_eq (bitsize, 0U))
5061 return false;
5063 bool invalid = (base_addr == NULL_TREE
5064 || (maybe_gt (bitsize,
5065 (unsigned int) MAX_BITSIZE_MODE_ANY_INT)
5066 && TREE_CODE (rhs) != INTEGER_CST
5067 && (TREE_CODE (rhs) != CONSTRUCTOR
5068 || CONSTRUCTOR_NELTS (rhs) != 0)));
5069 enum tree_code rhs_code = ERROR_MARK;
5070 bool bit_not_p = false;
5071 struct symbolic_number n;
5072 gimple *ins_stmt = NULL;
5073 store_operand_info ops[2];
5074 if (invalid)
5076 else if (TREE_CODE (rhs) == STRING_CST)
5078 rhs_code = STRING_CST;
5079 ops[0].val = rhs;
5081 else if (rhs_valid_for_store_merging_p (rhs))
5083 rhs_code = INTEGER_CST;
5084 ops[0].val = rhs;
5086 else if (TREE_CODE (rhs) == SSA_NAME)
5088 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs), *def_stmt1, *def_stmt2;
5089 if (!is_gimple_assign (def_stmt))
5090 invalid = true;
5091 else if (handled_load (def_stmt, &ops[0], bitsize, bitpos,
5092 bitregion_start, bitregion_end))
5093 rhs_code = MEM_REF;
5094 else if (gimple_assign_rhs_code (def_stmt) == BIT_NOT_EXPR)
5096 tree rhs1 = gimple_assign_rhs1 (def_stmt);
5097 if (TREE_CODE (rhs1) == SSA_NAME
5098 && is_gimple_assign (SSA_NAME_DEF_STMT (rhs1)))
5100 bit_not_p = true;
5101 def_stmt = SSA_NAME_DEF_STMT (rhs1);
5105 if (rhs_code == ERROR_MARK && !invalid)
5106 switch ((rhs_code = gimple_assign_rhs_code (def_stmt)))
5108 case BIT_AND_EXPR:
5109 case BIT_IOR_EXPR:
5110 case BIT_XOR_EXPR:
5111 tree rhs1, rhs2;
5112 rhs1 = gimple_assign_rhs1 (def_stmt);
5113 rhs2 = gimple_assign_rhs2 (def_stmt);
5114 invalid = true;
5115 if (TREE_CODE (rhs1) != SSA_NAME)
5116 break;
5117 def_stmt1 = SSA_NAME_DEF_STMT (rhs1);
5118 if (!is_gimple_assign (def_stmt1)
5119 || !handled_load (def_stmt1, &ops[0], bitsize, bitpos,
5120 bitregion_start, bitregion_end))
5121 break;
5122 if (rhs_valid_for_store_merging_p (rhs2))
5123 ops[1].val = rhs2;
5124 else if (TREE_CODE (rhs2) != SSA_NAME)
5125 break;
5126 else
5128 def_stmt2 = SSA_NAME_DEF_STMT (rhs2);
5129 if (!is_gimple_assign (def_stmt2))
5130 break;
5131 else if (!handled_load (def_stmt2, &ops[1], bitsize, bitpos,
5132 bitregion_start, bitregion_end))
5133 break;
5135 invalid = false;
5136 break;
5137 default:
5138 invalid = true;
5139 break;
5142 unsigned HOST_WIDE_INT const_bitsize;
5143 if (bitsize.is_constant (&const_bitsize)
5144 && (const_bitsize % BITS_PER_UNIT) == 0
5145 && const_bitsize <= 64
5146 && multiple_p (bitpos, BITS_PER_UNIT))
5148 ins_stmt = find_bswap_or_nop_1 (def_stmt, &n, 12);
5149 if (ins_stmt)
5151 uint64_t nn = n.n;
5152 for (unsigned HOST_WIDE_INT i = 0;
5153 i < const_bitsize;
5154 i += BITS_PER_UNIT, nn >>= BITS_PER_MARKER)
5155 if ((nn & MARKER_MASK) == 0
5156 || (nn & MARKER_MASK) == MARKER_BYTE_UNKNOWN)
5158 ins_stmt = NULL;
5159 break;
5161 if (ins_stmt)
5163 if (invalid)
5165 rhs_code = LROTATE_EXPR;
5166 ops[0].base_addr = NULL_TREE;
5167 ops[1].base_addr = NULL_TREE;
5169 invalid = false;
5174 if (invalid
5175 && bitsize.is_constant (&const_bitsize)
5176 && ((const_bitsize % BITS_PER_UNIT) != 0
5177 || !multiple_p (bitpos, BITS_PER_UNIT))
5178 && const_bitsize <= MAX_FIXED_MODE_SIZE)
5180 /* Bypass a conversion to the bit-field type. */
5181 if (!bit_not_p
5182 && is_gimple_assign (def_stmt)
5183 && CONVERT_EXPR_CODE_P (rhs_code))
5185 tree rhs1 = gimple_assign_rhs1 (def_stmt);
5186 if (TREE_CODE (rhs1) == SSA_NAME
5187 && INTEGRAL_TYPE_P (TREE_TYPE (rhs1)))
5188 rhs = rhs1;
5190 rhs_code = BIT_INSERT_EXPR;
5191 bit_not_p = false;
5192 ops[0].val = rhs;
5193 ops[0].base_addr = NULL_TREE;
5194 ops[1].base_addr = NULL_TREE;
5195 invalid = false;
5198 else
5199 invalid = true;
5201 unsigned HOST_WIDE_INT const_bitsize, const_bitpos;
5202 unsigned HOST_WIDE_INT const_bitregion_start, const_bitregion_end;
5203 if (invalid
5204 || !bitsize.is_constant (&const_bitsize)
5205 || !bitpos.is_constant (&const_bitpos)
5206 || !bitregion_start.is_constant (&const_bitregion_start)
5207 || !bitregion_end.is_constant (&const_bitregion_end))
5208 return terminate_all_aliasing_chains (NULL, stmt);
5210 if (!ins_stmt)
5211 memset (&n, 0, sizeof (n));
5213 class imm_store_chain_info **chain_info = NULL;
5214 bool ret = false;
5215 if (base_addr)
5216 chain_info = m_stores.get (base_addr);
5218 store_immediate_info *info;
5219 if (chain_info)
5221 unsigned int ord = (*chain_info)->m_store_info.length ();
5222 info = new store_immediate_info (const_bitsize, const_bitpos,
5223 const_bitregion_start,
5224 const_bitregion_end,
5225 stmt, ord, rhs_code, n, ins_stmt,
5226 bit_not_p, lp_nr_for_store (stmt),
5227 ops[0], ops[1]);
5228 if (dump_file && (dump_flags & TDF_DETAILS))
5230 fprintf (dump_file, "Recording immediate store from stmt:\n");
5231 print_gimple_stmt (dump_file, stmt, 0);
5233 (*chain_info)->m_store_info.safe_push (info);
5234 m_n_stores++;
5235 ret |= terminate_all_aliasing_chains (chain_info, stmt);
5236 /* If we reach the limit of stores to merge in a chain, terminate and
5237 process the chain now. */
5238 if ((*chain_info)->m_store_info.length ()
5239 == (unsigned int) param_max_stores_to_merge)
5241 if (dump_file && (dump_flags & TDF_DETAILS))
5242 fprintf (dump_file,
5243 "Reached maximum number of statements to merge:\n");
5244 ret |= terminate_and_process_chain (*chain_info);
5247 else
5249 /* Store aliases any existing chain? */
5250 ret |= terminate_all_aliasing_chains (NULL, stmt);
5252 /* Start a new chain. */
5253 class imm_store_chain_info *new_chain
5254 = new imm_store_chain_info (m_stores_head, base_addr);
5255 info = new store_immediate_info (const_bitsize, const_bitpos,
5256 const_bitregion_start,
5257 const_bitregion_end,
5258 stmt, 0, rhs_code, n, ins_stmt,
5259 bit_not_p, lp_nr_for_store (stmt),
5260 ops[0], ops[1]);
5261 new_chain->m_store_info.safe_push (info);
5262 m_n_stores++;
5263 m_stores.put (base_addr, new_chain);
5264 m_n_chains++;
5265 if (dump_file && (dump_flags & TDF_DETAILS))
5267 fprintf (dump_file, "Starting active chain number %u with statement:\n",
5268 m_n_chains);
5269 print_gimple_stmt (dump_file, stmt, 0);
5270 fprintf (dump_file, "The base object is:\n");
5271 print_generic_expr (dump_file, base_addr);
5272 fprintf (dump_file, "\n");
5276 /* Prune oldest chains so that after adding the chain or store above
5277 we're again within the limits set by the params. */
5278 if (m_n_chains > (unsigned)param_max_store_chains_to_track
5279 || m_n_stores > (unsigned)param_max_stores_to_track)
5281 if (dump_file && (dump_flags & TDF_DETAILS))
5282 fprintf (dump_file, "Too many chains (%u > %d) or stores (%u > %d), "
5283 "terminating oldest chain(s).\n", m_n_chains,
5284 param_max_store_chains_to_track, m_n_stores,
5285 param_max_stores_to_track);
5286 imm_store_chain_info **e = &m_stores_head;
5287 unsigned idx = 0;
5288 unsigned n_stores = 0;
5289 while (*e)
5291 if (idx >= (unsigned)param_max_store_chains_to_track
5292 || (n_stores + (*e)->m_store_info.length ()
5293 > (unsigned)param_max_stores_to_track))
5294 ret |= terminate_and_process_chain (*e);
5295 else
5297 n_stores += (*e)->m_store_info.length ();
5298 e = &(*e)->next;
5299 ++idx;
5304 return ret;
5307 /* Return true if STMT is a store valid for store merging. */
5309 static bool
5310 store_valid_for_store_merging_p (gimple *stmt)
5312 return gimple_assign_single_p (stmt)
5313 && gimple_vdef (stmt)
5314 && lhs_valid_for_store_merging_p (gimple_assign_lhs (stmt))
5315 && (!gimple_has_volatile_ops (stmt) || gimple_clobber_p (stmt));
5318 enum basic_block_status { BB_INVALID, BB_VALID, BB_EXTENDED_VALID };
5320 /* Return the status of basic block BB wrt store merging. */
5322 static enum basic_block_status
5323 get_status_for_store_merging (basic_block bb)
5325 unsigned int num_statements = 0;
5326 unsigned int num_constructors = 0;
5327 gimple_stmt_iterator gsi;
5328 edge e;
5330 for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5332 gimple *stmt = gsi_stmt (gsi);
5334 if (is_gimple_debug (stmt))
5335 continue;
5337 if (store_valid_for_store_merging_p (stmt) && ++num_statements >= 2)
5338 break;
5340 if (is_gimple_assign (stmt)
5341 && gimple_assign_rhs_code (stmt) == CONSTRUCTOR)
5343 tree rhs = gimple_assign_rhs1 (stmt);
5344 if (VECTOR_TYPE_P (TREE_TYPE (rhs))
5345 && INTEGRAL_TYPE_P (TREE_TYPE (TREE_TYPE (rhs)))
5346 && gimple_assign_lhs (stmt) != NULL_TREE)
5348 HOST_WIDE_INT sz
5349 = int_size_in_bytes (TREE_TYPE (rhs)) * BITS_PER_UNIT;
5350 if (sz == 16 || sz == 32 || sz == 64)
5352 num_constructors = 1;
5353 break;
5359 if (num_statements == 0 && num_constructors == 0)
5360 return BB_INVALID;
5362 if (cfun->can_throw_non_call_exceptions && cfun->eh
5363 && store_valid_for_store_merging_p (gimple_seq_last_stmt (bb_seq (bb)))
5364 && (e = find_fallthru_edge (bb->succs))
5365 && e->dest == bb->next_bb)
5366 return BB_EXTENDED_VALID;
5368 return (num_statements >= 2 || num_constructors) ? BB_VALID : BB_INVALID;
5371 /* Entry point for the pass. Go over each basic block recording chains of
5372 immediate stores. Upon encountering a terminating statement (as defined
5373 by stmt_terminates_chain_p) process the recorded stores and emit the widened
5374 variants. */
5376 unsigned int
5377 pass_store_merging::execute (function *fun)
5379 basic_block bb;
5380 hash_set<gimple *> orig_stmts;
5381 bool changed = false, open_chains = false;
5383 /* If the function can throw and catch non-call exceptions, we'll be trying
5384 to merge stores across different basic blocks so we need to first unsplit
5385 the EH edges in order to streamline the CFG of the function. */
5386 if (cfun->can_throw_non_call_exceptions && cfun->eh)
5387 unsplit_eh_edges ();
5389 calculate_dominance_info (CDI_DOMINATORS);
5391 FOR_EACH_BB_FN (bb, fun)
5393 const basic_block_status bb_status = get_status_for_store_merging (bb);
5394 gimple_stmt_iterator gsi;
5396 if (open_chains && (bb_status == BB_INVALID || !single_pred_p (bb)))
5398 changed |= terminate_and_process_all_chains ();
5399 open_chains = false;
5402 if (bb_status == BB_INVALID)
5403 continue;
5405 if (dump_file && (dump_flags & TDF_DETAILS))
5406 fprintf (dump_file, "Processing basic block <%d>:\n", bb->index);
5408 for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); )
5410 gimple *stmt = gsi_stmt (gsi);
5411 gsi_next (&gsi);
5413 if (is_gimple_debug (stmt))
5414 continue;
5416 if (gimple_has_volatile_ops (stmt) && !gimple_clobber_p (stmt))
5418 /* Terminate all chains. */
5419 if (dump_file && (dump_flags & TDF_DETAILS))
5420 fprintf (dump_file, "Volatile access terminates "
5421 "all chains\n");
5422 changed |= terminate_and_process_all_chains ();
5423 open_chains = false;
5424 continue;
5427 if (is_gimple_assign (stmt)
5428 && gimple_assign_rhs_code (stmt) == CONSTRUCTOR
5429 && maybe_optimize_vector_constructor (stmt))
5430 continue;
5432 if (store_valid_for_store_merging_p (stmt))
5433 changed |= process_store (stmt);
5434 else
5435 changed |= terminate_all_aliasing_chains (NULL, stmt);
5438 if (bb_status == BB_EXTENDED_VALID)
5439 open_chains = true;
5440 else
5442 changed |= terminate_and_process_all_chains ();
5443 open_chains = false;
5447 if (open_chains)
5448 changed |= terminate_and_process_all_chains ();
5450 /* If the function can throw and catch non-call exceptions and something
5451 changed during the pass, then the CFG has (very likely) changed too. */
5452 if (cfun->can_throw_non_call_exceptions && cfun->eh && changed)
5454 free_dominance_info (CDI_DOMINATORS);
5455 return TODO_cleanup_cfg;
5458 return 0;
5461 } // anon namespace
5463 /* Construct and return a store merging pass object. */
5465 gimple_opt_pass *
5466 make_pass_store_merging (gcc::context *ctxt)
5468 return new pass_store_merging (ctxt);
5471 #if CHECKING_P
5473 namespace selftest {
5475 /* Selftests for store merging helpers. */
5477 /* Assert that all elements of the byte arrays X and Y, both of length N,
5478 are equal. */
5480 static void
5481 verify_array_eq (unsigned char *x, unsigned char *y, unsigned int n)
5483 for (unsigned int i = 0; i < n; i++)
5485 if (x[i] != y[i])
5487 fprintf (stderr, "Arrays do not match. X:\n");
5488 dump_char_array (stderr, x, n);
5489 fprintf (stderr, "Y:\n");
5490 dump_char_array (stderr, y, n);
5492 ASSERT_EQ (x[i], y[i]);
5496 /* Test shift_bytes_in_array_left and that it carries bits across between
5497 bytes correctly. */
5499 static void
5500 verify_shift_bytes_in_array_left (void)
5502 /* byte 1 | byte 0
5503 00011111 | 11100000. */
5504 unsigned char orig[2] = { 0xe0, 0x1f };
5505 unsigned char in[2];
5506 memcpy (in, orig, sizeof orig);
5508 unsigned char expected[2] = { 0x80, 0x7f };
5509 shift_bytes_in_array_left (in, sizeof (in), 2);
5510 verify_array_eq (in, expected, sizeof (in));
5512 memcpy (in, orig, sizeof orig);
5513 memcpy (expected, orig, sizeof orig);
5514 /* Check that shifting by zero doesn't change anything. */
5515 shift_bytes_in_array_left (in, sizeof (in), 0);
5516 verify_array_eq (in, expected, sizeof (in));
5520 /* Test shift_bytes_in_array_right and that it carries bits across between
5521 bytes correctly. */
5523 static void
5524 verify_shift_bytes_in_array_right (void)
5526 /* byte 1 | byte 0
5527 11100000 | 00011111. */
5528 unsigned char orig[2] = { 0x1f, 0xe0};
5529 unsigned char in[2];
5530 memcpy (in, orig, sizeof orig);
5531 unsigned char expected[2] = { 0x07, 0xf8};
5532 shift_bytes_in_array_right (in, sizeof (in), 2);
5533 verify_array_eq (in, expected, sizeof (in));
5535 memcpy (in, orig, sizeof orig);
5536 memcpy (expected, orig, sizeof orig);
5537 /* Check that shifting by zero doesn't change anything. */
5538 shift_bytes_in_array_right (in, sizeof (in), 0);
5539 verify_array_eq (in, expected, sizeof (in));
5542 /* Test clear_bit_region that it clears exactly the bits asked and
5543 nothing more. */
5545 static void
5546 verify_clear_bit_region (void)
5548 /* Start with all bits set and test clearing various patterns in them. */
5549 unsigned char orig[3] = { 0xff, 0xff, 0xff};
5550 unsigned char in[3];
5551 unsigned char expected[3];
5552 memcpy (in, orig, sizeof in);
5554 /* Check zeroing out all the bits. */
5555 clear_bit_region (in, 0, 3 * BITS_PER_UNIT);
5556 expected[0] = expected[1] = expected[2] = 0;
5557 verify_array_eq (in, expected, sizeof in);
5559 memcpy (in, orig, sizeof in);
5560 /* Leave the first and last bits intact. */
5561 clear_bit_region (in, 1, 3 * BITS_PER_UNIT - 2);
5562 expected[0] = 0x1;
5563 expected[1] = 0;
5564 expected[2] = 0x80;
5565 verify_array_eq (in, expected, sizeof in);
5568 /* Test clear_bit_region_be that it clears exactly the bits asked and
5569 nothing more. */
5571 static void
5572 verify_clear_bit_region_be (void)
5574 /* Start with all bits set and test clearing various patterns in them. */
5575 unsigned char orig[3] = { 0xff, 0xff, 0xff};
5576 unsigned char in[3];
5577 unsigned char expected[3];
5578 memcpy (in, orig, sizeof in);
5580 /* Check zeroing out all the bits. */
5581 clear_bit_region_be (in, BITS_PER_UNIT - 1, 3 * BITS_PER_UNIT);
5582 expected[0] = expected[1] = expected[2] = 0;
5583 verify_array_eq (in, expected, sizeof in);
5585 memcpy (in, orig, sizeof in);
5586 /* Leave the first and last bits intact. */
5587 clear_bit_region_be (in, BITS_PER_UNIT - 2, 3 * BITS_PER_UNIT - 2);
5588 expected[0] = 0x80;
5589 expected[1] = 0;
5590 expected[2] = 0x1;
5591 verify_array_eq (in, expected, sizeof in);
5595 /* Run all of the selftests within this file. */
5597 void
5598 store_merging_c_tests (void)
5600 verify_shift_bytes_in_array_left ();
5601 verify_shift_bytes_in_array_right ();
5602 verify_clear_bit_region ();
5603 verify_clear_bit_region_be ();
5606 } // namespace selftest
5607 #endif /* CHECKING_P. */