ada: Fix wrong finalization for call to BIP function in conditional expression
[official-gcc.git] / gcc / gimple-ssa-store-merging.cc
blobdf7afd2fd78a4502a3606d2de6d8c12bce14e6dc
1 /* GIMPLE store merging and byte swapping passes.
2 Copyright (C) 2009-2023 Free Software Foundation, Inc.
3 Contributed by ARM Ltd.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* The purpose of the store merging pass is to combine multiple memory stores
22 of constant values, values loaded from memory, bitwise operations on those,
23 or bit-field values, to consecutive locations, into fewer wider stores.
25 For example, if we have a sequence peforming four byte stores to
26 consecutive memory locations:
27 [p ] := imm1;
28 [p + 1B] := imm2;
29 [p + 2B] := imm3;
30 [p + 3B] := imm4;
31 we can transform this into a single 4-byte store if the target supports it:
32 [p] := imm1:imm2:imm3:imm4 concatenated according to endianness.
34 Or:
35 [p ] := [q ];
36 [p + 1B] := [q + 1B];
37 [p + 2B] := [q + 2B];
38 [p + 3B] := [q + 3B];
39 if there is no overlap can be transformed into a single 4-byte
40 load followed by single 4-byte store.
42 Or:
43 [p ] := [q ] ^ imm1;
44 [p + 1B] := [q + 1B] ^ imm2;
45 [p + 2B] := [q + 2B] ^ imm3;
46 [p + 3B] := [q + 3B] ^ imm4;
47 if there is no overlap can be transformed into a single 4-byte
48 load, xored with imm1:imm2:imm3:imm4 and stored using a single 4-byte store.
50 Or:
51 [p:1 ] := imm;
52 [p:31] := val & 0x7FFFFFFF;
53 we can transform this into a single 4-byte store if the target supports it:
54 [p] := imm:(val & 0x7FFFFFFF) concatenated according to endianness.
56 The algorithm is applied to each basic block in three phases:
58 1) Scan through the basic block and record assignments to destinations
59 that can be expressed as a store to memory of a certain size at a certain
60 bit offset from base expressions we can handle. For bit-fields we also
61 record the surrounding bit region, i.e. bits that could be stored in
62 a read-modify-write operation when storing the bit-field. Record store
63 chains to different bases in a hash_map (m_stores) and make sure to
64 terminate such chains when appropriate (for example when the stored
65 values get used subsequently).
66 These stores can be a result of structure element initializers, array stores
67 etc. A store_immediate_info object is recorded for every such store.
68 Record as many such assignments to a single base as possible until a
69 statement that interferes with the store sequence is encountered.
70 Each store has up to 2 operands, which can be a either constant, a memory
71 load or an SSA name, from which the value to be stored can be computed.
72 At most one of the operands can be a constant. The operands are recorded
73 in store_operand_info struct.
75 2) Analyze the chains of stores recorded in phase 1) (i.e. the vector of
76 store_immediate_info objects) and coalesce contiguous stores into
77 merged_store_group objects. For bit-field stores, we don't need to
78 require the stores to be contiguous, just their surrounding bit regions
79 have to be contiguous. If the expression being stored is different
80 between adjacent stores, such as one store storing a constant and
81 following storing a value loaded from memory, or if the loaded memory
82 objects are not adjacent, a new merged_store_group is created as well.
84 For example, given the stores:
85 [p ] := 0;
86 [p + 1B] := 1;
87 [p + 3B] := 0;
88 [p + 4B] := 1;
89 [p + 5B] := 0;
90 [p + 6B] := 0;
91 This phase would produce two merged_store_group objects, one recording the
92 two bytes stored in the memory region [p : p + 1] and another
93 recording the four bytes stored in the memory region [p + 3 : p + 6].
95 3) The merged_store_group objects produced in phase 2) are processed
96 to generate the sequence of wider stores that set the contiguous memory
97 regions to the sequence of bytes that correspond to it. This may emit
98 multiple stores per store group to handle contiguous stores that are not
99 of a size that is a power of 2. For example it can try to emit a 40-bit
100 store as a 32-bit store followed by an 8-bit store.
101 We try to emit as wide stores as we can while respecting STRICT_ALIGNMENT
102 or TARGET_SLOW_UNALIGNED_ACCESS settings.
104 Note on endianness and example:
105 Consider 2 contiguous 16-bit stores followed by 2 contiguous 8-bit stores:
106 [p ] := 0x1234;
107 [p + 2B] := 0x5678;
108 [p + 4B] := 0xab;
109 [p + 5B] := 0xcd;
111 The memory layout for little-endian (LE) and big-endian (BE) must be:
112 p |LE|BE|
113 ---------
114 0 |34|12|
115 1 |12|34|
116 2 |78|56|
117 3 |56|78|
118 4 |ab|ab|
119 5 |cd|cd|
121 To merge these into a single 48-bit merged value 'val' in phase 2)
122 on little-endian we insert stores to higher (consecutive) bitpositions
123 into the most significant bits of the merged value.
124 The final merged value would be: 0xcdab56781234
126 For big-endian we insert stores to higher bitpositions into the least
127 significant bits of the merged value.
128 The final merged value would be: 0x12345678abcd
130 Then, in phase 3), we want to emit this 48-bit value as a 32-bit store
131 followed by a 16-bit store. Again, we must consider endianness when
132 breaking down the 48-bit value 'val' computed above.
133 For little endian we emit:
134 [p] (32-bit) := 0x56781234; // val & 0x0000ffffffff;
135 [p + 4B] (16-bit) := 0xcdab; // (val & 0xffff00000000) >> 32;
137 Whereas for big-endian we emit:
138 [p] (32-bit) := 0x12345678; // (val & 0xffffffff0000) >> 16;
139 [p + 4B] (16-bit) := 0xabcd; // val & 0x00000000ffff; */
141 #include "config.h"
142 #include "system.h"
143 #include "coretypes.h"
144 #include "backend.h"
145 #include "tree.h"
146 #include "gimple.h"
147 #include "builtins.h"
148 #include "fold-const.h"
149 #include "tree-pass.h"
150 #include "ssa.h"
151 #include "gimple-pretty-print.h"
152 #include "alias.h"
153 #include "fold-const.h"
154 #include "print-tree.h"
155 #include "tree-hash-traits.h"
156 #include "gimple-iterator.h"
157 #include "gimplify.h"
158 #include "gimple-fold.h"
159 #include "stor-layout.h"
160 #include "timevar.h"
161 #include "cfganal.h"
162 #include "cfgcleanup.h"
163 #include "tree-cfg.h"
164 #include "except.h"
165 #include "tree-eh.h"
166 #include "target.h"
167 #include "gimplify-me.h"
168 #include "rtl.h"
169 #include "expr.h" /* For get_bit_range. */
170 #include "optabs-tree.h"
171 #include "dbgcnt.h"
172 #include "selftest.h"
174 /* The maximum size (in bits) of the stores this pass should generate. */
175 #define MAX_STORE_BITSIZE (BITS_PER_WORD)
176 #define MAX_STORE_BYTES (MAX_STORE_BITSIZE / BITS_PER_UNIT)
178 /* Limit to bound the number of aliasing checks for loads with the same
179 vuse as the corresponding store. */
180 #define MAX_STORE_ALIAS_CHECKS 64
182 namespace {
184 struct bswap_stat
186 /* Number of hand-written 16-bit nop / bswaps found. */
187 int found_16bit;
189 /* Number of hand-written 32-bit nop / bswaps found. */
190 int found_32bit;
192 /* Number of hand-written 64-bit nop / bswaps found. */
193 int found_64bit;
194 } nop_stats, bswap_stats;
196 /* A symbolic number structure is used to detect byte permutation and selection
197 patterns of a source. To achieve that, its field N contains an artificial
198 number consisting of BITS_PER_MARKER sized markers tracking where does each
199 byte come from in the source:
201 0 - target byte has the value 0
202 FF - target byte has an unknown value (eg. due to sign extension)
203 1..size - marker value is the byte index in the source (0 for lsb).
205 To detect permutations on memory sources (arrays and structures), a symbolic
206 number is also associated:
207 - a base address BASE_ADDR and an OFFSET giving the address of the source;
208 - a range which gives the difference between the highest and lowest accessed
209 memory location to make such a symbolic number;
210 - the address SRC of the source element of lowest address as a convenience
211 to easily get BASE_ADDR + offset + lowest bytepos;
212 - number of expressions N_OPS bitwise ored together to represent
213 approximate cost of the computation.
215 Note 1: the range is different from size as size reflects the size of the
216 type of the current expression. For instance, for an array char a[],
217 (short) a[0] | (short) a[3] would have a size of 2 but a range of 4 while
218 (short) a[0] | ((short) a[0] << 1) would still have a size of 2 but this
219 time a range of 1.
221 Note 2: for non-memory sources, range holds the same value as size.
223 Note 3: SRC points to the SSA_NAME in case of non-memory source. */
225 struct symbolic_number {
226 uint64_t n;
227 tree type;
228 tree base_addr;
229 tree offset;
230 poly_int64_pod bytepos;
231 tree src;
232 tree alias_set;
233 tree vuse;
234 unsigned HOST_WIDE_INT range;
235 int n_ops;
238 #define BITS_PER_MARKER 8
239 #define MARKER_MASK ((1 << BITS_PER_MARKER) - 1)
240 #define MARKER_BYTE_UNKNOWN MARKER_MASK
241 #define HEAD_MARKER(n, size) \
242 ((n) & ((uint64_t) MARKER_MASK << (((size) - 1) * BITS_PER_MARKER)))
244 /* The number which the find_bswap_or_nop_1 result should match in
245 order to have a nop. The number is masked according to the size of
246 the symbolic number before using it. */
247 #define CMPNOP (sizeof (int64_t) < 8 ? 0 : \
248 (uint64_t)0x08070605 << 32 | 0x04030201)
250 /* The number which the find_bswap_or_nop_1 result should match in
251 order to have a byte swap. The number is masked according to the
252 size of the symbolic number before using it. */
253 #define CMPXCHG (sizeof (int64_t) < 8 ? 0 : \
254 (uint64_t)0x01020304 << 32 | 0x05060708)
256 /* Perform a SHIFT or ROTATE operation by COUNT bits on symbolic
257 number N. Return false if the requested operation is not permitted
258 on a symbolic number. */
260 inline bool
261 do_shift_rotate (enum tree_code code,
262 struct symbolic_number *n,
263 int count)
265 int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
266 uint64_t head_marker;
268 if (count < 0
269 || count >= TYPE_PRECISION (n->type)
270 || count % BITS_PER_UNIT != 0)
271 return false;
272 count = (count / BITS_PER_UNIT) * BITS_PER_MARKER;
274 /* Zero out the extra bits of N in order to avoid them being shifted
275 into the significant bits. */
276 if (size < 64 / BITS_PER_MARKER)
277 n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
279 switch (code)
281 case LSHIFT_EXPR:
282 n->n <<= count;
283 break;
284 case RSHIFT_EXPR:
285 head_marker = HEAD_MARKER (n->n, size);
286 n->n >>= count;
287 /* Arithmetic shift of signed type: result is dependent on the value. */
288 if (!TYPE_UNSIGNED (n->type) && head_marker)
289 for (i = 0; i < count / BITS_PER_MARKER; i++)
290 n->n |= (uint64_t) MARKER_BYTE_UNKNOWN
291 << ((size - 1 - i) * BITS_PER_MARKER);
292 break;
293 case LROTATE_EXPR:
294 n->n = (n->n << count) | (n->n >> ((size * BITS_PER_MARKER) - count));
295 break;
296 case RROTATE_EXPR:
297 n->n = (n->n >> count) | (n->n << ((size * BITS_PER_MARKER) - count));
298 break;
299 default:
300 return false;
302 /* Zero unused bits for size. */
303 if (size < 64 / BITS_PER_MARKER)
304 n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
305 return true;
308 /* Perform sanity checking for the symbolic number N and the gimple
309 statement STMT. */
311 inline bool
312 verify_symbolic_number_p (struct symbolic_number *n, gimple *stmt)
314 tree lhs_type;
316 lhs_type = TREE_TYPE (gimple_get_lhs (stmt));
318 if (TREE_CODE (lhs_type) != INTEGER_TYPE
319 && TREE_CODE (lhs_type) != ENUMERAL_TYPE)
320 return false;
322 if (TYPE_PRECISION (lhs_type) != TYPE_PRECISION (n->type))
323 return false;
325 return true;
328 /* Initialize the symbolic number N for the bswap pass from the base element
329 SRC manipulated by the bitwise OR expression. */
331 bool
332 init_symbolic_number (struct symbolic_number *n, tree src)
334 int size;
336 if (!INTEGRAL_TYPE_P (TREE_TYPE (src)) && !POINTER_TYPE_P (TREE_TYPE (src)))
337 return false;
339 n->base_addr = n->offset = n->alias_set = n->vuse = NULL_TREE;
340 n->src = src;
342 /* Set up the symbolic number N by setting each byte to a value between 1 and
343 the byte size of rhs1. The highest order byte is set to n->size and the
344 lowest order byte to 1. */
345 n->type = TREE_TYPE (src);
346 size = TYPE_PRECISION (n->type);
347 if (size % BITS_PER_UNIT != 0)
348 return false;
349 size /= BITS_PER_UNIT;
350 if (size > 64 / BITS_PER_MARKER)
351 return false;
352 n->range = size;
353 n->n = CMPNOP;
354 n->n_ops = 1;
356 if (size < 64 / BITS_PER_MARKER)
357 n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
359 return true;
362 /* Check if STMT might be a byte swap or a nop from a memory source and returns
363 the answer. If so, REF is that memory source and the base of the memory area
364 accessed and the offset of the access from that base are recorded in N. */
366 bool
367 find_bswap_or_nop_load (gimple *stmt, tree ref, struct symbolic_number *n)
369 /* Leaf node is an array or component ref. Memorize its base and
370 offset from base to compare to other such leaf node. */
371 poly_int64 bitsize, bitpos, bytepos;
372 machine_mode mode;
373 int unsignedp, reversep, volatilep;
374 tree offset, base_addr;
376 /* Not prepared to handle PDP endian. */
377 if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
378 return false;
380 if (!gimple_assign_load_p (stmt) || gimple_has_volatile_ops (stmt))
381 return false;
383 base_addr = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode,
384 &unsignedp, &reversep, &volatilep);
386 if (TREE_CODE (base_addr) == TARGET_MEM_REF)
387 /* Do not rewrite TARGET_MEM_REF. */
388 return false;
389 else if (TREE_CODE (base_addr) == MEM_REF)
391 poly_offset_int bit_offset = 0;
392 tree off = TREE_OPERAND (base_addr, 1);
394 if (!integer_zerop (off))
396 poly_offset_int boff = mem_ref_offset (base_addr);
397 boff <<= LOG2_BITS_PER_UNIT;
398 bit_offset += boff;
401 base_addr = TREE_OPERAND (base_addr, 0);
403 /* Avoid returning a negative bitpos as this may wreak havoc later. */
404 if (maybe_lt (bit_offset, 0))
406 tree byte_offset = wide_int_to_tree
407 (sizetype, bits_to_bytes_round_down (bit_offset));
408 bit_offset = num_trailing_bits (bit_offset);
409 if (offset)
410 offset = size_binop (PLUS_EXPR, offset, byte_offset);
411 else
412 offset = byte_offset;
415 bitpos += bit_offset.force_shwi ();
417 else
418 base_addr = build_fold_addr_expr (base_addr);
420 if (!multiple_p (bitpos, BITS_PER_UNIT, &bytepos))
421 return false;
422 if (!multiple_p (bitsize, BITS_PER_UNIT))
423 return false;
424 if (reversep)
425 return false;
427 if (!init_symbolic_number (n, ref))
428 return false;
429 n->base_addr = base_addr;
430 n->offset = offset;
431 n->bytepos = bytepos;
432 n->alias_set = reference_alias_ptr_type (ref);
433 n->vuse = gimple_vuse (stmt);
434 return true;
437 /* Compute the symbolic number N representing the result of a bitwise OR,
438 bitwise XOR or plus on 2 symbolic number N1 and N2 whose source statements
439 are respectively SOURCE_STMT1 and SOURCE_STMT2. CODE is the operation. */
441 gimple *
442 perform_symbolic_merge (gimple *source_stmt1, struct symbolic_number *n1,
443 gimple *source_stmt2, struct symbolic_number *n2,
444 struct symbolic_number *n, enum tree_code code)
446 int i, size;
447 uint64_t mask;
448 gimple *source_stmt;
449 struct symbolic_number *n_start;
451 tree rhs1 = gimple_assign_rhs1 (source_stmt1);
452 if (TREE_CODE (rhs1) == BIT_FIELD_REF
453 && TREE_CODE (TREE_OPERAND (rhs1, 0)) == SSA_NAME)
454 rhs1 = TREE_OPERAND (rhs1, 0);
455 tree rhs2 = gimple_assign_rhs1 (source_stmt2);
456 if (TREE_CODE (rhs2) == BIT_FIELD_REF
457 && TREE_CODE (TREE_OPERAND (rhs2, 0)) == SSA_NAME)
458 rhs2 = TREE_OPERAND (rhs2, 0);
460 /* Sources are different, cancel bswap if they are not memory location with
461 the same base (array, structure, ...). */
462 if (rhs1 != rhs2)
464 uint64_t inc;
465 HOST_WIDE_INT start1, start2, start_sub, end_sub, end1, end2, end;
466 struct symbolic_number *toinc_n_ptr, *n_end;
467 basic_block bb1, bb2;
469 if (!n1->base_addr || !n2->base_addr
470 || !operand_equal_p (n1->base_addr, n2->base_addr, 0))
471 return NULL;
473 if (!n1->offset != !n2->offset
474 || (n1->offset && !operand_equal_p (n1->offset, n2->offset, 0)))
475 return NULL;
477 start1 = 0;
478 if (!(n2->bytepos - n1->bytepos).is_constant (&start2))
479 return NULL;
481 if (start1 < start2)
483 n_start = n1;
484 start_sub = start2 - start1;
486 else
488 n_start = n2;
489 start_sub = start1 - start2;
492 bb1 = gimple_bb (source_stmt1);
493 bb2 = gimple_bb (source_stmt2);
494 if (dominated_by_p (CDI_DOMINATORS, bb1, bb2))
495 source_stmt = source_stmt1;
496 else
497 source_stmt = source_stmt2;
499 /* Find the highest address at which a load is performed and
500 compute related info. */
501 end1 = start1 + (n1->range - 1);
502 end2 = start2 + (n2->range - 1);
503 if (end1 < end2)
505 end = end2;
506 end_sub = end2 - end1;
508 else
510 end = end1;
511 end_sub = end1 - end2;
513 n_end = (end2 > end1) ? n2 : n1;
515 /* Find symbolic number whose lsb is the most significant. */
516 if (BYTES_BIG_ENDIAN)
517 toinc_n_ptr = (n_end == n1) ? n2 : n1;
518 else
519 toinc_n_ptr = (n_start == n1) ? n2 : n1;
521 n->range = end - MIN (start1, start2) + 1;
523 /* Check that the range of memory covered can be represented by
524 a symbolic number. */
525 if (n->range > 64 / BITS_PER_MARKER)
526 return NULL;
528 /* Reinterpret byte marks in symbolic number holding the value of
529 bigger weight according to target endianness. */
530 inc = BYTES_BIG_ENDIAN ? end_sub : start_sub;
531 size = TYPE_PRECISION (n1->type) / BITS_PER_UNIT;
532 for (i = 0; i < size; i++, inc <<= BITS_PER_MARKER)
534 unsigned marker
535 = (toinc_n_ptr->n >> (i * BITS_PER_MARKER)) & MARKER_MASK;
536 if (marker && marker != MARKER_BYTE_UNKNOWN)
537 toinc_n_ptr->n += inc;
540 else
542 n->range = n1->range;
543 n_start = n1;
544 source_stmt = source_stmt1;
547 if (!n1->alias_set
548 || alias_ptr_types_compatible_p (n1->alias_set, n2->alias_set))
549 n->alias_set = n1->alias_set;
550 else
551 n->alias_set = ptr_type_node;
552 n->vuse = n_start->vuse;
553 n->base_addr = n_start->base_addr;
554 n->offset = n_start->offset;
555 n->src = n_start->src;
556 n->bytepos = n_start->bytepos;
557 n->type = n_start->type;
558 size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
559 uint64_t res_n = n1->n | n2->n;
561 for (i = 0, mask = MARKER_MASK; i < size; i++, mask <<= BITS_PER_MARKER)
563 uint64_t masked1, masked2;
565 masked1 = n1->n & mask;
566 masked2 = n2->n & mask;
567 /* If at least one byte is 0, all of 0 | x == 0 ^ x == 0 + x == x. */
568 if (masked1 && masked2)
570 /* + can carry into upper bits, just punt. */
571 if (code == PLUS_EXPR)
572 return NULL;
573 /* x | x is still x. */
574 if (code == BIT_IOR_EXPR && masked1 == masked2)
575 continue;
576 if (code == BIT_XOR_EXPR)
578 /* x ^ x is 0, but MARKER_BYTE_UNKNOWN stands for
579 unknown values and unknown ^ unknown is unknown. */
580 if (masked1 == masked2
581 && masked1 != ((uint64_t) MARKER_BYTE_UNKNOWN
582 << i * BITS_PER_MARKER))
584 res_n &= ~mask;
585 continue;
588 /* Otherwise set the byte to unknown, it might still be
589 later masked off. */
590 res_n |= mask;
593 n->n = res_n;
594 n->n_ops = n1->n_ops + n2->n_ops;
596 return source_stmt;
599 /* find_bswap_or_nop_1 invokes itself recursively with N and tries to perform
600 the operation given by the rhs of STMT on the result. If the operation
601 could successfully be executed the function returns a gimple stmt whose
602 rhs's first tree is the expression of the source operand and NULL
603 otherwise. */
605 gimple *
606 find_bswap_or_nop_1 (gimple *stmt, struct symbolic_number *n, int limit)
608 enum tree_code code;
609 tree rhs1, rhs2 = NULL;
610 gimple *rhs1_stmt, *rhs2_stmt, *source_stmt1;
611 enum gimple_rhs_class rhs_class;
613 if (!limit || !is_gimple_assign (stmt))
614 return NULL;
616 rhs1 = gimple_assign_rhs1 (stmt);
618 if (find_bswap_or_nop_load (stmt, rhs1, n))
619 return stmt;
621 /* Handle BIT_FIELD_REF. */
622 if (TREE_CODE (rhs1) == BIT_FIELD_REF
623 && TREE_CODE (TREE_OPERAND (rhs1, 0)) == SSA_NAME)
625 if (!tree_fits_uhwi_p (TREE_OPERAND (rhs1, 1))
626 || !tree_fits_uhwi_p (TREE_OPERAND (rhs1, 2)))
627 return NULL;
629 unsigned HOST_WIDE_INT bitsize = tree_to_uhwi (TREE_OPERAND (rhs1, 1));
630 unsigned HOST_WIDE_INT bitpos = tree_to_uhwi (TREE_OPERAND (rhs1, 2));
631 if (bitpos % BITS_PER_UNIT == 0
632 && bitsize % BITS_PER_UNIT == 0
633 && init_symbolic_number (n, TREE_OPERAND (rhs1, 0)))
635 /* Handle big-endian bit numbering in BIT_FIELD_REF. */
636 if (BYTES_BIG_ENDIAN)
637 bitpos = TYPE_PRECISION (n->type) - bitpos - bitsize;
639 /* Shift. */
640 if (!do_shift_rotate (RSHIFT_EXPR, n, bitpos))
641 return NULL;
643 /* Mask. */
644 uint64_t mask = 0;
645 uint64_t tmp = (1 << BITS_PER_UNIT) - 1;
646 for (unsigned i = 0; i < bitsize / BITS_PER_UNIT;
647 i++, tmp <<= BITS_PER_UNIT)
648 mask |= (uint64_t) MARKER_MASK << (i * BITS_PER_MARKER);
649 n->n &= mask;
651 /* Convert. */
652 n->type = TREE_TYPE (rhs1);
653 if (!n->base_addr)
654 n->range = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
656 return verify_symbolic_number_p (n, stmt) ? stmt : NULL;
659 return NULL;
662 if (TREE_CODE (rhs1) != SSA_NAME)
663 return NULL;
665 code = gimple_assign_rhs_code (stmt);
666 rhs_class = gimple_assign_rhs_class (stmt);
667 rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
669 if (rhs_class == GIMPLE_BINARY_RHS)
670 rhs2 = gimple_assign_rhs2 (stmt);
672 /* Handle unary rhs and binary rhs with integer constants as second
673 operand. */
675 if (rhs_class == GIMPLE_UNARY_RHS
676 || (rhs_class == GIMPLE_BINARY_RHS
677 && TREE_CODE (rhs2) == INTEGER_CST))
679 if (code != BIT_AND_EXPR
680 && code != LSHIFT_EXPR
681 && code != RSHIFT_EXPR
682 && code != LROTATE_EXPR
683 && code != RROTATE_EXPR
684 && !CONVERT_EXPR_CODE_P (code))
685 return NULL;
687 source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, n, limit - 1);
689 /* If find_bswap_or_nop_1 returned NULL, STMT is a leaf node and
690 we have to initialize the symbolic number. */
691 if (!source_stmt1)
693 if (gimple_assign_load_p (stmt)
694 || !init_symbolic_number (n, rhs1))
695 return NULL;
696 source_stmt1 = stmt;
699 switch (code)
701 case BIT_AND_EXPR:
703 int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
704 uint64_t val = int_cst_value (rhs2), mask = 0;
705 uint64_t tmp = (1 << BITS_PER_UNIT) - 1;
707 /* Only constants masking full bytes are allowed. */
708 for (i = 0; i < size; i++, tmp <<= BITS_PER_UNIT)
709 if ((val & tmp) != 0 && (val & tmp) != tmp)
710 return NULL;
711 else if (val & tmp)
712 mask |= (uint64_t) MARKER_MASK << (i * BITS_PER_MARKER);
714 n->n &= mask;
716 break;
717 case LSHIFT_EXPR:
718 case RSHIFT_EXPR:
719 case LROTATE_EXPR:
720 case RROTATE_EXPR:
721 if (!do_shift_rotate (code, n, (int) TREE_INT_CST_LOW (rhs2)))
722 return NULL;
723 break;
724 CASE_CONVERT:
726 int i, type_size, old_type_size;
727 tree type;
729 type = TREE_TYPE (gimple_assign_lhs (stmt));
730 type_size = TYPE_PRECISION (type);
731 if (type_size % BITS_PER_UNIT != 0)
732 return NULL;
733 type_size /= BITS_PER_UNIT;
734 if (type_size > 64 / BITS_PER_MARKER)
735 return NULL;
737 /* Sign extension: result is dependent on the value. */
738 old_type_size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
739 if (!TYPE_UNSIGNED (n->type) && type_size > old_type_size
740 && HEAD_MARKER (n->n, old_type_size))
741 for (i = 0; i < type_size - old_type_size; i++)
742 n->n |= (uint64_t) MARKER_BYTE_UNKNOWN
743 << ((type_size - 1 - i) * BITS_PER_MARKER);
745 if (type_size < 64 / BITS_PER_MARKER)
747 /* If STMT casts to a smaller type mask out the bits not
748 belonging to the target type. */
749 n->n &= ((uint64_t) 1 << (type_size * BITS_PER_MARKER)) - 1;
751 n->type = type;
752 if (!n->base_addr)
753 n->range = type_size;
755 break;
756 default:
757 return NULL;
759 return verify_symbolic_number_p (n, stmt) ? source_stmt1 : NULL;
762 /* Handle binary rhs. */
764 if (rhs_class == GIMPLE_BINARY_RHS)
766 struct symbolic_number n1, n2;
767 gimple *source_stmt, *source_stmt2;
769 if (!rhs2 || TREE_CODE (rhs2) != SSA_NAME)
770 return NULL;
772 rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
774 switch (code)
776 case BIT_IOR_EXPR:
777 case BIT_XOR_EXPR:
778 case PLUS_EXPR:
779 source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, &n1, limit - 1);
781 if (!source_stmt1)
782 return NULL;
784 source_stmt2 = find_bswap_or_nop_1 (rhs2_stmt, &n2, limit - 1);
786 if (!source_stmt2)
787 return NULL;
789 if (TYPE_PRECISION (n1.type) != TYPE_PRECISION (n2.type))
790 return NULL;
792 if (n1.vuse != n2.vuse)
793 return NULL;
795 source_stmt
796 = perform_symbolic_merge (source_stmt1, &n1, source_stmt2, &n2, n,
797 code);
799 if (!source_stmt)
800 return NULL;
802 if (!verify_symbolic_number_p (n, stmt))
803 return NULL;
805 break;
806 default:
807 return NULL;
809 return source_stmt;
811 return NULL;
814 /* Helper for find_bswap_or_nop and try_coalesce_bswap to compute
815 *CMPXCHG, *CMPNOP and adjust *N. */
817 void
818 find_bswap_or_nop_finalize (struct symbolic_number *n, uint64_t *cmpxchg,
819 uint64_t *cmpnop, bool *cast64_to_32)
821 unsigned rsize;
822 uint64_t tmpn, mask;
824 /* The number which the find_bswap_or_nop_1 result should match in order
825 to have a full byte swap. The number is shifted to the right
826 according to the size of the symbolic number before using it. */
827 *cmpxchg = CMPXCHG;
828 *cmpnop = CMPNOP;
829 *cast64_to_32 = false;
831 /* Find real size of result (highest non-zero byte). */
832 if (n->base_addr)
833 for (tmpn = n->n, rsize = 0; tmpn; tmpn >>= BITS_PER_MARKER, rsize++);
834 else
835 rsize = n->range;
837 /* Zero out the bits corresponding to untouched bytes in original gimple
838 expression. */
839 if (n->range < (int) sizeof (int64_t))
841 mask = ((uint64_t) 1 << (n->range * BITS_PER_MARKER)) - 1;
842 if (n->base_addr == NULL
843 && n->range == 4
844 && int_size_in_bytes (TREE_TYPE (n->src)) == 8)
846 /* If all bytes in n->n are either 0 or in [5..8] range, this
847 might be a candidate for (unsigned) __builtin_bswap64 (src).
848 It is not worth it for (unsigned short) __builtin_bswap64 (src)
849 or (unsigned short) __builtin_bswap32 (src). */
850 *cast64_to_32 = true;
851 for (tmpn = n->n; tmpn; tmpn >>= BITS_PER_MARKER)
852 if ((tmpn & MARKER_MASK)
853 && ((tmpn & MARKER_MASK) <= 4 || (tmpn & MARKER_MASK) > 8))
855 *cast64_to_32 = false;
856 break;
859 if (*cast64_to_32)
860 *cmpxchg &= mask;
861 else
862 *cmpxchg >>= (64 / BITS_PER_MARKER - n->range) * BITS_PER_MARKER;
863 *cmpnop &= mask;
866 /* Zero out the bits corresponding to unused bytes in the result of the
867 gimple expression. */
868 if (rsize < n->range)
870 if (BYTES_BIG_ENDIAN)
872 mask = ((uint64_t) 1 << (rsize * BITS_PER_MARKER)) - 1;
873 *cmpxchg &= mask;
874 if (n->range - rsize == sizeof (int64_t))
875 *cmpnop = 0;
876 else
877 *cmpnop >>= (n->range - rsize) * BITS_PER_MARKER;
879 else
881 mask = ((uint64_t) 1 << (rsize * BITS_PER_MARKER)) - 1;
882 if (n->range - rsize == sizeof (int64_t))
883 *cmpxchg = 0;
884 else
885 *cmpxchg >>= (n->range - rsize) * BITS_PER_MARKER;
886 *cmpnop &= mask;
888 n->range = rsize;
891 if (*cast64_to_32)
892 n->range = 8;
893 n->range *= BITS_PER_UNIT;
896 /* Check if STMT completes a bswap implementation or a read in a given
897 endianness consisting of ORs, SHIFTs and ANDs and sets *BSWAP
898 accordingly. It also sets N to represent the kind of operations
899 performed: size of the resulting expression and whether it works on
900 a memory source, and if so alias-set and vuse. At last, the
901 function returns a stmt whose rhs's first tree is the source
902 expression. */
904 gimple *
905 find_bswap_or_nop (gimple *stmt, struct symbolic_number *n, bool *bswap,
906 bool *cast64_to_32, uint64_t *mask)
908 tree type_size = TYPE_SIZE_UNIT (TREE_TYPE (gimple_get_lhs (stmt)));
909 if (!tree_fits_uhwi_p (type_size))
910 return NULL;
912 /* The last parameter determines the depth search limit. It usually
913 correlates directly to the number n of bytes to be touched. We
914 increase that number by 2 * (log2(n) + 1) here in order to also
915 cover signed -> unsigned conversions of the src operand as can be seen
916 in libgcc, and for initial shift/and operation of the src operand. */
917 int limit = tree_to_uhwi (type_size);
918 limit += 2 * (1 + (int) ceil_log2 ((unsigned HOST_WIDE_INT) limit));
919 gimple *ins_stmt = find_bswap_or_nop_1 (stmt, n, limit);
921 if (!ins_stmt)
923 if (gimple_assign_rhs_code (stmt) != CONSTRUCTOR
924 || BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
925 return NULL;
926 unsigned HOST_WIDE_INT sz = tree_to_uhwi (type_size) * BITS_PER_UNIT;
927 if (sz != 16 && sz != 32 && sz != 64)
928 return NULL;
929 tree rhs = gimple_assign_rhs1 (stmt);
930 if (CONSTRUCTOR_NELTS (rhs) == 0)
931 return NULL;
932 tree eltype = TREE_TYPE (TREE_TYPE (rhs));
933 unsigned HOST_WIDE_INT eltsz
934 = int_size_in_bytes (eltype) * BITS_PER_UNIT;
935 if (TYPE_PRECISION (eltype) != eltsz)
936 return NULL;
937 constructor_elt *elt;
938 unsigned int i;
939 tree type = build_nonstandard_integer_type (sz, 1);
940 FOR_EACH_VEC_SAFE_ELT (CONSTRUCTOR_ELTS (rhs), i, elt)
942 if (TREE_CODE (elt->value) != SSA_NAME
943 || !INTEGRAL_TYPE_P (TREE_TYPE (elt->value)))
944 return NULL;
945 struct symbolic_number n1;
946 gimple *source_stmt
947 = find_bswap_or_nop_1 (SSA_NAME_DEF_STMT (elt->value), &n1,
948 limit - 1);
950 if (!source_stmt)
951 return NULL;
953 n1.type = type;
954 if (!n1.base_addr)
955 n1.range = sz / BITS_PER_UNIT;
957 if (i == 0)
959 ins_stmt = source_stmt;
960 *n = n1;
962 else
964 if (n->vuse != n1.vuse)
965 return NULL;
967 struct symbolic_number n0 = *n;
969 if (!BYTES_BIG_ENDIAN)
971 if (!do_shift_rotate (LSHIFT_EXPR, &n1, i * eltsz))
972 return NULL;
974 else if (!do_shift_rotate (LSHIFT_EXPR, &n0, eltsz))
975 return NULL;
976 ins_stmt
977 = perform_symbolic_merge (ins_stmt, &n0, source_stmt, &n1, n,
978 BIT_IOR_EXPR);
980 if (!ins_stmt)
981 return NULL;
986 uint64_t cmpxchg, cmpnop;
987 find_bswap_or_nop_finalize (n, &cmpxchg, &cmpnop, cast64_to_32);
989 /* A complete byte swap should make the symbolic number to start with
990 the largest digit in the highest order byte. Unchanged symbolic
991 number indicates a read with same endianness as target architecture. */
992 *mask = ~(uint64_t) 0;
993 if (n->n == cmpnop)
994 *bswap = false;
995 else if (n->n == cmpxchg)
996 *bswap = true;
997 else
999 int set = 0;
1000 for (uint64_t msk = MARKER_MASK; msk; msk <<= BITS_PER_MARKER)
1001 if ((n->n & msk) == 0)
1002 *mask &= ~msk;
1003 else if ((n->n & msk) == (cmpxchg & msk))
1004 set++;
1005 else
1006 return NULL;
1007 if (set < 2)
1008 return NULL;
1009 *bswap = true;
1012 /* Useless bit manipulation performed by code. */
1013 if (!n->base_addr && n->n == cmpnop && n->n_ops == 1)
1014 return NULL;
1016 return ins_stmt;
1019 const pass_data pass_data_optimize_bswap =
1021 GIMPLE_PASS, /* type */
1022 "bswap", /* name */
1023 OPTGROUP_NONE, /* optinfo_flags */
1024 TV_NONE, /* tv_id */
1025 PROP_ssa, /* properties_required */
1026 0, /* properties_provided */
1027 0, /* properties_destroyed */
1028 0, /* todo_flags_start */
1029 0, /* todo_flags_finish */
1032 class pass_optimize_bswap : public gimple_opt_pass
1034 public:
1035 pass_optimize_bswap (gcc::context *ctxt)
1036 : gimple_opt_pass (pass_data_optimize_bswap, ctxt)
1039 /* opt_pass methods: */
1040 bool gate (function *) final override
1042 return flag_expensive_optimizations && optimize && BITS_PER_UNIT == 8;
1045 unsigned int execute (function *) final override;
1047 }; // class pass_optimize_bswap
1049 /* Helper function for bswap_replace. Build VIEW_CONVERT_EXPR from
1050 VAL to TYPE. If VAL has different type size, emit a NOP_EXPR cast
1051 first. */
1053 static tree
1054 bswap_view_convert (gimple_stmt_iterator *gsi, tree type, tree val,
1055 bool before)
1057 gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (val))
1058 || POINTER_TYPE_P (TREE_TYPE (val)));
1059 if (TYPE_SIZE (type) != TYPE_SIZE (TREE_TYPE (val)))
1061 HOST_WIDE_INT prec = TREE_INT_CST_LOW (TYPE_SIZE (type));
1062 if (POINTER_TYPE_P (TREE_TYPE (val)))
1064 gimple *g
1065 = gimple_build_assign (make_ssa_name (pointer_sized_int_node),
1066 NOP_EXPR, val);
1067 if (before)
1068 gsi_insert_before (gsi, g, GSI_SAME_STMT);
1069 else
1070 gsi_insert_after (gsi, g, GSI_NEW_STMT);
1071 val = gimple_assign_lhs (g);
1073 tree itype = build_nonstandard_integer_type (prec, 1);
1074 gimple *g = gimple_build_assign (make_ssa_name (itype), NOP_EXPR, val);
1075 if (before)
1076 gsi_insert_before (gsi, g, GSI_SAME_STMT);
1077 else
1078 gsi_insert_after (gsi, g, GSI_NEW_STMT);
1079 val = gimple_assign_lhs (g);
1081 return build1 (VIEW_CONVERT_EXPR, type, val);
1084 /* Perform the bswap optimization: replace the expression computed in the rhs
1085 of gsi_stmt (GSI) (or if NULL add instead of replace) by an equivalent
1086 bswap, load or load + bswap expression.
1087 Which of these alternatives replace the rhs is given by N->base_addr (non
1088 null if a load is needed) and BSWAP. The type, VUSE and set-alias of the
1089 load to perform are also given in N while the builtin bswap invoke is given
1090 in FNDEL. Finally, if a load is involved, INS_STMT refers to one of the
1091 load statements involved to construct the rhs in gsi_stmt (GSI) and
1092 N->range gives the size of the rhs expression for maintaining some
1093 statistics.
1095 Note that if the replacement involve a load and if gsi_stmt (GSI) is
1096 non-NULL, that stmt is moved just after INS_STMT to do the load with the
1097 same VUSE which can lead to gsi_stmt (GSI) changing of basic block. */
1099 tree
1100 bswap_replace (gimple_stmt_iterator gsi, gimple *ins_stmt, tree fndecl,
1101 tree bswap_type, tree load_type, struct symbolic_number *n,
1102 bool bswap, uint64_t mask)
1104 tree src, tmp, tgt = NULL_TREE;
1105 gimple *bswap_stmt, *mask_stmt = NULL;
1106 tree_code conv_code = NOP_EXPR;
1108 gimple *cur_stmt = gsi_stmt (gsi);
1109 src = n->src;
1110 if (cur_stmt)
1112 tgt = gimple_assign_lhs (cur_stmt);
1113 if (gimple_assign_rhs_code (cur_stmt) == CONSTRUCTOR
1114 && tgt
1115 && VECTOR_TYPE_P (TREE_TYPE (tgt)))
1116 conv_code = VIEW_CONVERT_EXPR;
1119 /* Need to load the value from memory first. */
1120 if (n->base_addr)
1122 gimple_stmt_iterator gsi_ins = gsi;
1123 if (ins_stmt)
1124 gsi_ins = gsi_for_stmt (ins_stmt);
1125 tree addr_expr, addr_tmp, val_expr, val_tmp;
1126 tree load_offset_ptr, aligned_load_type;
1127 gimple *load_stmt;
1128 unsigned align = get_object_alignment (src);
1129 poly_int64 load_offset = 0;
1131 if (cur_stmt)
1133 basic_block ins_bb = gimple_bb (ins_stmt);
1134 basic_block cur_bb = gimple_bb (cur_stmt);
1135 if (!dominated_by_p (CDI_DOMINATORS, cur_bb, ins_bb))
1136 return NULL_TREE;
1138 /* Move cur_stmt just before one of the load of the original
1139 to ensure it has the same VUSE. See PR61517 for what could
1140 go wrong. */
1141 if (gimple_bb (cur_stmt) != gimple_bb (ins_stmt))
1142 reset_flow_sensitive_info (gimple_assign_lhs (cur_stmt));
1143 gsi_move_before (&gsi, &gsi_ins);
1144 gsi = gsi_for_stmt (cur_stmt);
1146 else
1147 gsi = gsi_ins;
1149 /* Compute address to load from and cast according to the size
1150 of the load. */
1151 addr_expr = build_fold_addr_expr (src);
1152 if (is_gimple_mem_ref_addr (addr_expr))
1153 addr_tmp = unshare_expr (addr_expr);
1154 else
1156 addr_tmp = unshare_expr (n->base_addr);
1157 if (!is_gimple_mem_ref_addr (addr_tmp))
1158 addr_tmp = force_gimple_operand_gsi_1 (&gsi, addr_tmp,
1159 is_gimple_mem_ref_addr,
1160 NULL_TREE, true,
1161 GSI_SAME_STMT);
1162 load_offset = n->bytepos;
1163 if (n->offset)
1165 tree off
1166 = force_gimple_operand_gsi (&gsi, unshare_expr (n->offset),
1167 true, NULL_TREE, true,
1168 GSI_SAME_STMT);
1169 gimple *stmt
1170 = gimple_build_assign (make_ssa_name (TREE_TYPE (addr_tmp)),
1171 POINTER_PLUS_EXPR, addr_tmp, off);
1172 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1173 addr_tmp = gimple_assign_lhs (stmt);
1177 /* Perform the load. */
1178 aligned_load_type = load_type;
1179 if (align < TYPE_ALIGN (load_type))
1180 aligned_load_type = build_aligned_type (load_type, align);
1181 load_offset_ptr = build_int_cst (n->alias_set, load_offset);
1182 val_expr = fold_build2 (MEM_REF, aligned_load_type, addr_tmp,
1183 load_offset_ptr);
1185 if (!bswap)
1187 if (n->range == 16)
1188 nop_stats.found_16bit++;
1189 else if (n->range == 32)
1190 nop_stats.found_32bit++;
1191 else
1193 gcc_assert (n->range == 64);
1194 nop_stats.found_64bit++;
1197 /* Convert the result of load if necessary. */
1198 if (tgt && !useless_type_conversion_p (TREE_TYPE (tgt), load_type))
1200 val_tmp = make_temp_ssa_name (aligned_load_type, NULL,
1201 "load_dst");
1202 load_stmt = gimple_build_assign (val_tmp, val_expr);
1203 gimple_set_vuse (load_stmt, n->vuse);
1204 gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT);
1205 if (conv_code == VIEW_CONVERT_EXPR)
1206 val_tmp = bswap_view_convert (&gsi, TREE_TYPE (tgt), val_tmp,
1207 true);
1208 gimple_assign_set_rhs_with_ops (&gsi, conv_code, val_tmp);
1209 update_stmt (cur_stmt);
1211 else if (cur_stmt)
1213 gimple_assign_set_rhs_with_ops (&gsi, MEM_REF, val_expr);
1214 gimple_set_vuse (cur_stmt, n->vuse);
1215 update_stmt (cur_stmt);
1217 else
1219 tgt = make_ssa_name (load_type);
1220 cur_stmt = gimple_build_assign (tgt, MEM_REF, val_expr);
1221 gimple_set_vuse (cur_stmt, n->vuse);
1222 gsi_insert_before (&gsi, cur_stmt, GSI_SAME_STMT);
1225 if (dump_file)
1227 fprintf (dump_file,
1228 "%d bit load in target endianness found at: ",
1229 (int) n->range);
1230 print_gimple_stmt (dump_file, cur_stmt, 0);
1232 return tgt;
1234 else
1236 val_tmp = make_temp_ssa_name (aligned_load_type, NULL, "load_dst");
1237 load_stmt = gimple_build_assign (val_tmp, val_expr);
1238 gimple_set_vuse (load_stmt, n->vuse);
1239 gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT);
1241 src = val_tmp;
1243 else if (!bswap)
1245 gimple *g = NULL;
1246 if (tgt && !useless_type_conversion_p (TREE_TYPE (tgt), TREE_TYPE (src)))
1248 if (!is_gimple_val (src))
1249 return NULL_TREE;
1250 if (conv_code == VIEW_CONVERT_EXPR)
1251 src = bswap_view_convert (&gsi, TREE_TYPE (tgt), src, true);
1252 g = gimple_build_assign (tgt, conv_code, src);
1254 else if (cur_stmt)
1255 g = gimple_build_assign (tgt, src);
1256 else
1257 tgt = src;
1258 if (n->range == 16)
1259 nop_stats.found_16bit++;
1260 else if (n->range == 32)
1261 nop_stats.found_32bit++;
1262 else
1264 gcc_assert (n->range == 64);
1265 nop_stats.found_64bit++;
1267 if (dump_file)
1269 fprintf (dump_file,
1270 "%d bit reshuffle in target endianness found at: ",
1271 (int) n->range);
1272 if (cur_stmt)
1273 print_gimple_stmt (dump_file, cur_stmt, 0);
1274 else
1276 print_generic_expr (dump_file, tgt, TDF_NONE);
1277 fprintf (dump_file, "\n");
1280 if (cur_stmt)
1281 gsi_replace (&gsi, g, true);
1282 return tgt;
1284 else if (TREE_CODE (src) == BIT_FIELD_REF)
1285 src = TREE_OPERAND (src, 0);
1287 if (n->range == 16)
1288 bswap_stats.found_16bit++;
1289 else if (n->range == 32)
1290 bswap_stats.found_32bit++;
1291 else
1293 gcc_assert (n->range == 64);
1294 bswap_stats.found_64bit++;
1297 tmp = src;
1299 /* Convert the src expression if necessary. */
1300 if (!useless_type_conversion_p (TREE_TYPE (tmp), bswap_type))
1302 gimple *convert_stmt;
1304 tmp = make_temp_ssa_name (bswap_type, NULL, "bswapsrc");
1305 convert_stmt = gimple_build_assign (tmp, NOP_EXPR, src);
1306 gsi_insert_before (&gsi, convert_stmt, GSI_SAME_STMT);
1309 /* Canonical form for 16 bit bswap is a rotate expression. Only 16bit values
1310 are considered as rotation of 2N bit values by N bits is generally not
1311 equivalent to a bswap. Consider for instance 0x01020304 r>> 16 which
1312 gives 0x03040102 while a bswap for that value is 0x04030201. */
1313 if (bswap && n->range == 16)
1315 tree count = build_int_cst (NULL, BITS_PER_UNIT);
1316 src = fold_build2 (LROTATE_EXPR, bswap_type, tmp, count);
1317 bswap_stmt = gimple_build_assign (NULL, src);
1319 else
1320 bswap_stmt = gimple_build_call (fndecl, 1, tmp);
1322 if (tgt == NULL_TREE)
1323 tgt = make_ssa_name (bswap_type);
1324 tmp = tgt;
1326 if (mask != ~(uint64_t) 0)
1328 tree m = build_int_cst (bswap_type, mask);
1329 tmp = make_temp_ssa_name (bswap_type, NULL, "bswapdst");
1330 gimple_set_lhs (bswap_stmt, tmp);
1331 mask_stmt = gimple_build_assign (tgt, BIT_AND_EXPR, tmp, m);
1332 tmp = tgt;
1335 /* Convert the result if necessary. */
1336 if (!useless_type_conversion_p (TREE_TYPE (tgt), bswap_type))
1338 tmp = make_temp_ssa_name (bswap_type, NULL, "bswapdst");
1339 tree atmp = tmp;
1340 gimple_stmt_iterator gsi2 = gsi;
1341 if (conv_code == VIEW_CONVERT_EXPR)
1342 atmp = bswap_view_convert (&gsi2, TREE_TYPE (tgt), tmp, false);
1343 gimple *convert_stmt = gimple_build_assign (tgt, conv_code, atmp);
1344 gsi_insert_after (&gsi2, convert_stmt, GSI_SAME_STMT);
1347 gimple_set_lhs (mask_stmt ? mask_stmt : bswap_stmt, tmp);
1349 if (dump_file)
1351 fprintf (dump_file, "%d bit bswap implementation found at: ",
1352 (int) n->range);
1353 if (cur_stmt)
1354 print_gimple_stmt (dump_file, cur_stmt, 0);
1355 else
1357 print_generic_expr (dump_file, tgt, TDF_NONE);
1358 fprintf (dump_file, "\n");
1362 if (cur_stmt)
1364 if (mask_stmt)
1365 gsi_insert_after (&gsi, mask_stmt, GSI_SAME_STMT);
1366 gsi_insert_after (&gsi, bswap_stmt, GSI_SAME_STMT);
1367 gsi_remove (&gsi, true);
1369 else
1371 gsi_insert_before (&gsi, bswap_stmt, GSI_SAME_STMT);
1372 if (mask_stmt)
1373 gsi_insert_before (&gsi, mask_stmt, GSI_SAME_STMT);
1375 return tgt;
1378 /* Try to optimize an assignment CUR_STMT with CONSTRUCTOR on the rhs
1379 using bswap optimizations. CDI_DOMINATORS need to be
1380 computed on entry. Return true if it has been optimized and
1381 TODO_update_ssa is needed. */
1383 static bool
1384 maybe_optimize_vector_constructor (gimple *cur_stmt)
1386 tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type;
1387 struct symbolic_number n;
1388 bool bswap;
1390 gcc_assert (is_gimple_assign (cur_stmt)
1391 && gimple_assign_rhs_code (cur_stmt) == CONSTRUCTOR);
1393 tree rhs = gimple_assign_rhs1 (cur_stmt);
1394 if (!VECTOR_TYPE_P (TREE_TYPE (rhs))
1395 || !INTEGRAL_TYPE_P (TREE_TYPE (TREE_TYPE (rhs)))
1396 || gimple_assign_lhs (cur_stmt) == NULL_TREE)
1397 return false;
1399 HOST_WIDE_INT sz = int_size_in_bytes (TREE_TYPE (rhs)) * BITS_PER_UNIT;
1400 switch (sz)
1402 case 16:
1403 load_type = bswap_type = uint16_type_node;
1404 break;
1405 case 32:
1406 if (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
1407 && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing)
1409 load_type = uint32_type_node;
1410 fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
1411 bswap_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
1413 else
1414 return false;
1415 break;
1416 case 64:
1417 if (builtin_decl_explicit_p (BUILT_IN_BSWAP64)
1418 && (optab_handler (bswap_optab, DImode) != CODE_FOR_nothing
1419 || (word_mode == SImode
1420 && builtin_decl_explicit_p (BUILT_IN_BSWAP32)
1421 && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing)))
1423 load_type = uint64_type_node;
1424 fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
1425 bswap_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
1427 else
1428 return false;
1429 break;
1430 default:
1431 return false;
1434 bool cast64_to_32;
1435 uint64_t mask;
1436 gimple *ins_stmt = find_bswap_or_nop (cur_stmt, &n, &bswap,
1437 &cast64_to_32, &mask);
1438 if (!ins_stmt
1439 || n.range != (unsigned HOST_WIDE_INT) sz
1440 || cast64_to_32
1441 || mask != ~(uint64_t) 0)
1442 return false;
1444 if (bswap && !fndecl && n.range != 16)
1445 return false;
1447 memset (&nop_stats, 0, sizeof (nop_stats));
1448 memset (&bswap_stats, 0, sizeof (bswap_stats));
1449 return bswap_replace (gsi_for_stmt (cur_stmt), ins_stmt, fndecl,
1450 bswap_type, load_type, &n, bswap, mask) != NULL_TREE;
1453 /* Find manual byte swap implementations as well as load in a given
1454 endianness. Byte swaps are turned into a bswap builtin invokation
1455 while endian loads are converted to bswap builtin invokation or
1456 simple load according to the target endianness. */
1458 unsigned int
1459 pass_optimize_bswap::execute (function *fun)
1461 basic_block bb;
1462 bool bswap32_p, bswap64_p;
1463 bool changed = false;
1464 tree bswap32_type = NULL_TREE, bswap64_type = NULL_TREE;
1466 bswap32_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
1467 && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing);
1468 bswap64_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP64)
1469 && (optab_handler (bswap_optab, DImode) != CODE_FOR_nothing
1470 || (bswap32_p && word_mode == SImode)));
1472 /* Determine the argument type of the builtins. The code later on
1473 assumes that the return and argument type are the same. */
1474 if (bswap32_p)
1476 tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
1477 bswap32_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
1480 if (bswap64_p)
1482 tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
1483 bswap64_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
1486 memset (&nop_stats, 0, sizeof (nop_stats));
1487 memset (&bswap_stats, 0, sizeof (bswap_stats));
1488 calculate_dominance_info (CDI_DOMINATORS);
1490 FOR_EACH_BB_FN (bb, fun)
1492 gimple_stmt_iterator gsi;
1494 /* We do a reverse scan for bswap patterns to make sure we get the
1495 widest match. As bswap pattern matching doesn't handle previously
1496 inserted smaller bswap replacements as sub-patterns, the wider
1497 variant wouldn't be detected. */
1498 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi);)
1500 gimple *ins_stmt, *cur_stmt = gsi_stmt (gsi);
1501 tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type;
1502 enum tree_code code;
1503 struct symbolic_number n;
1504 bool bswap, cast64_to_32;
1505 uint64_t mask;
1507 /* This gsi_prev (&gsi) is not part of the for loop because cur_stmt
1508 might be moved to a different basic block by bswap_replace and gsi
1509 must not points to it if that's the case. Moving the gsi_prev
1510 there make sure that gsi points to the statement previous to
1511 cur_stmt while still making sure that all statements are
1512 considered in this basic block. */
1513 gsi_prev (&gsi);
1515 if (!is_gimple_assign (cur_stmt))
1516 continue;
1518 code = gimple_assign_rhs_code (cur_stmt);
1519 switch (code)
1521 case LROTATE_EXPR:
1522 case RROTATE_EXPR:
1523 if (!tree_fits_uhwi_p (gimple_assign_rhs2 (cur_stmt))
1524 || tree_to_uhwi (gimple_assign_rhs2 (cur_stmt))
1525 % BITS_PER_UNIT)
1526 continue;
1527 /* Fall through. */
1528 case BIT_IOR_EXPR:
1529 case BIT_XOR_EXPR:
1530 case PLUS_EXPR:
1531 break;
1532 case CONSTRUCTOR:
1534 tree rhs = gimple_assign_rhs1 (cur_stmt);
1535 if (VECTOR_TYPE_P (TREE_TYPE (rhs))
1536 && INTEGRAL_TYPE_P (TREE_TYPE (TREE_TYPE (rhs))))
1537 break;
1539 continue;
1540 default:
1541 continue;
1544 ins_stmt = find_bswap_or_nop (cur_stmt, &n, &bswap,
1545 &cast64_to_32, &mask);
1547 if (!ins_stmt)
1548 continue;
1550 switch (n.range)
1552 case 16:
1553 /* Already in canonical form, nothing to do. */
1554 if (code == LROTATE_EXPR || code == RROTATE_EXPR)
1555 continue;
1556 load_type = bswap_type = uint16_type_node;
1557 break;
1558 case 32:
1559 load_type = uint32_type_node;
1560 if (bswap32_p)
1562 fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
1563 bswap_type = bswap32_type;
1565 break;
1566 case 64:
1567 load_type = uint64_type_node;
1568 if (bswap64_p)
1570 fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
1571 bswap_type = bswap64_type;
1573 break;
1574 default:
1575 continue;
1578 if (bswap && !fndecl && n.range != 16)
1579 continue;
1581 if (bswap_replace (gsi_for_stmt (cur_stmt), ins_stmt, fndecl,
1582 bswap_type, load_type, &n, bswap, mask))
1583 changed = true;
1587 statistics_counter_event (fun, "16-bit nop implementations found",
1588 nop_stats.found_16bit);
1589 statistics_counter_event (fun, "32-bit nop implementations found",
1590 nop_stats.found_32bit);
1591 statistics_counter_event (fun, "64-bit nop implementations found",
1592 nop_stats.found_64bit);
1593 statistics_counter_event (fun, "16-bit bswap implementations found",
1594 bswap_stats.found_16bit);
1595 statistics_counter_event (fun, "32-bit bswap implementations found",
1596 bswap_stats.found_32bit);
1597 statistics_counter_event (fun, "64-bit bswap implementations found",
1598 bswap_stats.found_64bit);
1600 return (changed ? TODO_update_ssa : 0);
1603 } // anon namespace
1605 gimple_opt_pass *
1606 make_pass_optimize_bswap (gcc::context *ctxt)
1608 return new pass_optimize_bswap (ctxt);
1611 namespace {
1613 /* Struct recording one operand for the store, which is either a constant,
1614 then VAL represents the constant and all the other fields are zero, or
1615 a memory load, then VAL represents the reference, BASE_ADDR is non-NULL
1616 and the other fields also reflect the memory load, or an SSA name, then
1617 VAL represents the SSA name and all the other fields are zero. */
1619 class store_operand_info
1621 public:
1622 tree val;
1623 tree base_addr;
1624 poly_uint64 bitsize;
1625 poly_uint64 bitpos;
1626 poly_uint64 bitregion_start;
1627 poly_uint64 bitregion_end;
1628 gimple *stmt;
1629 bool bit_not_p;
1630 store_operand_info ();
1633 store_operand_info::store_operand_info ()
1634 : val (NULL_TREE), base_addr (NULL_TREE), bitsize (0), bitpos (0),
1635 bitregion_start (0), bitregion_end (0), stmt (NULL), bit_not_p (false)
1639 /* Struct recording the information about a single store of an immediate
1640 to memory. These are created in the first phase and coalesced into
1641 merged_store_group objects in the second phase. */
1643 class store_immediate_info
1645 public:
1646 unsigned HOST_WIDE_INT bitsize;
1647 unsigned HOST_WIDE_INT bitpos;
1648 unsigned HOST_WIDE_INT bitregion_start;
1649 /* This is one past the last bit of the bit region. */
1650 unsigned HOST_WIDE_INT bitregion_end;
1651 gimple *stmt;
1652 unsigned int order;
1653 /* INTEGER_CST for constant store, STRING_CST for string store,
1654 MEM_REF for memory copy, BIT_*_EXPR for logical bitwise operation,
1655 BIT_INSERT_EXPR for bit insertion.
1656 LROTATE_EXPR if it can be only bswap optimized and
1657 ops are not really meaningful.
1658 NOP_EXPR if bswap optimization detected identity, ops
1659 are not meaningful. */
1660 enum tree_code rhs_code;
1661 /* Two fields for bswap optimization purposes. */
1662 struct symbolic_number n;
1663 gimple *ins_stmt;
1664 /* True if BIT_{AND,IOR,XOR}_EXPR result is inverted before storing. */
1665 bool bit_not_p;
1666 /* True if ops have been swapped and thus ops[1] represents
1667 rhs1 of BIT_{AND,IOR,XOR}_EXPR and ops[0] represents rhs2. */
1668 bool ops_swapped_p;
1669 /* The index number of the landing pad, or 0 if there is none. */
1670 int lp_nr;
1671 /* Operands. For BIT_*_EXPR rhs_code both operands are used, otherwise
1672 just the first one. */
1673 store_operand_info ops[2];
1674 store_immediate_info (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
1675 unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
1676 gimple *, unsigned int, enum tree_code,
1677 struct symbolic_number &, gimple *, bool, int,
1678 const store_operand_info &,
1679 const store_operand_info &);
1682 store_immediate_info::store_immediate_info (unsigned HOST_WIDE_INT bs,
1683 unsigned HOST_WIDE_INT bp,
1684 unsigned HOST_WIDE_INT brs,
1685 unsigned HOST_WIDE_INT bre,
1686 gimple *st,
1687 unsigned int ord,
1688 enum tree_code rhscode,
1689 struct symbolic_number &nr,
1690 gimple *ins_stmtp,
1691 bool bitnotp,
1692 int nr2,
1693 const store_operand_info &op0r,
1694 const store_operand_info &op1r)
1695 : bitsize (bs), bitpos (bp), bitregion_start (brs), bitregion_end (bre),
1696 stmt (st), order (ord), rhs_code (rhscode), n (nr),
1697 ins_stmt (ins_stmtp), bit_not_p (bitnotp), ops_swapped_p (false),
1698 lp_nr (nr2), ops { op0r, op1r }
1702 /* Struct representing a group of stores to contiguous memory locations.
1703 These are produced by the second phase (coalescing) and consumed in the
1704 third phase that outputs the widened stores. */
1706 class merged_store_group
1708 public:
1709 unsigned HOST_WIDE_INT start;
1710 unsigned HOST_WIDE_INT width;
1711 unsigned HOST_WIDE_INT bitregion_start;
1712 unsigned HOST_WIDE_INT bitregion_end;
1713 /* The size of the allocated memory for val and mask. */
1714 unsigned HOST_WIDE_INT buf_size;
1715 unsigned HOST_WIDE_INT align_base;
1716 poly_uint64 load_align_base[2];
1718 unsigned int align;
1719 unsigned int load_align[2];
1720 unsigned int first_order;
1721 unsigned int last_order;
1722 bool bit_insertion;
1723 bool string_concatenation;
1724 bool only_constants;
1725 bool consecutive;
1726 unsigned int first_nonmergeable_order;
1727 int lp_nr;
1729 auto_vec<store_immediate_info *> stores;
1730 /* We record the first and last original statements in the sequence because
1731 we'll need their vuse/vdef and replacement position. It's easier to keep
1732 track of them separately as 'stores' is reordered by apply_stores. */
1733 gimple *last_stmt;
1734 gimple *first_stmt;
1735 unsigned char *val;
1736 unsigned char *mask;
1738 merged_store_group (store_immediate_info *);
1739 ~merged_store_group ();
1740 bool can_be_merged_into (store_immediate_info *);
1741 void merge_into (store_immediate_info *);
1742 void merge_overlapping (store_immediate_info *);
1743 bool apply_stores ();
1744 private:
1745 void do_merge (store_immediate_info *);
1748 /* Debug helper. Dump LEN elements of byte array PTR to FD in hex. */
1750 static void
1751 dump_char_array (FILE *fd, unsigned char *ptr, unsigned int len)
1753 if (!fd)
1754 return;
1756 for (unsigned int i = 0; i < len; i++)
1757 fprintf (fd, "%02x ", ptr[i]);
1758 fprintf (fd, "\n");
1761 /* Clear out LEN bits starting from bit START in the byte array
1762 PTR. This clears the bits to the *right* from START.
1763 START must be within [0, BITS_PER_UNIT) and counts starting from
1764 the least significant bit. */
1766 static void
1767 clear_bit_region_be (unsigned char *ptr, unsigned int start,
1768 unsigned int len)
1770 if (len == 0)
1771 return;
1772 /* Clear len bits to the right of start. */
1773 else if (len <= start + 1)
1775 unsigned char mask = (~(~0U << len));
1776 mask = mask << (start + 1U - len);
1777 ptr[0] &= ~mask;
1779 else if (start != BITS_PER_UNIT - 1)
1781 clear_bit_region_be (ptr, start, (start % BITS_PER_UNIT) + 1);
1782 clear_bit_region_be (ptr + 1, BITS_PER_UNIT - 1,
1783 len - (start % BITS_PER_UNIT) - 1);
1785 else if (start == BITS_PER_UNIT - 1
1786 && len > BITS_PER_UNIT)
1788 unsigned int nbytes = len / BITS_PER_UNIT;
1789 memset (ptr, 0, nbytes);
1790 if (len % BITS_PER_UNIT != 0)
1791 clear_bit_region_be (ptr + nbytes, BITS_PER_UNIT - 1,
1792 len % BITS_PER_UNIT);
1794 else
1795 gcc_unreachable ();
1798 /* In the byte array PTR clear the bit region starting at bit
1799 START and is LEN bits wide.
1800 For regions spanning multiple bytes do this recursively until we reach
1801 zero LEN or a region contained within a single byte. */
1803 static void
1804 clear_bit_region (unsigned char *ptr, unsigned int start,
1805 unsigned int len)
1807 /* Degenerate base case. */
1808 if (len == 0)
1809 return;
1810 else if (start >= BITS_PER_UNIT)
1811 clear_bit_region (ptr + 1, start - BITS_PER_UNIT, len);
1812 /* Second base case. */
1813 else if ((start + len) <= BITS_PER_UNIT)
1815 unsigned char mask = (~0U) << (unsigned char) (BITS_PER_UNIT - len);
1816 mask >>= BITS_PER_UNIT - (start + len);
1818 ptr[0] &= ~mask;
1820 return;
1822 /* Clear most significant bits in a byte and proceed with the next byte. */
1823 else if (start != 0)
1825 clear_bit_region (ptr, start, BITS_PER_UNIT - start);
1826 clear_bit_region (ptr + 1, 0, len - (BITS_PER_UNIT - start));
1828 /* Whole bytes need to be cleared. */
1829 else if (start == 0 && len > BITS_PER_UNIT)
1831 unsigned int nbytes = len / BITS_PER_UNIT;
1832 /* We could recurse on each byte but we clear whole bytes, so a simple
1833 memset will do. */
1834 memset (ptr, '\0', nbytes);
1835 /* Clear the remaining sub-byte region if there is one. */
1836 if (len % BITS_PER_UNIT != 0)
1837 clear_bit_region (ptr + nbytes, 0, len % BITS_PER_UNIT);
1839 else
1840 gcc_unreachable ();
1843 /* Write BITLEN bits of EXPR to the byte array PTR at
1844 bit position BITPOS. PTR should contain TOTAL_BYTES elements.
1845 Return true if the operation succeeded. */
1847 static bool
1848 encode_tree_to_bitpos (tree expr, unsigned char *ptr, int bitlen, int bitpos,
1849 unsigned int total_bytes)
1851 unsigned int first_byte = bitpos / BITS_PER_UNIT;
1852 bool sub_byte_op_p = ((bitlen % BITS_PER_UNIT)
1853 || (bitpos % BITS_PER_UNIT)
1854 || !int_mode_for_size (bitlen, 0).exists ());
1855 bool empty_ctor_p
1856 = (TREE_CODE (expr) == CONSTRUCTOR
1857 && CONSTRUCTOR_NELTS (expr) == 0
1858 && TYPE_SIZE_UNIT (TREE_TYPE (expr))
1859 && tree_fits_uhwi_p (TYPE_SIZE_UNIT (TREE_TYPE (expr))));
1861 if (!sub_byte_op_p)
1863 if (first_byte >= total_bytes)
1864 return false;
1865 total_bytes -= first_byte;
1866 if (empty_ctor_p)
1868 unsigned HOST_WIDE_INT rhs_bytes
1869 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (expr)));
1870 if (rhs_bytes > total_bytes)
1871 return false;
1872 memset (ptr + first_byte, '\0', rhs_bytes);
1873 return true;
1875 return native_encode_expr (expr, ptr + first_byte, total_bytes) != 0;
1878 /* LITTLE-ENDIAN
1879 We are writing a non byte-sized quantity or at a position that is not
1880 at a byte boundary.
1881 |--------|--------|--------| ptr + first_byte
1883 xxx xxxxxxxx xxx< bp>
1884 |______EXPR____|
1886 First native_encode_expr EXPR into a temporary buffer and shift each
1887 byte in the buffer by 'bp' (carrying the bits over as necessary).
1888 |00000000|00xxxxxx|xxxxxxxx| << bp = |000xxxxx|xxxxxxxx|xxx00000|
1889 <------bitlen---->< bp>
1890 Then we clear the destination bits:
1891 |---00000|00000000|000-----| ptr + first_byte
1892 <-------bitlen--->< bp>
1894 Finally we ORR the bytes of the shifted EXPR into the cleared region:
1895 |---xxxxx||xxxxxxxx||xxx-----| ptr + first_byte.
1897 BIG-ENDIAN
1898 We are writing a non byte-sized quantity or at a position that is not
1899 at a byte boundary.
1900 ptr + first_byte |--------|--------|--------|
1902 <bp >xxx xxxxxxxx xxx
1903 |_____EXPR_____|
1905 First native_encode_expr EXPR into a temporary buffer and shift each
1906 byte in the buffer to the right by (carrying the bits over as necessary).
1907 We shift by as much as needed to align the most significant bit of EXPR
1908 with bitpos:
1909 |00xxxxxx|xxxxxxxx| >> 3 = |00000xxx|xxxxxxxx|xxxxx000|
1910 <---bitlen----> <bp ><-----bitlen----->
1911 Then we clear the destination bits:
1912 ptr + first_byte |-----000||00000000||00000---|
1913 <bp ><-------bitlen----->
1915 Finally we ORR the bytes of the shifted EXPR into the cleared region:
1916 ptr + first_byte |---xxxxx||xxxxxxxx||xxx-----|.
1917 The awkwardness comes from the fact that bitpos is counted from the
1918 most significant bit of a byte. */
1920 /* We must be dealing with fixed-size data at this point, since the
1921 total size is also fixed. */
1922 unsigned int byte_size;
1923 if (empty_ctor_p)
1925 unsigned HOST_WIDE_INT rhs_bytes
1926 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (expr)));
1927 if (rhs_bytes > total_bytes)
1928 return false;
1929 byte_size = rhs_bytes;
1931 else
1933 fixed_size_mode mode
1934 = as_a <fixed_size_mode> (TYPE_MODE (TREE_TYPE (expr)));
1935 byte_size
1936 = mode == BLKmode
1937 ? tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (expr)))
1938 : GET_MODE_SIZE (mode);
1940 /* Allocate an extra byte so that we have space to shift into. */
1941 byte_size++;
1942 unsigned char *tmpbuf = XALLOCAVEC (unsigned char, byte_size);
1943 memset (tmpbuf, '\0', byte_size);
1944 /* The store detection code should only have allowed constants that are
1945 accepted by native_encode_expr or empty ctors. */
1946 if (!empty_ctor_p
1947 && native_encode_expr (expr, tmpbuf, byte_size - 1) == 0)
1948 gcc_unreachable ();
1950 /* The native_encode_expr machinery uses TYPE_MODE to determine how many
1951 bytes to write. This means it can write more than
1952 ROUND_UP (bitlen, BITS_PER_UNIT) / BITS_PER_UNIT bytes (for example
1953 write 8 bytes for a bitlen of 40). Skip the bytes that are not within
1954 bitlen and zero out the bits that are not relevant as well (that may
1955 contain a sign bit due to sign-extension). */
1956 unsigned int padding
1957 = byte_size - ROUND_UP (bitlen, BITS_PER_UNIT) / BITS_PER_UNIT - 1;
1958 /* On big-endian the padding is at the 'front' so just skip the initial
1959 bytes. */
1960 if (BYTES_BIG_ENDIAN)
1961 tmpbuf += padding;
1963 byte_size -= padding;
1965 if (bitlen % BITS_PER_UNIT != 0)
1967 if (BYTES_BIG_ENDIAN)
1968 clear_bit_region_be (tmpbuf, BITS_PER_UNIT - 1,
1969 BITS_PER_UNIT - (bitlen % BITS_PER_UNIT));
1970 else
1971 clear_bit_region (tmpbuf, bitlen,
1972 byte_size * BITS_PER_UNIT - bitlen);
1974 /* Left shifting relies on the last byte being clear if bitlen is
1975 a multiple of BITS_PER_UNIT, which might not be clear if
1976 there are padding bytes. */
1977 else if (!BYTES_BIG_ENDIAN)
1978 tmpbuf[byte_size - 1] = '\0';
1980 /* Clear the bit region in PTR where the bits from TMPBUF will be
1981 inserted into. */
1982 if (BYTES_BIG_ENDIAN)
1983 clear_bit_region_be (ptr + first_byte,
1984 BITS_PER_UNIT - 1 - (bitpos % BITS_PER_UNIT), bitlen);
1985 else
1986 clear_bit_region (ptr + first_byte, bitpos % BITS_PER_UNIT, bitlen);
1988 int shift_amnt;
1989 int bitlen_mod = bitlen % BITS_PER_UNIT;
1990 int bitpos_mod = bitpos % BITS_PER_UNIT;
1992 bool skip_byte = false;
1993 if (BYTES_BIG_ENDIAN)
1995 /* BITPOS and BITLEN are exactly aligned and no shifting
1996 is necessary. */
1997 if (bitpos_mod + bitlen_mod == BITS_PER_UNIT
1998 || (bitpos_mod == 0 && bitlen_mod == 0))
1999 shift_amnt = 0;
2000 /* |. . . . . . . .|
2001 <bp > <blen >.
2002 We always shift right for BYTES_BIG_ENDIAN so shift the beginning
2003 of the value until it aligns with 'bp' in the next byte over. */
2004 else if (bitpos_mod + bitlen_mod < BITS_PER_UNIT)
2006 shift_amnt = bitlen_mod + bitpos_mod;
2007 skip_byte = bitlen_mod != 0;
2009 /* |. . . . . . . .|
2010 <----bp--->
2011 <---blen---->.
2012 Shift the value right within the same byte so it aligns with 'bp'. */
2013 else
2014 shift_amnt = bitlen_mod + bitpos_mod - BITS_PER_UNIT;
2016 else
2017 shift_amnt = bitpos % BITS_PER_UNIT;
2019 /* Create the shifted version of EXPR. */
2020 if (!BYTES_BIG_ENDIAN)
2022 shift_bytes_in_array_left (tmpbuf, byte_size, shift_amnt);
2023 if (shift_amnt == 0)
2024 byte_size--;
2026 else
2028 gcc_assert (BYTES_BIG_ENDIAN);
2029 shift_bytes_in_array_right (tmpbuf, byte_size, shift_amnt);
2030 /* If shifting right forced us to move into the next byte skip the now
2031 empty byte. */
2032 if (skip_byte)
2034 tmpbuf++;
2035 byte_size--;
2039 /* Insert the bits from TMPBUF. */
2040 for (unsigned int i = 0; i < byte_size; i++)
2041 ptr[first_byte + i] |= tmpbuf[i];
2043 return true;
2046 /* Sorting function for store_immediate_info objects.
2047 Sorts them by bitposition. */
2049 static int
2050 sort_by_bitpos (const void *x, const void *y)
2052 store_immediate_info *const *tmp = (store_immediate_info * const *) x;
2053 store_immediate_info *const *tmp2 = (store_immediate_info * const *) y;
2055 if ((*tmp)->bitpos < (*tmp2)->bitpos)
2056 return -1;
2057 else if ((*tmp)->bitpos > (*tmp2)->bitpos)
2058 return 1;
2059 else
2060 /* If they are the same let's use the order which is guaranteed to
2061 be different. */
2062 return (*tmp)->order - (*tmp2)->order;
2065 /* Sorting function for store_immediate_info objects.
2066 Sorts them by the order field. */
2068 static int
2069 sort_by_order (const void *x, const void *y)
2071 store_immediate_info *const *tmp = (store_immediate_info * const *) x;
2072 store_immediate_info *const *tmp2 = (store_immediate_info * const *) y;
2074 if ((*tmp)->order < (*tmp2)->order)
2075 return -1;
2076 else if ((*tmp)->order > (*tmp2)->order)
2077 return 1;
2079 gcc_unreachable ();
2082 /* Initialize a merged_store_group object from a store_immediate_info
2083 object. */
2085 merged_store_group::merged_store_group (store_immediate_info *info)
2087 start = info->bitpos;
2088 width = info->bitsize;
2089 bitregion_start = info->bitregion_start;
2090 bitregion_end = info->bitregion_end;
2091 /* VAL has memory allocated for it in apply_stores once the group
2092 width has been finalized. */
2093 val = NULL;
2094 mask = NULL;
2095 bit_insertion = info->rhs_code == BIT_INSERT_EXPR;
2096 string_concatenation = info->rhs_code == STRING_CST;
2097 only_constants = info->rhs_code == INTEGER_CST;
2098 consecutive = true;
2099 first_nonmergeable_order = ~0U;
2100 lp_nr = info->lp_nr;
2101 unsigned HOST_WIDE_INT align_bitpos = 0;
2102 get_object_alignment_1 (gimple_assign_lhs (info->stmt),
2103 &align, &align_bitpos);
2104 align_base = start - align_bitpos;
2105 for (int i = 0; i < 2; ++i)
2107 store_operand_info &op = info->ops[i];
2108 if (op.base_addr == NULL_TREE)
2110 load_align[i] = 0;
2111 load_align_base[i] = 0;
2113 else
2115 get_object_alignment_1 (op.val, &load_align[i], &align_bitpos);
2116 load_align_base[i] = op.bitpos - align_bitpos;
2119 stores.create (1);
2120 stores.safe_push (info);
2121 last_stmt = info->stmt;
2122 last_order = info->order;
2123 first_stmt = last_stmt;
2124 first_order = last_order;
2125 buf_size = 0;
2128 merged_store_group::~merged_store_group ()
2130 if (val)
2131 XDELETEVEC (val);
2134 /* Return true if the store described by INFO can be merged into the group. */
2136 bool
2137 merged_store_group::can_be_merged_into (store_immediate_info *info)
2139 /* Do not merge bswap patterns. */
2140 if (info->rhs_code == LROTATE_EXPR)
2141 return false;
2143 if (info->lp_nr != lp_nr)
2144 return false;
2146 /* The canonical case. */
2147 if (info->rhs_code == stores[0]->rhs_code)
2148 return true;
2150 /* BIT_INSERT_EXPR is compatible with INTEGER_CST if no STRING_CST. */
2151 if (info->rhs_code == BIT_INSERT_EXPR && stores[0]->rhs_code == INTEGER_CST)
2152 return !string_concatenation;
2154 if (stores[0]->rhs_code == BIT_INSERT_EXPR && info->rhs_code == INTEGER_CST)
2155 return !string_concatenation;
2157 /* We can turn MEM_REF into BIT_INSERT_EXPR for bit-field stores, but do it
2158 only for small regions since this can generate a lot of instructions. */
2159 if (info->rhs_code == MEM_REF
2160 && (stores[0]->rhs_code == INTEGER_CST
2161 || stores[0]->rhs_code == BIT_INSERT_EXPR)
2162 && info->bitregion_start == stores[0]->bitregion_start
2163 && info->bitregion_end == stores[0]->bitregion_end
2164 && info->bitregion_end - info->bitregion_start <= MAX_FIXED_MODE_SIZE)
2165 return !string_concatenation;
2167 if (stores[0]->rhs_code == MEM_REF
2168 && (info->rhs_code == INTEGER_CST
2169 || info->rhs_code == BIT_INSERT_EXPR)
2170 && info->bitregion_start == stores[0]->bitregion_start
2171 && info->bitregion_end == stores[0]->bitregion_end
2172 && info->bitregion_end - info->bitregion_start <= MAX_FIXED_MODE_SIZE)
2173 return !string_concatenation;
2175 /* STRING_CST is compatible with INTEGER_CST if no BIT_INSERT_EXPR. */
2176 if (info->rhs_code == STRING_CST
2177 && stores[0]->rhs_code == INTEGER_CST
2178 && stores[0]->bitsize == CHAR_BIT)
2179 return !bit_insertion;
2181 if (stores[0]->rhs_code == STRING_CST
2182 && info->rhs_code == INTEGER_CST
2183 && info->bitsize == CHAR_BIT)
2184 return !bit_insertion;
2186 return false;
2189 /* Helper method for merge_into and merge_overlapping to do
2190 the common part. */
2192 void
2193 merged_store_group::do_merge (store_immediate_info *info)
2195 bitregion_start = MIN (bitregion_start, info->bitregion_start);
2196 bitregion_end = MAX (bitregion_end, info->bitregion_end);
2198 unsigned int this_align;
2199 unsigned HOST_WIDE_INT align_bitpos = 0;
2200 get_object_alignment_1 (gimple_assign_lhs (info->stmt),
2201 &this_align, &align_bitpos);
2202 if (this_align > align)
2204 align = this_align;
2205 align_base = info->bitpos - align_bitpos;
2207 for (int i = 0; i < 2; ++i)
2209 store_operand_info &op = info->ops[i];
2210 if (!op.base_addr)
2211 continue;
2213 get_object_alignment_1 (op.val, &this_align, &align_bitpos);
2214 if (this_align > load_align[i])
2216 load_align[i] = this_align;
2217 load_align_base[i] = op.bitpos - align_bitpos;
2221 gimple *stmt = info->stmt;
2222 stores.safe_push (info);
2223 if (info->order > last_order)
2225 last_order = info->order;
2226 last_stmt = stmt;
2228 else if (info->order < first_order)
2230 first_order = info->order;
2231 first_stmt = stmt;
2234 if (info->bitpos != start + width)
2235 consecutive = false;
2237 /* We need to use extraction if there is any bit-field. */
2238 if (info->rhs_code == BIT_INSERT_EXPR)
2240 bit_insertion = true;
2241 gcc_assert (!string_concatenation);
2244 /* We want to use concatenation if there is any string. */
2245 if (info->rhs_code == STRING_CST)
2247 string_concatenation = true;
2248 gcc_assert (!bit_insertion);
2251 /* But we cannot use it if we don't have consecutive stores. */
2252 if (!consecutive)
2253 string_concatenation = false;
2255 if (info->rhs_code != INTEGER_CST)
2256 only_constants = false;
2259 /* Merge a store recorded by INFO into this merged store.
2260 The store is not overlapping with the existing recorded
2261 stores. */
2263 void
2264 merged_store_group::merge_into (store_immediate_info *info)
2266 do_merge (info);
2268 /* Make sure we're inserting in the position we think we're inserting. */
2269 gcc_assert (info->bitpos >= start + width
2270 && info->bitregion_start <= bitregion_end);
2272 width = info->bitpos + info->bitsize - start;
2275 /* Merge a store described by INFO into this merged store.
2276 INFO overlaps in some way with the current store (i.e. it's not contiguous
2277 which is handled by merged_store_group::merge_into). */
2279 void
2280 merged_store_group::merge_overlapping (store_immediate_info *info)
2282 do_merge (info);
2284 /* If the store extends the size of the group, extend the width. */
2285 if (info->bitpos + info->bitsize > start + width)
2286 width = info->bitpos + info->bitsize - start;
2289 /* Go through all the recorded stores in this group in program order and
2290 apply their values to the VAL byte array to create the final merged
2291 value. Return true if the operation succeeded. */
2293 bool
2294 merged_store_group::apply_stores ()
2296 store_immediate_info *info;
2297 unsigned int i;
2299 /* Make sure we have more than one store in the group, otherwise we cannot
2300 merge anything. */
2301 if (bitregion_start % BITS_PER_UNIT != 0
2302 || bitregion_end % BITS_PER_UNIT != 0
2303 || stores.length () == 1)
2304 return false;
2306 buf_size = (bitregion_end - bitregion_start) / BITS_PER_UNIT;
2308 /* Really do string concatenation for large strings only. */
2309 if (buf_size <= MOVE_MAX)
2310 string_concatenation = false;
2312 /* String concatenation only works for byte aligned start and end. */
2313 if (start % BITS_PER_UNIT != 0 || width % BITS_PER_UNIT != 0)
2314 string_concatenation = false;
2316 /* Create a power-of-2-sized buffer for native_encode_expr. */
2317 if (!string_concatenation)
2318 buf_size = 1 << ceil_log2 (buf_size);
2320 val = XNEWVEC (unsigned char, 2 * buf_size);
2321 mask = val + buf_size;
2322 memset (val, 0, buf_size);
2323 memset (mask, ~0U, buf_size);
2325 stores.qsort (sort_by_order);
2327 FOR_EACH_VEC_ELT (stores, i, info)
2329 unsigned int pos_in_buffer = info->bitpos - bitregion_start;
2330 tree cst;
2331 if (info->ops[0].val && info->ops[0].base_addr == NULL_TREE)
2332 cst = info->ops[0].val;
2333 else if (info->ops[1].val && info->ops[1].base_addr == NULL_TREE)
2334 cst = info->ops[1].val;
2335 else
2336 cst = NULL_TREE;
2337 bool ret = true;
2338 if (cst && info->rhs_code != BIT_INSERT_EXPR)
2339 ret = encode_tree_to_bitpos (cst, val, info->bitsize, pos_in_buffer,
2340 buf_size);
2341 unsigned char *m = mask + (pos_in_buffer / BITS_PER_UNIT);
2342 if (BYTES_BIG_ENDIAN)
2343 clear_bit_region_be (m, (BITS_PER_UNIT - 1
2344 - (pos_in_buffer % BITS_PER_UNIT)),
2345 info->bitsize);
2346 else
2347 clear_bit_region (m, pos_in_buffer % BITS_PER_UNIT, info->bitsize);
2348 if (cst && dump_file && (dump_flags & TDF_DETAILS))
2350 if (ret)
2352 fputs ("After writing ", dump_file);
2353 print_generic_expr (dump_file, cst, TDF_NONE);
2354 fprintf (dump_file, " of size " HOST_WIDE_INT_PRINT_DEC
2355 " at position %d\n", info->bitsize, pos_in_buffer);
2356 fputs (" the merged value contains ", dump_file);
2357 dump_char_array (dump_file, val, buf_size);
2358 fputs (" the merged mask contains ", dump_file);
2359 dump_char_array (dump_file, mask, buf_size);
2360 if (bit_insertion)
2361 fputs (" bit insertion is required\n", dump_file);
2362 if (string_concatenation)
2363 fputs (" string concatenation is required\n", dump_file);
2365 else
2366 fprintf (dump_file, "Failed to merge stores\n");
2368 if (!ret)
2369 return false;
2371 stores.qsort (sort_by_bitpos);
2372 return true;
2375 /* Structure describing the store chain. */
2377 class imm_store_chain_info
2379 public:
2380 /* Doubly-linked list that imposes an order on chain processing.
2381 PNXP (prev's next pointer) points to the head of a list, or to
2382 the next field in the previous chain in the list.
2383 See pass_store_merging::m_stores_head for more rationale. */
2384 imm_store_chain_info *next, **pnxp;
2385 tree base_addr;
2386 auto_vec<store_immediate_info *> m_store_info;
2387 auto_vec<merged_store_group *> m_merged_store_groups;
2389 imm_store_chain_info (imm_store_chain_info *&inspt, tree b_a)
2390 : next (inspt), pnxp (&inspt), base_addr (b_a)
2392 inspt = this;
2393 if (next)
2395 gcc_checking_assert (pnxp == next->pnxp);
2396 next->pnxp = &next;
2399 ~imm_store_chain_info ()
2401 *pnxp = next;
2402 if (next)
2404 gcc_checking_assert (&next == next->pnxp);
2405 next->pnxp = pnxp;
2408 bool terminate_and_process_chain ();
2409 bool try_coalesce_bswap (merged_store_group *, unsigned int, unsigned int,
2410 unsigned int);
2411 bool coalesce_immediate_stores ();
2412 bool output_merged_store (merged_store_group *);
2413 bool output_merged_stores ();
2416 const pass_data pass_data_tree_store_merging = {
2417 GIMPLE_PASS, /* type */
2418 "store-merging", /* name */
2419 OPTGROUP_NONE, /* optinfo_flags */
2420 TV_GIMPLE_STORE_MERGING, /* tv_id */
2421 PROP_ssa, /* properties_required */
2422 0, /* properties_provided */
2423 0, /* properties_destroyed */
2424 0, /* todo_flags_start */
2425 TODO_update_ssa, /* todo_flags_finish */
2428 class pass_store_merging : public gimple_opt_pass
2430 public:
2431 pass_store_merging (gcc::context *ctxt)
2432 : gimple_opt_pass (pass_data_tree_store_merging, ctxt), m_stores_head (),
2433 m_n_chains (0), m_n_stores (0)
2437 /* Pass not supported for PDP-endian, nor for insane hosts or
2438 target character sizes where native_{encode,interpret}_expr
2439 doesn't work properly. */
2440 bool
2441 gate (function *) final override
2443 return flag_store_merging
2444 && BYTES_BIG_ENDIAN == WORDS_BIG_ENDIAN
2445 && CHAR_BIT == 8
2446 && BITS_PER_UNIT == 8;
2449 unsigned int execute (function *) final override;
2451 private:
2452 hash_map<tree_operand_hash, class imm_store_chain_info *> m_stores;
2454 /* Form a doubly-linked stack of the elements of m_stores, so that
2455 we can iterate over them in a predictable way. Using this order
2456 avoids extraneous differences in the compiler output just because
2457 of tree pointer variations (e.g. different chains end up in
2458 different positions of m_stores, so they are handled in different
2459 orders, so they allocate or release SSA names in different
2460 orders, and when they get reused, subsequent passes end up
2461 getting different SSA names, which may ultimately change
2462 decisions when going out of SSA). */
2463 imm_store_chain_info *m_stores_head;
2465 /* The number of store chains currently tracked. */
2466 unsigned m_n_chains;
2467 /* The number of stores currently tracked. */
2468 unsigned m_n_stores;
2470 bool process_store (gimple *);
2471 bool terminate_and_process_chain (imm_store_chain_info *);
2472 bool terminate_all_aliasing_chains (imm_store_chain_info **, gimple *);
2473 bool terminate_and_process_all_chains ();
2474 }; // class pass_store_merging
2476 /* Terminate and process all recorded chains. Return true if any changes
2477 were made. */
2479 bool
2480 pass_store_merging::terminate_and_process_all_chains ()
2482 bool ret = false;
2483 while (m_stores_head)
2484 ret |= terminate_and_process_chain (m_stores_head);
2485 gcc_assert (m_stores.is_empty ());
2486 return ret;
2489 /* Terminate all chains that are affected by the statement STMT.
2490 CHAIN_INFO is the chain we should ignore from the checks if
2491 non-NULL. Return true if any changes were made. */
2493 bool
2494 pass_store_merging::terminate_all_aliasing_chains (imm_store_chain_info
2495 **chain_info,
2496 gimple *stmt)
2498 bool ret = false;
2500 /* If the statement doesn't touch memory it can't alias. */
2501 if (!gimple_vuse (stmt))
2502 return false;
2504 tree store_lhs = gimple_store_p (stmt) ? gimple_get_lhs (stmt) : NULL_TREE;
2505 ao_ref store_lhs_ref;
2506 ao_ref_init (&store_lhs_ref, store_lhs);
2507 for (imm_store_chain_info *next = m_stores_head, *cur = next; cur; cur = next)
2509 next = cur->next;
2511 /* We already checked all the stores in chain_info and terminated the
2512 chain if necessary. Skip it here. */
2513 if (chain_info && *chain_info == cur)
2514 continue;
2516 store_immediate_info *info;
2517 unsigned int i;
2518 FOR_EACH_VEC_ELT (cur->m_store_info, i, info)
2520 tree lhs = gimple_assign_lhs (info->stmt);
2521 ao_ref lhs_ref;
2522 ao_ref_init (&lhs_ref, lhs);
2523 if (ref_maybe_used_by_stmt_p (stmt, &lhs_ref)
2524 || stmt_may_clobber_ref_p_1 (stmt, &lhs_ref)
2525 || (store_lhs && refs_may_alias_p_1 (&store_lhs_ref,
2526 &lhs_ref, false)))
2528 if (dump_file && (dump_flags & TDF_DETAILS))
2530 fprintf (dump_file, "stmt causes chain termination:\n");
2531 print_gimple_stmt (dump_file, stmt, 0);
2533 ret |= terminate_and_process_chain (cur);
2534 break;
2539 return ret;
2542 /* Helper function. Terminate the recorded chain storing to base object
2543 BASE. Return true if the merging and output was successful. The m_stores
2544 entry is removed after the processing in any case. */
2546 bool
2547 pass_store_merging::terminate_and_process_chain (imm_store_chain_info *chain_info)
2549 m_n_stores -= chain_info->m_store_info.length ();
2550 m_n_chains--;
2551 bool ret = chain_info->terminate_and_process_chain ();
2552 m_stores.remove (chain_info->base_addr);
2553 delete chain_info;
2554 return ret;
2557 /* Return true if stmts in between FIRST (inclusive) and LAST (exclusive)
2558 may clobber REF. FIRST and LAST must have non-NULL vdef. We want to
2559 be able to sink load of REF across stores between FIRST and LAST, up
2560 to right before LAST. */
2562 bool
2563 stmts_may_clobber_ref_p (gimple *first, gimple *last, tree ref)
2565 ao_ref r;
2566 ao_ref_init (&r, ref);
2567 unsigned int count = 0;
2568 tree vop = gimple_vdef (last);
2569 gimple *stmt;
2571 /* Return true conservatively if the basic blocks are different. */
2572 if (gimple_bb (first) != gimple_bb (last))
2573 return true;
2577 stmt = SSA_NAME_DEF_STMT (vop);
2578 if (stmt_may_clobber_ref_p_1 (stmt, &r))
2579 return true;
2580 if (gimple_store_p (stmt)
2581 && refs_anti_dependent_p (ref, gimple_get_lhs (stmt)))
2582 return true;
2583 /* Avoid quadratic compile time by bounding the number of checks
2584 we perform. */
2585 if (++count > MAX_STORE_ALIAS_CHECKS)
2586 return true;
2587 vop = gimple_vuse (stmt);
2589 while (stmt != first);
2591 return false;
2594 /* Return true if INFO->ops[IDX] is mergeable with the
2595 corresponding loads already in MERGED_STORE group.
2596 BASE_ADDR is the base address of the whole store group. */
2598 bool
2599 compatible_load_p (merged_store_group *merged_store,
2600 store_immediate_info *info,
2601 tree base_addr, int idx)
2603 store_immediate_info *infof = merged_store->stores[0];
2604 if (!info->ops[idx].base_addr
2605 || maybe_ne (info->ops[idx].bitpos - infof->ops[idx].bitpos,
2606 info->bitpos - infof->bitpos)
2607 || !operand_equal_p (info->ops[idx].base_addr,
2608 infof->ops[idx].base_addr, 0))
2609 return false;
2611 store_immediate_info *infol = merged_store->stores.last ();
2612 tree load_vuse = gimple_vuse (info->ops[idx].stmt);
2613 /* In this case all vuses should be the same, e.g.
2614 _1 = s.a; _2 = s.b; _3 = _1 | 1; t.a = _3; _4 = _2 | 2; t.b = _4;
2616 _1 = s.a; _2 = s.b; t.a = _1; t.b = _2;
2617 and we can emit the coalesced load next to any of those loads. */
2618 if (gimple_vuse (infof->ops[idx].stmt) == load_vuse
2619 && gimple_vuse (infol->ops[idx].stmt) == load_vuse)
2620 return true;
2622 /* Otherwise, at least for now require that the load has the same
2623 vuse as the store. See following examples. */
2624 if (gimple_vuse (info->stmt) != load_vuse)
2625 return false;
2627 if (gimple_vuse (infof->stmt) != gimple_vuse (infof->ops[idx].stmt)
2628 || (infof != infol
2629 && gimple_vuse (infol->stmt) != gimple_vuse (infol->ops[idx].stmt)))
2630 return false;
2632 /* If the load is from the same location as the store, already
2633 the construction of the immediate chain info guarantees no intervening
2634 stores, so no further checks are needed. Example:
2635 _1 = s.a; _2 = _1 & -7; s.a = _2; _3 = s.b; _4 = _3 & -7; s.b = _4; */
2636 if (known_eq (info->ops[idx].bitpos, info->bitpos)
2637 && operand_equal_p (info->ops[idx].base_addr, base_addr, 0))
2638 return true;
2640 /* Otherwise, we need to punt if any of the loads can be clobbered by any
2641 of the stores in the group, or any other stores in between those.
2642 Previous calls to compatible_load_p ensured that for all the
2643 merged_store->stores IDX loads, no stmts starting with
2644 merged_store->first_stmt and ending right before merged_store->last_stmt
2645 clobbers those loads. */
2646 gimple *first = merged_store->first_stmt;
2647 gimple *last = merged_store->last_stmt;
2648 /* The stores are sorted by increasing store bitpos, so if info->stmt store
2649 comes before the so far first load, we'll be changing
2650 merged_store->first_stmt. In that case we need to give up if
2651 any of the earlier processed loads clobber with the stmts in the new
2652 range. */
2653 if (info->order < merged_store->first_order)
2655 for (store_immediate_info *infoc : merged_store->stores)
2656 if (stmts_may_clobber_ref_p (info->stmt, first, infoc->ops[idx].val))
2657 return false;
2658 first = info->stmt;
2660 /* Similarly, we could change merged_store->last_stmt, so ensure
2661 in that case no stmts in the new range clobber any of the earlier
2662 processed loads. */
2663 else if (info->order > merged_store->last_order)
2665 for (store_immediate_info *infoc : merged_store->stores)
2666 if (stmts_may_clobber_ref_p (last, info->stmt, infoc->ops[idx].val))
2667 return false;
2668 last = info->stmt;
2670 /* And finally, we'd be adding a new load to the set, ensure it isn't
2671 clobbered in the new range. */
2672 if (stmts_may_clobber_ref_p (first, last, info->ops[idx].val))
2673 return false;
2675 /* Otherwise, we are looking for:
2676 _1 = s.a; _2 = _1 ^ 15; t.a = _2; _3 = s.b; _4 = _3 ^ 15; t.b = _4;
2678 _1 = s.a; t.a = _1; _2 = s.b; t.b = _2; */
2679 return true;
2682 /* Add all refs loaded to compute VAL to REFS vector. */
2684 void
2685 gather_bswap_load_refs (vec<tree> *refs, tree val)
2687 if (TREE_CODE (val) != SSA_NAME)
2688 return;
2690 gimple *stmt = SSA_NAME_DEF_STMT (val);
2691 if (!is_gimple_assign (stmt))
2692 return;
2694 if (gimple_assign_load_p (stmt))
2696 refs->safe_push (gimple_assign_rhs1 (stmt));
2697 return;
2700 switch (gimple_assign_rhs_class (stmt))
2702 case GIMPLE_BINARY_RHS:
2703 gather_bswap_load_refs (refs, gimple_assign_rhs2 (stmt));
2704 /* FALLTHRU */
2705 case GIMPLE_UNARY_RHS:
2706 gather_bswap_load_refs (refs, gimple_assign_rhs1 (stmt));
2707 break;
2708 default:
2709 gcc_unreachable ();
2713 /* Check if there are any stores in M_STORE_INFO after index I
2714 (where M_STORE_INFO must be sorted by sort_by_bitpos) that overlap
2715 a potential group ending with END that have their order
2716 smaller than LAST_ORDER. ALL_INTEGER_CST_P is true if
2717 all the stores already merged and the one under consideration
2718 have rhs_code of INTEGER_CST. Return true if there are no such stores.
2719 Consider:
2720 MEM[(long long int *)p_28] = 0;
2721 MEM[(long long int *)p_28 + 8B] = 0;
2722 MEM[(long long int *)p_28 + 16B] = 0;
2723 MEM[(long long int *)p_28 + 24B] = 0;
2724 _129 = (int) _130;
2725 MEM[(int *)p_28 + 8B] = _129;
2726 MEM[(int *)p_28].a = -1;
2727 We already have
2728 MEM[(long long int *)p_28] = 0;
2729 MEM[(int *)p_28].a = -1;
2730 stmts in the current group and need to consider if it is safe to
2731 add MEM[(long long int *)p_28 + 8B] = 0; store into the same group.
2732 There is an overlap between that store and the MEM[(int *)p_28 + 8B] = _129;
2733 store though, so if we add the MEM[(long long int *)p_28 + 8B] = 0;
2734 into the group and merging of those 3 stores is successful, merged
2735 stmts will be emitted at the latest store from that group, i.e.
2736 LAST_ORDER, which is the MEM[(int *)p_28].a = -1; store.
2737 The MEM[(int *)p_28 + 8B] = _129; store that originally follows
2738 the MEM[(long long int *)p_28 + 8B] = 0; would now be before it,
2739 so we need to refuse merging MEM[(long long int *)p_28 + 8B] = 0;
2740 into the group. That way it will be its own store group and will
2741 not be touched. If ALL_INTEGER_CST_P and there are overlapping
2742 INTEGER_CST stores, those are mergeable using merge_overlapping,
2743 so don't return false for those.
2745 Similarly, check stores from FIRST_EARLIER (inclusive) to END_EARLIER
2746 (exclusive), whether they don't overlap the bitrange START to END
2747 and have order in between FIRST_ORDER and LAST_ORDER. This is to
2748 prevent merging in cases like:
2749 MEM <char[12]> [&b + 8B] = {};
2750 MEM[(short *) &b] = 5;
2751 _5 = *x_4(D);
2752 MEM <long long unsigned int> [&b + 2B] = _5;
2753 MEM[(char *)&b + 16B] = 88;
2754 MEM[(int *)&b + 20B] = 1;
2755 The = {} store comes in sort_by_bitpos before the = 88 store, and can't
2756 be merged with it, because the = _5 store overlaps these and is in between
2757 them in sort_by_order ordering. If it was merged, the merged store would
2758 go after the = _5 store and thus change behavior. */
2760 static bool
2761 check_no_overlap (const vec<store_immediate_info *> &m_store_info,
2762 unsigned int i,
2763 bool all_integer_cst_p, unsigned int first_order,
2764 unsigned int last_order, unsigned HOST_WIDE_INT start,
2765 unsigned HOST_WIDE_INT end, unsigned int first_earlier,
2766 unsigned end_earlier)
2768 unsigned int len = m_store_info.length ();
2769 for (unsigned int j = first_earlier; j < end_earlier; j++)
2771 store_immediate_info *info = m_store_info[j];
2772 if (info->order > first_order
2773 && info->order < last_order
2774 && info->bitpos + info->bitsize > start)
2775 return false;
2777 for (++i; i < len; ++i)
2779 store_immediate_info *info = m_store_info[i];
2780 if (info->bitpos >= end)
2781 break;
2782 if (info->order < last_order
2783 && (!all_integer_cst_p || info->rhs_code != INTEGER_CST))
2784 return false;
2786 return true;
2789 /* Return true if m_store_info[first] and at least one following store
2790 form a group which store try_size bitsize value which is byte swapped
2791 from a memory load or some value, or identity from some value.
2792 This uses the bswap pass APIs. */
2794 bool
2795 imm_store_chain_info::try_coalesce_bswap (merged_store_group *merged_store,
2796 unsigned int first,
2797 unsigned int try_size,
2798 unsigned int first_earlier)
2800 unsigned int len = m_store_info.length (), last = first;
2801 unsigned HOST_WIDE_INT width = m_store_info[first]->bitsize;
2802 if (width >= try_size)
2803 return false;
2804 for (unsigned int i = first + 1; i < len; ++i)
2806 if (m_store_info[i]->bitpos != m_store_info[first]->bitpos + width
2807 || m_store_info[i]->lp_nr != merged_store->lp_nr
2808 || m_store_info[i]->ins_stmt == NULL)
2809 return false;
2810 width += m_store_info[i]->bitsize;
2811 if (width >= try_size)
2813 last = i;
2814 break;
2817 if (width != try_size)
2818 return false;
2820 bool allow_unaligned
2821 = !STRICT_ALIGNMENT && param_store_merging_allow_unaligned;
2822 /* Punt if the combined store would not be aligned and we need alignment. */
2823 if (!allow_unaligned)
2825 unsigned int align = merged_store->align;
2826 unsigned HOST_WIDE_INT align_base = merged_store->align_base;
2827 for (unsigned int i = first + 1; i <= last; ++i)
2829 unsigned int this_align;
2830 unsigned HOST_WIDE_INT align_bitpos = 0;
2831 get_object_alignment_1 (gimple_assign_lhs (m_store_info[i]->stmt),
2832 &this_align, &align_bitpos);
2833 if (this_align > align)
2835 align = this_align;
2836 align_base = m_store_info[i]->bitpos - align_bitpos;
2839 unsigned HOST_WIDE_INT align_bitpos
2840 = (m_store_info[first]->bitpos - align_base) & (align - 1);
2841 if (align_bitpos)
2842 align = least_bit_hwi (align_bitpos);
2843 if (align < try_size)
2844 return false;
2847 tree type;
2848 switch (try_size)
2850 case 16: type = uint16_type_node; break;
2851 case 32: type = uint32_type_node; break;
2852 case 64: type = uint64_type_node; break;
2853 default: gcc_unreachable ();
2855 struct symbolic_number n;
2856 gimple *ins_stmt = NULL;
2857 int vuse_store = -1;
2858 unsigned int first_order = merged_store->first_order;
2859 unsigned int last_order = merged_store->last_order;
2860 gimple *first_stmt = merged_store->first_stmt;
2861 gimple *last_stmt = merged_store->last_stmt;
2862 unsigned HOST_WIDE_INT end = merged_store->start + merged_store->width;
2863 store_immediate_info *infof = m_store_info[first];
2865 for (unsigned int i = first; i <= last; ++i)
2867 store_immediate_info *info = m_store_info[i];
2868 struct symbolic_number this_n = info->n;
2869 this_n.type = type;
2870 if (!this_n.base_addr)
2871 this_n.range = try_size / BITS_PER_UNIT;
2872 else
2873 /* Update vuse in case it has changed by output_merged_stores. */
2874 this_n.vuse = gimple_vuse (info->ins_stmt);
2875 unsigned int bitpos = info->bitpos - infof->bitpos;
2876 if (!do_shift_rotate (LSHIFT_EXPR, &this_n,
2877 BYTES_BIG_ENDIAN
2878 ? try_size - info->bitsize - bitpos
2879 : bitpos))
2880 return false;
2881 if (this_n.base_addr && vuse_store)
2883 unsigned int j;
2884 for (j = first; j <= last; ++j)
2885 if (this_n.vuse == gimple_vuse (m_store_info[j]->stmt))
2886 break;
2887 if (j > last)
2889 if (vuse_store == 1)
2890 return false;
2891 vuse_store = 0;
2894 if (i == first)
2896 n = this_n;
2897 ins_stmt = info->ins_stmt;
2899 else
2901 if (n.base_addr && n.vuse != this_n.vuse)
2903 if (vuse_store == 0)
2904 return false;
2905 vuse_store = 1;
2907 if (info->order > last_order)
2909 last_order = info->order;
2910 last_stmt = info->stmt;
2912 else if (info->order < first_order)
2914 first_order = info->order;
2915 first_stmt = info->stmt;
2917 end = MAX (end, info->bitpos + info->bitsize);
2919 ins_stmt = perform_symbolic_merge (ins_stmt, &n, info->ins_stmt,
2920 &this_n, &n, BIT_IOR_EXPR);
2921 if (ins_stmt == NULL)
2922 return false;
2926 uint64_t cmpxchg, cmpnop;
2927 bool cast64_to_32;
2928 find_bswap_or_nop_finalize (&n, &cmpxchg, &cmpnop, &cast64_to_32);
2930 /* A complete byte swap should make the symbolic number to start with
2931 the largest digit in the highest order byte. Unchanged symbolic
2932 number indicates a read with same endianness as target architecture. */
2933 if (n.n != cmpnop && n.n != cmpxchg)
2934 return false;
2936 /* For now. */
2937 if (cast64_to_32)
2938 return false;
2940 if (n.base_addr == NULL_TREE && !is_gimple_val (n.src))
2941 return false;
2943 if (!check_no_overlap (m_store_info, last, false, first_order, last_order,
2944 merged_store->start, end, first_earlier, first))
2945 return false;
2947 /* Don't handle memory copy this way if normal non-bswap processing
2948 would handle it too. */
2949 if (n.n == cmpnop && (unsigned) n.n_ops == last - first + 1)
2951 unsigned int i;
2952 for (i = first; i <= last; ++i)
2953 if (m_store_info[i]->rhs_code != MEM_REF)
2954 break;
2955 if (i == last + 1)
2956 return false;
2959 if (n.n == cmpxchg)
2960 switch (try_size)
2962 case 16:
2963 /* Will emit LROTATE_EXPR. */
2964 break;
2965 case 32:
2966 if (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
2967 && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing)
2968 break;
2969 return false;
2970 case 64:
2971 if (builtin_decl_explicit_p (BUILT_IN_BSWAP64)
2972 && optab_handler (bswap_optab, DImode) != CODE_FOR_nothing)
2973 break;
2974 return false;
2975 default:
2976 gcc_unreachable ();
2979 if (!allow_unaligned && n.base_addr)
2981 unsigned int align = get_object_alignment (n.src);
2982 if (align < try_size)
2983 return false;
2986 /* If each load has vuse of the corresponding store, need to verify
2987 the loads can be sunk right before the last store. */
2988 if (vuse_store == 1)
2990 auto_vec<tree, 64> refs;
2991 for (unsigned int i = first; i <= last; ++i)
2992 gather_bswap_load_refs (&refs,
2993 gimple_assign_rhs1 (m_store_info[i]->stmt));
2995 for (tree ref : refs)
2996 if (stmts_may_clobber_ref_p (first_stmt, last_stmt, ref))
2997 return false;
2998 n.vuse = NULL_TREE;
3001 infof->n = n;
3002 infof->ins_stmt = ins_stmt;
3003 for (unsigned int i = first; i <= last; ++i)
3005 m_store_info[i]->rhs_code = n.n == cmpxchg ? LROTATE_EXPR : NOP_EXPR;
3006 m_store_info[i]->ops[0].base_addr = NULL_TREE;
3007 m_store_info[i]->ops[1].base_addr = NULL_TREE;
3008 if (i != first)
3009 merged_store->merge_into (m_store_info[i]);
3012 return true;
3015 /* Go through the candidate stores recorded in m_store_info and merge them
3016 into merged_store_group objects recorded into m_merged_store_groups
3017 representing the widened stores. Return true if coalescing was successful
3018 and the number of widened stores is fewer than the original number
3019 of stores. */
3021 bool
3022 imm_store_chain_info::coalesce_immediate_stores ()
3024 /* Anything less can't be processed. */
3025 if (m_store_info.length () < 2)
3026 return false;
3028 if (dump_file && (dump_flags & TDF_DETAILS))
3029 fprintf (dump_file, "Attempting to coalesce %u stores in chain\n",
3030 m_store_info.length ());
3032 store_immediate_info *info;
3033 unsigned int i, ignore = 0;
3034 unsigned int first_earlier = 0;
3035 unsigned int end_earlier = 0;
3037 /* Order the stores by the bitposition they write to. */
3038 m_store_info.qsort (sort_by_bitpos);
3040 info = m_store_info[0];
3041 merged_store_group *merged_store = new merged_store_group (info);
3042 if (dump_file && (dump_flags & TDF_DETAILS))
3043 fputs ("New store group\n", dump_file);
3045 FOR_EACH_VEC_ELT (m_store_info, i, info)
3047 unsigned HOST_WIDE_INT new_bitregion_start, new_bitregion_end;
3049 if (i <= ignore)
3050 goto done;
3052 while (first_earlier < end_earlier
3053 && (m_store_info[first_earlier]->bitpos
3054 + m_store_info[first_earlier]->bitsize
3055 <= merged_store->start))
3056 first_earlier++;
3058 /* First try to handle group of stores like:
3059 p[0] = data >> 24;
3060 p[1] = data >> 16;
3061 p[2] = data >> 8;
3062 p[3] = data;
3063 using the bswap framework. */
3064 if (info->bitpos == merged_store->start + merged_store->width
3065 && merged_store->stores.length () == 1
3066 && merged_store->stores[0]->ins_stmt != NULL
3067 && info->lp_nr == merged_store->lp_nr
3068 && info->ins_stmt != NULL)
3070 unsigned int try_size;
3071 for (try_size = 64; try_size >= 16; try_size >>= 1)
3072 if (try_coalesce_bswap (merged_store, i - 1, try_size,
3073 first_earlier))
3074 break;
3076 if (try_size >= 16)
3078 ignore = i + merged_store->stores.length () - 1;
3079 m_merged_store_groups.safe_push (merged_store);
3080 if (ignore < m_store_info.length ())
3082 merged_store = new merged_store_group (m_store_info[ignore]);
3083 end_earlier = ignore;
3085 else
3086 merged_store = NULL;
3087 goto done;
3091 new_bitregion_start
3092 = MIN (merged_store->bitregion_start, info->bitregion_start);
3093 new_bitregion_end
3094 = MAX (merged_store->bitregion_end, info->bitregion_end);
3096 if (info->order >= merged_store->first_nonmergeable_order
3097 || (((new_bitregion_end - new_bitregion_start + 1) / BITS_PER_UNIT)
3098 > (unsigned) param_store_merging_max_size))
3101 /* |---store 1---|
3102 |---store 2---|
3103 Overlapping stores. */
3104 else if (IN_RANGE (info->bitpos, merged_store->start,
3105 merged_store->start + merged_store->width - 1)
3106 /* |---store 1---||---store 2---|
3107 Handle also the consecutive INTEGER_CST stores case here,
3108 as we have here the code to deal with overlaps. */
3109 || (info->bitregion_start <= merged_store->bitregion_end
3110 && info->rhs_code == INTEGER_CST
3111 && merged_store->only_constants
3112 && merged_store->can_be_merged_into (info)))
3114 /* Only allow overlapping stores of constants. */
3115 if (info->rhs_code == INTEGER_CST
3116 && merged_store->only_constants
3117 && info->lp_nr == merged_store->lp_nr)
3119 unsigned int first_order
3120 = MIN (merged_store->first_order, info->order);
3121 unsigned int last_order
3122 = MAX (merged_store->last_order, info->order);
3123 unsigned HOST_WIDE_INT end
3124 = MAX (merged_store->start + merged_store->width,
3125 info->bitpos + info->bitsize);
3126 if (check_no_overlap (m_store_info, i, true, first_order,
3127 last_order, merged_store->start, end,
3128 first_earlier, end_earlier))
3130 /* check_no_overlap call above made sure there are no
3131 overlapping stores with non-INTEGER_CST rhs_code
3132 in between the first and last of the stores we've
3133 just merged. If there are any INTEGER_CST rhs_code
3134 stores in between, we need to merge_overlapping them
3135 even if in the sort_by_bitpos order there are other
3136 overlapping stores in between. Keep those stores as is.
3137 Example:
3138 MEM[(int *)p_28] = 0;
3139 MEM[(char *)p_28 + 3B] = 1;
3140 MEM[(char *)p_28 + 1B] = 2;
3141 MEM[(char *)p_28 + 2B] = MEM[(char *)p_28 + 6B];
3142 We can't merge the zero store with the store of two and
3143 not merge anything else, because the store of one is
3144 in the original order in between those two, but in
3145 store_by_bitpos order it comes after the last store that
3146 we can't merge with them. We can merge the first 3 stores
3147 and keep the last store as is though. */
3148 unsigned int len = m_store_info.length ();
3149 unsigned int try_order = last_order;
3150 unsigned int first_nonmergeable_order;
3151 unsigned int k;
3152 bool last_iter = false;
3153 int attempts = 0;
3156 unsigned int max_order = 0;
3157 unsigned int min_order = first_order;
3158 unsigned first_nonmergeable_int_order = ~0U;
3159 unsigned HOST_WIDE_INT this_end = end;
3160 k = i;
3161 first_nonmergeable_order = ~0U;
3162 for (unsigned int j = i + 1; j < len; ++j)
3164 store_immediate_info *info2 = m_store_info[j];
3165 if (info2->bitpos >= this_end)
3166 break;
3167 if (info2->order < try_order)
3169 if (info2->rhs_code != INTEGER_CST
3170 || info2->lp_nr != merged_store->lp_nr)
3172 /* Normally check_no_overlap makes sure this
3173 doesn't happen, but if end grows below,
3174 then we need to process more stores than
3175 check_no_overlap verified. Example:
3176 MEM[(int *)p_5] = 0;
3177 MEM[(short *)p_5 + 3B] = 1;
3178 MEM[(char *)p_5 + 4B] = _9;
3179 MEM[(char *)p_5 + 2B] = 2; */
3180 k = 0;
3181 break;
3183 k = j;
3184 min_order = MIN (min_order, info2->order);
3185 this_end = MAX (this_end,
3186 info2->bitpos + info2->bitsize);
3188 else if (info2->rhs_code == INTEGER_CST
3189 && info2->lp_nr == merged_store->lp_nr
3190 && !last_iter)
3192 max_order = MAX (max_order, info2->order + 1);
3193 first_nonmergeable_int_order
3194 = MIN (first_nonmergeable_int_order,
3195 info2->order);
3197 else
3198 first_nonmergeable_order
3199 = MIN (first_nonmergeable_order, info2->order);
3201 if (k > i
3202 && !check_no_overlap (m_store_info, len - 1, true,
3203 min_order, try_order,
3204 merged_store->start, this_end,
3205 first_earlier, end_earlier))
3206 k = 0;
3207 if (k == 0)
3209 if (last_order == try_order)
3210 break;
3211 /* If this failed, but only because we grew
3212 try_order, retry with the last working one,
3213 so that we merge at least something. */
3214 try_order = last_order;
3215 last_iter = true;
3216 continue;
3218 last_order = try_order;
3219 /* Retry with a larger try_order to see if we could
3220 merge some further INTEGER_CST stores. */
3221 if (max_order
3222 && (first_nonmergeable_int_order
3223 < first_nonmergeable_order))
3225 try_order = MIN (max_order,
3226 first_nonmergeable_order);
3227 try_order
3228 = MIN (try_order,
3229 merged_store->first_nonmergeable_order);
3230 if (try_order > last_order && ++attempts < 16)
3231 continue;
3233 first_nonmergeable_order
3234 = MIN (first_nonmergeable_order,
3235 first_nonmergeable_int_order);
3236 end = this_end;
3237 break;
3239 while (1);
3241 if (k != 0)
3243 merged_store->merge_overlapping (info);
3245 merged_store->first_nonmergeable_order
3246 = MIN (merged_store->first_nonmergeable_order,
3247 first_nonmergeable_order);
3249 for (unsigned int j = i + 1; j <= k; j++)
3251 store_immediate_info *info2 = m_store_info[j];
3252 gcc_assert (info2->bitpos < end);
3253 if (info2->order < last_order)
3255 gcc_assert (info2->rhs_code == INTEGER_CST);
3256 if (info != info2)
3257 merged_store->merge_overlapping (info2);
3259 /* Other stores are kept and not merged in any
3260 way. */
3262 ignore = k;
3263 goto done;
3268 /* |---store 1---||---store 2---|
3269 This store is consecutive to the previous one.
3270 Merge it into the current store group. There can be gaps in between
3271 the stores, but there can't be gaps in between bitregions. */
3272 else if (info->bitregion_start <= merged_store->bitregion_end
3273 && merged_store->can_be_merged_into (info))
3275 store_immediate_info *infof = merged_store->stores[0];
3277 /* All the rhs_code ops that take 2 operands are commutative,
3278 swap the operands if it could make the operands compatible. */
3279 if (infof->ops[0].base_addr
3280 && infof->ops[1].base_addr
3281 && info->ops[0].base_addr
3282 && info->ops[1].base_addr
3283 && known_eq (info->ops[1].bitpos - infof->ops[0].bitpos,
3284 info->bitpos - infof->bitpos)
3285 && operand_equal_p (info->ops[1].base_addr,
3286 infof->ops[0].base_addr, 0))
3288 std::swap (info->ops[0], info->ops[1]);
3289 info->ops_swapped_p = true;
3291 if (check_no_overlap (m_store_info, i, false,
3292 MIN (merged_store->first_order, info->order),
3293 MAX (merged_store->last_order, info->order),
3294 merged_store->start,
3295 MAX (merged_store->start + merged_store->width,
3296 info->bitpos + info->bitsize),
3297 first_earlier, end_earlier))
3299 /* Turn MEM_REF into BIT_INSERT_EXPR for bit-field stores. */
3300 if (info->rhs_code == MEM_REF && infof->rhs_code != MEM_REF)
3302 info->rhs_code = BIT_INSERT_EXPR;
3303 info->ops[0].val = gimple_assign_rhs1 (info->stmt);
3304 info->ops[0].base_addr = NULL_TREE;
3306 else if (infof->rhs_code == MEM_REF && info->rhs_code != MEM_REF)
3308 for (store_immediate_info *infoj : merged_store->stores)
3310 infoj->rhs_code = BIT_INSERT_EXPR;
3311 infoj->ops[0].val = gimple_assign_rhs1 (infoj->stmt);
3312 infoj->ops[0].base_addr = NULL_TREE;
3314 merged_store->bit_insertion = true;
3316 if ((infof->ops[0].base_addr
3317 ? compatible_load_p (merged_store, info, base_addr, 0)
3318 : !info->ops[0].base_addr)
3319 && (infof->ops[1].base_addr
3320 ? compatible_load_p (merged_store, info, base_addr, 1)
3321 : !info->ops[1].base_addr))
3323 merged_store->merge_into (info);
3324 goto done;
3329 /* |---store 1---| <gap> |---store 2---|.
3330 Gap between stores or the rhs not compatible. Start a new group. */
3332 /* Try to apply all the stores recorded for the group to determine
3333 the bitpattern they write and discard it if that fails.
3334 This will also reject single-store groups. */
3335 if (merged_store->apply_stores ())
3336 m_merged_store_groups.safe_push (merged_store);
3337 else
3338 delete merged_store;
3340 merged_store = new merged_store_group (info);
3341 end_earlier = i;
3342 if (dump_file && (dump_flags & TDF_DETAILS))
3343 fputs ("New store group\n", dump_file);
3345 done:
3346 if (dump_file && (dump_flags & TDF_DETAILS))
3348 fprintf (dump_file, "Store %u:\nbitsize:" HOST_WIDE_INT_PRINT_DEC
3349 " bitpos:" HOST_WIDE_INT_PRINT_DEC " val:",
3350 i, info->bitsize, info->bitpos);
3351 print_generic_expr (dump_file, gimple_assign_rhs1 (info->stmt));
3352 fputc ('\n', dump_file);
3356 /* Record or discard the last store group. */
3357 if (merged_store)
3359 if (merged_store->apply_stores ())
3360 m_merged_store_groups.safe_push (merged_store);
3361 else
3362 delete merged_store;
3365 gcc_assert (m_merged_store_groups.length () <= m_store_info.length ());
3367 bool success
3368 = !m_merged_store_groups.is_empty ()
3369 && m_merged_store_groups.length () < m_store_info.length ();
3371 if (success && dump_file)
3372 fprintf (dump_file, "Coalescing successful!\nMerged into %u stores\n",
3373 m_merged_store_groups.length ());
3375 return success;
3378 /* Return the type to use for the merged stores or loads described by STMTS.
3379 This is needed to get the alias sets right. If IS_LOAD, look for rhs,
3380 otherwise lhs. Additionally set *CLIQUEP and *BASEP to MR_DEPENDENCE_*
3381 of the MEM_REFs if any. */
3383 static tree
3384 get_alias_type_for_stmts (vec<gimple *> &stmts, bool is_load,
3385 unsigned short *cliquep, unsigned short *basep)
3387 gimple *stmt;
3388 unsigned int i;
3389 tree type = NULL_TREE;
3390 tree ret = NULL_TREE;
3391 *cliquep = 0;
3392 *basep = 0;
3394 FOR_EACH_VEC_ELT (stmts, i, stmt)
3396 tree ref = is_load ? gimple_assign_rhs1 (stmt)
3397 : gimple_assign_lhs (stmt);
3398 tree type1 = reference_alias_ptr_type (ref);
3399 tree base = get_base_address (ref);
3401 if (i == 0)
3403 if (TREE_CODE (base) == MEM_REF)
3405 *cliquep = MR_DEPENDENCE_CLIQUE (base);
3406 *basep = MR_DEPENDENCE_BASE (base);
3408 ret = type = type1;
3409 continue;
3411 if (!alias_ptr_types_compatible_p (type, type1))
3412 ret = ptr_type_node;
3413 if (TREE_CODE (base) != MEM_REF
3414 || *cliquep != MR_DEPENDENCE_CLIQUE (base)
3415 || *basep != MR_DEPENDENCE_BASE (base))
3417 *cliquep = 0;
3418 *basep = 0;
3421 return ret;
3424 /* Return the location_t information we can find among the statements
3425 in STMTS. */
3427 static location_t
3428 get_location_for_stmts (vec<gimple *> &stmts)
3430 for (gimple *stmt : stmts)
3431 if (gimple_has_location (stmt))
3432 return gimple_location (stmt);
3434 return UNKNOWN_LOCATION;
3437 /* Used to decribe a store resulting from splitting a wide store in smaller
3438 regularly-sized stores in split_group. */
3440 class split_store
3442 public:
3443 unsigned HOST_WIDE_INT bytepos;
3444 unsigned HOST_WIDE_INT size;
3445 unsigned HOST_WIDE_INT align;
3446 auto_vec<store_immediate_info *> orig_stores;
3447 /* True if there is a single orig stmt covering the whole split store. */
3448 bool orig;
3449 split_store (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
3450 unsigned HOST_WIDE_INT);
3453 /* Simple constructor. */
3455 split_store::split_store (unsigned HOST_WIDE_INT bp,
3456 unsigned HOST_WIDE_INT sz,
3457 unsigned HOST_WIDE_INT al)
3458 : bytepos (bp), size (sz), align (al), orig (false)
3460 orig_stores.create (0);
3463 /* Record all stores in GROUP that write to the region starting at BITPOS and
3464 is of size BITSIZE. Record infos for such statements in STORES if
3465 non-NULL. The stores in GROUP must be sorted by bitposition. Return INFO
3466 if there is exactly one original store in the range (in that case ignore
3467 clobber stmts, unless there are only clobber stmts). */
3469 static store_immediate_info *
3470 find_constituent_stores (class merged_store_group *group,
3471 vec<store_immediate_info *> *stores,
3472 unsigned int *first,
3473 unsigned HOST_WIDE_INT bitpos,
3474 unsigned HOST_WIDE_INT bitsize)
3476 store_immediate_info *info, *ret = NULL;
3477 unsigned int i;
3478 bool second = false;
3479 bool update_first = true;
3480 unsigned HOST_WIDE_INT end = bitpos + bitsize;
3481 for (i = *first; group->stores.iterate (i, &info); ++i)
3483 unsigned HOST_WIDE_INT stmt_start = info->bitpos;
3484 unsigned HOST_WIDE_INT stmt_end = stmt_start + info->bitsize;
3485 if (stmt_end <= bitpos)
3487 /* BITPOS passed to this function never decreases from within the
3488 same split_group call, so optimize and don't scan info records
3489 which are known to end before or at BITPOS next time.
3490 Only do it if all stores before this one also pass this. */
3491 if (update_first)
3492 *first = i + 1;
3493 continue;
3495 else
3496 update_first = false;
3498 /* The stores in GROUP are ordered by bitposition so if we're past
3499 the region for this group return early. */
3500 if (stmt_start >= end)
3501 return ret;
3503 if (gimple_clobber_p (info->stmt))
3505 if (stores)
3506 stores->safe_push (info);
3507 if (ret == NULL)
3508 ret = info;
3509 continue;
3511 if (stores)
3513 stores->safe_push (info);
3514 if (ret && !gimple_clobber_p (ret->stmt))
3516 ret = NULL;
3517 second = true;
3520 else if (ret && !gimple_clobber_p (ret->stmt))
3521 return NULL;
3522 if (!second)
3523 ret = info;
3525 return ret;
3528 /* Return how many SSA_NAMEs used to compute value to store in the INFO
3529 store have multiple uses. If any SSA_NAME has multiple uses, also
3530 count statements needed to compute it. */
3532 static unsigned
3533 count_multiple_uses (store_immediate_info *info)
3535 gimple *stmt = info->stmt;
3536 unsigned ret = 0;
3537 switch (info->rhs_code)
3539 case INTEGER_CST:
3540 case STRING_CST:
3541 return 0;
3542 case BIT_AND_EXPR:
3543 case BIT_IOR_EXPR:
3544 case BIT_XOR_EXPR:
3545 if (info->bit_not_p)
3547 if (!has_single_use (gimple_assign_rhs1 (stmt)))
3548 ret = 1; /* Fall through below to return
3549 the BIT_NOT_EXPR stmt and then
3550 BIT_{AND,IOR,XOR}_EXPR and anything it
3551 uses. */
3552 else
3553 /* stmt is after this the BIT_NOT_EXPR. */
3554 stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3556 if (!has_single_use (gimple_assign_rhs1 (stmt)))
3558 ret += 1 + info->ops[0].bit_not_p;
3559 if (info->ops[1].base_addr)
3560 ret += 1 + info->ops[1].bit_not_p;
3561 return ret + 1;
3563 stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3564 /* stmt is now the BIT_*_EXPR. */
3565 if (!has_single_use (gimple_assign_rhs1 (stmt)))
3566 ret += 1 + info->ops[info->ops_swapped_p].bit_not_p;
3567 else if (info->ops[info->ops_swapped_p].bit_not_p)
3569 gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3570 if (!has_single_use (gimple_assign_rhs1 (stmt2)))
3571 ++ret;
3573 if (info->ops[1].base_addr == NULL_TREE)
3575 gcc_checking_assert (!info->ops_swapped_p);
3576 return ret;
3578 if (!has_single_use (gimple_assign_rhs2 (stmt)))
3579 ret += 1 + info->ops[1 - info->ops_swapped_p].bit_not_p;
3580 else if (info->ops[1 - info->ops_swapped_p].bit_not_p)
3582 gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3583 if (!has_single_use (gimple_assign_rhs1 (stmt2)))
3584 ++ret;
3586 return ret;
3587 case MEM_REF:
3588 if (!has_single_use (gimple_assign_rhs1 (stmt)))
3589 return 1 + info->ops[0].bit_not_p;
3590 else if (info->ops[0].bit_not_p)
3592 stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3593 if (!has_single_use (gimple_assign_rhs1 (stmt)))
3594 return 1;
3596 return 0;
3597 case BIT_INSERT_EXPR:
3598 return has_single_use (gimple_assign_rhs1 (stmt)) ? 0 : 1;
3599 default:
3600 gcc_unreachable ();
3604 /* Split a merged store described by GROUP by populating the SPLIT_STORES
3605 vector (if non-NULL) with split_store structs describing the byte offset
3606 (from the base), the bit size and alignment of each store as well as the
3607 original statements involved in each such split group.
3608 This is to separate the splitting strategy from the statement
3609 building/emission/linking done in output_merged_store.
3610 Return number of new stores.
3611 If ALLOW_UNALIGNED_STORE is false, then all stores must be aligned.
3612 If ALLOW_UNALIGNED_LOAD is false, then all loads must be aligned.
3613 BZERO_FIRST may be true only when the first store covers the whole group
3614 and clears it; if BZERO_FIRST is true, keep that first store in the set
3615 unmodified and emit further stores for the overrides only.
3616 If SPLIT_STORES is NULL, it is just a dry run to count number of
3617 new stores. */
3619 static unsigned int
3620 split_group (merged_store_group *group, bool allow_unaligned_store,
3621 bool allow_unaligned_load, bool bzero_first,
3622 vec<split_store *> *split_stores,
3623 unsigned *total_orig,
3624 unsigned *total_new)
3626 unsigned HOST_WIDE_INT pos = group->bitregion_start;
3627 unsigned HOST_WIDE_INT size = group->bitregion_end - pos;
3628 unsigned HOST_WIDE_INT bytepos = pos / BITS_PER_UNIT;
3629 unsigned HOST_WIDE_INT group_align = group->align;
3630 unsigned HOST_WIDE_INT align_base = group->align_base;
3631 unsigned HOST_WIDE_INT group_load_align = group_align;
3632 bool any_orig = false;
3634 gcc_assert ((size % BITS_PER_UNIT == 0) && (pos % BITS_PER_UNIT == 0));
3636 /* For bswap framework using sets of stores, all the checking has been done
3637 earlier in try_coalesce_bswap and the result always needs to be emitted
3638 as a single store. Likewise for string concatenation. */
3639 if (group->stores[0]->rhs_code == LROTATE_EXPR
3640 || group->stores[0]->rhs_code == NOP_EXPR
3641 || group->string_concatenation)
3643 gcc_assert (!bzero_first);
3644 if (total_orig)
3646 /* Avoid the old/new stmt count heuristics. It should be
3647 always beneficial. */
3648 total_new[0] = 1;
3649 total_orig[0] = 2;
3652 if (split_stores)
3654 unsigned HOST_WIDE_INT align_bitpos
3655 = (group->start - align_base) & (group_align - 1);
3656 unsigned HOST_WIDE_INT align = group_align;
3657 if (align_bitpos)
3658 align = least_bit_hwi (align_bitpos);
3659 bytepos = group->start / BITS_PER_UNIT;
3660 split_store *store
3661 = new split_store (bytepos, group->width, align);
3662 unsigned int first = 0;
3663 find_constituent_stores (group, &store->orig_stores,
3664 &first, group->start, group->width);
3665 split_stores->safe_push (store);
3668 return 1;
3671 unsigned int ret = 0, first = 0;
3672 unsigned HOST_WIDE_INT try_pos = bytepos;
3674 if (total_orig)
3676 unsigned int i;
3677 store_immediate_info *info = group->stores[0];
3679 total_new[0] = 0;
3680 total_orig[0] = 1; /* The orig store. */
3681 info = group->stores[0];
3682 if (info->ops[0].base_addr)
3683 total_orig[0]++;
3684 if (info->ops[1].base_addr)
3685 total_orig[0]++;
3686 switch (info->rhs_code)
3688 case BIT_AND_EXPR:
3689 case BIT_IOR_EXPR:
3690 case BIT_XOR_EXPR:
3691 total_orig[0]++; /* The orig BIT_*_EXPR stmt. */
3692 break;
3693 default:
3694 break;
3696 total_orig[0] *= group->stores.length ();
3698 FOR_EACH_VEC_ELT (group->stores, i, info)
3700 total_new[0] += count_multiple_uses (info);
3701 total_orig[0] += (info->bit_not_p
3702 + info->ops[0].bit_not_p
3703 + info->ops[1].bit_not_p);
3707 if (!allow_unaligned_load)
3708 for (int i = 0; i < 2; ++i)
3709 if (group->load_align[i])
3710 group_load_align = MIN (group_load_align, group->load_align[i]);
3712 if (bzero_first)
3714 store_immediate_info *gstore;
3715 FOR_EACH_VEC_ELT (group->stores, first, gstore)
3716 if (!gimple_clobber_p (gstore->stmt))
3717 break;
3718 ++first;
3719 ret = 1;
3720 if (split_stores)
3722 split_store *store
3723 = new split_store (bytepos, gstore->bitsize, align_base);
3724 store->orig_stores.safe_push (gstore);
3725 store->orig = true;
3726 any_orig = true;
3727 split_stores->safe_push (store);
3731 while (size > 0)
3733 if ((allow_unaligned_store || group_align <= BITS_PER_UNIT)
3734 && (group->mask[try_pos - bytepos] == (unsigned char) ~0U
3735 || (bzero_first && group->val[try_pos - bytepos] == 0)))
3737 /* Skip padding bytes. */
3738 ++try_pos;
3739 size -= BITS_PER_UNIT;
3740 continue;
3743 unsigned HOST_WIDE_INT try_bitpos = try_pos * BITS_PER_UNIT;
3744 unsigned int try_size = MAX_STORE_BITSIZE, nonmasked;
3745 unsigned HOST_WIDE_INT align_bitpos
3746 = (try_bitpos - align_base) & (group_align - 1);
3747 unsigned HOST_WIDE_INT align = group_align;
3748 bool found_orig = false;
3749 if (align_bitpos)
3750 align = least_bit_hwi (align_bitpos);
3751 if (!allow_unaligned_store)
3752 try_size = MIN (try_size, align);
3753 if (!allow_unaligned_load)
3755 /* If we can't do or don't want to do unaligned stores
3756 as well as loads, we need to take the loads into account
3757 as well. */
3758 unsigned HOST_WIDE_INT load_align = group_load_align;
3759 align_bitpos = (try_bitpos - align_base) & (load_align - 1);
3760 if (align_bitpos)
3761 load_align = least_bit_hwi (align_bitpos);
3762 for (int i = 0; i < 2; ++i)
3763 if (group->load_align[i])
3765 align_bitpos
3766 = known_alignment (try_bitpos
3767 - group->stores[0]->bitpos
3768 + group->stores[0]->ops[i].bitpos
3769 - group->load_align_base[i]);
3770 if (align_bitpos & (group_load_align - 1))
3772 unsigned HOST_WIDE_INT a = least_bit_hwi (align_bitpos);
3773 load_align = MIN (load_align, a);
3776 try_size = MIN (try_size, load_align);
3778 store_immediate_info *info
3779 = find_constituent_stores (group, NULL, &first, try_bitpos, try_size);
3780 if (info && !gimple_clobber_p (info->stmt))
3782 /* If there is just one original statement for the range, see if
3783 we can just reuse the original store which could be even larger
3784 than try_size. */
3785 unsigned HOST_WIDE_INT stmt_end
3786 = ROUND_UP (info->bitpos + info->bitsize, BITS_PER_UNIT);
3787 info = find_constituent_stores (group, NULL, &first, try_bitpos,
3788 stmt_end - try_bitpos);
3789 if (info && info->bitpos >= try_bitpos)
3791 store_immediate_info *info2 = NULL;
3792 unsigned int first_copy = first;
3793 if (info->bitpos > try_bitpos
3794 && stmt_end - try_bitpos <= try_size)
3796 info2 = find_constituent_stores (group, NULL, &first_copy,
3797 try_bitpos,
3798 info->bitpos - try_bitpos);
3799 gcc_assert (info2 == NULL || gimple_clobber_p (info2->stmt));
3801 if (info2 == NULL && stmt_end - try_bitpos < try_size)
3803 info2 = find_constituent_stores (group, NULL, &first_copy,
3804 stmt_end,
3805 (try_bitpos + try_size)
3806 - stmt_end);
3807 gcc_assert (info2 == NULL || gimple_clobber_p (info2->stmt));
3809 if (info2 == NULL)
3811 try_size = stmt_end - try_bitpos;
3812 found_orig = true;
3813 goto found;
3818 /* Approximate store bitsize for the case when there are no padding
3819 bits. */
3820 while (try_size > size)
3821 try_size /= 2;
3822 /* Now look for whole padding bytes at the end of that bitsize. */
3823 for (nonmasked = try_size / BITS_PER_UNIT; nonmasked > 0; --nonmasked)
3824 if (group->mask[try_pos - bytepos + nonmasked - 1]
3825 != (unsigned char) ~0U
3826 && (!bzero_first
3827 || group->val[try_pos - bytepos + nonmasked - 1] != 0))
3828 break;
3829 if (nonmasked == 0 || (info && gimple_clobber_p (info->stmt)))
3831 /* If entire try_size range is padding, skip it. */
3832 try_pos += try_size / BITS_PER_UNIT;
3833 size -= try_size;
3834 continue;
3836 /* Otherwise try to decrease try_size if second half, last 3 quarters
3837 etc. are padding. */
3838 nonmasked *= BITS_PER_UNIT;
3839 while (nonmasked <= try_size / 2)
3840 try_size /= 2;
3841 if (!allow_unaligned_store && group_align > BITS_PER_UNIT)
3843 /* Now look for whole padding bytes at the start of that bitsize. */
3844 unsigned int try_bytesize = try_size / BITS_PER_UNIT, masked;
3845 for (masked = 0; masked < try_bytesize; ++masked)
3846 if (group->mask[try_pos - bytepos + masked] != (unsigned char) ~0U
3847 && (!bzero_first
3848 || group->val[try_pos - bytepos + masked] != 0))
3849 break;
3850 masked *= BITS_PER_UNIT;
3851 gcc_assert (masked < try_size);
3852 if (masked >= try_size / 2)
3854 while (masked >= try_size / 2)
3856 try_size /= 2;
3857 try_pos += try_size / BITS_PER_UNIT;
3858 size -= try_size;
3859 masked -= try_size;
3861 /* Need to recompute the alignment, so just retry at the new
3862 position. */
3863 continue;
3867 found:
3868 ++ret;
3870 if (split_stores)
3872 split_store *store
3873 = new split_store (try_pos, try_size, align);
3874 info = find_constituent_stores (group, &store->orig_stores,
3875 &first, try_bitpos, try_size);
3876 if (info
3877 && !gimple_clobber_p (info->stmt)
3878 && info->bitpos >= try_bitpos
3879 && info->bitpos + info->bitsize <= try_bitpos + try_size
3880 && (store->orig_stores.length () == 1
3881 || found_orig
3882 || (info->bitpos == try_bitpos
3883 && (info->bitpos + info->bitsize
3884 == try_bitpos + try_size))))
3886 store->orig = true;
3887 any_orig = true;
3889 split_stores->safe_push (store);
3892 try_pos += try_size / BITS_PER_UNIT;
3893 size -= try_size;
3896 if (total_orig)
3898 unsigned int i;
3899 split_store *store;
3900 /* If we are reusing some original stores and any of the
3901 original SSA_NAMEs had multiple uses, we need to subtract
3902 those now before we add the new ones. */
3903 if (total_new[0] && any_orig)
3905 FOR_EACH_VEC_ELT (*split_stores, i, store)
3906 if (store->orig)
3907 total_new[0] -= count_multiple_uses (store->orig_stores[0]);
3909 total_new[0] += ret; /* The new store. */
3910 store_immediate_info *info = group->stores[0];
3911 if (info->ops[0].base_addr)
3912 total_new[0] += ret;
3913 if (info->ops[1].base_addr)
3914 total_new[0] += ret;
3915 switch (info->rhs_code)
3917 case BIT_AND_EXPR:
3918 case BIT_IOR_EXPR:
3919 case BIT_XOR_EXPR:
3920 total_new[0] += ret; /* The new BIT_*_EXPR stmt. */
3921 break;
3922 default:
3923 break;
3925 FOR_EACH_VEC_ELT (*split_stores, i, store)
3927 unsigned int j;
3928 bool bit_not_p[3] = { false, false, false };
3929 /* If all orig_stores have certain bit_not_p set, then
3930 we'd use a BIT_NOT_EXPR stmt and need to account for it.
3931 If some orig_stores have certain bit_not_p set, then
3932 we'd use a BIT_XOR_EXPR with a mask and need to account for
3933 it. */
3934 FOR_EACH_VEC_ELT (store->orig_stores, j, info)
3936 if (info->ops[0].bit_not_p)
3937 bit_not_p[0] = true;
3938 if (info->ops[1].bit_not_p)
3939 bit_not_p[1] = true;
3940 if (info->bit_not_p)
3941 bit_not_p[2] = true;
3943 total_new[0] += bit_not_p[0] + bit_not_p[1] + bit_not_p[2];
3948 return ret;
3951 /* Return the operation through which the operand IDX (if < 2) or
3952 result (IDX == 2) should be inverted. If NOP_EXPR, no inversion
3953 is done, if BIT_NOT_EXPR, all bits are inverted, if BIT_XOR_EXPR,
3954 the bits should be xored with mask. */
3956 static enum tree_code
3957 invert_op (split_store *split_store, int idx, tree int_type, tree &mask)
3959 unsigned int i;
3960 store_immediate_info *info;
3961 unsigned int cnt = 0;
3962 bool any_paddings = false;
3963 FOR_EACH_VEC_ELT (split_store->orig_stores, i, info)
3965 bool bit_not_p = idx < 2 ? info->ops[idx].bit_not_p : info->bit_not_p;
3966 if (bit_not_p)
3968 ++cnt;
3969 tree lhs = gimple_assign_lhs (info->stmt);
3970 if (INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3971 && TYPE_PRECISION (TREE_TYPE (lhs)) < info->bitsize)
3972 any_paddings = true;
3975 mask = NULL_TREE;
3976 if (cnt == 0)
3977 return NOP_EXPR;
3978 if (cnt == split_store->orig_stores.length () && !any_paddings)
3979 return BIT_NOT_EXPR;
3981 unsigned HOST_WIDE_INT try_bitpos = split_store->bytepos * BITS_PER_UNIT;
3982 unsigned buf_size = split_store->size / BITS_PER_UNIT;
3983 unsigned char *buf
3984 = XALLOCAVEC (unsigned char, buf_size);
3985 memset (buf, ~0U, buf_size);
3986 FOR_EACH_VEC_ELT (split_store->orig_stores, i, info)
3988 bool bit_not_p = idx < 2 ? info->ops[idx].bit_not_p : info->bit_not_p;
3989 if (!bit_not_p)
3990 continue;
3991 /* Clear regions with bit_not_p and invert afterwards, rather than
3992 clear regions with !bit_not_p, so that gaps in between stores aren't
3993 set in the mask. */
3994 unsigned HOST_WIDE_INT bitsize = info->bitsize;
3995 unsigned HOST_WIDE_INT prec = bitsize;
3996 unsigned int pos_in_buffer = 0;
3997 if (any_paddings)
3999 tree lhs = gimple_assign_lhs (info->stmt);
4000 if (INTEGRAL_TYPE_P (TREE_TYPE (lhs))
4001 && TYPE_PRECISION (TREE_TYPE (lhs)) < bitsize)
4002 prec = TYPE_PRECISION (TREE_TYPE (lhs));
4004 if (info->bitpos < try_bitpos)
4006 gcc_assert (info->bitpos + bitsize > try_bitpos);
4007 if (!BYTES_BIG_ENDIAN)
4009 if (prec <= try_bitpos - info->bitpos)
4010 continue;
4011 prec -= try_bitpos - info->bitpos;
4013 bitsize -= try_bitpos - info->bitpos;
4014 if (BYTES_BIG_ENDIAN && prec > bitsize)
4015 prec = bitsize;
4017 else
4018 pos_in_buffer = info->bitpos - try_bitpos;
4019 if (prec < bitsize)
4021 /* If this is a bool inversion, invert just the least significant
4022 prec bits rather than all bits of it. */
4023 if (BYTES_BIG_ENDIAN)
4025 pos_in_buffer += bitsize - prec;
4026 if (pos_in_buffer >= split_store->size)
4027 continue;
4029 bitsize = prec;
4031 if (pos_in_buffer + bitsize > split_store->size)
4032 bitsize = split_store->size - pos_in_buffer;
4033 unsigned char *p = buf + (pos_in_buffer / BITS_PER_UNIT);
4034 if (BYTES_BIG_ENDIAN)
4035 clear_bit_region_be (p, (BITS_PER_UNIT - 1
4036 - (pos_in_buffer % BITS_PER_UNIT)), bitsize);
4037 else
4038 clear_bit_region (p, pos_in_buffer % BITS_PER_UNIT, bitsize);
4040 for (unsigned int i = 0; i < buf_size; ++i)
4041 buf[i] = ~buf[i];
4042 mask = native_interpret_expr (int_type, buf, buf_size);
4043 return BIT_XOR_EXPR;
4046 /* Given a merged store group GROUP output the widened version of it.
4047 The store chain is against the base object BASE.
4048 Try store sizes of at most MAX_STORE_BITSIZE bits wide and don't output
4049 unaligned stores for STRICT_ALIGNMENT targets or if it's too expensive.
4050 Make sure that the number of statements output is less than the number of
4051 original statements. If a better sequence is possible emit it and
4052 return true. */
4054 bool
4055 imm_store_chain_info::output_merged_store (merged_store_group *group)
4057 const unsigned HOST_WIDE_INT start_byte_pos
4058 = group->bitregion_start / BITS_PER_UNIT;
4059 unsigned int orig_num_stmts = group->stores.length ();
4060 if (orig_num_stmts < 2)
4061 return false;
4063 bool allow_unaligned_store
4064 = !STRICT_ALIGNMENT && param_store_merging_allow_unaligned;
4065 bool allow_unaligned_load = allow_unaligned_store;
4066 bool bzero_first = false;
4067 store_immediate_info *store;
4068 unsigned int num_clobber_stmts = 0;
4069 if (group->stores[0]->rhs_code == INTEGER_CST)
4071 unsigned int i;
4072 FOR_EACH_VEC_ELT (group->stores, i, store)
4073 if (gimple_clobber_p (store->stmt))
4074 num_clobber_stmts++;
4075 else if (TREE_CODE (gimple_assign_rhs1 (store->stmt)) == CONSTRUCTOR
4076 && CONSTRUCTOR_NELTS (gimple_assign_rhs1 (store->stmt)) == 0
4077 && group->start == store->bitpos
4078 && group->width == store->bitsize
4079 && (group->start % BITS_PER_UNIT) == 0
4080 && (group->width % BITS_PER_UNIT) == 0)
4082 bzero_first = true;
4083 break;
4085 else
4086 break;
4087 FOR_EACH_VEC_ELT_FROM (group->stores, i, store, i)
4088 if (gimple_clobber_p (store->stmt))
4089 num_clobber_stmts++;
4090 if (num_clobber_stmts == orig_num_stmts)
4091 return false;
4092 orig_num_stmts -= num_clobber_stmts;
4094 if (allow_unaligned_store || bzero_first)
4096 /* If unaligned stores are allowed, see how many stores we'd emit
4097 for unaligned and how many stores we'd emit for aligned stores.
4098 Only use unaligned stores if it allows fewer stores than aligned.
4099 Similarly, if there is a whole region clear first, prefer expanding
4100 it together compared to expanding clear first followed by merged
4101 further stores. */
4102 unsigned cnt[4] = { ~0U, ~0U, ~0U, ~0U };
4103 int pass_min = 0;
4104 for (int pass = 0; pass < 4; ++pass)
4106 if (!allow_unaligned_store && (pass & 1) != 0)
4107 continue;
4108 if (!bzero_first && (pass & 2) != 0)
4109 continue;
4110 cnt[pass] = split_group (group, (pass & 1) != 0,
4111 allow_unaligned_load, (pass & 2) != 0,
4112 NULL, NULL, NULL);
4113 if (cnt[pass] < cnt[pass_min])
4114 pass_min = pass;
4116 if ((pass_min & 1) == 0)
4117 allow_unaligned_store = false;
4118 if ((pass_min & 2) == 0)
4119 bzero_first = false;
4122 auto_vec<class split_store *, 32> split_stores;
4123 split_store *split_store;
4124 unsigned total_orig, total_new, i;
4125 split_group (group, allow_unaligned_store, allow_unaligned_load, bzero_first,
4126 &split_stores, &total_orig, &total_new);
4128 /* Determine if there is a clobber covering the whole group at the start,
4129 followed by proposed split stores that cover the whole group. In that
4130 case, prefer the transformation even if
4131 split_stores.length () == orig_num_stmts. */
4132 bool clobber_first = false;
4133 if (num_clobber_stmts
4134 && gimple_clobber_p (group->stores[0]->stmt)
4135 && group->start == group->stores[0]->bitpos
4136 && group->width == group->stores[0]->bitsize
4137 && (group->start % BITS_PER_UNIT) == 0
4138 && (group->width % BITS_PER_UNIT) == 0)
4140 clobber_first = true;
4141 unsigned HOST_WIDE_INT pos = group->start / BITS_PER_UNIT;
4142 FOR_EACH_VEC_ELT (split_stores, i, split_store)
4143 if (split_store->bytepos != pos)
4145 clobber_first = false;
4146 break;
4148 else
4149 pos += split_store->size / BITS_PER_UNIT;
4150 if (pos != (group->start + group->width) / BITS_PER_UNIT)
4151 clobber_first = false;
4154 if (split_stores.length () >= orig_num_stmts + clobber_first)
4157 /* We didn't manage to reduce the number of statements. Bail out. */
4158 if (dump_file && (dump_flags & TDF_DETAILS))
4159 fprintf (dump_file, "Exceeded original number of stmts (%u)."
4160 " Not profitable to emit new sequence.\n",
4161 orig_num_stmts);
4162 FOR_EACH_VEC_ELT (split_stores, i, split_store)
4163 delete split_store;
4164 return false;
4166 if (total_orig <= total_new)
4168 /* If number of estimated new statements is above estimated original
4169 statements, bail out too. */
4170 if (dump_file && (dump_flags & TDF_DETAILS))
4171 fprintf (dump_file, "Estimated number of original stmts (%u)"
4172 " not larger than estimated number of new"
4173 " stmts (%u).\n",
4174 total_orig, total_new);
4175 FOR_EACH_VEC_ELT (split_stores, i, split_store)
4176 delete split_store;
4177 return false;
4179 if (group->stores[0]->rhs_code == INTEGER_CST)
4181 bool all_orig = true;
4182 FOR_EACH_VEC_ELT (split_stores, i, split_store)
4183 if (!split_store->orig)
4185 all_orig = false;
4186 break;
4188 if (all_orig)
4190 unsigned int cnt = split_stores.length ();
4191 store_immediate_info *store;
4192 FOR_EACH_VEC_ELT (group->stores, i, store)
4193 if (gimple_clobber_p (store->stmt))
4194 ++cnt;
4195 /* Punt if we wouldn't make any real changes, i.e. keep all
4196 orig stmts + all clobbers. */
4197 if (cnt == group->stores.length ())
4199 if (dump_file && (dump_flags & TDF_DETAILS))
4200 fprintf (dump_file, "Exceeded original number of stmts (%u)."
4201 " Not profitable to emit new sequence.\n",
4202 orig_num_stmts);
4203 FOR_EACH_VEC_ELT (split_stores, i, split_store)
4204 delete split_store;
4205 return false;
4210 gimple_stmt_iterator last_gsi = gsi_for_stmt (group->last_stmt);
4211 gimple_seq seq = NULL;
4212 tree last_vdef, new_vuse;
4213 last_vdef = gimple_vdef (group->last_stmt);
4214 new_vuse = gimple_vuse (group->last_stmt);
4215 tree bswap_res = NULL_TREE;
4217 /* Clobbers are not removed. */
4218 if (gimple_clobber_p (group->last_stmt))
4220 new_vuse = make_ssa_name (gimple_vop (cfun), group->last_stmt);
4221 gimple_set_vdef (group->last_stmt, new_vuse);
4224 if (group->stores[0]->rhs_code == LROTATE_EXPR
4225 || group->stores[0]->rhs_code == NOP_EXPR)
4227 tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type;
4228 gimple *ins_stmt = group->stores[0]->ins_stmt;
4229 struct symbolic_number *n = &group->stores[0]->n;
4230 bool bswap = group->stores[0]->rhs_code == LROTATE_EXPR;
4232 switch (n->range)
4234 case 16:
4235 load_type = bswap_type = uint16_type_node;
4236 break;
4237 case 32:
4238 load_type = uint32_type_node;
4239 if (bswap)
4241 fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
4242 bswap_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
4244 break;
4245 case 64:
4246 load_type = uint64_type_node;
4247 if (bswap)
4249 fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
4250 bswap_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
4252 break;
4253 default:
4254 gcc_unreachable ();
4257 /* If the loads have each vuse of the corresponding store,
4258 we've checked the aliasing already in try_coalesce_bswap and
4259 we want to sink the need load into seq. So need to use new_vuse
4260 on the load. */
4261 if (n->base_addr)
4263 if (n->vuse == NULL)
4265 n->vuse = new_vuse;
4266 ins_stmt = NULL;
4268 else
4269 /* Update vuse in case it has changed by output_merged_stores. */
4270 n->vuse = gimple_vuse (ins_stmt);
4272 bswap_res = bswap_replace (gsi_start (seq), ins_stmt, fndecl,
4273 bswap_type, load_type, n, bswap,
4274 ~(uint64_t) 0);
4275 gcc_assert (bswap_res);
4278 gimple *stmt = NULL;
4279 auto_vec<gimple *, 32> orig_stmts;
4280 gimple_seq this_seq;
4281 tree addr = force_gimple_operand_1 (unshare_expr (base_addr), &this_seq,
4282 is_gimple_mem_ref_addr, NULL_TREE);
4283 gimple_seq_add_seq_without_update (&seq, this_seq);
4285 tree load_addr[2] = { NULL_TREE, NULL_TREE };
4286 gimple_seq load_seq[2] = { NULL, NULL };
4287 gimple_stmt_iterator load_gsi[2] = { gsi_none (), gsi_none () };
4288 for (int j = 0; j < 2; ++j)
4290 store_operand_info &op = group->stores[0]->ops[j];
4291 if (op.base_addr == NULL_TREE)
4292 continue;
4294 store_immediate_info *infol = group->stores.last ();
4295 if (gimple_vuse (op.stmt) == gimple_vuse (infol->ops[j].stmt))
4297 /* We can't pick the location randomly; while we've verified
4298 all the loads have the same vuse, they can be still in different
4299 basic blocks and we need to pick the one from the last bb:
4300 int x = q[0];
4301 if (x == N) return;
4302 int y = q[1];
4303 p[0] = x;
4304 p[1] = y;
4305 otherwise if we put the wider load at the q[0] load, we might
4306 segfault if q[1] is not mapped. */
4307 basic_block bb = gimple_bb (op.stmt);
4308 gimple *ostmt = op.stmt;
4309 store_immediate_info *info;
4310 FOR_EACH_VEC_ELT (group->stores, i, info)
4312 gimple *tstmt = info->ops[j].stmt;
4313 basic_block tbb = gimple_bb (tstmt);
4314 if (dominated_by_p (CDI_DOMINATORS, tbb, bb))
4316 ostmt = tstmt;
4317 bb = tbb;
4320 load_gsi[j] = gsi_for_stmt (ostmt);
4321 load_addr[j]
4322 = force_gimple_operand_1 (unshare_expr (op.base_addr),
4323 &load_seq[j], is_gimple_mem_ref_addr,
4324 NULL_TREE);
4326 else if (operand_equal_p (base_addr, op.base_addr, 0))
4327 load_addr[j] = addr;
4328 else
4330 load_addr[j]
4331 = force_gimple_operand_1 (unshare_expr (op.base_addr),
4332 &this_seq, is_gimple_mem_ref_addr,
4333 NULL_TREE);
4334 gimple_seq_add_seq_without_update (&seq, this_seq);
4338 FOR_EACH_VEC_ELT (split_stores, i, split_store)
4340 const unsigned HOST_WIDE_INT try_size = split_store->size;
4341 const unsigned HOST_WIDE_INT try_pos = split_store->bytepos;
4342 const unsigned HOST_WIDE_INT try_bitpos = try_pos * BITS_PER_UNIT;
4343 const unsigned HOST_WIDE_INT try_align = split_store->align;
4344 const unsigned HOST_WIDE_INT try_offset = try_pos - start_byte_pos;
4345 tree dest, src;
4346 location_t loc;
4348 if (split_store->orig)
4350 /* If there is just a single non-clobber constituent store
4351 which covers the whole area, just reuse the lhs and rhs. */
4352 gimple *orig_stmt = NULL;
4353 store_immediate_info *store;
4354 unsigned int j;
4355 FOR_EACH_VEC_ELT (split_store->orig_stores, j, store)
4356 if (!gimple_clobber_p (store->stmt))
4358 orig_stmt = store->stmt;
4359 break;
4361 dest = gimple_assign_lhs (orig_stmt);
4362 src = gimple_assign_rhs1 (orig_stmt);
4363 loc = gimple_location (orig_stmt);
4365 else
4367 store_immediate_info *info;
4368 unsigned short clique, base;
4369 unsigned int k;
4370 FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
4371 orig_stmts.safe_push (info->stmt);
4372 tree offset_type
4373 = get_alias_type_for_stmts (orig_stmts, false, &clique, &base);
4374 tree dest_type;
4375 loc = get_location_for_stmts (orig_stmts);
4376 orig_stmts.truncate (0);
4378 if (group->string_concatenation)
4379 dest_type
4380 = build_array_type_nelts (char_type_node,
4381 try_size / BITS_PER_UNIT);
4382 else
4384 dest_type = build_nonstandard_integer_type (try_size, UNSIGNED);
4385 dest_type = build_aligned_type (dest_type, try_align);
4387 dest = fold_build2 (MEM_REF, dest_type, addr,
4388 build_int_cst (offset_type, try_pos));
4389 if (TREE_CODE (dest) == MEM_REF)
4391 MR_DEPENDENCE_CLIQUE (dest) = clique;
4392 MR_DEPENDENCE_BASE (dest) = base;
4395 tree mask;
4396 if (bswap_res || group->string_concatenation)
4397 mask = integer_zero_node;
4398 else
4399 mask = native_interpret_expr (dest_type,
4400 group->mask + try_offset,
4401 group->buf_size);
4403 tree ops[2];
4404 for (int j = 0;
4405 j < 1 + (split_store->orig_stores[0]->ops[1].val != NULL_TREE);
4406 ++j)
4408 store_operand_info &op = split_store->orig_stores[0]->ops[j];
4409 if (bswap_res)
4410 ops[j] = bswap_res;
4411 else if (group->string_concatenation)
4413 ops[j] = build_string (try_size / BITS_PER_UNIT,
4414 (const char *) group->val + try_offset);
4415 TREE_TYPE (ops[j]) = dest_type;
4417 else if (op.base_addr)
4419 FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
4420 orig_stmts.safe_push (info->ops[j].stmt);
4422 offset_type = get_alias_type_for_stmts (orig_stmts, true,
4423 &clique, &base);
4424 location_t load_loc = get_location_for_stmts (orig_stmts);
4425 orig_stmts.truncate (0);
4427 unsigned HOST_WIDE_INT load_align = group->load_align[j];
4428 unsigned HOST_WIDE_INT align_bitpos
4429 = known_alignment (try_bitpos
4430 - split_store->orig_stores[0]->bitpos
4431 + op.bitpos);
4432 if (align_bitpos & (load_align - 1))
4433 load_align = least_bit_hwi (align_bitpos);
4435 tree load_int_type
4436 = build_nonstandard_integer_type (try_size, UNSIGNED);
4437 load_int_type
4438 = build_aligned_type (load_int_type, load_align);
4440 poly_uint64 load_pos
4441 = exact_div (try_bitpos
4442 - split_store->orig_stores[0]->bitpos
4443 + op.bitpos,
4444 BITS_PER_UNIT);
4445 ops[j] = fold_build2 (MEM_REF, load_int_type, load_addr[j],
4446 build_int_cst (offset_type, load_pos));
4447 if (TREE_CODE (ops[j]) == MEM_REF)
4449 MR_DEPENDENCE_CLIQUE (ops[j]) = clique;
4450 MR_DEPENDENCE_BASE (ops[j]) = base;
4452 if (!integer_zerop (mask))
4454 /* The load might load some bits (that will be masked
4455 off later on) uninitialized, avoid -W*uninitialized
4456 warnings in that case. */
4457 suppress_warning (ops[j], OPT_Wuninitialized);
4460 stmt = gimple_build_assign (make_ssa_name (dest_type), ops[j]);
4461 gimple_set_location (stmt, load_loc);
4462 if (gsi_bb (load_gsi[j]))
4464 gimple_set_vuse (stmt, gimple_vuse (op.stmt));
4465 gimple_seq_add_stmt_without_update (&load_seq[j], stmt);
4467 else
4469 gimple_set_vuse (stmt, new_vuse);
4470 gimple_seq_add_stmt_without_update (&seq, stmt);
4472 ops[j] = gimple_assign_lhs (stmt);
4473 tree xor_mask;
4474 enum tree_code inv_op
4475 = invert_op (split_store, j, dest_type, xor_mask);
4476 if (inv_op != NOP_EXPR)
4478 stmt = gimple_build_assign (make_ssa_name (dest_type),
4479 inv_op, ops[j], xor_mask);
4480 gimple_set_location (stmt, load_loc);
4481 ops[j] = gimple_assign_lhs (stmt);
4483 if (gsi_bb (load_gsi[j]))
4484 gimple_seq_add_stmt_without_update (&load_seq[j],
4485 stmt);
4486 else
4487 gimple_seq_add_stmt_without_update (&seq, stmt);
4490 else
4491 ops[j] = native_interpret_expr (dest_type,
4492 group->val + try_offset,
4493 group->buf_size);
4496 switch (split_store->orig_stores[0]->rhs_code)
4498 case BIT_AND_EXPR:
4499 case BIT_IOR_EXPR:
4500 case BIT_XOR_EXPR:
4501 FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
4503 tree rhs1 = gimple_assign_rhs1 (info->stmt);
4504 orig_stmts.safe_push (SSA_NAME_DEF_STMT (rhs1));
4506 location_t bit_loc;
4507 bit_loc = get_location_for_stmts (orig_stmts);
4508 orig_stmts.truncate (0);
4510 stmt
4511 = gimple_build_assign (make_ssa_name (dest_type),
4512 split_store->orig_stores[0]->rhs_code,
4513 ops[0], ops[1]);
4514 gimple_set_location (stmt, bit_loc);
4515 /* If there is just one load and there is a separate
4516 load_seq[0], emit the bitwise op right after it. */
4517 if (load_addr[1] == NULL_TREE && gsi_bb (load_gsi[0]))
4518 gimple_seq_add_stmt_without_update (&load_seq[0], stmt);
4519 /* Otherwise, if at least one load is in seq, we need to
4520 emit the bitwise op right before the store. If there
4521 are two loads and are emitted somewhere else, it would
4522 be better to emit the bitwise op as early as possible;
4523 we don't track where that would be possible right now
4524 though. */
4525 else
4526 gimple_seq_add_stmt_without_update (&seq, stmt);
4527 src = gimple_assign_lhs (stmt);
4528 tree xor_mask;
4529 enum tree_code inv_op;
4530 inv_op = invert_op (split_store, 2, dest_type, xor_mask);
4531 if (inv_op != NOP_EXPR)
4533 stmt = gimple_build_assign (make_ssa_name (dest_type),
4534 inv_op, src, xor_mask);
4535 gimple_set_location (stmt, bit_loc);
4536 if (load_addr[1] == NULL_TREE && gsi_bb (load_gsi[0]))
4537 gimple_seq_add_stmt_without_update (&load_seq[0], stmt);
4538 else
4539 gimple_seq_add_stmt_without_update (&seq, stmt);
4540 src = gimple_assign_lhs (stmt);
4542 break;
4543 case LROTATE_EXPR:
4544 case NOP_EXPR:
4545 src = ops[0];
4546 if (!is_gimple_val (src))
4548 stmt = gimple_build_assign (make_ssa_name (TREE_TYPE (src)),
4549 src);
4550 gimple_seq_add_stmt_without_update (&seq, stmt);
4551 src = gimple_assign_lhs (stmt);
4553 if (!useless_type_conversion_p (dest_type, TREE_TYPE (src)))
4555 stmt = gimple_build_assign (make_ssa_name (dest_type),
4556 NOP_EXPR, src);
4557 gimple_seq_add_stmt_without_update (&seq, stmt);
4558 src = gimple_assign_lhs (stmt);
4560 inv_op = invert_op (split_store, 2, dest_type, xor_mask);
4561 if (inv_op != NOP_EXPR)
4563 stmt = gimple_build_assign (make_ssa_name (dest_type),
4564 inv_op, src, xor_mask);
4565 gimple_set_location (stmt, loc);
4566 gimple_seq_add_stmt_without_update (&seq, stmt);
4567 src = gimple_assign_lhs (stmt);
4569 break;
4570 default:
4571 src = ops[0];
4572 break;
4575 /* If bit insertion is required, we use the source as an accumulator
4576 into which the successive bit-field values are manually inserted.
4577 FIXME: perhaps use BIT_INSERT_EXPR instead in some cases? */
4578 if (group->bit_insertion)
4579 FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
4580 if (info->rhs_code == BIT_INSERT_EXPR
4581 && info->bitpos < try_bitpos + try_size
4582 && info->bitpos + info->bitsize > try_bitpos)
4584 /* Mask, truncate, convert to final type, shift and ior into
4585 the accumulator. Note that every step can be a no-op. */
4586 const HOST_WIDE_INT start_gap = info->bitpos - try_bitpos;
4587 const HOST_WIDE_INT end_gap
4588 = (try_bitpos + try_size) - (info->bitpos + info->bitsize);
4589 tree tem = info->ops[0].val;
4590 if (!INTEGRAL_TYPE_P (TREE_TYPE (tem)))
4592 const unsigned HOST_WIDE_INT size
4593 = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (tem)));
4594 tree integer_type
4595 = build_nonstandard_integer_type (size, UNSIGNED);
4596 tem = gimple_build (&seq, loc, VIEW_CONVERT_EXPR,
4597 integer_type, tem);
4599 if (TYPE_PRECISION (TREE_TYPE (tem)) <= info->bitsize)
4601 tree bitfield_type
4602 = build_nonstandard_integer_type (info->bitsize,
4603 UNSIGNED);
4604 tem = gimple_convert (&seq, loc, bitfield_type, tem);
4606 else if ((BYTES_BIG_ENDIAN ? start_gap : end_gap) > 0)
4608 const unsigned HOST_WIDE_INT imask
4609 = (HOST_WIDE_INT_1U << info->bitsize) - 1;
4610 tem = gimple_build (&seq, loc,
4611 BIT_AND_EXPR, TREE_TYPE (tem), tem,
4612 build_int_cst (TREE_TYPE (tem),
4613 imask));
4615 const HOST_WIDE_INT shift
4616 = (BYTES_BIG_ENDIAN ? end_gap : start_gap);
4617 if (shift < 0)
4618 tem = gimple_build (&seq, loc,
4619 RSHIFT_EXPR, TREE_TYPE (tem), tem,
4620 build_int_cst (NULL_TREE, -shift));
4621 tem = gimple_convert (&seq, loc, dest_type, tem);
4622 if (shift > 0)
4623 tem = gimple_build (&seq, loc,
4624 LSHIFT_EXPR, dest_type, tem,
4625 build_int_cst (NULL_TREE, shift));
4626 src = gimple_build (&seq, loc,
4627 BIT_IOR_EXPR, dest_type, tem, src);
4630 if (!integer_zerop (mask))
4632 tree tem = make_ssa_name (dest_type);
4633 tree load_src = unshare_expr (dest);
4634 /* The load might load some or all bits uninitialized,
4635 avoid -W*uninitialized warnings in that case.
4636 As optimization, it would be nice if all the bits are
4637 provably uninitialized (no stores at all yet or previous
4638 store a CLOBBER) we'd optimize away the load and replace
4639 it e.g. with 0. */
4640 suppress_warning (load_src, OPT_Wuninitialized);
4641 stmt = gimple_build_assign (tem, load_src);
4642 gimple_set_location (stmt, loc);
4643 gimple_set_vuse (stmt, new_vuse);
4644 gimple_seq_add_stmt_without_update (&seq, stmt);
4646 /* FIXME: If there is a single chunk of zero bits in mask,
4647 perhaps use BIT_INSERT_EXPR instead? */
4648 stmt = gimple_build_assign (make_ssa_name (dest_type),
4649 BIT_AND_EXPR, tem, mask);
4650 gimple_set_location (stmt, loc);
4651 gimple_seq_add_stmt_without_update (&seq, stmt);
4652 tem = gimple_assign_lhs (stmt);
4654 if (TREE_CODE (src) == INTEGER_CST)
4655 src = wide_int_to_tree (dest_type,
4656 wi::bit_and_not (wi::to_wide (src),
4657 wi::to_wide (mask)));
4658 else
4660 tree nmask
4661 = wide_int_to_tree (dest_type,
4662 wi::bit_not (wi::to_wide (mask)));
4663 stmt = gimple_build_assign (make_ssa_name (dest_type),
4664 BIT_AND_EXPR, src, nmask);
4665 gimple_set_location (stmt, loc);
4666 gimple_seq_add_stmt_without_update (&seq, stmt);
4667 src = gimple_assign_lhs (stmt);
4669 stmt = gimple_build_assign (make_ssa_name (dest_type),
4670 BIT_IOR_EXPR, tem, src);
4671 gimple_set_location (stmt, loc);
4672 gimple_seq_add_stmt_without_update (&seq, stmt);
4673 src = gimple_assign_lhs (stmt);
4677 stmt = gimple_build_assign (dest, src);
4678 gimple_set_location (stmt, loc);
4679 gimple_set_vuse (stmt, new_vuse);
4680 gimple_seq_add_stmt_without_update (&seq, stmt);
4682 if (group->lp_nr && stmt_could_throw_p (cfun, stmt))
4683 add_stmt_to_eh_lp (stmt, group->lp_nr);
4685 tree new_vdef;
4686 if (i < split_stores.length () - 1)
4687 new_vdef = make_ssa_name (gimple_vop (cfun), stmt);
4688 else
4689 new_vdef = last_vdef;
4691 gimple_set_vdef (stmt, new_vdef);
4692 SSA_NAME_DEF_STMT (new_vdef) = stmt;
4693 new_vuse = new_vdef;
4696 FOR_EACH_VEC_ELT (split_stores, i, split_store)
4697 delete split_store;
4699 gcc_assert (seq);
4700 if (dump_file)
4702 fprintf (dump_file,
4703 "New sequence of %u stores to replace old one of %u stores\n",
4704 split_stores.length (), orig_num_stmts);
4705 if (dump_flags & TDF_DETAILS)
4706 print_gimple_seq (dump_file, seq, 0, TDF_VOPS | TDF_MEMSYMS);
4709 if (gimple_clobber_p (group->last_stmt))
4710 update_stmt (group->last_stmt);
4712 if (group->lp_nr > 0)
4714 /* We're going to insert a sequence of (potentially) throwing stores
4715 into an active EH region. This means that we're going to create
4716 new basic blocks with EH edges pointing to the post landing pad
4717 and, therefore, to have to update its PHI nodes, if any. For the
4718 virtual PHI node, we're going to use the VDEFs created above, but
4719 for the other nodes, we need to record the original reaching defs. */
4720 eh_landing_pad lp = get_eh_landing_pad_from_number (group->lp_nr);
4721 basic_block lp_bb = label_to_block (cfun, lp->post_landing_pad);
4722 basic_block last_bb = gimple_bb (group->last_stmt);
4723 edge last_edge = find_edge (last_bb, lp_bb);
4724 auto_vec<tree, 16> last_defs;
4725 gphi_iterator gpi;
4726 for (gpi = gsi_start_phis (lp_bb); !gsi_end_p (gpi); gsi_next (&gpi))
4728 gphi *phi = gpi.phi ();
4729 tree last_def;
4730 if (virtual_operand_p (gimple_phi_result (phi)))
4731 last_def = NULL_TREE;
4732 else
4733 last_def = gimple_phi_arg_def (phi, last_edge->dest_idx);
4734 last_defs.safe_push (last_def);
4737 /* Do the insertion. Then, if new basic blocks have been created in the
4738 process, rewind the chain of VDEFs create above to walk the new basic
4739 blocks and update the corresponding arguments of the PHI nodes. */
4740 update_modified_stmts (seq);
4741 if (gimple_find_sub_bbs (seq, &last_gsi))
4742 while (last_vdef != gimple_vuse (group->last_stmt))
4744 gimple *stmt = SSA_NAME_DEF_STMT (last_vdef);
4745 if (stmt_could_throw_p (cfun, stmt))
4747 edge new_edge = find_edge (gimple_bb (stmt), lp_bb);
4748 unsigned int i;
4749 for (gpi = gsi_start_phis (lp_bb), i = 0;
4750 !gsi_end_p (gpi);
4751 gsi_next (&gpi), i++)
4753 gphi *phi = gpi.phi ();
4754 tree new_def;
4755 if (virtual_operand_p (gimple_phi_result (phi)))
4756 new_def = last_vdef;
4757 else
4758 new_def = last_defs[i];
4759 add_phi_arg (phi, new_def, new_edge, UNKNOWN_LOCATION);
4762 last_vdef = gimple_vuse (stmt);
4765 else
4766 gsi_insert_seq_after (&last_gsi, seq, GSI_SAME_STMT);
4768 for (int j = 0; j < 2; ++j)
4769 if (load_seq[j])
4770 gsi_insert_seq_after (&load_gsi[j], load_seq[j], GSI_SAME_STMT);
4772 return true;
4775 /* Process the merged_store_group objects created in the coalescing phase.
4776 The stores are all against the base object BASE.
4777 Try to output the widened stores and delete the original statements if
4778 successful. Return true iff any changes were made. */
4780 bool
4781 imm_store_chain_info::output_merged_stores ()
4783 unsigned int i;
4784 merged_store_group *merged_store;
4785 bool ret = false;
4786 FOR_EACH_VEC_ELT (m_merged_store_groups, i, merged_store)
4788 if (dbg_cnt (store_merging)
4789 && output_merged_store (merged_store))
4791 unsigned int j;
4792 store_immediate_info *store;
4793 FOR_EACH_VEC_ELT (merged_store->stores, j, store)
4795 gimple *stmt = store->stmt;
4796 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4797 /* Don't remove clobbers, they are still useful even if
4798 everything is overwritten afterwards. */
4799 if (gimple_clobber_p (stmt))
4800 continue;
4801 gsi_remove (&gsi, true);
4802 if (store->lp_nr)
4803 remove_stmt_from_eh_lp (stmt);
4804 if (stmt != merged_store->last_stmt)
4806 unlink_stmt_vdef (stmt);
4807 release_defs (stmt);
4810 ret = true;
4813 if (ret && dump_file)
4814 fprintf (dump_file, "Merging successful!\n");
4816 return ret;
4819 /* Coalesce the store_immediate_info objects recorded against the base object
4820 BASE in the first phase and output them.
4821 Delete the allocated structures.
4822 Return true if any changes were made. */
4824 bool
4825 imm_store_chain_info::terminate_and_process_chain ()
4827 if (dump_file && (dump_flags & TDF_DETAILS))
4828 fprintf (dump_file, "Terminating chain with %u stores\n",
4829 m_store_info.length ());
4830 /* Process store chain. */
4831 bool ret = false;
4832 if (m_store_info.length () > 1)
4834 ret = coalesce_immediate_stores ();
4835 if (ret)
4836 ret = output_merged_stores ();
4839 /* Delete all the entries we allocated ourselves. */
4840 store_immediate_info *info;
4841 unsigned int i;
4842 FOR_EACH_VEC_ELT (m_store_info, i, info)
4843 delete info;
4845 merged_store_group *merged_info;
4846 FOR_EACH_VEC_ELT (m_merged_store_groups, i, merged_info)
4847 delete merged_info;
4849 return ret;
4852 /* Return true iff LHS is a destination potentially interesting for
4853 store merging. In practice these are the codes that get_inner_reference
4854 can process. */
4856 static bool
4857 lhs_valid_for_store_merging_p (tree lhs)
4859 if (DECL_P (lhs))
4860 return true;
4862 switch (TREE_CODE (lhs))
4864 case ARRAY_REF:
4865 case ARRAY_RANGE_REF:
4866 case BIT_FIELD_REF:
4867 case COMPONENT_REF:
4868 case MEM_REF:
4869 case VIEW_CONVERT_EXPR:
4870 return true;
4871 default:
4872 return false;
4876 /* Return true if the tree RHS is a constant we want to consider
4877 during store merging. In practice accept all codes that
4878 native_encode_expr accepts. */
4880 static bool
4881 rhs_valid_for_store_merging_p (tree rhs)
4883 unsigned HOST_WIDE_INT size;
4884 if (TREE_CODE (rhs) == CONSTRUCTOR
4885 && CONSTRUCTOR_NELTS (rhs) == 0
4886 && TYPE_SIZE_UNIT (TREE_TYPE (rhs))
4887 && tree_fits_uhwi_p (TYPE_SIZE_UNIT (TREE_TYPE (rhs))))
4888 return true;
4889 return (GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (rhs))).is_constant (&size)
4890 && native_encode_expr (rhs, NULL, size) != 0);
4893 /* Adjust *PBITPOS, *PBITREGION_START and *PBITREGION_END by BYTE_OFF bytes
4894 and return true on success or false on failure. */
4896 static bool
4897 adjust_bit_pos (poly_offset_int byte_off,
4898 poly_int64 *pbitpos,
4899 poly_uint64 *pbitregion_start,
4900 poly_uint64 *pbitregion_end)
4902 poly_offset_int bit_off = byte_off << LOG2_BITS_PER_UNIT;
4903 bit_off += *pbitpos;
4905 if (known_ge (bit_off, 0) && bit_off.to_shwi (pbitpos))
4907 if (maybe_ne (*pbitregion_end, 0U))
4909 bit_off = byte_off << LOG2_BITS_PER_UNIT;
4910 bit_off += *pbitregion_start;
4911 if (bit_off.to_uhwi (pbitregion_start))
4913 bit_off = byte_off << LOG2_BITS_PER_UNIT;
4914 bit_off += *pbitregion_end;
4915 if (!bit_off.to_uhwi (pbitregion_end))
4916 *pbitregion_end = 0;
4918 else
4919 *pbitregion_end = 0;
4921 return true;
4923 else
4924 return false;
4927 /* If MEM is a memory reference usable for store merging (either as
4928 store destination or for loads), return the non-NULL base_addr
4929 and set *PBITSIZE, *PBITPOS, *PBITREGION_START and *PBITREGION_END.
4930 Otherwise return NULL, *PBITPOS should be still valid even for that
4931 case. */
4933 static tree
4934 mem_valid_for_store_merging (tree mem, poly_uint64 *pbitsize,
4935 poly_uint64 *pbitpos,
4936 poly_uint64 *pbitregion_start,
4937 poly_uint64 *pbitregion_end)
4939 poly_int64 bitsize, bitpos;
4940 poly_uint64 bitregion_start = 0, bitregion_end = 0;
4941 machine_mode mode;
4942 int unsignedp = 0, reversep = 0, volatilep = 0;
4943 tree offset;
4944 tree base_addr = get_inner_reference (mem, &bitsize, &bitpos, &offset, &mode,
4945 &unsignedp, &reversep, &volatilep);
4946 *pbitsize = bitsize;
4947 if (known_le (bitsize, 0))
4948 return NULL_TREE;
4950 if (TREE_CODE (mem) == COMPONENT_REF
4951 && DECL_BIT_FIELD_TYPE (TREE_OPERAND (mem, 1)))
4953 get_bit_range (&bitregion_start, &bitregion_end, mem, &bitpos, &offset);
4954 if (maybe_ne (bitregion_end, 0U))
4955 bitregion_end += 1;
4958 if (reversep)
4959 return NULL_TREE;
4961 /* We do not want to rewrite TARGET_MEM_REFs. */
4962 if (TREE_CODE (base_addr) == TARGET_MEM_REF)
4963 return NULL_TREE;
4964 /* In some cases get_inner_reference may return a
4965 MEM_REF [ptr + byteoffset]. For the purposes of this pass
4966 canonicalize the base_addr to MEM_REF [ptr] and take
4967 byteoffset into account in the bitpos. This occurs in
4968 PR 23684 and this way we can catch more chains. */
4969 else if (TREE_CODE (base_addr) == MEM_REF)
4971 if (!adjust_bit_pos (mem_ref_offset (base_addr), &bitpos,
4972 &bitregion_start, &bitregion_end))
4973 return NULL_TREE;
4974 base_addr = TREE_OPERAND (base_addr, 0);
4976 /* get_inner_reference returns the base object, get at its
4977 address now. */
4978 else
4980 if (maybe_lt (bitpos, 0))
4981 return NULL_TREE;
4982 base_addr = build_fold_addr_expr (base_addr);
4985 if (offset)
4987 /* If the access is variable offset then a base decl has to be
4988 address-taken to be able to emit pointer-based stores to it.
4989 ??? We might be able to get away with re-using the original
4990 base up to the first variable part and then wrapping that inside
4991 a BIT_FIELD_REF. */
4992 tree base = get_base_address (base_addr);
4993 if (!base || (DECL_P (base) && !TREE_ADDRESSABLE (base)))
4994 return NULL_TREE;
4996 /* Similarly to above for the base, remove constant from the offset. */
4997 if (TREE_CODE (offset) == PLUS_EXPR
4998 && TREE_CODE (TREE_OPERAND (offset, 1)) == INTEGER_CST
4999 && adjust_bit_pos (wi::to_poly_offset (TREE_OPERAND (offset, 1)),
5000 &bitpos, &bitregion_start, &bitregion_end))
5001 offset = TREE_OPERAND (offset, 0);
5003 base_addr = build2 (POINTER_PLUS_EXPR, TREE_TYPE (base_addr),
5004 base_addr, offset);
5007 if (known_eq (bitregion_end, 0U))
5009 bitregion_start = round_down_to_byte_boundary (bitpos);
5010 bitregion_end = round_up_to_byte_boundary (bitpos + bitsize);
5013 *pbitsize = bitsize;
5014 *pbitpos = bitpos;
5015 *pbitregion_start = bitregion_start;
5016 *pbitregion_end = bitregion_end;
5017 return base_addr;
5020 /* Return true if STMT is a load that can be used for store merging.
5021 In that case fill in *OP. BITSIZE, BITPOS, BITREGION_START and
5022 BITREGION_END are properties of the corresponding store. */
5024 static bool
5025 handled_load (gimple *stmt, store_operand_info *op,
5026 poly_uint64 bitsize, poly_uint64 bitpos,
5027 poly_uint64 bitregion_start, poly_uint64 bitregion_end)
5029 if (!is_gimple_assign (stmt))
5030 return false;
5031 if (gimple_assign_rhs_code (stmt) == BIT_NOT_EXPR)
5033 tree rhs1 = gimple_assign_rhs1 (stmt);
5034 if (TREE_CODE (rhs1) == SSA_NAME
5035 && handled_load (SSA_NAME_DEF_STMT (rhs1), op, bitsize, bitpos,
5036 bitregion_start, bitregion_end))
5038 /* Don't allow _1 = load; _2 = ~1; _3 = ~_2; which should have
5039 been optimized earlier, but if allowed here, would confuse the
5040 multiple uses counting. */
5041 if (op->bit_not_p)
5042 return false;
5043 op->bit_not_p = !op->bit_not_p;
5044 return true;
5046 return false;
5048 if (gimple_vuse (stmt)
5049 && gimple_assign_load_p (stmt)
5050 && !stmt_can_throw_internal (cfun, stmt)
5051 && !gimple_has_volatile_ops (stmt))
5053 tree mem = gimple_assign_rhs1 (stmt);
5054 op->base_addr
5055 = mem_valid_for_store_merging (mem, &op->bitsize, &op->bitpos,
5056 &op->bitregion_start,
5057 &op->bitregion_end);
5058 if (op->base_addr != NULL_TREE
5059 && known_eq (op->bitsize, bitsize)
5060 && multiple_p (op->bitpos - bitpos, BITS_PER_UNIT)
5061 && known_ge (op->bitpos - op->bitregion_start,
5062 bitpos - bitregion_start)
5063 && known_ge (op->bitregion_end - op->bitpos,
5064 bitregion_end - bitpos))
5066 op->stmt = stmt;
5067 op->val = mem;
5068 op->bit_not_p = false;
5069 return true;
5072 return false;
5075 /* Return the index number of the landing pad for STMT, if any. */
5077 static int
5078 lp_nr_for_store (gimple *stmt)
5080 if (!cfun->can_throw_non_call_exceptions || !cfun->eh)
5081 return 0;
5083 if (!stmt_could_throw_p (cfun, stmt))
5084 return 0;
5086 return lookup_stmt_eh_lp (stmt);
5089 /* Record the store STMT for store merging optimization if it can be
5090 optimized. Return true if any changes were made. */
5092 bool
5093 pass_store_merging::process_store (gimple *stmt)
5095 tree lhs = gimple_assign_lhs (stmt);
5096 tree rhs = gimple_assign_rhs1 (stmt);
5097 poly_uint64 bitsize, bitpos = 0;
5098 poly_uint64 bitregion_start = 0, bitregion_end = 0;
5099 tree base_addr
5100 = mem_valid_for_store_merging (lhs, &bitsize, &bitpos,
5101 &bitregion_start, &bitregion_end);
5102 if (known_eq (bitsize, 0U))
5103 return false;
5105 bool invalid = (base_addr == NULL_TREE
5106 || (maybe_gt (bitsize,
5107 (unsigned int) MAX_BITSIZE_MODE_ANY_INT)
5108 && TREE_CODE (rhs) != INTEGER_CST
5109 && (TREE_CODE (rhs) != CONSTRUCTOR
5110 || CONSTRUCTOR_NELTS (rhs) != 0)));
5111 enum tree_code rhs_code = ERROR_MARK;
5112 bool bit_not_p = false;
5113 struct symbolic_number n;
5114 gimple *ins_stmt = NULL;
5115 store_operand_info ops[2];
5116 if (invalid)
5118 else if (TREE_CODE (rhs) == STRING_CST)
5120 rhs_code = STRING_CST;
5121 ops[0].val = rhs;
5123 else if (rhs_valid_for_store_merging_p (rhs))
5125 rhs_code = INTEGER_CST;
5126 ops[0].val = rhs;
5128 else if (TREE_CODE (rhs) == SSA_NAME)
5130 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs), *def_stmt1, *def_stmt2;
5131 if (!is_gimple_assign (def_stmt))
5132 invalid = true;
5133 else if (handled_load (def_stmt, &ops[0], bitsize, bitpos,
5134 bitregion_start, bitregion_end))
5135 rhs_code = MEM_REF;
5136 else if (gimple_assign_rhs_code (def_stmt) == BIT_NOT_EXPR)
5138 tree rhs1 = gimple_assign_rhs1 (def_stmt);
5139 if (TREE_CODE (rhs1) == SSA_NAME
5140 && is_gimple_assign (SSA_NAME_DEF_STMT (rhs1)))
5142 bit_not_p = true;
5143 def_stmt = SSA_NAME_DEF_STMT (rhs1);
5147 if (rhs_code == ERROR_MARK && !invalid)
5148 switch ((rhs_code = gimple_assign_rhs_code (def_stmt)))
5150 case BIT_AND_EXPR:
5151 case BIT_IOR_EXPR:
5152 case BIT_XOR_EXPR:
5153 tree rhs1, rhs2;
5154 rhs1 = gimple_assign_rhs1 (def_stmt);
5155 rhs2 = gimple_assign_rhs2 (def_stmt);
5156 invalid = true;
5157 if (TREE_CODE (rhs1) != SSA_NAME)
5158 break;
5159 def_stmt1 = SSA_NAME_DEF_STMT (rhs1);
5160 if (!is_gimple_assign (def_stmt1)
5161 || !handled_load (def_stmt1, &ops[0], bitsize, bitpos,
5162 bitregion_start, bitregion_end))
5163 break;
5164 if (rhs_valid_for_store_merging_p (rhs2))
5165 ops[1].val = rhs2;
5166 else if (TREE_CODE (rhs2) != SSA_NAME)
5167 break;
5168 else
5170 def_stmt2 = SSA_NAME_DEF_STMT (rhs2);
5171 if (!is_gimple_assign (def_stmt2))
5172 break;
5173 else if (!handled_load (def_stmt2, &ops[1], bitsize, bitpos,
5174 bitregion_start, bitregion_end))
5175 break;
5177 invalid = false;
5178 break;
5179 default:
5180 invalid = true;
5181 break;
5184 unsigned HOST_WIDE_INT const_bitsize;
5185 if (bitsize.is_constant (&const_bitsize)
5186 && (const_bitsize % BITS_PER_UNIT) == 0
5187 && const_bitsize <= 64
5188 && multiple_p (bitpos, BITS_PER_UNIT))
5190 ins_stmt = find_bswap_or_nop_1 (def_stmt, &n, 12);
5191 if (ins_stmt)
5193 uint64_t nn = n.n;
5194 for (unsigned HOST_WIDE_INT i = 0;
5195 i < const_bitsize;
5196 i += BITS_PER_UNIT, nn >>= BITS_PER_MARKER)
5197 if ((nn & MARKER_MASK) == 0
5198 || (nn & MARKER_MASK) == MARKER_BYTE_UNKNOWN)
5200 ins_stmt = NULL;
5201 break;
5203 if (ins_stmt)
5205 if (invalid)
5207 rhs_code = LROTATE_EXPR;
5208 ops[0].base_addr = NULL_TREE;
5209 ops[1].base_addr = NULL_TREE;
5211 invalid = false;
5216 if (invalid
5217 && bitsize.is_constant (&const_bitsize)
5218 && ((const_bitsize % BITS_PER_UNIT) != 0
5219 || !multiple_p (bitpos, BITS_PER_UNIT))
5220 && const_bitsize <= MAX_FIXED_MODE_SIZE)
5222 /* Bypass a conversion to the bit-field type. */
5223 if (!bit_not_p
5224 && is_gimple_assign (def_stmt)
5225 && CONVERT_EXPR_CODE_P (rhs_code))
5227 tree rhs1 = gimple_assign_rhs1 (def_stmt);
5228 if (TREE_CODE (rhs1) == SSA_NAME
5229 && INTEGRAL_TYPE_P (TREE_TYPE (rhs1)))
5230 rhs = rhs1;
5232 rhs_code = BIT_INSERT_EXPR;
5233 bit_not_p = false;
5234 ops[0].val = rhs;
5235 ops[0].base_addr = NULL_TREE;
5236 ops[1].base_addr = NULL_TREE;
5237 invalid = false;
5240 else
5241 invalid = true;
5243 unsigned HOST_WIDE_INT const_bitsize, const_bitpos;
5244 unsigned HOST_WIDE_INT const_bitregion_start, const_bitregion_end;
5245 if (invalid
5246 || !bitsize.is_constant (&const_bitsize)
5247 || !bitpos.is_constant (&const_bitpos)
5248 || !bitregion_start.is_constant (&const_bitregion_start)
5249 || !bitregion_end.is_constant (&const_bitregion_end))
5250 return terminate_all_aliasing_chains (NULL, stmt);
5252 if (!ins_stmt)
5253 memset (&n, 0, sizeof (n));
5255 class imm_store_chain_info **chain_info = NULL;
5256 bool ret = false;
5257 if (base_addr)
5258 chain_info = m_stores.get (base_addr);
5260 store_immediate_info *info;
5261 if (chain_info)
5263 unsigned int ord = (*chain_info)->m_store_info.length ();
5264 info = new store_immediate_info (const_bitsize, const_bitpos,
5265 const_bitregion_start,
5266 const_bitregion_end,
5267 stmt, ord, rhs_code, n, ins_stmt,
5268 bit_not_p, lp_nr_for_store (stmt),
5269 ops[0], ops[1]);
5270 if (dump_file && (dump_flags & TDF_DETAILS))
5272 fprintf (dump_file, "Recording immediate store from stmt:\n");
5273 print_gimple_stmt (dump_file, stmt, 0);
5275 (*chain_info)->m_store_info.safe_push (info);
5276 m_n_stores++;
5277 ret |= terminate_all_aliasing_chains (chain_info, stmt);
5278 /* If we reach the limit of stores to merge in a chain terminate and
5279 process the chain now. */
5280 if ((*chain_info)->m_store_info.length ()
5281 == (unsigned int) param_max_stores_to_merge)
5283 if (dump_file && (dump_flags & TDF_DETAILS))
5284 fprintf (dump_file,
5285 "Reached maximum number of statements to merge:\n");
5286 ret |= terminate_and_process_chain (*chain_info);
5289 else
5291 /* Store aliases any existing chain? */
5292 ret |= terminate_all_aliasing_chains (NULL, stmt);
5294 /* Start a new chain. */
5295 class imm_store_chain_info *new_chain
5296 = new imm_store_chain_info (m_stores_head, base_addr);
5297 info = new store_immediate_info (const_bitsize, const_bitpos,
5298 const_bitregion_start,
5299 const_bitregion_end,
5300 stmt, 0, rhs_code, n, ins_stmt,
5301 bit_not_p, lp_nr_for_store (stmt),
5302 ops[0], ops[1]);
5303 new_chain->m_store_info.safe_push (info);
5304 m_n_stores++;
5305 m_stores.put (base_addr, new_chain);
5306 m_n_chains++;
5307 if (dump_file && (dump_flags & TDF_DETAILS))
5309 fprintf (dump_file, "Starting active chain number %u with statement:\n",
5310 m_n_chains);
5311 print_gimple_stmt (dump_file, stmt, 0);
5312 fprintf (dump_file, "The base object is:\n");
5313 print_generic_expr (dump_file, base_addr);
5314 fprintf (dump_file, "\n");
5318 /* Prune oldest chains so that after adding the chain or store above
5319 we're again within the limits set by the params. */
5320 if (m_n_chains > (unsigned)param_max_store_chains_to_track
5321 || m_n_stores > (unsigned)param_max_stores_to_track)
5323 if (dump_file && (dump_flags & TDF_DETAILS))
5324 fprintf (dump_file, "Too many chains (%u > %d) or stores (%u > %d), "
5325 "terminating oldest chain(s).\n", m_n_chains,
5326 param_max_store_chains_to_track, m_n_stores,
5327 param_max_stores_to_track);
5328 imm_store_chain_info **e = &m_stores_head;
5329 unsigned idx = 0;
5330 unsigned n_stores = 0;
5331 while (*e)
5333 if (idx >= (unsigned)param_max_store_chains_to_track
5334 || (n_stores + (*e)->m_store_info.length ()
5335 > (unsigned)param_max_stores_to_track))
5336 ret |= terminate_and_process_chain (*e);
5337 else
5339 n_stores += (*e)->m_store_info.length ();
5340 e = &(*e)->next;
5341 ++idx;
5346 return ret;
5349 /* Return true if STMT is a store valid for store merging. */
5351 static bool
5352 store_valid_for_store_merging_p (gimple *stmt)
5354 return gimple_assign_single_p (stmt)
5355 && gimple_vdef (stmt)
5356 && lhs_valid_for_store_merging_p (gimple_assign_lhs (stmt))
5357 && (!gimple_has_volatile_ops (stmt) || gimple_clobber_p (stmt));
5360 enum basic_block_status { BB_INVALID, BB_VALID, BB_EXTENDED_VALID };
5362 /* Return the status of basic block BB wrt store merging. */
5364 static enum basic_block_status
5365 get_status_for_store_merging (basic_block bb)
5367 unsigned int num_statements = 0;
5368 unsigned int num_constructors = 0;
5369 gimple_stmt_iterator gsi;
5370 edge e;
5371 gimple *last_stmt = NULL;
5373 for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5375 gimple *stmt = gsi_stmt (gsi);
5377 if (is_gimple_debug (stmt))
5378 continue;
5380 last_stmt = stmt;
5382 if (store_valid_for_store_merging_p (stmt) && ++num_statements >= 2)
5383 break;
5385 if (is_gimple_assign (stmt)
5386 && gimple_assign_rhs_code (stmt) == CONSTRUCTOR)
5388 tree rhs = gimple_assign_rhs1 (stmt);
5389 if (VECTOR_TYPE_P (TREE_TYPE (rhs))
5390 && INTEGRAL_TYPE_P (TREE_TYPE (TREE_TYPE (rhs)))
5391 && gimple_assign_lhs (stmt) != NULL_TREE)
5393 HOST_WIDE_INT sz
5394 = int_size_in_bytes (TREE_TYPE (rhs)) * BITS_PER_UNIT;
5395 if (sz == 16 || sz == 32 || sz == 64)
5397 num_constructors = 1;
5398 break;
5404 if (num_statements == 0 && num_constructors == 0)
5405 return BB_INVALID;
5407 if (cfun->can_throw_non_call_exceptions && cfun->eh
5408 && store_valid_for_store_merging_p (last_stmt)
5409 && (e = find_fallthru_edge (bb->succs))
5410 && e->dest == bb->next_bb)
5411 return BB_EXTENDED_VALID;
5413 return (num_statements >= 2 || num_constructors) ? BB_VALID : BB_INVALID;
5416 /* Entry point for the pass. Go over each basic block recording chains of
5417 immediate stores. Upon encountering a terminating statement (as defined
5418 by stmt_terminates_chain_p) process the recorded stores and emit the widened
5419 variants. */
5421 unsigned int
5422 pass_store_merging::execute (function *fun)
5424 basic_block bb;
5425 hash_set<gimple *> orig_stmts;
5426 bool changed = false, open_chains = false;
5428 /* If the function can throw and catch non-call exceptions, we'll be trying
5429 to merge stores across different basic blocks so we need to first unsplit
5430 the EH edges in order to streamline the CFG of the function. */
5431 if (cfun->can_throw_non_call_exceptions && cfun->eh)
5432 unsplit_eh_edges ();
5434 calculate_dominance_info (CDI_DOMINATORS);
5436 FOR_EACH_BB_FN (bb, fun)
5438 const basic_block_status bb_status = get_status_for_store_merging (bb);
5439 gimple_stmt_iterator gsi;
5441 if (open_chains && (bb_status == BB_INVALID || !single_pred_p (bb)))
5443 changed |= terminate_and_process_all_chains ();
5444 open_chains = false;
5447 if (bb_status == BB_INVALID)
5448 continue;
5450 if (dump_file && (dump_flags & TDF_DETAILS))
5451 fprintf (dump_file, "Processing basic block <%d>:\n", bb->index);
5453 for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); )
5455 gimple *stmt = gsi_stmt (gsi);
5456 gsi_next (&gsi);
5458 if (is_gimple_debug (stmt))
5459 continue;
5461 if (gimple_has_volatile_ops (stmt) && !gimple_clobber_p (stmt))
5463 /* Terminate all chains. */
5464 if (dump_file && (dump_flags & TDF_DETAILS))
5465 fprintf (dump_file, "Volatile access terminates "
5466 "all chains\n");
5467 changed |= terminate_and_process_all_chains ();
5468 open_chains = false;
5469 continue;
5472 if (is_gimple_assign (stmt)
5473 && gimple_assign_rhs_code (stmt) == CONSTRUCTOR
5474 && maybe_optimize_vector_constructor (stmt))
5475 continue;
5477 if (store_valid_for_store_merging_p (stmt))
5478 changed |= process_store (stmt);
5479 else
5480 changed |= terminate_all_aliasing_chains (NULL, stmt);
5483 if (bb_status == BB_EXTENDED_VALID)
5484 open_chains = true;
5485 else
5487 changed |= terminate_and_process_all_chains ();
5488 open_chains = false;
5492 if (open_chains)
5493 changed |= terminate_and_process_all_chains ();
5495 /* If the function can throw and catch non-call exceptions and something
5496 changed during the pass, then the CFG has (very likely) changed too. */
5497 if (cfun->can_throw_non_call_exceptions && cfun->eh && changed)
5499 free_dominance_info (CDI_DOMINATORS);
5500 return TODO_cleanup_cfg;
5503 return 0;
5506 } // anon namespace
5508 /* Construct and return a store merging pass object. */
5510 gimple_opt_pass *
5511 make_pass_store_merging (gcc::context *ctxt)
5513 return new pass_store_merging (ctxt);
5516 #if CHECKING_P
5518 namespace selftest {
5520 /* Selftests for store merging helpers. */
5522 /* Assert that all elements of the byte arrays X and Y, both of length N
5523 are equal. */
5525 static void
5526 verify_array_eq (unsigned char *x, unsigned char *y, unsigned int n)
5528 for (unsigned int i = 0; i < n; i++)
5530 if (x[i] != y[i])
5532 fprintf (stderr, "Arrays do not match. X:\n");
5533 dump_char_array (stderr, x, n);
5534 fprintf (stderr, "Y:\n");
5535 dump_char_array (stderr, y, n);
5537 ASSERT_EQ (x[i], y[i]);
5541 /* Test shift_bytes_in_array_left and that it carries bits across between
5542 bytes correctly. */
5544 static void
5545 verify_shift_bytes_in_array_left (void)
5547 /* byte 1 | byte 0
5548 00011111 | 11100000. */
5549 unsigned char orig[2] = { 0xe0, 0x1f };
5550 unsigned char in[2];
5551 memcpy (in, orig, sizeof orig);
5553 unsigned char expected[2] = { 0x80, 0x7f };
5554 shift_bytes_in_array_left (in, sizeof (in), 2);
5555 verify_array_eq (in, expected, sizeof (in));
5557 memcpy (in, orig, sizeof orig);
5558 memcpy (expected, orig, sizeof orig);
5559 /* Check that shifting by zero doesn't change anything. */
5560 shift_bytes_in_array_left (in, sizeof (in), 0);
5561 verify_array_eq (in, expected, sizeof (in));
5565 /* Test shift_bytes_in_array_right and that it carries bits across between
5566 bytes correctly. */
5568 static void
5569 verify_shift_bytes_in_array_right (void)
5571 /* byte 1 | byte 0
5572 00011111 | 11100000. */
5573 unsigned char orig[2] = { 0x1f, 0xe0};
5574 unsigned char in[2];
5575 memcpy (in, orig, sizeof orig);
5576 unsigned char expected[2] = { 0x07, 0xf8};
5577 shift_bytes_in_array_right (in, sizeof (in), 2);
5578 verify_array_eq (in, expected, sizeof (in));
5580 memcpy (in, orig, sizeof orig);
5581 memcpy (expected, orig, sizeof orig);
5582 /* Check that shifting by zero doesn't change anything. */
5583 shift_bytes_in_array_right (in, sizeof (in), 0);
5584 verify_array_eq (in, expected, sizeof (in));
5587 /* Test clear_bit_region that it clears exactly the bits asked and
5588 nothing more. */
5590 static void
5591 verify_clear_bit_region (void)
5593 /* Start with all bits set and test clearing various patterns in them. */
5594 unsigned char orig[3] = { 0xff, 0xff, 0xff};
5595 unsigned char in[3];
5596 unsigned char expected[3];
5597 memcpy (in, orig, sizeof in);
5599 /* Check zeroing out all the bits. */
5600 clear_bit_region (in, 0, 3 * BITS_PER_UNIT);
5601 expected[0] = expected[1] = expected[2] = 0;
5602 verify_array_eq (in, expected, sizeof in);
5604 memcpy (in, orig, sizeof in);
5605 /* Leave the first and last bits intact. */
5606 clear_bit_region (in, 1, 3 * BITS_PER_UNIT - 2);
5607 expected[0] = 0x1;
5608 expected[1] = 0;
5609 expected[2] = 0x80;
5610 verify_array_eq (in, expected, sizeof in);
5613 /* Test clear_bit_region_be that it clears exactly the bits asked and
5614 nothing more. */
5616 static void
5617 verify_clear_bit_region_be (void)
5619 /* Start with all bits set and test clearing various patterns in them. */
5620 unsigned char orig[3] = { 0xff, 0xff, 0xff};
5621 unsigned char in[3];
5622 unsigned char expected[3];
5623 memcpy (in, orig, sizeof in);
5625 /* Check zeroing out all the bits. */
5626 clear_bit_region_be (in, BITS_PER_UNIT - 1, 3 * BITS_PER_UNIT);
5627 expected[0] = expected[1] = expected[2] = 0;
5628 verify_array_eq (in, expected, sizeof in);
5630 memcpy (in, orig, sizeof in);
5631 /* Leave the first and last bits intact. */
5632 clear_bit_region_be (in, BITS_PER_UNIT - 2, 3 * BITS_PER_UNIT - 2);
5633 expected[0] = 0x80;
5634 expected[1] = 0;
5635 expected[2] = 0x1;
5636 verify_array_eq (in, expected, sizeof in);
5640 /* Run all of the selftests within this file. */
5642 void
5643 store_merging_cc_tests (void)
5645 verify_shift_bytes_in_array_left ();
5646 verify_shift_bytes_in_array_right ();
5647 verify_clear_bit_region ();
5648 verify_clear_bit_region_be ();
5651 } // namespace selftest
5652 #endif /* CHECKING_P. */