compiler: only build thunk struct type when it is needed
[official-gcc.git] / gcc / gimple-ssa-store-merging.cc
blobb80b8eac4444d4bf580d5eb52c1fca4f431305b5
1 /* GIMPLE store merging and byte swapping passes.
2 Copyright (C) 2009-2022 Free Software Foundation, Inc.
3 Contributed by ARM Ltd.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* The purpose of the store merging pass is to combine multiple memory stores
22 of constant values, values loaded from memory, bitwise operations on those,
23 or bit-field values, to consecutive locations, into fewer wider stores.
25 For example, if we have a sequence peforming four byte stores to
26 consecutive memory locations:
27 [p ] := imm1;
28 [p + 1B] := imm2;
29 [p + 2B] := imm3;
30 [p + 3B] := imm4;
31 we can transform this into a single 4-byte store if the target supports it:
32 [p] := imm1:imm2:imm3:imm4 concatenated according to endianness.
34 Or:
35 [p ] := [q ];
36 [p + 1B] := [q + 1B];
37 [p + 2B] := [q + 2B];
38 [p + 3B] := [q + 3B];
39 if there is no overlap can be transformed into a single 4-byte
40 load followed by single 4-byte store.
42 Or:
43 [p ] := [q ] ^ imm1;
44 [p + 1B] := [q + 1B] ^ imm2;
45 [p + 2B] := [q + 2B] ^ imm3;
46 [p + 3B] := [q + 3B] ^ imm4;
47 if there is no overlap can be transformed into a single 4-byte
48 load, xored with imm1:imm2:imm3:imm4 and stored using a single 4-byte store.
50 Or:
51 [p:1 ] := imm;
52 [p:31] := val & 0x7FFFFFFF;
53 we can transform this into a single 4-byte store if the target supports it:
54 [p] := imm:(val & 0x7FFFFFFF) concatenated according to endianness.
56 The algorithm is applied to each basic block in three phases:
58 1) Scan through the basic block and record assignments to destinations
59 that can be expressed as a store to memory of a certain size at a certain
60 bit offset from base expressions we can handle. For bit-fields we also
61 record the surrounding bit region, i.e. bits that could be stored in
62 a read-modify-write operation when storing the bit-field. Record store
63 chains to different bases in a hash_map (m_stores) and make sure to
64 terminate such chains when appropriate (for example when the stored
65 values get used subsequently).
66 These stores can be a result of structure element initializers, array stores
67 etc. A store_immediate_info object is recorded for every such store.
68 Record as many such assignments to a single base as possible until a
69 statement that interferes with the store sequence is encountered.
70 Each store has up to 2 operands, which can be a either constant, a memory
71 load or an SSA name, from which the value to be stored can be computed.
72 At most one of the operands can be a constant. The operands are recorded
73 in store_operand_info struct.
75 2) Analyze the chains of stores recorded in phase 1) (i.e. the vector of
76 store_immediate_info objects) and coalesce contiguous stores into
77 merged_store_group objects. For bit-field stores, we don't need to
78 require the stores to be contiguous, just their surrounding bit regions
79 have to be contiguous. If the expression being stored is different
80 between adjacent stores, such as one store storing a constant and
81 following storing a value loaded from memory, or if the loaded memory
82 objects are not adjacent, a new merged_store_group is created as well.
84 For example, given the stores:
85 [p ] := 0;
86 [p + 1B] := 1;
87 [p + 3B] := 0;
88 [p + 4B] := 1;
89 [p + 5B] := 0;
90 [p + 6B] := 0;
91 This phase would produce two merged_store_group objects, one recording the
92 two bytes stored in the memory region [p : p + 1] and another
93 recording the four bytes stored in the memory region [p + 3 : p + 6].
95 3) The merged_store_group objects produced in phase 2) are processed
96 to generate the sequence of wider stores that set the contiguous memory
97 regions to the sequence of bytes that correspond to it. This may emit
98 multiple stores per store group to handle contiguous stores that are not
99 of a size that is a power of 2. For example it can try to emit a 40-bit
100 store as a 32-bit store followed by an 8-bit store.
101 We try to emit as wide stores as we can while respecting STRICT_ALIGNMENT
102 or TARGET_SLOW_UNALIGNED_ACCESS settings.
104 Note on endianness and example:
105 Consider 2 contiguous 16-bit stores followed by 2 contiguous 8-bit stores:
106 [p ] := 0x1234;
107 [p + 2B] := 0x5678;
108 [p + 4B] := 0xab;
109 [p + 5B] := 0xcd;
111 The memory layout for little-endian (LE) and big-endian (BE) must be:
112 p |LE|BE|
113 ---------
114 0 |34|12|
115 1 |12|34|
116 2 |78|56|
117 3 |56|78|
118 4 |ab|ab|
119 5 |cd|cd|
121 To merge these into a single 48-bit merged value 'val' in phase 2)
122 on little-endian we insert stores to higher (consecutive) bitpositions
123 into the most significant bits of the merged value.
124 The final merged value would be: 0xcdab56781234
126 For big-endian we insert stores to higher bitpositions into the least
127 significant bits of the merged value.
128 The final merged value would be: 0x12345678abcd
130 Then, in phase 3), we want to emit this 48-bit value as a 32-bit store
131 followed by a 16-bit store. Again, we must consider endianness when
132 breaking down the 48-bit value 'val' computed above.
133 For little endian we emit:
134 [p] (32-bit) := 0x56781234; // val & 0x0000ffffffff;
135 [p + 4B] (16-bit) := 0xcdab; // (val & 0xffff00000000) >> 32;
137 Whereas for big-endian we emit:
138 [p] (32-bit) := 0x12345678; // (val & 0xffffffff0000) >> 16;
139 [p + 4B] (16-bit) := 0xabcd; // val & 0x00000000ffff; */
141 #include "config.h"
142 #include "system.h"
143 #include "coretypes.h"
144 #include "backend.h"
145 #include "tree.h"
146 #include "gimple.h"
147 #include "builtins.h"
148 #include "fold-const.h"
149 #include "tree-pass.h"
150 #include "ssa.h"
151 #include "gimple-pretty-print.h"
152 #include "alias.h"
153 #include "fold-const.h"
154 #include "print-tree.h"
155 #include "tree-hash-traits.h"
156 #include "gimple-iterator.h"
157 #include "gimplify.h"
158 #include "gimple-fold.h"
159 #include "stor-layout.h"
160 #include "timevar.h"
161 #include "cfganal.h"
162 #include "cfgcleanup.h"
163 #include "tree-cfg.h"
164 #include "except.h"
165 #include "tree-eh.h"
166 #include "target.h"
167 #include "gimplify-me.h"
168 #include "rtl.h"
169 #include "expr.h" /* For get_bit_range. */
170 #include "optabs-tree.h"
171 #include "dbgcnt.h"
172 #include "selftest.h"
174 /* The maximum size (in bits) of the stores this pass should generate. */
175 #define MAX_STORE_BITSIZE (BITS_PER_WORD)
176 #define MAX_STORE_BYTES (MAX_STORE_BITSIZE / BITS_PER_UNIT)
178 /* Limit to bound the number of aliasing checks for loads with the same
179 vuse as the corresponding store. */
180 #define MAX_STORE_ALIAS_CHECKS 64
182 namespace {
184 struct bswap_stat
186 /* Number of hand-written 16-bit nop / bswaps found. */
187 int found_16bit;
189 /* Number of hand-written 32-bit nop / bswaps found. */
190 int found_32bit;
192 /* Number of hand-written 64-bit nop / bswaps found. */
193 int found_64bit;
194 } nop_stats, bswap_stats;
196 /* A symbolic number structure is used to detect byte permutation and selection
197 patterns of a source. To achieve that, its field N contains an artificial
198 number consisting of BITS_PER_MARKER sized markers tracking where does each
199 byte come from in the source:
201 0 - target byte has the value 0
202 FF - target byte has an unknown value (eg. due to sign extension)
203 1..size - marker value is the byte index in the source (0 for lsb).
205 To detect permutations on memory sources (arrays and structures), a symbolic
206 number is also associated:
207 - a base address BASE_ADDR and an OFFSET giving the address of the source;
208 - a range which gives the difference between the highest and lowest accessed
209 memory location to make such a symbolic number;
210 - the address SRC of the source element of lowest address as a convenience
211 to easily get BASE_ADDR + offset + lowest bytepos;
212 - number of expressions N_OPS bitwise ored together to represent
213 approximate cost of the computation.
215 Note 1: the range is different from size as size reflects the size of the
216 type of the current expression. For instance, for an array char a[],
217 (short) a[0] | (short) a[3] would have a size of 2 but a range of 4 while
218 (short) a[0] | ((short) a[0] << 1) would still have a size of 2 but this
219 time a range of 1.
221 Note 2: for non-memory sources, range holds the same value as size.
223 Note 3: SRC points to the SSA_NAME in case of non-memory source. */
225 struct symbolic_number {
226 uint64_t n;
227 tree type;
228 tree base_addr;
229 tree offset;
230 poly_int64_pod bytepos;
231 tree src;
232 tree alias_set;
233 tree vuse;
234 unsigned HOST_WIDE_INT range;
235 int n_ops;
238 #define BITS_PER_MARKER 8
239 #define MARKER_MASK ((1 << BITS_PER_MARKER) - 1)
240 #define MARKER_BYTE_UNKNOWN MARKER_MASK
241 #define HEAD_MARKER(n, size) \
242 ((n) & ((uint64_t) MARKER_MASK << (((size) - 1) * BITS_PER_MARKER)))
244 /* The number which the find_bswap_or_nop_1 result should match in
245 order to have a nop. The number is masked according to the size of
246 the symbolic number before using it. */
247 #define CMPNOP (sizeof (int64_t) < 8 ? 0 : \
248 (uint64_t)0x08070605 << 32 | 0x04030201)
250 /* The number which the find_bswap_or_nop_1 result should match in
251 order to have a byte swap. The number is masked according to the
252 size of the symbolic number before using it. */
253 #define CMPXCHG (sizeof (int64_t) < 8 ? 0 : \
254 (uint64_t)0x01020304 << 32 | 0x05060708)
256 /* Perform a SHIFT or ROTATE operation by COUNT bits on symbolic
257 number N. Return false if the requested operation is not permitted
258 on a symbolic number. */
260 inline bool
261 do_shift_rotate (enum tree_code code,
262 struct symbolic_number *n,
263 int count)
265 int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
266 uint64_t head_marker;
268 if (count < 0
269 || count >= TYPE_PRECISION (n->type)
270 || count % BITS_PER_UNIT != 0)
271 return false;
272 count = (count / BITS_PER_UNIT) * BITS_PER_MARKER;
274 /* Zero out the extra bits of N in order to avoid them being shifted
275 into the significant bits. */
276 if (size < 64 / BITS_PER_MARKER)
277 n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
279 switch (code)
281 case LSHIFT_EXPR:
282 n->n <<= count;
283 break;
284 case RSHIFT_EXPR:
285 head_marker = HEAD_MARKER (n->n, size);
286 n->n >>= count;
287 /* Arithmetic shift of signed type: result is dependent on the value. */
288 if (!TYPE_UNSIGNED (n->type) && head_marker)
289 for (i = 0; i < count / BITS_PER_MARKER; i++)
290 n->n |= (uint64_t) MARKER_BYTE_UNKNOWN
291 << ((size - 1 - i) * BITS_PER_MARKER);
292 break;
293 case LROTATE_EXPR:
294 n->n = (n->n << count) | (n->n >> ((size * BITS_PER_MARKER) - count));
295 break;
296 case RROTATE_EXPR:
297 n->n = (n->n >> count) | (n->n << ((size * BITS_PER_MARKER) - count));
298 break;
299 default:
300 return false;
302 /* Zero unused bits for size. */
303 if (size < 64 / BITS_PER_MARKER)
304 n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
305 return true;
308 /* Perform sanity checking for the symbolic number N and the gimple
309 statement STMT. */
311 inline bool
312 verify_symbolic_number_p (struct symbolic_number *n, gimple *stmt)
314 tree lhs_type;
316 lhs_type = TREE_TYPE (gimple_get_lhs (stmt));
318 if (TREE_CODE (lhs_type) != INTEGER_TYPE
319 && TREE_CODE (lhs_type) != ENUMERAL_TYPE)
320 return false;
322 if (TYPE_PRECISION (lhs_type) != TYPE_PRECISION (n->type))
323 return false;
325 return true;
328 /* Initialize the symbolic number N for the bswap pass from the base element
329 SRC manipulated by the bitwise OR expression. */
331 bool
332 init_symbolic_number (struct symbolic_number *n, tree src)
334 int size;
336 if (!INTEGRAL_TYPE_P (TREE_TYPE (src)) && !POINTER_TYPE_P (TREE_TYPE (src)))
337 return false;
339 n->base_addr = n->offset = n->alias_set = n->vuse = NULL_TREE;
340 n->src = src;
342 /* Set up the symbolic number N by setting each byte to a value between 1 and
343 the byte size of rhs1. The highest order byte is set to n->size and the
344 lowest order byte to 1. */
345 n->type = TREE_TYPE (src);
346 size = TYPE_PRECISION (n->type);
347 if (size % BITS_PER_UNIT != 0)
348 return false;
349 size /= BITS_PER_UNIT;
350 if (size > 64 / BITS_PER_MARKER)
351 return false;
352 n->range = size;
353 n->n = CMPNOP;
354 n->n_ops = 1;
356 if (size < 64 / BITS_PER_MARKER)
357 n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
359 return true;
362 /* Check if STMT might be a byte swap or a nop from a memory source and returns
363 the answer. If so, REF is that memory source and the base of the memory area
364 accessed and the offset of the access from that base are recorded in N. */
366 bool
367 find_bswap_or_nop_load (gimple *stmt, tree ref, struct symbolic_number *n)
369 /* Leaf node is an array or component ref. Memorize its base and
370 offset from base to compare to other such leaf node. */
371 poly_int64 bitsize, bitpos, bytepos;
372 machine_mode mode;
373 int unsignedp, reversep, volatilep;
374 tree offset, base_addr;
376 /* Not prepared to handle PDP endian. */
377 if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
378 return false;
380 if (!gimple_assign_load_p (stmt) || gimple_has_volatile_ops (stmt))
381 return false;
383 base_addr = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode,
384 &unsignedp, &reversep, &volatilep);
386 if (TREE_CODE (base_addr) == TARGET_MEM_REF)
387 /* Do not rewrite TARGET_MEM_REF. */
388 return false;
389 else if (TREE_CODE (base_addr) == MEM_REF)
391 poly_offset_int bit_offset = 0;
392 tree off = TREE_OPERAND (base_addr, 1);
394 if (!integer_zerop (off))
396 poly_offset_int boff = mem_ref_offset (base_addr);
397 boff <<= LOG2_BITS_PER_UNIT;
398 bit_offset += boff;
401 base_addr = TREE_OPERAND (base_addr, 0);
403 /* Avoid returning a negative bitpos as this may wreak havoc later. */
404 if (maybe_lt (bit_offset, 0))
406 tree byte_offset = wide_int_to_tree
407 (sizetype, bits_to_bytes_round_down (bit_offset));
408 bit_offset = num_trailing_bits (bit_offset);
409 if (offset)
410 offset = size_binop (PLUS_EXPR, offset, byte_offset);
411 else
412 offset = byte_offset;
415 bitpos += bit_offset.force_shwi ();
417 else
418 base_addr = build_fold_addr_expr (base_addr);
420 if (!multiple_p (bitpos, BITS_PER_UNIT, &bytepos))
421 return false;
422 if (!multiple_p (bitsize, BITS_PER_UNIT))
423 return false;
424 if (reversep)
425 return false;
427 if (!init_symbolic_number (n, ref))
428 return false;
429 n->base_addr = base_addr;
430 n->offset = offset;
431 n->bytepos = bytepos;
432 n->alias_set = reference_alias_ptr_type (ref);
433 n->vuse = gimple_vuse (stmt);
434 return true;
437 /* Compute the symbolic number N representing the result of a bitwise OR,
438 bitwise XOR or plus on 2 symbolic number N1 and N2 whose source statements
439 are respectively SOURCE_STMT1 and SOURCE_STMT2. CODE is the operation. */
441 gimple *
442 perform_symbolic_merge (gimple *source_stmt1, struct symbolic_number *n1,
443 gimple *source_stmt2, struct symbolic_number *n2,
444 struct symbolic_number *n, enum tree_code code)
446 int i, size;
447 uint64_t mask;
448 gimple *source_stmt;
449 struct symbolic_number *n_start;
451 tree rhs1 = gimple_assign_rhs1 (source_stmt1);
452 if (TREE_CODE (rhs1) == BIT_FIELD_REF
453 && TREE_CODE (TREE_OPERAND (rhs1, 0)) == SSA_NAME)
454 rhs1 = TREE_OPERAND (rhs1, 0);
455 tree rhs2 = gimple_assign_rhs1 (source_stmt2);
456 if (TREE_CODE (rhs2) == BIT_FIELD_REF
457 && TREE_CODE (TREE_OPERAND (rhs2, 0)) == SSA_NAME)
458 rhs2 = TREE_OPERAND (rhs2, 0);
460 /* Sources are different, cancel bswap if they are not memory location with
461 the same base (array, structure, ...). */
462 if (rhs1 != rhs2)
464 uint64_t inc;
465 HOST_WIDE_INT start1, start2, start_sub, end_sub, end1, end2, end;
466 struct symbolic_number *toinc_n_ptr, *n_end;
467 basic_block bb1, bb2;
469 if (!n1->base_addr || !n2->base_addr
470 || !operand_equal_p (n1->base_addr, n2->base_addr, 0))
471 return NULL;
473 if (!n1->offset != !n2->offset
474 || (n1->offset && !operand_equal_p (n1->offset, n2->offset, 0)))
475 return NULL;
477 start1 = 0;
478 if (!(n2->bytepos - n1->bytepos).is_constant (&start2))
479 return NULL;
481 if (start1 < start2)
483 n_start = n1;
484 start_sub = start2 - start1;
486 else
488 n_start = n2;
489 start_sub = start1 - start2;
492 bb1 = gimple_bb (source_stmt1);
493 bb2 = gimple_bb (source_stmt2);
494 if (dominated_by_p (CDI_DOMINATORS, bb1, bb2))
495 source_stmt = source_stmt1;
496 else
497 source_stmt = source_stmt2;
499 /* Find the highest address at which a load is performed and
500 compute related info. */
501 end1 = start1 + (n1->range - 1);
502 end2 = start2 + (n2->range - 1);
503 if (end1 < end2)
505 end = end2;
506 end_sub = end2 - end1;
508 else
510 end = end1;
511 end_sub = end1 - end2;
513 n_end = (end2 > end1) ? n2 : n1;
515 /* Find symbolic number whose lsb is the most significant. */
516 if (BYTES_BIG_ENDIAN)
517 toinc_n_ptr = (n_end == n1) ? n2 : n1;
518 else
519 toinc_n_ptr = (n_start == n1) ? n2 : n1;
521 n->range = end - MIN (start1, start2) + 1;
523 /* Check that the range of memory covered can be represented by
524 a symbolic number. */
525 if (n->range > 64 / BITS_PER_MARKER)
526 return NULL;
528 /* Reinterpret byte marks in symbolic number holding the value of
529 bigger weight according to target endianness. */
530 inc = BYTES_BIG_ENDIAN ? end_sub : start_sub;
531 size = TYPE_PRECISION (n1->type) / BITS_PER_UNIT;
532 for (i = 0; i < size; i++, inc <<= BITS_PER_MARKER)
534 unsigned marker
535 = (toinc_n_ptr->n >> (i * BITS_PER_MARKER)) & MARKER_MASK;
536 if (marker && marker != MARKER_BYTE_UNKNOWN)
537 toinc_n_ptr->n += inc;
540 else
542 n->range = n1->range;
543 n_start = n1;
544 source_stmt = source_stmt1;
547 if (!n1->alias_set
548 || alias_ptr_types_compatible_p (n1->alias_set, n2->alias_set))
549 n->alias_set = n1->alias_set;
550 else
551 n->alias_set = ptr_type_node;
552 n->vuse = n_start->vuse;
553 n->base_addr = n_start->base_addr;
554 n->offset = n_start->offset;
555 n->src = n_start->src;
556 n->bytepos = n_start->bytepos;
557 n->type = n_start->type;
558 size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
559 uint64_t res_n = n1->n | n2->n;
561 for (i = 0, mask = MARKER_MASK; i < size; i++, mask <<= BITS_PER_MARKER)
563 uint64_t masked1, masked2;
565 masked1 = n1->n & mask;
566 masked2 = n2->n & mask;
567 /* If at least one byte is 0, all of 0 | x == 0 ^ x == 0 + x == x. */
568 if (masked1 && masked2)
570 /* + can carry into upper bits, just punt. */
571 if (code == PLUS_EXPR)
572 return NULL;
573 /* x | x is still x. */
574 if (code == BIT_IOR_EXPR && masked1 == masked2)
575 continue;
576 if (code == BIT_XOR_EXPR)
578 /* x ^ x is 0, but MARKER_BYTE_UNKNOWN stands for
579 unknown values and unknown ^ unknown is unknown. */
580 if (masked1 == masked2
581 && masked1 != ((uint64_t) MARKER_BYTE_UNKNOWN
582 << i * BITS_PER_MARKER))
584 res_n &= ~mask;
585 continue;
588 /* Otherwise set the byte to unknown, it might still be
589 later masked off. */
590 res_n |= mask;
593 n->n = res_n;
594 n->n_ops = n1->n_ops + n2->n_ops;
596 return source_stmt;
599 /* find_bswap_or_nop_1 invokes itself recursively with N and tries to perform
600 the operation given by the rhs of STMT on the result. If the operation
601 could successfully be executed the function returns a gimple stmt whose
602 rhs's first tree is the expression of the source operand and NULL
603 otherwise. */
605 gimple *
606 find_bswap_or_nop_1 (gimple *stmt, struct symbolic_number *n, int limit)
608 enum tree_code code;
609 tree rhs1, rhs2 = NULL;
610 gimple *rhs1_stmt, *rhs2_stmt, *source_stmt1;
611 enum gimple_rhs_class rhs_class;
613 if (!limit || !is_gimple_assign (stmt))
614 return NULL;
616 rhs1 = gimple_assign_rhs1 (stmt);
618 if (find_bswap_or_nop_load (stmt, rhs1, n))
619 return stmt;
621 /* Handle BIT_FIELD_REF. */
622 if (TREE_CODE (rhs1) == BIT_FIELD_REF
623 && TREE_CODE (TREE_OPERAND (rhs1, 0)) == SSA_NAME)
625 if (!tree_fits_uhwi_p (TREE_OPERAND (rhs1, 1))
626 || !tree_fits_uhwi_p (TREE_OPERAND (rhs1, 2)))
627 return NULL;
629 unsigned HOST_WIDE_INT bitsize = tree_to_uhwi (TREE_OPERAND (rhs1, 1));
630 unsigned HOST_WIDE_INT bitpos = tree_to_uhwi (TREE_OPERAND (rhs1, 2));
631 if (bitpos % BITS_PER_UNIT == 0
632 && bitsize % BITS_PER_UNIT == 0
633 && init_symbolic_number (n, TREE_OPERAND (rhs1, 0)))
635 /* Handle big-endian bit numbering in BIT_FIELD_REF. */
636 if (BYTES_BIG_ENDIAN)
637 bitpos = TYPE_PRECISION (n->type) - bitpos - bitsize;
639 /* Shift. */
640 if (!do_shift_rotate (RSHIFT_EXPR, n, bitpos))
641 return NULL;
643 /* Mask. */
644 uint64_t mask = 0;
645 uint64_t tmp = (1 << BITS_PER_UNIT) - 1;
646 for (unsigned i = 0; i < bitsize / BITS_PER_UNIT;
647 i++, tmp <<= BITS_PER_UNIT)
648 mask |= (uint64_t) MARKER_MASK << (i * BITS_PER_MARKER);
649 n->n &= mask;
651 /* Convert. */
652 n->type = TREE_TYPE (rhs1);
653 if (!n->base_addr)
654 n->range = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
656 return verify_symbolic_number_p (n, stmt) ? stmt : NULL;
659 return NULL;
662 if (TREE_CODE (rhs1) != SSA_NAME)
663 return NULL;
665 code = gimple_assign_rhs_code (stmt);
666 rhs_class = gimple_assign_rhs_class (stmt);
667 rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
669 if (rhs_class == GIMPLE_BINARY_RHS)
670 rhs2 = gimple_assign_rhs2 (stmt);
672 /* Handle unary rhs and binary rhs with integer constants as second
673 operand. */
675 if (rhs_class == GIMPLE_UNARY_RHS
676 || (rhs_class == GIMPLE_BINARY_RHS
677 && TREE_CODE (rhs2) == INTEGER_CST))
679 if (code != BIT_AND_EXPR
680 && code != LSHIFT_EXPR
681 && code != RSHIFT_EXPR
682 && code != LROTATE_EXPR
683 && code != RROTATE_EXPR
684 && !CONVERT_EXPR_CODE_P (code))
685 return NULL;
687 source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, n, limit - 1);
689 /* If find_bswap_or_nop_1 returned NULL, STMT is a leaf node and
690 we have to initialize the symbolic number. */
691 if (!source_stmt1)
693 if (gimple_assign_load_p (stmt)
694 || !init_symbolic_number (n, rhs1))
695 return NULL;
696 source_stmt1 = stmt;
699 switch (code)
701 case BIT_AND_EXPR:
703 int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
704 uint64_t val = int_cst_value (rhs2), mask = 0;
705 uint64_t tmp = (1 << BITS_PER_UNIT) - 1;
707 /* Only constants masking full bytes are allowed. */
708 for (i = 0; i < size; i++, tmp <<= BITS_PER_UNIT)
709 if ((val & tmp) != 0 && (val & tmp) != tmp)
710 return NULL;
711 else if (val & tmp)
712 mask |= (uint64_t) MARKER_MASK << (i * BITS_PER_MARKER);
714 n->n &= mask;
716 break;
717 case LSHIFT_EXPR:
718 case RSHIFT_EXPR:
719 case LROTATE_EXPR:
720 case RROTATE_EXPR:
721 if (!do_shift_rotate (code, n, (int) TREE_INT_CST_LOW (rhs2)))
722 return NULL;
723 break;
724 CASE_CONVERT:
726 int i, type_size, old_type_size;
727 tree type;
729 type = TREE_TYPE (gimple_assign_lhs (stmt));
730 type_size = TYPE_PRECISION (type);
731 if (type_size % BITS_PER_UNIT != 0)
732 return NULL;
733 type_size /= BITS_PER_UNIT;
734 if (type_size > 64 / BITS_PER_MARKER)
735 return NULL;
737 /* Sign extension: result is dependent on the value. */
738 old_type_size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
739 if (!TYPE_UNSIGNED (n->type) && type_size > old_type_size
740 && HEAD_MARKER (n->n, old_type_size))
741 for (i = 0; i < type_size - old_type_size; i++)
742 n->n |= (uint64_t) MARKER_BYTE_UNKNOWN
743 << ((type_size - 1 - i) * BITS_PER_MARKER);
745 if (type_size < 64 / BITS_PER_MARKER)
747 /* If STMT casts to a smaller type mask out the bits not
748 belonging to the target type. */
749 n->n &= ((uint64_t) 1 << (type_size * BITS_PER_MARKER)) - 1;
751 n->type = type;
752 if (!n->base_addr)
753 n->range = type_size;
755 break;
756 default:
757 return NULL;
759 return verify_symbolic_number_p (n, stmt) ? source_stmt1 : NULL;
762 /* Handle binary rhs. */
764 if (rhs_class == GIMPLE_BINARY_RHS)
766 struct symbolic_number n1, n2;
767 gimple *source_stmt, *source_stmt2;
769 if (!rhs2 || TREE_CODE (rhs2) != SSA_NAME)
770 return NULL;
772 rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
774 switch (code)
776 case BIT_IOR_EXPR:
777 case BIT_XOR_EXPR:
778 case PLUS_EXPR:
779 source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, &n1, limit - 1);
781 if (!source_stmt1)
782 return NULL;
784 source_stmt2 = find_bswap_or_nop_1 (rhs2_stmt, &n2, limit - 1);
786 if (!source_stmt2)
787 return NULL;
789 if (TYPE_PRECISION (n1.type) != TYPE_PRECISION (n2.type))
790 return NULL;
792 if (n1.vuse != n2.vuse)
793 return NULL;
795 source_stmt
796 = perform_symbolic_merge (source_stmt1, &n1, source_stmt2, &n2, n,
797 code);
799 if (!source_stmt)
800 return NULL;
802 if (!verify_symbolic_number_p (n, stmt))
803 return NULL;
805 break;
806 default:
807 return NULL;
809 return source_stmt;
811 return NULL;
814 /* Helper for find_bswap_or_nop and try_coalesce_bswap to compute
815 *CMPXCHG, *CMPNOP and adjust *N. */
817 void
818 find_bswap_or_nop_finalize (struct symbolic_number *n, uint64_t *cmpxchg,
819 uint64_t *cmpnop, bool *cast64_to_32)
821 unsigned rsize;
822 uint64_t tmpn, mask;
824 /* The number which the find_bswap_or_nop_1 result should match in order
825 to have a full byte swap. The number is shifted to the right
826 according to the size of the symbolic number before using it. */
827 *cmpxchg = CMPXCHG;
828 *cmpnop = CMPNOP;
829 *cast64_to_32 = false;
831 /* Find real size of result (highest non-zero byte). */
832 if (n->base_addr)
833 for (tmpn = n->n, rsize = 0; tmpn; tmpn >>= BITS_PER_MARKER, rsize++);
834 else
835 rsize = n->range;
837 /* Zero out the bits corresponding to untouched bytes in original gimple
838 expression. */
839 if (n->range < (int) sizeof (int64_t))
841 mask = ((uint64_t) 1 << (n->range * BITS_PER_MARKER)) - 1;
842 if (n->base_addr == NULL
843 && n->range == 4
844 && int_size_in_bytes (TREE_TYPE (n->src)) == 8)
846 /* If all bytes in n->n are either 0 or in [5..8] range, this
847 might be a candidate for (unsigned) __builtin_bswap64 (src).
848 It is not worth it for (unsigned short) __builtin_bswap64 (src)
849 or (unsigned short) __builtin_bswap32 (src). */
850 *cast64_to_32 = true;
851 for (tmpn = n->n; tmpn; tmpn >>= BITS_PER_MARKER)
852 if ((tmpn & MARKER_MASK)
853 && ((tmpn & MARKER_MASK) <= 4 || (tmpn & MARKER_MASK) > 8))
855 *cast64_to_32 = false;
856 break;
859 if (*cast64_to_32)
860 *cmpxchg &= mask;
861 else
862 *cmpxchg >>= (64 / BITS_PER_MARKER - n->range) * BITS_PER_MARKER;
863 *cmpnop &= mask;
866 /* Zero out the bits corresponding to unused bytes in the result of the
867 gimple expression. */
868 if (rsize < n->range)
870 if (BYTES_BIG_ENDIAN)
872 mask = ((uint64_t) 1 << (rsize * BITS_PER_MARKER)) - 1;
873 *cmpxchg &= mask;
874 if (n->range - rsize == sizeof (int64_t))
875 *cmpnop = 0;
876 else
877 *cmpnop >>= (n->range - rsize) * BITS_PER_MARKER;
879 else
881 mask = ((uint64_t) 1 << (rsize * BITS_PER_MARKER)) - 1;
882 if (n->range - rsize == sizeof (int64_t))
883 *cmpxchg = 0;
884 else
885 *cmpxchg >>= (n->range - rsize) * BITS_PER_MARKER;
886 *cmpnop &= mask;
888 n->range = rsize;
891 if (*cast64_to_32)
892 n->range = 8;
893 n->range *= BITS_PER_UNIT;
896 /* Check if STMT completes a bswap implementation or a read in a given
897 endianness consisting of ORs, SHIFTs and ANDs and sets *BSWAP
898 accordingly. It also sets N to represent the kind of operations
899 performed: size of the resulting expression and whether it works on
900 a memory source, and if so alias-set and vuse. At last, the
901 function returns a stmt whose rhs's first tree is the source
902 expression. */
904 gimple *
905 find_bswap_or_nop (gimple *stmt, struct symbolic_number *n, bool *bswap,
906 bool *cast64_to_32, uint64_t *mask)
908 tree type_size = TYPE_SIZE_UNIT (TREE_TYPE (gimple_get_lhs (stmt)));
909 if (!tree_fits_uhwi_p (type_size))
910 return NULL;
912 /* The last parameter determines the depth search limit. It usually
913 correlates directly to the number n of bytes to be touched. We
914 increase that number by 2 * (log2(n) + 1) here in order to also
915 cover signed -> unsigned conversions of the src operand as can be seen
916 in libgcc, and for initial shift/and operation of the src operand. */
917 int limit = tree_to_uhwi (type_size);
918 limit += 2 * (1 + (int) ceil_log2 ((unsigned HOST_WIDE_INT) limit));
919 gimple *ins_stmt = find_bswap_or_nop_1 (stmt, n, limit);
921 if (!ins_stmt)
923 if (gimple_assign_rhs_code (stmt) != CONSTRUCTOR
924 || BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
925 return NULL;
926 unsigned HOST_WIDE_INT sz = tree_to_uhwi (type_size) * BITS_PER_UNIT;
927 if (sz != 16 && sz != 32 && sz != 64)
928 return NULL;
929 tree rhs = gimple_assign_rhs1 (stmt);
930 if (CONSTRUCTOR_NELTS (rhs) == 0)
931 return NULL;
932 tree eltype = TREE_TYPE (TREE_TYPE (rhs));
933 unsigned HOST_WIDE_INT eltsz
934 = int_size_in_bytes (eltype) * BITS_PER_UNIT;
935 if (TYPE_PRECISION (eltype) != eltsz)
936 return NULL;
937 constructor_elt *elt;
938 unsigned int i;
939 tree type = build_nonstandard_integer_type (sz, 1);
940 FOR_EACH_VEC_SAFE_ELT (CONSTRUCTOR_ELTS (rhs), i, elt)
942 if (TREE_CODE (elt->value) != SSA_NAME
943 || !INTEGRAL_TYPE_P (TREE_TYPE (elt->value)))
944 return NULL;
945 struct symbolic_number n1;
946 gimple *source_stmt
947 = find_bswap_or_nop_1 (SSA_NAME_DEF_STMT (elt->value), &n1,
948 limit - 1);
950 if (!source_stmt)
951 return NULL;
953 n1.type = type;
954 if (!n1.base_addr)
955 n1.range = sz / BITS_PER_UNIT;
957 if (i == 0)
959 ins_stmt = source_stmt;
960 *n = n1;
962 else
964 if (n->vuse != n1.vuse)
965 return NULL;
967 struct symbolic_number n0 = *n;
969 if (!BYTES_BIG_ENDIAN)
971 if (!do_shift_rotate (LSHIFT_EXPR, &n1, i * eltsz))
972 return NULL;
974 else if (!do_shift_rotate (LSHIFT_EXPR, &n0, eltsz))
975 return NULL;
976 ins_stmt
977 = perform_symbolic_merge (ins_stmt, &n0, source_stmt, &n1, n,
978 BIT_IOR_EXPR);
980 if (!ins_stmt)
981 return NULL;
986 uint64_t cmpxchg, cmpnop;
987 find_bswap_or_nop_finalize (n, &cmpxchg, &cmpnop, cast64_to_32);
989 /* A complete byte swap should make the symbolic number to start with
990 the largest digit in the highest order byte. Unchanged symbolic
991 number indicates a read with same endianness as target architecture. */
992 *mask = ~(uint64_t) 0;
993 if (n->n == cmpnop)
994 *bswap = false;
995 else if (n->n == cmpxchg)
996 *bswap = true;
997 else
999 int set = 0;
1000 for (uint64_t msk = MARKER_MASK; msk; msk <<= BITS_PER_MARKER)
1001 if ((n->n & msk) == 0)
1002 *mask &= ~msk;
1003 else if ((n->n & msk) == (cmpxchg & msk))
1004 set++;
1005 else
1006 return NULL;
1007 if (set < 2)
1008 return NULL;
1009 *bswap = true;
1012 /* Useless bit manipulation performed by code. */
1013 if (!n->base_addr && n->n == cmpnop && n->n_ops == 1)
1014 return NULL;
1016 return ins_stmt;
1019 const pass_data pass_data_optimize_bswap =
1021 GIMPLE_PASS, /* type */
1022 "bswap", /* name */
1023 OPTGROUP_NONE, /* optinfo_flags */
1024 TV_NONE, /* tv_id */
1025 PROP_ssa, /* properties_required */
1026 0, /* properties_provided */
1027 0, /* properties_destroyed */
1028 0, /* todo_flags_start */
1029 0, /* todo_flags_finish */
1032 class pass_optimize_bswap : public gimple_opt_pass
1034 public:
1035 pass_optimize_bswap (gcc::context *ctxt)
1036 : gimple_opt_pass (pass_data_optimize_bswap, ctxt)
1039 /* opt_pass methods: */
1040 bool gate (function *) final override
1042 return flag_expensive_optimizations && optimize && BITS_PER_UNIT == 8;
1045 unsigned int execute (function *) final override;
1047 }; // class pass_optimize_bswap
1049 /* Helper function for bswap_replace. Build VIEW_CONVERT_EXPR from
1050 VAL to TYPE. If VAL has different type size, emit a NOP_EXPR cast
1051 first. */
1053 static tree
1054 bswap_view_convert (gimple_stmt_iterator *gsi, tree type, tree val,
1055 bool before)
1057 gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (val))
1058 || POINTER_TYPE_P (TREE_TYPE (val)));
1059 if (TYPE_SIZE (type) != TYPE_SIZE (TREE_TYPE (val)))
1061 HOST_WIDE_INT prec = TREE_INT_CST_LOW (TYPE_SIZE (type));
1062 if (POINTER_TYPE_P (TREE_TYPE (val)))
1064 gimple *g
1065 = gimple_build_assign (make_ssa_name (pointer_sized_int_node),
1066 NOP_EXPR, val);
1067 if (before)
1068 gsi_insert_before (gsi, g, GSI_SAME_STMT);
1069 else
1070 gsi_insert_after (gsi, g, GSI_NEW_STMT);
1071 val = gimple_assign_lhs (g);
1073 tree itype = build_nonstandard_integer_type (prec, 1);
1074 gimple *g = gimple_build_assign (make_ssa_name (itype), NOP_EXPR, val);
1075 if (before)
1076 gsi_insert_before (gsi, g, GSI_SAME_STMT);
1077 else
1078 gsi_insert_after (gsi, g, GSI_NEW_STMT);
1079 val = gimple_assign_lhs (g);
1081 return build1 (VIEW_CONVERT_EXPR, type, val);
1084 /* Perform the bswap optimization: replace the expression computed in the rhs
1085 of gsi_stmt (GSI) (or if NULL add instead of replace) by an equivalent
1086 bswap, load or load + bswap expression.
1087 Which of these alternatives replace the rhs is given by N->base_addr (non
1088 null if a load is needed) and BSWAP. The type, VUSE and set-alias of the
1089 load to perform are also given in N while the builtin bswap invoke is given
1090 in FNDEL. Finally, if a load is involved, INS_STMT refers to one of the
1091 load statements involved to construct the rhs in gsi_stmt (GSI) and
1092 N->range gives the size of the rhs expression for maintaining some
1093 statistics.
1095 Note that if the replacement involve a load and if gsi_stmt (GSI) is
1096 non-NULL, that stmt is moved just after INS_STMT to do the load with the
1097 same VUSE which can lead to gsi_stmt (GSI) changing of basic block. */
1099 tree
1100 bswap_replace (gimple_stmt_iterator gsi, gimple *ins_stmt, tree fndecl,
1101 tree bswap_type, tree load_type, struct symbolic_number *n,
1102 bool bswap, uint64_t mask)
1104 tree src, tmp, tgt = NULL_TREE;
1105 gimple *bswap_stmt, *mask_stmt = NULL;
1106 tree_code conv_code = NOP_EXPR;
1108 gimple *cur_stmt = gsi_stmt (gsi);
1109 src = n->src;
1110 if (cur_stmt)
1112 tgt = gimple_assign_lhs (cur_stmt);
1113 if (gimple_assign_rhs_code (cur_stmt) == CONSTRUCTOR
1114 && tgt
1115 && VECTOR_TYPE_P (TREE_TYPE (tgt)))
1116 conv_code = VIEW_CONVERT_EXPR;
1119 /* Need to load the value from memory first. */
1120 if (n->base_addr)
1122 gimple_stmt_iterator gsi_ins = gsi;
1123 if (ins_stmt)
1124 gsi_ins = gsi_for_stmt (ins_stmt);
1125 tree addr_expr, addr_tmp, val_expr, val_tmp;
1126 tree load_offset_ptr, aligned_load_type;
1127 gimple *load_stmt;
1128 unsigned align = get_object_alignment (src);
1129 poly_int64 load_offset = 0;
1131 if (cur_stmt)
1133 basic_block ins_bb = gimple_bb (ins_stmt);
1134 basic_block cur_bb = gimple_bb (cur_stmt);
1135 if (!dominated_by_p (CDI_DOMINATORS, cur_bb, ins_bb))
1136 return NULL_TREE;
1138 /* Move cur_stmt just before one of the load of the original
1139 to ensure it has the same VUSE. See PR61517 for what could
1140 go wrong. */
1141 if (gimple_bb (cur_stmt) != gimple_bb (ins_stmt))
1142 reset_flow_sensitive_info (gimple_assign_lhs (cur_stmt));
1143 gsi_move_before (&gsi, &gsi_ins);
1144 gsi = gsi_for_stmt (cur_stmt);
1146 else
1147 gsi = gsi_ins;
1149 /* Compute address to load from and cast according to the size
1150 of the load. */
1151 addr_expr = build_fold_addr_expr (src);
1152 if (is_gimple_mem_ref_addr (addr_expr))
1153 addr_tmp = unshare_expr (addr_expr);
1154 else
1156 addr_tmp = unshare_expr (n->base_addr);
1157 if (!is_gimple_mem_ref_addr (addr_tmp))
1158 addr_tmp = force_gimple_operand_gsi_1 (&gsi, addr_tmp,
1159 is_gimple_mem_ref_addr,
1160 NULL_TREE, true,
1161 GSI_SAME_STMT);
1162 load_offset = n->bytepos;
1163 if (n->offset)
1165 tree off
1166 = force_gimple_operand_gsi (&gsi, unshare_expr (n->offset),
1167 true, NULL_TREE, true,
1168 GSI_SAME_STMT);
1169 gimple *stmt
1170 = gimple_build_assign (make_ssa_name (TREE_TYPE (addr_tmp)),
1171 POINTER_PLUS_EXPR, addr_tmp, off);
1172 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1173 addr_tmp = gimple_assign_lhs (stmt);
1177 /* Perform the load. */
1178 aligned_load_type = load_type;
1179 if (align < TYPE_ALIGN (load_type))
1180 aligned_load_type = build_aligned_type (load_type, align);
1181 load_offset_ptr = build_int_cst (n->alias_set, load_offset);
1182 val_expr = fold_build2 (MEM_REF, aligned_load_type, addr_tmp,
1183 load_offset_ptr);
1185 if (!bswap)
1187 if (n->range == 16)
1188 nop_stats.found_16bit++;
1189 else if (n->range == 32)
1190 nop_stats.found_32bit++;
1191 else
1193 gcc_assert (n->range == 64);
1194 nop_stats.found_64bit++;
1197 /* Convert the result of load if necessary. */
1198 if (tgt && !useless_type_conversion_p (TREE_TYPE (tgt), load_type))
1200 val_tmp = make_temp_ssa_name (aligned_load_type, NULL,
1201 "load_dst");
1202 load_stmt = gimple_build_assign (val_tmp, val_expr);
1203 gimple_set_vuse (load_stmt, n->vuse);
1204 gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT);
1205 if (conv_code == VIEW_CONVERT_EXPR)
1206 val_tmp = bswap_view_convert (&gsi, TREE_TYPE (tgt), val_tmp,
1207 true);
1208 gimple_assign_set_rhs_with_ops (&gsi, conv_code, val_tmp);
1209 update_stmt (cur_stmt);
1211 else if (cur_stmt)
1213 gimple_assign_set_rhs_with_ops (&gsi, MEM_REF, val_expr);
1214 gimple_set_vuse (cur_stmt, n->vuse);
1215 update_stmt (cur_stmt);
1217 else
1219 tgt = make_ssa_name (load_type);
1220 cur_stmt = gimple_build_assign (tgt, MEM_REF, val_expr);
1221 gimple_set_vuse (cur_stmt, n->vuse);
1222 gsi_insert_before (&gsi, cur_stmt, GSI_SAME_STMT);
1225 if (dump_file)
1227 fprintf (dump_file,
1228 "%d bit load in target endianness found at: ",
1229 (int) n->range);
1230 print_gimple_stmt (dump_file, cur_stmt, 0);
1232 return tgt;
1234 else
1236 val_tmp = make_temp_ssa_name (aligned_load_type, NULL, "load_dst");
1237 load_stmt = gimple_build_assign (val_tmp, val_expr);
1238 gimple_set_vuse (load_stmt, n->vuse);
1239 gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT);
1241 src = val_tmp;
1243 else if (!bswap)
1245 gimple *g = NULL;
1246 if (tgt && !useless_type_conversion_p (TREE_TYPE (tgt), TREE_TYPE (src)))
1248 if (!is_gimple_val (src))
1249 return NULL_TREE;
1250 if (conv_code == VIEW_CONVERT_EXPR)
1251 src = bswap_view_convert (&gsi, TREE_TYPE (tgt), src, true);
1252 g = gimple_build_assign (tgt, conv_code, src);
1254 else if (cur_stmt)
1255 g = gimple_build_assign (tgt, src);
1256 else
1257 tgt = src;
1258 if (n->range == 16)
1259 nop_stats.found_16bit++;
1260 else if (n->range == 32)
1261 nop_stats.found_32bit++;
1262 else
1264 gcc_assert (n->range == 64);
1265 nop_stats.found_64bit++;
1267 if (dump_file)
1269 fprintf (dump_file,
1270 "%d bit reshuffle in target endianness found at: ",
1271 (int) n->range);
1272 if (cur_stmt)
1273 print_gimple_stmt (dump_file, cur_stmt, 0);
1274 else
1276 print_generic_expr (dump_file, tgt, TDF_NONE);
1277 fprintf (dump_file, "\n");
1280 if (cur_stmt)
1281 gsi_replace (&gsi, g, true);
1282 return tgt;
1284 else if (TREE_CODE (src) == BIT_FIELD_REF)
1285 src = TREE_OPERAND (src, 0);
1287 if (n->range == 16)
1288 bswap_stats.found_16bit++;
1289 else if (n->range == 32)
1290 bswap_stats.found_32bit++;
1291 else
1293 gcc_assert (n->range == 64);
1294 bswap_stats.found_64bit++;
1297 tmp = src;
1299 /* Convert the src expression if necessary. */
1300 if (!useless_type_conversion_p (TREE_TYPE (tmp), bswap_type))
1302 gimple *convert_stmt;
1304 tmp = make_temp_ssa_name (bswap_type, NULL, "bswapsrc");
1305 convert_stmt = gimple_build_assign (tmp, NOP_EXPR, src);
1306 gsi_insert_before (&gsi, convert_stmt, GSI_SAME_STMT);
1309 /* Canonical form for 16 bit bswap is a rotate expression. Only 16bit values
1310 are considered as rotation of 2N bit values by N bits is generally not
1311 equivalent to a bswap. Consider for instance 0x01020304 r>> 16 which
1312 gives 0x03040102 while a bswap for that value is 0x04030201. */
1313 if (bswap && n->range == 16)
1315 tree count = build_int_cst (NULL, BITS_PER_UNIT);
1316 src = fold_build2 (LROTATE_EXPR, bswap_type, tmp, count);
1317 bswap_stmt = gimple_build_assign (NULL, src);
1319 else
1320 bswap_stmt = gimple_build_call (fndecl, 1, tmp);
1322 if (tgt == NULL_TREE)
1323 tgt = make_ssa_name (bswap_type);
1324 tmp = tgt;
1326 if (mask != ~(uint64_t) 0)
1328 tree m = build_int_cst (bswap_type, mask);
1329 tmp = make_temp_ssa_name (bswap_type, NULL, "bswapdst");
1330 gimple_set_lhs (bswap_stmt, tmp);
1331 mask_stmt = gimple_build_assign (tgt, BIT_AND_EXPR, tmp, m);
1332 tmp = tgt;
1335 /* Convert the result if necessary. */
1336 if (!useless_type_conversion_p (TREE_TYPE (tgt), bswap_type))
1338 tmp = make_temp_ssa_name (bswap_type, NULL, "bswapdst");
1339 tree atmp = tmp;
1340 gimple_stmt_iterator gsi2 = gsi;
1341 if (conv_code == VIEW_CONVERT_EXPR)
1342 atmp = bswap_view_convert (&gsi2, TREE_TYPE (tgt), tmp, false);
1343 gimple *convert_stmt = gimple_build_assign (tgt, conv_code, atmp);
1344 gsi_insert_after (&gsi2, convert_stmt, GSI_SAME_STMT);
1347 gimple_set_lhs (mask_stmt ? mask_stmt : bswap_stmt, tmp);
1349 if (dump_file)
1351 fprintf (dump_file, "%d bit bswap implementation found at: ",
1352 (int) n->range);
1353 if (cur_stmt)
1354 print_gimple_stmt (dump_file, cur_stmt, 0);
1355 else
1357 print_generic_expr (dump_file, tgt, TDF_NONE);
1358 fprintf (dump_file, "\n");
1362 if (cur_stmt)
1364 if (mask_stmt)
1365 gsi_insert_after (&gsi, mask_stmt, GSI_SAME_STMT);
1366 gsi_insert_after (&gsi, bswap_stmt, GSI_SAME_STMT);
1367 gsi_remove (&gsi, true);
1369 else
1371 gsi_insert_before (&gsi, bswap_stmt, GSI_SAME_STMT);
1372 if (mask_stmt)
1373 gsi_insert_before (&gsi, mask_stmt, GSI_SAME_STMT);
1375 return tgt;
1378 /* Try to optimize an assignment CUR_STMT with CONSTRUCTOR on the rhs
1379 using bswap optimizations. CDI_DOMINATORS need to be
1380 computed on entry. Return true if it has been optimized and
1381 TODO_update_ssa is needed. */
1383 static bool
1384 maybe_optimize_vector_constructor (gimple *cur_stmt)
1386 tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type;
1387 struct symbolic_number n;
1388 bool bswap;
1390 gcc_assert (is_gimple_assign (cur_stmt)
1391 && gimple_assign_rhs_code (cur_stmt) == CONSTRUCTOR);
1393 tree rhs = gimple_assign_rhs1 (cur_stmt);
1394 if (!VECTOR_TYPE_P (TREE_TYPE (rhs))
1395 || !INTEGRAL_TYPE_P (TREE_TYPE (TREE_TYPE (rhs)))
1396 || gimple_assign_lhs (cur_stmt) == NULL_TREE)
1397 return false;
1399 HOST_WIDE_INT sz = int_size_in_bytes (TREE_TYPE (rhs)) * BITS_PER_UNIT;
1400 switch (sz)
1402 case 16:
1403 load_type = bswap_type = uint16_type_node;
1404 break;
1405 case 32:
1406 if (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
1407 && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing)
1409 load_type = uint32_type_node;
1410 fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
1411 bswap_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
1413 else
1414 return false;
1415 break;
1416 case 64:
1417 if (builtin_decl_explicit_p (BUILT_IN_BSWAP64)
1418 && (optab_handler (bswap_optab, DImode) != CODE_FOR_nothing
1419 || (word_mode == SImode
1420 && builtin_decl_explicit_p (BUILT_IN_BSWAP32)
1421 && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing)))
1423 load_type = uint64_type_node;
1424 fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
1425 bswap_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
1427 else
1428 return false;
1429 break;
1430 default:
1431 return false;
1434 bool cast64_to_32;
1435 uint64_t mask;
1436 gimple *ins_stmt = find_bswap_or_nop (cur_stmt, &n, &bswap,
1437 &cast64_to_32, &mask);
1438 if (!ins_stmt
1439 || n.range != (unsigned HOST_WIDE_INT) sz
1440 || cast64_to_32
1441 || mask != ~(uint64_t) 0)
1442 return false;
1444 if (bswap && !fndecl && n.range != 16)
1445 return false;
1447 memset (&nop_stats, 0, sizeof (nop_stats));
1448 memset (&bswap_stats, 0, sizeof (bswap_stats));
1449 return bswap_replace (gsi_for_stmt (cur_stmt), ins_stmt, fndecl,
1450 bswap_type, load_type, &n, bswap, mask) != NULL_TREE;
1453 /* Find manual byte swap implementations as well as load in a given
1454 endianness. Byte swaps are turned into a bswap builtin invokation
1455 while endian loads are converted to bswap builtin invokation or
1456 simple load according to the target endianness. */
1458 unsigned int
1459 pass_optimize_bswap::execute (function *fun)
1461 basic_block bb;
1462 bool bswap32_p, bswap64_p;
1463 bool changed = false;
1464 tree bswap32_type = NULL_TREE, bswap64_type = NULL_TREE;
1466 bswap32_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
1467 && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing);
1468 bswap64_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP64)
1469 && (optab_handler (bswap_optab, DImode) != CODE_FOR_nothing
1470 || (bswap32_p && word_mode == SImode)));
1472 /* Determine the argument type of the builtins. The code later on
1473 assumes that the return and argument type are the same. */
1474 if (bswap32_p)
1476 tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
1477 bswap32_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
1480 if (bswap64_p)
1482 tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
1483 bswap64_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
1486 memset (&nop_stats, 0, sizeof (nop_stats));
1487 memset (&bswap_stats, 0, sizeof (bswap_stats));
1488 calculate_dominance_info (CDI_DOMINATORS);
1490 FOR_EACH_BB_FN (bb, fun)
1492 gimple_stmt_iterator gsi;
1494 /* We do a reverse scan for bswap patterns to make sure we get the
1495 widest match. As bswap pattern matching doesn't handle previously
1496 inserted smaller bswap replacements as sub-patterns, the wider
1497 variant wouldn't be detected. */
1498 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi);)
1500 gimple *ins_stmt, *cur_stmt = gsi_stmt (gsi);
1501 tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type;
1502 enum tree_code code;
1503 struct symbolic_number n;
1504 bool bswap, cast64_to_32;
1505 uint64_t mask;
1507 /* This gsi_prev (&gsi) is not part of the for loop because cur_stmt
1508 might be moved to a different basic block by bswap_replace and gsi
1509 must not points to it if that's the case. Moving the gsi_prev
1510 there make sure that gsi points to the statement previous to
1511 cur_stmt while still making sure that all statements are
1512 considered in this basic block. */
1513 gsi_prev (&gsi);
1515 if (!is_gimple_assign (cur_stmt))
1516 continue;
1518 code = gimple_assign_rhs_code (cur_stmt);
1519 switch (code)
1521 case LROTATE_EXPR:
1522 case RROTATE_EXPR:
1523 if (!tree_fits_uhwi_p (gimple_assign_rhs2 (cur_stmt))
1524 || tree_to_uhwi (gimple_assign_rhs2 (cur_stmt))
1525 % BITS_PER_UNIT)
1526 continue;
1527 /* Fall through. */
1528 case BIT_IOR_EXPR:
1529 case BIT_XOR_EXPR:
1530 case PLUS_EXPR:
1531 break;
1532 case CONSTRUCTOR:
1534 tree rhs = gimple_assign_rhs1 (cur_stmt);
1535 if (VECTOR_TYPE_P (TREE_TYPE (rhs))
1536 && INTEGRAL_TYPE_P (TREE_TYPE (TREE_TYPE (rhs))))
1537 break;
1539 continue;
1540 default:
1541 continue;
1544 ins_stmt = find_bswap_or_nop (cur_stmt, &n, &bswap,
1545 &cast64_to_32, &mask);
1547 if (!ins_stmt)
1548 continue;
1550 switch (n.range)
1552 case 16:
1553 /* Already in canonical form, nothing to do. */
1554 if (code == LROTATE_EXPR || code == RROTATE_EXPR)
1555 continue;
1556 load_type = bswap_type = uint16_type_node;
1557 break;
1558 case 32:
1559 load_type = uint32_type_node;
1560 if (bswap32_p)
1562 fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
1563 bswap_type = bswap32_type;
1565 break;
1566 case 64:
1567 load_type = uint64_type_node;
1568 if (bswap64_p)
1570 fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
1571 bswap_type = bswap64_type;
1573 break;
1574 default:
1575 continue;
1578 if (bswap && !fndecl && n.range != 16)
1579 continue;
1581 if (bswap_replace (gsi_for_stmt (cur_stmt), ins_stmt, fndecl,
1582 bswap_type, load_type, &n, bswap, mask))
1583 changed = true;
1587 statistics_counter_event (fun, "16-bit nop implementations found",
1588 nop_stats.found_16bit);
1589 statistics_counter_event (fun, "32-bit nop implementations found",
1590 nop_stats.found_32bit);
1591 statistics_counter_event (fun, "64-bit nop implementations found",
1592 nop_stats.found_64bit);
1593 statistics_counter_event (fun, "16-bit bswap implementations found",
1594 bswap_stats.found_16bit);
1595 statistics_counter_event (fun, "32-bit bswap implementations found",
1596 bswap_stats.found_32bit);
1597 statistics_counter_event (fun, "64-bit bswap implementations found",
1598 bswap_stats.found_64bit);
1600 return (changed ? TODO_update_ssa : 0);
1603 } // anon namespace
1605 gimple_opt_pass *
1606 make_pass_optimize_bswap (gcc::context *ctxt)
1608 return new pass_optimize_bswap (ctxt);
1611 namespace {
1613 /* Struct recording one operand for the store, which is either a constant,
1614 then VAL represents the constant and all the other fields are zero, or
1615 a memory load, then VAL represents the reference, BASE_ADDR is non-NULL
1616 and the other fields also reflect the memory load, or an SSA name, then
1617 VAL represents the SSA name and all the other fields are zero, */
1619 class store_operand_info
1621 public:
1622 tree val;
1623 tree base_addr;
1624 poly_uint64 bitsize;
1625 poly_uint64 bitpos;
1626 poly_uint64 bitregion_start;
1627 poly_uint64 bitregion_end;
1628 gimple *stmt;
1629 bool bit_not_p;
1630 store_operand_info ();
1633 store_operand_info::store_operand_info ()
1634 : val (NULL_TREE), base_addr (NULL_TREE), bitsize (0), bitpos (0),
1635 bitregion_start (0), bitregion_end (0), stmt (NULL), bit_not_p (false)
1639 /* Struct recording the information about a single store of an immediate
1640 to memory. These are created in the first phase and coalesced into
1641 merged_store_group objects in the second phase. */
1643 class store_immediate_info
1645 public:
1646 unsigned HOST_WIDE_INT bitsize;
1647 unsigned HOST_WIDE_INT bitpos;
1648 unsigned HOST_WIDE_INT bitregion_start;
1649 /* This is one past the last bit of the bit region. */
1650 unsigned HOST_WIDE_INT bitregion_end;
1651 gimple *stmt;
1652 unsigned int order;
1653 /* INTEGER_CST for constant store, STRING_CST for string store,
1654 MEM_REF for memory copy, BIT_*_EXPR for logical bitwise operation,
1655 BIT_INSERT_EXPR for bit insertion.
1656 LROTATE_EXPR if it can be only bswap optimized and
1657 ops are not really meaningful.
1658 NOP_EXPR if bswap optimization detected identity, ops
1659 are not meaningful. */
1660 enum tree_code rhs_code;
1661 /* Two fields for bswap optimization purposes. */
1662 struct symbolic_number n;
1663 gimple *ins_stmt;
1664 /* True if BIT_{AND,IOR,XOR}_EXPR result is inverted before storing. */
1665 bool bit_not_p;
1666 /* True if ops have been swapped and thus ops[1] represents
1667 rhs1 of BIT_{AND,IOR,XOR}_EXPR and ops[0] represents rhs2. */
1668 bool ops_swapped_p;
1669 /* The index number of the landing pad, or 0 if there is none. */
1670 int lp_nr;
1671 /* Operands. For BIT_*_EXPR rhs_code both operands are used, otherwise
1672 just the first one. */
1673 store_operand_info ops[2];
1674 store_immediate_info (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
1675 unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
1676 gimple *, unsigned int, enum tree_code,
1677 struct symbolic_number &, gimple *, bool, int,
1678 const store_operand_info &,
1679 const store_operand_info &);
1682 store_immediate_info::store_immediate_info (unsigned HOST_WIDE_INT bs,
1683 unsigned HOST_WIDE_INT bp,
1684 unsigned HOST_WIDE_INT brs,
1685 unsigned HOST_WIDE_INT bre,
1686 gimple *st,
1687 unsigned int ord,
1688 enum tree_code rhscode,
1689 struct symbolic_number &nr,
1690 gimple *ins_stmtp,
1691 bool bitnotp,
1692 int nr2,
1693 const store_operand_info &op0r,
1694 const store_operand_info &op1r)
1695 : bitsize (bs), bitpos (bp), bitregion_start (brs), bitregion_end (bre),
1696 stmt (st), order (ord), rhs_code (rhscode), n (nr),
1697 ins_stmt (ins_stmtp), bit_not_p (bitnotp), ops_swapped_p (false),
1698 lp_nr (nr2), ops { op0r, op1r }
1702 /* Struct representing a group of stores to contiguous memory locations.
1703 These are produced by the second phase (coalescing) and consumed in the
1704 third phase that outputs the widened stores. */
1706 class merged_store_group
1708 public:
1709 unsigned HOST_WIDE_INT start;
1710 unsigned HOST_WIDE_INT width;
1711 unsigned HOST_WIDE_INT bitregion_start;
1712 unsigned HOST_WIDE_INT bitregion_end;
1713 /* The size of the allocated memory for val and mask. */
1714 unsigned HOST_WIDE_INT buf_size;
1715 unsigned HOST_WIDE_INT align_base;
1716 poly_uint64 load_align_base[2];
1718 unsigned int align;
1719 unsigned int load_align[2];
1720 unsigned int first_order;
1721 unsigned int last_order;
1722 bool bit_insertion;
1723 bool string_concatenation;
1724 bool only_constants;
1725 bool consecutive;
1726 unsigned int first_nonmergeable_order;
1727 int lp_nr;
1729 auto_vec<store_immediate_info *> stores;
1730 /* We record the first and last original statements in the sequence because
1731 we'll need their vuse/vdef and replacement position. It's easier to keep
1732 track of them separately as 'stores' is reordered by apply_stores. */
1733 gimple *last_stmt;
1734 gimple *first_stmt;
1735 unsigned char *val;
1736 unsigned char *mask;
1738 merged_store_group (store_immediate_info *);
1739 ~merged_store_group ();
1740 bool can_be_merged_into (store_immediate_info *);
1741 void merge_into (store_immediate_info *);
1742 void merge_overlapping (store_immediate_info *);
1743 bool apply_stores ();
1744 private:
1745 void do_merge (store_immediate_info *);
1748 /* Debug helper. Dump LEN elements of byte array PTR to FD in hex. */
1750 static void
1751 dump_char_array (FILE *fd, unsigned char *ptr, unsigned int len)
1753 if (!fd)
1754 return;
1756 for (unsigned int i = 0; i < len; i++)
1757 fprintf (fd, "%02x ", ptr[i]);
1758 fprintf (fd, "\n");
1761 /* Clear out LEN bits starting from bit START in the byte array
1762 PTR. This clears the bits to the *right* from START.
1763 START must be within [0, BITS_PER_UNIT) and counts starting from
1764 the least significant bit. */
1766 static void
1767 clear_bit_region_be (unsigned char *ptr, unsigned int start,
1768 unsigned int len)
1770 if (len == 0)
1771 return;
1772 /* Clear len bits to the right of start. */
1773 else if (len <= start + 1)
1775 unsigned char mask = (~(~0U << len));
1776 mask = mask << (start + 1U - len);
1777 ptr[0] &= ~mask;
1779 else if (start != BITS_PER_UNIT - 1)
1781 clear_bit_region_be (ptr, start, (start % BITS_PER_UNIT) + 1);
1782 clear_bit_region_be (ptr + 1, BITS_PER_UNIT - 1,
1783 len - (start % BITS_PER_UNIT) - 1);
1785 else if (start == BITS_PER_UNIT - 1
1786 && len > BITS_PER_UNIT)
1788 unsigned int nbytes = len / BITS_PER_UNIT;
1789 memset (ptr, 0, nbytes);
1790 if (len % BITS_PER_UNIT != 0)
1791 clear_bit_region_be (ptr + nbytes, BITS_PER_UNIT - 1,
1792 len % BITS_PER_UNIT);
1794 else
1795 gcc_unreachable ();
1798 /* In the byte array PTR clear the bit region starting at bit
1799 START and is LEN bits wide.
1800 For regions spanning multiple bytes do this recursively until we reach
1801 zero LEN or a region contained within a single byte. */
1803 static void
1804 clear_bit_region (unsigned char *ptr, unsigned int start,
1805 unsigned int len)
1807 /* Degenerate base case. */
1808 if (len == 0)
1809 return;
1810 else if (start >= BITS_PER_UNIT)
1811 clear_bit_region (ptr + 1, start - BITS_PER_UNIT, len);
1812 /* Second base case. */
1813 else if ((start + len) <= BITS_PER_UNIT)
1815 unsigned char mask = (~0U) << (unsigned char) (BITS_PER_UNIT - len);
1816 mask >>= BITS_PER_UNIT - (start + len);
1818 ptr[0] &= ~mask;
1820 return;
1822 /* Clear most significant bits in a byte and proceed with the next byte. */
1823 else if (start != 0)
1825 clear_bit_region (ptr, start, BITS_PER_UNIT - start);
1826 clear_bit_region (ptr + 1, 0, len - (BITS_PER_UNIT - start));
1828 /* Whole bytes need to be cleared. */
1829 else if (start == 0 && len > BITS_PER_UNIT)
1831 unsigned int nbytes = len / BITS_PER_UNIT;
1832 /* We could recurse on each byte but we clear whole bytes, so a simple
1833 memset will do. */
1834 memset (ptr, '\0', nbytes);
1835 /* Clear the remaining sub-byte region if there is one. */
1836 if (len % BITS_PER_UNIT != 0)
1837 clear_bit_region (ptr + nbytes, 0, len % BITS_PER_UNIT);
1839 else
1840 gcc_unreachable ();
1843 /* Write BITLEN bits of EXPR to the byte array PTR at
1844 bit position BITPOS. PTR should contain TOTAL_BYTES elements.
1845 Return true if the operation succeeded. */
1847 static bool
1848 encode_tree_to_bitpos (tree expr, unsigned char *ptr, int bitlen, int bitpos,
1849 unsigned int total_bytes)
1851 unsigned int first_byte = bitpos / BITS_PER_UNIT;
1852 bool sub_byte_op_p = ((bitlen % BITS_PER_UNIT)
1853 || (bitpos % BITS_PER_UNIT)
1854 || !int_mode_for_size (bitlen, 0).exists ());
1855 bool empty_ctor_p
1856 = (TREE_CODE (expr) == CONSTRUCTOR
1857 && CONSTRUCTOR_NELTS (expr) == 0
1858 && TYPE_SIZE_UNIT (TREE_TYPE (expr))
1859 && tree_fits_uhwi_p (TYPE_SIZE_UNIT (TREE_TYPE (expr))));
1861 if (!sub_byte_op_p)
1863 if (first_byte >= total_bytes)
1864 return false;
1865 total_bytes -= first_byte;
1866 if (empty_ctor_p)
1868 unsigned HOST_WIDE_INT rhs_bytes
1869 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (expr)));
1870 if (rhs_bytes > total_bytes)
1871 return false;
1872 memset (ptr + first_byte, '\0', rhs_bytes);
1873 return true;
1875 return native_encode_expr (expr, ptr + first_byte, total_bytes) != 0;
1878 /* LITTLE-ENDIAN
1879 We are writing a non byte-sized quantity or at a position that is not
1880 at a byte boundary.
1881 |--------|--------|--------| ptr + first_byte
1883 xxx xxxxxxxx xxx< bp>
1884 |______EXPR____|
1886 First native_encode_expr EXPR into a temporary buffer and shift each
1887 byte in the buffer by 'bp' (carrying the bits over as necessary).
1888 |00000000|00xxxxxx|xxxxxxxx| << bp = |000xxxxx|xxxxxxxx|xxx00000|
1889 <------bitlen---->< bp>
1890 Then we clear the destination bits:
1891 |---00000|00000000|000-----| ptr + first_byte
1892 <-------bitlen--->< bp>
1894 Finally we ORR the bytes of the shifted EXPR into the cleared region:
1895 |---xxxxx||xxxxxxxx||xxx-----| ptr + first_byte.
1897 BIG-ENDIAN
1898 We are writing a non byte-sized quantity or at a position that is not
1899 at a byte boundary.
1900 ptr + first_byte |--------|--------|--------|
1902 <bp >xxx xxxxxxxx xxx
1903 |_____EXPR_____|
1905 First native_encode_expr EXPR into a temporary buffer and shift each
1906 byte in the buffer to the right by (carrying the bits over as necessary).
1907 We shift by as much as needed to align the most significant bit of EXPR
1908 with bitpos:
1909 |00xxxxxx|xxxxxxxx| >> 3 = |00000xxx|xxxxxxxx|xxxxx000|
1910 <---bitlen----> <bp ><-----bitlen----->
1911 Then we clear the destination bits:
1912 ptr + first_byte |-----000||00000000||00000---|
1913 <bp ><-------bitlen----->
1915 Finally we ORR the bytes of the shifted EXPR into the cleared region:
1916 ptr + first_byte |---xxxxx||xxxxxxxx||xxx-----|.
1917 The awkwardness comes from the fact that bitpos is counted from the
1918 most significant bit of a byte. */
1920 /* We must be dealing with fixed-size data at this point, since the
1921 total size is also fixed. */
1922 unsigned int byte_size;
1923 if (empty_ctor_p)
1925 unsigned HOST_WIDE_INT rhs_bytes
1926 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (expr)));
1927 if (rhs_bytes > total_bytes)
1928 return false;
1929 byte_size = rhs_bytes;
1931 else
1933 fixed_size_mode mode
1934 = as_a <fixed_size_mode> (TYPE_MODE (TREE_TYPE (expr)));
1935 byte_size
1936 = mode == BLKmode
1937 ? tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (expr)))
1938 : GET_MODE_SIZE (mode);
1940 /* Allocate an extra byte so that we have space to shift into. */
1941 byte_size++;
1942 unsigned char *tmpbuf = XALLOCAVEC (unsigned char, byte_size);
1943 memset (tmpbuf, '\0', byte_size);
1944 /* The store detection code should only have allowed constants that are
1945 accepted by native_encode_expr or empty ctors. */
1946 if (!empty_ctor_p
1947 && native_encode_expr (expr, tmpbuf, byte_size - 1) == 0)
1948 gcc_unreachable ();
1950 /* The native_encode_expr machinery uses TYPE_MODE to determine how many
1951 bytes to write. This means it can write more than
1952 ROUND_UP (bitlen, BITS_PER_UNIT) / BITS_PER_UNIT bytes (for example
1953 write 8 bytes for a bitlen of 40). Skip the bytes that are not within
1954 bitlen and zero out the bits that are not relevant as well (that may
1955 contain a sign bit due to sign-extension). */
1956 unsigned int padding
1957 = byte_size - ROUND_UP (bitlen, BITS_PER_UNIT) / BITS_PER_UNIT - 1;
1958 /* On big-endian the padding is at the 'front' so just skip the initial
1959 bytes. */
1960 if (BYTES_BIG_ENDIAN)
1961 tmpbuf += padding;
1963 byte_size -= padding;
1965 if (bitlen % BITS_PER_UNIT != 0)
1967 if (BYTES_BIG_ENDIAN)
1968 clear_bit_region_be (tmpbuf, BITS_PER_UNIT - 1,
1969 BITS_PER_UNIT - (bitlen % BITS_PER_UNIT));
1970 else
1971 clear_bit_region (tmpbuf, bitlen,
1972 byte_size * BITS_PER_UNIT - bitlen);
1974 /* Left shifting relies on the last byte being clear if bitlen is
1975 a multiple of BITS_PER_UNIT, which might not be clear if
1976 there are padding bytes. */
1977 else if (!BYTES_BIG_ENDIAN)
1978 tmpbuf[byte_size - 1] = '\0';
1980 /* Clear the bit region in PTR where the bits from TMPBUF will be
1981 inserted into. */
1982 if (BYTES_BIG_ENDIAN)
1983 clear_bit_region_be (ptr + first_byte,
1984 BITS_PER_UNIT - 1 - (bitpos % BITS_PER_UNIT), bitlen);
1985 else
1986 clear_bit_region (ptr + first_byte, bitpos % BITS_PER_UNIT, bitlen);
1988 int shift_amnt;
1989 int bitlen_mod = bitlen % BITS_PER_UNIT;
1990 int bitpos_mod = bitpos % BITS_PER_UNIT;
1992 bool skip_byte = false;
1993 if (BYTES_BIG_ENDIAN)
1995 /* BITPOS and BITLEN are exactly aligned and no shifting
1996 is necessary. */
1997 if (bitpos_mod + bitlen_mod == BITS_PER_UNIT
1998 || (bitpos_mod == 0 && bitlen_mod == 0))
1999 shift_amnt = 0;
2000 /* |. . . . . . . .|
2001 <bp > <blen >.
2002 We always shift right for BYTES_BIG_ENDIAN so shift the beginning
2003 of the value until it aligns with 'bp' in the next byte over. */
2004 else if (bitpos_mod + bitlen_mod < BITS_PER_UNIT)
2006 shift_amnt = bitlen_mod + bitpos_mod;
2007 skip_byte = bitlen_mod != 0;
2009 /* |. . . . . . . .|
2010 <----bp--->
2011 <---blen---->.
2012 Shift the value right within the same byte so it aligns with 'bp'. */
2013 else
2014 shift_amnt = bitlen_mod + bitpos_mod - BITS_PER_UNIT;
2016 else
2017 shift_amnt = bitpos % BITS_PER_UNIT;
2019 /* Create the shifted version of EXPR. */
2020 if (!BYTES_BIG_ENDIAN)
2022 shift_bytes_in_array_left (tmpbuf, byte_size, shift_amnt);
2023 if (shift_amnt == 0)
2024 byte_size--;
2026 else
2028 gcc_assert (BYTES_BIG_ENDIAN);
2029 shift_bytes_in_array_right (tmpbuf, byte_size, shift_amnt);
2030 /* If shifting right forced us to move into the next byte skip the now
2031 empty byte. */
2032 if (skip_byte)
2034 tmpbuf++;
2035 byte_size--;
2039 /* Insert the bits from TMPBUF. */
2040 for (unsigned int i = 0; i < byte_size; i++)
2041 ptr[first_byte + i] |= tmpbuf[i];
2043 return true;
2046 /* Sorting function for store_immediate_info objects.
2047 Sorts them by bitposition. */
2049 static int
2050 sort_by_bitpos (const void *x, const void *y)
2052 store_immediate_info *const *tmp = (store_immediate_info * const *) x;
2053 store_immediate_info *const *tmp2 = (store_immediate_info * const *) y;
2055 if ((*tmp)->bitpos < (*tmp2)->bitpos)
2056 return -1;
2057 else if ((*tmp)->bitpos > (*tmp2)->bitpos)
2058 return 1;
2059 else
2060 /* If they are the same let's use the order which is guaranteed to
2061 be different. */
2062 return (*tmp)->order - (*tmp2)->order;
2065 /* Sorting function for store_immediate_info objects.
2066 Sorts them by the order field. */
2068 static int
2069 sort_by_order (const void *x, const void *y)
2071 store_immediate_info *const *tmp = (store_immediate_info * const *) x;
2072 store_immediate_info *const *tmp2 = (store_immediate_info * const *) y;
2074 if ((*tmp)->order < (*tmp2)->order)
2075 return -1;
2076 else if ((*tmp)->order > (*tmp2)->order)
2077 return 1;
2079 gcc_unreachable ();
2082 /* Initialize a merged_store_group object from a store_immediate_info
2083 object. */
2085 merged_store_group::merged_store_group (store_immediate_info *info)
2087 start = info->bitpos;
2088 width = info->bitsize;
2089 bitregion_start = info->bitregion_start;
2090 bitregion_end = info->bitregion_end;
2091 /* VAL has memory allocated for it in apply_stores once the group
2092 width has been finalized. */
2093 val = NULL;
2094 mask = NULL;
2095 bit_insertion = info->rhs_code == BIT_INSERT_EXPR;
2096 string_concatenation = info->rhs_code == STRING_CST;
2097 only_constants = info->rhs_code == INTEGER_CST;
2098 consecutive = true;
2099 first_nonmergeable_order = ~0U;
2100 lp_nr = info->lp_nr;
2101 unsigned HOST_WIDE_INT align_bitpos = 0;
2102 get_object_alignment_1 (gimple_assign_lhs (info->stmt),
2103 &align, &align_bitpos);
2104 align_base = start - align_bitpos;
2105 for (int i = 0; i < 2; ++i)
2107 store_operand_info &op = info->ops[i];
2108 if (op.base_addr == NULL_TREE)
2110 load_align[i] = 0;
2111 load_align_base[i] = 0;
2113 else
2115 get_object_alignment_1 (op.val, &load_align[i], &align_bitpos);
2116 load_align_base[i] = op.bitpos - align_bitpos;
2119 stores.create (1);
2120 stores.safe_push (info);
2121 last_stmt = info->stmt;
2122 last_order = info->order;
2123 first_stmt = last_stmt;
2124 first_order = last_order;
2125 buf_size = 0;
2128 merged_store_group::~merged_store_group ()
2130 if (val)
2131 XDELETEVEC (val);
2134 /* Return true if the store described by INFO can be merged into the group. */
2136 bool
2137 merged_store_group::can_be_merged_into (store_immediate_info *info)
2139 /* Do not merge bswap patterns. */
2140 if (info->rhs_code == LROTATE_EXPR)
2141 return false;
2143 if (info->lp_nr != lp_nr)
2144 return false;
2146 /* The canonical case. */
2147 if (info->rhs_code == stores[0]->rhs_code)
2148 return true;
2150 /* BIT_INSERT_EXPR is compatible with INTEGER_CST if no STRING_CST. */
2151 if (info->rhs_code == BIT_INSERT_EXPR && stores[0]->rhs_code == INTEGER_CST)
2152 return !string_concatenation;
2154 if (stores[0]->rhs_code == BIT_INSERT_EXPR && info->rhs_code == INTEGER_CST)
2155 return !string_concatenation;
2157 /* We can turn MEM_REF into BIT_INSERT_EXPR for bit-field stores, but do it
2158 only for small regions since this can generate a lot of instructions. */
2159 if (info->rhs_code == MEM_REF
2160 && (stores[0]->rhs_code == INTEGER_CST
2161 || stores[0]->rhs_code == BIT_INSERT_EXPR)
2162 && info->bitregion_start == stores[0]->bitregion_start
2163 && info->bitregion_end == stores[0]->bitregion_end
2164 && info->bitregion_end - info->bitregion_start <= MAX_FIXED_MODE_SIZE)
2165 return !string_concatenation;
2167 if (stores[0]->rhs_code == MEM_REF
2168 && (info->rhs_code == INTEGER_CST
2169 || info->rhs_code == BIT_INSERT_EXPR)
2170 && info->bitregion_start == stores[0]->bitregion_start
2171 && info->bitregion_end == stores[0]->bitregion_end
2172 && info->bitregion_end - info->bitregion_start <= MAX_FIXED_MODE_SIZE)
2173 return !string_concatenation;
2175 /* STRING_CST is compatible with INTEGER_CST if no BIT_INSERT_EXPR. */
2176 if (info->rhs_code == STRING_CST
2177 && stores[0]->rhs_code == INTEGER_CST
2178 && stores[0]->bitsize == CHAR_BIT)
2179 return !bit_insertion;
2181 if (stores[0]->rhs_code == STRING_CST
2182 && info->rhs_code == INTEGER_CST
2183 && info->bitsize == CHAR_BIT)
2184 return !bit_insertion;
2186 return false;
2189 /* Helper method for merge_into and merge_overlapping to do
2190 the common part. */
2192 void
2193 merged_store_group::do_merge (store_immediate_info *info)
2195 bitregion_start = MIN (bitregion_start, info->bitregion_start);
2196 bitregion_end = MAX (bitregion_end, info->bitregion_end);
2198 unsigned int this_align;
2199 unsigned HOST_WIDE_INT align_bitpos = 0;
2200 get_object_alignment_1 (gimple_assign_lhs (info->stmt),
2201 &this_align, &align_bitpos);
2202 if (this_align > align)
2204 align = this_align;
2205 align_base = info->bitpos - align_bitpos;
2207 for (int i = 0; i < 2; ++i)
2209 store_operand_info &op = info->ops[i];
2210 if (!op.base_addr)
2211 continue;
2213 get_object_alignment_1 (op.val, &this_align, &align_bitpos);
2214 if (this_align > load_align[i])
2216 load_align[i] = this_align;
2217 load_align_base[i] = op.bitpos - align_bitpos;
2221 gimple *stmt = info->stmt;
2222 stores.safe_push (info);
2223 if (info->order > last_order)
2225 last_order = info->order;
2226 last_stmt = stmt;
2228 else if (info->order < first_order)
2230 first_order = info->order;
2231 first_stmt = stmt;
2234 if (info->bitpos != start + width)
2235 consecutive = false;
2237 /* We need to use extraction if there is any bit-field. */
2238 if (info->rhs_code == BIT_INSERT_EXPR)
2240 bit_insertion = true;
2241 gcc_assert (!string_concatenation);
2244 /* We want to use concatenation if there is any string. */
2245 if (info->rhs_code == STRING_CST)
2247 string_concatenation = true;
2248 gcc_assert (!bit_insertion);
2251 /* But we cannot use it if we don't have consecutive stores. */
2252 if (!consecutive)
2253 string_concatenation = false;
2255 if (info->rhs_code != INTEGER_CST)
2256 only_constants = false;
2259 /* Merge a store recorded by INFO into this merged store.
2260 The store is not overlapping with the existing recorded
2261 stores. */
2263 void
2264 merged_store_group::merge_into (store_immediate_info *info)
2266 do_merge (info);
2268 /* Make sure we're inserting in the position we think we're inserting. */
2269 gcc_assert (info->bitpos >= start + width
2270 && info->bitregion_start <= bitregion_end);
2272 width = info->bitpos + info->bitsize - start;
2275 /* Merge a store described by INFO into this merged store.
2276 INFO overlaps in some way with the current store (i.e. it's not contiguous
2277 which is handled by merged_store_group::merge_into). */
2279 void
2280 merged_store_group::merge_overlapping (store_immediate_info *info)
2282 do_merge (info);
2284 /* If the store extends the size of the group, extend the width. */
2285 if (info->bitpos + info->bitsize > start + width)
2286 width = info->bitpos + info->bitsize - start;
2289 /* Go through all the recorded stores in this group in program order and
2290 apply their values to the VAL byte array to create the final merged
2291 value. Return true if the operation succeeded. */
2293 bool
2294 merged_store_group::apply_stores ()
2296 store_immediate_info *info;
2297 unsigned int i;
2299 /* Make sure we have more than one store in the group, otherwise we cannot
2300 merge anything. */
2301 if (bitregion_start % BITS_PER_UNIT != 0
2302 || bitregion_end % BITS_PER_UNIT != 0
2303 || stores.length () == 1)
2304 return false;
2306 buf_size = (bitregion_end - bitregion_start) / BITS_PER_UNIT;
2308 /* Really do string concatenation for large strings only. */
2309 if (buf_size <= MOVE_MAX)
2310 string_concatenation = false;
2312 /* Create a power-of-2-sized buffer for native_encode_expr. */
2313 if (!string_concatenation)
2314 buf_size = 1 << ceil_log2 (buf_size);
2316 val = XNEWVEC (unsigned char, 2 * buf_size);
2317 mask = val + buf_size;
2318 memset (val, 0, buf_size);
2319 memset (mask, ~0U, buf_size);
2321 stores.qsort (sort_by_order);
2323 FOR_EACH_VEC_ELT (stores, i, info)
2325 unsigned int pos_in_buffer = info->bitpos - bitregion_start;
2326 tree cst;
2327 if (info->ops[0].val && info->ops[0].base_addr == NULL_TREE)
2328 cst = info->ops[0].val;
2329 else if (info->ops[1].val && info->ops[1].base_addr == NULL_TREE)
2330 cst = info->ops[1].val;
2331 else
2332 cst = NULL_TREE;
2333 bool ret = true;
2334 if (cst && info->rhs_code != BIT_INSERT_EXPR)
2335 ret = encode_tree_to_bitpos (cst, val, info->bitsize, pos_in_buffer,
2336 buf_size);
2337 unsigned char *m = mask + (pos_in_buffer / BITS_PER_UNIT);
2338 if (BYTES_BIG_ENDIAN)
2339 clear_bit_region_be (m, (BITS_PER_UNIT - 1
2340 - (pos_in_buffer % BITS_PER_UNIT)),
2341 info->bitsize);
2342 else
2343 clear_bit_region (m, pos_in_buffer % BITS_PER_UNIT, info->bitsize);
2344 if (cst && dump_file && (dump_flags & TDF_DETAILS))
2346 if (ret)
2348 fputs ("After writing ", dump_file);
2349 print_generic_expr (dump_file, cst, TDF_NONE);
2350 fprintf (dump_file, " of size " HOST_WIDE_INT_PRINT_DEC
2351 " at position %d\n", info->bitsize, pos_in_buffer);
2352 fputs (" the merged value contains ", dump_file);
2353 dump_char_array (dump_file, val, buf_size);
2354 fputs (" the merged mask contains ", dump_file);
2355 dump_char_array (dump_file, mask, buf_size);
2356 if (bit_insertion)
2357 fputs (" bit insertion is required\n", dump_file);
2358 if (string_concatenation)
2359 fputs (" string concatenation is required\n", dump_file);
2361 else
2362 fprintf (dump_file, "Failed to merge stores\n");
2364 if (!ret)
2365 return false;
2367 stores.qsort (sort_by_bitpos);
2368 return true;
2371 /* Structure describing the store chain. */
2373 class imm_store_chain_info
2375 public:
2376 /* Doubly-linked list that imposes an order on chain processing.
2377 PNXP (prev's next pointer) points to the head of a list, or to
2378 the next field in the previous chain in the list.
2379 See pass_store_merging::m_stores_head for more rationale. */
2380 imm_store_chain_info *next, **pnxp;
2381 tree base_addr;
2382 auto_vec<store_immediate_info *> m_store_info;
2383 auto_vec<merged_store_group *> m_merged_store_groups;
2385 imm_store_chain_info (imm_store_chain_info *&inspt, tree b_a)
2386 : next (inspt), pnxp (&inspt), base_addr (b_a)
2388 inspt = this;
2389 if (next)
2391 gcc_checking_assert (pnxp == next->pnxp);
2392 next->pnxp = &next;
2395 ~imm_store_chain_info ()
2397 *pnxp = next;
2398 if (next)
2400 gcc_checking_assert (&next == next->pnxp);
2401 next->pnxp = pnxp;
2404 bool terminate_and_process_chain ();
2405 bool try_coalesce_bswap (merged_store_group *, unsigned int, unsigned int,
2406 unsigned int);
2407 bool coalesce_immediate_stores ();
2408 bool output_merged_store (merged_store_group *);
2409 bool output_merged_stores ();
2412 const pass_data pass_data_tree_store_merging = {
2413 GIMPLE_PASS, /* type */
2414 "store-merging", /* name */
2415 OPTGROUP_NONE, /* optinfo_flags */
2416 TV_GIMPLE_STORE_MERGING, /* tv_id */
2417 PROP_ssa, /* properties_required */
2418 0, /* properties_provided */
2419 0, /* properties_destroyed */
2420 0, /* todo_flags_start */
2421 TODO_update_ssa, /* todo_flags_finish */
2424 class pass_store_merging : public gimple_opt_pass
2426 public:
2427 pass_store_merging (gcc::context *ctxt)
2428 : gimple_opt_pass (pass_data_tree_store_merging, ctxt), m_stores_head (),
2429 m_n_chains (0), m_n_stores (0)
2433 /* Pass not supported for PDP-endian, nor for insane hosts or
2434 target character sizes where native_{encode,interpret}_expr
2435 doesn't work properly. */
2436 bool
2437 gate (function *) final override
2439 return flag_store_merging
2440 && BYTES_BIG_ENDIAN == WORDS_BIG_ENDIAN
2441 && CHAR_BIT == 8
2442 && BITS_PER_UNIT == 8;
2445 unsigned int execute (function *) final override;
2447 private:
2448 hash_map<tree_operand_hash, class imm_store_chain_info *> m_stores;
2450 /* Form a doubly-linked stack of the elements of m_stores, so that
2451 we can iterate over them in a predictable way. Using this order
2452 avoids extraneous differences in the compiler output just because
2453 of tree pointer variations (e.g. different chains end up in
2454 different positions of m_stores, so they are handled in different
2455 orders, so they allocate or release SSA names in different
2456 orders, and when they get reused, subsequent passes end up
2457 getting different SSA names, which may ultimately change
2458 decisions when going out of SSA). */
2459 imm_store_chain_info *m_stores_head;
2461 /* The number of store chains currently tracked. */
2462 unsigned m_n_chains;
2463 /* The number of stores currently tracked. */
2464 unsigned m_n_stores;
2466 bool process_store (gimple *);
2467 bool terminate_and_process_chain (imm_store_chain_info *);
2468 bool terminate_all_aliasing_chains (imm_store_chain_info **, gimple *);
2469 bool terminate_and_process_all_chains ();
2470 }; // class pass_store_merging
2472 /* Terminate and process all recorded chains. Return true if any changes
2473 were made. */
2475 bool
2476 pass_store_merging::terminate_and_process_all_chains ()
2478 bool ret = false;
2479 while (m_stores_head)
2480 ret |= terminate_and_process_chain (m_stores_head);
2481 gcc_assert (m_stores.is_empty ());
2482 return ret;
2485 /* Terminate all chains that are affected by the statement STMT.
2486 CHAIN_INFO is the chain we should ignore from the checks if
2487 non-NULL. Return true if any changes were made. */
2489 bool
2490 pass_store_merging::terminate_all_aliasing_chains (imm_store_chain_info
2491 **chain_info,
2492 gimple *stmt)
2494 bool ret = false;
2496 /* If the statement doesn't touch memory it can't alias. */
2497 if (!gimple_vuse (stmt))
2498 return false;
2500 tree store_lhs = gimple_store_p (stmt) ? gimple_get_lhs (stmt) : NULL_TREE;
2501 ao_ref store_lhs_ref;
2502 ao_ref_init (&store_lhs_ref, store_lhs);
2503 for (imm_store_chain_info *next = m_stores_head, *cur = next; cur; cur = next)
2505 next = cur->next;
2507 /* We already checked all the stores in chain_info and terminated the
2508 chain if necessary. Skip it here. */
2509 if (chain_info && *chain_info == cur)
2510 continue;
2512 store_immediate_info *info;
2513 unsigned int i;
2514 FOR_EACH_VEC_ELT (cur->m_store_info, i, info)
2516 tree lhs = gimple_assign_lhs (info->stmt);
2517 ao_ref lhs_ref;
2518 ao_ref_init (&lhs_ref, lhs);
2519 if (ref_maybe_used_by_stmt_p (stmt, &lhs_ref)
2520 || stmt_may_clobber_ref_p_1 (stmt, &lhs_ref)
2521 || (store_lhs && refs_may_alias_p_1 (&store_lhs_ref,
2522 &lhs_ref, false)))
2524 if (dump_file && (dump_flags & TDF_DETAILS))
2526 fprintf (dump_file, "stmt causes chain termination:\n");
2527 print_gimple_stmt (dump_file, stmt, 0);
2529 ret |= terminate_and_process_chain (cur);
2530 break;
2535 return ret;
2538 /* Helper function. Terminate the recorded chain storing to base object
2539 BASE. Return true if the merging and output was successful. The m_stores
2540 entry is removed after the processing in any case. */
2542 bool
2543 pass_store_merging::terminate_and_process_chain (imm_store_chain_info *chain_info)
2545 m_n_stores -= chain_info->m_store_info.length ();
2546 m_n_chains--;
2547 bool ret = chain_info->terminate_and_process_chain ();
2548 m_stores.remove (chain_info->base_addr);
2549 delete chain_info;
2550 return ret;
2553 /* Return true if stmts in between FIRST (inclusive) and LAST (exclusive)
2554 may clobber REF. FIRST and LAST must have non-NULL vdef. We want to
2555 be able to sink load of REF across stores between FIRST and LAST, up
2556 to right before LAST. */
2558 bool
2559 stmts_may_clobber_ref_p (gimple *first, gimple *last, tree ref)
2561 ao_ref r;
2562 ao_ref_init (&r, ref);
2563 unsigned int count = 0;
2564 tree vop = gimple_vdef (last);
2565 gimple *stmt;
2567 /* Return true conservatively if the basic blocks are different. */
2568 if (gimple_bb (first) != gimple_bb (last))
2569 return true;
2573 stmt = SSA_NAME_DEF_STMT (vop);
2574 if (stmt_may_clobber_ref_p_1 (stmt, &r))
2575 return true;
2576 if (gimple_store_p (stmt)
2577 && refs_anti_dependent_p (ref, gimple_get_lhs (stmt)))
2578 return true;
2579 /* Avoid quadratic compile time by bounding the number of checks
2580 we perform. */
2581 if (++count > MAX_STORE_ALIAS_CHECKS)
2582 return true;
2583 vop = gimple_vuse (stmt);
2585 while (stmt != first);
2587 return false;
2590 /* Return true if INFO->ops[IDX] is mergeable with the
2591 corresponding loads already in MERGED_STORE group.
2592 BASE_ADDR is the base address of the whole store group. */
2594 bool
2595 compatible_load_p (merged_store_group *merged_store,
2596 store_immediate_info *info,
2597 tree base_addr, int idx)
2599 store_immediate_info *infof = merged_store->stores[0];
2600 if (!info->ops[idx].base_addr
2601 || maybe_ne (info->ops[idx].bitpos - infof->ops[idx].bitpos,
2602 info->bitpos - infof->bitpos)
2603 || !operand_equal_p (info->ops[idx].base_addr,
2604 infof->ops[idx].base_addr, 0))
2605 return false;
2607 store_immediate_info *infol = merged_store->stores.last ();
2608 tree load_vuse = gimple_vuse (info->ops[idx].stmt);
2609 /* In this case all vuses should be the same, e.g.
2610 _1 = s.a; _2 = s.b; _3 = _1 | 1; t.a = _3; _4 = _2 | 2; t.b = _4;
2612 _1 = s.a; _2 = s.b; t.a = _1; t.b = _2;
2613 and we can emit the coalesced load next to any of those loads. */
2614 if (gimple_vuse (infof->ops[idx].stmt) == load_vuse
2615 && gimple_vuse (infol->ops[idx].stmt) == load_vuse)
2616 return true;
2618 /* Otherwise, at least for now require that the load has the same
2619 vuse as the store. See following examples. */
2620 if (gimple_vuse (info->stmt) != load_vuse)
2621 return false;
2623 if (gimple_vuse (infof->stmt) != gimple_vuse (infof->ops[idx].stmt)
2624 || (infof != infol
2625 && gimple_vuse (infol->stmt) != gimple_vuse (infol->ops[idx].stmt)))
2626 return false;
2628 /* If the load is from the same location as the store, already
2629 the construction of the immediate chain info guarantees no intervening
2630 stores, so no further checks are needed. Example:
2631 _1 = s.a; _2 = _1 & -7; s.a = _2; _3 = s.b; _4 = _3 & -7; s.b = _4; */
2632 if (known_eq (info->ops[idx].bitpos, info->bitpos)
2633 && operand_equal_p (info->ops[idx].base_addr, base_addr, 0))
2634 return true;
2636 /* Otherwise, we need to punt if any of the loads can be clobbered by any
2637 of the stores in the group, or any other stores in between those.
2638 Previous calls to compatible_load_p ensured that for all the
2639 merged_store->stores IDX loads, no stmts starting with
2640 merged_store->first_stmt and ending right before merged_store->last_stmt
2641 clobbers those loads. */
2642 gimple *first = merged_store->first_stmt;
2643 gimple *last = merged_store->last_stmt;
2644 /* The stores are sorted by increasing store bitpos, so if info->stmt store
2645 comes before the so far first load, we'll be changing
2646 merged_store->first_stmt. In that case we need to give up if
2647 any of the earlier processed loads clobber with the stmts in the new
2648 range. */
2649 if (info->order < merged_store->first_order)
2651 for (store_immediate_info *infoc : merged_store->stores)
2652 if (stmts_may_clobber_ref_p (info->stmt, first, infoc->ops[idx].val))
2653 return false;
2654 first = info->stmt;
2656 /* Similarly, we could change merged_store->last_stmt, so ensure
2657 in that case no stmts in the new range clobber any of the earlier
2658 processed loads. */
2659 else if (info->order > merged_store->last_order)
2661 for (store_immediate_info *infoc : merged_store->stores)
2662 if (stmts_may_clobber_ref_p (last, info->stmt, infoc->ops[idx].val))
2663 return false;
2664 last = info->stmt;
2666 /* And finally, we'd be adding a new load to the set, ensure it isn't
2667 clobbered in the new range. */
2668 if (stmts_may_clobber_ref_p (first, last, info->ops[idx].val))
2669 return false;
2671 /* Otherwise, we are looking for:
2672 _1 = s.a; _2 = _1 ^ 15; t.a = _2; _3 = s.b; _4 = _3 ^ 15; t.b = _4;
2674 _1 = s.a; t.a = _1; _2 = s.b; t.b = _2; */
2675 return true;
2678 /* Add all refs loaded to compute VAL to REFS vector. */
2680 void
2681 gather_bswap_load_refs (vec<tree> *refs, tree val)
2683 if (TREE_CODE (val) != SSA_NAME)
2684 return;
2686 gimple *stmt = SSA_NAME_DEF_STMT (val);
2687 if (!is_gimple_assign (stmt))
2688 return;
2690 if (gimple_assign_load_p (stmt))
2692 refs->safe_push (gimple_assign_rhs1 (stmt));
2693 return;
2696 switch (gimple_assign_rhs_class (stmt))
2698 case GIMPLE_BINARY_RHS:
2699 gather_bswap_load_refs (refs, gimple_assign_rhs2 (stmt));
2700 /* FALLTHRU */
2701 case GIMPLE_UNARY_RHS:
2702 gather_bswap_load_refs (refs, gimple_assign_rhs1 (stmt));
2703 break;
2704 default:
2705 gcc_unreachable ();
2709 /* Check if there are any stores in M_STORE_INFO after index I
2710 (where M_STORE_INFO must be sorted by sort_by_bitpos) that overlap
2711 a potential group ending with END that have their order
2712 smaller than LAST_ORDER. ALL_INTEGER_CST_P is true if
2713 all the stores already merged and the one under consideration
2714 have rhs_code of INTEGER_CST. Return true if there are no such stores.
2715 Consider:
2716 MEM[(long long int *)p_28] = 0;
2717 MEM[(long long int *)p_28 + 8B] = 0;
2718 MEM[(long long int *)p_28 + 16B] = 0;
2719 MEM[(long long int *)p_28 + 24B] = 0;
2720 _129 = (int) _130;
2721 MEM[(int *)p_28 + 8B] = _129;
2722 MEM[(int *)p_28].a = -1;
2723 We already have
2724 MEM[(long long int *)p_28] = 0;
2725 MEM[(int *)p_28].a = -1;
2726 stmts in the current group and need to consider if it is safe to
2727 add MEM[(long long int *)p_28 + 8B] = 0; store into the same group.
2728 There is an overlap between that store and the MEM[(int *)p_28 + 8B] = _129;
2729 store though, so if we add the MEM[(long long int *)p_28 + 8B] = 0;
2730 into the group and merging of those 3 stores is successful, merged
2731 stmts will be emitted at the latest store from that group, i.e.
2732 LAST_ORDER, which is the MEM[(int *)p_28].a = -1; store.
2733 The MEM[(int *)p_28 + 8B] = _129; store that originally follows
2734 the MEM[(long long int *)p_28 + 8B] = 0; would now be before it,
2735 so we need to refuse merging MEM[(long long int *)p_28 + 8B] = 0;
2736 into the group. That way it will be its own store group and will
2737 not be touched. If ALL_INTEGER_CST_P and there are overlapping
2738 INTEGER_CST stores, those are mergeable using merge_overlapping,
2739 so don't return false for those.
2741 Similarly, check stores from FIRST_EARLIER (inclusive) to END_EARLIER
2742 (exclusive), whether they don't overlap the bitrange START to END
2743 and have order in between FIRST_ORDER and LAST_ORDER. This is to
2744 prevent merging in cases like:
2745 MEM <char[12]> [&b + 8B] = {};
2746 MEM[(short *) &b] = 5;
2747 _5 = *x_4(D);
2748 MEM <long long unsigned int> [&b + 2B] = _5;
2749 MEM[(char *)&b + 16B] = 88;
2750 MEM[(int *)&b + 20B] = 1;
2751 The = {} store comes in sort_by_bitpos before the = 88 store, and can't
2752 be merged with it, because the = _5 store overlaps these and is in between
2753 them in sort_by_order ordering. If it was merged, the merged store would
2754 go after the = _5 store and thus change behavior. */
2756 static bool
2757 check_no_overlap (const vec<store_immediate_info *> &m_store_info,
2758 unsigned int i,
2759 bool all_integer_cst_p, unsigned int first_order,
2760 unsigned int last_order, unsigned HOST_WIDE_INT start,
2761 unsigned HOST_WIDE_INT end, unsigned int first_earlier,
2762 unsigned end_earlier)
2764 unsigned int len = m_store_info.length ();
2765 for (unsigned int j = first_earlier; j < end_earlier; j++)
2767 store_immediate_info *info = m_store_info[j];
2768 if (info->order > first_order
2769 && info->order < last_order
2770 && info->bitpos + info->bitsize > start)
2771 return false;
2773 for (++i; i < len; ++i)
2775 store_immediate_info *info = m_store_info[i];
2776 if (info->bitpos >= end)
2777 break;
2778 if (info->order < last_order
2779 && (!all_integer_cst_p || info->rhs_code != INTEGER_CST))
2780 return false;
2782 return true;
2785 /* Return true if m_store_info[first] and at least one following store
2786 form a group which store try_size bitsize value which is byte swapped
2787 from a memory load or some value, or identity from some value.
2788 This uses the bswap pass APIs. */
2790 bool
2791 imm_store_chain_info::try_coalesce_bswap (merged_store_group *merged_store,
2792 unsigned int first,
2793 unsigned int try_size,
2794 unsigned int first_earlier)
2796 unsigned int len = m_store_info.length (), last = first;
2797 unsigned HOST_WIDE_INT width = m_store_info[first]->bitsize;
2798 if (width >= try_size)
2799 return false;
2800 for (unsigned int i = first + 1; i < len; ++i)
2802 if (m_store_info[i]->bitpos != m_store_info[first]->bitpos + width
2803 || m_store_info[i]->lp_nr != merged_store->lp_nr
2804 || m_store_info[i]->ins_stmt == NULL)
2805 return false;
2806 width += m_store_info[i]->bitsize;
2807 if (width >= try_size)
2809 last = i;
2810 break;
2813 if (width != try_size)
2814 return false;
2816 bool allow_unaligned
2817 = !STRICT_ALIGNMENT && param_store_merging_allow_unaligned;
2818 /* Punt if the combined store would not be aligned and we need alignment. */
2819 if (!allow_unaligned)
2821 unsigned int align = merged_store->align;
2822 unsigned HOST_WIDE_INT align_base = merged_store->align_base;
2823 for (unsigned int i = first + 1; i <= last; ++i)
2825 unsigned int this_align;
2826 unsigned HOST_WIDE_INT align_bitpos = 0;
2827 get_object_alignment_1 (gimple_assign_lhs (m_store_info[i]->stmt),
2828 &this_align, &align_bitpos);
2829 if (this_align > align)
2831 align = this_align;
2832 align_base = m_store_info[i]->bitpos - align_bitpos;
2835 unsigned HOST_WIDE_INT align_bitpos
2836 = (m_store_info[first]->bitpos - align_base) & (align - 1);
2837 if (align_bitpos)
2838 align = least_bit_hwi (align_bitpos);
2839 if (align < try_size)
2840 return false;
2843 tree type;
2844 switch (try_size)
2846 case 16: type = uint16_type_node; break;
2847 case 32: type = uint32_type_node; break;
2848 case 64: type = uint64_type_node; break;
2849 default: gcc_unreachable ();
2851 struct symbolic_number n;
2852 gimple *ins_stmt = NULL;
2853 int vuse_store = -1;
2854 unsigned int first_order = merged_store->first_order;
2855 unsigned int last_order = merged_store->last_order;
2856 gimple *first_stmt = merged_store->first_stmt;
2857 gimple *last_stmt = merged_store->last_stmt;
2858 unsigned HOST_WIDE_INT end = merged_store->start + merged_store->width;
2859 store_immediate_info *infof = m_store_info[first];
2861 for (unsigned int i = first; i <= last; ++i)
2863 store_immediate_info *info = m_store_info[i];
2864 struct symbolic_number this_n = info->n;
2865 this_n.type = type;
2866 if (!this_n.base_addr)
2867 this_n.range = try_size / BITS_PER_UNIT;
2868 else
2869 /* Update vuse in case it has changed by output_merged_stores. */
2870 this_n.vuse = gimple_vuse (info->ins_stmt);
2871 unsigned int bitpos = info->bitpos - infof->bitpos;
2872 if (!do_shift_rotate (LSHIFT_EXPR, &this_n,
2873 BYTES_BIG_ENDIAN
2874 ? try_size - info->bitsize - bitpos
2875 : bitpos))
2876 return false;
2877 if (this_n.base_addr && vuse_store)
2879 unsigned int j;
2880 for (j = first; j <= last; ++j)
2881 if (this_n.vuse == gimple_vuse (m_store_info[j]->stmt))
2882 break;
2883 if (j > last)
2885 if (vuse_store == 1)
2886 return false;
2887 vuse_store = 0;
2890 if (i == first)
2892 n = this_n;
2893 ins_stmt = info->ins_stmt;
2895 else
2897 if (n.base_addr && n.vuse != this_n.vuse)
2899 if (vuse_store == 0)
2900 return false;
2901 vuse_store = 1;
2903 if (info->order > last_order)
2905 last_order = info->order;
2906 last_stmt = info->stmt;
2908 else if (info->order < first_order)
2910 first_order = info->order;
2911 first_stmt = info->stmt;
2913 end = MAX (end, info->bitpos + info->bitsize);
2915 ins_stmt = perform_symbolic_merge (ins_stmt, &n, info->ins_stmt,
2916 &this_n, &n, BIT_IOR_EXPR);
2917 if (ins_stmt == NULL)
2918 return false;
2922 uint64_t cmpxchg, cmpnop;
2923 bool cast64_to_32;
2924 find_bswap_or_nop_finalize (&n, &cmpxchg, &cmpnop, &cast64_to_32);
2926 /* A complete byte swap should make the symbolic number to start with
2927 the largest digit in the highest order byte. Unchanged symbolic
2928 number indicates a read with same endianness as target architecture. */
2929 if (n.n != cmpnop && n.n != cmpxchg)
2930 return false;
2932 /* For now. */
2933 if (cast64_to_32)
2934 return false;
2936 if (n.base_addr == NULL_TREE && !is_gimple_val (n.src))
2937 return false;
2939 if (!check_no_overlap (m_store_info, last, false, first_order, last_order,
2940 merged_store->start, end, first_earlier, first))
2941 return false;
2943 /* Don't handle memory copy this way if normal non-bswap processing
2944 would handle it too. */
2945 if (n.n == cmpnop && (unsigned) n.n_ops == last - first + 1)
2947 unsigned int i;
2948 for (i = first; i <= last; ++i)
2949 if (m_store_info[i]->rhs_code != MEM_REF)
2950 break;
2951 if (i == last + 1)
2952 return false;
2955 if (n.n == cmpxchg)
2956 switch (try_size)
2958 case 16:
2959 /* Will emit LROTATE_EXPR. */
2960 break;
2961 case 32:
2962 if (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
2963 && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing)
2964 break;
2965 return false;
2966 case 64:
2967 if (builtin_decl_explicit_p (BUILT_IN_BSWAP64)
2968 && optab_handler (bswap_optab, DImode) != CODE_FOR_nothing)
2969 break;
2970 return false;
2971 default:
2972 gcc_unreachable ();
2975 if (!allow_unaligned && n.base_addr)
2977 unsigned int align = get_object_alignment (n.src);
2978 if (align < try_size)
2979 return false;
2982 /* If each load has vuse of the corresponding store, need to verify
2983 the loads can be sunk right before the last store. */
2984 if (vuse_store == 1)
2986 auto_vec<tree, 64> refs;
2987 for (unsigned int i = first; i <= last; ++i)
2988 gather_bswap_load_refs (&refs,
2989 gimple_assign_rhs1 (m_store_info[i]->stmt));
2991 for (tree ref : refs)
2992 if (stmts_may_clobber_ref_p (first_stmt, last_stmt, ref))
2993 return false;
2994 n.vuse = NULL_TREE;
2997 infof->n = n;
2998 infof->ins_stmt = ins_stmt;
2999 for (unsigned int i = first; i <= last; ++i)
3001 m_store_info[i]->rhs_code = n.n == cmpxchg ? LROTATE_EXPR : NOP_EXPR;
3002 m_store_info[i]->ops[0].base_addr = NULL_TREE;
3003 m_store_info[i]->ops[1].base_addr = NULL_TREE;
3004 if (i != first)
3005 merged_store->merge_into (m_store_info[i]);
3008 return true;
3011 /* Go through the candidate stores recorded in m_store_info and merge them
3012 into merged_store_group objects recorded into m_merged_store_groups
3013 representing the widened stores. Return true if coalescing was successful
3014 and the number of widened stores is fewer than the original number
3015 of stores. */
3017 bool
3018 imm_store_chain_info::coalesce_immediate_stores ()
3020 /* Anything less can't be processed. */
3021 if (m_store_info.length () < 2)
3022 return false;
3024 if (dump_file && (dump_flags & TDF_DETAILS))
3025 fprintf (dump_file, "Attempting to coalesce %u stores in chain\n",
3026 m_store_info.length ());
3028 store_immediate_info *info;
3029 unsigned int i, ignore = 0;
3030 unsigned int first_earlier = 0;
3031 unsigned int end_earlier = 0;
3033 /* Order the stores by the bitposition they write to. */
3034 m_store_info.qsort (sort_by_bitpos);
3036 info = m_store_info[0];
3037 merged_store_group *merged_store = new merged_store_group (info);
3038 if (dump_file && (dump_flags & TDF_DETAILS))
3039 fputs ("New store group\n", dump_file);
3041 FOR_EACH_VEC_ELT (m_store_info, i, info)
3043 unsigned HOST_WIDE_INT new_bitregion_start, new_bitregion_end;
3045 if (i <= ignore)
3046 goto done;
3048 while (first_earlier < end_earlier
3049 && (m_store_info[first_earlier]->bitpos
3050 + m_store_info[first_earlier]->bitsize
3051 <= merged_store->start))
3052 first_earlier++;
3054 /* First try to handle group of stores like:
3055 p[0] = data >> 24;
3056 p[1] = data >> 16;
3057 p[2] = data >> 8;
3058 p[3] = data;
3059 using the bswap framework. */
3060 if (info->bitpos == merged_store->start + merged_store->width
3061 && merged_store->stores.length () == 1
3062 && merged_store->stores[0]->ins_stmt != NULL
3063 && info->lp_nr == merged_store->lp_nr
3064 && info->ins_stmt != NULL)
3066 unsigned int try_size;
3067 for (try_size = 64; try_size >= 16; try_size >>= 1)
3068 if (try_coalesce_bswap (merged_store, i - 1, try_size,
3069 first_earlier))
3070 break;
3072 if (try_size >= 16)
3074 ignore = i + merged_store->stores.length () - 1;
3075 m_merged_store_groups.safe_push (merged_store);
3076 if (ignore < m_store_info.length ())
3078 merged_store = new merged_store_group (m_store_info[ignore]);
3079 end_earlier = ignore;
3081 else
3082 merged_store = NULL;
3083 goto done;
3087 new_bitregion_start
3088 = MIN (merged_store->bitregion_start, info->bitregion_start);
3089 new_bitregion_end
3090 = MAX (merged_store->bitregion_end, info->bitregion_end);
3092 if (info->order >= merged_store->first_nonmergeable_order
3093 || (((new_bitregion_end - new_bitregion_start + 1) / BITS_PER_UNIT)
3094 > (unsigned) param_store_merging_max_size))
3097 /* |---store 1---|
3098 |---store 2---|
3099 Overlapping stores. */
3100 else if (IN_RANGE (info->bitpos, merged_store->start,
3101 merged_store->start + merged_store->width - 1)
3102 /* |---store 1---||---store 2---|
3103 Handle also the consecutive INTEGER_CST stores case here,
3104 as we have here the code to deal with overlaps. */
3105 || (info->bitregion_start <= merged_store->bitregion_end
3106 && info->rhs_code == INTEGER_CST
3107 && merged_store->only_constants
3108 && merged_store->can_be_merged_into (info)))
3110 /* Only allow overlapping stores of constants. */
3111 if (info->rhs_code == INTEGER_CST
3112 && merged_store->only_constants
3113 && info->lp_nr == merged_store->lp_nr)
3115 unsigned int first_order
3116 = MIN (merged_store->first_order, info->order);
3117 unsigned int last_order
3118 = MAX (merged_store->last_order, info->order);
3119 unsigned HOST_WIDE_INT end
3120 = MAX (merged_store->start + merged_store->width,
3121 info->bitpos + info->bitsize);
3122 if (check_no_overlap (m_store_info, i, true, first_order,
3123 last_order, merged_store->start, end,
3124 first_earlier, end_earlier))
3126 /* check_no_overlap call above made sure there are no
3127 overlapping stores with non-INTEGER_CST rhs_code
3128 in between the first and last of the stores we've
3129 just merged. If there are any INTEGER_CST rhs_code
3130 stores in between, we need to merge_overlapping them
3131 even if in the sort_by_bitpos order there are other
3132 overlapping stores in between. Keep those stores as is.
3133 Example:
3134 MEM[(int *)p_28] = 0;
3135 MEM[(char *)p_28 + 3B] = 1;
3136 MEM[(char *)p_28 + 1B] = 2;
3137 MEM[(char *)p_28 + 2B] = MEM[(char *)p_28 + 6B];
3138 We can't merge the zero store with the store of two and
3139 not merge anything else, because the store of one is
3140 in the original order in between those two, but in
3141 store_by_bitpos order it comes after the last store that
3142 we can't merge with them. We can merge the first 3 stores
3143 and keep the last store as is though. */
3144 unsigned int len = m_store_info.length ();
3145 unsigned int try_order = last_order;
3146 unsigned int first_nonmergeable_order;
3147 unsigned int k;
3148 bool last_iter = false;
3149 int attempts = 0;
3152 unsigned int max_order = 0;
3153 unsigned int min_order = first_order;
3154 unsigned first_nonmergeable_int_order = ~0U;
3155 unsigned HOST_WIDE_INT this_end = end;
3156 k = i;
3157 first_nonmergeable_order = ~0U;
3158 for (unsigned int j = i + 1; j < len; ++j)
3160 store_immediate_info *info2 = m_store_info[j];
3161 if (info2->bitpos >= this_end)
3162 break;
3163 if (info2->order < try_order)
3165 if (info2->rhs_code != INTEGER_CST
3166 || info2->lp_nr != merged_store->lp_nr)
3168 /* Normally check_no_overlap makes sure this
3169 doesn't happen, but if end grows below,
3170 then we need to process more stores than
3171 check_no_overlap verified. Example:
3172 MEM[(int *)p_5] = 0;
3173 MEM[(short *)p_5 + 3B] = 1;
3174 MEM[(char *)p_5 + 4B] = _9;
3175 MEM[(char *)p_5 + 2B] = 2; */
3176 k = 0;
3177 break;
3179 k = j;
3180 min_order = MIN (min_order, info2->order);
3181 this_end = MAX (this_end,
3182 info2->bitpos + info2->bitsize);
3184 else if (info2->rhs_code == INTEGER_CST
3185 && info2->lp_nr == merged_store->lp_nr
3186 && !last_iter)
3188 max_order = MAX (max_order, info2->order + 1);
3189 first_nonmergeable_int_order
3190 = MIN (first_nonmergeable_int_order,
3191 info2->order);
3193 else
3194 first_nonmergeable_order
3195 = MIN (first_nonmergeable_order, info2->order);
3197 if (k > i
3198 && !check_no_overlap (m_store_info, len - 1, true,
3199 min_order, try_order,
3200 merged_store->start, this_end,
3201 first_earlier, end_earlier))
3202 k = 0;
3203 if (k == 0)
3205 if (last_order == try_order)
3206 break;
3207 /* If this failed, but only because we grew
3208 try_order, retry with the last working one,
3209 so that we merge at least something. */
3210 try_order = last_order;
3211 last_iter = true;
3212 continue;
3214 last_order = try_order;
3215 /* Retry with a larger try_order to see if we could
3216 merge some further INTEGER_CST stores. */
3217 if (max_order
3218 && (first_nonmergeable_int_order
3219 < first_nonmergeable_order))
3221 try_order = MIN (max_order,
3222 first_nonmergeable_order);
3223 try_order
3224 = MIN (try_order,
3225 merged_store->first_nonmergeable_order);
3226 if (try_order > last_order && ++attempts < 16)
3227 continue;
3229 first_nonmergeable_order
3230 = MIN (first_nonmergeable_order,
3231 first_nonmergeable_int_order);
3232 end = this_end;
3233 break;
3235 while (1);
3237 if (k != 0)
3239 merged_store->merge_overlapping (info);
3241 merged_store->first_nonmergeable_order
3242 = MIN (merged_store->first_nonmergeable_order,
3243 first_nonmergeable_order);
3245 for (unsigned int j = i + 1; j <= k; j++)
3247 store_immediate_info *info2 = m_store_info[j];
3248 gcc_assert (info2->bitpos < end);
3249 if (info2->order < last_order)
3251 gcc_assert (info2->rhs_code == INTEGER_CST);
3252 if (info != info2)
3253 merged_store->merge_overlapping (info2);
3255 /* Other stores are kept and not merged in any
3256 way. */
3258 ignore = k;
3259 goto done;
3264 /* |---store 1---||---store 2---|
3265 This store is consecutive to the previous one.
3266 Merge it into the current store group. There can be gaps in between
3267 the stores, but there can't be gaps in between bitregions. */
3268 else if (info->bitregion_start <= merged_store->bitregion_end
3269 && merged_store->can_be_merged_into (info))
3271 store_immediate_info *infof = merged_store->stores[0];
3273 /* All the rhs_code ops that take 2 operands are commutative,
3274 swap the operands if it could make the operands compatible. */
3275 if (infof->ops[0].base_addr
3276 && infof->ops[1].base_addr
3277 && info->ops[0].base_addr
3278 && info->ops[1].base_addr
3279 && known_eq (info->ops[1].bitpos - infof->ops[0].bitpos,
3280 info->bitpos - infof->bitpos)
3281 && operand_equal_p (info->ops[1].base_addr,
3282 infof->ops[0].base_addr, 0))
3284 std::swap (info->ops[0], info->ops[1]);
3285 info->ops_swapped_p = true;
3287 if (check_no_overlap (m_store_info, i, false,
3288 MIN (merged_store->first_order, info->order),
3289 MAX (merged_store->last_order, info->order),
3290 merged_store->start,
3291 MAX (merged_store->start + merged_store->width,
3292 info->bitpos + info->bitsize),
3293 first_earlier, end_earlier))
3295 /* Turn MEM_REF into BIT_INSERT_EXPR for bit-field stores. */
3296 if (info->rhs_code == MEM_REF && infof->rhs_code != MEM_REF)
3298 info->rhs_code = BIT_INSERT_EXPR;
3299 info->ops[0].val = gimple_assign_rhs1 (info->stmt);
3300 info->ops[0].base_addr = NULL_TREE;
3302 else if (infof->rhs_code == MEM_REF && info->rhs_code != MEM_REF)
3304 for (store_immediate_info *infoj : merged_store->stores)
3306 infoj->rhs_code = BIT_INSERT_EXPR;
3307 infoj->ops[0].val = gimple_assign_rhs1 (infoj->stmt);
3308 infoj->ops[0].base_addr = NULL_TREE;
3310 merged_store->bit_insertion = true;
3312 if ((infof->ops[0].base_addr
3313 ? compatible_load_p (merged_store, info, base_addr, 0)
3314 : !info->ops[0].base_addr)
3315 && (infof->ops[1].base_addr
3316 ? compatible_load_p (merged_store, info, base_addr, 1)
3317 : !info->ops[1].base_addr))
3319 merged_store->merge_into (info);
3320 goto done;
3325 /* |---store 1---| <gap> |---store 2---|.
3326 Gap between stores or the rhs not compatible. Start a new group. */
3328 /* Try to apply all the stores recorded for the group to determine
3329 the bitpattern they write and discard it if that fails.
3330 This will also reject single-store groups. */
3331 if (merged_store->apply_stores ())
3332 m_merged_store_groups.safe_push (merged_store);
3333 else
3334 delete merged_store;
3336 merged_store = new merged_store_group (info);
3337 end_earlier = i;
3338 if (dump_file && (dump_flags & TDF_DETAILS))
3339 fputs ("New store group\n", dump_file);
3341 done:
3342 if (dump_file && (dump_flags & TDF_DETAILS))
3344 fprintf (dump_file, "Store %u:\nbitsize:" HOST_WIDE_INT_PRINT_DEC
3345 " bitpos:" HOST_WIDE_INT_PRINT_DEC " val:",
3346 i, info->bitsize, info->bitpos);
3347 print_generic_expr (dump_file, gimple_assign_rhs1 (info->stmt));
3348 fputc ('\n', dump_file);
3352 /* Record or discard the last store group. */
3353 if (merged_store)
3355 if (merged_store->apply_stores ())
3356 m_merged_store_groups.safe_push (merged_store);
3357 else
3358 delete merged_store;
3361 gcc_assert (m_merged_store_groups.length () <= m_store_info.length ());
3363 bool success
3364 = !m_merged_store_groups.is_empty ()
3365 && m_merged_store_groups.length () < m_store_info.length ();
3367 if (success && dump_file)
3368 fprintf (dump_file, "Coalescing successful!\nMerged into %u stores\n",
3369 m_merged_store_groups.length ());
3371 return success;
3374 /* Return the type to use for the merged stores or loads described by STMTS.
3375 This is needed to get the alias sets right. If IS_LOAD, look for rhs,
3376 otherwise lhs. Additionally set *CLIQUEP and *BASEP to MR_DEPENDENCE_*
3377 of the MEM_REFs if any. */
3379 static tree
3380 get_alias_type_for_stmts (vec<gimple *> &stmts, bool is_load,
3381 unsigned short *cliquep, unsigned short *basep)
3383 gimple *stmt;
3384 unsigned int i;
3385 tree type = NULL_TREE;
3386 tree ret = NULL_TREE;
3387 *cliquep = 0;
3388 *basep = 0;
3390 FOR_EACH_VEC_ELT (stmts, i, stmt)
3392 tree ref = is_load ? gimple_assign_rhs1 (stmt)
3393 : gimple_assign_lhs (stmt);
3394 tree type1 = reference_alias_ptr_type (ref);
3395 tree base = get_base_address (ref);
3397 if (i == 0)
3399 if (TREE_CODE (base) == MEM_REF)
3401 *cliquep = MR_DEPENDENCE_CLIQUE (base);
3402 *basep = MR_DEPENDENCE_BASE (base);
3404 ret = type = type1;
3405 continue;
3407 if (!alias_ptr_types_compatible_p (type, type1))
3408 ret = ptr_type_node;
3409 if (TREE_CODE (base) != MEM_REF
3410 || *cliquep != MR_DEPENDENCE_CLIQUE (base)
3411 || *basep != MR_DEPENDENCE_BASE (base))
3413 *cliquep = 0;
3414 *basep = 0;
3417 return ret;
3420 /* Return the location_t information we can find among the statements
3421 in STMTS. */
3423 static location_t
3424 get_location_for_stmts (vec<gimple *> &stmts)
3426 for (gimple *stmt : stmts)
3427 if (gimple_has_location (stmt))
3428 return gimple_location (stmt);
3430 return UNKNOWN_LOCATION;
3433 /* Used to decribe a store resulting from splitting a wide store in smaller
3434 regularly-sized stores in split_group. */
3436 class split_store
3438 public:
3439 unsigned HOST_WIDE_INT bytepos;
3440 unsigned HOST_WIDE_INT size;
3441 unsigned HOST_WIDE_INT align;
3442 auto_vec<store_immediate_info *> orig_stores;
3443 /* True if there is a single orig stmt covering the whole split store. */
3444 bool orig;
3445 split_store (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
3446 unsigned HOST_WIDE_INT);
3449 /* Simple constructor. */
3451 split_store::split_store (unsigned HOST_WIDE_INT bp,
3452 unsigned HOST_WIDE_INT sz,
3453 unsigned HOST_WIDE_INT al)
3454 : bytepos (bp), size (sz), align (al), orig (false)
3456 orig_stores.create (0);
3459 /* Record all stores in GROUP that write to the region starting at BITPOS and
3460 is of size BITSIZE. Record infos for such statements in STORES if
3461 non-NULL. The stores in GROUP must be sorted by bitposition. Return INFO
3462 if there is exactly one original store in the range (in that case ignore
3463 clobber stmts, unless there are only clobber stmts). */
3465 static store_immediate_info *
3466 find_constituent_stores (class merged_store_group *group,
3467 vec<store_immediate_info *> *stores,
3468 unsigned int *first,
3469 unsigned HOST_WIDE_INT bitpos,
3470 unsigned HOST_WIDE_INT bitsize)
3472 store_immediate_info *info, *ret = NULL;
3473 unsigned int i;
3474 bool second = false;
3475 bool update_first = true;
3476 unsigned HOST_WIDE_INT end = bitpos + bitsize;
3477 for (i = *first; group->stores.iterate (i, &info); ++i)
3479 unsigned HOST_WIDE_INT stmt_start = info->bitpos;
3480 unsigned HOST_WIDE_INT stmt_end = stmt_start + info->bitsize;
3481 if (stmt_end <= bitpos)
3483 /* BITPOS passed to this function never decreases from within the
3484 same split_group call, so optimize and don't scan info records
3485 which are known to end before or at BITPOS next time.
3486 Only do it if all stores before this one also pass this. */
3487 if (update_first)
3488 *first = i + 1;
3489 continue;
3491 else
3492 update_first = false;
3494 /* The stores in GROUP are ordered by bitposition so if we're past
3495 the region for this group return early. */
3496 if (stmt_start >= end)
3497 return ret;
3499 if (gimple_clobber_p (info->stmt))
3501 if (stores)
3502 stores->safe_push (info);
3503 if (ret == NULL)
3504 ret = info;
3505 continue;
3507 if (stores)
3509 stores->safe_push (info);
3510 if (ret && !gimple_clobber_p (ret->stmt))
3512 ret = NULL;
3513 second = true;
3516 else if (ret && !gimple_clobber_p (ret->stmt))
3517 return NULL;
3518 if (!second)
3519 ret = info;
3521 return ret;
3524 /* Return how many SSA_NAMEs used to compute value to store in the INFO
3525 store have multiple uses. If any SSA_NAME has multiple uses, also
3526 count statements needed to compute it. */
3528 static unsigned
3529 count_multiple_uses (store_immediate_info *info)
3531 gimple *stmt = info->stmt;
3532 unsigned ret = 0;
3533 switch (info->rhs_code)
3535 case INTEGER_CST:
3536 case STRING_CST:
3537 return 0;
3538 case BIT_AND_EXPR:
3539 case BIT_IOR_EXPR:
3540 case BIT_XOR_EXPR:
3541 if (info->bit_not_p)
3543 if (!has_single_use (gimple_assign_rhs1 (stmt)))
3544 ret = 1; /* Fall through below to return
3545 the BIT_NOT_EXPR stmt and then
3546 BIT_{AND,IOR,XOR}_EXPR and anything it
3547 uses. */
3548 else
3549 /* stmt is after this the BIT_NOT_EXPR. */
3550 stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3552 if (!has_single_use (gimple_assign_rhs1 (stmt)))
3554 ret += 1 + info->ops[0].bit_not_p;
3555 if (info->ops[1].base_addr)
3556 ret += 1 + info->ops[1].bit_not_p;
3557 return ret + 1;
3559 stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3560 /* stmt is now the BIT_*_EXPR. */
3561 if (!has_single_use (gimple_assign_rhs1 (stmt)))
3562 ret += 1 + info->ops[info->ops_swapped_p].bit_not_p;
3563 else if (info->ops[info->ops_swapped_p].bit_not_p)
3565 gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3566 if (!has_single_use (gimple_assign_rhs1 (stmt2)))
3567 ++ret;
3569 if (info->ops[1].base_addr == NULL_TREE)
3571 gcc_checking_assert (!info->ops_swapped_p);
3572 return ret;
3574 if (!has_single_use (gimple_assign_rhs2 (stmt)))
3575 ret += 1 + info->ops[1 - info->ops_swapped_p].bit_not_p;
3576 else if (info->ops[1 - info->ops_swapped_p].bit_not_p)
3578 gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3579 if (!has_single_use (gimple_assign_rhs1 (stmt2)))
3580 ++ret;
3582 return ret;
3583 case MEM_REF:
3584 if (!has_single_use (gimple_assign_rhs1 (stmt)))
3585 return 1 + info->ops[0].bit_not_p;
3586 else if (info->ops[0].bit_not_p)
3588 stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3589 if (!has_single_use (gimple_assign_rhs1 (stmt)))
3590 return 1;
3592 return 0;
3593 case BIT_INSERT_EXPR:
3594 return has_single_use (gimple_assign_rhs1 (stmt)) ? 0 : 1;
3595 default:
3596 gcc_unreachable ();
3600 /* Split a merged store described by GROUP by populating the SPLIT_STORES
3601 vector (if non-NULL) with split_store structs describing the byte offset
3602 (from the base), the bit size and alignment of each store as well as the
3603 original statements involved in each such split group.
3604 This is to separate the splitting strategy from the statement
3605 building/emission/linking done in output_merged_store.
3606 Return number of new stores.
3607 If ALLOW_UNALIGNED_STORE is false, then all stores must be aligned.
3608 If ALLOW_UNALIGNED_LOAD is false, then all loads must be aligned.
3609 BZERO_FIRST may be true only when the first store covers the whole group
3610 and clears it; if BZERO_FIRST is true, keep that first store in the set
3611 unmodified and emit further stores for the overrides only.
3612 If SPLIT_STORES is NULL, it is just a dry run to count number of
3613 new stores. */
3615 static unsigned int
3616 split_group (merged_store_group *group, bool allow_unaligned_store,
3617 bool allow_unaligned_load, bool bzero_first,
3618 vec<split_store *> *split_stores,
3619 unsigned *total_orig,
3620 unsigned *total_new)
3622 unsigned HOST_WIDE_INT pos = group->bitregion_start;
3623 unsigned HOST_WIDE_INT size = group->bitregion_end - pos;
3624 unsigned HOST_WIDE_INT bytepos = pos / BITS_PER_UNIT;
3625 unsigned HOST_WIDE_INT group_align = group->align;
3626 unsigned HOST_WIDE_INT align_base = group->align_base;
3627 unsigned HOST_WIDE_INT group_load_align = group_align;
3628 bool any_orig = false;
3630 gcc_assert ((size % BITS_PER_UNIT == 0) && (pos % BITS_PER_UNIT == 0));
3632 /* For bswap framework using sets of stores, all the checking has been done
3633 earlier in try_coalesce_bswap and the result always needs to be emitted
3634 as a single store. Likewise for string concatenation, */
3635 if (group->stores[0]->rhs_code == LROTATE_EXPR
3636 || group->stores[0]->rhs_code == NOP_EXPR
3637 || group->string_concatenation)
3639 gcc_assert (!bzero_first);
3640 if (total_orig)
3642 /* Avoid the old/new stmt count heuristics. It should be
3643 always beneficial. */
3644 total_new[0] = 1;
3645 total_orig[0] = 2;
3648 if (split_stores)
3650 unsigned HOST_WIDE_INT align_bitpos
3651 = (group->start - align_base) & (group_align - 1);
3652 unsigned HOST_WIDE_INT align = group_align;
3653 if (align_bitpos)
3654 align = least_bit_hwi (align_bitpos);
3655 bytepos = group->start / BITS_PER_UNIT;
3656 split_store *store
3657 = new split_store (bytepos, group->width, align);
3658 unsigned int first = 0;
3659 find_constituent_stores (group, &store->orig_stores,
3660 &first, group->start, group->width);
3661 split_stores->safe_push (store);
3664 return 1;
3667 unsigned int ret = 0, first = 0;
3668 unsigned HOST_WIDE_INT try_pos = bytepos;
3670 if (total_orig)
3672 unsigned int i;
3673 store_immediate_info *info = group->stores[0];
3675 total_new[0] = 0;
3676 total_orig[0] = 1; /* The orig store. */
3677 info = group->stores[0];
3678 if (info->ops[0].base_addr)
3679 total_orig[0]++;
3680 if (info->ops[1].base_addr)
3681 total_orig[0]++;
3682 switch (info->rhs_code)
3684 case BIT_AND_EXPR:
3685 case BIT_IOR_EXPR:
3686 case BIT_XOR_EXPR:
3687 total_orig[0]++; /* The orig BIT_*_EXPR stmt. */
3688 break;
3689 default:
3690 break;
3692 total_orig[0] *= group->stores.length ();
3694 FOR_EACH_VEC_ELT (group->stores, i, info)
3696 total_new[0] += count_multiple_uses (info);
3697 total_orig[0] += (info->bit_not_p
3698 + info->ops[0].bit_not_p
3699 + info->ops[1].bit_not_p);
3703 if (!allow_unaligned_load)
3704 for (int i = 0; i < 2; ++i)
3705 if (group->load_align[i])
3706 group_load_align = MIN (group_load_align, group->load_align[i]);
3708 if (bzero_first)
3710 store_immediate_info *gstore;
3711 FOR_EACH_VEC_ELT (group->stores, first, gstore)
3712 if (!gimple_clobber_p (gstore->stmt))
3713 break;
3714 ++first;
3715 ret = 1;
3716 if (split_stores)
3718 split_store *store
3719 = new split_store (bytepos, gstore->bitsize, align_base);
3720 store->orig_stores.safe_push (gstore);
3721 store->orig = true;
3722 any_orig = true;
3723 split_stores->safe_push (store);
3727 while (size > 0)
3729 if ((allow_unaligned_store || group_align <= BITS_PER_UNIT)
3730 && (group->mask[try_pos - bytepos] == (unsigned char) ~0U
3731 || (bzero_first && group->val[try_pos - bytepos] == 0)))
3733 /* Skip padding bytes. */
3734 ++try_pos;
3735 size -= BITS_PER_UNIT;
3736 continue;
3739 unsigned HOST_WIDE_INT try_bitpos = try_pos * BITS_PER_UNIT;
3740 unsigned int try_size = MAX_STORE_BITSIZE, nonmasked;
3741 unsigned HOST_WIDE_INT align_bitpos
3742 = (try_bitpos - align_base) & (group_align - 1);
3743 unsigned HOST_WIDE_INT align = group_align;
3744 bool found_orig = false;
3745 if (align_bitpos)
3746 align = least_bit_hwi (align_bitpos);
3747 if (!allow_unaligned_store)
3748 try_size = MIN (try_size, align);
3749 if (!allow_unaligned_load)
3751 /* If we can't do or don't want to do unaligned stores
3752 as well as loads, we need to take the loads into account
3753 as well. */
3754 unsigned HOST_WIDE_INT load_align = group_load_align;
3755 align_bitpos = (try_bitpos - align_base) & (load_align - 1);
3756 if (align_bitpos)
3757 load_align = least_bit_hwi (align_bitpos);
3758 for (int i = 0; i < 2; ++i)
3759 if (group->load_align[i])
3761 align_bitpos
3762 = known_alignment (try_bitpos
3763 - group->stores[0]->bitpos
3764 + group->stores[0]->ops[i].bitpos
3765 - group->load_align_base[i]);
3766 if (align_bitpos & (group_load_align - 1))
3768 unsigned HOST_WIDE_INT a = least_bit_hwi (align_bitpos);
3769 load_align = MIN (load_align, a);
3772 try_size = MIN (try_size, load_align);
3774 store_immediate_info *info
3775 = find_constituent_stores (group, NULL, &first, try_bitpos, try_size);
3776 if (info && !gimple_clobber_p (info->stmt))
3778 /* If there is just one original statement for the range, see if
3779 we can just reuse the original store which could be even larger
3780 than try_size. */
3781 unsigned HOST_WIDE_INT stmt_end
3782 = ROUND_UP (info->bitpos + info->bitsize, BITS_PER_UNIT);
3783 info = find_constituent_stores (group, NULL, &first, try_bitpos,
3784 stmt_end - try_bitpos);
3785 if (info && info->bitpos >= try_bitpos)
3787 store_immediate_info *info2 = NULL;
3788 unsigned int first_copy = first;
3789 if (info->bitpos > try_bitpos
3790 && stmt_end - try_bitpos <= try_size)
3792 info2 = find_constituent_stores (group, NULL, &first_copy,
3793 try_bitpos,
3794 info->bitpos - try_bitpos);
3795 gcc_assert (info2 == NULL || gimple_clobber_p (info2->stmt));
3797 if (info2 == NULL && stmt_end - try_bitpos < try_size)
3799 info2 = find_constituent_stores (group, NULL, &first_copy,
3800 stmt_end,
3801 (try_bitpos + try_size)
3802 - stmt_end);
3803 gcc_assert (info2 == NULL || gimple_clobber_p (info2->stmt));
3805 if (info2 == NULL)
3807 try_size = stmt_end - try_bitpos;
3808 found_orig = true;
3809 goto found;
3814 /* Approximate store bitsize for the case when there are no padding
3815 bits. */
3816 while (try_size > size)
3817 try_size /= 2;
3818 /* Now look for whole padding bytes at the end of that bitsize. */
3819 for (nonmasked = try_size / BITS_PER_UNIT; nonmasked > 0; --nonmasked)
3820 if (group->mask[try_pos - bytepos + nonmasked - 1]
3821 != (unsigned char) ~0U
3822 && (!bzero_first
3823 || group->val[try_pos - bytepos + nonmasked - 1] != 0))
3824 break;
3825 if (nonmasked == 0 || (info && gimple_clobber_p (info->stmt)))
3827 /* If entire try_size range is padding, skip it. */
3828 try_pos += try_size / BITS_PER_UNIT;
3829 size -= try_size;
3830 continue;
3832 /* Otherwise try to decrease try_size if second half, last 3 quarters
3833 etc. are padding. */
3834 nonmasked *= BITS_PER_UNIT;
3835 while (nonmasked <= try_size / 2)
3836 try_size /= 2;
3837 if (!allow_unaligned_store && group_align > BITS_PER_UNIT)
3839 /* Now look for whole padding bytes at the start of that bitsize. */
3840 unsigned int try_bytesize = try_size / BITS_PER_UNIT, masked;
3841 for (masked = 0; masked < try_bytesize; ++masked)
3842 if (group->mask[try_pos - bytepos + masked] != (unsigned char) ~0U
3843 && (!bzero_first
3844 || group->val[try_pos - bytepos + masked] != 0))
3845 break;
3846 masked *= BITS_PER_UNIT;
3847 gcc_assert (masked < try_size);
3848 if (masked >= try_size / 2)
3850 while (masked >= try_size / 2)
3852 try_size /= 2;
3853 try_pos += try_size / BITS_PER_UNIT;
3854 size -= try_size;
3855 masked -= try_size;
3857 /* Need to recompute the alignment, so just retry at the new
3858 position. */
3859 continue;
3863 found:
3864 ++ret;
3866 if (split_stores)
3868 split_store *store
3869 = new split_store (try_pos, try_size, align);
3870 info = find_constituent_stores (group, &store->orig_stores,
3871 &first, try_bitpos, try_size);
3872 if (info
3873 && !gimple_clobber_p (info->stmt)
3874 && info->bitpos >= try_bitpos
3875 && info->bitpos + info->bitsize <= try_bitpos + try_size
3876 && (store->orig_stores.length () == 1
3877 || found_orig
3878 || (info->bitpos == try_bitpos
3879 && (info->bitpos + info->bitsize
3880 == try_bitpos + try_size))))
3882 store->orig = true;
3883 any_orig = true;
3885 split_stores->safe_push (store);
3888 try_pos += try_size / BITS_PER_UNIT;
3889 size -= try_size;
3892 if (total_orig)
3894 unsigned int i;
3895 split_store *store;
3896 /* If we are reusing some original stores and any of the
3897 original SSA_NAMEs had multiple uses, we need to subtract
3898 those now before we add the new ones. */
3899 if (total_new[0] && any_orig)
3901 FOR_EACH_VEC_ELT (*split_stores, i, store)
3902 if (store->orig)
3903 total_new[0] -= count_multiple_uses (store->orig_stores[0]);
3905 total_new[0] += ret; /* The new store. */
3906 store_immediate_info *info = group->stores[0];
3907 if (info->ops[0].base_addr)
3908 total_new[0] += ret;
3909 if (info->ops[1].base_addr)
3910 total_new[0] += ret;
3911 switch (info->rhs_code)
3913 case BIT_AND_EXPR:
3914 case BIT_IOR_EXPR:
3915 case BIT_XOR_EXPR:
3916 total_new[0] += ret; /* The new BIT_*_EXPR stmt. */
3917 break;
3918 default:
3919 break;
3921 FOR_EACH_VEC_ELT (*split_stores, i, store)
3923 unsigned int j;
3924 bool bit_not_p[3] = { false, false, false };
3925 /* If all orig_stores have certain bit_not_p set, then
3926 we'd use a BIT_NOT_EXPR stmt and need to account for it.
3927 If some orig_stores have certain bit_not_p set, then
3928 we'd use a BIT_XOR_EXPR with a mask and need to account for
3929 it. */
3930 FOR_EACH_VEC_ELT (store->orig_stores, j, info)
3932 if (info->ops[0].bit_not_p)
3933 bit_not_p[0] = true;
3934 if (info->ops[1].bit_not_p)
3935 bit_not_p[1] = true;
3936 if (info->bit_not_p)
3937 bit_not_p[2] = true;
3939 total_new[0] += bit_not_p[0] + bit_not_p[1] + bit_not_p[2];
3944 return ret;
3947 /* Return the operation through which the operand IDX (if < 2) or
3948 result (IDX == 2) should be inverted. If NOP_EXPR, no inversion
3949 is done, if BIT_NOT_EXPR, all bits are inverted, if BIT_XOR_EXPR,
3950 the bits should be xored with mask. */
3952 static enum tree_code
3953 invert_op (split_store *split_store, int idx, tree int_type, tree &mask)
3955 unsigned int i;
3956 store_immediate_info *info;
3957 unsigned int cnt = 0;
3958 bool any_paddings = false;
3959 FOR_EACH_VEC_ELT (split_store->orig_stores, i, info)
3961 bool bit_not_p = idx < 2 ? info->ops[idx].bit_not_p : info->bit_not_p;
3962 if (bit_not_p)
3964 ++cnt;
3965 tree lhs = gimple_assign_lhs (info->stmt);
3966 if (INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3967 && TYPE_PRECISION (TREE_TYPE (lhs)) < info->bitsize)
3968 any_paddings = true;
3971 mask = NULL_TREE;
3972 if (cnt == 0)
3973 return NOP_EXPR;
3974 if (cnt == split_store->orig_stores.length () && !any_paddings)
3975 return BIT_NOT_EXPR;
3977 unsigned HOST_WIDE_INT try_bitpos = split_store->bytepos * BITS_PER_UNIT;
3978 unsigned buf_size = split_store->size / BITS_PER_UNIT;
3979 unsigned char *buf
3980 = XALLOCAVEC (unsigned char, buf_size);
3981 memset (buf, ~0U, buf_size);
3982 FOR_EACH_VEC_ELT (split_store->orig_stores, i, info)
3984 bool bit_not_p = idx < 2 ? info->ops[idx].bit_not_p : info->bit_not_p;
3985 if (!bit_not_p)
3986 continue;
3987 /* Clear regions with bit_not_p and invert afterwards, rather than
3988 clear regions with !bit_not_p, so that gaps in between stores aren't
3989 set in the mask. */
3990 unsigned HOST_WIDE_INT bitsize = info->bitsize;
3991 unsigned HOST_WIDE_INT prec = bitsize;
3992 unsigned int pos_in_buffer = 0;
3993 if (any_paddings)
3995 tree lhs = gimple_assign_lhs (info->stmt);
3996 if (INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3997 && TYPE_PRECISION (TREE_TYPE (lhs)) < bitsize)
3998 prec = TYPE_PRECISION (TREE_TYPE (lhs));
4000 if (info->bitpos < try_bitpos)
4002 gcc_assert (info->bitpos + bitsize > try_bitpos);
4003 if (!BYTES_BIG_ENDIAN)
4005 if (prec <= try_bitpos - info->bitpos)
4006 continue;
4007 prec -= try_bitpos - info->bitpos;
4009 bitsize -= try_bitpos - info->bitpos;
4010 if (BYTES_BIG_ENDIAN && prec > bitsize)
4011 prec = bitsize;
4013 else
4014 pos_in_buffer = info->bitpos - try_bitpos;
4015 if (prec < bitsize)
4017 /* If this is a bool inversion, invert just the least significant
4018 prec bits rather than all bits of it. */
4019 if (BYTES_BIG_ENDIAN)
4021 pos_in_buffer += bitsize - prec;
4022 if (pos_in_buffer >= split_store->size)
4023 continue;
4025 bitsize = prec;
4027 if (pos_in_buffer + bitsize > split_store->size)
4028 bitsize = split_store->size - pos_in_buffer;
4029 unsigned char *p = buf + (pos_in_buffer / BITS_PER_UNIT);
4030 if (BYTES_BIG_ENDIAN)
4031 clear_bit_region_be (p, (BITS_PER_UNIT - 1
4032 - (pos_in_buffer % BITS_PER_UNIT)), bitsize);
4033 else
4034 clear_bit_region (p, pos_in_buffer % BITS_PER_UNIT, bitsize);
4036 for (unsigned int i = 0; i < buf_size; ++i)
4037 buf[i] = ~buf[i];
4038 mask = native_interpret_expr (int_type, buf, buf_size);
4039 return BIT_XOR_EXPR;
4042 /* Given a merged store group GROUP output the widened version of it.
4043 The store chain is against the base object BASE.
4044 Try store sizes of at most MAX_STORE_BITSIZE bits wide and don't output
4045 unaligned stores for STRICT_ALIGNMENT targets or if it's too expensive.
4046 Make sure that the number of statements output is less than the number of
4047 original statements. If a better sequence is possible emit it and
4048 return true. */
4050 bool
4051 imm_store_chain_info::output_merged_store (merged_store_group *group)
4053 const unsigned HOST_WIDE_INT start_byte_pos
4054 = group->bitregion_start / BITS_PER_UNIT;
4055 unsigned int orig_num_stmts = group->stores.length ();
4056 if (orig_num_stmts < 2)
4057 return false;
4059 bool allow_unaligned_store
4060 = !STRICT_ALIGNMENT && param_store_merging_allow_unaligned;
4061 bool allow_unaligned_load = allow_unaligned_store;
4062 bool bzero_first = false;
4063 store_immediate_info *store;
4064 unsigned int num_clobber_stmts = 0;
4065 if (group->stores[0]->rhs_code == INTEGER_CST)
4067 unsigned int i;
4068 FOR_EACH_VEC_ELT (group->stores, i, store)
4069 if (gimple_clobber_p (store->stmt))
4070 num_clobber_stmts++;
4071 else if (TREE_CODE (gimple_assign_rhs1 (store->stmt)) == CONSTRUCTOR
4072 && CONSTRUCTOR_NELTS (gimple_assign_rhs1 (store->stmt)) == 0
4073 && group->start == store->bitpos
4074 && group->width == store->bitsize
4075 && (group->start % BITS_PER_UNIT) == 0
4076 && (group->width % BITS_PER_UNIT) == 0)
4078 bzero_first = true;
4079 break;
4081 else
4082 break;
4083 FOR_EACH_VEC_ELT_FROM (group->stores, i, store, i)
4084 if (gimple_clobber_p (store->stmt))
4085 num_clobber_stmts++;
4086 if (num_clobber_stmts == orig_num_stmts)
4087 return false;
4088 orig_num_stmts -= num_clobber_stmts;
4090 if (allow_unaligned_store || bzero_first)
4092 /* If unaligned stores are allowed, see how many stores we'd emit
4093 for unaligned and how many stores we'd emit for aligned stores.
4094 Only use unaligned stores if it allows fewer stores than aligned.
4095 Similarly, if there is a whole region clear first, prefer expanding
4096 it together compared to expanding clear first followed by merged
4097 further stores. */
4098 unsigned cnt[4] = { ~0U, ~0U, ~0U, ~0U };
4099 int pass_min = 0;
4100 for (int pass = 0; pass < 4; ++pass)
4102 if (!allow_unaligned_store && (pass & 1) != 0)
4103 continue;
4104 if (!bzero_first && (pass & 2) != 0)
4105 continue;
4106 cnt[pass] = split_group (group, (pass & 1) != 0,
4107 allow_unaligned_load, (pass & 2) != 0,
4108 NULL, NULL, NULL);
4109 if (cnt[pass] < cnt[pass_min])
4110 pass_min = pass;
4112 if ((pass_min & 1) == 0)
4113 allow_unaligned_store = false;
4114 if ((pass_min & 2) == 0)
4115 bzero_first = false;
4118 auto_vec<class split_store *, 32> split_stores;
4119 split_store *split_store;
4120 unsigned total_orig, total_new, i;
4121 split_group (group, allow_unaligned_store, allow_unaligned_load, bzero_first,
4122 &split_stores, &total_orig, &total_new);
4124 /* Determine if there is a clobber covering the whole group at the start,
4125 followed by proposed split stores that cover the whole group. In that
4126 case, prefer the transformation even if
4127 split_stores.length () == orig_num_stmts. */
4128 bool clobber_first = false;
4129 if (num_clobber_stmts
4130 && gimple_clobber_p (group->stores[0]->stmt)
4131 && group->start == group->stores[0]->bitpos
4132 && group->width == group->stores[0]->bitsize
4133 && (group->start % BITS_PER_UNIT) == 0
4134 && (group->width % BITS_PER_UNIT) == 0)
4136 clobber_first = true;
4137 unsigned HOST_WIDE_INT pos = group->start / BITS_PER_UNIT;
4138 FOR_EACH_VEC_ELT (split_stores, i, split_store)
4139 if (split_store->bytepos != pos)
4141 clobber_first = false;
4142 break;
4144 else
4145 pos += split_store->size / BITS_PER_UNIT;
4146 if (pos != (group->start + group->width) / BITS_PER_UNIT)
4147 clobber_first = false;
4150 if (split_stores.length () >= orig_num_stmts + clobber_first)
4153 /* We didn't manage to reduce the number of statements. Bail out. */
4154 if (dump_file && (dump_flags & TDF_DETAILS))
4155 fprintf (dump_file, "Exceeded original number of stmts (%u)."
4156 " Not profitable to emit new sequence.\n",
4157 orig_num_stmts);
4158 FOR_EACH_VEC_ELT (split_stores, i, split_store)
4159 delete split_store;
4160 return false;
4162 if (total_orig <= total_new)
4164 /* If number of estimated new statements is above estimated original
4165 statements, bail out too. */
4166 if (dump_file && (dump_flags & TDF_DETAILS))
4167 fprintf (dump_file, "Estimated number of original stmts (%u)"
4168 " not larger than estimated number of new"
4169 " stmts (%u).\n",
4170 total_orig, total_new);
4171 FOR_EACH_VEC_ELT (split_stores, i, split_store)
4172 delete split_store;
4173 return false;
4175 if (group->stores[0]->rhs_code == INTEGER_CST)
4177 bool all_orig = true;
4178 FOR_EACH_VEC_ELT (split_stores, i, split_store)
4179 if (!split_store->orig)
4181 all_orig = false;
4182 break;
4184 if (all_orig)
4186 unsigned int cnt = split_stores.length ();
4187 store_immediate_info *store;
4188 FOR_EACH_VEC_ELT (group->stores, i, store)
4189 if (gimple_clobber_p (store->stmt))
4190 ++cnt;
4191 /* Punt if we wouldn't make any real changes, i.e. keep all
4192 orig stmts + all clobbers. */
4193 if (cnt == group->stores.length ())
4195 if (dump_file && (dump_flags & TDF_DETAILS))
4196 fprintf (dump_file, "Exceeded original number of stmts (%u)."
4197 " Not profitable to emit new sequence.\n",
4198 orig_num_stmts);
4199 FOR_EACH_VEC_ELT (split_stores, i, split_store)
4200 delete split_store;
4201 return false;
4206 gimple_stmt_iterator last_gsi = gsi_for_stmt (group->last_stmt);
4207 gimple_seq seq = NULL;
4208 tree last_vdef, new_vuse;
4209 last_vdef = gimple_vdef (group->last_stmt);
4210 new_vuse = gimple_vuse (group->last_stmt);
4211 tree bswap_res = NULL_TREE;
4213 /* Clobbers are not removed. */
4214 if (gimple_clobber_p (group->last_stmt))
4216 new_vuse = make_ssa_name (gimple_vop (cfun), group->last_stmt);
4217 gimple_set_vdef (group->last_stmt, new_vuse);
4220 if (group->stores[0]->rhs_code == LROTATE_EXPR
4221 || group->stores[0]->rhs_code == NOP_EXPR)
4223 tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type;
4224 gimple *ins_stmt = group->stores[0]->ins_stmt;
4225 struct symbolic_number *n = &group->stores[0]->n;
4226 bool bswap = group->stores[0]->rhs_code == LROTATE_EXPR;
4228 switch (n->range)
4230 case 16:
4231 load_type = bswap_type = uint16_type_node;
4232 break;
4233 case 32:
4234 load_type = uint32_type_node;
4235 if (bswap)
4237 fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
4238 bswap_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
4240 break;
4241 case 64:
4242 load_type = uint64_type_node;
4243 if (bswap)
4245 fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
4246 bswap_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
4248 break;
4249 default:
4250 gcc_unreachable ();
4253 /* If the loads have each vuse of the corresponding store,
4254 we've checked the aliasing already in try_coalesce_bswap and
4255 we want to sink the need load into seq. So need to use new_vuse
4256 on the load. */
4257 if (n->base_addr)
4259 if (n->vuse == NULL)
4261 n->vuse = new_vuse;
4262 ins_stmt = NULL;
4264 else
4265 /* Update vuse in case it has changed by output_merged_stores. */
4266 n->vuse = gimple_vuse (ins_stmt);
4268 bswap_res = bswap_replace (gsi_start (seq), ins_stmt, fndecl,
4269 bswap_type, load_type, n, bswap,
4270 ~(uint64_t) 0);
4271 gcc_assert (bswap_res);
4274 gimple *stmt = NULL;
4275 auto_vec<gimple *, 32> orig_stmts;
4276 gimple_seq this_seq;
4277 tree addr = force_gimple_operand_1 (unshare_expr (base_addr), &this_seq,
4278 is_gimple_mem_ref_addr, NULL_TREE);
4279 gimple_seq_add_seq_without_update (&seq, this_seq);
4281 tree load_addr[2] = { NULL_TREE, NULL_TREE };
4282 gimple_seq load_seq[2] = { NULL, NULL };
4283 gimple_stmt_iterator load_gsi[2] = { gsi_none (), gsi_none () };
4284 for (int j = 0; j < 2; ++j)
4286 store_operand_info &op = group->stores[0]->ops[j];
4287 if (op.base_addr == NULL_TREE)
4288 continue;
4290 store_immediate_info *infol = group->stores.last ();
4291 if (gimple_vuse (op.stmt) == gimple_vuse (infol->ops[j].stmt))
4293 /* We can't pick the location randomly; while we've verified
4294 all the loads have the same vuse, they can be still in different
4295 basic blocks and we need to pick the one from the last bb:
4296 int x = q[0];
4297 if (x == N) return;
4298 int y = q[1];
4299 p[0] = x;
4300 p[1] = y;
4301 otherwise if we put the wider load at the q[0] load, we might
4302 segfault if q[1] is not mapped. */
4303 basic_block bb = gimple_bb (op.stmt);
4304 gimple *ostmt = op.stmt;
4305 store_immediate_info *info;
4306 FOR_EACH_VEC_ELT (group->stores, i, info)
4308 gimple *tstmt = info->ops[j].stmt;
4309 basic_block tbb = gimple_bb (tstmt);
4310 if (dominated_by_p (CDI_DOMINATORS, tbb, bb))
4312 ostmt = tstmt;
4313 bb = tbb;
4316 load_gsi[j] = gsi_for_stmt (ostmt);
4317 load_addr[j]
4318 = force_gimple_operand_1 (unshare_expr (op.base_addr),
4319 &load_seq[j], is_gimple_mem_ref_addr,
4320 NULL_TREE);
4322 else if (operand_equal_p (base_addr, op.base_addr, 0))
4323 load_addr[j] = addr;
4324 else
4326 load_addr[j]
4327 = force_gimple_operand_1 (unshare_expr (op.base_addr),
4328 &this_seq, is_gimple_mem_ref_addr,
4329 NULL_TREE);
4330 gimple_seq_add_seq_without_update (&seq, this_seq);
4334 FOR_EACH_VEC_ELT (split_stores, i, split_store)
4336 const unsigned HOST_WIDE_INT try_size = split_store->size;
4337 const unsigned HOST_WIDE_INT try_pos = split_store->bytepos;
4338 const unsigned HOST_WIDE_INT try_bitpos = try_pos * BITS_PER_UNIT;
4339 const unsigned HOST_WIDE_INT try_align = split_store->align;
4340 const unsigned HOST_WIDE_INT try_offset = try_pos - start_byte_pos;
4341 tree dest, src;
4342 location_t loc;
4344 if (split_store->orig)
4346 /* If there is just a single non-clobber constituent store
4347 which covers the whole area, just reuse the lhs and rhs. */
4348 gimple *orig_stmt = NULL;
4349 store_immediate_info *store;
4350 unsigned int j;
4351 FOR_EACH_VEC_ELT (split_store->orig_stores, j, store)
4352 if (!gimple_clobber_p (store->stmt))
4354 orig_stmt = store->stmt;
4355 break;
4357 dest = gimple_assign_lhs (orig_stmt);
4358 src = gimple_assign_rhs1 (orig_stmt);
4359 loc = gimple_location (orig_stmt);
4361 else
4363 store_immediate_info *info;
4364 unsigned short clique, base;
4365 unsigned int k;
4366 FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
4367 orig_stmts.safe_push (info->stmt);
4368 tree offset_type
4369 = get_alias_type_for_stmts (orig_stmts, false, &clique, &base);
4370 tree dest_type;
4371 loc = get_location_for_stmts (orig_stmts);
4372 orig_stmts.truncate (0);
4374 if (group->string_concatenation)
4375 dest_type
4376 = build_array_type_nelts (char_type_node,
4377 try_size / BITS_PER_UNIT);
4378 else
4380 dest_type = build_nonstandard_integer_type (try_size, UNSIGNED);
4381 dest_type = build_aligned_type (dest_type, try_align);
4383 dest = fold_build2 (MEM_REF, dest_type, addr,
4384 build_int_cst (offset_type, try_pos));
4385 if (TREE_CODE (dest) == MEM_REF)
4387 MR_DEPENDENCE_CLIQUE (dest) = clique;
4388 MR_DEPENDENCE_BASE (dest) = base;
4391 tree mask;
4392 if (bswap_res || group->string_concatenation)
4393 mask = integer_zero_node;
4394 else
4395 mask = native_interpret_expr (dest_type,
4396 group->mask + try_offset,
4397 group->buf_size);
4399 tree ops[2];
4400 for (int j = 0;
4401 j < 1 + (split_store->orig_stores[0]->ops[1].val != NULL_TREE);
4402 ++j)
4404 store_operand_info &op = split_store->orig_stores[0]->ops[j];
4405 if (bswap_res)
4406 ops[j] = bswap_res;
4407 else if (group->string_concatenation)
4409 ops[j] = build_string (try_size / BITS_PER_UNIT,
4410 (const char *) group->val + try_offset);
4411 TREE_TYPE (ops[j]) = dest_type;
4413 else if (op.base_addr)
4415 FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
4416 orig_stmts.safe_push (info->ops[j].stmt);
4418 offset_type = get_alias_type_for_stmts (orig_stmts, true,
4419 &clique, &base);
4420 location_t load_loc = get_location_for_stmts (orig_stmts);
4421 orig_stmts.truncate (0);
4423 unsigned HOST_WIDE_INT load_align = group->load_align[j];
4424 unsigned HOST_WIDE_INT align_bitpos
4425 = known_alignment (try_bitpos
4426 - split_store->orig_stores[0]->bitpos
4427 + op.bitpos);
4428 if (align_bitpos & (load_align - 1))
4429 load_align = least_bit_hwi (align_bitpos);
4431 tree load_int_type
4432 = build_nonstandard_integer_type (try_size, UNSIGNED);
4433 load_int_type
4434 = build_aligned_type (load_int_type, load_align);
4436 poly_uint64 load_pos
4437 = exact_div (try_bitpos
4438 - split_store->orig_stores[0]->bitpos
4439 + op.bitpos,
4440 BITS_PER_UNIT);
4441 ops[j] = fold_build2 (MEM_REF, load_int_type, load_addr[j],
4442 build_int_cst (offset_type, load_pos));
4443 if (TREE_CODE (ops[j]) == MEM_REF)
4445 MR_DEPENDENCE_CLIQUE (ops[j]) = clique;
4446 MR_DEPENDENCE_BASE (ops[j]) = base;
4448 if (!integer_zerop (mask))
4450 /* The load might load some bits (that will be masked
4451 off later on) uninitialized, avoid -W*uninitialized
4452 warnings in that case. */
4453 suppress_warning (ops[j], OPT_Wuninitialized);
4456 stmt = gimple_build_assign (make_ssa_name (dest_type), ops[j]);
4457 gimple_set_location (stmt, load_loc);
4458 if (gsi_bb (load_gsi[j]))
4460 gimple_set_vuse (stmt, gimple_vuse (op.stmt));
4461 gimple_seq_add_stmt_without_update (&load_seq[j], stmt);
4463 else
4465 gimple_set_vuse (stmt, new_vuse);
4466 gimple_seq_add_stmt_without_update (&seq, stmt);
4468 ops[j] = gimple_assign_lhs (stmt);
4469 tree xor_mask;
4470 enum tree_code inv_op
4471 = invert_op (split_store, j, dest_type, xor_mask);
4472 if (inv_op != NOP_EXPR)
4474 stmt = gimple_build_assign (make_ssa_name (dest_type),
4475 inv_op, ops[j], xor_mask);
4476 gimple_set_location (stmt, load_loc);
4477 ops[j] = gimple_assign_lhs (stmt);
4479 if (gsi_bb (load_gsi[j]))
4480 gimple_seq_add_stmt_without_update (&load_seq[j],
4481 stmt);
4482 else
4483 gimple_seq_add_stmt_without_update (&seq, stmt);
4486 else
4487 ops[j] = native_interpret_expr (dest_type,
4488 group->val + try_offset,
4489 group->buf_size);
4492 switch (split_store->orig_stores[0]->rhs_code)
4494 case BIT_AND_EXPR:
4495 case BIT_IOR_EXPR:
4496 case BIT_XOR_EXPR:
4497 FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
4499 tree rhs1 = gimple_assign_rhs1 (info->stmt);
4500 orig_stmts.safe_push (SSA_NAME_DEF_STMT (rhs1));
4502 location_t bit_loc;
4503 bit_loc = get_location_for_stmts (orig_stmts);
4504 orig_stmts.truncate (0);
4506 stmt
4507 = gimple_build_assign (make_ssa_name (dest_type),
4508 split_store->orig_stores[0]->rhs_code,
4509 ops[0], ops[1]);
4510 gimple_set_location (stmt, bit_loc);
4511 /* If there is just one load and there is a separate
4512 load_seq[0], emit the bitwise op right after it. */
4513 if (load_addr[1] == NULL_TREE && gsi_bb (load_gsi[0]))
4514 gimple_seq_add_stmt_without_update (&load_seq[0], stmt);
4515 /* Otherwise, if at least one load is in seq, we need to
4516 emit the bitwise op right before the store. If there
4517 are two loads and are emitted somewhere else, it would
4518 be better to emit the bitwise op as early as possible;
4519 we don't track where that would be possible right now
4520 though. */
4521 else
4522 gimple_seq_add_stmt_without_update (&seq, stmt);
4523 src = gimple_assign_lhs (stmt);
4524 tree xor_mask;
4525 enum tree_code inv_op;
4526 inv_op = invert_op (split_store, 2, dest_type, xor_mask);
4527 if (inv_op != NOP_EXPR)
4529 stmt = gimple_build_assign (make_ssa_name (dest_type),
4530 inv_op, src, xor_mask);
4531 gimple_set_location (stmt, bit_loc);
4532 if (load_addr[1] == NULL_TREE && gsi_bb (load_gsi[0]))
4533 gimple_seq_add_stmt_without_update (&load_seq[0], stmt);
4534 else
4535 gimple_seq_add_stmt_without_update (&seq, stmt);
4536 src = gimple_assign_lhs (stmt);
4538 break;
4539 case LROTATE_EXPR:
4540 case NOP_EXPR:
4541 src = ops[0];
4542 if (!is_gimple_val (src))
4544 stmt = gimple_build_assign (make_ssa_name (TREE_TYPE (src)),
4545 src);
4546 gimple_seq_add_stmt_without_update (&seq, stmt);
4547 src = gimple_assign_lhs (stmt);
4549 if (!useless_type_conversion_p (dest_type, TREE_TYPE (src)))
4551 stmt = gimple_build_assign (make_ssa_name (dest_type),
4552 NOP_EXPR, src);
4553 gimple_seq_add_stmt_without_update (&seq, stmt);
4554 src = gimple_assign_lhs (stmt);
4556 inv_op = invert_op (split_store, 2, dest_type, xor_mask);
4557 if (inv_op != NOP_EXPR)
4559 stmt = gimple_build_assign (make_ssa_name (dest_type),
4560 inv_op, src, xor_mask);
4561 gimple_set_location (stmt, loc);
4562 gimple_seq_add_stmt_without_update (&seq, stmt);
4563 src = gimple_assign_lhs (stmt);
4565 break;
4566 default:
4567 src = ops[0];
4568 break;
4571 /* If bit insertion is required, we use the source as an accumulator
4572 into which the successive bit-field values are manually inserted.
4573 FIXME: perhaps use BIT_INSERT_EXPR instead in some cases? */
4574 if (group->bit_insertion)
4575 FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
4576 if (info->rhs_code == BIT_INSERT_EXPR
4577 && info->bitpos < try_bitpos + try_size
4578 && info->bitpos + info->bitsize > try_bitpos)
4580 /* Mask, truncate, convert to final type, shift and ior into
4581 the accumulator. Note that every step can be a no-op. */
4582 const HOST_WIDE_INT start_gap = info->bitpos - try_bitpos;
4583 const HOST_WIDE_INT end_gap
4584 = (try_bitpos + try_size) - (info->bitpos + info->bitsize);
4585 tree tem = info->ops[0].val;
4586 if (!INTEGRAL_TYPE_P (TREE_TYPE (tem)))
4588 const unsigned HOST_WIDE_INT size
4589 = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (tem)));
4590 tree integer_type
4591 = build_nonstandard_integer_type (size, UNSIGNED);
4592 tem = gimple_build (&seq, loc, VIEW_CONVERT_EXPR,
4593 integer_type, tem);
4595 if (TYPE_PRECISION (TREE_TYPE (tem)) <= info->bitsize)
4597 tree bitfield_type
4598 = build_nonstandard_integer_type (info->bitsize,
4599 UNSIGNED);
4600 tem = gimple_convert (&seq, loc, bitfield_type, tem);
4602 else if ((BYTES_BIG_ENDIAN ? start_gap : end_gap) > 0)
4604 const unsigned HOST_WIDE_INT imask
4605 = (HOST_WIDE_INT_1U << info->bitsize) - 1;
4606 tem = gimple_build (&seq, loc,
4607 BIT_AND_EXPR, TREE_TYPE (tem), tem,
4608 build_int_cst (TREE_TYPE (tem),
4609 imask));
4611 const HOST_WIDE_INT shift
4612 = (BYTES_BIG_ENDIAN ? end_gap : start_gap);
4613 if (shift < 0)
4614 tem = gimple_build (&seq, loc,
4615 RSHIFT_EXPR, TREE_TYPE (tem), tem,
4616 build_int_cst (NULL_TREE, -shift));
4617 tem = gimple_convert (&seq, loc, dest_type, tem);
4618 if (shift > 0)
4619 tem = gimple_build (&seq, loc,
4620 LSHIFT_EXPR, dest_type, tem,
4621 build_int_cst (NULL_TREE, shift));
4622 src = gimple_build (&seq, loc,
4623 BIT_IOR_EXPR, dest_type, tem, src);
4626 if (!integer_zerop (mask))
4628 tree tem = make_ssa_name (dest_type);
4629 tree load_src = unshare_expr (dest);
4630 /* The load might load some or all bits uninitialized,
4631 avoid -W*uninitialized warnings in that case.
4632 As optimization, it would be nice if all the bits are
4633 provably uninitialized (no stores at all yet or previous
4634 store a CLOBBER) we'd optimize away the load and replace
4635 it e.g. with 0. */
4636 suppress_warning (load_src, OPT_Wuninitialized);
4637 stmt = gimple_build_assign (tem, load_src);
4638 gimple_set_location (stmt, loc);
4639 gimple_set_vuse (stmt, new_vuse);
4640 gimple_seq_add_stmt_without_update (&seq, stmt);
4642 /* FIXME: If there is a single chunk of zero bits in mask,
4643 perhaps use BIT_INSERT_EXPR instead? */
4644 stmt = gimple_build_assign (make_ssa_name (dest_type),
4645 BIT_AND_EXPR, tem, mask);
4646 gimple_set_location (stmt, loc);
4647 gimple_seq_add_stmt_without_update (&seq, stmt);
4648 tem = gimple_assign_lhs (stmt);
4650 if (TREE_CODE (src) == INTEGER_CST)
4651 src = wide_int_to_tree (dest_type,
4652 wi::bit_and_not (wi::to_wide (src),
4653 wi::to_wide (mask)));
4654 else
4656 tree nmask
4657 = wide_int_to_tree (dest_type,
4658 wi::bit_not (wi::to_wide (mask)));
4659 stmt = gimple_build_assign (make_ssa_name (dest_type),
4660 BIT_AND_EXPR, src, nmask);
4661 gimple_set_location (stmt, loc);
4662 gimple_seq_add_stmt_without_update (&seq, stmt);
4663 src = gimple_assign_lhs (stmt);
4665 stmt = gimple_build_assign (make_ssa_name (dest_type),
4666 BIT_IOR_EXPR, tem, src);
4667 gimple_set_location (stmt, loc);
4668 gimple_seq_add_stmt_without_update (&seq, stmt);
4669 src = gimple_assign_lhs (stmt);
4673 stmt = gimple_build_assign (dest, src);
4674 gimple_set_location (stmt, loc);
4675 gimple_set_vuse (stmt, new_vuse);
4676 gimple_seq_add_stmt_without_update (&seq, stmt);
4678 if (group->lp_nr && stmt_could_throw_p (cfun, stmt))
4679 add_stmt_to_eh_lp (stmt, group->lp_nr);
4681 tree new_vdef;
4682 if (i < split_stores.length () - 1)
4683 new_vdef = make_ssa_name (gimple_vop (cfun), stmt);
4684 else
4685 new_vdef = last_vdef;
4687 gimple_set_vdef (stmt, new_vdef);
4688 SSA_NAME_DEF_STMT (new_vdef) = stmt;
4689 new_vuse = new_vdef;
4692 FOR_EACH_VEC_ELT (split_stores, i, split_store)
4693 delete split_store;
4695 gcc_assert (seq);
4696 if (dump_file)
4698 fprintf (dump_file,
4699 "New sequence of %u stores to replace old one of %u stores\n",
4700 split_stores.length (), orig_num_stmts);
4701 if (dump_flags & TDF_DETAILS)
4702 print_gimple_seq (dump_file, seq, 0, TDF_VOPS | TDF_MEMSYMS);
4705 if (gimple_clobber_p (group->last_stmt))
4706 update_stmt (group->last_stmt);
4708 if (group->lp_nr > 0)
4710 /* We're going to insert a sequence of (potentially) throwing stores
4711 into an active EH region. This means that we're going to create
4712 new basic blocks with EH edges pointing to the post landing pad
4713 and, therefore, to have to update its PHI nodes, if any. For the
4714 virtual PHI node, we're going to use the VDEFs created above, but
4715 for the other nodes, we need to record the original reaching defs. */
4716 eh_landing_pad lp = get_eh_landing_pad_from_number (group->lp_nr);
4717 basic_block lp_bb = label_to_block (cfun, lp->post_landing_pad);
4718 basic_block last_bb = gimple_bb (group->last_stmt);
4719 edge last_edge = find_edge (last_bb, lp_bb);
4720 auto_vec<tree, 16> last_defs;
4721 gphi_iterator gpi;
4722 for (gpi = gsi_start_phis (lp_bb); !gsi_end_p (gpi); gsi_next (&gpi))
4724 gphi *phi = gpi.phi ();
4725 tree last_def;
4726 if (virtual_operand_p (gimple_phi_result (phi)))
4727 last_def = NULL_TREE;
4728 else
4729 last_def = gimple_phi_arg_def (phi, last_edge->dest_idx);
4730 last_defs.safe_push (last_def);
4733 /* Do the insertion. Then, if new basic blocks have been created in the
4734 process, rewind the chain of VDEFs create above to walk the new basic
4735 blocks and update the corresponding arguments of the PHI nodes. */
4736 update_modified_stmts (seq);
4737 if (gimple_find_sub_bbs (seq, &last_gsi))
4738 while (last_vdef != gimple_vuse (group->last_stmt))
4740 gimple *stmt = SSA_NAME_DEF_STMT (last_vdef);
4741 if (stmt_could_throw_p (cfun, stmt))
4743 edge new_edge = find_edge (gimple_bb (stmt), lp_bb);
4744 unsigned int i;
4745 for (gpi = gsi_start_phis (lp_bb), i = 0;
4746 !gsi_end_p (gpi);
4747 gsi_next (&gpi), i++)
4749 gphi *phi = gpi.phi ();
4750 tree new_def;
4751 if (virtual_operand_p (gimple_phi_result (phi)))
4752 new_def = last_vdef;
4753 else
4754 new_def = last_defs[i];
4755 add_phi_arg (phi, new_def, new_edge, UNKNOWN_LOCATION);
4758 last_vdef = gimple_vuse (stmt);
4761 else
4762 gsi_insert_seq_after (&last_gsi, seq, GSI_SAME_STMT);
4764 for (int j = 0; j < 2; ++j)
4765 if (load_seq[j])
4766 gsi_insert_seq_after (&load_gsi[j], load_seq[j], GSI_SAME_STMT);
4768 return true;
4771 /* Process the merged_store_group objects created in the coalescing phase.
4772 The stores are all against the base object BASE.
4773 Try to output the widened stores and delete the original statements if
4774 successful. Return true iff any changes were made. */
4776 bool
4777 imm_store_chain_info::output_merged_stores ()
4779 unsigned int i;
4780 merged_store_group *merged_store;
4781 bool ret = false;
4782 FOR_EACH_VEC_ELT (m_merged_store_groups, i, merged_store)
4784 if (dbg_cnt (store_merging)
4785 && output_merged_store (merged_store))
4787 unsigned int j;
4788 store_immediate_info *store;
4789 FOR_EACH_VEC_ELT (merged_store->stores, j, store)
4791 gimple *stmt = store->stmt;
4792 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4793 /* Don't remove clobbers, they are still useful even if
4794 everything is overwritten afterwards. */
4795 if (gimple_clobber_p (stmt))
4796 continue;
4797 gsi_remove (&gsi, true);
4798 if (store->lp_nr)
4799 remove_stmt_from_eh_lp (stmt);
4800 if (stmt != merged_store->last_stmt)
4802 unlink_stmt_vdef (stmt);
4803 release_defs (stmt);
4806 ret = true;
4809 if (ret && dump_file)
4810 fprintf (dump_file, "Merging successful!\n");
4812 return ret;
4815 /* Coalesce the store_immediate_info objects recorded against the base object
4816 BASE in the first phase and output them.
4817 Delete the allocated structures.
4818 Return true if any changes were made. */
4820 bool
4821 imm_store_chain_info::terminate_and_process_chain ()
4823 if (dump_file && (dump_flags & TDF_DETAILS))
4824 fprintf (dump_file, "Terminating chain with %u stores\n",
4825 m_store_info.length ());
4826 /* Process store chain. */
4827 bool ret = false;
4828 if (m_store_info.length () > 1)
4830 ret = coalesce_immediate_stores ();
4831 if (ret)
4832 ret = output_merged_stores ();
4835 /* Delete all the entries we allocated ourselves. */
4836 store_immediate_info *info;
4837 unsigned int i;
4838 FOR_EACH_VEC_ELT (m_store_info, i, info)
4839 delete info;
4841 merged_store_group *merged_info;
4842 FOR_EACH_VEC_ELT (m_merged_store_groups, i, merged_info)
4843 delete merged_info;
4845 return ret;
4848 /* Return true iff LHS is a destination potentially interesting for
4849 store merging. In practice these are the codes that get_inner_reference
4850 can process. */
4852 static bool
4853 lhs_valid_for_store_merging_p (tree lhs)
4855 if (DECL_P (lhs))
4856 return true;
4858 switch (TREE_CODE (lhs))
4860 case ARRAY_REF:
4861 case ARRAY_RANGE_REF:
4862 case BIT_FIELD_REF:
4863 case COMPONENT_REF:
4864 case MEM_REF:
4865 case VIEW_CONVERT_EXPR:
4866 return true;
4867 default:
4868 return false;
4872 /* Return true if the tree RHS is a constant we want to consider
4873 during store merging. In practice accept all codes that
4874 native_encode_expr accepts. */
4876 static bool
4877 rhs_valid_for_store_merging_p (tree rhs)
4879 unsigned HOST_WIDE_INT size;
4880 if (TREE_CODE (rhs) == CONSTRUCTOR
4881 && CONSTRUCTOR_NELTS (rhs) == 0
4882 && TYPE_SIZE_UNIT (TREE_TYPE (rhs))
4883 && tree_fits_uhwi_p (TYPE_SIZE_UNIT (TREE_TYPE (rhs))))
4884 return true;
4885 return (GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (rhs))).is_constant (&size)
4886 && native_encode_expr (rhs, NULL, size) != 0);
4889 /* Adjust *PBITPOS, *PBITREGION_START and *PBITREGION_END by BYTE_OFF bytes
4890 and return true on success or false on failure. */
4892 static bool
4893 adjust_bit_pos (poly_offset_int byte_off,
4894 poly_int64 *pbitpos,
4895 poly_uint64 *pbitregion_start,
4896 poly_uint64 *pbitregion_end)
4898 poly_offset_int bit_off = byte_off << LOG2_BITS_PER_UNIT;
4899 bit_off += *pbitpos;
4901 if (known_ge (bit_off, 0) && bit_off.to_shwi (pbitpos))
4903 if (maybe_ne (*pbitregion_end, 0U))
4905 bit_off = byte_off << LOG2_BITS_PER_UNIT;
4906 bit_off += *pbitregion_start;
4907 if (bit_off.to_uhwi (pbitregion_start))
4909 bit_off = byte_off << LOG2_BITS_PER_UNIT;
4910 bit_off += *pbitregion_end;
4911 if (!bit_off.to_uhwi (pbitregion_end))
4912 *pbitregion_end = 0;
4914 else
4915 *pbitregion_end = 0;
4917 return true;
4919 else
4920 return false;
4923 /* If MEM is a memory reference usable for store merging (either as
4924 store destination or for loads), return the non-NULL base_addr
4925 and set *PBITSIZE, *PBITPOS, *PBITREGION_START and *PBITREGION_END.
4926 Otherwise return NULL, *PBITPOS should be still valid even for that
4927 case. */
4929 static tree
4930 mem_valid_for_store_merging (tree mem, poly_uint64 *pbitsize,
4931 poly_uint64 *pbitpos,
4932 poly_uint64 *pbitregion_start,
4933 poly_uint64 *pbitregion_end)
4935 poly_int64 bitsize, bitpos;
4936 poly_uint64 bitregion_start = 0, bitregion_end = 0;
4937 machine_mode mode;
4938 int unsignedp = 0, reversep = 0, volatilep = 0;
4939 tree offset;
4940 tree base_addr = get_inner_reference (mem, &bitsize, &bitpos, &offset, &mode,
4941 &unsignedp, &reversep, &volatilep);
4942 *pbitsize = bitsize;
4943 if (known_le (bitsize, 0))
4944 return NULL_TREE;
4946 if (TREE_CODE (mem) == COMPONENT_REF
4947 && DECL_BIT_FIELD_TYPE (TREE_OPERAND (mem, 1)))
4949 get_bit_range (&bitregion_start, &bitregion_end, mem, &bitpos, &offset);
4950 if (maybe_ne (bitregion_end, 0U))
4951 bitregion_end += 1;
4954 if (reversep)
4955 return NULL_TREE;
4957 /* We do not want to rewrite TARGET_MEM_REFs. */
4958 if (TREE_CODE (base_addr) == TARGET_MEM_REF)
4959 return NULL_TREE;
4960 /* In some cases get_inner_reference may return a
4961 MEM_REF [ptr + byteoffset]. For the purposes of this pass
4962 canonicalize the base_addr to MEM_REF [ptr] and take
4963 byteoffset into account in the bitpos. This occurs in
4964 PR 23684 and this way we can catch more chains. */
4965 else if (TREE_CODE (base_addr) == MEM_REF)
4967 if (!adjust_bit_pos (mem_ref_offset (base_addr), &bitpos,
4968 &bitregion_start, &bitregion_end))
4969 return NULL_TREE;
4970 base_addr = TREE_OPERAND (base_addr, 0);
4972 /* get_inner_reference returns the base object, get at its
4973 address now. */
4974 else
4976 if (maybe_lt (bitpos, 0))
4977 return NULL_TREE;
4978 base_addr = build_fold_addr_expr (base_addr);
4981 if (offset)
4983 /* If the access is variable offset then a base decl has to be
4984 address-taken to be able to emit pointer-based stores to it.
4985 ??? We might be able to get away with re-using the original
4986 base up to the first variable part and then wrapping that inside
4987 a BIT_FIELD_REF. */
4988 tree base = get_base_address (base_addr);
4989 if (!base || (DECL_P (base) && !TREE_ADDRESSABLE (base)))
4990 return NULL_TREE;
4992 /* Similarly to above for the base, remove constant from the offset. */
4993 if (TREE_CODE (offset) == PLUS_EXPR
4994 && TREE_CODE (TREE_OPERAND (offset, 1)) == INTEGER_CST
4995 && adjust_bit_pos (wi::to_poly_offset (TREE_OPERAND (offset, 1)),
4996 &bitpos, &bitregion_start, &bitregion_end))
4997 offset = TREE_OPERAND (offset, 0);
4999 base_addr = build2 (POINTER_PLUS_EXPR, TREE_TYPE (base_addr),
5000 base_addr, offset);
5003 if (known_eq (bitregion_end, 0U))
5005 bitregion_start = round_down_to_byte_boundary (bitpos);
5006 bitregion_end = round_up_to_byte_boundary (bitpos + bitsize);
5009 *pbitsize = bitsize;
5010 *pbitpos = bitpos;
5011 *pbitregion_start = bitregion_start;
5012 *pbitregion_end = bitregion_end;
5013 return base_addr;
5016 /* Return true if STMT is a load that can be used for store merging.
5017 In that case fill in *OP. BITSIZE, BITPOS, BITREGION_START and
5018 BITREGION_END are properties of the corresponding store. */
5020 static bool
5021 handled_load (gimple *stmt, store_operand_info *op,
5022 poly_uint64 bitsize, poly_uint64 bitpos,
5023 poly_uint64 bitregion_start, poly_uint64 bitregion_end)
5025 if (!is_gimple_assign (stmt))
5026 return false;
5027 if (gimple_assign_rhs_code (stmt) == BIT_NOT_EXPR)
5029 tree rhs1 = gimple_assign_rhs1 (stmt);
5030 if (TREE_CODE (rhs1) == SSA_NAME
5031 && handled_load (SSA_NAME_DEF_STMT (rhs1), op, bitsize, bitpos,
5032 bitregion_start, bitregion_end))
5034 /* Don't allow _1 = load; _2 = ~1; _3 = ~_2; which should have
5035 been optimized earlier, but if allowed here, would confuse the
5036 multiple uses counting. */
5037 if (op->bit_not_p)
5038 return false;
5039 op->bit_not_p = !op->bit_not_p;
5040 return true;
5042 return false;
5044 if (gimple_vuse (stmt)
5045 && gimple_assign_load_p (stmt)
5046 && !stmt_can_throw_internal (cfun, stmt)
5047 && !gimple_has_volatile_ops (stmt))
5049 tree mem = gimple_assign_rhs1 (stmt);
5050 op->base_addr
5051 = mem_valid_for_store_merging (mem, &op->bitsize, &op->bitpos,
5052 &op->bitregion_start,
5053 &op->bitregion_end);
5054 if (op->base_addr != NULL_TREE
5055 && known_eq (op->bitsize, bitsize)
5056 && multiple_p (op->bitpos - bitpos, BITS_PER_UNIT)
5057 && known_ge (op->bitpos - op->bitregion_start,
5058 bitpos - bitregion_start)
5059 && known_ge (op->bitregion_end - op->bitpos,
5060 bitregion_end - bitpos))
5062 op->stmt = stmt;
5063 op->val = mem;
5064 op->bit_not_p = false;
5065 return true;
5068 return false;
5071 /* Return the index number of the landing pad for STMT, if any. */
5073 static int
5074 lp_nr_for_store (gimple *stmt)
5076 if (!cfun->can_throw_non_call_exceptions || !cfun->eh)
5077 return 0;
5079 if (!stmt_could_throw_p (cfun, stmt))
5080 return 0;
5082 return lookup_stmt_eh_lp (stmt);
5085 /* Record the store STMT for store merging optimization if it can be
5086 optimized. Return true if any changes were made. */
5088 bool
5089 pass_store_merging::process_store (gimple *stmt)
5091 tree lhs = gimple_assign_lhs (stmt);
5092 tree rhs = gimple_assign_rhs1 (stmt);
5093 poly_uint64 bitsize, bitpos = 0;
5094 poly_uint64 bitregion_start = 0, bitregion_end = 0;
5095 tree base_addr
5096 = mem_valid_for_store_merging (lhs, &bitsize, &bitpos,
5097 &bitregion_start, &bitregion_end);
5098 if (known_eq (bitsize, 0U))
5099 return false;
5101 bool invalid = (base_addr == NULL_TREE
5102 || (maybe_gt (bitsize,
5103 (unsigned int) MAX_BITSIZE_MODE_ANY_INT)
5104 && TREE_CODE (rhs) != INTEGER_CST
5105 && (TREE_CODE (rhs) != CONSTRUCTOR
5106 || CONSTRUCTOR_NELTS (rhs) != 0)));
5107 enum tree_code rhs_code = ERROR_MARK;
5108 bool bit_not_p = false;
5109 struct symbolic_number n;
5110 gimple *ins_stmt = NULL;
5111 store_operand_info ops[2];
5112 if (invalid)
5114 else if (TREE_CODE (rhs) == STRING_CST)
5116 rhs_code = STRING_CST;
5117 ops[0].val = rhs;
5119 else if (rhs_valid_for_store_merging_p (rhs))
5121 rhs_code = INTEGER_CST;
5122 ops[0].val = rhs;
5124 else if (TREE_CODE (rhs) == SSA_NAME)
5126 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs), *def_stmt1, *def_stmt2;
5127 if (!is_gimple_assign (def_stmt))
5128 invalid = true;
5129 else if (handled_load (def_stmt, &ops[0], bitsize, bitpos,
5130 bitregion_start, bitregion_end))
5131 rhs_code = MEM_REF;
5132 else if (gimple_assign_rhs_code (def_stmt) == BIT_NOT_EXPR)
5134 tree rhs1 = gimple_assign_rhs1 (def_stmt);
5135 if (TREE_CODE (rhs1) == SSA_NAME
5136 && is_gimple_assign (SSA_NAME_DEF_STMT (rhs1)))
5138 bit_not_p = true;
5139 def_stmt = SSA_NAME_DEF_STMT (rhs1);
5143 if (rhs_code == ERROR_MARK && !invalid)
5144 switch ((rhs_code = gimple_assign_rhs_code (def_stmt)))
5146 case BIT_AND_EXPR:
5147 case BIT_IOR_EXPR:
5148 case BIT_XOR_EXPR:
5149 tree rhs1, rhs2;
5150 rhs1 = gimple_assign_rhs1 (def_stmt);
5151 rhs2 = gimple_assign_rhs2 (def_stmt);
5152 invalid = true;
5153 if (TREE_CODE (rhs1) != SSA_NAME)
5154 break;
5155 def_stmt1 = SSA_NAME_DEF_STMT (rhs1);
5156 if (!is_gimple_assign (def_stmt1)
5157 || !handled_load (def_stmt1, &ops[0], bitsize, bitpos,
5158 bitregion_start, bitregion_end))
5159 break;
5160 if (rhs_valid_for_store_merging_p (rhs2))
5161 ops[1].val = rhs2;
5162 else if (TREE_CODE (rhs2) != SSA_NAME)
5163 break;
5164 else
5166 def_stmt2 = SSA_NAME_DEF_STMT (rhs2);
5167 if (!is_gimple_assign (def_stmt2))
5168 break;
5169 else if (!handled_load (def_stmt2, &ops[1], bitsize, bitpos,
5170 bitregion_start, bitregion_end))
5171 break;
5173 invalid = false;
5174 break;
5175 default:
5176 invalid = true;
5177 break;
5180 unsigned HOST_WIDE_INT const_bitsize;
5181 if (bitsize.is_constant (&const_bitsize)
5182 && (const_bitsize % BITS_PER_UNIT) == 0
5183 && const_bitsize <= 64
5184 && multiple_p (bitpos, BITS_PER_UNIT))
5186 ins_stmt = find_bswap_or_nop_1 (def_stmt, &n, 12);
5187 if (ins_stmt)
5189 uint64_t nn = n.n;
5190 for (unsigned HOST_WIDE_INT i = 0;
5191 i < const_bitsize;
5192 i += BITS_PER_UNIT, nn >>= BITS_PER_MARKER)
5193 if ((nn & MARKER_MASK) == 0
5194 || (nn & MARKER_MASK) == MARKER_BYTE_UNKNOWN)
5196 ins_stmt = NULL;
5197 break;
5199 if (ins_stmt)
5201 if (invalid)
5203 rhs_code = LROTATE_EXPR;
5204 ops[0].base_addr = NULL_TREE;
5205 ops[1].base_addr = NULL_TREE;
5207 invalid = false;
5212 if (invalid
5213 && bitsize.is_constant (&const_bitsize)
5214 && ((const_bitsize % BITS_PER_UNIT) != 0
5215 || !multiple_p (bitpos, BITS_PER_UNIT))
5216 && const_bitsize <= MAX_FIXED_MODE_SIZE)
5218 /* Bypass a conversion to the bit-field type. */
5219 if (!bit_not_p
5220 && is_gimple_assign (def_stmt)
5221 && CONVERT_EXPR_CODE_P (rhs_code))
5223 tree rhs1 = gimple_assign_rhs1 (def_stmt);
5224 if (TREE_CODE (rhs1) == SSA_NAME
5225 && INTEGRAL_TYPE_P (TREE_TYPE (rhs1)))
5226 rhs = rhs1;
5228 rhs_code = BIT_INSERT_EXPR;
5229 bit_not_p = false;
5230 ops[0].val = rhs;
5231 ops[0].base_addr = NULL_TREE;
5232 ops[1].base_addr = NULL_TREE;
5233 invalid = false;
5236 else
5237 invalid = true;
5239 unsigned HOST_WIDE_INT const_bitsize, const_bitpos;
5240 unsigned HOST_WIDE_INT const_bitregion_start, const_bitregion_end;
5241 if (invalid
5242 || !bitsize.is_constant (&const_bitsize)
5243 || !bitpos.is_constant (&const_bitpos)
5244 || !bitregion_start.is_constant (&const_bitregion_start)
5245 || !bitregion_end.is_constant (&const_bitregion_end))
5246 return terminate_all_aliasing_chains (NULL, stmt);
5248 if (!ins_stmt)
5249 memset (&n, 0, sizeof (n));
5251 class imm_store_chain_info **chain_info = NULL;
5252 bool ret = false;
5253 if (base_addr)
5254 chain_info = m_stores.get (base_addr);
5256 store_immediate_info *info;
5257 if (chain_info)
5259 unsigned int ord = (*chain_info)->m_store_info.length ();
5260 info = new store_immediate_info (const_bitsize, const_bitpos,
5261 const_bitregion_start,
5262 const_bitregion_end,
5263 stmt, ord, rhs_code, n, ins_stmt,
5264 bit_not_p, lp_nr_for_store (stmt),
5265 ops[0], ops[1]);
5266 if (dump_file && (dump_flags & TDF_DETAILS))
5268 fprintf (dump_file, "Recording immediate store from stmt:\n");
5269 print_gimple_stmt (dump_file, stmt, 0);
5271 (*chain_info)->m_store_info.safe_push (info);
5272 m_n_stores++;
5273 ret |= terminate_all_aliasing_chains (chain_info, stmt);
5274 /* If we reach the limit of stores to merge in a chain terminate and
5275 process the chain now. */
5276 if ((*chain_info)->m_store_info.length ()
5277 == (unsigned int) param_max_stores_to_merge)
5279 if (dump_file && (dump_flags & TDF_DETAILS))
5280 fprintf (dump_file,
5281 "Reached maximum number of statements to merge:\n");
5282 ret |= terminate_and_process_chain (*chain_info);
5285 else
5287 /* Store aliases any existing chain? */
5288 ret |= terminate_all_aliasing_chains (NULL, stmt);
5290 /* Start a new chain. */
5291 class imm_store_chain_info *new_chain
5292 = new imm_store_chain_info (m_stores_head, base_addr);
5293 info = new store_immediate_info (const_bitsize, const_bitpos,
5294 const_bitregion_start,
5295 const_bitregion_end,
5296 stmt, 0, rhs_code, n, ins_stmt,
5297 bit_not_p, lp_nr_for_store (stmt),
5298 ops[0], ops[1]);
5299 new_chain->m_store_info.safe_push (info);
5300 m_n_stores++;
5301 m_stores.put (base_addr, new_chain);
5302 m_n_chains++;
5303 if (dump_file && (dump_flags & TDF_DETAILS))
5305 fprintf (dump_file, "Starting active chain number %u with statement:\n",
5306 m_n_chains);
5307 print_gimple_stmt (dump_file, stmt, 0);
5308 fprintf (dump_file, "The base object is:\n");
5309 print_generic_expr (dump_file, base_addr);
5310 fprintf (dump_file, "\n");
5314 /* Prune oldest chains so that after adding the chain or store above
5315 we're again within the limits set by the params. */
5316 if (m_n_chains > (unsigned)param_max_store_chains_to_track
5317 || m_n_stores > (unsigned)param_max_stores_to_track)
5319 if (dump_file && (dump_flags & TDF_DETAILS))
5320 fprintf (dump_file, "Too many chains (%u > %d) or stores (%u > %d), "
5321 "terminating oldest chain(s).\n", m_n_chains,
5322 param_max_store_chains_to_track, m_n_stores,
5323 param_max_stores_to_track);
5324 imm_store_chain_info **e = &m_stores_head;
5325 unsigned idx = 0;
5326 unsigned n_stores = 0;
5327 while (*e)
5329 if (idx >= (unsigned)param_max_store_chains_to_track
5330 || (n_stores + (*e)->m_store_info.length ()
5331 > (unsigned)param_max_stores_to_track))
5332 ret |= terminate_and_process_chain (*e);
5333 else
5335 n_stores += (*e)->m_store_info.length ();
5336 e = &(*e)->next;
5337 ++idx;
5342 return ret;
5345 /* Return true if STMT is a store valid for store merging. */
5347 static bool
5348 store_valid_for_store_merging_p (gimple *stmt)
5350 return gimple_assign_single_p (stmt)
5351 && gimple_vdef (stmt)
5352 && lhs_valid_for_store_merging_p (gimple_assign_lhs (stmt))
5353 && (!gimple_has_volatile_ops (stmt) || gimple_clobber_p (stmt));
5356 enum basic_block_status { BB_INVALID, BB_VALID, BB_EXTENDED_VALID };
5358 /* Return the status of basic block BB wrt store merging. */
5360 static enum basic_block_status
5361 get_status_for_store_merging (basic_block bb)
5363 unsigned int num_statements = 0;
5364 unsigned int num_constructors = 0;
5365 gimple_stmt_iterator gsi;
5366 edge e;
5367 gimple *last_stmt = NULL;
5369 for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5371 gimple *stmt = gsi_stmt (gsi);
5373 if (is_gimple_debug (stmt))
5374 continue;
5376 last_stmt = stmt;
5378 if (store_valid_for_store_merging_p (stmt) && ++num_statements >= 2)
5379 break;
5381 if (is_gimple_assign (stmt)
5382 && gimple_assign_rhs_code (stmt) == CONSTRUCTOR)
5384 tree rhs = gimple_assign_rhs1 (stmt);
5385 if (VECTOR_TYPE_P (TREE_TYPE (rhs))
5386 && INTEGRAL_TYPE_P (TREE_TYPE (TREE_TYPE (rhs)))
5387 && gimple_assign_lhs (stmt) != NULL_TREE)
5389 HOST_WIDE_INT sz
5390 = int_size_in_bytes (TREE_TYPE (rhs)) * BITS_PER_UNIT;
5391 if (sz == 16 || sz == 32 || sz == 64)
5393 num_constructors = 1;
5394 break;
5400 if (num_statements == 0 && num_constructors == 0)
5401 return BB_INVALID;
5403 if (cfun->can_throw_non_call_exceptions && cfun->eh
5404 && store_valid_for_store_merging_p (last_stmt)
5405 && (e = find_fallthru_edge (bb->succs))
5406 && e->dest == bb->next_bb)
5407 return BB_EXTENDED_VALID;
5409 return (num_statements >= 2 || num_constructors) ? BB_VALID : BB_INVALID;
5412 /* Entry point for the pass. Go over each basic block recording chains of
5413 immediate stores. Upon encountering a terminating statement (as defined
5414 by stmt_terminates_chain_p) process the recorded stores and emit the widened
5415 variants. */
5417 unsigned int
5418 pass_store_merging::execute (function *fun)
5420 basic_block bb;
5421 hash_set<gimple *> orig_stmts;
5422 bool changed = false, open_chains = false;
5424 /* If the function can throw and catch non-call exceptions, we'll be trying
5425 to merge stores across different basic blocks so we need to first unsplit
5426 the EH edges in order to streamline the CFG of the function. */
5427 if (cfun->can_throw_non_call_exceptions && cfun->eh)
5428 unsplit_eh_edges ();
5430 calculate_dominance_info (CDI_DOMINATORS);
5432 FOR_EACH_BB_FN (bb, fun)
5434 const basic_block_status bb_status = get_status_for_store_merging (bb);
5435 gimple_stmt_iterator gsi;
5437 if (open_chains && (bb_status == BB_INVALID || !single_pred_p (bb)))
5439 changed |= terminate_and_process_all_chains ();
5440 open_chains = false;
5443 if (bb_status == BB_INVALID)
5444 continue;
5446 if (dump_file && (dump_flags & TDF_DETAILS))
5447 fprintf (dump_file, "Processing basic block <%d>:\n", bb->index);
5449 for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); )
5451 gimple *stmt = gsi_stmt (gsi);
5452 gsi_next (&gsi);
5454 if (is_gimple_debug (stmt))
5455 continue;
5457 if (gimple_has_volatile_ops (stmt) && !gimple_clobber_p (stmt))
5459 /* Terminate all chains. */
5460 if (dump_file && (dump_flags & TDF_DETAILS))
5461 fprintf (dump_file, "Volatile access terminates "
5462 "all chains\n");
5463 changed |= terminate_and_process_all_chains ();
5464 open_chains = false;
5465 continue;
5468 if (is_gimple_assign (stmt)
5469 && gimple_assign_rhs_code (stmt) == CONSTRUCTOR
5470 && maybe_optimize_vector_constructor (stmt))
5471 continue;
5473 if (store_valid_for_store_merging_p (stmt))
5474 changed |= process_store (stmt);
5475 else
5476 changed |= terminate_all_aliasing_chains (NULL, stmt);
5479 if (bb_status == BB_EXTENDED_VALID)
5480 open_chains = true;
5481 else
5483 changed |= terminate_and_process_all_chains ();
5484 open_chains = false;
5488 if (open_chains)
5489 changed |= terminate_and_process_all_chains ();
5491 /* If the function can throw and catch non-call exceptions and something
5492 changed during the pass, then the CFG has (very likely) changed too. */
5493 if (cfun->can_throw_non_call_exceptions && cfun->eh && changed)
5495 free_dominance_info (CDI_DOMINATORS);
5496 return TODO_cleanup_cfg;
5499 return 0;
5502 } // anon namespace
5504 /* Construct and return a store merging pass object. */
5506 gimple_opt_pass *
5507 make_pass_store_merging (gcc::context *ctxt)
5509 return new pass_store_merging (ctxt);
5512 #if CHECKING_P
5514 namespace selftest {
5516 /* Selftests for store merging helpers. */
5518 /* Assert that all elements of the byte arrays X and Y, both of length N
5519 are equal. */
5521 static void
5522 verify_array_eq (unsigned char *x, unsigned char *y, unsigned int n)
5524 for (unsigned int i = 0; i < n; i++)
5526 if (x[i] != y[i])
5528 fprintf (stderr, "Arrays do not match. X:\n");
5529 dump_char_array (stderr, x, n);
5530 fprintf (stderr, "Y:\n");
5531 dump_char_array (stderr, y, n);
5533 ASSERT_EQ (x[i], y[i]);
5537 /* Test shift_bytes_in_array_left and that it carries bits across between
5538 bytes correctly. */
5540 static void
5541 verify_shift_bytes_in_array_left (void)
5543 /* byte 1 | byte 0
5544 00011111 | 11100000. */
5545 unsigned char orig[2] = { 0xe0, 0x1f };
5546 unsigned char in[2];
5547 memcpy (in, orig, sizeof orig);
5549 unsigned char expected[2] = { 0x80, 0x7f };
5550 shift_bytes_in_array_left (in, sizeof (in), 2);
5551 verify_array_eq (in, expected, sizeof (in));
5553 memcpy (in, orig, sizeof orig);
5554 memcpy (expected, orig, sizeof orig);
5555 /* Check that shifting by zero doesn't change anything. */
5556 shift_bytes_in_array_left (in, sizeof (in), 0);
5557 verify_array_eq (in, expected, sizeof (in));
5561 /* Test shift_bytes_in_array_right and that it carries bits across between
5562 bytes correctly. */
5564 static void
5565 verify_shift_bytes_in_array_right (void)
5567 /* byte 1 | byte 0
5568 00011111 | 11100000. */
5569 unsigned char orig[2] = { 0x1f, 0xe0};
5570 unsigned char in[2];
5571 memcpy (in, orig, sizeof orig);
5572 unsigned char expected[2] = { 0x07, 0xf8};
5573 shift_bytes_in_array_right (in, sizeof (in), 2);
5574 verify_array_eq (in, expected, sizeof (in));
5576 memcpy (in, orig, sizeof orig);
5577 memcpy (expected, orig, sizeof orig);
5578 /* Check that shifting by zero doesn't change anything. */
5579 shift_bytes_in_array_right (in, sizeof (in), 0);
5580 verify_array_eq (in, expected, sizeof (in));
5583 /* Test clear_bit_region that it clears exactly the bits asked and
5584 nothing more. */
5586 static void
5587 verify_clear_bit_region (void)
5589 /* Start with all bits set and test clearing various patterns in them. */
5590 unsigned char orig[3] = { 0xff, 0xff, 0xff};
5591 unsigned char in[3];
5592 unsigned char expected[3];
5593 memcpy (in, orig, sizeof in);
5595 /* Check zeroing out all the bits. */
5596 clear_bit_region (in, 0, 3 * BITS_PER_UNIT);
5597 expected[0] = expected[1] = expected[2] = 0;
5598 verify_array_eq (in, expected, sizeof in);
5600 memcpy (in, orig, sizeof in);
5601 /* Leave the first and last bits intact. */
5602 clear_bit_region (in, 1, 3 * BITS_PER_UNIT - 2);
5603 expected[0] = 0x1;
5604 expected[1] = 0;
5605 expected[2] = 0x80;
5606 verify_array_eq (in, expected, sizeof in);
5609 /* Test clear_bit_region_be that it clears exactly the bits asked and
5610 nothing more. */
5612 static void
5613 verify_clear_bit_region_be (void)
5615 /* Start with all bits set and test clearing various patterns in them. */
5616 unsigned char orig[3] = { 0xff, 0xff, 0xff};
5617 unsigned char in[3];
5618 unsigned char expected[3];
5619 memcpy (in, orig, sizeof in);
5621 /* Check zeroing out all the bits. */
5622 clear_bit_region_be (in, BITS_PER_UNIT - 1, 3 * BITS_PER_UNIT);
5623 expected[0] = expected[1] = expected[2] = 0;
5624 verify_array_eq (in, expected, sizeof in);
5626 memcpy (in, orig, sizeof in);
5627 /* Leave the first and last bits intact. */
5628 clear_bit_region_be (in, BITS_PER_UNIT - 2, 3 * BITS_PER_UNIT - 2);
5629 expected[0] = 0x80;
5630 expected[1] = 0;
5631 expected[2] = 0x1;
5632 verify_array_eq (in, expected, sizeof in);
5636 /* Run all of the selftests within this file. */
5638 void
5639 store_merging_cc_tests (void)
5641 verify_shift_bytes_in_array_left ();
5642 verify_shift_bytes_in_array_right ();
5643 verify_clear_bit_region ();
5644 verify_clear_bit_region_be ();
5647 } // namespace selftest
5648 #endif /* CHECKING_P. */