/* Subroutines used to remove unnecessary doubleword swaps
   for p8 little-endian VSX code.
   Copyright (C) 1991-2018 Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#define IN_TARGET_CODE 1

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "memmodel.h"
#include "tm_p.h"
#include "ira.h"
#include "print-tree.h"
#include "varasm.h"
#include "explow.h"
#include "expr.h"
#include "output.h"
#include "tree-pass.h"
#include "rtx-vector-builder.h"
/* Analyze vector computations and remove unnecessary doubleword
   swaps (xxswapdi instructions).  This pass is performed only
   for little-endian VSX code generation.

   For this specific case, loads and stores of 4x32 and 2x64 vectors
   are inefficient.  These are implemented using the lxvd2x and
   stxvd2x instructions, which invert the order of doublewords in
   a vector register.  Thus the code generation inserts an xxswapdi
   after each such load, and prior to each such store.  (For spill
   code after register assignment, an additional xxswapdi is inserted
   following each store in order to return a hard register to its
   unpermuted value.)

   The extra xxswapdi instructions reduce performance.  This can be
   particularly bad for vectorized code.  The purpose of this pass
   is to reduce the number of xxswapdi instructions required for
   correctness.

   The primary insight is that much code that operates on vectors
   does not care about the relative order of elements in a register,
   so long as the correct memory order is preserved.  If we have
   a computation where all input values are provided by lxvd2x/xxswapdi
   sequences, all outputs are stored using xxswapdi/stxvd2x sequences,
   and all intermediate computations are pure SIMD (independent of
   element order), then all the xxswapdi's associated with the loads
   and stores may be removed.

   This pass uses some of the infrastructure and logical ideas from
   the "web" pass in web.c.  We create maximal webs of computations
   fitting the description above using union-find.  Each such web is
   then optimized by removing its unnecessary xxswapdi instructions.

   The pass is placed prior to global optimization so that we can
   perform the optimization in the safest and simplest way possible;
   that is, by replacing each xxswapdi insn with a register copy insn.
   Subsequent forward propagation will remove copies where possible.

   There are some operations sensitive to element order for which we
   can still allow the operation, provided we modify those operations.
   These include CONST_VECTORs, for which we must swap the first and
   second halves of the constant vector; and SUBREGs, for which we
   must adjust the byte offset to account for the swapped doublewords.
   A remaining opportunity would be non-immediate-form splats, for
   which we should adjust the selected lane of the input.  We should
   also make code generation adjustments for sum-across operations,
   since this is a common vectorizer reduction.

   Because we run prior to the first split, we can see loads and stores
   here that match *vsx_le_perm_{load,store}_<mode>.  These are vanilla
   vector loads and stores that have not yet been split into a permuting
   load/store and a swap.  (One way this can happen is with a builtin
   call to vec_vsx_{ld,st}.)  We can handle these as well, but rather
   than deleting a swap, we convert the load/store into a permuting
   load/store (which effectively removes the swap).  */
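
/* Illustrative sketch only, not part of the pass (the helper name
   v4si_swap_model is hypothetical): a doubleword swap of a V4SI value
   exchanges elements {0,1} with {2,3}.  Element-wise (pure SIMD)
   operations commute with this swap -- add (swap (a), swap (b)) equals
   swap (add (a, b)) -- which is why a web built entirely from such
   operations can have its swaps removed.  */

static void
v4si_swap_model (int v[4])
{
  int t0 = v[0], t1 = v[1];
  v[0] = v[2];
  v[1] = v[3];
  v[2] = t0;
  v[3] = t1;
}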
/* LIMITATIONS:

   We do not currently handle computations that contain permutes.  There
   is a general transformation that can be performed correctly, but it
   may introduce more expensive code than it replaces.  To handle these
   would require a cost model to determine when to perform the
   optimization.  This commentary records how this could be done if
   desired.

   The most general permute is something like this (example for V16QI):

   (vec_select:V16QI (vec_concat:V32QI (op1:V16QI) (op2:V16QI))
                     (parallel [(const_int a0) (const_int a1)
                                 ...
                                (const_int a14) (const_int a15)]))

   where a0,...,a15 are in [0,31] and select elements from op1 and op2
   to produce the result.

   Regardless of mode, we can convert the PARALLEL to a mask of 16
   byte-element selectors.  Let's call this M, with M[i] representing
   the ith byte-element selector value.  Then if we swap doublewords
   throughout the computation, we can get correct behavior by replacing
   M with M' as follows:

     M'[i] = { (M[i]+8)%16      : M[i] in [0,15]
             { ((M[i]+8)%16)+16 : M[i] in [16,31]

   This seems promising at first, since we are just replacing one mask
   with another.  But certain masks are preferable to others.  If M
   is a mask that matches a vmrghh pattern, for example, M' certainly
   will not.  Instead of a single vmrghh, we would generate a load of
   M' and a vperm.  So we would need to know how many xxswapd's we can
   remove as a result of this transformation to determine if it's
   profitable; and preferably the logic would need to be aware of all
   the special preferable masks.

   Another form of permute is an UNSPEC_VPERM, in which the mask is
   already in a register.  In some cases, this mask may be a constant
   that we can discover with ud-chains, in which case the above
   transformation is ok.  However, the common usage here is for the
   mask to be produced by an UNSPEC_LVSL, in which case the mask
   cannot be known at compile time.  In such a case we would have to
   generate several instructions to compute M' as above at run time,
   and a cost model is needed again.

   However, when the mask M for an UNSPEC_VPERM is loaded from the
   constant pool, we can replace M with M' as above at no cost
   beyond adding a constant pool entry.  */
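
/* Illustrative sketch only, not part of the pass (the helper name
   swap_adjust_permute_mask_model is hypothetical): the M -> M'
   rewrite described above, applied to a 16-entry byte-element
   selector mask whose entries lie in [0,31].  */

static void
swap_adjust_permute_mask_model (const unsigned char m[16],
                                unsigned char m_prime[16])
{
  for (int i = 0; i < 16; ++i)
    if (m[i] < 16)
      m_prime[i] = (m[i] + 8) % 16;            /* Selects from op1.  */
    else
      m_prime[i] = ((m[i] + 8) % 16) + 16;     /* Selects from op2.  */
}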
/* This is based on the union-find logic in web.c.  web_entry_base is
   defined in df.h.  */
class swap_web_entry : public web_entry_base
{
 public:
  /* Pointer to the insn.  */
  rtx_insn *insn;
  /* Set if insn contains a mention of a vector register.  All other
     fields are undefined if this field is unset.  */
  unsigned int is_relevant : 1;
  /* Set if insn is a load.  */
  unsigned int is_load : 1;
  /* Set if insn is a store.  */
  unsigned int is_store : 1;
  /* Set if insn is a doubleword swap.  This can either be a register swap
     or a permuting load or store (test is_load and is_store for this).  */
  unsigned int is_swap : 1;
  /* Set if the insn has a live-in use of a parameter register.  */
  unsigned int is_live_in : 1;
  /* Set if the insn has a live-out def of a return register.  */
  unsigned int is_live_out : 1;
  /* Set if the insn contains a subreg reference of a vector register.  */
  unsigned int contains_subreg : 1;
  /* Set if the insn contains a 128-bit integer operand.  */
  unsigned int is_128_int : 1;
  /* Set if this is a call-insn.  */
  unsigned int is_call : 1;
  /* Set if this insn does not perform a vector operation for which
     element order matters, or if we know how to fix it up if it does.
     Undefined if is_swap is set.  */
  unsigned int is_swappable : 1;
  /* A nonzero value indicates what kind of special handling for this
     insn is required if doublewords are swapped.  Undefined if
     is_swappable is not set.  */
  unsigned int special_handling : 4;
  /* Set if the web represented by this entry cannot be optimized.  */
  unsigned int web_not_optimizable : 1;
  /* Set if this insn should be deleted.  */
  unsigned int will_delete : 1;
};
enum special_handling_values {
  SH_NONE = 0,
  SH_CONST_VECTOR,
  SH_SUBREG,
  SH_NOSWAP_LD,
  SH_NOSWAP_ST,
  SH_EXTRACT,
  SH_SPLAT,
  SH_XXPERMDI,
  SH_CONCAT,
  SH_VPERM
};
/* Union INSN with all insns containing definitions that reach USE.
   Detect whether USE is live-in to the current function.  */

static void
union_defs (swap_web_entry *insn_entry, rtx insn, df_ref use)
{
  struct df_link *link = DF_REF_CHAIN (use);

  if (!link)
    insn_entry[INSN_UID (insn)].is_live_in = 1;

  while (link)
    {
      if (DF_REF_IS_ARTIFICIAL (link->ref))
        insn_entry[INSN_UID (insn)].is_live_in = 1;

      if (DF_REF_INSN_INFO (link->ref))
        {
          rtx def_insn = DF_REF_INSN (link->ref);
          (void)unionfind_union (insn_entry + INSN_UID (insn),
                                 insn_entry + INSN_UID (def_insn));
        }

      link = link->next;
    }
}
/* Union INSN with all insns containing uses reached from DEF.
   Detect whether DEF is live-out from the current function.  */

static void
union_uses (swap_web_entry *insn_entry, rtx insn, df_ref def)
{
  struct df_link *link = DF_REF_CHAIN (def);

  if (!link)
    insn_entry[INSN_UID (insn)].is_live_out = 1;

  while (link)
    {
      /* This could be an eh use or some other artificial use;
         we treat these all the same (killing the optimization).  */
      if (DF_REF_IS_ARTIFICIAL (link->ref))
        insn_entry[INSN_UID (insn)].is_live_out = 1;

      if (DF_REF_INSN_INFO (link->ref))
        {
          rtx use_insn = DF_REF_INSN (link->ref);
          (void)unionfind_union (insn_entry + INSN_UID (insn),
                                 insn_entry + INSN_UID (use_insn));
        }

      link = link->next;
    }
}
/* Return 1 iff INSN is a load insn, including permuting loads that
   represent an lxvd2x instruction; else return 0.  */

static unsigned int
insn_is_load_p (rtx insn)
{
  rtx body = PATTERN (insn);

  if (GET_CODE (body) == SET)
    {
      if (GET_CODE (SET_SRC (body)) == MEM)
        return 1;

      if (GET_CODE (SET_SRC (body)) == VEC_SELECT
          && GET_CODE (XEXP (SET_SRC (body), 0)) == MEM)
        return 1;

      return 0;
    }

  if (GET_CODE (body) != PARALLEL)
    return 0;

  rtx set = XVECEXP (body, 0, 0);

  if (GET_CODE (set) == SET && GET_CODE (SET_SRC (set)) == MEM)
    return 1;

  return 0;
}
/* Return 1 iff INSN is a store insn, including permuting stores that
   represent an stxvd2x instruction; else return 0.  */

static unsigned int
insn_is_store_p (rtx insn)
{
  rtx body = PATTERN (insn);
  if (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == MEM)
    return 1;
  if (GET_CODE (body) != PARALLEL)
    return 0;
  rtx set = XVECEXP (body, 0, 0);
  if (GET_CODE (set) == SET && GET_CODE (SET_DEST (set)) == MEM)
    return 1;
  return 0;
}
/* Return 1 iff INSN swaps doublewords.  This may be a reg-reg swap,
   a permuting load, or a permuting store.  */

static unsigned int
insn_is_swap_p (rtx insn)
{
  rtx body = PATTERN (insn);
  if (GET_CODE (body) != SET)
    return 0;
  rtx rhs = SET_SRC (body);
  if (GET_CODE (rhs) != VEC_SELECT)
    return 0;
  rtx parallel = XEXP (rhs, 1);
  if (GET_CODE (parallel) != PARALLEL)
    return 0;
  unsigned int len = XVECLEN (parallel, 0);
  if (len != 2 && len != 4 && len != 8 && len != 16)
    return 0;
  for (unsigned int i = 0; i < len / 2; ++i)
    {
      rtx op = XVECEXP (parallel, 0, i);
      if (GET_CODE (op) != CONST_INT || INTVAL (op) != len / 2 + i)
        return 0;
    }
  for (unsigned int i = len / 2; i < len; ++i)
    {
      rtx op = XVECEXP (parallel, 0, i);
      if (GET_CODE (op) != CONST_INT || INTVAL (op) != i - len / 2)
        return 0;
    }
  return 1;
}
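
/* Illustrative sketch only, not part of the pass (the helper name
   model_swap_selector is hypothetical): builds the selection vector
   that insn_is_swap_p above accepts.  For len == 4 (V4SI) this is
   [2 3 0 1]; for len == 16 (V16QI) it is [8..15 0..7].  */

static void
model_swap_selector (unsigned int *sel, unsigned int len)
{
  for (unsigned int i = 0; i < len / 2; ++i)
    sel[i] = len / 2 + i;
  for (unsigned int i = len / 2; i < len; ++i)
    sel[i] = i - len / 2;
}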
/* Return true iff EXPR represents the sum of two registers.  */

bool
rs6000_sum_of_two_registers_p (const_rtx expr)
{
  if (GET_CODE (expr) == PLUS)
    {
      const_rtx operand1 = XEXP (expr, 0);
      const_rtx operand2 = XEXP (expr, 1);
      return (REG_P (operand1) && REG_P (operand2));
    }
  return false;
}
/* Return true iff EXPR represents an address expression that masks off
   the low-order 4 bits in the style of an lvx or stvx rtl pattern.  */

bool
rs6000_quadword_masked_address_p (const_rtx expr)
{
  if (GET_CODE (expr) == AND)
    {
      const_rtx operand1 = XEXP (expr, 0);
      const_rtx operand2 = XEXP (expr, 1);
      if ((REG_P (operand1) || rs6000_sum_of_two_registers_p (operand1))
          && CONST_SCALAR_INT_P (operand2) && INTVAL (operand2) == -16)
        return true;
    }
  return false;
}
/* Return TRUE if INSN represents a swap of a swapped load from memory
   and the memory address is quad-word aligned.  */

static bool
quad_aligned_load_p (swap_web_entry *insn_entry, rtx_insn *insn)
{
  unsigned uid = INSN_UID (insn);
  if (!insn_entry[uid].is_swap || insn_entry[uid].is_load)
    return false;

  struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);

  /* Since insn is known to represent a swap instruction, we know it
     "uses" only one input variable.  */
  df_ref use = DF_INSN_INFO_USES (insn_info);

  /* Figure out where this input variable is defined.  */
  struct df_link *def_link = DF_REF_CHAIN (use);

  /* If there is no definition or the definition is artificial or there are
     multiple definitions, punt.  */
  if (!def_link || !def_link->ref || DF_REF_IS_ARTIFICIAL (def_link->ref)
      || def_link->next)
    return false;

  rtx def_insn = DF_REF_INSN (def_link->ref);
  unsigned uid2 = INSN_UID (def_insn);
  /* We're looking for a load-with-swap insn.  If this is not that,
     return false.  */
  if (!insn_entry[uid2].is_load || !insn_entry[uid2].is_swap)
    return false;

  /* If the source of the rtl def is not a set from memory, return
     false.  */
  rtx body = PATTERN (def_insn);
  if (GET_CODE (body) != SET
      || GET_CODE (SET_SRC (body)) != VEC_SELECT
      || GET_CODE (XEXP (SET_SRC (body), 0)) != MEM)
    return false;

  rtx mem = XEXP (SET_SRC (body), 0);
  rtx base_reg = XEXP (mem, 0);
  return ((REG_P (base_reg) || rs6000_sum_of_two_registers_p (base_reg))
          && MEM_ALIGN (mem) >= 128);
}
/* Return TRUE if INSN represents a store-with-swap of a swapped value
   and the memory address is quad-word aligned.  */

static bool
quad_aligned_store_p (swap_web_entry *insn_entry, rtx_insn *insn)
{
  unsigned uid = INSN_UID (insn);
  if (!insn_entry[uid].is_swap || !insn_entry[uid].is_store)
    return false;

  rtx body = PATTERN (insn);
  rtx dest_address = XEXP (SET_DEST (body), 0);
  rtx swap_reg = XEXP (SET_SRC (body), 0);

  /* If the base address for the memory expression is not represented
     by a single register and is not the sum of two registers, punt.  */
  if (!REG_P (dest_address) && !rs6000_sum_of_two_registers_p (dest_address))
    return false;

  /* Confirm that the value to be stored is produced by a swap
     instruction.  */
  struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
  df_ref use;
  FOR_EACH_INSN_INFO_USE (use, insn_info)
    {
      struct df_link *def_link = DF_REF_CHAIN (use);

      /* If this is not the definition of the candidate swap register,
         then skip it.  I am interested in a different definition.  */
      if (!rtx_equal_p (DF_REF_REG (use), swap_reg))
        continue;

      /* If there is no def or the def is artificial or there are
         multiple defs, punt.  */
      if (!def_link || !def_link->ref || DF_REF_IS_ARTIFICIAL (def_link->ref)
          || def_link->next)
        return false;

      rtx def_insn = DF_REF_INSN (def_link->ref);
      unsigned uid2 = INSN_UID (def_insn);

      /* If this source value is not a simple swap, return false.  */
      if (!insn_entry[uid2].is_swap || insn_entry[uid2].is_load
          || insn_entry[uid2].is_store)
        return false;

      /* I've processed the use that I care about, so break out of
         this loop.  */
      break;
    }

  /* At this point, we know the source data comes from a swap.  The
     remaining question is whether the memory address is aligned.  */
  rtx set = single_set (insn);
  gcc_assert (set);

  rtx dest = SET_DEST (set);
  return (MEM_ALIGN (dest) >= 128);
}
/* Return 1 iff UID, known to reference a swap, is both fed by a load
   and a feeder of a store.  */

static bool
swap_feeds_both_load_and_store (swap_web_entry *insn_entry)
{
  rtx insn = insn_entry->insn;
  struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
  df_ref def, use;
  struct df_link *link = 0;
  rtx_insn *load = 0, *store = 0;
  bool fed_by_load = 0;
  bool feeds_store = 0;

  FOR_EACH_INSN_INFO_USE (use, insn_info)
    {
      link = DF_REF_CHAIN (use);
      load = DF_REF_INSN (link->ref);
      if (insn_is_load_p (load) && insn_is_swap_p (load))
        fed_by_load = 1;
    }

  FOR_EACH_INSN_INFO_DEF (def, insn_info)
    {
      link = DF_REF_CHAIN (def);
      store = DF_REF_INSN (link->ref);
      if (insn_is_store_p (store) && insn_is_swap_p (store))
        feeds_store = 1;
    }

  return fed_by_load && feeds_store;
}
/* Return TRUE if insn is a swap fed by a load from the constant pool.  */

static bool
const_load_sequence_p (swap_web_entry *insn_entry, rtx insn)
{
  unsigned uid = INSN_UID (insn);
  if (!insn_entry[uid].is_swap || insn_entry[uid].is_load)
    return false;

  const_rtx tocrel_base;

  struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
  df_ref use;

  /* Iterate over the definitions that are used by this insn.  Since
     this is known to be a swap insn, expect only one used definition.  */
  FOR_EACH_INSN_INFO_USE (use, insn_info)
    {
      struct df_link *def_link = DF_REF_CHAIN (use);

      /* If there is no def or the def is artificial or there are
         multiple defs, punt.  */
      if (!def_link || !def_link->ref || DF_REF_IS_ARTIFICIAL (def_link->ref)
          || def_link->next)
        return false;

      rtx def_insn = DF_REF_INSN (def_link->ref);
      unsigned uid2 = INSN_UID (def_insn);
      /* If this is not a load or is not a swap, return false.  */
      if (!insn_entry[uid2].is_load || !insn_entry[uid2].is_swap)
        return false;

      /* If the source of the rtl def is not a set from memory, return
         false.  */
      rtx body = PATTERN (def_insn);
      if (GET_CODE (body) != SET
          || GET_CODE (SET_SRC (body)) != VEC_SELECT
          || GET_CODE (XEXP (SET_SRC (body), 0)) != MEM)
        return false;

      rtx mem = XEXP (SET_SRC (body), 0);
      rtx base_reg = XEXP (mem, 0);
      /* If the base address for the memory expression is not
         represented by a register, punt.  */
      if (!REG_P (base_reg))
        return false;

      df_ref base_use;
      insn_info = DF_INSN_INFO_GET (def_insn);
      FOR_EACH_INSN_INFO_USE (base_use, insn_info)
        {
          /* If base_use does not represent base_reg, look for another
             use of base_reg.  */
          if (!rtx_equal_p (DF_REF_REG (base_use), base_reg))
            continue;

          struct df_link *base_def_link = DF_REF_CHAIN (base_use);
          if (!base_def_link || base_def_link->next)
            return false;

          /* Constants held on the stack are not "true" constants
             because their values are not part of the static load
             image.  If this constant's base reference is a stack
             or frame pointer, it is seen as an artificial
             reference.  */
          if (DF_REF_IS_ARTIFICIAL (base_def_link->ref))
            return false;

          rtx tocrel_insn = DF_REF_INSN (base_def_link->ref);
          rtx tocrel_body = PATTERN (tocrel_insn);
          rtx base, offset;
          if (GET_CODE (tocrel_body) != SET)
            return false;
          /* There is an extra level of indirection for small/large
             code models.  */
          rtx tocrel_expr = SET_SRC (tocrel_body);
          if (GET_CODE (tocrel_expr) == MEM)
            tocrel_expr = XEXP (tocrel_expr, 0);
          if (!toc_relative_expr_p (tocrel_expr, false, &tocrel_base, NULL))
            return false;
          split_const (XVECEXP (tocrel_base, 0, 0), &base, &offset);

          if (GET_CODE (base) != SYMBOL_REF || !CONSTANT_POOL_ADDRESS_P (base))
            return false;
          else
            {
              /* FIXME: The conditions under which
                 ((GET_CODE (const_vector) == SYMBOL_REF)
                  && !CONSTANT_POOL_ADDRESS_P (const_vector))
                 are not well understood.  This code prevents
                 an internal compiler error which will occur in
                 replace_swapped_load_constant () if we were to return
                 true.  Some day, we should figure out how to properly
                 handle this condition in
                 replace_swapped_load_constant () and then we can
                 remove this special test.  */
              rtx const_vector = get_pool_constant (base);
              if (GET_CODE (const_vector) == SYMBOL_REF
                  && CONSTANT_POOL_ADDRESS_P (const_vector))
                const_vector = get_pool_constant (const_vector);
              if (GET_CODE (const_vector) != CONST_VECTOR)
                return false;
            }
        }
    }
  return true;
}
/* Return TRUE iff OP matches a V2DF reduction pattern.  See the
   definition of vsx_reduc_<VEC_reduc_name>_v2df in vsx.md.  */

static bool
v2df_reduction_p (rtx op)
{
  if (GET_MODE (op) != V2DFmode)
    return false;

  enum rtx_code code = GET_CODE (op);
  if (code != PLUS && code != SMIN && code != SMAX)
    return false;

  rtx concat = XEXP (op, 0);
  if (GET_CODE (concat) != VEC_CONCAT)
    return false;

  rtx select0 = XEXP (concat, 0);
  rtx select1 = XEXP (concat, 1);
  if (GET_CODE (select0) != VEC_SELECT || GET_CODE (select1) != VEC_SELECT)
    return false;

  rtx reg0 = XEXP (select0, 0);
  rtx reg1 = XEXP (select1, 0);
  if (!rtx_equal_p (reg0, reg1) || !REG_P (reg0))
    return false;

  rtx parallel0 = XEXP (select0, 1);
  rtx parallel1 = XEXP (select1, 1);
  if (GET_CODE (parallel0) != PARALLEL || GET_CODE (parallel1) != PARALLEL)
    return false;

  if (!rtx_equal_p (XVECEXP (parallel0, 0, 0), const1_rtx)
      || !rtx_equal_p (XVECEXP (parallel1, 0, 0), const0_rtx))
    return false;

  return true;
}
/* Return 1 iff OP is an operand that will not be affected by having
   vector doublewords swapped in memory.  */

static unsigned int
rtx_is_swappable_p (rtx op, unsigned int *special)
{
  enum rtx_code code = GET_CODE (op);
  int i, j;
  rtx parallel;

  switch (code)
    {
    case LABEL_REF:
    case SYMBOL_REF:
    case CLOBBER:
    case REG:
      return 1;

    case VEC_CONCAT:
    case ASM_INPUT:
    case ASM_OPERANDS:
      return 0;

    case CONST_VECTOR:
      {
        *special = SH_CONST_VECTOR;
        return 1;
      }

    case VEC_DUPLICATE:
      /* Opportunity: If XEXP (op, 0) has the same mode as the result,
         and XEXP (op, 1) is a PARALLEL with a single QImode const int,
         it represents a vector splat for which we can do special
         handling.  */
      if (GET_CODE (XEXP (op, 0)) == CONST_INT)
        return 1;
      else if (REG_P (XEXP (op, 0))
               && GET_MODE_INNER (GET_MODE (op)) == GET_MODE (XEXP (op, 0)))
        /* This catches V2DF and V2DI splat, at a minimum.  */
        return 1;
      else if (GET_CODE (XEXP (op, 0)) == TRUNCATE
               && REG_P (XEXP (XEXP (op, 0), 0))
               && GET_MODE_INNER (GET_MODE (op)) == GET_MODE (XEXP (op, 0)))
        /* This catches splat of a truncated value.  */
        return 1;
      else if (GET_CODE (XEXP (op, 0)) == VEC_SELECT)
        /* If the duplicated item is from a select, defer to the select
           processing to see if we can change the lane for the splat.  */
        return rtx_is_swappable_p (XEXP (op, 0), special);
      else
        return 0;

    case VEC_SELECT:
      /* A vec_extract operation is ok if we change the lane.  */
      if (GET_CODE (XEXP (op, 0)) == REG
          && GET_MODE_INNER (GET_MODE (XEXP (op, 0))) == GET_MODE (op)
          && GET_CODE ((parallel = XEXP (op, 1))) == PARALLEL
          && XVECLEN (parallel, 0) == 1
          && GET_CODE (XVECEXP (parallel, 0, 0)) == CONST_INT)
        {
          *special = SH_EXTRACT;
          return 1;
        }
      /* An XXPERMDI is ok if we adjust the lanes.  Note that if the
         XXPERMDI is a swap operation, it will be identified by
         insn_is_swap_p and therefore we won't get here.  */
      else if (GET_CODE (XEXP (op, 0)) == VEC_CONCAT
               && (GET_MODE (XEXP (op, 0)) == V4DFmode
                   || GET_MODE (XEXP (op, 0)) == V4DImode)
               && GET_CODE ((parallel = XEXP (op, 1))) == PARALLEL
               && XVECLEN (parallel, 0) == 2
               && GET_CODE (XVECEXP (parallel, 0, 0)) == CONST_INT
               && GET_CODE (XVECEXP (parallel, 0, 1)) == CONST_INT)
        {
          *special = SH_XXPERMDI;
          return 1;
        }
      else if (v2df_reduction_p (op))
        return 1;
      else
        return 0;

    case UNSPEC:
      {
        /* Various operations are unsafe for this optimization, at least
           without significant additional work.  Permutes are obviously
           problematic, as both the permute control vector and the ordering
           of the target values are invalidated by doubleword swapping.
           Vector pack and unpack modify the number of vector lanes.
           Merge-high/low will not operate correctly on swapped operands.
           Vector shifts across element boundaries are clearly uncool,
           as are vector select and concatenate operations.  Vector
           sum-across instructions define one operand with a specific
           order-dependent element, so additional fixup code would be
           needed to make those work.  Vector set and non-immediate-form
           vector splat are element-order sensitive.  A few of these
           cases might be workable with special handling if required.
           Adding cost modeling would be appropriate in some cases.  */
        int val = XINT (op, 1);
        switch (val)
          {
          default:
            break;
          case UNSPEC_VMRGH_DIRECT:
          case UNSPEC_VMRGL_DIRECT:
          case UNSPEC_VPACK_SIGN_SIGN_SAT:
          case UNSPEC_VPACK_SIGN_UNS_SAT:
          case UNSPEC_VPACK_UNS_UNS_MOD:
          case UNSPEC_VPACK_UNS_UNS_MOD_DIRECT:
          case UNSPEC_VPACK_UNS_UNS_SAT:
          case UNSPEC_VPERM:
          case UNSPEC_VPERM_UNS:
          case UNSPEC_VPERMHI:
          case UNSPEC_VPERMSI:
          case UNSPEC_VPERMXOR:
          case UNSPEC_VPKPX:
          case UNSPEC_VSLDOI:
          case UNSPEC_VSLO:
          case UNSPEC_VSRO:
          case UNSPEC_VSUM2SWS:
          case UNSPEC_VSUM4S:
          case UNSPEC_VSUM4UBS:
          case UNSPEC_VSUMSWS:
          case UNSPEC_VSUMSWS_DIRECT:
          case UNSPEC_VSX_CONCAT:
          case UNSPEC_VSX_CVDPSPN:
          case UNSPEC_VSX_CVSPDP:
          case UNSPEC_VSX_CVSPDPN:
          case UNSPEC_VSX_EXTRACT:
          case UNSPEC_VSX_SET:
          case UNSPEC_VSX_SLDWI:
          case UNSPEC_VSX_VSLO:
          case UNSPEC_VUNPACK_HI_SIGN:
          case UNSPEC_VUNPACK_HI_SIGN_DIRECT:
          case UNSPEC_VUNPACK_LO_SIGN:
          case UNSPEC_VUNPACK_LO_SIGN_DIRECT:
          case UNSPEC_VUPKHPX:
          case UNSPEC_VUPKHS_V4SF:
          case UNSPEC_VUPKHU_V4SF:
          case UNSPEC_VUPKLPX:
          case UNSPEC_VUPKLS_V4SF:
          case UNSPEC_VUPKLU_V4SF:
            return 0;
          case UNSPEC_VSPLT_DIRECT:
          case UNSPEC_VSX_XXSPLTD:
            *special = SH_SPLAT;
            return 1;
          case UNSPEC_REDUC_PLUS:
            return 1;
          }
      }

    default:
      break;
    }

  const char *fmt = GET_RTX_FORMAT (code);
  int ok = 1;

  /* For all other rtx's, the operands are all swappable.  */
  for (i = 0; i < GET_RTX_LENGTH (code); ++i)
    if (fmt[i] == 'e' || fmt[i] == 'u')
      {
        unsigned int special_op = SH_NONE;
        ok &= rtx_is_swappable_p (XEXP (op, i), &special_op);
        if (special_op == SH_NONE)
          continue;
        /* Ensure we never have two kinds of special handling
           for the same insn.  */
        if (*special != SH_NONE && *special != special_op)
          return 0;
        *special = special_op;
      }
    else if (fmt[i] == 'E')
      for (j = 0; j < XVECLEN (op, i); ++j)
        {
          unsigned int special_op = SH_NONE;
          ok &= rtx_is_swappable_p (XVECEXP (op, i, j), &special_op);
          if (special_op == SH_NONE)
            continue;
          /* Ensure we never have two kinds of special handling
             for the same insn.  */
          if (*special != SH_NONE && *special != special_op)
            return 0;
          *special = special_op;
        }

  return ok;
}
/* Return 1 iff INSN is an operand that will not be affected by
   having vector doublewords swapped in memory (in which case
   *SPECIAL is unchanged), or that can be modified to be correct
   if vector doublewords are swapped in memory (in which case
   *SPECIAL is changed to a value indicating how).  */

static unsigned int
insn_is_swappable_p (swap_web_entry *insn_entry, rtx insn,
                     unsigned int *special)
{
  /* Calls are always bad.  */
  if (GET_CODE (insn) == CALL_INSN)
    return 0;

  /* Loads and stores seen here are not permuting, but we can still
     fix them up by converting them to permuting ones.  Exceptions:
     UNSPEC_LVE, UNSPEC_LVX, and UNSPEC_STVX, which have a PARALLEL
     body instead of a SET; and UNSPEC_STVE, which has an UNSPEC
     for the SET source.  Also we must now make an exception for lvx
     and stvx when they are not in the UNSPEC_LVX/STVX form (with the
     explicit "& -16") since this leads to unrecognizable insns.  */
  rtx body = PATTERN (insn);
  int i = INSN_UID (insn);

  if (insn_entry[i].is_load)
    {
      if (GET_CODE (body) == SET)
        {
          rtx rhs = SET_SRC (body);
          /* Even without a swap, the RHS might be a vec_select for, say,
             a byte-reversing load.  */
          if (GET_CODE (rhs) != MEM)
            return 0;
          if (GET_CODE (XEXP (rhs, 0)) == AND)
            return 0;

          *special = SH_NOSWAP_LD;
          return 1;
        }
      else
        return 0;
    }

  if (insn_entry[i].is_store)
    {
      if (GET_CODE (body) == SET
          && GET_CODE (SET_SRC (body)) != UNSPEC
          && GET_CODE (SET_SRC (body)) != VEC_SELECT)
        {
          rtx lhs = SET_DEST (body);
          /* Even without a swap, the LHS might be a vec_select for, say,
             a byte-reversing store.  */
          if (GET_CODE (lhs) != MEM)
            return 0;
          if (GET_CODE (XEXP (lhs, 0)) == AND)
            return 0;

          *special = SH_NOSWAP_ST;
          return 1;
        }
      else
        return 0;
    }

  /* A convert to single precision can be left as is provided that
     all of its uses are in xxspltw instructions that splat BE element
     zero.  */
  if (GET_CODE (body) == SET
      && GET_CODE (SET_SRC (body)) == UNSPEC
      && XINT (SET_SRC (body), 1) == UNSPEC_VSX_CVDPSPN)
    {
      df_ref def;
      struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);

      FOR_EACH_INSN_INFO_DEF (def, insn_info)
        {
          struct df_link *link = DF_REF_CHAIN (def);
          if (!link)
            return 0;

          for (; link; link = link->next) {
            rtx use_insn = DF_REF_INSN (link->ref);
            rtx use_body = PATTERN (use_insn);
            if (GET_CODE (use_body) != SET
                || GET_CODE (SET_SRC (use_body)) != UNSPEC
                || XINT (SET_SRC (use_body), 1) != UNSPEC_VSX_XXSPLTW
                || XVECEXP (SET_SRC (use_body), 0, 1) != const0_rtx)
              return 0;
          }
        }

      return 1;
    }

  /* A concatenation of two doublewords is ok if we reverse the
     order of the inputs.  */
  if (GET_CODE (body) == SET
      && GET_CODE (SET_SRC (body)) == VEC_CONCAT
      && (GET_MODE (SET_SRC (body)) == V2DFmode
          || GET_MODE (SET_SRC (body)) == V2DImode))
    {
      *special = SH_CONCAT;
      return 1;
    }

  /* V2DF reductions are always swappable.  */
  if (GET_CODE (body) == PARALLEL)
    {
      rtx expr = XVECEXP (body, 0, 0);
      if (GET_CODE (expr) == SET
          && v2df_reduction_p (SET_SRC (expr)))
        return 1;
    }

  /* An UNSPEC_VPERM is ok if the mask operand is loaded from the
     constant pool.  */
  if (GET_CODE (body) == SET
      && GET_CODE (SET_SRC (body)) == UNSPEC
      && XINT (SET_SRC (body), 1) == UNSPEC_VPERM
      && XVECLEN (SET_SRC (body), 0) == 3
      && GET_CODE (XVECEXP (SET_SRC (body), 0, 2)) == REG)
    {
      rtx mask_reg = XVECEXP (SET_SRC (body), 0, 2);
      struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
      df_ref use;
      FOR_EACH_INSN_INFO_USE (use, insn_info)
        if (rtx_equal_p (DF_REF_REG (use), mask_reg))
          {
            struct df_link *def_link = DF_REF_CHAIN (use);
            /* Punt if multiple definitions for this reg.  */
            if (def_link && !def_link->next &&
                const_load_sequence_p (insn_entry,
                                       DF_REF_INSN (def_link->ref)))
              {
                *special = SH_VPERM;
                return 1;
              }
          }
    }

  /* Otherwise check the operands for vector lane violations.  */
  return rtx_is_swappable_p (body, special);
}
enum chain_purpose { FOR_LOADS, FOR_STORES };
/* Return true if the UD or DU chain headed by LINK is non-empty,
   and every entry on the chain references an insn that is a
   register swap.  Furthermore, if PURPOSE is FOR_LOADS, each such
   register swap must have only permuting loads as reaching defs.
   If PURPOSE is FOR_STORES, each such register swap must have only
   register swaps or permuting stores as reached uses.  */

static bool
chain_contains_only_swaps (swap_web_entry *insn_entry, struct df_link *link,
                           enum chain_purpose purpose)
{
  if (!link)
    return false;

  for (; link; link = link->next)
    {
      if (!ALTIVEC_OR_VSX_VECTOR_MODE (GET_MODE (DF_REF_REG (link->ref))))
        continue;

      if (DF_REF_IS_ARTIFICIAL (link->ref))
        return false;

      rtx reached_insn = DF_REF_INSN (link->ref);
      unsigned uid = INSN_UID (reached_insn);
      struct df_insn_info *insn_info = DF_INSN_INFO_GET (reached_insn);

      if (!insn_entry[uid].is_swap || insn_entry[uid].is_load
          || insn_entry[uid].is_store)
        return false;

      if (purpose == FOR_LOADS)
        {
          df_ref use;
          FOR_EACH_INSN_INFO_USE (use, insn_info)
            {
              struct df_link *swap_link = DF_REF_CHAIN (use);

              while (swap_link)
                {
                  if (DF_REF_IS_ARTIFICIAL (link->ref))
                    return false;

                  rtx swap_def_insn = DF_REF_INSN (swap_link->ref);
                  unsigned uid2 = INSN_UID (swap_def_insn);

                  /* Only permuting loads are allowed.  */
                  if (!insn_entry[uid2].is_swap || !insn_entry[uid2].is_load)
                    return false;

                  swap_link = swap_link->next;
                }
            }
        }
      else if (purpose == FOR_STORES)
        {
          df_ref def;
          FOR_EACH_INSN_INFO_DEF (def, insn_info)
            {
              struct df_link *swap_link = DF_REF_CHAIN (def);

              while (swap_link)
                {
                  if (DF_REF_IS_ARTIFICIAL (link->ref))
                    return false;

                  rtx swap_use_insn = DF_REF_INSN (swap_link->ref);
                  unsigned uid2 = INSN_UID (swap_use_insn);

                  /* Permuting stores or register swaps are allowed.  */
                  if (!insn_entry[uid2].is_swap || insn_entry[uid2].is_load)
                    return false;

                  swap_link = swap_link->next;
                }
            }
        }
    }
  return true;
}
/* Mark the xxswapdi instructions associated with permuting loads and
   stores for removal.  Note that we only flag them for deletion here,
   as there is a possibility of a swap being reached from multiple
   loads, etc.  */

static void
mark_swaps_for_removal (swap_web_entry *insn_entry, unsigned int i)
{
  rtx insn = insn_entry[i].insn;
  struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);

  if (insn_entry[i].is_load)
    {
      df_ref def;
      FOR_EACH_INSN_INFO_DEF (def, insn_info)
        {
          struct df_link *link = DF_REF_CHAIN (def);

          /* We know by now that these are swaps, so we can delete
             them confidently.  */
          while (link)
            {
              rtx use_insn = DF_REF_INSN (link->ref);
              insn_entry[INSN_UID (use_insn)].will_delete = 1;
              link = link->next;
            }
        }
    }
  else if (insn_entry[i].is_store)
    {
      df_ref use;
      FOR_EACH_INSN_INFO_USE (use, insn_info)
        {
          /* Ignore uses for addressability.  */
          machine_mode mode = GET_MODE (DF_REF_REG (use));
          if (!ALTIVEC_OR_VSX_VECTOR_MODE (mode))
            continue;

          struct df_link *link = DF_REF_CHAIN (use);

          /* We know by now that these are swaps, so we can delete
             them confidently.  */
          while (link)
            {
              rtx def_insn = DF_REF_INSN (link->ref);
              insn_entry[INSN_UID (def_insn)].will_delete = 1;
              link = link->next;
            }
        }
    }
}
/* *OP_PTR is either a CONST_VECTOR or an expression containing one.
   Swap the first half of the vector with the second in the first
   case.  Recurse to find it in the second.  */

static void
swap_const_vector_halves (rtx *op_ptr)
{
  int i;
  rtx op = *op_ptr;
  enum rtx_code code = GET_CODE (op);
  if (GET_CODE (op) == CONST_VECTOR)
    {
      int units = GET_MODE_NUNITS (GET_MODE (op));
      rtx_vector_builder builder (GET_MODE (op), units, 1);
      for (i = 0; i < units / 2; ++i)
        builder.quick_push (CONST_VECTOR_ELT (op, i + units / 2));
      for (i = 0; i < units / 2; ++i)
        builder.quick_push (CONST_VECTOR_ELT (op, i));
      *op_ptr = builder.build ();
    }
  else
    {
      int j;
      const char *fmt = GET_RTX_FORMAT (code);
      for (i = 0; i < GET_RTX_LENGTH (code); ++i)
        if (fmt[i] == 'e' || fmt[i] == 'u')
          swap_const_vector_halves (&XEXP (op, i));
        else if (fmt[i] == 'E')
          for (j = 0; j < XVECLEN (op, i); ++j)
            swap_const_vector_halves (&XVECEXP (op, i, j));
    }
}
/* Find all subregs of a vector expression that perform a narrowing,
   and adjust the subreg index to account for doubleword swapping.  */

static void
adjust_subreg_index (rtx op)
{
  enum rtx_code code = GET_CODE (op);
  if (code == SUBREG
      && (GET_MODE_SIZE (GET_MODE (op))
          < GET_MODE_SIZE (GET_MODE (XEXP (op, 0)))))
    {
      unsigned int index = SUBREG_BYTE (op);
      if (index < 8)
        index += 8;
      else
        index -= 8;
      SUBREG_BYTE (op) = index;
    }

  const char *fmt = GET_RTX_FORMAT (code);
  int i, j;
  for (i = 0; i < GET_RTX_LENGTH (code); ++i)
    if (fmt[i] == 'e' || fmt[i] == 'u')
      adjust_subreg_index (XEXP (op, i));
    else if (fmt[i] == 'E')
      for (j = 0; j < XVECLEN (op, i); ++j)
        adjust_subreg_index (XVECEXP (op, i, j));
}
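
/* Illustrative sketch only, not part of the pass (the helper name
   model_adjust_subreg_byte is hypothetical): a narrowing subreg's
   byte offset into a 16-byte vector moves to the other doubleword
   when the doublewords are swapped, as adjust_subreg_index does
   above.  */

static unsigned int
model_adjust_subreg_byte (unsigned int index)
{
  return index < 8 ? index + 8 : index - 8;
}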
/* Convert the non-permuting load INSN to a permuting one.  */

static void
permute_load (rtx_insn *insn)
{
  rtx body = PATTERN (insn);
  rtx mem_op = SET_SRC (body);
  rtx tgt_reg = SET_DEST (body);
  machine_mode mode = GET_MODE (tgt_reg);
  int n_elts = GET_MODE_NUNITS (mode);
  int half_elts = n_elts / 2;
  rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
  int i, j;
  for (i = 0, j = half_elts; i < half_elts; ++i, ++j)
    XVECEXP (par, 0, i) = GEN_INT (j);
  for (i = half_elts, j = 0; j < half_elts; ++i, ++j)
    XVECEXP (par, 0, i) = GEN_INT (j);
  rtx sel = gen_rtx_VEC_SELECT (mode, mem_op, par);
  SET_SRC (body) = sel;
  INSN_CODE (insn) = -1; /* Force re-recognition.  */
  df_insn_rescan (insn);

  if (dump_file)
    fprintf (dump_file, "Replacing load %d with permuted load\n",
             INSN_UID (insn));
}
/* Convert the non-permuting store INSN to a permuting one.  */

static void
permute_store (rtx_insn *insn)
{
  rtx body = PATTERN (insn);
  rtx src_reg = SET_SRC (body);
  machine_mode mode = GET_MODE (src_reg);
  int n_elts = GET_MODE_NUNITS (mode);
  int half_elts = n_elts / 2;
  rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
  int i, j;
  for (i = 0, j = half_elts; i < half_elts; ++i, ++j)
    XVECEXP (par, 0, i) = GEN_INT (j);
  for (i = half_elts, j = 0; j < half_elts; ++i, ++j)
    XVECEXP (par, 0, i) = GEN_INT (j);
  rtx sel = gen_rtx_VEC_SELECT (mode, src_reg, par);
  SET_SRC (body) = sel;
  INSN_CODE (insn) = -1; /* Force re-recognition.  */
  df_insn_rescan (insn);

  if (dump_file)
    fprintf (dump_file, "Replacing store %d with permuted store\n",
             INSN_UID (insn));
}
/* Given OP that contains a vector extract operation, adjust the index
   of the extracted lane to account for the doubleword swap.  */

static void
adjust_extract (rtx_insn *insn)
{
  rtx pattern = PATTERN (insn);
  if (GET_CODE (pattern) == PARALLEL)
    pattern = XVECEXP (pattern, 0, 0);
  rtx src = SET_SRC (pattern);
  /* The vec_select may be wrapped in a vec_duplicate for a splat, so
     account for that.  */
  rtx sel = GET_CODE (src) == VEC_DUPLICATE ? XEXP (src, 0) : src;
  rtx par = XEXP (sel, 1);
  int half_elts = GET_MODE_NUNITS (GET_MODE (XEXP (sel, 0))) >> 1;
  int lane = INTVAL (XVECEXP (par, 0, 0));
  lane = lane >= half_elts ? lane - half_elts : lane + half_elts;
  XVECEXP (par, 0, 0) = GEN_INT (lane);
  INSN_CODE (insn) = -1; /* Force re-recognition.  */
  df_insn_rescan (insn);

  if (dump_file)
    fprintf (dump_file, "Changing lane for extract %d\n", INSN_UID (insn));
}
/* Given OP that contains a vector direct-splat operation, adjust the index
   of the source lane to account for the doubleword swap.  */

static void
adjust_splat (rtx_insn *insn)
{
  rtx body = PATTERN (insn);
  rtx unspec = XEXP (body, 1);
  int half_elts = GET_MODE_NUNITS (GET_MODE (unspec)) >> 1;
  int lane = INTVAL (XVECEXP (unspec, 0, 1));
  lane = lane >= half_elts ? lane - half_elts : lane + half_elts;
  XVECEXP (unspec, 0, 1) = GEN_INT (lane);
  INSN_CODE (insn) = -1; /* Force re-recognition.  */
  df_insn_rescan (insn);

  if (dump_file)
    fprintf (dump_file, "Changing lane for splat %d\n", INSN_UID (insn));
}
/* Given OP that contains an XXPERMDI operation (that is not a doubleword
   swap), reverse the order of the source operands and adjust the indices
   of the source lanes to account for doubleword reversal.  */

static void
adjust_xxpermdi (rtx_insn *insn)
{
  rtx set = PATTERN (insn);
  rtx select = XEXP (set, 1);
  rtx concat = XEXP (select, 0);
  rtx src0 = XEXP (concat, 0);
  XEXP (concat, 0) = XEXP (concat, 1);
  XEXP (concat, 1) = src0;
  rtx parallel = XEXP (select, 1);
  int lane0 = INTVAL (XVECEXP (parallel, 0, 0));
  int lane1 = INTVAL (XVECEXP (parallel, 0, 1));
  int new_lane0 = 3 - lane1;
  int new_lane1 = 3 - lane0;
  XVECEXP (parallel, 0, 0) = GEN_INT (new_lane0);
  XVECEXP (parallel, 0, 1) = GEN_INT (new_lane1);
  INSN_CODE (insn) = -1; /* Force re-recognition.  */
  df_insn_rescan (insn);

  if (dump_file)
    fprintf (dump_file, "Changing lanes for xxpermdi %d\n", INSN_UID (insn));
}
/* Given OP that contains a VEC_CONCAT operation of two doublewords,
   reverse the order of those inputs.  */

static void
adjust_concat (rtx_insn *insn)
{
  rtx set = PATTERN (insn);
  rtx concat = XEXP (set, 1);
  rtx src0 = XEXP (concat, 0);
  XEXP (concat, 0) = XEXP (concat, 1);
  XEXP (concat, 1) = src0;
  INSN_CODE (insn) = -1; /* Force re-recognition.  */
  df_insn_rescan (insn);

  if (dump_file)
    fprintf (dump_file, "Reversing inputs for concat %d\n", INSN_UID (insn));
}
/* Given an UNSPEC_VPERM insn, modify the mask loaded from the
   constant pool to reflect swapped doublewords.  */

static void
adjust_vperm (rtx_insn *insn)
{
  /* We previously determined that the UNSPEC_VPERM was fed by a
     swap of a swapping load of a TOC-relative constant pool symbol.
     Find the MEM in the swapping load and replace it with a MEM for
     the adjusted mask constant.  */
  rtx set = PATTERN (insn);
  rtx mask_reg = XVECEXP (SET_SRC (set), 0, 2);

  /* Find the swap.  */
  struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
  df_ref use;
  rtx_insn *swap_insn = 0;
  FOR_EACH_INSN_INFO_USE (use, insn_info)
    if (rtx_equal_p (DF_REF_REG (use), mask_reg))
      {
        struct df_link *def_link = DF_REF_CHAIN (use);
        gcc_assert (def_link && !def_link->next);
        swap_insn = DF_REF_INSN (def_link->ref);
        break;
      }
  gcc_assert (swap_insn);

  /* Find the load.  */
  insn_info = DF_INSN_INFO_GET (swap_insn);
  rtx_insn *load_insn = 0;
  FOR_EACH_INSN_INFO_USE (use, insn_info)
    {
      struct df_link *def_link = DF_REF_CHAIN (use);
      gcc_assert (def_link && !def_link->next);
      load_insn = DF_REF_INSN (def_link->ref);
      break;
    }
  gcc_assert (load_insn);

  /* Find the TOC-relative symbol access.  */
  insn_info = DF_INSN_INFO_GET (load_insn);
  rtx_insn *tocrel_insn = 0;
  FOR_EACH_INSN_INFO_USE (use, insn_info)
    {
      struct df_link *def_link = DF_REF_CHAIN (use);
      gcc_assert (def_link && !def_link->next);
      tocrel_insn = DF_REF_INSN (def_link->ref);
      break;
    }
  gcc_assert (tocrel_insn);

  /* Find the embedded CONST_VECTOR.  We have to call toc_relative_expr_p
     to set tocrel_base; otherwise it would be unnecessary as we've
     already established it will return true.  */
  rtx base, offset;
  const_rtx tocrel_base;
  rtx tocrel_expr = SET_SRC (PATTERN (tocrel_insn));
  /* There is an extra level of indirection for small/large code models.  */
  if (GET_CODE (tocrel_expr) == MEM)
    tocrel_expr = XEXP (tocrel_expr, 0);
  if (!toc_relative_expr_p (tocrel_expr, false, &tocrel_base, NULL))
    gcc_unreachable ();
  split_const (XVECEXP (tocrel_base, 0, 0), &base, &offset);
  rtx const_vector = get_pool_constant (base);
  /* With the extra indirection, get_pool_constant will produce the
     real constant from the reg_equal expression, so get the real
     constant.  */
  if (GET_CODE (const_vector) == SYMBOL_REF)
    const_vector = get_pool_constant (const_vector);
  gcc_assert (GET_CODE (const_vector) == CONST_VECTOR);

  /* Create an adjusted mask from the initial mask.  */
  unsigned int new_mask[16], i, val;
  for (i = 0; i < 16; ++i) {
    val = INTVAL (XVECEXP (const_vector, 0, i));
    if (val < 16)
      new_mask[i] = (val + 8) % 16;
    else
      new_mask[i] = ((val + 8) % 16) + 16;
  }

  /* Create a new CONST_VECTOR and a MEM that references it.  */
  rtx vals = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
  for (i = 0; i < 16; ++i)
    XVECEXP (vals, 0, i) = GEN_INT (new_mask[i]);
  rtx new_const_vector = gen_rtx_CONST_VECTOR (V16QImode, XVEC (vals, 0));
  rtx new_mem = force_const_mem (V16QImode, new_const_vector);
  /* This gives us a MEM whose base operand is a SYMBOL_REF, which we
     can't recognize.  Force the SYMBOL_REF into a register.  */
  if (!REG_P (XEXP (new_mem, 0))) {
    rtx base_reg = force_reg (Pmode, XEXP (new_mem, 0));
    XEXP (new_mem, 0) = base_reg;
    /* Move the newly created insn ahead of the load insn.  */
    rtx_insn *force_insn = get_last_insn ();
    remove_insn (force_insn);
    rtx_insn *before_load_insn = PREV_INSN (load_insn);
    add_insn_after (force_insn, before_load_insn, BLOCK_FOR_INSN (load_insn));
    df_insn_rescan (before_load_insn);
    df_insn_rescan (force_insn);
  }

  /* Replace the MEM in the load instruction and rescan it.  */
  XEXP (SET_SRC (PATTERN (load_insn)), 0) = new_mem;
  INSN_CODE (load_insn) = -1; /* Force re-recognition.  */
  df_insn_rescan (load_insn);

  if (dump_file)
    fprintf (dump_file, "Adjusting mask for vperm %d\n", INSN_UID (insn));
}
/* The insn described by INSN_ENTRY[I] can be swapped, but only
   with special handling.  Take care of that here.  */

static void
handle_special_swappables (swap_web_entry *insn_entry, unsigned i)
{
  rtx_insn *insn = insn_entry[i].insn;
  rtx body = PATTERN (insn);

  switch (insn_entry[i].special_handling)
    {
    default:
      gcc_unreachable ();
    case SH_CONST_VECTOR:
      {
        /* A CONST_VECTOR will only show up somewhere in the RHS of a SET.  */
        gcc_assert (GET_CODE (body) == SET);
        swap_const_vector_halves (&SET_SRC (body));
        if (dump_file)
          fprintf (dump_file, "Swapping constant halves in insn %d\n", i);
        break;
      }
    case SH_SUBREG:
      /* A subreg of the same size is already safe.  For subregs that
         select a smaller portion of a reg, adjust the index for
         swapped doublewords.  */
      adjust_subreg_index (body);
      if (dump_file)
        fprintf (dump_file, "Adjusting subreg in insn %d\n", i);
      break;
    case SH_NOSWAP_LD:
      /* Convert a non-permuting load to a permuting one.  */
      permute_load (insn);
      break;
    case SH_NOSWAP_ST:
      /* Convert a non-permuting store to a permuting one.  */
      permute_store (insn);
      break;
    case SH_EXTRACT:
      /* Change the lane on an extract operation.  */
      adjust_extract (insn);
      break;
    case SH_SPLAT:
      /* Change the lane on a direct-splat operation.  */
      adjust_splat (insn);
      break;
    case SH_XXPERMDI:
      /* Change the lanes on an XXPERMDI operation.  */
      adjust_xxpermdi (insn);
      break;
    case SH_CONCAT:
      /* Reverse the order of a concatenation operation.  */
      adjust_concat (insn);
      break;
    case SH_VPERM:
      /* Change the mask loaded from the constant pool for a VPERM.  */
      adjust_vperm (insn);
      break;
    }
}
/* Find the insn from the Ith table entry, which is known to be a
   register swap Y = SWAP(X).  Replace it with a copy Y = X.  */

static void
replace_swap_with_copy (swap_web_entry *insn_entry, unsigned i)
{
  rtx_insn *insn = insn_entry[i].insn;
  rtx body = PATTERN (insn);
  rtx src_reg = XEXP (SET_SRC (body), 0);
  rtx copy = gen_rtx_SET (SET_DEST (body), src_reg);
  rtx_insn *new_insn = emit_insn_before (copy, insn);
  set_block_for_insn (new_insn, BLOCK_FOR_INSN (insn));
  df_insn_rescan (new_insn);

  if (dump_file)
    {
      unsigned int new_uid = INSN_UID (new_insn);
      fprintf (dump_file, "Replacing swap %d with copy %d\n", i, new_uid);
    }

  df_insn_delete (insn);
  remove_insn (insn);
  insn->set_deleted ();
}
/* Make NEW_MEM_EXP's attributes and flags resemble those of
   ORIGINAL_MEM_EXP.  */

static void
mimic_memory_attributes_and_flags (rtx new_mem_exp, const_rtx original_mem_exp)
{
  RTX_FLAG (new_mem_exp, jump) = RTX_FLAG (original_mem_exp, jump);
  RTX_FLAG (new_mem_exp, call) = RTX_FLAG (original_mem_exp, call);
  RTX_FLAG (new_mem_exp, unchanging) = RTX_FLAG (original_mem_exp, unchanging);
  RTX_FLAG (new_mem_exp, volatil) = RTX_FLAG (original_mem_exp, volatil);
  RTX_FLAG (new_mem_exp, frame_related) =
    RTX_FLAG (original_mem_exp, frame_related);

  /* The following fields may not be used with MEM subexpressions.  */
  RTX_FLAG (new_mem_exp, in_struct) = RTX_FLAG (original_mem_exp, in_struct);
  RTX_FLAG (new_mem_exp, return_val) = RTX_FLAG (original_mem_exp, return_val);

  struct mem_attrs original_attrs = *get_mem_attrs (original_mem_exp);

  alias_set_type set = original_attrs.alias;
  set_mem_alias_set (new_mem_exp, set);

  addr_space_t addrspace = original_attrs.addrspace;
  set_mem_addr_space (new_mem_exp, addrspace);

  unsigned int align = original_attrs.align;
  set_mem_align (new_mem_exp, align);

  tree expr = original_attrs.expr;
  set_mem_expr (new_mem_exp, expr);

  if (original_attrs.offset_known_p)
    {
      HOST_WIDE_INT offset = original_attrs.offset;
      set_mem_offset (new_mem_exp, offset);
    }
  else
    clear_mem_offset (new_mem_exp);

  if (original_attrs.size_known_p)
    {
      HOST_WIDE_INT size = original_attrs.size;
      set_mem_size (new_mem_exp, size);
    }
  else
    clear_mem_size (new_mem_exp);
}
/* Generate an rtx expression to represent use of the stvx insn to store
   the value represented by register SRC_EXP into the memory at address
   DEST_EXP, with vector mode MODE.  */

rtx
rs6000_gen_stvx (enum machine_mode mode, rtx dest_exp, rtx src_exp)
{
  rtx stvx;

  if (mode == V16QImode)
    stvx = gen_altivec_stvx_v16qi (src_exp, dest_exp);
  else if (mode == V8HImode)
    stvx = gen_altivec_stvx_v8hi (src_exp, dest_exp);
#ifdef HAVE_V8HFmode
  else if (mode == V8HFmode)
    stvx = gen_altivec_stvx_v8hf (src_exp, dest_exp);
#endif
  else if (mode == V4SImode)
    stvx = gen_altivec_stvx_v4si (src_exp, dest_exp);
  else if (mode == V4SFmode)
    stvx = gen_altivec_stvx_v4sf (src_exp, dest_exp);
  else if (mode == V2DImode)
    stvx = gen_altivec_stvx_v2di (src_exp, dest_exp);
  else if (mode == V2DFmode)
    stvx = gen_altivec_stvx_v2df (src_exp, dest_exp);
  else if (mode == V1TImode)
    stvx = gen_altivec_stvx_v1ti (src_exp, dest_exp);
  else
    /* KFmode, TFmode, other modes not expected in this context.  */
    gcc_unreachable ();

  rtx new_mem_exp = SET_DEST (PATTERN (stvx));
  mimic_memory_attributes_and_flags (new_mem_exp, dest_exp);
  return stvx;
}
/* Given that STORE_INSN represents an aligned store-with-swap of a
   swapped value, replace the store with an aligned store (without
   swap) and replace the swap with a copy insn.  */

static void
replace_swapped_aligned_store (swap_web_entry *insn_entry,
                               rtx_insn *store_insn)
{
  unsigned uid = INSN_UID (store_insn);
  gcc_assert (insn_entry[uid].is_swap && insn_entry[uid].is_store);

  rtx body = PATTERN (store_insn);
  rtx dest_address = XEXP (SET_DEST (body), 0);
  rtx swap_reg = XEXP (SET_SRC (body), 0);
  gcc_assert (REG_P (dest_address)
              || rs6000_sum_of_two_registers_p (dest_address));

  /* Find the swap instruction that provides the value to be stored by
     this store-with-swap instruction.  */
  struct df_insn_info *insn_info = DF_INSN_INFO_GET (store_insn);
  df_ref use;
  rtx_insn *swap_insn = NULL;
  unsigned uid2 = 0;
  FOR_EACH_INSN_INFO_USE (use, insn_info)
    {
      struct df_link *def_link = DF_REF_CHAIN (use);

      /* If this is not the definition of the candidate swap register,
         then skip it.  I am only interested in the swap insn.  */
      if (!rtx_equal_p (DF_REF_REG (use), swap_reg))
        continue;

      /* If there is no def or the def is artificial or there are
         multiple defs, we should not be here.  */
      gcc_assert (def_link && def_link->ref && !def_link->next
                  && !DF_REF_IS_ARTIFICIAL (def_link->ref));

      swap_insn = DF_REF_INSN (def_link->ref);
      uid2 = INSN_UID (swap_insn);

      /* If this source value is not a simple swap, we should not be here.  */
      gcc_assert (insn_entry[uid2].is_swap && !insn_entry[uid2].is_load
                  && !insn_entry[uid2].is_store);

      /* We've processed the use we care about, so break out of
         this loop.  */
      break;
    }

  /* At this point, swap_insn and uid2 represent the swap instruction
     that feeds the store.  */
  gcc_assert (swap_insn);
  rtx set = single_set (store_insn);
  gcc_assert (set);
  rtx dest_exp = SET_DEST (set);
  rtx src_exp = XEXP (SET_SRC (body), 0);
  enum machine_mode mode = GET_MODE (dest_exp);
  gcc_assert (MEM_P (dest_exp));
  gcc_assert (MEM_ALIGN (dest_exp) >= 128);

  /* Replace the copy with a new insn.  */
  rtx stvx;
  stvx = rs6000_gen_stvx (mode, dest_exp, src_exp);

  rtx_insn *new_insn = emit_insn_before (stvx, store_insn);
  rtx new_body = PATTERN (new_insn);

  gcc_assert ((GET_CODE (new_body) == SET)
              && (GET_CODE (SET_DEST (new_body)) == MEM));

  set_block_for_insn (new_insn, BLOCK_FOR_INSN (store_insn));
  df_insn_rescan (new_insn);

  df_insn_delete (store_insn);
  remove_insn (store_insn);
  store_insn->set_deleted ();

  /* Replace the swap with a copy.  */
  uid2 = INSN_UID (swap_insn);
  mark_swaps_for_removal (insn_entry, uid2);
  replace_swap_with_copy (insn_entry, uid2);
}
/* Generate an rtx expression to represent use of the lvx insn to load
   from memory SRC_EXP into register DEST_EXP with vector mode MODE.  */

rtx
rs6000_gen_lvx (enum machine_mode mode, rtx dest_exp, rtx src_exp)
{
  rtx lvx;

  if (mode == V16QImode)
    lvx = gen_altivec_lvx_v16qi (dest_exp, src_exp);
  else if (mode == V8HImode)
    lvx = gen_altivec_lvx_v8hi (dest_exp, src_exp);
#ifdef HAVE_V8HFmode
  else if (mode == V8HFmode)
    lvx = gen_altivec_lvx_v8hf (dest_exp, src_exp);
#endif
  else if (mode == V4SImode)
    lvx = gen_altivec_lvx_v4si (dest_exp, src_exp);
  else if (mode == V4SFmode)
    lvx = gen_altivec_lvx_v4sf (dest_exp, src_exp);
  else if (mode == V2DImode)
    lvx = gen_altivec_lvx_v2di (dest_exp, src_exp);
  else if (mode == V2DFmode)
    lvx = gen_altivec_lvx_v2df (dest_exp, src_exp);
  else if (mode == V1TImode)
    lvx = gen_altivec_lvx_v1ti (dest_exp, src_exp);
  else
    /* KFmode, TFmode, other modes not expected in this context.  */
    gcc_unreachable ();

  rtx new_mem_exp = SET_SRC (PATTERN (lvx));
  mimic_memory_attributes_and_flags (new_mem_exp, src_exp);
  return lvx;
}
/* Given that SWAP_INSN represents a swap of an aligned
   load-with-swap, replace the load with an aligned load (without
   swap) and replace the swap with a copy insn.  */

static void
replace_swapped_aligned_load (swap_web_entry *insn_entry, rtx swap_insn)
{
  /* Find the load.  */
  unsigned uid = INSN_UID (swap_insn);
  /* Only call this if quad_aligned_load_p (swap_insn).  */
  gcc_assert (insn_entry[uid].is_swap && !insn_entry[uid].is_load);
  struct df_insn_info *insn_info = DF_INSN_INFO_GET (swap_insn);

  /* Since insn is known to represent a swap instruction, we know it
     "uses" only one input variable.  */
  df_ref use = DF_INSN_INFO_USES (insn_info);

  /* Figure out where this input variable is defined.  */
  struct df_link *def_link = DF_REF_CHAIN (use);
  gcc_assert (def_link && def_link->ref
              && !DF_REF_IS_ARTIFICIAL (def_link->ref) && !def_link->next);

  rtx_insn *def_insn = DF_REF_INSN (def_link->ref);
  unsigned uid2 = INSN_UID (def_insn);

  /* We're expecting a load-with-swap insn.  */
  gcc_assert (insn_entry[uid2].is_load && insn_entry[uid2].is_swap);

  /* We expect this to be a set from memory, with source representing a
     swap (indicated by code VEC_SELECT).  */
  rtx body = PATTERN (def_insn);
  gcc_assert ((GET_CODE (body) == SET)
              && (GET_CODE (SET_SRC (body)) == VEC_SELECT)
              && (GET_CODE (XEXP (SET_SRC (body), 0)) == MEM));

  rtx src_exp = XEXP (SET_SRC (body), 0);
  enum machine_mode mode = GET_MODE (src_exp);
  rtx lvx = rs6000_gen_lvx (mode, SET_DEST (body), src_exp);

  rtx_insn *new_insn = emit_insn_before (lvx, def_insn);
  rtx new_body = PATTERN (new_insn);

  gcc_assert ((GET_CODE (new_body) == SET)
              && (GET_CODE (SET_SRC (new_body)) == MEM));

  set_block_for_insn (new_insn, BLOCK_FOR_INSN (def_insn));
  df_insn_rescan (new_insn);

  df_insn_delete (def_insn);
  remove_insn (def_insn);
  def_insn->set_deleted ();

  /* Replace the swap with a copy.  */
  mark_swaps_for_removal (insn_entry, uid);
  replace_swap_with_copy (insn_entry, uid);
}
/* Given that SWAP_INSN represents a swap of a load of a constant
   vector value, replace with a single instruction that loads a
   swapped variant of the original constant.

   The "natural" representation of a byte array in memory is the same
   for big endian and little endian.

   unsigned char byte_array[] =
     { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f };

   However, when loaded into a vector register, the representation
   depends on endian conventions.

   In big-endian mode, the register holds:

     [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f ]

   In little-endian mode, the register holds:

     [ f, e, d, c, b, a, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 ]

   Word arrays require different handling.  Consider the word array:

   unsigned int word_array[] =
     { 0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f };

   The in-memory representation depends on endian configuration.  The
   equivalent array, declared as a byte array, in memory would be:

   unsigned char big_endian_word_array_data[] =
     { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f }

   unsigned char little_endian_word_array_data[] =
     { 3, 2, 1, 0, 7, 6, 5, 4, b, a, 9, 8, f, e, d, c }

   In big-endian mode, the register holds:

     [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f ]

   In little-endian mode, the register holds:

     [ c, d, e, f, 8, 9, a, b, 4, 5, 6, 7, 0, 1, 2, 3 ]

   Similar transformations apply to the vector of half-word and vector
   of double-word representations.

   For now, don't handle vectors of quad-precision values.  Just return.
   A better solution is to fix the code generator to emit lvx/stvx for
   those cases.  */
replace_swapped_load_constant (swap_web_entry *insn_entry, rtx swap_insn)
{
  /* Find the load.  */
  struct df_insn_info *insn_info = DF_INSN_INFO_GET (swap_insn);
  rtx_insn *load_insn;
  df_ref use = DF_INSN_INFO_USES (insn_info);
  struct df_link *def_link = DF_REF_CHAIN (use);
  gcc_assert (def_link && !def_link->next);

  load_insn = DF_REF_INSN (def_link->ref);
  gcc_assert (load_insn);

  /* Find the TOC-relative symbol access.  */
  insn_info = DF_INSN_INFO_GET (load_insn);
  use = DF_INSN_INFO_USES (insn_info);

  def_link = DF_REF_CHAIN (use);
  gcc_assert (def_link && !def_link->next);

  rtx_insn *tocrel_insn = DF_REF_INSN (def_link->ref);
  gcc_assert (tocrel_insn);

  /* Find the embedded CONST_VECTOR.  We have to call toc_relative_expr_p
     to set tocrel_base; otherwise it would be unnecessary as we've
     already established it will return true.  */
  rtx base, offset;
  rtx tocrel_expr = SET_SRC (PATTERN (tocrel_insn));
  const_rtx tocrel_base;

  /* There is an extra level of indirection for small/large code models.  */
  if (GET_CODE (tocrel_expr) == MEM)
    tocrel_expr = XEXP (tocrel_expr, 0);

  if (!toc_relative_expr_p (tocrel_expr, false, &tocrel_base, NULL))
    gcc_unreachable ();

  split_const (XVECEXP (tocrel_base, 0, 0), &base, &offset);
  rtx const_vector = get_pool_constant (base);

  /* With the extra indirection, get_pool_constant will produce the
     real constant from the reg_equal expression, so get the real
     constant.  */
  if (GET_CODE (const_vector) == SYMBOL_REF)
    const_vector = get_pool_constant (const_vector);
  gcc_assert (GET_CODE (const_vector) == CONST_VECTOR);

  rtx new_mem;
  enum machine_mode mode = GET_MODE (const_vector);

  /* Create an adjusted constant from the original constant.  */
  if (mode == V1TImode)
    /* Leave this code as is.  */
    return;
  else if (mode == V16QImode)
    {
      rtx vals = gen_rtx_PARALLEL (mode, rtvec_alloc (16));
      int i;

      for (i = 0; i < 16; i++)
	/* Rotate the doublewords: element i moves to (i + 8) mod 16.  */
	XVECEXP (vals, 0, ((i+8) % 16)) = XVECEXP (const_vector, 0, i);
      rtx new_const_vector = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
      new_mem = force_const_mem (mode, new_const_vector);
    }
  else if ((mode == V8HImode)
#ifdef HAVE_V8HFmode
	   || (mode == V8HFmode)
#endif
	   )
    {
      rtx vals = gen_rtx_PARALLEL (mode, rtvec_alloc (8));
      int i;

      for (i = 0; i < 8; i++)
	XVECEXP (vals, 0, ((i+4) % 8)) = XVECEXP (const_vector, 0, i);
      rtx new_const_vector = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
      new_mem = force_const_mem (mode, new_const_vector);
    }
  else if ((mode == V4SImode) || (mode == V4SFmode))
    {
      rtx vals = gen_rtx_PARALLEL (mode, rtvec_alloc (4));
      int i;

      for (i = 0; i < 4; i++)
	XVECEXP (vals, 0, ((i+2) % 4)) = XVECEXP (const_vector, 0, i);
      rtx new_const_vector = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
      new_mem = force_const_mem (mode, new_const_vector);
    }
  else if ((mode == V2DImode) || (mode == V2DFmode))
    {
      rtx vals = gen_rtx_PARALLEL (mode, rtvec_alloc (2));
      int i;

      for (i = 0; i < 2; i++)
	XVECEXP (vals, 0, ((i+1) % 2)) = XVECEXP (const_vector, 0, i);
      rtx new_const_vector = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
      new_mem = force_const_mem (mode, new_const_vector);
    }
  else
    {
      /* We do not expect other modes to be constant-load-swapped.  */
      gcc_unreachable ();
    }

  /* This gives us a MEM whose base operand is a SYMBOL_REF, which we
     can't recognize.  Force the SYMBOL_REF into a register.  */
  if (!REG_P (XEXP (new_mem, 0)))
    {
      rtx base_reg = force_reg (Pmode, XEXP (new_mem, 0));
      XEXP (new_mem, 0) = base_reg;

      /* Move the newly created insn ahead of the load insn.  */
      /* The last insn is the insn that forced new_mem into a register.  */
      rtx_insn *force_insn = get_last_insn ();
      /* Remove this insn from the end of the instruction sequence.  */
      remove_insn (force_insn);
      rtx_insn *before_load_insn = PREV_INSN (load_insn);

      /* And insert this insn back into the sequence before the previous
	 load insn so this new expression will be available when the
	 existing load is modified to load the swapped constant.  */
      add_insn_after (force_insn, before_load_insn, BLOCK_FOR_INSN (load_insn));
      df_insn_rescan (before_load_insn);
      df_insn_rescan (force_insn);
    }

  /* Replace the MEM in the load instruction and rescan it.  */
  XEXP (SET_SRC (PATTERN (load_insn)), 0) = new_mem;
  INSN_CODE (load_insn) = -1; /* Force re-recognition.  */
  df_insn_rescan (load_insn);

  unsigned int uid = INSN_UID (swap_insn);
  mark_swaps_for_removal (insn_entry, uid);
  replace_swap_with_copy (insn_entry, uid);
}
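
/* A sketch of the overall effect (register numbers and names are
   illustrative only):

     (set (reg:DI t) (const <toc-ref>))                    ; TOC reference
     (set (reg:V4SI x) (vec_select:V4SI (mem ...) ...))    ; permuting load
     (set (reg:V4SI y) (vec_select:V4SI (reg:V4SI x) ...)) ; xxswapdi

   is rewritten so that the load fetches the swapped constant built
   above, and the trailing xxswapdi becomes a simple copy that later
   forward propagation can eliminate.  */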

/* Dump the swap table to DUMP_FILE.  */
static void
dump_swap_insn_table (swap_web_entry *insn_entry)
{
  int e = get_max_uid ();
  fprintf (dump_file, "\nRelevant insns with their flag settings\n\n");

  for (int i = 0; i < e; ++i)
    if (insn_entry[i].is_relevant)
      {
	swap_web_entry *pred_entry = (swap_web_entry *)insn_entry[i].pred ();
	fprintf (dump_file, "%6d %6d ", i,
		 pred_entry && pred_entry->insn
		 ? INSN_UID (pred_entry->insn) : 0);
	if (insn_entry[i].is_load)
	  fputs ("load ", dump_file);
	if (insn_entry[i].is_store)
	  fputs ("store ", dump_file);
	if (insn_entry[i].is_swap)
	  fputs ("swap ", dump_file);
	if (insn_entry[i].is_live_in)
	  fputs ("live-in ", dump_file);
	if (insn_entry[i].is_live_out)
	  fputs ("live-out ", dump_file);
	if (insn_entry[i].contains_subreg)
	  fputs ("subreg ", dump_file);
	if (insn_entry[i].is_128_int)
	  fputs ("int128 ", dump_file);
	if (insn_entry[i].is_call)
	  fputs ("call ", dump_file);
	if (insn_entry[i].is_swappable)
	  {
	    fputs ("swappable ", dump_file);
	    if (insn_entry[i].special_handling == SH_CONST_VECTOR)
	      fputs ("special:constvec ", dump_file);
	    else if (insn_entry[i].special_handling == SH_SUBREG)
	      fputs ("special:subreg ", dump_file);
	    else if (insn_entry[i].special_handling == SH_NOSWAP_LD)
	      fputs ("special:load ", dump_file);
	    else if (insn_entry[i].special_handling == SH_NOSWAP_ST)
	      fputs ("special:store ", dump_file);
	    else if (insn_entry[i].special_handling == SH_EXTRACT)
	      fputs ("special:extract ", dump_file);
	    else if (insn_entry[i].special_handling == SH_SPLAT)
	      fputs ("special:splat ", dump_file);
	    else if (insn_entry[i].special_handling == SH_XXPERMDI)
	      fputs ("special:xxpermdi ", dump_file);
	    else if (insn_entry[i].special_handling == SH_CONCAT)
	      fputs ("special:concat ", dump_file);
	    else if (insn_entry[i].special_handling == SH_VPERM)
	      fputs ("special:vperm ", dump_file);
	  }
	if (insn_entry[i].web_not_optimizable)
	  fputs ("unoptimizable ", dump_file);
	if (insn_entry[i].will_delete)
	  fputs ("delete ", dump_file);
	fputs ("\n", dump_file);
      }
  fputs ("\n", dump_file);
}
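
/* A hypothetical fragment of the resulting dump (UIDs invented):

     Relevant insns with their flag settings

         12     10 load swap
         14     10 swap delete

   The first column is the insn UID, the second the UID of the insn in
   the entry's union-find predecessor (0 if none), followed by the flag
   names printed above.  */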

/* Return RTX with its address canonicalized to (reg) or (+ reg reg).
   Here RTX is an (& addr (const_int -16)).  Always return a new copy
   to avoid problems with combine.  */
static rtx
alignment_with_canonical_addr (rtx align)
{
  rtx canon;
  rtx addr = XEXP (align, 0);

  if (REG_P (addr))
    canon = addr;

  else if (GET_CODE (addr) == PLUS)
    {
      rtx addrop0 = XEXP (addr, 0);
      rtx addrop1 = XEXP (addr, 1);

      if (!REG_P (addrop0))
	addrop0 = force_reg (GET_MODE (addrop0), addrop0);

      if (!REG_P (addrop1))
	addrop1 = force_reg (GET_MODE (addrop1), addrop1);

      canon = gen_rtx_PLUS (GET_MODE (addr), addrop0, addrop1);
    }

  else
    canon = force_reg (GET_MODE (addr), addr);

  return gen_rtx_AND (GET_MODE (align), canon, GEN_INT (-16));
}
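
/* For example (a sketch): given (and (plus (reg:DI 3) (const_int 32))
   (const_int -16)), the const_int 32 operand is not a register, so it
   is forced into a fresh pseudo, and the result is
   (and (plus (reg:DI 3) (reg:DI <new>)) (const_int -16)), whose address
   operand now has the canonical (+ reg reg) shape.  */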

/* Check whether an rtx is an alignment mask, and if so, return
   a fully-expanded rtx for the masking operation.  */
static rtx
alignment_mask (rtx_insn *insn)
{
  rtx body = PATTERN (insn);

  if (GET_CODE (body) != SET
      || GET_CODE (SET_SRC (body)) != AND
      || !REG_P (XEXP (SET_SRC (body), 0)))
    return 0;

  rtx mask = XEXP (SET_SRC (body), 1);

  if (GET_CODE (mask) == CONST_INT)
    {
      if (INTVAL (mask) == -16)
	return alignment_with_canonical_addr (SET_SRC (body));
      else
	return 0;
    }

  if (!REG_P (mask))
    return 0;

  struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
  df_ref use;
  rtx real_mask = 0;

  FOR_EACH_INSN_INFO_USE (use, insn_info)
    {
      if (!rtx_equal_p (DF_REF_REG (use), mask))
	continue;

      struct df_link *def_link = DF_REF_CHAIN (use);
      if (!def_link || def_link->next)
	return 0;

      rtx_insn *const_insn = DF_REF_INSN (def_link->ref);
      rtx const_body = PATTERN (const_insn);
      if (GET_CODE (const_body) != SET)
	return 0;

      real_mask = SET_SRC (const_body);

      if (GET_CODE (real_mask) != CONST_INT
	  || INTVAL (real_mask) != -16)
	return 0;
    }

  if (real_mask == 0)
    return 0;

  return alignment_with_canonical_addr (SET_SRC (body));
}
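
/* Both forms below would be accepted (a sketch; register numbers
   invented):

     (set (reg:DI 200) (and (reg:DI 100) (const_int -16)))

   or, with the mask defined in a separate insn,

     (set (reg:DI 150) (const_int -16))
     (set (reg:DI 200) (and (reg:DI 100) (reg:DI 150)))  */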

/* Given INSN that's a load or store based at BASE_REG, look for a
   feeding computation that aligns its address on a 16-byte boundary.
   Return the rtx and its containing AND_INSN.  */
static rtx
find_alignment_op (rtx_insn *insn, rtx base_reg, rtx_insn **and_insn)
{
  df_ref base_use;
  struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
  rtx and_operation = 0;

  FOR_EACH_INSN_INFO_USE (base_use, insn_info)
    {
      if (!rtx_equal_p (DF_REF_REG (base_use), base_reg))
	continue;

      struct df_link *base_def_link = DF_REF_CHAIN (base_use);
      if (!base_def_link || base_def_link->next)
	break;

      /* With stack-protector code enabled, and possibly in other
	 circumstances, there may not be an associated insn for
	 the def.  */
      if (DF_REF_IS_ARTIFICIAL (base_def_link->ref))
	break;

      *and_insn = DF_REF_INSN (base_def_link->ref);
      and_operation = alignment_mask (*and_insn);
      if (and_operation != 0)
	break;
    }

  return and_operation;
}

struct del_info { bool replace; rtx_insn *replace_insn; };

/* If INSN is the load for an lvx pattern, put it in canonical form.  */
static void
recombine_lvx_pattern (rtx_insn *insn, del_info *to_delete)
{
  rtx body = PATTERN (insn);
  gcc_assert (GET_CODE (body) == SET
	      && GET_CODE (SET_SRC (body)) == VEC_SELECT
	      && GET_CODE (XEXP (SET_SRC (body), 0)) == MEM);

  rtx mem = XEXP (SET_SRC (body), 0);
  rtx base_reg = XEXP (mem, 0);

  rtx_insn *and_insn;
  rtx and_operation = find_alignment_op (insn, base_reg, &and_insn);

  if (and_operation != 0)
    {
      df_ref def;
      struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
      FOR_EACH_INSN_INFO_DEF (def, insn_info)
	{
	  struct df_link *link = DF_REF_CHAIN (def);
	  if (!link || link->next)
	    break;

	  rtx_insn *swap_insn = DF_REF_INSN (link->ref);
	  if (!insn_is_swap_p (swap_insn)
	      || insn_is_load_p (swap_insn)
	      || insn_is_store_p (swap_insn))
	    break;

	  /* Expected lvx pattern found.  Change the swap to
	     a copy, and propagate the AND operation into the
	     load.  */
	  to_delete[INSN_UID (swap_insn)].replace = true;
	  to_delete[INSN_UID (swap_insn)].replace_insn = swap_insn;

	  /* However, first we must be sure that we make the
	     base register from the AND operation available
	     in case the register has been overwritten.  Copy
	     the base register to a new pseudo and use that
	     as the base register of the AND operation in
	     the new LVX instruction.  */
	  rtx and_base = XEXP (and_operation, 0);
	  rtx new_reg = gen_reg_rtx (GET_MODE (and_base));
	  rtx copy = gen_rtx_SET (new_reg, and_base);
	  rtx_insn *new_insn = emit_insn_after (copy, and_insn);
	  set_block_for_insn (new_insn, BLOCK_FOR_INSN (and_insn));
	  df_insn_rescan (new_insn);

	  XEXP (mem, 0) = gen_rtx_AND (GET_MODE (and_base), new_reg,
				       XEXP (and_operation, 1));
	  SET_SRC (body) = mem;
	  INSN_CODE (insn) = -1; /* Force re-recognition.  */
	  df_insn_rescan (insn);

	  if (dump_file)
	    fprintf (dump_file, "lvx opportunity found at %d\n",
		     INSN_UID (insn));
	}
    }
}
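
/* A sketch of the lvx rewrite (the stvx case below is symmetric):

   before:  (set (reg:DI b) (and (reg:DI a) (const_int -16)))
	    (set (reg:V4SI x) (vec_select:V4SI (mem:V4SI (reg:DI b)) ...))
	    (set (reg:V4SI y) (vec_select:V4SI (reg:V4SI x) ...))

   after:   the alignment AND is folded into the load's address,
	    (set (reg:V4SI x) (mem:V4SI (and:DI (reg:DI b') (const_int -16))))
	    where b' is a fresh copy of the AND's base register, and the
	    trailing swap is queued in TO_DELETE to become a copy.  */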

/* If INSN is the store for an stvx pattern, put it in canonical form.  */
static void
recombine_stvx_pattern (rtx_insn *insn, del_info *to_delete)
{
  rtx body = PATTERN (insn);
  gcc_assert (GET_CODE (body) == SET
	      && GET_CODE (SET_DEST (body)) == MEM
	      && GET_CODE (SET_SRC (body)) == VEC_SELECT);
  rtx mem = SET_DEST (body);
  rtx base_reg = XEXP (mem, 0);

  rtx_insn *and_insn;
  rtx and_operation = find_alignment_op (insn, base_reg, &and_insn);

  if (and_operation != 0)
    {
      rtx src_reg = XEXP (SET_SRC (body), 0);

      df_ref src_use;
      struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
      FOR_EACH_INSN_INFO_USE (src_use, insn_info)
	{
	  if (!rtx_equal_p (DF_REF_REG (src_use), src_reg))
	    continue;

	  struct df_link *link = DF_REF_CHAIN (src_use);
	  if (!link || link->next)
	    break;

	  rtx_insn *swap_insn = DF_REF_INSN (link->ref);
	  if (!insn_is_swap_p (swap_insn)
	      || insn_is_load_p (swap_insn)
	      || insn_is_store_p (swap_insn))
	    break;

	  /* Expected stvx pattern found.  Change the swap to
	     a copy, and propagate the AND operation into the
	     store.  */
	  to_delete[INSN_UID (swap_insn)].replace = true;
	  to_delete[INSN_UID (swap_insn)].replace_insn = swap_insn;

	  /* However, first we must be sure that we make the
	     base register from the AND operation available
	     in case the register has been overwritten.  Copy
	     the base register to a new pseudo and use that
	     as the base register of the AND operation in
	     the new STVX instruction.  */
	  rtx and_base = XEXP (and_operation, 0);
	  rtx new_reg = gen_reg_rtx (GET_MODE (and_base));
	  rtx copy = gen_rtx_SET (new_reg, and_base);
	  rtx_insn *new_insn = emit_insn_after (copy, and_insn);
	  set_block_for_insn (new_insn, BLOCK_FOR_INSN (and_insn));
	  df_insn_rescan (new_insn);

	  XEXP (mem, 0) = gen_rtx_AND (GET_MODE (and_base), new_reg,
				       XEXP (and_operation, 1));
	  SET_SRC (body) = src_reg;
	  INSN_CODE (insn) = -1; /* Force re-recognition.  */
	  df_insn_rescan (insn);

	  if (dump_file)
	    fprintf (dump_file, "stvx opportunity found at %d\n",
		     INSN_UID (insn));
	}
    }
}

/* Look for patterns created from builtin lvx and stvx calls, and
   canonicalize them to be properly recognized as such.  */
static void
recombine_lvx_stvx_patterns (function *fun)
{
  int i;
  basic_block bb;
  rtx_insn *insn;

  int num_insns = get_max_uid ();
  del_info *to_delete = XCNEWVEC (del_info, num_insns);

  FOR_ALL_BB_FN (bb, fun)
    FOR_BB_INSNS (bb, insn)
      {
	if (!NONDEBUG_INSN_P (insn))
	  continue;

	if (insn_is_load_p (insn) && insn_is_swap_p (insn))
	  recombine_lvx_pattern (insn, to_delete);
	else if (insn_is_store_p (insn) && insn_is_swap_p (insn))
	  recombine_stvx_pattern (insn, to_delete);
      }

  /* Turning swaps into copies is delayed until now, to avoid problems
     with deleting instructions during the insn walk.  */
  for (i = 0; i < num_insns; i++)
    if (to_delete[i].replace)
      {
	rtx swap_body = PATTERN (to_delete[i].replace_insn);
	rtx src_reg = XEXP (SET_SRC (swap_body), 0);
	rtx copy = gen_rtx_SET (SET_DEST (swap_body), src_reg);
	rtx_insn *new_insn = emit_insn_before (copy,
					       to_delete[i].replace_insn);
	set_block_for_insn (new_insn,
			    BLOCK_FOR_INSN (to_delete[i].replace_insn));
	df_insn_rescan (new_insn);
	df_insn_delete (to_delete[i].replace_insn);
	remove_insn (to_delete[i].replace_insn);
	to_delete[i].replace_insn->set_deleted ();
      }

  free (to_delete);
}

/* Main entry point for this pass.  */
static unsigned int
rs6000_analyze_swaps (function *fun)
{
  swap_web_entry *insn_entry;
  basic_block bb;
  rtx_insn *insn, *curr_insn = 0;

  /* Dataflow analysis for use-def chains.  */
  df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
  df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
  df_analyze ();
  df_set_flags (DF_DEFER_INSN_RESCAN);

  /* Pre-pass to recombine lvx and stvx patterns so we don't lose info.  */
  recombine_lvx_stvx_patterns (fun);
  df_process_deferred_rescans ();

  /* Allocate structure to represent webs of insns.  */
  insn_entry = XCNEWVEC (swap_web_entry, get_max_uid ());

  /* Walk the insns to gather basic data.  */
  FOR_ALL_BB_FN (bb, fun)
    FOR_BB_INSNS_SAFE (bb, insn, curr_insn)
      {
	unsigned int uid = INSN_UID (insn);
	if (NONDEBUG_INSN_P (insn))
	  {
	    insn_entry[uid].insn = insn;

	    if (GET_CODE (insn) == CALL_INSN)
	      insn_entry[uid].is_call = 1;

	    /* Walk the uses and defs to see if we mention vector regs.
	       Record any constraints on optimization of such mentions.  */
	    struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
	    df_ref mention;
	    FOR_EACH_INSN_INFO_USE (mention, insn_info)
	      {
		/* We use DF_REF_REAL_REG here to get inside any subregs.  */
		machine_mode mode = GET_MODE (DF_REF_REAL_REG (mention));

		/* If a use gets its value from a call insn, it will be
		   a hard register and will look like (reg:V4SI 3 3).
		   The df analysis creates two mentions for GPR3 and GPR4,
		   both DImode.  We must recognize this and treat it as a
		   vector mention to ensure the call is unioned with this
		   use.  */
		if (mode == DImode && DF_REF_INSN_INFO (mention))
		  {
		    rtx feeder = DF_REF_INSN (mention);
		    /* FIXME: It is pretty hard to get from the df mention
		       to the mode of the use in the insn.  We arbitrarily
		       pick a vector mode here, even though the use might
		       be a real DImode.  We can be too conservative
		       (create a web larger than necessary) because of
		       this, so consider eventually fixing this.  */
		    if (GET_CODE (feeder) == CALL_INSN)
		      mode = V4SImode;
		  }

		if (ALTIVEC_OR_VSX_VECTOR_MODE (mode) || mode == TImode)
		  {
		    insn_entry[uid].is_relevant = 1;
		    if (mode == TImode || mode == V1TImode
			|| FLOAT128_VECTOR_P (mode))
		      insn_entry[uid].is_128_int = 1;
		    if (DF_REF_INSN_INFO (mention))
		      insn_entry[uid].contains_subreg
			= !rtx_equal_p (DF_REF_REG (mention),
					DF_REF_REAL_REG (mention));
		    union_defs (insn_entry, insn, mention);
		  }
	      }
	    FOR_EACH_INSN_INFO_DEF (mention, insn_info)
	      {
		/* We use DF_REF_REAL_REG here to get inside any subregs.  */
		machine_mode mode = GET_MODE (DF_REF_REAL_REG (mention));

		/* If we're loading up a hard vector register for a call,
		   it looks like (set (reg:V4SI 9 9) (...)).  The df
		   analysis creates two mentions for GPR9 and GPR10, both
		   DImode.  So relying on the mode from the mentions
		   isn't sufficient to ensure we union the call into the
		   web with the parameter setup code.  */
		if (mode == DImode && GET_CODE (insn) == SET
		    && ALTIVEC_OR_VSX_VECTOR_MODE (GET_MODE (SET_DEST (insn))))
		  mode = GET_MODE (SET_DEST (insn));

		if (ALTIVEC_OR_VSX_VECTOR_MODE (mode) || mode == TImode)
		  {
		    insn_entry[uid].is_relevant = 1;
		    if (mode == TImode || mode == V1TImode
			|| FLOAT128_VECTOR_P (mode))
		      insn_entry[uid].is_128_int = 1;
		    if (DF_REF_INSN_INFO (mention))
		      insn_entry[uid].contains_subreg
			= !rtx_equal_p (DF_REF_REG (mention),
					DF_REF_REAL_REG (mention));
		    /* REG_FUNCTION_VALUE_P is not valid for subregs.  */
		    else if (REG_FUNCTION_VALUE_P (DF_REF_REG (mention)))
		      insn_entry[uid].is_live_out = 1;
		    union_uses (insn_entry, insn, mention);
		  }
	      }

	    if (insn_entry[uid].is_relevant)
	      {
		/* Determine if this is a load or store.  */
		insn_entry[uid].is_load = insn_is_load_p (insn);
		insn_entry[uid].is_store = insn_is_store_p (insn);

		/* Determine if this is a doubleword swap.  If not,
		   determine whether it can legally be swapped.  */
		if (insn_is_swap_p (insn))
		  insn_entry[uid].is_swap = 1;
		else
		  {
		    unsigned int special = SH_NONE;
		    insn_entry[uid].is_swappable
		      = insn_is_swappable_p (insn_entry, insn, &special);
		    if (special != SH_NONE && insn_entry[uid].contains_subreg)
		      insn_entry[uid].is_swappable = 0;
		    else if (special != SH_NONE)
		      insn_entry[uid].special_handling = special;
		    else if (insn_entry[uid].contains_subreg)
		      insn_entry[uid].special_handling = SH_SUBREG;
		  }
	      }
	  }
      }

  if (dump_file)
    {
      fprintf (dump_file, "\nSwap insn entry table when first built\n");
      dump_swap_insn_table (insn_entry);
    }

  /* Record unoptimizable webs.  */
  unsigned e = get_max_uid (), i;
  for (i = 0; i < e; ++i)
    {
      if (!insn_entry[i].is_relevant)
	continue;

      swap_web_entry *root
	= (swap_web_entry*)(&insn_entry[i])->unionfind_root ();

      if (insn_entry[i].is_live_in || insn_entry[i].is_live_out
	  || (insn_entry[i].contains_subreg
	      && insn_entry[i].special_handling != SH_SUBREG)
	  || insn_entry[i].is_128_int || insn_entry[i].is_call
	  || !(insn_entry[i].is_swappable || insn_entry[i].is_swap))
	root->web_not_optimizable = 1;

      /* If we have loads or stores that aren't permuting then the
	 optimization isn't appropriate.  */
      else if ((insn_entry[i].is_load || insn_entry[i].is_store)
	       && !insn_entry[i].is_swap && !insn_entry[i].is_swappable)
	root->web_not_optimizable = 1;

      /* If we have a swap that is both fed by a permuting load
	 and a feeder of a permuting store, then the optimization
	 isn't appropriate.  (Consider vec_xl followed by vec_xst_be.)  */
      else if (insn_entry[i].is_swap && !insn_entry[i].is_load
	       && !insn_entry[i].is_store
	       && swap_feeds_both_load_and_store (&insn_entry[i]))
	root->web_not_optimizable = 1;

      /* If we have permuting loads or stores that are not accompanied
	 by a register swap, the optimization isn't appropriate.  */
      else if (insn_entry[i].is_load && insn_entry[i].is_swap)
	{
	  rtx insn = insn_entry[i].insn;
	  struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
	  df_ref def;

	  FOR_EACH_INSN_INFO_DEF (def, insn_info)
	    {
	      struct df_link *link = DF_REF_CHAIN (def);

	      if (!chain_contains_only_swaps (insn_entry, link, FOR_LOADS))
		{
		  root->web_not_optimizable = 1;
		  break;
		}
	    }
	}
      else if (insn_entry[i].is_store && insn_entry[i].is_swap)
	{
	  rtx insn = insn_entry[i].insn;
	  struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
	  df_ref use;

	  FOR_EACH_INSN_INFO_USE (use, insn_info)
	    {
	      struct df_link *link = DF_REF_CHAIN (use);

	      if (!chain_contains_only_swaps (insn_entry, link, FOR_STORES))
		{
		  root->web_not_optimizable = 1;
		  break;
		}
	    }
	}
    }

  if (dump_file)
    {
      fprintf (dump_file, "\nSwap insn entry table after web analysis\n");
      dump_swap_insn_table (insn_entry);
    }

  /* For each load and store in an optimizable web (which implies
     the loads and stores are permuting), find the associated
     register swaps and mark them for removal.  Due to various
     optimizations we may mark the same swap more than once.  Also
     perform special handling for swappable insns that require it.  */
  for (i = 0; i < e; ++i)
    if ((insn_entry[i].is_load || insn_entry[i].is_store)
	&& insn_entry[i].is_swap)
      {
	swap_web_entry* root_entry
	  = (swap_web_entry*)((&insn_entry[i])->unionfind_root ());
	if (!root_entry->web_not_optimizable)
	  mark_swaps_for_removal (insn_entry, i);
      }
    else if (insn_entry[i].is_swappable && insn_entry[i].special_handling)
      {
	swap_web_entry* root_entry
	  = (swap_web_entry*)((&insn_entry[i])->unionfind_root ());
	if (!root_entry->web_not_optimizable)
	  handle_special_swappables (insn_entry, i);
      }

  /* Now delete the swaps marked for removal.  */
  for (i = 0; i < e; ++i)
    if (insn_entry[i].will_delete)
      replace_swap_with_copy (insn_entry, i);

  free (insn_entry);

  /* Use a second pass over rtl to detect that certain vector values
     fetched from or stored to memory on quad-word aligned addresses
     can use lvx/stvx without swaps.  */

  /* First, rebuild ud chains.  */
  df_remove_problem (df_chain);
  df_process_deferred_rescans ();
  df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
  df_chain_add_problem (DF_UD_CHAIN);
  df_analyze ();

  swap_web_entry *pass2_insn_entry;
  pass2_insn_entry = XCNEWVEC (swap_web_entry, get_max_uid ());

  /* Walk the insns to gather basic data.  */
  FOR_ALL_BB_FN (bb, fun)
    FOR_BB_INSNS_SAFE (bb, insn, curr_insn)
      {
	unsigned int uid = INSN_UID (insn);
	if (NONDEBUG_INSN_P (insn))
	  {
	    pass2_insn_entry[uid].insn = insn;

	    pass2_insn_entry[uid].is_relevant = 1;
	    pass2_insn_entry[uid].is_load = insn_is_load_p (insn);
	    pass2_insn_entry[uid].is_store = insn_is_store_p (insn);

	    /* Determine if this is a doubleword swap.  If not,
	       determine whether it can legally be swapped.  */
	    if (insn_is_swap_p (insn))
	      pass2_insn_entry[uid].is_swap = 1;
	  }
      }

  e = get_max_uid ();
  for (unsigned i = 0; i < e; ++i)
    if (pass2_insn_entry[i].is_swap && !pass2_insn_entry[i].is_load
	&& !pass2_insn_entry[i].is_store)
      {
	/* Replace swap of aligned load-swap with aligned unswapped
	   load.  */
	rtx_insn *rtx_insn = pass2_insn_entry[i].insn;
	if (quad_aligned_load_p (pass2_insn_entry, rtx_insn))
	  replace_swapped_aligned_load (pass2_insn_entry, rtx_insn);
      }
    else if (pass2_insn_entry[i].is_swap && pass2_insn_entry[i].is_store)
      {
	/* Replace aligned store-swap of swapped value with aligned
	   unswapped store.  */
	rtx_insn *rtx_insn = pass2_insn_entry[i].insn;
	if (quad_aligned_store_p (pass2_insn_entry, rtx_insn))
	  replace_swapped_aligned_store (pass2_insn_entry, rtx_insn);
      }

  free (pass2_insn_entry);
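
  /* In other words: where quad_aligned_load_p/quad_aligned_store_p
     (defined earlier in this file) can establish that the accessed
     address is 16-byte aligned, the permuting load or store and its
     associated swap are replaced with an lvx/stvx form that needs no
     swap at all.  */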

  /* Use a third pass over rtl to replace swap(load(vector constant))
     with load(swapped vector constant).  */

  /* First, rebuild ud chains.  */
  df_remove_problem (df_chain);
  df_process_deferred_rescans ();
  df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
  df_chain_add_problem (DF_UD_CHAIN);
  df_analyze ();

  swap_web_entry *pass3_insn_entry;
  pass3_insn_entry = XCNEWVEC (swap_web_entry, get_max_uid ());

  /* Walk the insns to gather basic data.  */
  FOR_ALL_BB_FN (bb, fun)
    FOR_BB_INSNS_SAFE (bb, insn, curr_insn)
      {
	unsigned int uid = INSN_UID (insn);
	if (NONDEBUG_INSN_P (insn))
	  {
	    pass3_insn_entry[uid].insn = insn;

	    pass3_insn_entry[uid].is_relevant = 1;
	    pass3_insn_entry[uid].is_load = insn_is_load_p (insn);
	    pass3_insn_entry[uid].is_store = insn_is_store_p (insn);

	    /* Determine if this is a doubleword swap.  If not,
	       determine whether it can legally be swapped.  */
	    if (insn_is_swap_p (insn))
	      pass3_insn_entry[uid].is_swap = 1;
	  }
      }

  e = get_max_uid ();
  for (unsigned i = 0; i < e; ++i)
    if (pass3_insn_entry[i].is_swap && !pass3_insn_entry[i].is_load
	&& !pass3_insn_entry[i].is_store)
      {
	insn = pass3_insn_entry[i].insn;
	if (const_load_sequence_p (pass3_insn_entry, insn))
	  replace_swapped_load_constant (pass3_insn_entry, insn);
      }

  free (pass3_insn_entry);
  return 0;
}

const pass_data pass_data_analyze_swaps =
{
  RTL_PASS, /* type */
  "swaps", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_NONE, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_df_finish, /* todo_flags_finish */
};
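
/* The gate below also excludes TARGET_P9_VECTOR: ISA 3.0 provides
   vector loads and stores (e.g. lxvx/stxvx) that preserve element
   order in little-endian mode, so the xxswapdi idiom this pass
   removes is not generated there.  */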

class pass_analyze_swaps : public rtl_opt_pass
{
public:
  pass_analyze_swaps (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_analyze_swaps, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      return (optimize > 0 && !BYTES_BIG_ENDIAN && TARGET_VSX
	      && !TARGET_P9_VECTOR && rs6000_optimize_swaps);
    }

  virtual unsigned int execute (function *fun)
    {
      return rs6000_analyze_swaps (fun);
    }

  opt_pass *clone ()
    {
      return new pass_analyze_swaps (m_ctxt);
    }

}; // class pass_analyze_swaps

rtl_opt_pass *
make_pass_analyze_swaps (gcc::context *ctxt)
{
  return new pass_analyze_swaps (ctxt);
}