gcc/config/rs6000/rs6000-p8swap.c
1 /* Subroutines used to remove unnecessary doubleword swaps
2 for p8 little-endian VSX code.
3 Copyright (C) 1991-2018 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published
9 by the Free Software Foundation; either version 3, or (at your
10 option) any later version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
15 License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #define IN_TARGET_CODE 1
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "backend.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "memmodel.h"
30 #include "df.h"
31 #include "tm_p.h"
32 #include "ira.h"
33 #include "print-tree.h"
34 #include "varasm.h"
35 #include "explow.h"
36 #include "expr.h"
37 #include "output.h"
38 #include "tree-pass.h"
39 #include "rtx-vector-builder.h"
41 /* Analyze vector computations and remove unnecessary doubleword
42 swaps (xxswapdi instructions). This pass is performed only
43 for little-endian VSX code generation.
45 For this specific case, loads and stores of 4x32 and 2x64 vectors
46 are inefficient. These are implemented using the lxvd2x and
47 stxvd2x instructions, which invert the order of doublewords in
48 a vector register. Thus the code generation inserts an xxswapdi
49 after each such load, and prior to each such store. (For spill
50 code after register assignment, an additional xxswapdi is inserted
51 following each store in order to return a hard register to its
52 unpermuted value.)
54 The extra xxswapdi instructions reduce performance. This can be
55 particularly bad for vectorized code. The purpose of this pass
56 is to reduce the number of xxswapdi instructions required for
57 correctness.
59 The primary insight is that much code that operates on vectors
60 does not care about the relative order of elements in a register,
61 so long as the correct memory order is preserved. If we have
62 a computation where all input values are provided by lxvd2x/xxswapdi
63 sequences, all outputs are stored using xxswapdi/stxvd2x sequences,
64 and all intermediate computations are pure SIMD (independent of
65 element order), then all the xxswapdi's associated with the loads
66 and stores may be removed.
68 This pass uses some of the infrastructure and logical ideas from
69 the "web" pass in web.c. We create maximal webs of computations
70 fitting the description above using union-find. Each such web is
71 then optimized by removing its unnecessary xxswapdi instructions.
73 The pass is placed prior to global optimization so that we can
74 perform the optimization in the safest and simplest way possible;
75 that is, by replacing each xxswapdi insn with a register copy insn.
76 Subsequent forward propagation will remove copies where possible.
78 There are some operations sensitive to element order for which we
79 can still allow the operation, provided we modify those operations.
80 These include CONST_VECTORs, for which we must swap the first and
81 second halves of the constant vector; and SUBREGs, for which we
82 must adjust the byte offset to account for the swapped doublewords.
83 A remaining opportunity would be non-immediate-form splats, for
84 which we should adjust the selected lane of the input. We should
85 also make code generation adjustments for sum-across operations,
86 since this is a common vectorizer reduction.
88 Because we run prior to the first split, we can see loads and stores
89 here that match *vsx_le_perm_{load,store}_<mode>. These are vanilla
90 vector loads and stores that have not yet been split into a permuting
91 load/store and a swap. (One way this can happen is with a builtin
92 call to vec_vsx_{ld,st}.) We can handle these as well, but rather
93 than deleting a swap, we convert the load/store into a permuting
94 load/store (which effectively removes the swap). */
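/* As an illustrative example (assuming V2DImode), the little-endian
   load sequence targeted by this pass looks like:

     (set (reg:V2DI r) (vec_select:V2DI (mem:V2DI addr)
                                        (parallel [(const_int 1)
                                                   (const_int 0)])))
     (set (reg:V2DI s) (vec_select:V2DI (reg:V2DI r)
                                        (parallel [(const_int 1)
                                                   (const_int 0)])))

   i.e. a permuting load (lxvd2x) followed by a doubleword swap
   (xxswapdi).  When every computation feeding and fed by such
   sequences is element-order independent, the second insn can simply
   be replaced by the copy (set (reg:V2DI s) (reg:V2DI r)).  */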
96 /* Notes on Permutes
98 We do not currently handle computations that contain permutes. There
99 is a general transformation that can be performed correctly, but it
100 may introduce more expensive code than it replaces. To handle these
101 would require a cost model to determine when to perform the optimization.
102 This commentary records how this could be done if desired.
104 The most general permute is something like this (example for V16QI):
106 (vec_select:V16QI (vec_concat:V32QI (op1:V16QI) (op2:V16QI))
107 (parallel [(const_int a0) (const_int a1)
108 ...
109 (const_int a14) (const_int a15)]))
111 where a0,...,a15 are in [0,31] and select which elements from op1
112 and op2 appear in the result.
114 Regardless of mode, we can convert the PARALLEL to a mask of 16
115 byte-element selectors. Let's call this M, with M[i] representing
116 the ith byte-element selector value. Then if we swap doublewords
117 throughout the computation, we can get correct behavior by replacing
118 M with M' as follows:
120 M'[i] = { (M[i]+8)%16 : M[i] in [0,15]
121 { ((M[i]+8)%16)+16 : M[i] in [16,31]
123 This seems promising at first, since we are just replacing one mask
124 with another. But certain masks are preferable to others. If M
125 is a mask that matches a vmrghh pattern, for example, M' certainly
126 will not. Instead of a single vmrghh, we would generate a load of
127 M' and a vperm. So we would need to know how many xxswapd's we can
128 remove as a result of this transformation to determine if it's
129 profitable; and preferably the logic would need to be aware of all
130 the special preferable masks.
132 Another form of permute is an UNSPEC_VPERM, in which the mask is
133 already in a register. In some cases, this mask may be a constant
134 that we can discover with ud-chains, in which case the above
135 transformation is ok. However, the common usage here is for the
136 mask to be produced by an UNSPEC_LVSL, in which case the mask
137 cannot be known at compile time. In such a case we would have to
138 generate several instructions to compute M' as above at run time,
139 and a cost model is needed again.
141 However, when the mask M for an UNSPEC_VPERM is loaded from the
142 constant pool, we can replace M with M' as above at no cost
143 beyond adding a constant pool entry. */
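/* The following is purely an illustrative sketch of the M -> M'
   rewrite described above; it is not used by the pass itself, and the
   function name and types are hypothetical.  adjust_vperm below
   performs the equivalent computation on a CONST_VECTOR mask loaded
   from the constant pool.  */
#if 0
static void
remap_vperm_mask (const unsigned int *m, unsigned int *m_prime)
{
  for (unsigned int i = 0; i < 16; ++i)
    /* Rotate each selector by a doubleword within its source vector,
       keeping selectors for the second source in the range 16..31.  */
    m_prime[i] = (m[i] < 16
                  ? (m[i] + 8) % 16
                  : ((m[i] + 8) % 16) + 16);
}
#endif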
145 /* This is based on the union-find logic in web.c. web_entry_base is
146 defined in df.h. */
147 class swap_web_entry : public web_entry_base
149 public:
150 /* Pointer to the insn. */
151 rtx_insn *insn;
152 /* Set if insn contains a mention of a vector register. All other
153 fields are undefined if this field is unset. */
154 unsigned int is_relevant : 1;
155 /* Set if insn is a load. */
156 unsigned int is_load : 1;
157 /* Set if insn is a store. */
158 unsigned int is_store : 1;
159 /* Set if insn is a doubleword swap. This can either be a register swap
160 or a permuting load or store (test is_load and is_store for this). */
161 unsigned int is_swap : 1;
162 /* Set if the insn has a live-in use of a parameter register. */
163 unsigned int is_live_in : 1;
164 /* Set if the insn has a live-out def of a return register. */
165 unsigned int is_live_out : 1;
166 /* Set if the insn contains a subreg reference of a vector register. */
167 unsigned int contains_subreg : 1;
168 /* Set if the insn contains a 128-bit integer operand. */
169 unsigned int is_128_int : 1;
170 /* Set if this is a call-insn. */
171 unsigned int is_call : 1;
172 /* Set if this insn does not perform a vector operation for which
173 element order matters, or if we know how to fix it up if it does.
174 Undefined if is_swap is set. */
175 unsigned int is_swappable : 1;
176 /* A nonzero value indicates what kind of special handling for this
177 insn is required if doublewords are swapped. Undefined if
178 is_swappable is not set. */
179 unsigned int special_handling : 4;
180 /* Set if the web represented by this entry cannot be optimized. */
181 unsigned int web_not_optimizable : 1;
182 /* Set if this insn should be deleted. */
183 unsigned int will_delete : 1;
186 enum special_handling_values {
187 SH_NONE = 0,
188 SH_CONST_VECTOR,
189 SH_SUBREG,
190 SH_NOSWAP_LD,
191 SH_NOSWAP_ST,
192 SH_EXTRACT,
193 SH_SPLAT,
194 SH_XXPERMDI,
195 SH_CONCAT,
196 SH_VPERM
199 /* Union INSN with all insns containing definitions that reach USE.
200 Detect whether USE is live-in to the current function. */
201 static void
202 union_defs (swap_web_entry *insn_entry, rtx insn, df_ref use)
204 struct df_link *link = DF_REF_CHAIN (use);
206 if (!link)
207 insn_entry[INSN_UID (insn)].is_live_in = 1;
209 while (link)
211 if (DF_REF_IS_ARTIFICIAL (link->ref))
212 insn_entry[INSN_UID (insn)].is_live_in = 1;
214 if (DF_REF_INSN_INFO (link->ref))
216 rtx def_insn = DF_REF_INSN (link->ref);
217 (void)unionfind_union (insn_entry + INSN_UID (insn),
218 insn_entry + INSN_UID (def_insn));
221 link = link->next;
225 /* Union INSN with all insns containing uses reached from DEF.
226 Detect whether DEF is live-out from the current function. */
227 static void
228 union_uses (swap_web_entry *insn_entry, rtx insn, df_ref def)
230 struct df_link *link = DF_REF_CHAIN (def);
232 if (!link)
233 insn_entry[INSN_UID (insn)].is_live_out = 1;
235 while (link)
237 /* This could be an eh use or some other artificial use;
238 we treat these all the same (killing the optimization). */
239 if (DF_REF_IS_ARTIFICIAL (link->ref))
240 insn_entry[INSN_UID (insn)].is_live_out = 1;
242 if (DF_REF_INSN_INFO (link->ref))
244 rtx use_insn = DF_REF_INSN (link->ref);
245 (void)unionfind_union (insn_entry + INSN_UID (insn),
246 insn_entry + INSN_UID (use_insn));
249 link = link->next;
253 /* Return 1 iff INSN is a load insn, including permuting loads that
254 represent an lxvd2x instruction; else return 0. */
255 static unsigned int
256 insn_is_load_p (rtx insn)
258 rtx body = PATTERN (insn);
260 if (GET_CODE (body) == SET)
262 if (GET_CODE (SET_SRC (body)) == MEM)
263 return 1;
265 if (GET_CODE (SET_SRC (body)) == VEC_SELECT
266 && GET_CODE (XEXP (SET_SRC (body), 0)) == MEM)
267 return 1;
269 return 0;
272 if (GET_CODE (body) != PARALLEL)
273 return 0;
275 rtx set = XVECEXP (body, 0, 0);
277 if (GET_CODE (set) == SET && GET_CODE (SET_SRC (set)) == MEM)
278 return 1;
280 return 0;
283 /* Return 1 iff INSN is a store insn, including permuting stores that
284 represent an stxvd2x instruction; else return 0. */
285 static unsigned int
286 insn_is_store_p (rtx insn)
288 rtx body = PATTERN (insn);
289 if (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == MEM)
290 return 1;
291 if (GET_CODE (body) != PARALLEL)
292 return 0;
293 rtx set = XVECEXP (body, 0, 0);
294 if (GET_CODE (set) == SET && GET_CODE (SET_DEST (set)) == MEM)
295 return 1;
296 return 0;
299 /* Return 1 iff INSN swaps doublewords. This may be a reg-reg swap,
300 a permuting load, or a permuting store. */
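/* For example, for a V4SImode operand the doubleword swap recognized
   here has the form (illustrative):

     (vec_select:V4SI (reg:V4SI x)
                      (parallel [(const_int 2) (const_int 3)
                                 (const_int 0) (const_int 1)]))  */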
301 static unsigned int
302 insn_is_swap_p (rtx insn)
304 rtx body = PATTERN (insn);
305 if (GET_CODE (body) != SET)
306 return 0;
307 rtx rhs = SET_SRC (body);
308 if (GET_CODE (rhs) != VEC_SELECT)
309 return 0;
310 rtx parallel = XEXP (rhs, 1);
311 if (GET_CODE (parallel) != PARALLEL)
312 return 0;
313 unsigned int len = XVECLEN (parallel, 0);
314 if (len != 2 && len != 4 && len != 8 && len != 16)
315 return 0;
316 for (unsigned int i = 0; i < len / 2; ++i)
318 rtx op = XVECEXP (parallel, 0, i);
319 if (GET_CODE (op) != CONST_INT || INTVAL (op) != len / 2 + i)
320 return 0;
322 for (unsigned int i = len / 2; i < len; ++i)
324 rtx op = XVECEXP (parallel, 0, i);
325 if (GET_CODE (op) != CONST_INT || INTVAL (op) != i - len / 2)
326 return 0;
328 return 1;
331 /* Return true iff EXPR represents the sum of two registers. */
332 bool
333 rs6000_sum_of_two_registers_p (const_rtx expr)
335 if (GET_CODE (expr) == PLUS)
337 const_rtx operand1 = XEXP (expr, 0);
338 const_rtx operand2 = XEXP (expr, 1);
339 return (REG_P (operand1) && REG_P (operand2));
341 return false;
344 /* Return true iff EXPR represents an address expression that masks off
345 the low-order 4 bits in the style of an lvx or stvx rtl pattern. */
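/* For instance, the address forms accepted here are (illustrative,
   shown for DImode pointers):

     (and:DI (reg:DI rb) (const_int -16))
     (and:DI (plus:DI (reg:DI ra) (reg:DI rb)) (const_int -16))  */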
346 bool
347 rs6000_quadword_masked_address_p (const_rtx expr)
349 if (GET_CODE (expr) == AND)
351 const_rtx operand1 = XEXP (expr, 0);
352 const_rtx operand2 = XEXP (expr, 1);
353 if ((REG_P (operand1) || rs6000_sum_of_two_registers_p (operand1))
354 && CONST_SCALAR_INT_P (operand2) && INTVAL (operand2) == -16)
355 return true;
357 return false;
360 /* Return TRUE if INSN represents a swap of a swapped load from memory
361 and the memory address is quad-word aligned. */
362 static bool
363 quad_aligned_load_p (swap_web_entry *insn_entry, rtx_insn *insn)
365 unsigned uid = INSN_UID (insn);
366 if (!insn_entry[uid].is_swap || insn_entry[uid].is_load)
367 return false;
369 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
371 /* Since insn is known to represent a swap instruction, we know it
372 "uses" only one input variable. */
373 df_ref use = DF_INSN_INFO_USES (insn_info);
375 /* Figure out where this input variable is defined. */
376 struct df_link *def_link = DF_REF_CHAIN (use);
378 /* If there is no definition or the definition is artificial or there are
379 multiple definitions, punt. */
380 if (!def_link || !def_link->ref || DF_REF_IS_ARTIFICIAL (def_link->ref)
381 || def_link->next)
382 return false;
384 rtx def_insn = DF_REF_INSN (def_link->ref);
385 unsigned uid2 = INSN_UID (def_insn);
386 /* We're looking for a load-with-swap insn. If this is not that,
387 return false. */
388 if (!insn_entry[uid2].is_load || !insn_entry[uid2].is_swap)
389 return false;
391 /* If the source of the rtl def is not a set from memory, return
392 false. */
393 rtx body = PATTERN (def_insn);
394 if (GET_CODE (body) != SET
395 || GET_CODE (SET_SRC (body)) != VEC_SELECT
396 || GET_CODE (XEXP (SET_SRC (body), 0)) != MEM)
397 return false;
399 rtx mem = XEXP (SET_SRC (body), 0);
400 rtx base_reg = XEXP (mem, 0);
401 return ((REG_P (base_reg) || rs6000_sum_of_two_registers_p (base_reg))
402 && MEM_ALIGN (mem) >= 128) ? true : false;
405 /* Return TRUE if INSN represents a store-with-swap of a swapped value
406 and the memory address is quad-word aligned. */
407 static bool
408 quad_aligned_store_p (swap_web_entry *insn_entry, rtx_insn *insn)
410 unsigned uid = INSN_UID (insn);
411 if (!insn_entry[uid].is_swap || !insn_entry[uid].is_store)
412 return false;
414 rtx body = PATTERN (insn);
415 rtx dest_address = XEXP (SET_DEST (body), 0);
416 rtx swap_reg = XEXP (SET_SRC (body), 0);
418 /* If the base address for the memory expression is not represented
419 by a single register and is not the sum of two registers, punt. */
420 if (!REG_P (dest_address) && !rs6000_sum_of_two_registers_p (dest_address))
421 return false;
423 /* Confirm that the value to be stored is produced by a swap
424 instruction. */
425 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
426 df_ref use;
427 FOR_EACH_INSN_INFO_USE (use, insn_info)
429 struct df_link *def_link = DF_REF_CHAIN (use);
431 /* If this is not the definition of the candidate swap register,
432 then skip it. I am interested in a different definition. */
433 if (!rtx_equal_p (DF_REF_REG (use), swap_reg))
434 continue;
436 /* If there is no def or the def is artificial or there are
437 multiple defs, punt. */
438 if (!def_link || !def_link->ref || DF_REF_IS_ARTIFICIAL (def_link->ref)
439 || def_link->next)
440 return false;
442 rtx def_insn = DF_REF_INSN (def_link->ref);
443 unsigned uid2 = INSN_UID (def_insn);
445 /* If this source value is not a simple swap, return false. */
446 if (!insn_entry[uid2].is_swap || insn_entry[uid2].is_load
447 || insn_entry[uid2].is_store)
448 return false;
450 /* I've processed the use that I care about, so break out of
451 this loop. */
452 break;
455 /* At this point, we know the source data comes from a swap. The
456 remaining question is whether the memory address is aligned. */
457 rtx set = single_set (insn);
458 if (set)
460 rtx dest = SET_DEST (set);
461 if (MEM_P (dest))
462 return (MEM_ALIGN (dest) >= 128);
464 return false;
467 /* Return 1 iff UID, known to reference a swap, is both fed by a load
468 and a feeder of a store. */
469 static unsigned int
470 swap_feeds_both_load_and_store (swap_web_entry *insn_entry)
472 rtx insn = insn_entry->insn;
473 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
474 df_ref def, use;
475 struct df_link *link = 0;
476 rtx_insn *load = 0, *store = 0;
477 bool fed_by_load = 0;
478 bool feeds_store = 0;
480 FOR_EACH_INSN_INFO_USE (use, insn_info)
482 link = DF_REF_CHAIN (use);
483 load = DF_REF_INSN (link->ref);
484 if (insn_is_load_p (load) && insn_is_swap_p (load))
485 fed_by_load = 1;
488 FOR_EACH_INSN_INFO_DEF (def, insn_info)
490 link = DF_REF_CHAIN (def);
491 store = DF_REF_INSN (link->ref);
492 if (insn_is_store_p (store) && insn_is_swap_p (store))
493 feeds_store = 1;
496 return fed_by_load && feeds_store;
499 /* Return TRUE if insn is a swap fed by a load from the constant pool. */
500 static bool
501 const_load_sequence_p (swap_web_entry *insn_entry, rtx insn)
503 unsigned uid = INSN_UID (insn);
504 if (!insn_entry[uid].is_swap || insn_entry[uid].is_load)
505 return false;
507 const_rtx tocrel_base;
509 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
510 df_ref use;
512 /* Iterate over the definitions that are used by this insn. Since
513 this is known to be a swap insn, expect only one used definition. */
514 FOR_EACH_INSN_INFO_USE (use, insn_info)
516 struct df_link *def_link = DF_REF_CHAIN (use);
518 /* If there is no def or the def is artificial or there are
519 multiple defs, punt. */
520 if (!def_link || !def_link->ref || DF_REF_IS_ARTIFICIAL (def_link->ref)
521 || def_link->next)
522 return false;
524 rtx def_insn = DF_REF_INSN (def_link->ref);
525 unsigned uid2 = INSN_UID (def_insn);
526 /* If this is not a load or is not a swap, return false. */
527 if (!insn_entry[uid2].is_load || !insn_entry[uid2].is_swap)
528 return false;
530 /* If the source of the rtl def is not a set from memory, return
531 false. */
532 rtx body = PATTERN (def_insn);
533 if (GET_CODE (body) != SET
534 || GET_CODE (SET_SRC (body)) != VEC_SELECT
535 || GET_CODE (XEXP (SET_SRC (body), 0)) != MEM)
536 return false;
538 rtx mem = XEXP (SET_SRC (body), 0);
539 rtx base_reg = XEXP (mem, 0);
540 /* If the base address for the memory expression is not
541 represented by a register, punt. */
542 if (!REG_P (base_reg))
543 return false;
545 df_ref base_use;
546 insn_info = DF_INSN_INFO_GET (def_insn);
547 FOR_EACH_INSN_INFO_USE (base_use, insn_info)
549 /* If base_use does not represent base_reg, look for another
550 use. */
551 if (!rtx_equal_p (DF_REF_REG (base_use), base_reg))
552 continue;
554 struct df_link *base_def_link = DF_REF_CHAIN (base_use);
555 if (!base_def_link || base_def_link->next)
556 return false;
558 /* Constants held on the stack are not "true" constants
559 because their values are not part of the static load
560 image. If this constant's base reference is a stack
561 or frame pointer, it is seen as an artificial
562 reference. */
563 if (DF_REF_IS_ARTIFICIAL (base_def_link->ref))
564 return false;
566 rtx tocrel_insn = DF_REF_INSN (base_def_link->ref);
567 rtx tocrel_body = PATTERN (tocrel_insn);
568 rtx base, offset;
569 if (GET_CODE (tocrel_body) != SET)
570 return false;
571 /* There is an extra level of indirection for small/large
572 code models. */
573 rtx tocrel_expr = SET_SRC (tocrel_body);
574 if (GET_CODE (tocrel_expr) == MEM)
575 tocrel_expr = XEXP (tocrel_expr, 0);
576 if (!toc_relative_expr_p (tocrel_expr, false, &tocrel_base, NULL))
577 return false;
578 split_const (XVECEXP (tocrel_base, 0, 0), &base, &offset);
580 if (GET_CODE (base) != SYMBOL_REF || !CONSTANT_POOL_ADDRESS_P (base))
581 return false;
582 else
584 /* FIXME: The conditions under which
585 ((GET_CODE (const_vector) == SYMBOL_REF) &&
586 !CONSTANT_POOL_ADDRESS_P (const_vector))
587 are not well understood. This code prevents
588 an internal compiler error which will occur in
589 replace_swapped_load_constant () if we were to return
590 true. Some day, we should figure out how to properly
591 handle this condition in
592 replace_swapped_load_constant () and then we can
593 remove this special test. */
594 rtx const_vector = get_pool_constant (base);
595 if (GET_CODE (const_vector) == SYMBOL_REF
596 && !CONSTANT_POOL_ADDRESS_P (const_vector))
597 return false;
601 return true;
604 /* Return TRUE iff OP matches a V2DF reduction pattern. See the
605 definition of vsx_reduc_<VEC_reduc_name>_v2df in vsx.md. */
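/* Schematically, the shape checked below is (illustrative, shown for
   PLUS; SMIN and SMAX are treated identically):

     (plus:V2DF
       (vec_concat:V2DF
         (vec_select:DF (reg:V2DF x) (parallel [(const_int 1)]))
         (vec_select:DF (reg:V2DF x) (parallel [(const_int 0)])))
       ...)  */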
606 static bool
607 v2df_reduction_p (rtx op)
609 if (GET_MODE (op) != V2DFmode)
610 return false;
612 enum rtx_code code = GET_CODE (op);
613 if (code != PLUS && code != SMIN && code != SMAX)
614 return false;
616 rtx concat = XEXP (op, 0);
617 if (GET_CODE (concat) != VEC_CONCAT)
618 return false;
620 rtx select0 = XEXP (concat, 0);
621 rtx select1 = XEXP (concat, 1);
622 if (GET_CODE (select0) != VEC_SELECT || GET_CODE (select1) != VEC_SELECT)
623 return false;
625 rtx reg0 = XEXP (select0, 0);
626 rtx reg1 = XEXP (select1, 0);
627 if (!rtx_equal_p (reg0, reg1) || !REG_P (reg0))
628 return false;
630 rtx parallel0 = XEXP (select0, 1);
631 rtx parallel1 = XEXP (select1, 1);
632 if (GET_CODE (parallel0) != PARALLEL || GET_CODE (parallel1) != PARALLEL)
633 return false;
635 if (!rtx_equal_p (XVECEXP (parallel0, 0, 0), const1_rtx)
636 || !rtx_equal_p (XVECEXP (parallel1, 0, 0), const0_rtx))
637 return false;
639 return true;
642 /* Return 1 iff OP is an operand that will not be affected by having
643 vector doublewords swapped in memory. */
644 static unsigned int
645 rtx_is_swappable_p (rtx op, unsigned int *special)
647 enum rtx_code code = GET_CODE (op);
648 int i, j;
649 rtx parallel;
651 switch (code)
653 case LABEL_REF:
654 case SYMBOL_REF:
655 case CLOBBER:
656 case REG:
657 return 1;
659 case VEC_CONCAT:
660 case ASM_INPUT:
661 case ASM_OPERANDS:
662 return 0;
664 case CONST_VECTOR:
666 *special = SH_CONST_VECTOR;
667 return 1;
670 case VEC_DUPLICATE:
671 /* Opportunity: If XEXP (op, 0) has the same mode as the result,
672 and XEXP (op, 1) is a PARALLEL with a single QImode const int,
673 it represents a vector splat for which we can do special
674 handling. */
675 if (GET_CODE (XEXP (op, 0)) == CONST_INT)
676 return 1;
677 else if (REG_P (XEXP (op, 0))
678 && GET_MODE_INNER (GET_MODE (op)) == GET_MODE (XEXP (op, 0)))
679 /* This catches V2DF and V2DI splat, at a minimum. */
680 return 1;
681 else if (GET_CODE (XEXP (op, 0)) == TRUNCATE
682 && REG_P (XEXP (XEXP (op, 0), 0))
683 && GET_MODE_INNER (GET_MODE (op)) == GET_MODE (XEXP (op, 0)))
684 /* This catches splat of a truncated value. */
685 return 1;
686 else if (GET_CODE (XEXP (op, 0)) == VEC_SELECT)
687 /* If the duplicated item is from a select, defer to the select
688 processing to see if we can change the lane for the splat. */
689 return rtx_is_swappable_p (XEXP (op, 0), special);
690 else
691 return 0;
693 case VEC_SELECT:
694 /* A vec_extract operation is ok if we change the lane. */
695 if (GET_CODE (XEXP (op, 0)) == REG
696 && GET_MODE_INNER (GET_MODE (XEXP (op, 0))) == GET_MODE (op)
697 && GET_CODE ((parallel = XEXP (op, 1))) == PARALLEL
698 && XVECLEN (parallel, 0) == 1
699 && GET_CODE (XVECEXP (parallel, 0, 0)) == CONST_INT)
701 *special = SH_EXTRACT;
702 return 1;
704 /* An XXPERMDI is ok if we adjust the lanes. Note that if the
705 XXPERMDI is a swap operation, it will be identified by
706 insn_is_swap_p and therefore we won't get here. */
707 else if (GET_CODE (XEXP (op, 0)) == VEC_CONCAT
708 && (GET_MODE (XEXP (op, 0)) == V4DFmode
709 || GET_MODE (XEXP (op, 0)) == V4DImode)
710 && GET_CODE ((parallel = XEXP (op, 1))) == PARALLEL
711 && XVECLEN (parallel, 0) == 2
712 && GET_CODE (XVECEXP (parallel, 0, 0)) == CONST_INT
713 && GET_CODE (XVECEXP (parallel, 0, 1)) == CONST_INT)
715 *special = SH_XXPERMDI;
716 return 1;
718 else if (v2df_reduction_p (op))
719 return 1;
720 else
721 return 0;
723 case UNSPEC:
725 /* Various operations are unsafe for this optimization, at least
726 without significant additional work. Permutes are obviously
727 problematic, as both the permute control vector and the ordering
728 of the target values are invalidated by doubleword swapping.
729 Vector pack and unpack modify the number of vector lanes.
730 Merge-high/low will not operate correctly on swapped operands.
731 Vector shifts across element boundaries are clearly uncool,
732 as are vector select and concatenate operations. Vector
733 sum-across instructions define one operand with a specific
734 order-dependent element, so additional fixup code would be
735 needed to make those work. Vector set and non-immediate-form
736 vector splat are element-order sensitive. A few of these
737 cases might be workable with special handling if required.
738 Adding cost modeling would be appropriate in some cases. */
739 int val = XINT (op, 1);
740 switch (val)
742 default:
743 break;
744 case UNSPEC_VBPERMQ:
745 case UNSPEC_VMRGH_DIRECT:
746 case UNSPEC_VMRGL_DIRECT:
747 case UNSPEC_VPACK_SIGN_SIGN_SAT:
748 case UNSPEC_VPACK_SIGN_UNS_SAT:
749 case UNSPEC_VPACK_UNS_UNS_MOD:
750 case UNSPEC_VPACK_UNS_UNS_MOD_DIRECT:
751 case UNSPEC_VPACK_UNS_UNS_SAT:
752 case UNSPEC_VPERM:
753 case UNSPEC_VPERM_UNS:
754 case UNSPEC_VPERMHI:
755 case UNSPEC_VPERMSI:
756 case UNSPEC_VPERMXOR:
757 case UNSPEC_VPKPX:
758 case UNSPEC_VSLDOI:
759 case UNSPEC_VSLO:
760 case UNSPEC_VSRO:
761 case UNSPEC_VSUM2SWS:
762 case UNSPEC_VSUM4S:
763 case UNSPEC_VSUM4UBS:
764 case UNSPEC_VSUMSWS:
765 case UNSPEC_VSUMSWS_DIRECT:
766 case UNSPEC_VSX_CONCAT:
767 case UNSPEC_VSX_CVDPSPN:
768 case UNSPEC_VSX_CVSPDP:
769 case UNSPEC_VSX_CVSPDPN:
770 case UNSPEC_VSX_EXTRACT:
771 case UNSPEC_VSX_SET:
772 case UNSPEC_VSX_SLDWI:
773 case UNSPEC_VSX_VEC_INIT:
774 case UNSPEC_VSX_VSLO:
775 case UNSPEC_VUNPACK_HI_SIGN:
776 case UNSPEC_VUNPACK_HI_SIGN_DIRECT:
777 case UNSPEC_VUNPACK_LO_SIGN:
778 case UNSPEC_VUNPACK_LO_SIGN_DIRECT:
779 case UNSPEC_VUPKHPX:
780 case UNSPEC_VUPKHS_V4SF:
781 case UNSPEC_VUPKHU_V4SF:
782 case UNSPEC_VUPKLPX:
783 case UNSPEC_VUPKLS_V4SF:
784 case UNSPEC_VUPKLU_V4SF:
785 return 0;
786 case UNSPEC_VSPLT_DIRECT:
787 case UNSPEC_VSX_XXSPLTD:
788 *special = SH_SPLAT;
789 return 1;
790 case UNSPEC_REDUC_PLUS:
791 case UNSPEC_REDUC:
792 return 1;
796 default:
797 break;
800 const char *fmt = GET_RTX_FORMAT (code);
801 int ok = 1;
803 for (i = 0; i < GET_RTX_LENGTH (code); ++i)
804 if (fmt[i] == 'e' || fmt[i] == 'u')
806 unsigned int special_op = SH_NONE;
807 ok &= rtx_is_swappable_p (XEXP (op, i), &special_op);
808 if (special_op == SH_NONE)
809 continue;
810 /* Ensure we never have two kinds of special handling
811 for the same insn. */
812 if (*special != SH_NONE && *special != special_op)
813 return 0;
814 *special = special_op;
816 else if (fmt[i] == 'E')
817 for (j = 0; j < XVECLEN (op, i); ++j)
819 unsigned int special_op = SH_NONE;
820 ok &= rtx_is_swappable_p (XVECEXP (op, i, j), &special_op);
821 if (special_op == SH_NONE)
822 continue;
823 /* Ensure we never have two kinds of special handling
824 for the same insn. */
825 if (*special != SH_NONE && *special != special_op)
826 return 0;
827 *special = special_op;
830 return ok;
833 /* Return 1 iff INSN is an operand that will not be affected by
834 having vector doublewords swapped in memory (in which case
835 *SPECIAL is unchanged), or that can be modified to be correct
836 if vector doublewords are swapped in memory (in which case
837 *SPECIAL is changed to a value indicating how). */
838 static unsigned int
839 insn_is_swappable_p (swap_web_entry *insn_entry, rtx insn,
840 unsigned int *special)
842 /* Calls are always bad. */
843 if (GET_CODE (insn) == CALL_INSN)
844 return 0;
846 /* Loads and stores seen here are not permuting, but we can still
847 fix them up by converting them to permuting ones. Exceptions:
848 UNSPEC_LVE, UNSPEC_LVX, and UNSPEC_STVX, which have a PARALLEL
849 body instead of a SET; and UNSPEC_STVE, which has an UNSPEC
850 for the SET source. Also we must now make an exception for lvx
851 and stvx when they are not in the UNSPEC_LVX/STVX form (with the
852 explicit "& -16") since this leads to unrecognizable insns. */
853 rtx body = PATTERN (insn);
854 int i = INSN_UID (insn);
856 if (insn_entry[i].is_load)
858 if (GET_CODE (body) == SET)
860 rtx rhs = SET_SRC (body);
861 /* Even without a swap, the RHS might be a vec_select for, say,
862 a byte-reversing load. */
863 if (GET_CODE (rhs) != MEM)
864 return 0;
865 if (GET_CODE (XEXP (rhs, 0)) == AND)
866 return 0;
868 *special = SH_NOSWAP_LD;
869 return 1;
871 else
872 return 0;
875 if (insn_entry[i].is_store)
877 if (GET_CODE (body) == SET
878 && GET_CODE (SET_SRC (body)) != UNSPEC
879 && GET_CODE (SET_SRC (body)) != VEC_SELECT)
881 rtx lhs = SET_DEST (body);
882 /* Even without a swap, the RHS might be a vec_select for, say,
883 a byte-reversing store. */
884 if (GET_CODE (lhs) != MEM)
885 return 0;
886 if (GET_CODE (XEXP (lhs, 0)) == AND)
887 return 0;
889 *special = SH_NOSWAP_ST;
890 return 1;
892 else
893 return 0;
896 /* A convert to single precision can be left as is provided that
897 all of its uses are in xxspltw instructions that splat BE element
898 zero. */
899 if (GET_CODE (body) == SET
900 && GET_CODE (SET_SRC (body)) == UNSPEC
901 && XINT (SET_SRC (body), 1) == UNSPEC_VSX_CVDPSPN)
903 df_ref def;
904 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
906 FOR_EACH_INSN_INFO_DEF (def, insn_info)
908 struct df_link *link = DF_REF_CHAIN (def);
909 if (!link)
910 return 0;
912 for (; link; link = link->next) {
913 rtx use_insn = DF_REF_INSN (link->ref);
914 rtx use_body = PATTERN (use_insn);
915 if (GET_CODE (use_body) != SET
916 || GET_CODE (SET_SRC (use_body)) != UNSPEC
917 || XINT (SET_SRC (use_body), 1) != UNSPEC_VSX_XXSPLTW
918 || XVECEXP (SET_SRC (use_body), 0, 1) != const0_rtx)
919 return 0;
923 return 1;
926 /* A concatenation of two doublewords is ok if we reverse the
927 order of the inputs. */
928 if (GET_CODE (body) == SET
929 && GET_CODE (SET_SRC (body)) == VEC_CONCAT
930 && (GET_MODE (SET_SRC (body)) == V2DFmode
931 || GET_MODE (SET_SRC (body)) == V2DImode))
933 *special = SH_CONCAT;
934 return 1;
937 /* V2DF reductions are always swappable. */
938 if (GET_CODE (body) == PARALLEL)
940 rtx expr = XVECEXP (body, 0, 0);
941 if (GET_CODE (expr) == SET
942 && v2df_reduction_p (SET_SRC (expr)))
943 return 1;
946 /* An UNSPEC_VPERM is ok if the mask operand is loaded from the
947 constant pool. */
948 if (GET_CODE (body) == SET
949 && GET_CODE (SET_SRC (body)) == UNSPEC
950 && XINT (SET_SRC (body), 1) == UNSPEC_VPERM
951 && XVECLEN (SET_SRC (body), 0) == 3
952 && GET_CODE (XVECEXP (SET_SRC (body), 0, 2)) == REG)
954 rtx mask_reg = XVECEXP (SET_SRC (body), 0, 2);
955 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
956 df_ref use;
957 FOR_EACH_INSN_INFO_USE (use, insn_info)
958 if (rtx_equal_p (DF_REF_REG (use), mask_reg))
960 struct df_link *def_link = DF_REF_CHAIN (use);
961 /* Punt if multiple definitions for this reg. */
962 if (def_link && !def_link->next &&
963 const_load_sequence_p (insn_entry,
964 DF_REF_INSN (def_link->ref)))
966 *special = SH_VPERM;
967 return 1;
972 /* Otherwise check the operands for vector lane violations. */
973 return rtx_is_swappable_p (body, special);
976 enum chain_purpose { FOR_LOADS, FOR_STORES };
978 /* Return true if the UD or DU chain headed by LINK is non-empty,
979 and every entry on the chain references an insn that is a
980 register swap. Furthermore, if PURPOSE is FOR_LOADS, each such
981 register swap must have only permuting loads as reaching defs.
982 If PURPOSE is FOR_STORES, each such register swap must have only
983 register swaps or permuting stores as reached uses. */
984 static bool
985 chain_contains_only_swaps (swap_web_entry *insn_entry, struct df_link *link,
986 enum chain_purpose purpose)
988 if (!link)
989 return false;
991 for (; link; link = link->next)
993 if (!ALTIVEC_OR_VSX_VECTOR_MODE (GET_MODE (DF_REF_REG (link->ref))))
994 continue;
996 if (DF_REF_IS_ARTIFICIAL (link->ref))
997 return false;
999 rtx reached_insn = DF_REF_INSN (link->ref);
1000 unsigned uid = INSN_UID (reached_insn);
1001 struct df_insn_info *insn_info = DF_INSN_INFO_GET (reached_insn);
1003 if (!insn_entry[uid].is_swap || insn_entry[uid].is_load
1004 || insn_entry[uid].is_store)
1005 return false;
1007 if (purpose == FOR_LOADS)
1009 df_ref use;
1010 FOR_EACH_INSN_INFO_USE (use, insn_info)
1012 struct df_link *swap_link = DF_REF_CHAIN (use);
1014 while (swap_link)
1016 if (DF_REF_IS_ARTIFICIAL (link->ref))
1017 return false;
1019 rtx swap_def_insn = DF_REF_INSN (swap_link->ref);
1020 unsigned uid2 = INSN_UID (swap_def_insn);
1022 /* Only permuting loads are allowed. */
1023 if (!insn_entry[uid2].is_swap || !insn_entry[uid2].is_load)
1024 return false;
1026 swap_link = swap_link->next;
1030 else if (purpose == FOR_STORES)
1032 df_ref def;
1033 FOR_EACH_INSN_INFO_DEF (def, insn_info)
1035 struct df_link *swap_link = DF_REF_CHAIN (def);
1037 while (swap_link)
1039 if (DF_REF_IS_ARTIFICIAL (link->ref))
1040 return false;
1042 rtx swap_use_insn = DF_REF_INSN (swap_link->ref);
1043 unsigned uid2 = INSN_UID (swap_use_insn);
1045 /* Permuting stores or register swaps are allowed. */
1046 if (!insn_entry[uid2].is_swap || insn_entry[uid2].is_load)
1047 return false;
1049 swap_link = swap_link->next;
1055 return true;
1058 /* Mark the xxswapdi instructions associated with permuting loads and
1059 stores for removal. Note that we only flag them for deletion here,
1060 as there is a possibility of a swap being reached from multiple
1061 loads, etc. */
1062 static void
1063 mark_swaps_for_removal (swap_web_entry *insn_entry, unsigned int i)
1065 rtx insn = insn_entry[i].insn;
1066 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
1068 if (insn_entry[i].is_load)
1070 df_ref def;
1071 FOR_EACH_INSN_INFO_DEF (def, insn_info)
1073 struct df_link *link = DF_REF_CHAIN (def);
1075 /* We know by now that these are swaps, so we can delete
1076 them confidently. */
1077 while (link)
1079 rtx use_insn = DF_REF_INSN (link->ref);
1080 insn_entry[INSN_UID (use_insn)].will_delete = 1;
1081 link = link->next;
1085 else if (insn_entry[i].is_store)
1087 df_ref use;
1088 FOR_EACH_INSN_INFO_USE (use, insn_info)
1090 /* Ignore uses for addressability. */
1091 machine_mode mode = GET_MODE (DF_REF_REG (use));
1092 if (!ALTIVEC_OR_VSX_VECTOR_MODE (mode))
1093 continue;
1095 struct df_link *link = DF_REF_CHAIN (use);
1097 /* We know by now that these are swaps, so we can delete
1098 them confidently. */
1099 while (link)
1101 rtx def_insn = DF_REF_INSN (link->ref);
1102 insn_entry[INSN_UID (def_insn)].will_delete = 1;
1103 link = link->next;
1109 /* *OP_PTR is either a CONST_VECTOR or an expression containing one.
1110 Swap the first half of the vector with the second in the first
1111 case. Recurse to find it in the second. */
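/* For example, a V4SImode constant vector { 0, 1, 2, 3 } becomes
   { 2, 3, 0, 1 }: the two doublewords exchange places while the
   element order within each doubleword is preserved.  */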
1112 static void
1113 swap_const_vector_halves (rtx *op_ptr)
1115 int i;
1116 rtx op = *op_ptr;
1117 enum rtx_code code = GET_CODE (op);
1118 if (GET_CODE (op) == CONST_VECTOR)
1120 int units = GET_MODE_NUNITS (GET_MODE (op));
1121 rtx_vector_builder builder (GET_MODE (op), units, 1);
1122 for (i = 0; i < units / 2; ++i)
1123 builder.quick_push (CONST_VECTOR_ELT (op, i + units / 2));
1124 for (i = 0; i < units / 2; ++i)
1125 builder.quick_push (CONST_VECTOR_ELT (op, i));
1126 *op_ptr = builder.build ();
1128 else
1130 int j;
1131 const char *fmt = GET_RTX_FORMAT (code);
1132 for (i = 0; i < GET_RTX_LENGTH (code); ++i)
1133 if (fmt[i] == 'e' || fmt[i] == 'u')
1134 swap_const_vector_halves (&XEXP (op, i));
1135 else if (fmt[i] == 'E')
1136 for (j = 0; j < XVECLEN (op, i); ++j)
1137 swap_const_vector_halves (&XVECEXP (op, i, j));
1141 /* Find all subregs of a vector expression that perform a narrowing,
1142 and adjust the subreg index to account for doubleword swapping. */
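/* For example, (subreg:DF (reg:V2DF v) 0) becomes
   (subreg:DF (reg:V2DF v) 8) and vice versa, since the doubleword
   formerly at byte offset 0 of the register is found at byte offset 8
   after the swap (illustrative).  */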
1143 static void
1144 adjust_subreg_index (rtx op)
1146 enum rtx_code code = GET_CODE (op);
1147 if (code == SUBREG
1148 && (GET_MODE_SIZE (GET_MODE (op))
1149 < GET_MODE_SIZE (GET_MODE (XEXP (op, 0)))))
1151 unsigned int index = SUBREG_BYTE (op);
1152 if (index < 8)
1153 index += 8;
1154 else
1155 index -= 8;
1156 SUBREG_BYTE (op) = index;
1159 const char *fmt = GET_RTX_FORMAT (code);
1160 int i,j;
1161 for (i = 0; i < GET_RTX_LENGTH (code); ++i)
1162 if (fmt[i] == 'e' || fmt[i] == 'u')
1163 adjust_subreg_index (XEXP (op, i));
1164 else if (fmt[i] == 'E')
1165 for (j = 0; j < XVECLEN (op, i); ++j)
1166 adjust_subreg_index (XVECEXP (op, i, j));
1169 /* Convert the non-permuting load INSN to a permuting one. */
1170 static void
1171 permute_load (rtx_insn *insn)
1173 rtx body = PATTERN (insn);
1174 rtx mem_op = SET_SRC (body);
1175 rtx tgt_reg = SET_DEST (body);
1176 machine_mode mode = GET_MODE (tgt_reg);
1177 int n_elts = GET_MODE_NUNITS (mode);
1178 int half_elts = n_elts / 2;
1179 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
1180 int i, j;
1181 for (i = 0, j = half_elts; i < half_elts; ++i, ++j)
1182 XVECEXP (par, 0, i) = GEN_INT (j);
1183 for (i = half_elts, j = 0; j < half_elts; ++i, ++j)
1184 XVECEXP (par, 0, i) = GEN_INT (j);
1185 rtx sel = gen_rtx_VEC_SELECT (mode, mem_op, par);
1186 SET_SRC (body) = sel;
1187 INSN_CODE (insn) = -1; /* Force re-recognition. */
1188 df_insn_rescan (insn);
1190 if (dump_file)
1191 fprintf (dump_file, "Replacing load %d with permuted load\n",
1192 INSN_UID (insn));
1195 /* Convert the non-permuting store INSN to a permuting one. */
1196 static void
1197 permute_store (rtx_insn *insn)
1199 rtx body = PATTERN (insn);
1200 rtx src_reg = SET_SRC (body);
1201 machine_mode mode = GET_MODE (src_reg);
1202 int n_elts = GET_MODE_NUNITS (mode);
1203 int half_elts = n_elts / 2;
1204 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
1205 int i, j;
1206 for (i = 0, j = half_elts; i < half_elts; ++i, ++j)
1207 XVECEXP (par, 0, i) = GEN_INT (j);
1208 for (i = half_elts, j = 0; j < half_elts; ++i, ++j)
1209 XVECEXP (par, 0, i) = GEN_INT (j);
1210 rtx sel = gen_rtx_VEC_SELECT (mode, src_reg, par);
1211 SET_SRC (body) = sel;
1212 INSN_CODE (insn) = -1; /* Force re-recognition. */
1213 df_insn_rescan (insn);
1215 if (dump_file)
1216 fprintf (dump_file, "Replacing store %d with permuted store\n",
1217 INSN_UID (insn));
1220 /* Given INSN that contains a vector extract operation, adjust the index
1221 of the extracted lane to account for the doubleword swap. */
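/* For example, with V4SImode (half_elts == 2), an extract of lane 1
   becomes an extract of lane 3 after the doublewords are swapped, and
   an extract of lane 3 becomes an extract of lane 1.  */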
1222 static void
1223 adjust_extract (rtx_insn *insn)
1225 rtx pattern = PATTERN (insn);
1226 if (GET_CODE (pattern) == PARALLEL)
1227 pattern = XVECEXP (pattern, 0, 0);
1228 rtx src = SET_SRC (pattern);
1229 /* The vec_select may be wrapped in a vec_duplicate for a splat, so
1230 account for that. */
1231 rtx sel = GET_CODE (src) == VEC_DUPLICATE ? XEXP (src, 0) : src;
1232 rtx par = XEXP (sel, 1);
1233 int half_elts = GET_MODE_NUNITS (GET_MODE (XEXP (sel, 0))) >> 1;
1234 int lane = INTVAL (XVECEXP (par, 0, 0));
1235 lane = lane >= half_elts ? lane - half_elts : lane + half_elts;
1236 XVECEXP (par, 0, 0) = GEN_INT (lane);
1237 INSN_CODE (insn) = -1; /* Force re-recognition. */
1238 df_insn_rescan (insn);
1240 if (dump_file)
1241 fprintf (dump_file, "Changing lane for extract %d\n", INSN_UID (insn));
1244 /* Given INSN that contains a vector direct-splat operation, adjust the index
1245 of the source lane to account for the doubleword swap. */
1246 static void
1247 adjust_splat (rtx_insn *insn)
1249 rtx body = PATTERN (insn);
1250 rtx unspec = XEXP (body, 1);
1251 int half_elts = GET_MODE_NUNITS (GET_MODE (unspec)) >> 1;
1252 int lane = INTVAL (XVECEXP (unspec, 0, 1));
1253 lane = lane >= half_elts ? lane - half_elts : lane + half_elts;
1254 XVECEXP (unspec, 0, 1) = GEN_INT (lane);
1255 INSN_CODE (insn) = -1; /* Force re-recognition. */
1256 df_insn_rescan (insn);
1258 if (dump_file)
1259 fprintf (dump_file, "Changing lane for splat %d\n", INSN_UID (insn));
1262 /* Given INSN that contains an XXPERMDI operation (that is not a doubleword
1263 swap), reverse the order of the source operands and adjust the indices
1264 of the source lanes to account for doubleword reversal. */
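/* For example, an xxpermdi selecting lanes {0, 2} of the concatenation
   (A, B) is rewritten to select lanes {1, 3} of (B, A): the operands
   are exchanged and the lane indices are remapped as
   new_lane0 = 3 - lane1, new_lane1 = 3 - lane0.  */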
1265 static void
1266 adjust_xxpermdi (rtx_insn *insn)
1268 rtx set = PATTERN (insn);
1269 rtx select = XEXP (set, 1);
1270 rtx concat = XEXP (select, 0);
1271 rtx src0 = XEXP (concat, 0);
1272 XEXP (concat, 0) = XEXP (concat, 1);
1273 XEXP (concat, 1) = src0;
1274 rtx parallel = XEXP (select, 1);
1275 int lane0 = INTVAL (XVECEXP (parallel, 0, 0));
1276 int lane1 = INTVAL (XVECEXP (parallel, 0, 1));
1277 int new_lane0 = 3 - lane1;
1278 int new_lane1 = 3 - lane0;
1279 XVECEXP (parallel, 0, 0) = GEN_INT (new_lane0);
1280 XVECEXP (parallel, 0, 1) = GEN_INT (new_lane1);
1281 INSN_CODE (insn) = -1; /* Force re-recognition. */
1282 df_insn_rescan (insn);
1284 if (dump_file)
1285 fprintf (dump_file, "Changing lanes for xxpermdi %d\n", INSN_UID (insn));
1288 /* Given INSN that contains a VEC_CONCAT operation of two doublewords,
1289 reverse the order of those inputs. */
1290 static void
1291 adjust_concat (rtx_insn *insn)
1293 rtx set = PATTERN (insn);
1294 rtx concat = XEXP (set, 1);
1295 rtx src0 = XEXP (concat, 0);
1296 XEXP (concat, 0) = XEXP (concat, 1);
1297 XEXP (concat, 1) = src0;
1298 INSN_CODE (insn) = -1; /* Force re-recognition. */
1299 df_insn_rescan (insn);
1301 if (dump_file)
1302 fprintf (dump_file, "Reversing inputs for concat %d\n", INSN_UID (insn));
1305 /* Given an UNSPEC_VPERM insn, modify the mask loaded from the
1306 constant pool to reflect swapped doublewords. */
1307 static void
1308 adjust_vperm (rtx_insn *insn)
1310 /* We previously determined that the UNSPEC_VPERM was fed by a
1311 swap of a swapping load of a TOC-relative constant pool symbol.
1312 Find the MEM in the swapping load and replace it with a MEM for
1313 the adjusted mask constant. */
1314 rtx set = PATTERN (insn);
1315 rtx mask_reg = XVECEXP (SET_SRC (set), 0, 2);
1317 /* Find the swap. */
1318 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
1319 df_ref use;
1320 rtx_insn *swap_insn = 0;
1321 FOR_EACH_INSN_INFO_USE (use, insn_info)
1322 if (rtx_equal_p (DF_REF_REG (use), mask_reg))
1324 struct df_link *def_link = DF_REF_CHAIN (use);
1325 gcc_assert (def_link && !def_link->next);
1326 swap_insn = DF_REF_INSN (def_link->ref);
1327 break;
1329 gcc_assert (swap_insn);
1331 /* Find the load. */
1332 insn_info = DF_INSN_INFO_GET (swap_insn);
1333 rtx_insn *load_insn = 0;
1334 FOR_EACH_INSN_INFO_USE (use, insn_info)
1336 struct df_link *def_link = DF_REF_CHAIN (use);
1337 gcc_assert (def_link && !def_link->next);
1338 load_insn = DF_REF_INSN (def_link->ref);
1339 break;
1341 gcc_assert (load_insn);
1343 /* Find the TOC-relative symbol access. */
1344 insn_info = DF_INSN_INFO_GET (load_insn);
1345 rtx_insn *tocrel_insn = 0;
1346 FOR_EACH_INSN_INFO_USE (use, insn_info)
1348 struct df_link *def_link = DF_REF_CHAIN (use);
1349 gcc_assert (def_link && !def_link->next);
1350 tocrel_insn = DF_REF_INSN (def_link->ref);
1351 break;
1353 gcc_assert (tocrel_insn);
1355 /* Find the embedded CONST_VECTOR. We have to call toc_relative_expr_p
1356 to set tocrel_base; otherwise it would be unnecessary as we've
1357 already established it will return true. */
1358 rtx base, offset;
1359 const_rtx tocrel_base;
1360 rtx tocrel_expr = SET_SRC (PATTERN (tocrel_insn));
1361 /* There is an extra level of indirection for small/large code models. */
1362 if (GET_CODE (tocrel_expr) == MEM)
1363 tocrel_expr = XEXP (tocrel_expr, 0);
1364 if (!toc_relative_expr_p (tocrel_expr, false, &tocrel_base, NULL))
1365 gcc_unreachable ();
1366 split_const (XVECEXP (tocrel_base, 0, 0), &base, &offset);
1367 rtx const_vector = get_pool_constant (base);
1368 /* With the extra indirection, get_pool_constant may return a
1369 SYMBOL_REF taken from the reg_equal expression; in that case call
1370 it again to obtain the real constant. */
1371 if (GET_CODE (const_vector) == SYMBOL_REF)
1372 const_vector = get_pool_constant (const_vector);
1373 gcc_assert (GET_CODE (const_vector) == CONST_VECTOR);
1375 /* Create an adjusted mask from the initial mask. */
1376 unsigned int new_mask[16], i, val;
1377 for (i = 0; i < 16; ++i) {
1378 val = INTVAL (XVECEXP (const_vector, 0, i));
1379 if (val < 16)
1380 new_mask[i] = (val + 8) % 16;
1381 else
1382 new_mask[i] = ((val + 8) % 16) + 16;
1385 /* Create a new CONST_VECTOR and a MEM that references it. */
1386 rtx vals = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
1387 for (i = 0; i < 16; ++i)
1388 XVECEXP (vals, 0, i) = GEN_INT (new_mask[i]);
1389 rtx new_const_vector = gen_rtx_CONST_VECTOR (V16QImode, XVEC (vals, 0));
1390 rtx new_mem = force_const_mem (V16QImode, new_const_vector);
1391 /* This gives us a MEM whose base operand is a SYMBOL_REF, which we
1392 can't recognize. Force the SYMBOL_REF into a register. */
1393 if (!REG_P (XEXP (new_mem, 0))) {
1394 rtx base_reg = force_reg (Pmode, XEXP (new_mem, 0));
1395 XEXP (new_mem, 0) = base_reg;
1396 /* Move the newly created insn ahead of the load insn. */
1397 rtx_insn *force_insn = get_last_insn ();
1398 remove_insn (force_insn);
1399 rtx_insn *before_load_insn = PREV_INSN (load_insn);
1400 add_insn_after (force_insn, before_load_insn, BLOCK_FOR_INSN (load_insn));
1401 df_insn_rescan (before_load_insn);
1402 df_insn_rescan (force_insn);
1405 /* Replace the MEM in the load instruction and rescan it. */
1406 XEXP (SET_SRC (PATTERN (load_insn)), 0) = new_mem;
1407 INSN_CODE (load_insn) = -1; /* Force re-recognition. */
1408 df_insn_rescan (load_insn);
1410 if (dump_file)
1411 fprintf (dump_file, "Adjusting mask for vperm %d\n", INSN_UID (insn));
1414 /* The insn described by INSN_ENTRY[I] can be swapped, but only
1415 with special handling. Take care of that here. */
1416 static void
1417 handle_special_swappables (swap_web_entry *insn_entry, unsigned i)
1419 rtx_insn *insn = insn_entry[i].insn;
1420 rtx body = PATTERN (insn);
1422 switch (insn_entry[i].special_handling)
1424 default:
1425 gcc_unreachable ();
1426 case SH_CONST_VECTOR:
1428 /* A CONST_VECTOR will only show up somewhere in the RHS of a SET. */
1429 gcc_assert (GET_CODE (body) == SET);
1430 swap_const_vector_halves (&SET_SRC (body));
1431 if (dump_file)
1432 fprintf (dump_file, "Swapping constant halves in insn %d\n", i);
1433 break;
1435 case SH_SUBREG:
1436 /* A subreg of the same size is already safe. For subregs that
1437 select a smaller portion of a reg, adjust the index for
1438 swapped doublewords. */
1439 adjust_subreg_index (body);
1440 if (dump_file)
1441 fprintf (dump_file, "Adjusting subreg in insn %d\n", i);
1442 break;
1443 case SH_NOSWAP_LD:
1444 /* Convert a non-permuting load to a permuting one. */
1445 permute_load (insn);
1446 break;
1447 case SH_NOSWAP_ST:
1448 /* Convert a non-permuting store to a permuting one. */
1449 permute_store (insn);
1450 break;
1451 case SH_EXTRACT:
1452 /* Change the lane on an extract operation. */
1453 adjust_extract (insn);
1454 break;
1455 case SH_SPLAT:
1456 /* Change the lane on a direct-splat operation. */
1457 adjust_splat (insn);
1458 break;
1459 case SH_XXPERMDI:
1460 /* Change the lanes on an XXPERMDI operation. */
1461 adjust_xxpermdi (insn);
1462 break;
1463 case SH_CONCAT:
1464 /* Reverse the order of a concatenation operation. */
1465 adjust_concat (insn);
1466 break;
1467 case SH_VPERM:
1468 /* Change the mask loaded from the constant pool for a VPERM. */
1469 adjust_vperm (insn);
1470 break;
1474 /* Find the insn from the Ith table entry, which is known to be a
1475 register swap Y = SWAP(X). Replace it with a copy Y = X. */
1476 static void
1477 replace_swap_with_copy (swap_web_entry *insn_entry, unsigned i)
1479 rtx_insn *insn = insn_entry[i].insn;
1480 rtx body = PATTERN (insn);
1481 rtx src_reg = XEXP (SET_SRC (body), 0);
1482 rtx copy = gen_rtx_SET (SET_DEST (body), src_reg);
1483 rtx_insn *new_insn = emit_insn_before (copy, insn);
1484 set_block_for_insn (new_insn, BLOCK_FOR_INSN (insn));
1485 df_insn_rescan (new_insn);
1487 if (dump_file)
1489 unsigned int new_uid = INSN_UID (new_insn);
1490 fprintf (dump_file, "Replacing swap %d with copy %d\n", i, new_uid);
1493 df_insn_delete (insn);
1494 remove_insn (insn);
1495 insn->set_deleted ();
1498 /* Make NEW_MEM_EXP's attributes and flags resemble those of
1499 ORIGINAL_MEM_EXP. */
1500 static void
1501 mimic_memory_attributes_and_flags (rtx new_mem_exp, const_rtx original_mem_exp)
1503 RTX_FLAG (new_mem_exp, jump) = RTX_FLAG (original_mem_exp, jump);
1504 RTX_FLAG (new_mem_exp, call) = RTX_FLAG (original_mem_exp, call);
1505 RTX_FLAG (new_mem_exp, unchanging) = RTX_FLAG (original_mem_exp, unchanging);
1506 RTX_FLAG (new_mem_exp, volatil) = RTX_FLAG (original_mem_exp, volatil);
1507 RTX_FLAG (new_mem_exp, frame_related) =
1508 RTX_FLAG (original_mem_exp, frame_related);
1510 /* The following fields may not be used with MEM subexpressions */
1511 RTX_FLAG (new_mem_exp, in_struct) = RTX_FLAG (original_mem_exp, in_struct);
1512 RTX_FLAG (new_mem_exp, return_val) = RTX_FLAG (original_mem_exp, return_val);
1514 struct mem_attrs original_attrs = *get_mem_attrs(original_mem_exp);
1516 alias_set_type set = original_attrs.alias;
1517 set_mem_alias_set (new_mem_exp, set);
1519 addr_space_t addrspace = original_attrs.addrspace;
1520 set_mem_addr_space (new_mem_exp, addrspace);
1522 unsigned int align = original_attrs.align;
1523 set_mem_align (new_mem_exp, align);
1525 tree expr = original_attrs.expr;
1526 set_mem_expr (new_mem_exp, expr);
1528 if (original_attrs.offset_known_p)
1530 HOST_WIDE_INT offset = original_attrs.offset;
1531 set_mem_offset (new_mem_exp, offset);
1533 else
1534 clear_mem_offset (new_mem_exp);
1536 if (original_attrs.size_known_p)
1538 HOST_WIDE_INT size = original_attrs.size;
1539 set_mem_size (new_mem_exp, size);
1541 else
1542 clear_mem_size (new_mem_exp);
1545 /* Generate an rtx expression to represent use of the stvx insn to store
1546 the value represented by register SRC_EXP into the memory at address
1547 DEST_EXP, with vector mode MODE. */
1548 rtx
1549 rs6000_gen_stvx (enum machine_mode mode, rtx dest_exp, rtx src_exp)
1551 rtx stvx;
1553 if (mode == V16QImode)
1554 stvx = gen_altivec_stvx_v16qi (src_exp, dest_exp);
1555 else if (mode == V8HImode)
1556 stvx = gen_altivec_stvx_v8hi (src_exp, dest_exp);
1557 #ifdef HAVE_V8HFmode
1558 else if (mode == V8HFmode)
1559 stvx = gen_altivec_stvx_v8hf (src_exp, dest_exp);
1560 #endif
1561 else if (mode == V4SImode)
1562 stvx = gen_altivec_stvx_v4si (src_exp, dest_exp);
1563 else if (mode == V4SFmode)
1564 stvx = gen_altivec_stvx_v4sf (src_exp, dest_exp);
1565 else if (mode == V2DImode)
1566 stvx = gen_altivec_stvx_v2di (src_exp, dest_exp);
1567 else if (mode == V2DFmode)
1568 stvx = gen_altivec_stvx_v2df (src_exp, dest_exp);
1569 else if (mode == V1TImode)
1570 stvx = gen_altivec_stvx_v1ti (src_exp, dest_exp);
1571 else
1572 /* KFmode, TFmode, other modes not expected in this context. */
1573 gcc_unreachable ();
1575 rtx new_mem_exp = SET_DEST (PATTERN (stvx));
1576 mimic_memory_attributes_and_flags (new_mem_exp, dest_exp);
1577 return stvx;
1580 /* Given that STORE_INSN represents an aligned store-with-swap of a
1581 swapped value, replace the store with an aligned store (without
1582 swap) and replace the swap with a copy insn. */
1583 static void
1584 replace_swapped_aligned_store (swap_web_entry *insn_entry,
1585 rtx_insn *store_insn)
1587 unsigned uid = INSN_UID (store_insn);
1588 gcc_assert (insn_entry[uid].is_swap && insn_entry[uid].is_store);
1590 rtx body = PATTERN (store_insn);
1591 rtx dest_address = XEXP (SET_DEST (body), 0);
1592 rtx swap_reg = XEXP (SET_SRC (body), 0);
1593 gcc_assert (REG_P (dest_address)
1594 || rs6000_sum_of_two_registers_p (dest_address));
1596 /* Find the swap instruction that provides the value to be stored by
1597 this store-with-swap instruction. */
1598 struct df_insn_info *insn_info = DF_INSN_INFO_GET (store_insn);
1599 df_ref use;
1600 rtx_insn *swap_insn = NULL;
1601 unsigned uid2 = 0;
1602 FOR_EACH_INSN_INFO_USE (use, insn_info)
1604 struct df_link *def_link = DF_REF_CHAIN (use);
1606 /* If this is not the definition of the candidate swap register,
1607 then skip it. I am only interested in the swap insn. */
1608 if (!rtx_equal_p (DF_REF_REG (use), swap_reg))
1609 continue;
1611 /* If there is no def or the def is artificial or there are
1612 multiple defs, we should not be here. */
1613 gcc_assert (def_link && def_link->ref && !def_link->next
1614 && !DF_REF_IS_ARTIFICIAL (def_link->ref));
1616 swap_insn = DF_REF_INSN (def_link->ref);
1617 uid2 = INSN_UID (swap_insn);
1619 /* If this source value is not a simple swap, we should not be here. */
1620 gcc_assert (insn_entry[uid2].is_swap && !insn_entry[uid2].is_load
1621 && !insn_entry[uid2].is_store);
1623 /* We've processed the use we care about, so break out of
1624 this loop. */
1625 break;
1628 /* At this point, swap_insn and uid2 represent the swap instruction
1629 that feeds the store. */
1630 gcc_assert (swap_insn);
1631 rtx set = single_set (store_insn);
1632 gcc_assert (set);
1633 rtx dest_exp = SET_DEST (set);
1634 rtx src_exp = XEXP (SET_SRC (body), 0);
1635 enum machine_mode mode = GET_MODE (dest_exp);
1636 gcc_assert (MEM_P (dest_exp));
1637 gcc_assert (MEM_ALIGN (dest_exp) >= 128);
1639 /* Replace the store with a new stvx insn. */
1640 rtx stvx;
1641 stvx = rs6000_gen_stvx (mode, dest_exp, src_exp);
1643 rtx_insn *new_insn = emit_insn_before (stvx, store_insn);
1644 rtx new_body = PATTERN (new_insn);
1646 gcc_assert ((GET_CODE (new_body) == SET)
1647 && (GET_CODE (SET_DEST (new_body)) == MEM));
1649 set_block_for_insn (new_insn, BLOCK_FOR_INSN (store_insn));
1650 df_insn_rescan (new_insn);
1652 df_insn_delete (store_insn);
1653 remove_insn (store_insn);
1654 store_insn->set_deleted ();
1656 /* Replace the swap with a copy. */
1657 uid2 = INSN_UID (swap_insn);
1658 mark_swaps_for_removal (insn_entry, uid2);
1659 replace_swap_with_copy (insn_entry, uid2);
1662 /* Generate an rtx expression to represent use of the lvx insn to load
1663 from memory SRC_EXP into register DEST_EXP with vector mode MODE. */
1664 rtx
1665 rs6000_gen_lvx (enum machine_mode mode, rtx dest_exp, rtx src_exp)
1667 rtx lvx;
1669 if (mode == V16QImode)
1670 lvx = gen_altivec_lvx_v16qi (dest_exp, src_exp);
1671 else if (mode == V8HImode)
1672 lvx = gen_altivec_lvx_v8hi (dest_exp, src_exp);
1673 #ifdef HAVE_V8HFmode
1674 else if (mode == V8HFmode)
1675 lvx = gen_altivec_lvx_v8hf (dest_exp, src_exp);
1676 #endif
1677 else if (mode == V4SImode)
1678 lvx = gen_altivec_lvx_v4si (dest_exp, src_exp);
1679 else if (mode == V4SFmode)
1680 lvx = gen_altivec_lvx_v4sf (dest_exp, src_exp);
1681 else if (mode == V2DImode)
1682 lvx = gen_altivec_lvx_v2di (dest_exp, src_exp);
1683 else if (mode == V2DFmode)
1684 lvx = gen_altivec_lvx_v2df (dest_exp, src_exp);
1685 else if (mode == V1TImode)
1686 lvx = gen_altivec_lvx_v1ti (dest_exp, src_exp);
1687 else
1688 /* KFmode, TFmode, other modes not expected in this context. */
1689 gcc_unreachable ();
1691 rtx new_mem_exp = SET_SRC (PATTERN (lvx));
1692 mimic_memory_attributes_and_flags (new_mem_exp, src_exp);
1694 return lvx;
1697 /* Given that SWAP_INSN represents a swap of an aligned
1698 load-with-swap, replace the load with an aligned load (without
1699 swap) and replace the swap with a copy insn. */
1700 static void
1701 replace_swapped_aligned_load (swap_web_entry *insn_entry, rtx swap_insn)
1703 /* Find the load. */
1704 unsigned uid = INSN_UID (swap_insn);
1705 /* Only call this if quad_aligned_load_p (swap_insn). */
1706 gcc_assert (insn_entry[uid].is_swap && !insn_entry[uid].is_load);
1707 struct df_insn_info *insn_info = DF_INSN_INFO_GET (swap_insn);
1709 /* Since insn is known to represent a swap instruction, we know it
1710 "uses" only one input variable. */
1711 df_ref use = DF_INSN_INFO_USES (insn_info);
1713 /* Figure out where this input variable is defined. */
1714 struct df_link *def_link = DF_REF_CHAIN (use);
1715 gcc_assert (def_link && !def_link->next);
1716 gcc_assert (def_link && def_link->ref &&
1717 !DF_REF_IS_ARTIFICIAL (def_link->ref) && !def_link->next);
1719 rtx_insn *def_insn = DF_REF_INSN (def_link->ref);
1720 unsigned uid2 = INSN_UID (def_insn);
1722 /* We're expecting a load-with-swap insn. */
1723 gcc_assert (insn_entry[uid2].is_load && insn_entry[uid2].is_swap);
1725 /* We expect this to be a set from memory, with the source representing
1726 a swap of the memory operand (indicated by code VEC_SELECT). */
1727 rtx body = PATTERN (def_insn);
1728 gcc_assert ((GET_CODE (body) == SET)
1729 && (GET_CODE (SET_SRC (body)) == VEC_SELECT)
1730 && (GET_CODE (XEXP (SET_SRC (body), 0)) == MEM));
1732 rtx src_exp = XEXP (SET_SRC (body), 0);
1733 enum machine_mode mode = GET_MODE (src_exp);
1734 rtx lvx = rs6000_gen_lvx (mode, SET_DEST (body), src_exp);
1736 rtx_insn *new_insn = emit_insn_before (lvx, def_insn);
1737 rtx new_body = PATTERN (new_insn);
1739 gcc_assert ((GET_CODE (new_body) == SET)
1740 && (GET_CODE (SET_SRC (new_body)) == MEM));
1742 set_block_for_insn (new_insn, BLOCK_FOR_INSN (def_insn));
1743 df_insn_rescan (new_insn);
1745 df_insn_delete (def_insn);
1746 remove_insn (def_insn);
1747 def_insn->set_deleted ();
1749 /* Replace the swap with a copy. */
1750 mark_swaps_for_removal (insn_entry, uid);
1751 replace_swap_with_copy (insn_entry, uid);
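/* Sketch of the load-side rewrite (not literal RTL):

     def_insn:   R = vec_select (mem[A])    ; load-with-swap
     swap_insn:  D = vec_select (R)         ; doubleword swap

   becomes

     new lvx:    R = mem[A]                 ; aligned load, no permute
     swap_insn:  D = R                      ; swap becomes a plain copy  */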
1754 /* Given that SWAP_INSN represents a swap of a load of a constant
1755 vector value, replace with a single instruction that loads a
1756 swapped variant of the original constant.
1758 The "natural" representation of a byte array in memory is the same
1759 for big endian and little endian.
1761 unsigned char byte_array[] =
1762 { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f };
1764 However, when loaded into a vector register, the representation
1765 depends on endian conventions.
1767 In big-endian mode, the register holds:
1769 MSB LSB
1770 [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f ]
1772 In little-endian mode, the register holds:
1774 MSB LSB
1775 [ f, e, d, c, b, a, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 ]
1777 Word arrays require different handling. Consider the word array:
1779 unsigned int word_array[] =
1780 { 0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f };
1782 The in-memory representation depends on the endian configuration. The
1783 equivalent arrays, declared as byte arrays, would appear in memory as:
1785 unsigned char big_endian_word_array_data[] =
1786 { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f }
1788 unsigned char little_endian_word_array_data[] =
1789 { 3, 2, 1, 0, 7, 6, 5, 4, b, a, 9, 8, f, e, d, c }
1791 In big-endian mode, the register holds:
1793 MSB LSB
1794 [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f ]
1796 In little-endian mode, the register holds:
1798 MSB LSB
1799 [ c, d, e, f, 8, 9, a, b, 4, 5, 6, 7, 0, 1, 2, 3 ]
1802 Similar transformations apply to the vector of half-word and vector
1803 of double-word representations.
1805 For now, don't handle vectors of quad-precision values. Just return.
1806 A better solution is to fix the code generator to emit lvx/stvx for
1807 those. */
1808 static void
1809 replace_swapped_load_constant (swap_web_entry *insn_entry, rtx swap_insn)
1811 /* Find the load. */
1812 struct df_insn_info *insn_info = DF_INSN_INFO_GET (swap_insn);
1813 rtx_insn *load_insn;
1814 df_ref use = DF_INSN_INFO_USES (insn_info);
1815 struct df_link *def_link = DF_REF_CHAIN (use);
1816 gcc_assert (def_link && !def_link->next);
1818 load_insn = DF_REF_INSN (def_link->ref);
1819 gcc_assert (load_insn);
1821 /* Find the TOC-relative symbol access. */
1822 insn_info = DF_INSN_INFO_GET (load_insn);
1823 use = DF_INSN_INFO_USES (insn_info);
1825 def_link = DF_REF_CHAIN (use);
1826 gcc_assert (def_link && !def_link->next);
1828 rtx_insn *tocrel_insn = DF_REF_INSN (def_link->ref);
1829 gcc_assert (tocrel_insn);
1831 /* Find the embedded CONST_VECTOR. We have to call toc_relative_expr_p
1832 to set tocrel_base; otherwise it would be unnecessary as we've
1833 already established it will return true. */
1834 rtx base, offset;
1835 rtx tocrel_expr = SET_SRC (PATTERN (tocrel_insn));
1836 const_rtx tocrel_base;
1838 /* There is an extra level of indirection for small/large code models. */
1839 if (GET_CODE (tocrel_expr) == MEM)
1840 tocrel_expr = XEXP (tocrel_expr, 0);
1842 if (!toc_relative_expr_p (tocrel_expr, false, &tocrel_base, NULL))
1843 gcc_unreachable ();
1845 split_const (XVECEXP (tocrel_base, 0, 0), &base, &offset);
1846 rtx const_vector = get_pool_constant (base);
1848 /* With the extra level of indirection, get_pool_constant may yield the
1849 SYMBOL_REF of the actual constant rather than the constant itself,
1850 so look it up once more to get the real constant. */
1851 if (GET_CODE (const_vector) == SYMBOL_REF)
1852 const_vector = get_pool_constant (const_vector);
1853 gcc_assert (GET_CODE (const_vector) == CONST_VECTOR);
1855 rtx new_mem;
1856 enum machine_mode mode = GET_MODE (const_vector);
1858 /* Create an adjusted constant from the original constant. */
1859 if (mode == V1TImode)
1860 /* V1TImode values are not handled for now; leave the sequence as is. */
1861 return;
1862 else if (mode == V16QImode)
1864 rtx vals = gen_rtx_PARALLEL (mode, rtvec_alloc (16));
1865 int i;
1867 for (i = 0; i < 16; i++)
1868 XVECEXP (vals, 0, ((i+8) % 16)) = XVECEXP (const_vector, 0, i);
1869 rtx new_const_vector = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
1870 new_mem = force_const_mem (mode, new_const_vector);
1872 else if ((mode == V8HImode)
1873 #ifdef HAVE_V8HFmode
1874 || (mode == V8HFmode)
1875 #endif
1878 rtx vals = gen_rtx_PARALLEL (mode, rtvec_alloc (8));
1879 int i;
1881 for (i = 0; i < 8; i++)
1882 XVECEXP (vals, 0, ((i+4) % 8)) = XVECEXP (const_vector, 0, i);
1883 rtx new_const_vector = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
1884 new_mem = force_const_mem (mode, new_const_vector);
1886 else if ((mode == V4SImode) || (mode == V4SFmode))
1888 rtx vals = gen_rtx_PARALLEL (mode, rtvec_alloc (4));
1889 int i;
1891 for (i = 0; i < 4; i++)
1892 XVECEXP (vals, 0, ((i+2) % 4)) = XVECEXP (const_vector, 0, i);
1893 rtx new_const_vector = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
1894 new_mem = force_const_mem (mode, new_const_vector);
1896 else if ((mode == V2DImode) || (mode == V2DFmode))
1898 rtx vals = gen_rtx_PARALLEL (mode, rtvec_alloc (2));
1899 int i;
1901 for (i = 0; i < 2; i++)
1902 XVECEXP (vals, 0, ((i+1) % 2)) = XVECEXP (const_vector, 0, i);
1903 rtx new_const_vector = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
1904 new_mem = force_const_mem (mode, new_const_vector);
1906 else
1908 /* We do not expect other modes to be constant-load-swapped. */
1909 gcc_unreachable ();
1912 /* This gives us a MEM whose base operand is a SYMBOL_REF, which we
1913 can't recognize. Force the SYMBOL_REF into a register. */
1914 if (!REG_P (XEXP (new_mem, 0))) {
1915 rtx base_reg = force_reg (Pmode, XEXP (new_mem, 0));
1916 XEXP (new_mem, 0) = base_reg;
1918 /* Move the newly created insn ahead of the load insn. */
1919 /* The last insn is the insn that forced new_mem into a register. */
1920 rtx_insn *force_insn = get_last_insn ();
1921 /* Remove this insn from the end of the instruction sequence. */
1922 remove_insn (force_insn);
1923 rtx_insn *before_load_insn = PREV_INSN (load_insn);
1925 /* And insert this insn back into the sequence immediately before the
1926 load insn so this new expression will be available when the
1927 existing load is modified to load the swapped constant. */
1928 add_insn_after (force_insn, before_load_insn, BLOCK_FOR_INSN (load_insn));
1929 df_insn_rescan (before_load_insn);
1930 df_insn_rescan (force_insn);
1933 /* Replace the MEM in the load instruction and rescan it. */
1934 XEXP (SET_SRC (PATTERN (load_insn)), 0) = new_mem;
1935 INSN_CODE (load_insn) = -1; /* Force re-recognition. */
1936 df_insn_rescan (load_insn);
1938 unsigned int uid = INSN_UID (swap_insn);
1939 mark_swaps_for_removal (insn_entry, uid);
1940 replace_swap_with_copy (insn_entry, uid);
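/* Worked example for V4SImode: given an original constant
   { A, B, C, D }, the loop above places element i at position
   (i + 2) % 4, yielding the pool constant { C, D, A, B }.  The
   still-permuting load of this rotated constant leaves the register
   holding exactly what the original load/swap pair produced, so the
   trailing swap can safely become a plain copy.  */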
1943 /* Dump the swap table to DUMP_FILE. */
1944 static void
1945 dump_swap_insn_table (swap_web_entry *insn_entry)
1947 int e = get_max_uid ();
1948 fprintf (dump_file, "\nRelevant insns with their flag settings\n\n");
1950 for (int i = 0; i < e; ++i)
1951 if (insn_entry[i].is_relevant)
1953 swap_web_entry *pred_entry = (swap_web_entry *)insn_entry[i].pred ();
1954 fprintf (dump_file, "%6d %6d ", i,
1955 pred_entry && pred_entry->insn
1956 ? INSN_UID (pred_entry->insn) : 0);
1957 if (insn_entry[i].is_load)
1958 fputs ("load ", dump_file);
1959 if (insn_entry[i].is_store)
1960 fputs ("store ", dump_file);
1961 if (insn_entry[i].is_swap)
1962 fputs ("swap ", dump_file);
1963 if (insn_entry[i].is_live_in)
1964 fputs ("live-in ", dump_file);
1965 if (insn_entry[i].is_live_out)
1966 fputs ("live-out ", dump_file);
1967 if (insn_entry[i].contains_subreg)
1968 fputs ("subreg ", dump_file);
1969 if (insn_entry[i].is_128_int)
1970 fputs ("int128 ", dump_file);
1971 if (insn_entry[i].is_call)
1972 fputs ("call ", dump_file);
1973 if (insn_entry[i].is_swappable)
1975 fputs ("swappable ", dump_file);
1976 if (insn_entry[i].special_handling == SH_CONST_VECTOR)
1977 fputs ("special:constvec ", dump_file);
1978 else if (insn_entry[i].special_handling == SH_SUBREG)
1979 fputs ("special:subreg ", dump_file);
1980 else if (insn_entry[i].special_handling == SH_NOSWAP_LD)
1981 fputs ("special:load ", dump_file);
1982 else if (insn_entry[i].special_handling == SH_NOSWAP_ST)
1983 fputs ("special:store ", dump_file);
1984 else if (insn_entry[i].special_handling == SH_EXTRACT)
1985 fputs ("special:extract ", dump_file);
1986 else if (insn_entry[i].special_handling == SH_SPLAT)
1987 fputs ("special:splat ", dump_file);
1988 else if (insn_entry[i].special_handling == SH_XXPERMDI)
1989 fputs ("special:xxpermdi ", dump_file);
1990 else if (insn_entry[i].special_handling == SH_CONCAT)
1991 fputs ("special:concat ", dump_file);
1992 else if (insn_entry[i].special_handling == SH_VPERM)
1993 fputs ("special:vperm ", dump_file);
1995 if (insn_entry[i].web_not_optimizable)
1996 fputs ("unoptimizable ", dump_file);
1997 if (insn_entry[i].will_delete)
1998 fputs ("delete ", dump_file);
1999 fputs ("\n", dump_file);
2001 fputs ("\n", dump_file);
2004 /* Return RTX with its address canonicalized to (reg) or (plus reg reg).
2005 Here RTX is an (and addr (const_int -16)). Always return a new copy
2006 to avoid problems with combine. */
2007 static rtx
2008 alignment_with_canonical_addr (rtx align)
2010 rtx canon;
2011 rtx addr = XEXP (align, 0);
2013 if (REG_P (addr))
2014 canon = addr;
2016 else if (GET_CODE (addr) == PLUS)
2018 rtx addrop0 = XEXP (addr, 0);
2019 rtx addrop1 = XEXP (addr, 1);
2021 if (!REG_P (addrop0))
2022 addrop0 = force_reg (GET_MODE (addrop0), addrop0);
2024 if (!REG_P (addrop1))
2025 addrop1 = force_reg (GET_MODE (addrop1), addrop1);
2027 canon = gen_rtx_PLUS (GET_MODE (addr), addrop0, addrop1);
2030 else
2031 canon = force_reg (GET_MODE (addr), addr);
2033 return gen_rtx_AND (GET_MODE (align), canon, GEN_INT (-16));
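/* For example, (and (plus (reg R1) (reg R2)) (const_int -16)) comes back
   as a freshly built (and (plus (reg R1) (reg R2)) (const_int -16)),
   while an address that is neither a REG nor a PLUS, such as a
   SYMBOL_REF, is first forced into a register, giving
   (and (reg T) (const_int -16)).  */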
2036 /* Check whether an rtx is an alignment mask, and if so, return
2037 a fully-expanded rtx for the masking operation. */
2038 static rtx
2039 alignment_mask (rtx_insn *insn)
2041 rtx body = PATTERN (insn);
2043 if (GET_CODE (body) != SET
2044 || GET_CODE (SET_SRC (body)) != AND
2045 || !REG_P (XEXP (SET_SRC (body), 0)))
2046 return 0;
2048 rtx mask = XEXP (SET_SRC (body), 1);
2050 if (GET_CODE (mask) == CONST_INT)
2052 if (INTVAL (mask) == -16)
2053 return alignment_with_canonical_addr (SET_SRC (body));
2054 else
2055 return 0;
2058 if (!REG_P (mask))
2059 return 0;
2061 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
2062 df_ref use;
2063 rtx real_mask = 0;
2065 FOR_EACH_INSN_INFO_USE (use, insn_info)
2067 if (!rtx_equal_p (DF_REF_REG (use), mask))
2068 continue;
2070 struct df_link *def_link = DF_REF_CHAIN (use);
2071 if (!def_link || def_link->next)
2072 return 0;
2074 rtx_insn *const_insn = DF_REF_INSN (def_link->ref);
2075 rtx const_body = PATTERN (const_insn);
2076 if (GET_CODE (const_body) != SET)
2077 return 0;
2079 real_mask = SET_SRC (const_body);
2081 if (GET_CODE (real_mask) != CONST_INT
2082 || INTVAL (real_mask) != -16)
2083 return 0;
2086 if (real_mask == 0)
2087 return 0;
2089 return alignment_with_canonical_addr (SET_SRC (body));
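/* Both of the following forms are recognized here:

     (set (reg A) (and (reg B) (const_int -16)))

   and, when the single reaching definition of (reg M) sets it to
   (const_int -16),

     (set (reg A) (and (reg B) (reg M)))

   In either case the value returned is the canonicalized
   (and ... (const_int -16)) masking of (reg B).  */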
2092 /* Given INSN that's a load or store based at BASE_REG, look for a
2093 feeding computation that aligns its address on a 16-byte boundary.
2094 Return that rtx, and store its containing insn in *AND_INSN. */
2095 static rtx
2096 find_alignment_op (rtx_insn *insn, rtx base_reg, rtx_insn **and_insn)
2098 df_ref base_use;
2099 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
2100 rtx and_operation = 0;
2102 FOR_EACH_INSN_INFO_USE (base_use, insn_info)
2104 if (!rtx_equal_p (DF_REF_REG (base_use), base_reg))
2105 continue;
2107 struct df_link *base_def_link = DF_REF_CHAIN (base_use);
2108 if (!base_def_link || base_def_link->next)
2109 break;
2111 /* With stack-protector code enabled, and possibly in other
2112 circumstances, there may not be an associated insn for
2113 the def. */
2114 if (DF_REF_IS_ARTIFICIAL (base_def_link->ref))
2115 break;
2117 *and_insn = DF_REF_INSN (base_def_link->ref);
2118 and_operation = alignment_mask (*and_insn);
2119 if (and_operation != 0)
2120 break;
2123 return and_operation;
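/* A typical match looks like

     and_insn:  (set (reg B) (and (reg A) (const_int -16)))
     insn:      ... (mem (reg B)) ...

   in which case the canonicalized AND is returned and *AND_INSN is set
   to and_insn; if the base register's definition is not such a mask,
   zero is returned.  */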
2126 struct del_info { bool replace; rtx_insn *replace_insn; };
2128 /* If INSN is the load for an lvx pattern, put it in canonical form. */
2129 static void
2130 recombine_lvx_pattern (rtx_insn *insn, del_info *to_delete)
2132 rtx body = PATTERN (insn);
2133 gcc_assert (GET_CODE (body) == SET
2134 && GET_CODE (SET_SRC (body)) == VEC_SELECT
2135 && GET_CODE (XEXP (SET_SRC (body), 0)) == MEM);
2137 rtx mem = XEXP (SET_SRC (body), 0);
2138 rtx base_reg = XEXP (mem, 0);
2140 rtx_insn *and_insn;
2141 rtx and_operation = find_alignment_op (insn, base_reg, &and_insn);
2143 if (and_operation != 0)
2145 df_ref def;
2146 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
2147 FOR_EACH_INSN_INFO_DEF (def, insn_info)
2149 struct df_link *link = DF_REF_CHAIN (def);
2150 if (!link || link->next)
2151 break;
2153 rtx_insn *swap_insn = DF_REF_INSN (link->ref);
2154 if (!insn_is_swap_p (swap_insn)
2155 || insn_is_load_p (swap_insn)
2156 || insn_is_store_p (swap_insn))
2157 break;
2159 /* Expected lvx pattern found. Change the swap to
2160 a copy, and propagate the AND operation into the
2161 load. */
2162 to_delete[INSN_UID (swap_insn)].replace = true;
2163 to_delete[INSN_UID (swap_insn)].replace_insn = swap_insn;
2165 /* However, first we must be sure that we make the
2166 base register from the AND operation available
2167 in case the register has been overwritten. Copy
2168 the base register to a new pseudo and use that
2169 as the base register of the AND operation in
2170 the new LVX instruction. */
2171 rtx and_base = XEXP (and_operation, 0);
2172 rtx new_reg = gen_reg_rtx (GET_MODE (and_base));
2173 rtx copy = gen_rtx_SET (new_reg, and_base);
2174 rtx_insn *new_insn = emit_insn_after (copy, and_insn);
2175 set_block_for_insn (new_insn, BLOCK_FOR_INSN (and_insn));
2176 df_insn_rescan (new_insn);
2178 XEXP (mem, 0) = gen_rtx_AND (GET_MODE (and_base), new_reg,
2179 XEXP (and_operation, 1));
2180 SET_SRC (body) = mem;
2181 INSN_CODE (insn) = -1; /* Force re-recognition. */
2182 df_insn_rescan (insn);
2184 if (dump_file)
2185 fprintf (dump_file, "lvx opportunity found at %d\n",
2186 INSN_UID (insn));
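/* Roughly (a sketch, not literal RTL), the lvx canonicalization above
   rewrites

     and_insn:   B = A & -16
     insn:       R = vec_select (mem[B])     ; permuting load
     swap_insn:  D = vec_select (R)          ; doubleword swap

   into

     and_insn:   B = A & -16                 ; unchanged
     copy:       N = A                       ; new pseudo preserves A
     insn:       R = mem[N & -16]            ; AND folded into the address
     swap_insn:  D = R                       ; made a copy by the caller

   which is the canonical form expected for the lvx built-in.  */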
2191 /* If INSN is the store for an stvx pattern, put it in canonical form. */
2192 static void
2193 recombine_stvx_pattern (rtx_insn *insn, del_info *to_delete)
2195 rtx body = PATTERN (insn);
2196 gcc_assert (GET_CODE (body) == SET
2197 && GET_CODE (SET_DEST (body)) == MEM
2198 && GET_CODE (SET_SRC (body)) == VEC_SELECT);
2199 rtx mem = SET_DEST (body);
2200 rtx base_reg = XEXP (mem, 0);
2202 rtx_insn *and_insn;
2203 rtx and_operation = find_alignment_op (insn, base_reg, &and_insn);
2205 if (and_operation != 0)
2207 rtx src_reg = XEXP (SET_SRC (body), 0);
2208 df_ref src_use;
2209 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
2210 FOR_EACH_INSN_INFO_USE (src_use, insn_info)
2212 if (!rtx_equal_p (DF_REF_REG (src_use), src_reg))
2213 continue;
2215 struct df_link *link = DF_REF_CHAIN (src_use);
2216 if (!link || link->next)
2217 break;
2219 rtx_insn *swap_insn = DF_REF_INSN (link->ref);
2220 if (!insn_is_swap_p (swap_insn)
2221 || insn_is_load_p (swap_insn)
2222 || insn_is_store_p (swap_insn))
2223 break;
2225 /* Expected stvx pattern found. Change the swap to
2226 a copy, and propagate the AND operation into the
2227 store. */
2228 to_delete[INSN_UID (swap_insn)].replace = true;
2229 to_delete[INSN_UID (swap_insn)].replace_insn = swap_insn;
2231 /* However, first we must be sure that we make the
2232 base register from the AND operation available
2233 in case the register has been overwritten. Copy
2234 the base register to a new pseudo and use that
2235 as the base register of the AND operation in
2236 the new STVX instruction. */
2237 rtx and_base = XEXP (and_operation, 0);
2238 rtx new_reg = gen_reg_rtx (GET_MODE (and_base));
2239 rtx copy = gen_rtx_SET (new_reg, and_base);
2240 rtx_insn *new_insn = emit_insn_after (copy, and_insn);
2241 set_block_for_insn (new_insn, BLOCK_FOR_INSN (and_insn));
2242 df_insn_rescan (new_insn);
2244 XEXP (mem, 0) = gen_rtx_AND (GET_MODE (and_base), new_reg,
2245 XEXP (and_operation, 1));
2246 SET_SRC (body) = src_reg;
2247 INSN_CODE (insn) = -1; /* Force re-recognition. */
2248 df_insn_rescan (insn);
2250 if (dump_file)
2251 fprintf (dump_file, "stvx opportunity found at %d\n",
2252 INSN_UID (insn));
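/* The store side mirrors the lvx sketch above:

     swap_insn:  T = vec_select (S)          ; doubleword swap
     insn:       mem[B] = vec_select (T)     ; permuting store

   becomes

     swap_insn:  T = S                       ; made a copy by the caller
     copy:       N = A                       ; new pseudo preserves A
     insn:       mem[N & -16] = T            ; AND folded into the address

   where B was previously computed as A & -16.  */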
2257 /* Look for patterns created from builtin lvx and stvx calls, and
2258 canonicalize them to be properly recognized as such. */
2259 static void
2260 recombine_lvx_stvx_patterns (function *fun)
2262 int i;
2263 basic_block bb;
2264 rtx_insn *insn;
2266 int num_insns = get_max_uid ();
2267 del_info *to_delete = XCNEWVEC (del_info, num_insns);
2269 FOR_ALL_BB_FN (bb, fun)
2270 FOR_BB_INSNS (bb, insn)
2272 if (!NONDEBUG_INSN_P (insn))
2273 continue;
2275 if (insn_is_load_p (insn) && insn_is_swap_p (insn))
2276 recombine_lvx_pattern (insn, to_delete);
2277 else if (insn_is_store_p (insn) && insn_is_swap_p (insn))
2278 recombine_stvx_pattern (insn, to_delete);
2281 /* Turning swaps into copies is delayed until now, to avoid problems
2282 with deleting instructions during the insn walk. */
2283 for (i = 0; i < num_insns; i++)
2284 if (to_delete[i].replace)
2286 rtx swap_body = PATTERN (to_delete[i].replace_insn);
2287 rtx src_reg = XEXP (SET_SRC (swap_body), 0);
2288 rtx copy = gen_rtx_SET (SET_DEST (swap_body), src_reg);
2289 rtx_insn *new_insn = emit_insn_before (copy,
2290 to_delete[i].replace_insn);
2291 set_block_for_insn (new_insn,
2292 BLOCK_FOR_INSN (to_delete[i].replace_insn));
2293 df_insn_rescan (new_insn);
2294 df_insn_delete (to_delete[i].replace_insn);
2295 remove_insn (to_delete[i].replace_insn);
2296 to_delete[i].replace_insn->set_deleted ();
2299 free (to_delete);
2302 /* Main entry point for this pass. */
2303 unsigned int
2304 rs6000_analyze_swaps (function *fun)
2306 swap_web_entry *insn_entry;
2307 basic_block bb;
2308 rtx_insn *insn, *curr_insn = 0;
2310 /* Dataflow analysis for use-def chains. */
2311 df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
2312 df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
2313 df_analyze ();
2314 df_set_flags (DF_DEFER_INSN_RESCAN);
2316 /* Pre-pass to recombine lvx and stvx patterns so we don't lose info. */
2317 recombine_lvx_stvx_patterns (fun);
2318 df_process_deferred_rescans ();
2320 /* Allocate structure to represent webs of insns. */
2321 insn_entry = XCNEWVEC (swap_web_entry, get_max_uid ());
2323 /* Walk the insns to gather basic data. */
2324 FOR_ALL_BB_FN (bb, fun)
2325 FOR_BB_INSNS_SAFE (bb, insn, curr_insn)
2327 unsigned int uid = INSN_UID (insn);
2328 if (NONDEBUG_INSN_P (insn))
2330 insn_entry[uid].insn = insn;
2332 if (GET_CODE (insn) == CALL_INSN)
2333 insn_entry[uid].is_call = 1;
2335 /* Walk the uses and defs to see if we mention vector regs.
2336 Record any constraints on optimization of such mentions. */
2337 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
2338 df_ref mention;
2339 FOR_EACH_INSN_INFO_USE (mention, insn_info)
2341 /* We use DF_REF_REAL_REG here to get inside any subregs. */
2342 machine_mode mode = GET_MODE (DF_REF_REAL_REG (mention));
2344 /* If a use gets its value from a call insn, it will be
2345 a hard register and will look like (reg:V4SI 3 3).
2346 The df analysis creates two mentions for GPR3 and GPR4,
2347 both DImode. We must recognize this and treat it as a
2348 vector mention to ensure the call is unioned with this
2349 use. */
2350 if (mode == DImode && DF_REF_INSN_INFO (mention))
2352 rtx feeder = DF_REF_INSN (mention);
2353 /* FIXME: It is pretty hard to get from the df mention
2354 to the mode of the use in the insn. We arbitrarily
2355 pick a vector mode here, even though the use might
2356 be a real DImode. We can be too conservative
2357 (create a web larger than necessary) because of
2358 this, so consider eventually fixing this. */
2359 if (GET_CODE (feeder) == CALL_INSN)
2360 mode = V4SImode;
2363 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode) || mode == TImode)
2365 insn_entry[uid].is_relevant = 1;
2366 if (mode == TImode || mode == V1TImode
2367 || FLOAT128_VECTOR_P (mode))
2368 insn_entry[uid].is_128_int = 1;
2369 if (DF_REF_INSN_INFO (mention))
2370 insn_entry[uid].contains_subreg
2371 = !rtx_equal_p (DF_REF_REG (mention),
2372 DF_REF_REAL_REG (mention));
2373 union_defs (insn_entry, insn, mention);
2376 FOR_EACH_INSN_INFO_DEF (mention, insn_info)
2378 /* We use DF_REF_REAL_REG here to get inside any subregs. */
2379 machine_mode mode = GET_MODE (DF_REF_REAL_REG (mention));
2381 /* If we're loading up a hard vector register for a call,
2382 it looks like (set (reg:V4SI 9 9) (...)). The df
2383 analysis creates two mentions for GPR9 and GPR10, both
2384 DImode. So relying on the mode from the mentions
2385 isn't sufficient to ensure we union the call into the
2386 web with the parameter setup code. */
2387 if (mode == DImode && GET_CODE (PATTERN (insn)) == SET
2388 && ALTIVEC_OR_VSX_VECTOR_MODE (GET_MODE (SET_DEST (PATTERN (insn)))))
2389 mode = GET_MODE (SET_DEST (PATTERN (insn)));
2391 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode) || mode == TImode)
2393 insn_entry[uid].is_relevant = 1;
2394 if (mode == TImode || mode == V1TImode
2395 || FLOAT128_VECTOR_P (mode))
2396 insn_entry[uid].is_128_int = 1;
2397 if (DF_REF_INSN_INFO (mention))
2398 insn_entry[uid].contains_subreg
2399 = !rtx_equal_p (DF_REF_REG (mention),
2400 DF_REF_REAL_REG (mention));
2401 /* REG_FUNCTION_VALUE_P is not valid for subregs. */
2402 else if (REG_FUNCTION_VALUE_P (DF_REF_REG (mention)))
2403 insn_entry[uid].is_live_out = 1;
2404 union_uses (insn_entry, insn, mention);
2408 if (insn_entry[uid].is_relevant)
2410 /* Determine if this is a load or store. */
2411 insn_entry[uid].is_load = insn_is_load_p (insn);
2412 insn_entry[uid].is_store = insn_is_store_p (insn);
2414 /* Determine if this is a doubleword swap. If not,
2415 determine whether it can legally be swapped. */
2416 if (insn_is_swap_p (insn))
2417 insn_entry[uid].is_swap = 1;
2418 else
2420 unsigned int special = SH_NONE;
2421 insn_entry[uid].is_swappable
2422 = insn_is_swappable_p (insn_entry, insn, &special);
2423 if (special != SH_NONE && insn_entry[uid].contains_subreg)
2424 insn_entry[uid].is_swappable = 0;
2425 else if (special != SH_NONE)
2426 insn_entry[uid].special_handling = special;
2427 else if (insn_entry[uid].contains_subreg)
2428 insn_entry[uid].special_handling = SH_SUBREG;
2434 if (dump_file)
2436 fprintf (dump_file, "\nSwap insn entry table when first built\n");
2437 dump_swap_insn_table (insn_entry);
2440 /* Record unoptimizable webs. */
2441 unsigned e = get_max_uid (), i;
2442 for (i = 0; i < e; ++i)
2444 if (!insn_entry[i].is_relevant)
2445 continue;
2447 swap_web_entry *root
2448 = (swap_web_entry*)(&insn_entry[i])->unionfind_root ();
2450 if (insn_entry[i].is_live_in || insn_entry[i].is_live_out
2451 || (insn_entry[i].contains_subreg
2452 && insn_entry[i].special_handling != SH_SUBREG)
2453 || insn_entry[i].is_128_int || insn_entry[i].is_call
2454 || !(insn_entry[i].is_swappable || insn_entry[i].is_swap))
2455 root->web_not_optimizable = 1;
2457 /* If we have loads or stores that aren't permuting then the
2458 optimization isn't appropriate. */
2459 else if ((insn_entry[i].is_load || insn_entry[i].is_store)
2460 && !insn_entry[i].is_swap && !insn_entry[i].is_swappable)
2461 root->web_not_optimizable = 1;
2463 /* If we have a swap that is both fed by a permuting load
2464 and a feeder of a permuting store, then the optimization
2465 isn't appropriate. (Consider vec_xl followed by vec_xst_be.) */
2466 else if (insn_entry[i].is_swap && !insn_entry[i].is_load
2467 && !insn_entry[i].is_store
2468 && swap_feeds_both_load_and_store (&insn_entry[i]))
2469 root->web_not_optimizable = 1;
2471 /* If we have permuting loads or stores that are not accompanied
2472 by a register swap, the optimization isn't appropriate. */
2473 else if (insn_entry[i].is_load && insn_entry[i].is_swap)
2475 rtx insn = insn_entry[i].insn;
2476 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
2477 df_ref def;
2479 FOR_EACH_INSN_INFO_DEF (def, insn_info)
2481 struct df_link *link = DF_REF_CHAIN (def);
2483 if (!chain_contains_only_swaps (insn_entry, link, FOR_LOADS))
2485 root->web_not_optimizable = 1;
2486 break;
2490 else if (insn_entry[i].is_store && insn_entry[i].is_swap)
2492 rtx insn = insn_entry[i].insn;
2493 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
2494 df_ref use;
2496 FOR_EACH_INSN_INFO_USE (use, insn_info)
2498 struct df_link *link = DF_REF_CHAIN (use);
2500 if (!chain_contains_only_swaps (insn_entry, link, FOR_STORES))
2502 root->web_not_optimizable = 1;
2503 break;
2509 if (dump_file)
2511 fprintf (dump_file, "\nSwap insn entry table after web analysis\n");
2512 dump_swap_insn_table (insn_entry);
2515 /* For each load and store in an optimizable web (which implies
2516 the loads and stores are permuting), find the associated
2517 register swaps and mark them for removal. Due to various
2518 optimizations we may mark the same swap more than once. Also
2519 perform special handling for swappable insns that require it. */
2520 for (i = 0; i < e; ++i)
2521 if ((insn_entry[i].is_load || insn_entry[i].is_store)
2522 && insn_entry[i].is_swap)
2524 swap_web_entry* root_entry
2525 = (swap_web_entry*)((&insn_entry[i])->unionfind_root ());
2526 if (!root_entry->web_not_optimizable)
2527 mark_swaps_for_removal (insn_entry, i);
2529 else if (insn_entry[i].is_swappable && insn_entry[i].special_handling)
2531 swap_web_entry* root_entry
2532 = (swap_web_entry*)((&insn_entry[i])->unionfind_root ());
2533 if (!root_entry->web_not_optimizable)
2534 handle_special_swappables (insn_entry, i);
2537 /* Now delete the swaps marked for removal. */
2538 for (i = 0; i < e; ++i)
2539 if (insn_entry[i].will_delete)
2540 replace_swap_with_copy (insn_entry, i);
2542 /* Clean up. */
2543 free (insn_entry);
2545 /* Use a second pass over rtl to detect that certain vector values
2546 fetched from or stored to memory on quad-word aligned addresses
2547 can use lvx/stvx without swaps. */
2549 /* First, rebuild ud chains. */
2550 df_remove_problem (df_chain);
2551 df_process_deferred_rescans ();
2552 df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
2553 df_chain_add_problem (DF_UD_CHAIN);
2554 df_analyze ();
2556 swap_web_entry *pass2_insn_entry;
2557 pass2_insn_entry = XCNEWVEC (swap_web_entry, get_max_uid ());
2559 /* Walk the insns to gather basic data. */
2560 FOR_ALL_BB_FN (bb, fun)
2561 FOR_BB_INSNS_SAFE (bb, insn, curr_insn)
2563 unsigned int uid = INSN_UID (insn);
2564 if (NONDEBUG_INSN_P (insn))
2566 pass2_insn_entry[uid].insn = insn;
2568 pass2_insn_entry[uid].is_relevant = 1;
2569 pass2_insn_entry[uid].is_load = insn_is_load_p (insn);
2570 pass2_insn_entry[uid].is_store = insn_is_store_p (insn);
2572 /* Determine if this is a doubleword swap. If not,
2573 determine whether it can legally be swapped. */
2574 if (insn_is_swap_p (insn))
2575 pass2_insn_entry[uid].is_swap = 1;
2579 e = get_max_uid ();
2580 for (unsigned i = 0; i < e; ++i)
2581 if (pass2_insn_entry[i].is_swap && !pass2_insn_entry[i].is_load
2582 && !pass2_insn_entry[i].is_store)
2584 /* Replace swap of aligned load-swap with aligned unswapped
2585 load. */
2586 rtx_insn *swap_insn = pass2_insn_entry[i].insn;
2587 if (quad_aligned_load_p (pass2_insn_entry, swap_insn))
2588 replace_swapped_aligned_load (pass2_insn_entry, swap_insn);
2590 else if (pass2_insn_entry[i].is_swap && pass2_insn_entry[i].is_store)
2592 /* Replace aligned store-swap of swapped value with aligned
2593 unswapped store. */
2594 rtx_insn *store_insn = pass2_insn_entry[i].insn;
2595 if (quad_aligned_store_p (pass2_insn_entry, store_insn))
2596 replace_swapped_aligned_store (pass2_insn_entry, store_insn);
2599 /* Clean up. */
2600 free (pass2_insn_entry);
2602 /* Use a third pass over rtl to replace swap(load(vector constant))
2603 with load(swapped vector constant). */
2605 /* First, rebuild ud chains. */
2606 df_remove_problem (df_chain);
2607 df_process_deferred_rescans ();
2608 df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
2609 df_chain_add_problem (DF_UD_CHAIN);
2610 df_analyze ();
2612 swap_web_entry *pass3_insn_entry;
2613 pass3_insn_entry = XCNEWVEC (swap_web_entry, get_max_uid ());
2615 /* Walk the insns to gather basic data. */
2616 FOR_ALL_BB_FN (bb, fun)
2617 FOR_BB_INSNS_SAFE (bb, insn, curr_insn)
2619 unsigned int uid = INSN_UID (insn);
2620 if (NONDEBUG_INSN_P (insn))
2622 pass3_insn_entry[uid].insn = insn;
2624 pass3_insn_entry[uid].is_relevant = 1;
2625 pass3_insn_entry[uid].is_load = insn_is_load_p (insn);
2626 pass3_insn_entry[uid].is_store = insn_is_store_p (insn);
2628 /* Determine if this is a doubleword swap. If not,
2629 determine whether it can legally be swapped. */
2630 if (insn_is_swap_p (insn))
2631 pass3_insn_entry[uid].is_swap = 1;
2635 e = get_max_uid ();
2636 for (unsigned i = 0; i < e; ++i)
2637 if (pass3_insn_entry[i].is_swap && !pass3_insn_entry[i].is_load
2638 && !pass3_insn_entry[i].is_store)
2640 insn = pass3_insn_entry[i].insn;
2641 if (const_load_sequence_p (pass3_insn_entry, insn))
2642 replace_swapped_load_constant (pass3_insn_entry, insn);
2645 /* Clean up. */
2646 free (pass3_insn_entry);
2647 return 0;
2650 const pass_data pass_data_analyze_swaps =
2652 RTL_PASS, /* type */
2653 "swaps", /* name */
2654 OPTGROUP_NONE, /* optinfo_flags */
2655 TV_NONE, /* tv_id */
2656 0, /* properties_required */
2657 0, /* properties_provided */
2658 0, /* properties_destroyed */
2659 0, /* todo_flags_start */
2660 TODO_df_finish, /* todo_flags_finish */
2663 class pass_analyze_swaps : public rtl_opt_pass
2665 public:
2666 pass_analyze_swaps(gcc::context *ctxt)
2667 : rtl_opt_pass(pass_data_analyze_swaps, ctxt)
2670 /* opt_pass methods: */
2671 virtual bool gate (function *)
2673 return (optimize > 0 && !BYTES_BIG_ENDIAN && TARGET_VSX
2674 && !TARGET_P9_VECTOR && rs6000_optimize_swaps);
2677 virtual unsigned int execute (function *fun)
2679 return rs6000_analyze_swaps (fun);
2682 opt_pass *clone ()
2684 return new pass_analyze_swaps (m_ctxt);
2687 }; // class pass_analyze_swaps
2689 rtl_opt_pass *
2690 make_pass_analyze_swaps (gcc::context *ctxt)
2692 return new pass_analyze_swaps (ctxt);