1 /* Subroutines used to remove unnecessary doubleword swaps
2 for p8 little-endian VSX code.
3 Copyright (C) 1991-2018 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published
9 by the Free Software Foundation; either version 3, or (at your
10 option) any later version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
15 License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #define IN_TARGET_CODE 1
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "backend.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "memmodel.h"
30 #include "df.h"
31 #include "tm_p.h"
32 #include "ira.h"
33 #include "print-tree.h"
34 #include "varasm.h"
35 #include "explow.h"
36 #include "expr.h"
37 #include "output.h"
38 #include "tree-pass.h"
39 #include "rtx-vector-builder.h"
41 /* Analyze vector computations and remove unnecessary doubleword
42 swaps (xxswapdi instructions). This pass is performed only
43 for little-endian VSX code generation.
45 For this specific case, loads and stores of 4x32 and 2x64 vectors
46 are inefficient. These are implemented using the lxvd2x and
47 stxvd2x instructions, which invert the order of doublewords in
48 a vector register. Thus the code generation inserts an xxswapdi
49 after each such load, and prior to each such store. (For spill
50 code after register assignment, an additional xxswapdi is inserted
51 following each store in order to return a hard register to its
52 unpermuted value.)
54 The extra xxswapdi instructions reduce performance. This can be
55 particularly bad for vectorized code. The purpose of this pass
56 is to reduce the number of xxswapdi instructions required for
57 correctness.
59 The primary insight is that much code that operates on vectors
60 does not care about the relative order of elements in a register,
61 so long as the correct memory order is preserved. If we have
62 a computation where all input values are provided by lxvd2x/xxswapdi
63 sequences, all outputs are stored using xxswapdi/stxvd2x sequences,
64 and all intermediate computations are pure SIMD (independent of
65 element order), then all the xxswapdi's associated with the loads
66 and stores may be removed.
68 This pass uses some of the infrastructure and logical ideas from
69 the "web" pass in web.c. We create maximal webs of computations
70 fitting the description above using union-find. Each such web is
71 then optimized by removing its unnecessary xxswapdi instructions.
73 The pass is placed prior to global optimization so that we can
74 perform the optimization in the safest and simplest way possible;
75 that is, by replacing each xxswapdi insn with a register copy insn.
76 Subsequent forward propagation will remove copies where possible.
78 There are some operations sensitive to element order for which we
79 can still allow the operation, provided we modify those operations.
80 These include CONST_VECTORs, for which we must swap the first and
81 second halves of the constant vector; and SUBREGs, for which we
82 must adjust the byte offset to account for the swapped doublewords.
83 A remaining opportunity would be non-immediate-form splats, for
84 which we should adjust the selected lane of the input. We should
85 also make code generation adjustments for sum-across operations,
86 since this is a common vectorizer reduction.
88 Because we run prior to the first split, we can see loads and stores
89 here that match *vsx_le_perm_{load,store}_<mode>. These are vanilla
90 vector loads and stores that have not yet been split into a permuting
91 load/store and a swap. (One way this can happen is with a builtin
92 call to vec_vsx_{ld,st}.) We can handle these as well, but rather
93 than deleting a swap, we convert the load/store into a permuting
94 load/store (which effectively removes the swap). */
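/* As a rough illustration (a sketch, not actual compiler output), a
   lane-independent computation such as

       void f (vector double *a, vector double *b, vector double *c)
       {
         *c = *a + *b;
       }

   is implemented on P8 little-endian with an lxvd2x/xxswapdi sequence
   for each load, an xvadddp for the add, and an xxswapdi/stxvd2x
   sequence for the store.  Since the add is independent of element
   order, all the xxswapdi instructions in this web can be removed by
   this pass.  */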
96 /* Notes on Permutes
98 We do not currently handle computations that contain permutes. There
99 is a general transformation that can be performed correctly, but it
100 may introduce more expensive code than it replaces. To handle these
101 would require a cost model to determine when to perform the optimization.
102 This commentary records how this could be done if desired.
104 The most general permute is something like this (example for V16QI):
106 (vec_select:V16QI (vec_concat:V32QI (op1:V16QI) (op2:V16QI))
107 (parallel [(const_int a0) (const_int a1)
108 ...
109 (const_int a14) (const_int a15)]))
111 where a0,...,a15 are in [0,31] and select elements from op1 and op2
112 to produce in the result.
114 Regardless of mode, we can convert the PARALLEL to a mask of 16
115 byte-element selectors. Let's call this M, with M[i] representing
116 the ith byte-element selector value. Then if we swap doublewords
117 throughout the computation, we can get correct behavior by replacing
118 M with M' as follows:
120 M'[i] = { (M[i]+8)%16 : M[i] in [0,15]
121 { ((M[i]+8)%16)+16 : M[i] in [16,31]
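
   For example (applying the rule above mechanically), a vmrghb-style
   byte merge mask

     M  = {  0, 16,  1, 17,  2, 18,  3, 19,  4, 20,  5, 21,  6, 22,  7, 23 }

   becomes

     M' = {  8, 24,  9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 }.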
123 This seems promising at first, since we are just replacing one mask
124 with another. But certain masks are preferable to others. If M
125 is a mask that matches a vmrghh pattern, for example, M' certainly
126 will not. Instead of a single vmrghh, we would generate a load of
127 M' and a vperm. So we would need to know how many xxswapd's we can
128 remove as a result of this transformation to determine if it's
129 profitable; and preferably the logic would need to be aware of all
130 the special preferable masks.
132 Another form of permute is an UNSPEC_VPERM, in which the mask is
133 already in a register. In some cases, this mask may be a constant
134 that we can discover with ud-chains, in which case the above
135 transformation is ok. However, the common usage here is for the
136 mask to be produced by an UNSPEC_LVSL, in which case the mask
137 cannot be known at compile time. In such a case we would have to
138 generate several instructions to compute M' as above at run time,
139 and a cost model is needed again.
141 However, when the mask M for an UNSPEC_VPERM is loaded from the
142 constant pool, we can replace M with M' as above at no cost
143 beyond adding a constant pool entry. */
145 /* This is based on the union-find logic in web.c. web_entry_base is
146 defined in df.h. */
147 class swap_web_entry : public web_entry_base
149 public:
150 /* Pointer to the insn. */
151 rtx_insn *insn;
152 /* Set if insn contains a mention of a vector register. All other
153 fields are undefined if this field is unset. */
154 unsigned int is_relevant : 1;
155 /* Set if insn is a load. */
156 unsigned int is_load : 1;
157 /* Set if insn is a store. */
158 unsigned int is_store : 1;
159 /* Set if insn is a doubleword swap. This can either be a register swap
160 or a permuting load or store (test is_load and is_store for this). */
161 unsigned int is_swap : 1;
162 /* Set if the insn has a live-in use of a parameter register. */
163 unsigned int is_live_in : 1;
164 /* Set if the insn has a live-out def of a return register. */
165 unsigned int is_live_out : 1;
166 /* Set if the insn contains a subreg reference of a vector register. */
167 unsigned int contains_subreg : 1;
168 /* Set if the insn contains a 128-bit integer operand. */
169 unsigned int is_128_int : 1;
170 /* Set if this is a call-insn. */
171 unsigned int is_call : 1;
172 /* Set if this insn does not perform a vector operation for which
173 element order matters, or if we know how to fix it up if it does.
174 Undefined if is_swap is set. */
175 unsigned int is_swappable : 1;
176 /* A nonzero value indicates what kind of special handling for this
177 insn is required if doublewords are swapped. Undefined if
178 is_swappable is not set. */
179 unsigned int special_handling : 4;
180 /* Set if the web represented by this entry cannot be optimized. */
181 unsigned int web_not_optimizable : 1;
182 /* Set if this insn should be deleted. */
183 unsigned int will_delete : 1;
186 enum special_handling_values {
187 SH_NONE = 0,
188 SH_CONST_VECTOR,
189 SH_SUBREG,
190 SH_NOSWAP_LD,
191 SH_NOSWAP_ST,
192 SH_EXTRACT,
193 SH_SPLAT,
194 SH_XXPERMDI,
195 SH_CONCAT,
196 SH_VPERM
199 /* Union INSN with all insns containing definitions that reach USE.
200 Detect whether USE is live-in to the current function. */
201 static void
202 union_defs (swap_web_entry *insn_entry, rtx insn, df_ref use)
204 struct df_link *link = DF_REF_CHAIN (use);
206 if (!link)
207 insn_entry[INSN_UID (insn)].is_live_in = 1;
209 while (link)
211 if (DF_REF_IS_ARTIFICIAL (link->ref))
212 insn_entry[INSN_UID (insn)].is_live_in = 1;
214 if (DF_REF_INSN_INFO (link->ref))
216 rtx def_insn = DF_REF_INSN (link->ref);
217 (void)unionfind_union (insn_entry + INSN_UID (insn),
218 insn_entry + INSN_UID (def_insn));
221 link = link->next;
225 /* Union INSN with all insns containing uses reached from DEF.
226 Detect whether DEF is live-out from the current function. */
227 static void
228 union_uses (swap_web_entry *insn_entry, rtx insn, df_ref def)
230 struct df_link *link = DF_REF_CHAIN (def);
232 if (!link)
233 insn_entry[INSN_UID (insn)].is_live_out = 1;
235 while (link)
237 /* This could be an eh use or some other artificial use;
238 we treat these all the same (killing the optimization). */
239 if (DF_REF_IS_ARTIFICIAL (link->ref))
240 insn_entry[INSN_UID (insn)].is_live_out = 1;
242 if (DF_REF_INSN_INFO (link->ref))
244 rtx use_insn = DF_REF_INSN (link->ref);
245 (void)unionfind_union (insn_entry + INSN_UID (insn),
246 insn_entry + INSN_UID (use_insn));
249 link = link->next;
253 /* Return 1 iff INSN is a load insn, including permuting loads that
254 represent an lxvd2x instruction; else return 0. */
255 static unsigned int
256 insn_is_load_p (rtx insn)
258 rtx body = PATTERN (insn);
260 if (GET_CODE (body) == SET)
262 if (GET_CODE (SET_SRC (body)) == MEM)
263 return 1;
265 if (GET_CODE (SET_SRC (body)) == VEC_SELECT
266 && GET_CODE (XEXP (SET_SRC (body), 0)) == MEM)
267 return 1;
269 return 0;
272 if (GET_CODE (body) != PARALLEL)
273 return 0;
275 rtx set = XVECEXP (body, 0, 0);
277 if (GET_CODE (set) == SET && GET_CODE (SET_SRC (set)) == MEM)
278 return 1;
280 return 0;
283 /* Return 1 iff INSN is a store insn, including permuting stores that
284 represent an stxvd2x instruction; else return 0. */
285 static unsigned int
286 insn_is_store_p (rtx insn)
288 rtx body = PATTERN (insn);
289 if (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == MEM)
290 return 1;
291 if (GET_CODE (body) != PARALLEL)
292 return 0;
293 rtx set = XVECEXP (body, 0, 0);
294 if (GET_CODE (set) == SET && GET_CODE (SET_DEST (set)) == MEM)
295 return 1;
296 return 0;
299 /* Return 1 iff INSN swaps doublewords. This may be a reg-reg swap,
300 a permuting load, or a permuting store. */
301 static unsigned int
302 insn_is_swap_p (rtx insn)
304 rtx body = PATTERN (insn);
305 if (GET_CODE (body) != SET)
306 return 0;
307 rtx rhs = SET_SRC (body);
308 if (GET_CODE (rhs) != VEC_SELECT)
309 return 0;
310 rtx parallel = XEXP (rhs, 1);
311 if (GET_CODE (parallel) != PARALLEL)
312 return 0;
313 unsigned int len = XVECLEN (parallel, 0);
314 if (len != 2 && len != 4 && len != 8 && len != 16)
315 return 0;
316 for (unsigned int i = 0; i < len / 2; ++i)
318 rtx op = XVECEXP (parallel, 0, i);
319 if (GET_CODE (op) != CONST_INT || INTVAL (op) != len / 2 + i)
320 return 0;
322 for (unsigned int i = len / 2; i < len; ++i)
324 rtx op = XVECEXP (parallel, 0, i);
325 if (GET_CODE (op) != CONST_INT || INTVAL (op) != i - len / 2)
326 return 0;
328 return 1;
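/* As a concrete sketch, the V2DF register-swap form accepted by the
   checks above is

     (set (reg:V2DF d)
          (vec_select:V2DF (reg:V2DF s)
                           (parallel [(const_int 1) (const_int 0)])))

   and for V4SI the selector would be (parallel [2 3 0 1]): the second
   half of the elements followed by the first half.  */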
331 /* Return true iff EXPR represents the sum of two registers. */
332 bool
333 rs6000_sum_of_two_registers_p (const_rtx expr)
335 if (GET_CODE (expr) == PLUS)
337 const_rtx operand1 = XEXP (expr, 0);
338 const_rtx operand2 = XEXP (expr, 1);
339 return (REG_P (operand1) && REG_P (operand2));
341 return false;
344 /* Return true iff EXPR represents an address expression that masks off
345 the low-order 4 bits in the style of an lvx or stvx rtl pattern. */
346 bool
347 rs6000_quadword_masked_address_p (const_rtx expr)
349 if (GET_CODE (expr) == AND)
351 const_rtx operand1 = XEXP (expr, 0);
352 const_rtx operand2 = XEXP (expr, 1);
353 if ((REG_P (operand1) || rs6000_sum_of_two_registers_p (operand1))
354 && CONST_SCALAR_INT_P (operand2) && INTVAL (operand2) == -16)
355 return true;
357 return false;
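/* For example, an lvx/stvx-style address such as

     (and:DI (plus:DI (reg:DI ra) (reg:DI rb)) (const_int -16))

   passes this test: the base is a sum of two registers and the AND
   with -16 clears the low-order four bits of the address.  */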
360 /* Return TRUE if INSN represents a swap of a swapped load from memory
361 and the memory address is quad-word aligned. */
362 static bool
363 quad_aligned_load_p (swap_web_entry *insn_entry, rtx_insn *insn)
365 unsigned uid = INSN_UID (insn);
366 if (!insn_entry[uid].is_swap || insn_entry[uid].is_load)
367 return false;
369 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
371 /* Since insn is known to represent a swap instruction, we know it
372 "uses" only one input variable. */
373 df_ref use = DF_INSN_INFO_USES (insn_info);
375 /* Figure out where this input variable is defined. */
376 struct df_link *def_link = DF_REF_CHAIN (use);
378 /* If there is no definition or the definition is artificial or there are
379 multiple definitions, punt. */
380 if (!def_link || !def_link->ref || DF_REF_IS_ARTIFICIAL (def_link->ref)
381 || def_link->next)
382 return false;
384 rtx def_insn = DF_REF_INSN (def_link->ref);
385 unsigned uid2 = INSN_UID (def_insn);
386 /* We're looking for a load-with-swap insn. If this is not that,
387 return false. */
388 if (!insn_entry[uid2].is_load || !insn_entry[uid2].is_swap)
389 return false;
391 /* If the source of the rtl def is not a set from memory, return
392 false. */
393 rtx body = PATTERN (def_insn);
394 if (GET_CODE (body) != SET
395 || GET_CODE (SET_SRC (body)) != VEC_SELECT
396 || GET_CODE (XEXP (SET_SRC (body), 0)) != MEM)
397 return false;
399 rtx mem = XEXP (SET_SRC (body), 0);
400 rtx base_reg = XEXP (mem, 0);
401 return ((REG_P (base_reg) || rs6000_sum_of_two_registers_p (base_reg))
402 && MEM_ALIGN (mem) >= 128) ? true : false;
405 /* Return TRUE if INSN represents a store-with-swap of a swapped value
406 and the memory address is quad-word aligned. */
407 static bool
408 quad_aligned_store_p (swap_web_entry *insn_entry, rtx_insn *insn)
410 unsigned uid = INSN_UID (insn);
411 if (!insn_entry[uid].is_swap || !insn_entry[uid].is_store)
412 return false;
414 rtx body = PATTERN (insn);
415 rtx dest_address = XEXP (SET_DEST (body), 0);
416 rtx swap_reg = XEXP (SET_SRC (body), 0);
418 /* If the base address for the memory expression is not represented
419 by a single register and is not the sum of two registers, punt. */
420 if (!REG_P (dest_address) && !rs6000_sum_of_two_registers_p (dest_address))
421 return false;
423 /* Confirm that the value to be stored is produced by a swap
424 instruction. */
425 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
426 df_ref use;
427 FOR_EACH_INSN_INFO_USE (use, insn_info)
429 struct df_link *def_link = DF_REF_CHAIN (use);
431 /* If this is not the definition of the candidate swap register,
432 then skip it. I am interested in a different definition. */
433 if (!rtx_equal_p (DF_REF_REG (use), swap_reg))
434 continue;
436 /* If there is no def or the def is artificial or there are
437 multiple defs, punt. */
438 if (!def_link || !def_link->ref || DF_REF_IS_ARTIFICIAL (def_link->ref)
439 || def_link->next)
440 return false;
442 rtx def_insn = DF_REF_INSN (def_link->ref);
443 unsigned uid2 = INSN_UID (def_insn);
445 /* If this source value is not a simple swap, return false */
446 if (!insn_entry[uid2].is_swap || insn_entry[uid2].is_load
447 || insn_entry[uid2].is_store)
448 return false;
450 /* I've processed the use that I care about, so break out of
451 this loop. */
452 break;
455 /* At this point, we know the source data comes from a swap. The
456 remaining question is whether the memory address is aligned. */
457 rtx set = single_set (insn);
458 if (set)
460 rtx dest = SET_DEST (set);
461 if (MEM_P (dest))
462 return (MEM_ALIGN (dest) >= 128);
464 return false;
467 /* Return 1 iff the insn in INSN_ENTRY, known to reference a swap, is both
468 fed by a load and a feeder of a store. */
469 static unsigned int
470 swap_feeds_both_load_and_store (swap_web_entry *insn_entry)
472 rtx insn = insn_entry->insn;
473 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
474 df_ref def, use;
475 struct df_link *link = 0;
476 rtx_insn *load = 0, *store = 0;
477 bool fed_by_load = 0;
478 bool feeds_store = 0;
480 FOR_EACH_INSN_INFO_USE (use, insn_info)
482 link = DF_REF_CHAIN (use);
483 load = DF_REF_INSN (link->ref);
484 if (insn_is_load_p (load) && insn_is_swap_p (load))
485 fed_by_load = 1;
488 FOR_EACH_INSN_INFO_DEF (def, insn_info)
490 link = DF_REF_CHAIN (def);
491 store = DF_REF_INSN (link->ref);
492 if (insn_is_store_p (store) && insn_is_swap_p (store))
493 feeds_store = 1;
496 return fed_by_load && feeds_store;
499 /* Return TRUE if insn is a swap fed by a load from the constant pool. */
500 static bool
501 const_load_sequence_p (swap_web_entry *insn_entry, rtx insn)
503 unsigned uid = INSN_UID (insn);
504 if (!insn_entry[uid].is_swap || insn_entry[uid].is_load)
505 return false;
507 const_rtx tocrel_base;
509 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
510 df_ref use;
512 /* Iterate over the definitions that are used by this insn. Since
513 this is known to be a swap insn, expect only one used definition. */
514 FOR_EACH_INSN_INFO_USE (use, insn_info)
516 struct df_link *def_link = DF_REF_CHAIN (use);
518 /* If there is no def or the def is artificial or there are
519 multiple defs, punt. */
520 if (!def_link || !def_link->ref || DF_REF_IS_ARTIFICIAL (def_link->ref)
521 || def_link->next)
522 return false;
524 rtx def_insn = DF_REF_INSN (def_link->ref);
525 unsigned uid2 = INSN_UID (def_insn);
526 /* If this is not a load or is not a swap, return false. */
527 if (!insn_entry[uid2].is_load || !insn_entry[uid2].is_swap)
528 return false;
530 /* If the source of the rtl def is not a set from memory, return
531 false. */
532 rtx body = PATTERN (def_insn);
533 if (GET_CODE (body) != SET
534 || GET_CODE (SET_SRC (body)) != VEC_SELECT
535 || GET_CODE (XEXP (SET_SRC (body), 0)) != MEM)
536 return false;
538 rtx mem = XEXP (SET_SRC (body), 0);
539 rtx base_reg = XEXP (mem, 0);
540 /* If the base address for the memory expression is not
541 represented by a register, punt. */
542 if (!REG_P (base_reg))
543 return false;
545 df_ref base_use;
546 insn_info = DF_INSN_INFO_GET (def_insn);
547 FOR_EACH_INSN_INFO_USE (base_use, insn_info)
549 /* If base_use does not represent base_reg, look for another
550 use. */
551 if (!rtx_equal_p (DF_REF_REG (base_use), base_reg))
552 continue;
554 struct df_link *base_def_link = DF_REF_CHAIN (base_use);
555 if (!base_def_link || base_def_link->next)
556 return false;
558 /* Constants held on the stack are not "true" constants
559 because their values are not part of the static load
560 image. If this constant's base reference is a stack
561 or frame pointer, it is seen as an artificial
562 reference. */
563 if (DF_REF_IS_ARTIFICIAL (base_def_link->ref))
564 return false;
566 rtx tocrel_insn = DF_REF_INSN (base_def_link->ref);
567 rtx tocrel_body = PATTERN (tocrel_insn);
568 rtx base, offset;
569 if (GET_CODE (tocrel_body) != SET)
570 return false;
571 /* There is an extra level of indirection for small/large
572 code models. */
573 rtx tocrel_expr = SET_SRC (tocrel_body);
574 if (GET_CODE (tocrel_expr) == MEM)
575 tocrel_expr = XEXP (tocrel_expr, 0);
576 if (!toc_relative_expr_p (tocrel_expr, false, &tocrel_base, NULL))
577 return false;
578 split_const (XVECEXP (tocrel_base, 0, 0), &base, &offset);
580 if (GET_CODE (base) != SYMBOL_REF || !CONSTANT_POOL_ADDRESS_P (base))
581 return false;
582 else
584 /* FIXME: The conditions under which
585 ((GET_CODE (const_vector) == SYMBOL_REF) &&
586 !CONSTANT_POOL_ADDRESS_P (const_vector))
587 are not well understood. This code prevents
588 an internal compiler error which will occur in
589 replace_swapped_load_constant () if we were to return
590 true. Some day, we should figure out how to properly
591 handle this condition in
592 replace_swapped_load_constant () and then we can
593 remove this special test. */
594 rtx const_vector = get_pool_constant (base);
595 if (GET_CODE (const_vector) == SYMBOL_REF
596 && CONSTANT_POOL_ADDRESS_P (const_vector))
597 const_vector = get_pool_constant (const_vector);
598 if (GET_CODE (const_vector) != CONST_VECTOR)
599 return false;
603 return true;
606 /* Return TRUE iff OP matches a V2DF reduction pattern. See the
607 definition of vsx_reduc_<VEC_reduc_name>_v2df in vsx.md. */
608 static bool
609 v2df_reduction_p (rtx op)
611 if (GET_MODE (op) != V2DFmode)
612 return false;
614 enum rtx_code code = GET_CODE (op);
615 if (code != PLUS && code != SMIN && code != SMAX)
616 return false;
618 rtx concat = XEXP (op, 0);
619 if (GET_CODE (concat) != VEC_CONCAT)
620 return false;
622 rtx select0 = XEXP (concat, 0);
623 rtx select1 = XEXP (concat, 1);
624 if (GET_CODE (select0) != VEC_SELECT || GET_CODE (select1) != VEC_SELECT)
625 return false;
627 rtx reg0 = XEXP (select0, 0);
628 rtx reg1 = XEXP (select1, 0);
629 if (!rtx_equal_p (reg0, reg1) || !REG_P (reg0))
630 return false;
632 rtx parallel0 = XEXP (select0, 1);
633 rtx parallel1 = XEXP (select1, 1);
634 if (GET_CODE (parallel0) != PARALLEL || GET_CODE (parallel1) != PARALLEL)
635 return false;
637 if (!rtx_equal_p (XVECEXP (parallel0, 0, 0), const1_rtx)
638 || !rtx_equal_p (XVECEXP (parallel1, 0, 0), const0_rtx))
639 return false;
641 return true;
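/* The checks above accept expressions of roughly this shape (PLUS may
   also be SMIN or SMAX), matching vsx_reduc_<VEC_reduc_name>_v2df:

     (plus:V2DF
       (vec_concat:V2DF
         (vec_select:DF (reg:V2DF x) (parallel [(const_int 1)]))
         (vec_select:DF (reg:V2DF x) (parallel [(const_int 0)])))
       (reg:V2DF x))

   Note that only the code of OP and its first operand are examined
   here; the second operand of the PLUS is not checked.  */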
644 /* Return 1 iff OP is an operand that will not be affected by having
645 vector doublewords swapped in memory. */
646 static unsigned int
647 rtx_is_swappable_p (rtx op, unsigned int *special)
649 enum rtx_code code = GET_CODE (op);
650 int i, j;
651 rtx parallel;
653 switch (code)
655 case LABEL_REF:
656 case SYMBOL_REF:
657 case CLOBBER:
658 case REG:
659 return 1;
661 case VEC_CONCAT:
662 case ASM_INPUT:
663 case ASM_OPERANDS:
664 return 0;
666 case CONST_VECTOR:
668 *special = SH_CONST_VECTOR;
669 return 1;
672 case VEC_DUPLICATE:
673 /* Opportunity: If XEXP (op, 0) has the same mode as the result,
674 and XEXP (op, 1) is a PARALLEL with a single QImode const int,
675 it represents a vector splat for which we can do special
676 handling. */
677 if (GET_CODE (XEXP (op, 0)) == CONST_INT)
678 return 1;
679 else if (REG_P (XEXP (op, 0))
680 && GET_MODE_INNER (GET_MODE (op)) == GET_MODE (XEXP (op, 0)))
681 /* This catches V2DF and V2DI splat, at a minimum. */
682 return 1;
683 else if (GET_CODE (XEXP (op, 0)) == TRUNCATE
684 && REG_P (XEXP (XEXP (op, 0), 0))
685 && GET_MODE_INNER (GET_MODE (op)) == GET_MODE (XEXP (op, 0)))
686 /* This catches splat of a truncated value. */
687 return 1;
688 else if (GET_CODE (XEXP (op, 0)) == VEC_SELECT)
689 /* If the duplicated item is from a select, defer to the select
690 processing to see if we can change the lane for the splat. */
691 return rtx_is_swappable_p (XEXP (op, 0), special);
692 else
693 return 0;
695 case VEC_SELECT:
696 /* A vec_extract operation is ok if we change the lane. */
697 if (GET_CODE (XEXP (op, 0)) == REG
698 && GET_MODE_INNER (GET_MODE (XEXP (op, 0))) == GET_MODE (op)
699 && GET_CODE ((parallel = XEXP (op, 1))) == PARALLEL
700 && XVECLEN (parallel, 0) == 1
701 && GET_CODE (XVECEXP (parallel, 0, 0)) == CONST_INT)
703 *special = SH_EXTRACT;
704 return 1;
706 /* An XXPERMDI is ok if we adjust the lanes. Note that if the
707 XXPERMDI is a swap operation, it will be identified by
708 insn_is_swap_p and therefore we won't get here. */
709 else if (GET_CODE (XEXP (op, 0)) == VEC_CONCAT
710 && (GET_MODE (XEXP (op, 0)) == V4DFmode
711 || GET_MODE (XEXP (op, 0)) == V4DImode)
712 && GET_CODE ((parallel = XEXP (op, 1))) == PARALLEL
713 && XVECLEN (parallel, 0) == 2
714 && GET_CODE (XVECEXP (parallel, 0, 0)) == CONST_INT
715 && GET_CODE (XVECEXP (parallel, 0, 1)) == CONST_INT)
717 *special = SH_XXPERMDI;
718 return 1;
720 else if (v2df_reduction_p (op))
721 return 1;
722 else
723 return 0;
725 case UNSPEC:
727 /* Various operations are unsafe for this optimization, at least
728 without significant additional work. Permutes are obviously
729 problematic, as both the permute control vector and the ordering
730 of the target values are invalidated by doubleword swapping.
731 Vector pack and unpack modify the number of vector lanes.
732 Merge-high/low will not operate correctly on swapped operands.
733 Vector shifts across element boundaries are clearly uncool,
734 as are vector select and concatenate operations. Vector
735 sum-across instructions define one operand with a specific
736 order-dependent element, so additional fixup code would be
737 needed to make those work. Vector set and non-immediate-form
738 vector splat are element-order sensitive. A few of these
739 cases might be workable with special handling if required.
740 Adding cost modeling would be appropriate in some cases. */
741 int val = XINT (op, 1);
742 switch (val)
744 default:
745 break;
746 case UNSPEC_VBPERMQ:
747 case UNSPEC_VMRGH_DIRECT:
748 case UNSPEC_VMRGL_DIRECT:
749 case UNSPEC_VPACK_SIGN_SIGN_SAT:
750 case UNSPEC_VPACK_SIGN_UNS_SAT:
751 case UNSPEC_VPACK_UNS_UNS_MOD:
752 case UNSPEC_VPACK_UNS_UNS_MOD_DIRECT:
753 case UNSPEC_VPACK_UNS_UNS_SAT:
754 case UNSPEC_VPERM:
755 case UNSPEC_VPERM_UNS:
756 case UNSPEC_VPERMHI:
757 case UNSPEC_VPERMSI:
758 case UNSPEC_VPERMXOR:
759 case UNSPEC_VPKPX:
760 case UNSPEC_VSLDOI:
761 case UNSPEC_VSLO:
762 case UNSPEC_VSRO:
763 case UNSPEC_VSUM2SWS:
764 case UNSPEC_VSUM4S:
765 case UNSPEC_VSUM4UBS:
766 case UNSPEC_VSUMSWS:
767 case UNSPEC_VSUMSWS_DIRECT:
768 case UNSPEC_VSX_CONCAT:
769 case UNSPEC_VSX_CVDPSPN:
770 case UNSPEC_VSX_CVSPDP:
771 case UNSPEC_VSX_CVSPDPN:
772 case UNSPEC_VSX_EXTRACT:
773 case UNSPEC_VSX_SET:
774 case UNSPEC_VSX_SLDWI:
775 case UNSPEC_VSX_VSLO:
776 case UNSPEC_VUNPACK_HI_SIGN:
777 case UNSPEC_VUNPACK_HI_SIGN_DIRECT:
778 case UNSPEC_VUNPACK_LO_SIGN:
779 case UNSPEC_VUNPACK_LO_SIGN_DIRECT:
780 case UNSPEC_VUPKHPX:
781 case UNSPEC_VUPKHS_V4SF:
782 case UNSPEC_VUPKHU_V4SF:
783 case UNSPEC_VUPKLPX:
784 case UNSPEC_VUPKLS_V4SF:
785 case UNSPEC_VUPKLU_V4SF:
786 return 0;
787 case UNSPEC_VSPLT_DIRECT:
788 case UNSPEC_VSX_XXSPLTD:
789 *special = SH_SPLAT;
790 return 1;
791 case UNSPEC_REDUC_PLUS:
792 case UNSPEC_REDUC:
793 return 1;
797 default:
798 break;
801 const char *fmt = GET_RTX_FORMAT (code);
802 int ok = 1;
804 for (i = 0; i < GET_RTX_LENGTH (code); ++i)
805 if (fmt[i] == 'e' || fmt[i] == 'u')
807 unsigned int special_op = SH_NONE;
808 ok &= rtx_is_swappable_p (XEXP (op, i), &special_op);
809 if (special_op == SH_NONE)
810 continue;
811 /* Ensure we never have two kinds of special handling
812 for the same insn. */
813 if (*special != SH_NONE && *special != special_op)
814 return 0;
815 *special = special_op;
817 else if (fmt[i] == 'E')
818 for (j = 0; j < XVECLEN (op, i); ++j)
820 unsigned int special_op = SH_NONE;
821 ok &= rtx_is_swappable_p (XVECEXP (op, i, j), &special_op);
822 if (special_op == SH_NONE)
823 continue;
824 /* Ensure we never have two kinds of special handling
825 for the same insn. */
826 if (*special != SH_NONE && *special != special_op)
827 return 0;
828 *special = special_op;
831 return ok;
834 /* Return 1 iff INSN is an operand that will not be affected by
835 having vector doublewords swapped in memory (in which case
836 *SPECIAL is unchanged), or that can be modified to be correct
837 if vector doublewords are swapped in memory (in which case
838 *SPECIAL is changed to a value indicating how). */
839 static unsigned int
840 insn_is_swappable_p (swap_web_entry *insn_entry, rtx insn,
841 unsigned int *special)
843 /* Calls are always bad. */
844 if (GET_CODE (insn) == CALL_INSN)
845 return 0;
847 /* Loads and stores seen here are not permuting, but we can still
848 fix them up by converting them to permuting ones. Exceptions:
849 UNSPEC_LVE, UNSPEC_LVX, and UNSPEC_STVX, which have a PARALLEL
850 body instead of a SET; and UNSPEC_STVE, which has an UNSPEC
851 for the SET source. Also we must now make an exception for lvx
852 and stvx when they are not in the UNSPEC_LVX/STVX form (with the
853 explicit "& -16") since this leads to unrecognizable insns. */
854 rtx body = PATTERN (insn);
855 int i = INSN_UID (insn);
857 if (insn_entry[i].is_load)
859 if (GET_CODE (body) == SET)
861 rtx rhs = SET_SRC (body);
862 /* Even without a swap, the RHS might be a vec_select for, say,
863 a byte-reversing load. */
864 if (GET_CODE (rhs) != MEM)
865 return 0;
866 if (GET_CODE (XEXP (rhs, 0)) == AND)
867 return 0;
869 *special = SH_NOSWAP_LD;
870 return 1;
872 else
873 return 0;
876 if (insn_entry[i].is_store)
878 if (GET_CODE (body) == SET
879 && GET_CODE (SET_SRC (body)) != UNSPEC
880 && GET_CODE (SET_SRC (body)) != VEC_SELECT)
882 rtx lhs = SET_DEST (body);
883 /* Even without a swap, the RHS might be a vec_select for, say,
884 a byte-reversing store. */
885 if (GET_CODE (lhs) != MEM)
886 return 0;
887 if (GET_CODE (XEXP (lhs, 0)) == AND)
888 return 0;
890 *special = SH_NOSWAP_ST;
891 return 1;
893 else
894 return 0;
897 /* A convert to single precision can be left as is provided that
898 all of its uses are in xxspltw instructions that splat BE element
899 zero. */
900 if (GET_CODE (body) == SET
901 && GET_CODE (SET_SRC (body)) == UNSPEC
902 && XINT (SET_SRC (body), 1) == UNSPEC_VSX_CVDPSPN)
904 df_ref def;
905 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
907 FOR_EACH_INSN_INFO_DEF (def, insn_info)
909 struct df_link *link = DF_REF_CHAIN (def);
910 if (!link)
911 return 0;
913 for (; link; link = link->next) {
914 rtx use_insn = DF_REF_INSN (link->ref);
915 rtx use_body = PATTERN (use_insn);
916 if (GET_CODE (use_body) != SET
917 || GET_CODE (SET_SRC (use_body)) != UNSPEC
918 || XINT (SET_SRC (use_body), 1) != UNSPEC_VSX_XXSPLTW
919 || XVECEXP (SET_SRC (use_body), 0, 1) != const0_rtx)
920 return 0;
924 return 1;
927 /* A concatenation of two doublewords is ok if we reverse the
928 order of the inputs. */
929 if (GET_CODE (body) == SET
930 && GET_CODE (SET_SRC (body)) == VEC_CONCAT
931 && (GET_MODE (SET_SRC (body)) == V2DFmode
932 || GET_MODE (SET_SRC (body)) == V2DImode))
934 *special = SH_CONCAT;
935 return 1;
938 /* V2DF reductions are always swappable. */
939 if (GET_CODE (body) == PARALLEL)
941 rtx expr = XVECEXP (body, 0, 0);
942 if (GET_CODE (expr) == SET
943 && v2df_reduction_p (SET_SRC (expr)))
944 return 1;
947 /* An UNSPEC_VPERM is ok if the mask operand is loaded from the
948 constant pool. */
949 if (GET_CODE (body) == SET
950 && GET_CODE (SET_SRC (body)) == UNSPEC
951 && XINT (SET_SRC (body), 1) == UNSPEC_VPERM
952 && XVECLEN (SET_SRC (body), 0) == 3
953 && GET_CODE (XVECEXP (SET_SRC (body), 0, 2)) == REG)
955 rtx mask_reg = XVECEXP (SET_SRC (body), 0, 2);
956 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
957 df_ref use;
958 FOR_EACH_INSN_INFO_USE (use, insn_info)
959 if (rtx_equal_p (DF_REF_REG (use), mask_reg))
961 struct df_link *def_link = DF_REF_CHAIN (use);
962 /* Punt if multiple definitions for this reg. */
963 if (def_link && !def_link->next &&
964 const_load_sequence_p (insn_entry,
965 DF_REF_INSN (def_link->ref)))
967 *special = SH_VPERM;
968 return 1;
973 /* Otherwise check the operands for vector lane violations. */
974 return rtx_is_swappable_p (body, special);
977 enum chain_purpose { FOR_LOADS, FOR_STORES };
979 /* Return true if the UD or DU chain headed by LINK is non-empty,
980 and every entry on the chain references an insn that is a
981 register swap. Furthermore, if PURPOSE is FOR_LOADS, each such
982 register swap must have only permuting loads as reaching defs.
983 If PURPOSE is FOR_STORES, each such register swap must have only
984 register swaps or permuting stores as reached uses. */
985 static bool
986 chain_contains_only_swaps (swap_web_entry *insn_entry, struct df_link *link,
987 enum chain_purpose purpose)
989 if (!link)
990 return false;
992 for (; link; link = link->next)
994 if (!ALTIVEC_OR_VSX_VECTOR_MODE (GET_MODE (DF_REF_REG (link->ref))))
995 continue;
997 if (DF_REF_IS_ARTIFICIAL (link->ref))
998 return false;
1000 rtx reached_insn = DF_REF_INSN (link->ref);
1001 unsigned uid = INSN_UID (reached_insn);
1002 struct df_insn_info *insn_info = DF_INSN_INFO_GET (reached_insn);
1004 if (!insn_entry[uid].is_swap || insn_entry[uid].is_load
1005 || insn_entry[uid].is_store)
1006 return false;
1008 if (purpose == FOR_LOADS)
1010 df_ref use;
1011 FOR_EACH_INSN_INFO_USE (use, insn_info)
1013 struct df_link *swap_link = DF_REF_CHAIN (use);
1015 while (swap_link)
1017 if (DF_REF_IS_ARTIFICIAL (link->ref))
1018 return false;
1020 rtx swap_def_insn = DF_REF_INSN (swap_link->ref);
1021 unsigned uid2 = INSN_UID (swap_def_insn);
1023 /* Only permuting loads are allowed. */
1024 if (!insn_entry[uid2].is_swap || !insn_entry[uid2].is_load)
1025 return false;
1027 swap_link = swap_link->next;
1031 else if (purpose == FOR_STORES)
1033 df_ref def;
1034 FOR_EACH_INSN_INFO_DEF (def, insn_info)
1036 struct df_link *swap_link = DF_REF_CHAIN (def);
1038 while (swap_link)
1040 if (DF_REF_IS_ARTIFICIAL (link->ref))
1041 return false;
1043 rtx swap_use_insn = DF_REF_INSN (swap_link->ref);
1044 unsigned uid2 = INSN_UID (swap_use_insn);
1046 /* Permuting stores or register swaps are allowed. */
1047 if (!insn_entry[uid2].is_swap || insn_entry[uid2].is_load)
1048 return false;
1050 swap_link = swap_link->next;
1056 return true;
1059 /* Mark the xxswapdi instructions associated with permuting loads and
1060 stores for removal. Note that we only flag them for deletion here,
1061 as there is a possibility of a swap being reached from multiple
1062 loads, etc. */
1063 static void
1064 mark_swaps_for_removal (swap_web_entry *insn_entry, unsigned int i)
1066 rtx insn = insn_entry[i].insn;
1067 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
1069 if (insn_entry[i].is_load)
1071 df_ref def;
1072 FOR_EACH_INSN_INFO_DEF (def, insn_info)
1074 struct df_link *link = DF_REF_CHAIN (def);
1076 /* We know by now that these are swaps, so we can delete
1077 them confidently. */
1078 while (link)
1080 rtx use_insn = DF_REF_INSN (link->ref);
1081 insn_entry[INSN_UID (use_insn)].will_delete = 1;
1082 link = link->next;
1086 else if (insn_entry[i].is_store)
1088 df_ref use;
1089 FOR_EACH_INSN_INFO_USE (use, insn_info)
1091 /* Ignore uses for addressability. */
1092 machine_mode mode = GET_MODE (DF_REF_REG (use));
1093 if (!ALTIVEC_OR_VSX_VECTOR_MODE (mode))
1094 continue;
1096 struct df_link *link = DF_REF_CHAIN (use);
1098 /* We know by now that these are swaps, so we can delete
1099 them confidently. */
1100 while (link)
1102 rtx def_insn = DF_REF_INSN (link->ref);
1103 insn_entry[INSN_UID (def_insn)].will_delete = 1;
1104 link = link->next;
1110 /* *OP_PTR is either a CONST_VECTOR or an expression containing one.
1111 Swap the first half of the vector with the second in the first
1112 case. Recurse to find it in the second. */
1113 static void
1114 swap_const_vector_halves (rtx *op_ptr)
1116 int i;
1117 rtx op = *op_ptr;
1118 enum rtx_code code = GET_CODE (op);
1119 if (GET_CODE (op) == CONST_VECTOR)
1121 int units = GET_MODE_NUNITS (GET_MODE (op));
1122 rtx_vector_builder builder (GET_MODE (op), units, 1);
1123 for (i = 0; i < units / 2; ++i)
1124 builder.quick_push (CONST_VECTOR_ELT (op, i + units / 2));
1125 for (i = 0; i < units / 2; ++i)
1126 builder.quick_push (CONST_VECTOR_ELT (op, i));
1127 *op_ptr = builder.build ();
1129 else
1131 int j;
1132 const char *fmt = GET_RTX_FORMAT (code);
1133 for (i = 0; i < GET_RTX_LENGTH (code); ++i)
1134 if (fmt[i] == 'e' || fmt[i] == 'u')
1135 swap_const_vector_halves (&XEXP (op, i));
1136 else if (fmt[i] == 'E')
1137 for (j = 0; j < XVECLEN (op, i); ++j)
1138 swap_const_vector_halves (&XVECEXP (op, i, j));
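/* For instance, the CONST_VECTOR case above rebuilds the V4SI constant
   { 0, 1, 2, 3 } as { 2, 3, 0, 1 }: the elements of the second
   doubleword are pushed first, followed by those of the first.  */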
1142 /* Find all subregs of a vector expression that perform a narrowing,
1143 and adjust the subreg index to account for doubleword swapping. */
1144 static void
1145 adjust_subreg_index (rtx op)
1147 enum rtx_code code = GET_CODE (op);
1148 if (code == SUBREG
1149 && (GET_MODE_SIZE (GET_MODE (op))
1150 < GET_MODE_SIZE (GET_MODE (XEXP (op, 0)))))
1152 unsigned int index = SUBREG_BYTE (op);
1153 if (index < 8)
1154 index += 8;
1155 else
1156 index -= 8;
1157 SUBREG_BYTE (op) = index;
1160 const char *fmt = GET_RTX_FORMAT (code);
1161 int i,j;
1162 for (i = 0; i < GET_RTX_LENGTH (code); ++i)
1163 if (fmt[i] == 'e' || fmt[i] == 'u')
1164 adjust_subreg_index (XEXP (op, i));
1165 else if (fmt[i] == 'E')
1166 for (j = 0; j < XVECLEN (op, i); ++j)
1167 adjust_subreg_index (XVECEXP (op, i, j));
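/* For example, a narrowing (subreg:DF (reg:V2DF x) 0) is rewritten as
   (subreg:DF (reg:V2DF x) 8), and vice versa, since the doubleword that
   used to live at byte offset 0 now lives at byte offset 8.  */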
1170 /* Convert the non-permuting load INSN to a permuting one. */
1171 static void
1172 permute_load (rtx_insn *insn)
1174 rtx body = PATTERN (insn);
1175 rtx mem_op = SET_SRC (body);
1176 rtx tgt_reg = SET_DEST (body);
1177 machine_mode mode = GET_MODE (tgt_reg);
1178 int n_elts = GET_MODE_NUNITS (mode);
1179 int half_elts = n_elts / 2;
1180 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
1181 int i, j;
1182 for (i = 0, j = half_elts; i < half_elts; ++i, ++j)
1183 XVECEXP (par, 0, i) = GEN_INT (j);
1184 for (i = half_elts, j = 0; j < half_elts; ++i, ++j)
1185 XVECEXP (par, 0, i) = GEN_INT (j);
1186 rtx sel = gen_rtx_VEC_SELECT (mode, mem_op, par);
1187 SET_SRC (body) = sel;
1188 INSN_CODE (insn) = -1; /* Force re-recognition. */
1189 df_insn_rescan (insn);
1191 if (dump_file)
1192 fprintf (dump_file, "Replacing load %d with permuted load\n",
1193 INSN_UID (insn));
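/* For a V4SI load, for example, the code above wraps the MEM in

     (vec_select:V4SI (mem:V4SI addr) (parallel [2 3 0 1]))

   so that the insn now matches a permuting-load pattern.  */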
1196 /* Convert the non-permuting store INSN to a permuting one. */
1197 static void
1198 permute_store (rtx_insn *insn)
1200 rtx body = PATTERN (insn);
1201 rtx src_reg = SET_SRC (body);
1202 machine_mode mode = GET_MODE (src_reg);
1203 int n_elts = GET_MODE_NUNITS (mode);
1204 int half_elts = n_elts / 2;
1205 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
1206 int i, j;
1207 for (i = 0, j = half_elts; i < half_elts; ++i, ++j)
1208 XVECEXP (par, 0, i) = GEN_INT (j);
1209 for (i = half_elts, j = 0; j < half_elts; ++i, ++j)
1210 XVECEXP (par, 0, i) = GEN_INT (j);
1211 rtx sel = gen_rtx_VEC_SELECT (mode, src_reg, par);
1212 SET_SRC (body) = sel;
1213 INSN_CODE (insn) = -1; /* Force re-recognition. */
1214 df_insn_rescan (insn);
1216 if (dump_file)
1217 fprintf (dump_file, "Replacing store %d with permuted store\n",
1218 INSN_UID (insn));
1221 /* Given OP that contains a vector extract operation, adjust the index
1222 of the extracted lane to account for the doubleword swap. */
1223 static void
1224 adjust_extract (rtx_insn *insn)
1226 rtx pattern = PATTERN (insn);
1227 if (GET_CODE (pattern) == PARALLEL)
1228 pattern = XVECEXP (pattern, 0, 0);
1229 rtx src = SET_SRC (pattern);
1230 /* The vec_select may be wrapped in a vec_duplicate for a splat, so
1231 account for that. */
1232 rtx sel = GET_CODE (src) == VEC_DUPLICATE ? XEXP (src, 0) : src;
1233 rtx par = XEXP (sel, 1);
1234 int half_elts = GET_MODE_NUNITS (GET_MODE (XEXP (sel, 0))) >> 1;
1235 int lane = INTVAL (XVECEXP (par, 0, 0));
1236 lane = lane >= half_elts ? lane - half_elts : lane + half_elts;
1237 XVECEXP (par, 0, 0) = GEN_INT (lane);
1238 INSN_CODE (insn) = -1; /* Force re-recognition. */
1239 df_insn_rescan (insn);
1241 if (dump_file)
1242 fprintf (dump_file, "Changing lane for extract %d\n", INSN_UID (insn));
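/* For example, extracting lane 1 of a V4SI value (half_elts == 2)
   becomes an extract of lane 3 after the adjustment above, and lane 3
   becomes lane 1.  */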
1245 /* Given OP that contains a vector direct-splat operation, adjust the index
1246 of the source lane to account for the doubleword swap. */
1247 static void
1248 adjust_splat (rtx_insn *insn)
1250 rtx body = PATTERN (insn);
1251 rtx unspec = XEXP (body, 1);
1252 int half_elts = GET_MODE_NUNITS (GET_MODE (unspec)) >> 1;
1253 int lane = INTVAL (XVECEXP (unspec, 0, 1));
1254 lane = lane >= half_elts ? lane - half_elts : lane + half_elts;
1255 XVECEXP (unspec, 0, 1) = GEN_INT (lane);
1256 INSN_CODE (insn) = -1; /* Force re-recognition. */
1257 df_insn_rescan (insn);
1259 if (dump_file)
1260 fprintf (dump_file, "Changing lane for splat %d\n", INSN_UID (insn));
1263 /* Given OP that contains an XXPERMDI operation (that is not a doubleword
1264 swap), reverse the order of the source operands and adjust the indices
1265 of the source lanes to account for doubleword reversal. */
1266 static void
1267 adjust_xxpermdi (rtx_insn *insn)
1269 rtx set = PATTERN (insn);
1270 rtx select = XEXP (set, 1);
1271 rtx concat = XEXP (select, 0);
1272 rtx src0 = XEXP (concat, 0);
1273 XEXP (concat, 0) = XEXP (concat, 1);
1274 XEXP (concat, 1) = src0;
1275 rtx parallel = XEXP (select, 1);
1276 int lane0 = INTVAL (XVECEXP (parallel, 0, 0));
1277 int lane1 = INTVAL (XVECEXP (parallel, 0, 1));
1278 int new_lane0 = 3 - lane1;
1279 int new_lane1 = 3 - lane0;
1280 XVECEXP (parallel, 0, 0) = GEN_INT (new_lane0);
1281 XVECEXP (parallel, 0, 1) = GEN_INT (new_lane1);
1282 INSN_CODE (insn) = -1; /* Force re-recognition. */
1283 df_insn_rescan (insn);
1285 if (dump_file)
1286 fprintf (dump_file, "Changing lanes for xxpermdi %d\n", INSN_UID (insn));
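/* For example, an xxpermdi selecting lanes (0, 2), i.e. doubleword 0 of
   each input in the vec_concat, is rewritten with its inputs reversed
   and lanes (3 - 2, 3 - 0) = (1, 3), per the calculation above.  */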
1289 /* Given OP that contains a VEC_CONCAT operation of two doublewords,
1290 reverse the order of those inputs. */
1291 static void
1292 adjust_concat (rtx_insn *insn)
1294 rtx set = PATTERN (insn);
1295 rtx concat = XEXP (set, 1);
1296 rtx src0 = XEXP (concat, 0);
1297 XEXP (concat, 0) = XEXP (concat, 1);
1298 XEXP (concat, 1) = src0;
1299 INSN_CODE (insn) = -1; /* Force re-recognition. */
1300 df_insn_rescan (insn);
1302 if (dump_file)
1303 fprintf (dump_file, "Reversing inputs for concat %d\n", INSN_UID (insn));
1306 /* Given an UNSPEC_VPERM insn, modify the mask loaded from the
1307 constant pool to reflect swapped doublewords. */
1308 static void
1309 adjust_vperm (rtx_insn *insn)
1311 /* We previously determined that the UNSPEC_VPERM was fed by a
1312 swap of a swapping load of a TOC-relative constant pool symbol.
1313 Find the MEM in the swapping load and replace it with a MEM for
1314 the adjusted mask constant. */
1315 rtx set = PATTERN (insn);
1316 rtx mask_reg = XVECEXP (SET_SRC (set), 0, 2);
1318 /* Find the swap. */
1319 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
1320 df_ref use;
1321 rtx_insn *swap_insn = 0;
1322 FOR_EACH_INSN_INFO_USE (use, insn_info)
1323 if (rtx_equal_p (DF_REF_REG (use), mask_reg))
1325 struct df_link *def_link = DF_REF_CHAIN (use);
1326 gcc_assert (def_link && !def_link->next);
1327 swap_insn = DF_REF_INSN (def_link->ref);
1328 break;
1330 gcc_assert (swap_insn);
1332 /* Find the load. */
1333 insn_info = DF_INSN_INFO_GET (swap_insn);
1334 rtx_insn *load_insn = 0;
1335 FOR_EACH_INSN_INFO_USE (use, insn_info)
1337 struct df_link *def_link = DF_REF_CHAIN (use);
1338 gcc_assert (def_link && !def_link->next);
1339 load_insn = DF_REF_INSN (def_link->ref);
1340 break;
1342 gcc_assert (load_insn);
1344 /* Find the TOC-relative symbol access. */
1345 insn_info = DF_INSN_INFO_GET (load_insn);
1346 rtx_insn *tocrel_insn = 0;
1347 FOR_EACH_INSN_INFO_USE (use, insn_info)
1349 struct df_link *def_link = DF_REF_CHAIN (use);
1350 gcc_assert (def_link && !def_link->next);
1351 tocrel_insn = DF_REF_INSN (def_link->ref);
1352 break;
1354 gcc_assert (tocrel_insn);
1356 /* Find the embedded CONST_VECTOR. We have to call toc_relative_expr_p
1357 to set tocrel_base; otherwise it would be unnecessary as we've
1358 already established it will return true. */
1359 rtx base, offset;
1360 const_rtx tocrel_base;
1361 rtx tocrel_expr = SET_SRC (PATTERN (tocrel_insn));
1362 /* There is an extra level of indirection for small/large code models. */
1363 if (GET_CODE (tocrel_expr) == MEM)
1364 tocrel_expr = XEXP (tocrel_expr, 0);
1365 if (!toc_relative_expr_p (tocrel_expr, false, &tocrel_base, NULL))
1366 gcc_unreachable ();
1367 split_const (XVECEXP (tocrel_base, 0, 0), &base, &offset);
1368 rtx const_vector = get_pool_constant (base);
1369 /* With the extra indirection, get_pool_constant will produce the
1370 real constant from the reg_equal expression, so get the real
1371 constant. */
1372 if (GET_CODE (const_vector) == SYMBOL_REF)
1373 const_vector = get_pool_constant (const_vector);
1374 gcc_assert (GET_CODE (const_vector) == CONST_VECTOR);
1376 /* Create an adjusted mask from the initial mask. */
1377 unsigned int new_mask[16], i, val;
1378 for (i = 0; i < 16; ++i) {
1379 val = INTVAL (XVECEXP (const_vector, 0, i));
1380 if (val < 16)
1381 new_mask[i] = (val + 8) % 16;
1382 else
1383 new_mask[i] = ((val + 8) % 16) + 16;
1386 /* Create a new CONST_VECTOR and a MEM that references it. */
1387 rtx vals = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
1388 for (i = 0; i < 16; ++i)
1389 XVECEXP (vals, 0, i) = GEN_INT (new_mask[i]);
1390 rtx new_const_vector = gen_rtx_CONST_VECTOR (V16QImode, XVEC (vals, 0));
1391 rtx new_mem = force_const_mem (V16QImode, new_const_vector);
1392 /* This gives us a MEM whose base operand is a SYMBOL_REF, which we
1393 can't recognize. Force the SYMBOL_REF into a register. */
1394 if (!REG_P (XEXP (new_mem, 0))) {
1395 rtx base_reg = force_reg (Pmode, XEXP (new_mem, 0));
1396 XEXP (new_mem, 0) = base_reg;
1397 /* Move the newly created insn ahead of the load insn. */
1398 rtx_insn *force_insn = get_last_insn ();
1399 remove_insn (force_insn);
1400 rtx_insn *before_load_insn = PREV_INSN (load_insn);
1401 add_insn_after (force_insn, before_load_insn, BLOCK_FOR_INSN (load_insn));
1402 df_insn_rescan (before_load_insn);
1403 df_insn_rescan (force_insn);
1406 /* Replace the MEM in the load instruction and rescan it. */
1407 XEXP (SET_SRC (PATTERN (load_insn)), 0) = new_mem;
1408 INSN_CODE (load_insn) = -1; /* Force re-recognition. */
1409 df_insn_rescan (load_insn);
1411 if (dump_file)
1412 fprintf (dump_file, "Adjusting mask for vperm %d\n", INSN_UID (insn));
1415 /* The insn described by INSN_ENTRY[I] can be swapped, but only
1416 with special handling. Take care of that here. */
1417 static void
1418 handle_special_swappables (swap_web_entry *insn_entry, unsigned i)
1420 rtx_insn *insn = insn_entry[i].insn;
1421 rtx body = PATTERN (insn);
1423 switch (insn_entry[i].special_handling)
1425 default:
1426 gcc_unreachable ();
1427 case SH_CONST_VECTOR:
1429 /* A CONST_VECTOR will only show up somewhere in the RHS of a SET. */
1430 gcc_assert (GET_CODE (body) == SET);
1431 swap_const_vector_halves (&SET_SRC (body));
1432 if (dump_file)
1433 fprintf (dump_file, "Swapping constant halves in insn %d\n", i);
1434 break;
1436 case SH_SUBREG:
1437 /* A subreg of the same size is already safe. For subregs that
1438 select a smaller portion of a reg, adjust the index for
1439 swapped doublewords. */
1440 adjust_subreg_index (body);
1441 if (dump_file)
1442 fprintf (dump_file, "Adjusting subreg in insn %d\n", i);
1443 break;
1444 case SH_NOSWAP_LD:
1445 /* Convert a non-permuting load to a permuting one. */
1446 permute_load (insn);
1447 break;
1448 case SH_NOSWAP_ST:
1449 /* Convert a non-permuting store to a permuting one. */
1450 permute_store (insn);
1451 break;
1452 case SH_EXTRACT:
1453 /* Change the lane on an extract operation. */
1454 adjust_extract (insn);
1455 break;
1456 case SH_SPLAT:
1457 /* Change the lane on a direct-splat operation. */
1458 adjust_splat (insn);
1459 break;
1460 case SH_XXPERMDI:
1461 /* Change the lanes on an XXPERMDI operation. */
1462 adjust_xxpermdi (insn);
1463 break;
1464 case SH_CONCAT:
1465 /* Reverse the order of a concatenation operation. */
1466 adjust_concat (insn);
1467 break;
1468 case SH_VPERM:
1469 /* Change the mask loaded from the constant pool for a VPERM. */
1470 adjust_vperm (insn);
1471 break;
1475 /* Find the insn from the Ith table entry, which is known to be a
1476 register swap Y = SWAP(X). Replace it with a copy Y = X. */
1477 static void
1478 replace_swap_with_copy (swap_web_entry *insn_entry, unsigned i)
1480 rtx_insn *insn = insn_entry[i].insn;
1481 rtx body = PATTERN (insn);
1482 rtx src_reg = XEXP (SET_SRC (body), 0);
1483 rtx copy = gen_rtx_SET (SET_DEST (body), src_reg);
1484 rtx_insn *new_insn = emit_insn_before (copy, insn);
1485 set_block_for_insn (new_insn, BLOCK_FOR_INSN (insn));
1486 df_insn_rescan (new_insn);
1488 if (dump_file)
1490 unsigned int new_uid = INSN_UID (new_insn);
1491 fprintf (dump_file, "Replacing swap %d with copy %d\n", i, new_uid);
1494 df_insn_delete (insn);
1495 remove_insn (insn);
1496 insn->set_deleted ();
1499 /* Make NEW_MEM_EXP's attributes and flags resemble those of
1500 ORIGINAL_MEM_EXP. */
1501 static void
1502 mimic_memory_attributes_and_flags (rtx new_mem_exp, const_rtx original_mem_exp)
1504 RTX_FLAG (new_mem_exp, jump) = RTX_FLAG (original_mem_exp, jump);
1505 RTX_FLAG (new_mem_exp, call) = RTX_FLAG (original_mem_exp, call);
1506 RTX_FLAG (new_mem_exp, unchanging) = RTX_FLAG (original_mem_exp, unchanging);
1507 RTX_FLAG (new_mem_exp, volatil) = RTX_FLAG (original_mem_exp, volatil);
1508 RTX_FLAG (new_mem_exp, frame_related) =
1509 RTX_FLAG (original_mem_exp, frame_related);
1511 /* The following fields may not be used with MEM subexpressions */
1512 RTX_FLAG (new_mem_exp, in_struct) = RTX_FLAG (original_mem_exp, in_struct);
1513 RTX_FLAG (new_mem_exp, return_val) = RTX_FLAG (original_mem_exp, return_val);
1515 struct mem_attrs original_attrs = *get_mem_attrs(original_mem_exp);
1517 alias_set_type set = original_attrs.alias;
1518 set_mem_alias_set (new_mem_exp, set);
1520 addr_space_t addrspace = original_attrs.addrspace;
1521 set_mem_addr_space (new_mem_exp, addrspace);
1523 unsigned int align = original_attrs.align;
1524 set_mem_align (new_mem_exp, align);
1526 tree expr = original_attrs.expr;
1527 set_mem_expr (new_mem_exp, expr);
1529 if (original_attrs.offset_known_p)
1531 HOST_WIDE_INT offset = original_attrs.offset;
1532 set_mem_offset (new_mem_exp, offset);
1534 else
1535 clear_mem_offset (new_mem_exp);
1537 if (original_attrs.size_known_p)
1539 HOST_WIDE_INT size = original_attrs.size;
1540 set_mem_size (new_mem_exp, size);
1542 else
1543 clear_mem_size (new_mem_exp);
1546 /* Generate an rtx expression to represent use of the stvx insn to store
1547 the value represented by register SRC_EXP into the memory at address
1548 DEST_EXP, with vector mode MODE. */
1549 rtx
1550 rs6000_gen_stvx (enum machine_mode mode, rtx dest_exp, rtx src_exp)
1552 rtx stvx;
1554 if (mode == V16QImode)
1555 stvx = gen_altivec_stvx_v16qi (src_exp, dest_exp);
1556 else if (mode == V8HImode)
1557 stvx = gen_altivec_stvx_v8hi (src_exp, dest_exp);
1558 #ifdef HAVE_V8HFmode
1559 else if (mode == V8HFmode)
1560 stvx = gen_altivec_stvx_v8hf (src_exp, dest_exp);
1561 #endif
1562 else if (mode == V4SImode)
1563 stvx = gen_altivec_stvx_v4si (src_exp, dest_exp);
1564 else if (mode == V4SFmode)
1565 stvx = gen_altivec_stvx_v4sf (src_exp, dest_exp);
1566 else if (mode == V2DImode)
1567 stvx = gen_altivec_stvx_v2di (src_exp, dest_exp);
1568 else if (mode == V2DFmode)
1569 stvx = gen_altivec_stvx_v2df (src_exp, dest_exp);
1570 else if (mode == V1TImode)
1571 stvx = gen_altivec_stvx_v1ti (src_exp, dest_exp);
1572 else
1573 /* KFmode, TFmode, other modes not expected in this context. */
1574 gcc_unreachable ();
1576 rtx new_mem_exp = SET_DEST (PATTERN (stvx));
1577 mimic_memory_attributes_and_flags (new_mem_exp, dest_exp);
1578 return stvx;
1581 /* Given that STORE_INSN represents an aligned store-with-swap of a
1582 swapped value, replace the store with an aligned store (without
1583 swap) and replace the swap with a copy insn. */
1584 static void
1585 replace_swapped_aligned_store (swap_web_entry *insn_entry,
1586 rtx_insn *store_insn)
1588 unsigned uid = INSN_UID (store_insn);
1589 gcc_assert (insn_entry[uid].is_swap && insn_entry[uid].is_store);
1591 rtx body = PATTERN (store_insn);
1592 rtx dest_address = XEXP (SET_DEST (body), 0);
1593 rtx swap_reg = XEXP (SET_SRC (body), 0);
1594 gcc_assert (REG_P (dest_address)
1595 || rs6000_sum_of_two_registers_p (dest_address));
1597 /* Find the swap instruction that provides the value to be stored by
1598 * this store-with-swap instruction. */
1599 struct df_insn_info *insn_info = DF_INSN_INFO_GET (store_insn);
1600 df_ref use;
1601 rtx_insn *swap_insn = NULL;
1602 unsigned uid2 = 0;
1603 FOR_EACH_INSN_INFO_USE (use, insn_info)
1605 struct df_link *def_link = DF_REF_CHAIN (use);
1607 /* If this is not the definition of the candidate swap register,
1608 then skip it. I am only interested in the swap insn. */
1609 if (!rtx_equal_p (DF_REF_REG (use), swap_reg))
1610 continue;
1612 /* If there is no def or the def is artificial or there are
1613 multiple defs, we should not be here. */
1614 gcc_assert (def_link && def_link->ref && !def_link->next
1615 && !DF_REF_IS_ARTIFICIAL (def_link->ref));
1617 swap_insn = DF_REF_INSN (def_link->ref);
1618 uid2 = INSN_UID (swap_insn);
1620 /* If this source value is not a simple swap, we should not be here. */
1621 gcc_assert (insn_entry[uid2].is_swap && !insn_entry[uid2].is_load
1622 && !insn_entry[uid2].is_store);
1624 /* We've processed the use we care about, so break out of
1625 this loop. */
1626 break;
1629 /* At this point, swap_insn and uid2 represent the swap instruction
1630 that feeds the store. */
1631 gcc_assert (swap_insn);
1632 rtx set = single_set (store_insn);
1633 gcc_assert (set);
1634 rtx dest_exp = SET_DEST (set);
1635 rtx src_exp = XEXP (SET_SRC (body), 0);
1636 enum machine_mode mode = GET_MODE (dest_exp);
1637 gcc_assert (MEM_P (dest_exp));
1638 gcc_assert (MEM_ALIGN (dest_exp) >= 128);
1640 /* Replace the store with a new stvx insn. */
1641 rtx stvx;
1642 stvx = rs6000_gen_stvx (mode, dest_exp, src_exp);
1644 rtx_insn *new_insn = emit_insn_before (stvx, store_insn);
1645 rtx new_body = PATTERN (new_insn);
1647 gcc_assert ((GET_CODE (new_body) == SET)
1648 && (GET_CODE (SET_DEST (new_body)) == MEM));
1650 set_block_for_insn (new_insn, BLOCK_FOR_INSN (store_insn));
1651 df_insn_rescan (new_insn);
1653 df_insn_delete (store_insn);
1654 remove_insn (store_insn);
1655 store_insn->set_deleted ();
1657 /* Replace the swap with a copy. */
1658 uid2 = INSN_UID (swap_insn);
1659 mark_swaps_for_removal (insn_entry, uid2);
1660 replace_swap_with_copy (insn_entry, uid2);
1663 /* Generate an rtx expression to represent use of the lvx insn to load
1664 from memory SRC_EXP into register DEST_EXP with vector mode MODE. */
1665 rtx
1666 rs6000_gen_lvx (enum machine_mode mode, rtx dest_exp, rtx src_exp)
1668 rtx lvx;
1670 if (mode == V16QImode)
1671 lvx = gen_altivec_lvx_v16qi (dest_exp, src_exp);
1672 else if (mode == V8HImode)
1673 lvx = gen_altivec_lvx_v8hi (dest_exp, src_exp);
1674 #ifdef HAVE_V8HFmode
1675 else if (mode == V8HFmode)
1676 lvx = gen_altivec_lvx_v8hf (dest_exp, src_exp);
1677 #endif
1678 else if (mode == V4SImode)
1679 lvx = gen_altivec_lvx_v4si (dest_exp, src_exp);
1680 else if (mode == V4SFmode)
1681 lvx = gen_altivec_lvx_v4sf (dest_exp, src_exp);
1682 else if (mode == V2DImode)
1683 lvx = gen_altivec_lvx_v2di (dest_exp, src_exp);
1684 else if (mode == V2DFmode)
1685 lvx = gen_altivec_lvx_v2df (dest_exp, src_exp);
1686 else if (mode == V1TImode)
1687 lvx = gen_altivec_lvx_v1ti (dest_exp, src_exp);
1688 else
1689 /* KFmode, TFmode, other modes not expected in this context. */
1690 gcc_unreachable ();
1692 rtx new_mem_exp = SET_SRC (PATTERN (lvx));
1693 mimic_memory_attributes_and_flags (new_mem_exp, src_exp);
1695 return lvx;
1698 /* Given that SWAP_INSN represents a swap of an aligned
1699 load-with-swap, replace the load with an aligned load (without
1700 swap) and replace the swap with a copy insn. */
1701 static void
1702 replace_swapped_aligned_load (swap_web_entry *insn_entry, rtx swap_insn)
1704 /* Find the load. */
1705 unsigned uid = INSN_UID (swap_insn);
1706 /* Only call this if quad_aligned_load_p (swap_insn). */
1707 gcc_assert (insn_entry[uid].is_swap && !insn_entry[uid].is_load);
1708 struct df_insn_info *insn_info = DF_INSN_INFO_GET (swap_insn);
1710 /* Since insn is known to represent a swap instruction, we know it
1711 "uses" only one input variable. */
1712 df_ref use = DF_INSN_INFO_USES (insn_info);
1714 /* Figure out where this input variable is defined. */
1715 struct df_link *def_link = DF_REF_CHAIN (use);
1716 gcc_assert (def_link && !def_link->next);
1717 gcc_assert (def_link && def_link->ref &&
1718 !DF_REF_IS_ARTIFICIAL (def_link->ref) && !def_link->next);
1720 rtx_insn *def_insn = DF_REF_INSN (def_link->ref);
1721 unsigned uid2 = INSN_UID (def_insn);
1723 /* We're expecting a load-with-swap insn. */
1724 gcc_assert (insn_entry[uid2].is_load && insn_entry[uid2].is_swap);
1726 /* We expect this to be a set from memory, with the source representing a
1727 swap (indicated by code VEC_SELECT). */
1728 rtx body = PATTERN (def_insn);
1729 gcc_assert ((GET_CODE (body) == SET)
1730 && (GET_CODE (SET_SRC (body)) == VEC_SELECT)
1731 && (GET_CODE (XEXP (SET_SRC (body), 0)) == MEM));
1733 rtx src_exp = XEXP (SET_SRC (body), 0);
1734 enum machine_mode mode = GET_MODE (src_exp);
1735 rtx lvx = rs6000_gen_lvx (mode, SET_DEST (body), src_exp);
1737 rtx_insn *new_insn = emit_insn_before (lvx, def_insn);
1738 rtx new_body = PATTERN (new_insn);
1740 gcc_assert ((GET_CODE (new_body) == SET)
1741 && (GET_CODE (SET_SRC (new_body)) == MEM));
1743 set_block_for_insn (new_insn, BLOCK_FOR_INSN (def_insn));
1744 df_insn_rescan (new_insn);
1746 df_insn_delete (def_insn);
1747 remove_insn (def_insn);
1748 def_insn->set_deleted ();
1750 /* Replace the swap with a copy. */
1751 mark_swaps_for_removal (insn_entry, uid);
1752 replace_swap_with_copy (insn_entry, uid);
1755 /* Given that SWAP_INSN represents a swap of a load of a constant
1756    vector value, replace it with a single instruction that loads a
1757 swapped variant of the original constant.
1759 The "natural" representation of a byte array in memory is the same
1760 for big endian and little endian.
1762 unsigned char byte_array[] =
1763 { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f };
1765 However, when loaded into a vector register, the representation
1766 depends on endian conventions.
1768 In big-endian mode, the register holds:
1770 MSB LSB
1771 [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f ]
1773 In little-endian mode, the register holds:
1775 MSB LSB
1776 [ f, e, d, c, b, a, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 ]
1778 Word arrays require different handling. Consider the word array:
1780 unsigned int word_array[] =
1781 { 0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f };
1783 The in-memory representation depends on endian configuration. The
1784 equivalent array, declared as a byte array, in memory would be:
1786 unsigned char big_endian_word_array_data[] =
1787 { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f }
1789 unsigned char little_endian_word_array_data[] =
1790 { 3, 2, 1, 0, 7, 6, 5, 4, b, a, 9, 8, f, e, d, c }
1792 In big-endian mode, the register holds:
1794 MSB LSB
1795 [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f ]
1797 In little-endian mode, the register holds:
1799 MSB LSB
1800 [ c, d, e, f, 8, 9, a, b, 4, 5, 6, 7, 0, 1, 2, 3 ]
1803    Similar transformations apply to the half-word and double-word
1804    vector representations.
1806 For now, don't handle vectors of quad-precision values. Just return.
1807 A better solution is to fix the code generator to emit lvx/stvx for
1808 those. */
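/* As a rough worked example, continuing the word_array example above:
   for a V4SI constant the code below stores element i of the original
   at position (i + 2) % 4 of the new constant, so

     { 0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f }

   becomes

     { 0x08090a0b, 0x0c0d0e0f, 0x00010203, 0x04050607 },

   i.e. the two doublewords exchange places while the order within each
   doubleword is preserved.  */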
1809 static void
1810 replace_swapped_load_constant (swap_web_entry *insn_entry, rtx swap_insn)
1812 /* Find the load. */
1813 struct df_insn_info *insn_info = DF_INSN_INFO_GET (swap_insn);
1814 rtx_insn *load_insn;
1815 df_ref use = DF_INSN_INFO_USES (insn_info);
1816 struct df_link *def_link = DF_REF_CHAIN (use);
1817 gcc_assert (def_link && !def_link->next);
1819 load_insn = DF_REF_INSN (def_link->ref);
1820 gcc_assert (load_insn);
1822 /* Find the TOC-relative symbol access. */
1823 insn_info = DF_INSN_INFO_GET (load_insn);
1824 use = DF_INSN_INFO_USES (insn_info);
1826 def_link = DF_REF_CHAIN (use);
1827 gcc_assert (def_link && !def_link->next);
1829 rtx_insn *tocrel_insn = DF_REF_INSN (def_link->ref);
1830 gcc_assert (tocrel_insn);
1832 /* Find the embedded CONST_VECTOR. We have to call toc_relative_expr_p
1833 to set tocrel_base; otherwise it would be unnecessary as we've
1834 already established it will return true. */
1835 rtx base, offset;
1836 rtx tocrel_expr = SET_SRC (PATTERN (tocrel_insn));
1837 const_rtx tocrel_base;
1839 /* There is an extra level of indirection for small/large code models. */
1840 if (GET_CODE (tocrel_expr) == MEM)
1841 tocrel_expr = XEXP (tocrel_expr, 0);
1843 if (!toc_relative_expr_p (tocrel_expr, false, &tocrel_base, NULL))
1844 gcc_unreachable ();
1846 split_const (XVECEXP (tocrel_base, 0, 0), &base, &offset);
1847 rtx const_vector = get_pool_constant (base);
1849 /* With the extra indirection, get_pool_constant will produce the
1850 real constant from the reg_equal expression, so get the real
1851 constant. */
1852 if (GET_CODE (const_vector) == SYMBOL_REF)
1853 const_vector = get_pool_constant (const_vector);
1854 gcc_assert (GET_CODE (const_vector) == CONST_VECTOR);
1856 rtx new_mem;
1857 enum machine_mode mode = GET_MODE (const_vector);
1859 /* Create an adjusted constant from the original constant. */
1860 if (mode == V1TImode)
1861 /* Leave this code as is. */
1862 return;
1863 else if (mode == V16QImode)
1865 rtx vals = gen_rtx_PARALLEL (mode, rtvec_alloc (16));
1866 int i;
1868 for (i = 0; i < 16; i++)
1869 XVECEXP (vals, 0, ((i+8) % 16)) = XVECEXP (const_vector, 0, i);
1870 rtx new_const_vector = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
1871 new_mem = force_const_mem (mode, new_const_vector);
1873 else if ((mode == V8HImode)
1874 #ifdef HAVE_V8HFmode
1875 || (mode == V8HFmode)
1876 #endif
1879 rtx vals = gen_rtx_PARALLEL (mode, rtvec_alloc (8));
1880 int i;
1882 for (i = 0; i < 8; i++)
1883 XVECEXP (vals, 0, ((i+4) % 8)) = XVECEXP (const_vector, 0, i);
1884 rtx new_const_vector = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
1885 new_mem = force_const_mem (mode, new_const_vector);
1887 else if ((mode == V4SImode) || (mode == V4SFmode))
1889 rtx vals = gen_rtx_PARALLEL (mode, rtvec_alloc (4));
1890 int i;
1892 for (i = 0; i < 4; i++)
1893 XVECEXP (vals, 0, ((i+2) % 4)) = XVECEXP (const_vector, 0, i);
1894 rtx new_const_vector = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
1895 new_mem = force_const_mem (mode, new_const_vector);
1897 else if ((mode == V2DImode) || (mode == V2DFmode))
1899 rtx vals = gen_rtx_PARALLEL (mode, rtvec_alloc (2));
1900 int i;
1902 for (i = 0; i < 2; i++)
1903 XVECEXP (vals, 0, ((i+1) % 2)) = XVECEXP (const_vector, 0, i);
1904 rtx new_const_vector = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
1905 new_mem = force_const_mem (mode, new_const_vector);
1907 else
1909 /* We do not expect other modes to be constant-load-swapped. */
1910 gcc_unreachable ();
1913 /* This gives us a MEM whose base operand is a SYMBOL_REF, which we
1914 can't recognize. Force the SYMBOL_REF into a register. */
1915 if (!REG_P (XEXP (new_mem, 0))) {
1916 rtx base_reg = force_reg (Pmode, XEXP (new_mem, 0));
1917 XEXP (new_mem, 0) = base_reg;
1919 /* Move the newly created insn ahead of the load insn. */
1920     /* The last insn is the one that forced new_mem into a register.  */
1921 rtx_insn *force_insn = get_last_insn ();
1922 /* Remove this insn from the end of the instruction sequence. */
1923 remove_insn (force_insn);
1924 rtx_insn *before_load_insn = PREV_INSN (load_insn);
1926     /* And insert this insn back into the sequence immediately before the
1927        load insn so this new expression will be available when the
1928        existing load is modified to load the swapped constant.  */
1929 add_insn_after (force_insn, before_load_insn, BLOCK_FOR_INSN (load_insn));
1930 df_insn_rescan (before_load_insn);
1931 df_insn_rescan (force_insn);
1934 /* Replace the MEM in the load instruction and rescan it. */
1935 XEXP (SET_SRC (PATTERN (load_insn)), 0) = new_mem;
1936 INSN_CODE (load_insn) = -1; /* Force re-recognition. */
1937 df_insn_rescan (load_insn);
1939 unsigned int uid = INSN_UID (swap_insn);
1940 mark_swaps_for_removal (insn_entry, uid);
1941 replace_swap_with_copy (insn_entry, uid);
1944 /* Dump the swap table to DUMP_FILE. */
1945 static void
1946 dump_swap_insn_table (swap_web_entry *insn_entry)
1948 int e = get_max_uid ();
1949 fprintf (dump_file, "\nRelevant insns with their flag settings\n\n");
1951 for (int i = 0; i < e; ++i)
1952 if (insn_entry[i].is_relevant)
1954 swap_web_entry *pred_entry = (swap_web_entry *)insn_entry[i].pred ();
1955 fprintf (dump_file, "%6d %6d ", i,
1956 pred_entry && pred_entry->insn
1957 ? INSN_UID (pred_entry->insn) : 0);
1958 if (insn_entry[i].is_load)
1959 fputs ("load ", dump_file);
1960 if (insn_entry[i].is_store)
1961 fputs ("store ", dump_file);
1962 if (insn_entry[i].is_swap)
1963 fputs ("swap ", dump_file);
1964 if (insn_entry[i].is_live_in)
1965 fputs ("live-in ", dump_file);
1966 if (insn_entry[i].is_live_out)
1967 fputs ("live-out ", dump_file);
1968 if (insn_entry[i].contains_subreg)
1969 fputs ("subreg ", dump_file);
1970 if (insn_entry[i].is_128_int)
1971 fputs ("int128 ", dump_file);
1972 if (insn_entry[i].is_call)
1973 fputs ("call ", dump_file);
1974 if (insn_entry[i].is_swappable)
1976 fputs ("swappable ", dump_file);
1977 if (insn_entry[i].special_handling == SH_CONST_VECTOR)
1978 fputs ("special:constvec ", dump_file);
1979 else if (insn_entry[i].special_handling == SH_SUBREG)
1980 fputs ("special:subreg ", dump_file);
1981 else if (insn_entry[i].special_handling == SH_NOSWAP_LD)
1982 fputs ("special:load ", dump_file);
1983 else if (insn_entry[i].special_handling == SH_NOSWAP_ST)
1984 fputs ("special:store ", dump_file);
1985 else if (insn_entry[i].special_handling == SH_EXTRACT)
1986 fputs ("special:extract ", dump_file);
1987 else if (insn_entry[i].special_handling == SH_SPLAT)
1988 fputs ("special:splat ", dump_file);
1989 else if (insn_entry[i].special_handling == SH_XXPERMDI)
1990 fputs ("special:xxpermdi ", dump_file);
1991 else if (insn_entry[i].special_handling == SH_CONCAT)
1992 fputs ("special:concat ", dump_file);
1993 else if (insn_entry[i].special_handling == SH_VPERM)
1994 fputs ("special:vperm ", dump_file);
1996 if (insn_entry[i].web_not_optimizable)
1997 fputs ("unoptimizable ", dump_file);
1998 if (insn_entry[i].will_delete)
1999 fputs ("delete ", dump_file);
2000 fputs ("\n", dump_file);
2002 fputs ("\n", dump_file);
2005 /* Return ALIGN with its address canonicalized to (reg) or (plus reg reg).
2006    Here ALIGN is an (and addr (const_int -16)).  Always return a new copy
2007    to avoid problems with combine.  */
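/* For illustration only, with schematic operands: given

     (and (plus (reg:DI 3) (const_int 32)) (const_int -16))

   the non-register operand of the PLUS is forced into a fresh pseudo,
   and the returned copy has the form

     (and (plus (reg:DI 3) (reg:DI 130)) (const_int -16)).  */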
2008 static rtx
2009 alignment_with_canonical_addr (rtx align)
2011 rtx canon;
2012 rtx addr = XEXP (align, 0);
2014 if (REG_P (addr))
2015 canon = addr;
2017 else if (GET_CODE (addr) == PLUS)
2019 rtx addrop0 = XEXP (addr, 0);
2020 rtx addrop1 = XEXP (addr, 1);
2022 if (!REG_P (addrop0))
2023 addrop0 = force_reg (GET_MODE (addrop0), addrop0);
2025 if (!REG_P (addrop1))
2026 addrop1 = force_reg (GET_MODE (addrop1), addrop1);
2028 canon = gen_rtx_PLUS (GET_MODE (addr), addrop0, addrop1);
2031 else
2032 canon = force_reg (GET_MODE (addr), addr);
2034 return gen_rtx_AND (GET_MODE (align), canon, GEN_INT (-16));
2037 /* Check whether INSN sets a register to an alignment mask, and if so,
2038    return a fully-expanded rtx for the masking operation.  */
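/* Sketch of the two shapes accepted below (schematic operands): either
   the mask is an immediate,

     (set (reg:DI 125) (and (reg:DI 3) (const_int -16)))

   or it lives in a register whose single reaching definition is the
   constant -16:

     (set (reg:DI 126) (const_int -16))
     (set (reg:DI 125) (and (reg:DI 3) (reg:DI 126)))  */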
2039 static rtx
2040 alignment_mask (rtx_insn *insn)
2042 rtx body = PATTERN (insn);
2044 if (GET_CODE (body) != SET
2045 || GET_CODE (SET_SRC (body)) != AND
2046 || !REG_P (XEXP (SET_SRC (body), 0)))
2047 return 0;
2049 rtx mask = XEXP (SET_SRC (body), 1);
2051 if (GET_CODE (mask) == CONST_INT)
2053 if (INTVAL (mask) == -16)
2054 return alignment_with_canonical_addr (SET_SRC (body));
2055 else
2056 return 0;
2059 if (!REG_P (mask))
2060 return 0;
2062 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
2063 df_ref use;
2064 rtx real_mask = 0;
2066 FOR_EACH_INSN_INFO_USE (use, insn_info)
2068 if (!rtx_equal_p (DF_REF_REG (use), mask))
2069 continue;
2071 struct df_link *def_link = DF_REF_CHAIN (use);
2072 if (!def_link || def_link->next)
2073 return 0;
2075 rtx_insn *const_insn = DF_REF_INSN (def_link->ref);
2076 rtx const_body = PATTERN (const_insn);
2077 if (GET_CODE (const_body) != SET)
2078 return 0;
2080 real_mask = SET_SRC (const_body);
2082 if (GET_CODE (real_mask) != CONST_INT
2083 || INTVAL (real_mask) != -16)
2084 return 0;
2087 if (real_mask == 0)
2088 return 0;
2090 return alignment_with_canonical_addr (SET_SRC (body));
2093 /* Given INSN that's a load or store based at BASE_REG, look for a
2094 feeding computation that aligns its address on a 16-byte boundary.
2095    Return the masking rtx if found, and store its defining insn in
        *AND_INSN.  */
2096 static rtx
2097 find_alignment_op (rtx_insn *insn, rtx base_reg, rtx_insn **and_insn)
2099 df_ref base_use;
2100 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
2101 rtx and_operation = 0;
2103 FOR_EACH_INSN_INFO_USE (base_use, insn_info)
2105 if (!rtx_equal_p (DF_REF_REG (base_use), base_reg))
2106 continue;
2108 struct df_link *base_def_link = DF_REF_CHAIN (base_use);
2109 if (!base_def_link || base_def_link->next)
2110 break;
2112 /* With stack-protector code enabled, and possibly in other
2113 circumstances, there may not be an associated insn for
2114 the def. */
2115 if (DF_REF_IS_ARTIFICIAL (base_def_link->ref))
2116 break;
2118 *and_insn = DF_REF_INSN (base_def_link->ref);
2119 and_operation = alignment_mask (*and_insn);
2120 if (and_operation != 0)
2121 break;
2124 return and_operation;
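/* Book-keeping for swap insns that the recombine routines below decide
   to turn into copies; the replacement is deferred until after the insn
   walk to avoid deleting insns while iterating over them.  */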
2127 struct del_info { bool replace; rtx_insn *replace_insn; };
2129 /* If INSN is the load for an lvx pattern, put it in canonical form. */
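/* Sketch of the rewrite (register numbers made up): when the load's base
   register is fed by an alignment mask, the AND is propagated into the
   load's address and the trailing swap becomes a copy, so roughly

     (set (reg:DI 121) (and (reg:DI 3) (const_int -16)))
     (set (reg:V4SI 125) (vec_select:V4SI (mem:V4SI (reg:DI 121)) ...))
     (set (reg:V4SI 126) (vec_select:V4SI (reg:V4SI 125) ...))

   becomes

     (set (reg:DI 130) (reg:DI 3))
     (set (reg:V4SI 125)
          (mem:V4SI (and:DI (reg:DI 130) (const_int -16))))
     (set (reg:V4SI 126) (reg:V4SI 125))

   with the original AND insn left in place for later passes to clean up
   if it becomes dead.  */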
2130 static void
2131 recombine_lvx_pattern (rtx_insn *insn, del_info *to_delete)
2133 rtx body = PATTERN (insn);
2134 gcc_assert (GET_CODE (body) == SET
2135 && GET_CODE (SET_SRC (body)) == VEC_SELECT
2136 && GET_CODE (XEXP (SET_SRC (body), 0)) == MEM);
2138 rtx mem = XEXP (SET_SRC (body), 0);
2139 rtx base_reg = XEXP (mem, 0);
2141 rtx_insn *and_insn;
2142 rtx and_operation = find_alignment_op (insn, base_reg, &and_insn);
2144 if (and_operation != 0)
2146 df_ref def;
2147 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
2148 FOR_EACH_INSN_INFO_DEF (def, insn_info)
2150 struct df_link *link = DF_REF_CHAIN (def);
2151 if (!link || link->next)
2152 break;
2154 rtx_insn *swap_insn = DF_REF_INSN (link->ref);
2155 if (!insn_is_swap_p (swap_insn)
2156 || insn_is_load_p (swap_insn)
2157 || insn_is_store_p (swap_insn))
2158 break;
2160 /* Expected lvx pattern found. Change the swap to
2161 a copy, and propagate the AND operation into the
2162 load. */
2163 to_delete[INSN_UID (swap_insn)].replace = true;
2164 to_delete[INSN_UID (swap_insn)].replace_insn = swap_insn;
2166 /* However, first we must be sure that we make the
2167 base register from the AND operation available
2168 in case the register has been overwritten. Copy
2169 the base register to a new pseudo and use that
2170 as the base register of the AND operation in
2171 the new LVX instruction. */
2172 rtx and_base = XEXP (and_operation, 0);
2173 rtx new_reg = gen_reg_rtx (GET_MODE (and_base));
2174 rtx copy = gen_rtx_SET (new_reg, and_base);
2175 rtx_insn *new_insn = emit_insn_after (copy, and_insn);
2176 set_block_for_insn (new_insn, BLOCK_FOR_INSN (and_insn));
2177 df_insn_rescan (new_insn);
2179 XEXP (mem, 0) = gen_rtx_AND (GET_MODE (and_base), new_reg,
2180 XEXP (and_operation, 1));
2181 SET_SRC (body) = mem;
2182 INSN_CODE (insn) = -1; /* Force re-recognition. */
2183 df_insn_rescan (insn);
2185 if (dump_file)
2186 fprintf (dump_file, "lvx opportunity found at %d\n",
2187 INSN_UID (insn));
2192 /* If INSN is the store for an stvx pattern, put it in canonical form. */
2193 static void
2194 recombine_stvx_pattern (rtx_insn *insn, del_info *to_delete)
2196 rtx body = PATTERN (insn);
2197 gcc_assert (GET_CODE (body) == SET
2198 && GET_CODE (SET_DEST (body)) == MEM
2199 && GET_CODE (SET_SRC (body)) == VEC_SELECT);
2200 rtx mem = SET_DEST (body);
2201 rtx base_reg = XEXP (mem, 0);
2203 rtx_insn *and_insn;
2204 rtx and_operation = find_alignment_op (insn, base_reg, &and_insn);
2206 if (and_operation != 0)
2208 rtx src_reg = XEXP (SET_SRC (body), 0);
2209 df_ref src_use;
2210 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
2211 FOR_EACH_INSN_INFO_USE (src_use, insn_info)
2213 if (!rtx_equal_p (DF_REF_REG (src_use), src_reg))
2214 continue;
2216 struct df_link *link = DF_REF_CHAIN (src_use);
2217 if (!link || link->next)
2218 break;
2220 rtx_insn *swap_insn = DF_REF_INSN (link->ref);
2221 if (!insn_is_swap_p (swap_insn)
2222 || insn_is_load_p (swap_insn)
2223 || insn_is_store_p (swap_insn))
2224 break;
2226 /* Expected stvx pattern found. Change the swap to
2227 a copy, and propagate the AND operation into the
2228 store. */
2229 to_delete[INSN_UID (swap_insn)].replace = true;
2230 to_delete[INSN_UID (swap_insn)].replace_insn = swap_insn;
2232 /* However, first we must be sure that we make the
2233 base register from the AND operation available
2234 in case the register has been overwritten. Copy
2235 the base register to a new pseudo and use that
2236 as the base register of the AND operation in
2237 the new STVX instruction. */
2238 rtx and_base = XEXP (and_operation, 0);
2239 rtx new_reg = gen_reg_rtx (GET_MODE (and_base));
2240 rtx copy = gen_rtx_SET (new_reg, and_base);
2241 rtx_insn *new_insn = emit_insn_after (copy, and_insn);
2242 set_block_for_insn (new_insn, BLOCK_FOR_INSN (and_insn));
2243 df_insn_rescan (new_insn);
2245 XEXP (mem, 0) = gen_rtx_AND (GET_MODE (and_base), new_reg,
2246 XEXP (and_operation, 1));
2247 SET_SRC (body) = src_reg;
2248 INSN_CODE (insn) = -1; /* Force re-recognition. */
2249 df_insn_rescan (insn);
2251 if (dump_file)
2252 fprintf (dump_file, "stvx opportunity found at %d\n",
2253 INSN_UID (insn));
2258 /* Look for patterns created from builtin lvx and stvx calls, and
2259 canonicalize them to be properly recognized as such. */
2260 static void
2261 recombine_lvx_stvx_patterns (function *fun)
2263 int i;
2264 basic_block bb;
2265 rtx_insn *insn;
2267 int num_insns = get_max_uid ();
2268 del_info *to_delete = XCNEWVEC (del_info, num_insns);
2270 FOR_ALL_BB_FN (bb, fun)
2271 FOR_BB_INSNS (bb, insn)
2273 if (!NONDEBUG_INSN_P (insn))
2274 continue;
2276 if (insn_is_load_p (insn) && insn_is_swap_p (insn))
2277 recombine_lvx_pattern (insn, to_delete);
2278 else if (insn_is_store_p (insn) && insn_is_swap_p (insn))
2279 recombine_stvx_pattern (insn, to_delete);
2282 /* Turning swaps into copies is delayed until now, to avoid problems
2283 with deleting instructions during the insn walk. */
2284 for (i = 0; i < num_insns; i++)
2285 if (to_delete[i].replace)
2287 rtx swap_body = PATTERN (to_delete[i].replace_insn);
2288 rtx src_reg = XEXP (SET_SRC (swap_body), 0);
2289 rtx copy = gen_rtx_SET (SET_DEST (swap_body), src_reg);
2290 rtx_insn *new_insn = emit_insn_before (copy,
2291 to_delete[i].replace_insn);
2292 set_block_for_insn (new_insn,
2293 BLOCK_FOR_INSN (to_delete[i].replace_insn));
2294 df_insn_rescan (new_insn);
2295 df_insn_delete (to_delete[i].replace_insn);
2296 remove_insn (to_delete[i].replace_insn);
2297 to_delete[i].replace_insn->set_deleted ();
2300 free (to_delete);
2303 /* Main entry point for this pass. */
2304 unsigned int
2305 rs6000_analyze_swaps (function *fun)
2307 swap_web_entry *insn_entry;
2308 basic_block bb;
2309 rtx_insn *insn, *curr_insn = 0;
2311 /* Dataflow analysis for use-def chains. */
2312 df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
2313 df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
2314 df_analyze ();
2315 df_set_flags (DF_DEFER_INSN_RESCAN);
2317 /* Pre-pass to recombine lvx and stvx patterns so we don't lose info. */
2318 recombine_lvx_stvx_patterns (fun);
2319 df_process_deferred_rescans ();
2321 /* Allocate structure to represent webs of insns. */
2322 insn_entry = XCNEWVEC (swap_web_entry, get_max_uid ());
2324 /* Walk the insns to gather basic data. */
2325 FOR_ALL_BB_FN (bb, fun)
2326 FOR_BB_INSNS_SAFE (bb, insn, curr_insn)
2328 unsigned int uid = INSN_UID (insn);
2329 if (NONDEBUG_INSN_P (insn))
2331 insn_entry[uid].insn = insn;
2333 if (GET_CODE (insn) == CALL_INSN)
2334 insn_entry[uid].is_call = 1;
2336 /* Walk the uses and defs to see if we mention vector regs.
2337 Record any constraints on optimization of such mentions. */
2338 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
2339 df_ref mention;
2340 FOR_EACH_INSN_INFO_USE (mention, insn_info)
2342 /* We use DF_REF_REAL_REG here to get inside any subregs. */
2343 machine_mode mode = GET_MODE (DF_REF_REAL_REG (mention));
2345 /* If a use gets its value from a call insn, it will be
2346 a hard register and will look like (reg:V4SI 3 3).
2347 The df analysis creates two mentions for GPR3 and GPR4,
2348 both DImode. We must recognize this and treat it as a
2349 vector mention to ensure the call is unioned with this
2350 use. */
2351 if (mode == DImode && DF_REF_INSN_INFO (mention))
2353 rtx feeder = DF_REF_INSN (mention);
2354 /* FIXME: It is pretty hard to get from the df mention
2355 to the mode of the use in the insn. We arbitrarily
2356 pick a vector mode here, even though the use might
2357 be a real DImode. We can be too conservative
2358 (create a web larger than necessary) because of
2359 this, so consider eventually fixing this. */
2360 if (GET_CODE (feeder) == CALL_INSN)
2361 mode = V4SImode;
2364 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode) || mode == TImode)
2366 insn_entry[uid].is_relevant = 1;
2367 if (mode == TImode || mode == V1TImode
2368 || FLOAT128_VECTOR_P (mode))
2369 insn_entry[uid].is_128_int = 1;
2370 if (DF_REF_INSN_INFO (mention))
2371 insn_entry[uid].contains_subreg
2372 = !rtx_equal_p (DF_REF_REG (mention),
2373 DF_REF_REAL_REG (mention));
2374 union_defs (insn_entry, insn, mention);
2377 FOR_EACH_INSN_INFO_DEF (mention, insn_info)
2379 /* We use DF_REF_REAL_REG here to get inside any subregs. */
2380 machine_mode mode = GET_MODE (DF_REF_REAL_REG (mention));
2382 /* If we're loading up a hard vector register for a call,
2383 it looks like (set (reg:V4SI 9 9) (...)). The df
2384 analysis creates two mentions for GPR9 and GPR10, both
2385 DImode. So relying on the mode from the mentions
2386 isn't sufficient to ensure we union the call into the
2387 web with the parameter setup code. */
2388 if (mode == DImode && GET_CODE (insn) == SET
2389 && ALTIVEC_OR_VSX_VECTOR_MODE (GET_MODE (SET_DEST (insn))))
2390 mode = GET_MODE (SET_DEST (insn));
2392 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode) || mode == TImode)
2394 insn_entry[uid].is_relevant = 1;
2395 if (mode == TImode || mode == V1TImode
2396 || FLOAT128_VECTOR_P (mode))
2397 insn_entry[uid].is_128_int = 1;
2398 if (DF_REF_INSN_INFO (mention))
2399 insn_entry[uid].contains_subreg
2400 = !rtx_equal_p (DF_REF_REG (mention),
2401 DF_REF_REAL_REG (mention));
2402 /* REG_FUNCTION_VALUE_P is not valid for subregs. */
2403 else if (REG_FUNCTION_VALUE_P (DF_REF_REG (mention)))
2404 insn_entry[uid].is_live_out = 1;
2405 union_uses (insn_entry, insn, mention);
2409 if (insn_entry[uid].is_relevant)
2411 /* Determine if this is a load or store. */
2412 insn_entry[uid].is_load = insn_is_load_p (insn);
2413 insn_entry[uid].is_store = insn_is_store_p (insn);
2415 /* Determine if this is a doubleword swap. If not,
2416 determine whether it can legally be swapped. */
2417 if (insn_is_swap_p (insn))
2418 insn_entry[uid].is_swap = 1;
2419 else
2421 unsigned int special = SH_NONE;
2422 insn_entry[uid].is_swappable
2423 = insn_is_swappable_p (insn_entry, insn, &special);
2424 if (special != SH_NONE && insn_entry[uid].contains_subreg)
2425 insn_entry[uid].is_swappable = 0;
2426 else if (special != SH_NONE)
2427 insn_entry[uid].special_handling = special;
2428 else if (insn_entry[uid].contains_subreg)
2429 insn_entry[uid].special_handling = SH_SUBREG;
2435 if (dump_file)
2437 fprintf (dump_file, "\nSwap insn entry table when first built\n");
2438 dump_swap_insn_table (insn_entry);
2441 /* Record unoptimizable webs. */
2442 unsigned e = get_max_uid (), i;
2443 for (i = 0; i < e; ++i)
2445 if (!insn_entry[i].is_relevant)
2446 continue;
2448 swap_web_entry *root
2449 = (swap_web_entry*)(&insn_entry[i])->unionfind_root ();
2451 if (insn_entry[i].is_live_in || insn_entry[i].is_live_out
2452 || (insn_entry[i].contains_subreg
2453 && insn_entry[i].special_handling != SH_SUBREG)
2454 || insn_entry[i].is_128_int || insn_entry[i].is_call
2455 || !(insn_entry[i].is_swappable || insn_entry[i].is_swap))
2456 root->web_not_optimizable = 1;
2458 /* If we have loads or stores that aren't permuting then the
2459 optimization isn't appropriate. */
2460 else if ((insn_entry[i].is_load || insn_entry[i].is_store)
2461 && !insn_entry[i].is_swap && !insn_entry[i].is_swappable)
2462 root->web_not_optimizable = 1;
2464 /* If we have a swap that is both fed by a permuting load
2465 and a feeder of a permuting store, then the optimization
2466 isn't appropriate. (Consider vec_xl followed by vec_xst_be.) */
2467 else if (insn_entry[i].is_swap && !insn_entry[i].is_load
2468 && !insn_entry[i].is_store
2469 && swap_feeds_both_load_and_store (&insn_entry[i]))
2470 root->web_not_optimizable = 1;
2472 /* If we have permuting loads or stores that are not accompanied
2473 by a register swap, the optimization isn't appropriate. */
2474 else if (insn_entry[i].is_load && insn_entry[i].is_swap)
2476 rtx insn = insn_entry[i].insn;
2477 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
2478 df_ref def;
2480 FOR_EACH_INSN_INFO_DEF (def, insn_info)
2482 struct df_link *link = DF_REF_CHAIN (def);
2484 if (!chain_contains_only_swaps (insn_entry, link, FOR_LOADS))
2486 root->web_not_optimizable = 1;
2487 break;
2491 else if (insn_entry[i].is_store && insn_entry[i].is_swap)
2493 rtx insn = insn_entry[i].insn;
2494 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
2495 df_ref use;
2497 FOR_EACH_INSN_INFO_USE (use, insn_info)
2499 struct df_link *link = DF_REF_CHAIN (use);
2501 if (!chain_contains_only_swaps (insn_entry, link, FOR_STORES))
2503 root->web_not_optimizable = 1;
2504 break;
2510 if (dump_file)
2512 fprintf (dump_file, "\nSwap insn entry table after web analysis\n");
2513 dump_swap_insn_table (insn_entry);
2516 /* For each load and store in an optimizable web (which implies
2517 the loads and stores are permuting), find the associated
2518 register swaps and mark them for removal. Due to various
2519 optimizations we may mark the same swap more than once. Also
2520 perform special handling for swappable insns that require it. */
2521 for (i = 0; i < e; ++i)
2522 if ((insn_entry[i].is_load || insn_entry[i].is_store)
2523 && insn_entry[i].is_swap)
2525 swap_web_entry* root_entry
2526 = (swap_web_entry*)((&insn_entry[i])->unionfind_root ());
2527 if (!root_entry->web_not_optimizable)
2528 mark_swaps_for_removal (insn_entry, i);
2530 else if (insn_entry[i].is_swappable && insn_entry[i].special_handling)
2532 swap_web_entry* root_entry
2533 = (swap_web_entry*)((&insn_entry[i])->unionfind_root ());
2534 if (!root_entry->web_not_optimizable)
2535 handle_special_swappables (insn_entry, i);
2538 /* Now delete the swaps marked for removal. */
2539 for (i = 0; i < e; ++i)
2540 if (insn_entry[i].will_delete)
2541 replace_swap_with_copy (insn_entry, i);
2543 /* Clean up. */
2544 free (insn_entry);
2546 /* Use a second pass over rtl to detect that certain vector values
2547 fetched from or stored to memory on quad-word aligned addresses
2548 can use lvx/stvx without swaps. */
2550 /* First, rebuild ud chains. */
2551 df_remove_problem (df_chain);
2552 df_process_deferred_rescans ();
2553 df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
2554 df_chain_add_problem (DF_UD_CHAIN);
2555 df_analyze ();
2557 swap_web_entry *pass2_insn_entry;
2558 pass2_insn_entry = XCNEWVEC (swap_web_entry, get_max_uid ());
2560 /* Walk the insns to gather basic data. */
2561 FOR_ALL_BB_FN (bb, fun)
2562 FOR_BB_INSNS_SAFE (bb, insn, curr_insn)
2564 unsigned int uid = INSN_UID (insn);
2565 if (NONDEBUG_INSN_P (insn))
2567 pass2_insn_entry[uid].insn = insn;
2569 pass2_insn_entry[uid].is_relevant = 1;
2570 pass2_insn_entry[uid].is_load = insn_is_load_p (insn);
2571 pass2_insn_entry[uid].is_store = insn_is_store_p (insn);
2573 /* Determine if this is a doubleword swap. If not,
2574 determine whether it can legally be swapped. */
2575 if (insn_is_swap_p (insn))
2576 pass2_insn_entry[uid].is_swap = 1;
2580 e = get_max_uid ();
2581 for (unsigned i = 0; i < e; ++i)
2582 if (pass2_insn_entry[i].is_swap && !pass2_insn_entry[i].is_load
2583 && !pass2_insn_entry[i].is_store)
2585 /* Replace swap of aligned load-swap with aligned unswapped
2586 load. */
2587 rtx_insn *rtx_insn = pass2_insn_entry[i].insn;
2588 if (quad_aligned_load_p (pass2_insn_entry, rtx_insn))
2589 replace_swapped_aligned_load (pass2_insn_entry, rtx_insn);
2591 else if (pass2_insn_entry[i].is_swap && pass2_insn_entry[i].is_store)
2593 /* Replace aligned store-swap of swapped value with aligned
2594 unswapped store. */
2595 rtx_insn *rtx_insn = pass2_insn_entry[i].insn;
2596 if (quad_aligned_store_p (pass2_insn_entry, rtx_insn))
2597 replace_swapped_aligned_store (pass2_insn_entry, rtx_insn);
2600 /* Clean up. */
2601 free (pass2_insn_entry);
2603 /* Use a third pass over rtl to replace swap(load(vector constant))
2604 with load(swapped vector constant). */
2606 /* First, rebuild ud chains. */
2607 df_remove_problem (df_chain);
2608 df_process_deferred_rescans ();
2609 df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
2610 df_chain_add_problem (DF_UD_CHAIN);
2611 df_analyze ();
2613 swap_web_entry *pass3_insn_entry;
2614 pass3_insn_entry = XCNEWVEC (swap_web_entry, get_max_uid ());
2616 /* Walk the insns to gather basic data. */
2617 FOR_ALL_BB_FN (bb, fun)
2618 FOR_BB_INSNS_SAFE (bb, insn, curr_insn)
2620 unsigned int uid = INSN_UID (insn);
2621 if (NONDEBUG_INSN_P (insn))
2623 pass3_insn_entry[uid].insn = insn;
2625 pass3_insn_entry[uid].is_relevant = 1;
2626 pass3_insn_entry[uid].is_load = insn_is_load_p (insn);
2627 pass3_insn_entry[uid].is_store = insn_is_store_p (insn);
2629 /* Determine if this is a doubleword swap. If not,
2630 determine whether it can legally be swapped. */
2631 if (insn_is_swap_p (insn))
2632 pass3_insn_entry[uid].is_swap = 1;
2636 e = get_max_uid ();
2637 for (unsigned i = 0; i < e; ++i)
2638 if (pass3_insn_entry[i].is_swap && !pass3_insn_entry[i].is_load
2639 && !pass3_insn_entry[i].is_store)
2641 insn = pass3_insn_entry[i].insn;
2642 if (const_load_sequence_p (pass3_insn_entry, insn))
2643 replace_swapped_load_constant (pass3_insn_entry, insn);
2646 /* Clean up. */
2647 free (pass3_insn_entry);
2648 return 0;
2651 const pass_data pass_data_analyze_swaps =
2653 RTL_PASS, /* type */
2654 "swaps", /* name */
2655 OPTGROUP_NONE, /* optinfo_flags */
2656 TV_NONE, /* tv_id */
2657 0, /* properties_required */
2658 0, /* properties_provided */
2659 0, /* properties_destroyed */
2660 0, /* todo_flags_start */
2661 TODO_df_finish, /* todo_flags_finish */
2664 class pass_analyze_swaps : public rtl_opt_pass
2666 public:
2667 pass_analyze_swaps(gcc::context *ctxt)
2668 : rtl_opt_pass(pass_data_analyze_swaps, ctxt)
2671 /* opt_pass methods: */
2672 virtual bool gate (function *)
2674 return (optimize > 0 && !BYTES_BIG_ENDIAN && TARGET_VSX
2675 && !TARGET_P9_VECTOR && rs6000_optimize_swaps);
2678 virtual unsigned int execute (function *fun)
2680 return rs6000_analyze_swaps (fun);
2683 opt_pass *clone ()
2685 return new pass_analyze_swaps (m_ctxt);
2688 }; // class pass_analyze_swaps
2690 rtl_opt_pass *
2691 make_pass_analyze_swaps (gcc::context *ctxt)
2693 return new pass_analyze_swaps (ctxt);