/* SLP - Basic Block Vectorization
   Copyright (C) 2007-2017 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "tree-pass.h"
#include "ssa.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "params.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "gimple-iterator.h"
#include "cfgloop.h"
#include "tree-vectorizer.h"
#include "langhooks.h"
#include "gimple-walk.h"
46 /* Recursively free the memory allocated for the SLP tree rooted at NODE. */
49 vect_free_slp_tree (slp_tree node
)
54 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
55 vect_free_slp_tree (child
);
58 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
59 /* After transform some stmts are removed and thus their vinfo is gone. */
60 if (vinfo_for_stmt (stmt
))
62 gcc_assert (STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt
)) > 0);
63 STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt
))--;
66 SLP_TREE_CHILDREN (node
).release ();
67 SLP_TREE_SCALAR_STMTS (node
).release ();
68 SLP_TREE_VEC_STMTS (node
).release ();
69 SLP_TREE_LOAD_PERMUTATION (node
).release ();
75 /* Free the memory allocated for the SLP instance. */
78 vect_free_slp_instance (slp_instance instance
)
80 vect_free_slp_tree (SLP_INSTANCE_TREE (instance
));
81 SLP_INSTANCE_LOADS (instance
).release ();
86 /* Create an SLP node for SCALAR_STMTS. */
89 vect_create_new_slp_node (vec
<gimple
*> scalar_stmts
)
92 gimple
*stmt
= scalar_stmts
[0];
95 if (is_gimple_call (stmt
))
96 nops
= gimple_call_num_args (stmt
);
97 else if (is_gimple_assign (stmt
))
99 nops
= gimple_num_ops (stmt
) - 1;
100 if (gimple_assign_rhs_code (stmt
) == COND_EXPR
)
103 else if (gimple_code (stmt
) == GIMPLE_PHI
)
108 node
= XNEW (struct _slp_tree
);
109 SLP_TREE_SCALAR_STMTS (node
) = scalar_stmts
;
110 SLP_TREE_VEC_STMTS (node
).create (0);
111 SLP_TREE_CHILDREN (node
).create (nops
);
112 SLP_TREE_LOAD_PERMUTATION (node
) = vNULL
;
113 SLP_TREE_TWO_OPERATORS (node
) = false;
114 SLP_TREE_DEF_TYPE (node
) = vect_internal_def
;
117 FOR_EACH_VEC_ELT (scalar_stmts
, i
, stmt
)
118 STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt
))++;
124 /* This structure is used in creation of an SLP tree. Each instance
125 corresponds to the same operand in a group of scalar stmts in an SLP
127 typedef struct _slp_oprnd_info
129 /* Def-stmts for the operands. */
130 vec
<gimple
*> def_stmts
;
131 /* Information about the first statement, its vector def-type, type, the
132 operand itself in case it's constant, and an indication if it's a pattern
135 enum vect_def_type first_dt
;
141 /* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
143 static vec
<slp_oprnd_info
>
144 vect_create_oprnd_info (int nops
, int group_size
)
147 slp_oprnd_info oprnd_info
;
148 vec
<slp_oprnd_info
> oprnds_info
;
150 oprnds_info
.create (nops
);
151 for (i
= 0; i
< nops
; i
++)
153 oprnd_info
= XNEW (struct _slp_oprnd_info
);
154 oprnd_info
->def_stmts
.create (group_size
);
155 oprnd_info
->first_dt
= vect_uninitialized_def
;
156 oprnd_info
->first_op_type
= NULL_TREE
;
157 oprnd_info
->first_pattern
= false;
158 oprnd_info
->second_pattern
= false;
159 oprnds_info
.quick_push (oprnd_info
);
166 /* Free operands info. */
169 vect_free_oprnd_info (vec
<slp_oprnd_info
> &oprnds_info
)
172 slp_oprnd_info oprnd_info
;
174 FOR_EACH_VEC_ELT (oprnds_info
, i
, oprnd_info
)
176 oprnd_info
->def_stmts
.release ();
177 XDELETE (oprnd_info
);
180 oprnds_info
.release ();
184 /* Find the place of the data-ref in STMT in the interleaving chain that starts
185 from FIRST_STMT. Return -1 if the data-ref is not a part of the chain. */
188 vect_get_place_in_interleaving_chain (gimple
*stmt
, gimple
*first_stmt
)
190 gimple
*next_stmt
= first_stmt
;
193 if (first_stmt
!= GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)))
198 if (next_stmt
== stmt
)
200 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
202 result
+= GROUP_GAP (vinfo_for_stmt (next_stmt
));
210 /* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
211 they are of a valid type and that they match the defs of the first stmt of
212 the SLP group (stored in OPRNDS_INFO). This function tries to match stmts
213 by swapping operands of STMT when possible. Non-zero *SWAP indicates swap
214 is required for cond_expr stmts. Specifically, *SWAP is 1 if STMT is cond
215 and operands of comparison need to be swapped; *SWAP is 2 if STMT is cond
216 and code of comparison needs to be inverted. If there is any operand swap
217 in this function, *SWAP is set to non-zero value.
218 If there was a fatal error return -1; if the error could be corrected by
219 swapping operands of father node of this one, return 1; if everything is
223 vect_get_and_check_slp_defs (vec_info
*vinfo
, unsigned char *swap
,
224 gimple
*stmt
, unsigned stmt_num
,
225 vec
<slp_oprnd_info
> *oprnds_info
)
228 unsigned int i
, number_of_oprnds
;
230 enum vect_def_type dt
= vect_uninitialized_def
;
231 bool pattern
= false;
232 slp_oprnd_info oprnd_info
;
233 int first_op_idx
= 1;
234 bool commutative
= false;
235 bool first_op_cond
= false;
236 bool first
= stmt_num
== 0;
237 bool second
= stmt_num
== 1;
239 if (is_gimple_call (stmt
))
241 number_of_oprnds
= gimple_call_num_args (stmt
);
244 else if (is_gimple_assign (stmt
))
246 enum tree_code code
= gimple_assign_rhs_code (stmt
);
247 number_of_oprnds
= gimple_num_ops (stmt
) - 1;
248 /* Swap can only be done for cond_expr if asked to, otherwise we
249 could result in different comparison code to the first stmt. */
250 if (gimple_assign_rhs_code (stmt
) == COND_EXPR
251 && COMPARISON_CLASS_P (gimple_assign_rhs1 (stmt
)))
253 first_op_cond
= true;
257 commutative
= commutative_tree_code (code
);
262 bool swapped
= (*swap
!= 0);
263 gcc_assert (!swapped
|| first_op_cond
);
264 for (i
= 0; i
< number_of_oprnds
; i
++)
269 /* Map indicating how operands of cond_expr should be swapped. */
270 int maps
[3][4] = {{0, 1, 2, 3}, {1, 0, 2, 3}, {0, 1, 3, 2}};
271 int *map
= maps
[*swap
];
274 oprnd
= TREE_OPERAND (gimple_op (stmt
, first_op_idx
), map
[i
]);
276 oprnd
= gimple_op (stmt
, map
[i
]);
279 oprnd
= gimple_op (stmt
, first_op_idx
+ (swapped
? !i
: i
));
281 oprnd_info
= (*oprnds_info
)[i
];
283 if (!vect_is_simple_use (oprnd
, vinfo
, &def_stmt
, &dt
))
285 if (dump_enabled_p ())
287 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
288 "Build SLP failed: can't analyze def for ");
289 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, oprnd
);
290 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
296 /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
297 from the pattern. Check that all the stmts of the node are in the
299 if (def_stmt
&& gimple_bb (def_stmt
)
300 && vect_stmt_in_region_p (vinfo
, def_stmt
)
301 && vinfo_for_stmt (def_stmt
)
302 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt
))
303 && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt
))
304 && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt
)))
307 if (!first
&& !oprnd_info
->first_pattern
308 /* Allow different pattern state for the defs of the
309 first stmt in reduction chains. */
310 && (oprnd_info
->first_dt
!= vect_reduction_def
311 || (!second
&& !oprnd_info
->second_pattern
)))
321 if (dump_enabled_p ())
323 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
324 "Build SLP failed: some of the stmts"
325 " are in a pattern, and others are not ");
326 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, oprnd
);
327 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
333 def_stmt
= STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt
));
334 dt
= STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt
));
336 if (dt
== vect_unknown_def_type
)
338 if (dump_enabled_p ())
339 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
340 "Unsupported pattern.\n");
344 switch (gimple_code (def_stmt
))
351 if (dump_enabled_p ())
352 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
353 "unsupported defining stmt:\n");
359 oprnd_info
->second_pattern
= pattern
;
363 oprnd_info
->first_dt
= dt
;
364 oprnd_info
->first_pattern
= pattern
;
365 oprnd_info
->first_op_type
= TREE_TYPE (oprnd
);
369 /* Not first stmt of the group, check that the def-stmt/s match
370 the def-stmt/s of the first stmt. Allow different definition
371 types for reduction chains: the first stmt must be a
372 vect_reduction_def (a phi node), and the rest
373 vect_internal_def. */
374 if (((oprnd_info
->first_dt
!= dt
375 && !(oprnd_info
->first_dt
== vect_reduction_def
376 && dt
== vect_internal_def
)
377 && !((oprnd_info
->first_dt
== vect_external_def
378 || oprnd_info
->first_dt
== vect_constant_def
)
379 && (dt
== vect_external_def
380 || dt
== vect_constant_def
)))
381 || !types_compatible_p (oprnd_info
->first_op_type
,
384 /* Try swapping operands if we got a mismatch. */
393 if (dump_enabled_p ())
394 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
395 "Build SLP failed: different types\n");
401 /* Check the types of the definitions. */
404 case vect_constant_def
:
405 case vect_external_def
:
408 case vect_reduction_def
:
409 case vect_induction_def
:
410 case vect_internal_def
:
411 oprnd_info
->def_stmts
.quick_push (def_stmt
);
415 /* FORNOW: Not supported. */
416 if (dump_enabled_p ())
418 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
419 "Build SLP failed: illegal type of def ");
420 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, oprnd
);
421 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
431 /* If there are already uses of this stmt in a SLP instance then
432 we've committed to the operand order and can't swap it. */
433 if (STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt
)) != 0)
435 if (dump_enabled_p ())
437 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
438 "Build SLP failed: cannot swap operands of "
440 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
447 tree cond
= gimple_assign_rhs1 (stmt
);
448 enum tree_code code
= TREE_CODE (cond
);
453 swap_ssa_operands (stmt
, &TREE_OPERAND (cond
, 0),
454 &TREE_OPERAND (cond
, 1));
455 TREE_SET_CODE (cond
, swap_tree_comparison (code
));
460 swap_ssa_operands (stmt
, gimple_assign_rhs2_ptr (stmt
),
461 gimple_assign_rhs3_ptr (stmt
));
462 bool honor_nans
= HONOR_NANS (TREE_OPERAND (cond
, 0));
463 code
= invert_tree_comparison (TREE_CODE (cond
), honor_nans
);
464 gcc_assert (code
!= ERROR_MARK
);
465 TREE_SET_CODE (cond
, code
);
469 swap_ssa_operands (stmt
, gimple_assign_rhs1_ptr (stmt
),
470 gimple_assign_rhs2_ptr (stmt
));
471 if (dump_enabled_p ())
473 dump_printf_loc (MSG_NOTE
, vect_location
,
474 "swapped operands to match def types in ");
475 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
484 /* Verify if the scalar stmts STMTS are isomorphic, require data
485 permutation or are of unsupported types of operation. Return
486 true if they are, otherwise return false and indicate in *MATCHES
487 which stmts are not isomorphic to the first one. If MATCHES[0]
488 is false then this indicates the comparison could not be
489 carried out or the stmts will never be vectorized by SLP.
491 Note COND_EXPR is possibly ismorphic to another one after swapping its
492 operands. Set SWAP[i] to 1 if stmt I is COND_EXPR and isomorphic to
493 the first stmt by swapping the two operands of comparison; set SWAP[i]
494 to 2 if stmt I is isormorphic to the first stmt by inverting the code
495 of comparison. Take A1 >= B1 ? X1 : Y1 as an exmple, it can be swapped
496 to (B1 <= A1 ? X1 : Y1); or be inverted to (A1 < B1) ? Y1 : X1. */
499 vect_build_slp_tree_1 (vec_info
*vinfo
, unsigned char *swap
,
500 vec
<gimple
*> stmts
, unsigned int group_size
,
501 unsigned nops
, unsigned int *max_nunits
,
502 bool *matches
, bool *two_operators
)
505 gimple
*first_stmt
= stmts
[0], *stmt
= stmts
[0];
506 enum tree_code first_stmt_code
= ERROR_MARK
;
507 enum tree_code alt_stmt_code
= ERROR_MARK
;
508 enum tree_code rhs_code
= ERROR_MARK
;
509 enum tree_code first_cond_code
= ERROR_MARK
;
511 bool need_same_oprnds
= false;
512 tree vectype
= NULL_TREE
, scalar_type
, first_op1
= NULL_TREE
;
515 machine_mode optab_op2_mode
;
516 machine_mode vec_mode
;
518 gimple
*first_load
= NULL
, *prev_first_load
= NULL
;
520 /* For every stmt in NODE find its def stmt/s. */
521 FOR_EACH_VEC_ELT (stmts
, i
, stmt
)
526 if (dump_enabled_p ())
528 dump_printf_loc (MSG_NOTE
, vect_location
, "Build SLP for ");
529 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
532 /* Fail to vectorize statements marked as unvectorizable. */
533 if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt
)))
535 if (dump_enabled_p ())
537 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
538 "Build SLP failed: unvectorizable statement ");
539 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
541 /* Fatal mismatch. */
546 lhs
= gimple_get_lhs (stmt
);
547 if (lhs
== NULL_TREE
)
549 if (dump_enabled_p ())
551 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
552 "Build SLP failed: not GIMPLE_ASSIGN nor "
554 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
556 /* Fatal mismatch. */
561 scalar_type
= vect_get_smallest_scalar_type (stmt
, &dummy
, &dummy
);
562 vectype
= get_vectype_for_scalar_type (scalar_type
);
565 if (dump_enabled_p ())
567 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
568 "Build SLP failed: unsupported data-type ");
569 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
571 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
573 /* Fatal mismatch. */
578 /* If populating the vector type requires unrolling then fail
579 before adjusting *max_nunits for basic-block vectorization. */
580 if (is_a
<bb_vec_info
> (vinfo
)
581 && TYPE_VECTOR_SUBPARTS (vectype
) > group_size
)
583 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
584 "Build SLP failed: unrolling required "
585 "in basic block SLP\n");
586 /* Fatal mismatch. */
591 /* In case of multiple types we need to detect the smallest type. */
592 if (*max_nunits
< TYPE_VECTOR_SUBPARTS (vectype
))
593 *max_nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
595 if (gcall
*call_stmt
= dyn_cast
<gcall
*> (stmt
))
597 rhs_code
= CALL_EXPR
;
598 if (gimple_call_internal_p (call_stmt
)
599 || gimple_call_tail_p (call_stmt
)
600 || gimple_call_noreturn_p (call_stmt
)
601 || !gimple_call_nothrow_p (call_stmt
)
602 || gimple_call_chain (call_stmt
))
604 if (dump_enabled_p ())
606 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
607 "Build SLP failed: unsupported call type ");
608 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
611 /* Fatal mismatch. */
617 rhs_code
= gimple_assign_rhs_code (stmt
);
619 /* Check the operation. */
622 first_stmt_code
= rhs_code
;
624 /* Shift arguments should be equal in all the packed stmts for a
625 vector shift with scalar shift operand. */
626 if (rhs_code
== LSHIFT_EXPR
|| rhs_code
== RSHIFT_EXPR
627 || rhs_code
== LROTATE_EXPR
628 || rhs_code
== RROTATE_EXPR
)
630 vec_mode
= TYPE_MODE (vectype
);
632 /* First see if we have a vector/vector shift. */
633 optab
= optab_for_tree_code (rhs_code
, vectype
,
637 || optab_handler (optab
, vec_mode
) == CODE_FOR_nothing
)
639 /* No vector/vector shift, try for a vector/scalar shift. */
640 optab
= optab_for_tree_code (rhs_code
, vectype
,
645 if (dump_enabled_p ())
646 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
647 "Build SLP failed: no optab.\n");
648 /* Fatal mismatch. */
652 icode
= (int) optab_handler (optab
, vec_mode
);
653 if (icode
== CODE_FOR_nothing
)
655 if (dump_enabled_p ())
656 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
658 "op not supported by target.\n");
659 /* Fatal mismatch. */
663 optab_op2_mode
= insn_data
[icode
].operand
[2].mode
;
664 if (!VECTOR_MODE_P (optab_op2_mode
))
666 need_same_oprnds
= true;
667 first_op1
= gimple_assign_rhs2 (stmt
);
671 else if (rhs_code
== WIDEN_LSHIFT_EXPR
)
673 need_same_oprnds
= true;
674 first_op1
= gimple_assign_rhs2 (stmt
);
679 if (first_stmt_code
!= rhs_code
680 && alt_stmt_code
== ERROR_MARK
)
681 alt_stmt_code
= rhs_code
;
682 if (first_stmt_code
!= rhs_code
683 && (first_stmt_code
!= IMAGPART_EXPR
684 || rhs_code
!= REALPART_EXPR
)
685 && (first_stmt_code
!= REALPART_EXPR
686 || rhs_code
!= IMAGPART_EXPR
)
687 /* Handle mismatches in plus/minus by computing both
688 and merging the results. */
689 && !((first_stmt_code
== PLUS_EXPR
690 || first_stmt_code
== MINUS_EXPR
)
691 && (alt_stmt_code
== PLUS_EXPR
692 || alt_stmt_code
== MINUS_EXPR
)
693 && rhs_code
== alt_stmt_code
)
694 && !(STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt
))
695 && (first_stmt_code
== ARRAY_REF
696 || first_stmt_code
== BIT_FIELD_REF
697 || first_stmt_code
== INDIRECT_REF
698 || first_stmt_code
== COMPONENT_REF
699 || first_stmt_code
== MEM_REF
)))
701 if (dump_enabled_p ())
703 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
704 "Build SLP failed: different operation "
706 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
707 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
709 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
717 && !operand_equal_p (first_op1
, gimple_assign_rhs2 (stmt
), 0))
719 if (dump_enabled_p ())
721 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
722 "Build SLP failed: different shift "
724 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
730 if (rhs_code
== CALL_EXPR
)
732 gimple
*first_stmt
= stmts
[0];
733 if (gimple_call_num_args (stmt
) != nops
734 || !operand_equal_p (gimple_call_fn (first_stmt
),
735 gimple_call_fn (stmt
), 0)
736 || gimple_call_fntype (first_stmt
)
737 != gimple_call_fntype (stmt
))
739 if (dump_enabled_p ())
741 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
742 "Build SLP failed: different calls in ");
743 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
752 /* Grouped store or load. */
753 if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt
)))
755 if (REFERENCE_CLASS_P (lhs
))
763 first_load
= GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
));
766 /* Check that there are no loads from different interleaving
767 chains in the same node. */
768 if (prev_first_load
!= first_load
)
770 if (dump_enabled_p ())
772 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
774 "Build SLP failed: different "
775 "interleaving chains in one node ");
776 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
784 prev_first_load
= first_load
;
786 } /* Grouped access. */
789 if (TREE_CODE_CLASS (rhs_code
) == tcc_reference
)
791 /* Not grouped load. */
792 if (dump_enabled_p ())
794 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
795 "Build SLP failed: not grouped load ");
796 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
799 /* FORNOW: Not grouped loads are not supported. */
800 /* Fatal mismatch. */
805 /* Not memory operation. */
806 if (TREE_CODE_CLASS (rhs_code
) != tcc_binary
807 && TREE_CODE_CLASS (rhs_code
) != tcc_unary
808 && TREE_CODE_CLASS (rhs_code
) != tcc_expression
809 && TREE_CODE_CLASS (rhs_code
) != tcc_comparison
810 && rhs_code
!= CALL_EXPR
)
812 if (dump_enabled_p ())
814 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
815 "Build SLP failed: operation");
816 dump_printf (MSG_MISSED_OPTIMIZATION
, " unsupported ");
817 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
819 /* Fatal mismatch. */
824 if (rhs_code
== COND_EXPR
)
826 tree cond_expr
= gimple_assign_rhs1 (stmt
);
827 enum tree_code cond_code
= TREE_CODE (cond_expr
);
828 enum tree_code swap_code
= ERROR_MARK
;
829 enum tree_code invert_code
= ERROR_MARK
;
832 first_cond_code
= TREE_CODE (cond_expr
);
833 else if (TREE_CODE_CLASS (cond_code
) == tcc_comparison
)
835 bool honor_nans
= HONOR_NANS (TREE_OPERAND (cond_expr
, 0));
836 swap_code
= swap_tree_comparison (cond_code
);
837 invert_code
= invert_tree_comparison (cond_code
, honor_nans
);
840 if (first_cond_code
== cond_code
)
842 /* Isomorphic can be achieved by swapping. */
843 else if (first_cond_code
== swap_code
)
845 /* Isomorphic can be achieved by inverting. */
846 else if (first_cond_code
== invert_code
)
850 if (dump_enabled_p ())
852 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
853 "Build SLP failed: different"
855 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
867 for (i
= 0; i
< group_size
; ++i
)
871 /* If we allowed a two-operation SLP node verify the target can cope
872 with the permute we are going to use. */
873 if (alt_stmt_code
!= ERROR_MARK
874 && TREE_CODE_CLASS (alt_stmt_code
) != tcc_reference
)
877 = XALLOCAVEC (unsigned char, TYPE_VECTOR_SUBPARTS (vectype
));
878 for (i
= 0; i
< TYPE_VECTOR_SUBPARTS (vectype
); ++i
)
881 if (gimple_assign_rhs_code (stmts
[i
% group_size
]) == alt_stmt_code
)
882 sel
[i
] += TYPE_VECTOR_SUBPARTS (vectype
);
884 if (!can_vec_perm_p (TYPE_MODE (vectype
), false, sel
))
886 for (i
= 0; i
< group_size
; ++i
)
887 if (gimple_assign_rhs_code (stmts
[i
]) == alt_stmt_code
)
890 if (dump_enabled_p ())
892 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
893 "Build SLP failed: different operation "
895 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
897 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
899 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
905 *two_operators
= true;
911 /* Recursively build an SLP tree starting from NODE.
912 Fail (and return a value not equal to zero) if def-stmts are not
913 isomorphic, require data permutation or are of unsupported types of
914 operation. Otherwise, return 0.
915 The value returned is the depth in the SLP tree where a mismatch
919 vect_build_slp_tree (vec_info
*vinfo
,
920 vec
<gimple
*> stmts
, unsigned int group_size
,
921 unsigned int *max_nunits
,
922 vec
<slp_tree
> *loads
,
923 bool *matches
, unsigned *npermutes
, unsigned *tree_size
,
924 unsigned max_tree_size
)
926 unsigned nops
, i
, this_tree_size
= 0, this_max_nunits
= *max_nunits
;
933 if (is_gimple_call (stmt
))
934 nops
= gimple_call_num_args (stmt
);
935 else if (is_gimple_assign (stmt
))
937 nops
= gimple_num_ops (stmt
) - 1;
938 if (gimple_assign_rhs_code (stmt
) == COND_EXPR
)
941 else if (gimple_code (stmt
) == GIMPLE_PHI
)
946 /* If the SLP node is a PHI (induction or reduction), terminate
948 if (gimple_code (stmt
) == GIMPLE_PHI
)
950 /* Induction from different IVs is not supported. */
951 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt
)) == vect_induction_def
)
952 FOR_EACH_VEC_ELT (stmts
, i
, stmt
)
953 if (stmt
!= stmts
[0])
955 node
= vect_create_new_slp_node (stmts
);
960 bool two_operators
= false;
961 unsigned char *swap
= XALLOCAVEC (unsigned char, group_size
);
962 if (!vect_build_slp_tree_1 (vinfo
, swap
,
963 stmts
, group_size
, nops
,
964 &this_max_nunits
, matches
, &two_operators
))
967 /* If the SLP node is a load, terminate the recursion. */
968 if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt
))
969 && DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt
))))
971 *max_nunits
= this_max_nunits
;
972 node
= vect_create_new_slp_node (stmts
);
973 loads
->safe_push (node
);
977 /* Get at the operands, verifying they are compatible. */
978 vec
<slp_oprnd_info
> oprnds_info
= vect_create_oprnd_info (nops
, group_size
);
979 slp_oprnd_info oprnd_info
;
980 FOR_EACH_VEC_ELT (stmts
, i
, stmt
)
982 int res
= vect_get_and_check_slp_defs (vinfo
, &swap
[i
],
983 stmt
, i
, &oprnds_info
);
985 matches
[(res
== -1) ? 0 : i
] = false;
989 for (i
= 0; i
< group_size
; ++i
)
992 vect_free_oprnd_info (oprnds_info
);
996 auto_vec
<slp_tree
, 4> children
;
997 auto_vec
<slp_tree
> this_loads
;
1001 /* Create SLP_TREE nodes for the definition node/s. */
1002 FOR_EACH_VEC_ELT (oprnds_info
, i
, oprnd_info
)
1005 unsigned old_nloads
= this_loads
.length ();
1006 unsigned old_tree_size
= this_tree_size
;
1009 if (oprnd_info
->first_dt
!= vect_internal_def
1010 && oprnd_info
->first_dt
!= vect_reduction_def
1011 && oprnd_info
->first_dt
!= vect_induction_def
)
1014 if (++this_tree_size
> max_tree_size
)
1016 FOR_EACH_VEC_ELT (children
, j
, child
)
1017 vect_free_slp_tree (child
);
1018 vect_free_oprnd_info (oprnds_info
);
1022 if ((child
= vect_build_slp_tree (vinfo
, oprnd_info
->def_stmts
,
1023 group_size
, &this_max_nunits
,
1024 &this_loads
, matches
, npermutes
,
1026 max_tree_size
)) != NULL
)
1028 /* If we have all children of child built up from scalars then just
1029 throw that away and build it up this node from scalars. */
1030 if (!SLP_TREE_CHILDREN (child
).is_empty ()
1031 /* ??? Rejecting patterns this way doesn't work. We'd have to
1032 do extra work to cancel the pattern so the uses see the
1034 && !is_pattern_stmt_p
1035 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child
)[0])))
1037 slp_tree grandchild
;
1039 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child
), j
, grandchild
)
1040 if (SLP_TREE_DEF_TYPE (grandchild
) == vect_internal_def
)
1045 this_loads
.truncate (old_nloads
);
1046 this_tree_size
= old_tree_size
;
1047 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child
), j
, grandchild
)
1048 vect_free_slp_tree (grandchild
);
1049 SLP_TREE_CHILDREN (child
).truncate (0);
1051 dump_printf_loc (MSG_NOTE
, vect_location
,
1052 "Building parent vector operands from "
1053 "scalars instead\n");
1054 oprnd_info
->def_stmts
= vNULL
;
1055 SLP_TREE_DEF_TYPE (child
) = vect_external_def
;
1056 children
.safe_push (child
);
1061 oprnd_info
->def_stmts
= vNULL
;
1062 children
.safe_push (child
);
1066 /* If the SLP build failed fatally and we analyze a basic-block
1067 simply treat nodes we fail to build as externally defined
1068 (and thus build vectors from the scalar defs).
1069 The cost model will reject outright expensive cases.
1070 ??? This doesn't treat cases where permutation ultimatively
1071 fails (or we don't try permutation below). Ideally we'd
1072 even compute a permutation that will end up with the maximum
1074 if (is_a
<bb_vec_info
> (vinfo
)
1076 /* ??? Rejecting patterns this way doesn't work. We'd have to
1077 do extra work to cancel the pattern so the uses see the
1079 && !is_pattern_stmt_p (vinfo_for_stmt (stmt
)))
1081 dump_printf_loc (MSG_NOTE
, vect_location
,
1082 "Building vector operands from scalars\n");
1083 child
= vect_create_new_slp_node (oprnd_info
->def_stmts
);
1084 SLP_TREE_DEF_TYPE (child
) = vect_external_def
;
1085 children
.safe_push (child
);
1086 oprnd_info
->def_stmts
= vNULL
;
1090 /* If the SLP build for operand zero failed and operand zero
1091 and one can be commutated try that for the scalar stmts
1092 that failed the match. */
1094 /* A first scalar stmt mismatch signals a fatal mismatch. */
1096 /* ??? For COND_EXPRs we can swap the comparison operands
1097 as well as the arms under some constraints. */
1099 && oprnds_info
[1]->first_dt
== vect_internal_def
1100 && is_gimple_assign (stmt
)
1101 && commutative_tree_code (gimple_assign_rhs_code (stmt
))
1103 /* Do so only if the number of not successful permutes was nor more
1104 than a cut-ff as re-trying the recursive match on
1105 possibly each level of the tree would expose exponential
1109 /* Verify if we can safely swap or if we committed to a specific
1110 operand order already. */
1111 for (j
= 0; j
< group_size
; ++j
)
1114 || STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmts
[j
]))))
1116 if (dump_enabled_p ())
1118 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1119 "Build SLP failed: cannot swap operands "
1121 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
1127 /* Swap mismatched definition stmts. */
1128 dump_printf_loc (MSG_NOTE
, vect_location
,
1129 "Re-trying with swapped operands of stmts ");
1130 for (j
= 0; j
< group_size
; ++j
)
1133 std::swap (oprnds_info
[0]->def_stmts
[j
],
1134 oprnds_info
[1]->def_stmts
[j
]);
1135 dump_printf (MSG_NOTE
, "%d ", j
);
1137 dump_printf (MSG_NOTE
, "\n");
1138 /* And try again with scratch 'matches' ... */
1139 bool *tem
= XALLOCAVEC (bool, group_size
);
1140 if ((child
= vect_build_slp_tree (vinfo
, oprnd_info
->def_stmts
,
1141 group_size
, &this_max_nunits
,
1142 &this_loads
, tem
, npermutes
,
1144 max_tree_size
)) != NULL
)
1146 /* ... so if successful we can apply the operand swapping
1147 to the GIMPLE IL. This is necessary because for example
1148 vect_get_slp_defs uses operand indexes and thus expects
1149 canonical operand order. This is also necessary even
1150 if we end up building the operand from scalars as
1151 we'll continue to process swapped operand two. */
1152 for (j
= 0; j
< group_size
; ++j
)
1154 gimple
*stmt
= stmts
[j
];
1155 gimple_set_plf (stmt
, GF_PLF_1
, false);
1157 for (j
= 0; j
< group_size
; ++j
)
1159 gimple
*stmt
= stmts
[j
];
1162 /* Avoid swapping operands twice. */
1163 if (gimple_plf (stmt
, GF_PLF_1
))
1165 swap_ssa_operands (stmt
, gimple_assign_rhs1_ptr (stmt
),
1166 gimple_assign_rhs2_ptr (stmt
));
1167 gimple_set_plf (stmt
, GF_PLF_1
, true);
1170 /* Verify we swap all duplicates or none. */
1172 for (j
= 0; j
< group_size
; ++j
)
1174 gimple
*stmt
= stmts
[j
];
1175 gcc_assert (gimple_plf (stmt
, GF_PLF_1
) == ! matches
[j
]);
1178 /* If we have all children of child built up from scalars then
1179 just throw that away and build it up this node from scalars. */
1180 if (!SLP_TREE_CHILDREN (child
).is_empty ()
1181 /* ??? Rejecting patterns this way doesn't work. We'd have
1182 to do extra work to cancel the pattern so the uses see the
1184 && !is_pattern_stmt_p
1185 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child
)[0])))
1188 slp_tree grandchild
;
1190 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child
), j
, grandchild
)
1191 if (SLP_TREE_DEF_TYPE (grandchild
) == vect_internal_def
)
1196 this_loads
.truncate (old_nloads
);
1197 this_tree_size
= old_tree_size
;
1198 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child
), j
, grandchild
)
1199 vect_free_slp_tree (grandchild
);
1200 SLP_TREE_CHILDREN (child
).truncate (0);
1202 dump_printf_loc (MSG_NOTE
, vect_location
,
1203 "Building parent vector operands from "
1204 "scalars instead\n");
1205 oprnd_info
->def_stmts
= vNULL
;
1206 SLP_TREE_DEF_TYPE (child
) = vect_external_def
;
1207 children
.safe_push (child
);
1212 oprnd_info
->def_stmts
= vNULL
;
1213 children
.safe_push (child
);
1221 gcc_assert (child
== NULL
);
1222 FOR_EACH_VEC_ELT (children
, j
, child
)
1223 vect_free_slp_tree (child
);
1224 vect_free_oprnd_info (oprnds_info
);
1228 vect_free_oprnd_info (oprnds_info
);
1231 *tree_size
+= this_tree_size
;
1232 *max_nunits
= this_max_nunits
;
1233 loads
->safe_splice (this_loads
);
1235 node
= vect_create_new_slp_node (stmts
);
1236 SLP_TREE_TWO_OPERATORS (node
) = two_operators
;
1237 SLP_TREE_CHILDREN (node
).splice (children
);
1241 /* Dump a slp tree NODE using flags specified in DUMP_KIND. */
1244 vect_print_slp_tree (dump_flags_t dump_kind
, location_t loc
, slp_tree node
)
1250 dump_printf_loc (dump_kind
, loc
, "node%s\n",
1251 SLP_TREE_DEF_TYPE (node
) != vect_internal_def
1252 ? " (external)" : "");
1253 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
1255 dump_printf_loc (dump_kind
, loc
, "\tstmt %d ", i
);
1256 dump_gimple_stmt (dump_kind
, TDF_SLIM
, stmt
, 0);
1258 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1259 vect_print_slp_tree (dump_kind
, loc
, child
);
1263 /* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
1264 If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
1265 J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
1266 stmts in NODE are to be marked. */
1269 vect_mark_slp_stmts (slp_tree node
, enum slp_vect_type mark
, int j
)
/* Nothing to mark for external/constant nodes; presumably the missing
   line here is an early return — extraction gap, confirm in original.  */
1275 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
/* Mark either the single stmt at index J, or (J < 0) all stmts.  */
1278 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
1279 if (j
< 0 || i
== j
)
1280 STMT_SLP_TYPE (vinfo_for_stmt (stmt
)) = mark
;
/* Recurse into children with the same MARK/J.  */
1282 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1283 vect_mark_slp_stmts (child
, mark
, j
);
1287 /* Mark the statements of the tree rooted at NODE as relevant (vect_used). */
1290 vect_mark_slp_stmts_relevant (slp_tree node
)
1294 stmt_vec_info stmt_info
;
/* External/constant nodes carry no stmts to mark; the missing line after
   this condition is presumably an early return — extraction gap.  */
1297 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
/* Each stmt must be either not yet relevant or already used-in-scope;
   then force it to vect_used_in_scope.  */
1300 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
1302 stmt_info
= vinfo_for_stmt (stmt
);
1303 gcc_assert (!STMT_VINFO_RELEVANT (stmt_info
)
1304 || STMT_VINFO_RELEVANT (stmt_info
) == vect_used_in_scope
);
1305 STMT_VINFO_RELEVANT (stmt_info
) = vect_used_in_scope
;
/* Recurse into children.  */
1308 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1309 vect_mark_slp_stmts_relevant (child
);
1313 /* Rearrange the statements of NODE according to PERMUTATION. */
1316 vect_slp_rearrange_stmts (slp_tree node
, unsigned int group_size
,
1317 vec
<unsigned> permutation
)
1320 vec
<gimple
*> tmp_stmts
;
/* Recurse first so the whole tree is permuted consistently.  */
1324 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1325 vect_slp_rearrange_stmts (child
, group_size
, permutation
);
/* Build a scratch vector and scatter the stmts into permuted order.  */
1327 gcc_assert (group_size
== SLP_TREE_SCALAR_STMTS (node
).length ());
1328 tmp_stmts
.create (group_size
);
1329 tmp_stmts
.quick_grow_cleared (group_size
);
1331 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
1332 tmp_stmts
[permutation
[i
]] = stmt
;
/* Replace the old stmt vector; the old one is released here, so NODE
   takes ownership of TMP_STMTS.  */
1334 SLP_TREE_SCALAR_STMTS (node
).release ();
1335 SLP_TREE_SCALAR_STMTS (node
) = tmp_stmts
;
1339 /* Attempt to reorder stmts in a reduction chain so that we don't
1340 require any load permutation. Return true if that was possible,
1341 otherwise return false. */
1344 vect_attempt_slp_rearrange_stmts (slp_instance slp_instn
)
1346 unsigned int group_size
= SLP_INSTANCE_GROUP_SIZE (slp_instn
);
1349 slp_tree node
, load
;
1351 /* Compare all the permutation sequences to the first one. We know
1352 that at least one load is permuted. */
1353 node
= SLP_INSTANCE_LOADS (slp_instn
)[0];
1354 if (!node
->load_permutation
.exists ())
/* All load nodes must carry the identical permutation; any mismatch
   makes the rearrangement invalid (missing `return false` lines are
   extraction gaps — confirm against original).  */
1356 for (i
= 1; SLP_INSTANCE_LOADS (slp_instn
).iterate (i
, &load
); ++i
)
1358 if (!load
->load_permutation
.exists ())
1360 FOR_EACH_VEC_ELT (load
->load_permutation
, j
, lidx
)
1361 if (lidx
!= node
->load_permutation
[j
])
1365 /* Check that the loads in the first sequence are different and there
1366 are no gaps between them. */
1367 auto_sbitmap
load_index (group_size
);
1368 bitmap_clear (load_index
);
1369 FOR_EACH_VEC_ELT (node
->load_permutation
, i
, lidx
)
/* Out-of-range or duplicated index => not a bijection on the group.  */
1371 if (lidx
>= group_size
)
1373 if (bitmap_bit_p (load_index
, lidx
))
1376 bitmap_set_bit (load_index
, lidx
);
/* Every group slot must have been hit exactly once.  */
1378 for (i
= 0; i
< group_size
; i
++)
1379 if (!bitmap_bit_p (load_index
, i
))
1382 /* This permutation is valid for reduction. Since the order of the
1383 statements in the nodes is not important unless they are memory
1384 accesses, we can rearrange the statements in all the nodes
1385 according to the order of the loads. */
1386 vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn
), group_size
,
1387 node
->load_permutation
);
1389 /* We are done, no actual permutations need to be generated. */
1390 unsigned int unrolling_factor
= SLP_INSTANCE_UNROLLING_FACTOR (slp_instn
);
1391 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1393 gimple
*first_stmt
= SLP_TREE_SCALAR_STMTS (node
)[0];
1394 first_stmt
= GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt
));
1395 /* But we have to keep those permutations that are required because
1396 of handling of gaps. */
1397 if (unrolling_factor
== 1
1398 || (group_size
== GROUP_SIZE (vinfo_for_stmt (first_stmt
))
1399 && GROUP_GAP (vinfo_for_stmt (first_stmt
)) == 0))
1400 SLP_TREE_LOAD_PERMUTATION (node
).release ();
/* Otherwise reset the kept permutation to the identity, since the
   stmts themselves were already reordered above.  */
1402 for (j
= 0; j
< SLP_TREE_LOAD_PERMUTATION (node
).length (); ++j
)
1403 SLP_TREE_LOAD_PERMUTATION (node
)[j
] = j
;
1409 /* Check if the required load permutations in the SLP instance
1410 SLP_INSTN are supported. */
1413 vect_supported_load_permutation_p (slp_instance slp_instn
)
1415 unsigned int group_size
= SLP_INSTANCE_GROUP_SIZE (slp_instn
);
1416 unsigned int i
, j
, k
, next
;
1418 gimple
*stmt
, *load
, *next_load
;
/* Diagnostic dump of each load node's permutation (or the identity
   0..group_size-1 when none exists).  */
1420 if (dump_enabled_p ())
1422 dump_printf_loc (MSG_NOTE
, vect_location
, "Load permutation ");
1423 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1424 if (node
->load_permutation
.exists ())
1425 FOR_EACH_VEC_ELT (node
->load_permutation
, j
, next
)
1426 dump_printf (MSG_NOTE
, "%d ", next
);
1428 for (k
= 0; k
< group_size
; ++k
)
1429 dump_printf (MSG_NOTE
, "%d ", k
);
1430 dump_printf (MSG_NOTE
, "\n");
1433 /* In case of reduction every load permutation is allowed, since the order
1434 of the reduction statements is not important (as opposed to the case of
1435 grouped stores). The only condition we need to check is that all the
1436 load nodes are of the same size and have the same permutation (and then
1437 rearrange all the nodes of the SLP instance according to this
1440 /* Check that all the load nodes are of the same size. */
1441 /* ??? Can't we assert this? */
1442 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1443 if (SLP_TREE_SCALAR_STMTS (node
).length () != (unsigned) group_size
)
1446 node
= SLP_INSTANCE_TREE (slp_instn
);
1447 stmt
= SLP_TREE_SCALAR_STMTS (node
)[0];
1449 /* Reduction (there are no data-refs in the root).
1450 In reduction chain the order of the loads is not important. */
1451 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt
))
1452 && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)))
1453 vect_attempt_slp_rearrange_stmts (slp_instn
);
1455 /* In basic block vectorization we allow any subchain of an interleaving
1457 FORNOW: not supported in loop SLP because of realignment compications. */
1458 if (STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt
)))
1460 /* Check whether the loads in an instance form a subchain and thus
1461 no permutation is necessary. */
1462 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1464 if (!SLP_TREE_LOAD_PERMUTATION (node
).exists ())
1466 bool subchain_p
= true;
/* A subchain means each load follows its predecessor in the
   interleaving chain with gap 1.  */
1468 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), j
, load
)
1471 && (next_load
!= load
1472 || GROUP_GAP (vinfo_for_stmt (load
)) != 1))
1477 next_load
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (load
));
/* Subchain: drop the permutation entirely.  */
1480 SLP_TREE_LOAD_PERMUTATION (node
).release ();
1483 stmt_vec_info group_info
1484 = vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node
)[0]);
1485 group_info
= vinfo_for_stmt (GROUP_FIRST_ELEMENT (group_info
));
1487 = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (group_info
));
1488 unsigned k
, maxk
= 0;
1489 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (node
), j
, k
)
1492 /* In BB vectorization we may not actually use a loaded vector
1493 accessing elements in excess of GROUP_SIZE. */
1494 if (maxk
>= (GROUP_SIZE (group_info
) & ~(nunits
- 1)))
1496 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1497 "BB vectorization with gaps at the end of "
1498 "a load is not supported\n");
1502 /* Verify the permutation can be generated. */
/* Analysis-only call (analyze_only=true): no code is emitted here.  */
1505 if (!vect_transform_slp_perm_load (node
, tem
, NULL
,
1506 1, slp_instn
, true, &n_perms
))
1508 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
1510 "unsupported load permutation\n");
1518 /* For loop vectorization verify we can generate the permutation. */
1520 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1521 if (node
->load_permutation
.exists ()
1522 && !vect_transform_slp_perm_load
1524 SLP_INSTANCE_UNROLLING_FACTOR (slp_instn
), slp_instn
, true,
1532 /* Find the last store in SLP INSTANCE. */
1535 vect_find_last_scalar_stmt_in_slp (slp_tree node
)
1537 gimple
*last
= NULL
, *stmt
;
/* Scan all scalar stmts of the node keeping the latest one in program
   order; for pattern stmts compare against the original (related) stmt.  */
1539 for (int i
= 0; SLP_TREE_SCALAR_STMTS (node
).iterate (i
, &stmt
); i
++)
1541 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
1542 if (is_pattern_stmt_p (stmt_vinfo
))
1543 last
= get_later_stmt (STMT_VINFO_RELATED_STMT (stmt_vinfo
), last
);
1545 last
= get_later_stmt (stmt
, last
);
1551 /* Compute the cost for the SLP node NODE in the SLP instance INSTANCE. */
1554 vect_analyze_slp_cost_1 (slp_instance instance
, slp_tree node
,
1555 stmt_vector_for_cost
*prologue_cost_vec
,
1556 stmt_vector_for_cost
*body_cost_vec
,
1557 unsigned ncopies_for_cost
)
1562 stmt_vec_info stmt_info
;
1565 /* Recurse down the SLP tree. */
1566 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1567 if (SLP_TREE_DEF_TYPE (child
) == vect_internal_def
)
1568 vect_analyze_slp_cost_1 (instance
, child
, prologue_cost_vec
,
1569 body_cost_vec
, ncopies_for_cost
)
;
1571 /* Look at the first scalar stmt to determine the cost. */
1572 stmt
= SLP_TREE_SCALAR_STMTS (node
)[0];
1573 stmt_info
= vinfo_for_stmt (stmt
);
1574 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
1576 vect_memory_access_type memory_access_type
1577 = (STMT_VINFO_STRIDED_P (stmt_info
)
/* Grouped store: cost it directly.  */
1580 if (DR_IS_WRITE (STMT_VINFO_DATA_REF (stmt_info
)))
1581 vect_model_store_cost (stmt_info
, ncopies_for_cost
,
1582 memory_access_type
, vect_uninitialized_def
,
1583 node
, prologue_cost_vec
, body_cost_vec
);
/* Grouped load: account for the permutation and the loads.  */
1586 gcc_checking_assert (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info
)));
1587 if (SLP_TREE_LOAD_PERMUTATION (node
).exists ())
1589 /* If the load is permuted then the alignment is determined by
1590 the first group element not by the first scalar stmt DR. */
1591 stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
1592 stmt_info
= vinfo_for_stmt (stmt
);
1593 /* Record the cost for the permutation. */
1595 vect_transform_slp_perm_load (node
, vNULL
, NULL
,
1596 ncopies_for_cost
, instance
, true,
1598 record_stmt_cost (body_cost_vec
, n_perms
, vec_perm
,
1599 stmt_info
, 0, vect_body
);
1601 = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info
));
1602 /* And adjust the number of loads performed. This handles
1603 redundancies as well as loads that are later dead. */
1604 auto_sbitmap
perm (GROUP_SIZE (stmt_info
));
1605 bitmap_clear (perm
);
1606 for (i
= 0; i
< SLP_TREE_LOAD_PERMUTATION (node
).length (); ++i
)
1607 bitmap_set_bit (perm
, SLP_TREE_LOAD_PERMUTATION (node
)[i
]);
/* Count one vector load per nunits-sized window that contains at
   least one used element (interior lines missing here — extraction
   gap, confirm against original).  */
1608 ncopies_for_cost
= 0;
1609 bool load_seen
= false;
1610 for (i
= 0; i
< GROUP_SIZE (stmt_info
); ++i
)
1612 if (i
% nunits
== 0)
1618 if (bitmap_bit_p (perm
, i
))
1623 gcc_assert (ncopies_for_cost
1624 <= (GROUP_SIZE (stmt_info
) - GROUP_GAP (stmt_info
)
1625 + nunits
- 1) / nunits
);
1626 ncopies_for_cost
*= SLP_INSTANCE_UNROLLING_FACTOR (instance
);
1628 /* Record the cost for the vector loads. */
1629 vect_model_load_cost (stmt_info
, ncopies_for_cost
,
1630 memory_access_type
, node
, prologue_cost_vec
,
1635 else if (STMT_VINFO_TYPE (stmt_info
) == induc_vec_info_type
)
1637 /* ncopies_for_cost is the number of IVs we generate. */
1638 record_stmt_cost (body_cost_vec
, ncopies_for_cost
, vector_stmt
,
1639 stmt_info
, 0, vect_body
);
1641 /* Prologue cost for the initial values and step vector. */
1642 record_stmt_cost (prologue_cost_vec
, ncopies_for_cost
,
1644 (STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED
1646 ? vector_load
: vec_construct
,
1647 stmt_info
, 0, vect_prologue
);
1648 record_stmt_cost (prologue_cost_vec
, 1,
1650 (STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_info
))
1651 ? vector_load
: vec_construct
,
1652 stmt_info
, 0, vect_prologue
);
1654 /* ??? No easy way to get at the actual number of vector stmts
1655 to be geneated and thus the derived IVs. */
/* Generic stmt: one vector stmt per copy; two-operator nodes need an
   extra stmt plus a blend permutation.  */
1659 record_stmt_cost (body_cost_vec
, ncopies_for_cost
, vector_stmt
,
1660 stmt_info
, 0, vect_body
);
1661 if (SLP_TREE_TWO_OPERATORS (node
))
1663 record_stmt_cost (body_cost_vec
, ncopies_for_cost
, vector_stmt
,
1664 stmt_info
, 0, vect_body
);
1665 record_stmt_cost (body_cost_vec
, ncopies_for_cost
, vec_perm
,
1666 stmt_info
, 0, vect_body
);
1670 /* Push SLP node def-type to stmts. */
1671 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1672 if (SLP_TREE_DEF_TYPE (child
) != vect_internal_def
)
1673 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child
), j
, stmt
)
1674 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt
)) = SLP_TREE_DEF_TYPE (child
);
1676 /* Scan operands and account for prologue cost of constants/externals.
1677 ??? This over-estimates cost for multiple uses and should be
1679 stmt
= SLP_TREE_SCALAR_STMTS (node
)[0];
1680 lhs
= gimple_get_lhs (stmt
);
1681 for (i
= 0; i
< gimple_num_ops (stmt
); ++i
)
1683 tree op
= gimple_op (stmt
, i
);
1685 enum vect_def_type dt
;
1686 if (!op
|| op
== lhs
)
1688 if (vect_is_simple_use (op
, stmt_info
->vinfo
, &def_stmt
, &dt
))
1690 /* Without looking at the actual initializer a vector of
1691 constants can be implemented as load from the constant pool.
1692 ??? We need to pass down stmt_info for a vector type
1693 even if it points to the wrong stmt. */
1694 if (dt
== vect_constant_def
)
1695 record_stmt_cost (prologue_cost_vec
, 1, vector_load
,
1696 stmt_info
, 0, vect_prologue
);
1697 else if (dt
== vect_external_def
)
1698 record_stmt_cost (prologue_cost_vec
, 1, vec_construct
,
1699 stmt_info
, 0, vect_prologue
);
1703 /* Restore stmt def-types. */
1704 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1705 if (SLP_TREE_DEF_TYPE (child
) != vect_internal_def
)
1706 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child
), j
, stmt
)
1707 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt
)) = vect_internal_def
;
1710 /* Compute the cost for the SLP instance INSTANCE. */
1713 vect_analyze_slp_cost (slp_instance instance
, void *data
)
1715 stmt_vector_for_cost body_cost_vec
, prologue_cost_vec
;
1716 unsigned ncopies_for_cost
;
1717 stmt_info_for_cost
*si
;
1720 if (dump_enabled_p ())
1721 dump_printf_loc (MSG_NOTE
, vect_location
,
1722 "=== vect_analyze_slp_cost ===\n");
1724 /* Calculate the number of vector stmts to create based on the unrolling
1725 factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
1726 GROUP_SIZE / NUNITS otherwise. */
1727 unsigned group_size
= SLP_INSTANCE_GROUP_SIZE (instance
);
1728 slp_tree node
= SLP_INSTANCE_TREE (instance
);
1729 stmt_vec_info stmt_info
= vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node
)[0]);
1730 /* Adjust the group_size by the vectorization factor which is always one
1731 for basic-block vectorization. */
1732 if (STMT_VINFO_LOOP_VINFO (stmt_info
))
1733 group_size
*= LOOP_VINFO_VECT_FACTOR (STMT_VINFO_LOOP_VINFO (stmt_info
));
1734 unsigned nunits
= TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info
));
1735 /* For reductions look at a reduction operand in case the reduction
1736 operation is widening like DOT_PROD or SAD. */
1737 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info
))
1739 gimple
*stmt
= SLP_TREE_SCALAR_STMTS (node
)[0];
/* Switch cases (e.g. DOT_PROD_EXPR/SAD_EXPR) are missing from this
   extraction — confirm against the original source.  */
1740 switch (gimple_assign_rhs_code (stmt
))
1744 nunits
= TYPE_VECTOR_SUBPARTS (get_vectype_for_scalar_type
1745 (TREE_TYPE (gimple_assign_rhs1 (stmt
))));
1750 ncopies_for_cost
= least_common_multiple (nunits
, group_size
) / nunits
;
1752 prologue_cost_vec
.create (10);
1753 body_cost_vec
.create (10);
1754 vect_analyze_slp_cost_1 (instance
, SLP_INSTANCE_TREE (instance
),
1755 &prologue_cost_vec
, &body_cost_vec
,
1758 /* Record the prologue costs, which were delayed until we were
1759 sure that SLP was successful. */
1760 FOR_EACH_VEC_ELT (prologue_cost_vec
, i
, si
)
1762 struct _stmt_vec_info
*stmt_info
1763 = si
->stmt
? vinfo_for_stmt (si
->stmt
) : NULL
;
1764 (void) add_stmt_cost (data
, si
->count
, si
->kind
, stmt_info
,
1765 si
->misalign
, vect_prologue
);
1768 /* Record the instance's instructions in the target cost model. */
1769 FOR_EACH_VEC_ELT (body_cost_vec
, i
, si
)
1771 struct _stmt_vec_info
*stmt_info
1772 = si
->stmt
? vinfo_for_stmt (si
->stmt
) : NULL
;
1773 (void) add_stmt_cost (data
, si
->count
, si
->kind
, stmt_info
,
1774 si
->misalign
, vect_body
);
/* Both cost vectors are owned locally and released before return.  */
1777 prologue_cost_vec
.release ();
1778 body_cost_vec
.release ();
1781 /* Splits a group of stores, currently beginning at FIRST_STMT, into two groups:
1782 one (still beginning at FIRST_STMT) of size GROUP1_SIZE (also containing
1783 the first GROUP1_SIZE stmts, since stores are consecutive), the second
1784 containing the remainder.
1785 Return the first stmt in the second group. */
1788 vect_split_slp_store_group (gimple
*first_stmt
, unsigned group1_size
)
1790 stmt_vec_info first_vinfo
= vinfo_for_stmt (first_stmt
);
1791 gcc_assert (GROUP_FIRST_ELEMENT (first_vinfo
) == first_stmt
);
1792 gcc_assert (group1_size
> 0);
1793 int group2_size
= GROUP_SIZE (first_vinfo
) - group1_size
;
1794 gcc_assert (group2_size
> 0);
1795 GROUP_SIZE (first_vinfo
) = group1_size
;
/* Walk to the last stmt of the first group; each link must have gap 1
   since stores are consecutive.  */
1797 gimple
*stmt
= first_stmt
;
1798 for (unsigned i
= group1_size
; i
> 1; i
--)
1800 stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt
));
1801 gcc_assert (GROUP_GAP (vinfo_for_stmt (stmt
)) == 1);
1803 /* STMT is now the last element of the first group. */
1804 gimple
*group2
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt
));
1805 GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt
)) = 0;
/* Re-head the second chain: every member now points at GROUP2.  */
1807 GROUP_SIZE (vinfo_for_stmt (group2
)) = group2_size
;
1808 for (stmt
= group2
; stmt
; stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt
)))
1810 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)) = group2
;
1811 gcc_assert (GROUP_GAP (vinfo_for_stmt (stmt
)) == 1);
1814 /* For the second group, the GROUP_GAP is that before the original group,
1815 plus skipping over the first vector. */
1816 GROUP_GAP (vinfo_for_stmt (group2
)) =
1817 GROUP_GAP (first_vinfo
) + group1_size
;
1819 /* GROUP_GAP of the first group now has to skip over the second group too. */
1820 GROUP_GAP (first_vinfo
) += group2_size
;
1822 if (dump_enabled_p ())
1823 dump_printf_loc (MSG_NOTE
, vect_location
, "Split group into %d and %d\n",
1824 group1_size
, group2_size
);
1829 /* Analyze an SLP instance starting from a group of grouped stores. Call
1830 vect_build_slp_tree to build a tree of packed stmts if possible.
1831 Return FALSE if it's impossible to SLP any stmt in the loop. */
1834 vect_analyze_slp_instance (vec_info
*vinfo
,
1835 gimple
*stmt
, unsigned max_tree_size
)
1837 slp_instance new_instance
;
1839 unsigned int group_size
= GROUP_SIZE (vinfo_for_stmt (stmt
));
1840 unsigned int unrolling_factor
= 1, nunits
;
1841 tree vectype
, scalar_type
= NULL_TREE
;
1844 unsigned int max_nunits
= 0;
1845 vec
<slp_tree
> loads
;
1846 struct data_reference
*dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt
));
1847 vec
<gimple
*> scalar_stmts
;
/* Three entry kinds: grouped store, reduction chain, reduction group
   (the branch structure around these lines is partly missing from this
   extraction — confirm against the original).  */
1849 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)))
1853 scalar_type
= TREE_TYPE (DR_REF (dr
));
1854 vectype
= get_vectype_for_scalar_type (scalar_type
);
1858 gcc_assert (is_a
<loop_vec_info
> (vinfo
));
1859 vectype
= STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt
));
1862 group_size
= GROUP_SIZE (vinfo_for_stmt (stmt
));
1866 gcc_assert (is_a
<loop_vec_info
> (vinfo
));
1867 vectype
= STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt
));
1868 group_size
= as_a
<loop_vec_info
> (vinfo
)->reductions
.length ();
/* Bail out with a diagnostic when no vectype can be found.  */
1873 if (dump_enabled_p ())
1875 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1876 "Build SLP failed: unsupported data-type ");
1877 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, scalar_type
);
1878 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
1883 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
1885 /* Create a node (a root of the SLP tree) for the packed grouped stores. */
1886 scalar_stmts
.create (group_size
);
1888 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)))
1890 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
/* Prefer the pattern stmt over the original when one exists.  */
1893 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next
))
1894 && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next
)))
1895 scalar_stmts
.safe_push (
1896 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next
)));
1898 scalar_stmts
.safe_push (next
);
1899 next
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next
));
1901 /* Mark the first element of the reduction chain as reduction to properly
1902 transform the node. In the reduction analysis phase only the last
1903 element of the chain is marked as reduction. */
1904 if (!STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt
)))
1905 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt
)) = vect_reduction_def
;
1909 /* Collect reduction statements. */
1910 vec
<gimple
*> reductions
= as_a
<loop_vec_info
> (vinfo
)->reductions
;
1911 for (i
= 0; reductions
.iterate (i
, &next
); i
++)
1912 scalar_stmts
.safe_push (next
);
1915 loads
.create (group_size
);
1917 /* Build the tree for the SLP instance. */
1918 bool *matches
= XALLOCAVEC (bool, group_size
);
1919 unsigned npermutes
= 0;
1920 node
= vect_build_slp_tree (vinfo
, scalar_stmts
, group_size
,
1921 &max_nunits
, &loads
, matches
, &npermutes
,
1922 NULL
, max_tree_size
);
1925 /* Calculate the unrolling factor based on the smallest type. */
1927 = least_common_multiple (max_nunits
, group_size
) / group_size
;
/* BB SLP cannot unroll; reject groups that would require it.  */
1929 if (unrolling_factor
!= 1
1930 && is_a
<bb_vec_info
> (vinfo
))
1933 if (max_nunits
> group_size
)
1935 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1936 "Build SLP failed: store group "
1937 "size not a multiple of the vector size "
1938 "in basic block SLP\n");
1939 vect_free_slp_tree (node
);
1943 /* Fatal mismatch. */
1944 matches
[group_size
/max_nunits
* max_nunits
] = false;
1945 vect_free_slp_tree (node
);
1950 /* Create a new SLP instance. */
1951 new_instance
= XNEW (struct _slp_instance
);
1952 SLP_INSTANCE_TREE (new_instance
) = node
;
1953 SLP_INSTANCE_GROUP_SIZE (new_instance
) = group_size
;
1954 SLP_INSTANCE_UNROLLING_FACTOR (new_instance
) = unrolling_factor
;
1955 SLP_INSTANCE_LOADS (new_instance
) = loads
;
1957 /* Compute the load permutation. */
1959 bool loads_permuted
= false;
1960 FOR_EACH_VEC_ELT (loads
, i
, load_node
)
1962 vec
<unsigned> load_permutation
;
1964 gimple
*load
, *first_stmt
;
1965 bool this_load_permuted
= false;
1966 load_permutation
.create (group_size
);
1967 first_stmt
= GROUP_FIRST_ELEMENT
1968 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node
)[0]));
1969 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node
), j
, load
)
1971 int load_place
= vect_get_place_in_interleaving_chain
1973 gcc_assert (load_place
!= -1);
1974 if (load_place
!= j
)
1975 this_load_permuted
= true;
1976 load_permutation
.safe_push (load_place
);
1978 if (!this_load_permuted
1979 /* The load requires permutation when unrolling exposes
1980 a gap either because the group is larger than the SLP
1981 group-size or because there is a gap between the groups. */
1982 && (unrolling_factor
== 1
1983 || (group_size
== GROUP_SIZE (vinfo_for_stmt (first_stmt
))
1984 && GROUP_GAP (vinfo_for_stmt (first_stmt
)) == 0)))
1986 load_permutation
.release ();
1989 SLP_TREE_LOAD_PERMUTATION (load_node
) = load_permutation
;
1990 loads_permuted
= true;
1995 if (!vect_supported_load_permutation_p (new_instance
))
1997 if (dump_enabled_p ())
1999 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2000 "Build SLP failed: unsupported load "
2002 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
,
2005 vect_free_slp_instance (new_instance
);
2010 /* If the loads and stores can be handled with load/store-lan
2011 instructions do not generate this SLP instance. */
2012 if (is_a
<loop_vec_info
> (vinfo
)
2014 && dr
&& vect_store_lanes_supported (vectype
, group_size
))
2017 FOR_EACH_VEC_ELT (loads
, i
, load_node
)
2019 gimple
*first_stmt
= GROUP_FIRST_ELEMENT
2020 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node
)[0]));
2021 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (first_stmt
);
2022 /* Use SLP for strided accesses (or if we
2023 can't load-lanes). */
2024 if (STMT_VINFO_STRIDED_P (stmt_vinfo
)
2025 || ! vect_load_lanes_supported
2026 (STMT_VINFO_VECTYPE (stmt_vinfo
),
2027 GROUP_SIZE (stmt_vinfo
)))
/* All loads were load-lanes capable => cancel this instance.  */
2030 if (i
== loads
.length ())
2032 if (dump_enabled_p ())
2033 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2034 "Built SLP cancelled: can use "
2035 "load/store-lanes\n");
2036 vect_free_slp_instance (new_instance
);
2041 vinfo
->slp_instances
.safe_push (new_instance
);
2043 if (dump_enabled_p ())
2045 dump_printf_loc (MSG_NOTE
, vect_location
,
2046 "Final SLP tree for instance:\n");
2047 vect_print_slp_tree (MSG_NOTE
, vect_location
, node
);
2055 /* Failed to SLP. */
2056 /* Free the allocated memory. */
2057 scalar_stmts
.release ();
2061 /* For basic block SLP, try to break the group up into multiples of the
2063 if (is_a
<bb_vec_info
> (vinfo
)
2064 && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
))
2065 && STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt
)))
2067 /* We consider breaking the group only on VF boundaries from the existing
2069 for (i
= 0; i
< group_size
; i
++)
2070 if (!matches
[i
]) break;
2072 if (i
>= nunits
&& i
< group_size
)
2074 /* Split into two groups at the first vector boundary before i. */
2075 gcc_assert ((nunits
& (nunits
- 1)) == 0);
2076 unsigned group1_size
= i
& ~(nunits
- 1);
2078 gimple
*rest
= vect_split_slp_store_group (stmt
, group1_size
);
2079 bool res
= vect_analyze_slp_instance (vinfo
, stmt
, max_tree_size
);
2080 /* If the first non-match was in the middle of a vector,
2081 skip the rest of that vector. */
2082 if (group1_size
< i
)
2084 i
= group1_size
+ nunits
;
2086 rest
= vect_split_slp_store_group (rest
, nunits
);
2089 res
|= vect_analyze_slp_instance (vinfo
, rest
, max_tree_size
);
2092 /* Even though the first vector did not all match, we might be able to SLP
2093 (some) of the remainder. FORNOW ignore this possibility. */
2100 /* Check if there are stmts in the loop can be vectorized using SLP. Build SLP
2101 trees of packed scalar stmts if SLP is possible. */
2104 vect_analyze_slp (vec_info
*vinfo
, unsigned max_tree_size
)
2107 gimple
*first_element
;
2109 if (dump_enabled_p ())
2110 dump_printf_loc (MSG_NOTE
, vect_location
, "=== vect_analyze_slp ===\n");
2112 /* Find SLP sequences starting from groups of grouped stores. */
2113 FOR_EACH_VEC_ELT (vinfo
->grouped_stores
, i
, first_element
)
2114 vect_analyze_slp_instance (vinfo
, first_element
, max_tree_size
);
2116 if (loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
))
2118 if (loop_vinfo
->reduction_chains
.length () > 0)
2120 /* Find SLP sequences starting from reduction chains. */
2121 FOR_EACH_VEC_ELT (loop_vinfo
->reduction_chains
, i
, first_element
)
2122 if (! vect_analyze_slp_instance (vinfo
, first_element
,
2125 /* Dissolve reduction chain group. */
/* NOTE(review): local `vinfo` shadows the parameter here (stmt_vec_info
   vs vec_info *) — present in the original source too.  */
2126 gimple
*next
, *stmt
= first_element
;
2129 stmt_vec_info vinfo
= vinfo_for_stmt (stmt
);
2130 next
= GROUP_NEXT_ELEMENT (vinfo
);
2131 GROUP_FIRST_ELEMENT (vinfo
) = NULL
;
2132 GROUP_NEXT_ELEMENT (vinfo
) = NULL
;
2135 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (first_element
))
2136 = vect_internal_def
;
2140 /* Find SLP sequences starting from groups of reductions. */
2141 if (loop_vinfo
->reductions
.length () > 1)
2142 vect_analyze_slp_instance (vinfo
, loop_vinfo
->reductions
[0],
2150 /* For each possible SLP instance decide whether to SLP it and calculate overall
2151 unrolling factor needed to SLP the loop. Return TRUE if decided to SLP at
2152 least one instance. */
2155 vect_make_slp_decision (loop_vec_info loop_vinfo
)
2157 unsigned int i
, unrolling_factor
= 1;
2158 vec
<slp_instance
> slp_instances
= LOOP_VINFO_SLP_INSTANCES (loop_vinfo
);
2159 slp_instance instance
;
2160 int decided_to_slp
= 0;
2162 if (dump_enabled_p ())
2163 dump_printf_loc (MSG_NOTE
, vect_location
, "=== vect_make_slp_decision ==="
2166 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
2168 /* FORNOW: SLP if you can. */
/* Track the maximum unrolling factor over all accepted instances.  */
2169 if (unrolling_factor
< SLP_INSTANCE_UNROLLING_FACTOR (instance
))
2170 unrolling_factor
= SLP_INSTANCE_UNROLLING_FACTOR (instance
);
2172 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
2173 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
2174 loop-based vectorization. Such stmts will be marked as HYBRID. */
2175 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance
), pure_slp
, -1);
2179 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo
) = unrolling_factor
;
2181 if (decided_to_slp
&& dump_enabled_p ())
2182 dump_printf_loc (MSG_NOTE
, vect_location
,
2183 "Decided to SLP %d instances. Unrolling factor %d\n",
2184 decided_to_slp
, unrolling_factor
);
2186 return (decided_to_slp
> 0);
2190 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
2191 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
2194 vect_detect_hybrid_slp_stmts (slp_tree node
, unsigned i
, slp_vect_type stype
)
2196 gimple
*stmt
= SLP_TREE_SCALAR_STMTS (node
)[i
];
2197 imm_use_iterator imm_iter
;
2199 stmt_vec_info use_vinfo
, stmt_vinfo
= vinfo_for_stmt (stmt
);
2201 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
2202 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2205 /* Propagate hybrid down the SLP tree. */
2206 if (stype
== hybrid
)
2208 else if (HYBRID_SLP_STMT (stmt_vinfo
))
2212 /* Check if a pure SLP stmt has uses in non-SLP stmts. */
2213 gcc_checking_assert (PURE_SLP_STMT (stmt_vinfo
));
2214 /* If we get a pattern stmt here we have to use the LHS of the
2215 original stmt for immediate uses. */
2216 if (! STMT_VINFO_IN_PATTERN_P (stmt_vinfo
)
2217 && STMT_VINFO_RELATED_STMT (stmt_vinfo
))
2218 stmt
= STMT_VINFO_RELATED_STMT (stmt_vinfo
);
/* Get the single SSA def to walk immediate uses of.  */
2220 if (gimple_code (stmt
) == GIMPLE_PHI
)
2221 def
= gimple_phi_result (stmt
);
2223 def
= SINGLE_SSA_TREE_OPERAND (stmt
, SSA_OP_DEF
);
2225 FOR_EACH_IMM_USE_STMT (use_stmt
, imm_iter
, def
)
/* Uses outside the loop are irrelevant for hybrid detection.  */
2227 if (!flow_bb_inside_loop_p (loop
, gimple_bb (use_stmt
)))
2229 use_vinfo
= vinfo_for_stmt (use_stmt
);
2230 if (STMT_VINFO_IN_PATTERN_P (use_vinfo
)
2231 && STMT_VINFO_RELATED_STMT (use_vinfo
))
2232 use_vinfo
= vinfo_for_stmt (STMT_VINFO_RELATED_STMT (use_vinfo
));
/* A relevant non-SLP use (except the reduction PHI itself) makes
   this stmt hybrid.  */
2233 if (!STMT_SLP_TYPE (use_vinfo
)
2234 && (STMT_VINFO_RELEVANT (use_vinfo
)
2235 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo
)))
2236 && !(gimple_code (use_stmt
) == GIMPLE_PHI
2237 && STMT_VINFO_DEF_TYPE (use_vinfo
) == vect_reduction_def
))
2239 if (dump_enabled_p ())
2241 dump_printf_loc (MSG_NOTE
, vect_location
, "use of SLP "
2242 "def in non-SLP stmt: ");
2243 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, use_stmt
, 0);
2251 && !HYBRID_SLP_STMT (stmt_vinfo
))
2253 if (dump_enabled_p ())
2255 dump_printf_loc (MSG_NOTE
, vect_location
, "marking hybrid: ");
2256 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
2258 STMT_SLP_TYPE (stmt_vinfo
) = hybrid
;
/* Recurse; external children's stmts are not part of the SLP tree.  */
2261 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), j
, child
)
2262 if (SLP_TREE_DEF_TYPE (child
) != vect_external_def
)
2263 vect_detect_hybrid_slp_stmts (child
, i
, stype
);
2266 /* Helpers for vect_detect_hybrid_slp walking pattern stmt uses. */
2269 vect_detect_hybrid_slp_1 (tree
*tp
, int *, void *data
)
2271 walk_stmt_info
*wi
= (walk_stmt_info
*)data
;
2272 struct loop
*loopp
= (struct loop
*)wi
->info
;
/* For each SSA operand defined by a pure-SLP stmt inside the loop,
   mark that defining stmt hybrid.  */
2277 if (TREE_CODE (*tp
) == SSA_NAME
2278 && !SSA_NAME_IS_DEFAULT_DEF (*tp
))
2280 gimple
*def_stmt
= SSA_NAME_DEF_STMT (*tp
);
2281 if (flow_bb_inside_loop_p (loopp
, gimple_bb (def_stmt
))
2282 && PURE_SLP_STMT (vinfo_for_stmt (def_stmt
)))
2284 if (dump_enabled_p ())
2286 dump_printf_loc (MSG_NOTE
, vect_location
, "marking hybrid: ");
2287 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, def_stmt
, 0);
2289 STMT_SLP_TYPE (vinfo_for_stmt (def_stmt
)) = hybrid
;
/* Callback deciding whether to walk the operands of the stmt at *GSI:
   only stmts that are relevant/cycle defs and not themselves SLP
   (excluding reduction PHIs) trigger the operand walk.  */
2297 vect_detect_hybrid_slp_2 (gimple_stmt_iterator
*gsi
, bool *handled
,
2300 stmt_vec_info use_vinfo
= vinfo_for_stmt (gsi_stmt (*gsi
));
2301 /* If the stmt is in a SLP instance then this isn't a reason
2302 to mark use definitions in other SLP instances as hybrid. */
2303 if (! STMT_SLP_TYPE (use_vinfo
)
2304 && (STMT_VINFO_RELEVANT (use_vinfo
)
2305 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo
)))
2306 && ! (gimple_code (gsi_stmt (*gsi
)) == GIMPLE_PHI
2307 && STMT_VINFO_DEF_TYPE (use_vinfo
) == vect_reduction_def
))
2314 /* Find stmts that must be both vectorized and SLPed. */
2317 vect_detect_hybrid_slp (loop_vec_info loop_vinfo
)
2320 vec
<slp_instance
> slp_instances
= LOOP_VINFO_SLP_INSTANCES (loop_vinfo
);
2321 slp_instance instance
;
2323 if (dump_enabled_p ())
2324 dump_printf_loc (MSG_NOTE
, vect_location
, "=== vect_detect_hybrid_slp ==="
2327 /* First walk all pattern stmt in the loop and mark defs of uses as
2328 hybrid because immediate uses in them are not recorded. */
2329 for (i
= 0; i
< LOOP_VINFO_LOOP (loop_vinfo
)->num_nodes
; ++i
)
2331 basic_block bb
= LOOP_VINFO_BBS (loop_vinfo
)[i
];
2332 for (gimple_stmt_iterator gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
);
2335 gimple
*stmt
= gsi_stmt (gsi
);
2336 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2337 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
/* Walk the pattern stmt and its def-sequence with the helper
   callbacks, passing the loop via walk_stmt_info.info.  */
2340 memset (&wi
, 0, sizeof (wi
));
2341 wi
.info
= LOOP_VINFO_LOOP (loop_vinfo
);
2342 gimple_stmt_iterator gsi2
2343 = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info
));
2344 walk_gimple_stmt (&gsi2
, vect_detect_hybrid_slp_2
,
2345 vect_detect_hybrid_slp_1
, &wi
);
2346 walk_gimple_seq (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
),
2347 vect_detect_hybrid_slp_2
,
2348 vect_detect_hybrid_slp_1
, &wi
);
2353 /* Then walk the SLP instance trees marking stmts with uses in
2354 non-SLP stmts as hybrid, also propagating hybrid down the
2355 SLP tree, collecting the above info on-the-fly. */
2356 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
2358 for (unsigned i
= 0; i
< SLP_INSTANCE_GROUP_SIZE (instance
); ++i
)
2359 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance
),
2365 /* Create and initialize a new bb_vec_info struct for BB, as well as
2366 stmt_vec_info structs for all the stmts in it. */
2369 new_bb_vec_info (gimple_stmt_iterator region_begin
,
2370 gimple_stmt_iterator region_end
)
2372 basic_block bb
= gsi_bb (region_begin
);
2373 bb_vec_info res
= NULL
;
2374 gimple_stmt_iterator gsi
;
2376 res
= (bb_vec_info
) xcalloc (1, sizeof (struct _bb_vec_info
));
2377 res
->kind
= vec_info::bb
;
2378 BB_VINFO_BB (res
) = bb
;
2379 res
->region_begin
= region_begin
;
2380 res
->region_end
= region_end
;
2382 for (gsi
= region_begin
; gsi_stmt (gsi
) != gsi_stmt (region_end
);
2385 gimple
*stmt
= gsi_stmt (gsi
);
2386 gimple_set_uid (stmt
, 0);
2387 set_vinfo_for_stmt (stmt
, new_stmt_vec_info (stmt
, res
));
2390 BB_VINFO_GROUPED_STORES (res
).create (10);
2391 BB_VINFO_SLP_INSTANCES (res
).create (2);
2392 BB_VINFO_TARGET_COST_DATA (res
) = init_cost (NULL
);
2399 /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
2400 stmts in the basic block. */
2403 destroy_bb_vec_info (bb_vec_info bb_vinfo
)
2405 slp_instance instance
;
2411 vect_destroy_datarefs (bb_vinfo
);
2412 free_dependence_relations (BB_VINFO_DDRS (bb_vinfo
));
2413 BB_VINFO_GROUPED_STORES (bb_vinfo
).release ();
2414 FOR_EACH_VEC_ELT (BB_VINFO_SLP_INSTANCES (bb_vinfo
), i
, instance
)
2415 vect_free_slp_instance (instance
);
2416 BB_VINFO_SLP_INSTANCES (bb_vinfo
).release ();
2417 destroy_cost_data (BB_VINFO_TARGET_COST_DATA (bb_vinfo
));
2419 for (gimple_stmt_iterator si
= bb_vinfo
->region_begin
;
2420 gsi_stmt (si
) != gsi_stmt (bb_vinfo
->region_end
); gsi_next (&si
))
2422 gimple
*stmt
= gsi_stmt (si
);
2423 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2426 /* Free stmt_vec_info. */
2427 free_stmt_vec_info (stmt
);
2429 /* Reset region marker. */
2430 gimple_set_uid (stmt
, -1);
2433 BB_VINFO_BB (bb_vinfo
)->aux
= NULL
;
2438 /* Analyze statements contained in SLP tree node after recursively analyzing
2439 the subtree. Return TRUE if the operations are supported. */
2442 vect_slp_analyze_node_operations (slp_tree node
)
2449 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
2452 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
2453 if (!vect_slp_analyze_node_operations (child
))
2456 stmt
= SLP_TREE_SCALAR_STMTS (node
)[0];
2457 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2458 gcc_assert (stmt_info
);
2459 gcc_assert (STMT_SLP_TYPE (stmt_info
) != loop_vect
);
2461 /* For BB vectorization vector types are assigned here.
2462 Memory accesses already got their vector type assigned
2463 in vect_analyze_data_refs. */
2464 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
2466 && ! STMT_VINFO_DATA_REF (stmt_info
))
2468 gcc_assert (PURE_SLP_STMT (stmt_info
));
2470 tree scalar_type
= TREE_TYPE (gimple_get_lhs (stmt
));
2471 if (dump_enabled_p ())
2473 dump_printf_loc (MSG_NOTE
, vect_location
,
2474 "get vectype for scalar type: ");
2475 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, scalar_type
);
2476 dump_printf (MSG_NOTE
, "\n");
2479 tree vectype
= get_vectype_for_scalar_type (scalar_type
);
2482 if (dump_enabled_p ())
2484 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2485 "not SLPed: unsupported data-type ");
2486 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
2488 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
2493 if (dump_enabled_p ())
2495 dump_printf_loc (MSG_NOTE
, vect_location
, "vectype: ");
2496 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, vectype
);
2497 dump_printf (MSG_NOTE
, "\n");
2501 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, sstmt
)
2502 STMT_VINFO_VECTYPE (vinfo_for_stmt (sstmt
)) = vectype
;
2505 /* Push SLP node def-type to stmt operands. */
2506 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), j
, child
)
2507 if (SLP_TREE_DEF_TYPE (child
) != vect_internal_def
)
2508 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child
)[0]))
2509 = SLP_TREE_DEF_TYPE (child
);
2510 bool res
= vect_analyze_stmt (stmt
, &dummy
, node
);
2511 /* Restore def-types. */
2512 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), j
, child
)
2513 if (SLP_TREE_DEF_TYPE (child
) != vect_internal_def
)
2514 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child
)[0]))
2515 = vect_internal_def
;
2523 /* Analyze statements in SLP instances of the basic block. Return TRUE if the
2524 operations are supported. */
2527 vect_slp_analyze_operations (vec
<slp_instance
> slp_instances
, void *data
)
2529 slp_instance instance
;
2532 if (dump_enabled_p ())
2533 dump_printf_loc (MSG_NOTE
, vect_location
,
2534 "=== vect_slp_analyze_operations ===\n");
2536 for (i
= 0; slp_instances
.iterate (i
, &instance
); )
2538 if (!vect_slp_analyze_node_operations (SLP_INSTANCE_TREE (instance
)))
2540 dump_printf_loc (MSG_NOTE
, vect_location
,
2541 "removing SLP instance operations starting from: ");
2542 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
,
2543 SLP_TREE_SCALAR_STMTS
2544 (SLP_INSTANCE_TREE (instance
))[0], 0);
2545 vect_free_slp_instance (instance
);
2546 slp_instances
.ordered_remove (i
);
2550 /* Compute the costs of the SLP instance. */
2551 vect_analyze_slp_cost (instance
, data
);
2556 if (!slp_instances
.length ())
2563 /* Compute the scalar cost of the SLP node NODE and its children
2564 and return it. Do not account defs that are marked in LIFE and
2565 update LIFE according to uses of NODE. */
2568 vect_bb_slp_scalar_cost (basic_block bb
,
2569 slp_tree node
, vec
<bool, va_heap
> *life
)
2571 unsigned scalar_cost
= 0;
2576 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
2579 ssa_op_iter op_iter
;
2580 def_operand_p def_p
;
2581 stmt_vec_info stmt_info
;
2586 /* If there is a non-vectorized use of the defs then the scalar
2587 stmt is kept live in which case we do not account it or any
2588 required defs in the SLP children in the scalar cost. This
2589 way we make the vectorization more costly when compared to
2591 FOR_EACH_SSA_DEF_OPERAND (def_p
, stmt
, op_iter
, SSA_OP_DEF
)
2593 imm_use_iterator use_iter
;
2595 FOR_EACH_IMM_USE_STMT (use_stmt
, use_iter
, DEF_FROM_PTR (def_p
))
2596 if (!is_gimple_debug (use_stmt
)
2597 && (! vect_stmt_in_region_p (vinfo_for_stmt (stmt
)->vinfo
,
2599 || ! PURE_SLP_STMT (vinfo_for_stmt (use_stmt
))))
2602 BREAK_FROM_IMM_USE_STMT (use_iter
);
2608 /* Count scalar stmts only once. */
2609 if (gimple_visited_p (stmt
))
2611 gimple_set_visited (stmt
, true);
2613 stmt_info
= vinfo_for_stmt (stmt
);
2614 if (STMT_VINFO_DATA_REF (stmt_info
))
2616 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info
)))
2617 stmt_cost
= vect_get_stmt_cost (scalar_load
);
2619 stmt_cost
= vect_get_stmt_cost (scalar_store
);
2622 stmt_cost
= vect_get_stmt_cost (scalar_stmt
);
2624 scalar_cost
+= stmt_cost
;
2627 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
2628 if (SLP_TREE_DEF_TYPE (child
) == vect_internal_def
)
2629 scalar_cost
+= vect_bb_slp_scalar_cost (bb
, child
, life
);
2634 /* Check if vectorization of the basic block is profitable. */
2637 vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo
)
2639 vec
<slp_instance
> slp_instances
= BB_VINFO_SLP_INSTANCES (bb_vinfo
);
2640 slp_instance instance
;
2642 unsigned int vec_inside_cost
= 0, vec_outside_cost
= 0, scalar_cost
= 0;
2643 unsigned int vec_prologue_cost
= 0, vec_epilogue_cost
= 0;
2645 /* Calculate scalar cost. */
2646 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
2648 auto_vec
<bool, 20> life
;
2649 life
.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance
));
2650 scalar_cost
+= vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo
),
2651 SLP_INSTANCE_TREE (instance
),
2655 /* Unset visited flag. */
2656 for (gimple_stmt_iterator gsi
= bb_vinfo
->region_begin
;
2657 gsi_stmt (gsi
) != gsi_stmt (bb_vinfo
->region_end
); gsi_next (&gsi
))
2658 gimple_set_visited (gsi_stmt (gsi
), false);
2660 /* Complete the target-specific cost calculation. */
2661 finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo
), &vec_prologue_cost
,
2662 &vec_inside_cost
, &vec_epilogue_cost
);
2664 vec_outside_cost
= vec_prologue_cost
+ vec_epilogue_cost
;
2666 if (dump_enabled_p ())
2668 dump_printf_loc (MSG_NOTE
, vect_location
, "Cost model analysis: \n");
2669 dump_printf (MSG_NOTE
, " Vector inside of basic block cost: %d\n",
2671 dump_printf (MSG_NOTE
, " Vector prologue cost: %d\n", vec_prologue_cost
);
2672 dump_printf (MSG_NOTE
, " Vector epilogue cost: %d\n", vec_epilogue_cost
);
2673 dump_printf (MSG_NOTE
, " Scalar cost of basic block: %d\n", scalar_cost
);
2676 /* Vectorization is profitable if its cost is more than the cost of scalar
2677 version. Note that we err on the vector side for equal cost because
2678 the cost estimate is otherwise quite pessimistic (constant uses are
2679 free on the scalar side but cost a load on the vector side for
2681 if (vec_outside_cost
+ vec_inside_cost
> scalar_cost
)
2687 /* Check if the basic block can be vectorized. Returns a bb_vec_info
2688 if so and sets fatal to true if failure is independent of
2689 current_vector_size. */
2692 vect_slp_analyze_bb_1 (gimple_stmt_iterator region_begin
,
2693 gimple_stmt_iterator region_end
,
2694 vec
<data_reference_p
> datarefs
, int n_stmts
,
2697 bb_vec_info bb_vinfo
;
2698 slp_instance instance
;
2702 /* The first group of checks is independent of the vector size. */
2705 if (n_stmts
> PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB
))
2707 if (dump_enabled_p ())
2708 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2709 "not vectorized: too many instructions in "
2711 free_data_refs (datarefs
);
2715 bb_vinfo
= new_bb_vec_info (region_begin
, region_end
);
2719 BB_VINFO_DATAREFS (bb_vinfo
) = datarefs
;
2721 /* Analyze the data references. */
2723 if (!vect_analyze_data_refs (bb_vinfo
, &min_vf
))
2725 if (dump_enabled_p ())
2726 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2727 "not vectorized: unhandled data-ref in basic "
2730 destroy_bb_vec_info (bb_vinfo
);
2734 if (BB_VINFO_DATAREFS (bb_vinfo
).length () < 2)
2736 if (dump_enabled_p ())
2737 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2738 "not vectorized: not enough data-refs in "
2741 destroy_bb_vec_info (bb_vinfo
);
2745 if (!vect_analyze_data_ref_accesses (bb_vinfo
))
2747 if (dump_enabled_p ())
2748 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2749 "not vectorized: unhandled data access in "
2752 destroy_bb_vec_info (bb_vinfo
);
2756 /* If there are no grouped stores in the region there is no need
2757 to continue with pattern recog as vect_analyze_slp will fail
2759 if (bb_vinfo
->grouped_stores
.is_empty ())
2761 if (dump_enabled_p ())
2762 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2763 "not vectorized: no grouped stores in "
2766 destroy_bb_vec_info (bb_vinfo
);
2770 /* While the rest of the analysis below depends on it in some way. */
2773 vect_pattern_recog (bb_vinfo
);
2775 /* Check the SLP opportunities in the basic block, analyze and build SLP
2777 if (!vect_analyze_slp (bb_vinfo
, n_stmts
))
2779 if (dump_enabled_p ())
2781 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2782 "Failed to SLP the basic block.\n");
2783 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2784 "not vectorized: failed to find SLP opportunities "
2785 "in basic block.\n");
2788 destroy_bb_vec_info (bb_vinfo
);
2792 /* Analyze and verify the alignment of data references and the
2793 dependence in the SLP instances. */
2794 for (i
= 0; BB_VINFO_SLP_INSTANCES (bb_vinfo
).iterate (i
, &instance
); )
2796 if (! vect_slp_analyze_and_verify_instance_alignment (instance
)
2797 || ! vect_slp_analyze_instance_dependence (instance
))
2799 dump_printf_loc (MSG_NOTE
, vect_location
,
2800 "removing SLP instance operations starting from: ");
2801 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
,
2802 SLP_TREE_SCALAR_STMTS
2803 (SLP_INSTANCE_TREE (instance
))[0], 0);
2804 vect_free_slp_instance (instance
);
2805 BB_VINFO_SLP_INSTANCES (bb_vinfo
).ordered_remove (i
);
2809 /* Mark all the statements that we want to vectorize as pure SLP and
2811 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance
), pure_slp
, -1);
2812 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance
));
2816 if (! BB_VINFO_SLP_INSTANCES (bb_vinfo
).length ())
2818 destroy_bb_vec_info (bb_vinfo
);
2822 if (!vect_slp_analyze_operations (BB_VINFO_SLP_INSTANCES (bb_vinfo
),
2823 BB_VINFO_TARGET_COST_DATA (bb_vinfo
)))
2825 if (dump_enabled_p ())
2826 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2827 "not vectorized: bad operation in basic block.\n");
2829 destroy_bb_vec_info (bb_vinfo
);
2833 /* Cost model: check if the vectorization is worthwhile. */
2834 if (!unlimited_cost_model (NULL
)
2835 && !vect_bb_vectorization_profitable_p (bb_vinfo
))
2837 if (dump_enabled_p ())
2838 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2839 "not vectorized: vectorization is not "
2842 destroy_bb_vec_info (bb_vinfo
);
2846 if (dump_enabled_p ())
2847 dump_printf_loc (MSG_NOTE
, vect_location
,
2848 "Basic block will be vectorized using SLP\n");
2854 /* Main entry for the BB vectorizer. Analyze and transform BB, returns
2855 true if anything in the basic-block was vectorized. */
2858 vect_slp_bb (basic_block bb
)
2860 bb_vec_info bb_vinfo
;
2861 gimple_stmt_iterator gsi
;
2862 unsigned int vector_sizes
;
2863 bool any_vectorized
= false;
2865 if (dump_enabled_p ())
2866 dump_printf_loc (MSG_NOTE
, vect_location
, "===vect_slp_analyze_bb===\n");
2868 /* Autodetect first vector size we try. */
2869 current_vector_size
= 0;
2870 vector_sizes
= targetm
.vectorize
.autovectorize_vector_sizes ();
2872 gsi
= gsi_start_bb (bb
);
2876 if (gsi_end_p (gsi
))
2879 gimple_stmt_iterator region_begin
= gsi
;
2880 vec
<data_reference_p
> datarefs
= vNULL
;
2883 for (; !gsi_end_p (gsi
); gsi_next (&gsi
))
2885 gimple
*stmt
= gsi_stmt (gsi
);
2886 if (is_gimple_debug (stmt
))
2890 if (gimple_location (stmt
) != UNKNOWN_LOCATION
)
2891 vect_location
= gimple_location (stmt
);
2893 if (!find_data_references_in_stmt (NULL
, stmt
, &datarefs
))
2897 /* Skip leading unhandled stmts. */
2898 if (gsi_stmt (region_begin
) == gsi_stmt (gsi
))
2904 gimple_stmt_iterator region_end
= gsi
;
2906 bool vectorized
= false;
2908 bb_vinfo
= vect_slp_analyze_bb_1 (region_begin
, region_end
,
2909 datarefs
, insns
, fatal
);
2911 && dbg_cnt (vect_slp
))
2913 if (dump_enabled_p ())
2914 dump_printf_loc (MSG_NOTE
, vect_location
, "SLPing BB part\n");
2916 vect_schedule_slp (bb_vinfo
);
2918 if (dump_enabled_p ())
2919 dump_printf_loc (MSG_NOTE
, vect_location
,
2920 "basic block part vectorized\n");
2922 destroy_bb_vec_info (bb_vinfo
);
2927 destroy_bb_vec_info (bb_vinfo
);
2929 any_vectorized
|= vectorized
;
2931 vector_sizes
&= ~current_vector_size
;
2933 || vector_sizes
== 0
2934 || current_vector_size
== 0
2935 /* If vect_slp_analyze_bb_1 signaled that analysis for all
2936 vector sizes will fail do not bother iterating. */
2939 if (gsi_end_p (region_end
))
2942 /* Skip the unhandled stmt. */
2945 /* And reset vector sizes. */
2946 current_vector_size
= 0;
2947 vector_sizes
= targetm
.vectorize
.autovectorize_vector_sizes ();
2951 /* Try the next biggest vector size. */
2952 current_vector_size
= 1 << floor_log2 (vector_sizes
);
2953 if (dump_enabled_p ())
2954 dump_printf_loc (MSG_NOTE
, vect_location
,
2955 "***** Re-trying analysis with "
2956 "vector size %d\n", current_vector_size
);
2963 return any_vectorized
;
2967 /* Return 1 if vector type of boolean constant which is OPNUM
2968 operand in statement STMT is a boolean vector. */
2971 vect_mask_constant_operand_p (gimple
*stmt
, int opnum
)
2973 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
2974 enum tree_code code
= gimple_expr_code (stmt
);
2977 enum vect_def_type dt
;
2979 /* For comparison and COND_EXPR type is chosen depending
2980 on the other comparison operand. */
2981 if (TREE_CODE_CLASS (code
) == tcc_comparison
)
2984 op
= gimple_assign_rhs1 (stmt
);
2986 op
= gimple_assign_rhs2 (stmt
);
2988 if (!vect_is_simple_use (op
, stmt_vinfo
->vinfo
, &def_stmt
,
2992 return !vectype
|| VECTOR_BOOLEAN_TYPE_P (vectype
);
2995 if (code
== COND_EXPR
)
2997 tree cond
= gimple_assign_rhs1 (stmt
);
2999 if (TREE_CODE (cond
) == SSA_NAME
)
3002 op
= TREE_OPERAND (cond
, 1);
3004 op
= TREE_OPERAND (cond
, 0);
3006 if (!vect_is_simple_use (op
, stmt_vinfo
->vinfo
, &def_stmt
,
3010 return !vectype
|| VECTOR_BOOLEAN_TYPE_P (vectype
);
3013 return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo
));
3017 /* For constant and loop invariant defs of SLP_NODE this function returns
3018 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
3019 OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
3020 scalar stmts. NUMBER_OF_VECTORS is the number of vector defs to create.
3021 REDUC_INDEX is the index of the reduction operand in the statements, unless
3025 vect_get_constant_vectors (tree op
, slp_tree slp_node
,
3026 vec
<tree
> *vec_oprnds
,
3027 unsigned int op_num
, unsigned int number_of_vectors
)
3029 vec
<gimple
*> stmts
= SLP_TREE_SCALAR_STMTS (slp_node
);
3030 gimple
*stmt
= stmts
[0];
3031 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
3035 unsigned j
, number_of_places_left_in_vector
;
3038 int group_size
= stmts
.length ();
3039 unsigned int vec_num
, i
;
3040 unsigned number_of_copies
= 1;
3042 voprnds
.create (number_of_vectors
);
3043 bool constant_p
, is_store
;
3044 tree neutral_op
= NULL
;
3045 enum tree_code code
= gimple_expr_code (stmt
);
3046 gimple_seq ctor_seq
= NULL
;
3048 /* Check if vector type is a boolean vector. */
3049 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op
))
3050 && vect_mask_constant_operand_p (stmt
, op_num
))
3052 = build_same_sized_truth_vector_type (STMT_VINFO_VECTYPE (stmt_vinfo
));
3054 vector_type
= get_vectype_for_scalar_type (TREE_TYPE (op
));
3055 nunits
= TYPE_VECTOR_SUBPARTS (vector_type
);
3057 if (STMT_VINFO_DATA_REF (stmt_vinfo
))
3060 op
= gimple_assign_rhs1 (stmt
);
3067 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
3068 created vectors. It is greater than 1 if unrolling is performed.
3070 For example, we have two scalar operands, s1 and s2 (e.g., group of
3071 strided accesses of size two), while NUNITS is four (i.e., four scalars
3072 of this type can be packed in a vector). The output vector will contain
3073 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
3076 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
3077 containing the operands.
3079 For example, NUNITS is four as before, and the group size is 8
3080 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
3081 {s5, s6, s7, s8}. */
3083 number_of_copies
= nunits
* number_of_vectors
/ group_size
;
3085 number_of_places_left_in_vector
= nunits
;
3087 elts
= XALLOCAVEC (tree
, nunits
);
3088 bool place_after_defs
= false;
3089 for (j
= 0; j
< number_of_copies
; j
++)
3091 for (i
= group_size
- 1; stmts
.iterate (i
, &stmt
); i
--)
3094 op
= gimple_assign_rhs1 (stmt
);
3101 tree cond
= gimple_assign_rhs1 (stmt
);
3102 if (TREE_CODE (cond
) == SSA_NAME
)
3103 op
= gimple_op (stmt
, op_num
+ 1);
3104 else if (op_num
== 0 || op_num
== 1)
3105 op
= TREE_OPERAND (cond
, op_num
);
3109 op
= gimple_assign_rhs2 (stmt
);
3111 op
= gimple_assign_rhs3 (stmt
);
3117 op
= gimple_call_arg (stmt
, op_num
);
3124 op
= gimple_op (stmt
, op_num
+ 1);
3125 /* Unlike the other binary operators, shifts/rotates have
3126 the shift count being int, instead of the same type as
3127 the lhs, so make sure the scalar is the right type if
3128 we are dealing with vectors of
3129 long long/long/short/char. */
3130 if (op_num
== 1 && TREE_CODE (op
) == INTEGER_CST
)
3131 op
= fold_convert (TREE_TYPE (vector_type
), op
);
3135 op
= gimple_op (stmt
, op_num
+ 1);
3140 /* Create 'vect_ = {op0,op1,...,opn}'. */
3141 number_of_places_left_in_vector
--;
3143 if (!types_compatible_p (TREE_TYPE (vector_type
), TREE_TYPE (op
)))
3145 if (CONSTANT_CLASS_P (op
))
3147 if (VECTOR_BOOLEAN_TYPE_P (vector_type
))
3149 /* Can't use VIEW_CONVERT_EXPR for booleans because
3150 of possibly different sizes of scalar value and
3152 if (integer_zerop (op
))
3153 op
= build_int_cst (TREE_TYPE (vector_type
), 0);
3154 else if (integer_onep (op
))
3155 op
= build_all_ones_cst (TREE_TYPE (vector_type
));
3160 op
= fold_unary (VIEW_CONVERT_EXPR
,
3161 TREE_TYPE (vector_type
), op
);
3162 gcc_assert (op
&& CONSTANT_CLASS_P (op
));
3166 tree new_temp
= make_ssa_name (TREE_TYPE (vector_type
));
3168 if (VECTOR_BOOLEAN_TYPE_P (vector_type
))
3171 = build_all_ones_cst (TREE_TYPE (vector_type
));
3173 = build_zero_cst (TREE_TYPE (vector_type
));
3174 gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (op
)));
3175 init_stmt
= gimple_build_assign (new_temp
, COND_EXPR
,
3181 op
= build1 (VIEW_CONVERT_EXPR
, TREE_TYPE (vector_type
),
3184 = gimple_build_assign (new_temp
, VIEW_CONVERT_EXPR
,
3187 gimple_seq_add_stmt (&ctor_seq
, init_stmt
);
3191 elts
[number_of_places_left_in_vector
] = op
;
3192 if (!CONSTANT_CLASS_P (op
))
3194 if (TREE_CODE (orig_op
) == SSA_NAME
3195 && !SSA_NAME_IS_DEFAULT_DEF (orig_op
)
3196 && STMT_VINFO_BB_VINFO (stmt_vinfo
)
3197 && (STMT_VINFO_BB_VINFO (stmt_vinfo
)->bb
3198 == gimple_bb (SSA_NAME_DEF_STMT (orig_op
))))
3199 place_after_defs
= true;
3201 if (number_of_places_left_in_vector
== 0)
3204 vec_cst
= build_vector (vector_type
, elts
);
3207 vec
<constructor_elt
, va_gc
> *v
;
3209 vec_alloc (v
, nunits
);
3210 for (k
= 0; k
< nunits
; ++k
)
3211 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
, elts
[k
]);
3212 vec_cst
= build_constructor (vector_type
, v
);
3215 gimple_stmt_iterator gsi
;
3216 if (place_after_defs
)
3219 (vect_find_last_scalar_stmt_in_slp (slp_node
));
3220 init
= vect_init_vector (stmt
, vec_cst
, vector_type
, &gsi
);
3223 init
= vect_init_vector (stmt
, vec_cst
, vector_type
, NULL
);
3224 if (ctor_seq
!= NULL
)
3226 gsi
= gsi_for_stmt (SSA_NAME_DEF_STMT (init
));
3227 gsi_insert_seq_before_without_update (&gsi
, ctor_seq
,
3231 voprnds
.quick_push (init
);
3232 place_after_defs
= false;
3233 number_of_places_left_in_vector
= nunits
;
3239 /* Since the vectors are created in the reverse order, we should invert
3241 vec_num
= voprnds
.length ();
3242 for (j
= vec_num
; j
!= 0; j
--)
3244 vop
= voprnds
[j
- 1];
3245 vec_oprnds
->quick_push (vop
);
3250 /* In case that VF is greater than the unrolling factor needed for the SLP
3251 group of stmts, NUMBER_OF_VECTORS to be created is greater than
3252 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
3253 to replicate the vectors. */
3254 while (number_of_vectors
> vec_oprnds
->length ())
3256 tree neutral_vec
= NULL
;
3261 neutral_vec
= build_vector_from_val (vector_type
, neutral_op
);
3263 vec_oprnds
->quick_push (neutral_vec
);
3267 for (i
= 0; vec_oprnds
->iterate (i
, &vop
) && i
< vec_num
; i
++)
3268 vec_oprnds
->quick_push (vop
);
3274 /* Get vectorized definitions from SLP_NODE that contains corresponding
3275 vectorized def-stmts. */
3278 vect_get_slp_vect_defs (slp_tree slp_node
, vec
<tree
> *vec_oprnds
)
3281 gimple
*vec_def_stmt
;
3284 gcc_assert (SLP_TREE_VEC_STMTS (slp_node
).exists ());
3286 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node
), i
, vec_def_stmt
)
3288 gcc_assert (vec_def_stmt
);
3289 if (gimple_code (vec_def_stmt
) == GIMPLE_PHI
)
3290 vec_oprnd
= gimple_phi_result (vec_def_stmt
);
3292 vec_oprnd
= gimple_get_lhs (vec_def_stmt
);
3293 vec_oprnds
->quick_push (vec_oprnd
);
3298 /* Get vectorized definitions for SLP_NODE.
3299 If the scalar definitions are loop invariants or constants, collect them and
3300 call vect_get_constant_vectors() to create vector stmts.
3301 Otherwise, the def-stmts must be already vectorized and the vectorized stmts
3302 must be stored in the corresponding child of SLP_NODE, and we call
3303 vect_get_slp_vect_defs () to retrieve them. */
3306 vect_get_slp_defs (vec
<tree
> ops
, slp_tree slp_node
,
3307 vec
<vec
<tree
> > *vec_oprnds
)
3310 int number_of_vects
= 0, i
;
3311 HOST_WIDE_INT lhs_size_unit
, rhs_size_unit
;
3312 slp_tree child
= NULL
;
3315 bool first_iteration
= true;
3317 first_stmt
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
3318 FOR_EACH_VEC_ELT (ops
, i
, oprnd
)
3320 bool vectorized_defs
= false;
3325 vec_defs
.create (0);
3326 vec_oprnds
->quick_push (vec_defs
);
3330 /* For each operand we check if it has vectorized definitions in a child
3331 node or we need to create them (for invariants and constants). We
3332 check if the LHS of the first stmt of the next child matches OPRND.
3333 If it does, we found the correct child. Otherwise, we call
3334 vect_get_constant_vectors (). */
3335 for (unsigned int child_index
= 0;
3336 child_index
< SLP_TREE_CHILDREN (slp_node
).length (); child_index
++)
3338 child
= SLP_TREE_CHILDREN (slp_node
)[child_index
];
3340 /* We have to check both pattern and original def, if available. */
3341 if (SLP_TREE_DEF_TYPE (child
) == vect_internal_def
)
3343 gimple
*first_def
= SLP_TREE_SCALAR_STMTS (child
)[0];
3345 = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def
));
3348 if (gimple_code (first_def
) == GIMPLE_PHI
)
3349 first_def_op
= gimple_phi_result (first_def
);
3351 first_def_op
= gimple_get_lhs (first_def
);
3352 if (operand_equal_p (oprnd
, first_def_op
, 0)
3354 && operand_equal_p (oprnd
, gimple_get_lhs (related
), 0)))
3356 /* The number of vector defs is determined by the number of
3357 vector statements in the node from which we get those
3359 number_of_vects
= SLP_TREE_NUMBER_OF_VEC_STMTS (child
);
3360 vectorized_defs
= true;
3366 if (!vectorized_defs
&& first_iteration
)
3368 number_of_vects
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
3369 /* Number of vector stmts was calculated according to LHS in
3370 vect_schedule_slp_instance (), fix it by replacing LHS with
3371 RHS, if necessary. See vect_get_smallest_scalar_type () for
3373 vect_get_smallest_scalar_type (first_stmt
, &lhs_size_unit
,
3375 if (rhs_size_unit
!= lhs_size_unit
)
3377 number_of_vects
*= rhs_size_unit
;
3378 number_of_vects
/= lhs_size_unit
;
3382 /* Allocate memory for vectorized defs. */
3384 vec_defs
.create (number_of_vects
);
3386 /* For reduction defs we call vect_get_constant_vectors (), since we are
3387 looking for initial loop invariant values. */
3388 if (vectorized_defs
)
3389 /* The defs are already vectorized. */
3390 vect_get_slp_vect_defs (child
, &vec_defs
);
3392 /* Build vectors from scalar defs. */
3393 vect_get_constant_vectors (oprnd
, slp_node
, &vec_defs
, i
,
3396 vec_oprnds
->quick_push (vec_defs
);
3398 first_iteration
= false;
3402 /* Generate vector permute statements from a list of loads in DR_CHAIN.
3403 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
3404 permute statements for the SLP node NODE of the SLP instance
3405 SLP_NODE_INSTANCE. */
3408 vect_transform_slp_perm_load (slp_tree node
, vec
<tree
> dr_chain
,
3409 gimple_stmt_iterator
*gsi
, int vf
,
3410 slp_instance slp_node_instance
, bool analyze_only
,
3413 gimple
*stmt
= SLP_TREE_SCALAR_STMTS (node
)[0];
3414 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3415 tree mask_element_type
= NULL_TREE
, mask_type
;
3416 int nunits
, vec_index
= 0;
3417 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3418 int group_size
= SLP_INSTANCE_GROUP_SIZE (slp_node_instance
);
3420 unsigned char *mask
;
3423 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info
))
3426 stmt_info
= vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info
));
3428 mode
= TYPE_MODE (vectype
);
3430 /* The generic VEC_PERM_EXPR code always uses an integral type of the
3431 same size as the vector element being permuted. */
3432 mask_element_type
= lang_hooks
.types
.type_for_mode
3433 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype
))), 1);
3434 mask_type
= get_vectype_for_scalar_type (mask_element_type
);
3435 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
3436 mask
= XALLOCAVEC (unsigned char, nunits
);
3438 /* Initialize the vect stmts of NODE to properly insert the generated
3441 for (unsigned i
= SLP_TREE_VEC_STMTS (node
).length ();
3442 i
< SLP_TREE_NUMBER_OF_VEC_STMTS (node
); i
++)
3443 SLP_TREE_VEC_STMTS (node
).quick_push (NULL
);
3445 /* Generate permutation masks for every NODE. Number of masks for each NODE
3446 is equal to GROUP_SIZE.
3447 E.g., we have a group of three nodes with three loads from the same
3448 location in each node, and the vector size is 4. I.e., we have a
3449 a0b0c0a1b1c1... sequence and we need to create the following vectors:
3450 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
3451 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
3454 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
3455 The last mask is illegal since we assume two operands for permute
3456 operation, and the mask element values can't be outside that range.
3457 Hence, the last mask must be converted into {2,5,5,5}.
3458 For the first two permutations we need the first and the second input
3459 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
3460 we need the second and the third vectors: {b1,c1,a2,b2} and
3463 int vect_stmts_counter
= 0;
3465 int first_vec_index
= -1;
3466 int second_vec_index
= -1;
3470 for (int j
= 0; j
< vf
; j
++)
3472 for (int k
= 0; k
< group_size
; k
++)
3474 int i
= (SLP_TREE_LOAD_PERMUTATION (node
)[k
]
3475 + j
* STMT_VINFO_GROUP_SIZE (stmt_info
));
3476 vec_index
= i
/ nunits
;
3477 mask_element
= i
% nunits
;
3478 if (vec_index
== first_vec_index
3479 || first_vec_index
== -1)
3481 first_vec_index
= vec_index
;
3483 else if (vec_index
== second_vec_index
3484 || second_vec_index
== -1)
3486 second_vec_index
= vec_index
;
3487 mask_element
+= nunits
;
3491 if (dump_enabled_p ())
3493 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3494 "permutation requires at "
3495 "least three vectors ");
3496 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
3502 gcc_assert (mask_element
>= 0
3503 && mask_element
< 2 * nunits
);
3504 if (mask_element
!= index
)
3506 mask
[index
++] = mask_element
;
3508 if (index
== nunits
)
3511 && ! can_vec_perm_p (mode
, false, mask
))
3513 if (dump_enabled_p ())
3515 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
3517 "unsupported vect permute { ");
3518 for (i
= 0; i
< nunits
; ++i
)
3519 dump_printf (MSG_MISSED_OPTIMIZATION
, "%d ", mask
[i
]);
3520 dump_printf (MSG_MISSED_OPTIMIZATION
, "}\n");
3530 tree mask_vec
= NULL_TREE
;
3534 tree
*mask_elts
= XALLOCAVEC (tree
, nunits
);
3535 for (int l
= 0; l
< nunits
; ++l
)
3536 mask_elts
[l
] = build_int_cst (mask_element_type
,
3538 mask_vec
= build_vector (mask_type
, mask_elts
);
3541 if (second_vec_index
== -1)
3542 second_vec_index
= first_vec_index
;
3544 /* Generate the permute statement if necessary. */
3545 tree first_vec
= dr_chain
[first_vec_index
];
3546 tree second_vec
= dr_chain
[second_vec_index
];
3551 = vect_create_destination_var (gimple_assign_lhs (stmt
),
3553 perm_dest
= make_ssa_name (perm_dest
);
3554 perm_stmt
= gimple_build_assign (perm_dest
,
3556 first_vec
, second_vec
,
3558 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
3561 /* If mask was NULL_TREE generate the requested
3562 identity transform. */
3563 perm_stmt
= SSA_NAME_DEF_STMT (first_vec
);
3565 /* Store the vector statement in NODE. */
3566 SLP_TREE_VEC_STMTS (node
)[vect_stmts_counter
++] = perm_stmt
;
3570 first_vec_index
= -1;
3571 second_vec_index
= -1;
3582 /* Vectorize SLP instance tree in postorder. */
3585 vect_schedule_slp_instance (slp_tree node
, slp_instance instance
,
3586 unsigned int vectorization_factor
)
3589 bool grouped_store
, is_store
;
3590 gimple_stmt_iterator si
;
3591 stmt_vec_info stmt_info
;
3592 unsigned int vec_stmts_size
, nunits
, group_size
;
3597 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
3600 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
3601 vect_schedule_slp_instance (child
, instance
, vectorization_factor
);
3603 /* Push SLP node def-type to stmts. */
3604 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
3605 if (SLP_TREE_DEF_TYPE (child
) != vect_internal_def
)
3606 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child
), j
, stmt
)
3607 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt
)) = SLP_TREE_DEF_TYPE (child
);
3609 stmt
= SLP_TREE_SCALAR_STMTS (node
)[0];
3610 stmt_info
= vinfo_for_stmt (stmt
);
3612 /* VECTYPE is the type of the destination. */
3613 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3614 nunits
= (unsigned int) TYPE_VECTOR_SUBPARTS (vectype
);
3615 group_size
= SLP_INSTANCE_GROUP_SIZE (instance
);
3617 /* For each SLP instance calculate number of vector stmts to be created
3618 for the scalar stmts in each node of the SLP tree. Number of vector
3619 elements in one vector iteration is the number of scalar elements in
3620 one scalar iteration (GROUP_SIZE) multiplied by VF divided by vector
3622 Unless this is a SLP reduction in which case the number of vector
3623 stmts is equal to the number of vector stmts of the children. */
3624 if (GROUP_FIRST_ELEMENT (stmt_info
)
3625 && !STMT_VINFO_GROUPED_ACCESS (stmt_info
))
3626 vec_stmts_size
= SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node
)[0]);
3628 vec_stmts_size
= (vectorization_factor
* group_size
) / nunits
;
3630 if (!SLP_TREE_VEC_STMTS (node
).exists ())
3632 SLP_TREE_VEC_STMTS (node
).create (vec_stmts_size
);
3633 SLP_TREE_NUMBER_OF_VEC_STMTS (node
) = vec_stmts_size
;
3636 if (dump_enabled_p ())
3638 dump_printf_loc (MSG_NOTE
,vect_location
,
3639 "------>vectorizing SLP node starting from: ");
3640 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
3643 /* Vectorized stmts go before the last scalar stmt which is where
3644 all uses are ready. */
3645 si
= gsi_for_stmt (vect_find_last_scalar_stmt_in_slp (node
));
3647 /* Mark the first element of the reduction chain as reduction to properly
3648 transform the node. In the analysis phase only the last element of the
3649 chain is marked as reduction. */
3650 if (GROUP_FIRST_ELEMENT (stmt_info
) && !STMT_VINFO_GROUPED_ACCESS (stmt_info
)
3651 && GROUP_FIRST_ELEMENT (stmt_info
) == stmt
)
3653 STMT_VINFO_DEF_TYPE (stmt_info
) = vect_reduction_def
;
3654 STMT_VINFO_TYPE (stmt_info
) = reduc_vec_info_type
;
3657 /* Handle two-operation SLP nodes by vectorizing the group with
3658 both operations and then performing a merge. */
3659 if (SLP_TREE_TWO_OPERATORS (node
))
3661 enum tree_code code0
= gimple_assign_rhs_code (stmt
);
3662 enum tree_code ocode
= ERROR_MARK
;
3664 unsigned char *mask
= XALLOCAVEC (unsigned char, group_size
);
3665 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, ostmt
)
3666 if (gimple_assign_rhs_code (ostmt
) != code0
)
3669 ocode
= gimple_assign_rhs_code (ostmt
);
3673 if (ocode
!= ERROR_MARK
)
3678 tree tmask
= NULL_TREE
;
3679 vect_transform_stmt (stmt
, &si
, &grouped_store
, node
, instance
);
3680 v0
= SLP_TREE_VEC_STMTS (node
).copy ();
3681 SLP_TREE_VEC_STMTS (node
).truncate (0);
3682 gimple_assign_set_rhs_code (stmt
, ocode
);
3683 vect_transform_stmt (stmt
, &si
, &grouped_store
, node
, instance
);
3684 gimple_assign_set_rhs_code (stmt
, code0
);
3685 v1
= SLP_TREE_VEC_STMTS (node
).copy ();
3686 SLP_TREE_VEC_STMTS (node
).truncate (0);
3687 tree meltype
= build_nonstandard_integer_type
3688 (GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype
))), 1);
3689 tree mvectype
= get_same_sized_vectype (meltype
, vectype
);
3691 for (j
= 0; j
< v0
.length (); ++j
)
3693 tree
*melts
= XALLOCAVEC (tree
, TYPE_VECTOR_SUBPARTS (vectype
));
3694 for (l
= 0; l
< TYPE_VECTOR_SUBPARTS (vectype
); ++l
)
3696 if (k
>= group_size
)
3698 melts
[l
] = build_int_cst
3699 (meltype
, mask
[k
++] * TYPE_VECTOR_SUBPARTS (vectype
) + l
);
3701 tmask
= build_vector (mvectype
, melts
);
3703 /* ??? Not all targets support a VEC_PERM_EXPR with a
3704 constant mask that would translate to a vec_merge RTX
3705 (with their vec_perm_const_ok). We can either not
3706 vectorize in that case or let veclower do its job.
3707 Unfortunately that isn't too great and at least for
3708 plus/minus we'd eventually like to match targets
3709 vector addsub instructions. */
3711 vstmt
= gimple_build_assign (make_ssa_name (vectype
),
3713 gimple_assign_lhs (v0
[j
]),
3714 gimple_assign_lhs (v1
[j
]), tmask
);
3715 vect_finish_stmt_generation (stmt
, vstmt
, &si
);
3716 SLP_TREE_VEC_STMTS (node
).quick_push (vstmt
);
3723 is_store
= vect_transform_stmt (stmt
, &si
, &grouped_store
, node
, instance
);
3725 /* Restore stmt def-types. */
3726 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
3727 if (SLP_TREE_DEF_TYPE (child
) != vect_internal_def
)
3728 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child
), j
, stmt
)
3729 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt
)) = vect_internal_def
;
3734 /* Replace scalar calls from SLP node NODE with setting of their lhs to zero.
3735 For loop vectorization this is done in vectorizable_call, but for SLP
3736 it needs to be deferred until end of vect_schedule_slp, because multiple
3737 SLP instances may refer to the same scalar stmt. */
3740 vect_remove_slp_scalar_calls (slp_tree node
)
3742 gimple
*stmt
, *new_stmt
;
3743 gimple_stmt_iterator gsi
;
3747 stmt_vec_info stmt_info
;
3749 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
3752 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
3753 vect_remove_slp_scalar_calls (child
);
3755 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
3757 if (!is_gimple_call (stmt
) || gimple_bb (stmt
) == NULL
)
3759 stmt_info
= vinfo_for_stmt (stmt
);
3760 if (stmt_info
== NULL
3761 || is_pattern_stmt_p (stmt_info
)
3762 || !PURE_SLP_STMT (stmt_info
))
3764 lhs
= gimple_call_lhs (stmt
);
3765 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (TREE_TYPE (lhs
)));
3766 set_vinfo_for_stmt (new_stmt
, stmt_info
);
3767 set_vinfo_for_stmt (stmt
, NULL
);
3768 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
3769 gsi
= gsi_for_stmt (stmt
);
3770 gsi_replace (&gsi
, new_stmt
, false);
3771 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt
)) = new_stmt
;
3775 /* Generate vector code for all SLP instances in the loop/basic block. */
3778 vect_schedule_slp (vec_info
*vinfo
)
3780 vec
<slp_instance
> slp_instances
;
3781 slp_instance instance
;
3783 bool is_store
= false;
3785 slp_instances
= vinfo
->slp_instances
;
3786 if (is_a
<loop_vec_info
> (vinfo
))
3787 vf
= as_a
<loop_vec_info
> (vinfo
)->vectorization_factor
;
3791 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
3793 /* Schedule the tree of INSTANCE. */
3794 is_store
= vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance
),
3796 if (dump_enabled_p ())
3797 dump_printf_loc (MSG_NOTE
, vect_location
,
3798 "vectorizing stmts using SLP.\n");
3801 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
3803 slp_tree root
= SLP_INSTANCE_TREE (instance
);
3806 gimple_stmt_iterator gsi
;
3808 /* Remove scalar call stmts. Do not do this for basic-block
3809 vectorization as not all uses may be vectorized.
3810 ??? Why should this be necessary? DCE should be able to
3811 remove the stmts itself.
3812 ??? For BB vectorization we can as well remove scalar
3813 stmts starting from the SLP tree root if they have no
3815 if (is_a
<loop_vec_info
> (vinfo
))
3816 vect_remove_slp_scalar_calls (root
);
3818 for (j
= 0; SLP_TREE_SCALAR_STMTS (root
).iterate (j
, &store
)
3819 && j
< SLP_INSTANCE_GROUP_SIZE (instance
); j
++)
3821 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store
)))
3824 if (is_pattern_stmt_p (vinfo_for_stmt (store
)))
3825 store
= STMT_VINFO_RELATED_STMT (vinfo_for_stmt (store
));
3826 /* Free the attached stmt_vec_info and remove the stmt. */
3827 gsi
= gsi_for_stmt (store
);
3828 unlink_stmt_vdef (store
);
3829 gsi_remove (&gsi
, true);
3830 release_defs (store
);
3831 free_stmt_vec_info (store
);