/* SLP - Basic Block Vectorization
   Copyright (C) 2007-2017 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
24 #include "coretypes.h"
30 #include "tree-pass.h"
32 #include "optabs-tree.h"
33 #include "insn-config.h"
34 #include "recog.h" /* FIXME: for insn_data */
36 #include "fold-const.h"
37 #include "stor-layout.h"
38 #include "gimple-iterator.h"
40 #include "tree-vectorizer.h"
41 #include "langhooks.h"
42 #include "gimple-walk.h"
46 /* Recursively free the memory allocated for the SLP tree rooted at NODE. */
49 vect_free_slp_tree (slp_tree node
)
54 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
55 vect_free_slp_tree (child
);
58 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
59 /* After transform some stmts are removed and thus their vinfo is gone. */
60 if (vinfo_for_stmt (stmt
))
62 gcc_assert (STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt
)) > 0);
63 STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt
))--;
66 SLP_TREE_CHILDREN (node
).release ();
67 SLP_TREE_SCALAR_STMTS (node
).release ();
68 SLP_TREE_VEC_STMTS (node
).release ();
69 SLP_TREE_LOAD_PERMUTATION (node
).release ();
75 /* Free the memory allocated for the SLP instance. */
78 vect_free_slp_instance (slp_instance instance
)
80 vect_free_slp_tree (SLP_INSTANCE_TREE (instance
));
81 SLP_INSTANCE_LOADS (instance
).release ();
86 /* Create an SLP node for SCALAR_STMTS. */
89 vect_create_new_slp_node (vec
<gimple
*> scalar_stmts
)
92 gimple
*stmt
= scalar_stmts
[0];
95 if (is_gimple_call (stmt
))
96 nops
= gimple_call_num_args (stmt
);
97 else if (is_gimple_assign (stmt
))
99 nops
= gimple_num_ops (stmt
) - 1;
100 if (gimple_assign_rhs_code (stmt
) == COND_EXPR
)
103 else if (gimple_code (stmt
) == GIMPLE_PHI
)
108 node
= XNEW (struct _slp_tree
);
109 SLP_TREE_SCALAR_STMTS (node
) = scalar_stmts
;
110 SLP_TREE_VEC_STMTS (node
).create (0);
111 SLP_TREE_CHILDREN (node
).create (nops
);
112 SLP_TREE_LOAD_PERMUTATION (node
) = vNULL
;
113 SLP_TREE_TWO_OPERATORS (node
) = false;
114 SLP_TREE_DEF_TYPE (node
) = vect_internal_def
;
117 FOR_EACH_VEC_ELT (scalar_stmts
, i
, stmt
)
118 STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt
))++;
124 /* This structure is used in creation of an SLP tree. Each instance
125 corresponds to the same operand in a group of scalar stmts in an SLP
127 typedef struct _slp_oprnd_info
129 /* Def-stmts for the operands. */
130 vec
<gimple
*> def_stmts
;
131 /* Information about the first statement, its vector def-type, type, the
132 operand itself in case it's constant, and an indication if it's a pattern
135 enum vect_def_type first_dt
;
141 /* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
143 static vec
<slp_oprnd_info
>
144 vect_create_oprnd_info (int nops
, int group_size
)
147 slp_oprnd_info oprnd_info
;
148 vec
<slp_oprnd_info
> oprnds_info
;
150 oprnds_info
.create (nops
);
151 for (i
= 0; i
< nops
; i
++)
153 oprnd_info
= XNEW (struct _slp_oprnd_info
);
154 oprnd_info
->def_stmts
.create (group_size
);
155 oprnd_info
->first_dt
= vect_uninitialized_def
;
156 oprnd_info
->first_op_type
= NULL_TREE
;
157 oprnd_info
->first_pattern
= false;
158 oprnd_info
->second_pattern
= false;
159 oprnds_info
.quick_push (oprnd_info
);
166 /* Free operands info. */
169 vect_free_oprnd_info (vec
<slp_oprnd_info
> &oprnds_info
)
172 slp_oprnd_info oprnd_info
;
174 FOR_EACH_VEC_ELT (oprnds_info
, i
, oprnd_info
)
176 oprnd_info
->def_stmts
.release ();
177 XDELETE (oprnd_info
);
180 oprnds_info
.release ();
184 /* Find the place of the data-ref in STMT in the interleaving chain that starts
185 from FIRST_STMT. Return -1 if the data-ref is not a part of the chain. */
188 vect_get_place_in_interleaving_chain (gimple
*stmt
, gimple
*first_stmt
)
190 gimple
*next_stmt
= first_stmt
;
193 if (first_stmt
!= GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)))
198 if (next_stmt
== stmt
)
200 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
202 result
+= GROUP_GAP (vinfo_for_stmt (next_stmt
));
210 /* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
211 they are of a valid type and that they match the defs of the first stmt of
212 the SLP group (stored in OPRNDS_INFO). This function tries to match stmts
213 by swapping operands of STMT when possible. Non-zero *SWAP indicates swap
214 is required for cond_expr stmts. Specifically, *SWAP is 1 if STMT is cond
215 and operands of comparison need to be swapped; *SWAP is 2 if STMT is cond
216 and code of comparison needs to be inverted. If there is any operand swap
217 in this function, *SWAP is set to non-zero value.
218 If there was a fatal error return -1; if the error could be corrected by
219 swapping operands of father node of this one, return 1; if everything is
223 vect_get_and_check_slp_defs (vec_info
*vinfo
, unsigned char *swap
,
224 gimple
*stmt
, unsigned stmt_num
,
225 vec
<slp_oprnd_info
> *oprnds_info
)
228 unsigned int i
, number_of_oprnds
;
230 enum vect_def_type dt
= vect_uninitialized_def
;
231 bool pattern
= false;
232 slp_oprnd_info oprnd_info
;
233 int first_op_idx
= 1;
234 bool commutative
= false;
235 bool first_op_cond
= false;
236 bool first
= stmt_num
== 0;
237 bool second
= stmt_num
== 1;
239 if (is_gimple_call (stmt
))
241 number_of_oprnds
= gimple_call_num_args (stmt
);
244 else if (is_gimple_assign (stmt
))
246 enum tree_code code
= gimple_assign_rhs_code (stmt
);
247 number_of_oprnds
= gimple_num_ops (stmt
) - 1;
248 /* Swap can only be done for cond_expr if asked to, otherwise we
249 could result in different comparison code to the first stmt. */
250 if (gimple_assign_rhs_code (stmt
) == COND_EXPR
251 && COMPARISON_CLASS_P (gimple_assign_rhs1 (stmt
)))
253 first_op_cond
= true;
257 commutative
= commutative_tree_code (code
);
262 bool swapped
= (*swap
!= 0);
263 gcc_assert (!swapped
|| first_op_cond
);
264 for (i
= 0; i
< number_of_oprnds
; i
++)
269 /* Map indicating how operands of cond_expr should be swapped. */
270 int maps
[3][4] = {{0, 1, 2, 3}, {1, 0, 2, 3}, {0, 1, 3, 2}};
271 int *map
= maps
[*swap
];
274 oprnd
= TREE_OPERAND (gimple_op (stmt
, first_op_idx
), map
[i
]);
276 oprnd
= gimple_op (stmt
, map
[i
]);
279 oprnd
= gimple_op (stmt
, first_op_idx
+ (swapped
? !i
: i
));
281 oprnd_info
= (*oprnds_info
)[i
];
283 if (!vect_is_simple_use (oprnd
, vinfo
, &def_stmt
, &dt
))
285 if (dump_enabled_p ())
287 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
288 "Build SLP failed: can't analyze def for ");
289 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, oprnd
);
290 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
296 /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
297 from the pattern. Check that all the stmts of the node are in the
299 if (def_stmt
&& gimple_bb (def_stmt
)
300 && vect_stmt_in_region_p (vinfo
, def_stmt
)
301 && vinfo_for_stmt (def_stmt
)
302 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt
))
303 && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt
))
304 && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt
)))
307 if (!first
&& !oprnd_info
->first_pattern
308 /* Allow different pattern state for the defs of the
309 first stmt in reduction chains. */
310 && (oprnd_info
->first_dt
!= vect_reduction_def
311 || (!second
&& !oprnd_info
->second_pattern
)))
321 if (dump_enabled_p ())
323 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
324 "Build SLP failed: some of the stmts"
325 " are in a pattern, and others are not ");
326 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, oprnd
);
327 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
333 def_stmt
= STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt
));
334 dt
= STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt
));
336 if (dt
== vect_unknown_def_type
)
338 if (dump_enabled_p ())
339 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
340 "Unsupported pattern.\n");
344 switch (gimple_code (def_stmt
))
351 if (dump_enabled_p ())
352 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
353 "unsupported defining stmt:\n");
359 oprnd_info
->second_pattern
= pattern
;
363 oprnd_info
->first_dt
= dt
;
364 oprnd_info
->first_pattern
= pattern
;
365 oprnd_info
->first_op_type
= TREE_TYPE (oprnd
);
369 /* Not first stmt of the group, check that the def-stmt/s match
370 the def-stmt/s of the first stmt. Allow different definition
371 types for reduction chains: the first stmt must be a
372 vect_reduction_def (a phi node), and the rest
373 vect_internal_def. */
374 if (((oprnd_info
->first_dt
!= dt
375 && !(oprnd_info
->first_dt
== vect_reduction_def
376 && dt
== vect_internal_def
)
377 && !((oprnd_info
->first_dt
== vect_external_def
378 || oprnd_info
->first_dt
== vect_constant_def
)
379 && (dt
== vect_external_def
380 || dt
== vect_constant_def
)))
381 || !types_compatible_p (oprnd_info
->first_op_type
,
384 /* Try swapping operands if we got a mismatch. */
393 if (dump_enabled_p ())
394 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
395 "Build SLP failed: different types\n");
401 /* Check the types of the definitions. */
404 case vect_constant_def
:
405 case vect_external_def
:
408 case vect_reduction_def
:
409 case vect_induction_def
:
410 case vect_internal_def
:
411 oprnd_info
->def_stmts
.quick_push (def_stmt
);
415 /* FORNOW: Not supported. */
416 if (dump_enabled_p ())
418 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
419 "Build SLP failed: illegal type of def ");
420 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, oprnd
);
421 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
431 /* If there are already uses of this stmt in a SLP instance then
432 we've committed to the operand order and can't swap it. */
433 if (STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt
)) != 0)
435 if (dump_enabled_p ())
437 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
438 "Build SLP failed: cannot swap operands of "
440 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
447 tree cond
= gimple_assign_rhs1 (stmt
);
448 enum tree_code code
= TREE_CODE (cond
);
453 swap_ssa_operands (stmt
, &TREE_OPERAND (cond
, 0),
454 &TREE_OPERAND (cond
, 1));
455 TREE_SET_CODE (cond
, swap_tree_comparison (code
));
460 swap_ssa_operands (stmt
, gimple_assign_rhs2_ptr (stmt
),
461 gimple_assign_rhs3_ptr (stmt
));
462 bool honor_nans
= HONOR_NANS (TREE_OPERAND (cond
, 0));
463 code
= invert_tree_comparison (TREE_CODE (cond
), honor_nans
);
464 gcc_assert (code
!= ERROR_MARK
);
465 TREE_SET_CODE (cond
, code
);
469 swap_ssa_operands (stmt
, gimple_assign_rhs1_ptr (stmt
),
470 gimple_assign_rhs2_ptr (stmt
));
471 if (dump_enabled_p ())
473 dump_printf_loc (MSG_NOTE
, vect_location
,
474 "swapped operands to match def types in ");
475 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
484 /* Verify if the scalar stmts STMTS are isomorphic, require data
485 permutation or are of unsupported types of operation. Return
486 true if they are, otherwise return false and indicate in *MATCHES
487 which stmts are not isomorphic to the first one. If MATCHES[0]
488 is false then this indicates the comparison could not be
489 carried out or the stmts will never be vectorized by SLP.
491 Note COND_EXPR is possibly ismorphic to another one after swapping its
492 operands. Set SWAP[i] to 1 if stmt I is COND_EXPR and isomorphic to
493 the first stmt by swapping the two operands of comparison; set SWAP[i]
494 to 2 if stmt I is isormorphic to the first stmt by inverting the code
495 of comparison. Take A1 >= B1 ? X1 : Y1 as an exmple, it can be swapped
496 to (B1 <= A1 ? X1 : Y1); or be inverted to (A1 < B1) ? Y1 : X1. */
499 vect_build_slp_tree_1 (vec_info
*vinfo
, unsigned char *swap
,
500 vec
<gimple
*> stmts
, unsigned int group_size
,
501 unsigned nops
, unsigned int *max_nunits
,
502 bool *matches
, bool *two_operators
)
505 gimple
*first_stmt
= stmts
[0], *stmt
= stmts
[0];
506 enum tree_code first_stmt_code
= ERROR_MARK
;
507 enum tree_code alt_stmt_code
= ERROR_MARK
;
508 enum tree_code rhs_code
= ERROR_MARK
;
509 enum tree_code first_cond_code
= ERROR_MARK
;
511 bool need_same_oprnds
= false;
512 tree vectype
= NULL_TREE
, scalar_type
, first_op1
= NULL_TREE
;
515 machine_mode optab_op2_mode
;
516 machine_mode vec_mode
;
518 gimple
*first_load
= NULL
, *prev_first_load
= NULL
;
520 /* For every stmt in NODE find its def stmt/s. */
521 FOR_EACH_VEC_ELT (stmts
, i
, stmt
)
526 if (dump_enabled_p ())
528 dump_printf_loc (MSG_NOTE
, vect_location
, "Build SLP for ");
529 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
532 /* Fail to vectorize statements marked as unvectorizable. */
533 if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt
)))
535 if (dump_enabled_p ())
537 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
538 "Build SLP failed: unvectorizable statement ");
539 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
541 /* Fatal mismatch. */
546 lhs
= gimple_get_lhs (stmt
);
547 if (lhs
== NULL_TREE
)
549 if (dump_enabled_p ())
551 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
552 "Build SLP failed: not GIMPLE_ASSIGN nor "
554 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
556 /* Fatal mismatch. */
561 scalar_type
= vect_get_smallest_scalar_type (stmt
, &dummy
, &dummy
);
562 vectype
= get_vectype_for_scalar_type (scalar_type
);
565 if (dump_enabled_p ())
567 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
568 "Build SLP failed: unsupported data-type ");
569 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
571 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
573 /* Fatal mismatch. */
578 /* If populating the vector type requires unrolling then fail
579 before adjusting *max_nunits for basic-block vectorization. */
580 if (is_a
<bb_vec_info
> (vinfo
)
581 && TYPE_VECTOR_SUBPARTS (vectype
) > group_size
)
583 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
584 "Build SLP failed: unrolling required "
585 "in basic block SLP\n");
586 /* Fatal mismatch. */
591 /* In case of multiple types we need to detect the smallest type. */
592 if (*max_nunits
< TYPE_VECTOR_SUBPARTS (vectype
))
593 *max_nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
595 if (gcall
*call_stmt
= dyn_cast
<gcall
*> (stmt
))
597 rhs_code
= CALL_EXPR
;
598 if (gimple_call_internal_p (call_stmt
)
599 || gimple_call_tail_p (call_stmt
)
600 || gimple_call_noreturn_p (call_stmt
)
601 || !gimple_call_nothrow_p (call_stmt
)
602 || gimple_call_chain (call_stmt
))
604 if (dump_enabled_p ())
606 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
607 "Build SLP failed: unsupported call type ");
608 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
611 /* Fatal mismatch. */
617 rhs_code
= gimple_assign_rhs_code (stmt
);
619 /* Check the operation. */
622 first_stmt_code
= rhs_code
;
624 /* Shift arguments should be equal in all the packed stmts for a
625 vector shift with scalar shift operand. */
626 if (rhs_code
== LSHIFT_EXPR
|| rhs_code
== RSHIFT_EXPR
627 || rhs_code
== LROTATE_EXPR
628 || rhs_code
== RROTATE_EXPR
)
630 vec_mode
= TYPE_MODE (vectype
);
632 /* First see if we have a vector/vector shift. */
633 optab
= optab_for_tree_code (rhs_code
, vectype
,
637 || optab_handler (optab
, vec_mode
) == CODE_FOR_nothing
)
639 /* No vector/vector shift, try for a vector/scalar shift. */
640 optab
= optab_for_tree_code (rhs_code
, vectype
,
645 if (dump_enabled_p ())
646 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
647 "Build SLP failed: no optab.\n");
648 /* Fatal mismatch. */
652 icode
= (int) optab_handler (optab
, vec_mode
);
653 if (icode
== CODE_FOR_nothing
)
655 if (dump_enabled_p ())
656 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
658 "op not supported by target.\n");
659 /* Fatal mismatch. */
663 optab_op2_mode
= insn_data
[icode
].operand
[2].mode
;
664 if (!VECTOR_MODE_P (optab_op2_mode
))
666 need_same_oprnds
= true;
667 first_op1
= gimple_assign_rhs2 (stmt
);
671 else if (rhs_code
== WIDEN_LSHIFT_EXPR
)
673 need_same_oprnds
= true;
674 first_op1
= gimple_assign_rhs2 (stmt
);
679 if (first_stmt_code
!= rhs_code
680 && alt_stmt_code
== ERROR_MARK
)
681 alt_stmt_code
= rhs_code
;
682 if (first_stmt_code
!= rhs_code
683 && (first_stmt_code
!= IMAGPART_EXPR
684 || rhs_code
!= REALPART_EXPR
)
685 && (first_stmt_code
!= REALPART_EXPR
686 || rhs_code
!= IMAGPART_EXPR
)
687 /* Handle mismatches in plus/minus by computing both
688 and merging the results. */
689 && !((first_stmt_code
== PLUS_EXPR
690 || first_stmt_code
== MINUS_EXPR
)
691 && (alt_stmt_code
== PLUS_EXPR
692 || alt_stmt_code
== MINUS_EXPR
)
693 && rhs_code
== alt_stmt_code
)
694 && !(STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt
))
695 && (first_stmt_code
== ARRAY_REF
696 || first_stmt_code
== BIT_FIELD_REF
697 || first_stmt_code
== INDIRECT_REF
698 || first_stmt_code
== COMPONENT_REF
699 || first_stmt_code
== MEM_REF
)))
701 if (dump_enabled_p ())
703 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
704 "Build SLP failed: different operation "
706 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
707 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
709 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
717 && !operand_equal_p (first_op1
, gimple_assign_rhs2 (stmt
), 0))
719 if (dump_enabled_p ())
721 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
722 "Build SLP failed: different shift "
724 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
730 if (rhs_code
== CALL_EXPR
)
732 gimple
*first_stmt
= stmts
[0];
733 if (gimple_call_num_args (stmt
) != nops
734 || !operand_equal_p (gimple_call_fn (first_stmt
),
735 gimple_call_fn (stmt
), 0)
736 || gimple_call_fntype (first_stmt
)
737 != gimple_call_fntype (stmt
))
739 if (dump_enabled_p ())
741 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
742 "Build SLP failed: different calls in ");
743 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
752 /* Grouped store or load. */
753 if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt
)))
755 if (REFERENCE_CLASS_P (lhs
))
763 first_load
= GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
));
766 /* Check that there are no loads from different interleaving
767 chains in the same node. */
768 if (prev_first_load
!= first_load
)
770 if (dump_enabled_p ())
772 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
774 "Build SLP failed: different "
775 "interleaving chains in one node ");
776 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
784 prev_first_load
= first_load
;
786 } /* Grouped access. */
789 if (TREE_CODE_CLASS (rhs_code
) == tcc_reference
)
791 /* Not grouped load. */
792 if (dump_enabled_p ())
794 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
795 "Build SLP failed: not grouped load ");
796 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
799 /* FORNOW: Not grouped loads are not supported. */
800 /* Fatal mismatch. */
805 /* Not memory operation. */
806 if (TREE_CODE_CLASS (rhs_code
) != tcc_binary
807 && TREE_CODE_CLASS (rhs_code
) != tcc_unary
808 && TREE_CODE_CLASS (rhs_code
) != tcc_expression
809 && TREE_CODE_CLASS (rhs_code
) != tcc_comparison
810 && rhs_code
!= CALL_EXPR
)
812 if (dump_enabled_p ())
814 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
815 "Build SLP failed: operation");
816 dump_printf (MSG_MISSED_OPTIMIZATION
, " unsupported ");
817 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
819 /* Fatal mismatch. */
824 if (rhs_code
== COND_EXPR
)
826 tree cond_expr
= gimple_assign_rhs1 (stmt
);
827 enum tree_code cond_code
= TREE_CODE (cond_expr
);
828 enum tree_code swap_code
= ERROR_MARK
;
829 enum tree_code invert_code
= ERROR_MARK
;
832 first_cond_code
= TREE_CODE (cond_expr
);
833 else if (TREE_CODE_CLASS (cond_code
) == tcc_comparison
)
835 bool honor_nans
= HONOR_NANS (TREE_OPERAND (cond_expr
, 0));
836 swap_code
= swap_tree_comparison (cond_code
);
837 invert_code
= invert_tree_comparison (cond_code
, honor_nans
);
840 if (first_cond_code
== cond_code
)
842 /* Isomorphic can be achieved by swapping. */
843 else if (first_cond_code
== swap_code
)
845 /* Isomorphic can be achieved by inverting. */
846 else if (first_cond_code
== invert_code
)
850 if (dump_enabled_p ())
852 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
853 "Build SLP failed: different"
855 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
867 for (i
= 0; i
< group_size
; ++i
)
871 /* If we allowed a two-operation SLP node verify the target can cope
872 with the permute we are going to use. */
873 if (alt_stmt_code
!= ERROR_MARK
874 && TREE_CODE_CLASS (alt_stmt_code
) != tcc_reference
)
877 = XALLOCAVEC (unsigned char, TYPE_VECTOR_SUBPARTS (vectype
));
878 for (i
= 0; i
< TYPE_VECTOR_SUBPARTS (vectype
); ++i
)
881 if (gimple_assign_rhs_code (stmts
[i
% group_size
]) == alt_stmt_code
)
882 sel
[i
] += TYPE_VECTOR_SUBPARTS (vectype
);
884 if (!can_vec_perm_p (TYPE_MODE (vectype
), false, sel
))
886 for (i
= 0; i
< group_size
; ++i
)
887 if (gimple_assign_rhs_code (stmts
[i
]) == alt_stmt_code
)
890 if (dump_enabled_p ())
892 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
893 "Build SLP failed: different operation "
895 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
897 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
899 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
905 *two_operators
= true;
911 /* Recursively build an SLP tree starting from NODE.
912 Fail (and return a value not equal to zero) if def-stmts are not
913 isomorphic, require data permutation or are of unsupported types of
914 operation. Otherwise, return 0.
915 The value returned is the depth in the SLP tree where a mismatch
919 vect_build_slp_tree (vec_info
*vinfo
,
920 vec
<gimple
*> stmts
, unsigned int group_size
,
921 unsigned int *max_nunits
,
922 vec
<slp_tree
> *loads
,
923 bool *matches
, unsigned *npermutes
, unsigned *tree_size
,
924 unsigned max_tree_size
)
926 unsigned nops
, i
, this_tree_size
= 0, this_max_nunits
= *max_nunits
;
933 if (is_gimple_call (stmt
))
934 nops
= gimple_call_num_args (stmt
);
935 else if (is_gimple_assign (stmt
))
937 nops
= gimple_num_ops (stmt
) - 1;
938 if (gimple_assign_rhs_code (stmt
) == COND_EXPR
)
941 else if (gimple_code (stmt
) == GIMPLE_PHI
)
946 /* If the SLP node is a PHI (induction or reduction), terminate
948 if (gimple_code (stmt
) == GIMPLE_PHI
)
950 /* Induction from different IVs is not supported. */
951 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt
)) == vect_induction_def
)
952 FOR_EACH_VEC_ELT (stmts
, i
, stmt
)
953 if (stmt
!= stmts
[0])
955 node
= vect_create_new_slp_node (stmts
);
960 bool two_operators
= false;
961 unsigned char *swap
= XALLOCAVEC (unsigned char, group_size
);
962 if (!vect_build_slp_tree_1 (vinfo
, swap
,
963 stmts
, group_size
, nops
,
964 &this_max_nunits
, matches
, &two_operators
))
967 /* If the SLP node is a load, terminate the recursion. */
968 if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt
))
969 && DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt
))))
971 *max_nunits
= this_max_nunits
;
972 node
= vect_create_new_slp_node (stmts
);
973 loads
->safe_push (node
);
977 /* Get at the operands, verifying they are compatible. */
978 vec
<slp_oprnd_info
> oprnds_info
= vect_create_oprnd_info (nops
, group_size
);
979 slp_oprnd_info oprnd_info
;
980 FOR_EACH_VEC_ELT (stmts
, i
, stmt
)
982 int res
= vect_get_and_check_slp_defs (vinfo
, &swap
[i
],
983 stmt
, i
, &oprnds_info
);
985 matches
[(res
== -1) ? 0 : i
] = false;
989 for (i
= 0; i
< group_size
; ++i
)
992 vect_free_oprnd_info (oprnds_info
);
996 auto_vec
<slp_tree
, 4> children
;
997 auto_vec
<slp_tree
> this_loads
;
1001 /* Create SLP_TREE nodes for the definition node/s. */
1002 FOR_EACH_VEC_ELT (oprnds_info
, i
, oprnd_info
)
1005 unsigned old_nloads
= this_loads
.length ();
1006 unsigned old_tree_size
= this_tree_size
;
1009 if (oprnd_info
->first_dt
!= vect_internal_def
1010 && oprnd_info
->first_dt
!= vect_reduction_def
1011 && oprnd_info
->first_dt
!= vect_induction_def
)
1014 if (++this_tree_size
> max_tree_size
)
1016 FOR_EACH_VEC_ELT (children
, j
, child
)
1017 vect_free_slp_tree (child
);
1018 vect_free_oprnd_info (oprnds_info
);
1022 if ((child
= vect_build_slp_tree (vinfo
, oprnd_info
->def_stmts
,
1023 group_size
, &this_max_nunits
,
1024 &this_loads
, matches
, npermutes
,
1026 max_tree_size
)) != NULL
)
1028 /* If we have all children of child built up from scalars then just
1029 throw that away and build it up this node from scalars. */
1030 if (!SLP_TREE_CHILDREN (child
).is_empty ()
1031 /* ??? Rejecting patterns this way doesn't work. We'd have to
1032 do extra work to cancel the pattern so the uses see the
1034 && !is_pattern_stmt_p
1035 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child
)[0])))
1037 slp_tree grandchild
;
1039 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child
), j
, grandchild
)
1040 if (SLP_TREE_DEF_TYPE (grandchild
) == vect_internal_def
)
1045 this_loads
.truncate (old_nloads
);
1046 this_tree_size
= old_tree_size
;
1047 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child
), j
, grandchild
)
1048 vect_free_slp_tree (grandchild
);
1049 SLP_TREE_CHILDREN (child
).truncate (0);
1051 dump_printf_loc (MSG_NOTE
, vect_location
,
1052 "Building parent vector operands from "
1053 "scalars instead\n");
1054 oprnd_info
->def_stmts
= vNULL
;
1055 SLP_TREE_DEF_TYPE (child
) = vect_external_def
;
1056 children
.safe_push (child
);
1061 oprnd_info
->def_stmts
= vNULL
;
1062 children
.safe_push (child
);
1066 /* If the SLP build failed fatally and we analyze a basic-block
1067 simply treat nodes we fail to build as externally defined
1068 (and thus build vectors from the scalar defs).
1069 The cost model will reject outright expensive cases.
1070 ??? This doesn't treat cases where permutation ultimatively
1071 fails (or we don't try permutation below). Ideally we'd
1072 even compute a permutation that will end up with the maximum
1074 if (is_a
<bb_vec_info
> (vinfo
)
1076 /* ??? Rejecting patterns this way doesn't work. We'd have to
1077 do extra work to cancel the pattern so the uses see the
1079 && !is_pattern_stmt_p (vinfo_for_stmt (stmt
)))
1081 dump_printf_loc (MSG_NOTE
, vect_location
,
1082 "Building vector operands from scalars\n");
1083 child
= vect_create_new_slp_node (oprnd_info
->def_stmts
);
1084 SLP_TREE_DEF_TYPE (child
) = vect_external_def
;
1085 children
.safe_push (child
);
1086 oprnd_info
->def_stmts
= vNULL
;
1090 /* If the SLP build for operand zero failed and operand zero
1091 and one can be commutated try that for the scalar stmts
1092 that failed the match. */
1094 /* A first scalar stmt mismatch signals a fatal mismatch. */
1096 /* ??? For COND_EXPRs we can swap the comparison operands
1097 as well as the arms under some constraints. */
1099 && oprnds_info
[1]->first_dt
== vect_internal_def
1100 && is_gimple_assign (stmt
)
1101 && commutative_tree_code (gimple_assign_rhs_code (stmt
))
1103 /* Do so only if the number of not successful permutes was nor more
1104 than a cut-ff as re-trying the recursive match on
1105 possibly each level of the tree would expose exponential
1109 /* Verify if we can safely swap or if we committed to a specific
1110 operand order already. */
1111 for (j
= 0; j
< group_size
; ++j
)
1114 || STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmts
[j
]))))
1116 if (dump_enabled_p ())
1118 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1119 "Build SLP failed: cannot swap operands "
1121 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
1127 /* Swap mismatched definition stmts. */
1128 dump_printf_loc (MSG_NOTE
, vect_location
,
1129 "Re-trying with swapped operands of stmts ");
1130 for (j
= 0; j
< group_size
; ++j
)
1133 std::swap (oprnds_info
[0]->def_stmts
[j
],
1134 oprnds_info
[1]->def_stmts
[j
]);
1135 dump_printf (MSG_NOTE
, "%d ", j
);
1137 dump_printf (MSG_NOTE
, "\n");
1138 /* And try again with scratch 'matches' ... */
1139 bool *tem
= XALLOCAVEC (bool, group_size
);
1140 if ((child
= vect_build_slp_tree (vinfo
, oprnd_info
->def_stmts
,
1141 group_size
, &this_max_nunits
,
1142 &this_loads
, tem
, npermutes
,
1144 max_tree_size
)) != NULL
)
1146 /* ... so if successful we can apply the operand swapping
1147 to the GIMPLE IL. This is necessary because for example
1148 vect_get_slp_defs uses operand indexes and thus expects
1149 canonical operand order. This is also necessary even
1150 if we end up building the operand from scalars as
1151 we'll continue to process swapped operand two. */
1152 for (j
= 0; j
< group_size
; ++j
)
1154 gimple
*stmt
= stmts
[j
];
1155 gimple_set_plf (stmt
, GF_PLF_1
, false);
1157 for (j
= 0; j
< group_size
; ++j
)
1159 gimple
*stmt
= stmts
[j
];
1162 /* Avoid swapping operands twice. */
1163 if (gimple_plf (stmt
, GF_PLF_1
))
1165 swap_ssa_operands (stmt
, gimple_assign_rhs1_ptr (stmt
),
1166 gimple_assign_rhs2_ptr (stmt
));
1167 gimple_set_plf (stmt
, GF_PLF_1
, true);
1170 /* Verify we swap all duplicates or none. */
1172 for (j
= 0; j
< group_size
; ++j
)
1174 gimple
*stmt
= stmts
[j
];
1175 gcc_assert (gimple_plf (stmt
, GF_PLF_1
) == ! matches
[j
]);
1178 /* If we have all children of child built up from scalars then
1179 just throw that away and build it up this node from scalars. */
1180 if (!SLP_TREE_CHILDREN (child
).is_empty ()
1181 /* ??? Rejecting patterns this way doesn't work. We'd have
1182 to do extra work to cancel the pattern so the uses see the
1184 && !is_pattern_stmt_p
1185 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child
)[0])))
1188 slp_tree grandchild
;
1190 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child
), j
, grandchild
)
1191 if (SLP_TREE_DEF_TYPE (grandchild
) == vect_internal_def
)
1196 this_loads
.truncate (old_nloads
);
1197 this_tree_size
= old_tree_size
;
1198 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child
), j
, grandchild
)
1199 vect_free_slp_tree (grandchild
);
1200 SLP_TREE_CHILDREN (child
).truncate (0);
1202 dump_printf_loc (MSG_NOTE
, vect_location
,
1203 "Building parent vector operands from "
1204 "scalars instead\n");
1205 oprnd_info
->def_stmts
= vNULL
;
1206 SLP_TREE_DEF_TYPE (child
) = vect_external_def
;
1207 children
.safe_push (child
);
1212 oprnd_info
->def_stmts
= vNULL
;
1213 children
.safe_push (child
);
1221 gcc_assert (child
== NULL
);
1222 FOR_EACH_VEC_ELT (children
, j
, child
)
1223 vect_free_slp_tree (child
);
1224 vect_free_oprnd_info (oprnds_info
);
1228 vect_free_oprnd_info (oprnds_info
);
1231 *tree_size
+= this_tree_size
;
1232 *max_nunits
= this_max_nunits
;
1233 loads
->safe_splice (this_loads
);
1235 node
= vect_create_new_slp_node (stmts
);
1236 SLP_TREE_TWO_OPERATORS (node
) = two_operators
;
1237 SLP_TREE_CHILDREN (node
).splice (children
);
1241 /* Dump a slp tree NODE using flags specified in DUMP_KIND. */
1244 vect_print_slp_tree (dump_flags_t dump_kind
, location_t loc
, slp_tree node
)
1250 dump_printf_loc (dump_kind
, loc
, "node%s\n",
1251 SLP_TREE_DEF_TYPE (node
) != vect_internal_def
1252 ? " (external)" : "");
1253 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
1255 dump_printf_loc (dump_kind
, loc
, "\tstmt %d ", i
);
1256 dump_gimple_stmt (dump_kind
, TDF_SLIM
, stmt
, 0);
1258 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1259 vect_print_slp_tree (dump_kind
, loc
, child
);
/* NOTE(review): extraction artifact — upstream line numbers fused into text, brace/declaration lines missing; code kept byte-identical.  Marks stmt J (or all stmts when J < 0) in NODE with MARK, then recurses into internal-def children. */
1263 /* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
1264 If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
1265 J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
1266 stmts in NODE are to be marked. */
1269 vect_mark_slp_stmts (slp_tree node
, enum slp_vect_type mark
, int j
)
1275 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
1278 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
1279 if (j
< 0 || i
== j
)
1280 STMT_SLP_TYPE (vinfo_for_stmt (stmt
)) = mark
;
1282 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1283 vect_mark_slp_stmts (child
, mark
, j
);
/* NOTE(review): extraction artifact — upstream line numbers fused into text, structural lines missing; code kept byte-identical.  Sets STMT_VINFO_RELEVANT to vect_used_in_scope for every scalar stmt of an internal-def NODE, asserting no conflicting prior relevance, then recurses into children. */
1287 /* Mark the statements of the tree rooted at NODE as relevant (vect_used). */
1290 vect_mark_slp_stmts_relevant (slp_tree node
)
1294 stmt_vec_info stmt_info
;
1297 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
1300 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
1302 stmt_info
= vinfo_for_stmt (stmt
);
1303 gcc_assert (!STMT_VINFO_RELEVANT (stmt_info
)
1304 || STMT_VINFO_RELEVANT (stmt_info
) == vect_used_in_scope
);
1305 STMT_VINFO_RELEVANT (stmt_info
) = vect_used_in_scope
;
1308 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1309 vect_mark_slp_stmts_relevant (child
);
/* NOTE(review): extraction artifact — upstream line numbers fused into text, structural lines missing; code kept byte-identical.  Recursively permutes SLP_TREE_SCALAR_STMTS of NODE into a fresh vec per PERMUTATION (tmp_stmts[permutation[i]] = stmt), releasing the old vector. */
1313 /* Rearrange the statements of NODE according to PERMUTATION. */
1316 vect_slp_rearrange_stmts (slp_tree node
, unsigned int group_size
,
1317 vec
<unsigned> permutation
)
1320 vec
<gimple
*> tmp_stmts
;
1324 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1325 vect_slp_rearrange_stmts (child
, group_size
, permutation
);
1327 gcc_assert (group_size
== SLP_TREE_SCALAR_STMTS (node
).length ());
1328 tmp_stmts
.create (group_size
);
1329 tmp_stmts
.quick_grow_cleared (group_size
);
1331 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
1332 tmp_stmts
[permutation
[i
]] = stmt
;
1334 SLP_TREE_SCALAR_STMTS (node
).release ();
1335 SLP_TREE_SCALAR_STMTS (node
) = tmp_stmts
;
/* NOTE(review): extraction artifact — upstream line numbers fused into text; conditionals' bodies/returns are missing lines; code kept byte-identical.  Checks all load nodes share one permutation that is a gap-free bijection on [0, group_size), rearranges the whole instance by it, then drops or identity-resets the per-node load permutations. */
1339 /* Attempt to reorder stmts in a reduction chain so that we don't
1340 require any load permutation. Return true if that was possible,
1341 otherwise return false. */
1344 vect_attempt_slp_rearrange_stmts (slp_instance slp_instn
)
1346 unsigned int group_size
= SLP_INSTANCE_GROUP_SIZE (slp_instn
);
1349 slp_tree node
, load
;
1351 /* Compare all the permutation sequences to the first one. We know
1352 that at least one load is permuted. */
1353 node
= SLP_INSTANCE_LOADS (slp_instn
)[0];
1354 if (!node
->load_permutation
.exists ())
1356 for (i
= 1; SLP_INSTANCE_LOADS (slp_instn
).iterate (i
, &load
); ++i
)
1358 if (!load
->load_permutation
.exists ())
1360 FOR_EACH_VEC_ELT (load
->load_permutation
, j
, lidx
)
1361 if (lidx
!= node
->load_permutation
[j
])
1365 /* Check that the loads in the first sequence are different and there
1366 are no gaps between them. */
1367 auto_sbitmap
load_index (group_size
);
1368 bitmap_clear (load_index
);
1369 FOR_EACH_VEC_ELT (node
->load_permutation
, i
, lidx
)
1371 if (lidx
>= group_size
)
1373 if (bitmap_bit_p (load_index
, lidx
))
1376 bitmap_set_bit (load_index
, lidx
);
1378 for (i
= 0; i
< group_size
; i
++)
1379 if (!bitmap_bit_p (load_index
, i
))
1382 /* This permutation is valid for reduction. Since the order of the
1383 statements in the nodes is not important unless they are memory
1384 accesses, we can rearrange the statements in all the nodes
1385 according to the order of the loads. */
1386 vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn
), group_size
,
1387 node
->load_permutation
);
1389 /* We are done, no actual permutations need to be generated. */
1390 unsigned int unrolling_factor
= SLP_INSTANCE_UNROLLING_FACTOR (slp_instn
);
1391 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1393 gimple
*first_stmt
= SLP_TREE_SCALAR_STMTS (node
)[0];
1394 first_stmt
= GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt
));
1395 /* But we have to keep those permutations that are required because
1396 of handling of gaps. */
1397 if (unrolling_factor
== 1
1398 || (group_size
== GROUP_SIZE (vinfo_for_stmt (first_stmt
))
1399 && GROUP_GAP (vinfo_for_stmt (first_stmt
)) == 0))
1400 SLP_TREE_LOAD_PERMUTATION (node
).release ();
1402 for (j
= 0; j
< SLP_TREE_LOAD_PERMUTATION (node
).length (); ++j
)
1403 SLP_TREE_LOAD_PERMUTATION (node
)[j
] = j
;
/* NOTE(review): extraction artifact — upstream line numbers fused into text; many return/brace lines dropped (the early-exit paths are not visible); code kept byte-identical.  Validates load permutations: reduction root delegates to vect_attempt_slp_rearrange_stmts; BB SLP accepts gap-1 subchains or verifies via vect_transform_slp_perm_load; loop SLP verifies each permuted load node. */
1409 /* Check if the required load permutations in the SLP instance
1410 SLP_INSTN are supported. */
1413 vect_supported_load_permutation_p (slp_instance slp_instn
)
1415 unsigned int group_size
= SLP_INSTANCE_GROUP_SIZE (slp_instn
);
1416 unsigned int i
, j
, k
, next
;
1418 gimple
*stmt
, *load
, *next_load
;
1420 if (dump_enabled_p ())
1422 dump_printf_loc (MSG_NOTE
, vect_location
, "Load permutation ");
1423 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1424 if (node
->load_permutation
.exists ())
1425 FOR_EACH_VEC_ELT (node
->load_permutation
, j
, next
)
1426 dump_printf (MSG_NOTE
, "%d ", next
);
1428 for (k
= 0; k
< group_size
; ++k
)
1429 dump_printf (MSG_NOTE
, "%d ", k
);
1430 dump_printf (MSG_NOTE
, "\n");
1433 /* In case of reduction every load permutation is allowed, since the order
1434 of the reduction statements is not important (as opposed to the case of
1435 grouped stores). The only condition we need to check is that all the
1436 load nodes are of the same size and have the same permutation (and then
1437 rearrange all the nodes of the SLP instance according to this
1440 /* Check that all the load nodes are of the same size. */
1441 /* ??? Can't we assert this? */
1442 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1443 if (SLP_TREE_SCALAR_STMTS (node
).length () != (unsigned) group_size
)
1446 node
= SLP_INSTANCE_TREE (slp_instn
);
1447 stmt
= SLP_TREE_SCALAR_STMTS (node
)[0];
1449 /* Reduction (there are no data-refs in the root).
1450 In reduction chain the order of the loads is not important. */
1451 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt
))
1452 && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)))
1453 vect_attempt_slp_rearrange_stmts (slp_instn
);
1455 /* In basic block vectorization we allow any subchain of an interleaving
1457 FORNOW: not supported in loop SLP because of realignment compications. */
1458 if (STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt
)))
1460 /* Check whether the loads in an instance form a subchain and thus
1461 no permutation is necessary. */
1462 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1464 if (!SLP_TREE_LOAD_PERMUTATION (node
).exists ())
1466 bool subchain_p
= true;
1468 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), j
, load
)
1471 && (next_load
!= load
1472 || GROUP_GAP (vinfo_for_stmt (load
)) != 1))
1477 next_load
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (load
));
1480 SLP_TREE_LOAD_PERMUTATION (node
).release ();
1483 stmt_vec_info group_info
1484 = vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node
)[0]);
1485 group_info
= vinfo_for_stmt (GROUP_FIRST_ELEMENT (group_info
));
1487 = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (group_info
));
1488 unsigned k
, maxk
= 0;
1489 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (node
), j
, k
)
1492 /* In BB vectorization we may not actually use a loaded vector
1493 accessing elements in excess of GROUP_SIZE. */
1494 if (maxk
>= (GROUP_SIZE (group_info
) & ~(nunits
- 1)))
1496 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1497 "BB vectorization with gaps at the end of "
1498 "a load is not supported\n");
1502 /* Verify the permutation can be generated. */
1505 if (!vect_transform_slp_perm_load (node
, tem
, NULL
,
1506 1, slp_instn
, true, &n_perms
))
1508 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
1510 "unsupported load permutation\n");
1518 /* For loop vectorization verify we can generate the permutation. */
1520 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1521 if (node
->load_permutation
.exists ()
1522 && !vect_transform_slp_perm_load
1524 SLP_INSTANCE_UNROLLING_FACTOR (slp_instn
), slp_instn
, true,
/* NOTE(review): extraction artifact — upstream line numbers fused into text, structural lines missing; code kept byte-identical.  Returns the latest (in BB order) scalar stmt of NODE, using the related stmt for pattern stmts. */
1532 /* Find the last store in SLP INSTANCE. */
1535 vect_find_last_scalar_stmt_in_slp (slp_tree node
)
1537 gimple
*last
= NULL
, *stmt
;
1539 for (int i
= 0; SLP_TREE_SCALAR_STMTS (node
).iterate (i
, &stmt
); i
++)
1541 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
1542 if (is_pattern_stmt_p (stmt_vinfo
))
1543 last
= get_later_stmt (STMT_VINFO_RELATED_STMT (stmt_vinfo
), last
);
1545 last
= get_later_stmt (stmt
, last
);
/* NOTE(review): extraction artifact — upstream line numbers fused into text; brace/return lines and parts of the load-counting loop are missing; code kept byte-identical.  Recursive per-node cost computation: grouped store/load modelling (incl. permutation cost and dead/redundant-load pruning), induction IVs, generic vector stmts (doubled for TWO_OPERATORS), plus prologue cost for constant/external operands; temporarily pushes child def-types onto stmts and restores them afterwards. */
1551 /* Compute the cost for the SLP node NODE in the SLP instance INSTANCE. */
1554 vect_analyze_slp_cost_1 (slp_instance instance
, slp_tree node
,
1555 stmt_vector_for_cost
*prologue_cost_vec
,
1556 stmt_vector_for_cost
*body_cost_vec
,
1557 unsigned ncopies_for_cost
)
1562 stmt_vec_info stmt_info
;
1565 /* Recurse down the SLP tree. */
1566 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1567 if (SLP_TREE_DEF_TYPE (child
) == vect_internal_def
)
1568 vect_analyze_slp_cost_1 (instance
, child
, prologue_cost_vec
,
1569 body_cost_vec
, ncopies_for_cost
);
1571 /* Look at the first scalar stmt to determine the cost. */
1572 stmt
= SLP_TREE_SCALAR_STMTS (node
)[0];
1573 stmt_info
= vinfo_for_stmt (stmt
);
1574 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
1576 vect_memory_access_type memory_access_type
1577 = (STMT_VINFO_STRIDED_P (stmt_info
)
1580 if (DR_IS_WRITE (STMT_VINFO_DATA_REF (stmt_info
)))
1581 vect_model_store_cost (stmt_info
, ncopies_for_cost
,
1582 memory_access_type
, vect_uninitialized_def
,
1583 node
, prologue_cost_vec
, body_cost_vec
);
1586 gcc_checking_assert (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info
)));
1587 if (SLP_TREE_LOAD_PERMUTATION (node
).exists ())
1589 /* If the load is permuted then the alignment is determined by
1590 the first group element not by the first scalar stmt DR. */
1591 stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
1592 stmt_info
= vinfo_for_stmt (stmt
);
1593 /* Record the cost for the permutation. */
1595 vect_transform_slp_perm_load (node
, vNULL
, NULL
,
1596 ncopies_for_cost
, instance
, true,
1598 record_stmt_cost (body_cost_vec
, n_perms
, vec_perm
,
1599 stmt_info
, 0, vect_body
);
1601 = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info
));
1602 /* And adjust the number of loads performed. This handles
1603 redundancies as well as loads that are later dead. */
1604 auto_sbitmap
perm (GROUP_SIZE (stmt_info
));
1605 bitmap_clear (perm
);
1606 for (i
= 0; i
< SLP_TREE_LOAD_PERMUTATION (node
).length (); ++i
)
1607 bitmap_set_bit (perm
, SLP_TREE_LOAD_PERMUTATION (node
)[i
]);
1608 ncopies_for_cost
= 0;
1609 bool load_seen
= false;
1610 for (i
= 0; i
< GROUP_SIZE (stmt_info
); ++i
)
1612 if (i
% nunits
== 0)
1618 if (bitmap_bit_p (perm
, i
))
1623 gcc_assert (ncopies_for_cost
1624 <= (GROUP_SIZE (stmt_info
) - GROUP_GAP (stmt_info
)
1625 + nunits
- 1) / nunits
);
1626 ncopies_for_cost
*= SLP_INSTANCE_UNROLLING_FACTOR (instance
);
1628 /* Record the cost for the vector loads. */
1629 vect_model_load_cost (stmt_info
, ncopies_for_cost
,
1630 memory_access_type
, node
, prologue_cost_vec
,
1635 else if (STMT_VINFO_TYPE (stmt_info
) == induc_vec_info_type
)
1637 /* ncopies_for_cost is the number of IVs we generate. */
1638 record_stmt_cost (body_cost_vec
, ncopies_for_cost
, vector_stmt
,
1639 stmt_info
, 0, vect_body
);
1641 /* Prologue cost for the initial values and step vector. */
1642 record_stmt_cost (prologue_cost_vec
, ncopies_for_cost
,
1644 (STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED
1646 ? vector_load
: vec_construct
,
1647 stmt_info
, 0, vect_prologue
);
1648 record_stmt_cost (prologue_cost_vec
, 1,
1650 (STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_info
))
1651 ? vector_load
: vec_construct
,
1652 stmt_info
, 0, vect_prologue
);
1654 /* ??? No easy way to get at the actual number of vector stmts
1655 to be geneated and thus the derived IVs. */
1659 record_stmt_cost (body_cost_vec
, ncopies_for_cost
, vector_stmt
,
1660 stmt_info
, 0, vect_body
);
1661 if (SLP_TREE_TWO_OPERATORS (node
))
1663 record_stmt_cost (body_cost_vec
, ncopies_for_cost
, vector_stmt
,
1664 stmt_info
, 0, vect_body
);
1665 record_stmt_cost (body_cost_vec
, ncopies_for_cost
, vec_perm
,
1666 stmt_info
, 0, vect_body
);
1670 /* Push SLP node def-type to stmts. */
1671 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1672 if (SLP_TREE_DEF_TYPE (child
) != vect_internal_def
)
1673 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child
), j
, stmt
)
1674 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt
)) = SLP_TREE_DEF_TYPE (child
);
1676 /* Scan operands and account for prologue cost of constants/externals.
1677 ??? This over-estimates cost for multiple uses and should be
1679 stmt
= SLP_TREE_SCALAR_STMTS (node
)[0];
1680 lhs
= gimple_get_lhs (stmt
);
1681 for (i
= 0; i
< gimple_num_ops (stmt
); ++i
)
1683 tree op
= gimple_op (stmt
, i
);
1685 enum vect_def_type dt
;
1686 if (!op
|| op
== lhs
)
1688 if (vect_is_simple_use (op
, stmt_info
->vinfo
, &def_stmt
, &dt
))
1690 /* Without looking at the actual initializer a vector of
1691 constants can be implemented as load from the constant pool.
1692 ??? We need to pass down stmt_info for a vector type
1693 even if it points to the wrong stmt. */
1694 if (dt
== vect_constant_def
)
1695 record_stmt_cost (prologue_cost_vec
, 1, vector_load
,
1696 stmt_info
, 0, vect_prologue
);
1697 else if (dt
== vect_external_def
)
1698 record_stmt_cost (prologue_cost_vec
, 1, vec_construct
,
1699 stmt_info
, 0, vect_prologue
);
1703 /* Restore stmt def-types. */
1704 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1705 if (SLP_TREE_DEF_TYPE (child
) != vect_internal_def
)
1706 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child
), j
, stmt
)
1707 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt
)) = vect_internal_def
;
/* NOTE(review): extraction artifact — upstream line numbers fused into text; switch-case labels and brace lines missing; code kept byte-identical.  Computes ncopies from group size x VF over nunits (widening reductions adjust nunits from the rhs type — the visible switch presumably covers DOT_PROD/SAD; verify against upstream), runs vect_analyze_slp_cost_1, then feeds delayed prologue and body costs into the target cost model via add_stmt_cost. */
1710 /* Compute the cost for the SLP instance INSTANCE. */
1713 vect_analyze_slp_cost (slp_instance instance
, void *data
)
1715 stmt_vector_for_cost body_cost_vec
, prologue_cost_vec
;
1716 unsigned ncopies_for_cost
;
1717 stmt_info_for_cost
*si
;
1720 if (dump_enabled_p ())
1721 dump_printf_loc (MSG_NOTE
, vect_location
,
1722 "=== vect_analyze_slp_cost ===\n");
1724 /* Calculate the number of vector stmts to create based on the unrolling
1725 factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
1726 GROUP_SIZE / NUNITS otherwise. */
1727 unsigned group_size
= SLP_INSTANCE_GROUP_SIZE (instance
);
1728 slp_tree node
= SLP_INSTANCE_TREE (instance
);
1729 stmt_vec_info stmt_info
= vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node
)[0]);
1730 /* Adjust the group_size by the vectorization factor which is always one
1731 for basic-block vectorization. */
1732 if (STMT_VINFO_LOOP_VINFO (stmt_info
))
1733 group_size
*= LOOP_VINFO_VECT_FACTOR (STMT_VINFO_LOOP_VINFO (stmt_info
));
1734 unsigned nunits
= TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info
));
1735 /* For reductions look at a reduction operand in case the reduction
1736 operation is widening like DOT_PROD or SAD. */
1737 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info
))
1739 gimple
*stmt
= SLP_TREE_SCALAR_STMTS (node
)[0];
1740 switch (gimple_assign_rhs_code (stmt
))
1744 nunits
= TYPE_VECTOR_SUBPARTS (get_vectype_for_scalar_type
1745 (TREE_TYPE (gimple_assign_rhs1 (stmt
))));
1750 ncopies_for_cost
= least_common_multiple (nunits
, group_size
) / nunits
;
1752 prologue_cost_vec
.create (10);
1753 body_cost_vec
.create (10);
1754 vect_analyze_slp_cost_1 (instance
, SLP_INSTANCE_TREE (instance
),
1755 &prologue_cost_vec
, &body_cost_vec
,
1758 /* Record the prologue costs, which were delayed until we were
1759 sure that SLP was successful. */
1760 FOR_EACH_VEC_ELT (prologue_cost_vec
, i
, si
)
1762 struct _stmt_vec_info
*stmt_info
1763 = si
->stmt
? vinfo_for_stmt (si
->stmt
) : NULL
;
1764 (void) add_stmt_cost (data
, si
->count
, si
->kind
, stmt_info
,
1765 si
->misalign
, vect_prologue
);
1768 /* Record the instance's instructions in the target cost model. */
1769 FOR_EACH_VEC_ELT (body_cost_vec
, i
, si
)
1771 struct _stmt_vec_info
*stmt_info
1772 = si
->stmt
? vinfo_for_stmt (si
->stmt
) : NULL
;
1773 (void) add_stmt_cost (data
, si
->count
, si
->kind
, stmt_info
,
1774 si
->misalign
, vect_body
);
1777 prologue_cost_vec
.release ();
1778 body_cost_vec
.release ();
/* NOTE(review): extraction artifact — upstream line numbers fused into text, brace/return lines missing; code kept byte-identical.  Splits a consecutive store group at GROUP1_SIZE: relinks GROUP_NEXT/FIRST_ELEMENT chains, sets group2's GROUP_GAP to the original gap plus group1_size and bumps group1's gap by group2_size, returning the first stmt of the second group. */
1781 /* Splits a group of stores, currently beginning at FIRST_STMT, into two groups:
1782 one (still beginning at FIRST_STMT) of size GROUP1_SIZE (also containing
1783 the first GROUP1_SIZE stmts, since stores are consecutive), the second
1784 containing the remainder.
1785 Return the first stmt in the second group. */
1788 vect_split_slp_store_group (gimple
*first_stmt
, unsigned group1_size
)
1790 stmt_vec_info first_vinfo
= vinfo_for_stmt (first_stmt
);
1791 gcc_assert (GROUP_FIRST_ELEMENT (first_vinfo
) == first_stmt
);
1792 gcc_assert (group1_size
> 0);
1793 int group2_size
= GROUP_SIZE (first_vinfo
) - group1_size
;
1794 gcc_assert (group2_size
> 0);
1795 GROUP_SIZE (first_vinfo
) = group1_size
;
1797 gimple
*stmt
= first_stmt
;
1798 for (unsigned i
= group1_size
; i
> 1; i
--)
1800 stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt
));
1801 gcc_assert (GROUP_GAP (vinfo_for_stmt (stmt
)) == 1);
1803 /* STMT is now the last element of the first group. */
1804 gimple
*group2
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt
));
1805 GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt
)) = 0;
1807 GROUP_SIZE (vinfo_for_stmt (group2
)) = group2_size
;
1808 for (stmt
= group2
; stmt
; stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt
)))
1810 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)) = group2
;
1811 gcc_assert (GROUP_GAP (vinfo_for_stmt (stmt
)) == 1);
1814 /* For the second group, the GROUP_GAP is that before the original group,
1815 plus skipping over the first vector. */
1816 GROUP_GAP (vinfo_for_stmt (group2
)) =
1817 GROUP_GAP (first_vinfo
) + group1_size
;
1819 /* GROUP_GAP of the first group now has to skip over the second group too. */
1820 GROUP_GAP (first_vinfo
) += group2_size
;
1822 if (dump_enabled_p ())
1823 dump_printf_loc (MSG_NOTE
, vect_location
, "Split group into %d and %d\n",
1824 group1_size
, group2_size
);
/* NOTE(review): extraction artifact — upstream line numbers fused into text; many control-flow/return lines are missing (success/failure paths are truncated); code kept byte-identical.  Builds an SLP instance from a store group, reduction chain, or reduction set: collects scalar stmts, calls vect_build_slp_tree, checks unrolling/load-permutation/load-lanes constraints, and on BB-SLP failure retries after splitting the store group at a vector boundary. */
1829 /* Analyze an SLP instance starting from a group of grouped stores. Call
1830 vect_build_slp_tree to build a tree of packed stmts if possible.
1831 Return FALSE if it's impossible to SLP any stmt in the loop. */
1834 vect_analyze_slp_instance (vec_info
*vinfo
,
1835 gimple
*stmt
, unsigned max_tree_size
)
1837 slp_instance new_instance
;
1839 unsigned int group_size
= GROUP_SIZE (vinfo_for_stmt (stmt
));
1840 unsigned int unrolling_factor
= 1, nunits
;
1841 tree vectype
, scalar_type
= NULL_TREE
;
1844 unsigned int max_nunits
= 0;
1845 vec
<slp_tree
> loads
;
1846 struct data_reference
*dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt
));
1847 vec
<gimple
*> scalar_stmts
;
1849 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)))
1853 scalar_type
= TREE_TYPE (DR_REF (dr
));
1854 vectype
= get_vectype_for_scalar_type (scalar_type
);
1858 gcc_assert (is_a
<loop_vec_info
> (vinfo
));
1859 vectype
= STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt
));
1862 group_size
= GROUP_SIZE (vinfo_for_stmt (stmt
));
1866 gcc_assert (is_a
<loop_vec_info
> (vinfo
));
1867 vectype
= STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt
));
1868 group_size
= as_a
<loop_vec_info
> (vinfo
)->reductions
.length ();
1873 if (dump_enabled_p ())
1875 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1876 "Build SLP failed: unsupported data-type ");
1877 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, scalar_type
);
1878 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
1883 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
1885 /* Create a node (a root of the SLP tree) for the packed grouped stores. */
1886 scalar_stmts
.create (group_size
);
1888 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)))
1890 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
1893 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next
))
1894 && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next
)))
1895 scalar_stmts
.safe_push (
1896 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next
)));
1898 scalar_stmts
.safe_push (next
);
1899 next
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next
));
1901 /* Mark the first element of the reduction chain as reduction to properly
1902 transform the node. In the reduction analysis phase only the last
1903 element of the chain is marked as reduction. */
1904 if (!STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt
)))
1905 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt
)) = vect_reduction_def
;
1909 /* Collect reduction statements. */
1910 vec
<gimple
*> reductions
= as_a
<loop_vec_info
> (vinfo
)->reductions
;
1911 for (i
= 0; reductions
.iterate (i
, &next
); i
++)
1912 scalar_stmts
.safe_push (next
);
1915 loads
.create (group_size
);
1917 /* Build the tree for the SLP instance. */
1918 bool *matches
= XALLOCAVEC (bool, group_size
);
1919 unsigned npermutes
= 0;
1920 node
= vect_build_slp_tree (vinfo
, scalar_stmts
, group_size
,
1921 &max_nunits
, &loads
, matches
, &npermutes
,
1922 NULL
, max_tree_size
);
1925 /* Calculate the unrolling factor based on the smallest type. */
1927 = least_common_multiple (max_nunits
, group_size
) / group_size
;
1929 if (unrolling_factor
!= 1
1930 && is_a
<bb_vec_info
> (vinfo
))
1933 if (max_nunits
> group_size
)
1935 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1936 "Build SLP failed: store group "
1937 "size not a multiple of the vector size "
1938 "in basic block SLP\n");
1939 vect_free_slp_tree (node
);
1943 /* Fatal mismatch. */
1944 matches
[group_size
/max_nunits
* max_nunits
] = false;
1945 vect_free_slp_tree (node
);
1950 /* Create a new SLP instance. */
1951 new_instance
= XNEW (struct _slp_instance
);
1952 SLP_INSTANCE_TREE (new_instance
) = node
;
1953 SLP_INSTANCE_GROUP_SIZE (new_instance
) = group_size
;
1954 SLP_INSTANCE_UNROLLING_FACTOR (new_instance
) = unrolling_factor
;
1955 SLP_INSTANCE_LOADS (new_instance
) = loads
;
1957 /* Compute the load permutation. */
1959 bool loads_permuted
= false;
1960 FOR_EACH_VEC_ELT (loads
, i
, load_node
)
1962 vec
<unsigned> load_permutation
;
1964 gimple
*load
, *first_stmt
;
1965 bool this_load_permuted
= false;
1966 load_permutation
.create (group_size
);
1967 first_stmt
= GROUP_FIRST_ELEMENT
1968 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node
)[0]));
1969 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node
), j
, load
)
1971 int load_place
= vect_get_place_in_interleaving_chain
1973 gcc_assert (load_place
!= -1);
1974 if (load_place
!= j
)
1975 this_load_permuted
= true;
1976 load_permutation
.safe_push (load_place
);
1978 if (!this_load_permuted
1979 /* The load requires permutation when unrolling exposes
1980 a gap either because the group is larger than the SLP
1981 group-size or because there is a gap between the groups. */
1982 && (unrolling_factor
== 1
1983 || (group_size
== GROUP_SIZE (vinfo_for_stmt (first_stmt
))
1984 && GROUP_GAP (vinfo_for_stmt (first_stmt
)) == 0)))
1986 load_permutation
.release ();
1989 SLP_TREE_LOAD_PERMUTATION (load_node
) = load_permutation
;
1990 loads_permuted
= true;
1995 if (!vect_supported_load_permutation_p (new_instance
))
1997 if (dump_enabled_p ())
1999 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2000 "Build SLP failed: unsupported load "
2002 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
,
2005 vect_free_slp_instance (new_instance
);
2010 /* If the loads and stores can be handled with load/store-lan
2011 instructions do not generate this SLP instance. */
2012 if (is_a
<loop_vec_info
> (vinfo
)
2014 && dr
&& vect_store_lanes_supported (vectype
, group_size
))
2017 FOR_EACH_VEC_ELT (loads
, i
, load_node
)
2019 gimple
*first_stmt
= GROUP_FIRST_ELEMENT
2020 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node
)[0]));
2021 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (first_stmt
);
2022 /* Use SLP for strided accesses (or if we
2023 can't load-lanes). */
2024 if (STMT_VINFO_STRIDED_P (stmt_vinfo
)
2025 || ! vect_load_lanes_supported
2026 (STMT_VINFO_VECTYPE (stmt_vinfo
),
2027 GROUP_SIZE (stmt_vinfo
)))
2030 if (i
== loads
.length ())
2032 if (dump_enabled_p ())
2033 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2034 "Built SLP cancelled: can use "
2035 "load/store-lanes\n");
2036 vect_free_slp_instance (new_instance
);
2041 vinfo
->slp_instances
.safe_push (new_instance
);
2043 if (dump_enabled_p ())
2045 dump_printf_loc (MSG_NOTE
, vect_location
,
2046 "Final SLP tree for instance:\n");
2047 vect_print_slp_tree (MSG_NOTE
, vect_location
, node
);
2055 /* Failed to SLP. */
2056 /* Free the allocated memory. */
2057 scalar_stmts
.release ();
2061 /* For basic block SLP, try to break the group up into multiples of the
2063 if (is_a
<bb_vec_info
> (vinfo
)
2064 && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
))
2065 && STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt
)))
2067 /* We consider breaking the group only on VF boundaries from the existing
2069 for (i
= 0; i
< group_size
; i
++)
2070 if (!matches
[i
]) break;
2072 if (i
>= nunits
&& i
< group_size
)
2074 /* Split into two groups at the first vector boundary before i. */
2075 gcc_assert ((nunits
& (nunits
- 1)) == 0);
2076 unsigned group1_size
= i
& ~(nunits
- 1);
2078 gimple
*rest
= vect_split_slp_store_group (stmt
, group1_size
);
2079 bool res
= vect_analyze_slp_instance (vinfo
, stmt
, max_tree_size
);
2080 /* If the first non-match was in the middle of a vector,
2081 skip the rest of that vector. */
2082 if (group1_size
< i
)
2084 i
= group1_size
+ nunits
;
2086 rest
= vect_split_slp_store_group (rest
, nunits
);
2089 res
|= vect_analyze_slp_instance (vinfo
, rest
, max_tree_size
);
2092 /* Even though the first vector did not all match, we might be able to SLP
2093 (some) of the remainder. FORNOW ignore this possibility. */
/* NOTE(review): extraction artifact — upstream line numbers fused into text, loop/brace lines missing; code kept byte-identical.  Driver: analyzes SLP instances from grouped stores, then (for loops) from reduction chains — dissolving a chain's GROUP_ links when analysis fails — and finally from the reduction set when it has more than one element. */
2100 /* Check if there are stmts in the loop can be vectorized using SLP. Build SLP
2101 trees of packed scalar stmts if SLP is possible. */
2104 vect_analyze_slp (vec_info
*vinfo
, unsigned max_tree_size
)
2107 gimple
*first_element
;
2109 if (dump_enabled_p ())
2110 dump_printf_loc (MSG_NOTE
, vect_location
, "=== vect_analyze_slp ===\n");
2112 /* Find SLP sequences starting from groups of grouped stores. */
2113 FOR_EACH_VEC_ELT (vinfo
->grouped_stores
, i
, first_element
)
2114 vect_analyze_slp_instance (vinfo
, first_element
, max_tree_size
);
2116 if (loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
))
2118 if (loop_vinfo
->reduction_chains
.length () > 0)
2120 /* Find SLP sequences starting from reduction chains. */
2121 FOR_EACH_VEC_ELT (loop_vinfo
->reduction_chains
, i
, first_element
)
2122 if (! vect_analyze_slp_instance (vinfo
, first_element
,
2125 /* Dissolve reduction chain group. */
2126 gimple
*next
, *stmt
= first_element
;
2129 stmt_vec_info vinfo
= vinfo_for_stmt (stmt
);
2130 next
= GROUP_NEXT_ELEMENT (vinfo
);
2131 GROUP_FIRST_ELEMENT (vinfo
) = NULL
;
2132 GROUP_NEXT_ELEMENT (vinfo
) = NULL
;
2135 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (first_element
))
2136 = vect_internal_def
;
2140 /* Find SLP sequences starting from groups of reductions. */
2141 if (loop_vinfo
->reductions
.length () > 1)
2142 vect_analyze_slp_instance (vinfo
, loop_vinfo
->reductions
[0],
/* NOTE(review): extraction artifact — upstream line numbers fused into text; the statement incrementing decided_to_slp is among the missing lines; code kept byte-identical.  FORNOW policy: SLP every instance, track the max unrolling factor, mark instance trees pure_slp, and return whether anything was SLPed. */
2150 /* For each possible SLP instance decide whether to SLP it and calculate overall
2151 unrolling factor needed to SLP the loop. Return TRUE if decided to SLP at
2152 least one instance. */
2155 vect_make_slp_decision (loop_vec_info loop_vinfo
)
2157 unsigned int i
, unrolling_factor
= 1;
2158 vec
<slp_instance
> slp_instances
= LOOP_VINFO_SLP_INSTANCES (loop_vinfo
);
2159 slp_instance instance
;
2160 int decided_to_slp
= 0;
2162 if (dump_enabled_p ())
2163 dump_printf_loc (MSG_NOTE
, vect_location
, "=== vect_make_slp_decision ==="
2166 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
2168 /* FORNOW: SLP if you can. */
2169 if (unrolling_factor
< SLP_INSTANCE_UNROLLING_FACTOR (instance
))
2170 unrolling_factor
= SLP_INSTANCE_UNROLLING_FACTOR (instance
);
2172 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
2173 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
2174 loop-based vectorization. Such stmts will be marked as HYBRID. */
2175 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance
), pure_slp
, -1);
2179 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo
) = unrolling_factor
;
2181 if (decided_to_slp
&& dump_enabled_p ())
2182 dump_printf_loc (MSG_NOTE
, vect_location
,
2183 "Decided to SLP %d instances. Unrolling factor %d\n",
2184 decided_to_slp
, unrolling_factor
);
2186 return (decided_to_slp
> 0);
/* NOTE(review): extraction artifact — upstream line numbers fused into text; the stype=hybrid propagation assignments and some brace lines are missing; code kept byte-identical.  Walks immediate uses of stmt I's def: a use in a relevant non-SLP stmt (ignoring reduction PHIs) makes the stmt hybrid; the hybrid marking then propagates down non-external children. */
2190 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
2191 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
2194 vect_detect_hybrid_slp_stmts (slp_tree node
, unsigned i
, slp_vect_type stype
)
2196 gimple
*stmt
= SLP_TREE_SCALAR_STMTS (node
)[i
];
2197 imm_use_iterator imm_iter
;
2199 stmt_vec_info use_vinfo
, stmt_vinfo
= vinfo_for_stmt (stmt
);
2201 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
2202 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2205 /* Propagate hybrid down the SLP tree. */
2206 if (stype
== hybrid
)
2208 else if (HYBRID_SLP_STMT (stmt_vinfo
))
2212 /* Check if a pure SLP stmt has uses in non-SLP stmts. */
2213 gcc_checking_assert (PURE_SLP_STMT (stmt_vinfo
));
2214 /* If we get a pattern stmt here we have to use the LHS of the
2215 original stmt for immediate uses. */
2216 if (! STMT_VINFO_IN_PATTERN_P (stmt_vinfo
)
2217 && STMT_VINFO_RELATED_STMT (stmt_vinfo
))
2218 stmt
= STMT_VINFO_RELATED_STMT (stmt_vinfo
);
2220 if (gimple_code (stmt
) == GIMPLE_PHI
)
2221 def
= gimple_phi_result (stmt
);
2223 def
= SINGLE_SSA_TREE_OPERAND (stmt
, SSA_OP_DEF
);
2225 FOR_EACH_IMM_USE_STMT (use_stmt
, imm_iter
, def
)
2227 if (!flow_bb_inside_loop_p (loop
, gimple_bb (use_stmt
)))
2229 use_vinfo
= vinfo_for_stmt (use_stmt
);
2230 if (STMT_VINFO_IN_PATTERN_P (use_vinfo
)
2231 && STMT_VINFO_RELATED_STMT (use_vinfo
))
2232 use_vinfo
= vinfo_for_stmt (STMT_VINFO_RELATED_STMT (use_vinfo
));
2233 if (!STMT_SLP_TYPE (use_vinfo
)
2234 && (STMT_VINFO_RELEVANT (use_vinfo
)
2235 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo
)))
2236 && !(gimple_code (use_stmt
) == GIMPLE_PHI
2237 && STMT_VINFO_DEF_TYPE (use_vinfo
) == vect_reduction_def
))
2239 if (dump_enabled_p ())
2241 dump_printf_loc (MSG_NOTE
, vect_location
, "use of SLP "
2242 "def in non-SLP stmt: ");
2243 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, use_stmt
, 0);
2251 && !HYBRID_SLP_STMT (stmt_vinfo
))
2253 if (dump_enabled_p ())
2255 dump_printf_loc (MSG_NOTE
, vect_location
, "marking hybrid: ");
2256 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
2258 STMT_SLP_TYPE (stmt_vinfo
) = hybrid
;
2261 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), j
, child
)
2262 if (SLP_TREE_DEF_TYPE (child
) != vect_external_def
)
2263 vect_detect_hybrid_slp_stmts (child
, i
, stype
);
/* NOTE(review): extraction artifact — upstream line numbers fused into text, return/brace lines missing; code kept byte-identical.  walk_tree callback: for an SSA_NAME operand defined inside the loop by a pure-SLP stmt, marks that def stmt hybrid. */
2266 /* Helpers for vect_detect_hybrid_slp walking pattern stmt uses. */
2269 vect_detect_hybrid_slp_1 (tree
*tp
, int *, void *data
)
2271 walk_stmt_info
*wi
= (walk_stmt_info
*)data
;
2272 struct loop
*loopp
= (struct loop
*)wi
->info
;
2277 if (TREE_CODE (*tp
) == SSA_NAME
2278 && !SSA_NAME_IS_DEFAULT_DEF (*tp
))
2280 gimple
*def_stmt
= SSA_NAME_DEF_STMT (*tp
);
2281 if (flow_bb_inside_loop_p (loopp
, gimple_bb (def_stmt
))
2282 && PURE_SLP_STMT (vinfo_for_stmt (def_stmt
)))
2284 if (dump_enabled_p ())
2286 dump_printf_loc (MSG_NOTE
, vect_location
, "marking hybrid: ");
2287 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, def_stmt
, 0);
2289 STMT_SLP_TYPE (vinfo_for_stmt (def_stmt
)) = hybrid
;
/* NOTE(review): extraction artifact — upstream line numbers fused into text; this callback's tail (its return) is among the dropped lines; code kept byte-identical.  walk_gimple_stmt callback: decides whether the operand walk should proceed for the stmt at GSI — the condition mirrors the "relevant non-SLP use, excluding reduction PHIs" test used above. */
2297 vect_detect_hybrid_slp_2 (gimple_stmt_iterator
*gsi
, bool *handled
,
2300 stmt_vec_info use_vinfo
= vinfo_for_stmt (gsi_stmt (*gsi
));
2301 /* If the stmt is in a SLP instance then this isn't a reason
2302 to mark use definitions in other SLP instances as hybrid. */
2303 if (! STMT_SLP_TYPE (use_vinfo
)
2304 && (STMT_VINFO_RELEVANT (use_vinfo
)
2305 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo
)))
2306 && ! (gimple_code (gsi_stmt (*gsi
)) == GIMPLE_PHI
2307 && STMT_VINFO_DEF_TYPE (use_vinfo
) == vect_reduction_def
))
/* NOTE(review): extraction artifact — upstream line numbers fused into text, brace lines missing; code kept byte-identical.  Two passes: first walks every pattern stmt (and its pattern-def sequence) in the loop body via walk_gimple_stmt/walk_gimple_seq to mark pure-SLP defs used by patterns as hybrid, then walks each SLP instance tree per group lane via vect_detect_hybrid_slp_stmts. */
2314 /* Find stmts that must be both vectorized and SLPed. */
2317 vect_detect_hybrid_slp (loop_vec_info loop_vinfo
)
2320 vec
<slp_instance
> slp_instances
= LOOP_VINFO_SLP_INSTANCES (loop_vinfo
);
2321 slp_instance instance
;
2323 if (dump_enabled_p ())
2324 dump_printf_loc (MSG_NOTE
, vect_location
, "=== vect_detect_hybrid_slp ==="
2327 /* First walk all pattern stmt in the loop and mark defs of uses as
2328 hybrid because immediate uses in them are not recorded. */
2329 for (i
= 0; i
< LOOP_VINFO_LOOP (loop_vinfo
)->num_nodes
; ++i
)
2331 basic_block bb
= LOOP_VINFO_BBS (loop_vinfo
)[i
];
2332 for (gimple_stmt_iterator gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
);
2335 gimple
*stmt
= gsi_stmt (gsi
);
2336 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2337 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
2340 memset (&wi
, 0, sizeof (wi
));
2341 wi
.info
= LOOP_VINFO_LOOP (loop_vinfo
);
2342 gimple_stmt_iterator gsi2
2343 = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info
));
2344 walk_gimple_stmt (&gsi2
, vect_detect_hybrid_slp_2
,
2345 vect_detect_hybrid_slp_1
, &wi
);
2346 walk_gimple_seq (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
),
2347 vect_detect_hybrid_slp_2
,
2348 vect_detect_hybrid_slp_1
, &wi
);
2353 /* Then walk the SLP instance trees marking stmts with uses in
2354 non-SLP stmts as hybrid, also propagating hybrid down the
2355 SLP tree, collecting the above info on-the-fly. */
2356 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
2358 for (unsigned i
= 0; i
< SLP_INSTANCE_GROUP_SIZE (instance
); ++i
)
2359 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance
),
2365 /* Create and initialize a new bb_vec_info struct for BB, as well as
2366 stmt_vec_info structs for all the stmts in it. */
2369 new_bb_vec_info (gimple_stmt_iterator region_begin
,
2370 gimple_stmt_iterator region_end
)
2372 basic_block bb
= gsi_bb (region_begin
);
2373 bb_vec_info res
= NULL
;
2374 gimple_stmt_iterator gsi
;
2376 res
= (bb_vec_info
) xcalloc (1, sizeof (struct _bb_vec_info
));
2377 res
->kind
= vec_info::bb
;
2378 BB_VINFO_BB (res
) = bb
;
2379 res
->region_begin
= region_begin
;
2380 res
->region_end
= region_end
;
2382 for (gsi
= region_begin
; gsi_stmt (gsi
) != gsi_stmt (region_end
);
2385 gimple
*stmt
= gsi_stmt (gsi
);
2386 gimple_set_uid (stmt
, 0);
2387 set_vinfo_for_stmt (stmt
, new_stmt_vec_info (stmt
, res
));
2390 BB_VINFO_GROUPED_STORES (res
).create (10);
2391 BB_VINFO_SLP_INSTANCES (res
).create (2);
2392 BB_VINFO_TARGET_COST_DATA (res
) = init_cost (NULL
);
2399 /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
2400 stmts in the basic block. */
2403 destroy_bb_vec_info (bb_vec_info bb_vinfo
)
2405 slp_instance instance
;
2411 vect_destroy_datarefs (bb_vinfo
);
2412 free_dependence_relations (BB_VINFO_DDRS (bb_vinfo
));
2413 BB_VINFO_GROUPED_STORES (bb_vinfo
).release ();
2414 FOR_EACH_VEC_ELT (BB_VINFO_SLP_INSTANCES (bb_vinfo
), i
, instance
)
2415 vect_free_slp_instance (instance
);
2416 BB_VINFO_SLP_INSTANCES (bb_vinfo
).release ();
2417 destroy_cost_data (BB_VINFO_TARGET_COST_DATA (bb_vinfo
));
2419 for (gimple_stmt_iterator si
= bb_vinfo
->region_begin
;
2420 gsi_stmt (si
) != gsi_stmt (bb_vinfo
->region_end
); gsi_next (&si
))
2422 gimple
*stmt
= gsi_stmt (si
);
2423 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2426 /* Free stmt_vec_info. */
2427 free_stmt_vec_info (stmt
);
2429 /* Reset region marker. */
2430 gimple_set_uid (stmt
, -1);
2433 BB_VINFO_BB (bb_vinfo
)->aux
= NULL
;
2438 /* Analyze statements contained in SLP tree node after recursively analyzing
2439 the subtree. Return TRUE if the operations are supported. */
2442 vect_slp_analyze_node_operations (slp_tree node
, slp_instance node_instance
)
2449 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
2452 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
2453 if (!vect_slp_analyze_node_operations (child
, node_instance
))
2456 stmt
= SLP_TREE_SCALAR_STMTS (node
)[0];
2457 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2458 gcc_assert (stmt_info
);
2459 gcc_assert (STMT_SLP_TYPE (stmt_info
) != loop_vect
);
2461 /* For BB vectorization vector types are assigned here.
2462 Memory accesses already got their vector type assigned
2463 in vect_analyze_data_refs. */
2464 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
2466 && ! STMT_VINFO_DATA_REF (stmt_info
))
2468 gcc_assert (PURE_SLP_STMT (stmt_info
));
2470 tree scalar_type
= TREE_TYPE (gimple_get_lhs (stmt
));
2471 if (dump_enabled_p ())
2473 dump_printf_loc (MSG_NOTE
, vect_location
,
2474 "get vectype for scalar type: ");
2475 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, scalar_type
);
2476 dump_printf (MSG_NOTE
, "\n");
2479 tree vectype
= get_vectype_for_scalar_type (scalar_type
);
2482 if (dump_enabled_p ())
2484 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2485 "not SLPed: unsupported data-type ");
2486 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
2488 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
2493 if (dump_enabled_p ())
2495 dump_printf_loc (MSG_NOTE
, vect_location
, "vectype: ");
2496 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, vectype
);
2497 dump_printf (MSG_NOTE
, "\n");
2501 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, sstmt
)
2502 STMT_VINFO_VECTYPE (vinfo_for_stmt (sstmt
)) = vectype
;
2505 /* Push SLP node def-type to stmt operands. */
2506 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), j
, child
)
2507 if (SLP_TREE_DEF_TYPE (child
) != vect_internal_def
)
2508 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child
)[0]))
2509 = SLP_TREE_DEF_TYPE (child
);
2510 bool res
= vect_analyze_stmt (stmt
, &dummy
, node
, node_instance
);
2511 /* Restore def-types. */
2512 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), j
, child
)
2513 if (SLP_TREE_DEF_TYPE (child
) != vect_internal_def
)
2514 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child
)[0]))
2515 = vect_internal_def
;
2523 /* Analyze statements in SLP instances of the basic block. Return TRUE if the
2524 operations are supported. */
2527 vect_slp_analyze_operations (vec
<slp_instance
> slp_instances
, void *data
)
2529 slp_instance instance
;
2532 if (dump_enabled_p ())
2533 dump_printf_loc (MSG_NOTE
, vect_location
,
2534 "=== vect_slp_analyze_operations ===\n");
2536 for (i
= 0; slp_instances
.iterate (i
, &instance
); )
2538 if (!vect_slp_analyze_node_operations (SLP_INSTANCE_TREE (instance
),
2541 dump_printf_loc (MSG_NOTE
, vect_location
,
2542 "removing SLP instance operations starting from: ");
2543 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
,
2544 SLP_TREE_SCALAR_STMTS
2545 (SLP_INSTANCE_TREE (instance
))[0], 0);
2546 vect_free_slp_instance (instance
);
2547 slp_instances
.ordered_remove (i
);
2551 /* Compute the costs of the SLP instance. */
2552 vect_analyze_slp_cost (instance
, data
);
2557 if (!slp_instances
.length ())
2564 /* Compute the scalar cost of the SLP node NODE and its children
2565 and return it. Do not account defs that are marked in LIFE and
2566 update LIFE according to uses of NODE. */
2569 vect_bb_slp_scalar_cost (basic_block bb
,
2570 slp_tree node
, vec
<bool, va_heap
> *life
)
2572 unsigned scalar_cost
= 0;
2577 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
2580 ssa_op_iter op_iter
;
2581 def_operand_p def_p
;
2582 stmt_vec_info stmt_info
;
2587 /* If there is a non-vectorized use of the defs then the scalar
2588 stmt is kept live in which case we do not account it or any
2589 required defs in the SLP children in the scalar cost. This
2590 way we make the vectorization more costly when compared to
2592 FOR_EACH_SSA_DEF_OPERAND (def_p
, stmt
, op_iter
, SSA_OP_DEF
)
2594 imm_use_iterator use_iter
;
2596 FOR_EACH_IMM_USE_STMT (use_stmt
, use_iter
, DEF_FROM_PTR (def_p
))
2597 if (!is_gimple_debug (use_stmt
)
2598 && (! vect_stmt_in_region_p (vinfo_for_stmt (stmt
)->vinfo
,
2600 || ! PURE_SLP_STMT (vinfo_for_stmt (use_stmt
))))
2603 BREAK_FROM_IMM_USE_STMT (use_iter
);
2609 /* Count scalar stmts only once. */
2610 if (gimple_visited_p (stmt
))
2612 gimple_set_visited (stmt
, true);
2614 stmt_info
= vinfo_for_stmt (stmt
);
2615 if (STMT_VINFO_DATA_REF (stmt_info
))
2617 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info
)))
2618 stmt_cost
= vect_get_stmt_cost (scalar_load
);
2620 stmt_cost
= vect_get_stmt_cost (scalar_store
);
2623 stmt_cost
= vect_get_stmt_cost (scalar_stmt
);
2625 scalar_cost
+= stmt_cost
;
2628 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
2629 if (SLP_TREE_DEF_TYPE (child
) == vect_internal_def
)
2630 scalar_cost
+= vect_bb_slp_scalar_cost (bb
, child
, life
);
2635 /* Check if vectorization of the basic block is profitable. */
2638 vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo
)
2640 vec
<slp_instance
> slp_instances
= BB_VINFO_SLP_INSTANCES (bb_vinfo
);
2641 slp_instance instance
;
2643 unsigned int vec_inside_cost
= 0, vec_outside_cost
= 0, scalar_cost
= 0;
2644 unsigned int vec_prologue_cost
= 0, vec_epilogue_cost
= 0;
2646 /* Calculate scalar cost. */
2647 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
2649 auto_vec
<bool, 20> life
;
2650 life
.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance
));
2651 scalar_cost
+= vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo
),
2652 SLP_INSTANCE_TREE (instance
),
2656 /* Unset visited flag. */
2657 for (gimple_stmt_iterator gsi
= bb_vinfo
->region_begin
;
2658 gsi_stmt (gsi
) != gsi_stmt (bb_vinfo
->region_end
); gsi_next (&gsi
))
2659 gimple_set_visited (gsi_stmt (gsi
), false);
2661 /* Complete the target-specific cost calculation. */
2662 finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo
), &vec_prologue_cost
,
2663 &vec_inside_cost
, &vec_epilogue_cost
);
2665 vec_outside_cost
= vec_prologue_cost
+ vec_epilogue_cost
;
2667 if (dump_enabled_p ())
2669 dump_printf_loc (MSG_NOTE
, vect_location
, "Cost model analysis: \n");
2670 dump_printf (MSG_NOTE
, " Vector inside of basic block cost: %d\n",
2672 dump_printf (MSG_NOTE
, " Vector prologue cost: %d\n", vec_prologue_cost
);
2673 dump_printf (MSG_NOTE
, " Vector epilogue cost: %d\n", vec_epilogue_cost
);
2674 dump_printf (MSG_NOTE
, " Scalar cost of basic block: %d\n", scalar_cost
);
2677 /* Vectorization is profitable if its cost is more than the cost of scalar
2678 version. Note that we err on the vector side for equal cost because
2679 the cost estimate is otherwise quite pessimistic (constant uses are
2680 free on the scalar side but cost a load on the vector side for
2682 if (vec_outside_cost
+ vec_inside_cost
> scalar_cost
)
2688 /* Check if the basic block can be vectorized. Returns a bb_vec_info
2689 if so and sets fatal to true if failure is independent of
2690 current_vector_size. */
2693 vect_slp_analyze_bb_1 (gimple_stmt_iterator region_begin
,
2694 gimple_stmt_iterator region_end
,
2695 vec
<data_reference_p
> datarefs
, int n_stmts
,
2698 bb_vec_info bb_vinfo
;
2699 slp_instance instance
;
2703 /* The first group of checks is independent of the vector size. */
2706 if (n_stmts
> PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB
))
2708 if (dump_enabled_p ())
2709 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2710 "not vectorized: too many instructions in "
2712 free_data_refs (datarefs
);
2716 bb_vinfo
= new_bb_vec_info (region_begin
, region_end
);
2720 BB_VINFO_DATAREFS (bb_vinfo
) = datarefs
;
2722 /* Analyze the data references. */
2724 if (!vect_analyze_data_refs (bb_vinfo
, &min_vf
))
2726 if (dump_enabled_p ())
2727 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2728 "not vectorized: unhandled data-ref in basic "
2731 destroy_bb_vec_info (bb_vinfo
);
2735 if (BB_VINFO_DATAREFS (bb_vinfo
).length () < 2)
2737 if (dump_enabled_p ())
2738 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2739 "not vectorized: not enough data-refs in "
2742 destroy_bb_vec_info (bb_vinfo
);
2746 if (!vect_analyze_data_ref_accesses (bb_vinfo
))
2748 if (dump_enabled_p ())
2749 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2750 "not vectorized: unhandled data access in "
2753 destroy_bb_vec_info (bb_vinfo
);
2757 /* If there are no grouped stores in the region there is no need
2758 to continue with pattern recog as vect_analyze_slp will fail
2760 if (bb_vinfo
->grouped_stores
.is_empty ())
2762 if (dump_enabled_p ())
2763 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2764 "not vectorized: no grouped stores in "
2767 destroy_bb_vec_info (bb_vinfo
);
2771 /* While the rest of the analysis below depends on it in some way. */
2774 vect_pattern_recog (bb_vinfo
);
2776 /* Check the SLP opportunities in the basic block, analyze and build SLP
2778 if (!vect_analyze_slp (bb_vinfo
, n_stmts
))
2780 if (dump_enabled_p ())
2782 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2783 "Failed to SLP the basic block.\n");
2784 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2785 "not vectorized: failed to find SLP opportunities "
2786 "in basic block.\n");
2789 destroy_bb_vec_info (bb_vinfo
);
2793 /* Analyze and verify the alignment of data references and the
2794 dependence in the SLP instances. */
2795 for (i
= 0; BB_VINFO_SLP_INSTANCES (bb_vinfo
).iterate (i
, &instance
); )
2797 if (! vect_slp_analyze_and_verify_instance_alignment (instance
)
2798 || ! vect_slp_analyze_instance_dependence (instance
))
2800 dump_printf_loc (MSG_NOTE
, vect_location
,
2801 "removing SLP instance operations starting from: ");
2802 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
,
2803 SLP_TREE_SCALAR_STMTS
2804 (SLP_INSTANCE_TREE (instance
))[0], 0);
2805 vect_free_slp_instance (instance
);
2806 BB_VINFO_SLP_INSTANCES (bb_vinfo
).ordered_remove (i
);
2810 /* Mark all the statements that we want to vectorize as pure SLP and
2812 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance
), pure_slp
, -1);
2813 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance
));
2817 if (! BB_VINFO_SLP_INSTANCES (bb_vinfo
).length ())
2819 destroy_bb_vec_info (bb_vinfo
);
2823 if (!vect_slp_analyze_operations (BB_VINFO_SLP_INSTANCES (bb_vinfo
),
2824 BB_VINFO_TARGET_COST_DATA (bb_vinfo
)))
2826 if (dump_enabled_p ())
2827 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2828 "not vectorized: bad operation in basic block.\n");
2830 destroy_bb_vec_info (bb_vinfo
);
2834 /* Cost model: check if the vectorization is worthwhile. */
2835 if (!unlimited_cost_model (NULL
)
2836 && !vect_bb_vectorization_profitable_p (bb_vinfo
))
2838 if (dump_enabled_p ())
2839 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2840 "not vectorized: vectorization is not "
2843 destroy_bb_vec_info (bb_vinfo
);
2847 if (dump_enabled_p ())
2848 dump_printf_loc (MSG_NOTE
, vect_location
,
2849 "Basic block will be vectorized using SLP\n");
2855 /* Main entry for the BB vectorizer. Analyze and transform BB, returns
2856 true if anything in the basic-block was vectorized. */
2859 vect_slp_bb (basic_block bb
)
2861 bb_vec_info bb_vinfo
;
2862 gimple_stmt_iterator gsi
;
2863 unsigned int vector_sizes
;
2864 bool any_vectorized
= false;
2866 if (dump_enabled_p ())
2867 dump_printf_loc (MSG_NOTE
, vect_location
, "===vect_slp_analyze_bb===\n");
2869 /* Autodetect first vector size we try. */
2870 current_vector_size
= 0;
2871 vector_sizes
= targetm
.vectorize
.autovectorize_vector_sizes ();
2873 gsi
= gsi_start_bb (bb
);
2877 if (gsi_end_p (gsi
))
2880 gimple_stmt_iterator region_begin
= gsi
;
2881 vec
<data_reference_p
> datarefs
= vNULL
;
2884 for (; !gsi_end_p (gsi
); gsi_next (&gsi
))
2886 gimple
*stmt
= gsi_stmt (gsi
);
2887 if (is_gimple_debug (stmt
))
2891 if (gimple_location (stmt
) != UNKNOWN_LOCATION
)
2892 vect_location
= gimple_location (stmt
);
2894 if (!find_data_references_in_stmt (NULL
, stmt
, &datarefs
))
2898 /* Skip leading unhandled stmts. */
2899 if (gsi_stmt (region_begin
) == gsi_stmt (gsi
))
2905 gimple_stmt_iterator region_end
= gsi
;
2907 bool vectorized
= false;
2909 bb_vinfo
= vect_slp_analyze_bb_1 (region_begin
, region_end
,
2910 datarefs
, insns
, fatal
);
2912 && dbg_cnt (vect_slp
))
2914 if (dump_enabled_p ())
2915 dump_printf_loc (MSG_NOTE
, vect_location
, "SLPing BB part\n");
2917 vect_schedule_slp (bb_vinfo
);
2919 if (dump_enabled_p ())
2920 dump_printf_loc (MSG_NOTE
, vect_location
,
2921 "basic block part vectorized\n");
2923 destroy_bb_vec_info (bb_vinfo
);
2928 destroy_bb_vec_info (bb_vinfo
);
2930 any_vectorized
|= vectorized
;
2932 vector_sizes
&= ~current_vector_size
;
2934 || vector_sizes
== 0
2935 || current_vector_size
== 0
2936 /* If vect_slp_analyze_bb_1 signaled that analysis for all
2937 vector sizes will fail do not bother iterating. */
2940 if (gsi_end_p (region_end
))
2943 /* Skip the unhandled stmt. */
2946 /* And reset vector sizes. */
2947 current_vector_size
= 0;
2948 vector_sizes
= targetm
.vectorize
.autovectorize_vector_sizes ();
2952 /* Try the next biggest vector size. */
2953 current_vector_size
= 1 << floor_log2 (vector_sizes
);
2954 if (dump_enabled_p ())
2955 dump_printf_loc (MSG_NOTE
, vect_location
,
2956 "***** Re-trying analysis with "
2957 "vector size %d\n", current_vector_size
);
2964 return any_vectorized
;
2968 /* Return 1 if vector type of boolean constant which is OPNUM
2969 operand in statement STMT is a boolean vector. */
2972 vect_mask_constant_operand_p (gimple
*stmt
, int opnum
)
2974 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
2975 enum tree_code code
= gimple_expr_code (stmt
);
2978 enum vect_def_type dt
;
2980 /* For comparison and COND_EXPR type is chosen depending
2981 on the other comparison operand. */
2982 if (TREE_CODE_CLASS (code
) == tcc_comparison
)
2985 op
= gimple_assign_rhs1 (stmt
);
2987 op
= gimple_assign_rhs2 (stmt
);
2989 if (!vect_is_simple_use (op
, stmt_vinfo
->vinfo
, &def_stmt
,
2993 return !vectype
|| VECTOR_BOOLEAN_TYPE_P (vectype
);
2996 if (code
== COND_EXPR
)
2998 tree cond
= gimple_assign_rhs1 (stmt
);
3000 if (TREE_CODE (cond
) == SSA_NAME
)
3003 op
= TREE_OPERAND (cond
, 1);
3005 op
= TREE_OPERAND (cond
, 0);
3007 if (!vect_is_simple_use (op
, stmt_vinfo
->vinfo
, &def_stmt
,
3011 return !vectype
|| VECTOR_BOOLEAN_TYPE_P (vectype
);
3014 return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo
));
3018 /* For constant and loop invariant defs of SLP_NODE this function returns
3019 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
3020 OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
3021 scalar stmts. NUMBER_OF_VECTORS is the number of vector defs to create.
3022 REDUC_INDEX is the index of the reduction operand in the statements, unless
3026 vect_get_constant_vectors (tree op
, slp_tree slp_node
,
3027 vec
<tree
> *vec_oprnds
,
3028 unsigned int op_num
, unsigned int number_of_vectors
)
3030 vec
<gimple
*> stmts
= SLP_TREE_SCALAR_STMTS (slp_node
);
3031 gimple
*stmt
= stmts
[0];
3032 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
3036 unsigned j
, number_of_places_left_in_vector
;
3039 int group_size
= stmts
.length ();
3040 unsigned int vec_num
, i
;
3041 unsigned number_of_copies
= 1;
3043 voprnds
.create (number_of_vectors
);
3044 bool constant_p
, is_store
;
3045 tree neutral_op
= NULL
;
3046 enum tree_code code
= gimple_expr_code (stmt
);
3047 gimple_seq ctor_seq
= NULL
;
3049 /* Check if vector type is a boolean vector. */
3050 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op
))
3051 && vect_mask_constant_operand_p (stmt
, op_num
))
3053 = build_same_sized_truth_vector_type (STMT_VINFO_VECTYPE (stmt_vinfo
));
3055 vector_type
= get_vectype_for_scalar_type (TREE_TYPE (op
));
3056 nunits
= TYPE_VECTOR_SUBPARTS (vector_type
);
3058 if (STMT_VINFO_DATA_REF (stmt_vinfo
))
3061 op
= gimple_assign_rhs1 (stmt
);
3068 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
3069 created vectors. It is greater than 1 if unrolling is performed.
3071 For example, we have two scalar operands, s1 and s2 (e.g., group of
3072 strided accesses of size two), while NUNITS is four (i.e., four scalars
3073 of this type can be packed in a vector). The output vector will contain
3074 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
3077 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
3078 containing the operands.
3080 For example, NUNITS is four as before, and the group size is 8
3081 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
3082 {s5, s6, s7, s8}. */
3084 number_of_copies
= nunits
* number_of_vectors
/ group_size
;
3086 number_of_places_left_in_vector
= nunits
;
3088 elts
= XALLOCAVEC (tree
, nunits
);
3089 bool place_after_defs
= false;
3090 for (j
= 0; j
< number_of_copies
; j
++)
3092 for (i
= group_size
- 1; stmts
.iterate (i
, &stmt
); i
--)
3095 op
= gimple_assign_rhs1 (stmt
);
3102 tree cond
= gimple_assign_rhs1 (stmt
);
3103 if (TREE_CODE (cond
) == SSA_NAME
)
3104 op
= gimple_op (stmt
, op_num
+ 1);
3105 else if (op_num
== 0 || op_num
== 1)
3106 op
= TREE_OPERAND (cond
, op_num
);
3110 op
= gimple_assign_rhs2 (stmt
);
3112 op
= gimple_assign_rhs3 (stmt
);
3118 op
= gimple_call_arg (stmt
, op_num
);
3125 op
= gimple_op (stmt
, op_num
+ 1);
3126 /* Unlike the other binary operators, shifts/rotates have
3127 the shift count being int, instead of the same type as
3128 the lhs, so make sure the scalar is the right type if
3129 we are dealing with vectors of
3130 long long/long/short/char. */
3131 if (op_num
== 1 && TREE_CODE (op
) == INTEGER_CST
)
3132 op
= fold_convert (TREE_TYPE (vector_type
), op
);
3136 op
= gimple_op (stmt
, op_num
+ 1);
3141 /* Create 'vect_ = {op0,op1,...,opn}'. */
3142 number_of_places_left_in_vector
--;
3144 if (!types_compatible_p (TREE_TYPE (vector_type
), TREE_TYPE (op
)))
3146 if (CONSTANT_CLASS_P (op
))
3148 if (VECTOR_BOOLEAN_TYPE_P (vector_type
))
3150 /* Can't use VIEW_CONVERT_EXPR for booleans because
3151 of possibly different sizes of scalar value and
3153 if (integer_zerop (op
))
3154 op
= build_int_cst (TREE_TYPE (vector_type
), 0);
3155 else if (integer_onep (op
))
3156 op
= build_all_ones_cst (TREE_TYPE (vector_type
));
3161 op
= fold_unary (VIEW_CONVERT_EXPR
,
3162 TREE_TYPE (vector_type
), op
);
3163 gcc_assert (op
&& CONSTANT_CLASS_P (op
));
3167 tree new_temp
= make_ssa_name (TREE_TYPE (vector_type
));
3169 if (VECTOR_BOOLEAN_TYPE_P (vector_type
))
3172 = build_all_ones_cst (TREE_TYPE (vector_type
));
3174 = build_zero_cst (TREE_TYPE (vector_type
));
3175 gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (op
)));
3176 init_stmt
= gimple_build_assign (new_temp
, COND_EXPR
,
3182 op
= build1 (VIEW_CONVERT_EXPR
, TREE_TYPE (vector_type
),
3185 = gimple_build_assign (new_temp
, VIEW_CONVERT_EXPR
,
3188 gimple_seq_add_stmt (&ctor_seq
, init_stmt
);
3192 elts
[number_of_places_left_in_vector
] = op
;
3193 if (!CONSTANT_CLASS_P (op
))
3195 if (TREE_CODE (orig_op
) == SSA_NAME
3196 && !SSA_NAME_IS_DEFAULT_DEF (orig_op
)
3197 && STMT_VINFO_BB_VINFO (stmt_vinfo
)
3198 && (STMT_VINFO_BB_VINFO (stmt_vinfo
)->bb
3199 == gimple_bb (SSA_NAME_DEF_STMT (orig_op
))))
3200 place_after_defs
= true;
3202 if (number_of_places_left_in_vector
== 0)
3205 vec_cst
= build_vector (vector_type
, elts
);
3208 vec
<constructor_elt
, va_gc
> *v
;
3210 vec_alloc (v
, nunits
);
3211 for (k
= 0; k
< nunits
; ++k
)
3212 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
, elts
[k
]);
3213 vec_cst
= build_constructor (vector_type
, v
);
3216 gimple_stmt_iterator gsi
;
3217 if (place_after_defs
)
3220 (vect_find_last_scalar_stmt_in_slp (slp_node
));
3221 init
= vect_init_vector (stmt
, vec_cst
, vector_type
, &gsi
);
3224 init
= vect_init_vector (stmt
, vec_cst
, vector_type
, NULL
);
3225 if (ctor_seq
!= NULL
)
3227 gsi
= gsi_for_stmt (SSA_NAME_DEF_STMT (init
));
3228 gsi_insert_seq_before_without_update (&gsi
, ctor_seq
,
3232 voprnds
.quick_push (init
);
3233 place_after_defs
= false;
3234 number_of_places_left_in_vector
= nunits
;
3240 /* Since the vectors are created in the reverse order, we should invert
3242 vec_num
= voprnds
.length ();
3243 for (j
= vec_num
; j
!= 0; j
--)
3245 vop
= voprnds
[j
- 1];
3246 vec_oprnds
->quick_push (vop
);
3251 /* In case that VF is greater than the unrolling factor needed for the SLP
3252 group of stmts, NUMBER_OF_VECTORS to be created is greater than
3253 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
3254 to replicate the vectors. */
3255 while (number_of_vectors
> vec_oprnds
->length ())
3257 tree neutral_vec
= NULL
;
3262 neutral_vec
= build_vector_from_val (vector_type
, neutral_op
);
3264 vec_oprnds
->quick_push (neutral_vec
);
3268 for (i
= 0; vec_oprnds
->iterate (i
, &vop
) && i
< vec_num
; i
++)
3269 vec_oprnds
->quick_push (vop
);
3275 /* Get vectorized definitions from SLP_NODE that contains corresponding
3276 vectorized def-stmts. */
3279 vect_get_slp_vect_defs (slp_tree slp_node
, vec
<tree
> *vec_oprnds
)
3282 gimple
*vec_def_stmt
;
3285 gcc_assert (SLP_TREE_VEC_STMTS (slp_node
).exists ());
3287 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node
), i
, vec_def_stmt
)
3289 gcc_assert (vec_def_stmt
);
3290 if (gimple_code (vec_def_stmt
) == GIMPLE_PHI
)
3291 vec_oprnd
= gimple_phi_result (vec_def_stmt
);
3293 vec_oprnd
= gimple_get_lhs (vec_def_stmt
);
3294 vec_oprnds
->quick_push (vec_oprnd
);
3299 /* Get vectorized definitions for SLP_NODE.
3300 If the scalar definitions are loop invariants or constants, collect them and
3301 call vect_get_constant_vectors() to create vector stmts.
3302 Otherwise, the def-stmts must be already vectorized and the vectorized stmts
3303 must be stored in the corresponding child of SLP_NODE, and we call
3304 vect_get_slp_vect_defs () to retrieve them. */
3307 vect_get_slp_defs (vec
<tree
> ops
, slp_tree slp_node
,
3308 vec
<vec
<tree
> > *vec_oprnds
)
3311 int number_of_vects
= 0, i
;
3312 HOST_WIDE_INT lhs_size_unit
, rhs_size_unit
;
3313 slp_tree child
= NULL
;
3316 bool first_iteration
= true;
3318 first_stmt
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
3319 FOR_EACH_VEC_ELT (ops
, i
, oprnd
)
3321 bool vectorized_defs
= false;
3326 vec_defs
.create (0);
3327 vec_oprnds
->quick_push (vec_defs
);
3331 /* For each operand we check if it has vectorized definitions in a child
3332 node or we need to create them (for invariants and constants). We
3333 check if the LHS of the first stmt of the next child matches OPRND.
3334 If it does, we found the correct child. Otherwise, we call
3335 vect_get_constant_vectors (). */
3336 for (unsigned int child_index
= 0;
3337 child_index
< SLP_TREE_CHILDREN (slp_node
).length (); child_index
++)
3339 child
= SLP_TREE_CHILDREN (slp_node
)[child_index
];
3341 /* We have to check both pattern and original def, if available. */
3342 if (SLP_TREE_DEF_TYPE (child
) == vect_internal_def
)
3344 gimple
*first_def
= SLP_TREE_SCALAR_STMTS (child
)[0];
3346 = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def
));
3349 if (gimple_code (first_def
) == GIMPLE_PHI
)
3350 first_def_op
= gimple_phi_result (first_def
);
3352 first_def_op
= gimple_get_lhs (first_def
);
3353 if (operand_equal_p (oprnd
, first_def_op
, 0)
3355 && operand_equal_p (oprnd
, gimple_get_lhs (related
), 0)))
3357 /* The number of vector defs is determined by the number of
3358 vector statements in the node from which we get those
3360 number_of_vects
= SLP_TREE_NUMBER_OF_VEC_STMTS (child
);
3361 vectorized_defs
= true;
3367 if (!vectorized_defs
&& first_iteration
)
3369 number_of_vects
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
3370 /* Number of vector stmts was calculated according to LHS in
3371 vect_schedule_slp_instance (), fix it by replacing LHS with
3372 RHS, if necessary. See vect_get_smallest_scalar_type () for
3374 vect_get_smallest_scalar_type (first_stmt
, &lhs_size_unit
,
3376 if (rhs_size_unit
!= lhs_size_unit
)
3378 number_of_vects
*= rhs_size_unit
;
3379 number_of_vects
/= lhs_size_unit
;
3383 /* Allocate memory for vectorized defs. */
3385 vec_defs
.create (number_of_vects
);
3387 /* For reduction defs we call vect_get_constant_vectors (), since we are
3388 looking for initial loop invariant values. */
3389 if (vectorized_defs
)
3390 /* The defs are already vectorized. */
3391 vect_get_slp_vect_defs (child
, &vec_defs
);
3393 /* Build vectors from scalar defs. */
3394 vect_get_constant_vectors (oprnd
, slp_node
, &vec_defs
, i
,
3397 vec_oprnds
->quick_push (vec_defs
);
3399 first_iteration
= false;
3403 /* Generate vector permute statements from a list of loads in DR_CHAIN.
3404 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
3405 permute statements for the SLP node NODE of the SLP instance
3406 SLP_NODE_INSTANCE. */
3409 vect_transform_slp_perm_load (slp_tree node
, vec
<tree
> dr_chain
,
3410 gimple_stmt_iterator
*gsi
, int vf
,
3411 slp_instance slp_node_instance
, bool analyze_only
,
3414 gimple
*stmt
= SLP_TREE_SCALAR_STMTS (node
)[0];
3415 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3416 tree mask_element_type
= NULL_TREE
, mask_type
;
3417 int nunits
, vec_index
= 0;
3418 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3419 int group_size
= SLP_INSTANCE_GROUP_SIZE (slp_node_instance
);
3421 unsigned char *mask
;
3424 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info
))
3427 stmt_info
= vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info
));
3429 mode
= TYPE_MODE (vectype
);
3431 /* The generic VEC_PERM_EXPR code always uses an integral type of the
3432 same size as the vector element being permuted. */
3433 mask_element_type
= lang_hooks
.types
.type_for_mode
3434 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype
))), 1);
3435 mask_type
= get_vectype_for_scalar_type (mask_element_type
);
3436 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
3437 mask
= XALLOCAVEC (unsigned char, nunits
);
3439 /* Initialize the vect stmts of NODE to properly insert the generated
3442 for (unsigned i
= SLP_TREE_VEC_STMTS (node
).length ();
3443 i
< SLP_TREE_NUMBER_OF_VEC_STMTS (node
); i
++)
3444 SLP_TREE_VEC_STMTS (node
).quick_push (NULL
);
3446 /* Generate permutation masks for every NODE. Number of masks for each NODE
3447 is equal to GROUP_SIZE.
3448 E.g., we have a group of three nodes with three loads from the same
3449 location in each node, and the vector size is 4. I.e., we have a
3450 a0b0c0a1b1c1... sequence and we need to create the following vectors:
3451 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
3452 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
3455 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
3456 The last mask is illegal since we assume two operands for permute
3457 operation, and the mask element values can't be outside that range.
3458 Hence, the last mask must be converted into {2,5,5,5}.
3459 For the first two permutations we need the first and the second input
3460 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
3461 we need the second and the third vectors: {b1,c1,a2,b2} and
3464 int vect_stmts_counter
= 0;
3466 int first_vec_index
= -1;
3467 int second_vec_index
= -1;
3471 for (int j
= 0; j
< vf
; j
++)
3473 for (int k
= 0; k
< group_size
; k
++)
3475 int i
= (SLP_TREE_LOAD_PERMUTATION (node
)[k
]
3476 + j
* STMT_VINFO_GROUP_SIZE (stmt_info
));
3477 vec_index
= i
/ nunits
;
3478 mask_element
= i
% nunits
;
3479 if (vec_index
== first_vec_index
3480 || first_vec_index
== -1)
3482 first_vec_index
= vec_index
;
3484 else if (vec_index
== second_vec_index
3485 || second_vec_index
== -1)
3487 second_vec_index
= vec_index
;
3488 mask_element
+= nunits
;
3492 if (dump_enabled_p ())
3494 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3495 "permutation requires at "
3496 "least three vectors ");
3497 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
3503 gcc_assert (mask_element
>= 0
3504 && mask_element
< 2 * nunits
);
3505 if (mask_element
!= index
)
3507 mask
[index
++] = mask_element
;
3509 if (index
== nunits
)
3512 && ! can_vec_perm_p (mode
, false, mask
))
3514 if (dump_enabled_p ())
3516 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
3518 "unsupported vect permute { ");
3519 for (i
= 0; i
< nunits
; ++i
)
3520 dump_printf (MSG_MISSED_OPTIMIZATION
, "%d ", mask
[i
]);
3521 dump_printf (MSG_MISSED_OPTIMIZATION
, "}\n");
3531 tree mask_vec
= NULL_TREE
;
3535 tree
*mask_elts
= XALLOCAVEC (tree
, nunits
);
3536 for (int l
= 0; l
< nunits
; ++l
)
3537 mask_elts
[l
] = build_int_cst (mask_element_type
,
3539 mask_vec
= build_vector (mask_type
, mask_elts
);
3542 if (second_vec_index
== -1)
3543 second_vec_index
= first_vec_index
;
3545 /* Generate the permute statement if necessary. */
3546 tree first_vec
= dr_chain
[first_vec_index
];
3547 tree second_vec
= dr_chain
[second_vec_index
];
3552 = vect_create_destination_var (gimple_assign_lhs (stmt
),
3554 perm_dest
= make_ssa_name (perm_dest
);
3555 perm_stmt
= gimple_build_assign (perm_dest
,
3557 first_vec
, second_vec
,
3559 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
3562 /* If mask was NULL_TREE generate the requested
3563 identity transform. */
3564 perm_stmt
= SSA_NAME_DEF_STMT (first_vec
);
3566 /* Store the vector statement in NODE. */
3567 SLP_TREE_VEC_STMTS (node
)[vect_stmts_counter
++] = perm_stmt
;
3571 first_vec_index
= -1;
3572 second_vec_index
= -1;
3583 /* Vectorize SLP instance tree in postorder. */
/* NOTE(review): this extraction is garbled — original lines are wrapped and
   some lines (return type, braces, a few statements) were dropped by the
   extraction tool.  Comments below are added against the visible code only;
   the code text itself is left untouched.  */
/* Vectorize the SLP subtree rooted at NODE, which belongs to INSTANCE.
   Children are vectorized first (postorder), then the node's own group of
   scalar stmts is transformed into SLP_TREE_VEC_STMTS.  The caller
   (vect_schedule_slp, below) uses the result to tell whether a store was
   vectorized.  */
3586 vect_schedule_slp_instance (slp_tree node
, slp_instance instance
,
3587 unsigned int vectorization_factor
)
3590 bool grouped_store
, is_store
;
3591 gimple_stmt_iterator si
;
3592 stmt_vec_info stmt_info
;
3593 unsigned int vec_stmts_size
, nunits
, group_size
;
/* External (non-internal-def) nodes carry no stmts to transform.  */
3598 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
/* Recurse: vectorize all children before this node (postorder).  */
3601 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
3602 vect_schedule_slp_instance (child
, instance
, vectorization_factor
);
3604 /* Push SLP node def-type to stmts. */
/* Temporarily propagate each non-internal child's def-type onto its scalar
   stmts so vect_transform_stmt sees the SLP view; undone at the bottom
   ("Restore stmt def-types").  */
3605 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
3606 if (SLP_TREE_DEF_TYPE (child
) != vect_internal_def
)
3607 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child
), j
, stmt
)
3608 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt
)) = SLP_TREE_DEF_TYPE (child
);
/* The first scalar stmt stands in for the whole group below.  */
3610 stmt
= SLP_TREE_SCALAR_STMTS (node
)[0];
3611 stmt_info
= vinfo_for_stmt (stmt
);
3613 /* VECTYPE is the type of the destination. */
3614 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3615 nunits
= (unsigned int) TYPE_VECTOR_SUBPARTS (vectype
);
3616 group_size
= SLP_INSTANCE_GROUP_SIZE (instance
);
3618 /* For each SLP instance calculate number of vector stmts to be created
3619 for the scalar stmts in each node of the SLP tree. Number of vector
3620 elements in one vector iteration is the number of scalar elements in
3621 one scalar iteration (GROUP_SIZE) multiplied by VF divided by vector
3623 Unless this is a SLP reduction in which case the number of vector
3624 stmts is equal to the number of vector stmts of the children. */
3625 if (GROUP_FIRST_ELEMENT (stmt_info
)
3626 && !STMT_VINFO_GROUPED_ACCESS (stmt_info
))
/* SLP reduction chain: inherit the count from the first child.  */
3627 vec_stmts_size
= SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node
)[0]);
3629 vec_stmts_size
= (vectorization_factor
* group_size
) / nunits
;
/* Allocate the vector-stmts array lazily (a node may be scheduled once
   even when shared).  */
3631 if (!SLP_TREE_VEC_STMTS (node
).exists ())
3633 SLP_TREE_VEC_STMTS (node
).create (vec_stmts_size
);
3634 SLP_TREE_NUMBER_OF_VEC_STMTS (node
) = vec_stmts_size
;
3637 if (dump_enabled_p ())
3639 dump_printf_loc (MSG_NOTE
,vect_location
,
3640 "------>vectorizing SLP node starting from: ");
3641 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
3644 /* Vectorized stmts go before the last scalar stmt which is where
3645 all uses are ready. */
3646 si
= gsi_for_stmt (vect_find_last_scalar_stmt_in_slp (node
));
3648 /* Mark the first element of the reduction chain as reduction to properly
3649 transform the node. In the analysis phase only the last element of the
3650 chain is marked as reduction. */
3651 if (GROUP_FIRST_ELEMENT (stmt_info
) && !STMT_VINFO_GROUPED_ACCESS (stmt_info
)
3652 && GROUP_FIRST_ELEMENT (stmt_info
) == stmt
)
3654 STMT_VINFO_DEF_TYPE (stmt_info
) = vect_reduction_def
;
3655 STMT_VINFO_TYPE (stmt_info
) = reduc_vec_info_type
;
3658 /* Handle two-operation SLP nodes by vectorizing the group with
3659 both operations and then performing a merge. */
3660 if (SLP_TREE_TWO_OPERATORS (node
))
/* CODE0 is the operation of the representative stmt; OCODE records the
   other operation found in the group (ERROR_MARK until seen).  */
3662 enum tree_code code0
= gimple_assign_rhs_code (stmt
);
3663 enum tree_code ocode
= ERROR_MARK
;
/* MASK records, per group element, which of the two operations that
   element uses; consumed below when building the merge permutation.  */
3665 unsigned char *mask
= XALLOCAVEC (unsigned char, group_size
);
3666 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, ostmt
)
3667 if (gimple_assign_rhs_code (ostmt
) != code0
)
3670 ocode
= gimple_assign_rhs_code (ostmt
);
3674 if (ocode
!= ERROR_MARK
)
3679 tree tmask
= NULL_TREE
;
/* Vectorize the whole group with CODE0, stash the results in V0 ...  */
3680 vect_transform_stmt (stmt
, &si
, &grouped_store
, node
, instance
);
3681 v0
= SLP_TREE_VEC_STMTS (node
).copy ();
3682 SLP_TREE_VEC_STMTS (node
).truncate (0);
/* ... then temporarily flip the representative stmt to OCODE, vectorize
   again into V1, and restore CODE0.  */
3683 gimple_assign_set_rhs_code (stmt
, ocode
);
3684 vect_transform_stmt (stmt
, &si
, &grouped_store
, node
, instance
);
3685 gimple_assign_set_rhs_code (stmt
, code0
);
3686 v1
= SLP_TREE_VEC_STMTS (node
).copy ();
3687 SLP_TREE_VEC_STMTS (node
).truncate (0);
/* Build an integer mask vector type of the same element width/count as
   VECTYPE for the VEC_PERM_EXPR selector (second arg 1 = unsignedp).  */
3688 tree meltype
= build_nonstandard_integer_type
3689 (GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype
))), 1);
3690 tree mvectype
= get_same_sized_vectype (meltype
, vectype
);
/* For each vector stmt pair (v0[j], v1[j]) build a constant selector
   from MASK and merge the two results with a VEC_PERM_EXPR.  */
3692 for (j
= 0; j
< v0
.length (); ++j
)
3694 tree
*melts
= XALLOCAVEC (tree
, TYPE_VECTOR_SUBPARTS (vectype
));
3695 for (l
= 0; l
< TYPE_VECTOR_SUBPARTS (vectype
); ++l
)
/* K walks MASK cyclically across vector stmts (wraps at group_size;
   the resetting stmt was dropped by the extraction).  */
3697 if (k
>= group_size
)
/* Lane L picks from v0 (mask[k]==0) or v1 (mask[k]==1): selector
   index = mask[k] * nunits + l.  */
3699 melts
[l
] = build_int_cst
3700 (meltype
, mask
[k
++] * TYPE_VECTOR_SUBPARTS (vectype
) + l
);
3702 tmask
= build_vector (mvectype
, melts
);
3704 /* ??? Not all targets support a VEC_PERM_EXPR with a
3705 constant mask that would translate to a vec_merge RTX
3706 (with their vec_perm_const_ok). We can either not
3707 vectorize in that case or let veclower do its job.
3708 Unfortunately that isn't too great and at least for
3709 plus/minus we'd eventually like to match targets
3710 vector addsub instructions. */
3712 vstmt
= gimple_build_assign (make_ssa_name (vectype
),
3714 gimple_assign_lhs (v0
[j
]),
3715 gimple_assign_lhs (v1
[j
]), tmask
);
3716 vect_finish_stmt_generation (stmt
, vstmt
, &si
);
3717 SLP_TREE_VEC_STMTS (node
).quick_push (vstmt
);
/* Ordinary (single-operation) node: transform the representative stmt;
   IS_STORE is propagated to the caller.  */
3724 is_store
= vect_transform_stmt (stmt
, &si
, &grouped_store
, node
, instance
);
3726 /* Restore stmt def-types. */
/* Undo the def-type push done at the top of the function.  */
3727 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
3728 if (SLP_TREE_DEF_TYPE (child
) != vect_internal_def
)
3729 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child
), j
, stmt
)
3730 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt
)) = vect_internal_def
;
3735 /* Replace scalar calls from SLP node NODE with setting of their lhs to zero.
3736 For loop vectorization this is done in vectorizable_call, but for SLP
3737 it needs to be deferred until end of vect_schedule_slp, because multiple
3738 SLP instances may refer to the same scalar stmt. */
/* NOTE(review): this extraction is garbled — original lines are wrapped and
   some lines (return type, braces) were dropped by the extraction tool.
   Comments below are added against the visible code only; the code text
   itself is left untouched.  */
3741 vect_remove_slp_scalar_calls (slp_tree node
)
3743 gimple
*stmt
, *new_stmt
;
3744 gimple_stmt_iterator gsi
;
3748 stmt_vec_info stmt_info
;
/* Nothing to do for external/constant nodes.  */
3750 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
/* Recurse into all children first.  */
3753 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
3754 vect_remove_slp_scalar_calls (child
);
3756 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
/* Skip non-calls and stmts no longer in a basic block (already gone).  */
3758 if (!is_gimple_call (stmt
) || gimple_bb (stmt
) == NULL
)
3760 stmt_info
= vinfo_for_stmt (stmt
);
/* Skip stmts without vinfo, pattern stmts, and stmts that also have
   non-SLP (hybrid) uses — those must keep their scalar call.  */
3761 if (stmt_info
== NULL
3762 || is_pattern_stmt_p (stmt_info
)
3763 || !PURE_SLP_STMT (stmt_info
))
/* Replace the call with "lhs = 0" of the lhs's type, transfer the
   stmt_vec_info to the replacement, and splice it into the IL.  */
3765 lhs
= gimple_call_lhs (stmt
);
3766 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (TREE_TYPE (lhs
)));
3767 set_vinfo_for_stmt (new_stmt
, stmt_info
);
/* Detach the old call's vinfo mapping before replacing it.  */
3768 set_vinfo_for_stmt (stmt
, NULL
);
3769 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
3770 gsi
= gsi_for_stmt (stmt
);
3771 gsi_replace (&gsi
, new_stmt
, false);
/* Keep the SSA def-stmt link consistent with the new defining stmt.  */
3772 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt
)) = new_stmt
;
3776 /* Generate vector code for all SLP instances in the loop/basic block. */
3779 vect_schedule_slp (vec_info
*vinfo
)
3781 vec
<slp_instance
> slp_instances
;
3782 slp_instance instance
;
3784 bool is_store
= false;
3786 slp_instances
= vinfo
->slp_instances
;
3787 if (is_a
<loop_vec_info
> (vinfo
))
3788 vf
= as_a
<loop_vec_info
> (vinfo
)->vectorization_factor
;
3792 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
3794 /* Schedule the tree of INSTANCE. */
3795 is_store
= vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance
),
3797 if (dump_enabled_p ())
3798 dump_printf_loc (MSG_NOTE
, vect_location
,
3799 "vectorizing stmts using SLP.\n");
3802 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
3804 slp_tree root
= SLP_INSTANCE_TREE (instance
);
3807 gimple_stmt_iterator gsi
;
3809 /* Remove scalar call stmts. Do not do this for basic-block
3810 vectorization as not all uses may be vectorized.
3811 ??? Why should this be necessary? DCE should be able to
3812 remove the stmts itself.
3813 ??? For BB vectorization we can as well remove scalar
3814 stmts starting from the SLP tree root if they have no
3816 if (is_a
<loop_vec_info
> (vinfo
))
3817 vect_remove_slp_scalar_calls (root
);
3819 for (j
= 0; SLP_TREE_SCALAR_STMTS (root
).iterate (j
, &store
)
3820 && j
< SLP_INSTANCE_GROUP_SIZE (instance
); j
++)
3822 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store
)))
3825 if (is_pattern_stmt_p (vinfo_for_stmt (store
)))
3826 store
= STMT_VINFO_RELATED_STMT (vinfo_for_stmt (store
));
3827 /* Free the attached stmt_vec_info and remove the stmt. */
3828 gsi
= gsi_for_stmt (store
);
3829 unlink_stmt_vdef (store
);
3830 gsi_remove (&gsi
, true);
3831 release_defs (store
);
3832 free_stmt_vec_info (store
);