/* SLP - Basic Block Vectorization
   Copyright (C) 2007-2017 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
24 #include "coretypes.h"
30 #include "tree-pass.h"
32 #include "optabs-tree.h"
33 #include "insn-config.h"
34 #include "recog.h" /* FIXME: for insn_data */
36 #include "fold-const.h"
37 #include "stor-layout.h"
38 #include "gimple-iterator.h"
40 #include "tree-vectorizer.h"
41 #include "langhooks.h"
42 #include "gimple-walk.h"
44 #include "tree-vector-builder.h"
45 #include "vec-perm-indices.h"
48 /* Recursively free the memory allocated for the SLP tree rooted at NODE. */
51 vect_free_slp_tree (slp_tree node
)
56 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
57 vect_free_slp_tree (child
);
60 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
61 /* After transform some stmts are removed and thus their vinfo is gone. */
62 if (vinfo_for_stmt (stmt
))
64 gcc_assert (STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt
)) > 0);
65 STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt
))--;
68 SLP_TREE_CHILDREN (node
).release ();
69 SLP_TREE_SCALAR_STMTS (node
).release ();
70 SLP_TREE_VEC_STMTS (node
).release ();
71 SLP_TREE_LOAD_PERMUTATION (node
).release ();
77 /* Free the memory allocated for the SLP instance. */
80 vect_free_slp_instance (slp_instance instance
)
82 vect_free_slp_tree (SLP_INSTANCE_TREE (instance
));
83 SLP_INSTANCE_LOADS (instance
).release ();
88 /* Create an SLP node for SCALAR_STMTS. */
91 vect_create_new_slp_node (vec
<gimple
*> scalar_stmts
)
94 gimple
*stmt
= scalar_stmts
[0];
97 if (is_gimple_call (stmt
))
98 nops
= gimple_call_num_args (stmt
);
99 else if (is_gimple_assign (stmt
))
101 nops
= gimple_num_ops (stmt
) - 1;
102 if (gimple_assign_rhs_code (stmt
) == COND_EXPR
)
105 else if (gimple_code (stmt
) == GIMPLE_PHI
)
110 node
= XNEW (struct _slp_tree
);
111 SLP_TREE_SCALAR_STMTS (node
) = scalar_stmts
;
112 SLP_TREE_VEC_STMTS (node
).create (0);
113 SLP_TREE_CHILDREN (node
).create (nops
);
114 SLP_TREE_LOAD_PERMUTATION (node
) = vNULL
;
115 SLP_TREE_TWO_OPERATORS (node
) = false;
116 SLP_TREE_DEF_TYPE (node
) = vect_internal_def
;
119 FOR_EACH_VEC_ELT (scalar_stmts
, i
, stmt
)
120 STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt
))++;
126 /* This structure is used in creation of an SLP tree. Each instance
127 corresponds to the same operand in a group of scalar stmts in an SLP
129 typedef struct _slp_oprnd_info
131 /* Def-stmts for the operands. */
132 vec
<gimple
*> def_stmts
;
133 /* Information about the first statement, its vector def-type, type, the
134 operand itself in case it's constant, and an indication if it's a pattern
137 enum vect_def_type first_dt
;
143 /* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
145 static vec
<slp_oprnd_info
>
146 vect_create_oprnd_info (int nops
, int group_size
)
149 slp_oprnd_info oprnd_info
;
150 vec
<slp_oprnd_info
> oprnds_info
;
152 oprnds_info
.create (nops
);
153 for (i
= 0; i
< nops
; i
++)
155 oprnd_info
= XNEW (struct _slp_oprnd_info
);
156 oprnd_info
->def_stmts
.create (group_size
);
157 oprnd_info
->first_dt
= vect_uninitialized_def
;
158 oprnd_info
->first_op_type
= NULL_TREE
;
159 oprnd_info
->first_pattern
= false;
160 oprnd_info
->second_pattern
= false;
161 oprnds_info
.quick_push (oprnd_info
);
168 /* Free operands info. */
171 vect_free_oprnd_info (vec
<slp_oprnd_info
> &oprnds_info
)
174 slp_oprnd_info oprnd_info
;
176 FOR_EACH_VEC_ELT (oprnds_info
, i
, oprnd_info
)
178 oprnd_info
->def_stmts
.release ();
179 XDELETE (oprnd_info
);
182 oprnds_info
.release ();
186 /* Find the place of the data-ref in STMT in the interleaving chain that starts
187 from FIRST_STMT. Return -1 if the data-ref is not a part of the chain. */
190 vect_get_place_in_interleaving_chain (gimple
*stmt
, gimple
*first_stmt
)
192 gimple
*next_stmt
= first_stmt
;
195 if (first_stmt
!= GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)))
200 if (next_stmt
== stmt
)
202 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
204 result
+= GROUP_GAP (vinfo_for_stmt (next_stmt
));
212 /* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
213 they are of a valid type and that they match the defs of the first stmt of
214 the SLP group (stored in OPRNDS_INFO). This function tries to match stmts
215 by swapping operands of STMT when possible. Non-zero *SWAP indicates swap
216 is required for cond_expr stmts. Specifically, *SWAP is 1 if STMT is cond
217 and operands of comparison need to be swapped; *SWAP is 2 if STMT is cond
218 and code of comparison needs to be inverted. If there is any operand swap
219 in this function, *SWAP is set to non-zero value.
220 If there was a fatal error return -1; if the error could be corrected by
221 swapping operands of father node of this one, return 1; if everything is
225 vect_get_and_check_slp_defs (vec_info
*vinfo
, unsigned char *swap
,
226 gimple
*stmt
, unsigned stmt_num
,
227 vec
<slp_oprnd_info
> *oprnds_info
)
230 unsigned int i
, number_of_oprnds
;
232 enum vect_def_type dt
= vect_uninitialized_def
;
233 bool pattern
= false;
234 slp_oprnd_info oprnd_info
;
235 int first_op_idx
= 1;
236 bool commutative
= false;
237 bool first_op_cond
= false;
238 bool first
= stmt_num
== 0;
239 bool second
= stmt_num
== 1;
241 if (is_gimple_call (stmt
))
243 number_of_oprnds
= gimple_call_num_args (stmt
);
246 else if (is_gimple_assign (stmt
))
248 enum tree_code code
= gimple_assign_rhs_code (stmt
);
249 number_of_oprnds
= gimple_num_ops (stmt
) - 1;
250 /* Swap can only be done for cond_expr if asked to, otherwise we
251 could result in different comparison code to the first stmt. */
252 if (gimple_assign_rhs_code (stmt
) == COND_EXPR
253 && COMPARISON_CLASS_P (gimple_assign_rhs1 (stmt
)))
255 first_op_cond
= true;
259 commutative
= commutative_tree_code (code
);
264 bool swapped
= (*swap
!= 0);
265 gcc_assert (!swapped
|| first_op_cond
);
266 for (i
= 0; i
< number_of_oprnds
; i
++)
271 /* Map indicating how operands of cond_expr should be swapped. */
272 int maps
[3][4] = {{0, 1, 2, 3}, {1, 0, 2, 3}, {0, 1, 3, 2}};
273 int *map
= maps
[*swap
];
276 oprnd
= TREE_OPERAND (gimple_op (stmt
, first_op_idx
), map
[i
]);
278 oprnd
= gimple_op (stmt
, map
[i
]);
281 oprnd
= gimple_op (stmt
, first_op_idx
+ (swapped
? !i
: i
));
283 oprnd_info
= (*oprnds_info
)[i
];
285 if (!vect_is_simple_use (oprnd
, vinfo
, &def_stmt
, &dt
))
287 if (dump_enabled_p ())
289 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
290 "Build SLP failed: can't analyze def for ");
291 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, oprnd
);
292 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
298 /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
299 from the pattern. Check that all the stmts of the node are in the
301 if (def_stmt
&& gimple_bb (def_stmt
)
302 && vect_stmt_in_region_p (vinfo
, def_stmt
)
303 && vinfo_for_stmt (def_stmt
)
304 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt
))
305 && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt
))
306 && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt
)))
309 if (!first
&& !oprnd_info
->first_pattern
310 /* Allow different pattern state for the defs of the
311 first stmt in reduction chains. */
312 && (oprnd_info
->first_dt
!= vect_reduction_def
313 || (!second
&& !oprnd_info
->second_pattern
)))
323 if (dump_enabled_p ())
325 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
326 "Build SLP failed: some of the stmts"
327 " are in a pattern, and others are not ");
328 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, oprnd
);
329 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
335 def_stmt
= STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt
));
336 dt
= STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt
));
338 if (dt
== vect_unknown_def_type
)
340 if (dump_enabled_p ())
341 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
342 "Unsupported pattern.\n");
346 switch (gimple_code (def_stmt
))
353 if (dump_enabled_p ())
354 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
355 "unsupported defining stmt:\n");
361 oprnd_info
->second_pattern
= pattern
;
365 oprnd_info
->first_dt
= dt
;
366 oprnd_info
->first_pattern
= pattern
;
367 oprnd_info
->first_op_type
= TREE_TYPE (oprnd
);
371 /* Not first stmt of the group, check that the def-stmt/s match
372 the def-stmt/s of the first stmt. Allow different definition
373 types for reduction chains: the first stmt must be a
374 vect_reduction_def (a phi node), and the rest
375 vect_internal_def. */
376 if (((oprnd_info
->first_dt
!= dt
377 && !(oprnd_info
->first_dt
== vect_reduction_def
378 && dt
== vect_internal_def
)
379 && !((oprnd_info
->first_dt
== vect_external_def
380 || oprnd_info
->first_dt
== vect_constant_def
)
381 && (dt
== vect_external_def
382 || dt
== vect_constant_def
)))
383 || !types_compatible_p (oprnd_info
->first_op_type
,
386 /* Try swapping operands if we got a mismatch. */
395 if (dump_enabled_p ())
396 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
397 "Build SLP failed: different types\n");
403 /* Check the types of the definitions. */
406 case vect_constant_def
:
407 case vect_external_def
:
410 case vect_reduction_def
:
411 case vect_induction_def
:
412 case vect_internal_def
:
413 oprnd_info
->def_stmts
.quick_push (def_stmt
);
417 /* FORNOW: Not supported. */
418 if (dump_enabled_p ())
420 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
421 "Build SLP failed: illegal type of def ");
422 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, oprnd
);
423 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
433 /* If there are already uses of this stmt in a SLP instance then
434 we've committed to the operand order and can't swap it. */
435 if (STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt
)) != 0)
437 if (dump_enabled_p ())
439 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
440 "Build SLP failed: cannot swap operands of "
442 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
449 tree cond
= gimple_assign_rhs1 (stmt
);
450 enum tree_code code
= TREE_CODE (cond
);
455 swap_ssa_operands (stmt
, &TREE_OPERAND (cond
, 0),
456 &TREE_OPERAND (cond
, 1));
457 TREE_SET_CODE (cond
, swap_tree_comparison (code
));
462 swap_ssa_operands (stmt
, gimple_assign_rhs2_ptr (stmt
),
463 gimple_assign_rhs3_ptr (stmt
));
464 bool honor_nans
= HONOR_NANS (TREE_OPERAND (cond
, 0));
465 code
= invert_tree_comparison (TREE_CODE (cond
), honor_nans
);
466 gcc_assert (code
!= ERROR_MARK
);
467 TREE_SET_CODE (cond
, code
);
471 swap_ssa_operands (stmt
, gimple_assign_rhs1_ptr (stmt
),
472 gimple_assign_rhs2_ptr (stmt
));
473 if (dump_enabled_p ())
475 dump_printf_loc (MSG_NOTE
, vect_location
,
476 "swapped operands to match def types in ");
477 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
485 /* A subroutine of vect_build_slp_tree for checking VECTYPE, which is the
486 caller's attempt to find the vector type in STMT with the narrowest
487 element type. Return true if VECTYPE is nonnull and if it is valid
488 for VINFO. When returning true, update MAX_NUNITS to reflect the
489 number of units in VECTYPE. VINFO, GORUP_SIZE and MAX_NUNITS are
490 as for vect_build_slp_tree. */
493 vect_record_max_nunits (vec_info
*vinfo
, gimple
*stmt
, unsigned int group_size
,
494 tree vectype
, unsigned int *max_nunits
)
498 if (dump_enabled_p ())
500 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
501 "Build SLP failed: unsupported data-type in ");
502 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
503 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
505 /* Fatal mismatch. */
509 /* If populating the vector type requires unrolling then fail
510 before adjusting *max_nunits for basic-block vectorization. */
511 if (is_a
<bb_vec_info
> (vinfo
)
512 && TYPE_VECTOR_SUBPARTS (vectype
) > group_size
)
514 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
515 "Build SLP failed: unrolling required "
516 "in basic block SLP\n");
517 /* Fatal mismatch. */
521 /* In case of multiple types we need to detect the smallest type. */
522 if (*max_nunits
< TYPE_VECTOR_SUBPARTS (vectype
))
523 *max_nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
528 /* Verify if the scalar stmts STMTS are isomorphic, require data
529 permutation or are of unsupported types of operation. Return
530 true if they are, otherwise return false and indicate in *MATCHES
531 which stmts are not isomorphic to the first one. If MATCHES[0]
532 is false then this indicates the comparison could not be
533 carried out or the stmts will never be vectorized by SLP.
535 Note COND_EXPR is possibly ismorphic to another one after swapping its
536 operands. Set SWAP[i] to 1 if stmt I is COND_EXPR and isomorphic to
537 the first stmt by swapping the two operands of comparison; set SWAP[i]
538 to 2 if stmt I is isormorphic to the first stmt by inverting the code
539 of comparison. Take A1 >= B1 ? X1 : Y1 as an exmple, it can be swapped
540 to (B1 <= A1 ? X1 : Y1); or be inverted to (A1 < B1) ? Y1 : X1. */
543 vect_build_slp_tree_1 (vec_info
*vinfo
, unsigned char *swap
,
544 vec
<gimple
*> stmts
, unsigned int group_size
,
545 unsigned nops
, unsigned int *max_nunits
,
546 bool *matches
, bool *two_operators
)
549 gimple
*first_stmt
= stmts
[0], *stmt
= stmts
[0];
550 enum tree_code first_stmt_code
= ERROR_MARK
;
551 enum tree_code alt_stmt_code
= ERROR_MARK
;
552 enum tree_code rhs_code
= ERROR_MARK
;
553 enum tree_code first_cond_code
= ERROR_MARK
;
555 bool need_same_oprnds
= false;
556 tree vectype
= NULL_TREE
, scalar_type
, first_op1
= NULL_TREE
;
559 machine_mode optab_op2_mode
;
560 machine_mode vec_mode
;
562 gimple
*first_load
= NULL
, *prev_first_load
= NULL
;
564 /* For every stmt in NODE find its def stmt/s. */
565 FOR_EACH_VEC_ELT (stmts
, i
, stmt
)
570 if (dump_enabled_p ())
572 dump_printf_loc (MSG_NOTE
, vect_location
, "Build SLP for ");
573 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
576 /* Fail to vectorize statements marked as unvectorizable. */
577 if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt
)))
579 if (dump_enabled_p ())
581 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
582 "Build SLP failed: unvectorizable statement ");
583 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
585 /* Fatal mismatch. */
590 lhs
= gimple_get_lhs (stmt
);
591 if (lhs
== NULL_TREE
)
593 if (dump_enabled_p ())
595 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
596 "Build SLP failed: not GIMPLE_ASSIGN nor "
598 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
600 /* Fatal mismatch. */
605 scalar_type
= vect_get_smallest_scalar_type (stmt
, &dummy
, &dummy
);
606 vectype
= get_vectype_for_scalar_type (scalar_type
);
607 if (!vect_record_max_nunits (vinfo
, stmt
, group_size
, vectype
,
610 /* Fatal mismatch. */
615 if (gcall
*call_stmt
= dyn_cast
<gcall
*> (stmt
))
617 rhs_code
= CALL_EXPR
;
618 if (gimple_call_internal_p (call_stmt
)
619 || gimple_call_tail_p (call_stmt
)
620 || gimple_call_noreturn_p (call_stmt
)
621 || !gimple_call_nothrow_p (call_stmt
)
622 || gimple_call_chain (call_stmt
))
624 if (dump_enabled_p ())
626 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
627 "Build SLP failed: unsupported call type ");
628 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
631 /* Fatal mismatch. */
637 rhs_code
= gimple_assign_rhs_code (stmt
);
639 /* Check the operation. */
642 first_stmt_code
= rhs_code
;
644 /* Shift arguments should be equal in all the packed stmts for a
645 vector shift with scalar shift operand. */
646 if (rhs_code
== LSHIFT_EXPR
|| rhs_code
== RSHIFT_EXPR
647 || rhs_code
== LROTATE_EXPR
648 || rhs_code
== RROTATE_EXPR
)
650 vec_mode
= TYPE_MODE (vectype
);
652 /* First see if we have a vector/vector shift. */
653 optab
= optab_for_tree_code (rhs_code
, vectype
,
657 || optab_handler (optab
, vec_mode
) == CODE_FOR_nothing
)
659 /* No vector/vector shift, try for a vector/scalar shift. */
660 optab
= optab_for_tree_code (rhs_code
, vectype
,
665 if (dump_enabled_p ())
666 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
667 "Build SLP failed: no optab.\n");
668 /* Fatal mismatch. */
672 icode
= (int) optab_handler (optab
, vec_mode
);
673 if (icode
== CODE_FOR_nothing
)
675 if (dump_enabled_p ())
676 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
678 "op not supported by target.\n");
679 /* Fatal mismatch. */
683 optab_op2_mode
= insn_data
[icode
].operand
[2].mode
;
684 if (!VECTOR_MODE_P (optab_op2_mode
))
686 need_same_oprnds
= true;
687 first_op1
= gimple_assign_rhs2 (stmt
);
691 else if (rhs_code
== WIDEN_LSHIFT_EXPR
)
693 need_same_oprnds
= true;
694 first_op1
= gimple_assign_rhs2 (stmt
);
699 if (first_stmt_code
!= rhs_code
700 && alt_stmt_code
== ERROR_MARK
)
701 alt_stmt_code
= rhs_code
;
702 if (first_stmt_code
!= rhs_code
703 && (first_stmt_code
!= IMAGPART_EXPR
704 || rhs_code
!= REALPART_EXPR
)
705 && (first_stmt_code
!= REALPART_EXPR
706 || rhs_code
!= IMAGPART_EXPR
)
707 /* Handle mismatches in plus/minus by computing both
708 and merging the results. */
709 && !((first_stmt_code
== PLUS_EXPR
710 || first_stmt_code
== MINUS_EXPR
)
711 && (alt_stmt_code
== PLUS_EXPR
712 || alt_stmt_code
== MINUS_EXPR
)
713 && rhs_code
== alt_stmt_code
)
714 && !(STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt
))
715 && (first_stmt_code
== ARRAY_REF
716 || first_stmt_code
== BIT_FIELD_REF
717 || first_stmt_code
== INDIRECT_REF
718 || first_stmt_code
== COMPONENT_REF
719 || first_stmt_code
== MEM_REF
)))
721 if (dump_enabled_p ())
723 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
724 "Build SLP failed: different operation "
726 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
727 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
729 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
737 && !operand_equal_p (first_op1
, gimple_assign_rhs2 (stmt
), 0))
739 if (dump_enabled_p ())
741 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
742 "Build SLP failed: different shift "
744 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
750 if (rhs_code
== CALL_EXPR
)
752 gimple
*first_stmt
= stmts
[0];
753 if (gimple_call_num_args (stmt
) != nops
754 || !operand_equal_p (gimple_call_fn (first_stmt
),
755 gimple_call_fn (stmt
), 0)
756 || gimple_call_fntype (first_stmt
)
757 != gimple_call_fntype (stmt
))
759 if (dump_enabled_p ())
761 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
762 "Build SLP failed: different calls in ");
763 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
772 /* Grouped store or load. */
773 if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt
)))
775 if (REFERENCE_CLASS_P (lhs
))
783 first_load
= GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
));
786 /* Check that there are no loads from different interleaving
787 chains in the same node. */
788 if (prev_first_load
!= first_load
)
790 if (dump_enabled_p ())
792 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
794 "Build SLP failed: different "
795 "interleaving chains in one node ");
796 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
804 prev_first_load
= first_load
;
806 } /* Grouped access. */
809 if (TREE_CODE_CLASS (rhs_code
) == tcc_reference
)
811 /* Not grouped load. */
812 if (dump_enabled_p ())
814 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
815 "Build SLP failed: not grouped load ");
816 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
819 /* FORNOW: Not grouped loads are not supported. */
820 /* Fatal mismatch. */
825 /* Not memory operation. */
826 if (TREE_CODE_CLASS (rhs_code
) != tcc_binary
827 && TREE_CODE_CLASS (rhs_code
) != tcc_unary
828 && TREE_CODE_CLASS (rhs_code
) != tcc_expression
829 && TREE_CODE_CLASS (rhs_code
) != tcc_comparison
830 && rhs_code
!= CALL_EXPR
)
832 if (dump_enabled_p ())
834 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
835 "Build SLP failed: operation");
836 dump_printf (MSG_MISSED_OPTIMIZATION
, " unsupported ");
837 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
839 /* Fatal mismatch. */
844 if (rhs_code
== COND_EXPR
)
846 tree cond_expr
= gimple_assign_rhs1 (stmt
);
847 enum tree_code cond_code
= TREE_CODE (cond_expr
);
848 enum tree_code swap_code
= ERROR_MARK
;
849 enum tree_code invert_code
= ERROR_MARK
;
852 first_cond_code
= TREE_CODE (cond_expr
);
853 else if (TREE_CODE_CLASS (cond_code
) == tcc_comparison
)
855 bool honor_nans
= HONOR_NANS (TREE_OPERAND (cond_expr
, 0));
856 swap_code
= swap_tree_comparison (cond_code
);
857 invert_code
= invert_tree_comparison (cond_code
, honor_nans
);
860 if (first_cond_code
== cond_code
)
862 /* Isomorphic can be achieved by swapping. */
863 else if (first_cond_code
== swap_code
)
865 /* Isomorphic can be achieved by inverting. */
866 else if (first_cond_code
== invert_code
)
870 if (dump_enabled_p ())
872 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
873 "Build SLP failed: different"
875 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
887 for (i
= 0; i
< group_size
; ++i
)
891 /* If we allowed a two-operation SLP node verify the target can cope
892 with the permute we are going to use. */
893 if (alt_stmt_code
!= ERROR_MARK
894 && TREE_CODE_CLASS (alt_stmt_code
) != tcc_reference
)
896 unsigned int count
= TYPE_VECTOR_SUBPARTS (vectype
);
897 vec_perm_builder
sel (count
, count
, 1);
898 for (i
= 0; i
< count
; ++i
)
900 unsigned int elt
= i
;
901 if (gimple_assign_rhs_code (stmts
[i
% group_size
]) == alt_stmt_code
)
903 sel
.quick_push (elt
);
905 vec_perm_indices
indices (sel
, 2, count
);
906 if (!can_vec_perm_const_p (TYPE_MODE (vectype
), indices
))
908 for (i
= 0; i
< group_size
; ++i
)
909 if (gimple_assign_rhs_code (stmts
[i
]) == alt_stmt_code
)
912 if (dump_enabled_p ())
914 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
915 "Build SLP failed: different operation "
917 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
919 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
921 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
927 *two_operators
= true;
933 /* Traits for the hash_set to record failed SLP builds for a stmt set.
934 Note we never remove apart from at destruction time so we do not
935 need a special value for deleted that differs from empty. */
938 typedef vec
<gimple
*> value_type
;
939 typedef vec
<gimple
*> compare_type
;
940 static inline hashval_t
hash (value_type
);
941 static inline bool equal (value_type existing
, value_type candidate
);
942 static inline bool is_empty (value_type x
) { return !x
.exists (); }
943 static inline bool is_deleted (value_type x
) { return !x
.exists (); }
944 static inline void mark_empty (value_type
&x
) { x
.release (); }
945 static inline void mark_deleted (value_type
&x
) { x
.release (); }
946 static inline void remove (value_type
&x
) { x
.release (); }
949 bst_traits::hash (value_type x
)
952 for (unsigned i
= 0; i
< x
.length (); ++i
)
953 h
.add_int (gimple_uid (x
[i
]));
957 bst_traits::equal (value_type existing
, value_type candidate
)
959 if (existing
.length () != candidate
.length ())
961 for (unsigned i
= 0; i
< existing
.length (); ++i
)
962 if (existing
[i
] != candidate
[i
])
967 typedef hash_set
<vec
<gimple
*>, bst_traits
> scalar_stmts_set_t
;
968 static scalar_stmts_set_t
*bst_fail
;
971 vect_build_slp_tree_2 (vec_info
*vinfo
,
972 vec
<gimple
*> stmts
, unsigned int group_size
,
973 unsigned int *max_nunits
,
974 vec
<slp_tree
> *loads
,
975 bool *matches
, unsigned *npermutes
, unsigned *tree_size
,
976 unsigned max_tree_size
);
979 vect_build_slp_tree (vec_info
*vinfo
,
980 vec
<gimple
*> stmts
, unsigned int group_size
,
981 unsigned int *max_nunits
,
982 vec
<slp_tree
> *loads
,
983 bool *matches
, unsigned *npermutes
, unsigned *tree_size
,
984 unsigned max_tree_size
)
986 if (bst_fail
->contains (stmts
))
988 slp_tree res
= vect_build_slp_tree_2 (vinfo
, stmts
, group_size
, max_nunits
,
989 loads
, matches
, npermutes
, tree_size
,
991 /* When SLP build fails for stmts record this, otherwise SLP build
992 can be exponential in time when we allow to construct parts from
993 scalars, see PR81723. */
997 x
.create (stmts
.length ());
1004 /* Recursively build an SLP tree starting from NODE.
1005 Fail (and return a value not equal to zero) if def-stmts are not
1006 isomorphic, require data permutation or are of unsupported types of
1007 operation. Otherwise, return 0.
1008 The value returned is the depth in the SLP tree where a mismatch
1012 vect_build_slp_tree_2 (vec_info
*vinfo
,
1013 vec
<gimple
*> stmts
, unsigned int group_size
,
1014 unsigned int *max_nunits
,
1015 vec
<slp_tree
> *loads
,
1016 bool *matches
, unsigned *npermutes
, unsigned *tree_size
,
1017 unsigned max_tree_size
)
1019 unsigned nops
, i
, this_tree_size
= 0, this_max_nunits
= *max_nunits
;
1026 if (is_gimple_call (stmt
))
1027 nops
= gimple_call_num_args (stmt
);
1028 else if (is_gimple_assign (stmt
))
1030 nops
= gimple_num_ops (stmt
) - 1;
1031 if (gimple_assign_rhs_code (stmt
) == COND_EXPR
)
1034 else if (gimple_code (stmt
) == GIMPLE_PHI
)
1039 /* If the SLP node is a PHI (induction or reduction), terminate
1041 if (gimple_code (stmt
) == GIMPLE_PHI
)
1043 tree scalar_type
= TREE_TYPE (PHI_RESULT (stmt
));
1044 tree vectype
= get_vectype_for_scalar_type (scalar_type
);
1045 if (!vect_record_max_nunits (vinfo
, stmt
, group_size
, vectype
,
1049 vect_def_type def_type
= STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt
));
1050 /* Induction from different IVs is not supported. */
1051 if (def_type
== vect_induction_def
)
1053 FOR_EACH_VEC_ELT (stmts
, i
, stmt
)
1054 if (stmt
!= stmts
[0])
1059 /* Else def types have to match. */
1060 FOR_EACH_VEC_ELT (stmts
, i
, stmt
)
1062 /* But for reduction chains only check on the first stmt. */
1063 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
))
1064 && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)) != stmt
)
1066 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt
)) != def_type
)
1070 node
= vect_create_new_slp_node (stmts
);
1075 bool two_operators
= false;
1076 unsigned char *swap
= XALLOCAVEC (unsigned char, group_size
);
1077 if (!vect_build_slp_tree_1 (vinfo
, swap
,
1078 stmts
, group_size
, nops
,
1079 &this_max_nunits
, matches
, &two_operators
))
1082 /* If the SLP node is a load, terminate the recursion. */
1083 if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt
))
1084 && DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt
))))
1086 *max_nunits
= this_max_nunits
;
1087 node
= vect_create_new_slp_node (stmts
);
1088 loads
->safe_push (node
);
1092 /* Get at the operands, verifying they are compatible. */
1093 vec
<slp_oprnd_info
> oprnds_info
= vect_create_oprnd_info (nops
, group_size
);
1094 slp_oprnd_info oprnd_info
;
1095 FOR_EACH_VEC_ELT (stmts
, i
, stmt
)
1097 int res
= vect_get_and_check_slp_defs (vinfo
, &swap
[i
],
1098 stmt
, i
, &oprnds_info
);
1100 matches
[(res
== -1) ? 0 : i
] = false;
1104 for (i
= 0; i
< group_size
; ++i
)
1107 vect_free_oprnd_info (oprnds_info
);
1111 auto_vec
<slp_tree
, 4> children
;
1112 auto_vec
<slp_tree
> this_loads
;
1117 max_tree_size
-= *tree_size
;
1119 /* Create SLP_TREE nodes for the definition node/s. */
1120 FOR_EACH_VEC_ELT (oprnds_info
, i
, oprnd_info
)
1123 unsigned old_nloads
= this_loads
.length ();
1124 unsigned old_tree_size
= this_tree_size
;
1127 if (oprnd_info
->first_dt
!= vect_internal_def
1128 && oprnd_info
->first_dt
!= vect_reduction_def
1129 && oprnd_info
->first_dt
!= vect_induction_def
)
1132 if (++this_tree_size
> max_tree_size
)
1134 FOR_EACH_VEC_ELT (children
, j
, child
)
1135 vect_free_slp_tree (child
);
1136 vect_free_oprnd_info (oprnds_info
);
1140 if ((child
= vect_build_slp_tree (vinfo
, oprnd_info
->def_stmts
,
1141 group_size
, &this_max_nunits
,
1142 &this_loads
, matches
, npermutes
,
1144 max_tree_size
)) != NULL
)
1146 /* If we have all children of child built up from scalars then just
1147 throw that away and build it up this node from scalars. */
1148 if (!SLP_TREE_CHILDREN (child
).is_empty ()
1149 /* ??? Rejecting patterns this way doesn't work. We'd have to
1150 do extra work to cancel the pattern so the uses see the
1152 && !is_pattern_stmt_p
1153 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child
)[0])))
1155 slp_tree grandchild
;
1157 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child
), j
, grandchild
)
1158 if (SLP_TREE_DEF_TYPE (grandchild
) == vect_internal_def
)
1163 this_loads
.truncate (old_nloads
);
1164 this_tree_size
= old_tree_size
;
1165 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child
), j
, grandchild
)
1166 vect_free_slp_tree (grandchild
);
1167 SLP_TREE_CHILDREN (child
).truncate (0);
1169 dump_printf_loc (MSG_NOTE
, vect_location
,
1170 "Building parent vector operands from "
1171 "scalars instead\n");
1172 oprnd_info
->def_stmts
= vNULL
;
1173 SLP_TREE_DEF_TYPE (child
) = vect_external_def
;
1174 children
.safe_push (child
);
1179 oprnd_info
->def_stmts
= vNULL
;
1180 children
.safe_push (child
);
1184 /* If the SLP build failed fatally and we analyze a basic-block
1185 simply treat nodes we fail to build as externally defined
1186 (and thus build vectors from the scalar defs).
1187 The cost model will reject outright expensive cases.
1188 ??? This doesn't treat cases where permutation ultimatively
1189 fails (or we don't try permutation below). Ideally we'd
1190 even compute a permutation that will end up with the maximum
1192 if (is_a
<bb_vec_info
> (vinfo
)
1194 /* ??? Rejecting patterns this way doesn't work. We'd have to
1195 do extra work to cancel the pattern so the uses see the
1197 && !is_pattern_stmt_p (vinfo_for_stmt (stmt
)))
1199 dump_printf_loc (MSG_NOTE
, vect_location
,
1200 "Building vector operands from scalars\n");
1201 child
= vect_create_new_slp_node (oprnd_info
->def_stmts
);
1202 SLP_TREE_DEF_TYPE (child
) = vect_external_def
;
1203 children
.safe_push (child
);
1204 oprnd_info
->def_stmts
= vNULL
;
1208 /* If the SLP build for operand zero failed and operand zero
1209 and one can be commutated try that for the scalar stmts
1210 that failed the match. */
1212 /* A first scalar stmt mismatch signals a fatal mismatch. */
1214 /* ??? For COND_EXPRs we can swap the comparison operands
1215 as well as the arms under some constraints. */
1217 && oprnds_info
[1]->first_dt
== vect_internal_def
1218 && is_gimple_assign (stmt
)
1219 && commutative_tree_code (gimple_assign_rhs_code (stmt
))
1221 /* Do so only if the number of not successful permutes was nor more
1222 than a cut-ff as re-trying the recursive match on
1223 possibly each level of the tree would expose exponential
1227 /* Verify if we can safely swap or if we committed to a specific
1228 operand order already. */
1229 for (j
= 0; j
< group_size
; ++j
)
1232 || STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmts
[j
]))))
1234 if (dump_enabled_p ())
1236 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1237 "Build SLP failed: cannot swap operands "
1239 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
1245 /* Swap mismatched definition stmts. */
1246 dump_printf_loc (MSG_NOTE
, vect_location
,
1247 "Re-trying with swapped operands of stmts ");
1248 for (j
= 0; j
< group_size
; ++j
)
1251 std::swap (oprnds_info
[0]->def_stmts
[j
],
1252 oprnds_info
[1]->def_stmts
[j
]);
1253 dump_printf (MSG_NOTE
, "%d ", j
);
1255 dump_printf (MSG_NOTE
, "\n");
1256 /* And try again with scratch 'matches' ... */
1257 bool *tem
= XALLOCAVEC (bool, group_size
);
1258 if ((child
= vect_build_slp_tree (vinfo
, oprnd_info
->def_stmts
,
1259 group_size
, &this_max_nunits
,
1260 &this_loads
, tem
, npermutes
,
1262 max_tree_size
)) != NULL
)
1264 /* ... so if successful we can apply the operand swapping
1265 to the GIMPLE IL. This is necessary because for example
1266 vect_get_slp_defs uses operand indexes and thus expects
1267 canonical operand order. This is also necessary even
1268 if we end up building the operand from scalars as
1269 we'll continue to process swapped operand two. */
1270 for (j
= 0; j
< group_size
; ++j
)
1272 gimple
*stmt
= stmts
[j
];
1273 gimple_set_plf (stmt
, GF_PLF_1
, false);
1275 for (j
= 0; j
< group_size
; ++j
)
1277 gimple
*stmt
= stmts
[j
];
1280 /* Avoid swapping operands twice. */
1281 if (gimple_plf (stmt
, GF_PLF_1
))
1283 swap_ssa_operands (stmt
, gimple_assign_rhs1_ptr (stmt
),
1284 gimple_assign_rhs2_ptr (stmt
));
1285 gimple_set_plf (stmt
, GF_PLF_1
, true);
1288 /* Verify we swap all duplicates or none. */
1290 for (j
= 0; j
< group_size
; ++j
)
1292 gimple
*stmt
= stmts
[j
];
1293 gcc_assert (gimple_plf (stmt
, GF_PLF_1
) == ! matches
[j
]);
1296 /* If we have all children of child built up from scalars then
1297 just throw that away and build it up this node from scalars. */
1298 if (!SLP_TREE_CHILDREN (child
).is_empty ()
1299 /* ??? Rejecting patterns this way doesn't work. We'd have
1300 to do extra work to cancel the pattern so the uses see the
1302 && !is_pattern_stmt_p
1303 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child
)[0])))
1306 slp_tree grandchild
;
1308 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child
), j
, grandchild
)
1309 if (SLP_TREE_DEF_TYPE (grandchild
) == vect_internal_def
)
1314 this_loads
.truncate (old_nloads
);
1315 this_tree_size
= old_tree_size
;
1316 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child
), j
, grandchild
)
1317 vect_free_slp_tree (grandchild
);
1318 SLP_TREE_CHILDREN (child
).truncate (0);
1320 dump_printf_loc (MSG_NOTE
, vect_location
,
1321 "Building parent vector operands from "
1322 "scalars instead\n");
1323 oprnd_info
->def_stmts
= vNULL
;
1324 SLP_TREE_DEF_TYPE (child
) = vect_external_def
;
1325 children
.safe_push (child
);
1330 oprnd_info
->def_stmts
= vNULL
;
1331 children
.safe_push (child
);
1339 gcc_assert (child
== NULL
);
1340 FOR_EACH_VEC_ELT (children
, j
, child
)
1341 vect_free_slp_tree (child
);
1342 vect_free_oprnd_info (oprnds_info
);
1346 vect_free_oprnd_info (oprnds_info
);
1349 *tree_size
+= this_tree_size
;
1350 *max_nunits
= this_max_nunits
;
1351 loads
->safe_splice (this_loads
);
1353 node
= vect_create_new_slp_node (stmts
);
1354 SLP_TREE_TWO_OPERATORS (node
) = two_operators
;
1355 SLP_TREE_CHILDREN (node
).splice (children
);
1359 /* Dump a slp tree NODE using flags specified in DUMP_KIND. */
1362 vect_print_slp_tree (dump_flags_t dump_kind
, location_t loc
, slp_tree node
)
1368 dump_printf_loc (dump_kind
, loc
, "node%s\n",
1369 SLP_TREE_DEF_TYPE (node
) != vect_internal_def
1370 ? " (external)" : "");
1371 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
1373 dump_printf_loc (dump_kind
, loc
, "\tstmt %d ", i
);
1374 dump_gimple_stmt (dump_kind
, TDF_SLIM
, stmt
, 0);
1376 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1377 vect_print_slp_tree (dump_kind
, loc
, child
);
1381 /* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
1382 If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
1383 J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
1384 stmts in NODE are to be marked. */
1387 vect_mark_slp_stmts (slp_tree node
, enum slp_vect_type mark
, int j
)
1393 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
1396 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
1397 if (j
< 0 || i
== j
)
1398 STMT_SLP_TYPE (vinfo_for_stmt (stmt
)) = mark
;
1400 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1401 vect_mark_slp_stmts (child
, mark
, j
);
1405 /* Mark the statements of the tree rooted at NODE as relevant (vect_used). */
1408 vect_mark_slp_stmts_relevant (slp_tree node
)
1412 stmt_vec_info stmt_info
;
1415 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
1418 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
1420 stmt_info
= vinfo_for_stmt (stmt
);
1421 gcc_assert (!STMT_VINFO_RELEVANT (stmt_info
)
1422 || STMT_VINFO_RELEVANT (stmt_info
) == vect_used_in_scope
);
1423 STMT_VINFO_RELEVANT (stmt_info
) = vect_used_in_scope
;
1426 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1427 vect_mark_slp_stmts_relevant (child
);
1431 /* Rearrange the statements of NODE according to PERMUTATION. */
1434 vect_slp_rearrange_stmts (slp_tree node
, unsigned int group_size
,
1435 vec
<unsigned> permutation
)
1438 vec
<gimple
*> tmp_stmts
;
1442 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1443 vect_slp_rearrange_stmts (child
, group_size
, permutation
);
1445 gcc_assert (group_size
== SLP_TREE_SCALAR_STMTS (node
).length ());
1446 tmp_stmts
.create (group_size
);
1447 tmp_stmts
.quick_grow_cleared (group_size
);
1449 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
1450 tmp_stmts
[permutation
[i
]] = stmt
;
1452 SLP_TREE_SCALAR_STMTS (node
).release ();
1453 SLP_TREE_SCALAR_STMTS (node
) = tmp_stmts
;
1457 /* Attempt to reorder stmts in a reduction chain so that we don't
1458 require any load permutation. Return true if that was possible,
1459 otherwise return false. */
1462 vect_attempt_slp_rearrange_stmts (slp_instance slp_instn
)
1464 unsigned int group_size
= SLP_INSTANCE_GROUP_SIZE (slp_instn
);
1467 slp_tree node
, load
;
1469 /* Compare all the permutation sequences to the first one. We know
1470 that at least one load is permuted. */
1471 node
= SLP_INSTANCE_LOADS (slp_instn
)[0];
1472 if (!node
->load_permutation
.exists ())
1474 for (i
= 1; SLP_INSTANCE_LOADS (slp_instn
).iterate (i
, &load
); ++i
)
1476 if (!load
->load_permutation
.exists ())
1478 FOR_EACH_VEC_ELT (load
->load_permutation
, j
, lidx
)
1479 if (lidx
!= node
->load_permutation
[j
])
1483 /* Check that the loads in the first sequence are different and there
1484 are no gaps between them. */
1485 auto_sbitmap
load_index (group_size
);
1486 bitmap_clear (load_index
);
1487 FOR_EACH_VEC_ELT (node
->load_permutation
, i
, lidx
)
1489 if (lidx
>= group_size
)
1491 if (bitmap_bit_p (load_index
, lidx
))
1494 bitmap_set_bit (load_index
, lidx
);
1496 for (i
= 0; i
< group_size
; i
++)
1497 if (!bitmap_bit_p (load_index
, i
))
1500 /* This permutation is valid for reduction. Since the order of the
1501 statements in the nodes is not important unless they are memory
1502 accesses, we can rearrange the statements in all the nodes
1503 according to the order of the loads. */
1504 vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn
), group_size
,
1505 node
->load_permutation
);
1507 /* We are done, no actual permutations need to be generated. */
1508 unsigned int unrolling_factor
= SLP_INSTANCE_UNROLLING_FACTOR (slp_instn
);
1509 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1511 gimple
*first_stmt
= SLP_TREE_SCALAR_STMTS (node
)[0];
1512 first_stmt
= GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt
));
1513 /* But we have to keep those permutations that are required because
1514 of handling of gaps. */
1515 if (unrolling_factor
== 1
1516 || (group_size
== GROUP_SIZE (vinfo_for_stmt (first_stmt
))
1517 && GROUP_GAP (vinfo_for_stmt (first_stmt
)) == 0))
1518 SLP_TREE_LOAD_PERMUTATION (node
).release ();
1520 for (j
= 0; j
< SLP_TREE_LOAD_PERMUTATION (node
).length (); ++j
)
1521 SLP_TREE_LOAD_PERMUTATION (node
)[j
] = j
;
1527 /* Check if the required load permutations in the SLP instance
1528 SLP_INSTN are supported. */
1531 vect_supported_load_permutation_p (slp_instance slp_instn
)
1533 unsigned int group_size
= SLP_INSTANCE_GROUP_SIZE (slp_instn
);
1534 unsigned int i
, j
, k
, next
;
1536 gimple
*stmt
, *load
, *next_load
;
1538 if (dump_enabled_p ())
1540 dump_printf_loc (MSG_NOTE
, vect_location
, "Load permutation ");
1541 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1542 if (node
->load_permutation
.exists ())
1543 FOR_EACH_VEC_ELT (node
->load_permutation
, j
, next
)
1544 dump_printf (MSG_NOTE
, "%d ", next
);
1546 for (k
= 0; k
< group_size
; ++k
)
1547 dump_printf (MSG_NOTE
, "%d ", k
);
1548 dump_printf (MSG_NOTE
, "\n");
1551 /* In case of reduction every load permutation is allowed, since the order
1552 of the reduction statements is not important (as opposed to the case of
1553 grouped stores). The only condition we need to check is that all the
1554 load nodes are of the same size and have the same permutation (and then
1555 rearrange all the nodes of the SLP instance according to this
1558 /* Check that all the load nodes are of the same size. */
1559 /* ??? Can't we assert this? */
1560 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1561 if (SLP_TREE_SCALAR_STMTS (node
).length () != (unsigned) group_size
)
1564 node
= SLP_INSTANCE_TREE (slp_instn
);
1565 stmt
= SLP_TREE_SCALAR_STMTS (node
)[0];
1567 /* Reduction (there are no data-refs in the root).
1568 In reduction chain the order of the loads is not important. */
1569 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt
))
1570 && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)))
1571 vect_attempt_slp_rearrange_stmts (slp_instn
);
1573 /* In basic block vectorization we allow any subchain of an interleaving
1575 FORNOW: not supported in loop SLP because of realignment compications. */
1576 if (STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt
)))
1578 /* Check whether the loads in an instance form a subchain and thus
1579 no permutation is necessary. */
1580 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1582 if (!SLP_TREE_LOAD_PERMUTATION (node
).exists ())
1584 bool subchain_p
= true;
1586 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), j
, load
)
1589 && (next_load
!= load
1590 || GROUP_GAP (vinfo_for_stmt (load
)) != 1))
1595 next_load
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (load
));
1598 SLP_TREE_LOAD_PERMUTATION (node
).release ();
1601 stmt_vec_info group_info
1602 = vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node
)[0]);
1603 group_info
= vinfo_for_stmt (GROUP_FIRST_ELEMENT (group_info
));
1605 = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (group_info
));
1606 unsigned k
, maxk
= 0;
1607 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (node
), j
, k
)
1610 /* In BB vectorization we may not actually use a loaded vector
1611 accessing elements in excess of GROUP_SIZE. */
1612 if (maxk
>= (GROUP_SIZE (group_info
) & ~(nunits
- 1)))
1614 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1615 "BB vectorization with gaps at the end of "
1616 "a load is not supported\n");
1620 /* Verify the permutation can be generated. */
1623 if (!vect_transform_slp_perm_load (node
, tem
, NULL
,
1624 1, slp_instn
, true, &n_perms
))
1626 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
1628 "unsupported load permutation\n");
1636 /* For loop vectorization verify we can generate the permutation. Be
1637 conservative about the vectorization factor, there are permutations
1638 that will use three vector inputs only starting from a specific factor
1639 and the vectorization factor is not yet final.
1640 ??? The SLP instance unrolling factor might not be the maximum one. */
1643 = least_common_multiple (SLP_INSTANCE_UNROLLING_FACTOR (slp_instn
),
1644 LOOP_VINFO_VECT_FACTOR
1645 (STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt
))));
1646 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1647 if (node
->load_permutation
.exists ()
1648 && !vect_transform_slp_perm_load (node
, vNULL
, NULL
, test_vf
,
1649 slp_instn
, true, &n_perms
))
1656 /* Find the last store in SLP INSTANCE. */
1659 vect_find_last_scalar_stmt_in_slp (slp_tree node
)
1661 gimple
*last
= NULL
, *stmt
;
1663 for (int i
= 0; SLP_TREE_SCALAR_STMTS (node
).iterate (i
, &stmt
); i
++)
1665 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
1666 if (is_pattern_stmt_p (stmt_vinfo
))
1667 last
= get_later_stmt (STMT_VINFO_RELATED_STMT (stmt_vinfo
), last
);
1669 last
= get_later_stmt (stmt
, last
);
1675 /* Compute the cost for the SLP node NODE in the SLP instance INSTANCE. */
1678 vect_analyze_slp_cost_1 (slp_instance instance
, slp_tree node
,
1679 stmt_vector_for_cost
*prologue_cost_vec
,
1680 stmt_vector_for_cost
*body_cost_vec
,
1681 unsigned ncopies_for_cost
,
1682 scalar_stmts_set_t
* visited
)
1687 stmt_vec_info stmt_info
;
1690 /* If we already costed the exact same set of scalar stmts we're done.
1691 We share the generated vector stmts for those. */
1692 if (visited
->contains (SLP_TREE_SCALAR_STMTS (node
)))
1695 visited
->add (SLP_TREE_SCALAR_STMTS (node
).copy ());
1697 /* Recurse down the SLP tree. */
1698 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1699 if (SLP_TREE_DEF_TYPE (child
) == vect_internal_def
)
1700 vect_analyze_slp_cost_1 (instance
, child
, prologue_cost_vec
,
1701 body_cost_vec
, ncopies_for_cost
, visited
);
1703 /* Look at the first scalar stmt to determine the cost. */
1704 stmt
= SLP_TREE_SCALAR_STMTS (node
)[0];
1705 stmt_info
= vinfo_for_stmt (stmt
);
1706 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
1708 vect_memory_access_type memory_access_type
1709 = (STMT_VINFO_STRIDED_P (stmt_info
)
1712 if (DR_IS_WRITE (STMT_VINFO_DATA_REF (stmt_info
)))
1713 vect_model_store_cost (stmt_info
, ncopies_for_cost
,
1714 memory_access_type
, vect_uninitialized_def
,
1715 node
, prologue_cost_vec
, body_cost_vec
);
1718 gcc_checking_assert (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info
)));
1719 if (SLP_TREE_LOAD_PERMUTATION (node
).exists ())
1721 /* If the load is permuted then the alignment is determined by
1722 the first group element not by the first scalar stmt DR. */
1723 stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
1724 stmt_info
= vinfo_for_stmt (stmt
);
1725 /* Record the cost for the permutation. */
1727 vect_transform_slp_perm_load (node
, vNULL
, NULL
,
1728 ncopies_for_cost
, instance
, true,
1730 record_stmt_cost (body_cost_vec
, n_perms
, vec_perm
,
1731 stmt_info
, 0, vect_body
);
1733 = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info
));
1734 /* And adjust the number of loads performed. This handles
1735 redundancies as well as loads that are later dead. */
1736 auto_sbitmap
perm (GROUP_SIZE (stmt_info
));
1737 bitmap_clear (perm
);
1738 for (i
= 0; i
< SLP_TREE_LOAD_PERMUTATION (node
).length (); ++i
)
1739 bitmap_set_bit (perm
, SLP_TREE_LOAD_PERMUTATION (node
)[i
]);
1740 ncopies_for_cost
= 0;
1741 bool load_seen
= false;
1742 for (i
= 0; i
< GROUP_SIZE (stmt_info
); ++i
)
1744 if (i
% nunits
== 0)
1750 if (bitmap_bit_p (perm
, i
))
1755 gcc_assert (ncopies_for_cost
1756 <= (GROUP_SIZE (stmt_info
) - GROUP_GAP (stmt_info
)
1757 + nunits
- 1) / nunits
);
1758 ncopies_for_cost
*= SLP_INSTANCE_UNROLLING_FACTOR (instance
);
1760 /* Record the cost for the vector loads. */
1761 vect_model_load_cost (stmt_info
, ncopies_for_cost
,
1762 memory_access_type
, node
, prologue_cost_vec
,
1767 else if (STMT_VINFO_TYPE (stmt_info
) == induc_vec_info_type
)
1769 /* ncopies_for_cost is the number of IVs we generate. */
1770 record_stmt_cost (body_cost_vec
, ncopies_for_cost
, vector_stmt
,
1771 stmt_info
, 0, vect_body
);
1773 /* Prologue cost for the initial values and step vector. */
1774 record_stmt_cost (prologue_cost_vec
, ncopies_for_cost
,
1776 (STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED
1778 ? vector_load
: vec_construct
,
1779 stmt_info
, 0, vect_prologue
);
1780 record_stmt_cost (prologue_cost_vec
, 1,
1782 (STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_info
))
1783 ? vector_load
: vec_construct
,
1784 stmt_info
, 0, vect_prologue
);
1786 /* ??? No easy way to get at the actual number of vector stmts
1787 to be geneated and thus the derived IVs. */
1791 record_stmt_cost (body_cost_vec
, ncopies_for_cost
, vector_stmt
,
1792 stmt_info
, 0, vect_body
);
1793 if (SLP_TREE_TWO_OPERATORS (node
))
1795 record_stmt_cost (body_cost_vec
, ncopies_for_cost
, vector_stmt
,
1796 stmt_info
, 0, vect_body
);
1797 record_stmt_cost (body_cost_vec
, ncopies_for_cost
, vec_perm
,
1798 stmt_info
, 0, vect_body
);
1802 /* Push SLP node def-type to stmts. */
1803 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1804 if (SLP_TREE_DEF_TYPE (child
) != vect_internal_def
)
1805 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child
), j
, stmt
)
1806 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt
)) = SLP_TREE_DEF_TYPE (child
);
1808 /* Scan operands and account for prologue cost of constants/externals.
1809 ??? This over-estimates cost for multiple uses and should be
1811 stmt
= SLP_TREE_SCALAR_STMTS (node
)[0];
1812 lhs
= gimple_get_lhs (stmt
);
1813 for (i
= 0; i
< gimple_num_ops (stmt
); ++i
)
1815 tree op
= gimple_op (stmt
, i
);
1817 enum vect_def_type dt
;
1818 if (!op
|| op
== lhs
)
1820 if (vect_is_simple_use (op
, stmt_info
->vinfo
, &def_stmt
, &dt
))
1822 /* Without looking at the actual initializer a vector of
1823 constants can be implemented as load from the constant pool.
1824 ??? We need to pass down stmt_info for a vector type
1825 even if it points to the wrong stmt. */
1826 if (dt
== vect_constant_def
)
1827 record_stmt_cost (prologue_cost_vec
, 1, vector_load
,
1828 stmt_info
, 0, vect_prologue
);
1829 else if (dt
== vect_external_def
)
1830 record_stmt_cost (prologue_cost_vec
, 1, vec_construct
,
1831 stmt_info
, 0, vect_prologue
);
1835 /* Restore stmt def-types. */
1836 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1837 if (SLP_TREE_DEF_TYPE (child
) != vect_internal_def
)
1838 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child
), j
, stmt
)
1839 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt
)) = vect_internal_def
;
1842 /* Compute the cost for the SLP instance INSTANCE. */
1845 vect_analyze_slp_cost (slp_instance instance
, void *data
)
1847 stmt_vector_for_cost body_cost_vec
, prologue_cost_vec
;
1848 unsigned ncopies_for_cost
;
1849 stmt_info_for_cost
*si
;
1852 if (dump_enabled_p ())
1853 dump_printf_loc (MSG_NOTE
, vect_location
,
1854 "=== vect_analyze_slp_cost ===\n");
1856 /* Calculate the number of vector stmts to create based on the unrolling
1857 factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
1858 GROUP_SIZE / NUNITS otherwise. */
1859 unsigned group_size
= SLP_INSTANCE_GROUP_SIZE (instance
);
1860 slp_tree node
= SLP_INSTANCE_TREE (instance
);
1861 stmt_vec_info stmt_info
= vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node
)[0]);
1862 /* Adjust the group_size by the vectorization factor which is always one
1863 for basic-block vectorization. */
1864 if (STMT_VINFO_LOOP_VINFO (stmt_info
))
1865 group_size
*= LOOP_VINFO_VECT_FACTOR (STMT_VINFO_LOOP_VINFO (stmt_info
));
1866 unsigned nunits
= TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info
));
1867 /* For reductions look at a reduction operand in case the reduction
1868 operation is widening like DOT_PROD or SAD. */
1869 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info
))
1871 gimple
*stmt
= SLP_TREE_SCALAR_STMTS (node
)[0];
1872 switch (gimple_assign_rhs_code (stmt
))
1876 nunits
= TYPE_VECTOR_SUBPARTS (get_vectype_for_scalar_type
1877 (TREE_TYPE (gimple_assign_rhs1 (stmt
))));
1882 ncopies_for_cost
= least_common_multiple (nunits
, group_size
) / nunits
;
1884 prologue_cost_vec
.create (10);
1885 body_cost_vec
.create (10);
1886 scalar_stmts_set_t
*visited
= new scalar_stmts_set_t ();
1887 vect_analyze_slp_cost_1 (instance
, SLP_INSTANCE_TREE (instance
),
1888 &prologue_cost_vec
, &body_cost_vec
,
1889 ncopies_for_cost
, visited
);
1892 /* Record the prologue costs, which were delayed until we were
1893 sure that SLP was successful. */
1894 FOR_EACH_VEC_ELT (prologue_cost_vec
, i
, si
)
1896 struct _stmt_vec_info
*stmt_info
1897 = si
->stmt
? vinfo_for_stmt (si
->stmt
) : NULL
;
1898 (void) add_stmt_cost (data
, si
->count
, si
->kind
, stmt_info
,
1899 si
->misalign
, vect_prologue
);
1902 /* Record the instance's instructions in the target cost model. */
1903 FOR_EACH_VEC_ELT (body_cost_vec
, i
, si
)
1905 struct _stmt_vec_info
*stmt_info
1906 = si
->stmt
? vinfo_for_stmt (si
->stmt
) : NULL
;
1907 (void) add_stmt_cost (data
, si
->count
, si
->kind
, stmt_info
,
1908 si
->misalign
, vect_body
);
1911 prologue_cost_vec
.release ();
1912 body_cost_vec
.release ();
1915 /* Splits a group of stores, currently beginning at FIRST_STMT, into two groups:
1916 one (still beginning at FIRST_STMT) of size GROUP1_SIZE (also containing
1917 the first GROUP1_SIZE stmts, since stores are consecutive), the second
1918 containing the remainder.
1919 Return the first stmt in the second group. */
1922 vect_split_slp_store_group (gimple
*first_stmt
, unsigned group1_size
)
1924 stmt_vec_info first_vinfo
= vinfo_for_stmt (first_stmt
);
1925 gcc_assert (GROUP_FIRST_ELEMENT (first_vinfo
) == first_stmt
);
1926 gcc_assert (group1_size
> 0);
1927 int group2_size
= GROUP_SIZE (first_vinfo
) - group1_size
;
1928 gcc_assert (group2_size
> 0);
1929 GROUP_SIZE (first_vinfo
) = group1_size
;
1931 gimple
*stmt
= first_stmt
;
1932 for (unsigned i
= group1_size
; i
> 1; i
--)
1934 stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt
));
1935 gcc_assert (GROUP_GAP (vinfo_for_stmt (stmt
)) == 1);
1937 /* STMT is now the last element of the first group. */
1938 gimple
*group2
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt
));
1939 GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt
)) = 0;
1941 GROUP_SIZE (vinfo_for_stmt (group2
)) = group2_size
;
1942 for (stmt
= group2
; stmt
; stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt
)))
1944 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)) = group2
;
1945 gcc_assert (GROUP_GAP (vinfo_for_stmt (stmt
)) == 1);
1948 /* For the second group, the GROUP_GAP is that before the original group,
1949 plus skipping over the first vector. */
1950 GROUP_GAP (vinfo_for_stmt (group2
)) =
1951 GROUP_GAP (first_vinfo
) + group1_size
;
1953 /* GROUP_GAP of the first group now has to skip over the second group too. */
1954 GROUP_GAP (first_vinfo
) += group2_size
;
1956 if (dump_enabled_p ())
1957 dump_printf_loc (MSG_NOTE
, vect_location
, "Split group into %d and %d\n",
1958 group1_size
, group2_size
);
1963 /* Analyze an SLP instance starting from a group of grouped stores. Call
1964 vect_build_slp_tree to build a tree of packed stmts if possible.
1965 Return FALSE if it's impossible to SLP any stmt in the loop. */
1968 vect_analyze_slp_instance (vec_info
*vinfo
,
1969 gimple
*stmt
, unsigned max_tree_size
)
1971 slp_instance new_instance
;
1973 unsigned int group_size
= GROUP_SIZE (vinfo_for_stmt (stmt
));
1974 unsigned int unrolling_factor
= 1, nunits
;
1975 tree vectype
, scalar_type
= NULL_TREE
;
1978 unsigned int max_nunits
= 0;
1979 vec
<slp_tree
> loads
;
1980 struct data_reference
*dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt
));
1981 vec
<gimple
*> scalar_stmts
;
1983 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)))
1987 scalar_type
= TREE_TYPE (DR_REF (dr
));
1988 vectype
= get_vectype_for_scalar_type (scalar_type
);
1992 gcc_assert (is_a
<loop_vec_info
> (vinfo
));
1993 vectype
= STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt
));
1996 group_size
= GROUP_SIZE (vinfo_for_stmt (stmt
));
2000 gcc_assert (is_a
<loop_vec_info
> (vinfo
));
2001 vectype
= STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt
));
2002 group_size
= as_a
<loop_vec_info
> (vinfo
)->reductions
.length ();
2007 if (dump_enabled_p ())
2009 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2010 "Build SLP failed: unsupported data-type ");
2011 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, scalar_type
);
2012 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
2017 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2019 /* Create a node (a root of the SLP tree) for the packed grouped stores. */
2020 scalar_stmts
.create (group_size
);
2022 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)))
2024 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
2027 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next
))
2028 && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next
)))
2029 scalar_stmts
.safe_push (
2030 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next
)));
2032 scalar_stmts
.safe_push (next
);
2033 next
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next
));
2035 /* Mark the first element of the reduction chain as reduction to properly
2036 transform the node. In the reduction analysis phase only the last
2037 element of the chain is marked as reduction. */
2038 if (!STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt
)))
2039 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt
)) = vect_reduction_def
;
2043 /* Collect reduction statements. */
2044 vec
<gimple
*> reductions
= as_a
<loop_vec_info
> (vinfo
)->reductions
;
2045 for (i
= 0; reductions
.iterate (i
, &next
); i
++)
2046 scalar_stmts
.safe_push (next
);
2049 loads
.create (group_size
);
2051 /* Build the tree for the SLP instance. */
2052 bool *matches
= XALLOCAVEC (bool, group_size
);
2053 unsigned npermutes
= 0;
2054 bst_fail
= new scalar_stmts_set_t ();
2055 node
= vect_build_slp_tree (vinfo
, scalar_stmts
, group_size
,
2056 &max_nunits
, &loads
, matches
, &npermutes
,
2057 NULL
, max_tree_size
);
2061 /* Calculate the unrolling factor based on the smallest type. */
2063 = least_common_multiple (max_nunits
, group_size
) / group_size
;
2065 if (unrolling_factor
!= 1
2066 && is_a
<bb_vec_info
> (vinfo
))
2069 if (max_nunits
> group_size
)
2071 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2072 "Build SLP failed: store group "
2073 "size not a multiple of the vector size "
2074 "in basic block SLP\n");
2075 vect_free_slp_tree (node
);
2079 /* Fatal mismatch. */
2080 matches
[group_size
/max_nunits
* max_nunits
] = false;
2081 vect_free_slp_tree (node
);
2086 /* Create a new SLP instance. */
2087 new_instance
= XNEW (struct _slp_instance
);
2088 SLP_INSTANCE_TREE (new_instance
) = node
;
2089 SLP_INSTANCE_GROUP_SIZE (new_instance
) = group_size
;
2090 SLP_INSTANCE_UNROLLING_FACTOR (new_instance
) = unrolling_factor
;
2091 SLP_INSTANCE_LOADS (new_instance
) = loads
;
2093 /* Compute the load permutation. */
2095 bool loads_permuted
= false;
2096 FOR_EACH_VEC_ELT (loads
, i
, load_node
)
2098 vec
<unsigned> load_permutation
;
2100 gimple
*load
, *first_stmt
;
2101 bool this_load_permuted
= false;
2102 load_permutation
.create (group_size
);
2103 first_stmt
= GROUP_FIRST_ELEMENT
2104 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node
)[0]));
2105 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node
), j
, load
)
2107 int load_place
= vect_get_place_in_interleaving_chain
2109 gcc_assert (load_place
!= -1);
2110 if (load_place
!= j
)
2111 this_load_permuted
= true;
2112 load_permutation
.safe_push (load_place
);
2114 if (!this_load_permuted
2115 /* The load requires permutation when unrolling exposes
2116 a gap either because the group is larger than the SLP
2117 group-size or because there is a gap between the groups. */
2118 && (unrolling_factor
== 1
2119 || (group_size
== GROUP_SIZE (vinfo_for_stmt (first_stmt
))
2120 && GROUP_GAP (vinfo_for_stmt (first_stmt
)) == 0)))
2122 load_permutation
.release ();
2125 SLP_TREE_LOAD_PERMUTATION (load_node
) = load_permutation
;
2126 loads_permuted
= true;
2131 if (!vect_supported_load_permutation_p (new_instance
))
2133 if (dump_enabled_p ())
2135 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2136 "Build SLP failed: unsupported load "
2138 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
,
2141 vect_free_slp_instance (new_instance
);
2146 /* If the loads and stores can be handled with load/store-lan
2147 instructions do not generate this SLP instance. */
2148 if (is_a
<loop_vec_info
> (vinfo
)
2150 && dr
&& vect_store_lanes_supported (vectype
, group_size
))
2153 FOR_EACH_VEC_ELT (loads
, i
, load_node
)
2155 gimple
*first_stmt
= GROUP_FIRST_ELEMENT
2156 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node
)[0]));
2157 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (first_stmt
);
2158 /* Use SLP for strided accesses (or if we
2159 can't load-lanes). */
2160 if (STMT_VINFO_STRIDED_P (stmt_vinfo
)
2161 || ! vect_load_lanes_supported
2162 (STMT_VINFO_VECTYPE (stmt_vinfo
),
2163 GROUP_SIZE (stmt_vinfo
)))
2166 if (i
== loads
.length ())
2168 if (dump_enabled_p ())
2169 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2170 "Built SLP cancelled: can use "
2171 "load/store-lanes\n");
2172 vect_free_slp_instance (new_instance
);
2177 vinfo
->slp_instances
.safe_push (new_instance
);
2179 if (dump_enabled_p ())
2181 dump_printf_loc (MSG_NOTE
, vect_location
,
2182 "Final SLP tree for instance:\n");
2183 vect_print_slp_tree (MSG_NOTE
, vect_location
, node
);
2191 /* Failed to SLP. */
2192 /* Free the allocated memory. */
2193 scalar_stmts
.release ();
2197 /* For basic block SLP, try to break the group up into multiples of the
2199 if (is_a
<bb_vec_info
> (vinfo
)
2200 && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
))
2201 && STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt
)))
2203 /* We consider breaking the group only on VF boundaries from the existing
2205 for (i
= 0; i
< group_size
; i
++)
2206 if (!matches
[i
]) break;
2208 if (i
>= nunits
&& i
< group_size
)
2210 /* Split into two groups at the first vector boundary before i. */
2211 gcc_assert ((nunits
& (nunits
- 1)) == 0);
2212 unsigned group1_size
= i
& ~(nunits
- 1);
2214 gimple
*rest
= vect_split_slp_store_group (stmt
, group1_size
);
2215 bool res
= vect_analyze_slp_instance (vinfo
, stmt
, max_tree_size
);
2216 /* If the first non-match was in the middle of a vector,
2217 skip the rest of that vector. */
2218 if (group1_size
< i
)
2220 i
= group1_size
+ nunits
;
2222 rest
= vect_split_slp_store_group (rest
, nunits
);
2225 res
|= vect_analyze_slp_instance (vinfo
, rest
, max_tree_size
);
2228 /* Even though the first vector did not all match, we might be able to SLP
2229 (some) of the remainder. FORNOW ignore this possibility. */
2236 /* Check if there are stmts in the loop can be vectorized using SLP. Build SLP
2237 trees of packed scalar stmts if SLP is possible. */
2240 vect_analyze_slp (vec_info
*vinfo
, unsigned max_tree_size
)
2243 gimple
*first_element
;
2245 if (dump_enabled_p ())
2246 dump_printf_loc (MSG_NOTE
, vect_location
, "=== vect_analyze_slp ===\n");
2248 /* Find SLP sequences starting from groups of grouped stores. */
2249 FOR_EACH_VEC_ELT (vinfo
->grouped_stores
, i
, first_element
)
2250 vect_analyze_slp_instance (vinfo
, first_element
, max_tree_size
);
2252 if (loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
))
2254 if (loop_vinfo
->reduction_chains
.length () > 0)
2256 /* Find SLP sequences starting from reduction chains. */
2257 FOR_EACH_VEC_ELT (loop_vinfo
->reduction_chains
, i
, first_element
)
2258 if (! vect_analyze_slp_instance (vinfo
, first_element
,
2261 /* Dissolve reduction chain group. */
2262 gimple
*next
, *stmt
= first_element
;
2265 stmt_vec_info vinfo
= vinfo_for_stmt (stmt
);
2266 next
= GROUP_NEXT_ELEMENT (vinfo
);
2267 GROUP_FIRST_ELEMENT (vinfo
) = NULL
;
2268 GROUP_NEXT_ELEMENT (vinfo
) = NULL
;
2271 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (first_element
))
2272 = vect_internal_def
;
2276 /* Find SLP sequences starting from groups of reductions. */
2277 if (loop_vinfo
->reductions
.length () > 1)
2278 vect_analyze_slp_instance (vinfo
, loop_vinfo
->reductions
[0],
/* vect_make_slp_decision: mark every SLP instance's stmts pure_slp, record
   the maximum per-instance unrolling factor into the loop_vinfo, and return
   whether any instance was chosen.
   NOTE(review): the increment of decided_to_slp is on a line missing from
   this mangled view — confirm against upstream before editing.  */
2286 /* For each possible SLP instance decide whether to SLP it and calculate overall
2287 unrolling factor needed to SLP the loop. Return TRUE if decided to SLP at
2288 least one instance. */
2291 vect_make_slp_decision (loop_vec_info loop_vinfo
)
2293 unsigned int i
, unrolling_factor
= 1;
2294 vec
<slp_instance
> slp_instances
= LOOP_VINFO_SLP_INSTANCES (loop_vinfo
);
2295 slp_instance instance
;
2296 int decided_to_slp
= 0;
2298 if (dump_enabled_p ())
2299 dump_printf_loc (MSG_NOTE
, vect_location
, "=== vect_make_slp_decision ==="
2302 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
2304 /* FORNOW: SLP if you can. */
2305 if (unrolling_factor
< SLP_INSTANCE_UNROLLING_FACTOR (instance
))
2306 unrolling_factor
= SLP_INSTANCE_UNROLLING_FACTOR (instance
);
2308 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
2309 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
2310 loop-based vectorization. Such stmts will be marked as HYBRID. */
2311 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance
), pure_slp
, -1);
2315 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo
) = unrolling_factor
;
2317 if (decided_to_slp
&& dump_enabled_p ())
2318 dump_printf_loc (MSG_NOTE
, vect_location
,
2319 "Decided to SLP %d instances. Unrolling factor %d\n",
2320 decided_to_slp
, unrolling_factor
);
2322 return (decided_to_slp
> 0);
/* vect_detect_hybrid_slp_stmts: walk the SLP tree at NODE (lane I) and mark
   as HYBRID any pure-SLP stmt whose SSA def has an in-loop immediate use in
   a relevant non-SLP stmt; hybrid status then propagates to children via the
   recursive call with STYPE.
   NOTE(review): extraction-mangled; several original lines are missing.  */
2326 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
2327 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
2330 vect_detect_hybrid_slp_stmts (slp_tree node
, unsigned i
, slp_vect_type stype
)
2332 gimple
*stmt
= SLP_TREE_SCALAR_STMTS (node
)[i
];
2333 imm_use_iterator imm_iter
;
2335 stmt_vec_info use_vinfo
, stmt_vinfo
= vinfo_for_stmt (stmt
);
2337 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
2338 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2341 /* Propagate hybrid down the SLP tree. */
2342 if (stype
== hybrid
)
2344 else if (HYBRID_SLP_STMT (stmt_vinfo
))
2348 /* Check if a pure SLP stmt has uses in non-SLP stmts. */
2349 gcc_checking_assert (PURE_SLP_STMT (stmt_vinfo
));
2350 /* If we get a pattern stmt here we have to use the LHS of the
2351 original stmt for immediate uses. */
2352 if (! STMT_VINFO_IN_PATTERN_P (stmt_vinfo
)
2353 && STMT_VINFO_RELATED_STMT (stmt_vinfo
))
2354 stmt
= STMT_VINFO_RELATED_STMT (stmt_vinfo
);
/* The def to chase: the PHI result for PHIs, otherwise the single SSA def.  */
2356 if (gimple_code (stmt
) == GIMPLE_PHI
)
2357 def
= gimple_phi_result (stmt
);
2359 def
= SINGLE_SSA_TREE_OPERAND (stmt
, SSA_OP_DEF
);
2361 FOR_EACH_IMM_USE_STMT (use_stmt
, imm_iter
, def
)
2363 if (!flow_bb_inside_loop_p (loop
, gimple_bb (use_stmt
)))
2365 use_vinfo
= vinfo_for_stmt (use_stmt
);
2366 if (STMT_VINFO_IN_PATTERN_P (use_vinfo
)
2367 && STMT_VINFO_RELATED_STMT (use_vinfo
))
2368 use_vinfo
= vinfo_for_stmt (STMT_VINFO_RELATED_STMT (use_vinfo
));
/* A use in a relevant non-SLP stmt (but not a reduction PHI) makes this
   stmt hybrid.  */
2369 if (!STMT_SLP_TYPE (use_vinfo
)
2370 && (STMT_VINFO_RELEVANT (use_vinfo
)
2371 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo
)))
2372 && !(gimple_code (use_stmt
) == GIMPLE_PHI
2373 && STMT_VINFO_DEF_TYPE (use_vinfo
) == vect_reduction_def
))
2375 if (dump_enabled_p ())
2377 dump_printf_loc (MSG_NOTE
, vect_location
, "use of SLP "
2378 "def in non-SLP stmt: ");
2379 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, use_stmt
, 0);
2387 && !HYBRID_SLP_STMT (stmt_vinfo
))
2389 if (dump_enabled_p ())
2391 dump_printf_loc (MSG_NOTE
, vect_location
, "marking hybrid: ");
2392 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
2394 STMT_SLP_TYPE (stmt_vinfo
) = hybrid
;
2397 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), j
, child
)
2398 if (SLP_TREE_DEF_TYPE (child
) != vect_external_def
)
2399 vect_detect_hybrid_slp_stmts (child
, i
, stype
);
/* vect_detect_hybrid_slp_1: walk_tree callback used when scanning pattern
   stmt operands.  For an SSA operand defined inside the loop (passed via
   WI->info) by a pure-SLP stmt, mark that def stmt hybrid.  */
2402 /* Helpers for vect_detect_hybrid_slp walking pattern stmt uses. */
2405 vect_detect_hybrid_slp_1 (tree
*tp
, int *, void *data
)
2407 walk_stmt_info
*wi
= (walk_stmt_info
*)data
;
2408 struct loop
*loopp
= (struct loop
*)wi
->info
;
2413 if (TREE_CODE (*tp
) == SSA_NAME
2414 && !SSA_NAME_IS_DEFAULT_DEF (*tp
))
2416 gimple
*def_stmt
= SSA_NAME_DEF_STMT (*tp
);
2417 if (flow_bb_inside_loop_p (loopp
, gimple_bb (def_stmt
))
2418 && PURE_SLP_STMT (vinfo_for_stmt (def_stmt
)))
2420 if (dump_enabled_p ())
2422 dump_printf_loc (MSG_NOTE
, vect_location
, "marking hybrid: ");
2423 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, def_stmt
, 0);
2425 STMT_SLP_TYPE (vinfo_for_stmt (def_stmt
)) = hybrid
;
/* vect_detect_hybrid_slp_2: walk_gimple_stmt callback.  Decides whether the
   stmt at *GSI is a relevant non-SLP use (excluding reduction PHIs) whose
   operands should be scanned by vect_detect_hybrid_slp_1; stmts already in
   an SLP instance are skipped.  */
2433 vect_detect_hybrid_slp_2 (gimple_stmt_iterator
*gsi
, bool *handled
,
2436 stmt_vec_info use_vinfo
= vinfo_for_stmt (gsi_stmt (*gsi
));
2437 /* If the stmt is in a SLP instance then this isn't a reason
2438 to mark use definitions in other SLP instances as hybrid. */
2439 if (! STMT_SLP_TYPE (use_vinfo
)
2440 && (STMT_VINFO_RELEVANT (use_vinfo
)
2441 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo
)))
2442 && ! (gimple_code (gsi_stmt (*gsi
)) == GIMPLE_PHI
2443 && STMT_VINFO_DEF_TYPE (use_vinfo
) == vect_reduction_def
))
/* vect_detect_hybrid_slp: two-phase hybrid detection.  Phase 1 walks every
   pattern stmt in the loop body (immediate uses inside patterns are not
   recorded) with the _1/_2 callbacks; phase 2 walks each SLP instance tree
   per lane via vect_detect_hybrid_slp_stmts.  */
2450 /* Find stmts that must be both vectorized and SLPed. */
2453 vect_detect_hybrid_slp (loop_vec_info loop_vinfo
)
2456 vec
<slp_instance
> slp_instances
= LOOP_VINFO_SLP_INSTANCES (loop_vinfo
);
2457 slp_instance instance
;
2459 if (dump_enabled_p ())
2460 dump_printf_loc (MSG_NOTE
, vect_location
, "=== vect_detect_hybrid_slp ==="
2463 /* First walk all pattern stmt in the loop and mark defs of uses as
2464 hybrid because immediate uses in them are not recorded. */
2465 for (i
= 0; i
< LOOP_VINFO_LOOP (loop_vinfo
)->num_nodes
; ++i
)
2467 basic_block bb
= LOOP_VINFO_BBS (loop_vinfo
)[i
];
2468 for (gimple_stmt_iterator gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
);
2471 gimple
*stmt
= gsi_stmt (gsi
);
2472 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2473 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
/* Walk both the pattern's main stmt and its pattern-def sequence.  */
2476 memset (&wi
, 0, sizeof (wi
));
2477 wi
.info
= LOOP_VINFO_LOOP (loop_vinfo
);
2478 gimple_stmt_iterator gsi2
2479 = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info
));
2480 walk_gimple_stmt (&gsi2
, vect_detect_hybrid_slp_2
,
2481 vect_detect_hybrid_slp_1
, &wi
);
2482 walk_gimple_seq (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
),
2483 vect_detect_hybrid_slp_2
,
2484 vect_detect_hybrid_slp_1
, &wi
);
2489 /* Then walk the SLP instance trees marking stmts with uses in
2490 non-SLP stmts as hybrid, also propagating hybrid down the
2491 SLP tree, collecting the above info on-the-fly. */
2492 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
2494 for (unsigned i
= 0; i
< SLP_INSTANCE_GROUP_SIZE (instance
); ++i
)
2495 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance
),
/* _bb_vec_info constructor: record the [begin, end) stmt region and create a
   fresh stmt_vec_info (with uid 0 as the region marker) for every stmt in
   the region.  */
2501 /* Initialize a bb_vec_info struct for the statements between
2502 REGION_BEGIN_IN (inclusive) and REGION_END_IN (exclusive). */
2504 _bb_vec_info::_bb_vec_info (gimple_stmt_iterator region_begin_in
,
2505 gimple_stmt_iterator region_end_in
)
2506 : vec_info (vec_info::bb
, init_cost (NULL
)),
2507 bb (gsi_bb (region_begin_in
)),
2508 region_begin (region_begin_in
),
2509 region_end (region_end_in
)
2511 gimple_stmt_iterator gsi
;
2513 for (gsi
= region_begin
; gsi_stmt (gsi
) != gsi_stmt (region_end
);
2516 gimple
*stmt
= gsi_stmt (gsi
);
2517 gimple_set_uid (stmt
, 0);
2518 set_vinfo_for_stmt (stmt
, new_stmt_vec_info (stmt
, this));
/* _bb_vec_info destructor: free every stmt_vec_info in the region and reset
   each stmt's uid to -1 so it is no longer marked as inside a region.  */
2525 /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
2526 stmts in the basic block. */
2528 _bb_vec_info::~_bb_vec_info ()
2530 for (gimple_stmt_iterator si
= region_begin
;
2531 gsi_stmt (si
) != gsi_stmt (region_end
); gsi_next (&si
))
2533 gimple
*stmt
= gsi_stmt (si
);
2534 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2537 /* Free stmt_vec_info. */
2538 free_stmt_vec_info (stmt
);
2540 /* Reset region marker. */
2541 gimple_set_uid (stmt
, -1);
/* vect_slp_analyze_node_operations: recursively analyze children first, then
   the stmts of NODE itself.  For BB vectorization, assigns vector types to
   non-data-ref stmts here; computes SLP_TREE_NUMBER_OF_VEC_STMTS; temporarily
   pushes child def-types onto the first scalar stmt of each non-internal
   child around the vect_analyze_stmt call and restores them afterwards.
   NOTE(review): extraction-mangled; several original lines are missing.  */
2548 /* Analyze statements contained in SLP tree NODE after recursively analyzing
2549 the subtree. NODE_INSTANCE contains NODE and VINFO contains INSTANCE.
2551 Return true if the operations are supported. */
2554 vect_slp_analyze_node_operations (vec_info
*vinfo
, slp_tree node
,
2555 slp_instance node_instance
)
2562 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
2565 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
2566 if (!vect_slp_analyze_node_operations (vinfo
, child
, node_instance
))
2569 stmt
= SLP_TREE_SCALAR_STMTS (node
)[0];
2570 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2571 gcc_assert (stmt_info
);
2572 gcc_assert (STMT_SLP_TYPE (stmt_info
) != loop_vect
);
2574 /* For BB vectorization vector types are assigned here.
2575 Memory accesses already got their vector type assigned
2576 in vect_analyze_data_refs. */
2577 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
2579 && ! STMT_VINFO_DATA_REF (stmt_info
))
2581 gcc_assert (PURE_SLP_STMT (stmt_info
));
2583 tree scalar_type
= TREE_TYPE (gimple_get_lhs (stmt
));
2584 if (dump_enabled_p ())
2586 dump_printf_loc (MSG_NOTE
, vect_location
,
2587 "get vectype for scalar type: ");
2588 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, scalar_type
);
2589 dump_printf (MSG_NOTE
, "\n");
2592 tree vectype
= get_vectype_for_scalar_type (scalar_type
);
2595 if (dump_enabled_p ())
2597 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2598 "not SLPed: unsupported data-type ");
2599 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
2601 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
2606 if (dump_enabled_p ())
2608 dump_printf_loc (MSG_NOTE
, vect_location
, "vectype: ");
2609 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, vectype
);
2610 dump_printf (MSG_NOTE
, "\n");
/* Record the chosen vectype on every scalar stmt of the node.  */
2614 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, sstmt
)
2615 STMT_VINFO_VECTYPE (vinfo_for_stmt (sstmt
)) = vectype
;
2618 /* Calculate the number of vector statements to be created for the
2619 scalar stmts in this node. For SLP reductions it is equal to the
2620 number of vector statements in the children (which has already been
2621 calculated by the recursive call). Otherwise it is the number of
2622 scalar elements in one scalar iteration (GROUP_SIZE) multiplied by
2623 VF divided by the number of elements in a vector. */
2624 if (GROUP_FIRST_ELEMENT (stmt_info
)
2625 && !STMT_VINFO_GROUPED_ACCESS (stmt_info
))
2626 SLP_TREE_NUMBER_OF_VEC_STMTS (node
)
2627 = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node
)[0]);
2631 if (loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
))
2632 vf
= loop_vinfo
->vectorization_factor
;
2635 unsigned int group_size
= SLP_INSTANCE_GROUP_SIZE (node_instance
);
2636 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2637 SLP_TREE_NUMBER_OF_VEC_STMTS (node
)
2638 = vf
* group_size
/ TYPE_VECTOR_SUBPARTS (vectype
);
2641 /* Push SLP node def-type to stmt operands. */
2642 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), j
, child
)
2643 if (SLP_TREE_DEF_TYPE (child
) != vect_internal_def
)
2644 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child
)[0]))
2645 = SLP_TREE_DEF_TYPE (child
);
2646 bool res
= vect_analyze_stmt (stmt
, &dummy
, node
, node_instance
);
2647 /* Restore def-types. */
2648 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), j
, child
)
2649 if (SLP_TREE_DEF_TYPE (child
) != vect_internal_def
)
2650 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child
)[0]))
2651 = vect_internal_def
;
/* vect_slp_analyze_operations: analyze each SLP instance of VINFO, removing
   (and freeing) instances whose node operations fail, costing the survivors.
   Returns whether any instance remains.  */
2659 /* Analyze statements in SLP instances of VINFO. Return true if the
2660 operations are supported. */
2663 vect_slp_analyze_operations (vec_info
*vinfo
)
2665 slp_instance instance
;
2668 if (dump_enabled_p ())
2669 dump_printf_loc (MSG_NOTE
, vect_location
,
2670 "=== vect_slp_analyze_operations ===\n");
/* No increment in the for-header: I is advanced only when the instance is
   kept; ordered_remove shifts the next candidate into slot I.  */
2672 for (i
= 0; vinfo
->slp_instances
.iterate (i
, &instance
); )
2674 if (!vect_slp_analyze_node_operations (vinfo
,
2675 SLP_INSTANCE_TREE (instance
),
2678 dump_printf_loc (MSG_NOTE
, vect_location
,
2679 "removing SLP instance operations starting from: ");
2680 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
,
2681 SLP_TREE_SCALAR_STMTS
2682 (SLP_INSTANCE_TREE (instance
))[0], 0);
2683 vect_free_slp_instance (instance
);
2684 vinfo
->slp_instances
.ordered_remove (i
);
2688 /* Compute the costs of the SLP instance. */
2689 vect_analyze_slp_cost (instance
, vinfo
->target_cost_data
);
2694 return !vinfo
->slp_instances
.is_empty ();
/* vect_bb_slp_scalar_cost: sum the scalar cost of NODE's stmts (load, store
   or generic stmt cost), skipping stmts whose defs escape the region or feed
   non-SLP uses (tracked via LIFE) and counting each stmt once via the
   gimple visited flag; recurses into internal-def children with a copy of
   LIFE.  */
2698 /* Compute the scalar cost of the SLP node NODE and its children
2699 and return it. Do not account defs that are marked in LIFE and
2700 update LIFE according to uses of NODE. */
2703 vect_bb_slp_scalar_cost (basic_block bb
,
2704 slp_tree node
, vec
<bool, va_heap
> *life
)
2706 unsigned scalar_cost
= 0;
2711 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
2714 ssa_op_iter op_iter
;
2715 def_operand_p def_p
;
2716 stmt_vec_info stmt_info
;
2721 /* If there is a non-vectorized use of the defs then the scalar
2722 stmt is kept live in which case we do not account it or any
2723 required defs in the SLP children in the scalar cost. This
2724 way we make the vectorization more costly when compared to
2726 FOR_EACH_SSA_DEF_OPERAND (def_p
, stmt
, op_iter
, SSA_OP_DEF
)
2728 imm_use_iterator use_iter
;
2730 FOR_EACH_IMM_USE_STMT (use_stmt
, use_iter
, DEF_FROM_PTR (def_p
))
2731 if (!is_gimple_debug (use_stmt
)
2732 && (! vect_stmt_in_region_p (vinfo_for_stmt (stmt
)->vinfo
,
2734 || ! PURE_SLP_STMT (vinfo_for_stmt (use_stmt
))))
2737 BREAK_FROM_IMM_USE_STMT (use_iter
);
2743 /* Count scalar stmts only once. */
2744 if (gimple_visited_p (stmt
))
2746 gimple_set_visited (stmt
, true);
2748 stmt_info
= vinfo_for_stmt (stmt
);
2749 if (STMT_VINFO_DATA_REF (stmt_info
))
2751 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info
)))
2752 stmt_cost
= vect_get_stmt_cost (scalar_load
);
2754 stmt_cost
= vect_get_stmt_cost (scalar_store
);
2757 stmt_cost
= vect_get_stmt_cost (scalar_stmt
);
2759 scalar_cost
+= stmt_cost
;
2762 auto_vec
<bool, 20> subtree_life
;
2763 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
2765 if (SLP_TREE_DEF_TYPE (child
) == vect_internal_def
)
2767 /* Do not directly pass LIFE to the recursive call, copy it to
2768 confine changes in the callee to the current child/subtree. */
2769 subtree_life
.safe_splice (*life
);
2770 scalar_cost
+= vect_bb_slp_scalar_cost (bb
, child
, &subtree_life
);
2771 subtree_life
.truncate (0);
/* vect_bb_vectorization_profitable_p: compare the summed scalar cost of all
   SLP instances against the target's vector inside+outside cost
   (prologue + epilogue); ties favor the vector side.  Also clears the
   gimple visited flags set by vect_bb_slp_scalar_cost.  */
2778 /* Check if vectorization of the basic block is profitable. */
2781 vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo
)
2783 vec
<slp_instance
> slp_instances
= BB_VINFO_SLP_INSTANCES (bb_vinfo
);
2784 slp_instance instance
;
2786 unsigned int vec_inside_cost
= 0, vec_outside_cost
= 0, scalar_cost
= 0;
2787 unsigned int vec_prologue_cost
= 0, vec_epilogue_cost
= 0;
2789 /* Calculate scalar cost. */
2790 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
2792 auto_vec
<bool, 20> life
;
2793 life
.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance
));
2794 scalar_cost
+= vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo
),
2795 SLP_INSTANCE_TREE (instance
),
2799 /* Unset visited flag. */
2800 for (gimple_stmt_iterator gsi
= bb_vinfo
->region_begin
;
2801 gsi_stmt (gsi
) != gsi_stmt (bb_vinfo
->region_end
); gsi_next (&gsi
))
2802 gimple_set_visited (gsi_stmt (gsi
), false);
2804 /* Complete the target-specific cost calculation. */
2805 finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo
), &vec_prologue_cost
,
2806 &vec_inside_cost
, &vec_epilogue_cost
);
2808 vec_outside_cost
= vec_prologue_cost
+ vec_epilogue_cost
;
2810 if (dump_enabled_p ())
2812 dump_printf_loc (MSG_NOTE
, vect_location
, "Cost model analysis: \n");
2813 dump_printf (MSG_NOTE
, " Vector inside of basic block cost: %d\n",
2815 dump_printf (MSG_NOTE
, " Vector prologue cost: %d\n", vec_prologue_cost
);
2816 dump_printf (MSG_NOTE
, " Vector epilogue cost: %d\n", vec_epilogue_cost
);
2817 dump_printf (MSG_NOTE
, " Scalar cost of basic block: %d\n", scalar_cost
);
2820 /* Vectorization is profitable if its cost is more than the cost of scalar
2821 version. Note that we err on the vector side for equal cost because
2822 the cost estimate is otherwise quite pessimistic (constant uses are
2823 free on the scalar side but cost a load on the vector side for
2825 if (vec_outside_cost
+ vec_inside_cost
> scalar_cost
)
/* vect_slp_analyze_bb_1: run the full BB-SLP analysis pipeline on the stmt
   region: insn-count limit, data-ref analysis, access analysis, grouped-store
   presence, pattern recog, SLP build, alignment/dependence verification per
   instance (removing failures), operation analysis, and cost model.
   NOTE(review): extraction-mangled; failure-path returns and the fatal
   out-parameter handling are on lines missing from this view.  */
2831 /* Check if the basic block can be vectorized. Returns a bb_vec_info
2832 if so and sets fatal to true if failure is independent of
2833 current_vector_size. */
2836 vect_slp_analyze_bb_1 (gimple_stmt_iterator region_begin
,
2837 gimple_stmt_iterator region_end
,
2838 vec
<data_reference_p
> datarefs
, int n_stmts
,
2841 bb_vec_info bb_vinfo
;
2842 slp_instance instance
;
2846 /* The first group of checks is independent of the vector size. */
2849 if (n_stmts
> PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB
))
2851 if (dump_enabled_p ())
2852 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2853 "not vectorized: too many instructions in "
2855 free_data_refs (datarefs
);
2859 bb_vinfo
= new _bb_vec_info (region_begin
, region_end
);
2863 BB_VINFO_DATAREFS (bb_vinfo
) = datarefs
;
2865 /* Analyze the data references. */
2867 if (!vect_analyze_data_refs (bb_vinfo
, &min_vf
))
2869 if (dump_enabled_p ())
2870 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2871 "not vectorized: unhandled data-ref in basic "
2878 if (BB_VINFO_DATAREFS (bb_vinfo
).length () < 2)
2880 if (dump_enabled_p ())
2881 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2882 "not vectorized: not enough data-refs in "
2889 if (!vect_analyze_data_ref_accesses (bb_vinfo
))
2891 if (dump_enabled_p ())
2892 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2893 "not vectorized: unhandled data access in "
2900 /* If there are no grouped stores in the region there is no need
2901 to continue with pattern recog as vect_analyze_slp will fail
2903 if (bb_vinfo
->grouped_stores
.is_empty ())
2905 if (dump_enabled_p ())
2906 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2907 "not vectorized: no grouped stores in "
2914 /* While the rest of the analysis below depends on it in some way. */
2917 vect_pattern_recog (bb_vinfo
);
2919 /* Check the SLP opportunities in the basic block, analyze and build SLP
2921 if (!vect_analyze_slp (bb_vinfo
, n_stmts
))
2923 if (dump_enabled_p ())
2925 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2926 "Failed to SLP the basic block.\n");
2927 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2928 "not vectorized: failed to find SLP opportunities "
2929 "in basic block.\n");
2936 vect_record_base_alignments (bb_vinfo
);
2938 /* Analyze and verify the alignment of data references and the
2939 dependence in the SLP instances. */
/* Same remove-or-advance iteration idiom as vect_slp_analyze_operations.  */
2940 for (i
= 0; BB_VINFO_SLP_INSTANCES (bb_vinfo
).iterate (i
, &instance
); )
2942 if (! vect_slp_analyze_and_verify_instance_alignment (instance
)
2943 || ! vect_slp_analyze_instance_dependence (instance
))
2945 dump_printf_loc (MSG_NOTE
, vect_location
,
2946 "removing SLP instance operations starting from: ");
2947 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
,
2948 SLP_TREE_SCALAR_STMTS
2949 (SLP_INSTANCE_TREE (instance
))[0], 0);
2950 vect_free_slp_instance (instance
);
2951 BB_VINFO_SLP_INSTANCES (bb_vinfo
).ordered_remove (i
);
2955 /* Mark all the statements that we want to vectorize as pure SLP and
2957 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance
), pure_slp
, -1);
2958 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance
));
2962 if (! BB_VINFO_SLP_INSTANCES (bb_vinfo
).length ())
2968 if (!vect_slp_analyze_operations (bb_vinfo
))
2970 if (dump_enabled_p ())
2971 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2972 "not vectorized: bad operation in basic block.\n")
2978 /* Cost model: check if the vectorization is worthwhile. */
2979 if (!unlimited_cost_model (NULL
)
2980 && !vect_bb_vectorization_profitable_p (bb_vinfo
))
2982 if (dump_enabled_p ())
2983 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2984 "not vectorized: vectorization is not "
2991 if (dump_enabled_p ())
2992 dump_printf_loc (MSG_NOTE
, vect_location
,
2993 "Basic block will be vectorized using SLP\n");
/* vect_slp_bb: BB vectorizer driver.  Splits BB into regions bounded by
   stmts with unhandled data refs, runs vect_slp_analyze_bb_1 on each region,
   schedules SLP on success, and retries each region with the next smaller
   target vector size while any remain.  Returns whether anything was
   vectorized.  */
2999 /* Main entry for the BB vectorizer. Analyze and transform BB, returns
3000 true if anything in the basic-block was vectorized. */
3003 vect_slp_bb (basic_block bb
)
3005 bb_vec_info bb_vinfo
;
3006 gimple_stmt_iterator gsi
;
3007 unsigned int vector_sizes
;
3008 bool any_vectorized
= false;
3010 if (dump_enabled_p ())
3011 dump_printf_loc (MSG_NOTE
, vect_location
, "===vect_slp_analyze_bb===\n");
3013 /* Autodetect first vector size we try. */
3014 current_vector_size
= 0;
3015 vector_sizes
= targetm
.vectorize
.autovectorize_vector_sizes ();
3017 gsi
= gsi_start_bb (bb
);
3021 if (gsi_end_p (gsi
))
3024 gimple_stmt_iterator region_begin
= gsi
;
3025 vec
<data_reference_p
> datarefs
= vNULL
;
/* Grow the region stmt by stmt, collecting data refs, until a stmt whose
   data refs cannot be found ends the region.  */
3028 for (; !gsi_end_p (gsi
); gsi_next (&gsi
))
3030 gimple
*stmt
= gsi_stmt (gsi
);
3031 if (is_gimple_debug (stmt
))
3035 if (gimple_location (stmt
) != UNKNOWN_LOCATION
)
3036 vect_location
= gimple_location (stmt
);
3038 if (!find_data_references_in_stmt (NULL
, stmt
, &datarefs
))
3042 /* Skip leading unhandled stmts. */
3043 if (gsi_stmt (region_begin
) == gsi_stmt (gsi
))
3049 gimple_stmt_iterator region_end
= gsi
;
3051 bool vectorized
= false;
3053 bb_vinfo
= vect_slp_analyze_bb_1 (region_begin
, region_end
,
3054 datarefs
, insns
, fatal
);
3056 && dbg_cnt (vect_slp
))
3058 if (dump_enabled_p ())
3059 dump_printf_loc (MSG_NOTE
, vect_location
, "SLPing BB part\n");
3061 vect_schedule_slp (bb_vinfo
);
3063 if (dump_enabled_p ())
3064 dump_printf_loc (MSG_NOTE
, vect_location
,
3065 "basic block part vectorized\n");
3071 any_vectorized
|= vectorized
;
/* Drop the size just tried from the candidate-size bitmask.  */
3073 vector_sizes
&= ~current_vector_size
;
3075 || vector_sizes
== 0
3076 || current_vector_size
== 0
3077 /* If vect_slp_analyze_bb_1 signaled that analysis for all
3078 vector sizes will fail do not bother iterating. */
3081 if (gsi_end_p (region_end
))
3084 /* Skip the unhandled stmt. */
3087 /* And reset vector sizes. */
3088 current_vector_size
= 0;
3089 vector_sizes
= targetm
.vectorize
.autovectorize_vector_sizes ();
3093 /* Try the next biggest vector size. */
3094 current_vector_size
= 1 << floor_log2 (vector_sizes
);
3095 if (dump_enabled_p ())
3096 dump_printf_loc (MSG_NOTE
, vect_location
,
3097 "***** Re-trying analysis with "
3098 "vector size %d\n", current_vector_size
);
3105 return any_vectorized
;
/* vect_mask_constant_operand_p: true if the boolean constant operand OPNUM
   of STMT needs a boolean vector type.  For comparisons and SSA-cond
   COND_EXPRs the decision follows the vectype of the *other* comparison
   operand; otherwise it follows STMT's own vectype.  */
3109 /* Return 1 if vector type of boolean constant which is OPNUM
3110 operand in statement STMT is a boolean vector. */
3113 vect_mask_constant_operand_p (gimple
*stmt
, int opnum
)
3115 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
3116 enum tree_code code
= gimple_expr_code (stmt
);
3119 enum vect_def_type dt
;
3121 /* For comparison and COND_EXPR type is chosen depending
3122 on the other comparison operand. */
3123 if (TREE_CODE_CLASS (code
) == tcc_comparison
)
/* Pick the operand opposite to OPNUM.  */
3126 op
= gimple_assign_rhs1 (stmt
);
3128 op
= gimple_assign_rhs2 (stmt
);
3130 if (!vect_is_simple_use (op
, stmt_vinfo
->vinfo
, &def_stmt
,
3134 return !vectype
|| VECTOR_BOOLEAN_TYPE_P (vectype
);
3137 if (code
== COND_EXPR
)
3139 tree cond
= gimple_assign_rhs1 (stmt
);
3141 if (TREE_CODE (cond
) == SSA_NAME
)
3144 op
= TREE_OPERAND (cond
, 1);
3146 op
= TREE_OPERAND (cond
, 0);
3148 if (!vect_is_simple_use (op
, stmt_vinfo
->vinfo
, &def_stmt
,
3152 return !vectype
|| VECTOR_BOOLEAN_TYPE_P (vectype
);
3155 return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo
));
/* vect_get_constant_vectors: materialize NUMBER_OF_VECTORS vector defs for
   the constant/invariant operand OP_NUM of SLP_NODE's scalar stmts.  Picks a
   (possibly boolean) vector type, fills elements group-by-group (in reverse
   so vectors come out in order), converts mismatched scalar types (constants
   folded, SSA names via emitted conversion stmts collected in CTOR_SEQ),
   emits each full vector with vect_init_vector, and replicates/pads with the
   neutral value when more vectors than distinct contents are needed.
   NOTE(review): extraction-mangled; many original lines (braces, some
   conditions) are missing from this view.  */
3159 /* For constant and loop invariant defs of SLP_NODE this function returns
3160 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
3161 OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
3162 scalar stmts. NUMBER_OF_VECTORS is the number of vector defs to create.
3163 REDUC_INDEX is the index of the reduction operand in the statements, unless
3167 vect_get_constant_vectors (tree op
, slp_tree slp_node
,
3168 vec
<tree
> *vec_oprnds
,
3169 unsigned int op_num
, unsigned int number_of_vectors
)
3171 vec
<gimple
*> stmts
= SLP_TREE_SCALAR_STMTS (slp_node
);
3172 gimple
*stmt
= stmts
[0];
3173 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
3176 unsigned j
, number_of_places_left_in_vector
;
3179 int group_size
= stmts
.length ();
3180 unsigned int vec_num
, i
;
3181 unsigned number_of_copies
= 1;
3183 voprnds
.create (number_of_vectors
)
;
3184 bool constant_p
, is_store
;
3185 tree neutral_op
= NULL
;
3186 enum tree_code code
= gimple_expr_code (stmt
);
3187 gimple_seq ctor_seq
= NULL
;
3189 /* Check if vector type is a boolean vector. */
3190 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op
))
3191 && vect_mask_constant_operand_p (stmt
, op_num
))
3193 = build_same_sized_truth_vector_type (STMT_VINFO_VECTYPE (stmt_vinfo
));
3195 vector_type
= get_vectype_for_scalar_type (TREE_TYPE (op
));
3196 nunits
= TYPE_VECTOR_SUBPARTS (vector_type
);
3198 if (STMT_VINFO_DATA_REF (stmt_vinfo
))
3201 op
= gimple_assign_rhs1 (stmt
);
3208 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
3209 created vectors. It is greater than 1 if unrolling is performed.
3211 For example, we have two scalar operands, s1 and s2 (e.g., group of
3212 strided accesses of size two), while NUNITS is four (i.e., four scalars
3213 of this type can be packed in a vector). The output vector will contain
3214 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
3217 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
3218 containing the operands.
3220 For example, NUNITS is four as before, and the group size is 8
3221 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
3222 {s5, s6, s7, s8}. */
3224 number_of_copies
= nunits
* number_of_vectors
/ group_size
;
3226 number_of_places_left_in_vector
= nunits
;
3228 tree_vector_builder
elts (vector_type
, nunits
, 1);
3229 elts
.quick_grow (nunits
);
3230 bool place_after_defs
= false;
3231 for (j
= 0; j
< number_of_copies
; j
++)
/* Iterate the group in reverse so elements land in vector order.  */
3233 for (i
= group_size
- 1; stmts
.iterate (i
, &stmt
); i
--)
3236 op
= gimple_assign_rhs1 (stmt
);
/* Operand extraction varies by stmt shape: COND_EXPR, binary rhs2/rhs3,
   call argument, or generic operand OP_NUM + 1.  */
3243 tree cond
= gimple_assign_rhs1 (stmt
);
3244 if (TREE_CODE (cond
) == SSA_NAME
)
3245 op
= gimple_op (stmt
, op_num
+ 1);
3246 else if (op_num
== 0 || op_num
== 1)
3247 op
= TREE_OPERAND (cond
, op_num
);
3251 op
= gimple_assign_rhs2 (stmt
);
3253 op
= gimple_assign_rhs3 (stmt
);
3259 op
= gimple_call_arg (stmt
, op_num
);
3266 op
= gimple_op (stmt
, op_num
+ 1);
3267 /* Unlike the other binary operators, shifts/rotates have
3268 the shift count being int, instead of the same type as
3269 the lhs, so make sure the scalar is the right type if
3270 we are dealing with vectors of
3271 long long/long/short/char. */
3272 if (op_num
== 1 && TREE_CODE (op
) == INTEGER_CST
)
3273 op
= fold_convert (TREE_TYPE (vector_type
), op
);
3277 op
= gimple_op (stmt
, op_num
+ 1);
3282 /* Create 'vect_ = {op0,op1,...,opn}'. */
3283 number_of_places_left_in_vector
--;
3285 if (!types_compatible_p (TREE_TYPE (vector_type
), TREE_TYPE (op
)))
3287 if (CONSTANT_CLASS_P (op
))
3289 if (VECTOR_BOOLEAN_TYPE_P (vector_type
))
3291 /* Can't use VIEW_CONVERT_EXPR for booleans because
3292 of possibly different sizes of scalar value and
3294 if (integer_zerop (op
))
3295 op
= build_int_cst (TREE_TYPE (vector_type
), 0);
3296 else if (integer_onep (op
))
3297 op
= build_all_ones_cst (TREE_TYPE (vector_type
));
3302 op
= fold_unary (VIEW_CONVERT_EXPR
,
3303 TREE_TYPE (vector_type
), op
);
3304 gcc_assert (op
&& CONSTANT_CLASS_P (op
));
/* Non-constant mismatched operand: emit a conversion stmt into
   CTOR_SEQ (COND_EXPR for booleans, VIEW_CONVERT otherwise).  */
3308 tree new_temp
= make_ssa_name (TREE_TYPE (vector_type
));
3310 if (VECTOR_BOOLEAN_TYPE_P (vector_type
))
3313 = build_all_ones_cst (TREE_TYPE (vector_type
));
3315 = build_zero_cst (TREE_TYPE (vector_type
));
3316 gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (op
)));
3317 init_stmt
= gimple_build_assign (new_temp
, COND_EXPR
,
3323 op
= build1 (VIEW_CONVERT_EXPR
, TREE_TYPE (vector_type
),
3326 = gimple_build_assign (new_temp
, VIEW_CONVERT_EXPR
,
3329 gimple_seq_add_stmt (&ctor_seq
, init_stmt
);
3333 elts
[number_of_places_left_in_vector
] = op
;
3334 if (!CONSTANT_CLASS_P (op
))
3336 if (TREE_CODE (orig_op
) == SSA_NAME
3337 && !SSA_NAME_IS_DEFAULT_DEF (orig_op
)
3338 && STMT_VINFO_BB_VINFO (stmt_vinfo
)
3339 && (STMT_VINFO_BB_VINFO (stmt_vinfo
)->bb
3340 == gimple_bb (SSA_NAME_DEF_STMT (orig_op
))))
3341 place_after_defs
= true;
3343 if (number_of_places_left_in_vector
== 0)
3346 vec_cst
= elts
.build ();
3349 vec
<constructor_elt
, va_gc
> *v
;
3351 vec_alloc (v
, nunits
);
3352 for (k
= 0; k
< nunits
; ++k
)
3353 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
, elts
[k
]);
3354 vec_cst
= build_constructor (vector_type
, v
);
3357 gimple_stmt_iterator gsi
;
3358 if (place_after_defs
)
3361 (vect_find_last_scalar_stmt_in_slp (slp_node
));
3362 init
= vect_init_vector (stmt
, vec_cst
, vector_type
, &gsi
);
3365 init
= vect_init_vector (stmt
, vec_cst
, vector_type
, NULL
);
3366 if (ctor_seq
!= NULL
)
3368 gsi
= gsi_for_stmt (SSA_NAME_DEF_STMT (init
));
3369 gsi_insert_seq_before_without_update (&gsi
, ctor_seq
,
3373 voprnds
.quick_push (init
);
3374 place_after_defs
= false;
3375 number_of_places_left_in_vector
= nunits
;
3377 elts
.new_vector (vector_type
, nunits
, 1);
3378 elts
.quick_grow (nunits
);
3383 /* Since the vectors are created in the reverse order, we should invert
3385 vec_num
= voprnds
.length ();
3386 for (j
= vec_num
; j
!= 0; j
--)
3388 vop
= voprnds
[j
- 1];
3389 vec_oprnds
->quick_push (vop
);
3394 /* In case that VF is greater than the unrolling factor needed for the SLP
3395 group of stmts, NUMBER_OF_VECTORS to be created is greater than
3396 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
3397 to replicate the vectors. */
3398 while (number_of_vectors
> vec_oprnds
->length ())
3400 tree neutral_vec
= NULL
;
3405 neutral_vec
= build_vector_from_val (vector_type
, neutral_op
);
3407 vec_oprnds
->quick_push (neutral_vec
);
3411 for (i
= 0; vec_oprnds
->iterate (i
, &vop
) && i
< vec_num
; i
++)
3412 vec_oprnds
->quick_push (vop
);
/* vect_get_slp_vect_defs: collect the LHS (or PHI result) of each already
   vectorized stmt recorded on SLP_NODE into VEC_OPRNDS.  */
3418 /* Get vectorized definitions from SLP_NODE that contains corresponding
3419 vectorized def-stmts. */
3422 vect_get_slp_vect_defs (slp_tree slp_node
, vec
<tree
> *vec_oprnds
)
3425 gimple
*vec_def_stmt
;
3428 gcc_assert (SLP_TREE_VEC_STMTS (slp_node
).exists ());
3430 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node
), i
, vec_def_stmt
)
3432 gcc_assert (vec_def_stmt
);
3433 if (gimple_code (vec_def_stmt
) == GIMPLE_PHI
)
3434 vec_oprnd
= gimple_phi_result (vec_def_stmt
);
3436 vec_oprnd
= gimple_get_lhs (vec_def_stmt
);
3437 vec_oprnds
->quick_push (vec_oprnd
);
/* vect_get_slp_defs: for each operand in OPS, either fetch already-vectorized
   defs from the matching child node (matched by comparing the operand
   against the child's first-stmt LHS, pattern or original) or build them
   from scalars via vect_get_constant_vectors; pushes one vec<tree> of defs
   per operand onto VEC_OPRNDS.  */
3442 /* Get vectorized definitions for SLP_NODE.
3443 If the scalar definitions are loop invariants or constants, collect them and
3444 call vect_get_constant_vectors() to create vector stmts.
3445 Otherwise, the def-stmts must be already vectorized and the vectorized stmts
3446 must be stored in the corresponding child of SLP_NODE, and we call
3447 vect_get_slp_vect_defs () to retrieve them. */
3450 vect_get_slp_defs (vec
<tree
> ops
, slp_tree slp_node
,
3451 vec
<vec
<tree
> > *vec_oprnds
)
3454 int number_of_vects
= 0, i
;
3455 unsigned int child_index
= 0;
3456 HOST_WIDE_INT lhs_size_unit
, rhs_size_unit
;
3457 slp_tree child
= NULL
;
3460 bool vectorized_defs
;
3462 first_stmt
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
3463 FOR_EACH_VEC_ELT (ops
, i
, oprnd
)
3465 /* For each operand we check if it has vectorized definitions in a child
3466 node or we need to create them (for invariants and constants). We
3467 check if the LHS of the first stmt of the next child matches OPRND.
3468 If it does, we found the correct child. Otherwise, we call
3469 vect_get_constant_vectors (), and not advance CHILD_INDEX in order
3470 to check this child node for the next operand. */
3471 vectorized_defs
= false;
3472 if (SLP_TREE_CHILDREN (slp_node
).length () > child_index
)
3474 child
= SLP_TREE_CHILDREN (slp_node
)[child_index
];
3476 /* We have to check both pattern and original def, if available. */
3477 if (SLP_TREE_DEF_TYPE (child
) == vect_internal_def
)
3479 gimple
*first_def
= SLP_TREE_SCALAR_STMTS (child
)[0];
3481 = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def
));
3484 if (gimple_code (first_def
) == GIMPLE_PHI
)
3485 first_def_op
= gimple_phi_result (first_def
);
3487 first_def_op
= gimple_get_lhs (first_def
);
3488 if (operand_equal_p (oprnd
, first_def_op
, 0)
3490 && operand_equal_p (oprnd
, gimple_get_lhs (related
), 0)))
3492 /* The number of vector defs is determined by the number of
3493 vector statements in the node from which we get those
3495 number_of_vects
= SLP_TREE_NUMBER_OF_VEC_STMTS (child
);
3496 vectorized_defs
= true;
3504 if (!vectorized_defs
)
3508 number_of_vects
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
3509 /* Number of vector stmts was calculated according to LHS in
3510 vect_schedule_slp_instance (), fix it by replacing LHS with
3511 RHS, if necessary. See vect_get_smallest_scalar_type () for
3513 vect_get_smallest_scalar_type (first_stmt
, &lhs_size_unit
,
3515 if (rhs_size_unit
!= lhs_size_unit
)
/* Scale the vector count by the RHS/LHS element-size ratio.  */
3517 number_of_vects
*= rhs_size_unit
;
3518 number_of_vects
/= lhs_size_unit
;
3523 /* Allocate memory for vectorized defs. */
3525 vec_defs
.create (number_of_vects
);
3527 /* For reduction defs we call vect_get_constant_vectors (), since we are
3528 looking for initial loop invariant values. */
3529 if (vectorized_defs
)
3530 /* The defs are already vectorized. */
3531 vect_get_slp_vect_defs (child
, &vec_defs
);
3533 /* Build vectors from scalar defs. */
3534 vect_get_constant_vectors (oprnd
, slp_node
, &vec_defs
, i
,
3537 vec_oprnds
->quick_push (vec_defs
);
3541 /* Generate vector permute statements from a list of loads in DR_CHAIN.
3542 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
3543 permute statements for the SLP node NODE of the SLP instance
3544 SLP_NODE_INSTANCE. */
3547 vect_transform_slp_perm_load (slp_tree node
, vec
<tree
> dr_chain
,
3548 gimple_stmt_iterator
*gsi
, int vf
,
3549 slp_instance slp_node_instance
, bool analyze_only
,
3552 gimple
*stmt
= SLP_TREE_SCALAR_STMTS (node
)[0];
3553 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3554 tree mask_element_type
= NULL_TREE
, mask_type
;
3555 int nunits
, vec_index
= 0;
3556 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3557 int group_size
= SLP_INSTANCE_GROUP_SIZE (slp_node_instance
);
3561 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info
))
3564 stmt_info
= vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info
));
3566 mode
= TYPE_MODE (vectype
);
3568 /* The generic VEC_PERM_EXPR code always uses an integral type of the
3569 same size as the vector element being permuted. */
3570 mask_element_type
= lang_hooks
.types
.type_for_mode
3571 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype
))).require (), 1);
3572 mask_type
= get_vectype_for_scalar_type (mask_element_type
);
3573 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
3574 vec_perm_builder
mask (nunits
, nunits
, 1);
3575 mask
.quick_grow (nunits
);
3576 vec_perm_indices indices
;
3578 /* Initialize the vect stmts of NODE to properly insert the generated
3581 for (unsigned i
= SLP_TREE_VEC_STMTS (node
).length ();
3582 i
< SLP_TREE_NUMBER_OF_VEC_STMTS (node
); i
++)
3583 SLP_TREE_VEC_STMTS (node
).quick_push (NULL
);
3585 /* Generate permutation masks for every NODE. Number of masks for each NODE
3586 is equal to GROUP_SIZE.
3587 E.g., we have a group of three nodes with three loads from the same
3588 location in each node, and the vector size is 4. I.e., we have a
3589 a0b0c0a1b1c1... sequence and we need to create the following vectors:
3590 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
3591 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
3594 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
3595 The last mask is illegal since we assume two operands for permute
3596 operation, and the mask element values can't be outside that range.
3597 Hence, the last mask must be converted into {2,5,5,5}.
3598 For the first two permutations we need the first and the second input
3599 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
3600 we need the second and the third vectors: {b1,c1,a2,b2} and
3603 int vect_stmts_counter
= 0;
3605 int first_vec_index
= -1;
3606 int second_vec_index
= -1;
3610 for (int j
= 0; j
< vf
; j
++)
3612 for (int k
= 0; k
< group_size
; k
++)
3614 int i
= (SLP_TREE_LOAD_PERMUTATION (node
)[k
]
3615 + j
* STMT_VINFO_GROUP_SIZE (stmt_info
));
3616 vec_index
= i
/ nunits
;
3617 mask_element
= i
% nunits
;
3618 if (vec_index
== first_vec_index
3619 || first_vec_index
== -1)
3621 first_vec_index
= vec_index
;
3623 else if (vec_index
== second_vec_index
3624 || second_vec_index
== -1)
3626 second_vec_index
= vec_index
;
3627 mask_element
+= nunits
;
3631 if (dump_enabled_p ())
3633 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3634 "permutation requires at "
3635 "least three vectors ");
3636 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
3639 gcc_assert (analyze_only
);
3643 gcc_assert (mask_element
>= 0
3644 && mask_element
< 2 * nunits
);
3645 if (mask_element
!= index
)
3647 mask
[index
++] = mask_element
;
3649 if (index
== nunits
&& !noop_p
)
3651 indices
.new_vector (mask
, 2, nunits
);
3652 if (!can_vec_perm_const_p (mode
, indices
))
3654 if (dump_enabled_p ())
3656 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
3658 "unsupported vect permute { ");
3659 for (i
= 0; i
< nunits
; ++i
)
3660 dump_printf (MSG_MISSED_OPTIMIZATION
,
3661 HOST_WIDE_INT_PRINT_DEC
" ", mask
[i
]);
3662 dump_printf (MSG_MISSED_OPTIMIZATION
, "}\n");
3664 gcc_assert (analyze_only
);
3671 if (index
== nunits
)
3675 tree mask_vec
= NULL_TREE
;
3678 mask_vec
= vec_perm_indices_to_tree (mask_type
, indices
);
3680 if (second_vec_index
== -1)
3681 second_vec_index
= first_vec_index
;
3683 /* Generate the permute statement if necessary. */
3684 tree first_vec
= dr_chain
[first_vec_index
];
3685 tree second_vec
= dr_chain
[second_vec_index
];
3690 = vect_create_destination_var (gimple_assign_lhs (stmt
),
3692 perm_dest
= make_ssa_name (perm_dest
);
3693 perm_stmt
= gimple_build_assign (perm_dest
,
3695 first_vec
, second_vec
,
3697 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
3700 /* If mask was NULL_TREE generate the requested
3701 identity transform. */
3702 perm_stmt
= SSA_NAME_DEF_STMT (first_vec
);
3704 /* Store the vector statement in NODE. */
3705 SLP_TREE_VEC_STMTS (node
)[vect_stmts_counter
++] = perm_stmt
;
3709 first_vec_index
= -1;
3710 second_vec_index
= -1;
3719 typedef hash_map
<vec
<gimple
*>, slp_tree
,
3720 simple_hashmap_traits
<bst_traits
, slp_tree
> >
3721 scalar_stmts_to_slp_tree_map_t
;
3723 /* Vectorize SLP instance tree in postorder. */
3726 vect_schedule_slp_instance (slp_tree node
, slp_instance instance
,
3727 scalar_stmts_to_slp_tree_map_t
*bst_map
)
3730 bool grouped_store
, is_store
;
3731 gimple_stmt_iterator si
;
3732 stmt_vec_info stmt_info
;
3733 unsigned int group_size
;
3738 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
3741 /* See if we have already vectorized the same set of stmts and reuse their
3742 vectorized stmts. */
3744 = bst_map
->get_or_insert (SLP_TREE_SCALAR_STMTS (node
).copy ());
3747 SLP_TREE_VEC_STMTS (node
).safe_splice (SLP_TREE_VEC_STMTS (leader
));
3752 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
3753 vect_schedule_slp_instance (child
, instance
, bst_map
);
3755 /* Push SLP node def-type to stmts. */
3756 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
3757 if (SLP_TREE_DEF_TYPE (child
) != vect_internal_def
)
3758 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child
), j
, stmt
)
3759 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt
)) = SLP_TREE_DEF_TYPE (child
);
3761 stmt
= SLP_TREE_SCALAR_STMTS (node
)[0];
3762 stmt_info
= vinfo_for_stmt (stmt
);
3764 /* VECTYPE is the type of the destination. */
3765 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3766 group_size
= SLP_INSTANCE_GROUP_SIZE (instance
);
3768 if (!SLP_TREE_VEC_STMTS (node
).exists ())
3769 SLP_TREE_VEC_STMTS (node
).create (SLP_TREE_NUMBER_OF_VEC_STMTS (node
));
3771 if (dump_enabled_p ())
3773 dump_printf_loc (MSG_NOTE
,vect_location
,
3774 "------>vectorizing SLP node starting from: ");
3775 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
3778 /* Vectorized stmts go before the last scalar stmt which is where
3779 all uses are ready. */
3780 si
= gsi_for_stmt (vect_find_last_scalar_stmt_in_slp (node
));
3782 /* Mark the first element of the reduction chain as reduction to properly
3783 transform the node. In the analysis phase only the last element of the
3784 chain is marked as reduction. */
3785 if (GROUP_FIRST_ELEMENT (stmt_info
) && !STMT_VINFO_GROUPED_ACCESS (stmt_info
)
3786 && GROUP_FIRST_ELEMENT (stmt_info
) == stmt
)
3788 STMT_VINFO_DEF_TYPE (stmt_info
) = vect_reduction_def
;
3789 STMT_VINFO_TYPE (stmt_info
) = reduc_vec_info_type
;
3792 /* Handle two-operation SLP nodes by vectorizing the group with
3793 both operations and then performing a merge. */
3794 if (SLP_TREE_TWO_OPERATORS (node
))
3796 enum tree_code code0
= gimple_assign_rhs_code (stmt
);
3797 enum tree_code ocode
= ERROR_MARK
;
3799 vec_perm_builder
mask (group_size
, group_size
, 1);
3800 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, ostmt
)
3801 if (gimple_assign_rhs_code (ostmt
) != code0
)
3803 mask
.quick_push (1);
3804 ocode
= gimple_assign_rhs_code (ostmt
);
3807 mask
.quick_push (0);
3808 if (ocode
!= ERROR_MARK
)
3813 tree tmask
= NULL_TREE
;
3814 vect_transform_stmt (stmt
, &si
, &grouped_store
, node
, instance
);
3815 v0
= SLP_TREE_VEC_STMTS (node
).copy ();
3816 SLP_TREE_VEC_STMTS (node
).truncate (0);
3817 gimple_assign_set_rhs_code (stmt
, ocode
);
3818 vect_transform_stmt (stmt
, &si
, &grouped_store
, node
, instance
);
3819 gimple_assign_set_rhs_code (stmt
, code0
);
3820 v1
= SLP_TREE_VEC_STMTS (node
).copy ();
3821 SLP_TREE_VEC_STMTS (node
).truncate (0);
3822 tree meltype
= build_nonstandard_integer_type
3823 (GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype
))), 1);
3824 tree mvectype
= get_same_sized_vectype (meltype
, vectype
);
3826 for (j
= 0; j
< v0
.length (); ++j
)
3828 unsigned int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
3829 tree_vector_builder
melts (mvectype
, nunits
, 1);
3830 for (l
= 0; l
< nunits
; ++l
)
3832 if (k
>= group_size
)
3834 tree t
= build_int_cst (meltype
, mask
[k
++] * nunits
+ l
);
3835 melts
.quick_push (t
);
3837 tmask
= melts
.build ();
3839 /* ??? Not all targets support a VEC_PERM_EXPR with a
3840 constant mask that would translate to a vec_merge RTX
3841 (with their vec_perm_const_ok). We can either not
3842 vectorize in that case or let veclower do its job.
3843 Unfortunately that isn't too great and at least for
3844 plus/minus we'd eventually like to match targets
3845 vector addsub instructions. */
3847 vstmt
= gimple_build_assign (make_ssa_name (vectype
),
3849 gimple_assign_lhs (v0
[j
]),
3850 gimple_assign_lhs (v1
[j
]), tmask
);
3851 vect_finish_stmt_generation (stmt
, vstmt
, &si
);
3852 SLP_TREE_VEC_STMTS (node
).quick_push (vstmt
);
3859 is_store
= vect_transform_stmt (stmt
, &si
, &grouped_store
, node
, instance
);
3861 /* Restore stmt def-types. */
3862 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
3863 if (SLP_TREE_DEF_TYPE (child
) != vect_internal_def
)
3864 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child
), j
, stmt
)
3865 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt
)) = vect_internal_def
;
3870 /* Replace scalar calls from SLP node NODE with setting of their lhs to zero.
3871 For loop vectorization this is done in vectorizable_call, but for SLP
3872 it needs to be deferred until end of vect_schedule_slp, because multiple
3873 SLP instances may refer to the same scalar stmt. */
3876 vect_remove_slp_scalar_calls (slp_tree node
)
3878 gimple
*stmt
, *new_stmt
;
3879 gimple_stmt_iterator gsi
;
3883 stmt_vec_info stmt_info
;
3885 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
3888 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
3889 vect_remove_slp_scalar_calls (child
);
3891 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
3893 if (!is_gimple_call (stmt
) || gimple_bb (stmt
) == NULL
)
3895 stmt_info
= vinfo_for_stmt (stmt
);
3896 if (stmt_info
== NULL
3897 || is_pattern_stmt_p (stmt_info
)
3898 || !PURE_SLP_STMT (stmt_info
))
3900 lhs
= gimple_call_lhs (stmt
);
3901 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (TREE_TYPE (lhs
)));
3902 set_vinfo_for_stmt (new_stmt
, stmt_info
);
3903 set_vinfo_for_stmt (stmt
, NULL
);
3904 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
3905 gsi
= gsi_for_stmt (stmt
);
3906 gsi_replace (&gsi
, new_stmt
, false);
3907 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt
)) = new_stmt
;
3911 /* Generate vector code for all SLP instances in the loop/basic block. */
3914 vect_schedule_slp (vec_info
*vinfo
)
3916 vec
<slp_instance
> slp_instances
;
3917 slp_instance instance
;
3919 bool is_store
= false;
3921 slp_instances
= vinfo
->slp_instances
;
3922 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
3924 /* Schedule the tree of INSTANCE. */
3925 scalar_stmts_to_slp_tree_map_t
*bst_map
3926 = new scalar_stmts_to_slp_tree_map_t ();
3927 is_store
= vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance
),
3930 if (dump_enabled_p ())
3931 dump_printf_loc (MSG_NOTE
, vect_location
,
3932 "vectorizing stmts using SLP.\n");
3935 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
3937 slp_tree root
= SLP_INSTANCE_TREE (instance
);
3940 gimple_stmt_iterator gsi
;
3942 /* Remove scalar call stmts. Do not do this for basic-block
3943 vectorization as not all uses may be vectorized.
3944 ??? Why should this be necessary? DCE should be able to
3945 remove the stmts itself.
3946 ??? For BB vectorization we can as well remove scalar
3947 stmts starting from the SLP tree root if they have no
3949 if (is_a
<loop_vec_info
> (vinfo
))
3950 vect_remove_slp_scalar_calls (root
);
3952 for (j
= 0; SLP_TREE_SCALAR_STMTS (root
).iterate (j
, &store
)
3953 && j
< SLP_INSTANCE_GROUP_SIZE (instance
); j
++)
3955 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store
)))
3958 if (is_pattern_stmt_p (vinfo_for_stmt (store
)))
3959 store
= STMT_VINFO_RELATED_STMT (vinfo_for_stmt (store
));
3960 /* Free the attached stmt_vec_info and remove the stmt. */
3961 gsi
= gsi_for_stmt (store
);
3962 unlink_stmt_vdef (store
);
3963 gsi_remove (&gsi
, true);
3964 release_defs (store
);
3965 free_stmt_vec_info (store
);