1 /* SLP - Basic Block Vectorization
2 Copyright (C) 2007-2017 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
30 #include "tree-pass.h"
32 #include "optabs-tree.h"
33 #include "insn-config.h"
34 #include "recog.h" /* FIXME: for insn_data */
36 #include "fold-const.h"
37 #include "stor-layout.h"
38 #include "gimple-iterator.h"
40 #include "tree-vectorizer.h"
41 #include "langhooks.h"
42 #include "gimple-walk.h"
44 #include "tree-vector-builder.h"
45 #include "vec-perm-indices.h"
48 /* Recursively free the memory allocated for the SLP tree rooted at NODE. */
51 vect_free_slp_tree (slp_tree node
)
56 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
57 vect_free_slp_tree (child
);
60 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
61 /* After transform some stmts are removed and thus their vinfo is gone. */
62 if (vinfo_for_stmt (stmt
))
64 gcc_assert (STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt
)) > 0);
65 STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt
))--;
68 SLP_TREE_CHILDREN (node
).release ();
69 SLP_TREE_SCALAR_STMTS (node
).release ();
70 SLP_TREE_VEC_STMTS (node
).release ();
71 SLP_TREE_LOAD_PERMUTATION (node
).release ();
77 /* Free the memory allocated for the SLP instance. */
80 vect_free_slp_instance (slp_instance instance
)
82 vect_free_slp_tree (SLP_INSTANCE_TREE (instance
));
83 SLP_INSTANCE_LOADS (instance
).release ();
88 /* Create an SLP node for SCALAR_STMTS. */
91 vect_create_new_slp_node (vec
<gimple
*> scalar_stmts
)
94 gimple
*stmt
= scalar_stmts
[0];
97 if (is_gimple_call (stmt
))
98 nops
= gimple_call_num_args (stmt
);
99 else if (is_gimple_assign (stmt
))
101 nops
= gimple_num_ops (stmt
) - 1;
102 if (gimple_assign_rhs_code (stmt
) == COND_EXPR
)
105 else if (gimple_code (stmt
) == GIMPLE_PHI
)
110 node
= XNEW (struct _slp_tree
);
111 SLP_TREE_SCALAR_STMTS (node
) = scalar_stmts
;
112 SLP_TREE_VEC_STMTS (node
).create (0);
113 SLP_TREE_CHILDREN (node
).create (nops
);
114 SLP_TREE_LOAD_PERMUTATION (node
) = vNULL
;
115 SLP_TREE_TWO_OPERATORS (node
) = false;
116 SLP_TREE_DEF_TYPE (node
) = vect_internal_def
;
119 FOR_EACH_VEC_ELT (scalar_stmts
, i
, stmt
)
120 STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt
))++;
126 /* This structure is used in creation of an SLP tree. Each instance
127 corresponds to the same operand in a group of scalar stmts in an SLP
129 typedef struct _slp_oprnd_info
131 /* Def-stmts for the operands. */
132 vec
<gimple
*> def_stmts
;
133 /* Information about the first statement, its vector def-type, type, the
134 operand itself in case it's constant, and an indication if it's a pattern
137 enum vect_def_type first_dt
;
143 /* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
145 static vec
<slp_oprnd_info
>
146 vect_create_oprnd_info (int nops
, int group_size
)
149 slp_oprnd_info oprnd_info
;
150 vec
<slp_oprnd_info
> oprnds_info
;
152 oprnds_info
.create (nops
);
153 for (i
= 0; i
< nops
; i
++)
155 oprnd_info
= XNEW (struct _slp_oprnd_info
);
156 oprnd_info
->def_stmts
.create (group_size
);
157 oprnd_info
->first_dt
= vect_uninitialized_def
;
158 oprnd_info
->first_op_type
= NULL_TREE
;
159 oprnd_info
->first_pattern
= false;
160 oprnd_info
->second_pattern
= false;
161 oprnds_info
.quick_push (oprnd_info
);
168 /* Free operands info. */
171 vect_free_oprnd_info (vec
<slp_oprnd_info
> &oprnds_info
)
174 slp_oprnd_info oprnd_info
;
176 FOR_EACH_VEC_ELT (oprnds_info
, i
, oprnd_info
)
178 oprnd_info
->def_stmts
.release ();
179 XDELETE (oprnd_info
);
182 oprnds_info
.release ();
186 /* Find the place of the data-ref in STMT in the interleaving chain that starts
187 from FIRST_STMT. Return -1 if the data-ref is not a part of the chain. */
190 vect_get_place_in_interleaving_chain (gimple
*stmt
, gimple
*first_stmt
)
192 gimple
*next_stmt
= first_stmt
;
195 if (first_stmt
!= GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)))
200 if (next_stmt
== stmt
)
202 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
204 result
+= GROUP_GAP (vinfo_for_stmt (next_stmt
));
212 /* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
213 they are of a valid type and that they match the defs of the first stmt of
214 the SLP group (stored in OPRNDS_INFO). This function tries to match stmts
215 by swapping operands of STMT when possible. Non-zero *SWAP indicates swap
216 is required for cond_expr stmts. Specifically, *SWAP is 1 if STMT is cond
217 and operands of comparison need to be swapped; *SWAP is 2 if STMT is cond
218 and code of comparison needs to be inverted. If there is any operand swap
219 in this function, *SWAP is set to non-zero value.
220 If there was a fatal error return -1; if the error could be corrected by
221 swapping operands of father node of this one, return 1; if everything is
225 vect_get_and_check_slp_defs (vec_info
*vinfo
, unsigned char *swap
,
226 gimple
*stmt
, unsigned stmt_num
,
227 vec
<slp_oprnd_info
> *oprnds_info
)
230 unsigned int i
, number_of_oprnds
;
232 enum vect_def_type dt
= vect_uninitialized_def
;
233 bool pattern
= false;
234 slp_oprnd_info oprnd_info
;
235 int first_op_idx
= 1;
236 bool commutative
= false;
237 bool first_op_cond
= false;
238 bool first
= stmt_num
== 0;
239 bool second
= stmt_num
== 1;
241 if (is_gimple_call (stmt
))
243 number_of_oprnds
= gimple_call_num_args (stmt
);
246 else if (is_gimple_assign (stmt
))
248 enum tree_code code
= gimple_assign_rhs_code (stmt
);
249 number_of_oprnds
= gimple_num_ops (stmt
) - 1;
250 /* Swap can only be done for cond_expr if asked to, otherwise we
251 could result in different comparison code to the first stmt. */
252 if (gimple_assign_rhs_code (stmt
) == COND_EXPR
253 && COMPARISON_CLASS_P (gimple_assign_rhs1 (stmt
)))
255 first_op_cond
= true;
259 commutative
= commutative_tree_code (code
);
264 bool swapped
= (*swap
!= 0);
265 gcc_assert (!swapped
|| first_op_cond
);
266 for (i
= 0; i
< number_of_oprnds
; i
++)
271 /* Map indicating how operands of cond_expr should be swapped. */
272 int maps
[3][4] = {{0, 1, 2, 3}, {1, 0, 2, 3}, {0, 1, 3, 2}};
273 int *map
= maps
[*swap
];
276 oprnd
= TREE_OPERAND (gimple_op (stmt
, first_op_idx
), map
[i
]);
278 oprnd
= gimple_op (stmt
, map
[i
]);
281 oprnd
= gimple_op (stmt
, first_op_idx
+ (swapped
? !i
: i
));
283 oprnd_info
= (*oprnds_info
)[i
];
285 if (!vect_is_simple_use (oprnd
, vinfo
, &def_stmt
, &dt
))
287 if (dump_enabled_p ())
289 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
290 "Build SLP failed: can't analyze def for ");
291 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, oprnd
);
292 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
298 /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
299 from the pattern. Check that all the stmts of the node are in the
301 if (def_stmt
&& gimple_bb (def_stmt
)
302 && vect_stmt_in_region_p (vinfo
, def_stmt
)
303 && vinfo_for_stmt (def_stmt
)
304 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt
))
305 && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt
))
306 && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt
)))
309 if (!first
&& !oprnd_info
->first_pattern
310 /* Allow different pattern state for the defs of the
311 first stmt in reduction chains. */
312 && (oprnd_info
->first_dt
!= vect_reduction_def
313 || (!second
&& !oprnd_info
->second_pattern
)))
323 if (dump_enabled_p ())
325 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
326 "Build SLP failed: some of the stmts"
327 " are in a pattern, and others are not ");
328 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, oprnd
);
329 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
335 def_stmt
= STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt
));
336 dt
= STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt
));
338 if (dt
== vect_unknown_def_type
)
340 if (dump_enabled_p ())
341 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
342 "Unsupported pattern.\n");
346 switch (gimple_code (def_stmt
))
353 if (dump_enabled_p ())
354 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
355 "unsupported defining stmt:\n");
361 oprnd_info
->second_pattern
= pattern
;
365 oprnd_info
->first_dt
= dt
;
366 oprnd_info
->first_pattern
= pattern
;
367 oprnd_info
->first_op_type
= TREE_TYPE (oprnd
);
371 /* Not first stmt of the group, check that the def-stmt/s match
372 the def-stmt/s of the first stmt. Allow different definition
373 types for reduction chains: the first stmt must be a
374 vect_reduction_def (a phi node), and the rest
375 vect_internal_def. */
376 if (((oprnd_info
->first_dt
!= dt
377 && !(oprnd_info
->first_dt
== vect_reduction_def
378 && dt
== vect_internal_def
)
379 && !((oprnd_info
->first_dt
== vect_external_def
380 || oprnd_info
->first_dt
== vect_constant_def
)
381 && (dt
== vect_external_def
382 || dt
== vect_constant_def
)))
383 || !types_compatible_p (oprnd_info
->first_op_type
,
386 /* Try swapping operands if we got a mismatch. */
395 if (dump_enabled_p ())
396 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
397 "Build SLP failed: different types\n");
403 /* Check the types of the definitions. */
406 case vect_constant_def
:
407 case vect_external_def
:
408 /* We must already have set a vector size by now. */
409 gcc_checking_assert (maybe_ne (current_vector_size
, 0U));
410 if (!current_vector_size
.is_constant ())
412 if (dump_enabled_p ())
414 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
415 "Build SLP failed: invalid type of def "
416 "for variable-length SLP ");
417 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, oprnd
);
418 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
424 case vect_reduction_def
:
425 case vect_induction_def
:
426 case vect_internal_def
:
427 oprnd_info
->def_stmts
.quick_push (def_stmt
);
431 /* FORNOW: Not supported. */
432 if (dump_enabled_p ())
434 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
435 "Build SLP failed: illegal type of def ");
436 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, oprnd
);
437 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
447 /* If there are already uses of this stmt in a SLP instance then
448 we've committed to the operand order and can't swap it. */
449 if (STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt
)) != 0)
451 if (dump_enabled_p ())
453 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
454 "Build SLP failed: cannot swap operands of "
456 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
463 tree cond
= gimple_assign_rhs1 (stmt
);
464 enum tree_code code
= TREE_CODE (cond
);
469 swap_ssa_operands (stmt
, &TREE_OPERAND (cond
, 0),
470 &TREE_OPERAND (cond
, 1));
471 TREE_SET_CODE (cond
, swap_tree_comparison (code
));
476 swap_ssa_operands (stmt
, gimple_assign_rhs2_ptr (stmt
),
477 gimple_assign_rhs3_ptr (stmt
));
478 bool honor_nans
= HONOR_NANS (TREE_OPERAND (cond
, 0));
479 code
= invert_tree_comparison (TREE_CODE (cond
), honor_nans
);
480 gcc_assert (code
!= ERROR_MARK
);
481 TREE_SET_CODE (cond
, code
);
485 swap_ssa_operands (stmt
, gimple_assign_rhs1_ptr (stmt
),
486 gimple_assign_rhs2_ptr (stmt
));
487 if (dump_enabled_p ())
489 dump_printf_loc (MSG_NOTE
, vect_location
,
490 "swapped operands to match def types in ");
491 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
499 /* A subroutine of vect_build_slp_tree for checking VECTYPE, which is the
500 caller's attempt to find the vector type in STMT with the narrowest
501 element type. Return true if VECTYPE is nonnull and if it is valid
502 for VINFO. When returning true, update MAX_NUNITS to reflect the
503 number of units in VECTYPE. VINFO, GORUP_SIZE and MAX_NUNITS are
504 as for vect_build_slp_tree. */
507 vect_record_max_nunits (vec_info
*vinfo
, gimple
*stmt
, unsigned int group_size
,
508 tree vectype
, poly_uint64
*max_nunits
)
512 if (dump_enabled_p ())
514 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
515 "Build SLP failed: unsupported data-type in ");
516 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
517 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
519 /* Fatal mismatch. */
523 /* If populating the vector type requires unrolling then fail
524 before adjusting *max_nunits for basic-block vectorization. */
525 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
526 unsigned HOST_WIDE_INT const_nunits
;
527 if (is_a
<bb_vec_info
> (vinfo
)
528 && (!nunits
.is_constant (&const_nunits
)
529 || const_nunits
> group_size
))
531 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
532 "Build SLP failed: unrolling required "
533 "in basic block SLP\n");
534 /* Fatal mismatch. */
538 /* In case of multiple types we need to detect the smallest type. */
539 vect_update_max_nunits (max_nunits
, vectype
);
543 /* Verify if the scalar stmts STMTS are isomorphic, require data
544 permutation or are of unsupported types of operation. Return
545 true if they are, otherwise return false and indicate in *MATCHES
546 which stmts are not isomorphic to the first one. If MATCHES[0]
547 is false then this indicates the comparison could not be
548 carried out or the stmts will never be vectorized by SLP.
550 Note COND_EXPR is possibly ismorphic to another one after swapping its
551 operands. Set SWAP[i] to 1 if stmt I is COND_EXPR and isomorphic to
552 the first stmt by swapping the two operands of comparison; set SWAP[i]
553 to 2 if stmt I is isormorphic to the first stmt by inverting the code
554 of comparison. Take A1 >= B1 ? X1 : Y1 as an exmple, it can be swapped
555 to (B1 <= A1 ? X1 : Y1); or be inverted to (A1 < B1) ? Y1 : X1. */
558 vect_build_slp_tree_1 (vec_info
*vinfo
, unsigned char *swap
,
559 vec
<gimple
*> stmts
, unsigned int group_size
,
560 unsigned nops
, poly_uint64
*max_nunits
,
561 bool *matches
, bool *two_operators
)
564 gimple
*first_stmt
= stmts
[0], *stmt
= stmts
[0];
565 enum tree_code first_stmt_code
= ERROR_MARK
;
566 enum tree_code alt_stmt_code
= ERROR_MARK
;
567 enum tree_code rhs_code
= ERROR_MARK
;
568 enum tree_code first_cond_code
= ERROR_MARK
;
570 bool need_same_oprnds
= false;
571 tree vectype
= NULL_TREE
, scalar_type
, first_op1
= NULL_TREE
;
574 machine_mode optab_op2_mode
;
575 machine_mode vec_mode
;
577 gimple
*first_load
= NULL
, *prev_first_load
= NULL
;
579 /* For every stmt in NODE find its def stmt/s. */
580 FOR_EACH_VEC_ELT (stmts
, i
, stmt
)
585 if (dump_enabled_p ())
587 dump_printf_loc (MSG_NOTE
, vect_location
, "Build SLP for ");
588 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
591 /* Fail to vectorize statements marked as unvectorizable. */
592 if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt
)))
594 if (dump_enabled_p ())
596 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
597 "Build SLP failed: unvectorizable statement ");
598 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
600 /* Fatal mismatch. */
605 lhs
= gimple_get_lhs (stmt
);
606 if (lhs
== NULL_TREE
)
608 if (dump_enabled_p ())
610 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
611 "Build SLP failed: not GIMPLE_ASSIGN nor "
613 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
615 /* Fatal mismatch. */
620 scalar_type
= vect_get_smallest_scalar_type (stmt
, &dummy
, &dummy
);
621 vectype
= get_vectype_for_scalar_type (scalar_type
);
622 if (!vect_record_max_nunits (vinfo
, stmt
, group_size
, vectype
,
625 /* Fatal mismatch. */
630 if (gcall
*call_stmt
= dyn_cast
<gcall
*> (stmt
))
632 rhs_code
= CALL_EXPR
;
633 if (gimple_call_internal_p (call_stmt
)
634 || gimple_call_tail_p (call_stmt
)
635 || gimple_call_noreturn_p (call_stmt
)
636 || !gimple_call_nothrow_p (call_stmt
)
637 || gimple_call_chain (call_stmt
))
639 if (dump_enabled_p ())
641 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
642 "Build SLP failed: unsupported call type ");
643 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
646 /* Fatal mismatch. */
652 rhs_code
= gimple_assign_rhs_code (stmt
);
654 /* Check the operation. */
657 first_stmt_code
= rhs_code
;
659 /* Shift arguments should be equal in all the packed stmts for a
660 vector shift with scalar shift operand. */
661 if (rhs_code
== LSHIFT_EXPR
|| rhs_code
== RSHIFT_EXPR
662 || rhs_code
== LROTATE_EXPR
663 || rhs_code
== RROTATE_EXPR
)
665 vec_mode
= TYPE_MODE (vectype
);
667 /* First see if we have a vector/vector shift. */
668 optab
= optab_for_tree_code (rhs_code
, vectype
,
672 || optab_handler (optab
, vec_mode
) == CODE_FOR_nothing
)
674 /* No vector/vector shift, try for a vector/scalar shift. */
675 optab
= optab_for_tree_code (rhs_code
, vectype
,
680 if (dump_enabled_p ())
681 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
682 "Build SLP failed: no optab.\n");
683 /* Fatal mismatch. */
687 icode
= (int) optab_handler (optab
, vec_mode
);
688 if (icode
== CODE_FOR_nothing
)
690 if (dump_enabled_p ())
691 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
693 "op not supported by target.\n");
694 /* Fatal mismatch. */
698 optab_op2_mode
= insn_data
[icode
].operand
[2].mode
;
699 if (!VECTOR_MODE_P (optab_op2_mode
))
701 need_same_oprnds
= true;
702 first_op1
= gimple_assign_rhs2 (stmt
);
706 else if (rhs_code
== WIDEN_LSHIFT_EXPR
)
708 need_same_oprnds
= true;
709 first_op1
= gimple_assign_rhs2 (stmt
);
714 if (first_stmt_code
!= rhs_code
715 && alt_stmt_code
== ERROR_MARK
)
716 alt_stmt_code
= rhs_code
;
717 if (first_stmt_code
!= rhs_code
718 && (first_stmt_code
!= IMAGPART_EXPR
719 || rhs_code
!= REALPART_EXPR
)
720 && (first_stmt_code
!= REALPART_EXPR
721 || rhs_code
!= IMAGPART_EXPR
)
722 /* Handle mismatches in plus/minus by computing both
723 and merging the results. */
724 && !((first_stmt_code
== PLUS_EXPR
725 || first_stmt_code
== MINUS_EXPR
)
726 && (alt_stmt_code
== PLUS_EXPR
727 || alt_stmt_code
== MINUS_EXPR
)
728 && rhs_code
== alt_stmt_code
)
729 && !(STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt
))
730 && (first_stmt_code
== ARRAY_REF
731 || first_stmt_code
== BIT_FIELD_REF
732 || first_stmt_code
== INDIRECT_REF
733 || first_stmt_code
== COMPONENT_REF
734 || first_stmt_code
== MEM_REF
)))
736 if (dump_enabled_p ())
738 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
739 "Build SLP failed: different operation "
741 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
742 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
744 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
752 && !operand_equal_p (first_op1
, gimple_assign_rhs2 (stmt
), 0))
754 if (dump_enabled_p ())
756 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
757 "Build SLP failed: different shift "
759 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
765 if (rhs_code
== CALL_EXPR
)
767 gimple
*first_stmt
= stmts
[0];
768 if (gimple_call_num_args (stmt
) != nops
769 || !operand_equal_p (gimple_call_fn (first_stmt
),
770 gimple_call_fn (stmt
), 0)
771 || gimple_call_fntype (first_stmt
)
772 != gimple_call_fntype (stmt
))
774 if (dump_enabled_p ())
776 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
777 "Build SLP failed: different calls in ");
778 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
787 /* Grouped store or load. */
788 if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt
)))
790 if (REFERENCE_CLASS_P (lhs
))
798 first_load
= GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
));
801 /* Check that there are no loads from different interleaving
802 chains in the same node. */
803 if (prev_first_load
!= first_load
)
805 if (dump_enabled_p ())
807 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
809 "Build SLP failed: different "
810 "interleaving chains in one node ");
811 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
819 prev_first_load
= first_load
;
821 } /* Grouped access. */
824 if (TREE_CODE_CLASS (rhs_code
) == tcc_reference
)
826 /* Not grouped load. */
827 if (dump_enabled_p ())
829 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
830 "Build SLP failed: not grouped load ");
831 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
834 /* FORNOW: Not grouped loads are not supported. */
835 /* Fatal mismatch. */
840 /* Not memory operation. */
841 if (TREE_CODE_CLASS (rhs_code
) != tcc_binary
842 && TREE_CODE_CLASS (rhs_code
) != tcc_unary
843 && TREE_CODE_CLASS (rhs_code
) != tcc_expression
844 && TREE_CODE_CLASS (rhs_code
) != tcc_comparison
845 && rhs_code
!= CALL_EXPR
)
847 if (dump_enabled_p ())
849 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
850 "Build SLP failed: operation");
851 dump_printf (MSG_MISSED_OPTIMIZATION
, " unsupported ");
852 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
854 /* Fatal mismatch. */
859 if (rhs_code
== COND_EXPR
)
861 tree cond_expr
= gimple_assign_rhs1 (stmt
);
862 enum tree_code cond_code
= TREE_CODE (cond_expr
);
863 enum tree_code swap_code
= ERROR_MARK
;
864 enum tree_code invert_code
= ERROR_MARK
;
867 first_cond_code
= TREE_CODE (cond_expr
);
868 else if (TREE_CODE_CLASS (cond_code
) == tcc_comparison
)
870 bool honor_nans
= HONOR_NANS (TREE_OPERAND (cond_expr
, 0));
871 swap_code
= swap_tree_comparison (cond_code
);
872 invert_code
= invert_tree_comparison (cond_code
, honor_nans
);
875 if (first_cond_code
== cond_code
)
877 /* Isomorphic can be achieved by swapping. */
878 else if (first_cond_code
== swap_code
)
880 /* Isomorphic can be achieved by inverting. */
881 else if (first_cond_code
== invert_code
)
885 if (dump_enabled_p ())
887 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
888 "Build SLP failed: different"
890 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
902 for (i
= 0; i
< group_size
; ++i
)
906 /* If we allowed a two-operation SLP node verify the target can cope
907 with the permute we are going to use. */
908 if (alt_stmt_code
!= ERROR_MARK
909 && TREE_CODE_CLASS (alt_stmt_code
) != tcc_reference
)
911 unsigned int count
= TYPE_VECTOR_SUBPARTS (vectype
);
912 vec_perm_builder
sel (count
, count
, 1);
913 for (i
= 0; i
< count
; ++i
)
915 unsigned int elt
= i
;
916 if (gimple_assign_rhs_code (stmts
[i
% group_size
]) == alt_stmt_code
)
918 sel
.quick_push (elt
);
920 vec_perm_indices
indices (sel
, 2, count
);
921 if (!can_vec_perm_const_p (TYPE_MODE (vectype
), indices
))
923 for (i
= 0; i
< group_size
; ++i
)
924 if (gimple_assign_rhs_code (stmts
[i
]) == alt_stmt_code
)
927 if (dump_enabled_p ())
929 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
930 "Build SLP failed: different operation "
932 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
934 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
936 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
942 *two_operators
= true;
948 /* Traits for the hash_set to record failed SLP builds for a stmt set.
949 Note we never remove apart from at destruction time so we do not
950 need a special value for deleted that differs from empty. */
953 typedef vec
<gimple
*> value_type
;
954 typedef vec
<gimple
*> compare_type
;
955 static inline hashval_t
hash (value_type
);
956 static inline bool equal (value_type existing
, value_type candidate
);
957 static inline bool is_empty (value_type x
) { return !x
.exists (); }
958 static inline bool is_deleted (value_type x
) { return !x
.exists (); }
959 static inline void mark_empty (value_type
&x
) { x
.release (); }
960 static inline void mark_deleted (value_type
&x
) { x
.release (); }
961 static inline void remove (value_type
&x
) { x
.release (); }
964 bst_traits::hash (value_type x
)
967 for (unsigned i
= 0; i
< x
.length (); ++i
)
968 h
.add_int (gimple_uid (x
[i
]));
972 bst_traits::equal (value_type existing
, value_type candidate
)
974 if (existing
.length () != candidate
.length ())
976 for (unsigned i
= 0; i
< existing
.length (); ++i
)
977 if (existing
[i
] != candidate
[i
])
982 typedef hash_set
<vec
<gimple
*>, bst_traits
> scalar_stmts_set_t
;
983 static scalar_stmts_set_t
*bst_fail
;
986 vect_build_slp_tree_2 (vec_info
*vinfo
,
987 vec
<gimple
*> stmts
, unsigned int group_size
,
988 poly_uint64
*max_nunits
,
989 vec
<slp_tree
> *loads
,
990 bool *matches
, unsigned *npermutes
, unsigned *tree_size
,
991 unsigned max_tree_size
);
994 vect_build_slp_tree (vec_info
*vinfo
,
995 vec
<gimple
*> stmts
, unsigned int group_size
,
996 poly_uint64
*max_nunits
, vec
<slp_tree
> *loads
,
997 bool *matches
, unsigned *npermutes
, unsigned *tree_size
,
998 unsigned max_tree_size
)
1000 if (bst_fail
->contains (stmts
))
1002 slp_tree res
= vect_build_slp_tree_2 (vinfo
, stmts
, group_size
, max_nunits
,
1003 loads
, matches
, npermutes
, tree_size
,
1005 /* When SLP build fails for stmts record this, otherwise SLP build
1006 can be exponential in time when we allow to construct parts from
1007 scalars, see PR81723. */
1011 x
.create (stmts
.length ());
1018 /* Recursively build an SLP tree starting from NODE.
1019 Fail (and return a value not equal to zero) if def-stmts are not
1020 isomorphic, require data permutation or are of unsupported types of
1021 operation. Otherwise, return 0.
1022 The value returned is the depth in the SLP tree where a mismatch
1026 vect_build_slp_tree_2 (vec_info
*vinfo
,
1027 vec
<gimple
*> stmts
, unsigned int group_size
,
1028 poly_uint64
*max_nunits
,
1029 vec
<slp_tree
> *loads
,
1030 bool *matches
, unsigned *npermutes
, unsigned *tree_size
,
1031 unsigned max_tree_size
)
1033 unsigned nops
, i
, this_tree_size
= 0;
1034 poly_uint64 this_max_nunits
= *max_nunits
;
1041 if (is_gimple_call (stmt
))
1042 nops
= gimple_call_num_args (stmt
);
1043 else if (is_gimple_assign (stmt
))
1045 nops
= gimple_num_ops (stmt
) - 1;
1046 if (gimple_assign_rhs_code (stmt
) == COND_EXPR
)
1049 else if (gimple_code (stmt
) == GIMPLE_PHI
)
1054 /* If the SLP node is a PHI (induction or reduction), terminate
1056 if (gimple_code (stmt
) == GIMPLE_PHI
)
1058 tree scalar_type
= TREE_TYPE (PHI_RESULT (stmt
));
1059 tree vectype
= get_vectype_for_scalar_type (scalar_type
);
1060 if (!vect_record_max_nunits (vinfo
, stmt
, group_size
, vectype
,
1064 vect_def_type def_type
= STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt
));
1065 /* Induction from different IVs is not supported. */
1066 if (def_type
== vect_induction_def
)
1068 FOR_EACH_VEC_ELT (stmts
, i
, stmt
)
1069 if (stmt
!= stmts
[0])
1074 /* Else def types have to match. */
1075 FOR_EACH_VEC_ELT (stmts
, i
, stmt
)
1077 /* But for reduction chains only check on the first stmt. */
1078 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
))
1079 && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)) != stmt
)
1081 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt
)) != def_type
)
1085 node
= vect_create_new_slp_node (stmts
);
1090 bool two_operators
= false;
1091 unsigned char *swap
= XALLOCAVEC (unsigned char, group_size
);
1092 if (!vect_build_slp_tree_1 (vinfo
, swap
,
1093 stmts
, group_size
, nops
,
1094 &this_max_nunits
, matches
, &two_operators
))
1097 /* If the SLP node is a load, terminate the recursion. */
1098 if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt
))
1099 && DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt
))))
1101 *max_nunits
= this_max_nunits
;
1102 node
= vect_create_new_slp_node (stmts
);
1103 loads
->safe_push (node
);
1107 /* Get at the operands, verifying they are compatible. */
1108 vec
<slp_oprnd_info
> oprnds_info
= vect_create_oprnd_info (nops
, group_size
);
1109 slp_oprnd_info oprnd_info
;
1110 FOR_EACH_VEC_ELT (stmts
, i
, stmt
)
1112 int res
= vect_get_and_check_slp_defs (vinfo
, &swap
[i
],
1113 stmt
, i
, &oprnds_info
);
1115 matches
[(res
== -1) ? 0 : i
] = false;
1119 for (i
= 0; i
< group_size
; ++i
)
1122 vect_free_oprnd_info (oprnds_info
);
1126 auto_vec
<slp_tree
, 4> children
;
1127 auto_vec
<slp_tree
> this_loads
;
1132 max_tree_size
-= *tree_size
;
1134 /* Create SLP_TREE nodes for the definition node/s. */
1135 FOR_EACH_VEC_ELT (oprnds_info
, i
, oprnd_info
)
1138 unsigned old_nloads
= this_loads
.length ();
1139 unsigned old_tree_size
= this_tree_size
;
1142 if (oprnd_info
->first_dt
!= vect_internal_def
1143 && oprnd_info
->first_dt
!= vect_reduction_def
1144 && oprnd_info
->first_dt
!= vect_induction_def
)
1147 if (++this_tree_size
> max_tree_size
)
1149 FOR_EACH_VEC_ELT (children
, j
, child
)
1150 vect_free_slp_tree (child
);
1151 vect_free_oprnd_info (oprnds_info
);
1155 if ((child
= vect_build_slp_tree (vinfo
, oprnd_info
->def_stmts
,
1156 group_size
, &this_max_nunits
,
1157 &this_loads
, matches
, npermutes
,
1159 max_tree_size
)) != NULL
)
1161 /* If we have all children of child built up from scalars then just
1162 throw that away and build it up this node from scalars. */
1163 if (!SLP_TREE_CHILDREN (child
).is_empty ()
1164 /* ??? Rejecting patterns this way doesn't work. We'd have to
1165 do extra work to cancel the pattern so the uses see the
1167 && !is_pattern_stmt_p
1168 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child
)[0])))
1170 slp_tree grandchild
;
1172 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child
), j
, grandchild
)
1173 if (SLP_TREE_DEF_TYPE (grandchild
) == vect_internal_def
)
1178 this_loads
.truncate (old_nloads
);
1179 this_tree_size
= old_tree_size
;
1180 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child
), j
, grandchild
)
1181 vect_free_slp_tree (grandchild
);
1182 SLP_TREE_CHILDREN (child
).truncate (0);
1184 dump_printf_loc (MSG_NOTE
, vect_location
,
1185 "Building parent vector operands from "
1186 "scalars instead\n");
1187 oprnd_info
->def_stmts
= vNULL
;
1188 SLP_TREE_DEF_TYPE (child
) = vect_external_def
;
1189 children
.safe_push (child
);
1194 oprnd_info
->def_stmts
= vNULL
;
1195 children
.safe_push (child
);
1199 /* If the SLP build failed fatally and we analyze a basic-block
1200 simply treat nodes we fail to build as externally defined
1201 (and thus build vectors from the scalar defs).
1202 The cost model will reject outright expensive cases.
1203 ??? This doesn't treat cases where permutation ultimatively
1204 fails (or we don't try permutation below). Ideally we'd
1205 even compute a permutation that will end up with the maximum
1207 if (is_a
<bb_vec_info
> (vinfo
)
1209 /* ??? Rejecting patterns this way doesn't work. We'd have to
1210 do extra work to cancel the pattern so the uses see the
1212 && !is_pattern_stmt_p (vinfo_for_stmt (stmt
)))
1214 dump_printf_loc (MSG_NOTE
, vect_location
,
1215 "Building vector operands from scalars\n");
1216 child
= vect_create_new_slp_node (oprnd_info
->def_stmts
);
1217 SLP_TREE_DEF_TYPE (child
) = vect_external_def
;
1218 children
.safe_push (child
);
1219 oprnd_info
->def_stmts
= vNULL
;
1223 /* If the SLP build for operand zero failed and operand zero
1224 and one can be commutated try that for the scalar stmts
1225 that failed the match. */
1227 /* A first scalar stmt mismatch signals a fatal mismatch. */
1229 /* ??? For COND_EXPRs we can swap the comparison operands
1230 as well as the arms under some constraints. */
1232 && oprnds_info
[1]->first_dt
== vect_internal_def
1233 && is_gimple_assign (stmt
)
1234 && commutative_tree_code (gimple_assign_rhs_code (stmt
))
1236 /* Do so only if the number of not successful permutes was nor more
1237 than a cut-ff as re-trying the recursive match on
1238 possibly each level of the tree would expose exponential
1242 /* Verify if we can safely swap or if we committed to a specific
1243 operand order already. */
1244 for (j
= 0; j
< group_size
; ++j
)
1247 || STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmts
[j
]))))
1249 if (dump_enabled_p ())
1251 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1252 "Build SLP failed: cannot swap operands "
1254 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
1260 /* Swap mismatched definition stmts. */
1261 dump_printf_loc (MSG_NOTE
, vect_location
,
1262 "Re-trying with swapped operands of stmts ");
1263 for (j
= 0; j
< group_size
; ++j
)
1266 std::swap (oprnds_info
[0]->def_stmts
[j
],
1267 oprnds_info
[1]->def_stmts
[j
]);
1268 dump_printf (MSG_NOTE
, "%d ", j
);
1270 dump_printf (MSG_NOTE
, "\n");
1271 /* And try again with scratch 'matches' ... */
1272 bool *tem
= XALLOCAVEC (bool, group_size
);
1273 if ((child
= vect_build_slp_tree (vinfo
, oprnd_info
->def_stmts
,
1274 group_size
, &this_max_nunits
,
1275 &this_loads
, tem
, npermutes
,
1277 max_tree_size
)) != NULL
)
1279 /* ... so if successful we can apply the operand swapping
1280 to the GIMPLE IL. This is necessary because for example
1281 vect_get_slp_defs uses operand indexes and thus expects
1282 canonical operand order. This is also necessary even
1283 if we end up building the operand from scalars as
1284 we'll continue to process swapped operand two. */
1285 for (j
= 0; j
< group_size
; ++j
)
1287 gimple
*stmt
= stmts
[j
];
1288 gimple_set_plf (stmt
, GF_PLF_1
, false);
1290 for (j
= 0; j
< group_size
; ++j
)
1292 gimple
*stmt
= stmts
[j
];
1295 /* Avoid swapping operands twice. */
1296 if (gimple_plf (stmt
, GF_PLF_1
))
1298 swap_ssa_operands (stmt
, gimple_assign_rhs1_ptr (stmt
),
1299 gimple_assign_rhs2_ptr (stmt
));
1300 gimple_set_plf (stmt
, GF_PLF_1
, true);
1303 /* Verify we swap all duplicates or none. */
1305 for (j
= 0; j
< group_size
; ++j
)
1307 gimple
*stmt
= stmts
[j
];
1308 gcc_assert (gimple_plf (stmt
, GF_PLF_1
) == ! matches
[j
]);
1311 /* If we have all children of child built up from scalars then
1312 just throw that away and build it up this node from scalars. */
1313 if (!SLP_TREE_CHILDREN (child
).is_empty ()
1314 /* ??? Rejecting patterns this way doesn't work. We'd have
1315 to do extra work to cancel the pattern so the uses see the
1317 && !is_pattern_stmt_p
1318 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child
)[0])))
1321 slp_tree grandchild
;
1323 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child
), j
, grandchild
)
1324 if (SLP_TREE_DEF_TYPE (grandchild
) == vect_internal_def
)
1329 this_loads
.truncate (old_nloads
);
1330 this_tree_size
= old_tree_size
;
1331 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child
), j
, grandchild
)
1332 vect_free_slp_tree (grandchild
);
1333 SLP_TREE_CHILDREN (child
).truncate (0);
1335 dump_printf_loc (MSG_NOTE
, vect_location
,
1336 "Building parent vector operands from "
1337 "scalars instead\n");
1338 oprnd_info
->def_stmts
= vNULL
;
1339 SLP_TREE_DEF_TYPE (child
) = vect_external_def
;
1340 children
.safe_push (child
);
1345 oprnd_info
->def_stmts
= vNULL
;
1346 children
.safe_push (child
);
1354 gcc_assert (child
== NULL
);
1355 FOR_EACH_VEC_ELT (children
, j
, child
)
1356 vect_free_slp_tree (child
);
1357 vect_free_oprnd_info (oprnds_info
);
1361 vect_free_oprnd_info (oprnds_info
);
1364 *tree_size
+= this_tree_size
;
1365 *max_nunits
= this_max_nunits
;
1366 loads
->safe_splice (this_loads
);
1368 node
= vect_create_new_slp_node (stmts
);
1369 SLP_TREE_TWO_OPERATORS (node
) = two_operators
;
1370 SLP_TREE_CHILDREN (node
).splice (children
);
1374 /* Dump a slp tree NODE using flags specified in DUMP_KIND. */
1377 vect_print_slp_tree (dump_flags_t dump_kind
, location_t loc
, slp_tree node
)
1383 dump_printf_loc (dump_kind
, loc
, "node%s\n",
1384 SLP_TREE_DEF_TYPE (node
) != vect_internal_def
1385 ? " (external)" : "");
1386 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
1388 dump_printf_loc (dump_kind
, loc
, "\tstmt %d ", i
);
1389 dump_gimple_stmt (dump_kind
, TDF_SLIM
, stmt
, 0);
1391 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1392 vect_print_slp_tree (dump_kind
, loc
, child
);
1396 /* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
1397 If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
1398 J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
1399 stmts in NODE are to be marked. */
1402 vect_mark_slp_stmts (slp_tree node
, enum slp_vect_type mark
, int j
)
1408 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
1411 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
1412 if (j
< 0 || i
== j
)
1413 STMT_SLP_TYPE (vinfo_for_stmt (stmt
)) = mark
;
1415 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1416 vect_mark_slp_stmts (child
, mark
, j
);
1420 /* Mark the statements of the tree rooted at NODE as relevant (vect_used). */
1423 vect_mark_slp_stmts_relevant (slp_tree node
)
1427 stmt_vec_info stmt_info
;
1430 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
1433 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
1435 stmt_info
= vinfo_for_stmt (stmt
);
1436 gcc_assert (!STMT_VINFO_RELEVANT (stmt_info
)
1437 || STMT_VINFO_RELEVANT (stmt_info
) == vect_used_in_scope
);
1438 STMT_VINFO_RELEVANT (stmt_info
) = vect_used_in_scope
;
1441 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1442 vect_mark_slp_stmts_relevant (child
);
1446 /* Rearrange the statements of NODE according to PERMUTATION. */
1449 vect_slp_rearrange_stmts (slp_tree node
, unsigned int group_size
,
1450 vec
<unsigned> permutation
)
1453 vec
<gimple
*> tmp_stmts
;
1457 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1458 vect_slp_rearrange_stmts (child
, group_size
, permutation
);
1460 gcc_assert (group_size
== SLP_TREE_SCALAR_STMTS (node
).length ());
1461 tmp_stmts
.create (group_size
);
1462 tmp_stmts
.quick_grow_cleared (group_size
);
1464 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
1465 tmp_stmts
[permutation
[i
]] = stmt
;
1467 SLP_TREE_SCALAR_STMTS (node
).release ();
1468 SLP_TREE_SCALAR_STMTS (node
) = tmp_stmts
;
1472 /* Attempt to reorder stmts in a reduction chain so that we don't
1473 require any load permutation. Return true if that was possible,
1474 otherwise return false. */
1477 vect_attempt_slp_rearrange_stmts (slp_instance slp_instn
)
1479 unsigned int group_size
= SLP_INSTANCE_GROUP_SIZE (slp_instn
);
1482 slp_tree node
, load
;
1484 /* Compare all the permutation sequences to the first one. We know
1485 that at least one load is permuted. */
1486 node
= SLP_INSTANCE_LOADS (slp_instn
)[0];
1487 if (!node
->load_permutation
.exists ())
1489 for (i
= 1; SLP_INSTANCE_LOADS (slp_instn
).iterate (i
, &load
); ++i
)
1491 if (!load
->load_permutation
.exists ())
1493 FOR_EACH_VEC_ELT (load
->load_permutation
, j
, lidx
)
1494 if (lidx
!= node
->load_permutation
[j
])
1498 /* Check that the loads in the first sequence are different and there
1499 are no gaps between them. */
1500 auto_sbitmap
load_index (group_size
);
1501 bitmap_clear (load_index
);
1502 FOR_EACH_VEC_ELT (node
->load_permutation
, i
, lidx
)
1504 if (lidx
>= group_size
)
1506 if (bitmap_bit_p (load_index
, lidx
))
1509 bitmap_set_bit (load_index
, lidx
);
1511 for (i
= 0; i
< group_size
; i
++)
1512 if (!bitmap_bit_p (load_index
, i
))
1515 /* This permutation is valid for reduction. Since the order of the
1516 statements in the nodes is not important unless they are memory
1517 accesses, we can rearrange the statements in all the nodes
1518 according to the order of the loads. */
1519 vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn
), group_size
,
1520 node
->load_permutation
);
1522 /* We are done, no actual permutations need to be generated. */
1523 poly_uint64 unrolling_factor
= SLP_INSTANCE_UNROLLING_FACTOR (slp_instn
);
1524 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1526 gimple
*first_stmt
= SLP_TREE_SCALAR_STMTS (node
)[0];
1527 first_stmt
= GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt
));
1528 /* But we have to keep those permutations that are required because
1529 of handling of gaps. */
1530 if (known_eq (unrolling_factor
, 1U)
1531 || (group_size
== GROUP_SIZE (vinfo_for_stmt (first_stmt
))
1532 && GROUP_GAP (vinfo_for_stmt (first_stmt
)) == 0))
1533 SLP_TREE_LOAD_PERMUTATION (node
).release ();
1535 for (j
= 0; j
< SLP_TREE_LOAD_PERMUTATION (node
).length (); ++j
)
1536 SLP_TREE_LOAD_PERMUTATION (node
)[j
] = j
;
1542 /* Check if the required load permutations in the SLP instance
1543 SLP_INSTN are supported. */
1546 vect_supported_load_permutation_p (slp_instance slp_instn
)
1548 unsigned int group_size
= SLP_INSTANCE_GROUP_SIZE (slp_instn
);
1549 unsigned int i
, j
, k
, next
;
1551 gimple
*stmt
, *load
, *next_load
;
1553 if (dump_enabled_p ())
1555 dump_printf_loc (MSG_NOTE
, vect_location
, "Load permutation ");
1556 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1557 if (node
->load_permutation
.exists ())
1558 FOR_EACH_VEC_ELT (node
->load_permutation
, j
, next
)
1559 dump_printf (MSG_NOTE
, "%d ", next
);
1561 for (k
= 0; k
< group_size
; ++k
)
1562 dump_printf (MSG_NOTE
, "%d ", k
);
1563 dump_printf (MSG_NOTE
, "\n");
1566 /* In case of reduction every load permutation is allowed, since the order
1567 of the reduction statements is not important (as opposed to the case of
1568 grouped stores). The only condition we need to check is that all the
1569 load nodes are of the same size and have the same permutation (and then
1570 rearrange all the nodes of the SLP instance according to this
1573 /* Check that all the load nodes are of the same size. */
1574 /* ??? Can't we assert this? */
1575 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1576 if (SLP_TREE_SCALAR_STMTS (node
).length () != (unsigned) group_size
)
1579 node
= SLP_INSTANCE_TREE (slp_instn
);
1580 stmt
= SLP_TREE_SCALAR_STMTS (node
)[0];
1582 /* Reduction (there are no data-refs in the root).
1583 In reduction chain the order of the loads is not important. */
1584 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt
))
1585 && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)))
1586 vect_attempt_slp_rearrange_stmts (slp_instn
);
1588 /* In basic block vectorization we allow any subchain of an interleaving
1590 FORNOW: not supported in loop SLP because of realignment compications. */
1591 if (STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt
)))
1593 /* Check whether the loads in an instance form a subchain and thus
1594 no permutation is necessary. */
1595 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1597 if (!SLP_TREE_LOAD_PERMUTATION (node
).exists ())
1599 bool subchain_p
= true;
1601 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), j
, load
)
1604 && (next_load
!= load
1605 || GROUP_GAP (vinfo_for_stmt (load
)) != 1))
1610 next_load
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (load
));
1613 SLP_TREE_LOAD_PERMUTATION (node
).release ();
1616 stmt_vec_info group_info
1617 = vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node
)[0]);
1618 group_info
= vinfo_for_stmt (GROUP_FIRST_ELEMENT (group_info
));
1620 = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (group_info
));
1621 unsigned k
, maxk
= 0;
1622 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (node
), j
, k
)
1625 /* In BB vectorization we may not actually use a loaded vector
1626 accessing elements in excess of GROUP_SIZE. */
1627 if (maxk
>= (GROUP_SIZE (group_info
) & ~(nunits
- 1)))
1629 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1630 "BB vectorization with gaps at the end of "
1631 "a load is not supported\n");
1635 /* Verify the permutation can be generated. */
1638 if (!vect_transform_slp_perm_load (node
, tem
, NULL
,
1639 1, slp_instn
, true, &n_perms
))
1641 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
1643 "unsupported load permutation\n");
1651 /* For loop vectorization verify we can generate the permutation. Be
1652 conservative about the vectorization factor, there are permutations
1653 that will use three vector inputs only starting from a specific factor
1654 and the vectorization factor is not yet final.
1655 ??? The SLP instance unrolling factor might not be the maximum one. */
1658 = force_common_multiple (SLP_INSTANCE_UNROLLING_FACTOR (slp_instn
),
1659 LOOP_VINFO_VECT_FACTOR
1660 (STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt
))));
1661 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1662 if (node
->load_permutation
.exists ()
1663 && !vect_transform_slp_perm_load (node
, vNULL
, NULL
, test_vf
,
1664 slp_instn
, true, &n_perms
))
1671 /* Find the last store in SLP INSTANCE. */
1674 vect_find_last_scalar_stmt_in_slp (slp_tree node
)
1676 gimple
*last
= NULL
, *stmt
;
1678 for (int i
= 0; SLP_TREE_SCALAR_STMTS (node
).iterate (i
, &stmt
); i
++)
1680 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
1681 if (is_pattern_stmt_p (stmt_vinfo
))
1682 last
= get_later_stmt (STMT_VINFO_RELATED_STMT (stmt_vinfo
), last
);
1684 last
= get_later_stmt (stmt
, last
);
1690 /* Compute the cost for the SLP node NODE in the SLP instance INSTANCE. */
1693 vect_analyze_slp_cost_1 (slp_instance instance
, slp_tree node
,
1694 stmt_vector_for_cost
*prologue_cost_vec
,
1695 stmt_vector_for_cost
*body_cost_vec
,
1696 unsigned ncopies_for_cost
,
1697 scalar_stmts_set_t
* visited
)
1702 stmt_vec_info stmt_info
;
1705 /* If we already costed the exact same set of scalar stmts we're done.
1706 We share the generated vector stmts for those. */
1707 if (visited
->contains (SLP_TREE_SCALAR_STMTS (node
)))
1710 visited
->add (SLP_TREE_SCALAR_STMTS (node
).copy ());
1712 /* Recurse down the SLP tree. */
1713 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1714 if (SLP_TREE_DEF_TYPE (child
) == vect_internal_def
)
1715 vect_analyze_slp_cost_1 (instance
, child
, prologue_cost_vec
,
1716 body_cost_vec
, ncopies_for_cost
, visited
);
1718 /* Look at the first scalar stmt to determine the cost. */
1719 stmt
= SLP_TREE_SCALAR_STMTS (node
)[0];
1720 stmt_info
= vinfo_for_stmt (stmt
);
1721 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
1723 vect_memory_access_type memory_access_type
1724 = (STMT_VINFO_STRIDED_P (stmt_info
)
1727 if (DR_IS_WRITE (STMT_VINFO_DATA_REF (stmt_info
)))
1728 vect_model_store_cost (stmt_info
, ncopies_for_cost
,
1729 memory_access_type
, vect_uninitialized_def
,
1730 node
, prologue_cost_vec
, body_cost_vec
);
1733 gcc_checking_assert (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info
)));
1734 if (SLP_TREE_LOAD_PERMUTATION (node
).exists ())
1736 /* If the load is permuted then the alignment is determined by
1737 the first group element not by the first scalar stmt DR. */
1738 stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
1739 stmt_info
= vinfo_for_stmt (stmt
);
1740 /* Record the cost for the permutation. */
1742 vect_transform_slp_perm_load (node
, vNULL
, NULL
,
1743 ncopies_for_cost
, instance
, true,
1745 record_stmt_cost (body_cost_vec
, n_perms
, vec_perm
,
1746 stmt_info
, 0, vect_body
);
1747 unsigned assumed_nunits
1748 = vect_nunits_for_cost (STMT_VINFO_VECTYPE (stmt_info
));
1749 /* And adjust the number of loads performed. This handles
1750 redundancies as well as loads that are later dead. */
1751 auto_sbitmap
perm (GROUP_SIZE (stmt_info
));
1752 bitmap_clear (perm
);
1753 for (i
= 0; i
< SLP_TREE_LOAD_PERMUTATION (node
).length (); ++i
)
1754 bitmap_set_bit (perm
, SLP_TREE_LOAD_PERMUTATION (node
)[i
]);
1755 ncopies_for_cost
= 0;
1756 bool load_seen
= false;
1757 for (i
= 0; i
< GROUP_SIZE (stmt_info
); ++i
)
1759 if (i
% assumed_nunits
== 0)
1765 if (bitmap_bit_p (perm
, i
))
1770 gcc_assert (ncopies_for_cost
1771 <= (GROUP_SIZE (stmt_info
) - GROUP_GAP (stmt_info
)
1772 + assumed_nunits
- 1) / assumed_nunits
);
1773 poly_uint64 uf
= SLP_INSTANCE_UNROLLING_FACTOR (instance
);
1774 ncopies_for_cost
*= estimated_poly_value (uf
);
1776 /* Record the cost for the vector loads. */
1777 vect_model_load_cost (stmt_info
, ncopies_for_cost
,
1778 memory_access_type
, node
, prologue_cost_vec
,
1783 else if (STMT_VINFO_TYPE (stmt_info
) == induc_vec_info_type
)
1785 /* ncopies_for_cost is the number of IVs we generate. */
1786 record_stmt_cost (body_cost_vec
, ncopies_for_cost
, vector_stmt
,
1787 stmt_info
, 0, vect_body
);
1789 /* Prologue cost for the initial values and step vector. */
1790 record_stmt_cost (prologue_cost_vec
, ncopies_for_cost
,
1792 (STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED
1794 ? vector_load
: vec_construct
,
1795 stmt_info
, 0, vect_prologue
);
1796 record_stmt_cost (prologue_cost_vec
, 1,
1798 (STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_info
))
1799 ? vector_load
: vec_construct
,
1800 stmt_info
, 0, vect_prologue
);
1802 /* ??? No easy way to get at the actual number of vector stmts
1803 to be geneated and thus the derived IVs. */
1807 record_stmt_cost (body_cost_vec
, ncopies_for_cost
, vector_stmt
,
1808 stmt_info
, 0, vect_body
);
1809 if (SLP_TREE_TWO_OPERATORS (node
))
1811 record_stmt_cost (body_cost_vec
, ncopies_for_cost
, vector_stmt
,
1812 stmt_info
, 0, vect_body
);
1813 record_stmt_cost (body_cost_vec
, ncopies_for_cost
, vec_perm
,
1814 stmt_info
, 0, vect_body
);
1818 /* Push SLP node def-type to stmts. */
1819 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1820 if (SLP_TREE_DEF_TYPE (child
) != vect_internal_def
)
1821 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child
), j
, stmt
)
1822 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt
)) = SLP_TREE_DEF_TYPE (child
);
1824 /* Scan operands and account for prologue cost of constants/externals.
1825 ??? This over-estimates cost for multiple uses and should be
1827 stmt
= SLP_TREE_SCALAR_STMTS (node
)[0];
1828 lhs
= gimple_get_lhs (stmt
);
1829 for (i
= 0; i
< gimple_num_ops (stmt
); ++i
)
1831 tree op
= gimple_op (stmt
, i
);
1833 enum vect_def_type dt
;
1834 if (!op
|| op
== lhs
)
1836 if (vect_is_simple_use (op
, stmt_info
->vinfo
, &def_stmt
, &dt
))
1838 /* Without looking at the actual initializer a vector of
1839 constants can be implemented as load from the constant pool.
1840 ??? We need to pass down stmt_info for a vector type
1841 even if it points to the wrong stmt. */
1842 if (dt
== vect_constant_def
)
1843 record_stmt_cost (prologue_cost_vec
, 1, vector_load
,
1844 stmt_info
, 0, vect_prologue
);
1845 else if (dt
== vect_external_def
)
1846 record_stmt_cost (prologue_cost_vec
, 1, vec_construct
,
1847 stmt_info
, 0, vect_prologue
);
1851 /* Restore stmt def-types. */
1852 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1853 if (SLP_TREE_DEF_TYPE (child
) != vect_internal_def
)
1854 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child
), j
, stmt
)
1855 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt
)) = vect_internal_def
;
1858 /* Compute the cost for the SLP instance INSTANCE. */
1861 vect_analyze_slp_cost (slp_instance instance
, void *data
)
1863 stmt_vector_for_cost body_cost_vec
, prologue_cost_vec
;
1864 unsigned ncopies_for_cost
;
1865 stmt_info_for_cost
*si
;
1868 if (dump_enabled_p ())
1869 dump_printf_loc (MSG_NOTE
, vect_location
,
1870 "=== vect_analyze_slp_cost ===\n");
1872 /* Calculate the number of vector stmts to create based on the unrolling
1873 factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
1874 GROUP_SIZE / NUNITS otherwise. */
1875 unsigned group_size
= SLP_INSTANCE_GROUP_SIZE (instance
);
1876 slp_tree node
= SLP_INSTANCE_TREE (instance
);
1877 stmt_vec_info stmt_info
= vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node
)[0]);
1878 /* Get the estimated vectorization factor, which is always one for
1879 basic-block vectorization. */
1880 unsigned int assumed_vf
;
1881 if (STMT_VINFO_LOOP_VINFO (stmt_info
))
1882 assumed_vf
= vect_vf_for_cost (STMT_VINFO_LOOP_VINFO (stmt_info
));
1885 /* For reductions look at a reduction operand in case the reduction
1886 operation is widening like DOT_PROD or SAD. */
1887 tree vectype_for_cost
= STMT_VINFO_VECTYPE (stmt_info
);
1888 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info
))
1890 gimple
*stmt
= SLP_TREE_SCALAR_STMTS (node
)[0];
1891 switch (gimple_assign_rhs_code (stmt
))
1895 vectype_for_cost
= get_vectype_for_scalar_type
1896 (TREE_TYPE (gimple_assign_rhs1 (stmt
)));
1901 unsigned int assumed_nunits
= vect_nunits_for_cost (vectype_for_cost
);
1902 ncopies_for_cost
= (least_common_multiple (assumed_nunits
,
1903 group_size
* assumed_vf
)
1906 prologue_cost_vec
.create (10);
1907 body_cost_vec
.create (10);
1908 scalar_stmts_set_t
*visited
= new scalar_stmts_set_t ();
1909 vect_analyze_slp_cost_1 (instance
, SLP_INSTANCE_TREE (instance
),
1910 &prologue_cost_vec
, &body_cost_vec
,
1911 ncopies_for_cost
, visited
);
1914 /* Record the prologue costs, which were delayed until we were
1915 sure that SLP was successful. */
1916 FOR_EACH_VEC_ELT (prologue_cost_vec
, i
, si
)
1918 struct _stmt_vec_info
*stmt_info
1919 = si
->stmt
? vinfo_for_stmt (si
->stmt
) : NULL
;
1920 (void) add_stmt_cost (data
, si
->count
, si
->kind
, stmt_info
,
1921 si
->misalign
, vect_prologue
);
1924 /* Record the instance's instructions in the target cost model. */
1925 FOR_EACH_VEC_ELT (body_cost_vec
, i
, si
)
1927 struct _stmt_vec_info
*stmt_info
1928 = si
->stmt
? vinfo_for_stmt (si
->stmt
) : NULL
;
1929 (void) add_stmt_cost (data
, si
->count
, si
->kind
, stmt_info
,
1930 si
->misalign
, vect_body
);
1933 prologue_cost_vec
.release ();
1934 body_cost_vec
.release ();
1937 /* Splits a group of stores, currently beginning at FIRST_STMT, into two groups:
1938 one (still beginning at FIRST_STMT) of size GROUP1_SIZE (also containing
1939 the first GROUP1_SIZE stmts, since stores are consecutive), the second
1940 containing the remainder.
1941 Return the first stmt in the second group. */
1944 vect_split_slp_store_group (gimple
*first_stmt
, unsigned group1_size
)
1946 stmt_vec_info first_vinfo
= vinfo_for_stmt (first_stmt
);
1947 gcc_assert (GROUP_FIRST_ELEMENT (first_vinfo
) == first_stmt
);
1948 gcc_assert (group1_size
> 0);
1949 int group2_size
= GROUP_SIZE (first_vinfo
) - group1_size
;
1950 gcc_assert (group2_size
> 0);
1951 GROUP_SIZE (first_vinfo
) = group1_size
;
1953 gimple
*stmt
= first_stmt
;
1954 for (unsigned i
= group1_size
; i
> 1; i
--)
1956 stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt
));
1957 gcc_assert (GROUP_GAP (vinfo_for_stmt (stmt
)) == 1);
1959 /* STMT is now the last element of the first group. */
1960 gimple
*group2
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt
));
1961 GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt
)) = 0;
1963 GROUP_SIZE (vinfo_for_stmt (group2
)) = group2_size
;
1964 for (stmt
= group2
; stmt
; stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt
)))
1966 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)) = group2
;
1967 gcc_assert (GROUP_GAP (vinfo_for_stmt (stmt
)) == 1);
1970 /* For the second group, the GROUP_GAP is that before the original group,
1971 plus skipping over the first vector. */
1972 GROUP_GAP (vinfo_for_stmt (group2
)) =
1973 GROUP_GAP (first_vinfo
) + group1_size
;
1975 /* GROUP_GAP of the first group now has to skip over the second group too. */
1976 GROUP_GAP (first_vinfo
) += group2_size
;
1978 if (dump_enabled_p ())
1979 dump_printf_loc (MSG_NOTE
, vect_location
, "Split group into %d and %d\n",
1980 group1_size
, group2_size
);
1985 /* Calculate the unrolling factor for an SLP instance with GROUP_SIZE
1986 statements and a vector of NUNITS elements. */
1989 calculate_unrolling_factor (poly_uint64 nunits
, unsigned int group_size
)
1991 return exact_div (common_multiple (nunits
, group_size
), group_size
);
1994 /* Analyze an SLP instance starting from a group of grouped stores. Call
1995 vect_build_slp_tree to build a tree of packed stmts if possible.
1996 Return FALSE if it's impossible to SLP any stmt in the loop. */
1999 vect_analyze_slp_instance (vec_info
*vinfo
,
2000 gimple
*stmt
, unsigned max_tree_size
)
2002 slp_instance new_instance
;
2004 unsigned int group_size
= GROUP_SIZE (vinfo_for_stmt (stmt
));
2005 tree vectype
, scalar_type
= NULL_TREE
;
2008 vec
<slp_tree
> loads
;
2009 struct data_reference
*dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt
));
2010 vec
<gimple
*> scalar_stmts
;
2012 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)))
2016 scalar_type
= TREE_TYPE (DR_REF (dr
));
2017 vectype
= get_vectype_for_scalar_type (scalar_type
);
2021 gcc_assert (is_a
<loop_vec_info
> (vinfo
));
2022 vectype
= STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt
));
2025 group_size
= GROUP_SIZE (vinfo_for_stmt (stmt
));
2029 gcc_assert (is_a
<loop_vec_info
> (vinfo
));
2030 vectype
= STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt
));
2031 group_size
= as_a
<loop_vec_info
> (vinfo
)->reductions
.length ();
2036 if (dump_enabled_p ())
2038 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2039 "Build SLP failed: unsupported data-type ");
2040 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, scalar_type
);
2041 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
2046 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2048 /* Create a node (a root of the SLP tree) for the packed grouped stores. */
2049 scalar_stmts
.create (group_size
);
2051 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)))
2053 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
2056 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next
))
2057 && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next
)))
2058 scalar_stmts
.safe_push (
2059 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next
)));
2061 scalar_stmts
.safe_push (next
);
2062 next
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next
));
2064 /* Mark the first element of the reduction chain as reduction to properly
2065 transform the node. In the reduction analysis phase only the last
2066 element of the chain is marked as reduction. */
2067 if (!STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt
)))
2068 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt
)) = vect_reduction_def
;
2072 /* Collect reduction statements. */
2073 vec
<gimple
*> reductions
= as_a
<loop_vec_info
> (vinfo
)->reductions
;
2074 for (i
= 0; reductions
.iterate (i
, &next
); i
++)
2075 scalar_stmts
.safe_push (next
);
2078 loads
.create (group_size
);
2080 /* Build the tree for the SLP instance. */
2081 bool *matches
= XALLOCAVEC (bool, group_size
);
2082 unsigned npermutes
= 0;
2083 bst_fail
= new scalar_stmts_set_t ();
2084 poly_uint64 max_nunits
= nunits
;
2085 node
= vect_build_slp_tree (vinfo
, scalar_stmts
, group_size
,
2086 &max_nunits
, &loads
, matches
, &npermutes
,
2087 NULL
, max_tree_size
);
2091 /* Calculate the unrolling factor based on the smallest type. */
2092 poly_uint64 unrolling_factor
2093 = calculate_unrolling_factor (max_nunits
, group_size
);
2095 if (maybe_ne (unrolling_factor
, 1U)
2096 && is_a
<bb_vec_info
> (vinfo
))
2098 unsigned HOST_WIDE_INT const_max_nunits
;
2099 if (!max_nunits
.is_constant (&const_max_nunits
)
2100 || const_max_nunits
> group_size
)
2102 if (dump_enabled_p ())
2103 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2104 "Build SLP failed: store group "
2105 "size not a multiple of the vector size "
2106 "in basic block SLP\n");
2107 vect_free_slp_tree (node
);
2111 /* Fatal mismatch. */
2112 matches
[group_size
/ const_max_nunits
* const_max_nunits
] = false;
2113 vect_free_slp_tree (node
);
2118 /* Create a new SLP instance. */
2119 new_instance
= XNEW (struct _slp_instance
);
2120 SLP_INSTANCE_TREE (new_instance
) = node
;
2121 SLP_INSTANCE_GROUP_SIZE (new_instance
) = group_size
;
2122 SLP_INSTANCE_UNROLLING_FACTOR (new_instance
) = unrolling_factor
;
2123 SLP_INSTANCE_LOADS (new_instance
) = loads
;
2125 /* Compute the load permutation. */
2127 bool loads_permuted
= false;
2128 FOR_EACH_VEC_ELT (loads
, i
, load_node
)
2130 vec
<unsigned> load_permutation
;
2132 gimple
*load
, *first_stmt
;
2133 bool this_load_permuted
= false;
2134 load_permutation
.create (group_size
);
2135 first_stmt
= GROUP_FIRST_ELEMENT
2136 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node
)[0]));
2137 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node
), j
, load
)
2139 int load_place
= vect_get_place_in_interleaving_chain
2141 gcc_assert (load_place
!= -1);
2142 if (load_place
!= j
)
2143 this_load_permuted
= true;
2144 load_permutation
.safe_push (load_place
);
2146 if (!this_load_permuted
2147 /* The load requires permutation when unrolling exposes
2148 a gap either because the group is larger than the SLP
2149 group-size or because there is a gap between the groups. */
2150 && (known_eq (unrolling_factor
, 1U)
2151 || (group_size
== GROUP_SIZE (vinfo_for_stmt (first_stmt
))
2152 && GROUP_GAP (vinfo_for_stmt (first_stmt
)) == 0)))
2154 load_permutation
.release ();
2157 SLP_TREE_LOAD_PERMUTATION (load_node
) = load_permutation
;
2158 loads_permuted
= true;
2163 if (!vect_supported_load_permutation_p (new_instance
))
2165 if (dump_enabled_p ())
2167 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2168 "Build SLP failed: unsupported load "
2170 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
,
2173 vect_free_slp_instance (new_instance
);
2178 /* If the loads and stores can be handled with load/store-lan
2179 instructions do not generate this SLP instance. */
2180 if (is_a
<loop_vec_info
> (vinfo
)
2182 && dr
&& vect_store_lanes_supported (vectype
, group_size
))
2185 FOR_EACH_VEC_ELT (loads
, i
, load_node
)
2187 gimple
*first_stmt
= GROUP_FIRST_ELEMENT
2188 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node
)[0]));
2189 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (first_stmt
);
2190 /* Use SLP for strided accesses (or if we
2191 can't load-lanes). */
2192 if (STMT_VINFO_STRIDED_P (stmt_vinfo
)
2193 || ! vect_load_lanes_supported
2194 (STMT_VINFO_VECTYPE (stmt_vinfo
),
2195 GROUP_SIZE (stmt_vinfo
)))
2198 if (i
== loads
.length ())
2200 if (dump_enabled_p ())
2201 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2202 "Built SLP cancelled: can use "
2203 "load/store-lanes\n");
2204 vect_free_slp_instance (new_instance
);
2209 vinfo
->slp_instances
.safe_push (new_instance
);
2211 if (dump_enabled_p ())
2213 dump_printf_loc (MSG_NOTE
, vect_location
,
2214 "Final SLP tree for instance:\n");
2215 vect_print_slp_tree (MSG_NOTE
, vect_location
, node
);
2223 /* Failed to SLP. */
2224 /* Free the allocated memory. */
2225 scalar_stmts
.release ();
2229 /* For basic block SLP, try to break the group up into multiples of the
2231 unsigned HOST_WIDE_INT const_nunits
;
2232 if (is_a
<bb_vec_info
> (vinfo
)
2233 && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
))
2234 && STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt
))
2235 && nunits
.is_constant (&const_nunits
))
2237 /* We consider breaking the group only on VF boundaries from the existing
2239 for (i
= 0; i
< group_size
; i
++)
2240 if (!matches
[i
]) break;
2242 if (i
>= const_nunits
&& i
< group_size
)
2244 /* Split into two groups at the first vector boundary before i. */
2245 gcc_assert ((const_nunits
& (const_nunits
- 1)) == 0);
2246 unsigned group1_size
= i
& ~(const_nunits
- 1);
2248 gimple
*rest
= vect_split_slp_store_group (stmt
, group1_size
);
2249 bool res
= vect_analyze_slp_instance (vinfo
, stmt
, max_tree_size
);
2250 /* If the first non-match was in the middle of a vector,
2251 skip the rest of that vector. */
2252 if (group1_size
< i
)
2254 i
= group1_size
+ const_nunits
;
2256 rest
= vect_split_slp_store_group (rest
, const_nunits
);
2259 res
|= vect_analyze_slp_instance (vinfo
, rest
, max_tree_size
);
2262 /* Even though the first vector did not all match, we might be able to SLP
2263 (some) of the remainder. FORNOW ignore this possibility. */
2270 /* Check if there are stmts in the loop can be vectorized using SLP. Build SLP
2271 trees of packed scalar stmts if SLP is possible. */
2274 vect_analyze_slp (vec_info
*vinfo
, unsigned max_tree_size
)
2277 gimple
*first_element
;
2279 if (dump_enabled_p ())
2280 dump_printf_loc (MSG_NOTE
, vect_location
, "=== vect_analyze_slp ===\n");
2282 /* Find SLP sequences starting from groups of grouped stores. */
2283 FOR_EACH_VEC_ELT (vinfo
->grouped_stores
, i
, first_element
)
2284 vect_analyze_slp_instance (vinfo
, first_element
, max_tree_size
);
2286 if (loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
))
2288 if (loop_vinfo
->reduction_chains
.length () > 0)
2290 /* Find SLP sequences starting from reduction chains. */
2291 FOR_EACH_VEC_ELT (loop_vinfo
->reduction_chains
, i
, first_element
)
2292 if (! vect_analyze_slp_instance (vinfo
, first_element
,
2295 /* Dissolve reduction chain group. */
2296 gimple
*next
, *stmt
= first_element
;
2299 stmt_vec_info vinfo
= vinfo_for_stmt (stmt
);
2300 next
= GROUP_NEXT_ELEMENT (vinfo
);
2301 GROUP_FIRST_ELEMENT (vinfo
) = NULL
;
2302 GROUP_NEXT_ELEMENT (vinfo
) = NULL
;
2305 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (first_element
))
2306 = vect_internal_def
;
2310 /* Find SLP sequences starting from groups of reductions. */
2311 if (loop_vinfo
->reductions
.length () > 1)
2312 vect_analyze_slp_instance (vinfo
, loop_vinfo
->reductions
[0],
2320 /* For each possible SLP instance decide whether to SLP it and calculate overall
2321 unrolling factor needed to SLP the loop. Return TRUE if decided to SLP at
2322 least one instance. */
2325 vect_make_slp_decision (loop_vec_info loop_vinfo
)
2328 poly_uint64 unrolling_factor
= 1;
2329 vec
<slp_instance
> slp_instances
= LOOP_VINFO_SLP_INSTANCES (loop_vinfo
);
2330 slp_instance instance
;
2331 int decided_to_slp
= 0;
2333 if (dump_enabled_p ())
2334 dump_printf_loc (MSG_NOTE
, vect_location
, "=== vect_make_slp_decision ==="
2337 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
2339 /* FORNOW: SLP if you can. */
2340 /* All unroll factors have the form current_vector_size * X for some
2341 rational X, so they must have a common multiple. */
2343 = force_common_multiple (unrolling_factor
,
2344 SLP_INSTANCE_UNROLLING_FACTOR (instance
));
2346 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
2347 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
2348 loop-based vectorization. Such stmts will be marked as HYBRID. */
2349 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance
), pure_slp
, -1);
2353 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo
) = unrolling_factor
;
2355 if (decided_to_slp
&& dump_enabled_p ())
2357 dump_printf_loc (MSG_NOTE
, vect_location
,
2358 "Decided to SLP %d instances. Unrolling factor ",
2360 dump_dec (MSG_NOTE
, unrolling_factor
);
2361 dump_printf (MSG_NOTE
, "\n");
2364 return (decided_to_slp
> 0);
2368 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
2369 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
2372 vect_detect_hybrid_slp_stmts (slp_tree node
, unsigned i
, slp_vect_type stype
)
2374 gimple
*stmt
= SLP_TREE_SCALAR_STMTS (node
)[i
];
2375 imm_use_iterator imm_iter
;
2377 stmt_vec_info use_vinfo
, stmt_vinfo
= vinfo_for_stmt (stmt
);
2379 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
2380 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2383 /* Propagate hybrid down the SLP tree. */
2384 if (stype
== hybrid
)
2386 else if (HYBRID_SLP_STMT (stmt_vinfo
))
2390 /* Check if a pure SLP stmt has uses in non-SLP stmts. */
2391 gcc_checking_assert (PURE_SLP_STMT (stmt_vinfo
));
2392 /* If we get a pattern stmt here we have to use the LHS of the
2393 original stmt for immediate uses. */
2394 if (! STMT_VINFO_IN_PATTERN_P (stmt_vinfo
)
2395 && STMT_VINFO_RELATED_STMT (stmt_vinfo
))
2396 stmt
= STMT_VINFO_RELATED_STMT (stmt_vinfo
);
2398 if (gimple_code (stmt
) == GIMPLE_PHI
)
2399 def
= gimple_phi_result (stmt
);
2401 def
= SINGLE_SSA_TREE_OPERAND (stmt
, SSA_OP_DEF
);
2403 FOR_EACH_IMM_USE_STMT (use_stmt
, imm_iter
, def
)
2405 if (!flow_bb_inside_loop_p (loop
, gimple_bb (use_stmt
)))
2407 use_vinfo
= vinfo_for_stmt (use_stmt
);
2408 if (STMT_VINFO_IN_PATTERN_P (use_vinfo
)
2409 && STMT_VINFO_RELATED_STMT (use_vinfo
))
2410 use_vinfo
= vinfo_for_stmt (STMT_VINFO_RELATED_STMT (use_vinfo
));
2411 if (!STMT_SLP_TYPE (use_vinfo
)
2412 && (STMT_VINFO_RELEVANT (use_vinfo
)
2413 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo
)))
2414 && !(gimple_code (use_stmt
) == GIMPLE_PHI
2415 && STMT_VINFO_DEF_TYPE (use_vinfo
) == vect_reduction_def
))
2417 if (dump_enabled_p ())
2419 dump_printf_loc (MSG_NOTE
, vect_location
, "use of SLP "
2420 "def in non-SLP stmt: ");
2421 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, use_stmt
, 0);
2429 && !HYBRID_SLP_STMT (stmt_vinfo
))
2431 if (dump_enabled_p ())
2433 dump_printf_loc (MSG_NOTE
, vect_location
, "marking hybrid: ");
2434 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
2436 STMT_SLP_TYPE (stmt_vinfo
) = hybrid
;
2439 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), j
, child
)
2440 if (SLP_TREE_DEF_TYPE (child
) != vect_external_def
)
2441 vect_detect_hybrid_slp_stmts (child
, i
, stype
);
2444 /* Helpers for vect_detect_hybrid_slp walking pattern stmt uses. */
2447 vect_detect_hybrid_slp_1 (tree
*tp
, int *, void *data
)
2449 walk_stmt_info
*wi
= (walk_stmt_info
*)data
;
2450 struct loop
*loopp
= (struct loop
*)wi
->info
;
2455 if (TREE_CODE (*tp
) == SSA_NAME
2456 && !SSA_NAME_IS_DEFAULT_DEF (*tp
))
2458 gimple
*def_stmt
= SSA_NAME_DEF_STMT (*tp
);
2459 if (flow_bb_inside_loop_p (loopp
, gimple_bb (def_stmt
))
2460 && PURE_SLP_STMT (vinfo_for_stmt (def_stmt
)))
2462 if (dump_enabled_p ())
2464 dump_printf_loc (MSG_NOTE
, vect_location
, "marking hybrid: ");
2465 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, def_stmt
, 0);
2467 STMT_SLP_TYPE (vinfo_for_stmt (def_stmt
)) = hybrid
;
2475 vect_detect_hybrid_slp_2 (gimple_stmt_iterator
*gsi
, bool *handled
,
2478 stmt_vec_info use_vinfo
= vinfo_for_stmt (gsi_stmt (*gsi
));
2479 /* If the stmt is in a SLP instance then this isn't a reason
2480 to mark use definitions in other SLP instances as hybrid. */
2481 if (! STMT_SLP_TYPE (use_vinfo
)
2482 && (STMT_VINFO_RELEVANT (use_vinfo
)
2483 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo
)))
2484 && ! (gimple_code (gsi_stmt (*gsi
)) == GIMPLE_PHI
2485 && STMT_VINFO_DEF_TYPE (use_vinfo
) == vect_reduction_def
))
2492 /* Find stmts that must be both vectorized and SLPed. */
2495 vect_detect_hybrid_slp (loop_vec_info loop_vinfo
)
2498 vec
<slp_instance
> slp_instances
= LOOP_VINFO_SLP_INSTANCES (loop_vinfo
);
2499 slp_instance instance
;
2501 if (dump_enabled_p ())
2502 dump_printf_loc (MSG_NOTE
, vect_location
, "=== vect_detect_hybrid_slp ==="
2505 /* First walk all pattern stmt in the loop and mark defs of uses as
2506 hybrid because immediate uses in them are not recorded. */
2507 for (i
= 0; i
< LOOP_VINFO_LOOP (loop_vinfo
)->num_nodes
; ++i
)
2509 basic_block bb
= LOOP_VINFO_BBS (loop_vinfo
)[i
];
2510 for (gimple_stmt_iterator gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
);
2513 gimple
*stmt
= gsi_stmt (gsi
);
2514 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2515 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
2518 memset (&wi
, 0, sizeof (wi
));
2519 wi
.info
= LOOP_VINFO_LOOP (loop_vinfo
);
2520 gimple_stmt_iterator gsi2
2521 = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info
));
2522 walk_gimple_stmt (&gsi2
, vect_detect_hybrid_slp_2
,
2523 vect_detect_hybrid_slp_1
, &wi
);
2524 walk_gimple_seq (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
),
2525 vect_detect_hybrid_slp_2
,
2526 vect_detect_hybrid_slp_1
, &wi
);
2531 /* Then walk the SLP instance trees marking stmts with uses in
2532 non-SLP stmts as hybrid, also propagating hybrid down the
2533 SLP tree, collecting the above info on-the-fly. */
2534 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
2536 for (unsigned i
= 0; i
< SLP_INSTANCE_GROUP_SIZE (instance
); ++i
)
2537 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance
),
2543 /* Initialize a bb_vec_info struct for the statements between
2544 REGION_BEGIN_IN (inclusive) and REGION_END_IN (exclusive). */
2546 _bb_vec_info::_bb_vec_info (gimple_stmt_iterator region_begin_in
,
2547 gimple_stmt_iterator region_end_in
)
2548 : vec_info (vec_info::bb
, init_cost (NULL
)),
2549 bb (gsi_bb (region_begin_in
)),
2550 region_begin (region_begin_in
),
2551 region_end (region_end_in
)
2553 gimple_stmt_iterator gsi
;
2555 for (gsi
= region_begin
; gsi_stmt (gsi
) != gsi_stmt (region_end
);
2558 gimple
*stmt
= gsi_stmt (gsi
);
2559 gimple_set_uid (stmt
, 0);
2560 set_vinfo_for_stmt (stmt
, new_stmt_vec_info (stmt
, this));
2567 /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
2568 stmts in the basic block. */
2570 _bb_vec_info::~_bb_vec_info ()
2572 for (gimple_stmt_iterator si
= region_begin
;
2573 gsi_stmt (si
) != gsi_stmt (region_end
); gsi_next (&si
))
2575 gimple
*stmt
= gsi_stmt (si
);
2576 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2579 /* Free stmt_vec_info. */
2580 free_stmt_vec_info (stmt
);
2582 /* Reset region marker. */
2583 gimple_set_uid (stmt
, -1);
2590 /* Analyze statements contained in SLP tree NODE after recursively analyzing
2591 the subtree. NODE_INSTANCE contains NODE and VINFO contains INSTANCE.
2593 Return true if the operations are supported. */
2596 vect_slp_analyze_node_operations (vec_info
*vinfo
, slp_tree node
,
2597 slp_instance node_instance
)
2604 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
2607 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
2608 if (!vect_slp_analyze_node_operations (vinfo
, child
, node_instance
))
2611 stmt
= SLP_TREE_SCALAR_STMTS (node
)[0];
2612 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2613 gcc_assert (stmt_info
);
2614 gcc_assert (STMT_SLP_TYPE (stmt_info
) != loop_vect
);
2616 /* For BB vectorization vector types are assigned here.
2617 Memory accesses already got their vector type assigned
2618 in vect_analyze_data_refs. */
2619 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
2621 && ! STMT_VINFO_DATA_REF (stmt_info
))
2623 gcc_assert (PURE_SLP_STMT (stmt_info
));
2625 tree scalar_type
= TREE_TYPE (gimple_get_lhs (stmt
));
2626 if (dump_enabled_p ())
2628 dump_printf_loc (MSG_NOTE
, vect_location
,
2629 "get vectype for scalar type: ");
2630 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, scalar_type
);
2631 dump_printf (MSG_NOTE
, "\n");
2634 tree vectype
= get_vectype_for_scalar_type (scalar_type
);
2637 if (dump_enabled_p ())
2639 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2640 "not SLPed: unsupported data-type ");
2641 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
2643 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
2648 if (dump_enabled_p ())
2650 dump_printf_loc (MSG_NOTE
, vect_location
, "vectype: ");
2651 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, vectype
);
2652 dump_printf (MSG_NOTE
, "\n");
2656 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, sstmt
)
2657 STMT_VINFO_VECTYPE (vinfo_for_stmt (sstmt
)) = vectype
;
2660 /* Calculate the number of vector statements to be created for the
2661 scalar stmts in this node. For SLP reductions it is equal to the
2662 number of vector statements in the children (which has already been
2663 calculated by the recursive call). Otherwise it is the number of
2664 scalar elements in one scalar iteration (GROUP_SIZE) multiplied by
2665 VF divided by the number of elements in a vector. */
2666 if (GROUP_FIRST_ELEMENT (stmt_info
)
2667 && !STMT_VINFO_GROUPED_ACCESS (stmt_info
))
2668 SLP_TREE_NUMBER_OF_VEC_STMTS (node
)
2669 = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node
)[0]);
2673 if (loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
))
2674 vf
= loop_vinfo
->vectorization_factor
;
2677 unsigned int group_size
= SLP_INSTANCE_GROUP_SIZE (node_instance
);
2678 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2679 SLP_TREE_NUMBER_OF_VEC_STMTS (node
)
2680 = vect_get_num_vectors (vf
* group_size
, vectype
);
2683 /* Push SLP node def-type to stmt operands. */
2684 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), j
, child
)
2685 if (SLP_TREE_DEF_TYPE (child
) != vect_internal_def
)
2686 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child
)[0]))
2687 = SLP_TREE_DEF_TYPE (child
);
2688 bool res
= vect_analyze_stmt (stmt
, &dummy
, node
, node_instance
);
2689 /* Restore def-types. */
2690 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), j
, child
)
2691 if (SLP_TREE_DEF_TYPE (child
) != vect_internal_def
)
2692 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child
)[0]))
2693 = vect_internal_def
;
2701 /* Analyze statements in SLP instances of VINFO. Return true if the
2702 operations are supported. */
2705 vect_slp_analyze_operations (vec_info
*vinfo
)
2707 slp_instance instance
;
2710 if (dump_enabled_p ())
2711 dump_printf_loc (MSG_NOTE
, vect_location
,
2712 "=== vect_slp_analyze_operations ===\n");
2714 for (i
= 0; vinfo
->slp_instances
.iterate (i
, &instance
); )
2716 if (!vect_slp_analyze_node_operations (vinfo
,
2717 SLP_INSTANCE_TREE (instance
),
2720 dump_printf_loc (MSG_NOTE
, vect_location
,
2721 "removing SLP instance operations starting from: ");
2722 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
,
2723 SLP_TREE_SCALAR_STMTS
2724 (SLP_INSTANCE_TREE (instance
))[0], 0);
2725 vect_free_slp_instance (instance
);
2726 vinfo
->slp_instances
.ordered_remove (i
);
2730 /* Compute the costs of the SLP instance. */
2731 vect_analyze_slp_cost (instance
, vinfo
->target_cost_data
);
2736 return !vinfo
->slp_instances
.is_empty ();
2740 /* Compute the scalar cost of the SLP node NODE and its children
2741 and return it. Do not account defs that are marked in LIFE and
2742 update LIFE according to uses of NODE. */
2745 vect_bb_slp_scalar_cost (basic_block bb
,
2746 slp_tree node
, vec
<bool, va_heap
> *life
)
2748 unsigned scalar_cost
= 0;
2753 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
2756 ssa_op_iter op_iter
;
2757 def_operand_p def_p
;
2758 stmt_vec_info stmt_info
;
2763 /* If there is a non-vectorized use of the defs then the scalar
2764 stmt is kept live in which case we do not account it or any
2765 required defs in the SLP children in the scalar cost. This
2766 way we make the vectorization more costly when compared to
2768 FOR_EACH_SSA_DEF_OPERAND (def_p
, stmt
, op_iter
, SSA_OP_DEF
)
2770 imm_use_iterator use_iter
;
2772 FOR_EACH_IMM_USE_STMT (use_stmt
, use_iter
, DEF_FROM_PTR (def_p
))
2773 if (!is_gimple_debug (use_stmt
)
2774 && (! vect_stmt_in_region_p (vinfo_for_stmt (stmt
)->vinfo
,
2776 || ! PURE_SLP_STMT (vinfo_for_stmt (use_stmt
))))
2779 BREAK_FROM_IMM_USE_STMT (use_iter
);
2785 /* Count scalar stmts only once. */
2786 if (gimple_visited_p (stmt
))
2788 gimple_set_visited (stmt
, true);
2790 stmt_info
= vinfo_for_stmt (stmt
);
2791 if (STMT_VINFO_DATA_REF (stmt_info
))
2793 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info
)))
2794 stmt_cost
= vect_get_stmt_cost (scalar_load
);
2796 stmt_cost
= vect_get_stmt_cost (scalar_store
);
2799 stmt_cost
= vect_get_stmt_cost (scalar_stmt
);
2801 scalar_cost
+= stmt_cost
;
2804 auto_vec
<bool, 20> subtree_life
;
2805 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
2807 if (SLP_TREE_DEF_TYPE (child
) == vect_internal_def
)
2809 /* Do not directly pass LIFE to the recursive call, copy it to
2810 confine changes in the callee to the current child/subtree. */
2811 subtree_life
.safe_splice (*life
);
2812 scalar_cost
+= vect_bb_slp_scalar_cost (bb
, child
, &subtree_life
);
2813 subtree_life
.truncate (0);
2820 /* Check if vectorization of the basic block is profitable. */
2823 vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo
)
2825 vec
<slp_instance
> slp_instances
= BB_VINFO_SLP_INSTANCES (bb_vinfo
);
2826 slp_instance instance
;
2828 unsigned int vec_inside_cost
= 0, vec_outside_cost
= 0, scalar_cost
= 0;
2829 unsigned int vec_prologue_cost
= 0, vec_epilogue_cost
= 0;
2831 /* Calculate scalar cost. */
2832 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
2834 auto_vec
<bool, 20> life
;
2835 life
.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance
));
2836 scalar_cost
+= vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo
),
2837 SLP_INSTANCE_TREE (instance
),
2841 /* Unset visited flag. */
2842 for (gimple_stmt_iterator gsi
= bb_vinfo
->region_begin
;
2843 gsi_stmt (gsi
) != gsi_stmt (bb_vinfo
->region_end
); gsi_next (&gsi
))
2844 gimple_set_visited (gsi_stmt (gsi
), false);
2846 /* Complete the target-specific cost calculation. */
2847 finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo
), &vec_prologue_cost
,
2848 &vec_inside_cost
, &vec_epilogue_cost
);
2850 vec_outside_cost
= vec_prologue_cost
+ vec_epilogue_cost
;
2852 if (dump_enabled_p ())
2854 dump_printf_loc (MSG_NOTE
, vect_location
, "Cost model analysis: \n");
2855 dump_printf (MSG_NOTE
, " Vector inside of basic block cost: %d\n",
2857 dump_printf (MSG_NOTE
, " Vector prologue cost: %d\n", vec_prologue_cost
);
2858 dump_printf (MSG_NOTE
, " Vector epilogue cost: %d\n", vec_epilogue_cost
);
2859 dump_printf (MSG_NOTE
, " Scalar cost of basic block: %d\n", scalar_cost
);
2862 /* Vectorization is profitable if its cost is more than the cost of scalar
2863 version. Note that we err on the vector side for equal cost because
2864 the cost estimate is otherwise quite pessimistic (constant uses are
2865 free on the scalar side but cost a load on the vector side for
2867 if (vec_outside_cost
+ vec_inside_cost
> scalar_cost
)
2873 /* Check if the basic block can be vectorized. Returns a bb_vec_info
2874 if so and sets fatal to true if failure is independent of
2875 current_vector_size. */
2878 vect_slp_analyze_bb_1 (gimple_stmt_iterator region_begin
,
2879 gimple_stmt_iterator region_end
,
2880 vec
<data_reference_p
> datarefs
, int n_stmts
,
2883 bb_vec_info bb_vinfo
;
2884 slp_instance instance
;
2886 poly_uint64 min_vf
= 2;
2888 /* The first group of checks is independent of the vector size. */
2891 if (n_stmts
> PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB
))
2893 if (dump_enabled_p ())
2894 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2895 "not vectorized: too many instructions in "
2897 free_data_refs (datarefs
);
2901 bb_vinfo
= new _bb_vec_info (region_begin
, region_end
);
2905 BB_VINFO_DATAREFS (bb_vinfo
) = datarefs
;
2907 /* Analyze the data references. */
2909 if (!vect_analyze_data_refs (bb_vinfo
, &min_vf
))
2911 if (dump_enabled_p ())
2912 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2913 "not vectorized: unhandled data-ref in basic "
2920 if (BB_VINFO_DATAREFS (bb_vinfo
).length () < 2)
2922 if (dump_enabled_p ())
2923 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2924 "not vectorized: not enough data-refs in "
2931 if (!vect_analyze_data_ref_accesses (bb_vinfo
))
2933 if (dump_enabled_p ())
2934 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2935 "not vectorized: unhandled data access in "
2942 /* If there are no grouped stores in the region there is no need
2943 to continue with pattern recog as vect_analyze_slp will fail
2945 if (bb_vinfo
->grouped_stores
.is_empty ())
2947 if (dump_enabled_p ())
2948 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2949 "not vectorized: no grouped stores in "
2956 /* While the rest of the analysis below depends on it in some way. */
2959 vect_pattern_recog (bb_vinfo
);
2961 /* Check the SLP opportunities in the basic block, analyze and build SLP
2963 if (!vect_analyze_slp (bb_vinfo
, n_stmts
))
2965 if (dump_enabled_p ())
2967 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2968 "Failed to SLP the basic block.\n");
2969 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2970 "not vectorized: failed to find SLP opportunities "
2971 "in basic block.\n");
2978 vect_record_base_alignments (bb_vinfo
);
2980 /* Analyze and verify the alignment of data references and the
2981 dependence in the SLP instances. */
2982 for (i
= 0; BB_VINFO_SLP_INSTANCES (bb_vinfo
).iterate (i
, &instance
); )
2984 if (! vect_slp_analyze_and_verify_instance_alignment (instance
)
2985 || ! vect_slp_analyze_instance_dependence (instance
))
2987 dump_printf_loc (MSG_NOTE
, vect_location
,
2988 "removing SLP instance operations starting from: ");
2989 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
,
2990 SLP_TREE_SCALAR_STMTS
2991 (SLP_INSTANCE_TREE (instance
))[0], 0);
2992 vect_free_slp_instance (instance
);
2993 BB_VINFO_SLP_INSTANCES (bb_vinfo
).ordered_remove (i
);
2997 /* Mark all the statements that we want to vectorize as pure SLP and
2999 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance
), pure_slp
, -1);
3000 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance
));
3004 if (! BB_VINFO_SLP_INSTANCES (bb_vinfo
).length ())
3010 if (!vect_slp_analyze_operations (bb_vinfo
))
3012 if (dump_enabled_p ())
3013 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3014 "not vectorized: bad operation in basic block.\n");
3020 /* Cost model: check if the vectorization is worthwhile. */
3021 if (!unlimited_cost_model (NULL
)
3022 && !vect_bb_vectorization_profitable_p (bb_vinfo
))
3024 if (dump_enabled_p ())
3025 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3026 "not vectorized: vectorization is not "
3033 if (dump_enabled_p ())
3034 dump_printf_loc (MSG_NOTE
, vect_location
,
3035 "Basic block will be vectorized using SLP\n");
3041 /* Main entry for the BB vectorizer. Analyze and transform BB, returns
3042 true if anything in the basic-block was vectorized. */
3045 vect_slp_bb (basic_block bb
)
3047 bb_vec_info bb_vinfo
;
3048 gimple_stmt_iterator gsi
;
3049 bool any_vectorized
= false;
3050 auto_vector_sizes vector_sizes
;
3052 if (dump_enabled_p ())
3053 dump_printf_loc (MSG_NOTE
, vect_location
, "===vect_slp_analyze_bb===\n");
3055 /* Autodetect first vector size we try. */
3056 current_vector_size
= 0;
3057 targetm
.vectorize
.autovectorize_vector_sizes (&vector_sizes
);
3058 unsigned int next_size
= 0;
3060 gsi
= gsi_start_bb (bb
);
3062 poly_uint64 autodetected_vector_size
= 0;
3065 if (gsi_end_p (gsi
))
3068 gimple_stmt_iterator region_begin
= gsi
;
3069 vec
<data_reference_p
> datarefs
= vNULL
;
3072 for (; !gsi_end_p (gsi
); gsi_next (&gsi
))
3074 gimple
*stmt
= gsi_stmt (gsi
);
3075 if (is_gimple_debug (stmt
))
3079 if (gimple_location (stmt
) != UNKNOWN_LOCATION
)
3080 vect_location
= gimple_location (stmt
);
3082 if (!find_data_references_in_stmt (NULL
, stmt
, &datarefs
))
3086 /* Skip leading unhandled stmts. */
3087 if (gsi_stmt (region_begin
) == gsi_stmt (gsi
))
3093 gimple_stmt_iterator region_end
= gsi
;
3095 bool vectorized
= false;
3097 bb_vinfo
= vect_slp_analyze_bb_1 (region_begin
, region_end
,
3098 datarefs
, insns
, fatal
);
3100 && dbg_cnt (vect_slp
))
3102 if (dump_enabled_p ())
3103 dump_printf_loc (MSG_NOTE
, vect_location
, "SLPing BB part\n");
3105 vect_schedule_slp (bb_vinfo
);
3107 if (dump_enabled_p ())
3108 dump_printf_loc (MSG_NOTE
, vect_location
,
3109 "basic block part vectorized\n");
3115 any_vectorized
|= vectorized
;
3118 autodetected_vector_size
= current_vector_size
;
3120 if (next_size
< vector_sizes
.length ()
3121 && known_eq (vector_sizes
[next_size
], autodetected_vector_size
))
3125 || next_size
== vector_sizes
.length ()
3126 || known_eq (current_vector_size
, 0U)
3127 /* If vect_slp_analyze_bb_1 signaled that analysis for all
3128 vector sizes will fail do not bother iterating. */
3131 if (gsi_end_p (region_end
))
3134 /* Skip the unhandled stmt. */
3137 /* And reset vector sizes. */
3138 current_vector_size
= 0;
3143 /* Try the next biggest vector size. */
3144 current_vector_size
= vector_sizes
[next_size
++];
3145 if (dump_enabled_p ())
3147 dump_printf_loc (MSG_NOTE
, vect_location
,
3148 "***** Re-trying analysis with "
3150 dump_dec (MSG_NOTE
, current_vector_size
);
3151 dump_printf (MSG_NOTE
, "\n");
3159 return any_vectorized
;
3163 /* Return 1 if vector type of boolean constant which is OPNUM
3164 operand in statement STMT is a boolean vector. */
3167 vect_mask_constant_operand_p (gimple
*stmt
, int opnum
)
3169 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
3170 enum tree_code code
= gimple_expr_code (stmt
);
3173 enum vect_def_type dt
;
3175 /* For comparison and COND_EXPR type is chosen depending
3176 on the other comparison operand. */
3177 if (TREE_CODE_CLASS (code
) == tcc_comparison
)
3180 op
= gimple_assign_rhs1 (stmt
);
3182 op
= gimple_assign_rhs2 (stmt
);
3184 if (!vect_is_simple_use (op
, stmt_vinfo
->vinfo
, &def_stmt
,
3188 return !vectype
|| VECTOR_BOOLEAN_TYPE_P (vectype
);
3191 if (code
== COND_EXPR
)
3193 tree cond
= gimple_assign_rhs1 (stmt
);
3195 if (TREE_CODE (cond
) == SSA_NAME
)
3198 op
= TREE_OPERAND (cond
, 1);
3200 op
= TREE_OPERAND (cond
, 0);
3202 if (!vect_is_simple_use (op
, stmt_vinfo
->vinfo
, &def_stmt
,
3206 return !vectype
|| VECTOR_BOOLEAN_TYPE_P (vectype
);
3209 return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo
));
3213 /* For constant and loop invariant defs of SLP_NODE this function returns
3214 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
3215 OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
3216 scalar stmts. NUMBER_OF_VECTORS is the number of vector defs to create.
3217 REDUC_INDEX is the index of the reduction operand in the statements, unless
3221 vect_get_constant_vectors (tree op
, slp_tree slp_node
,
3222 vec
<tree
> *vec_oprnds
,
3223 unsigned int op_num
, unsigned int number_of_vectors
)
3225 vec
<gimple
*> stmts
= SLP_TREE_SCALAR_STMTS (slp_node
);
3226 gimple
*stmt
= stmts
[0];
3227 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
3230 unsigned j
, number_of_places_left_in_vector
;
3233 int group_size
= stmts
.length ();
3234 unsigned int vec_num
, i
;
3235 unsigned number_of_copies
= 1;
3237 voprnds
.create (number_of_vectors
);
3238 bool constant_p
, is_store
;
3239 tree neutral_op
= NULL
;
3240 enum tree_code code
= gimple_expr_code (stmt
);
3241 gimple_seq ctor_seq
= NULL
;
3243 /* Check if vector type is a boolean vector. */
3244 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op
))
3245 && vect_mask_constant_operand_p (stmt
, op_num
))
3247 = build_same_sized_truth_vector_type (STMT_VINFO_VECTYPE (stmt_vinfo
));
3249 vector_type
= get_vectype_for_scalar_type (TREE_TYPE (op
));
3250 /* Enforced by vect_get_and_check_slp_defs. */
3251 nunits
= TYPE_VECTOR_SUBPARTS (vector_type
);
3253 if (STMT_VINFO_DATA_REF (stmt_vinfo
))
3256 op
= gimple_assign_rhs1 (stmt
);
3263 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
3264 created vectors. It is greater than 1 if unrolling is performed.
3266 For example, we have two scalar operands, s1 and s2 (e.g., group of
3267 strided accesses of size two), while NUNITS is four (i.e., four scalars
3268 of this type can be packed in a vector). The output vector will contain
3269 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
3272 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
3273 containing the operands.
3275 For example, NUNITS is four as before, and the group size is 8
3276 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
3277 {s5, s6, s7, s8}. */
3279 number_of_copies
= nunits
* number_of_vectors
/ group_size
;
3281 number_of_places_left_in_vector
= nunits
;
3283 tree_vector_builder
elts (vector_type
, nunits
, 1);
3284 elts
.quick_grow (nunits
);
3285 bool place_after_defs
= false;
3286 for (j
= 0; j
< number_of_copies
; j
++)
3288 for (i
= group_size
- 1; stmts
.iterate (i
, &stmt
); i
--)
3291 op
= gimple_assign_rhs1 (stmt
);
3298 tree cond
= gimple_assign_rhs1 (stmt
);
3299 if (TREE_CODE (cond
) == SSA_NAME
)
3300 op
= gimple_op (stmt
, op_num
+ 1);
3301 else if (op_num
== 0 || op_num
== 1)
3302 op
= TREE_OPERAND (cond
, op_num
);
3306 op
= gimple_assign_rhs2 (stmt
);
3308 op
= gimple_assign_rhs3 (stmt
);
3314 op
= gimple_call_arg (stmt
, op_num
);
3321 op
= gimple_op (stmt
, op_num
+ 1);
3322 /* Unlike the other binary operators, shifts/rotates have
3323 the shift count being int, instead of the same type as
3324 the lhs, so make sure the scalar is the right type if
3325 we are dealing with vectors of
3326 long long/long/short/char. */
3327 if (op_num
== 1 && TREE_CODE (op
) == INTEGER_CST
)
3328 op
= fold_convert (TREE_TYPE (vector_type
), op
);
3332 op
= gimple_op (stmt
, op_num
+ 1);
3337 /* Create 'vect_ = {op0,op1,...,opn}'. */
3338 number_of_places_left_in_vector
--;
3340 if (!types_compatible_p (TREE_TYPE (vector_type
), TREE_TYPE (op
)))
3342 if (CONSTANT_CLASS_P (op
))
3344 if (VECTOR_BOOLEAN_TYPE_P (vector_type
))
3346 /* Can't use VIEW_CONVERT_EXPR for booleans because
3347 of possibly different sizes of scalar value and
3349 if (integer_zerop (op
))
3350 op
= build_int_cst (TREE_TYPE (vector_type
), 0);
3351 else if (integer_onep (op
))
3352 op
= build_all_ones_cst (TREE_TYPE (vector_type
));
3357 op
= fold_unary (VIEW_CONVERT_EXPR
,
3358 TREE_TYPE (vector_type
), op
);
3359 gcc_assert (op
&& CONSTANT_CLASS_P (op
));
3363 tree new_temp
= make_ssa_name (TREE_TYPE (vector_type
));
3365 if (VECTOR_BOOLEAN_TYPE_P (vector_type
))
3368 = build_all_ones_cst (TREE_TYPE (vector_type
));
3370 = build_zero_cst (TREE_TYPE (vector_type
));
3371 gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (op
)));
3372 init_stmt
= gimple_build_assign (new_temp
, COND_EXPR
,
3378 op
= build1 (VIEW_CONVERT_EXPR
, TREE_TYPE (vector_type
),
3381 = gimple_build_assign (new_temp
, VIEW_CONVERT_EXPR
,
3384 gimple_seq_add_stmt (&ctor_seq
, init_stmt
);
3388 elts
[number_of_places_left_in_vector
] = op
;
3389 if (!CONSTANT_CLASS_P (op
))
3391 if (TREE_CODE (orig_op
) == SSA_NAME
3392 && !SSA_NAME_IS_DEFAULT_DEF (orig_op
)
3393 && STMT_VINFO_BB_VINFO (stmt_vinfo
)
3394 && (STMT_VINFO_BB_VINFO (stmt_vinfo
)->bb
3395 == gimple_bb (SSA_NAME_DEF_STMT (orig_op
))))
3396 place_after_defs
= true;
3398 if (number_of_places_left_in_vector
== 0)
3401 vec_cst
= elts
.build ();
3404 vec
<constructor_elt
, va_gc
> *v
;
3406 vec_alloc (v
, nunits
);
3407 for (k
= 0; k
< nunits
; ++k
)
3408 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
, elts
[k
]);
3409 vec_cst
= build_constructor (vector_type
, v
);
3412 gimple_stmt_iterator gsi
;
3413 if (place_after_defs
)
3416 (vect_find_last_scalar_stmt_in_slp (slp_node
));
3417 init
= vect_init_vector (stmt
, vec_cst
, vector_type
, &gsi
);
3420 init
= vect_init_vector (stmt
, vec_cst
, vector_type
, NULL
);
3421 if (ctor_seq
!= NULL
)
3423 gsi
= gsi_for_stmt (SSA_NAME_DEF_STMT (init
));
3424 gsi_insert_seq_before_without_update (&gsi
, ctor_seq
,
3428 voprnds
.quick_push (init
);
3429 place_after_defs
= false;
3430 number_of_places_left_in_vector
= nunits
;
3432 elts
.new_vector (vector_type
, nunits
, 1);
3433 elts
.quick_grow (nunits
);
3438 /* Since the vectors are created in the reverse order, we should invert
3440 vec_num
= voprnds
.length ();
3441 for (j
= vec_num
; j
!= 0; j
--)
3443 vop
= voprnds
[j
- 1];
3444 vec_oprnds
->quick_push (vop
);
3449 /* In case that VF is greater than the unrolling factor needed for the SLP
3450 group of stmts, NUMBER_OF_VECTORS to be created is greater than
3451 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
3452 to replicate the vectors. */
3453 while (number_of_vectors
> vec_oprnds
->length ())
3455 tree neutral_vec
= NULL
;
3460 neutral_vec
= build_vector_from_val (vector_type
, neutral_op
);
3462 vec_oprnds
->quick_push (neutral_vec
);
3466 for (i
= 0; vec_oprnds
->iterate (i
, &vop
) && i
< vec_num
; i
++)
3467 vec_oprnds
->quick_push (vop
);
3473 /* Get vectorized definitions from SLP_NODE that contains corresponding
3474 vectorized def-stmts. */
3477 vect_get_slp_vect_defs (slp_tree slp_node
, vec
<tree
> *vec_oprnds
)
3480 gimple
*vec_def_stmt
;
3483 gcc_assert (SLP_TREE_VEC_STMTS (slp_node
).exists ());
3485 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node
), i
, vec_def_stmt
)
3487 gcc_assert (vec_def_stmt
);
3488 if (gimple_code (vec_def_stmt
) == GIMPLE_PHI
)
3489 vec_oprnd
= gimple_phi_result (vec_def_stmt
);
3491 vec_oprnd
= gimple_get_lhs (vec_def_stmt
);
3492 vec_oprnds
->quick_push (vec_oprnd
);
3497 /* Get vectorized definitions for SLP_NODE.
3498 If the scalar definitions are loop invariants or constants, collect them and
3499 call vect_get_constant_vectors() to create vector stmts.
3500 Otherwise, the def-stmts must be already vectorized and the vectorized stmts
3501 must be stored in the corresponding child of SLP_NODE, and we call
3502 vect_get_slp_vect_defs () to retrieve them. */
/* OPS holds one scalar operand per position; for each, a vec<tree> of
   vector defs is pushed onto *VEC_OPRNDS (ownership of each inner vector
   transfers to the caller via quick_push of VEC_DEFS).  */
3505 vect_get_slp_defs (vec
<tree
> ops
, slp_tree slp_node
,
3506 vec
<vec
<tree
> > *vec_oprnds
)
3509 int number_of_vects
= 0, i
;
3510 unsigned int child_index
= 0;
3511 HOST_WIDE_INT lhs_size_unit
, rhs_size_unit
;
3512 slp_tree child
= NULL
;
3515 bool vectorized_defs
;
3517 first_stmt
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
3518 FOR_EACH_VEC_ELT (ops
, i
, oprnd
)
3520 /* For each operand we check if it has vectorized definitions in a child
3521 node or we need to create them (for invariants and constants). We
3522 check if the LHS of the first stmt of the next child matches OPRND.
3523 If it does, we found the correct child. Otherwise, we call
3524 vect_get_constant_vectors (), and not advance CHILD_INDEX in order
3525 to check this child node for the next operand. */
3526 vectorized_defs
= false;
3527 if (SLP_TREE_CHILDREN (slp_node
).length () > child_index
)
3529 child
= SLP_TREE_CHILDREN (slp_node
)[child_index
];
3531 /* We have to check both pattern and original def, if available. */
/* Only internal-def children carry vectorized statements we can reuse;
   external/constant children are handled by the constant path below.  */
3532 if (SLP_TREE_DEF_TYPE (child
) == vect_internal_def
)
3534 gimple
*first_def
= SLP_TREE_SCALAR_STMTS (child
)[0];
/* RELATED is the pattern counterpart of FIRST_DEF (may be absent);
   the operand is matched against either definition.  */
3536 = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def
));
3539 if (gimple_code (first_def
) == GIMPLE_PHI
)
3540 first_def_op
= gimple_phi_result (first_def
);
3542 first_def_op
= gimple_get_lhs (first_def
);
3543 if (operand_equal_p (oprnd
, first_def_op
, 0)
3545 && operand_equal_p (oprnd
, gimple_get_lhs (related
), 0)))
3547 /* The number of vector defs is determined by the number of
3548 vector statements in the node from which we get those
3550 number_of_vects
= SLP_TREE_NUMBER_OF_VEC_STMTS (child
);
3551 vectorized_defs
= true;
3559 if (!vectorized_defs
)
3563 number_of_vects
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
3564 /* Number of vector stmts was calculated according to LHS in
3565 vect_schedule_slp_instance (), fix it by replacing LHS with
3566 RHS, if necessary. See vect_get_smallest_scalar_type () for
3568 vect_get_smallest_scalar_type (first_stmt
, &lhs_size_unit
,
/* Scale the vector count by the RHS/LHS element-size ratio so
   narrower/wider operand elements get the right number of vectors.  */
3570 if (rhs_size_unit
!= lhs_size_unit
)
3572 number_of_vects
*= rhs_size_unit
;
3573 number_of_vects
/= lhs_size_unit
;
3578 /* Allocate memory for vectorized defs. */
3580 vec_defs
.create (number_of_vects
);
3582 /* For reduction defs we call vect_get_constant_vectors (), since we are
3583 looking for initial loop invariant values. */
3584 if (vectorized_defs
)
3585 /* The defs are already vectorized. */
3586 vect_get_slp_vect_defs (child
, &vec_defs
);
3588 /* Build vectors from scalar defs. */
3589 vect_get_constant_vectors (oprnd
, slp_node
, &vec_defs
, i
,
3592 vec_oprnds
->quick_push (vec_defs
);
3596 /* Generate vector permute statements from a list of loads in DR_CHAIN.
3597 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
3598 permute statements for the SLP node NODE of the SLP instance
3599 SLP_NODE_INSTANCE. */
3602 vect_transform_slp_perm_load (slp_tree node
, vec
<tree
> dr_chain
,
3603 gimple_stmt_iterator
*gsi
, poly_uint64 vf
,
3604 slp_instance slp_node_instance
, bool analyze_only
,
3607 gimple
*stmt
= SLP_TREE_SCALAR_STMTS (node
)[0];
3608 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3609 tree mask_element_type
= NULL_TREE
, mask_type
;
3610 int nunits
, vec_index
= 0;
3611 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3612 int group_size
= SLP_INSTANCE_GROUP_SIZE (slp_node_instance
);
3615 unsigned HOST_WIDE_INT const_vf
;
3617 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info
))
/* Work from the first element of the interleaving group, which carries
   the group-wide info (e.g. STMT_VINFO_GROUP_SIZE used below).  */
3620 stmt_info
= vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info
));
3622 mode
= TYPE_MODE (vectype
);
3624 /* At the moment, all permutations are represented using per-element
3625 indices, so we can't cope with variable vectorization factors. */
3626 if (!vf
.is_constant (&const_vf
))
3629 /* The generic VEC_PERM_EXPR code always uses an integral type of the
3630 same size as the vector element being permuted. */
3631 mask_element_type
= lang_hooks
.types
.type_for_mode
3632 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype
))).require (), 1);
3633 mask_type
= get_vectype_for_scalar_type (mask_element_type
);
3634 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
3635 vec_perm_builder
mask (nunits
, nunits
, 1);
3636 mask
.quick_grow (nunits
);
3637 vec_perm_indices indices
;
3639 /* Initialize the vect stmts of NODE to properly insert the generated
3642 for (unsigned i
= SLP_TREE_VEC_STMTS (node
).length ();
3643 i
< SLP_TREE_NUMBER_OF_VEC_STMTS (node
); i
++)
3644 SLP_TREE_VEC_STMTS (node
).quick_push (NULL
);
3646 /* Generate permutation masks for every NODE. Number of masks for each NODE
3647 is equal to GROUP_SIZE.
3648 E.g., we have a group of three nodes with three loads from the same
3649 location in each node, and the vector size is 4. I.e., we have a
3650 a0b0c0a1b1c1... sequence and we need to create the following vectors:
3651 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
3652 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
3655 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
3656 The last mask is illegal since we assume two operands for permute
3657 operation, and the mask element values can't be outside that range.
3658 Hence, the last mask must be converted into {2,5,5,5}.
3659 For the first two permutations we need the first and the second input
3660 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
3661 we need the second and the third vectors: {b1,c1,a2,b2} and
3664 int vect_stmts_counter
= 0;
3666 int first_vec_index
= -1;
3667 int second_vec_index
= -1;
/* Walk every scalar iteration (VF copies of a GROUP_SIZE group) and
   translate each load-permutation entry into a two-operand mask index.  */
3671 for (unsigned int j
= 0; j
< const_vf
; j
++)
3673 for (int k
= 0; k
< group_size
; k
++)
/* I is the flat element index across the whole access group.  */
3675 int i
= (SLP_TREE_LOAD_PERMUTATION (node
)[k
]
3676 + j
* STMT_VINFO_GROUP_SIZE (stmt_info
));
3677 vec_index
= i
/ nunits
;
3678 mask_element
= i
% nunits
;
/* Track which source vectors this mask refers to; a VEC_PERM_EXPR
   takes at most two inputs, so a third distinct vector is fatal
   (only reachable during analysis, see the assert below).  */
3679 if (vec_index
== first_vec_index
3680 || first_vec_index
== -1)
3682 first_vec_index
= vec_index
;
3684 else if (vec_index
== second_vec_index
3685 || second_vec_index
== -1)
3687 second_vec_index
= vec_index
;
/* Elements of the second input are addressed by NUNITS..2*NUNITS-1.  */
3688 mask_element
+= nunits
;
3692 if (dump_enabled_p ())
3694 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3695 "permutation requires at "
3696 "least three vectors ")
;
3697 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
3700 gcc_assert (analyze_only
);
3704 gcc_assert (mask_element
>= 0
3705 && mask_element
< 2 * nunits
);
/* NOOP_P stays true only while the mask is the identity (presumably
   tracked by a dropped line near here -- extraction lost some code).  */
3706 if (mask_element
!= index
)
3708 mask
[index
++] = mask_element
;
/* Once a full mask is collected, verify the target supports it.  */
3710 if (index
== nunits
&& !noop_p
)
3712 indices
.new_vector (mask
, 2, nunits
);
3713 if (!can_vec_perm_const_p (mode
, indices
))
3715 if (dump_enabled_p ())
3717 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
3719 "unsupported vect permute { ");
3720 for (i
= 0; i
< nunits
; ++i
)
3721 dump_printf (MSG_MISSED_OPTIMIZATION
,
3722 HOST_WIDE_INT_PRINT_DEC
" ", mask
[i
]);
3723 dump_printf (MSG_MISSED_OPTIMIZATION
, "}\n");
3725 gcc_assert (analyze_only
);
3732 if (index
== nunits
)
3736 tree mask_vec
= NULL_TREE
;
3739 mask_vec
= vec_perm_indices_to_tree (mask_type
, indices
);
/* A single-input permute uses the first vector for both operands.  */
3741 if (second_vec_index
== -1)
3742 second_vec_index
= first_vec_index
;
3744 /* Generate the permute statement if necessary. */
3745 tree first_vec
= dr_chain
[first_vec_index
];
3746 tree second_vec
= dr_chain
[second_vec_index
];
3751 = vect_create_destination_var (gimple_assign_lhs (stmt
),
3753 perm_dest
= make_ssa_name (perm_dest
);
3754 perm_stmt
= gimple_build_assign (perm_dest
,
3756 first_vec
, second_vec
,
3758 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
3761 /* If mask was NULL_TREE generate the requested
3762 identity transform. */
3763 perm_stmt
= SSA_NAME_DEF_STMT (first_vec
);
3765 /* Store the vector statement in NODE. */
3766 SLP_TREE_VEC_STMTS (node
)[vect_stmts_counter
++] = perm_stmt
;
/* Reset for the next mask so new source vectors can be picked.  */
3770 first_vec_index
= -1;
3771 second_vec_index
= -1;
/* Map from a set of scalar stmts (the key vector) to the SLP node that
   vectorized them; used to share vectorized stmts between SLP nodes that
   cover identical scalar stmt sequences (see vect_schedule_slp_instance).  */
3780 typedef hash_map
<vec
<gimple
*>, slp_tree
,
3781 simple_hashmap_traits
<bst_traits
, slp_tree
> >
3782 scalar_stmts_to_slp_tree_map_t
;
3784 /* Vectorize SLP instance tree in postorder. */
/* Children are vectorized first (recursion below), then this node's own
   scalar stmt is transformed; BST_MAP deduplicates nodes with identical
   scalar stmt sets so their vector stmts are generated only once.  */
3787 vect_schedule_slp_instance (slp_tree node
, slp_instance instance
,
3788 scalar_stmts_to_slp_tree_map_t
*bst_map
)
3791 bool grouped_store
, is_store
;
3792 gimple_stmt_iterator si
;
3793 stmt_vec_info stmt_info
;
3794 unsigned int group_size
;
/* Nothing to emit for external/constant nodes.  */
3799 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
3802 /* See if we have already vectorized the same set of stmts and reuse their
3803 vectorized stmts. */
3805 = bst_map
->get_or_insert (SLP_TREE_SCALAR_STMTS (node
).copy ());
3808 SLP_TREE_VEC_STMTS (node
).safe_splice (SLP_TREE_VEC_STMTS (leader
));
/* Postorder: vectorize all children before this node.  */
3813 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
3814 vect_schedule_slp_instance (child
, instance
, bst_map
);
3816 /* Push SLP node def-type to stmts. */
/* Temporarily overwritten; restored to vect_internal_def at the end.  */
3817 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
3818 if (SLP_TREE_DEF_TYPE (child
) != vect_internal_def
)
3819 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child
), j
, stmt
)
3820 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt
)) = SLP_TREE_DEF_TYPE (child
);
3822 stmt
= SLP_TREE_SCALAR_STMTS (node
)[0];
3823 stmt_info
= vinfo_for_stmt (stmt
);
3825 /* VECTYPE is the type of the destination. */
3826 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3827 group_size
= SLP_INSTANCE_GROUP_SIZE (instance
);
3829 if (!SLP_TREE_VEC_STMTS (node
).exists ())
3830 SLP_TREE_VEC_STMTS (node
).create (SLP_TREE_NUMBER_OF_VEC_STMTS (node
));
3832 if (dump_enabled_p ())
3834 dump_printf_loc (MSG_NOTE
,vect_location
,
3835 "------>vectorizing SLP node starting from: ");
3836 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
3839 /* Vectorized stmts go before the last scalar stmt which is where
3840 all uses are ready. */
3841 si
= gsi_for_stmt (vect_find_last_scalar_stmt_in_slp (node
));
3843 /* Mark the first element of the reduction chain as reduction to properly
3844 transform the node. In the analysis phase only the last element of the
3845 chain is marked as reduction. */
3846 if (GROUP_FIRST_ELEMENT (stmt_info
) && !STMT_VINFO_GROUPED_ACCESS (stmt_info
)
3847 && GROUP_FIRST_ELEMENT (stmt_info
) == stmt
)
3849 STMT_VINFO_DEF_TYPE (stmt_info
) = vect_reduction_def
;
3850 STMT_VINFO_TYPE (stmt_info
) = reduc_vec_info_type
;
3853 /* Handle two-operation SLP nodes by vectorizing the group with
3854 both operations and then performing a merge. */
3855 if (SLP_TREE_TWO_OPERATORS (node
))
3857 enum tree_code code0
= gimple_assign_rhs_code (stmt
);
3858 enum tree_code ocode
= ERROR_MARK
;
/* MASK records per-lane which of the two rhs codes each scalar stmt
   uses: 1 for the "other" code OCODE, 0 for CODE0.  */
3860 vec_perm_builder
mask (group_size
, group_size
, 1);
3861 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, ostmt
)
3862 if (gimple_assign_rhs_code (ostmt
) != code0
)
3864 mask
.quick_push (1);
3865 ocode
= gimple_assign_rhs_code (ostmt
);
3868 mask
.quick_push (0);
3869 if (ocode
!= ERROR_MARK
)
3874 tree tmask
= NULL_TREE
;
/* Vectorize the whole group once with CODE0 (results in V0), then
   once with OCODE (results in V1), temporarily rewriting the stmt's
   rhs code; restore CODE0 afterwards.  */
3875 vect_transform_stmt (stmt
, &si
, &grouped_store
, node
, instance
);
3876 v0
= SLP_TREE_VEC_STMTS (node
).copy ();
3877 SLP_TREE_VEC_STMTS (node
).truncate (0);
3878 gimple_assign_set_rhs_code (stmt
, ocode
);
3879 vect_transform_stmt (stmt
, &si
, &grouped_store
, node
, instance
);
3880 gimple_assign_set_rhs_code (stmt
, code0
);
3881 v1
= SLP_TREE_VEC_STMTS (node
).copy ();
3882 SLP_TREE_VEC_STMTS (node
).truncate (0);
/* Build the integral element type for the VEC_PERM_EXPR selector.  */
3883 tree meltype
= build_nonstandard_integer_type
3884 (GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype
))), 1);
3885 tree mvectype
= get_same_sized_vectype (meltype
, vectype
);
/* For each pair of vector results, merge V0[j] and V1[j] lane-wise
   according to MASK via a VEC_PERM_EXPR.  */
3887 for (j
= 0; j
< v0
.length (); ++j
)
3889 unsigned int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
3890 tree_vector_builder
melts (mvectype
, nunits
, 1);
3891 for (l
= 0; l
< nunits
; ++l
)
/* K wraps around GROUP_SIZE; the dropped line here presumably
   resets it (extraction lost some code).  */
3893 if (k
>= group_size
)
/* Lane selector: MASK[k] picks vector 0 or 1, L the lane.  */
3895 tree t
= build_int_cst (meltype
, mask
[k
++] * nunits
+ l
);
3896 melts
.quick_push (t
);
3898 tmask
= melts
.build ();
3900 /* ??? Not all targets support a VEC_PERM_EXPR with a
3901 constant mask that would translate to a vec_merge RTX
3902 (with their vec_perm_const_ok). We can either not
3903 vectorize in that case or let veclower do its job.
3904 Unfortunately that isn't too great and at least for
3905 plus/minus we'd eventually like to match targets
3906 vector addsub instructions. */
3908 vstmt
= gimple_build_assign (make_ssa_name (vectype
),
3910 gimple_assign_lhs (v0
[j
]),
3911 gimple_assign_lhs (v1
[j
]), tmask
);
3912 vect_finish_stmt_generation (stmt
, vstmt
, &si
);
3913 SLP_TREE_VEC_STMTS (node
).quick_push (vstmt
);
/* Ordinary (single-operation) node: one transform call does it all.  */
3920 is_store
= vect_transform_stmt (stmt
, &si
, &grouped_store
, node
, instance
);
3922 /* Restore stmt def-types. */
3923 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
3924 if (SLP_TREE_DEF_TYPE (child
) != vect_internal_def
)
3925 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child
), j
, stmt
)
3926 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt
)) = vect_internal_def
;
3931 /* Replace scalar calls from SLP node NODE with setting of their lhs to zero.
3932 For loop vectorization this is done in vectorizable_call, but for SLP
3933 it needs to be deferred until end of vect_schedule_slp, because multiple
3934 SLP instances may refer to the same scalar stmt. */
3937 vect_remove_slp_scalar_calls (slp_tree node
)
3939 gimple
*stmt
, *new_stmt
;
3940 gimple_stmt_iterator gsi
;
3944 stmt_vec_info stmt_info
;
/* Only internal-def nodes contain scalar stmts of the vectorized code.  */
3946 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
/* Recurse into children first so the whole subtree is processed.  */
3949 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
3950 vect_remove_slp_scalar_calls (child
);
3952 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
/* Skip non-calls and calls already removed from the IL (no bb).  */
3954 if (!is_gimple_call (stmt
) || gimple_bb (stmt
) == NULL
)
3956 stmt_info
= vinfo_for_stmt (stmt
);
/* Skip stmts without vinfo, pattern stmts, and stmts that still have
   scalar uses (not pure SLP).  */
3957 if (stmt_info
== NULL
3958 || is_pattern_stmt_p (stmt_info
)
3959 || !PURE_SLP_STMT (stmt_info
))
/* Replace the call "lhs = f (...)" by "lhs = 0" and transfer the
   stmt_vec_info to the replacement so later cleanup still finds it.  */
3961 lhs
= gimple_call_lhs (stmt
);
3962 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (TREE_TYPE (lhs
)));
3963 set_vinfo_for_stmt (new_stmt
, stmt_info
);
3964 set_vinfo_for_stmt (stmt
, NULL
);
3965 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
3966 gsi
= gsi_for_stmt (stmt
);
3967 gsi_replace (&gsi
, new_stmt
, false);
/* Keep the SSA web consistent: LHS is now defined by NEW_STMT.  */
3968 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt
)) = new_stmt
;
3972 /* Generate vector code for all SLP instances in the loop/basic block. */
3975 vect_schedule_slp (vec_info
*vinfo
)
3977 vec
<slp_instance
> slp_instances
;
3978 slp_instance instance
;
3980 bool is_store
= false;
3982 slp_instances
= vinfo
->slp_instances
;
3983 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
3985 /* Schedule the tree of INSTANCE. */
3986 scalar_stmts_to_slp_tree_map_t
*bst_map
3987 = new scalar_stmts_to_slp_tree_map_t ();
3988 is_store
= vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance
),
3991 if (dump_enabled_p ())
3992 dump_printf_loc (MSG_NOTE
, vect_location
,
3993 "vectorizing stmts using SLP.\n");
3996 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
3998 slp_tree root
= SLP_INSTANCE_TREE (instance
);
4001 gimple_stmt_iterator gsi
;
4003 /* Remove scalar call stmts. Do not do this for basic-block
4004 vectorization as not all uses may be vectorized.
4005 ??? Why should this be necessary? DCE should be able to
4006 remove the stmts itself.
4007 ??? For BB vectorization we can as well remove scalar
4008 stmts starting from the SLP tree root if they have no
4010 if (is_a
<loop_vec_info
> (vinfo
))
4011 vect_remove_slp_scalar_calls (root
);
4013 for (j
= 0; SLP_TREE_SCALAR_STMTS (root
).iterate (j
, &store
)
4014 && j
< SLP_INSTANCE_GROUP_SIZE (instance
); j
++)
4016 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store
)))
4019 if (is_pattern_stmt_p (vinfo_for_stmt (store
)))
4020 store
= STMT_VINFO_RELATED_STMT (vinfo_for_stmt (store
));
4021 /* Free the attached stmt_vec_info and remove the stmt. */
4022 gsi
= gsi_for_stmt (store
);
4023 unlink_stmt_vdef (store
);
4024 gsi_remove (&gsi
, true);
4025 release_defs (store
);
4026 free_stmt_vec_info (store
);