/* SLP - Basic Block Vectorization
   Copyright (C) 2007-2018 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tree-pass.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "fold-const.h"
#include "stor-layout.h"
#include "gimple-iterator.h"
#include "tree-vectorizer.h"
#include "langhooks.h"
#include "gimple-walk.h"
#include "tree-vector-builder.h"
#include "vec-perm-indices.h"
#include "gimple-fold.h"
#include "internal-fn.h"
/* Recursively free the memory allocated for the SLP tree rooted at NODE.  */

vect_free_slp_tree (slp_tree node)
{
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_free_slp_tree (child);

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    /* After transform some stmts are removed and thus their vinfo is gone.  */
    if (vinfo_for_stmt (stmt))
      {
        gcc_assert (STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt)) > 0);
        STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt))--;
      }

  SLP_TREE_CHILDREN (node).release ();
  SLP_TREE_SCALAR_STMTS (node).release ();
  SLP_TREE_VEC_STMTS (node).release ();
  SLP_TREE_LOAD_PERMUTATION (node).release ();
}

/* Free the memory allocated for the SLP instance.  */

vect_free_slp_instance (slp_instance instance)
{
  vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
  SLP_INSTANCE_LOADS (instance).release ();
}

/* Create an SLP node for SCALAR_STMTS.  */

vect_create_new_slp_node (vec<gimple *> scalar_stmts)
{
  gimple *stmt = scalar_stmts[0];

  if (is_gimple_call (stmt))
    nops = gimple_call_num_args (stmt);
  else if (is_gimple_assign (stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
        nops++;
    }
  else if (gimple_code (stmt) == GIMPLE_PHI)
    nops = 0;

  node = XNEW (struct _slp_tree);
  SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
  SLP_TREE_VEC_STMTS (node).create (0);
  SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
  SLP_TREE_CHILDREN (node).create (nops);
  SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
  SLP_TREE_TWO_OPERATORS (node) = false;
  SLP_TREE_DEF_TYPE (node) = vect_internal_def;

  FOR_EACH_VEC_ELT (scalar_stmts, i, stmt)
    STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt))++;
}
/* This structure is used in creation of an SLP tree.  Each instance
   corresponds to the same operand in a group of scalar stmts in an SLP
   node.  */
typedef struct _slp_oprnd_info
{
  /* Def-stmts for the operands.  */
  vec<gimple *> def_stmts;
  /* Information about the first statement, its vector def-type, type, the
     operand itself in case it's constant, and an indication if it's a pattern
     stmt.  */
  enum vect_def_type first_dt;

/* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
   operand.  */
static vec<slp_oprnd_info>
vect_create_oprnd_info (int nops, int group_size)
{
  slp_oprnd_info oprnd_info;
  vec<slp_oprnd_info> oprnds_info;

  oprnds_info.create (nops);
  for (i = 0; i < nops; i++)
    {
      oprnd_info = XNEW (struct _slp_oprnd_info);
      oprnd_info->def_stmts.create (group_size);
      oprnd_info->first_dt = vect_uninitialized_def;
      oprnd_info->first_op_type = NULL_TREE;
      oprnd_info->first_pattern = false;
      oprnd_info->second_pattern = false;
      oprnds_info.quick_push (oprnd_info);
    }
}

/* Free operands info.  */

vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)
{
  slp_oprnd_info oprnd_info;

  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      oprnd_info->def_stmts.release ();
      XDELETE (oprnd_info);
    }

  oprnds_info.release ();
}
/* Find the place of the data-ref in STMT in the interleaving chain that starts
   from FIRST_STMT.  Return -1 if the data-ref is not a part of the chain.  */
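/* Worked example (illustration only, not from the original sources): for an
   interleaving chain built from loads of a[0], a[1], a[3] (one-element gap
   at a[2]), the stmt loading a[0] is at place 0, a[1] at place 1 and a[3] at
   place 3, while a stmt whose data-ref is outside the chain yields -1.  */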
vect_get_place_in_interleaving_chain (gimple *stmt, gimple *first_stmt)
{
  gimple *next_stmt = first_stmt;

  if (first_stmt != DR_GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    return -1;

  if (next_stmt == stmt)
    return result;
  next_stmt = DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
  result += DR_GROUP_GAP (vinfo_for_stmt (next_stmt));
/* Check whether it is possible to load COUNT elements of type ELT_MODE
   using the method implemented by duplicate_and_interleave.  Return true
   if so, returning the number of intermediate vectors in *NVECTORS_OUT
   (if nonnull) and the type of each intermediate vector in *VECTOR_TYPE_OUT
   (if nonnull) and the permutes in *PERMUTES (if nonnull).  */
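/* Hypothetical example (illustration only): with a 16-byte vector size,
   COUNT == 2 and a 4-byte ELT_MODE, ELT_BYTES is 8, so each group of two
   elements can be viewed as a single 64-bit integer and a two-element
   integer vector covers the group, giving *NVECTORS_OUT == 1; if ELT_BYTES
   did not divide the vector size, the loop below would halve ELT_BYTES,
   double NVECTORS and retry.  */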
can_duplicate_and_interleave_p (unsigned int count, machine_mode elt_mode,
                                unsigned int *nvectors_out,
                                tree *vector_type_out, tree *permutes)
{
  poly_int64 elt_bytes = count * GET_MODE_SIZE (elt_mode);
  unsigned int nvectors = 1;

      scalar_int_mode int_mode;
      poly_int64 elt_bits = elt_bytes * BITS_PER_UNIT;
      if (multiple_p (current_vector_size, elt_bytes, &nelts)
          && int_mode_for_size (elt_bits, 0).exists (&int_mode))
        {
          tree int_type = build_nonstandard_integer_type
            (GET_MODE_BITSIZE (int_mode), 1);
          tree vector_type = build_vector_type (int_type, nelts);
          if (VECTOR_MODE_P (TYPE_MODE (vector_type)))
            {
              vec_perm_builder sel1 (nelts, 2, 3);
              vec_perm_builder sel2 (nelts, 2, 3);
              poly_int64 half_nelts = exact_div (nelts, 2);
              for (unsigned int i = 0; i < 3; ++i)
                {
                  sel1.quick_push (i + nelts);
                  sel2.quick_push (half_nelts + i);
                  sel2.quick_push (half_nelts + i + nelts);
                }
              vec_perm_indices indices1 (sel1, 2, nelts);
              vec_perm_indices indices2 (sel2, 2, nelts);
              if (can_vec_perm_const_p (TYPE_MODE (vector_type), indices1)
                  && can_vec_perm_const_p (TYPE_MODE (vector_type), indices2))
                {
                  *nvectors_out = nvectors;
                  *vector_type_out = vector_type;
                  permutes[0] = vect_gen_perm_mask_checked (vector_type,
                                                            indices1);
                  permutes[1] = vect_gen_perm_mask_checked (vector_type,
                                                            indices2);
                }
            }
        }
      if (!multiple_p (elt_bytes, 2, &elt_bytes))
        return false;
/* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
   they are of a valid type and that they match the defs of the first stmt of
   the SLP group (stored in OPRNDS_INFO).  This function tries to match stmts
   by swapping operands of STMTS[STMT_NUM] when possible.  Non-zero *SWAP
   indicates swap is required for cond_expr stmts.  Specifically, *SWAP
   is 1 if STMT is cond and operands of comparison need to be swapped;
   *SWAP is 2 if STMT is cond and code of comparison needs to be inverted.
   If there is any operand swap in this function, *SWAP is set to non-zero
   value.
   If there was a fatal error return -1; if the error could be corrected by
   swapping operands of father node of this one, return 1; if everything is
   ok return 0.  */
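/* Example for the *SWAP encoding (illustration only, not from the original
   sources): against a first stmt "x = a < b ? c : d", a candidate
   "y = f > e ? g : h" can match with *SWAP == 1 (swap the comparison
   operands, f > e becomes e < f), while "y = e >= f ? h : g" can match with
   *SWAP == 2 (invert the comparison code and swap the arms).  */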
vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char *swap,
                             vec<gimple *> stmts, unsigned stmt_num,
                             vec<slp_oprnd_info> *oprnds_info)
{
  gimple *stmt = stmts[stmt_num];
  unsigned int i, number_of_oprnds;
  enum vect_def_type dt = vect_uninitialized_def;
  bool pattern = false;
  slp_oprnd_info oprnd_info;
  int first_op_idx = 1;
  bool commutative = false;
  bool first_op_cond = false;
  bool first = stmt_num == 0;
  bool second = stmt_num == 1;

  if (is_gimple_call (stmt))
    {
      number_of_oprnds = gimple_call_num_args (stmt);
      first_op_idx = 3;
    }
  else if (is_gimple_assign (stmt))
    {
      enum tree_code code = gimple_assign_rhs_code (stmt);
      number_of_oprnds = gimple_num_ops (stmt) - 1;
      /* Swap can only be done for cond_expr if asked to, otherwise we
         could result in different comparison code to the first stmt.  */
      if (gimple_assign_rhs_code (stmt) == COND_EXPR
          && COMPARISON_CLASS_P (gimple_assign_rhs1 (stmt)))
        {
          first_op_cond = true;
          number_of_oprnds++;
        }
      else
        commutative = commutative_tree_code (code);
    }
  else
    return -1;

  bool swapped = (*swap != 0);
  gcc_assert (!swapped || first_op_cond);
  for (i = 0; i < number_of_oprnds; i++)
    {
      if (first_op_cond)
        {
          /* Map indicating how operands of cond_expr should be swapped.  */
          int maps[3][4] = {{0, 1, 2, 3}, {1, 0, 2, 3}, {0, 1, 3, 2}};
          int *map = maps[*swap];

          if (i < 2)
            oprnd = TREE_OPERAND (gimple_op (stmt, first_op_idx), map[i]);
          else
            oprnd = gimple_op (stmt, map[i]);
        }
      else
        oprnd = gimple_op (stmt, first_op_idx + (swapped ? !i : i));

      oprnd_info = (*oprnds_info)[i];

      if (!vect_is_simple_use (oprnd, vinfo, &dt, &def_stmt))
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: can't analyze def for ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
            }
          return -1;
        }

      /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
         from the pattern.  Check that all the stmts of the node are in the
         pattern.  */
      if (def_stmt && gimple_bb (def_stmt)
          && vect_stmt_in_region_p (vinfo, def_stmt)
          && vinfo_for_stmt (def_stmt)
          && is_pattern_stmt_p (vinfo_for_stmt (def_stmt)))
        {
          pattern = true;
          if (!first && !oprnd_info->first_pattern
              /* Allow different pattern state for the defs of the
                 first stmt in reduction chains.  */
              && (oprnd_info->first_dt != vect_reduction_def
                  || (!second && !oprnd_info->second_pattern)))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: some of the stmts"
                                   " are in a pattern, and others are not ");
                  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                }
              return 1;
            }

          dt = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));

          if (dt == vect_unknown_def_type)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "Unsupported pattern.\n");
              return -1;
            }

          switch (gimple_code (def_stmt))
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "unsupported defining stmt:\n");
            }
        }

      if (second)
        oprnd_info->second_pattern = pattern;

      if (first)
        {
          oprnd_info->first_dt = dt;
          oprnd_info->first_pattern = pattern;
          oprnd_info->first_op_type = TREE_TYPE (oprnd);
        }
      else
        {
          /* Not first stmt of the group, check that the def-stmt/s match
             the def-stmt/s of the first stmt.  Allow different definition
             types for reduction chains: the first stmt must be a
             vect_reduction_def (a phi node), and the rest
             vect_internal_def.  */
          tree type = TREE_TYPE (oprnd);
          if ((oprnd_info->first_dt != dt
               && !(oprnd_info->first_dt == vect_reduction_def
                    && dt == vect_internal_def)
               && !((oprnd_info->first_dt == vect_external_def
                     || oprnd_info->first_dt == vect_constant_def)
                    && (dt == vect_external_def
                        || dt == vect_constant_def)))
              || !types_compatible_p (oprnd_info->first_op_type, type))
            {
              /* Try swapping operands if we got a mismatch.  */
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "Build SLP failed: different types\n");
              return 1;
            }
          if ((dt == vect_constant_def
               || dt == vect_external_def)
              && !current_vector_size.is_constant ()
              && (TREE_CODE (type) == BOOLEAN_TYPE
                  || !can_duplicate_and_interleave_p (stmts.length (),
                                                      TYPE_MODE (type))))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: invalid type of def "
                                   "for variable-length SLP ");
                  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                }
              return -1;
            }
        }

      /* Check the types of the definitions.  */
      switch (dt)
        {
        case vect_constant_def:
        case vect_external_def:
          break;

        case vect_reduction_def:
        case vect_induction_def:
        case vect_internal_def:
          oprnd_info->def_stmts.quick_push (def_stmt);
          break;

        default:
          /* FORNOW: Not supported.  */
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: illegal type of def ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
            }
          return -1;
        }
    }

  /* Swap operands.  */
  if (swapped)
    {
      /* If there are already uses of this stmt in a SLP instance then
         we've committed to the operand order and can't swap it.  */
      if (STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt)) != 0)
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: cannot swap operands of "
                               "shared stmt ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
            }
          return -1;
        }

      if (first_op_cond)
        {
          tree cond = gimple_assign_rhs1 (stmt);
          enum tree_code code = TREE_CODE (cond);

          /* Swap.  */
          if (*swap == 1)
            {
              swap_ssa_operands (stmt, &TREE_OPERAND (cond, 0),
                                 &TREE_OPERAND (cond, 1));
              TREE_SET_CODE (cond, swap_tree_comparison (code));
            }
          /* Invert.  */
          else
            {
              swap_ssa_operands (stmt, gimple_assign_rhs2_ptr (stmt),
                                 gimple_assign_rhs3_ptr (stmt));
              bool honor_nans = HONOR_NANS (TREE_OPERAND (cond, 0));
              code = invert_tree_comparison (TREE_CODE (cond), honor_nans);
              gcc_assert (code != ERROR_MARK);
              TREE_SET_CODE (cond, code);
            }
        }
      else
        swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
                           gimple_assign_rhs2_ptr (stmt));
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location,
                           "swapped operands to match def types in ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
        }
    }

  *swap = swapped;
  return 0;
}
/* Return true if call statements CALL1 and CALL2 are similar enough
   to be combined into the same SLP group.  */
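/* For instance (illustration only): two calls to the same combined function,
   such as "_1 = SQRT (a_2)" and "_3 = SQRT (b_4)", with compatible argument
   and return types can be grouped, whereas calls to different fndecls or
   with different fntypes cannot.  */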
compatible_calls_p (gcall *call1, gcall *call2)
{
  unsigned int nargs = gimple_call_num_args (call1);
  if (nargs != gimple_call_num_args (call2))
    return false;

  if (gimple_call_combined_fn (call1) != gimple_call_combined_fn (call2))
    return false;

  if (gimple_call_internal_p (call1))
    {
      if (!types_compatible_p (TREE_TYPE (gimple_call_lhs (call1)),
                               TREE_TYPE (gimple_call_lhs (call2))))
        return false;
      for (unsigned int i = 0; i < nargs; ++i)
        if (!types_compatible_p (TREE_TYPE (gimple_call_arg (call1, i)),
                                 TREE_TYPE (gimple_call_arg (call2, i))))
          return false;
    }
  else
    {
      if (!operand_equal_p (gimple_call_fn (call1),
                            gimple_call_fn (call2), 0))
        return false;

      if (gimple_call_fntype (call1) != gimple_call_fntype (call2))
        return false;
    }
  return true;
}
/* A subroutine of vect_build_slp_tree for checking VECTYPE, which is the
   caller's attempt to find the vector type in STMT with the narrowest
   element type.  Return true if VECTYPE is nonnull and if it is valid
   for VINFO.  When returning true, update MAX_NUNITS to reflect the
   number of units in VECTYPE.  VINFO, GROUP_SIZE and MAX_NUNITS are
   as for vect_build_slp_tree.  */

vect_record_max_nunits (vec_info *vinfo, gimple *stmt, unsigned int group_size,
                        tree vectype, poly_uint64 *max_nunits)
{
  if (!vectype)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "Build SLP failed: unsupported data-type in ");
          dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
          dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
        }
      /* Fatal mismatch.  */
      return false;
    }

  /* If populating the vector type requires unrolling then fail
     before adjusting *max_nunits for basic-block vectorization.  */
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
  unsigned HOST_WIDE_INT const_nunits;
  if (is_a <bb_vec_info> (vinfo)
      && (!nunits.is_constant (&const_nunits)
          || const_nunits > group_size))
    {
      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                       "Build SLP failed: unrolling required "
                       "in basic block SLP\n");
      /* Fatal mismatch.  */
      return false;
    }

  /* In case of multiple types we need to detect the smallest type.  */
  vect_update_max_nunits (max_nunits, vectype);
  return true;
}
/* STMTS is a group of GROUP_SIZE SLP statements in which some
   statements do the same operation as the first statement and in which
   the others do ALT_STMT_CODE.  Return true if we can take one vector
   of the first operation and one vector of the second and permute them
   to get the required result.  VECTYPE is the type of the vector that
   would be permuted.  */
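/* Hypothetical example (not from the original sources): for GROUP_SIZE == 2
   stmts { a0 + b0, a1 - b1 } with ALT_STMT_CODE == MINUS_EXPR and a
   4-element VECTYPE, the selector built below is { 0, 5, 2, 7 }: even lanes
   are taken from the PLUS vector and odd lanes from the MINUS vector, and
   the function asks whether the target supports that constant permute.  */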
vect_two_operations_perm_ok_p (vec<gimple *> stmts, unsigned int group_size,
                               tree vectype, tree_code alt_stmt_code)
{
  unsigned HOST_WIDE_INT count;
  if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&count))
    return false;

  vec_perm_builder sel (count, count, 1);
  for (unsigned int i = 0; i < count; ++i)
    {
      unsigned int elt = i;
      if (gimple_assign_rhs_code (stmts[i % group_size]) == alt_stmt_code)
        elt += count;
      sel.quick_push (elt);
    }

  vec_perm_indices indices (sel, 2, count);
  return can_vec_perm_const_p (TYPE_MODE (vectype), indices);
}
/* Verify if the scalar stmts STMTS are isomorphic, require data
   permutation or are of unsupported types of operation.  Return
   true if they are, otherwise return false and indicate in *MATCHES
   which stmts are not isomorphic to the first one.  If MATCHES[0]
   is false then this indicates the comparison could not be
   carried out or the stmts will never be vectorized by SLP.

   Note COND_EXPR is possibly isomorphic to another one after swapping its
   operands.  Set SWAP[i] to 1 if stmt I is COND_EXPR and isomorphic to
   the first stmt by swapping the two operands of comparison; set SWAP[i]
   to 2 if stmt I is isomorphic to the first stmt by inverting the code
   of comparison.  Take A1 >= B1 ? X1 : Y1 as an example, it can be swapped
   to (B1 <= A1 ? X1 : Y1); or be inverted to (A1 < B1) ? Y1 : X1.  */
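/* As a concrete illustration (not part of the original sources), the group
     a[0] = b[0] + c[0];
     a[1] = b[1] - c[1];
   is accepted with *TWO_OPERATORS set, because the plus/minus mismatch can
   be fixed up by computing both operations and blending the lanes, whereas
   mixing, say, PLUS_EXPR and MULT_EXPR makes the stmts non-isomorphic and
   the build fails.  */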
vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
                       vec<gimple *> stmts, unsigned int group_size,
                       poly_uint64 *max_nunits, bool *matches,
                       bool *two_operators)
{
  gimple *first_stmt = stmts[0], *stmt = stmts[0];
  enum tree_code first_stmt_code = ERROR_MARK;
  enum tree_code alt_stmt_code = ERROR_MARK;
  enum tree_code rhs_code = ERROR_MARK;
  enum tree_code first_cond_code = ERROR_MARK;
  bool need_same_oprnds = false;
  tree vectype = NULL_TREE, first_op1 = NULL_TREE;
  machine_mode optab_op2_mode;
  machine_mode vec_mode;
  gimple *first_load = NULL, *prev_first_load = NULL;

  /* For every stmt in NODE find its def stmt/s.  */
  FOR_EACH_VEC_ELT (stmts, i, stmt)
    {
      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
        }

      /* Fail to vectorize statements marked as unvectorizable.  */
      if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: unvectorizable statement ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
            }
          /* Fatal mismatch.  */
        }

      lhs = gimple_get_lhs (stmt);
      if (lhs == NULL_TREE)
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: not GIMPLE_ASSIGN nor "
                               "GIMPLE_CALL ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
            }
          /* Fatal mismatch.  */
        }

      if (!vect_get_vector_types_for_stmt (stmt_info, &vectype,
                                           &nunits_vectype)
          || (nunits_vectype
              && !vect_record_max_nunits (vinfo, stmt, group_size,
                                          nunits_vectype, max_nunits)))
        /* Fatal mismatch.  */

      gcc_assert (vectype);

      if (gcall *call_stmt = dyn_cast <gcall *> (stmt))
        {
          rhs_code = CALL_EXPR;
          if ((gimple_call_internal_p (call_stmt)
               && (!vectorizable_internal_fn_p
                     (gimple_call_internal_fn (call_stmt))))
              || gimple_call_tail_p (call_stmt)
              || gimple_call_noreturn_p (call_stmt)
              || !gimple_call_nothrow_p (call_stmt)
              || gimple_call_chain (call_stmt))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: unsupported call type ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                    call_stmt, 0);
                }
              /* Fatal mismatch.  */
            }
        }
      else
        rhs_code = gimple_assign_rhs_code (stmt);

      /* Check the operation.  */
      first_stmt_code = rhs_code;

      /* Shift arguments should be equal in all the packed stmts for a
         vector shift with scalar shift operand.  */
      if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
          || rhs_code == LROTATE_EXPR
          || rhs_code == RROTATE_EXPR)
        {
          if (vectype == boolean_type_node)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "Build SLP failed: shift of a"
                                 " boolean.\n");
              /* Fatal mismatch.  */
            }

          vec_mode = TYPE_MODE (vectype);

          /* First see if we have a vector/vector shift.  */
          optab = optab_for_tree_code (rhs_code, vectype, optab_vector);
          if (!optab
              || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
            {
              /* No vector/vector shift, try for a vector/scalar shift.  */
              optab = optab_for_tree_code (rhs_code, vectype, optab_scalar);
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "Build SLP failed: no optab.\n");
              /* Fatal mismatch.  */

              icode = (int) optab_handler (optab, vec_mode);
              if (icode == CODE_FOR_nothing)
                {
                  if (dump_enabled_p ())
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "op not supported by target.\n");
                  /* Fatal mismatch.  */
                }
              optab_op2_mode = insn_data[icode].operand[2].mode;
              if (!VECTOR_MODE_P (optab_op2_mode))
                {
                  need_same_oprnds = true;
                  first_op1 = gimple_assign_rhs2 (stmt);
                }
            }
        }
      else if (rhs_code == WIDEN_LSHIFT_EXPR)
        {
          need_same_oprnds = true;
          first_op1 = gimple_assign_rhs2 (stmt);
        }

      if (first_stmt_code != rhs_code
          && alt_stmt_code == ERROR_MARK)
        alt_stmt_code = rhs_code;
      if (first_stmt_code != rhs_code
          && (first_stmt_code != IMAGPART_EXPR
              || rhs_code != REALPART_EXPR)
          && (first_stmt_code != REALPART_EXPR
              || rhs_code != IMAGPART_EXPR)
          /* Handle mismatches in plus/minus by computing both
             and merging the results.  */
          && !((first_stmt_code == PLUS_EXPR
                || first_stmt_code == MINUS_EXPR)
               && (alt_stmt_code == PLUS_EXPR
                   || alt_stmt_code == MINUS_EXPR)
               && rhs_code == alt_stmt_code)
          && !(STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
               && (first_stmt_code == ARRAY_REF
                   || first_stmt_code == BIT_FIELD_REF
                   || first_stmt_code == INDIRECT_REF
                   || first_stmt_code == COMPONENT_REF
                   || first_stmt_code == MEM_REF)))
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: different operation "
                               "in stmt ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "original stmt ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                first_stmt, 0);
            }
        }

      if (need_same_oprnds
          && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: different shift "
                               "arguments in ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
            }
        }

      if (rhs_code == CALL_EXPR)
        {
          gimple *first_stmt = stmts[0];
          if (!compatible_calls_p (as_a <gcall *> (first_stmt),
                                   as_a <gcall *> (stmt)))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: different calls in ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                    stmt, 0);
                }
            }
        }

      /* Grouped store or load.  */
      if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
        {
          if (REFERENCE_CLASS_P (lhs))
            ;
          else
            {
              first_load = DR_GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
              /* Check that there are no loads from different interleaving
                 chains in the same node.  */
              if (prev_first_load != first_load)
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                       vect_location,
                                       "Build SLP failed: different "
                                       "interleaving chains in one node ");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                        stmt, 0);
                    }
                }
              prev_first_load = first_load;
            }
        } /* Grouped access.  */
      else
        {
          if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
            {
              /* Not grouped load.  */
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: not grouped load ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                }
              /* FORNOW: Not grouped loads are not supported.  */
              /* Fatal mismatch.  */
            }

          /* Not memory operation.  */
          if (TREE_CODE_CLASS (rhs_code) != tcc_binary
              && TREE_CODE_CLASS (rhs_code) != tcc_unary
              && TREE_CODE_CLASS (rhs_code) != tcc_expression
              && TREE_CODE_CLASS (rhs_code) != tcc_comparison
              && rhs_code != CALL_EXPR)
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: operation");
                  dump_printf (MSG_MISSED_OPTIMIZATION, " unsupported ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                }
              /* Fatal mismatch.  */
            }

          if (rhs_code == COND_EXPR)
            {
              tree cond_expr = gimple_assign_rhs1 (stmt);
              enum tree_code cond_code = TREE_CODE (cond_expr);
              enum tree_code swap_code = ERROR_MARK;
              enum tree_code invert_code = ERROR_MARK;

              first_cond_code = TREE_CODE (cond_expr);
              else if (TREE_CODE_CLASS (cond_code) == tcc_comparison)
                {
                  bool honor_nans = HONOR_NANS (TREE_OPERAND (cond_expr, 0));
                  swap_code = swap_tree_comparison (cond_code);
                  invert_code = invert_tree_comparison (cond_code, honor_nans);
                }

              if (first_cond_code == cond_code)
                ;
              /* Isomorphic can be achieved by swapping.  */
              else if (first_cond_code == swap_code)
                ;
              /* Isomorphic can be achieved by inverting.  */
              else if (first_cond_code == invert_code)
                ;
              else
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "Build SLP failed: different"
                                       " operation");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                        stmt, 0);
                    }
                }
            }
        }
    }

  for (i = 0; i < group_size; ++i)
    matches[i] = true;

  /* If we allowed a two-operation SLP node verify the target can cope
     with the permute we are going to use.  */
  if (alt_stmt_code != ERROR_MARK
      && TREE_CODE_CLASS (alt_stmt_code) != tcc_reference)
    {
      if (vectype == boolean_type_node
          || !vect_two_operations_perm_ok_p (stmts, group_size,
                                             vectype, alt_stmt_code))
        {
          for (i = 0; i < group_size; ++i)
            if (gimple_assign_rhs_code (stmts[i]) == alt_stmt_code)
              {
                if (dump_enabled_p ())
                  {
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "Build SLP failed: different operation "
                                     "in stmt ");
                    dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                      stmts[i], 0);
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "original stmt ");
                    dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                      first_stmt, 0);
                  }
              }
        }
      *two_operators = true;
    }
}
/* Traits for the hash_set to record failed SLP builds for a stmt set.
   Note we never remove apart from at destruction time so we do not
   need a special value for deleted that differs from empty.  */
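/* Usage sketch (illustration only, not from the original sources): the SLP
   builder uses the scalar-stmt vector itself as the hash key, roughly

     vec<gimple *> x = stmts.copy ();
     bst_fail->add (x);

   so a later vect_build_slp_tree call for the same stmt set can bail out
   early via bst_fail->contains (stmts), and is_empty/is_deleted simply test
   whether the vec exists.  */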
  typedef vec<gimple *> value_type;
  typedef vec<gimple *> compare_type;
  static inline hashval_t hash (value_type);
  static inline bool equal (value_type existing, value_type candidate);
  static inline bool is_empty (value_type x) { return !x.exists (); }
  static inline bool is_deleted (value_type x) { return !x.exists (); }
  static inline void mark_empty (value_type &x) { x.release (); }
  static inline void mark_deleted (value_type &x) { x.release (); }
  static inline void remove (value_type &x) { x.release (); }

bst_traits::hash (value_type x)
{
  for (unsigned i = 0; i < x.length (); ++i)
    h.add_int (gimple_uid (x[i]));
}

bst_traits::equal (value_type existing, value_type candidate)
{
  if (existing.length () != candidate.length ())
    return false;
  for (unsigned i = 0; i < existing.length (); ++i)
    if (existing[i] != candidate[i])
      return false;
  return true;
}

typedef hash_set <vec <gimple *>, bst_traits> scalar_stmts_set_t;
static scalar_stmts_set_t *bst_fail;

typedef hash_map <vec <gimple *>, slp_tree,
		  simple_hashmap_traits <bst_traits, slp_tree> >
  scalar_stmts_to_slp_tree_map_t;

vect_build_slp_tree_2 (vec_info *vinfo,
		       vec<gimple *> stmts, unsigned int group_size,
		       poly_uint64 *max_nunits,
		       vec<slp_tree> *loads,
		       bool *matches, unsigned *npermutes, unsigned *tree_size,
		       unsigned max_tree_size);

vect_build_slp_tree (vec_info *vinfo,
		     vec<gimple *> stmts, unsigned int group_size,
		     poly_uint64 *max_nunits, vec<slp_tree> *loads,
		     bool *matches, unsigned *npermutes, unsigned *tree_size,
		     unsigned max_tree_size)
{
  if (bst_fail->contains (stmts))
    return NULL;
  slp_tree res = vect_build_slp_tree_2 (vinfo, stmts, group_size, max_nunits,
					loads, matches, npermutes, tree_size,
					max_tree_size);
  /* When SLP build fails for stmts record this, otherwise SLP build
     can be exponential in time when we allow to construct parts from
     scalars, see PR81723.  */
      x.create (stmts.length ());
/* Recursively build an SLP tree starting from NODE.
   Fail (and return a value not equal to zero) if def-stmts are not
   isomorphic, require data permutation or are of unsupported types of
   operation.  Otherwise, return 0.
   The value returned is the depth in the SLP tree where a mismatch
   was found.  */

vect_build_slp_tree_2 (vec_info *vinfo,
		       vec<gimple *> stmts, unsigned int group_size,
		       poly_uint64 *max_nunits,
		       vec<slp_tree> *loads,
		       bool *matches, unsigned *npermutes, unsigned *tree_size,
		       unsigned max_tree_size)
{
  unsigned nops, i, this_tree_size = 0;
  poly_uint64 this_max_nunits = *max_nunits;

  if (is_gimple_call (stmt))
    nops = gimple_call_num_args (stmt);
  else if (is_gimple_assign (stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
	nops++;
    }
  else if (gimple_code (stmt) == GIMPLE_PHI)
    nops = 0;

  /* If the SLP node is a PHI (induction or reduction), terminate
     the recursion.  */
  if (gimple_code (stmt) == GIMPLE_PHI)
    {
      tree scalar_type = TREE_TYPE (PHI_RESULT (stmt));
      tree vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vect_record_max_nunits (vinfo, stmt, group_size, vectype,
				   max_nunits))
	return NULL;

      vect_def_type def_type = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt));
      /* Induction from different IVs is not supported.  */
      if (def_type == vect_induction_def)
	{
	  FOR_EACH_VEC_ELT (stmts, i, stmt)
	    if (stmt != stmts[0])
	      return NULL;
	}
      else
	{
	  /* Else def types have to match.  */
	  FOR_EACH_VEC_ELT (stmts, i, stmt)
	    {
	      /* But for reduction chains only check on the first stmt.  */
	      if (REDUC_GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
		  && REDUC_GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != stmt)
		continue;
	      if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) != def_type)
		return NULL;
	    }
	}
      node = vect_create_new_slp_node (stmts);
      return node;
    }

  bool two_operators = false;
  unsigned char *swap = XALLOCAVEC (unsigned char, group_size);
  if (!vect_build_slp_tree_1 (vinfo, swap, stmts, group_size,
			      &this_max_nunits, matches, &two_operators))
    return NULL;

  /* If the SLP node is a load, terminate the recursion.  */
  if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
      && DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))))
    {
      *max_nunits = this_max_nunits;
      node = vect_create_new_slp_node (stmts);
      loads->safe_push (node);
      return node;
    }

  /* Get at the operands, verifying they are compatible.  */
  vec<slp_oprnd_info> oprnds_info = vect_create_oprnd_info (nops, group_size);
  slp_oprnd_info oprnd_info;
  FOR_EACH_VEC_ELT (stmts, i, stmt)
    {
      int res = vect_get_and_check_slp_defs (vinfo, &swap[i],
					     stmts, i, &oprnds_info);
      if (res != 0)
	matches[(res == -1) ? 0 : i] = false;
    }
  for (i = 0; i < group_size; ++i)
    if (!matches[i])
      {
	vect_free_oprnd_info (oprnds_info);
	return NULL;
      }

  auto_vec<slp_tree, 4> children;
  auto_vec<slp_tree> this_loads;

  max_tree_size -= *tree_size;

  /* Create SLP_TREE nodes for the definition node/s.  */
  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      unsigned old_nloads = this_loads.length ();
      unsigned old_tree_size = this_tree_size;

      if (oprnd_info->first_dt != vect_internal_def
	  && oprnd_info->first_dt != vect_reduction_def
	  && oprnd_info->first_dt != vect_induction_def)
	continue;

      if (++this_tree_size > max_tree_size)
	{
	  FOR_EACH_VEC_ELT (children, j, child)
	    vect_free_slp_tree (child);
	  vect_free_oprnd_info (oprnds_info);
	  return NULL;
	}

      if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
					group_size, &this_max_nunits,
					&this_loads, matches, npermutes,
					&this_tree_size,
					max_tree_size)) != NULL)
	{
	  /* If we have all children of child built up from scalars then just
	     throw that away and build it up this node from scalars.  */
	  if (!SLP_TREE_CHILDREN (child).is_empty ()
	      /* ??? Rejecting patterns this way doesn't work.  We'd have to
		 do extra work to cancel the pattern so the uses see the
		 scalar version.  */
	      && !is_pattern_stmt_p
		    (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0])))
	    {
	      slp_tree grandchild;

	      FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
		if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)
		  break;
	      if (!grandchild)
		{
		  this_loads.truncate (old_nloads);
		  this_tree_size = old_tree_size;
		  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
		    vect_free_slp_tree (grandchild);
		  SLP_TREE_CHILDREN (child).truncate (0);

		  dump_printf_loc (MSG_NOTE, vect_location,
				   "Building parent vector operands from "
				   "scalars instead\n");
		  oprnd_info->def_stmts = vNULL;
		  SLP_TREE_DEF_TYPE (child) = vect_external_def;
		  children.safe_push (child);
		  continue;
		}
	    }

	  oprnd_info->def_stmts = vNULL;
	  children.safe_push (child);
	  continue;
	}

      /* If the SLP build failed fatally and we analyze a basic-block
	 simply treat nodes we fail to build as externally defined
	 (and thus build vectors from the scalar defs).
	 The cost model will reject outright expensive cases.
	 ??? This doesn't treat cases where permutation ultimately
	 fails (or we don't try permutation below).  Ideally we'd
	 even compute a permutation that will end up with the maximum
	 SLP tree size...  */
      if (is_a <bb_vec_info> (vinfo)
	  /* ??? Rejecting patterns this way doesn't work.  We'd have to
	     do extra work to cancel the pattern so the uses see the
	     scalar version.  */
	  && !is_pattern_stmt_p (vinfo_for_stmt (stmt)))
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "Building vector operands from scalars\n");
	  child = vect_create_new_slp_node (oprnd_info->def_stmts);
	  SLP_TREE_DEF_TYPE (child) = vect_external_def;
	  children.safe_push (child);
	  oprnd_info->def_stmts = vNULL;
	  continue;
	}
      /* If the SLP build for operand zero failed and operand zero
	 and one can be commutated try that for the scalar stmts
	 that failed the match.  */
      if (i == 0
	  /* A first scalar stmt mismatch signals a fatal mismatch.  */
	  && matches[0]
	  /* ??? For COND_EXPRs we can swap the comparison operands
	     as well as the arms under some constraints.  */
	  && oprnds_info[1]->first_dt == vect_internal_def
	  && is_gimple_assign (stmt)
	  /* Do so only if the number of not successful permutes was no more
	     than a cut-off as re-trying the recursive match on
	     possibly each level of the tree would expose exponential
	     behavior.  */
	  && *npermutes < 4)
	{
	  /* See whether we can swap the matching or the non-matching
	     stmt operands.  */
	  bool swap_not_matching = true;
	  do
	    {
	      for (j = 0; j < group_size; ++j)
		{
		  if (matches[j] != !swap_not_matching)
		    continue;
		  gimple *stmt = stmts[j];
		  /* Verify if we can swap operands of this stmt.  */
		  if (!is_gimple_assign (stmt)
		      || !commutative_tree_code (gimple_assign_rhs_code (stmt)))
		    {
		      if (!swap_not_matching)
			goto fail;
		      swap_not_matching = false;
		      break;
		    }
		  /* Verify if we can safely swap or if we committed to a
		     specific operand order already.
		     ??? Instead of modifying GIMPLE stmts here we could
		     record whether we want to swap operands in the SLP
		     node and temporarily do that when processing it
		     (or wrap operand accessors in a helper).  */
		  else if (swap[j] != 0
			   || STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt)))
		    {
		      if (!swap_not_matching)
			{
			  if (dump_enabled_p ())
			    {
			      dump_printf_loc (MSG_MISSED_OPTIMIZATION,
					       vect_location,
					       "Build SLP failed: cannot swap "
					       "operands of shared stmt ");
			      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION,
						TDF_SLIM, stmts[j], 0);
			    }
			  goto fail;
			}
		      swap_not_matching = false;
		      break;
		    }
		}
	    }
	  while (j != group_size);

	  /* Swap mismatched definition stmts.  */
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "Re-trying with swapped operands of stmts ");
	  for (j = 0; j < group_size; ++j)
	    if (matches[j] == !swap_not_matching)
	      {
		std::swap (oprnds_info[0]->def_stmts[j],
			   oprnds_info[1]->def_stmts[j]);
		dump_printf (MSG_NOTE, "%d ", j);
	      }
	  dump_printf (MSG_NOTE, "\n");
	  /* And try again with scratch 'matches' ...  */
	  bool *tem = XALLOCAVEC (bool, group_size);
	  if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
					    group_size, &this_max_nunits,
					    &this_loads, tem, npermutes,
					    &this_tree_size,
					    max_tree_size)) != NULL)
	    {
	      /* ... so if successful we can apply the operand swapping
		 to the GIMPLE IL.  This is necessary because for example
		 vect_get_slp_defs uses operand indexes and thus expects
		 canonical operand order.  This is also necessary even
		 if we end up building the operand from scalars as
		 we'll continue to process swapped operand two.  */
	      for (j = 0; j < group_size; ++j)
		{
		  gimple *stmt = stmts[j];
		  gimple_set_plf (stmt, GF_PLF_1, false);
		}
	      for (j = 0; j < group_size; ++j)
		{
		  gimple *stmt = stmts[j];
		  if (matches[j] == !swap_not_matching)
		    {
		      /* Avoid swapping operands twice.  */
		      if (gimple_plf (stmt, GF_PLF_1))
			continue;
		      swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
					 gimple_assign_rhs2_ptr (stmt));
		      gimple_set_plf (stmt, GF_PLF_1, true);
		    }
		}
	      /* Verify we swap all duplicates or none.  */
	      for (j = 0; j < group_size; ++j)
		{
		  gimple *stmt = stmts[j];
		  gcc_assert (gimple_plf (stmt, GF_PLF_1)
			      == (matches[j] == !swap_not_matching));
		}

	      /* If we have all children of child built up from scalars then
		 just throw that away and build it up this node from scalars.  */
	      if (!SLP_TREE_CHILDREN (child).is_empty ()
		  /* ??? Rejecting patterns this way doesn't work.  We'd have
		     to do extra work to cancel the pattern so the uses see the
		     scalar version.  */
		  && !is_pattern_stmt_p
			(vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0])))
		{
		  slp_tree grandchild;

		  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
		    if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)
		      break;
		  if (!grandchild)
		    {
		      this_loads.truncate (old_nloads);
		      this_tree_size = old_tree_size;
		      FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
			vect_free_slp_tree (grandchild);
		      SLP_TREE_CHILDREN (child).truncate (0);

		      dump_printf_loc (MSG_NOTE, vect_location,
				       "Building parent vector operands from "
				       "scalars instead\n");
		      oprnd_info->def_stmts = vNULL;
		      SLP_TREE_DEF_TYPE (child) = vect_external_def;
		      children.safe_push (child);
		      continue;
		    }
		}

	      oprnd_info->def_stmts = vNULL;
	      children.safe_push (child);
	      continue;
	    }
	}

fail:
      gcc_assert (child == NULL);
      FOR_EACH_VEC_ELT (children, j, child)
	vect_free_slp_tree (child);
      vect_free_oprnd_info (oprnds_info);
      return NULL;
    }

  vect_free_oprnd_info (oprnds_info);

  *tree_size += this_tree_size;
  *max_nunits = this_max_nunits;
  loads->safe_splice (this_loads);

  node = vect_create_new_slp_node (stmts);
  SLP_TREE_TWO_OPERATORS (node) = two_operators;
  SLP_TREE_CHILDREN (node).splice (children);
  return node;
}
/* Dump a slp tree NODE using flags specified in DUMP_KIND.  */

vect_print_slp_tree (dump_flags_t dump_kind, dump_location_t loc,
		     slp_tree node)
{
  dump_printf_loc (dump_kind, loc, "node%s\n",
		   SLP_TREE_DEF_TYPE (node) != vect_internal_def
		   ? " (external)" : "");
  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      dump_printf_loc (dump_kind, loc, "\tstmt %d ", i);
      dump_gimple_stmt (dump_kind, TDF_SLIM, stmt, 0);
    }
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_print_slp_tree (dump_kind, loc, child);
}

/* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
   If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
   J).  Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
   stmts in NODE are to be marked.  */

vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
{
  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    if (j < 0 || i == j)
      STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts (child, mark, j);
}

/* Mark the statements of the tree rooted at NODE as relevant (vect_used).  */

vect_mark_slp_stmts_relevant (slp_tree node)
{
  stmt_vec_info stmt_info;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      stmt_info = vinfo_for_stmt (stmt);
      gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
		  || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
      STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
    }

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts_relevant (child);
}

/* Rearrange the statements of NODE according to PERMUTATION.  */

vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
			  vec<unsigned> permutation)
{
  vec<gimple *> tmp_stmts;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_slp_rearrange_stmts (child, group_size, permutation);

  gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
  tmp_stmts.create (group_size);
  tmp_stmts.quick_grow_cleared (group_size);

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    tmp_stmts[permutation[i]] = stmt;

  SLP_TREE_SCALAR_STMTS (node).release ();
  SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
}
/* Attempt to reorder stmts in a reduction chain so that we don't
   require any load permutation.  Return true if that was possible,
   otherwise return false.  */
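/* Illustration (not from the original sources): for a reduction chain whose
   load nodes all carry the permutation { 1, 0 }, the scalar stmts of every
   node in the instance can be reordered with that permutation, after which
   the loads are in order and the load permutations can be released.  */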
vect_attempt_slp_rearrange_stmts (slp_instance slp_instn)
{
  unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
  slp_tree node, load;

  /* Compare all the permutation sequences to the first one.  We know
     that at least one load is permuted.  */
  node = SLP_INSTANCE_LOADS (slp_instn)[0];
  if (!node->load_permutation.exists ())
    return false;
  for (i = 1; SLP_INSTANCE_LOADS (slp_instn).iterate (i, &load); ++i)
    {
      if (!load->load_permutation.exists ())
	return false;
      FOR_EACH_VEC_ELT (load->load_permutation, j, lidx)
	if (lidx != node->load_permutation[j])
	  return false;
    }

  /* Check that the loads in the first sequence are different and there
     are no gaps between them.  */
  auto_sbitmap load_index (group_size);
  bitmap_clear (load_index);
  FOR_EACH_VEC_ELT (node->load_permutation, i, lidx)
    {
      if (lidx >= group_size)
	return false;
      if (bitmap_bit_p (load_index, lidx))
	return false;
      bitmap_set_bit (load_index, lidx);
    }
  for (i = 0; i < group_size; i++)
    if (!bitmap_bit_p (load_index, i))
      return false;

  /* This permutation is valid for reduction.  Since the order of the
     statements in the nodes is not important unless they are memory
     accesses, we can rearrange the statements in all the nodes
     according to the order of the loads.  */
  vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
			    node->load_permutation);

  /* We are done, no actual permutations need to be generated.  */
  poly_uint64 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_instn);
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    {
      gimple *first_stmt = SLP_TREE_SCALAR_STMTS (node)[0];
      first_stmt = DR_GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt));
      /* But we have to keep those permutations that are required because
	 of handling of gaps.  */
      if (known_eq (unrolling_factor, 1U)
	  || (group_size == DR_GROUP_SIZE (vinfo_for_stmt (first_stmt))
	      && DR_GROUP_GAP (vinfo_for_stmt (first_stmt)) == 0))
	SLP_TREE_LOAD_PERMUTATION (node).release ();
      else
	for (j = 0; j < SLP_TREE_LOAD_PERMUTATION (node).length (); ++j)
	  SLP_TREE_LOAD_PERMUTATION (node)[j] = j;
    }

  return true;
}
/* Check if the required load permutations in the SLP instance
   SLP_INSTN are supported.  */

vect_supported_load_permutation_p (slp_instance slp_instn)
{
  unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
  unsigned int i, j, k, next;
  gimple *stmt, *load, *next_load;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "Load permutation ");
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
	if (node->load_permutation.exists ())
	  FOR_EACH_VEC_ELT (node->load_permutation, j, next)
	    dump_printf (MSG_NOTE, "%d ", next);
	else
	  for (k = 0; k < group_size; ++k)
	    dump_printf (MSG_NOTE, "%d ", k);
      dump_printf (MSG_NOTE, "\n");
    }

  /* In case of reduction every load permutation is allowed, since the order
     of the reduction statements is not important (as opposed to the case of
     grouped stores).  The only condition we need to check is that all the
     load nodes are of the same size and have the same permutation (and then
     rearrange all the nodes of the SLP instance according to this
     permutation).  */

  /* Check that all the load nodes are of the same size.  */
  /* ??? Can't we assert this? */
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    if (SLP_TREE_SCALAR_STMTS (node).length () != (unsigned) group_size)
      return false;

  node = SLP_INSTANCE_TREE (slp_instn);
  stmt = SLP_TREE_SCALAR_STMTS (node)[0];

  /* Reduction (there are no data-refs in the root).
     In reduction chain the order of the loads is not important.  */
  if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))
      && !REDUC_GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    vect_attempt_slp_rearrange_stmts (slp_instn);

  /* In basic block vectorization we allow any subchain of an interleaving
     chain.
     FORNOW: not supported in loop SLP because of realignment complications.  */
  if (STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt)))
    {
      /* Check whether the loads in an instance form a subchain and thus
	 no permutation is necessary.  */
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
	{
	  if (!SLP_TREE_LOAD_PERMUTATION (node).exists ())
	    continue;
	  bool subchain_p = true;
	  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load)
	    {
	      if (j != 0
		  && (next_load != load
		      || DR_GROUP_GAP (vinfo_for_stmt (load)) != 1))
		{
		  subchain_p = false;
		  break;
		}
	      next_load = DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (load));
	    }
	  if (subchain_p)
	    SLP_TREE_LOAD_PERMUTATION (node).release ();
	  else
	    {
	      stmt_vec_info group_info
		= vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node)[0]);
	      group_info = vinfo_for_stmt (DR_GROUP_FIRST_ELEMENT (group_info));
	      unsigned HOST_WIDE_INT nunits;
	      unsigned k, maxk = 0;
	      FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (node), j, k)
		if (k > maxk)
		  maxk = k;
	      /* In BB vectorization we may not actually use a loaded vector
		 accessing elements in excess of DR_GROUP_SIZE.  */
	      tree vectype = STMT_VINFO_VECTYPE (group_info);
	      if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
		  || maxk >= (DR_GROUP_SIZE (group_info) & ~(nunits - 1)))
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "BB vectorization with gaps at the end of "
				   "a load is not supported\n");
		  return false;
		}

	      /* Verify the permutation can be generated.  */
	      if (!vect_transform_slp_perm_load (node, tem, NULL,
						 1, slp_instn, true, &n_perms))
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION,
				   vect_location,
				   "unsupported load permutation\n");
		  return false;
		}
	    }
	}
      return true;
    }

  /* For loop vectorization verify we can generate the permutation.  Be
     conservative about the vectorization factor, there are permutations
     that will use three vector inputs only starting from a specific factor
     and the vectorization factor is not yet final.
     ??? The SLP instance unrolling factor might not be the maximum one.  */
  poly_uint64 test_vf
    = force_common_multiple (SLP_INSTANCE_UNROLLING_FACTOR (slp_instn),
			     LOOP_VINFO_VECT_FACTOR
			       (STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt))));
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    if (node->load_permutation.exists ()
	&& !vect_transform_slp_perm_load (node, vNULL, NULL, test_vf,
					  slp_instn, true, &n_perms))
      return false;

  return true;
}
/* Find the last store in SLP INSTANCE.  */

vect_find_last_scalar_stmt_in_slp (slp_tree node)
{
  gimple *last = NULL, *stmt;

  for (int i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &stmt); i++)
    {
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
      if (is_pattern_stmt_p (stmt_vinfo))
	last = get_later_stmt (STMT_VINFO_RELATED_STMT (stmt_vinfo), last);
      else
	last = get_later_stmt (stmt, last);
    }

  return last;
}
/* Splits a group of stores, currently beginning at FIRST_STMT, into two groups:
   one (still beginning at FIRST_STMT) of size GROUP1_SIZE (also containing
   the first GROUP1_SIZE stmts, since stores are consecutive), the second
   containing the remainder.
   Return the first stmt in the second group.  */
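/* Example (illustration only): splitting an 8-store group at
   GROUP1_SIZE == 4 leaves stores 0-3 in the first group and returns the
   store of element 4, which becomes DR_GROUP_FIRST_ELEMENT of a new
   4-store group; the gaps of both groups are adjusted so each one skips
   over the other's elements.  */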
vect_split_slp_store_group (gimple *first_stmt, unsigned group1_size)
{
  stmt_vec_info first_vinfo = vinfo_for_stmt (first_stmt);
  gcc_assert (DR_GROUP_FIRST_ELEMENT (first_vinfo) == first_stmt);
  gcc_assert (group1_size > 0);
  int group2_size = DR_GROUP_SIZE (first_vinfo) - group1_size;
  gcc_assert (group2_size > 0);
  DR_GROUP_SIZE (first_vinfo) = group1_size;

  gimple *stmt = first_stmt;
  for (unsigned i = group1_size; i > 1; i--)
    {
      stmt = DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
      gcc_assert (DR_GROUP_GAP (vinfo_for_stmt (stmt)) == 1);
    }
  /* STMT is now the last element of the first group.  */
  gimple *group2 = DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
  DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt)) = 0;

  DR_GROUP_SIZE (vinfo_for_stmt (group2)) = group2_size;
  for (stmt = group2; stmt; stmt = DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt)))
    {
      DR_GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = group2;
      gcc_assert (DR_GROUP_GAP (vinfo_for_stmt (stmt)) == 1);
    }

  /* For the second group, the DR_GROUP_GAP is that before the original group,
     plus skipping over the first vector.  */
  DR_GROUP_GAP (vinfo_for_stmt (group2))
    = DR_GROUP_GAP (first_vinfo) + group1_size;

  /* DR_GROUP_GAP of the first group now has to skip over the second group too.  */
  DR_GROUP_GAP (first_vinfo) += group2_size;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "Split group into %d and %d\n",
		     group1_size, group2_size);

  return group2;
}
/* Calculate the unrolling factor for an SLP instance with GROUP_SIZE
   statements and a vector of NUNITS elements.  */
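/* For example (illustration only): with NUNITS == 8 and GROUP_SIZE == 6 the
   least common multiple is 24, so the SLP instance has to be unrolled by a
   factor of 24 / 6 == 4 to fill whole vectors.  */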
calculate_unrolling_factor (poly_uint64 nunits, unsigned int group_size)
{
  return exact_div (common_multiple (nunits, group_size), group_size);
}
/* Analyze an SLP instance starting from a group of grouped stores.  Call
   vect_build_slp_tree to build a tree of packed stmts if possible.
   Return FALSE if it's impossible to SLP any stmt in the loop.  */

vect_analyze_slp_instance (vec_info *vinfo,
			   gimple *stmt, unsigned max_tree_size)
{
  slp_instance new_instance;
  unsigned int group_size;
  tree vectype, scalar_type = NULL_TREE;
  vec<slp_tree> loads;
  struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
  vec<gimple *> scalar_stmts;

  if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
    {
      scalar_type = TREE_TYPE (DR_REF (dr));
      vectype = get_vectype_for_scalar_type (scalar_type);
      group_size = DR_GROUP_SIZE (vinfo_for_stmt (stmt));
    }
  else if (!dr && REDUC_GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    {
      gcc_assert (is_a <loop_vec_info> (vinfo));
      vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
      group_size = REDUC_GROUP_SIZE (vinfo_for_stmt (stmt));
    }
  else
    {
      gcc_assert (is_a <loop_vec_info> (vinfo));
      vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
      group_size = as_a <loop_vec_info> (vinfo)->reductions.length ();
    }

  if (!vectype)
    {
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "Build SLP failed: unsupported data-type ");
	  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, scalar_type);
	  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	}
      return false;
    }
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);

  /* Create a node (a root of the SLP tree) for the packed grouped stores.  */
  scalar_stmts.create (group_size);
  if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
    {
      /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS.  */
      while (next)
	{
	  if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next))
	      && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)))
	    scalar_stmts.safe_push (
		STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)));
	  else
	    scalar_stmts.safe_push (next);
	  next = DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
	}
    }
  else if (!dr && REDUC_GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    {
      /* Collect the reduction stmts and store them in
	 SLP_TREE_SCALAR_STMTS.  */
      while (next)
	{
	  if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next))
	      && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)))
	    scalar_stmts.safe_push (
		STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)));
	  else
	    scalar_stmts.safe_push (next);
	  next = REDUC_GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
	}
      /* Mark the first element of the reduction chain as reduction to properly
	 transform the node.  In the reduction analysis phase only the last
	 element of the chain is marked as reduction.  */
      STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_reduction_def;
    }
  else
    {
      /* Collect reduction statements.  */
      vec<gimple *> reductions = as_a <loop_vec_info> (vinfo)->reductions;
      for (i = 0; reductions.iterate (i, &next); i++)
	scalar_stmts.safe_push (next);
    }

  loads.create (group_size);

  /* Build the tree for the SLP instance.  */
  bool *matches = XALLOCAVEC (bool, group_size);
  unsigned npermutes = 0;
  bst_fail = new scalar_stmts_set_t ();
  poly_uint64 max_nunits = nunits;
  node = vect_build_slp_tree (vinfo, scalar_stmts, group_size,
			      &max_nunits, &loads, matches, &npermutes,
			      NULL, max_tree_size);
  delete bst_fail;
  if (node != NULL)
    {
      /* Calculate the unrolling factor based on the smallest type.  */
      poly_uint64 unrolling_factor
	= calculate_unrolling_factor (max_nunits, group_size);

      if (maybe_ne (unrolling_factor, 1U)
	  && is_a <bb_vec_info> (vinfo))
	{
	  unsigned HOST_WIDE_INT const_max_nunits;
	  if (!max_nunits.is_constant (&const_max_nunits)
	      || const_max_nunits > group_size)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "Build SLP failed: store group "
				 "size not a multiple of the vector size "
				 "in basic block SLP\n");
	      vect_free_slp_tree (node);
	      return false;
	    }
	  /* Fatal mismatch.  */
	  matches[group_size / const_max_nunits * const_max_nunits] = false;
	  vect_free_slp_tree (node);
	  return false;
	}
2050 /* Create a new SLP instance. */
2051 new_instance
= XNEW (struct _slp_instance
);
2052 SLP_INSTANCE_TREE (new_instance
) = node
;
2053 SLP_INSTANCE_GROUP_SIZE (new_instance
) = group_size
;
2054 SLP_INSTANCE_UNROLLING_FACTOR (new_instance
) = unrolling_factor
;
2055 SLP_INSTANCE_LOADS (new_instance
) = loads
;
2057      /* Compute the load permutation.  */
2059      bool loads_permuted = false;
2060      FOR_EACH_VEC_ELT (loads, i, load_node)
2062          vec<unsigned> load_permutation;
2064          gimple *load, *first_stmt;
2065          bool this_load_permuted = false;
2066          load_permutation.create (group_size);
2067          first_stmt = DR_GROUP_FIRST_ELEMENT
2068              (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
2069          FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load)
2071              int load_place = vect_get_place_in_interleaving_chain
2072                                 (load, first_stmt);
2073              gcc_assert (load_place != -1);
2074              if (load_place != j)
2075                this_load_permuted = true;
2076              load_permutation.safe_push (load_place);
2078          if (!this_load_permuted
2079              /* The load requires permutation when unrolling exposes
2080                 a gap either because the group is larger than the SLP
2081                 group-size or because there is a gap between the groups.  */
2082              && (known_eq (unrolling_factor, 1U)
2083                  || (group_size == DR_GROUP_SIZE (vinfo_for_stmt (first_stmt))
2084                      && DR_GROUP_GAP (vinfo_for_stmt (first_stmt)) == 0)))
2086              load_permutation.release ();
2087              continue;
2089          SLP_TREE_LOAD_PERMUTATION (load_node) = load_permutation;
2090          loads_permuted = true;
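          /* Illustration (values assumed, not from the source): if this SLP
             load node reads b[1] then b[0] while the interleaving chain
             starts at b[0], the places in the chain are 1 and 0, so
             load_permutation becomes {1, 0} and loads_permuted is set; a
             node reading b[0], b[1] in order keeps no permutation vector at
             all (it was released above).  */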
2095      if (!vect_supported_load_permutation_p (new_instance))
2097          if (dump_enabled_p ())
2099              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2100                               "Build SLP failed: unsupported load "
2101                               "permutation ");
2102              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION,
2103                                TDF_SLIM, stmt, 0);
2105          vect_free_slp_instance (new_instance);
2110      /* If the loads and stores can be handled with load/store-lanes
2111         instructions do not generate this SLP instance.  */
2112      if (is_a <loop_vec_info> (vinfo)
2114          && dr && vect_store_lanes_supported (vectype, group_size, false))
2117          FOR_EACH_VEC_ELT (loads, i, load_node)
2119              gimple *first_stmt = DR_GROUP_FIRST_ELEMENT
2120                  (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
2121              stmt_vec_info stmt_vinfo = vinfo_for_stmt (first_stmt);
2122              /* Use SLP for strided accesses (or if we
2123                 can't load-lanes).  */
2124              if (STMT_VINFO_STRIDED_P (stmt_vinfo)
2125                  || ! vect_load_lanes_supported
2126                        (STMT_VINFO_VECTYPE (stmt_vinfo),
2127                         DR_GROUP_SIZE (stmt_vinfo), false))
2128                break;
2130          if (i == loads.length ())
2132              if (dump_enabled_p ())
2133                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2134                                 "Built SLP cancelled: can use "
2135                                 "load/store-lanes\n");
2136              vect_free_slp_instance (new_instance);
2141      vinfo->slp_instances.safe_push (new_instance);
2143      if (dump_enabled_p ())
2145          dump_printf_loc (MSG_NOTE, vect_location,
2146                           "Final SLP tree for instance:\n");
2147          vect_print_slp_tree (MSG_NOTE, vect_location, node);
2155  /* Failed to SLP.  */
2156  /* Free the allocated memory.  */
2157  scalar_stmts.release ();
2161  /* For basic block SLP, try to break the group up into multiples of the
2162     vector size.  */
2163  unsigned HOST_WIDE_INT const_nunits;
2164  if (is_a <bb_vec_info> (vinfo)
2165      && STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
2166      && DR_GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
2167      && nunits.is_constant (&const_nunits))
2169      /* We consider breaking the group only on VF boundaries from the existing
2170         start.  */
2171      for (i = 0; i < group_size; i++)
2172        if (!matches[i]) break;
2174      if (i >= const_nunits && i < group_size)
2176          /* Split into two groups at the first vector boundary before i.  */
2177          gcc_assert ((const_nunits & (const_nunits - 1)) == 0);
2178          unsigned group1_size = i & ~(const_nunits - 1);
2180          gimple *rest = vect_split_slp_store_group (stmt, group1_size);
2181          bool res = vect_analyze_slp_instance (vinfo, stmt, max_tree_size);
2182          /* If the first non-match was in the middle of a vector,
2183             skip the rest of that vector.  */
2184          if (group1_size < i)
2186              i = group1_size + const_nunits;
2187              if (i < group_size)
2188                rest = vect_split_slp_store_group (rest, const_nunits);
2190          if (i < group_size)
2191            res |= vect_analyze_slp_instance (vinfo, rest, max_tree_size);
2194      /* Even though the first vector did not all match, we might be able to SLP
2195         (some) of the remainder.  FORNOW ignore this possibility.  */
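  /* Illustration of the splitting above (numbers assumed): with
     const_nunits == 4 and a store group of 8 whose first false entry is
     matches[4], we get i == 4 and group1_size == (4 & ~3) == 4, so the
     first four stores are re-analyzed as their own SLP instance and
     vect_analyze_slp_instance is then retried on the remaining four.  */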
2202  /* Check if there are stmts in the loop that can be vectorized using SLP.
2203     Build SLP trees of packed scalar stmts if SLP is possible.  */
2205  bool
2206  vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
2208    unsigned int i;
2209    gimple *first_element;
2211    DUMP_VECT_SCOPE ("vect_analyze_slp");
2213    /* Find SLP sequences starting from groups of grouped stores.  */
2214    FOR_EACH_VEC_ELT (vinfo->grouped_stores, i, first_element)
2215      vect_analyze_slp_instance (vinfo, first_element, max_tree_size);
2217    if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
2219        if (loop_vinfo->reduction_chains.length () > 0)
2221            /* Find SLP sequences starting from reduction chains.  */
2222            FOR_EACH_VEC_ELT (loop_vinfo->reduction_chains, i, first_element)
2223              if (! vect_analyze_slp_instance (vinfo, first_element,
2224                                               max_tree_size))
2226                  /* Dissolve reduction chain group.  */
2227                  gimple *next, *stmt = first_element;
2228                  while (stmt)
2230                      stmt_vec_info vinfo = vinfo_for_stmt (stmt);
2231                      next = REDUC_GROUP_NEXT_ELEMENT (vinfo);
2232                      REDUC_GROUP_FIRST_ELEMENT (vinfo) = NULL;
2233                      REDUC_GROUP_NEXT_ELEMENT (vinfo) = NULL;
2234                      stmt = next;
2236                  STMT_VINFO_DEF_TYPE (vinfo_for_stmt (first_element))
2237                    = vect_internal_def;
2241        /* Find SLP sequences starting from groups of reductions.  */
2242        if (loop_vinfo->reductions.length () > 1)
2243          vect_analyze_slp_instance (vinfo, loop_vinfo->reductions[0],
2244                                     max_tree_size);
2251  /* For each possible SLP instance decide whether to SLP it and calculate overall
2252     unrolling factor needed to SLP the loop.  Return TRUE if decided to SLP at
2253     least one instance.  */
2255  bool
2256  vect_make_slp_decision (loop_vec_info loop_vinfo)
2259    poly_uint64 unrolling_factor = 1;
2260    vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2261    slp_instance instance;
2262    int decided_to_slp = 0;
2264    DUMP_VECT_SCOPE ("vect_make_slp_decision");
2266    FOR_EACH_VEC_ELT (slp_instances, i, instance)
2268        /* FORNOW: SLP if you can.  */
2269        /* All unroll factors have the form current_vector_size * X for some
2270           rational X, so they must have a common multiple.  */
2271        unrolling_factor
2272          = force_common_multiple (unrolling_factor,
2273                                   SLP_INSTANCE_UNROLLING_FACTOR (instance));
2275        /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts.  Later we
2276           call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
2277           loop-based vectorization.  Such stmts will be marked as HYBRID.  */
2278        vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
2279        decided_to_slp++;
2282    LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
2284    if (decided_to_slp && dump_enabled_p ())
2286        dump_printf_loc (MSG_NOTE, vect_location,
2287                         "Decided to SLP %d instances. Unrolling factor ",
2288                         decided_to_slp);
2289        dump_dec (MSG_NOTE, unrolling_factor);
2290        dump_printf (MSG_NOTE, "\n");
2293    return (decided_to_slp > 0);
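/* Illustration (factors assumed): if one SLP instance needs an unrolling
   factor of 2 and another needs 3, force_common_multiple keeps the running
   factor at their least common multiple, so LOOP_VINFO_SLP_UNROLLING_FACTOR
   ends up as 6 -- the smallest unrolling at which both instances line up on
   vector boundaries.  */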
2297 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
2298 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
2301 vect_detect_hybrid_slp_stmts (slp_tree node
, unsigned i
, slp_vect_type stype
)
2303 gimple
*stmt
= SLP_TREE_SCALAR_STMTS (node
)[i
];
2304 imm_use_iterator imm_iter
;
2306 stmt_vec_info use_vinfo
, stmt_vinfo
= vinfo_for_stmt (stmt
);
2308 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
2309 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2312 /* Propagate hybrid down the SLP tree. */
2313 if (stype
== hybrid
)
2315 else if (HYBRID_SLP_STMT (stmt_vinfo
))
2319 /* Check if a pure SLP stmt has uses in non-SLP stmts. */
2320 gcc_checking_assert (PURE_SLP_STMT (stmt_vinfo
));
2321 /* If we get a pattern stmt here we have to use the LHS of the
2322 original stmt for immediate uses. */
2323 if (! STMT_VINFO_IN_PATTERN_P (stmt_vinfo
)
2324 && STMT_VINFO_RELATED_STMT (stmt_vinfo
))
2325 stmt
= STMT_VINFO_RELATED_STMT (stmt_vinfo
);
2327 if (gimple_code (stmt
) == GIMPLE_PHI
)
2328 def
= gimple_phi_result (stmt
);
2330 def
= SINGLE_SSA_TREE_OPERAND (stmt
, SSA_OP_DEF
);
2332 FOR_EACH_IMM_USE_STMT (use_stmt
, imm_iter
, def
)
2334 if (!flow_bb_inside_loop_p (loop
, gimple_bb (use_stmt
)))
2336 use_vinfo
= vinfo_for_stmt (use_stmt
);
2337 if (STMT_VINFO_IN_PATTERN_P (use_vinfo
)
2338 && STMT_VINFO_RELATED_STMT (use_vinfo
))
2339 use_vinfo
= vinfo_for_stmt (STMT_VINFO_RELATED_STMT (use_vinfo
));
2340 if (!STMT_SLP_TYPE (use_vinfo
)
2341 && (STMT_VINFO_RELEVANT (use_vinfo
)
2342 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo
)))
2343 && !(gimple_code (use_stmt
) == GIMPLE_PHI
2344 && STMT_VINFO_DEF_TYPE (use_vinfo
) == vect_reduction_def
))
2346 if (dump_enabled_p ())
2348 dump_printf_loc (MSG_NOTE
, vect_location
, "use of SLP "
2349 "def in non-SLP stmt: ");
2350 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, use_stmt
, 0);
2358 && !HYBRID_SLP_STMT (stmt_vinfo
))
2360 if (dump_enabled_p ())
2362 dump_printf_loc (MSG_NOTE
, vect_location
, "marking hybrid: ");
2363 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
2365 STMT_SLP_TYPE (stmt_vinfo
) = hybrid
;
2368    FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2369      if (SLP_TREE_DEF_TYPE (child) != vect_external_def)
2370        vect_detect_hybrid_slp_stmts (child, i, stype);
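  /* An assumed illustration of what becomes hybrid here: if an SLP group
     computes  t_1 = a_2 + b_3  and stores it, while the same t_1 also feeds
     a non-SLP but relevant statement (say a loop-vectorized reduction
     update), the definition of t_1 cannot stay pure SLP -- it must also be
     vectorized by the loop-based vectorizer, so its slp type is switched to
     hybrid above and the property is propagated down its SLP children.  */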
2373 /* Helpers for vect_detect_hybrid_slp walking pattern stmt uses. */
2376 vect_detect_hybrid_slp_1 (tree
*tp
, int *, void *data
)
2378 walk_stmt_info
*wi
= (walk_stmt_info
*)data
;
2379 struct loop
*loopp
= (struct loop
*)wi
->info
;
2384 if (TREE_CODE (*tp
) == SSA_NAME
2385 && !SSA_NAME_IS_DEFAULT_DEF (*tp
))
2387 gimple
*def_stmt
= SSA_NAME_DEF_STMT (*tp
);
2388 if (flow_bb_inside_loop_p (loopp
, gimple_bb (def_stmt
))
2389 && PURE_SLP_STMT (vinfo_for_stmt (def_stmt
)))
2391 if (dump_enabled_p ())
2393 dump_printf_loc (MSG_NOTE
, vect_location
, "marking hybrid: ");
2394 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, def_stmt
, 0);
2396 STMT_SLP_TYPE (vinfo_for_stmt (def_stmt
)) = hybrid
;
2404 vect_detect_hybrid_slp_2 (gimple_stmt_iterator
*gsi
, bool *handled
,
2407 stmt_vec_info use_vinfo
= vinfo_for_stmt (gsi_stmt (*gsi
));
2408 /* If the stmt is in a SLP instance then this isn't a reason
2409 to mark use definitions in other SLP instances as hybrid. */
2410 if (! STMT_SLP_TYPE (use_vinfo
)
2411 && (STMT_VINFO_RELEVANT (use_vinfo
)
2412 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo
)))
2413 && ! (gimple_code (gsi_stmt (*gsi
)) == GIMPLE_PHI
2414 && STMT_VINFO_DEF_TYPE (use_vinfo
) == vect_reduction_def
))
2421 /* Find stmts that must be both vectorized and SLPed. */
2424 vect_detect_hybrid_slp (loop_vec_info loop_vinfo
)
2427 vec
<slp_instance
> slp_instances
= LOOP_VINFO_SLP_INSTANCES (loop_vinfo
);
2428 slp_instance instance
;
2430 DUMP_VECT_SCOPE ("vect_detect_hybrid_slp");
2432 /* First walk all pattern stmt in the loop and mark defs of uses as
2433 hybrid because immediate uses in them are not recorded. */
2434 for (i
= 0; i
< LOOP_VINFO_LOOP (loop_vinfo
)->num_nodes
; ++i
)
2436 basic_block bb
= LOOP_VINFO_BBS (loop_vinfo
)[i
];
2437 for (gimple_stmt_iterator gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
);
2440 gimple
*stmt
= gsi_stmt (gsi
);
2441 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2442 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
2445 memset (&wi
, 0, sizeof (wi
));
2446 wi
.info
= LOOP_VINFO_LOOP (loop_vinfo
);
2447 gimple_stmt_iterator gsi2
2448 = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info
));
2449 walk_gimple_stmt (&gsi2
, vect_detect_hybrid_slp_2
,
2450 vect_detect_hybrid_slp_1
, &wi
);
2451 walk_gimple_seq (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
),
2452 vect_detect_hybrid_slp_2
,
2453 vect_detect_hybrid_slp_1
, &wi
);
2458 /* Then walk the SLP instance trees marking stmts with uses in
2459 non-SLP stmts as hybrid, also propagating hybrid down the
2460 SLP tree, collecting the above info on-the-fly. */
2461 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
2463 for (unsigned i
= 0; i
< SLP_INSTANCE_GROUP_SIZE (instance
); ++i
)
2464 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance
),
2470 /* Initialize a bb_vec_info struct for the statements between
2471 REGION_BEGIN_IN (inclusive) and REGION_END_IN (exclusive). */
2473 _bb_vec_info::_bb_vec_info (gimple_stmt_iterator region_begin_in
,
2474 gimple_stmt_iterator region_end_in
,
2475 vec_info_shared
*shared
)
2476 : vec_info (vec_info::bb
, init_cost (NULL
), shared
),
2477 bb (gsi_bb (region_begin_in
)),
2478 region_begin (region_begin_in
),
2479 region_end (region_end_in
)
2481 gimple_stmt_iterator gsi
;
2483 for (gsi
= region_begin
; gsi_stmt (gsi
) != gsi_stmt (region_end
);
2486 gimple
*stmt
= gsi_stmt (gsi
);
2487 gimple_set_uid (stmt
, 0);
2488 set_vinfo_for_stmt (stmt
, new_stmt_vec_info (stmt
, this));
2495 /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
2496 stmts in the basic block. */
2498 _bb_vec_info::~_bb_vec_info ()
2500 for (gimple_stmt_iterator si
= region_begin
;
2501 gsi_stmt (si
) != gsi_stmt (region_end
); gsi_next (&si
))
2503 gimple
*stmt
= gsi_stmt (si
);
2504 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2507 /* Free stmt_vec_info. */
2508 free_stmt_vec_info (stmt
);
2510 /* Reset region marker. */
2511 gimple_set_uid (stmt
, -1);
2517 /* Subroutine of vect_slp_analyze_node_operations. Handle the root of NODE,
2518 given then that child nodes have already been processed, and that
2519 their def types currently match their SLP node's def type. */
2522 vect_slp_analyze_node_operations_1 (vec_info
*vinfo
, slp_tree node
,
2523 slp_instance node_instance
,
2524 stmt_vector_for_cost
*cost_vec
)
2526 gimple
*stmt
= SLP_TREE_SCALAR_STMTS (node
)[0];
2527 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2528 gcc_assert (stmt_info
);
2529 gcc_assert (STMT_SLP_TYPE (stmt_info
) != loop_vect
);
2531 /* For BB vectorization vector types are assigned here.
2532 Memory accesses already got their vector type assigned
2533 in vect_analyze_data_refs. */
2534 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
2536 && ! STMT_VINFO_DATA_REF (stmt_info
))
2538 tree vectype
, nunits_vectype
;
2539 if (!vect_get_vector_types_for_stmt (stmt_info
, &vectype
,
2541 /* We checked this when building the node. */
2543 if (vectype
== boolean_type_node
)
2545 vectype
= vect_get_mask_type_for_stmt (stmt_info
);
2547 /* vect_get_mask_type_for_stmt has already explained the
2554 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, sstmt
)
2555 STMT_VINFO_VECTYPE (vinfo_for_stmt (sstmt
)) = vectype
;
2558 /* Calculate the number of vector statements to be created for the
2559 scalar stmts in this node. For SLP reductions it is equal to the
2560 number of vector statements in the children (which has already been
2561 calculated by the recursive call). Otherwise it is the number of
2562 scalar elements in one scalar iteration (DR_GROUP_SIZE) multiplied by
2563 VF divided by the number of elements in a vector. */
2564 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info
)
2565 && REDUC_GROUP_FIRST_ELEMENT (stmt_info
))
2566 SLP_TREE_NUMBER_OF_VEC_STMTS (node
)
2567 = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node
)[0]);
2571 if (loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
))
2572 vf
= loop_vinfo
->vectorization_factor
;
2575 unsigned int group_size
= SLP_INSTANCE_GROUP_SIZE (node_instance
);
2576 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2577 SLP_TREE_NUMBER_OF_VEC_STMTS (node
)
2578 = vect_get_num_vectors (vf
* group_size
, vectype
);
2582 return vect_analyze_stmt (stmt
, &dummy
, node
, node_instance
, cost_vec
);
2585 /* Analyze statements contained in SLP tree NODE after recursively analyzing
2586 the subtree. NODE_INSTANCE contains NODE and VINFO contains INSTANCE.
2588 Return true if the operations are supported. */
2591 vect_slp_analyze_node_operations (vec_info
*vinfo
, slp_tree node
,
2592 slp_instance node_instance
,
2593 scalar_stmts_to_slp_tree_map_t
*visited
,
2594 scalar_stmts_to_slp_tree_map_t
*lvisited
,
2595 stmt_vector_for_cost
*cost_vec
)
2600 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
2603 /* If we already analyzed the exact same set of scalar stmts we're done.
2604 We share the generated vector stmts for those. */
2606 if ((leader
= visited
->get (SLP_TREE_SCALAR_STMTS (node
)))
2607 || (leader
= lvisited
->get (SLP_TREE_SCALAR_STMTS (node
))))
2609 SLP_TREE_NUMBER_OF_VEC_STMTS (node
)
2610 = SLP_TREE_NUMBER_OF_VEC_STMTS (*leader
);
2614 /* The SLP graph is acyclic so not caching whether we failed or succeeded
2615 doesn't result in any issue since we throw away the lvisited set
2617 lvisited
->put (SLP_TREE_SCALAR_STMTS (node
).copy (), node
);
2619 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
2620 if (!vect_slp_analyze_node_operations (vinfo
, child
, node_instance
,
2621 visited
, lvisited
, cost_vec
))
2624 /* Push SLP node def-type to stmt operands. */
2625 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), j
, child
)
2626 if (SLP_TREE_DEF_TYPE (child
) != vect_internal_def
)
2627 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child
)[0]))
2628 = SLP_TREE_DEF_TYPE (child
);
2629 bool res
= vect_slp_analyze_node_operations_1 (vinfo
, node
, node_instance
,
2631 /* Restore def-types. */
2632 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), j
, child
)
2633 if (SLP_TREE_DEF_TYPE (child
) != vect_internal_def
)
2634 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child
)[0]))
2635 = vect_internal_def
;
2643 /* Analyze statements in SLP instances of VINFO. Return true if the
2644 operations are supported. */
2647 vect_slp_analyze_operations (vec_info
*vinfo
)
2649 slp_instance instance
;
2652 DUMP_VECT_SCOPE ("vect_slp_analyze_operations");
2654 scalar_stmts_to_slp_tree_map_t
*visited
2655 = new scalar_stmts_to_slp_tree_map_t ();
2656 for (i
= 0; vinfo
->slp_instances
.iterate (i
, &instance
); )
2658 scalar_stmts_to_slp_tree_map_t lvisited
;
2659 stmt_vector_for_cost cost_vec
;
2660 cost_vec
.create (2);
2661 if (!vect_slp_analyze_node_operations (vinfo
,
2662 SLP_INSTANCE_TREE (instance
),
2663 instance
, visited
, &lvisited
,
2666 dump_printf_loc (MSG_NOTE
, vect_location
,
2667 "removing SLP instance operations starting from: ");
2668 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
,
2669 SLP_TREE_SCALAR_STMTS
2670 (SLP_INSTANCE_TREE (instance
))[0], 0);
2671 vect_free_slp_instance (instance
);
2672 vinfo
->slp_instances
.ordered_remove (i
);
2673 cost_vec
.release ();
2677 for (scalar_stmts_to_slp_tree_map_t::iterator x
= lvisited
.begin();
2678 x
!= lvisited
.end(); ++x
)
2679 visited
->put ((*x
).first
.copy (), (*x
).second
);
2682 add_stmt_costs (vinfo
->target_cost_data
, &cost_vec
);
2683 cost_vec
.release ();
2688 return !vinfo
->slp_instances
.is_empty ();
2692 /* Compute the scalar cost of the SLP node NODE and its children
2693 and return it. Do not account defs that are marked in LIFE and
2694 update LIFE according to uses of NODE. */
2697 vect_bb_slp_scalar_cost (basic_block bb
,
2698 slp_tree node
, vec
<bool, va_heap
> *life
,
2699 stmt_vector_for_cost
*cost_vec
)
2705 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
2707 ssa_op_iter op_iter
;
2708 def_operand_p def_p
;
2709 stmt_vec_info stmt_info
;
2714 /* If there is a non-vectorized use of the defs then the scalar
2715 stmt is kept live in which case we do not account it or any
2716 required defs in the SLP children in the scalar cost. This
2717 way we make the vectorization more costly when compared to
2719 FOR_EACH_SSA_DEF_OPERAND (def_p
, stmt
, op_iter
, SSA_OP_DEF
)
2721 imm_use_iterator use_iter
;
2723 FOR_EACH_IMM_USE_STMT (use_stmt
, use_iter
, DEF_FROM_PTR (def_p
))
2724 if (!is_gimple_debug (use_stmt
)
2725 && (! vect_stmt_in_region_p (vinfo_for_stmt (stmt
)->vinfo
,
2727 || ! PURE_SLP_STMT (vinfo_for_stmt (use_stmt
))))
2730 BREAK_FROM_IMM_USE_STMT (use_iter
);
2736 /* Count scalar stmts only once. */
2737 if (gimple_visited_p (stmt
))
2739 gimple_set_visited (stmt
, true);
2741 stmt_info
= vinfo_for_stmt (stmt
);
2742 vect_cost_for_stmt kind
;
2743 if (STMT_VINFO_DATA_REF (stmt_info
))
2745 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info
)))
2748 kind
= scalar_store
;
2752 record_stmt_cost (cost_vec
, 1, kind
, stmt_info
, 0, vect_body
);
2755 auto_vec
<bool, 20> subtree_life
;
2756 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
2758 if (SLP_TREE_DEF_TYPE (child
) == vect_internal_def
)
2760 /* Do not directly pass LIFE to the recursive call, copy it to
2761 confine changes in the callee to the current child/subtree. */
2762 subtree_life
.safe_splice (*life
);
2763 vect_bb_slp_scalar_cost (bb
, child
, &subtree_life
, cost_vec
);
2764 subtree_life
.truncate (0);
2769 /* Check if vectorization of the basic block is profitable. */
2772 vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo
)
2774 vec
<slp_instance
> slp_instances
= BB_VINFO_SLP_INSTANCES (bb_vinfo
);
2775 slp_instance instance
;
2777 unsigned int vec_inside_cost
= 0, vec_outside_cost
= 0, scalar_cost
= 0;
2778 unsigned int vec_prologue_cost
= 0, vec_epilogue_cost
= 0;
2780 /* Calculate scalar cost. */
2781 stmt_vector_for_cost scalar_costs
;
2782 scalar_costs
.create (0);
2783 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
2785 auto_vec
<bool, 20> life
;
2786 life
.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance
));
2787 vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo
),
2788 SLP_INSTANCE_TREE (instance
),
2789 &life
, &scalar_costs
);
2791 void *target_cost_data
= init_cost (NULL
);
2792 add_stmt_costs (target_cost_data
, &scalar_costs
);
2793 scalar_costs
.release ();
2795 finish_cost (target_cost_data
, &dummy
, &scalar_cost
, &dummy
);
2796 destroy_cost_data (target_cost_data
);
2798 /* Unset visited flag. */
2799 for (gimple_stmt_iterator gsi
= bb_vinfo
->region_begin
;
2800 gsi_stmt (gsi
) != gsi_stmt (bb_vinfo
->region_end
); gsi_next (&gsi
))
2801 gimple_set_visited (gsi_stmt (gsi
), false);
2803 /* Complete the target-specific cost calculation. */
2804 finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo
), &vec_prologue_cost
,
2805 &vec_inside_cost
, &vec_epilogue_cost
);
2807 vec_outside_cost
= vec_prologue_cost
+ vec_epilogue_cost
;
2809 if (dump_enabled_p ())
2811 dump_printf_loc (MSG_NOTE
, vect_location
, "Cost model analysis: \n");
2812 dump_printf (MSG_NOTE
, " Vector inside of basic block cost: %d\n",
2814 dump_printf (MSG_NOTE
, " Vector prologue cost: %d\n", vec_prologue_cost
);
2815 dump_printf (MSG_NOTE
, " Vector epilogue cost: %d\n", vec_epilogue_cost
);
2816 dump_printf (MSG_NOTE
, " Scalar cost of basic block: %d\n", scalar_cost
);
2819  /* Vectorization is profitable if its cost is no more than the cost of the
2820     scalar version.  Note that we err on the vector side for equal cost because
2821     the cost estimate is otherwise quite pessimistic (constant uses are
2822     free on the scalar side but cost a load on the vector side, for
2823     example).  */
2824  if (vec_outside_cost + vec_inside_cost > scalar_cost)
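  /* Worked example (costs assumed): with scalar_cost == 6,
     vec_inside_cost == 4, vec_prologue_cost == 1 and vec_epilogue_cost == 0,
     the sum 5 is not greater than 6, so the check above does not fire and
     BB vectorization is considered profitable; at 6 vs. 6 we still
     vectorize, erring on the vector side as described above.  */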
2830 /* Check if the basic block can be vectorized. Returns a bb_vec_info
2831 if so and sets fatal to true if failure is independent of
2832 current_vector_size. */
2835 vect_slp_analyze_bb_1 (gimple_stmt_iterator region_begin
,
2836 gimple_stmt_iterator region_end
,
2837 vec
<data_reference_p
> datarefs
, int n_stmts
,
2838 bool &fatal
, vec_info_shared
*shared
)
2840 bb_vec_info bb_vinfo
;
2841 slp_instance instance
;
2843 poly_uint64 min_vf
= 2;
2845 /* The first group of checks is independent of the vector size. */
2848 if (n_stmts
> PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB
))
2850 if (dump_enabled_p ())
2851 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2852 "not vectorized: too many instructions in "
2854 free_data_refs (datarefs
);
2858 bb_vinfo
= new _bb_vec_info (region_begin
, region_end
, shared
);
2862 BB_VINFO_DATAREFS (bb_vinfo
) = datarefs
;
2863 bb_vinfo
->shared
->save_datarefs ();
2865 /* Analyze the data references. */
2867 if (!vect_analyze_data_refs (bb_vinfo
, &min_vf
))
2869 if (dump_enabled_p ())
2870 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2871 "not vectorized: unhandled data-ref in basic "
2878 if (BB_VINFO_DATAREFS (bb_vinfo
).length () < 2)
2880 if (dump_enabled_p ())
2881 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2882 "not vectorized: not enough data-refs in "
2889 if (!vect_analyze_data_ref_accesses (bb_vinfo
))
2891 if (dump_enabled_p ())
2892 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2893 "not vectorized: unhandled data access in "
2900 /* If there are no grouped stores in the region there is no need
2901 to continue with pattern recog as vect_analyze_slp will fail
2903 if (bb_vinfo
->grouped_stores
.is_empty ())
2905 if (dump_enabled_p ())
2906 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2907 "not vectorized: no grouped stores in "
2914 /* While the rest of the analysis below depends on it in some way. */
2917 vect_pattern_recog (bb_vinfo
);
2919 /* Check the SLP opportunities in the basic block, analyze and build SLP
2921 if (!vect_analyze_slp (bb_vinfo
, n_stmts
))
2923 if (dump_enabled_p ())
2925 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2926 "Failed to SLP the basic block.\n");
2927 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2928 "not vectorized: failed to find SLP opportunities "
2929 "in basic block.\n");
2936 vect_record_base_alignments (bb_vinfo
);
2938 /* Analyze and verify the alignment of data references and the
2939 dependence in the SLP instances. */
2940 for (i
= 0; BB_VINFO_SLP_INSTANCES (bb_vinfo
).iterate (i
, &instance
); )
2942 if (! vect_slp_analyze_and_verify_instance_alignment (instance
)
2943 || ! vect_slp_analyze_instance_dependence (instance
))
2945 dump_printf_loc (MSG_NOTE
, vect_location
,
2946 "removing SLP instance operations starting from: ");
2947 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
,
2948 SLP_TREE_SCALAR_STMTS
2949 (SLP_INSTANCE_TREE (instance
))[0], 0);
2950 vect_free_slp_instance (instance
);
2951 BB_VINFO_SLP_INSTANCES (bb_vinfo
).ordered_remove (i
);
2955 /* Mark all the statements that we want to vectorize as pure SLP and
2957 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance
), pure_slp
, -1);
2958 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance
));
2962 if (! BB_VINFO_SLP_INSTANCES (bb_vinfo
).length ())
2968 if (!vect_slp_analyze_operations (bb_vinfo
))
2970 if (dump_enabled_p ())
2971 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2972 "not vectorized: bad operation in basic block.\n");
2978 /* Cost model: check if the vectorization is worthwhile. */
2979 if (!unlimited_cost_model (NULL
)
2980 && !vect_bb_vectorization_profitable_p (bb_vinfo
))
2982 if (dump_enabled_p ())
2983 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2984 "not vectorized: vectorization is not "
2991 if (dump_enabled_p ())
2992 dump_printf_loc (MSG_NOTE
, vect_location
,
2993 "Basic block will be vectorized using SLP\n");
2999 /* Main entry for the BB vectorizer. Analyze and transform BB, returns
3000 true if anything in the basic-block was vectorized. */
3003 vect_slp_bb (basic_block bb
)
3005 bb_vec_info bb_vinfo
;
3006 gimple_stmt_iterator gsi
;
3007 bool any_vectorized
= false;
3008 auto_vector_sizes vector_sizes
;
3010 DUMP_VECT_SCOPE ("vect_slp_analyze_bb");
3012 /* Autodetect first vector size we try. */
3013 current_vector_size
= 0;
3014 targetm
.vectorize
.autovectorize_vector_sizes (&vector_sizes
);
3015 unsigned int next_size
= 0;
3017 gsi
= gsi_start_bb (bb
);
3019 poly_uint64 autodetected_vector_size
= 0;
3022 if (gsi_end_p (gsi
))
3025 gimple_stmt_iterator region_begin
= gsi
;
3026 vec
<data_reference_p
> datarefs
= vNULL
;
3029 for (; !gsi_end_p (gsi
); gsi_next (&gsi
))
3031 gimple
*stmt
= gsi_stmt (gsi
);
3032 if (is_gimple_debug (stmt
))
3036 if (gimple_location (stmt
) != UNKNOWN_LOCATION
)
3037 vect_location
= stmt
;
3039 if (!vect_find_stmt_data_reference (NULL
, stmt
, &datarefs
))
3043 /* Skip leading unhandled stmts. */
3044 if (gsi_stmt (region_begin
) == gsi_stmt (gsi
))
3050 gimple_stmt_iterator region_end
= gsi
;
3052 bool vectorized
= false;
3054 vec_info_shared shared
;
3055 bb_vinfo
= vect_slp_analyze_bb_1 (region_begin
, region_end
,
3056 datarefs
, insns
, fatal
, &shared
);
3058 && dbg_cnt (vect_slp
))
3060 if (dump_enabled_p ())
3061 dump_printf_loc (MSG_NOTE
, vect_location
, "SLPing BB part\n");
3063 bb_vinfo
->shared
->check_datarefs ();
3064 vect_schedule_slp (bb_vinfo
);
3066 unsigned HOST_WIDE_INT bytes
;
3067 if (current_vector_size
.is_constant (&bytes
))
3068 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS
, vect_location
,
3069 "basic block part vectorized using "
3070 HOST_WIDE_INT_PRINT_UNSIGNED
" byte "
3071 "vectors\n", bytes
);
3073 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS
, vect_location
,
3074 "basic block part vectorized using variable "
3075 "length vectors\n");
3081 any_vectorized
|= vectorized
;
3084 autodetected_vector_size
= current_vector_size
;
3086 if (next_size
< vector_sizes
.length ()
3087 && known_eq (vector_sizes
[next_size
], autodetected_vector_size
))
3091 || next_size
== vector_sizes
.length ()
3092 || known_eq (current_vector_size
, 0U)
3093 /* If vect_slp_analyze_bb_1 signaled that analysis for all
3094 vector sizes will fail do not bother iterating. */
3097 if (gsi_end_p (region_end
))
3100 /* Skip the unhandled stmt. */
3103 /* And reset vector sizes. */
3104 current_vector_size
= 0;
3109 /* Try the next biggest vector size. */
3110 current_vector_size
= vector_sizes
[next_size
++];
3111 if (dump_enabled_p ())
3113 dump_printf_loc (MSG_NOTE
, vect_location
,
3114 "***** Re-trying analysis with "
3116 dump_dec (MSG_NOTE
, current_vector_size
);
3117 dump_printf (MSG_NOTE
, "\n");
3125 return any_vectorized
;
3129 /* Return 1 if vector type of boolean constant which is OPNUM
3130 operand in statement STMT is a boolean vector. */
3133 vect_mask_constant_operand_p (gimple
*stmt
, int opnum
)
3135 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
3136 enum tree_code code
= gimple_expr_code (stmt
);
3138 enum vect_def_type dt
;
3140 /* For comparison and COND_EXPR type is chosen depending
3141 on the other comparison operand. */
3142 if (TREE_CODE_CLASS (code
) == tcc_comparison
)
3145 op
= gimple_assign_rhs1 (stmt
);
3147 op
= gimple_assign_rhs2 (stmt
);
3149 if (!vect_is_simple_use (op
, stmt_vinfo
->vinfo
, &dt
, &vectype
))
3152 return !vectype
|| VECTOR_BOOLEAN_TYPE_P (vectype
);
3155 if (code
== COND_EXPR
)
3157 tree cond
= gimple_assign_rhs1 (stmt
);
3159 if (TREE_CODE (cond
) == SSA_NAME
)
3162 op
= TREE_OPERAND (cond
, 1);
3164 op
= TREE_OPERAND (cond
, 0);
3166 if (!vect_is_simple_use (op
, stmt_vinfo
->vinfo
, &dt
, &vectype
))
3169 return !vectype
|| VECTOR_BOOLEAN_TYPE_P (vectype
);
3172 return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo
));
3175 /* Build a variable-length vector in which the elements in ELTS are repeated
3176   to fill NRESULTS vectors of type VECTOR_TYPE.  Store the vectors in
3177 RESULTS and add any new instructions to SEQ.
3179 The approach we use is:
3181 (1) Find a vector mode VM with integer elements of mode IM.
3183 (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
3184 ELTS' has mode IM. This involves creating NELTS' VIEW_CONVERT_EXPRs
3185 from small vectors to IM.
3187 (3) Duplicate each ELTS'[I] into a vector of mode VM.
3189 (4) Use a tree of interleaving VEC_PERM_EXPRs to create VMs with the
3190 correct byte contents.
3192 (5) Use VIEW_CONVERT_EXPR to cast the final VMs to the required type.
3194 We try to find the largest IM for which this sequence works, in order
3195 to cut down on the number of interleaves. */
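/* An assumed illustration of the steps above: repeating the four QImode
   elements {a, b, c, d} across variable-length char vectors, with IM ==
   HImode and two pieces (NELTS' == 2), proceeds as
     (2) {a,b} and {c,d} are view-converted to HImode scalars ab and cd,
     (3) each is broadcast into a vector of mode VM: {ab,ab,...}, {cd,cd,...},
     (4) one interleaving VEC_PERM_EXPR produces {ab,cd,ab,cd,...},
     (5) which is view-converted back to {a,b,c,d,a,b,c,d,...} of the
         requested vector type.  */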
3198 duplicate_and_interleave (gimple_seq
*seq
, tree vector_type
, vec
<tree
> elts
,
3199 unsigned int nresults
, vec
<tree
> &results
)
3201 unsigned int nelts
= elts
.length ();
3202 tree element_type
= TREE_TYPE (vector_type
);
3204 /* (1) Find a vector mode VM with integer elements of mode IM. */
3205 unsigned int nvectors
= 1;
3206 tree new_vector_type
;
3208 if (!can_duplicate_and_interleave_p (nelts
, TYPE_MODE (element_type
),
3209 &nvectors
, &new_vector_type
,
3213 /* Get a vector type that holds ELTS[0:NELTS/NELTS']. */
3214 unsigned int partial_nelts
= nelts
/ nvectors
;
3215 tree partial_vector_type
= build_vector_type (element_type
, partial_nelts
);
3217 tree_vector_builder partial_elts
;
3218 auto_vec
<tree
, 32> pieces (nvectors
* 2);
3219 pieces
.quick_grow (nvectors
* 2);
3220 for (unsigned int i
= 0; i
< nvectors
; ++i
)
3222 /* (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
3223 ELTS' has mode IM. */
3224 partial_elts
.new_vector (partial_vector_type
, partial_nelts
, 1);
3225 for (unsigned int j
= 0; j
< partial_nelts
; ++j
)
3226 partial_elts
.quick_push (elts
[i
* partial_nelts
+ j
]);
3227 tree t
= gimple_build_vector (seq
, &partial_elts
);
3228 t
= gimple_build (seq
, VIEW_CONVERT_EXPR
,
3229 TREE_TYPE (new_vector_type
), t
);
3231 /* (3) Duplicate each ELTS'[I] into a vector of mode VM. */
3232 pieces
[i
] = gimple_build_vector_from_val (seq
, new_vector_type
, t
);
3235 /* (4) Use a tree of VEC_PERM_EXPRs to create a single VM with the
3236 correct byte contents.
3238 We need to repeat the following operation log2(nvectors) times:
3240 out[i * 2] = VEC_PERM_EXPR (in[i], in[i + hi_start], lo_permute);
3241 out[i * 2 + 1] = VEC_PERM_EXPR (in[i], in[i + hi_start], hi_permute);
3243 However, if each input repeats every N elements and the VF is
3244 a multiple of N * 2, the HI result is the same as the LO. */
3245 unsigned int in_start
= 0;
3246 unsigned int out_start
= nvectors
;
3247 unsigned int hi_start
= nvectors
/ 2;
3248 /* A bound on the number of outputs needed to produce NRESULTS results
3249 in the final iteration. */
3250 unsigned int noutputs_bound
= nvectors
* nresults
;
3251 for (unsigned int in_repeat
= 1; in_repeat
< nvectors
; in_repeat
*= 2)
3253 noutputs_bound
/= 2;
3254 unsigned int limit
= MIN (noutputs_bound
, nvectors
);
3255 for (unsigned int i
= 0; i
< limit
; ++i
)
3258 && multiple_p (TYPE_VECTOR_SUBPARTS (new_vector_type
),
3261 pieces
[out_start
+ i
] = pieces
[out_start
+ i
- 1];
3265 tree output
= make_ssa_name (new_vector_type
);
3266 tree input1
= pieces
[in_start
+ (i
/ 2)];
3267 tree input2
= pieces
[in_start
+ (i
/ 2) + hi_start
];
3268 gassign
*stmt
= gimple_build_assign (output
, VEC_PERM_EXPR
,
3271 gimple_seq_add_stmt (seq
, stmt
);
3272 pieces
[out_start
+ i
] = output
;
3274 std::swap (in_start
, out_start
);
3277 /* (5) Use VIEW_CONVERT_EXPR to cast the final VM to the required type. */
3278 results
.reserve (nresults
);
3279 for (unsigned int i
= 0; i
< nresults
; ++i
)
3281 results
.quick_push (gimple_build (seq
, VIEW_CONVERT_EXPR
, vector_type
,
3282 pieces
[in_start
+ i
]));
3284 results
.quick_push (results
[i
- nvectors
]);
3288 /* For constant and loop invariant defs of SLP_NODE this function returns
3289 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
3290 OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
3291 scalar stmts. NUMBER_OF_VECTORS is the number of vector defs to create.
3292 REDUC_INDEX is the index of the reduction operand in the statements, unless
3296 vect_get_constant_vectors (tree op
, slp_tree slp_node
,
3297 vec
<tree
> *vec_oprnds
,
3298 unsigned int op_num
, unsigned int number_of_vectors
)
3300 vec
<gimple
*> stmts
= SLP_TREE_SCALAR_STMTS (slp_node
);
3301 gimple
*stmt
= stmts
[0];
3302 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
3303 unsigned HOST_WIDE_INT nunits
;
3305 unsigned j
, number_of_places_left_in_vector
;
3308 int group_size
= stmts
.length ();
3309 unsigned int vec_num
, i
;
3310 unsigned number_of_copies
= 1;
3312 voprnds
.create (number_of_vectors
);
3313 bool constant_p
, is_store
;
3314 tree neutral_op
= NULL
;
3315 enum tree_code code
= gimple_expr_code (stmt
);
3316 gimple_seq ctor_seq
= NULL
;
3317 auto_vec
<tree
, 16> permute_results
;
3319 /* Check if vector type is a boolean vector. */
3320 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op
))
3321 && vect_mask_constant_operand_p (stmt
, op_num
))
3323 = build_same_sized_truth_vector_type (STMT_VINFO_VECTYPE (stmt_vinfo
));
3325 vector_type
= get_vectype_for_scalar_type (TREE_TYPE (op
));
3327 if (STMT_VINFO_DATA_REF (stmt_vinfo
))
3330 op
= gimple_assign_rhs1 (stmt
);
3337 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
3338 created vectors. It is greater than 1 if unrolling is performed.
3340 For example, we have two scalar operands, s1 and s2 (e.g., group of
3341 strided accesses of size two), while NUNITS is four (i.e., four scalars
3342 of this type can be packed in a vector). The output vector will contain
3343     two copies of each scalar operand: {s1, s2, s1, s2}.  (NUMBER_OF_COPIES
3344     is two in that case.)
3346 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
3347 containing the operands.
3349 For example, NUNITS is four as before, and the group size is 8
3350 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
3351 {s5, s6, s7, s8}. */
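  /* Worked example of the formula used below (values assumed): for the
     {s1, s2} case above with nunits == 4, group_size == 2 and
     number_of_vectors == 1, number_of_copies = nunits * number_of_vectors
     / group_size = 4 * 1 / 2 = 2, matching the two copies of each operand
     in {s1, s2, s1, s2}.  */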
3353 /* When using duplicate_and_interleave, we just need one element for
3354 each scalar statement. */
3355 if (!TYPE_VECTOR_SUBPARTS (vector_type
).is_constant (&nunits
))
3356 nunits
= group_size
;
3358 number_of_copies
= nunits
* number_of_vectors
/ group_size
;
3360 number_of_places_left_in_vector
= nunits
;
3362 tree_vector_builder
elts (vector_type
, nunits
, 1);
3363 elts
.quick_grow (nunits
);
3364 bool place_after_defs
= false;
3365 for (j
= 0; j
< number_of_copies
; j
++)
3367 for (i
= group_size
- 1; stmts
.iterate (i
, &stmt
); i
--)
3370 op
= gimple_assign_rhs1 (stmt
);
3377 tree cond
= gimple_assign_rhs1 (stmt
);
3378 if (TREE_CODE (cond
) == SSA_NAME
)
3379 op
= gimple_op (stmt
, op_num
+ 1);
3380 else if (op_num
== 0 || op_num
== 1)
3381 op
= TREE_OPERAND (cond
, op_num
);
3385 op
= gimple_assign_rhs2 (stmt
);
3387 op
= gimple_assign_rhs3 (stmt
);
3393 op
= gimple_call_arg (stmt
, op_num
);
3400 op
= gimple_op (stmt
, op_num
+ 1);
3401 /* Unlike the other binary operators, shifts/rotates have
3402 the shift count being int, instead of the same type as
3403 the lhs, so make sure the scalar is the right type if
3404 we are dealing with vectors of
3405 long long/long/short/char. */
3406 if (op_num
== 1 && TREE_CODE (op
) == INTEGER_CST
)
3407 op
= fold_convert (TREE_TYPE (vector_type
), op
);
3411 op
= gimple_op (stmt
, op_num
+ 1);
3416 /* Create 'vect_ = {op0,op1,...,opn}'. */
3417 number_of_places_left_in_vector
--;
3419 if (!types_compatible_p (TREE_TYPE (vector_type
), TREE_TYPE (op
)))
3421 if (CONSTANT_CLASS_P (op
))
3423 if (VECTOR_BOOLEAN_TYPE_P (vector_type
))
3425 /* Can't use VIEW_CONVERT_EXPR for booleans because
3426 of possibly different sizes of scalar value and
3428 if (integer_zerop (op
))
3429 op
= build_int_cst (TREE_TYPE (vector_type
), 0);
3430 else if (integer_onep (op
))
3431 op
= build_all_ones_cst (TREE_TYPE (vector_type
));
3436 op
= fold_unary (VIEW_CONVERT_EXPR
,
3437 TREE_TYPE (vector_type
), op
);
3438 gcc_assert (op
&& CONSTANT_CLASS_P (op
));
3442 tree new_temp
= make_ssa_name (TREE_TYPE (vector_type
));
3444 if (VECTOR_BOOLEAN_TYPE_P (vector_type
))
3447 = build_all_ones_cst (TREE_TYPE (vector_type
));
3449 = build_zero_cst (TREE_TYPE (vector_type
));
3450 gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (op
)));
3451 init_stmt
= gimple_build_assign (new_temp
, COND_EXPR
,
3457 op
= build1 (VIEW_CONVERT_EXPR
, TREE_TYPE (vector_type
),
3460 = gimple_build_assign (new_temp
, VIEW_CONVERT_EXPR
,
3463 gimple_seq_add_stmt (&ctor_seq
, init_stmt
);
3467 elts
[number_of_places_left_in_vector
] = op
;
3468 if (!CONSTANT_CLASS_P (op
))
3470 if (TREE_CODE (orig_op
) == SSA_NAME
3471 && !SSA_NAME_IS_DEFAULT_DEF (orig_op
)
3472 && STMT_VINFO_BB_VINFO (stmt_vinfo
)
3473 && (STMT_VINFO_BB_VINFO (stmt_vinfo
)->bb
3474 == gimple_bb (SSA_NAME_DEF_STMT (orig_op
))))
3475 place_after_defs
= true;
3477 if (number_of_places_left_in_vector
== 0)
3480 ? multiple_p (TYPE_VECTOR_SUBPARTS (vector_type
), nunits
)
3481 : known_eq (TYPE_VECTOR_SUBPARTS (vector_type
), nunits
))
3482 vec_cst
= gimple_build_vector (&ctor_seq
, &elts
);
3485 if (vec_oprnds
->is_empty ())
3486 duplicate_and_interleave (&ctor_seq
, vector_type
, elts
,
3489 vec_cst
= permute_results
[number_of_vectors
- j
- 1];
3492 gimple_stmt_iterator gsi
;
3493 if (place_after_defs
)
3496 (vect_find_last_scalar_stmt_in_slp (slp_node
));
3497 init
= vect_init_vector (stmt
, vec_cst
, vector_type
, &gsi
);
3500 init
= vect_init_vector (stmt
, vec_cst
, vector_type
, NULL
);
3501 if (ctor_seq
!= NULL
)
3503 gsi
= gsi_for_stmt (SSA_NAME_DEF_STMT (init
));
3504 gsi_insert_seq_before (&gsi
, ctor_seq
, GSI_SAME_STMT
);
3507 voprnds
.quick_push (init
);
3508 place_after_defs
= false;
3509 number_of_places_left_in_vector
= nunits
;
3511 elts
.new_vector (vector_type
, nunits
, 1);
3512 elts
.quick_grow (nunits
);
3517 /* Since the vectors are created in the reverse order, we should invert
3519 vec_num
= voprnds
.length ();
3520 for (j
= vec_num
; j
!= 0; j
--)
3522 vop
= voprnds
[j
- 1];
3523 vec_oprnds
->quick_push (vop
);
3528 /* In case that VF is greater than the unrolling factor needed for the SLP
3529 group of stmts, NUMBER_OF_VECTORS to be created is greater than
3530 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
3531 to replicate the vectors. */
3532 while (number_of_vectors
> vec_oprnds
->length ())
3534 tree neutral_vec
= NULL
;
3539 neutral_vec
= build_vector_from_val (vector_type
, neutral_op
);
3541 vec_oprnds
->quick_push (neutral_vec
);
3545 for (i
= 0; vec_oprnds
->iterate (i
, &vop
) && i
< vec_num
; i
++)
3546 vec_oprnds
->quick_push (vop
);
3552 /* Get vectorized definitions from SLP_NODE that contains corresponding
3553 vectorized def-stmts. */
3556 vect_get_slp_vect_defs (slp_tree slp_node
, vec
<tree
> *vec_oprnds
)
3559 gimple
*vec_def_stmt
;
3562 gcc_assert (SLP_TREE_VEC_STMTS (slp_node
).exists ());
3564 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node
), i
, vec_def_stmt
)
3566 gcc_assert (vec_def_stmt
);
3567 if (gimple_code (vec_def_stmt
) == GIMPLE_PHI
)
3568 vec_oprnd
= gimple_phi_result (vec_def_stmt
);
3570 vec_oprnd
= gimple_get_lhs (vec_def_stmt
);
3571 vec_oprnds
->quick_push (vec_oprnd
);
3576 /* Get vectorized definitions for SLP_NODE.
3577 If the scalar definitions are loop invariants or constants, collect them and
3578 call vect_get_constant_vectors() to create vector stmts.
3579 Otherwise, the def-stmts must be already vectorized and the vectorized stmts
3580 must be stored in the corresponding child of SLP_NODE, and we call
3581 vect_get_slp_vect_defs () to retrieve them. */
3584 vect_get_slp_defs (vec
<tree
> ops
, slp_tree slp_node
,
3585 vec
<vec
<tree
> > *vec_oprnds
)
3588 int number_of_vects
= 0, i
;
3589 unsigned int child_index
= 0;
3590 HOST_WIDE_INT lhs_size_unit
, rhs_size_unit
;
3591 slp_tree child
= NULL
;
3594 bool vectorized_defs
;
3596 first_stmt
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
3597 FOR_EACH_VEC_ELT (ops
, i
, oprnd
)
3599 /* For each operand we check if it has vectorized definitions in a child
3600 node or we need to create them (for invariants and constants). We
3601 check if the LHS of the first stmt of the next child matches OPRND.
3602 If it does, we found the correct child. Otherwise, we call
3603 vect_get_constant_vectors (), and not advance CHILD_INDEX in order
3604 to check this child node for the next operand. */
3605 vectorized_defs
= false;
3606 if (SLP_TREE_CHILDREN (slp_node
).length () > child_index
)
3608 child
= SLP_TREE_CHILDREN (slp_node
)[child_index
];
3610 /* We have to check both pattern and original def, if available. */
3611 if (SLP_TREE_DEF_TYPE (child
) == vect_internal_def
)
3613 gimple
*first_def
= SLP_TREE_SCALAR_STMTS (child
)[0];
3615 = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def
));
3618 if (gimple_code (first_def
) == GIMPLE_PHI
)
3619 first_def_op
= gimple_phi_result (first_def
);
3621 first_def_op
= gimple_get_lhs (first_def
);
3622 if (operand_equal_p (oprnd
, first_def_op
, 0)
3624 && operand_equal_p (oprnd
, gimple_get_lhs (related
), 0)))
3626 /* The number of vector defs is determined by the number of
3627 vector statements in the node from which we get those
3629 number_of_vects
= SLP_TREE_NUMBER_OF_VEC_STMTS (child
);
3630 vectorized_defs
= true;
3638 if (!vectorized_defs
)
3642 number_of_vects
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
3643 /* Number of vector stmts was calculated according to LHS in
3644 vect_schedule_slp_instance (), fix it by replacing LHS with
3645 RHS, if necessary. See vect_get_smallest_scalar_type () for
3647 vect_get_smallest_scalar_type (first_stmt
, &lhs_size_unit
,
3649 if (rhs_size_unit
!= lhs_size_unit
)
3651 number_of_vects
*= rhs_size_unit
;
3652 number_of_vects
/= lhs_size_unit
;
3657 /* Allocate memory for vectorized defs. */
3659 vec_defs
.create (number_of_vects
);
3661 /* For reduction defs we call vect_get_constant_vectors (), since we are
3662 looking for initial loop invariant values. */
3663 if (vectorized_defs
)
3664 /* The defs are already vectorized. */
3665 vect_get_slp_vect_defs (child
, &vec_defs
);
3667 /* Build vectors from scalar defs. */
3668 vect_get_constant_vectors (oprnd
, slp_node
, &vec_defs
, i
,
3671 vec_oprnds
->quick_push (vec_defs
);
3675 /* Generate vector permute statements from a list of loads in DR_CHAIN.
3676 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
3677 permute statements for the SLP node NODE of the SLP instance
3678 SLP_NODE_INSTANCE. */
3681 vect_transform_slp_perm_load (slp_tree node
, vec
<tree
> dr_chain
,
3682 gimple_stmt_iterator
*gsi
, poly_uint64 vf
,
3683 slp_instance slp_node_instance
, bool analyze_only
,
3686 gimple
*stmt
= SLP_TREE_SCALAR_STMTS (node
)[0];
3687 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3688 tree mask_element_type
= NULL_TREE
, mask_type
;
3690 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3691 int group_size
= SLP_INSTANCE_GROUP_SIZE (slp_node_instance
);
3692 unsigned int mask_element
;
3694 unsigned HOST_WIDE_INT nunits
, const_vf
;
3696 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info
))
3699 stmt_info
= vinfo_for_stmt (DR_GROUP_FIRST_ELEMENT (stmt_info
));
3701 mode
= TYPE_MODE (vectype
);
3703 /* At the moment, all permutations are represented using per-element
3704 indices, so we can't cope with variable vector lengths or
3705 vectorization factors. */
3706 if (!TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&nunits
)
3707 || !vf
.is_constant (&const_vf
))
3710 /* The generic VEC_PERM_EXPR code always uses an integral type of the
3711 same size as the vector element being permuted. */
3712 mask_element_type
= lang_hooks
.types
.type_for_mode
3713 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype
))).require (), 1);
3714 mask_type
= get_vectype_for_scalar_type (mask_element_type
);
3715 vec_perm_builder
mask (nunits
, nunits
, 1);
3716 mask
.quick_grow (nunits
);
3717 vec_perm_indices indices
;
3719 /* Initialize the vect stmts of NODE to properly insert the generated
3722 for (unsigned i
= SLP_TREE_VEC_STMTS (node
).length ();
3723 i
< SLP_TREE_NUMBER_OF_VEC_STMTS (node
); i
++)
3724 SLP_TREE_VEC_STMTS (node
).quick_push (NULL
);
3726 /* Generate permutation masks for every NODE. Number of masks for each NODE
3727 is equal to GROUP_SIZE.
3728 E.g., we have a group of three nodes with three loads from the same
3729 location in each node, and the vector size is 4. I.e., we have a
3730 a0b0c0a1b1c1... sequence and we need to create the following vectors:
3731 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
3732 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
3735 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
3736 The last mask is illegal since we assume two operands for permute
3737 operation, and the mask element values can't be outside that range.
3738 Hence, the last mask must be converted into {2,5,5,5}.
3739 For the first two permutations we need the first and the second input
3740 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
3741     we need the second and the third vectors: {b1,c1,a2,b2} and
3742     {c2,a3,b3,c3}.  */
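  /* Illustration of the index arithmetic used below (values assumed): with
     DR_GROUP_SIZE == 3, nunits == 4 and SLP_TREE_LOAD_PERMUTATION[k] == 0
     (the a's above), iteration j == 1 gives i = 0 + 1 * 3 = 3, hence
     vec_index = 3 / 4 = 0 and mask_element = 3 % 4 = 3 -- the final 3 in
     the first mask {0,0,0,3} quoted above.  */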
3744 int vect_stmts_counter
= 0;
3745 unsigned int index
= 0;
3746 int first_vec_index
= -1;
3747 int second_vec_index
= -1;
3751 for (unsigned int j
= 0; j
< const_vf
; j
++)
3753 for (int k
= 0; k
< group_size
; k
++)
3755 unsigned int i
= (SLP_TREE_LOAD_PERMUTATION (node
)[k
]
3756 + j
* DR_GROUP_SIZE (stmt_info
));
3757 vec_index
= i
/ nunits
;
3758 mask_element
= i
% nunits
;
3759 if (vec_index
== first_vec_index
3760 || first_vec_index
== -1)
3762 first_vec_index
= vec_index
;
3764 else if (vec_index
== second_vec_index
3765 || second_vec_index
== -1)
3767 second_vec_index
= vec_index
;
3768 mask_element
+= nunits
;
3772 if (dump_enabled_p ())
3774 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3775 "permutation requires at "
3776 "least three vectors ");
3777 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
3780 gcc_assert (analyze_only
);
3784 gcc_assert (mask_element
< 2 * nunits
);
3785 if (mask_element
!= index
)
3787 mask
[index
++] = mask_element
;
3789 if (index
== nunits
&& !noop_p
)
3791 indices
.new_vector (mask
, 2, nunits
);
3792 if (!can_vec_perm_const_p (mode
, indices
))
3794 if (dump_enabled_p ())
3796 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
3798 "unsupported vect permute { ");
3799 for (i
= 0; i
< nunits
; ++i
)
3801 dump_dec (MSG_MISSED_OPTIMIZATION
, mask
[i
]);
3802 dump_printf (MSG_MISSED_OPTIMIZATION
, " ");
3804 dump_printf (MSG_MISSED_OPTIMIZATION
, "}\n");
3806 gcc_assert (analyze_only
);
3813 if (index
== nunits
)
3817 tree mask_vec
= NULL_TREE
;
3820 mask_vec
= vec_perm_indices_to_tree (mask_type
, indices
);
3822 if (second_vec_index
== -1)
3823 second_vec_index
= first_vec_index
;
3825 /* Generate the permute statement if necessary. */
3826 tree first_vec
= dr_chain
[first_vec_index
];
3827 tree second_vec
= dr_chain
[second_vec_index
];
3832 = vect_create_destination_var (gimple_assign_lhs (stmt
),
3834 perm_dest
= make_ssa_name (perm_dest
);
3835 perm_stmt
= gimple_build_assign (perm_dest
,
3837 first_vec
, second_vec
,
3839 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
3842 /* If mask was NULL_TREE generate the requested
3843 identity transform. */
3844 perm_stmt
= SSA_NAME_DEF_STMT (first_vec
);
3846 /* Store the vector statement in NODE. */
3847 SLP_TREE_VEC_STMTS (node
)[vect_stmts_counter
++] = perm_stmt
;
3851 first_vec_index
= -1;
3852 second_vec_index
= -1;
3861 /* Vectorize SLP instance tree in postorder. */
3864 vect_schedule_slp_instance (slp_tree node
, slp_instance instance
,
3865 scalar_stmts_to_slp_tree_map_t
*bst_map
)
3868 bool grouped_store
, is_store
;
3869 gimple_stmt_iterator si
;
3870 stmt_vec_info stmt_info
;
3871 unsigned int group_size
;
3876 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
3879 /* See if we have already vectorized the same set of stmts and reuse their
3880 vectorized stmts. */
3881 if (slp_tree
*leader
= bst_map
->get (SLP_TREE_SCALAR_STMTS (node
)))
3883 SLP_TREE_VEC_STMTS (node
).safe_splice (SLP_TREE_VEC_STMTS (*leader
));
3887 bst_map
->put (SLP_TREE_SCALAR_STMTS (node
).copy (), node
);
3888 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
3889 vect_schedule_slp_instance (child
, instance
, bst_map
);
3891 /* Push SLP node def-type to stmts. */
3892 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
3893 if (SLP_TREE_DEF_TYPE (child
) != vect_internal_def
)
3894 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child
), j
, stmt
)
3895 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt
)) = SLP_TREE_DEF_TYPE (child
);
3897 stmt
= SLP_TREE_SCALAR_STMTS (node
)[0];
3898 stmt_info
= vinfo_for_stmt (stmt
);
3900 /* VECTYPE is the type of the destination. */
3901 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3902 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
3903 group_size
= SLP_INSTANCE_GROUP_SIZE (instance
);
3905 gcc_assert (SLP_TREE_NUMBER_OF_VEC_STMTS (node
) != 0);
3906 if (!SLP_TREE_VEC_STMTS (node
).exists ())
3907 SLP_TREE_VEC_STMTS (node
).create (SLP_TREE_NUMBER_OF_VEC_STMTS (node
));
3909 if (dump_enabled_p ())
3911 dump_printf_loc (MSG_NOTE
,vect_location
,
3912 "------>vectorizing SLP node starting from: ");
3913 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
3916 /* Vectorized stmts go before the last scalar stmt which is where
3917 all uses are ready. */
3918 si
= gsi_for_stmt (vect_find_last_scalar_stmt_in_slp (node
));
3920 /* Mark the first element of the reduction chain as reduction to properly
3921 transform the node. In the analysis phase only the last element of the
3922 chain is marked as reduction. */
3923 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info
)
3924 && REDUC_GROUP_FIRST_ELEMENT (stmt_info
)
3925 && REDUC_GROUP_FIRST_ELEMENT (stmt_info
) == stmt
)
3927 STMT_VINFO_DEF_TYPE (stmt_info
) = vect_reduction_def
;
3928 STMT_VINFO_TYPE (stmt_info
) = reduc_vec_info_type
;
3931 /* Handle two-operation SLP nodes by vectorizing the group with
3932 both operations and then performing a merge. */
3933 if (SLP_TREE_TWO_OPERATORS (node
))
3935 enum tree_code code0
= gimple_assign_rhs_code (stmt
);
3936 enum tree_code ocode
= ERROR_MARK
;
3938 vec_perm_builder
mask (group_size
, group_size
, 1);
3939 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, ostmt
)
3940 if (gimple_assign_rhs_code (ostmt
) != code0
)
3942 mask
.quick_push (1);
3943 ocode
= gimple_assign_rhs_code (ostmt
);
3946 mask
.quick_push (0);
3947 if (ocode
!= ERROR_MARK
)
3952 tree tmask
= NULL_TREE
;
3953 vect_transform_stmt (stmt
, &si
, &grouped_store
, node
, instance
);
3954 v0
= SLP_TREE_VEC_STMTS (node
).copy ();
3955 SLP_TREE_VEC_STMTS (node
).truncate (0);
3956 gimple_assign_set_rhs_code (stmt
, ocode
);
3957 vect_transform_stmt (stmt
, &si
, &grouped_store
, node
, instance
);
3958 gimple_assign_set_rhs_code (stmt
, code0
);
3959 v1
= SLP_TREE_VEC_STMTS (node
).copy ();
3960 SLP_TREE_VEC_STMTS (node
).truncate (0);
3961 tree meltype
= build_nonstandard_integer_type
3962 (GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype
))), 1);
3963 tree mvectype
= get_same_sized_vectype (meltype
, vectype
);
3965 for (j
= 0; j
< v0
.length (); ++j
)
3967 /* Enforced by vect_build_slp_tree, which rejects variable-length
3968 vectors for SLP_TREE_TWO_OPERATORS. */
3969 unsigned int const_nunits
= nunits
.to_constant ();
3970 tree_vector_builder
melts (mvectype
, const_nunits
, 1);
3971 for (l
= 0; l
< const_nunits
; ++l
)
3973 if (k
>= group_size
)
3975 tree t
= build_int_cst (meltype
,
3976 mask
[k
++] * const_nunits
+ l
);
3977 melts
.quick_push (t
);
3979 tmask
= melts
.build ();
3981 /* ??? Not all targets support a VEC_PERM_EXPR with a
3982 constant mask that would translate to a vec_merge RTX
3983 (with their vec_perm_const_ok). We can either not
3984 vectorize in that case or let veclower do its job.
3985 Unfortunately that isn't too great and at least for
3986 plus/minus we'd eventually like to match targets
3987 vector addsub instructions. */
3989 vstmt
= gimple_build_assign (make_ssa_name (vectype
),
3991 gimple_assign_lhs (v0
[j
]),
3992 gimple_assign_lhs (v1
[j
]), tmask
);
3993 vect_finish_stmt_generation (stmt
, vstmt
, &si
);
3994 SLP_TREE_VEC_STMTS (node
).quick_push (vstmt
);
4001 is_store
= vect_transform_stmt (stmt
, &si
, &grouped_store
, node
, instance
);
4003 /* Restore stmt def-types. */
4004 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
4005 if (SLP_TREE_DEF_TYPE (child
) != vect_internal_def
)
4006 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child
), j
, stmt
)
4007 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt
)) = vect_internal_def
;
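  /* An assumed illustration of the SLP_TREE_TWO_OPERATORS path above: for a
     group { a[0] + b[0], a[1] - b[1] } the node is vectorized twice, once
     with PLUS_EXPR (v0) and once with MINUS_EXPR (v1), and the two results
     are blended by a VEC_PERM_EXPR whose constant mask takes lane 0 from v0
     and lane 1 from v1 -- effectively an add/sub merge.  */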
4012 /* Replace scalar calls from SLP node NODE with setting of their lhs to zero.
4013 For loop vectorization this is done in vectorizable_call, but for SLP
4014 it needs to be deferred until end of vect_schedule_slp, because multiple
4015 SLP instances may refer to the same scalar stmt. */
4018 vect_remove_slp_scalar_calls (slp_tree node
)
4020 gimple
*stmt
, *new_stmt
;
4021 gimple_stmt_iterator gsi
;
4025 stmt_vec_info stmt_info
;
4027 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
4030 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
4031 vect_remove_slp_scalar_calls (child
);
4033 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
4035 if (!is_gimple_call (stmt
) || gimple_bb (stmt
) == NULL
)
4037 stmt_info
= vinfo_for_stmt (stmt
);
4038 if (stmt_info
== NULL
4039 || is_pattern_stmt_p (stmt_info
)
4040 || !PURE_SLP_STMT (stmt_info
))
4042 lhs
= gimple_call_lhs (stmt
);
4043 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (TREE_TYPE (lhs
)));
4044 set_vinfo_for_stmt (new_stmt
, stmt_info
);
4045 set_vinfo_for_stmt (stmt
, NULL
);
4046 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
4047 gsi
= gsi_for_stmt (stmt
);
4048 gsi_replace (&gsi
, new_stmt
, false);
4049 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt
)) = new_stmt
;
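  /* Illustration (example assumed): once the vector statement for an SLP
     group of calls like  x_1 = sqrtf (a);  y_2 = sqrtf (b);  has been
     emitted, each scalar call is rewritten here as  x_1 = 0.0f;  so the now
     dead scalar lhs keeps a definition until DCE deletes it, instead of
     re-executing the call once per scalar lane.  */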
4053 /* Generate vector code for all SLP instances in the loop/basic block. */
4056 vect_schedule_slp (vec_info
*vinfo
)
4058 vec
<slp_instance
> slp_instances
;
4059 slp_instance instance
;
4061 bool is_store
= false;
4064 scalar_stmts_to_slp_tree_map_t
*bst_map
4065 = new scalar_stmts_to_slp_tree_map_t ();
4066 slp_instances
= vinfo
->slp_instances
;
4067 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
4069 /* Schedule the tree of INSTANCE. */
4070 is_store
= vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance
),
4072 if (dump_enabled_p ())
4073 dump_printf_loc (MSG_NOTE
, vect_location
,
4074 "vectorizing stmts using SLP.\n");
4078 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
4080 slp_tree root
= SLP_INSTANCE_TREE (instance
);
4083 gimple_stmt_iterator gsi
;
4085 /* Remove scalar call stmts. Do not do this for basic-block
4086 vectorization as not all uses may be vectorized.
4087 ??? Why should this be necessary? DCE should be able to
4088 remove the stmts itself.
4089 ??? For BB vectorization we can as well remove scalar
4090 stmts starting from the SLP tree root if they have no
4092 if (is_a
<loop_vec_info
> (vinfo
))
4093 vect_remove_slp_scalar_calls (root
);
4095 for (j
= 0; SLP_TREE_SCALAR_STMTS (root
).iterate (j
, &store
)
4096 && j
< SLP_INSTANCE_GROUP_SIZE (instance
); j
++)
4098 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store
)))
4101 if (is_pattern_stmt_p (vinfo_for_stmt (store
)))
4102 store
= STMT_VINFO_RELATED_STMT (vinfo_for_stmt (store
));
4103 /* Free the attached stmt_vec_info and remove the stmt. */
4104 gsi
= gsi_for_stmt (store
);
4105 unlink_stmt_vdef (store
);
4106 gsi_remove (&gsi
, true);
4107 release_defs (store
);
4108 free_stmt_vec_info (store
);