/* SLP - Basic Block Vectorization
   Copyright (C) 2007-2018 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "tree-pass.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "fold-const.h"
#include "stor-layout.h"
#include "gimple-iterator.h"
#include "tree-vectorizer.h"
#include "langhooks.h"
#include "gimple-walk.h"
#include "tree-vector-builder.h"
#include "vec-perm-indices.h"
#include "gimple-fold.h"
#include "internal-fn.h"
/* Recursively free the memory allocated for the SLP tree rooted at NODE.  */

static void
vect_free_slp_tree (slp_tree node)
{
  int i;
  slp_tree child;
  gimple *stmt;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_free_slp_tree (child);

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    /* After transform some stmts are removed and thus their vinfo is gone.  */
    if (vinfo_for_stmt (stmt))
      {
        gcc_assert (STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt)) > 0);
        STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt))--;
      }

  SLP_TREE_CHILDREN (node).release ();
  SLP_TREE_SCALAR_STMTS (node).release ();
  SLP_TREE_VEC_STMTS (node).release ();
  SLP_TREE_LOAD_PERMUTATION (node).release ();

  free (node);
}
/* Free the memory allocated for the SLP instance.  */

void
vect_free_slp_instance (slp_instance instance)
{
  vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
  SLP_INSTANCE_LOADS (instance).release ();
  free (instance);
}
/* Create an SLP node for SCALAR_STMTS.  */

static slp_tree
vect_create_new_slp_node (vec<gimple *> scalar_stmts)
{
  slp_tree node;
  gimple *stmt = scalar_stmts[0];
  unsigned int nops;

  if (is_gimple_call (stmt))
    nops = gimple_call_num_args (stmt);
  else if (is_gimple_assign (stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
        nops++;
    }
  else if (gimple_code (stmt) == GIMPLE_PHI)
    nops = 0;
  else
    return NULL;

  node = XNEW (struct _slp_tree);
  SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
  SLP_TREE_VEC_STMTS (node).create (0);
  SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
  SLP_TREE_CHILDREN (node).create (nops);
  SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
  SLP_TREE_TWO_OPERATORS (node) = false;
  SLP_TREE_DEF_TYPE (node) = vect_internal_def;

  unsigned i;
  FOR_EACH_VEC_ELT (scalar_stmts, i, stmt)
    STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt))++;

  return node;
}
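/* The STMT_VINFO_NUM_SLP_USES counter incremented above pairs with the
   decrement in vect_free_slp_tree: it records how many SLP nodes still
   reference a scalar stmt, so a stmt shared by several nodes keeps a
   non-zero use count until the last referencing node is freed.  */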
/* This structure is used in creation of an SLP tree.  Each instance
   corresponds to the same operand in a group of scalar stmts in an SLP
   node.  */
typedef struct _slp_oprnd_info
{
  /* Def-stmts for the operands.  */
  vec<gimple *> def_stmts;
  /* Information about the first statement, its vector def-type, type, the
     operand itself in case it's constant, and an indication if it's a pattern
     stmt.  */
  enum vect_def_type first_dt;
  tree first_op_type;
  bool first_pattern;
  bool second_pattern;
} slp_oprnd_info;
/* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
   operand.  */
static vec<slp_oprnd_info>
vect_create_oprnd_info (int nops, int group_size)
{
  int i;
  slp_oprnd_info oprnd_info;
  vec<slp_oprnd_info> oprnds_info;

  oprnds_info.create (nops);
  for (i = 0; i < nops; i++)
    {
      oprnd_info = XNEW (struct _slp_oprnd_info);
      oprnd_info->def_stmts.create (group_size);
      oprnd_info->first_dt = vect_uninitialized_def;
      oprnd_info->first_op_type = NULL_TREE;
      oprnd_info->first_pattern = false;
      oprnd_info->second_pattern = false;
      oprnds_info.quick_push (oprnd_info);
    }

  return oprnds_info;
}
/* Free operands info.  */

static void
vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)
{
  int i;
  slp_oprnd_info oprnd_info;

  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      oprnd_info->def_stmts.release ();
      XDELETE (oprnd_info);
    }

  oprnds_info.release ();
}
/* Find the place of the data-ref in STMT in the interleaving chain that starts
   from FIRST_STMT.  Return -1 if the data-ref is not a part of the chain.  */

static int
vect_get_place_in_interleaving_chain (gimple *stmt, gimple *first_stmt)
{
  gimple *next_stmt = first_stmt;
  int result = 0;

  if (first_stmt != GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    return -1;

  do
    {
      if (next_stmt == stmt)
        return result;
      next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
      if (next_stmt)
        result += GROUP_GAP (vinfo_for_stmt (next_stmt));
    }
  while (next_stmt);

  return -1;
}
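/* For a contiguous interleaving chain every GROUP_GAP is 1, so the value
   accumulated in RESULT is simply the ordinal position of STMT within the
   chain; a gap of G in front of an element makes that element account for
   G positions instead of one.  */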
/* Check whether it is possible to load COUNT elements of type ELT_MODE
   using the method implemented by duplicate_and_interleave.  Return true
   if so, returning the number of intermediate vectors in *NVECTORS_OUT
   (if nonnull) and the type of each intermediate vector in *VECTOR_TYPE_OUT
   (if nonnull).  */

can_duplicate_and_interleave_p (unsigned int count, machine_mode elt_mode,
                                unsigned int *nvectors_out,
                                tree *vector_type_out,

  poly_int64 elt_bytes = count * GET_MODE_SIZE (elt_mode);

  unsigned int nvectors = 1;

      scalar_int_mode int_mode;
      poly_int64 elt_bits = elt_bytes * BITS_PER_UNIT;
      if (multiple_p (current_vector_size, elt_bytes, &nelts)
          && int_mode_for_size (elt_bits, 0).exists (&int_mode))
          tree int_type = build_nonstandard_integer_type
            (GET_MODE_BITSIZE (int_mode), 1);
          tree vector_type = build_vector_type (int_type, nelts);
          if (VECTOR_MODE_P (TYPE_MODE (vector_type)))
              vec_perm_builder sel1 (nelts, 2, 3);
              vec_perm_builder sel2 (nelts, 2, 3);
              poly_int64 half_nelts = exact_div (nelts, 2);
              for (unsigned int i = 0; i < 3; ++i)
                  sel1.quick_push (i + nelts);
                  sel2.quick_push (half_nelts + i);
                  sel2.quick_push (half_nelts + i + nelts);

              vec_perm_indices indices1 (sel1, 2, nelts);
              vec_perm_indices indices2 (sel2, 2, nelts);
              if (can_vec_perm_const_p (TYPE_MODE (vector_type), indices1)
                  && can_vec_perm_const_p (TYPE_MODE (vector_type), indices2))
                    *nvectors_out = nvectors;

                    *vector_type_out = vector_type;

                  permutes[0] = vect_gen_perm_mask_checked (vector_type,
                  permutes[1] = vect_gen_perm_mask_checked (vector_type,

      if (!multiple_p (elt_bytes, 2, &elt_bytes))
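/* Assuming the loop above also pushes the plain index I into SEL1 (that
   line is not visible here), the two selectors describe the usual low/high
   interleave of two NELTS-element vectors: for nelts == 4 they would
   encode { 0, 4, 1, 5 } and { 2, 6, 3, 7 }, i.e. the permutations that
   merge the low and high halves of the duplicated constant vectors.  */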
/* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
   they are of a valid type and that they match the defs of the first stmt of
   the SLP group (stored in OPRNDS_INFO).  This function tries to match stmts
   by swapping operands of STMTS[STMT_NUM] when possible.  Non-zero *SWAP
   indicates swap is required for cond_expr stmts.  Specifically, *SWAP
   is 1 if STMT is cond and operands of comparison need to be swapped;
   *SWAP is 2 if STMT is cond and code of comparison needs to be inverted.
   If there is any operand swap in this function, *SWAP is set to non-zero
   value.

   If there was a fatal error return -1; if the error could be corrected by
   swapping operands of father node of this one, return 1; if everything is
   ok return 0.  */
vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char *swap,
                             vec<gimple *> stmts, unsigned stmt_num,
                             vec<slp_oprnd_info> *oprnds_info)

  gimple *stmt = stmts[stmt_num];
  unsigned int i, number_of_oprnds;
  enum vect_def_type dt = vect_uninitialized_def;
  bool pattern = false;
  slp_oprnd_info oprnd_info;
  int first_op_idx = 1;
  bool commutative = false;
  bool first_op_cond = false;
  bool first = stmt_num == 0;
  bool second = stmt_num == 1;

  if (is_gimple_call (stmt))
      number_of_oprnds = gimple_call_num_args (stmt);

  else if (is_gimple_assign (stmt))
      enum tree_code code = gimple_assign_rhs_code (stmt);
      number_of_oprnds = gimple_num_ops (stmt) - 1;
      /* Swap can only be done for cond_expr if asked to, otherwise we
         could result in different comparison code to the first stmt.  */
      if (gimple_assign_rhs_code (stmt) == COND_EXPR
          && COMPARISON_CLASS_P (gimple_assign_rhs1 (stmt)))
          first_op_cond = true;

        commutative = commutative_tree_code (code);

  bool swapped = (*swap != 0);
  gcc_assert (!swapped || first_op_cond);
  for (i = 0; i < number_of_oprnds; i++)

          /* Map indicating how operands of cond_expr should be swapped.  */
          int maps[3][4] = {{0, 1, 2, 3}, {1, 0, 2, 3}, {0, 1, 3, 2}};
          int *map = maps[*swap];

            oprnd = TREE_OPERAND (gimple_op (stmt, first_op_idx), map[i]);

            oprnd = gimple_op (stmt, map[i]);

        oprnd = gimple_op (stmt, first_op_idx + (swapped ? !i : i));

      oprnd_info = (*oprnds_info)[i];

      if (!vect_is_simple_use (oprnd, vinfo, &def_stmt, &dt))
          if (dump_enabled_p ())
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: can't analyze def for ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");

      /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
         from the pattern.  Check that all the stmts of the node are in the
         same pattern.  */
      if (def_stmt && gimple_bb (def_stmt)
          && vect_stmt_in_region_p (vinfo, def_stmt)
          && vinfo_for_stmt (def_stmt)
          && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt))
          && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt))
          && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))

          if (!first && !oprnd_info->first_pattern
              /* Allow different pattern state for the defs of the
                 first stmt in reduction chains.  */
              && (oprnd_info->first_dt != vect_reduction_def
                  || (!second && !oprnd_info->second_pattern)))

              if (dump_enabled_p ())
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: some of the stmts"
                                   " are in a pattern, and others are not ");
                  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");

          def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
          dt = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));

          if (dt == vect_unknown_def_type)
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "Unsupported pattern.\n");

          switch (gimple_code (def_stmt))

              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "unsupported defining stmt:\n");

        oprnd_info->second_pattern = pattern;

          oprnd_info->first_dt = dt;
          oprnd_info->first_pattern = pattern;
          oprnd_info->first_op_type = TREE_TYPE (oprnd);

          /* Not first stmt of the group, check that the def-stmt/s match
             the def-stmt/s of the first stmt.  Allow different definition
             types for reduction chains: the first stmt must be a
             vect_reduction_def (a phi node), and the rest
             vect_internal_def.  */
          tree type = TREE_TYPE (oprnd);
          if ((oprnd_info->first_dt != dt
               && !(oprnd_info->first_dt == vect_reduction_def
                    && dt == vect_internal_def)
               && !((oprnd_info->first_dt == vect_external_def
                     || oprnd_info->first_dt == vect_constant_def)
                    && (dt == vect_external_def
                        || dt == vect_constant_def)))
              || !types_compatible_p (oprnd_info->first_op_type, type))

              /* Try swapping operands if we got a mismatch.  */

              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "Build SLP failed: different types\n");

      if ((dt == vect_constant_def
           || dt == vect_external_def)
          && !current_vector_size.is_constant ()
          && (TREE_CODE (type) == BOOLEAN_TYPE
              || !can_duplicate_and_interleave_p (stmts.length (),

          if (dump_enabled_p ())
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: invalid type of def "
                               "for variable-length SLP ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");

      /* Check the types of the definitions.  */

        case vect_constant_def:
        case vect_external_def:

        case vect_reduction_def:
        case vect_induction_def:
        case vect_internal_def:
          oprnd_info->def_stmts.quick_push (def_stmt);

          /* FORNOW: Not supported.  */
          if (dump_enabled_p ())
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: illegal type of def ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");

      /* If there are already uses of this stmt in a SLP instance then
         we've committed to the operand order and can't swap it.  */
      if (STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt)) != 0)
          if (dump_enabled_p ())
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: cannot swap operands of "
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);

          tree cond = gimple_assign_rhs1 (stmt);
          enum tree_code code = TREE_CODE (cond);

              swap_ssa_operands (stmt, &TREE_OPERAND (cond, 0),
                                 &TREE_OPERAND (cond, 1));
              TREE_SET_CODE (cond, swap_tree_comparison (code));

              swap_ssa_operands (stmt, gimple_assign_rhs2_ptr (stmt),
                                 gimple_assign_rhs3_ptr (stmt));
              bool honor_nans = HONOR_NANS (TREE_OPERAND (cond, 0));
              code = invert_tree_comparison (TREE_CODE (cond), honor_nans);
              gcc_assert (code != ERROR_MARK);
              TREE_SET_CODE (cond, code);

        swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
                           gimple_assign_rhs2_ptr (stmt));
      if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "swapped operands to match def types in ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
/* A subroutine of vect_build_slp_tree for checking VECTYPE, which is the
   caller's attempt to find the vector type in STMT with the narrowest
   element type.  Return true if VECTYPE is nonnull and if it is valid
   for VINFO.  When returning true, update MAX_NUNITS to reflect the
   number of units in VECTYPE.  VINFO, GROUP_SIZE and MAX_NUNITS are
   as for vect_build_slp_tree.  */

static bool
vect_record_max_nunits (vec_info *vinfo, gimple *stmt, unsigned int group_size,
                        tree vectype, poly_uint64 *max_nunits)
{
  if (!vectype)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "Build SLP failed: unsupported data-type in ");
          dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
          dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
        }
      /* Fatal mismatch.  */
      return false;
    }

  /* If populating the vector type requires unrolling then fail
     before adjusting *max_nunits for basic-block vectorization.  */
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
  unsigned HOST_WIDE_INT const_nunits;
  if (is_a <bb_vec_info> (vinfo)
      && (!nunits.is_constant (&const_nunits)
          || const_nunits > group_size))
    {
      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                       "Build SLP failed: unrolling required "
                       "in basic block SLP\n");
      /* Fatal mismatch.  */
      return false;
    }

  /* In case of multiple types we need to detect the smallest type.  */
  vect_update_max_nunits (max_nunits, vectype);
  return true;
}
/* STMTS is a group of GROUP_SIZE SLP statements in which some
   statements do the same operation as the first statement and in which
   the others do ALT_STMT_CODE.  Return true if we can take one vector
   of the first operation and one vector of the second and permute them
   to get the required result.  VECTYPE is the type of the vector that
   would be permuted.  */

static bool
vect_two_operations_perm_ok_p (vec<gimple *> stmts, unsigned int group_size,
                               tree vectype, tree_code alt_stmt_code)
{
  unsigned HOST_WIDE_INT count;
  if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&count))
    return false;

  vec_perm_builder sel (count, count, 1);
  for (unsigned int i = 0; i < count; ++i)
    {
      unsigned int elt = i;
      if (gimple_assign_rhs_code (stmts[i % group_size]) == alt_stmt_code)
        elt += count;
      sel.quick_push (elt);
    }

  vec_perm_indices indices (sel, 2, count);
  return can_vec_perm_const_p (TYPE_MODE (vectype), indices);
}
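/* Worked example (given the elt += count adjustment reconstructed above):
   for a group alternating PLUS_EXPR / MINUS_EXPR and a vector of
   count == 4 elements the selector becomes { 0, 5, 2, 7 } - even elements
   are taken from the vector of first-code results, odd elements from the
   vector of ALT_STMT_CODE results.  */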
/* Verify if the scalar stmts STMTS are isomorphic, require data
   permutation or are of unsupported types of operation.  Return
   true if they are, otherwise return false and indicate in *MATCHES
   which stmts are not isomorphic to the first one.  If MATCHES[0]
   is false then this indicates the comparison could not be
   carried out or the stmts will never be vectorized by SLP.

   Note COND_EXPR is possibly isomorphic to another one after swapping its
   operands.  Set SWAP[i] to 1 if stmt I is COND_EXPR and isomorphic to
   the first stmt by swapping the two operands of comparison; set SWAP[i]
   to 2 if stmt I is isomorphic to the first stmt by inverting the code
   of comparison.  Take A1 >= B1 ? X1 : Y1 as an example, it can be swapped
   to (B1 <= A1 ? X1 : Y1); or be inverted to (A1 < B1) ? Y1 : X1.  */
vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
                       vec<gimple *> stmts, unsigned int group_size,
                       unsigned nops, poly_uint64 *max_nunits,
                       bool *matches, bool *two_operators)

  gimple *first_stmt = stmts[0], *stmt = stmts[0];
  enum tree_code first_stmt_code = ERROR_MARK;
  enum tree_code alt_stmt_code = ERROR_MARK;
  enum tree_code rhs_code = ERROR_MARK;
  enum tree_code first_cond_code = ERROR_MARK;

  bool need_same_oprnds = false;
  tree vectype = NULL_TREE, first_op1 = NULL_TREE;

  machine_mode optab_op2_mode;
  machine_mode vec_mode;
  gimple *first_load = NULL, *prev_first_load = NULL;

  /* For every stmt in NODE find its def stmt/s.  */
  FOR_EACH_VEC_ELT (stmts, i, stmt)

      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

      if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);

      /* Fail to vectorize statements marked as unvectorizable.  */
      if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
          if (dump_enabled_p ())
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: unvectorizable statement ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);

          /* Fatal mismatch.  */

      lhs = gimple_get_lhs (stmt);
      if (lhs == NULL_TREE)
          if (dump_enabled_p ())
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: not GIMPLE_ASSIGN nor "
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);

          /* Fatal mismatch.  */

      if (!vect_get_vector_types_for_stmt (stmt_info, &vectype,

              && !vect_record_max_nunits (vinfo, stmt, group_size,
                                          nunits_vectype, max_nunits)))

          /* Fatal mismatch.  */

      gcc_assert (vectype);

      if (gcall *call_stmt = dyn_cast <gcall *> (stmt))
          rhs_code = CALL_EXPR;
          if (gimple_call_internal_p (call_stmt)
              || gimple_call_tail_p (call_stmt)
              || gimple_call_noreturn_p (call_stmt)
              || !gimple_call_nothrow_p (call_stmt)
              || gimple_call_chain (call_stmt))
              if (dump_enabled_p ())
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: unsupported call type ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,

              /* Fatal mismatch.  */

        rhs_code = gimple_assign_rhs_code (stmt);

      /* Check the operation.  */

          first_stmt_code = rhs_code;

          /* Shift arguments should be equal in all the packed stmts for a
             vector shift with scalar shift operand.  */
          if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
              || rhs_code == LROTATE_EXPR
              || rhs_code == RROTATE_EXPR)
              if (vectype == boolean_type_node)
                  if (dump_enabled_p ())
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "Build SLP failed: shift of a"
                  /* Fatal mismatch.  */

              vec_mode = TYPE_MODE (vectype);

              /* First see if we have a vector/vector shift.  */
              optab = optab_for_tree_code (rhs_code, vectype,

                  || optab_handler (optab, vec_mode) == CODE_FOR_nothing)

                  /* No vector/vector shift, try for a vector/scalar shift.  */
                  optab = optab_for_tree_code (rhs_code, vectype,

                      if (dump_enabled_p ())
                        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                         "Build SLP failed: no optab.\n");
                      /* Fatal mismatch.  */

                  icode = (int) optab_handler (optab, vec_mode);
                  if (icode == CODE_FOR_nothing)
                      if (dump_enabled_p ())
                        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                         "op not supported by target.\n");
                      /* Fatal mismatch.  */

                  optab_op2_mode = insn_data[icode].operand[2].mode;
                  if (!VECTOR_MODE_P (optab_op2_mode))
                      need_same_oprnds = true;
                      first_op1 = gimple_assign_rhs2 (stmt);

          else if (rhs_code == WIDEN_LSHIFT_EXPR)
              need_same_oprnds = true;
              first_op1 = gimple_assign_rhs2 (stmt);

          if (first_stmt_code != rhs_code
              && alt_stmt_code == ERROR_MARK)
            alt_stmt_code = rhs_code;
          if (first_stmt_code != rhs_code
              && (first_stmt_code != IMAGPART_EXPR
                  || rhs_code != REALPART_EXPR)
              && (first_stmt_code != REALPART_EXPR
                  || rhs_code != IMAGPART_EXPR)
              /* Handle mismatches in plus/minus by computing both
                 and merging the results.  */
              && !((first_stmt_code == PLUS_EXPR
                    || first_stmt_code == MINUS_EXPR)
                   && (alt_stmt_code == PLUS_EXPR
                       || alt_stmt_code == MINUS_EXPR)
                   && rhs_code == alt_stmt_code)
              && !(STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
                   && (first_stmt_code == ARRAY_REF
                       || first_stmt_code == BIT_FIELD_REF
                       || first_stmt_code == INDIRECT_REF
                       || first_stmt_code == COMPONENT_REF
                       || first_stmt_code == MEM_REF)))

              if (dump_enabled_p ())
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: different operation "
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,

              && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
              if (dump_enabled_p ())
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: different shift "
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);

          if (rhs_code == CALL_EXPR)
              gimple *first_stmt = stmts[0];
              if (gimple_call_num_args (stmt) != nops
                  || !operand_equal_p (gimple_call_fn (first_stmt),
                                       gimple_call_fn (stmt), 0)
                  || gimple_call_fntype (first_stmt)
                     != gimple_call_fntype (stmt))
                  if (dump_enabled_p ())
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "Build SLP failed: different calls in ");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,

      /* Grouped store or load.  */
      if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
          if (REFERENCE_CLASS_P (lhs))

              first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));

                  /* Check that there are no loads from different interleaving
                     chains in the same node.  */
                  if (prev_first_load != first_load)
                      if (dump_enabled_p ())
                          dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                           "Build SLP failed: different "
                                           "interleaving chains in one node ");
                          dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,

                prev_first_load = first_load;

        } /* Grouped access.  */

          if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
              /* Not grouped load.  */
              if (dump_enabled_p ())
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: not grouped load ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);

              /* FORNOW: Not grouped loads are not supported.  */
              /* Fatal mismatch.  */

          /* Not memory operation.  */
          if (TREE_CODE_CLASS (rhs_code) != tcc_binary
              && TREE_CODE_CLASS (rhs_code) != tcc_unary
              && TREE_CODE_CLASS (rhs_code) != tcc_expression
              && TREE_CODE_CLASS (rhs_code) != tcc_comparison
              && rhs_code != CALL_EXPR)

              if (dump_enabled_p ())
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: operation");
                  dump_printf (MSG_MISSED_OPTIMIZATION, " unsupported ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);

              /* Fatal mismatch.  */

          if (rhs_code == COND_EXPR)
              tree cond_expr = gimple_assign_rhs1 (stmt);
              enum tree_code cond_code = TREE_CODE (cond_expr);
              enum tree_code swap_code = ERROR_MARK;
              enum tree_code invert_code = ERROR_MARK;

                first_cond_code = TREE_CODE (cond_expr);
              else if (TREE_CODE_CLASS (cond_code) == tcc_comparison)
                  bool honor_nans = HONOR_NANS (TREE_OPERAND (cond_expr, 0));
                  swap_code = swap_tree_comparison (cond_code);
                  invert_code = invert_tree_comparison (cond_code, honor_nans);

              if (first_cond_code == cond_code)
              /* Isomorphic can be achieved by swapping.  */
              else if (first_cond_code == swap_code)
              /* Isomorphic can be achieved by inverting.  */
              else if (first_cond_code == invert_code)
                  if (dump_enabled_p ())
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "Build SLP failed: different"
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,

  for (i = 0; i < group_size; ++i)

  /* If we allowed a two-operation SLP node verify the target can cope
     with the permute we are going to use.  */
  if (alt_stmt_code != ERROR_MARK
      && TREE_CODE_CLASS (alt_stmt_code) != tcc_reference)

      if (vectype == boolean_type_node
          || !vect_two_operations_perm_ok_p (stmts, group_size,
                                             vectype, alt_stmt_code))

          for (i = 0; i < group_size; ++i)
            if (gimple_assign_rhs_code (stmts[i]) == alt_stmt_code)

                if (dump_enabled_p ())
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "Build SLP failed: different operation "
                    dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                    dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,

      *two_operators = true;
/* Traits for the hash_set to record failed SLP builds for a stmt set.
   Note we never remove apart from at destruction time so we do not
   need a special value for deleted that differs from empty.  */
struct bst_traits
{
  typedef vec <gimple *> value_type;
  typedef vec <gimple *> compare_type;
  static inline hashval_t hash (value_type);
  static inline bool equal (value_type existing, value_type candidate);
  static inline bool is_empty (value_type x) { return !x.exists (); }
  static inline bool is_deleted (value_type x) { return !x.exists (); }
  static inline void mark_empty (value_type &x) { x.release (); }
  static inline void mark_deleted (value_type &x) { x.release (); }
  static inline void remove (value_type &x) { x.release (); }
};
inline hashval_t
bst_traits::hash (value_type x)
{
  inchash::hash h;
  for (unsigned i = 0; i < x.length (); ++i)
    h.add_int (gimple_uid (x[i]));
  return h.end ();
}
inline bool
bst_traits::equal (value_type existing, value_type candidate)
{
  if (existing.length () != candidate.length ())
    return false;
  for (unsigned i = 0; i < existing.length (); ++i)
    if (existing[i] != candidate[i])
      return false;
  return true;
}

typedef hash_set <vec <gimple *>, bst_traits> scalar_stmts_set_t;
static scalar_stmts_set_t *bst_fail;

typedef hash_map <vec <gimple *>, slp_tree,
                  simple_hashmap_traits <bst_traits, slp_tree> >
  scalar_stmts_to_slp_tree_map_t;
vect_build_slp_tree_2 (vec_info *vinfo,
                       vec<gimple *> stmts, unsigned int group_size,
                       poly_uint64 *max_nunits,
                       vec<slp_tree> *loads,
                       bool *matches, unsigned *npermutes, unsigned *tree_size,
                       unsigned max_tree_size);

vect_build_slp_tree (vec_info *vinfo,
                     vec<gimple *> stmts, unsigned int group_size,
                     poly_uint64 *max_nunits, vec<slp_tree> *loads,
                     bool *matches, unsigned *npermutes, unsigned *tree_size,
                     unsigned max_tree_size)

  if (bst_fail->contains (stmts))

  slp_tree res = vect_build_slp_tree_2 (vinfo, stmts, group_size, max_nunits,
                                        loads, matches, npermutes, tree_size,

  /* When SLP build fails for stmts record this, otherwise SLP build
     can be exponential in time when we allow to construct parts from
     scalars, see PR81723.  */

      x.create (stmts.length ());
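/* In other words, vect_build_slp_tree acts as a memoizing wrapper around
   vect_build_slp_tree_2: stmt sets that already failed are looked up in
   BST_FAIL and rejected immediately, and a newly failing set is recorded
   so the build cannot become exponential when parts of the tree are built
   from scalars (PR81723).  */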
/* Recursively build an SLP tree starting from NODE.
   Fail (and return a value not equal to zero) if def-stmts are not
   isomorphic, require data permutation or are of unsupported types of
   operation.  Otherwise, return 0.
   The value returned is the depth in the SLP tree where a mismatch
   was found.  */

vect_build_slp_tree_2 (vec_info *vinfo,
                       vec<gimple *> stmts, unsigned int group_size,
                       poly_uint64 *max_nunits,
                       vec<slp_tree> *loads,
                       bool *matches, unsigned *npermutes, unsigned *tree_size,
                       unsigned max_tree_size)

  unsigned nops, i, this_tree_size = 0;
  poly_uint64 this_max_nunits = *max_nunits;

  if (is_gimple_call (stmt))
    nops = gimple_call_num_args (stmt);
  else if (is_gimple_assign (stmt))
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)

  else if (gimple_code (stmt) == GIMPLE_PHI)

  /* If the SLP node is a PHI (induction or reduction), terminate
     the recursion.  */
  if (gimple_code (stmt) == GIMPLE_PHI)

      tree scalar_type = TREE_TYPE (PHI_RESULT (stmt));
      tree vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vect_record_max_nunits (vinfo, stmt, group_size, vectype,

      vect_def_type def_type = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt));
      /* Induction from different IVs is not supported.  */
      if (def_type == vect_induction_def)
          FOR_EACH_VEC_ELT (stmts, i, stmt)
            if (stmt != stmts[0])

          /* Else def types have to match.  */
          FOR_EACH_VEC_ELT (stmts, i, stmt)

              /* But for reduction chains only check on the first stmt.  */
              if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
                  && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != stmt)
              if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) != def_type)

      node = vect_create_new_slp_node (stmts);

  bool two_operators = false;
  unsigned char *swap = XALLOCAVEC (unsigned char, group_size);
  if (!vect_build_slp_tree_1 (vinfo, swap,
                              stmts, group_size, nops,
                              &this_max_nunits, matches, &two_operators))

  /* If the SLP node is a load, terminate the recursion.  */
  if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
      && DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))))

      *max_nunits = this_max_nunits;
      node = vect_create_new_slp_node (stmts);
      loads->safe_push (node);

  /* Get at the operands, verifying they are compatible.  */
  vec<slp_oprnd_info> oprnds_info = vect_create_oprnd_info (nops, group_size);
  slp_oprnd_info oprnd_info;
  FOR_EACH_VEC_ELT (stmts, i, stmt)

      int res = vect_get_and_check_slp_defs (vinfo, &swap[i],
                                             stmts, i, &oprnds_info);

          matches[(res == -1) ? 0 : i] = false;

      for (i = 0; i < group_size; ++i)

      vect_free_oprnd_info (oprnds_info);

  auto_vec<slp_tree, 4> children;
  auto_vec<slp_tree> this_loads;

  max_tree_size -= *tree_size;

  /* Create SLP_TREE nodes for the definition node/s.  */
  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)

      unsigned old_nloads = this_loads.length ();
      unsigned old_tree_size = this_tree_size;

      if (oprnd_info->first_dt != vect_internal_def
          && oprnd_info->first_dt != vect_reduction_def
          && oprnd_info->first_dt != vect_induction_def)

      if (++this_tree_size > max_tree_size)

          FOR_EACH_VEC_ELT (children, j, child)
            vect_free_slp_tree (child);
          vect_free_oprnd_info (oprnds_info);

      if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
                                        group_size, &this_max_nunits,
                                        &this_loads, matches, npermutes,
                                        max_tree_size)) != NULL)

          /* If we have all children of child built up from scalars then just
             throw that away and build it up this node from scalars.  */
          if (!SLP_TREE_CHILDREN (child).is_empty ()
              /* ??? Rejecting patterns this way doesn't work.  We'd have to
                 do extra work to cancel the pattern so the uses see the
                 scalar version.  */
              && !is_pattern_stmt_p
                    (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0])))

              slp_tree grandchild;

              FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
                if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)

                  this_loads.truncate (old_nloads);
                  this_tree_size = old_tree_size;
                  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
                    vect_free_slp_tree (grandchild);
                  SLP_TREE_CHILDREN (child).truncate (0);

                  dump_printf_loc (MSG_NOTE, vect_location,
                                   "Building parent vector operands from "
                                   "scalars instead\n");
                  oprnd_info->def_stmts = vNULL;
                  SLP_TREE_DEF_TYPE (child) = vect_external_def;
                  children.safe_push (child);

          oprnd_info->def_stmts = vNULL;
          children.safe_push (child);

      /* If the SLP build failed fatally and we analyze a basic-block
         simply treat nodes we fail to build as externally defined
         (and thus build vectors from the scalar defs).
         The cost model will reject outright expensive cases.
         ??? This doesn't treat cases where permutation ultimately
         fails (or we don't try permutation below).  Ideally we'd
         even compute a permutation that will end up with the maximum
         SLP tree size...  */
      if (is_a <bb_vec_info> (vinfo)
          /* ??? Rejecting patterns this way doesn't work.  We'd have to
             do extra work to cancel the pattern so the uses see the
             scalar version.  */
          && !is_pattern_stmt_p (vinfo_for_stmt (stmt)))

          dump_printf_loc (MSG_NOTE, vect_location,
                           "Building vector operands from scalars\n");
          child = vect_create_new_slp_node (oprnd_info->def_stmts);
          SLP_TREE_DEF_TYPE (child) = vect_external_def;
          children.safe_push (child);
          oprnd_info->def_stmts = vNULL;

      /* If the SLP build for operand zero failed and operand zero
         and one can be commutated try that for the scalar stmts
         that failed the match.  */

          /* A first scalar stmt mismatch signals a fatal mismatch.  */

          /* ??? For COND_EXPRs we can swap the comparison operands
             as well as the arms under some constraints.  */
          && oprnds_info[1]->first_dt == vect_internal_def
          && is_gimple_assign (stmt)
          /* Do so only if the number of not successful permutes was no more
             than a cut-off as re-trying the recursive match on
             possibly each level of the tree would expose exponential
             behavior.  */

          /* See whether we can swap the matching or the non-matching
             stmt operands.  */
          bool swap_not_matching = true;

              for (j = 0; j < group_size; ++j)

                  if (matches[j] != !swap_not_matching)

                      gimple *stmt = stmts[j];
                      /* Verify if we can swap operands of this stmt.  */
                      if (!is_gimple_assign (stmt)
                          || !commutative_tree_code (gimple_assign_rhs_code (stmt)))

                          if (!swap_not_matching)

                          swap_not_matching = false;

                      /* Verify if we can safely swap or if we committed to a
                         specific operand order already.
                         ??? Instead of modifying GIMPLE stmts here we could
                         record whether we want to swap operands in the SLP
                         node and temporarily do that when processing it
                         (or wrap operand accessors in a helper).  */
                      else if (swap[j] != 0
                               || STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt)))

                          if (!swap_not_matching)

                              if (dump_enabled_p ())

                                  dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                                   "Build SLP failed: cannot swap "
                                                   "operands of shared stmt ");
                                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION,
                                                    TDF_SLIM, stmts[j], 0);

                          swap_not_matching = false;

          while (j != group_size);

          /* Swap mismatched definition stmts.  */
          dump_printf_loc (MSG_NOTE, vect_location,
                           "Re-trying with swapped operands of stmts ");
          for (j = 0; j < group_size; ++j)
            if (matches[j] == !swap_not_matching)

                std::swap (oprnds_info[0]->def_stmts[j],
                           oprnds_info[1]->def_stmts[j]);
                dump_printf (MSG_NOTE, "%d ", j);

          dump_printf (MSG_NOTE, "\n");
          /* And try again with scratch 'matches' ... */
          bool *tem = XALLOCAVEC (bool, group_size);
          if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
                                            group_size, &this_max_nunits,
                                            &this_loads, tem, npermutes,
                                            max_tree_size)) != NULL)

              /* ... so if successful we can apply the operand swapping
                 to the GIMPLE IL.  This is necessary because for example
                 vect_get_slp_defs uses operand indexes and thus expects
                 canonical operand order.  This is also necessary even
                 if we end up building the operand from scalars as
                 we'll continue to process swapped operand two.  */
              for (j = 0; j < group_size; ++j)

                  gimple *stmt = stmts[j];
                  gimple_set_plf (stmt, GF_PLF_1, false);

              for (j = 0; j < group_size; ++j)

                  gimple *stmt = stmts[j];
                  if (matches[j] == !swap_not_matching)

                      /* Avoid swapping operands twice.  */
                      if (gimple_plf (stmt, GF_PLF_1))

                      swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
                                         gimple_assign_rhs2_ptr (stmt));
                      gimple_set_plf (stmt, GF_PLF_1, true);

              /* Verify we swap all duplicates or none.  */

              for (j = 0; j < group_size; ++j)

                  gimple *stmt = stmts[j];
                  gcc_assert (gimple_plf (stmt, GF_PLF_1)
                              == (matches[j] == !swap_not_matching));

              /* If we have all children of child built up from scalars then
                 just throw that away and build it up this node from scalars.  */
              if (!SLP_TREE_CHILDREN (child).is_empty ()
                  /* ??? Rejecting patterns this way doesn't work.  We'd have
                     to do extra work to cancel the pattern so the uses see the
                     scalar version.  */
                  && !is_pattern_stmt_p
                        (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0])))

                  slp_tree grandchild;

                  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
                    if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)

                      this_loads.truncate (old_nloads);
                      this_tree_size = old_tree_size;
                      FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
                        vect_free_slp_tree (grandchild);
                      SLP_TREE_CHILDREN (child).truncate (0);

                      dump_printf_loc (MSG_NOTE, vect_location,
                                       "Building parent vector operands from "
                                       "scalars instead\n");
                      oprnd_info->def_stmts = vNULL;
                      SLP_TREE_DEF_TYPE (child) = vect_external_def;
                      children.safe_push (child);

                  oprnd_info->def_stmts = vNULL;
                  children.safe_push (child);

      gcc_assert (child == NULL);
      FOR_EACH_VEC_ELT (children, j, child)
        vect_free_slp_tree (child);
      vect_free_oprnd_info (oprnds_info);

  vect_free_oprnd_info (oprnds_info);

  *tree_size += this_tree_size;
  *max_nunits = this_max_nunits;
  loads->safe_splice (this_loads);

  node = vect_create_new_slp_node (stmts);
  SLP_TREE_TWO_OPERATORS (node) = two_operators;
  SLP_TREE_CHILDREN (node).splice (children);
/* Dump a slp tree NODE using flags specified in DUMP_KIND.  */

static void
vect_print_slp_tree (dump_flags_t dump_kind, location_t loc, slp_tree node)
{
  int i;
  gimple *stmt;
  slp_tree child;

  dump_printf_loc (dump_kind, loc, "node%s\n",
                   SLP_TREE_DEF_TYPE (node) != vect_internal_def
                   ? " (external)" : "");
  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      dump_printf_loc (dump_kind, loc, "\tstmt %d ", i);
      dump_gimple_stmt (dump_kind, TDF_SLIM, stmt, 0);
    }
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_print_slp_tree (dump_kind, loc, child);
}
/* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
   If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
   J).  Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
   stmts in NODE are to be marked.  */

static void
vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
{
  int i;
  gimple *stmt;
  slp_tree child;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    if (j < 0 || i == j)
      STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts (child, mark, j);
}
/* Mark the statements of the tree rooted at NODE as relevant (vect_used).  */

static void
vect_mark_slp_stmts_relevant (slp_tree node)
{
  int i;
  gimple *stmt;
  stmt_vec_info stmt_info;
  slp_tree child;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      stmt_info = vinfo_for_stmt (stmt);
      gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
                  || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
      STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
    }

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts_relevant (child);
}
/* Rearrange the statements of NODE according to PERMUTATION.  */

static void
vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
                          vec<unsigned> permutation)
{
  gimple *stmt;
  vec<gimple *> tmp_stmts;
  unsigned int i;
  slp_tree child;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_slp_rearrange_stmts (child, group_size, permutation);

  gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
  tmp_stmts.create (group_size);
  tmp_stmts.quick_grow_cleared (group_size);

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    tmp_stmts[permutation[i]] = stmt;

  SLP_TREE_SCALAR_STMTS (node).release ();
  SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
}
/* Attempt to reorder stmts in a reduction chain so that we don't
   require any load permutation.  Return true if that was possible,
   otherwise return false.  */

vect_attempt_slp_rearrange_stmts (slp_instance slp_instn)

  unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);

  slp_tree node, load;

  /* Compare all the permutation sequences to the first one.  We know
     that at least one load is permuted.  */
  node = SLP_INSTANCE_LOADS (slp_instn)[0];
  if (!node->load_permutation.exists ())

  for (i = 1; SLP_INSTANCE_LOADS (slp_instn).iterate (i, &load); ++i)

      if (!load->load_permutation.exists ())

      FOR_EACH_VEC_ELT (load->load_permutation, j, lidx)
        if (lidx != node->load_permutation[j])

  /* Check that the loads in the first sequence are different and there
     are no gaps between them.  */
  auto_sbitmap load_index (group_size);
  bitmap_clear (load_index);
  FOR_EACH_VEC_ELT (node->load_permutation, i, lidx)

      if (lidx >= group_size)

      if (bitmap_bit_p (load_index, lidx))

      bitmap_set_bit (load_index, lidx);

  for (i = 0; i < group_size; i++)
    if (!bitmap_bit_p (load_index, i))

  /* This permutation is valid for reduction.  Since the order of the
     statements in the nodes is not important unless they are memory
     accesses, we can rearrange the statements in all the nodes
     according to the order of the loads.  */
  vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
                            node->load_permutation);

  /* We are done, no actual permutations need to be generated.  */
  poly_uint64 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_instn);
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)

      gimple *first_stmt = SLP_TREE_SCALAR_STMTS (node)[0];
      first_stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt));
      /* But we have to keep those permutations that are required because
         of handling of gaps.  */
      if (known_eq (unrolling_factor, 1U)
          || (group_size == GROUP_SIZE (vinfo_for_stmt (first_stmt))
              && GROUP_GAP (vinfo_for_stmt (first_stmt)) == 0))
        SLP_TREE_LOAD_PERMUTATION (node).release ();

        for (j = 0; j < SLP_TREE_LOAD_PERMUTATION (node).length (); ++j)
          SLP_TREE_LOAD_PERMUTATION (node)[j] = j;
/* Check if the required load permutations in the SLP instance
   SLP_INSTN are supported.  */

vect_supported_load_permutation_p (slp_instance slp_instn)

  unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
  unsigned int i, j, k, next;

  gimple *stmt, *load, *next_load;

  if (dump_enabled_p ())

      dump_printf_loc (MSG_NOTE, vect_location, "Load permutation ");
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
        if (node->load_permutation.exists ())
          FOR_EACH_VEC_ELT (node->load_permutation, j, next)
            dump_printf (MSG_NOTE, "%d ", next);

          for (k = 0; k < group_size; ++k)
            dump_printf (MSG_NOTE, "%d ", k);
      dump_printf (MSG_NOTE, "\n");

  /* In case of reduction every load permutation is allowed, since the order
     of the reduction statements is not important (as opposed to the case of
     grouped stores).  The only condition we need to check is that all the
     load nodes are of the same size and have the same permutation (and then
     rearrange all the nodes of the SLP instance according to this
     permutation).  */

  /* Check that all the load nodes are of the same size.  */
  /* ??? Can't we assert this? */
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    if (SLP_TREE_SCALAR_STMTS (node).length () != (unsigned) group_size)

  node = SLP_INSTANCE_TREE (slp_instn);
  stmt = SLP_TREE_SCALAR_STMTS (node)[0];

  /* Reduction (there are no data-refs in the root).
     In reduction chain the order of the loads is not important.  */
  if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))
      && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    vect_attempt_slp_rearrange_stmts (slp_instn);

  /* In basic block vectorization we allow any subchain of an interleaving
     chain.
     FORNOW: not supported in loop SLP because of realignment complications.  */
  if (STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt)))

      /* Check whether the loads in an instance form a subchain and thus
         no permutation is necessary.  */
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)

          if (!SLP_TREE_LOAD_PERMUTATION (node).exists ())

          bool subchain_p = true;

          FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load)

                  && (next_load != load
                      || GROUP_GAP (vinfo_for_stmt (load)) != 1))

              next_load = GROUP_NEXT_ELEMENT (vinfo_for_stmt (load));

            SLP_TREE_LOAD_PERMUTATION (node).release ();

              stmt_vec_info group_info
                = vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node)[0]);
              group_info = vinfo_for_stmt (GROUP_FIRST_ELEMENT (group_info));
              unsigned HOST_WIDE_INT nunits;
              unsigned k, maxk = 0;
              FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (node), j, k)

              /* In BB vectorization we may not actually use a loaded vector
                 accessing elements in excess of GROUP_SIZE.  */
              tree vectype = STMT_VINFO_VECTYPE (group_info);
              if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
                  || maxk >= (GROUP_SIZE (group_info) & ~(nunits - 1)))

                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "BB vectorization with gaps at the end of "
                                   "a load is not supported\n");

              /* Verify the permutation can be generated.  */

              if (!vect_transform_slp_perm_load (node, tem, NULL,
                                                 1, slp_instn, true, &n_perms))

                  dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                   "unsupported load permutation\n");

  /* For loop vectorization verify we can generate the permutation.  Be
     conservative about the vectorization factor, there are permutations
     that will use three vector inputs only starting from a specific factor
     and the vectorization factor is not yet final.
     ??? The SLP instance unrolling factor might not be the maximum one.  */

    = force_common_multiple (SLP_INSTANCE_UNROLLING_FACTOR (slp_instn),
                             LOOP_VINFO_VECT_FACTOR
                               (STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt))));
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    if (node->load_permutation.exists ()
        && !vect_transform_slp_perm_load (node, vNULL, NULL, test_vf,
                                          slp_instn, true, &n_perms))
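/* Example of the basic-block subchain test above: if a node loads
   a[2], a[3], a[4], a[5] from an interleaving chain starting at a[0],
   the loads are consecutive (each GROUP_GAP is 1), so the recorded load
   permutation can simply be dropped; only when the loaded elements are
   reordered or leave gaps does an explicit permutation remain.  */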
/* Find the last store in SLP INSTANCE.  */

gimple *
vect_find_last_scalar_stmt_in_slp (slp_tree node)
{
  gimple *last = NULL, *stmt;

  for (int i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &stmt); i++)
    {
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
      if (is_pattern_stmt_p (stmt_vinfo))
        last = get_later_stmt (STMT_VINFO_RELATED_STMT (stmt_vinfo), last);
      else
        last = get_later_stmt (stmt, last);
    }

  return last;
}
/* Splits a group of stores, currently beginning at FIRST_STMT, into two groups:
   one (still beginning at FIRST_STMT) of size GROUP1_SIZE (also containing
   the first GROUP1_SIZE stmts, since stores are consecutive), the second
   containing the remainder.
   Return the first stmt in the second group.  */

static gimple *
vect_split_slp_store_group (gimple *first_stmt, unsigned group1_size)
{
  stmt_vec_info first_vinfo = vinfo_for_stmt (first_stmt);
  gcc_assert (GROUP_FIRST_ELEMENT (first_vinfo) == first_stmt);
  gcc_assert (group1_size > 0);
  int group2_size = GROUP_SIZE (first_vinfo) - group1_size;
  gcc_assert (group2_size > 0);
  GROUP_SIZE (first_vinfo) = group1_size;

  gimple *stmt = first_stmt;
  for (unsigned i = group1_size; i > 1; i--)
    {
      stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
      gcc_assert (GROUP_GAP (vinfo_for_stmt (stmt)) == 1);
    }
  /* STMT is now the last element of the first group.  */
  gimple *group2 = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
  GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt)) = 0;

  GROUP_SIZE (vinfo_for_stmt (group2)) = group2_size;
  for (stmt = group2; stmt; stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt)))
    {
      GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = group2;
      gcc_assert (GROUP_GAP (vinfo_for_stmt (stmt)) == 1);
    }

  /* For the second group, the GROUP_GAP is that before the original group,
     plus skipping over the first vector.  */
  GROUP_GAP (vinfo_for_stmt (group2)) =
    GROUP_GAP (first_vinfo) + group1_size;

  /* GROUP_GAP of the first group now has to skip over the second group too.  */
  GROUP_GAP (first_vinfo) += group2_size;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "Split group into %d and %d\n",
                     group1_size, group2_size);

  return group2;
}
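/* Gap bookkeeping example: splitting a store group of 8 into 4 + 4 gives
   the second group a GROUP_GAP of the original gap plus 4 (it now starts
   four elements further in), while the first group's GROUP_GAP grows by 4
   so that whatever follows the original group is still reached at the same
   distance after the split.  */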
/* Calculate the unrolling factor for an SLP instance with GROUP_SIZE
   statements and a vector of NUNITS elements.  */

static poly_uint64
calculate_unrolling_factor (poly_uint64 nunits, unsigned int group_size)
{
  return exact_div (common_multiple (nunits, group_size), group_size);
}
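/* For example, with nunits == 4 and group_size == 6 the common multiple is
   12, so the SLP instance must be unrolled by a factor of 12 / 6 = 2 to
   fill whole vectors; when group_size divides nunits evenly (say 4 and 8)
   the factor is simply nunits / group_size.  */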
1884 /* Analyze an SLP instance starting from a group of grouped stores. Call
1885 vect_build_slp_tree to build a tree of packed stmts if possible.
1886 Return FALSE if it's impossible to SLP any stmt in the loop. */
1889 vect_analyze_slp_instance (vec_info
*vinfo
,
1890 gimple
*stmt
, unsigned max_tree_size
)
1892 slp_instance new_instance
;
1894 unsigned int group_size
= GROUP_SIZE (vinfo_for_stmt (stmt
));
1895 tree vectype
, scalar_type
= NULL_TREE
;
1898 vec
<slp_tree
> loads
;
1899 struct data_reference
*dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt
));
1900 vec
<gimple
*> scalar_stmts
;
1902 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)))
1906 scalar_type
= TREE_TYPE (DR_REF (dr
));
1907 vectype
= get_vectype_for_scalar_type (scalar_type
);
1911 gcc_assert (is_a
<loop_vec_info
> (vinfo
));
1912 vectype
= STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt
));
1915 group_size
= GROUP_SIZE (vinfo_for_stmt (stmt
));
1919 gcc_assert (is_a
<loop_vec_info
> (vinfo
));
1920 vectype
= STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt
));
1921 group_size
= as_a
<loop_vec_info
> (vinfo
)->reductions
.length ();
1926 if (dump_enabled_p ())
1928 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1929 "Build SLP failed: unsupported data-type ");
1930 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, scalar_type
);
1931 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
1936 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
1938 /* Create a node (a root of the SLP tree) for the packed grouped stores. */
1939 scalar_stmts
.create (group_size
);
1941 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)))
1943 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
1946 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next
))
1947 && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next
)))
1948 scalar_stmts
.safe_push (
1949 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next
)));
1951 scalar_stmts
.safe_push (next
);
1952 next
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next
));
1954 /* Mark the first element of the reduction chain as reduction to properly
1955 transform the node. In the reduction analysis phase only the last
1956 element of the chain is marked as reduction. */
1957 if (!STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt
)))
1958 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt
)) = vect_reduction_def
;
1962 /* Collect reduction statements. */
1963 vec
<gimple
*> reductions
= as_a
<loop_vec_info
> (vinfo
)->reductions
;
1964 for (i
= 0; reductions
.iterate (i
, &next
); i
++)
1965 scalar_stmts
.safe_push (next
);
1968 loads
.create (group_size
);
1970 /* Build the tree for the SLP instance. */
1971 bool *matches
= XALLOCAVEC (bool, group_size
);
1972 unsigned npermutes
= 0;
1973 bst_fail
= new scalar_stmts_set_t ();
1974 poly_uint64 max_nunits
= nunits
;
1975 node
= vect_build_slp_tree (vinfo
, scalar_stmts
, group_size
,
1976 &max_nunits
, &loads
, matches
, &npermutes
,
1977 NULL
, max_tree_size
);
1981 /* Calculate the unrolling factor based on the smallest type. */
1982 poly_uint64 unrolling_factor
1983 = calculate_unrolling_factor (max_nunits
, group_size
);
1985 if (maybe_ne (unrolling_factor
, 1U)
1986 && is_a
<bb_vec_info
> (vinfo
))
1988 unsigned HOST_WIDE_INT const_max_nunits
;
1989 if (!max_nunits
.is_constant (&const_max_nunits
)
1990 || const_max_nunits
> group_size
)
1992 if (dump_enabled_p ())
1993 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1994 "Build SLP failed: store group "
1995 "size not a multiple of the vector size "
1996 "in basic block SLP\n");
1997 vect_free_slp_tree (node
);
2001 /* Fatal mismatch. */
2002 matches
[group_size
/ const_max_nunits
* const_max_nunits
] = false;
2003 vect_free_slp_tree (node
);
2008 /* Create a new SLP instance. */
2009 new_instance
= XNEW (struct _slp_instance
);
2010 SLP_INSTANCE_TREE (new_instance
) = node
;
2011 SLP_INSTANCE_GROUP_SIZE (new_instance
) = group_size
;
2012 SLP_INSTANCE_UNROLLING_FACTOR (new_instance
) = unrolling_factor
;
2013 SLP_INSTANCE_LOADS (new_instance
) = loads
;
2015 /* Compute the load permutation. */
2017 bool loads_permuted
= false;
2018 FOR_EACH_VEC_ELT (loads
, i
, load_node
)
2020 vec
<unsigned> load_permutation
;
2022 gimple
*load
, *first_stmt
;
2023 bool this_load_permuted
= false;
2024 load_permutation
.create (group_size
);
2025 first_stmt
= GROUP_FIRST_ELEMENT
2026 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node
)[0]));
2027 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node
), j
, load
)
2029 int load_place
= vect_get_place_in_interleaving_chain
2031 gcc_assert (load_place
!= -1);
2032 if (load_place
!= j
)
2033 this_load_permuted
= true;
2034 load_permutation
.safe_push (load_place
);
2036 if (!this_load_permuted
2037 /* The load requires permutation when unrolling exposes
2038 a gap either because the group is larger than the SLP
2039 group-size or because there is a gap between the groups. */
2040 && (known_eq (unrolling_factor
, 1U)
2041 || (group_size
== GROUP_SIZE (vinfo_for_stmt (first_stmt
))
2042 && GROUP_GAP (vinfo_for_stmt (first_stmt
)) == 0)))
2044 load_permutation
.release ();
2047 SLP_TREE_LOAD_PERMUTATION (load_node
) = load_permutation
;
2048 loads_permuted
= true;
2053 if (!vect_supported_load_permutation_p (new_instance
))
2055 if (dump_enabled_p ())
2057 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2058 "Build SLP failed: unsupported load "
2060 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
,
2063 vect_free_slp_instance (new_instance
);
2068 /* If the loads and stores can be handled with load/store-lan
2069 instructions do not generate this SLP instance. */
2070 if (is_a
<loop_vec_info
> (vinfo
)
2072 && dr
&& vect_store_lanes_supported (vectype
, group_size
, false))
2075 FOR_EACH_VEC_ELT (loads
, i
, load_node
)
2077 gimple
*first_stmt
= GROUP_FIRST_ELEMENT
2078 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node
)[0]));
2079 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (first_stmt
);
	  /* Use SLP for strided accesses (or if we can't load-lanes).  */
2082 if (STMT_VINFO_STRIDED_P (stmt_vinfo
)
2083 || ! vect_load_lanes_supported
2084 (STMT_VINFO_VECTYPE (stmt_vinfo
),
2085 GROUP_SIZE (stmt_vinfo
), false))
2088 if (i
== loads
.length ())
2090 if (dump_enabled_p ())
2091 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2092 "Built SLP cancelled: can use "
2093 "load/store-lanes\n");
2094 vect_free_slp_instance (new_instance
);
2099 vinfo
->slp_instances
.safe_push (new_instance
);
2101 if (dump_enabled_p ())
2103 dump_printf_loc (MSG_NOTE
, vect_location
,
2104 "Final SLP tree for instance:\n");
2105 vect_print_slp_tree (MSG_NOTE
, vect_location
, node
);
2113 /* Failed to SLP. */
2114 /* Free the allocated memory. */
2115 scalar_stmts
.release ();
  /* For basic block SLP, try to break the group up into multiples of the
     vector size.  */
2121 unsigned HOST_WIDE_INT const_nunits
;
2122 if (is_a
<bb_vec_info
> (vinfo
)
2123 && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
))
2124 && STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt
))
2125 && nunits
.is_constant (&const_nunits
))
      /* We consider breaking the group only on VF boundaries from the existing
	 start.  */
2129 for (i
= 0; i
< group_size
; i
++)
2130 if (!matches
[i
]) break;
2132 if (i
>= const_nunits
&& i
< group_size
)
	  /* Split into two groups at the first vector boundary before i.  */
	  gcc_assert ((const_nunits & (const_nunits - 1)) == 0);
	  unsigned group1_size = i & ~(const_nunits - 1);
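	  /* For illustration (hypothetical numbers): with const_nunits == 4 and
	     the first mismatch at i == 6, group1_size becomes 6 & ~3 == 4, so
	     the first four stores form one group and the remaining stores are
	     split off and re-analyzed separately.  */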
2138 gimple
*rest
= vect_split_slp_store_group (stmt
, group1_size
);
2139 bool res
= vect_analyze_slp_instance (vinfo
, stmt
, max_tree_size
);
2140 /* If the first non-match was in the middle of a vector,
2141 skip the rest of that vector. */
2142 if (group1_size
< i
)
2144 i
= group1_size
+ const_nunits
;
2146 rest
= vect_split_slp_store_group (rest
, const_nunits
);
2149 res
|= vect_analyze_slp_instance (vinfo
, rest
, max_tree_size
);
      /* Even though the first vector did not all match, we might be able to SLP
	 (some) of the remainder.  FORNOW ignore this possibility.  */
/* Check if there are stmts in the loop that can be vectorized using SLP.
   Build SLP trees of packed scalar stmts if SLP is possible.  */
2164 vect_analyze_slp (vec_info
*vinfo
, unsigned max_tree_size
)
2167 gimple
*first_element
;
2169 if (dump_enabled_p ())
2170 dump_printf_loc (MSG_NOTE
, vect_location
, "=== vect_analyze_slp ===\n");
2172 /* Find SLP sequences starting from groups of grouped stores. */
2173 FOR_EACH_VEC_ELT (vinfo
->grouped_stores
, i
, first_element
)
2174 vect_analyze_slp_instance (vinfo
, first_element
, max_tree_size
);
2176 if (loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
))
2178 if (loop_vinfo
->reduction_chains
.length () > 0)
2180 /* Find SLP sequences starting from reduction chains. */
2181 FOR_EACH_VEC_ELT (loop_vinfo
->reduction_chains
, i
, first_element
)
2182 if (! vect_analyze_slp_instance (vinfo
, first_element
,
2185 /* Dissolve reduction chain group. */
2186 gimple
*next
, *stmt
= first_element
;
2189 stmt_vec_info vinfo
= vinfo_for_stmt (stmt
);
2190 next
= GROUP_NEXT_ELEMENT (vinfo
);
2191 GROUP_FIRST_ELEMENT (vinfo
) = NULL
;
2192 GROUP_NEXT_ELEMENT (vinfo
) = NULL
;
2195 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (first_element
))
2196 = vect_internal_def
;
2200 /* Find SLP sequences starting from groups of reductions. */
2201 if (loop_vinfo
->reductions
.length () > 1)
2202 vect_analyze_slp_instance (vinfo
, loop_vinfo
->reductions
[0],
2210 /* For each possible SLP instance decide whether to SLP it and calculate overall
2211 unrolling factor needed to SLP the loop. Return TRUE if decided to SLP at
2212 least one instance. */
2215 vect_make_slp_decision (loop_vec_info loop_vinfo
)
2218 poly_uint64 unrolling_factor
= 1;
2219 vec
<slp_instance
> slp_instances
= LOOP_VINFO_SLP_INSTANCES (loop_vinfo
);
2220 slp_instance instance
;
2221 int decided_to_slp
= 0;
2223 if (dump_enabled_p ())
2224 dump_printf_loc (MSG_NOTE
, vect_location
, "=== vect_make_slp_decision ==="
2227 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
2229 /* FORNOW: SLP if you can. */
2230 /* All unroll factors have the form current_vector_size * X for some
2231 rational X, so they must have a common multiple. */
2233 = force_common_multiple (unrolling_factor
,
2234 SLP_INSTANCE_UNROLLING_FACTOR (instance
));
2236 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
2237 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
2238 loop-based vectorization. Such stmts will be marked as HYBRID. */
2239 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance
), pure_slp
, -1);
2243 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo
) = unrolling_factor
;
2245 if (decided_to_slp
&& dump_enabled_p ())
2247 dump_printf_loc (MSG_NOTE
, vect_location
,
2248 "Decided to SLP %d instances. Unrolling factor ",
2250 dump_dec (MSG_NOTE
, unrolling_factor
);
2251 dump_printf (MSG_NOTE
, "\n");
2254 return (decided_to_slp
> 0);
2258 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
2259 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
2262 vect_detect_hybrid_slp_stmts (slp_tree node
, unsigned i
, slp_vect_type stype
)
2264 gimple
*stmt
= SLP_TREE_SCALAR_STMTS (node
)[i
];
2265 imm_use_iterator imm_iter
;
2267 stmt_vec_info use_vinfo
, stmt_vinfo
= vinfo_for_stmt (stmt
);
2269 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
2270 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2273 /* Propagate hybrid down the SLP tree. */
2274 if (stype
== hybrid
)
2276 else if (HYBRID_SLP_STMT (stmt_vinfo
))
2280 /* Check if a pure SLP stmt has uses in non-SLP stmts. */
2281 gcc_checking_assert (PURE_SLP_STMT (stmt_vinfo
));
2282 /* If we get a pattern stmt here we have to use the LHS of the
2283 original stmt for immediate uses. */
2284 if (! STMT_VINFO_IN_PATTERN_P (stmt_vinfo
)
2285 && STMT_VINFO_RELATED_STMT (stmt_vinfo
))
2286 stmt
= STMT_VINFO_RELATED_STMT (stmt_vinfo
);
2288 if (gimple_code (stmt
) == GIMPLE_PHI
)
2289 def
= gimple_phi_result (stmt
);
2291 def
= SINGLE_SSA_TREE_OPERAND (stmt
, SSA_OP_DEF
);
2293 FOR_EACH_IMM_USE_STMT (use_stmt
, imm_iter
, def
)
2295 if (!flow_bb_inside_loop_p (loop
, gimple_bb (use_stmt
)))
2297 use_vinfo
= vinfo_for_stmt (use_stmt
);
2298 if (STMT_VINFO_IN_PATTERN_P (use_vinfo
)
2299 && STMT_VINFO_RELATED_STMT (use_vinfo
))
2300 use_vinfo
= vinfo_for_stmt (STMT_VINFO_RELATED_STMT (use_vinfo
));
2301 if (!STMT_SLP_TYPE (use_vinfo
)
2302 && (STMT_VINFO_RELEVANT (use_vinfo
)
2303 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo
)))
2304 && !(gimple_code (use_stmt
) == GIMPLE_PHI
2305 && STMT_VINFO_DEF_TYPE (use_vinfo
) == vect_reduction_def
))
2307 if (dump_enabled_p ())
2309 dump_printf_loc (MSG_NOTE
, vect_location
, "use of SLP "
2310 "def in non-SLP stmt: ");
2311 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, use_stmt
, 0);
2319 && !HYBRID_SLP_STMT (stmt_vinfo
))
2321 if (dump_enabled_p ())
2323 dump_printf_loc (MSG_NOTE
, vect_location
, "marking hybrid: ");
2324 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
2326 STMT_SLP_TYPE (stmt_vinfo
) = hybrid
;
2329 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), j
, child
)
2330 if (SLP_TREE_DEF_TYPE (child
) != vect_external_def
)
2331 vect_detect_hybrid_slp_stmts (child
, i
, stype
);
2334 /* Helpers for vect_detect_hybrid_slp walking pattern stmt uses. */
2337 vect_detect_hybrid_slp_1 (tree
*tp
, int *, void *data
)
2339 walk_stmt_info
*wi
= (walk_stmt_info
*)data
;
2340 struct loop
*loopp
= (struct loop
*)wi
->info
;
2345 if (TREE_CODE (*tp
) == SSA_NAME
2346 && !SSA_NAME_IS_DEFAULT_DEF (*tp
))
2348 gimple
*def_stmt
= SSA_NAME_DEF_STMT (*tp
);
2349 if (flow_bb_inside_loop_p (loopp
, gimple_bb (def_stmt
))
2350 && PURE_SLP_STMT (vinfo_for_stmt (def_stmt
)))
2352 if (dump_enabled_p ())
2354 dump_printf_loc (MSG_NOTE
, vect_location
, "marking hybrid: ");
2355 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, def_stmt
, 0);
2357 STMT_SLP_TYPE (vinfo_for_stmt (def_stmt
)) = hybrid
;
2365 vect_detect_hybrid_slp_2 (gimple_stmt_iterator
*gsi
, bool *handled
,
2368 stmt_vec_info use_vinfo
= vinfo_for_stmt (gsi_stmt (*gsi
));
2369 /* If the stmt is in a SLP instance then this isn't a reason
2370 to mark use definitions in other SLP instances as hybrid. */
2371 if (! STMT_SLP_TYPE (use_vinfo
)
2372 && (STMT_VINFO_RELEVANT (use_vinfo
)
2373 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo
)))
2374 && ! (gimple_code (gsi_stmt (*gsi
)) == GIMPLE_PHI
2375 && STMT_VINFO_DEF_TYPE (use_vinfo
) == vect_reduction_def
))
2382 /* Find stmts that must be both vectorized and SLPed. */
2385 vect_detect_hybrid_slp (loop_vec_info loop_vinfo
)
2388 vec
<slp_instance
> slp_instances
= LOOP_VINFO_SLP_INSTANCES (loop_vinfo
);
2389 slp_instance instance
;
2391 if (dump_enabled_p ())
2392 dump_printf_loc (MSG_NOTE
, vect_location
, "=== vect_detect_hybrid_slp ==="
2395 /* First walk all pattern stmt in the loop and mark defs of uses as
2396 hybrid because immediate uses in them are not recorded. */
2397 for (i
= 0; i
< LOOP_VINFO_LOOP (loop_vinfo
)->num_nodes
; ++i
)
2399 basic_block bb
= LOOP_VINFO_BBS (loop_vinfo
)[i
];
2400 for (gimple_stmt_iterator gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
);
2403 gimple
*stmt
= gsi_stmt (gsi
);
2404 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2405 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
2408 memset (&wi
, 0, sizeof (wi
));
2409 wi
.info
= LOOP_VINFO_LOOP (loop_vinfo
);
2410 gimple_stmt_iterator gsi2
2411 = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info
));
2412 walk_gimple_stmt (&gsi2
, vect_detect_hybrid_slp_2
,
2413 vect_detect_hybrid_slp_1
, &wi
);
2414 walk_gimple_seq (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
),
2415 vect_detect_hybrid_slp_2
,
2416 vect_detect_hybrid_slp_1
, &wi
);
2421 /* Then walk the SLP instance trees marking stmts with uses in
2422 non-SLP stmts as hybrid, also propagating hybrid down the
2423 SLP tree, collecting the above info on-the-fly. */
2424 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
2426 for (unsigned i
= 0; i
< SLP_INSTANCE_GROUP_SIZE (instance
); ++i
)
2427 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance
),
2433 /* Initialize a bb_vec_info struct for the statements between
2434 REGION_BEGIN_IN (inclusive) and REGION_END_IN (exclusive). */
2436 _bb_vec_info::_bb_vec_info (gimple_stmt_iterator region_begin_in
,
2437 gimple_stmt_iterator region_end_in
)
2438 : vec_info (vec_info::bb
, init_cost (NULL
)),
2439 bb (gsi_bb (region_begin_in
)),
2440 region_begin (region_begin_in
),
2441 region_end (region_end_in
)
2443 gimple_stmt_iterator gsi
;
2445 for (gsi
= region_begin
; gsi_stmt (gsi
) != gsi_stmt (region_end
);
2448 gimple
*stmt
= gsi_stmt (gsi
);
2449 gimple_set_uid (stmt
, 0);
2450 set_vinfo_for_stmt (stmt
, new_stmt_vec_info (stmt
, this));
2457 /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
2458 stmts in the basic block. */
2460 _bb_vec_info::~_bb_vec_info ()
2462 for (gimple_stmt_iterator si
= region_begin
;
2463 gsi_stmt (si
) != gsi_stmt (region_end
); gsi_next (&si
))
2465 gimple
*stmt
= gsi_stmt (si
);
2466 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2469 /* Free stmt_vec_info. */
2470 free_stmt_vec_info (stmt
);
2472 /* Reset region marker. */
2473 gimple_set_uid (stmt
, -1);
2480 /* Analyze statements contained in SLP tree NODE after recursively analyzing
2481 the subtree. NODE_INSTANCE contains NODE and VINFO contains INSTANCE.
2483 Return true if the operations are supported. */
2486 vect_slp_analyze_node_operations (vec_info
*vinfo
, slp_tree node
,
2487 slp_instance node_instance
,
2488 scalar_stmts_to_slp_tree_map_t
*visited
,
2489 scalar_stmts_to_slp_tree_map_t
*lvisited
,
2490 stmt_vector_for_cost
*cost_vec
)
2497 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
2500 /* If we already analyzed the exact same set of scalar stmts we're done.
2501 We share the generated vector stmts for those. */
2503 if ((leader
= visited
->get (SLP_TREE_SCALAR_STMTS (node
)))
2504 || (leader
= lvisited
->get (SLP_TREE_SCALAR_STMTS (node
))))
2506 SLP_TREE_NUMBER_OF_VEC_STMTS (node
)
2507 = SLP_TREE_NUMBER_OF_VEC_STMTS (*leader
);
2511 /* The SLP graph is acyclic so not caching whether we failed or succeeded
2512 doesn't result in any issue since we throw away the lvisited set
2514 lvisited
->put (SLP_TREE_SCALAR_STMTS (node
).copy (), node
);
2516 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
2517 if (!vect_slp_analyze_node_operations (vinfo
, child
, node_instance
,
2518 visited
, lvisited
, cost_vec
))
2521 stmt
= SLP_TREE_SCALAR_STMTS (node
)[0];
2522 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2523 gcc_assert (stmt_info
);
2524 gcc_assert (STMT_SLP_TYPE (stmt_info
) != loop_vect
);
2526 /* For BB vectorization vector types are assigned here.
2527 Memory accesses already got their vector type assigned
2528 in vect_analyze_data_refs. */
2529 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
2531 && ! STMT_VINFO_DATA_REF (stmt_info
))
2533 tree vectype
, nunits_vectype
;
2534 if (!vect_get_vector_types_for_stmt (stmt_info
, &vectype
,
2536 /* We checked this when building the node. */
2538 if (vectype
== boolean_type_node
)
2540 vectype
= vect_get_mask_type_for_stmt (stmt_info
);
2542 /* vect_get_mask_type_for_stmt has already explained the
2548 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, sstmt
)
2549 STMT_VINFO_VECTYPE (vinfo_for_stmt (sstmt
)) = vectype
;
  /* Calculate the number of vector statements to be created for the
     scalar stmts in this node.  For SLP reductions it is equal to the
     number of vector statements in the children (which has already been
     calculated by the recursive call).  Otherwise it is the number of
     scalar elements in one scalar iteration (GROUP_SIZE) multiplied by
     VF divided by the number of elements in a vector.  */
  if (GROUP_FIRST_ELEMENT (stmt_info)
      && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
    SLP_TREE_NUMBER_OF_VEC_STMTS (node)
      = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node)[0]);
  else
    {
      poly_uint64 vf;
      if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
	vf = loop_vinfo->vectorization_factor;
      else
	vf = 1;
      unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (node_instance);
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      SLP_TREE_NUMBER_OF_VEC_STMTS (node)
	= vect_get_num_vectors (vf * group_size, vectype);
    }
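  /* Illustrative numbers (hypothetical): with a vectorization factor of 8,
     a GROUP_SIZE of 4 and a vector type holding 8 elements, the node needs
     8 * 4 / 8 == 4 vector statements.  */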
2575 /* Push SLP node def-type to stmt operands. */
2576 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), j
, child
)
2577 if (SLP_TREE_DEF_TYPE (child
) != vect_internal_def
)
2578 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child
)[0]))
2579 = SLP_TREE_DEF_TYPE (child
);
2580 bool res
= vect_analyze_stmt (stmt
, &dummy
, node
, node_instance
, cost_vec
);
2581 /* Restore def-types. */
2582 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), j
, child
)
2583 if (SLP_TREE_DEF_TYPE (child
) != vect_internal_def
)
2584 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child
)[0]))
2585 = vect_internal_def
;
2593 /* Analyze statements in SLP instances of VINFO. Return true if the
2594 operations are supported. */
2597 vect_slp_analyze_operations (vec_info
*vinfo
)
2599 slp_instance instance
;
2602 if (dump_enabled_p ())
2603 dump_printf_loc (MSG_NOTE
, vect_location
,
2604 "=== vect_slp_analyze_operations ===\n");
2606 scalar_stmts_to_slp_tree_map_t
*visited
2607 = new scalar_stmts_to_slp_tree_map_t ();
2608 for (i
= 0; vinfo
->slp_instances
.iterate (i
, &instance
); )
2610 scalar_stmts_to_slp_tree_map_t lvisited
;
2611 stmt_vector_for_cost cost_vec
;
2612 cost_vec
.create (2);
2613 if (!vect_slp_analyze_node_operations (vinfo
,
2614 SLP_INSTANCE_TREE (instance
),
2615 instance
, visited
, &lvisited
,
2618 dump_printf_loc (MSG_NOTE
, vect_location
,
2619 "removing SLP instance operations starting from: ");
2620 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
,
2621 SLP_TREE_SCALAR_STMTS
2622 (SLP_INSTANCE_TREE (instance
))[0], 0);
2623 vect_free_slp_instance (instance
);
2624 vinfo
->slp_instances
.ordered_remove (i
);
2625 cost_vec
.release ();
2629 for (scalar_stmts_to_slp_tree_map_t::iterator x
= lvisited
.begin();
2630 x
!= lvisited
.end(); ++x
)
2631 visited
->put ((*x
).first
.copy (), (*x
).second
);
2634 add_stmt_costs (vinfo
->target_cost_data
, &cost_vec
);
2635 cost_vec
.release ();
2640 return !vinfo
->slp_instances
.is_empty ();
2644 /* Compute the scalar cost of the SLP node NODE and its children
2645 and return it. Do not account defs that are marked in LIFE and
2646 update LIFE according to uses of NODE. */
2649 vect_bb_slp_scalar_cost (basic_block bb
,
2650 slp_tree node
, vec
<bool, va_heap
> *life
,
2651 stmt_vector_for_cost
*cost_vec
)
2657 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
2659 ssa_op_iter op_iter
;
2660 def_operand_p def_p
;
2661 stmt_vec_info stmt_info
;
      /* If there is a non-vectorized use of the defs then the scalar
	 stmt is kept live in which case we do not account it or any
	 required defs in the SLP children in the scalar cost.  This
	 way we make the vectorization more costly when compared to
	 the scalar cost.  */
2671 FOR_EACH_SSA_DEF_OPERAND (def_p
, stmt
, op_iter
, SSA_OP_DEF
)
2673 imm_use_iterator use_iter
;
2675 FOR_EACH_IMM_USE_STMT (use_stmt
, use_iter
, DEF_FROM_PTR (def_p
))
2676 if (!is_gimple_debug (use_stmt
)
2677 && (! vect_stmt_in_region_p (vinfo_for_stmt (stmt
)->vinfo
,
2679 || ! PURE_SLP_STMT (vinfo_for_stmt (use_stmt
))))
2682 BREAK_FROM_IMM_USE_STMT (use_iter
);
2688 /* Count scalar stmts only once. */
2689 if (gimple_visited_p (stmt
))
2691 gimple_set_visited (stmt
, true);
2693 stmt_info
= vinfo_for_stmt (stmt
);
2694 vect_cost_for_stmt kind
;
2695 if (STMT_VINFO_DATA_REF (stmt_info
))
2697 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info
)))
2700 kind
= scalar_store
;
2704 record_stmt_cost (cost_vec
, 1, kind
, stmt_info
, 0, vect_body
);
2707 auto_vec
<bool, 20> subtree_life
;
2708 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
2710 if (SLP_TREE_DEF_TYPE (child
) == vect_internal_def
)
2712 /* Do not directly pass LIFE to the recursive call, copy it to
2713 confine changes in the callee to the current child/subtree. */
2714 subtree_life
.safe_splice (*life
);
2715 vect_bb_slp_scalar_cost (bb
, child
, &subtree_life
, cost_vec
);
2716 subtree_life
.truncate (0);
2721 /* Check if vectorization of the basic block is profitable. */
2724 vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo
)
2726 vec
<slp_instance
> slp_instances
= BB_VINFO_SLP_INSTANCES (bb_vinfo
);
2727 slp_instance instance
;
2729 unsigned int vec_inside_cost
= 0, vec_outside_cost
= 0, scalar_cost
= 0;
2730 unsigned int vec_prologue_cost
= 0, vec_epilogue_cost
= 0;
2732 /* Calculate scalar cost. */
2733 stmt_vector_for_cost scalar_costs
;
2734 scalar_costs
.create (0);
2735 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
2737 auto_vec
<bool, 20> life
;
2738 life
.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance
));
2739 vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo
),
2740 SLP_INSTANCE_TREE (instance
),
2741 &life
, &scalar_costs
);
2743 void *target_cost_data
= init_cost (NULL
);
2744 add_stmt_costs (target_cost_data
, &scalar_costs
);
2745 scalar_costs
.release ();
2747 finish_cost (target_cost_data
, &dummy
, &scalar_cost
, &dummy
);
2748 destroy_cost_data (target_cost_data
);
2750 /* Unset visited flag. */
2751 for (gimple_stmt_iterator gsi
= bb_vinfo
->region_begin
;
2752 gsi_stmt (gsi
) != gsi_stmt (bb_vinfo
->region_end
); gsi_next (&gsi
))
2753 gimple_set_visited (gsi_stmt (gsi
), false);
2755 /* Complete the target-specific cost calculation. */
2756 finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo
), &vec_prologue_cost
,
2757 &vec_inside_cost
, &vec_epilogue_cost
);
2759 vec_outside_cost
= vec_prologue_cost
+ vec_epilogue_cost
;
2761 if (dump_enabled_p ())
2763 dump_printf_loc (MSG_NOTE
, vect_location
, "Cost model analysis: \n");
2764 dump_printf (MSG_NOTE
, " Vector inside of basic block cost: %d\n",
2766 dump_printf (MSG_NOTE
, " Vector prologue cost: %d\n", vec_prologue_cost
);
2767 dump_printf (MSG_NOTE
, " Vector epilogue cost: %d\n", vec_epilogue_cost
);
2768 dump_printf (MSG_NOTE
, " Scalar cost of basic block: %d\n", scalar_cost
);
  /* Vectorization is profitable if its cost does not exceed the cost of the
     scalar version.  Note that we err on the vector side for equal cost
     because the cost estimate is otherwise quite pessimistic (constant uses
     are free on the scalar side but cost a load on the vector side, for
     example).  */
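  /* For illustration (hypothetical numbers): with a prologue cost of 2, an
     epilogue cost of 1 and an inside-of-block cost of 5, the vector cost is
     2 + 1 + 5 == 8; a scalar cost of 10 keeps the vectorization, while a
     scalar cost of 7 rejects it.  */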
2776 if (vec_outside_cost
+ vec_inside_cost
> scalar_cost
)
2782 /* Check if the basic block can be vectorized. Returns a bb_vec_info
2783 if so and sets fatal to true if failure is independent of
2784 current_vector_size. */
2787 vect_slp_analyze_bb_1 (gimple_stmt_iterator region_begin
,
2788 gimple_stmt_iterator region_end
,
2789 vec
<data_reference_p
> datarefs
, int n_stmts
,
2792 bb_vec_info bb_vinfo
;
2793 slp_instance instance
;
2795 poly_uint64 min_vf
= 2;
2797 /* The first group of checks is independent of the vector size. */
2800 if (n_stmts
> PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB
))
2802 if (dump_enabled_p ())
2803 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2804 "not vectorized: too many instructions in "
2806 free_data_refs (datarefs
);
2810 bb_vinfo
= new _bb_vec_info (region_begin
, region_end
);
2814 BB_VINFO_DATAREFS (bb_vinfo
) = datarefs
;
2816 /* Analyze the data references. */
2818 if (!vect_analyze_data_refs (bb_vinfo
, &min_vf
))
2820 if (dump_enabled_p ())
2821 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2822 "not vectorized: unhandled data-ref in basic "
2829 if (BB_VINFO_DATAREFS (bb_vinfo
).length () < 2)
2831 if (dump_enabled_p ())
2832 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2833 "not vectorized: not enough data-refs in "
2840 if (!vect_analyze_data_ref_accesses (bb_vinfo
))
2842 if (dump_enabled_p ())
2843 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2844 "not vectorized: unhandled data access in "
2851 /* If there are no grouped stores in the region there is no need
2852 to continue with pattern recog as vect_analyze_slp will fail
2854 if (bb_vinfo
->grouped_stores
.is_empty ())
2856 if (dump_enabled_p ())
2857 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2858 "not vectorized: no grouped stores in "
2865 /* While the rest of the analysis below depends on it in some way. */
2868 vect_pattern_recog (bb_vinfo
);
2870 /* Check the SLP opportunities in the basic block, analyze and build SLP
2872 if (!vect_analyze_slp (bb_vinfo
, n_stmts
))
2874 if (dump_enabled_p ())
2876 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2877 "Failed to SLP the basic block.\n");
2878 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2879 "not vectorized: failed to find SLP opportunities "
2880 "in basic block.\n");
2887 vect_record_base_alignments (bb_vinfo
);
2889 /* Analyze and verify the alignment of data references and the
2890 dependence in the SLP instances. */
2891 for (i
= 0; BB_VINFO_SLP_INSTANCES (bb_vinfo
).iterate (i
, &instance
); )
2893 if (! vect_slp_analyze_and_verify_instance_alignment (instance
)
2894 || ! vect_slp_analyze_instance_dependence (instance
))
2896 dump_printf_loc (MSG_NOTE
, vect_location
,
2897 "removing SLP instance operations starting from: ");
2898 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
,
2899 SLP_TREE_SCALAR_STMTS
2900 (SLP_INSTANCE_TREE (instance
))[0], 0);
2901 vect_free_slp_instance (instance
);
2902 BB_VINFO_SLP_INSTANCES (bb_vinfo
).ordered_remove (i
);
2906 /* Mark all the statements that we want to vectorize as pure SLP and
2908 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance
), pure_slp
, -1);
2909 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance
));
2913 if (! BB_VINFO_SLP_INSTANCES (bb_vinfo
).length ())
2919 if (!vect_slp_analyze_operations (bb_vinfo
))
2921 if (dump_enabled_p ())
2922 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2923 "not vectorized: bad operation in basic block.\n");
2929 /* Cost model: check if the vectorization is worthwhile. */
2930 if (!unlimited_cost_model (NULL
)
2931 && !vect_bb_vectorization_profitable_p (bb_vinfo
))
2933 if (dump_enabled_p ())
2934 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2935 "not vectorized: vectorization is not "
2942 if (dump_enabled_p ())
2943 dump_printf_loc (MSG_NOTE
, vect_location
,
2944 "Basic block will be vectorized using SLP\n");
2950 /* Main entry for the BB vectorizer. Analyze and transform BB, returns
2951 true if anything in the basic-block was vectorized. */
2954 vect_slp_bb (basic_block bb
)
2956 bb_vec_info bb_vinfo
;
2957 gimple_stmt_iterator gsi
;
2958 bool any_vectorized
= false;
2959 auto_vector_sizes vector_sizes
;
2961 if (dump_enabled_p ())
2962 dump_printf_loc (MSG_NOTE
, vect_location
, "===vect_slp_analyze_bb===\n");
2964 /* Autodetect first vector size we try. */
2965 current_vector_size
= 0;
2966 targetm
.vectorize
.autovectorize_vector_sizes (&vector_sizes
);
2967 unsigned int next_size
= 0;
2969 gsi
= gsi_start_bb (bb
);
2971 poly_uint64 autodetected_vector_size
= 0;
2974 if (gsi_end_p (gsi
))
2977 gimple_stmt_iterator region_begin
= gsi
;
2978 vec
<data_reference_p
> datarefs
= vNULL
;
2981 for (; !gsi_end_p (gsi
); gsi_next (&gsi
))
2983 gimple
*stmt
= gsi_stmt (gsi
);
2984 if (is_gimple_debug (stmt
))
2988 if (gimple_location (stmt
) != UNKNOWN_LOCATION
)
2989 vect_location
= gimple_location (stmt
);
2991 if (!find_data_references_in_stmt (NULL
, stmt
, &datarefs
))
2995 /* Skip leading unhandled stmts. */
2996 if (gsi_stmt (region_begin
) == gsi_stmt (gsi
))
3002 gimple_stmt_iterator region_end
= gsi
;
3004 bool vectorized
= false;
3006 bb_vinfo
= vect_slp_analyze_bb_1 (region_begin
, region_end
,
3007 datarefs
, insns
, fatal
);
3009 && dbg_cnt (vect_slp
))
3011 if (dump_enabled_p ())
3012 dump_printf_loc (MSG_NOTE
, vect_location
, "SLPing BB part\n");
3014 vect_schedule_slp (bb_vinfo
);
3016 if (dump_enabled_p ())
3017 dump_printf_loc (MSG_NOTE
, vect_location
,
3018 "basic block part vectorized\n");
3024 any_vectorized
|= vectorized
;
3027 autodetected_vector_size
= current_vector_size
;
3029 if (next_size
< vector_sizes
.length ()
3030 && known_eq (vector_sizes
[next_size
], autodetected_vector_size
))
3034 || next_size
== vector_sizes
.length ()
3035 || known_eq (current_vector_size
, 0U)
3036 /* If vect_slp_analyze_bb_1 signaled that analysis for all
3037 vector sizes will fail do not bother iterating. */
3040 if (gsi_end_p (region_end
))
3043 /* Skip the unhandled stmt. */
3046 /* And reset vector sizes. */
3047 current_vector_size
= 0;
3052 /* Try the next biggest vector size. */
3053 current_vector_size
= vector_sizes
[next_size
++];
3054 if (dump_enabled_p ())
3056 dump_printf_loc (MSG_NOTE
, vect_location
,
3057 "***** Re-trying analysis with "
3059 dump_dec (MSG_NOTE
, current_vector_size
);
3060 dump_printf (MSG_NOTE
, "\n");
3068 return any_vectorized
;
/* Return true if the vector type of the boolean constant that is the OPNUM
   operand in statement STMT is a boolean vector.  */
3076 vect_mask_constant_operand_p (gimple
*stmt
, int opnum
)
3078 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
3079 enum tree_code code
= gimple_expr_code (stmt
);
3082 enum vect_def_type dt
;
3084 /* For comparison and COND_EXPR type is chosen depending
3085 on the other comparison operand. */
3086 if (TREE_CODE_CLASS (code
) == tcc_comparison
)
3089 op
= gimple_assign_rhs1 (stmt
);
3091 op
= gimple_assign_rhs2 (stmt
);
3093 if (!vect_is_simple_use (op
, stmt_vinfo
->vinfo
, &def_stmt
,
3097 return !vectype
|| VECTOR_BOOLEAN_TYPE_P (vectype
);
3100 if (code
== COND_EXPR
)
3102 tree cond
= gimple_assign_rhs1 (stmt
);
3104 if (TREE_CODE (cond
) == SSA_NAME
)
3107 op
= TREE_OPERAND (cond
, 1);
3109 op
= TREE_OPERAND (cond
, 0);
3111 if (!vect_is_simple_use (op
, stmt_vinfo
->vinfo
, &def_stmt
,
3115 return !vectype
|| VECTOR_BOOLEAN_TYPE_P (vectype
);
3118 return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo
));
/* Build a variable-length vector in which the elements in ELTS are repeated
   to fill NRESULTS vectors of type VECTOR_TYPE.  Store the vectors in
   RESULTS and add any new instructions to SEQ.

   The approach we use is:

   (1) Find a vector mode VM with integer elements of mode IM.

   (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
       ELTS' has mode IM.  This involves creating NELTS' VIEW_CONVERT_EXPRs
       from small vectors to IM.

   (3) Duplicate each ELTS'[I] into a vector of mode VM.

   (4) Use a tree of interleaving VEC_PERM_EXPRs to create VMs with the
       correct byte contents.

   (5) Use VIEW_CONVERT_EXPR to cast the final VMs to the required type.

   We try to find the largest IM for which this sequence works, in order
   to cut down on the number of interleaves.  */
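/* Worked example (illustration only, assuming such a mode exists on the
   target): ELTS = {a, b, c, d} with 32-bit elements and a variable-length
   VECTOR_TYPE.  If IM is a 64-bit integer mode and NVECTORS == 2, step (2)
   forms the two 64-bit scalars ab and cd from {a, b} and {c, d}, step (3)
   duplicates them into {ab, ab, ...} and {cd, cd, ...}, and a single
   interleave in step (4) yields {ab, cd, ab, cd, ...}, which viewed as
   32-bit elements in step (5) is {a, b, c, d, a, b, c, d, ...}, i.e. ELTS
   repeated to fill the vector.  */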
3144 duplicate_and_interleave (gimple_seq
*seq
, tree vector_type
, vec
<tree
> elts
,
3145 unsigned int nresults
, vec
<tree
> &results
)
3147 unsigned int nelts
= elts
.length ();
3148 tree element_type
= TREE_TYPE (vector_type
);
3150 /* (1) Find a vector mode VM with integer elements of mode IM. */
3151 unsigned int nvectors
= 1;
3152 tree new_vector_type
;
3154 if (!can_duplicate_and_interleave_p (nelts
, TYPE_MODE (element_type
),
3155 &nvectors
, &new_vector_type
,
3159 /* Get a vector type that holds ELTS[0:NELTS/NELTS']. */
3160 unsigned int partial_nelts
= nelts
/ nvectors
;
3161 tree partial_vector_type
= build_vector_type (element_type
, partial_nelts
);
3163 tree_vector_builder partial_elts
;
3164 auto_vec
<tree
, 32> pieces (nvectors
* 2);
3165 pieces
.quick_grow (nvectors
* 2);
3166 for (unsigned int i
= 0; i
< nvectors
; ++i
)
3168 /* (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
3169 ELTS' has mode IM. */
3170 partial_elts
.new_vector (partial_vector_type
, partial_nelts
, 1);
3171 for (unsigned int j
= 0; j
< partial_nelts
; ++j
)
3172 partial_elts
.quick_push (elts
[i
* partial_nelts
+ j
]);
3173 tree t
= gimple_build_vector (seq
, &partial_elts
);
3174 t
= gimple_build (seq
, VIEW_CONVERT_EXPR
,
3175 TREE_TYPE (new_vector_type
), t
);
3177 /* (3) Duplicate each ELTS'[I] into a vector of mode VM. */
3178 pieces
[i
] = gimple_build_vector_from_val (seq
, new_vector_type
, t
);
3181 /* (4) Use a tree of VEC_PERM_EXPRs to create a single VM with the
3182 correct byte contents.
3184 We need to repeat the following operation log2(nvectors) times:
3186 out[i * 2] = VEC_PERM_EXPR (in[i], in[i + hi_start], lo_permute);
3187 out[i * 2 + 1] = VEC_PERM_EXPR (in[i], in[i + hi_start], hi_permute);
3189 However, if each input repeats every N elements and the VF is
3190 a multiple of N * 2, the HI result is the same as the LO. */
3191 unsigned int in_start
= 0;
3192 unsigned int out_start
= nvectors
;
3193 unsigned int hi_start
= nvectors
/ 2;
3194 /* A bound on the number of outputs needed to produce NRESULTS results
3195 in the final iteration. */
3196 unsigned int noutputs_bound
= nvectors
* nresults
;
3197 for (unsigned int in_repeat
= 1; in_repeat
< nvectors
; in_repeat
*= 2)
3199 noutputs_bound
/= 2;
3200 unsigned int limit
= MIN (noutputs_bound
, nvectors
);
3201 for (unsigned int i
= 0; i
< limit
; ++i
)
3204 && multiple_p (TYPE_VECTOR_SUBPARTS (new_vector_type
),
3207 pieces
[out_start
+ i
] = pieces
[out_start
+ i
- 1];
3211 tree output
= make_ssa_name (new_vector_type
);
3212 tree input1
= pieces
[in_start
+ (i
/ 2)];
3213 tree input2
= pieces
[in_start
+ (i
/ 2) + hi_start
];
3214 gassign
*stmt
= gimple_build_assign (output
, VEC_PERM_EXPR
,
3217 gimple_seq_add_stmt (seq
, stmt
);
3218 pieces
[out_start
+ i
] = output
;
3220 std::swap (in_start
, out_start
);
3223 /* (5) Use VIEW_CONVERT_EXPR to cast the final VM to the required type. */
3224 results
.reserve (nresults
);
3225 for (unsigned int i
= 0; i
< nresults
; ++i
)
3227 results
.quick_push (gimple_build (seq
, VIEW_CONVERT_EXPR
, vector_type
,
3228 pieces
[in_start
+ i
]));
3230 results
.quick_push (results
[i
- nvectors
]);
3234 /* For constant and loop invariant defs of SLP_NODE this function returns
3235 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
3236 OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
3237 scalar stmts. NUMBER_OF_VECTORS is the number of vector defs to create.
3238 REDUC_INDEX is the index of the reduction operand in the statements, unless
3242 vect_get_constant_vectors (tree op
, slp_tree slp_node
,
3243 vec
<tree
> *vec_oprnds
,
3244 unsigned int op_num
, unsigned int number_of_vectors
)
3246 vec
<gimple
*> stmts
= SLP_TREE_SCALAR_STMTS (slp_node
);
3247 gimple
*stmt
= stmts
[0];
3248 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
3249 unsigned HOST_WIDE_INT nunits
;
3251 unsigned j
, number_of_places_left_in_vector
;
3254 int group_size
= stmts
.length ();
3255 unsigned int vec_num
, i
;
3256 unsigned number_of_copies
= 1;
3258 voprnds
.create (number_of_vectors
);
3259 bool constant_p
, is_store
;
3260 tree neutral_op
= NULL
;
3261 enum tree_code code
= gimple_expr_code (stmt
);
3262 gimple_seq ctor_seq
= NULL
;
3263 auto_vec
<tree
, 16> permute_results
;
3265 /* Check if vector type is a boolean vector. */
3266 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op
))
3267 && vect_mask_constant_operand_p (stmt
, op_num
))
3269 = build_same_sized_truth_vector_type (STMT_VINFO_VECTYPE (stmt_vinfo
));
3271 vector_type
= get_vectype_for_scalar_type (TREE_TYPE (op
));
3273 if (STMT_VINFO_DATA_REF (stmt_vinfo
))
3276 op
= gimple_assign_rhs1 (stmt
);
  /* NUMBER_OF_COPIES is the number of times we need to use the same values in
     created vectors.  It is greater than 1 if unrolling is performed.

     For example, we have two scalar operands, s1 and s2 (e.g., group of
     strided accesses of size two), while NUNITS is four (i.e., four scalars
     of this type can be packed in a vector).  The output vector will contain
     two copies of each scalar operand: {s1, s2, s1, s2}.  (NUMBER_OF_COPIES
     is two).

     If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
     containing the operands.

     For example, NUNITS is four as before, and the group size is 8
     (s1, s2, ..., s8).  We will create two vectors {s1, s2, s3, s4} and
     {s5, s6, s7, s8}.  */
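  /* Illustrative arithmetic for the formula below (hypothetical numbers):
     with NUNITS == 4, NUMBER_OF_VECTORS == 1 and GROUP_SIZE == 2 we get
     4 * 1 / 2 == 2 copies ({s1, s2, s1, s2}); with NUNITS == 4,
     NUMBER_OF_VECTORS == 2 and GROUP_SIZE == 8 we get 4 * 2 / 8 == 1 copy
     spread over two vectors ({s1..s4} and {s5..s8}).  */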
3299 /* When using duplicate_and_interleave, we just need one element for
3300 each scalar statement. */
3301 if (!TYPE_VECTOR_SUBPARTS (vector_type
).is_constant (&nunits
))
3302 nunits
= group_size
;
3304 number_of_copies
= nunits
* number_of_vectors
/ group_size
;
3306 number_of_places_left_in_vector
= nunits
;
3308 tree_vector_builder
elts (vector_type
, nunits
, 1);
3309 elts
.quick_grow (nunits
);
3310 bool place_after_defs
= false;
3311 for (j
= 0; j
< number_of_copies
; j
++)
3313 for (i
= group_size
- 1; stmts
.iterate (i
, &stmt
); i
--)
3316 op
= gimple_assign_rhs1 (stmt
);
3323 tree cond
= gimple_assign_rhs1 (stmt
);
3324 if (TREE_CODE (cond
) == SSA_NAME
)
3325 op
= gimple_op (stmt
, op_num
+ 1);
3326 else if (op_num
== 0 || op_num
== 1)
3327 op
= TREE_OPERAND (cond
, op_num
);
3331 op
= gimple_assign_rhs2 (stmt
);
3333 op
= gimple_assign_rhs3 (stmt
);
3339 op
= gimple_call_arg (stmt
, op_num
);
3346 op
= gimple_op (stmt
, op_num
+ 1);
3347 /* Unlike the other binary operators, shifts/rotates have
3348 the shift count being int, instead of the same type as
3349 the lhs, so make sure the scalar is the right type if
3350 we are dealing with vectors of
3351 long long/long/short/char. */
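		  /* For example (illustration only): a constant shift count 3
		     of type int is folded to the vector's element type (say
		     short), so the uniform operand vector {3, 3, ...} has the
		     same element type as the shifted vectors.  */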
3352 if (op_num
== 1 && TREE_CODE (op
) == INTEGER_CST
)
3353 op
= fold_convert (TREE_TYPE (vector_type
), op
);
3357 op
= gimple_op (stmt
, op_num
+ 1);
3362 /* Create 'vect_ = {op0,op1,...,opn}'. */
3363 number_of_places_left_in_vector
--;
3365 if (!types_compatible_p (TREE_TYPE (vector_type
), TREE_TYPE (op
)))
3367 if (CONSTANT_CLASS_P (op
))
3369 if (VECTOR_BOOLEAN_TYPE_P (vector_type
))
3371 /* Can't use VIEW_CONVERT_EXPR for booleans because
3372 of possibly different sizes of scalar value and
3374 if (integer_zerop (op
))
3375 op
= build_int_cst (TREE_TYPE (vector_type
), 0);
3376 else if (integer_onep (op
))
3377 op
= build_all_ones_cst (TREE_TYPE (vector_type
));
3382 op
= fold_unary (VIEW_CONVERT_EXPR
,
3383 TREE_TYPE (vector_type
), op
);
3384 gcc_assert (op
&& CONSTANT_CLASS_P (op
));
3388 tree new_temp
= make_ssa_name (TREE_TYPE (vector_type
));
3390 if (VECTOR_BOOLEAN_TYPE_P (vector_type
))
3393 = build_all_ones_cst (TREE_TYPE (vector_type
));
3395 = build_zero_cst (TREE_TYPE (vector_type
));
3396 gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (op
)));
3397 init_stmt
= gimple_build_assign (new_temp
, COND_EXPR
,
3403 op
= build1 (VIEW_CONVERT_EXPR
, TREE_TYPE (vector_type
),
3406 = gimple_build_assign (new_temp
, VIEW_CONVERT_EXPR
,
3409 gimple_seq_add_stmt (&ctor_seq
, init_stmt
);
3413 elts
[number_of_places_left_in_vector
] = op
;
3414 if (!CONSTANT_CLASS_P (op
))
3416 if (TREE_CODE (orig_op
) == SSA_NAME
3417 && !SSA_NAME_IS_DEFAULT_DEF (orig_op
)
3418 && STMT_VINFO_BB_VINFO (stmt_vinfo
)
3419 && (STMT_VINFO_BB_VINFO (stmt_vinfo
)->bb
3420 == gimple_bb (SSA_NAME_DEF_STMT (orig_op
))))
3421 place_after_defs
= true;
3423 if (number_of_places_left_in_vector
== 0)
3426 ? multiple_p (TYPE_VECTOR_SUBPARTS (vector_type
), nunits
)
3427 : known_eq (TYPE_VECTOR_SUBPARTS (vector_type
), nunits
))
3428 vec_cst
= gimple_build_vector (&ctor_seq
, &elts
);
3431 if (vec_oprnds
->is_empty ())
3432 duplicate_and_interleave (&ctor_seq
, vector_type
, elts
,
3435 vec_cst
= permute_results
[number_of_vectors
- j
- 1];
3438 gimple_stmt_iterator gsi
;
3439 if (place_after_defs
)
3442 (vect_find_last_scalar_stmt_in_slp (slp_node
));
3443 init
= vect_init_vector (stmt
, vec_cst
, vector_type
, &gsi
);
3446 init
= vect_init_vector (stmt
, vec_cst
, vector_type
, NULL
);
3447 if (ctor_seq
!= NULL
)
3449 gsi
= gsi_for_stmt (SSA_NAME_DEF_STMT (init
));
3450 gsi_insert_seq_before (&gsi
, ctor_seq
, GSI_SAME_STMT
);
3453 voprnds
.quick_push (init
);
3454 place_after_defs
= false;
3455 number_of_places_left_in_vector
= nunits
;
3457 elts
.new_vector (vector_type
, nunits
, 1);
3458 elts
.quick_grow (nunits
);
  /* Since the vectors are created in the reverse order, we should invert
     them.  */
3465 vec_num
= voprnds
.length ();
3466 for (j
= vec_num
; j
!= 0; j
--)
3468 vop
= voprnds
[j
- 1];
3469 vec_oprnds
->quick_push (vop
);
3474 /* In case that VF is greater than the unrolling factor needed for the SLP
3475 group of stmts, NUMBER_OF_VECTORS to be created is greater than
3476 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
3477 to replicate the vectors. */
3478 while (number_of_vectors
> vec_oprnds
->length ())
3480 tree neutral_vec
= NULL
;
3485 neutral_vec
= build_vector_from_val (vector_type
, neutral_op
);
3487 vec_oprnds
->quick_push (neutral_vec
);
3491 for (i
= 0; vec_oprnds
->iterate (i
, &vop
) && i
< vec_num
; i
++)
3492 vec_oprnds
->quick_push (vop
);
3498 /* Get vectorized definitions from SLP_NODE that contains corresponding
3499 vectorized def-stmts. */
3502 vect_get_slp_vect_defs (slp_tree slp_node
, vec
<tree
> *vec_oprnds
)
3505 gimple
*vec_def_stmt
;
3508 gcc_assert (SLP_TREE_VEC_STMTS (slp_node
).exists ());
3510 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node
), i
, vec_def_stmt
)
3512 gcc_assert (vec_def_stmt
);
3513 if (gimple_code (vec_def_stmt
) == GIMPLE_PHI
)
3514 vec_oprnd
= gimple_phi_result (vec_def_stmt
);
3516 vec_oprnd
= gimple_get_lhs (vec_def_stmt
);
3517 vec_oprnds
->quick_push (vec_oprnd
);
3522 /* Get vectorized definitions for SLP_NODE.
3523 If the scalar definitions are loop invariants or constants, collect them and
3524 call vect_get_constant_vectors() to create vector stmts.
3525 Otherwise, the def-stmts must be already vectorized and the vectorized stmts
3526 must be stored in the corresponding child of SLP_NODE, and we call
3527 vect_get_slp_vect_defs () to retrieve them. */
3530 vect_get_slp_defs (vec
<tree
> ops
, slp_tree slp_node
,
3531 vec
<vec
<tree
> > *vec_oprnds
)
3534 int number_of_vects
= 0, i
;
3535 unsigned int child_index
= 0;
3536 HOST_WIDE_INT lhs_size_unit
, rhs_size_unit
;
3537 slp_tree child
= NULL
;
3540 bool vectorized_defs
;
3542 first_stmt
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
3543 FOR_EACH_VEC_ELT (ops
, i
, oprnd
)
3545 /* For each operand we check if it has vectorized definitions in a child
3546 node or we need to create them (for invariants and constants). We
3547 check if the LHS of the first stmt of the next child matches OPRND.
3548 If it does, we found the correct child. Otherwise, we call
3549 vect_get_constant_vectors (), and not advance CHILD_INDEX in order
3550 to check this child node for the next operand. */
3551 vectorized_defs
= false;
3552 if (SLP_TREE_CHILDREN (slp_node
).length () > child_index
)
3554 child
= SLP_TREE_CHILDREN (slp_node
)[child_index
];
3556 /* We have to check both pattern and original def, if available. */
3557 if (SLP_TREE_DEF_TYPE (child
) == vect_internal_def
)
3559 gimple
*first_def
= SLP_TREE_SCALAR_STMTS (child
)[0];
3561 = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def
));
3564 if (gimple_code (first_def
) == GIMPLE_PHI
)
3565 first_def_op
= gimple_phi_result (first_def
);
3567 first_def_op
= gimple_get_lhs (first_def
);
3568 if (operand_equal_p (oprnd
, first_def_op
, 0)
3570 && operand_equal_p (oprnd
, gimple_get_lhs (related
), 0)))
3572 /* The number of vector defs is determined by the number of
3573 vector statements in the node from which we get those
3575 number_of_vects
= SLP_TREE_NUMBER_OF_VEC_STMTS (child
);
3576 vectorized_defs
= true;
3584 if (!vectorized_defs
)
3588 number_of_vects
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
3589 /* Number of vector stmts was calculated according to LHS in
3590 vect_schedule_slp_instance (), fix it by replacing LHS with
3591 RHS, if necessary. See vect_get_smallest_scalar_type () for
3593 vect_get_smallest_scalar_type (first_stmt
, &lhs_size_unit
,
3595 if (rhs_size_unit
!= lhs_size_unit
)
3597 number_of_vects
*= rhs_size_unit
;
3598 number_of_vects
/= lhs_size_unit
;
3603 /* Allocate memory for vectorized defs. */
3605 vec_defs
.create (number_of_vects
);
3607 /* For reduction defs we call vect_get_constant_vectors (), since we are
3608 looking for initial loop invariant values. */
3609 if (vectorized_defs
)
3610 /* The defs are already vectorized. */
3611 vect_get_slp_vect_defs (child
, &vec_defs
);
3613 /* Build vectors from scalar defs. */
3614 vect_get_constant_vectors (oprnd
, slp_node
, &vec_defs
, i
,
3617 vec_oprnds
->quick_push (vec_defs
);
3621 /* Generate vector permute statements from a list of loads in DR_CHAIN.
3622 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
3623 permute statements for the SLP node NODE of the SLP instance
3624 SLP_NODE_INSTANCE. */
3627 vect_transform_slp_perm_load (slp_tree node
, vec
<tree
> dr_chain
,
3628 gimple_stmt_iterator
*gsi
, poly_uint64 vf
,
3629 slp_instance slp_node_instance
, bool analyze_only
,
3632 gimple
*stmt
= SLP_TREE_SCALAR_STMTS (node
)[0];
3633 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3634 tree mask_element_type
= NULL_TREE
, mask_type
;
3636 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3637 int group_size
= SLP_INSTANCE_GROUP_SIZE (slp_node_instance
);
3638 unsigned int mask_element
;
3640 unsigned HOST_WIDE_INT nunits
, const_vf
;
3642 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info
))
3645 stmt_info
= vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info
));
3647 mode
= TYPE_MODE (vectype
);
3649 /* At the moment, all permutations are represented using per-element
3650 indices, so we can't cope with variable vector lengths or
3651 vectorization factors. */
3652 if (!TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&nunits
)
3653 || !vf
.is_constant (&const_vf
))
3656 /* The generic VEC_PERM_EXPR code always uses an integral type of the
3657 same size as the vector element being permuted. */
3658 mask_element_type
= lang_hooks
.types
.type_for_mode
3659 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype
))).require (), 1);
3660 mask_type
= get_vectype_for_scalar_type (mask_element_type
);
3661 vec_perm_builder
mask (nunits
, nunits
, 1);
3662 mask
.quick_grow (nunits
);
3663 vec_perm_indices indices
;
3665 /* Initialize the vect stmts of NODE to properly insert the generated
3668 for (unsigned i
= SLP_TREE_VEC_STMTS (node
).length ();
3669 i
< SLP_TREE_NUMBER_OF_VEC_STMTS (node
); i
++)
3670 SLP_TREE_VEC_STMTS (node
).quick_push (NULL
);
  /* Generate permutation masks for every NODE. Number of masks for each NODE
     is equal to GROUP_SIZE.
     E.g., we have a group of three nodes with three loads from the same
     location in each node, and the vector size is 4.  I.e., we have a
     a0b0c0a1b1c1... sequence and we need to create the following vectors:
     for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
     for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
     ...

     The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
     The last mask is illegal since we assume two operands for permute
     operation, and the mask element values can't be outside that range.
     Hence, the last mask must be converted into {2,5,5,5}.
     For the first two permutations we need the first and the second input
     vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
     we need the second and the third vectors: {b1,c1,a2,b2} and
     {c2,a3,b3,c3}.  */
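  /* Tying the example above to the code below (illustration only): for a's
     the load permutation is {0,0,0} and the interleaving GROUP_SIZE is 3, so
     the element index i takes the values 0,0,0 3,3,3 6,6,6 9,9,9;
     vec_index = i / 4 selects the input vector and mask_element = i % 4
     (plus 4 when the element comes from the second input of the permute)
     yields exactly the masks {0,0,0,3} {3,3,6,6} {2,5,5,5}.  */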
3690 int vect_stmts_counter
= 0;
3691 unsigned int index
= 0;
3692 int first_vec_index
= -1;
3693 int second_vec_index
= -1;
3697 for (unsigned int j
= 0; j
< const_vf
; j
++)
3699 for (int k
= 0; k
< group_size
; k
++)
3701 unsigned int i
= (SLP_TREE_LOAD_PERMUTATION (node
)[k
]
3702 + j
* STMT_VINFO_GROUP_SIZE (stmt_info
));
3703 vec_index
= i
/ nunits
;
3704 mask_element
= i
% nunits
;
3705 if (vec_index
== first_vec_index
3706 || first_vec_index
== -1)
3708 first_vec_index
= vec_index
;
3710 else if (vec_index
== second_vec_index
3711 || second_vec_index
== -1)
3713 second_vec_index
= vec_index
;
3714 mask_element
+= nunits
;
3718 if (dump_enabled_p ())
3720 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3721 "permutation requires at "
3722 "least three vectors ");
3723 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
3726 gcc_assert (analyze_only
);
3730 gcc_assert (mask_element
< 2 * nunits
);
3731 if (mask_element
!= index
)
3733 mask
[index
++] = mask_element
;
3735 if (index
== nunits
&& !noop_p
)
3737 indices
.new_vector (mask
, 2, nunits
);
3738 if (!can_vec_perm_const_p (mode
, indices
))
3740 if (dump_enabled_p ())
3742 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
3744 "unsupported vect permute { ");
3745 for (i
= 0; i
< nunits
; ++i
)
3747 dump_dec (MSG_MISSED_OPTIMIZATION
, mask
[i
]);
3748 dump_printf (MSG_MISSED_OPTIMIZATION
, " ");
3750 dump_printf (MSG_MISSED_OPTIMIZATION
, "}\n");
3752 gcc_assert (analyze_only
);
3759 if (index
== nunits
)
3763 tree mask_vec
= NULL_TREE
;
3766 mask_vec
= vec_perm_indices_to_tree (mask_type
, indices
);
3768 if (second_vec_index
== -1)
3769 second_vec_index
= first_vec_index
;
3771 /* Generate the permute statement if necessary. */
3772 tree first_vec
= dr_chain
[first_vec_index
];
3773 tree second_vec
= dr_chain
[second_vec_index
];
3778 = vect_create_destination_var (gimple_assign_lhs (stmt
),
3780 perm_dest
= make_ssa_name (perm_dest
);
3781 perm_stmt
= gimple_build_assign (perm_dest
,
3783 first_vec
, second_vec
,
3785 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
3788 /* If mask was NULL_TREE generate the requested
3789 identity transform. */
3790 perm_stmt
= SSA_NAME_DEF_STMT (first_vec
);
3792 /* Store the vector statement in NODE. */
3793 SLP_TREE_VEC_STMTS (node
)[vect_stmts_counter
++] = perm_stmt
;
3797 first_vec_index
= -1;
3798 second_vec_index
= -1;
3807 /* Vectorize SLP instance tree in postorder. */
3810 vect_schedule_slp_instance (slp_tree node
, slp_instance instance
,
3811 scalar_stmts_to_slp_tree_map_t
*bst_map
)
3814 bool grouped_store
, is_store
;
3815 gimple_stmt_iterator si
;
3816 stmt_vec_info stmt_info
;
3817 unsigned int group_size
;
3822 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
3825 /* See if we have already vectorized the same set of stmts and reuse their
3826 vectorized stmts. */
3827 if (slp_tree
*leader
= bst_map
->get (SLP_TREE_SCALAR_STMTS (node
)))
3829 SLP_TREE_VEC_STMTS (node
).safe_splice (SLP_TREE_VEC_STMTS (*leader
));
3833 bst_map
->put (SLP_TREE_SCALAR_STMTS (node
).copy (), node
);
3834 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
3835 vect_schedule_slp_instance (child
, instance
, bst_map
);
3837 /* Push SLP node def-type to stmts. */
3838 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
3839 if (SLP_TREE_DEF_TYPE (child
) != vect_internal_def
)
3840 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child
), j
, stmt
)
3841 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt
)) = SLP_TREE_DEF_TYPE (child
);
3843 stmt
= SLP_TREE_SCALAR_STMTS (node
)[0];
3844 stmt_info
= vinfo_for_stmt (stmt
);
3846 /* VECTYPE is the type of the destination. */
3847 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3848 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
3849 group_size
= SLP_INSTANCE_GROUP_SIZE (instance
);
3851 gcc_assert (SLP_TREE_NUMBER_OF_VEC_STMTS (node
) != 0);
3852 if (!SLP_TREE_VEC_STMTS (node
).exists ())
3853 SLP_TREE_VEC_STMTS (node
).create (SLP_TREE_NUMBER_OF_VEC_STMTS (node
));
3855 if (dump_enabled_p ())
3857 dump_printf_loc (MSG_NOTE
,vect_location
,
3858 "------>vectorizing SLP node starting from: ");
3859 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
3862 /* Vectorized stmts go before the last scalar stmt which is where
3863 all uses are ready. */
3864 si
= gsi_for_stmt (vect_find_last_scalar_stmt_in_slp (node
));
3866 /* Mark the first element of the reduction chain as reduction to properly
3867 transform the node. In the analysis phase only the last element of the
3868 chain is marked as reduction. */
3869 if (GROUP_FIRST_ELEMENT (stmt_info
) && !STMT_VINFO_GROUPED_ACCESS (stmt_info
)
3870 && GROUP_FIRST_ELEMENT (stmt_info
) == stmt
)
3872 STMT_VINFO_DEF_TYPE (stmt_info
) = vect_reduction_def
;
3873 STMT_VINFO_TYPE (stmt_info
) = reduc_vec_info_type
;
3876 /* Handle two-operation SLP nodes by vectorizing the group with
3877 both operations and then performing a merge. */
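      /* For example (illustration only): a group { a + b, c - d, e + f, g - h }
	 is vectorized once as an all-plus vector and once as an all-minus
	 vector, and a final VEC_PERM_EXPR selects the plus lanes 0 and 2 and
	 the minus lanes 1 and 3 to form the requested mixed result.  */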
3878 if (SLP_TREE_TWO_OPERATORS (node
))
3880 enum tree_code code0
= gimple_assign_rhs_code (stmt
);
3881 enum tree_code ocode
= ERROR_MARK
;
3883 vec_perm_builder
mask (group_size
, group_size
, 1);
3884 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, ostmt
)
3885 if (gimple_assign_rhs_code (ostmt
) != code0
)
3887 mask
.quick_push (1);
3888 ocode
= gimple_assign_rhs_code (ostmt
);
3891 mask
.quick_push (0);
3892 if (ocode
!= ERROR_MARK
)
3897 tree tmask
= NULL_TREE
;
3898 vect_transform_stmt (stmt
, &si
, &grouped_store
, node
, instance
);
3899 v0
= SLP_TREE_VEC_STMTS (node
).copy ();
3900 SLP_TREE_VEC_STMTS (node
).truncate (0);
3901 gimple_assign_set_rhs_code (stmt
, ocode
);
3902 vect_transform_stmt (stmt
, &si
, &grouped_store
, node
, instance
);
3903 gimple_assign_set_rhs_code (stmt
, code0
);
3904 v1
= SLP_TREE_VEC_STMTS (node
).copy ();
3905 SLP_TREE_VEC_STMTS (node
).truncate (0);
3906 tree meltype
= build_nonstandard_integer_type
3907 (GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype
))), 1);
3908 tree mvectype
= get_same_sized_vectype (meltype
, vectype
);
3910 for (j
= 0; j
< v0
.length (); ++j
)
3912 /* Enforced by vect_build_slp_tree, which rejects variable-length
3913 vectors for SLP_TREE_TWO_OPERATORS. */
3914 unsigned int const_nunits
= nunits
.to_constant ();
3915 tree_vector_builder
melts (mvectype
, const_nunits
, 1);
3916 for (l
= 0; l
< const_nunits
; ++l
)
3918 if (k
>= group_size
)
3920 tree t
= build_int_cst (meltype
,
3921 mask
[k
++] * const_nunits
+ l
);
3922 melts
.quick_push (t
);
3924 tmask
= melts
.build ();
3926 /* ??? Not all targets support a VEC_PERM_EXPR with a
3927 constant mask that would translate to a vec_merge RTX
3928 (with their vec_perm_const_ok). We can either not
3929 vectorize in that case or let veclower do its job.
3930 Unfortunately that isn't too great and at least for
3931 plus/minus we'd eventually like to match targets
3932 vector addsub instructions. */
3934 vstmt
= gimple_build_assign (make_ssa_name (vectype
),
3936 gimple_assign_lhs (v0
[j
]),
3937 gimple_assign_lhs (v1
[j
]), tmask
);
3938 vect_finish_stmt_generation (stmt
, vstmt
, &si
);
3939 SLP_TREE_VEC_STMTS (node
).quick_push (vstmt
);
3946 is_store
= vect_transform_stmt (stmt
, &si
, &grouped_store
, node
, instance
);
3948 /* Restore stmt def-types. */
3949 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
3950 if (SLP_TREE_DEF_TYPE (child
) != vect_internal_def
)
3951 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child
), j
, stmt
)
3952 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt
)) = vect_internal_def
;
3957 /* Replace scalar calls from SLP node NODE with setting of their lhs to zero.
3958 For loop vectorization this is done in vectorizable_call, but for SLP
3959 it needs to be deferred until end of vect_schedule_slp, because multiple
3960 SLP instances may refer to the same scalar stmt. */
3963 vect_remove_slp_scalar_calls (slp_tree node
)
3965 gimple
*stmt
, *new_stmt
;
3966 gimple_stmt_iterator gsi
;
3970 stmt_vec_info stmt_info
;
3972 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
3975 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
3976 vect_remove_slp_scalar_calls (child
);
3978 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
3980 if (!is_gimple_call (stmt
) || gimple_bb (stmt
) == NULL
)
3982 stmt_info
= vinfo_for_stmt (stmt
);
3983 if (stmt_info
== NULL
3984 || is_pattern_stmt_p (stmt_info
)
3985 || !PURE_SLP_STMT (stmt_info
))
3987 lhs
= gimple_call_lhs (stmt
);
3988 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (TREE_TYPE (lhs
)));
3989 set_vinfo_for_stmt (new_stmt
, stmt_info
);
3990 set_vinfo_for_stmt (stmt
, NULL
);
3991 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
3992 gsi
= gsi_for_stmt (stmt
);
3993 gsi_replace (&gsi
, new_stmt
, false);
3994 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt
)) = new_stmt
;
3998 /* Generate vector code for all SLP instances in the loop/basic block. */
4001 vect_schedule_slp (vec_info
*vinfo
)
4003 vec
<slp_instance
> slp_instances
;
4004 slp_instance instance
;
4006 bool is_store
= false;
4009 scalar_stmts_to_slp_tree_map_t
*bst_map
4010 = new scalar_stmts_to_slp_tree_map_t ();
4011 slp_instances
= vinfo
->slp_instances
;
4012 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
4014 /* Schedule the tree of INSTANCE. */
4015 is_store
= vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance
),
4017 if (dump_enabled_p ())
4018 dump_printf_loc (MSG_NOTE
, vect_location
,
4019 "vectorizing stmts using SLP.\n");
4023 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
4025 slp_tree root
= SLP_INSTANCE_TREE (instance
);
4028 gimple_stmt_iterator gsi
;
4030 /* Remove scalar call stmts. Do not do this for basic-block
4031 vectorization as not all uses may be vectorized.
4032 ??? Why should this be necessary? DCE should be able to
4033 remove the stmts itself.
4034 ??? For BB vectorization we can as well remove scalar
4035 stmts starting from the SLP tree root if they have no
4037 if (is_a
<loop_vec_info
> (vinfo
))
4038 vect_remove_slp_scalar_calls (root
);
4040 for (j
= 0; SLP_TREE_SCALAR_STMTS (root
).iterate (j
, &store
)
4041 && j
< SLP_INSTANCE_GROUP_SIZE (instance
); j
++)
4043 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store
)))
4046 if (is_pattern_stmt_p (vinfo_for_stmt (store
)))
4047 store
= STMT_VINFO_RELATED_STMT (vinfo_for_stmt (store
));
4048 /* Free the attached stmt_vec_info and remove the stmt. */
4049 gsi
= gsi_for_stmt (store
);
4050 unlink_stmt_vdef (store
);
4051 gsi_remove (&gsi
, true);
4052 release_defs (store
);
4053 free_stmt_vec_info (store
);