1 /* SLP - Basic Block Vectorization
2 Copyright (C) 2007-2018 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
30 #include "tree-pass.h"
32 #include "optabs-tree.h"
33 #include "insn-config.h"
34 #include "recog.h" /* FIXME: for insn_data */
36 #include "fold-const.h"
37 #include "stor-layout.h"
38 #include "gimple-iterator.h"
40 #include "tree-vectorizer.h"
41 #include "langhooks.h"
42 #include "gimple-walk.h"
44 #include "tree-vector-builder.h"
45 #include "vec-perm-indices.h"
46 #include "gimple-fold.h"
47 #include "internal-fn.h"
50 /* Recursively free the memory allocated for the SLP tree rooted at NODE.
51 FINAL_P is true if we have vectorized the instance or if we have
52 made a final decision not to vectorize the statements in any way. */
/* Recursively free the SLP tree rooted at NODE: free all children first,
   then (unless FINAL_P) decrement the SLP-use counts of the node's scalar
   stmts, and finally release the node's internal vectors.
   NOTE(review): this view elides several original lines (opening brace,
   the declarations of I/CHILD, and the final free of NODE itself) — code
   below is kept byte-identical to the fragment.  */
55 vect_free_slp_tree (slp_tree node
, bool final_p
)
/* Recurse into every child subtree, propagating FINAL_P.  */
60 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
61 vect_free_slp_tree (child
, final_p
);
63 /* Don't update STMT_VINFO_NUM_SLP_USES if it isn't relevant.
64 Some statements might no longer exist, after having been
65 removed by vect_transform_stmt. Updating the remaining
66 statements would be redundant. */
69 stmt_vec_info stmt_info
;
70 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt_info
)
/* Each scalar stmt must still be referenced by this node.  */
72 gcc_assert (STMT_VINFO_NUM_SLP_USES (stmt_info
) > 0);
73 STMT_VINFO_NUM_SLP_USES (stmt_info
)--;
/* Release the node's owned vectors before the node is freed.  */
77 SLP_TREE_CHILDREN (node
).release ();
78 SLP_TREE_SCALAR_STMTS (node
).release ();
79 SLP_TREE_VEC_STMTS (node
).release ();
80 SLP_TREE_LOAD_PERMUTATION (node
).release ();
86 /* Free the memory allocated for the SLP instance. FINAL_P is true if we
87 have vectorized the instance or if we have made a final decision not
88 to vectorize the statements in any way. */
/* NOTE(review): the free of INSTANCE itself and surrounding braces are
   elided from this view; code below is byte-identical to the fragment.  */
91 vect_free_slp_instance (slp_instance instance
, bool final_p
)
/* Free the whole SLP tree of the instance, then its loads vector.  */
93 vect_free_slp_tree (SLP_INSTANCE_TREE (instance
), final_p
);
94 SLP_INSTANCE_LOADS (instance
).release ();
99 /* Create an SLP node for SCALAR_STMTS. */
/* Allocate and initialize an SLP node for SCALAR_STMTS: the number of
   operand slots (NOPS) is derived from the first stmt's kind (call,
   assignment — with COND_EXPR presumably adjusted, or PHI), and every
   scalar stmt's SLP-use count is incremented.
   NOTE(review): several original lines are elided here (return type,
   declarations of NODE/NOPS/I, the COND_EXPR and PHI nops values, the
   return statement); code below is byte-identical to the fragment.  */
102 vect_create_new_slp_node (vec
<stmt_vec_info
> scalar_stmts
)
105 stmt_vec_info stmt_info
= scalar_stmts
[0];
/* Derive the operand count from the representative (first) stmt.  */
108 if (gcall
*stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
))
109 nops
= gimple_call_num_args (stmt
);
110 else if (gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
112 nops
= gimple_num_ops (stmt
) - 1;
/* COND_EXPR handling — the adjustment applied here is elided.  */
113 if (gimple_assign_rhs_code (stmt
) == COND_EXPR
)
116 else if (is_a
<gphi
*> (stmt_info
->stmt
))
/* Allocate and fill the node with default values.  */
121 node
= XNEW (struct _slp_tree
)
;
122 SLP_TREE_SCALAR_STMTS (node
) = scalar_stmts
;
123 SLP_TREE_VEC_STMTS (node
).create (0);
124 SLP_TREE_NUMBER_OF_VEC_STMTS (node
) = 0;
125 SLP_TREE_CHILDREN (node
).create (nops
);
126 SLP_TREE_LOAD_PERMUTATION (node
) = vNULL
;
127 SLP_TREE_TWO_OPERATORS (node
) = false;
128 SLP_TREE_DEF_TYPE (node
) = vect_internal_def
;
/* Record one more SLP use on each scalar stmt.  */
131 FOR_EACH_VEC_ELT (scalar_stmts
, i
, stmt_info
)
132 STMT_VINFO_NUM_SLP_USES (stmt_info
)++;
138 /* This structure is used in creation of an SLP tree. Each instance
139 corresponds to the same operand in a group of scalar stmts in an SLP
/* Per-operand bookkeeping used while building an SLP tree.
   NOTE(review): further members used elsewhere in this file
   (first_op_type, first_pattern, second_pattern) and the closing of the
   struct are elided from this view.  */
141 typedef struct _slp_oprnd_info
143 /* Def-stmts for the operands. */
144 vec
<stmt_vec_info
> def_stmts
;
145 /* Information about the first statement, its vector def-type, type, the
146 operand itself in case it's constant, and an indication if it's a pattern
149 enum vect_def_type first_dt
;
155 /* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
/* Build a vector of NOPS freshly-allocated slp_oprnd_info records, each
   with room for GROUP_SIZE def-stmts and all fields reset to their
   "uninitialized" defaults.
   NOTE(review): braces, the declaration of I and the final return of
   OPRNDS_INFO are elided from this view.  */
157 static vec
<slp_oprnd_info
>
158 vect_create_oprnd_info (int nops
, int group_size
)
161 slp_oprnd_info oprnd_info
;
162 vec
<slp_oprnd_info
> oprnds_info
;
164 oprnds_info
.create (nops
);
165 for (i
= 0; i
< nops
; i
++)
/* One record per operand slot, defaults chosen so the first stmt of
   the group initializes them.  */
167 oprnd_info
= XNEW (struct _slp_oprnd_info
)
;
168 oprnd_info
->def_stmts
.create (group_size
);
169 oprnd_info
->first_dt
= vect_uninitialized_def
;
170 oprnd_info
->first_op_type
= NULL_TREE
;
171 oprnd_info
->first_pattern
= false;
172 oprnd_info
->second_pattern
= false;
173 oprnds_info
.quick_push (oprnd_info
);
180 /* Free operands info. */
/* Release every slp_oprnd_info in OPRNDS_INFO (its def-stmts vector and
   the record itself), then the container vector.
   NOTE(review): the return type, braces and declaration of I are elided
   from this view.  */
183 vect_free_oprnd_info (vec
<slp_oprnd_info
> &oprnds_info
)
186 slp_oprnd_info oprnd_info
;
188 FOR_EACH_VEC_ELT (oprnds_info
, i
, oprnd_info
)
190 oprnd_info
->def_stmts
.release ();
191 XDELETE (oprnd_info
);
194 oprnds_info
.release ();
198 /* Find the place of the data-ref in STMT_INFO in the interleaving chain
199 that starts from FIRST_STMT_INFO. Return -1 if the data-ref is not a part
/* Walk the interleaving chain starting at FIRST_STMT_INFO, accumulating
   DR_GROUP_GAPs, until STMT_INFO is found; -1 is returned when STMT_INFO
   is not in FIRST_STMT_INFO's group (per the comment above).
   NOTE(review): the declaration/initialization of RESULT, the returns
   and the loop body structure are partially elided from this view.  */
203 vect_get_place_in_interleaving_chain (stmt_vec_info stmt_info
,
204 stmt_vec_info first_stmt_info
)
206 stmt_vec_info next_stmt_info
= first_stmt_info
;
/* STMT_INFO must belong to the chain headed by FIRST_STMT_INFO.  */
209 if (first_stmt_info
!= DR_GROUP_FIRST_ELEMENT (stmt_info
))
214 if (next_stmt_info
== stmt_info
)
216 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
/* Account for gaps between consecutive group members.  */
218 result
+= DR_GROUP_GAP (next_stmt_info
);
220 while (next_stmt_info
);
225 /* Check whether it is possible to load COUNT elements of type ELT_MODE
226 using the method implemented by duplicate_and_interleave. Return true
227 if so, returning the number of intermediate vectors in *NVECTORS_OUT
228 (if nonnull) and the type of each intermediate vector in *VECTOR_TYPE_OUT
/* Test whether COUNT elements of ELT_MODE can be loaded via
   duplicate_and_interleave (see the comment above): try successively
   halved element sizes, and for each candidate build an integer vector
   type plus two 2-input permutations and check the target supports them.
   On success, optionally report the vector count, type and the two
   permute masks.  NOTE(review): braces, the NELTS declaration, loop
   structure, returns and the second arguments of the two
   vect_gen_perm_mask_checked calls are elided from this view.  */
232 can_duplicate_and_interleave_p (unsigned int count
, machine_mode elt_mode
,
233 unsigned int *nvectors_out
,
234 tree
*vector_type_out
,
237 poly_int64 elt_bytes
= count
* GET_MODE_SIZE (elt_mode
);
239 unsigned int nvectors
= 1;
242 scalar_int_mode int_mode
;
243 poly_int64 elt_bits
= elt_bytes
* BITS_PER_UNIT
;
/* Candidate element must evenly divide the vector size and have an
   integer mode of the same width.  */
244 if (multiple_p (current_vector_size
, elt_bytes
, &nelts
)
245 && int_mode_for_size (elt_bits
, 0).exists (&int_mode
))
247 tree int_type
= build_nonstandard_integer_type
248 (GET_MODE_BITSIZE (int_mode
), 1);
249 tree vector_type
= build_vector_type (int_type
, nelts
);
250 if (VECTOR_MODE_P (TYPE_MODE (vector_type
)))
/* Build the two interleaving selectors as stepped encodings.  */
252 vec_perm_builder
sel1 (nelts
, 2, 3);
253 vec_perm_builder
sel2 (nelts
, 2, 3);
254 poly_int64 half_nelts
= exact_div (nelts
, 2);
255 for (unsigned int i
= 0; i
< 3; ++i
)
258 sel1
.quick_push (i
+ nelts
);
259 sel2
.quick_push (half_nelts
+ i
);
260 sel2
.quick_push (half_nelts
+ i
+ nelts
);
262 vec_perm_indices
indices1 (sel1
, 2, nelts
);
263 vec_perm_indices
indices2 (sel2
, 2, nelts
);
/* Both permutes must be supported for this element size to work.  */
264 if (can_vec_perm_const_p (TYPE_MODE (vector_type
), indices1
)
265 && can_vec_perm_const_p (TYPE_MODE (vector_type
), indices2
))
268 *nvectors_out
= nvectors
;
270 *vector_type_out
= vector_type
;
273 permutes
[0] = vect_gen_perm_mask_checked (vector_type
,
275 permutes
[1] = vect_gen_perm_mask_checked (vector_type
,
/* Otherwise halve the element size and retry (loop head elided).  */
282 if (!multiple_p (elt_bytes
, 2, &elt_bytes
))
288 /* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
289 they are of a valid type and that they match the defs of the first stmt of
290 the SLP group (stored in OPRNDS_INFO). This function tries to match stmts
291 by swapping operands of STMTS[STMT_NUM] when possible. Non-zero *SWAP
292 indicates swap is required for cond_expr stmts. Specifically, *SWAP
293 is 1 if STMT is cond and operands of comparison need to be swapped;
294 *SWAP is 2 if STMT is cond and code of comparison needs to be inverted.
295 If there is any operand swap in this function, *SWAP is set to non-zero
297 If there was a fatal error return -1; if the error could be corrected by
298 swapping operands of father node of this one, return 1; if everything is
/* Collect the defs of STMTS[STMT_NUM]'s operands into OPRNDS_INFO and
   check them against the first stmt of the group, swapping operands
   (commutative ops, COND_EXPR comparison swap/invert per *SWAP) when
   that repairs a mismatch — see the long comment above for the
   -1/0/1 return protocol.  NOTE(review): this view elides many original
   lines (braces, OPRND declaration, returns, switch head, several dump
   strings); code below is byte-identical to the fragment.  */
301 vect_get_and_check_slp_defs (vec_info
*vinfo
, unsigned char *swap
,
302 vec
<stmt_vec_info
> stmts
, unsigned stmt_num
,
303 vec
<slp_oprnd_info
> *oprnds_info
)
305 stmt_vec_info stmt_info
= stmts
[stmt_num
];
307 unsigned int i
, number_of_oprnds
;
308 enum vect_def_type dt
= vect_uninitialized_def
;
309 bool pattern
= false;
310 slp_oprnd_info oprnd_info
;
311 int first_op_idx
= 1;
312 unsigned int commutative_op
= -1U;
313 bool first_op_cond
= false;
314 bool first
= stmt_num
== 0;
315 bool second
= stmt_num
== 1;
/* Determine operand count and which operand (if any) is commutative.  */
317 if (gcall
*stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
))
319 number_of_oprnds
= gimple_call_num_args (stmt
);
321 if (gimple_call_internal_p (stmt
))
323 internal_fn ifn
= gimple_call_internal_fn (stmt
);
324 commutative_op
= first_commutative_argument (ifn
);
327 else if (gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
329 enum tree_code code
= gimple_assign_rhs_code (stmt
);
330 number_of_oprnds
= gimple_num_ops (stmt
) - 1;
331 /* Swap can only be done for cond_expr if asked to, otherwise we
332 could result in different comparison code to the first stmt. */
333 if (gimple_assign_rhs_code (stmt
) == COND_EXPR
334 && COMPARISON_CLASS_P (gimple_assign_rhs1 (stmt
)))
336 first_op_cond
= true;
340 commutative_op
= commutative_tree_code (code
) ? 0U : -1U;
345 bool swapped
= (*swap
!= 0);
346 gcc_assert (!swapped
|| first_op_cond
);
/* Fetch each operand, honoring any requested cond_expr swap.  */
347 for (i
= 0; i
< number_of_oprnds
; i
++)
352 /* Map indicating how operands of cond_expr should be swapped. */
353 int maps
[3][4] = {{0, 1, 2, 3}, {1, 0, 2, 3}, {0, 1, 3, 2}};
354 int *map
= maps
[*swap
];
357 oprnd
= TREE_OPERAND (gimple_op (stmt_info
->stmt
,
358 first_op_idx
), map
[i
]);
360 oprnd
= gimple_op (stmt_info
->stmt
, map
[i
]);
363 oprnd
= gimple_op (stmt_info
->stmt
, first_op_idx
+ (swapped
? !i
: i
));
365 oprnd_info
= (*oprnds_info
)[i
];
367 stmt_vec_info def_stmt_info
;
368 if (!vect_is_simple_use (oprnd
, vinfo
, &dt
, &def_stmt_info
))
370 if (dump_enabled_p ())
372 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
373 "Build SLP failed: can't analyze def for ");
374 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, oprnd
);
375 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
382 oprnd_info
->second_pattern
= pattern
;
/* First stmt of the group: record the reference def info.  */
386 oprnd_info
->first_dt
= dt
;
387 oprnd_info
->first_pattern
= pattern
;
388 oprnd_info
->first_op_type
= TREE_TYPE (oprnd
);
392 /* Not first stmt of the group, check that the def-stmt/s match
393 the def-stmt/s of the first stmt. Allow different definition
394 types for reduction chains: the first stmt must be a
395 vect_reduction_def (a phi node), and the rest
396 vect_internal_def. */
397 tree type
= TREE_TYPE (oprnd
);
398 if ((oprnd_info
->first_dt
!= dt
399 && !(oprnd_info
->first_dt
== vect_reduction_def
400 && dt
== vect_internal_def
)
401 && !((oprnd_info
->first_dt
== vect_external_def
402 || oprnd_info
->first_dt
== vect_constant_def
)
403 && (dt
== vect_external_def
404 || dt
== vect_constant_def
)))
405 || !types_compatible_p (oprnd_info
->first_op_type
, type
))
407 /* Try swapping operands if we got a mismatch. */
408 if (i
== commutative_op
&& !swapped
)
414 if (dump_enabled_p ())
415 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
416 "Build SLP failed: different types\n");
/* Variable-length vectors can't splat constants/externals of
   arbitrary type — check with can_duplicate_and_interleave_p.  */
420 if ((dt
== vect_constant_def
421 || dt
== vect_external_def
)
422 && !current_vector_size
.is_constant ()
423 && (TREE_CODE (type
) == BOOLEAN_TYPE
424 || !can_duplicate_and_interleave_p (stmts
.length (),
427 if (dump_enabled_p ())
429 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
430 "Build SLP failed: invalid type of def "
431 "for variable-length SLP ");
432 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, oprnd
);
433 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
439 /* Check the types of the definitions. */
442 case vect_constant_def
:
443 case vect_external_def
:
446 case vect_reduction_def
:
447 case vect_induction_def
:
448 case vect_internal_def
:
449 oprnd_info
->def_stmts
.quick_push (def_stmt_info
);
453 /* FORNOW: Not supported. */
454 if (dump_enabled_p ())
456 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
457 "Build SLP failed: illegal type of def ");
458 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, oprnd
);
459 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
469 /* If there are already uses of this stmt in a SLP instance then
470 we've committed to the operand order and can't swap it. */
471 if (STMT_VINFO_NUM_SLP_USES (stmt_info
) != 0)
473 if (dump_enabled_p ())
475 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
476 "Build SLP failed: cannot swap operands of "
478 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
/* Perform the actual swap: either swap a COND_EXPR's comparison
   operands, or invert its code and swap the then/else arms.  */
486 gassign
*stmt
= as_a
<gassign
*> (stmt_info
->stmt
);
487 tree cond
= gimple_assign_rhs1 (stmt
);
488 enum tree_code code
= TREE_CODE (cond
);
493 swap_ssa_operands (stmt
, &TREE_OPERAND (cond
, 0),
494 &TREE_OPERAND (cond
, 1));
495 TREE_SET_CODE (cond
, swap_tree_comparison (code
));
500 swap_ssa_operands (stmt
, gimple_assign_rhs2_ptr (stmt
),
501 gimple_assign_rhs3_ptr (stmt
));
502 bool honor_nans
= HONOR_NANS (TREE_OPERAND (cond
, 0));
503 code
= invert_tree_comparison (TREE_CODE (cond
), honor_nans
);
504 gcc_assert (code
!= ERROR_MARK
);
505 TREE_SET_CODE (cond
, code
);
/* Plain commutative operation: swap the two operands in place.  */
510 unsigned int op
= commutative_op
+ first_op_idx
;
511 swap_ssa_operands (stmt_info
->stmt
,
512 gimple_op_ptr (stmt_info
->stmt
, op
),
513 gimple_op_ptr (stmt_info
->stmt
, op
+ 1));
515 if (dump_enabled_p ())
517 dump_printf_loc (MSG_NOTE
, vect_location
,
518 "swapped operands to match def types in ");
519 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt_info
->stmt
, 0);
527 /* Return true if call statements CALL1 and CALL2 are similar enough
528 to be combined into the same SLP group. */
/* Decide whether CALL1 and CALL2 may share an SLP group: same argument
   count, same combined function, and — for internal calls — compatible
   lhs and argument types; for ordinary calls, identical callee and
   fntype.  NOTE(review): braces and the true/false return statements
   are elided from this view.  */
531 compatible_calls_p (gcall
*call1
, gcall
*call2
)
533 unsigned int nargs
= gimple_call_num_args (call1
);
534 if (nargs
!= gimple_call_num_args (call2
))
537 if (gimple_call_combined_fn (call1
) != gimple_call_combined_fn (call2
))
/* Internal calls: compare result and argument types structurally.  */
540 if (gimple_call_internal_p (call1
))
542 if (!types_compatible_p (TREE_TYPE (gimple_call_lhs (call1
)),
543 TREE_TYPE (gimple_call_lhs (call2
))))
545 for (unsigned int i
= 0; i
< nargs
; ++i
)
546 if (!types_compatible_p (TREE_TYPE (gimple_call_arg (call1
, i
)),
547 TREE_TYPE (gimple_call_arg (call2
, i
))))
/* Non-internal calls: the callee and function type must match.  */
552 if (!operand_equal_p (gimple_call_fn (call1
),
553 gimple_call_fn (call2
), 0))
556 if (gimple_call_fntype (call1
) != gimple_call_fntype (call2
))
562 /* A subroutine of vect_build_slp_tree for checking VECTYPE, which is the
563 caller's attempt to find the vector type in STMT_INFO with the narrowest
564 element type. Return true if VECTYPE is nonnull and if it is valid
565 for STMT_INFO. When returning true, update MAX_NUNITS to reflect the
566 number of units in VECTYPE. GROUP_SIZE and MAX_NUNITS are as for
567 vect_build_slp_tree. */
/* Validate VECTYPE for STMT_INFO and fold its subparts count into
   *MAX_NUNITS (see the comment above); basic-block SLP additionally
   rejects vector types that would require unrolling beyond GROUP_SIZE.
   NOTE(review): the null-VECTYPE test guarding the first dump block,
   braces and the return statements are elided from this view.  */
570 vect_record_max_nunits (stmt_vec_info stmt_info
, unsigned int group_size
,
571 tree vectype
, poly_uint64
*max_nunits
)
575 if (dump_enabled_p ())
577 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
578 "Build SLP failed: unsupported data-type in ");
579 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
581 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
583 /* Fatal mismatch. */
587 /* If populating the vector type requires unrolling then fail
588 before adjusting *max_nunits for basic-block vectorization. */
589 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
590 unsigned HOST_WIDE_INT const_nunits
;
591 if (STMT_VINFO_BB_VINFO (stmt_info
)
592 && (!nunits
.is_constant (&const_nunits
)
593 || const_nunits
> group_size
))
595 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
596 "Build SLP failed: unrolling required "
597 "in basic block SLP\n");
598 /* Fatal mismatch. */
602 /* In case of multiple types we need to detect the smallest type. */
603 vect_update_max_nunits (max_nunits
, vectype
);
607 /* STMTS is a group of GROUP_SIZE SLP statements in which some
608 statements do the same operation as the first statement and in which
609 the others do ALT_STMT_CODE. Return true if we can take one vector
610 of the first operation and one vector of the second and permute them
611 to get the required result. VECTYPE is the type of the vector that
612 would be permuted. */
/* Check whether the target can blend a vector of the first operation
   with a vector of ALT_STMT_CODE (see the comment above): build a
   selector picking lane I from the second vector when stmt I uses
   ALT_STMT_CODE, and test it with can_vec_perm_const_p.
   NOTE(review): braces, a return for the non-constant-subparts case and
   the ELT adjustment inside the alt branch are elided from this view.  */
615 vect_two_operations_perm_ok_p (vec
<stmt_vec_info
> stmts
,
616 unsigned int group_size
, tree vectype
,
617 tree_code alt_stmt_code
)
619 unsigned HOST_WIDE_INT count
;
/* A constant lane count is required to spell out the permutation.  */
620 if (!TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&count
))
623 vec_perm_builder
sel (count
, count
, 1);
624 for (unsigned int i
= 0; i
< count
; ++i
)
626 unsigned int elt
= i
;
627 gassign
*stmt
= as_a
<gassign
*> (stmts
[i
% group_size
]->stmt
);
/* Lanes using the alternate opcode select from the second input
   (the adjustment of ELT is elided from this view).  */
628 if (gimple_assign_rhs_code (stmt
) == alt_stmt_code
)
630 sel
.quick_push (elt
);
632 vec_perm_indices
indices (sel
, 2, count
);
633 return can_vec_perm_const_p (TYPE_MODE (vectype
), indices
);
636 /* Verify if the scalar stmts STMTS are isomorphic, require data
637 permutation or are of unsupported types of operation. Return
638 true if they are, otherwise return false and indicate in *MATCHES
639 which stmts are not isomorphic to the first one. If MATCHES[0]
640 is false then this indicates the comparison could not be
641 carried out or the stmts will never be vectorized by SLP.
643 Note COND_EXPR is possibly ismorphic to another one after swapping its
644 operands. Set SWAP[i] to 1 if stmt I is COND_EXPR and isomorphic to
645 the first stmt by swapping the two operands of comparison; set SWAP[i]
646 to 2 if stmt I is isormorphic to the first stmt by inverting the code
647 of comparison. Take A1 >= B1 ? X1 : Y1 as an exmple, it can be swapped
648 to (B1 <= A1 ? X1 : Y1); or be inverted to (A1 < B1) ? Y1 : X1. */
/* Verify the GROUP_SIZE stmts in STMTS are isomorphic (see the comment
   above): the first stmt fixes the reference operation, and each later
   stmt must match it — same rhs code (or an allowed PLUS/MINUS
   alternate, recorded in ALT_STMT_CODE and flagged via *TWO_OPERATORS),
   same shift operand where required, compatible calls, consistent
   grouped-access chains, and COND_EXPR comparisons that match directly
   or after swap/invert (recorded in SWAP[]).  Mismatches are reported
   in MATCHES[].  NOTE(review): this view elides many original lines,
   including the tail of the parameter list (the *TWO_OPERATORS
   parameter assigned at the end), braces, several locals (I, LHS,
   OPTAB, ICODE), returns and parts of dump strings; code below is
   byte-identical to the fragment.  */
651 vect_build_slp_tree_1 (unsigned char *swap
,
652 vec
<stmt_vec_info
> stmts
, unsigned int group_size
,
653 poly_uint64
*max_nunits
, bool *matches
,
657 stmt_vec_info first_stmt_info
= stmts
[0];
658 enum tree_code first_stmt_code
= ERROR_MARK
;
659 enum tree_code alt_stmt_code
= ERROR_MARK
;
660 enum tree_code rhs_code
= ERROR_MARK
;
661 enum tree_code first_cond_code
= ERROR_MARK
;
663 bool need_same_oprnds
= false;
664 tree vectype
= NULL_TREE
, first_op1
= NULL_TREE
;
667 machine_mode optab_op2_mode
;
668 machine_mode vec_mode
;
669 stmt_vec_info first_load
= NULL
, prev_first_load
= NULL
;
671 /* For every stmt in NODE find its def stmt/s. */
672 stmt_vec_info stmt_info
;
673 FOR_EACH_VEC_ELT (stmts
, i
, stmt_info
)
675 gimple
*stmt
= stmt_info
->stmt
;
679 if (dump_enabled_p ())
681 dump_printf_loc (MSG_NOTE
, vect_location
, "Build SLP for ");
682 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
685 /* Fail to vectorize statements marked as unvectorizable. */
686 if (!STMT_VINFO_VECTORIZABLE (stmt_info
))
688 if (dump_enabled_p ())
690 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
691 "Build SLP failed: unvectorizable statement ");
692 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
694 /* Fatal mismatch. */
699 lhs
= gimple_get_lhs (stmt
);
700 if (lhs
== NULL_TREE
)
702 if (dump_enabled_p ())
704 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
705 "Build SLP failed: not GIMPLE_ASSIGN nor "
707 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
709 /* Fatal mismatch. */
715 if (!vect_get_vector_types_for_stmt (stmt_info
, &vectype
,
718 && !vect_record_max_nunits (stmt_info
, group_size
,
719 nunits_vectype
, max_nunits
)))
721 /* Fatal mismatch. */
726 gcc_assert (vectype
);
/* Classify the stmt: supported calls get CALL_EXPR as rhs code.  */
728 if (gcall
*call_stmt
= dyn_cast
<gcall
*> (stmt
))
730 rhs_code
= CALL_EXPR
;
731 if ((gimple_call_internal_p (call_stmt
)
732 && (!vectorizable_internal_fn_p
733 (gimple_call_internal_fn (call_stmt
))))
734 || gimple_call_tail_p (call_stmt
)
735 || gimple_call_noreturn_p (call_stmt
)
736 || !gimple_call_nothrow_p (call_stmt
)
737 || gimple_call_chain (call_stmt
))
739 if (dump_enabled_p ())
741 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
742 "Build SLP failed: unsupported call type ");
743 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
746 /* Fatal mismatch. */
752 rhs_code
= gimple_assign_rhs_code (stmt
);
754 /* Check the operation. */
/* First stmt of the group: it defines the reference operation.  */
757 first_stmt_code
= rhs_code
;
759 /* Shift arguments should be equal in all the packed stmts for a
760 vector shift with scalar shift operand. */
761 if (rhs_code
== LSHIFT_EXPR
|| rhs_code
== RSHIFT_EXPR
762 || rhs_code
== LROTATE_EXPR
763 || rhs_code
== RROTATE_EXPR
)
765 if (vectype
== boolean_type_node
)
767 if (dump_enabled_p ())
768 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
769 "Build SLP failed: shift of a"
771 /* Fatal mismatch. */
776 vec_mode
= TYPE_MODE (vectype
);
778 /* First see if we have a vector/vector shift. */
779 optab
= optab_for_tree_code (rhs_code
, vectype
,
783 || optab_handler (optab
, vec_mode
) == CODE_FOR_nothing
)
785 /* No vector/vector shift, try for a vector/scalar shift. */
786 optab
= optab_for_tree_code (rhs_code
, vectype
,
791 if (dump_enabled_p ())
792 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
793 "Build SLP failed: no optab.\n");
794 /* Fatal mismatch. */
798 icode
= (int) optab_handler (optab
, vec_mode
);
799 if (icode
== CODE_FOR_nothing
)
801 if (dump_enabled_p ())
802 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
804 "op not supported by target.\n");
805 /* Fatal mismatch. */
809 optab_op2_mode
= insn_data
[icode
].operand
[2].mode
;
810 if (!VECTOR_MODE_P (optab_op2_mode
))
/* Vector/scalar shift: all stmts must share the shift amount.  */
812 need_same_oprnds
= true;
813 first_op1
= gimple_assign_rhs2 (stmt
);
817 else if (rhs_code
== WIDEN_LSHIFT_EXPR
)
819 need_same_oprnds
= true;
820 first_op1
= gimple_assign_rhs2 (stmt
);
/* Later stmts: compare against the reference operation, allowing a
   single alternate opcode (two-operator nodes).  */
825 if (first_stmt_code
!= rhs_code
826 && alt_stmt_code
== ERROR_MARK
)
827 alt_stmt_code
= rhs_code
;
828 if (first_stmt_code
!= rhs_code
829 && (first_stmt_code
!= IMAGPART_EXPR
830 || rhs_code
!= REALPART_EXPR
)
831 && (first_stmt_code
!= REALPART_EXPR
832 || rhs_code
!= IMAGPART_EXPR
)
833 /* Handle mismatches in plus/minus by computing both
834 and merging the results. */
835 && !((first_stmt_code
== PLUS_EXPR
836 || first_stmt_code
== MINUS_EXPR
)
837 && (alt_stmt_code
== PLUS_EXPR
838 || alt_stmt_code
== MINUS_EXPR
)
839 && rhs_code
== alt_stmt_code
)
840 && !(STMT_VINFO_GROUPED_ACCESS (stmt_info
)
841 && (first_stmt_code
== ARRAY_REF
842 || first_stmt_code
== BIT_FIELD_REF
843 || first_stmt_code
== INDIRECT_REF
844 || first_stmt_code
== COMPONENT_REF
845 || first_stmt_code
== MEM_REF
)))
847 if (dump_enabled_p ())
849 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
850 "Build SLP failed: different operation "
852 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
853 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
855 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
856 first_stmt_info
->stmt
, 0);
/* need_same_oprnds check (head elided): shift amount must match.  */
863 && !operand_equal_p (first_op1
, gimple_assign_rhs2 (stmt
), 0))
865 if (dump_enabled_p ())
867 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
868 "Build SLP failed: different shift "
870 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
876 if (rhs_code
== CALL_EXPR
)
878 if (!compatible_calls_p (as_a
<gcall
*> (stmts
[0]->stmt
),
879 as_a
<gcall
*> (stmt
)))
881 if (dump_enabled_p ())
883 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
884 "Build SLP failed: different calls in ");
885 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
894 /* Grouped store or load. */
895 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
897 if (REFERENCE_CLASS_P (lhs
))
905 first_load
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
908 /* Check that there are no loads from different interleaving
909 chains in the same node. */
910 if (prev_first_load
!= first_load
)
912 if (dump_enabled_p ())
914 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
916 "Build SLP failed: different "
917 "interleaving chains in one node ");
918 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
926 prev_first_load
= first_load
;
928 } /* Grouped access. */
931 if (TREE_CODE_CLASS (rhs_code
) == tcc_reference
)
933 /* Not grouped load. */
934 if (dump_enabled_p ())
936 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
937 "Build SLP failed: not grouped load ");
938 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
941 /* FORNOW: Not grouped loads are not supported. */
942 /* Fatal mismatch. */
947 /* Not memory operation. */
948 if (TREE_CODE_CLASS (rhs_code
) != tcc_binary
949 && TREE_CODE_CLASS (rhs_code
) != tcc_unary
950 && TREE_CODE_CLASS (rhs_code
) != tcc_expression
951 && TREE_CODE_CLASS (rhs_code
) != tcc_comparison
952 && rhs_code
!= CALL_EXPR
)
954 if (dump_enabled_p ())
956 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
957 "Build SLP failed: operation");
958 dump_printf (MSG_MISSED_OPTIMIZATION
, " unsupported ");
959 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
961 /* Fatal mismatch. */
/* COND_EXPR: accept comparisons equal to the first stmt's either
   directly, after swapping, or after inverting (SWAP[] records how).  */
966 if (rhs_code
== COND_EXPR
)
968 tree cond_expr
= gimple_assign_rhs1 (stmt
);
969 enum tree_code cond_code
= TREE_CODE (cond_expr
);
970 enum tree_code swap_code
= ERROR_MARK
;
971 enum tree_code invert_code
= ERROR_MARK
;
974 first_cond_code
= TREE_CODE (cond_expr
);
975 else if (TREE_CODE_CLASS (cond_code
) == tcc_comparison
)
977 bool honor_nans
= HONOR_NANS (TREE_OPERAND (cond_expr
, 0));
978 swap_code
= swap_tree_comparison (cond_code
);
979 invert_code
= invert_tree_comparison (cond_code
, honor_nans
);
982 if (first_cond_code
== cond_code
)
984 /* Isomorphic can be achieved by swapping. */
985 else if (first_cond_code
== swap_code
)
987 /* Isomorphic can be achieved by inverting. */
988 else if (first_cond_code
== invert_code
)
992 if (dump_enabled_p ())
994 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
995 "Build SLP failed: different"
997 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
1009 for (i
= 0; i
< group_size
; ++i
)
1013 /* If we allowed a two-operation SLP node verify the target can cope
1014 with the permute we are going to use. */
1015 if (alt_stmt_code
!= ERROR_MARK
1016 && TREE_CODE_CLASS (alt_stmt_code
) != tcc_reference
)
1018 if (vectype
== boolean_type_node
1019 || !vect_two_operations_perm_ok_p (stmts
, group_size
,
1020 vectype
, alt_stmt_code
))
1022 for (i
= 0; i
< group_size
; ++i
)
1023 if (gimple_assign_rhs_code (stmts
[i
]->stmt
) == alt_stmt_code
)
1026 if (dump_enabled_p ())
1028 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1029 "Build SLP failed: different operation "
1031 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
1033 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1035 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
1036 first_stmt_info
->stmt
, 0);
1041 *two_operators
= true;
1047 /* Traits for the hash_set to record failed SLP builds for a stmt set.
1048 Note we never remove apart from at destruction time so we do not
1049 need a special value for deleted that differs from empty. */
/* Interior of the bst_traits hash traits (the enclosing struct header is
   elided from this view): a stmt-vector is its own key; an unallocated
   vector doubles as both the empty and the deleted marker (the comment
   above notes entries are only removed at destruction time).  */
1052 typedef vec
<stmt_vec_info
> value_type
;
1053 typedef vec
<stmt_vec_info
> compare_type
;
1054 static inline hashval_t
hash (value_type
);
1055 static inline bool equal (value_type existing
, value_type candidate
);
/* !exists () serves as both the empty and the deleted state.  */
1056 static inline bool is_empty (value_type x
) { return !x
.exists (); }
1057 static inline bool is_deleted (value_type x
) { return !x
.exists (); }
1058 static inline void mark_empty (value_type
&x
) { x
.release (); }
1059 static inline void mark_deleted (value_type
&x
) { x
.release (); }
1060 static inline void remove (value_type
&x
) { x
.release (); }
/* Hash a stmt vector by folding each stmt's UID into an accumulator.
   NOTE(review): the declaration of H and the return of the hash value
   are elided from this view.  */
1063 bst_traits::hash (value_type x
)
1066 for (unsigned i
= 0; i
< x
.length (); ++i
)
1067 h
.add_int (gimple_uid (x
[i
]->stmt
));
/* Two stmt vectors are equal when they have the same length and
   identical stmt_vec_info pointers element-wise.
   NOTE(review): the return statements are elided from this view.  */
1071 bst_traits::equal (value_type existing
, value_type candidate
)
1073 if (existing
.length () != candidate
.length ())
1075 for (unsigned i
= 0; i
< existing
.length (); ++i
)
1076 if (existing
[i
] != candidate
[i
])
/* Container types keyed by a scalar-stmt vector via bst_traits:
   BST_FAIL records stmt sets whose SLP build already failed (consulted
   by vect_build_slp_tree below); the map type associates a stmt set
   with an already-built slp_tree.  */
1081 typedef hash_set
<vec
<gimple
*>, bst_traits
> scalar_stmts_set_t
;
1082 static scalar_stmts_set_t
*bst_fail
;
1084 typedef hash_map
<vec
<gimple
*>, slp_tree
,
1085 simple_hashmap_traits
<bst_traits
, slp_tree
> >
1086 scalar_stmts_to_slp_tree_map_t
;
/* Forward declaration of the recursive worker; the wrapper below adds
   the BST_FAIL memoization around it.  */
1089 vect_build_slp_tree_2 (vec_info
*vinfo
,
1090 vec
<stmt_vec_info
> stmts
, unsigned int group_size
,
1091 poly_uint64
*max_nunits
,
1092 vec
<slp_tree
> *loads
,
1093 bool *matches
, unsigned *npermutes
, unsigned *tree_size
,
1094 unsigned max_tree_size
);
/* Memoizing wrapper around vect_build_slp_tree_2: bail out early when
   STMTS is already recorded in BST_FAIL, otherwise run the worker and
   record a failure (see the PR81723 comment below) so repeated attempts
   on the same stmt set stay cheap.
   NOTE(review): braces, the early-bail return, the worker's last
   argument, the copy of STMTS into X and the final return of RES are
   elided from this view.  */
1097 vect_build_slp_tree (vec_info
*vinfo
,
1098 vec
<stmt_vec_info
> stmts
, unsigned int group_size
,
1099 poly_uint64
*max_nunits
, vec
<slp_tree
> *loads
,
1100 bool *matches
, unsigned *npermutes
, unsigned *tree_size
,
1101 unsigned max_tree_size
)
/* Previously failed stmt set: don't retry.  */
1103 if (bst_fail
->contains (stmts
))
1105 slp_tree res
= vect_build_slp_tree_2 (vinfo
, stmts
, group_size
, max_nunits
,
1106 loads
, matches
, npermutes
, tree_size
,
1108 /* When SLP build fails for stmts record this, otherwise SLP build
1109 can be exponential in time when we allow to construct parts from
1110 scalars, see PR81723. */
1113 vec
<stmt_vec_info
> x
;
1114 x
.create (stmts
.length ());
1121 /* Recursively build an SLP tree starting from NODE.
1122 Fail (and return a value not equal to zero) if def-stmts are not
1123 isomorphic, require data permutation or are of unsupported types of
1124 operation. Otherwise, return 0.
1125 The value returned is the depth in the SLP tree where a mismatch
1129 vect_build_slp_tree_2 (vec_info
*vinfo
,
1130 vec
<stmt_vec_info
> stmts
, unsigned int group_size
,
1131 poly_uint64
*max_nunits
,
1132 vec
<slp_tree
> *loads
,
1133 bool *matches
, unsigned *npermutes
, unsigned *tree_size
,
1134 unsigned max_tree_size
)
1136 unsigned nops
, i
, this_tree_size
= 0;
1137 poly_uint64 this_max_nunits
= *max_nunits
;
1142 stmt_vec_info stmt_info
= stmts
[0];
1143 if (gcall
*stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
))
1144 nops
= gimple_call_num_args (stmt
);
1145 else if (gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
1147 nops
= gimple_num_ops (stmt
) - 1;
1148 if (gimple_assign_rhs_code (stmt
) == COND_EXPR
)
1151 else if (is_a
<gphi
*> (stmt_info
->stmt
))
1156 /* If the SLP node is a PHI (induction or reduction), terminate
1158 if (gphi
*stmt
= dyn_cast
<gphi
*> (stmt_info
->stmt
))
1160 tree scalar_type
= TREE_TYPE (PHI_RESULT (stmt
));
1161 tree vectype
= get_vectype_for_scalar_type (scalar_type
);
1162 if (!vect_record_max_nunits (stmt_info
, group_size
, vectype
, max_nunits
))
1165 vect_def_type def_type
= STMT_VINFO_DEF_TYPE (stmt_info
);
1166 /* Induction from different IVs is not supported. */
1167 if (def_type
== vect_induction_def
)
1169 stmt_vec_info other_info
;
1170 FOR_EACH_VEC_ELT (stmts
, i
, other_info
)
1171 if (stmt_info
!= other_info
)
1176 /* Else def types have to match. */
1177 stmt_vec_info other_info
;
1178 FOR_EACH_VEC_ELT (stmts
, i
, other_info
)
1180 /* But for reduction chains only check on the first stmt. */
1181 if (REDUC_GROUP_FIRST_ELEMENT (other_info
)
1182 && REDUC_GROUP_FIRST_ELEMENT (other_info
) != stmt_info
)
1184 if (STMT_VINFO_DEF_TYPE (other_info
) != def_type
)
1188 node
= vect_create_new_slp_node (stmts
);
1193 bool two_operators
= false;
1194 unsigned char *swap
= XALLOCAVEC (unsigned char, group_size
);
1195 if (!vect_build_slp_tree_1 (swap
, stmts
, group_size
,
1196 &this_max_nunits
, matches
, &two_operators
))
1199 /* If the SLP node is a load, terminate the recursion. */
1200 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
)
1201 && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info
)))
1203 *max_nunits
= this_max_nunits
;
1204 node
= vect_create_new_slp_node (stmts
);
1205 loads
->safe_push (node
);
1209 /* Get at the operands, verifying they are compatible. */
1210 vec
<slp_oprnd_info
> oprnds_info
= vect_create_oprnd_info (nops
, group_size
);
1211 slp_oprnd_info oprnd_info
;
1212 FOR_EACH_VEC_ELT (stmts
, i
, stmt_info
)
1214 int res
= vect_get_and_check_slp_defs (vinfo
, &swap
[i
],
1215 stmts
, i
, &oprnds_info
);
1217 matches
[(res
== -1) ? 0 : i
] = false;
1221 for (i
= 0; i
< group_size
; ++i
)
1224 vect_free_oprnd_info (oprnds_info
);
1228 auto_vec
<slp_tree
, 4> children
;
1229 auto_vec
<slp_tree
> this_loads
;
1231 stmt_info
= stmts
[0];
1234 max_tree_size
-= *tree_size
;
1236 /* Create SLP_TREE nodes for the definition node/s. */
1237 FOR_EACH_VEC_ELT (oprnds_info
, i
, oprnd_info
)
1240 unsigned old_nloads
= this_loads
.length ();
1241 unsigned old_tree_size
= this_tree_size
;
1244 if (oprnd_info
->first_dt
!= vect_internal_def
1245 && oprnd_info
->first_dt
!= vect_reduction_def
1246 && oprnd_info
->first_dt
!= vect_induction_def
)
1249 if (++this_tree_size
> max_tree_size
)
1251 FOR_EACH_VEC_ELT (children
, j
, child
)
1252 vect_free_slp_tree (child
, false);
1253 vect_free_oprnd_info (oprnds_info
);
1257 if ((child
= vect_build_slp_tree (vinfo
, oprnd_info
->def_stmts
,
1258 group_size
, &this_max_nunits
,
1259 &this_loads
, matches
, npermutes
,
1261 max_tree_size
)) != NULL
)
1263 /* If we have all children of child built up from scalars then just
1264 throw that away and build it up this node from scalars. */
1265 if (!SLP_TREE_CHILDREN (child
).is_empty ()
1266 /* ??? Rejecting patterns this way doesn't work. We'd have to
1267 do extra work to cancel the pattern so the uses see the
1269 && !is_pattern_stmt_p (SLP_TREE_SCALAR_STMTS (child
)[0]))
1271 slp_tree grandchild
;
1273 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child
), j
, grandchild
)
1274 if (SLP_TREE_DEF_TYPE (grandchild
) == vect_internal_def
)
1279 this_loads
.truncate (old_nloads
);
1280 this_tree_size
= old_tree_size
;
1281 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child
), j
, grandchild
)
1282 vect_free_slp_tree (grandchild
, false);
1283 SLP_TREE_CHILDREN (child
).truncate (0);
1285 dump_printf_loc (MSG_NOTE
, vect_location
,
1286 "Building parent vector operands from "
1287 "scalars instead\n");
1288 oprnd_info
->def_stmts
= vNULL
;
1289 SLP_TREE_DEF_TYPE (child
) = vect_external_def
;
1290 children
.safe_push (child
);
1295 oprnd_info
->def_stmts
= vNULL
;
1296 children
.safe_push (child
);
1300 /* If the SLP build failed fatally and we analyze a basic-block
1301 simply treat nodes we fail to build as externally defined
1302 (and thus build vectors from the scalar defs).
1303 The cost model will reject outright expensive cases.
1304 ??? This doesn't treat cases where permutation ultimatively
1305 fails (or we don't try permutation below). Ideally we'd
1306 even compute a permutation that will end up with the maximum
1308 if (is_a
<bb_vec_info
> (vinfo
)
1310 /* ??? Rejecting patterns this way doesn't work. We'd have to
1311 do extra work to cancel the pattern so the uses see the
1313 && !is_pattern_stmt_p (stmt_info
))
1315 dump_printf_loc (MSG_NOTE
, vect_location
,
1316 "Building vector operands from scalars\n");
1317 child
= vect_create_new_slp_node (oprnd_info
->def_stmts
);
1318 SLP_TREE_DEF_TYPE (child
) = vect_external_def
;
1319 children
.safe_push (child
);
1320 oprnd_info
->def_stmts
= vNULL
;
1324 /* If the SLP build for operand zero failed and operand zero
1325 and one can be commutated try that for the scalar stmts
1326 that failed the match. */
1328 /* A first scalar stmt mismatch signals a fatal mismatch. */
1330 /* ??? For COND_EXPRs we can swap the comparison operands
1331 as well as the arms under some constraints. */
1333 && oprnds_info
[1]->first_dt
== vect_internal_def
1334 && is_gimple_assign (stmt_info
->stmt
)
1335 /* Do so only if the number of not successful permutes was nor more
1336 than a cut-ff as re-trying the recursive match on
1337 possibly each level of the tree would expose exponential
1341 /* See whether we can swap the matching or the non-matching
1343 bool swap_not_matching
= true;
1346 for (j
= 0; j
< group_size
; ++j
)
1348 if (matches
[j
] != !swap_not_matching
)
1350 stmt_vec_info stmt_info
= stmts
[j
];
1351 /* Verify if we can swap operands of this stmt. */
1352 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
1354 || !commutative_tree_code (gimple_assign_rhs_code (stmt
)))
1356 if (!swap_not_matching
)
1358 swap_not_matching
= false;
1361 /* Verify if we can safely swap or if we committed to a
1362 specific operand order already.
1363 ??? Instead of modifying GIMPLE stmts here we could
1364 record whether we want to swap operands in the SLP
1365 node and temporarily do that when processing it
1366 (or wrap operand accessors in a helper). */
1367 else if (swap
[j
] != 0
1368 || STMT_VINFO_NUM_SLP_USES (stmt_info
))
1370 if (!swap_not_matching
)
1372 if (dump_enabled_p ())
1374 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
1376 "Build SLP failed: cannot swap "
1377 "operands of shared stmt ");
1378 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
,
1379 TDF_SLIM
, stmts
[j
]->stmt
, 0);
1383 swap_not_matching
= false;
1388 while (j
!= group_size
);
1390 /* Swap mismatched definition stmts. */
1391 dump_printf_loc (MSG_NOTE
, vect_location
,
1392 "Re-trying with swapped operands of stmts ");
1393 for (j
= 0; j
< group_size
; ++j
)
1394 if (matches
[j
] == !swap_not_matching
)
1396 std::swap (oprnds_info
[0]->def_stmts
[j
],
1397 oprnds_info
[1]->def_stmts
[j
]);
1398 dump_printf (MSG_NOTE
, "%d ", j
);
1400 dump_printf (MSG_NOTE
, "\n");
1401 /* And try again with scratch 'matches' ... */
1402 bool *tem
= XALLOCAVEC (bool, group_size
);
1403 if ((child
= vect_build_slp_tree (vinfo
, oprnd_info
->def_stmts
,
1404 group_size
, &this_max_nunits
,
1405 &this_loads
, tem
, npermutes
,
1407 max_tree_size
)) != NULL
)
1409 /* ... so if successful we can apply the operand swapping
1410 to the GIMPLE IL. This is necessary because for example
1411 vect_get_slp_defs uses operand indexes and thus expects
1412 canonical operand order. This is also necessary even
1413 if we end up building the operand from scalars as
1414 we'll continue to process swapped operand two. */
1415 for (j
= 0; j
< group_size
; ++j
)
1416 gimple_set_plf (stmts
[j
]->stmt
, GF_PLF_1
, false);
1417 for (j
= 0; j
< group_size
; ++j
)
1418 if (matches
[j
] == !swap_not_matching
)
1420 gassign
*stmt
= as_a
<gassign
*> (stmts
[j
]->stmt
);
1421 /* Avoid swapping operands twice. */
1422 if (gimple_plf (stmt
, GF_PLF_1
))
1424 swap_ssa_operands (stmt
, gimple_assign_rhs1_ptr (stmt
),
1425 gimple_assign_rhs2_ptr (stmt
));
1426 gimple_set_plf (stmt
, GF_PLF_1
, true);
1428 /* Verify we swap all duplicates or none. */
1430 for (j
= 0; j
< group_size
; ++j
)
1431 gcc_assert (gimple_plf (stmts
[j
]->stmt
, GF_PLF_1
)
1432 == (matches
[j
] == !swap_not_matching
));
1434 /* If we have all children of child built up from scalars then
1435 just throw that away and build it up this node from scalars. */
1436 if (!SLP_TREE_CHILDREN (child
).is_empty ()
1437 /* ??? Rejecting patterns this way doesn't work. We'd have
1438 to do extra work to cancel the pattern so the uses see the
1440 && !is_pattern_stmt_p (SLP_TREE_SCALAR_STMTS (child
)[0]))
1443 slp_tree grandchild
;
1445 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child
), j
, grandchild
)
1446 if (SLP_TREE_DEF_TYPE (grandchild
) == vect_internal_def
)
1451 this_loads
.truncate (old_nloads
);
1452 this_tree_size
= old_tree_size
;
1453 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child
), j
, grandchild
)
1454 vect_free_slp_tree (grandchild
, false);
1455 SLP_TREE_CHILDREN (child
).truncate (0);
1457 dump_printf_loc (MSG_NOTE
, vect_location
,
1458 "Building parent vector operands from "
1459 "scalars instead\n");
1460 oprnd_info
->def_stmts
= vNULL
;
1461 SLP_TREE_DEF_TYPE (child
) = vect_external_def
;
1462 children
.safe_push (child
);
1467 oprnd_info
->def_stmts
= vNULL
;
1468 children
.safe_push (child
);
1476 gcc_assert (child
== NULL
);
1477 FOR_EACH_VEC_ELT (children
, j
, child
)
1478 vect_free_slp_tree (child
, false);
1479 vect_free_oprnd_info (oprnds_info
);
1483 vect_free_oprnd_info (oprnds_info
);
1486 *tree_size
+= this_tree_size
;
1487 *max_nunits
= this_max_nunits
;
1488 loads
->safe_splice (this_loads
);
1490 node
= vect_create_new_slp_node (stmts
);
1491 SLP_TREE_TWO_OPERATORS (node
) = two_operators
;
1492 SLP_TREE_CHILDREN (node
).splice (children
);
1496 /* Dump a slp tree NODE using flags specified in DUMP_KIND. */
1499 vect_print_slp_tree (dump_flags_t dump_kind
, dump_location_t loc
,
1503 stmt_vec_info stmt_info
;
1506 dump_printf_loc (dump_kind
, loc
, "node%s\n",
1507 SLP_TREE_DEF_TYPE (node
) != vect_internal_def
1508 ? " (external)" : "");
1509 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt_info
)
1511 dump_printf_loc (dump_kind
, loc
, "\tstmt %d ", i
);
1512 dump_gimple_stmt (dump_kind
, TDF_SLIM
, stmt_info
->stmt
, 0);
1514 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1515 vect_print_slp_tree (dump_kind
, loc
, child
);
1519 /* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
1520 If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
1521 J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
1522 stmts in NODE are to be marked. */
1525 vect_mark_slp_stmts (slp_tree node
, enum slp_vect_type mark
, int j
)
1528 stmt_vec_info stmt_info
;
1531 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
1534 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt_info
)
1535 if (j
< 0 || i
== j
)
1536 STMT_SLP_TYPE (stmt_info
) = mark
;
1538 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1539 vect_mark_slp_stmts (child
, mark
, j
);
1543 /* Mark the statements of the tree rooted at NODE as relevant (vect_used). */
1546 vect_mark_slp_stmts_relevant (slp_tree node
)
1549 stmt_vec_info stmt_info
;
1552 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
1555 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt_info
)
1557 gcc_assert (!STMT_VINFO_RELEVANT (stmt_info
)
1558 || STMT_VINFO_RELEVANT (stmt_info
) == vect_used_in_scope
);
1559 STMT_VINFO_RELEVANT (stmt_info
) = vect_used_in_scope
;
1562 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1563 vect_mark_slp_stmts_relevant (child
);
1567 /* Rearrange the statements of NODE according to PERMUTATION. */
1570 vect_slp_rearrange_stmts (slp_tree node
, unsigned int group_size
,
1571 vec
<unsigned> permutation
)
1573 stmt_vec_info stmt_info
;
1574 vec
<stmt_vec_info
> tmp_stmts
;
1578 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1579 vect_slp_rearrange_stmts (child
, group_size
, permutation
);
1581 gcc_assert (group_size
== SLP_TREE_SCALAR_STMTS (node
).length ());
1582 tmp_stmts
.create (group_size
);
1583 tmp_stmts
.quick_grow_cleared (group_size
);
1585 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt_info
)
1586 tmp_stmts
[permutation
[i
]] = stmt_info
;
1588 SLP_TREE_SCALAR_STMTS (node
).release ();
1589 SLP_TREE_SCALAR_STMTS (node
) = tmp_stmts
;
1593 /* Attempt to reorder stmts in a reduction chain so that we don't
1594 require any load permutation. Return true if that was possible,
1595 otherwise return false. */
1598 vect_attempt_slp_rearrange_stmts (slp_instance slp_instn
)
1600 unsigned int group_size
= SLP_INSTANCE_GROUP_SIZE (slp_instn
);
1603 slp_tree node
, load
;
1605 /* Compare all the permutation sequences to the first one. We know
1606 that at least one load is permuted. */
1607 node
= SLP_INSTANCE_LOADS (slp_instn
)[0];
1608 if (!node
->load_permutation
.exists ())
1610 for (i
= 1; SLP_INSTANCE_LOADS (slp_instn
).iterate (i
, &load
); ++i
)
1612 if (!load
->load_permutation
.exists ())
1614 FOR_EACH_VEC_ELT (load
->load_permutation
, j
, lidx
)
1615 if (lidx
!= node
->load_permutation
[j
])
1619 /* Check that the loads in the first sequence are different and there
1620 are no gaps between them. */
1621 auto_sbitmap
load_index (group_size
);
1622 bitmap_clear (load_index
);
1623 FOR_EACH_VEC_ELT (node
->load_permutation
, i
, lidx
)
1625 if (lidx
>= group_size
)
1627 if (bitmap_bit_p (load_index
, lidx
))
1630 bitmap_set_bit (load_index
, lidx
);
1632 for (i
= 0; i
< group_size
; i
++)
1633 if (!bitmap_bit_p (load_index
, i
))
1636 /* This permutation is valid for reduction. Since the order of the
1637 statements in the nodes is not important unless they are memory
1638 accesses, we can rearrange the statements in all the nodes
1639 according to the order of the loads. */
1640 vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn
), group_size
,
1641 node
->load_permutation
);
1643 /* We are done, no actual permutations need to be generated. */
1644 poly_uint64 unrolling_factor
= SLP_INSTANCE_UNROLLING_FACTOR (slp_instn
);
1645 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1647 stmt_vec_info first_stmt_info
= SLP_TREE_SCALAR_STMTS (node
)[0];
1648 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (first_stmt_info
);
1649 /* But we have to keep those permutations that are required because
1650 of handling of gaps. */
1651 if (known_eq (unrolling_factor
, 1U)
1652 || (group_size
== DR_GROUP_SIZE (first_stmt_info
)
1653 && DR_GROUP_GAP (first_stmt_info
) == 0))
1654 SLP_TREE_LOAD_PERMUTATION (node
).release ();
1656 for (j
= 0; j
< SLP_TREE_LOAD_PERMUTATION (node
).length (); ++j
)
1657 SLP_TREE_LOAD_PERMUTATION (node
)[j
] = j
;
1663 /* Check if the required load permutations in the SLP instance
1664 SLP_INSTN are supported. */
1667 vect_supported_load_permutation_p (slp_instance slp_instn
)
1669 unsigned int group_size
= SLP_INSTANCE_GROUP_SIZE (slp_instn
);
1670 unsigned int i
, j
, k
, next
;
1673 if (dump_enabled_p ())
1675 dump_printf_loc (MSG_NOTE
, vect_location
, "Load permutation ");
1676 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1677 if (node
->load_permutation
.exists ())
1678 FOR_EACH_VEC_ELT (node
->load_permutation
, j
, next
)
1679 dump_printf (MSG_NOTE
, "%d ", next
);
1681 for (k
= 0; k
< group_size
; ++k
)
1682 dump_printf (MSG_NOTE
, "%d ", k
);
1683 dump_printf (MSG_NOTE
, "\n");
1686 /* In case of reduction every load permutation is allowed, since the order
1687 of the reduction statements is not important (as opposed to the case of
1688 grouped stores). The only condition we need to check is that all the
1689 load nodes are of the same size and have the same permutation (and then
1690 rearrange all the nodes of the SLP instance according to this
1693 /* Check that all the load nodes are of the same size. */
1694 /* ??? Can't we assert this? */
1695 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1696 if (SLP_TREE_SCALAR_STMTS (node
).length () != (unsigned) group_size
)
1699 node
= SLP_INSTANCE_TREE (slp_instn
);
1700 stmt_vec_info stmt_info
= SLP_TREE_SCALAR_STMTS (node
)[0];
1702 /* Reduction (there are no data-refs in the root).
1703 In reduction chain the order of the loads is not important. */
1704 if (!STMT_VINFO_DATA_REF (stmt_info
)
1705 && !REDUC_GROUP_FIRST_ELEMENT (stmt_info
))
1706 vect_attempt_slp_rearrange_stmts (slp_instn
);
1708 /* In basic block vectorization we allow any subchain of an interleaving
1710 FORNOW: not supported in loop SLP because of realignment compications. */
1711 if (STMT_VINFO_BB_VINFO (stmt_info
))
1713 /* Check whether the loads in an instance form a subchain and thus
1714 no permutation is necessary. */
1715 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1717 if (!SLP_TREE_LOAD_PERMUTATION (node
).exists ())
1719 bool subchain_p
= true;
1720 stmt_vec_info next_load_info
= NULL
;
1721 stmt_vec_info load_info
;
1722 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), j
, load_info
)
1725 && (next_load_info
!= load_info
1726 || DR_GROUP_GAP (load_info
) != 1))
1731 next_load_info
= DR_GROUP_NEXT_ELEMENT (load_info
);
1734 SLP_TREE_LOAD_PERMUTATION (node
).release ();
1737 stmt_vec_info group_info
= SLP_TREE_SCALAR_STMTS (node
)[0];
1738 group_info
= DR_GROUP_FIRST_ELEMENT (group_info
);
1739 unsigned HOST_WIDE_INT nunits
;
1740 unsigned k
, maxk
= 0;
1741 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (node
), j
, k
)
1744 /* In BB vectorization we may not actually use a loaded vector
1745 accessing elements in excess of DR_GROUP_SIZE. */
1746 tree vectype
= STMT_VINFO_VECTYPE (group_info
);
1747 if (!TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&nunits
)
1748 || maxk
>= (DR_GROUP_SIZE (group_info
) & ~(nunits
- 1)))
1750 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1751 "BB vectorization with gaps at the end of "
1752 "a load is not supported\n");
1756 /* Verify the permutation can be generated. */
1759 if (!vect_transform_slp_perm_load (node
, tem
, NULL
,
1760 1, slp_instn
, true, &n_perms
))
1762 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
1764 "unsupported load permutation\n");
1772 /* For loop vectorization verify we can generate the permutation. Be
1773 conservative about the vectorization factor, there are permutations
1774 that will use three vector inputs only starting from a specific factor
1775 and the vectorization factor is not yet final.
1776 ??? The SLP instance unrolling factor might not be the maximum one. */
1779 = force_common_multiple (SLP_INSTANCE_UNROLLING_FACTOR (slp_instn
),
1780 LOOP_VINFO_VECT_FACTOR
1781 (STMT_VINFO_LOOP_VINFO (stmt_info
)));
1782 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1783 if (node
->load_permutation
.exists ()
1784 && !vect_transform_slp_perm_load (node
, vNULL
, NULL
, test_vf
,
1785 slp_instn
, true, &n_perms
))
1792 /* Find the last store in SLP INSTANCE. */
1795 vect_find_last_scalar_stmt_in_slp (slp_tree node
)
1797 stmt_vec_info last
= NULL
;
1798 stmt_vec_info stmt_vinfo
;
1800 for (int i
= 0; SLP_TREE_SCALAR_STMTS (node
).iterate (i
, &stmt_vinfo
); i
++)
1802 stmt_vinfo
= vect_orig_stmt (stmt_vinfo
);
1803 last
= last
? get_later_stmt (stmt_vinfo
, last
) : stmt_vinfo
;
1809 /* Splits a group of stores, currently beginning at FIRST_VINFO, into
1810 two groups: one (still beginning at FIRST_VINFO) of size GROUP1_SIZE
1811 (also containing the first GROUP1_SIZE stmts, since stores are
1812 consecutive), the second containing the remainder.
1813 Return the first stmt in the second group. */
1815 static stmt_vec_info
1816 vect_split_slp_store_group (stmt_vec_info first_vinfo
, unsigned group1_size
)
1818 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_vinfo
) == first_vinfo
);
1819 gcc_assert (group1_size
> 0);
1820 int group2_size
= DR_GROUP_SIZE (first_vinfo
) - group1_size
;
1821 gcc_assert (group2_size
> 0);
1822 DR_GROUP_SIZE (first_vinfo
) = group1_size
;
1824 stmt_vec_info stmt_info
= first_vinfo
;
1825 for (unsigned i
= group1_size
; i
> 1; i
--)
1827 stmt_info
= DR_GROUP_NEXT_ELEMENT (stmt_info
);
1828 gcc_assert (DR_GROUP_GAP (stmt_info
) == 1);
1830 /* STMT is now the last element of the first group. */
1831 stmt_vec_info group2
= DR_GROUP_NEXT_ELEMENT (stmt_info
);
1832 DR_GROUP_NEXT_ELEMENT (stmt_info
) = 0;
1834 DR_GROUP_SIZE (group2
) = group2_size
;
1835 for (stmt_info
= group2
; stmt_info
;
1836 stmt_info
= DR_GROUP_NEXT_ELEMENT (stmt_info
))
1838 DR_GROUP_FIRST_ELEMENT (stmt_info
) = group2
;
1839 gcc_assert (DR_GROUP_GAP (stmt_info
) == 1);
1842 /* For the second group, the DR_GROUP_GAP is that before the original group,
1843 plus skipping over the first vector. */
1844 DR_GROUP_GAP (group2
) = DR_GROUP_GAP (first_vinfo
) + group1_size
;
1846 /* DR_GROUP_GAP of the first group now has to skip over the second group too. */
1847 DR_GROUP_GAP (first_vinfo
) += group2_size
;
1849 if (dump_enabled_p ())
1850 dump_printf_loc (MSG_NOTE
, vect_location
, "Split group into %d and %d\n",
1851 group1_size
, group2_size
);
1856 /* Calculate the unrolling factor for an SLP instance with GROUP_SIZE
1857 statements and a vector of NUNITS elements. */
1860 calculate_unrolling_factor (poly_uint64 nunits
, unsigned int group_size
)
1862 return exact_div (common_multiple (nunits
, group_size
), group_size
);
1865 /* Analyze an SLP instance starting from a group of grouped stores. Call
1866 vect_build_slp_tree to build a tree of packed stmts if possible.
1867 Return FALSE if it's impossible to SLP any stmt in the loop. */
1870 vect_analyze_slp_instance (vec_info
*vinfo
,
1871 stmt_vec_info stmt_info
, unsigned max_tree_size
)
1873 slp_instance new_instance
;
1875 unsigned int group_size
;
1876 tree vectype
, scalar_type
= NULL_TREE
;
1878 vec
<slp_tree
> loads
;
1879 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
1880 vec
<stmt_vec_info
> scalar_stmts
;
1882 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
1884 scalar_type
= TREE_TYPE (DR_REF (dr
));
1885 vectype
= get_vectype_for_scalar_type (scalar_type
);
1886 group_size
= DR_GROUP_SIZE (stmt_info
);
1888 else if (!dr
&& REDUC_GROUP_FIRST_ELEMENT (stmt_info
))
1890 gcc_assert (is_a
<loop_vec_info
> (vinfo
));
1891 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1892 group_size
= REDUC_GROUP_SIZE (stmt_info
);
1896 gcc_assert (is_a
<loop_vec_info
> (vinfo
));
1897 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1898 group_size
= as_a
<loop_vec_info
> (vinfo
)->reductions
.length ();
1903 if (dump_enabled_p ())
1905 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1906 "Build SLP failed: unsupported data-type ");
1907 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, scalar_type
);
1908 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
1913 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
1915 /* Create a node (a root of the SLP tree) for the packed grouped stores. */
1916 scalar_stmts
.create (group_size
);
1917 stmt_vec_info next_info
= stmt_info
;
1918 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
1920 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
1923 scalar_stmts
.safe_push (vect_stmt_to_vectorize (next_info
));
1924 next_info
= DR_GROUP_NEXT_ELEMENT (next_info
);
1927 else if (!dr
&& REDUC_GROUP_FIRST_ELEMENT (stmt_info
))
1929 /* Collect the reduction stmts and store them in
1930 SLP_TREE_SCALAR_STMTS. */
1933 scalar_stmts
.safe_push (vect_stmt_to_vectorize (next_info
));
1934 next_info
= REDUC_GROUP_NEXT_ELEMENT (next_info
);
1936 /* Mark the first element of the reduction chain as reduction to properly
1937 transform the node. In the reduction analysis phase only the last
1938 element of the chain is marked as reduction. */
1939 STMT_VINFO_DEF_TYPE (stmt_info
) = vect_reduction_def
;
1943 /* Collect reduction statements. */
1944 vec
<stmt_vec_info
> reductions
= as_a
<loop_vec_info
> (vinfo
)->reductions
;
1945 for (i
= 0; reductions
.iterate (i
, &next_info
); i
++)
1946 scalar_stmts
.safe_push (next_info
);
1949 loads
.create (group_size
);
1951 /* Build the tree for the SLP instance. */
1952 bool *matches
= XALLOCAVEC (bool, group_size
);
1953 unsigned npermutes
= 0;
1954 bst_fail
= new scalar_stmts_set_t ();
1955 poly_uint64 max_nunits
= nunits
;
1956 node
= vect_build_slp_tree (vinfo
, scalar_stmts
, group_size
,
1957 &max_nunits
, &loads
, matches
, &npermutes
,
1958 NULL
, max_tree_size
);
1962 /* Calculate the unrolling factor based on the smallest type. */
1963 poly_uint64 unrolling_factor
1964 = calculate_unrolling_factor (max_nunits
, group_size
);
1966 if (maybe_ne (unrolling_factor
, 1U)
1967 && is_a
<bb_vec_info
> (vinfo
))
1969 unsigned HOST_WIDE_INT const_max_nunits
;
1970 if (!max_nunits
.is_constant (&const_max_nunits
)
1971 || const_max_nunits
> group_size
)
1973 if (dump_enabled_p ())
1974 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1975 "Build SLP failed: store group "
1976 "size not a multiple of the vector size "
1977 "in basic block SLP\n");
1978 vect_free_slp_tree (node
, false);
1982 /* Fatal mismatch. */
1983 matches
[group_size
/ const_max_nunits
* const_max_nunits
] = false;
1984 vect_free_slp_tree (node
, false);
1989 /* Create a new SLP instance. */
1990 new_instance
= XNEW (struct _slp_instance
);
1991 SLP_INSTANCE_TREE (new_instance
) = node
;
1992 SLP_INSTANCE_GROUP_SIZE (new_instance
) = group_size
;
1993 SLP_INSTANCE_UNROLLING_FACTOR (new_instance
) = unrolling_factor
;
1994 SLP_INSTANCE_LOADS (new_instance
) = loads
;
1996 /* Compute the load permutation. */
1998 bool loads_permuted
= false;
1999 FOR_EACH_VEC_ELT (loads
, i
, load_node
)
2001 vec
<unsigned> load_permutation
;
2003 stmt_vec_info load_info
;
2004 bool this_load_permuted
= false;
2005 load_permutation
.create (group_size
);
2006 stmt_vec_info first_stmt_info
= DR_GROUP_FIRST_ELEMENT
2007 (SLP_TREE_SCALAR_STMTS (load_node
)[0]);
2008 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node
), j
, load_info
)
2010 int load_place
= vect_get_place_in_interleaving_chain
2011 (load_info
, first_stmt_info
);
2012 gcc_assert (load_place
!= -1);
2013 if (load_place
!= j
)
2014 this_load_permuted
= true;
2015 load_permutation
.safe_push (load_place
);
2017 if (!this_load_permuted
2018 /* The load requires permutation when unrolling exposes
2019 a gap either because the group is larger than the SLP
2020 group-size or because there is a gap between the groups. */
2021 && (known_eq (unrolling_factor
, 1U)
2022 || (group_size
== DR_GROUP_SIZE (first_stmt_info
)
2023 && DR_GROUP_GAP (first_stmt_info
) == 0)))
2025 load_permutation
.release ();
2028 SLP_TREE_LOAD_PERMUTATION (load_node
) = load_permutation
;
2029 loads_permuted
= true;
2034 if (!vect_supported_load_permutation_p (new_instance
))
2036 if (dump_enabled_p ())
2038 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2039 "Build SLP failed: unsupported load "
2041 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
,
2042 TDF_SLIM
, stmt_info
->stmt
, 0);
2044 vect_free_slp_instance (new_instance
, false);
2049 /* If the loads and stores can be handled with load/store-lan
2050 instructions do not generate this SLP instance. */
2051 if (is_a
<loop_vec_info
> (vinfo
)
2053 && dr
&& vect_store_lanes_supported (vectype
, group_size
, false))
2056 FOR_EACH_VEC_ELT (loads
, i
, load_node
)
2058 stmt_vec_info stmt_vinfo
= DR_GROUP_FIRST_ELEMENT
2059 (SLP_TREE_SCALAR_STMTS (load_node
)[0]);
2060 /* Use SLP for strided accesses (or if we can't load-lanes). */
2061 if (STMT_VINFO_STRIDED_P (stmt_vinfo
)
2062 || ! vect_load_lanes_supported
2063 (STMT_VINFO_VECTYPE (stmt_vinfo
),
2064 DR_GROUP_SIZE (stmt_vinfo
), false))
2067 if (i
== loads
.length ())
2069 if (dump_enabled_p ())
2070 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2071 "Built SLP cancelled: can use "
2072 "load/store-lanes\n");
2073 vect_free_slp_instance (new_instance
, false);
2078 vinfo
->slp_instances
.safe_push (new_instance
);
2080 if (dump_enabled_p ())
2082 dump_printf_loc (MSG_NOTE
, vect_location
,
2083 "Final SLP tree for instance:\n");
2084 vect_print_slp_tree (MSG_NOTE
, vect_location
, node
);
2092 /* Failed to SLP. */
2093 /* Free the allocated memory. */
2094 scalar_stmts
.release ();
2098 /* For basic block SLP, try to break the group up into multiples of the
2100 unsigned HOST_WIDE_INT const_nunits
;
2101 if (is_a
<bb_vec_info
> (vinfo
)
2102 && STMT_VINFO_GROUPED_ACCESS (stmt_info
)
2103 && DR_GROUP_FIRST_ELEMENT (stmt_info
)
2104 && nunits
.is_constant (&const_nunits
))
2106 /* We consider breaking the group only on VF boundaries from the existing
2108 for (i
= 0; i
< group_size
; i
++)
2109 if (!matches
[i
]) break;
2111 if (i
>= const_nunits
&& i
< group_size
)
2113 /* Split into two groups at the first vector boundary before i. */
2114 gcc_assert ((const_nunits
& (const_nunits
- 1)) == 0);
2115 unsigned group1_size
= i
& ~(const_nunits
- 1);
2117 stmt_vec_info rest
= vect_split_slp_store_group (stmt_info
,
2119 bool res
= vect_analyze_slp_instance (vinfo
, stmt_info
,
2121 /* If the first non-match was in the middle of a vector,
2122 skip the rest of that vector. */
2123 if (group1_size
< i
)
2125 i
= group1_size
+ const_nunits
;
2127 rest
= vect_split_slp_store_group (rest
, const_nunits
);
2130 res
|= vect_analyze_slp_instance (vinfo
, rest
, max_tree_size
);
2133 /* Even though the first vector did not all match, we might be able to SLP
2134 (some) of the remainder. FORNOW ignore this possibility. */
2141 /* Check if there are stmts in the loop can be vectorized using SLP. Build SLP
2142 trees of packed scalar stmts if SLP is possible. */
2145 vect_analyze_slp (vec_info
*vinfo
, unsigned max_tree_size
)
2148 stmt_vec_info first_element
;
2150 DUMP_VECT_SCOPE ("vect_analyze_slp");
2152 /* Find SLP sequences starting from groups of grouped stores. */
2153 FOR_EACH_VEC_ELT (vinfo
->grouped_stores
, i
, first_element
)
2154 vect_analyze_slp_instance (vinfo
, first_element
, max_tree_size
);
2156 if (loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
))
2158 if (loop_vinfo
->reduction_chains
.length () > 0)
2160 /* Find SLP sequences starting from reduction chains. */
2161 FOR_EACH_VEC_ELT (loop_vinfo
->reduction_chains
, i
, first_element
)
2162 if (! vect_analyze_slp_instance (vinfo
, first_element
,
2165 /* Dissolve reduction chain group. */
2166 stmt_vec_info vinfo
= first_element
;
2169 stmt_vec_info next
= REDUC_GROUP_NEXT_ELEMENT (vinfo
);
2170 REDUC_GROUP_FIRST_ELEMENT (vinfo
) = NULL
;
2171 REDUC_GROUP_NEXT_ELEMENT (vinfo
) = NULL
;
2174 STMT_VINFO_DEF_TYPE (first_element
) = vect_internal_def
;
2178 /* Find SLP sequences starting from groups of reductions. */
2179 if (loop_vinfo
->reductions
.length () > 1)
2180 vect_analyze_slp_instance (vinfo
, loop_vinfo
->reductions
[0],
2188 /* For each possible SLP instance decide whether to SLP it and calculate overall
2189 unrolling factor needed to SLP the loop. Return TRUE if decided to SLP at
2190 least one instance. */
2193 vect_make_slp_decision (loop_vec_info loop_vinfo
)
2196 poly_uint64 unrolling_factor
= 1;
2197 vec
<slp_instance
> slp_instances
= LOOP_VINFO_SLP_INSTANCES (loop_vinfo
);
2198 slp_instance instance
;
2199 int decided_to_slp
= 0;
2201 DUMP_VECT_SCOPE ("vect_make_slp_decision");
2203 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
2205 /* FORNOW: SLP if you can. */
2206 /* All unroll factors have the form current_vector_size * X for some
2207 rational X, so they must have a common multiple. */
2209 = force_common_multiple (unrolling_factor
,
2210 SLP_INSTANCE_UNROLLING_FACTOR (instance
));
2212 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
2213 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
2214 loop-based vectorization. Such stmts will be marked as HYBRID. */
2215 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance
), pure_slp
, -1);
2219 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo
) = unrolling_factor
;
2221 if (decided_to_slp
&& dump_enabled_p ())
2223 dump_printf_loc (MSG_NOTE
, vect_location
,
2224 "Decided to SLP %d instances. Unrolling factor ",
2226 dump_dec (MSG_NOTE
, unrolling_factor
);
2227 dump_printf (MSG_NOTE
, "\n");
2230 return (decided_to_slp
> 0);
2234 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
2235 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
2238 vect_detect_hybrid_slp_stmts (slp_tree node
, unsigned i
, slp_vect_type stype
)
2240 stmt_vec_info stmt_vinfo
= SLP_TREE_SCALAR_STMTS (node
)[i
];
2241 imm_use_iterator imm_iter
;
2243 stmt_vec_info use_vinfo
;
2245 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
2248 /* Propagate hybrid down the SLP tree. */
2249 if (stype
== hybrid
)
2251 else if (HYBRID_SLP_STMT (stmt_vinfo
))
2255 /* Check if a pure SLP stmt has uses in non-SLP stmts. */
2256 gcc_checking_assert (PURE_SLP_STMT (stmt_vinfo
));
2257 /* If we get a pattern stmt here we have to use the LHS of the
2258 original stmt for immediate uses. */
2259 gimple
*stmt
= vect_orig_stmt (stmt_vinfo
)->stmt
;
2261 if (gimple_code (stmt
) == GIMPLE_PHI
)
2262 def
= gimple_phi_result (stmt
);
2264 def
= SINGLE_SSA_TREE_OPERAND (stmt
, SSA_OP_DEF
);
2266 FOR_EACH_IMM_USE_STMT (use_stmt
, imm_iter
, def
)
2268 use_vinfo
= loop_vinfo
->lookup_stmt (use_stmt
);
2271 use_vinfo
= vect_stmt_to_vectorize (use_vinfo
);
2272 if (!STMT_SLP_TYPE (use_vinfo
)
2273 && (STMT_VINFO_RELEVANT (use_vinfo
)
2274 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo
)))
2275 && !(gimple_code (use_stmt
) == GIMPLE_PHI
2276 && STMT_VINFO_DEF_TYPE (use_vinfo
) == vect_reduction_def
))
2278 if (dump_enabled_p ())
2280 dump_printf_loc (MSG_NOTE
, vect_location
, "use of SLP "
2281 "def in non-SLP stmt: ");
2282 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, use_stmt
, 0);
2290 && !HYBRID_SLP_STMT (stmt_vinfo
))
2292 if (dump_enabled_p ())
2294 dump_printf_loc (MSG_NOTE
, vect_location
, "marking hybrid: ");
2295 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt_vinfo
->stmt
, 0);
2297 STMT_SLP_TYPE (stmt_vinfo
) = hybrid
;
2300 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), j
, child
)
2301 if (SLP_TREE_DEF_TYPE (child
) != vect_external_def
)
2302 vect_detect_hybrid_slp_stmts (child
, i
, stype
);
/* NOTE(review): lossy extraction -- source lines between the embedded line
   numbers are missing; fragments kept verbatim.  */
2305 /* Helpers for vect_detect_hybrid_slp walking pattern stmt uses. */
/* walk_tree operand callback: if *TP is defined by a pure-SLP stmt, mark that
   def stmt hybrid (pattern stmts have no recorded immediate uses).  */
2308 vect_detect_hybrid_slp_1 (tree
*tp
, int *, void *data
)
2310 walk_stmt_info
*wi
= (walk_stmt_info
*)data
;
2311 loop_vec_info loop_vinfo
= (loop_vec_info
) wi
->info
;
2316 stmt_vec_info def_stmt_info
= loop_vinfo
->lookup_def (*tp
);
2317 if (def_stmt_info
&& PURE_SLP_STMT (def_stmt_info
))
2319 if (dump_enabled_p ())
2321 dump_printf_loc (MSG_NOTE
, vect_location
, "marking hybrid: ");
2322 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, def_stmt_info
->stmt
, 0);
2324 STMT_SLP_TYPE (def_stmt_info
) = hybrid
;
/* NOTE(review): lossy extraction -- source lines are missing (e.g. the second
   parameter line and the statement guarded by the condition); kept verbatim.  */
/* walk_gimple_stmt statement callback: decides whether the stmt at *GSI is a
   relevant non-SLP use whose operands should be scanned by the _1 callback
   above; reduction PHIs are excluded, as in vect_detect_hybrid_slp_stmts.  */
2331 vect_detect_hybrid_slp_2 (gimple_stmt_iterator
*gsi
, bool *handled
,
2334 loop_vec_info loop_vinfo
= (loop_vec_info
) wi
->info
;
2335 stmt_vec_info use_vinfo
= loop_vinfo
->lookup_stmt (gsi_stmt (*gsi
));
2336 /* If the stmt is in a SLP instance then this isn't a reason
2337 to mark use definitions in other SLP instances as hybrid. */
2338 if (! STMT_SLP_TYPE (use_vinfo
)
2339 && (STMT_VINFO_RELEVANT (use_vinfo
)
2340 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo
)))
2341 && ! (gimple_code (gsi_stmt (*gsi
)) == GIMPLE_PHI
2342 && STMT_VINFO_DEF_TYPE (use_vinfo
) == vect_reduction_def
))
/* NOTE(review): lossy extraction -- source lines are missing between the
   embedded line numbers; fragments kept verbatim.  */
2349 /* Find stmts that must be both vectorized and SLPed. */
/* Driver: first walks all pattern stmts in the loop (marking pure-SLP defs
   used by them as hybrid, since pattern stmts carry no immediate uses), then
   walks each SLP instance tree via vect_detect_hybrid_slp_stmts.  */
2352 vect_detect_hybrid_slp (loop_vec_info loop_vinfo
)
2355 vec
<slp_instance
> slp_instances
= LOOP_VINFO_SLP_INSTANCES (loop_vinfo
);
2356 slp_instance instance
;
2358 DUMP_VECT_SCOPE ("vect_detect_hybrid_slp");
2360 /* First walk all pattern stmt in the loop and mark defs of uses as
2361 hybrid because immediate uses in them are not recorded. */
2362 for (i
= 0; i
< LOOP_VINFO_LOOP (loop_vinfo
)->num_nodes
; ++i
)
2364 basic_block bb
= LOOP_VINFO_BBS (loop_vinfo
)[i
];
2365 for (gimple_stmt_iterator gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
);
2368 gimple
*stmt
= gsi_stmt (gsi
);
2369 stmt_vec_info stmt_info
= loop_vinfo
->lookup_stmt (stmt
);
2370 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
2373 memset (&wi
, 0, sizeof (wi
));
2374 wi
.info
= loop_vinfo
;
2375 gimple_stmt_iterator gsi2
2376 = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info
)->stmt
);
2377 walk_gimple_stmt (&gsi2
, vect_detect_hybrid_slp_2
,
2378 vect_detect_hybrid_slp_1
, &wi
);
2379 walk_gimple_seq (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
),
2380 vect_detect_hybrid_slp_2
,
2381 vect_detect_hybrid_slp_1
, &wi
);
2386 /* Then walk the SLP instance trees marking stmts with uses in
2387 non-SLP stmts as hybrid, also propagating hybrid down the
2388 SLP tree, collecting the above info on-the-fly. */
2389 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
2391 for (unsigned i
= 0; i
< SLP_INSTANCE_GROUP_SIZE (instance
); ++i
)
2392 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance
),
/* NOTE(review): lossy extraction -- loop-increment and brace lines are
   missing; fragments kept verbatim.  */
2398 /* Initialize a bb_vec_info struct for the statements between
2399 REGION_BEGIN_IN (inclusive) and REGION_END_IN (exclusive). */
/* Constructor: records the region bounds and zeroes each stmt's uid to mark
   it as belonging to this BB-vectorization region.  */
2401 _bb_vec_info::_bb_vec_info (gimple_stmt_iterator region_begin_in
,
2402 gimple_stmt_iterator region_end_in
,
2403 vec_info_shared
*shared
)
2404 : vec_info (vec_info::bb
, init_cost (NULL
), shared
),
2405 bb (gsi_bb (region_begin_in
)),
2406 region_begin (region_begin_in
),
2407 region_end (region_end_in
)
2409 gimple_stmt_iterator gsi
;
2411 for (gsi
= region_begin
; gsi_stmt (gsi
) != gsi_stmt (region_end
);
2414 gimple
*stmt
= gsi_stmt (gsi
);
2415 gimple_set_uid (stmt
, 0);
/* NOTE(review): lossy extraction -- braces are missing; fragments verbatim.  */
2423 /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
2424 stmts in the basic block. */
/* Destructor: resets each stmt uid in the region to -1 (the "not in a
   vectorization region" marker).  */
2426 _bb_vec_info::~_bb_vec_info ()
2428 for (gimple_stmt_iterator si
= region_begin
;
2429 gsi_stmt (si
) != gsi_stmt (region_end
); gsi_next (&si
))
2430 /* Reset region marker. */
2431 gimple_set_uid (gsi_stmt (si
), -1);
/* NOTE(review): lossy extraction -- many source lines (conditions, braces,
   early returns) are missing between the embedded numbers; kept verbatim.  */
2436 /* Subroutine of vect_slp_analyze_node_operations. Handle the root of NODE,
2437 given then that child nodes have already been processed, and that
2438 their def types currently match their SLP node's def type. */
/* Assigns vector types for BB vectorization, computes the number of vector
   stmts for NODE, then defers to vect_analyze_stmt for the actual check.  */
2441 vect_slp_analyze_node_operations_1 (vec_info
*vinfo
, slp_tree node
,
2442 slp_instance node_instance
,
2443 stmt_vector_for_cost
*cost_vec
)
2445 stmt_vec_info stmt_info
= SLP_TREE_SCALAR_STMTS (node
)[0];
2446 gcc_assert (STMT_SLP_TYPE (stmt_info
) != loop_vect
);
2448 /* For BB vectorization vector types are assigned here.
2449 Memory accesses already got their vector type assigned
2450 in vect_analyze_data_refs. */
2451 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
2453 && ! STMT_VINFO_DATA_REF (stmt_info
))
2455 tree vectype
, nunits_vectype
;
2456 if (!vect_get_vector_types_for_stmt (stmt_info
, &vectype
,
2458 /* We checked this when building the node. */
2460 if (vectype
== boolean_type_node
)
2462 vectype
= vect_get_mask_type_for_stmt (stmt_info
);
2464 /* vect_get_mask_type_for_stmt has already explained the
2469 stmt_vec_info sstmt_info
;
2471 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, sstmt_info
)
2472 STMT_VINFO_VECTYPE (sstmt_info
) = vectype
;
2475 /* Calculate the number of vector statements to be created for the
2476 scalar stmts in this node. For SLP reductions it is equal to the
2477 number of vector statements in the children (which has already been
2478 calculated by the recursive call). Otherwise it is the number of
2479 scalar elements in one scalar iteration (DR_GROUP_SIZE) multiplied by
2480 VF divided by the number of elements in a vector. */
2481 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info
)
2482 && REDUC_GROUP_FIRST_ELEMENT (stmt_info
))
2483 SLP_TREE_NUMBER_OF_VEC_STMTS (node
)
2484 = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node
)[0]);
2488 if (loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
))
2489 vf
= loop_vinfo
->vectorization_factor
;
2492 unsigned int group_size
= SLP_INSTANCE_GROUP_SIZE (node_instance
);
2493 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2494 SLP_TREE_NUMBER_OF_VEC_STMTS (node
)
2495 = vect_get_num_vectors (vf
* group_size
, vectype
);
2499 return vect_analyze_stmt (stmt_info
, &dummy
, node
, node_instance
, cost_vec
);
/* NOTE(review): lossy extraction -- returns, braces and trailing arguments
   are missing between the embedded line numbers; kept verbatim.  */
2502 /* Analyze statements contained in SLP tree NODE after recursively analyzing
2503 the subtree. NODE_INSTANCE contains NODE and VINFO contains INSTANCE.
2505 Return true if the operations are supported. */
/* Memoizes on the scalar-stmt set (VISITED/LVISITED), recurses into children,
   temporarily pushes child SLP def-types onto the stmt operands around the
   call to vect_slp_analyze_node_operations_1, then restores them.  */
2508 vect_slp_analyze_node_operations (vec_info
*vinfo
, slp_tree node
,
2509 slp_instance node_instance
,
2510 scalar_stmts_to_slp_tree_map_t
*visited
,
2511 scalar_stmts_to_slp_tree_map_t
*lvisited
,
2512 stmt_vector_for_cost
*cost_vec
)
2517 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
2520 /* If we already analyzed the exact same set of scalar stmts we're done.
2521 We share the generated vector stmts for those. */
2523 if ((leader
= visited
->get (SLP_TREE_SCALAR_STMTS (node
)))
2524 || (leader
= lvisited
->get (SLP_TREE_SCALAR_STMTS (node
))))
2526 SLP_TREE_NUMBER_OF_VEC_STMTS (node
)
2527 = SLP_TREE_NUMBER_OF_VEC_STMTS (*leader
);
2531 /* The SLP graph is acyclic so not caching whether we failed or succeeded
2532 doesn't result in any issue since we throw away the lvisited set
2534 lvisited
->put (SLP_TREE_SCALAR_STMTS (node
).copy (), node
);
2536 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
2537 if (!vect_slp_analyze_node_operations (vinfo
, child
, node_instance
,
2538 visited
, lvisited
, cost_vec
))
2541 /* Push SLP node def-type to stmt operands. */
2542 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), j
, child
)
2543 if (SLP_TREE_DEF_TYPE (child
) != vect_internal_def
)
2544 STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child
)[0])
2545 = SLP_TREE_DEF_TYPE (child
);
2546 bool res
= vect_slp_analyze_node_operations_1 (vinfo
, node
, node_instance
,
2548 /* Restore def-types. */
2549 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), j
, child
)
2550 if (SLP_TREE_DEF_TYPE (child
) != vect_internal_def
)
2551 STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child
)[0])
2552 = vect_internal_def
;
/* NOTE(review): lossy extraction -- braces, `else` arms and the final
   `delete visited` are among the missing lines; kept verbatim.  */
2560 /* Analyze statements in SLP instances of VINFO. Return true if the
2561 operations are supported. */
/* Per-instance analysis loop: instances that fail analysis are dumped,
   freed and removed; successful ones have their local visited-map merged
   into the global one and their costs added to the target cost data.  */
2564 vect_slp_analyze_operations (vec_info
*vinfo
)
2566 slp_instance instance
;
2569 DUMP_VECT_SCOPE ("vect_slp_analyze_operations");
2571 scalar_stmts_to_slp_tree_map_t
*visited
2572 = new scalar_stmts_to_slp_tree_map_t ();
2573 for (i
= 0; vinfo
->slp_instances
.iterate (i
, &instance
); )
2575 scalar_stmts_to_slp_tree_map_t lvisited
;
2576 stmt_vector_for_cost cost_vec
;
2577 cost_vec
.create (2);
2578 if (!vect_slp_analyze_node_operations (vinfo
,
2579 SLP_INSTANCE_TREE (instance
),
2580 instance
, visited
, &lvisited
,
2583 slp_tree node
= SLP_INSTANCE_TREE (instance
);
2584 stmt_vec_info stmt_info
= SLP_TREE_SCALAR_STMTS (node
)[0];
2585 dump_printf_loc (MSG_NOTE
, vect_location
,
2586 "removing SLP instance operations starting from: ");
2587 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt_info
->stmt
, 0);
2588 vect_free_slp_instance (instance
, false);
2589 vinfo
->slp_instances
.ordered_remove (i
);
2590 cost_vec
.release ();
2594 for (scalar_stmts_to_slp_tree_map_t::iterator x
= lvisited
.begin();
2595 x
!= lvisited
.end(); ++x
)
2596 visited
->put ((*x
).first
.copy (), (*x
).second
);
2599 add_stmt_costs (vinfo
->target_cost_data
, &cost_vec
);
2600 cost_vec
.release ();
2605 return !vinfo
->slp_instances
.is_empty ();
/* NOTE(review): lossy extraction -- the LIFE-marking lines, several braces
   and `continue` statements are missing; fragments kept verbatim.  */
2609 /* Compute the scalar cost of the SLP node NODE and its children
2610 and return it. Do not account defs that are marked in LIFE and
2611 update LIFE according to uses of NODE. */
/* For each scalar stmt: skip it if a def escapes to a non-pure-SLP use
   (kept live), count each stmt once via gimple_visited_p, classify it as
   scalar load/store/stmt cost, then recurse into internal-def children
   using a copy of LIFE so callee changes stay confined.  */
2614 vect_bb_slp_scalar_cost (basic_block bb
,
2615 slp_tree node
, vec
<bool, va_heap
> *life
,
2616 stmt_vector_for_cost
*cost_vec
)
2619 stmt_vec_info stmt_info
;
2622 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt_info
)
2624 gimple
*stmt
= stmt_info
->stmt
;
2625 vec_info
*vinfo
= stmt_info
->vinfo
;
2626 ssa_op_iter op_iter
;
2627 def_operand_p def_p
;
2632 /* If there is a non-vectorized use of the defs then the scalar
2633 stmt is kept live in which case we do not account it or any
2634 required defs in the SLP children in the scalar cost. This
2635 way we make the vectorization more costly when compared to
2637 FOR_EACH_SSA_DEF_OPERAND (def_p
, stmt
, op_iter
, SSA_OP_DEF
)
2639 imm_use_iterator use_iter
;
2641 FOR_EACH_IMM_USE_STMT (use_stmt
, use_iter
, DEF_FROM_PTR (def_p
))
2642 if (!is_gimple_debug (use_stmt
))
2644 stmt_vec_info use_stmt_info
= vinfo
->lookup_stmt (use_stmt
);
2645 if (!use_stmt_info
|| !PURE_SLP_STMT (use_stmt_info
))
2648 BREAK_FROM_IMM_USE_STMT (use_iter
);
2655 /* Count scalar stmts only once. */
2656 if (gimple_visited_p (stmt
))
2658 gimple_set_visited (stmt
, true);
2660 vect_cost_for_stmt kind
;
2661 if (STMT_VINFO_DATA_REF (stmt_info
))
2663 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info
)))
2666 kind
= scalar_store
;
2670 record_stmt_cost (cost_vec
, 1, kind
, stmt_info
, 0, vect_body
);
2673 auto_vec
<bool, 20> subtree_life
;
2674 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
2676 if (SLP_TREE_DEF_TYPE (child
) == vect_internal_def
)
2678 /* Do not directly pass LIFE to the recursive call, copy it to
2679 confine changes in the callee to the current child/subtree. */
2680 subtree_life
.safe_splice (*life
);
2681 vect_bb_slp_scalar_cost (bb
, child
, &subtree_life
, cost_vec
);
2682 subtree_life
.truncate (0);
/* NOTE(review): lossy extraction -- the vec_inside_cost dump argument,
   `return false/true` lines and braces are missing; kept verbatim.  */
2687 /* Check if vectorization of the basic block is profitable. */
/* Accumulates scalar cost of all SLP instances (via vect_bb_slp_scalar_cost
   into a throwaway cost model), clears the visited flags it set, finishes
   the vector cost model, and compares vector vs. scalar totals.  */
2690 vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo
)
2692 vec
<slp_instance
> slp_instances
= BB_VINFO_SLP_INSTANCES (bb_vinfo
);
2693 slp_instance instance
;
2695 unsigned int vec_inside_cost
= 0, vec_outside_cost
= 0, scalar_cost
= 0;
2696 unsigned int vec_prologue_cost
= 0, vec_epilogue_cost
= 0;
2698 /* Calculate scalar cost. */
2699 stmt_vector_for_cost scalar_costs
;
2700 scalar_costs
.create (0);
2701 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
2703 auto_vec
<bool, 20> life
;
2704 life
.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance
));
2705 vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo
),
2706 SLP_INSTANCE_TREE (instance
),
2707 &life
, &scalar_costs
);
2709 void *target_cost_data
= init_cost (NULL
);
2710 add_stmt_costs (target_cost_data
, &scalar_costs
);
2711 scalar_costs
.release ();
2713 finish_cost (target_cost_data
, &dummy
, &scalar_cost
, &dummy
);
2714 destroy_cost_data (target_cost_data
);
2716 /* Unset visited flag. */
2717 for (gimple_stmt_iterator gsi
= bb_vinfo
->region_begin
;
2718 gsi_stmt (gsi
) != gsi_stmt (bb_vinfo
->region_end
); gsi_next (&gsi
))
2719 gimple_set_visited (gsi_stmt (gsi
), false);
2721 /* Complete the target-specific cost calculation. */
2722 finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo
), &vec_prologue_cost
,
2723 &vec_inside_cost
, &vec_epilogue_cost
);
2725 vec_outside_cost
= vec_prologue_cost
+ vec_epilogue_cost
;
2727 if (dump_enabled_p ())
2729 dump_printf_loc (MSG_NOTE
, vect_location
, "Cost model analysis: \n");
2730 dump_printf (MSG_NOTE
, " Vector inside of basic block cost: %d\n",
2732 dump_printf (MSG_NOTE
, " Vector prologue cost: %d\n", vec_prologue_cost
);
2733 dump_printf (MSG_NOTE
, " Vector epilogue cost: %d\n", vec_epilogue_cost
);
2734 dump_printf (MSG_NOTE
, " Scalar cost of basic block: %d\n", scalar_cost
);
2737 /* Vectorization is profitable if its cost is more than the cost of scalar
2738 version. Note that we err on the vector side for equal cost because
2739 the cost estimate is otherwise quite pessimistic (constant uses are
2740 free on the scalar side but cost a load on the vector side for
2742 if (vec_outside_cost
+ vec_inside_cost
> scalar_cost
)
/* NOTE(review): lossy extraction -- `fatal` assignments, `delete bb_vinfo;
   return NULL;` failure paths and braces are missing; kept verbatim.  */
2748 /* Check if the basic block can be vectorized. Returns a bb_vec_info
2749 if so and sets fatal to true if failure is independent of
2750 current_vector_size. */
/* Analysis pipeline for one region: size check, data-ref analysis and
   access analysis, grouped-store check, pattern recog, SLP build,
   alignment/dependence verification per instance, operation analysis,
   then the cost-model check.  */
2753 vect_slp_analyze_bb_1 (gimple_stmt_iterator region_begin
,
2754 gimple_stmt_iterator region_end
,
2755 vec
<data_reference_p
> datarefs
, int n_stmts
,
2756 bool &fatal
, vec_info_shared
*shared
)
2758 bb_vec_info bb_vinfo
;
2759 slp_instance instance
;
2761 poly_uint64 min_vf
= 2;
2763 /* The first group of checks is independent of the vector size. */
2766 if (n_stmts
> PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB
))
2768 if (dump_enabled_p ())
2769 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2770 "not vectorized: too many instructions in "
2772 free_data_refs (datarefs
);
2776 bb_vinfo
= new _bb_vec_info (region_begin
, region_end
, shared
);
2780 BB_VINFO_DATAREFS (bb_vinfo
) = datarefs
;
2781 bb_vinfo
->shared
->save_datarefs ();
2783 /* Analyze the data references. */
2785 if (!vect_analyze_data_refs (bb_vinfo
, &min_vf
))
2787 if (dump_enabled_p ())
2788 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2789 "not vectorized: unhandled data-ref in basic "
2796 if (BB_VINFO_DATAREFS (bb_vinfo
).length () < 2)
2798 if (dump_enabled_p ())
2799 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2800 "not vectorized: not enough data-refs in "
2807 if (!vect_analyze_data_ref_accesses (bb_vinfo
))
2809 if (dump_enabled_p ())
2810 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2811 "not vectorized: unhandled data access in "
2818 /* If there are no grouped stores in the region there is no need
2819 to continue with pattern recog as vect_analyze_slp will fail
2821 if (bb_vinfo
->grouped_stores
.is_empty ())
2823 if (dump_enabled_p ())
2824 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2825 "not vectorized: no grouped stores in "
2832 /* While the rest of the analysis below depends on it in some way. */
2835 vect_pattern_recog (bb_vinfo
);
2837 /* Check the SLP opportunities in the basic block, analyze and build SLP
2839 if (!vect_analyze_slp (bb_vinfo
, n_stmts
))
2841 if (dump_enabled_p ())
2843 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2844 "Failed to SLP the basic block.\n");
2845 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2846 "not vectorized: failed to find SLP opportunities "
2847 "in basic block.\n");
2854 vect_record_base_alignments (bb_vinfo
);
2856 /* Analyze and verify the alignment of data references and the
2857 dependence in the SLP instances. */
2858 for (i
= 0; BB_VINFO_SLP_INSTANCES (bb_vinfo
).iterate (i
, &instance
); )
2860 if (! vect_slp_analyze_and_verify_instance_alignment (instance
)
2861 || ! vect_slp_analyze_instance_dependence (instance
))
2863 slp_tree node
= SLP_INSTANCE_TREE (instance
);
2864 stmt_vec_info stmt_info
= SLP_TREE_SCALAR_STMTS (node
)[0];
2865 dump_printf_loc (MSG_NOTE
, vect_location
,
2866 "removing SLP instance operations starting from: ");
2867 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt_info
->stmt
, 0);
2868 vect_free_slp_instance (instance
, false);
2869 BB_VINFO_SLP_INSTANCES (bb_vinfo
).ordered_remove (i
);
2873 /* Mark all the statements that we want to vectorize as pure SLP and
2875 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance
), pure_slp
, -1);
2876 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance
));
2880 if (! BB_VINFO_SLP_INSTANCES (bb_vinfo
).length ())
2886 if (!vect_slp_analyze_operations (bb_vinfo
))
2888 if (dump_enabled_p ())
2889 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2890 "not vectorized: bad operation in basic block.\n");
2896 /* Cost model: check if the vectorization is worthwhile. */
2897 if (!unlimited_cost_model (NULL
)
2898 && !vect_bb_vectorization_profitable_p (bb_vinfo
))
2900 if (dump_enabled_p ())
2901 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2902 "not vectorized: vectorization is not "
2909 if (dump_enabled_p ())
2910 dump_printf_loc (MSG_NOTE
, vect_location
,
2911 "Basic block will be vectorized using SLP\n");
/* NOTE(review): lossy extraction -- the outer `while (1)` loop, several
   iterator advances, the `insns`/`fatal` declarations and `delete bb_vinfo`
   are among the missing lines; fragments kept verbatim.  */
2917 /* Main entry for the BB vectorizer. Analyze and transform BB, returns
2918 true if anything in the basic-block was vectorized. */
/* Splits BB into regions at unhandled stmts, tries vect_slp_analyze_bb_1 on
   each region, schedules SLP on success, and retries with each target
   vector size until sizes are exhausted or autodetection repeats.  */
2921 vect_slp_bb (basic_block bb
)
2923 bb_vec_info bb_vinfo
;
2924 gimple_stmt_iterator gsi
;
2925 bool any_vectorized
= false;
2926 auto_vector_sizes vector_sizes
;
2928 DUMP_VECT_SCOPE ("vect_slp_analyze_bb");
2930 /* Autodetect first vector size we try. */
2931 current_vector_size
= 0;
2932 targetm
.vectorize
.autovectorize_vector_sizes (&vector_sizes
);
2933 unsigned int next_size
= 0;
2935 gsi
= gsi_start_bb (bb
);
2937 poly_uint64 autodetected_vector_size
= 0;
2940 if (gsi_end_p (gsi
))
2943 gimple_stmt_iterator region_begin
= gsi
;
2944 vec
<data_reference_p
> datarefs
= vNULL
;
2947 for (; !gsi_end_p (gsi
); gsi_next (&gsi
))
2949 gimple
*stmt
= gsi_stmt (gsi
);
2950 if (is_gimple_debug (stmt
))
2954 if (gimple_location (stmt
) != UNKNOWN_LOCATION
)
2955 vect_location
= stmt
;
2957 if (!vect_find_stmt_data_reference (NULL
, stmt
, &datarefs
))
2961 /* Skip leading unhandled stmts. */
2962 if (gsi_stmt (region_begin
) == gsi_stmt (gsi
))
2968 gimple_stmt_iterator region_end
= gsi
;
2970 bool vectorized
= false;
2972 vec_info_shared shared
;
2973 bb_vinfo
= vect_slp_analyze_bb_1 (region_begin
, region_end
,
2974 datarefs
, insns
, fatal
, &shared
);
2976 && dbg_cnt (vect_slp
))
2978 if (dump_enabled_p ())
2979 dump_printf_loc (MSG_NOTE
, vect_location
, "SLPing BB part\n");
2981 bb_vinfo
->shared
->check_datarefs ();
2982 vect_schedule_slp (bb_vinfo
);
2984 unsigned HOST_WIDE_INT bytes
;
2985 if (current_vector_size
.is_constant (&bytes
))
2986 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS
, vect_location
,
2987 "basic block part vectorized using "
2988 HOST_WIDE_INT_PRINT_UNSIGNED
 " byte "
2989 "vectors\n", bytes
);
2991 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS
, vect_location
,
2992 "basic block part vectorized using variable "
2993 "length vectors\n");
2999 any_vectorized
|= vectorized
;
3002 autodetected_vector_size
= current_vector_size
;
3004 if (next_size
< vector_sizes
.length ()
3005 && known_eq (vector_sizes
[next_size
], autodetected_vector_size
))
3009 || next_size
== vector_sizes
.length ()
3010 || known_eq (current_vector_size
, 0U)
3011 /* If vect_slp_analyze_bb_1 signaled that analysis for all
3012 vector sizes will fail do not bother iterating. */
3015 if (gsi_end_p (region_end
))
3018 /* Skip the unhandled stmt. */
3021 /* And reset vector sizes. */
3022 current_vector_size
= 0;
3027 /* Try the next biggest vector size. */
3028 current_vector_size
= vector_sizes
[next_size
++];
3029 if (dump_enabled_p ())
3031 dump_printf_loc (MSG_NOTE
, vect_location
,
3032 "***** Re-trying analysis with "
3034 dump_dec (MSG_NOTE
, current_vector_size
);
3035 dump_printf (MSG_NOTE
, "\n");
3043 return any_vectorized
;
/* NOTE(review): lossy extraction -- `tree op, vectype;` declarations and
   some else-branches are missing; fragments kept verbatim.  */
3047 /* Return 1 if vector type of boolean constant which is OPNUM
3048 operand in statement STMT_VINFO is a boolean vector. */
/* For comparisons and COND_EXPRs the answer depends on the vector type of
   the *other* operand (looked up with vect_is_simple_use); otherwise it is
   decided by the stmt's own vectype.  */
3051 vect_mask_constant_operand_p (stmt_vec_info stmt_vinfo
, int opnum
)
3053 enum tree_code code
= gimple_expr_code (stmt_vinfo
->stmt
);
3055 enum vect_def_type dt
;
3057 /* For comparison and COND_EXPR type is chosen depending
3058 on the other comparison operand. */
3059 if (TREE_CODE_CLASS (code
) == tcc_comparison
)
3061 gassign
*stmt
= as_a
<gassign
*> (stmt_vinfo
->stmt
);
3063 op
= gimple_assign_rhs1 (stmt
);
3065 op
= gimple_assign_rhs2 (stmt
);
3067 if (!vect_is_simple_use (op
, stmt_vinfo
->vinfo
, &dt
, &vectype
))
3070 return !vectype
|| VECTOR_BOOLEAN_TYPE_P (vectype
);
3073 if (code
== COND_EXPR
)
3075 gassign
*stmt
= as_a
<gassign
*> (stmt_vinfo
->stmt
);
3076 tree cond
= gimple_assign_rhs1 (stmt
);
3078 if (TREE_CODE (cond
) == SSA_NAME
)
3081 op
= TREE_OPERAND (cond
, 1);
3083 op
= TREE_OPERAND (cond
, 0);
3085 if (!vect_is_simple_use (op
, stmt_vinfo
->vinfo
, &dt
, &vectype
))
3088 return !vectype
|| VECTOR_BOOLEAN_TYPE_P (vectype
);
3091 return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo
));
/* NOTE(review): lossy extraction -- the permute-index construction, several
   conditions and braces are missing between the embedded line numbers;
   fragments kept verbatim.  */
3094 /* Build a variable-length vector in which the elements in ELTS are repeated
3095 to a fill NRESULTS vectors of type VECTOR_TYPE. Store the vectors in
3096 RESULTS and add any new instructions to SEQ.
3098 The approach we use is:
3100 (1) Find a vector mode VM with integer elements of mode IM.
3102 (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
3103 ELTS' has mode IM. This involves creating NELTS' VIEW_CONVERT_EXPRs
3104 from small vectors to IM.
3106 (3) Duplicate each ELTS'[I] into a vector of mode VM.
3108 (4) Use a tree of interleaving VEC_PERM_EXPRs to create VMs with the
3109 correct byte contents.
3111 (5) Use VIEW_CONVERT_EXPR to cast the final VMs to the required type.
3113 We try to find the largest IM for which this sequence works, in order
3114 to cut down on the number of interleaves. */
3117 duplicate_and_interleave (gimple_seq
*seq
, tree vector_type
, vec
<tree
> elts
,
3118 unsigned int nresults
, vec
<tree
> &results
)
3120 unsigned int nelts
= elts
.length ();
3121 tree element_type
= TREE_TYPE (vector_type
);
3123 /* (1) Find a vector mode VM with integer elements of mode IM. */
3124 unsigned int nvectors
= 1;
3125 tree new_vector_type
;
3127 if (!can_duplicate_and_interleave_p (nelts
, TYPE_MODE (element_type
),
3128 &nvectors
, &new_vector_type
,
3132 /* Get a vector type that holds ELTS[0:NELTS/NELTS']. */
3133 unsigned int partial_nelts
= nelts
/ nvectors
;
3134 tree partial_vector_type
= build_vector_type (element_type
, partial_nelts
);
3136 tree_vector_builder partial_elts
;
3137 auto_vec
<tree
, 32> pieces (nvectors
* 2);
3138 pieces
.quick_grow (nvectors
* 2);
3139 for (unsigned int i
= 0; i
< nvectors
; ++i
)
3141 /* (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
3142 ELTS' has mode IM. */
3143 partial_elts
.new_vector (partial_vector_type
, partial_nelts
, 1);
3144 for (unsigned int j
= 0; j
< partial_nelts
; ++j
)
3145 partial_elts
.quick_push (elts
[i
* partial_nelts
+ j
]);
3146 tree t
= gimple_build_vector (seq
, &partial_elts
);
3147 t
= gimple_build (seq
, VIEW_CONVERT_EXPR
,
3148 TREE_TYPE (new_vector_type
), t
);
3150 /* (3) Duplicate each ELTS'[I] into a vector of mode VM. */
3151 pieces
[i
] = gimple_build_vector_from_val (seq
, new_vector_type
, t
);
3154 /* (4) Use a tree of VEC_PERM_EXPRs to create a single VM with the
3155 correct byte contents.
3157 We need to repeat the following operation log2(nvectors) times:
3159 out[i * 2] = VEC_PERM_EXPR (in[i], in[i + hi_start], lo_permute);
3160 out[i * 2 + 1] = VEC_PERM_EXPR (in[i], in[i + hi_start], hi_permute);
3162 However, if each input repeats every N elements and the VF is
3163 a multiple of N * 2, the HI result is the same as the LO. */
3164 unsigned int in_start
= 0;
3165 unsigned int out_start
= nvectors
;
3166 unsigned int hi_start
= nvectors
/ 2;
3167 /* A bound on the number of outputs needed to produce NRESULTS results
3168 in the final iteration. */
3169 unsigned int noutputs_bound
= nvectors
* nresults
;
3170 for (unsigned int in_repeat
= 1; in_repeat
< nvectors
; in_repeat
*= 2)
3172 noutputs_bound
/= 2;
3173 unsigned int limit
= MIN (noutputs_bound
, nvectors
);
3174 for (unsigned int i
= 0; i
< limit
; ++i
)
3177 && multiple_p (TYPE_VECTOR_SUBPARTS (new_vector_type
),
3180 pieces
[out_start
+ i
] = pieces
[out_start
+ i
- 1];
3184 tree output
= make_ssa_name (new_vector_type
);
3185 tree input1
= pieces
[in_start
+ (i
/ 2)];
3186 tree input2
= pieces
[in_start
+ (i
/ 2) + hi_start
];
3187 gassign
*stmt
= gimple_build_assign (output
, VEC_PERM_EXPR
,
3190 gimple_seq_add_stmt (seq
, stmt
);
3191 pieces
[out_start
+ i
] = output
;
3193 std::swap (in_start
, out_start
);
3196 /* (5) Use VIEW_CONVERT_EXPR to cast the final VM to the required type. */
3197 results
.reserve (nresults
);
3198 for (unsigned int i
= 0; i
< nresults
; ++i
)
3200 results
.quick_push (gimple_build (seq
, VIEW_CONVERT_EXPR
, vector_type
,
3201 pieces
[in_start
+ i
]));
3203 results
.quick_push (results
[i
- nvectors
]);
/* NOTE(review): lossy extraction -- large portions of this function are
   missing between the embedded line numbers (the code-specific operand
   selection switch, the vector_type assignment branches, several braces
   and conditions); fragments kept verbatim.  */
3207 /* For constant and loop invariant defs of SLP_NODE this function returns
3208 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
3209 OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
3210 scalar stmts. NUMBER_OF_VECTORS is the number of vector defs to create.
3211 REDUC_INDEX is the index of the reduction operand in the statements, unless
3215 vect_get_constant_vectors (tree op
, slp_tree slp_node
,
3216 vec
<tree
> *vec_oprnds
,
3217 unsigned int op_num
, unsigned int number_of_vectors
)
3219 vec
<stmt_vec_info
> stmts
= SLP_TREE_SCALAR_STMTS (slp_node
);
3220 stmt_vec_info stmt_vinfo
= stmts
[0];
3221 gimple
*stmt
= stmt_vinfo
->stmt
;
3222 unsigned HOST_WIDE_INT nunits
;
3224 unsigned j
, number_of_places_left_in_vector
;
3227 int group_size
= stmts
.length ();
3228 unsigned int vec_num
, i
;
3229 unsigned number_of_copies
= 1;
3231 voprnds
.create (number_of_vectors
);
3232 bool constant_p
, is_store
;
3233 tree neutral_op
= NULL
;
3234 enum tree_code code
= gimple_expr_code (stmt
);
3235 gimple_seq ctor_seq
= NULL
;
3236 auto_vec
<tree
, 16> permute_results
;
3238 /* Check if vector type is a boolean vector. */
3239 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op
))
3240 && vect_mask_constant_operand_p (stmt_vinfo
, op_num
))
3242 = build_same_sized_truth_vector_type (STMT_VINFO_VECTYPE (stmt_vinfo
));
3244 vector_type
= get_vectype_for_scalar_type (TREE_TYPE (op
));
3246 if (STMT_VINFO_DATA_REF (stmt_vinfo
))
3249 op
= gimple_assign_rhs1 (stmt
);
3256 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
3257 created vectors. It is greater than 1 if unrolling is performed.
3259 For example, we have two scalar operands, s1 and s2 (e.g., group of
3260 strided accesses of size two), while NUNITS is four (i.e., four scalars
3261 of this type can be packed in a vector). The output vector will contain
3262 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
3265 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
3266 containing the operands.
3268 For example, NUNITS is four as before, and the group size is 8
3269 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
3270 {s5, s6, s7, s8}. */
3272 /* When using duplicate_and_interleave, we just need one element for
3273 each scalar statement. */
3274 if (!TYPE_VECTOR_SUBPARTS (vector_type
).is_constant (&nunits
))
3275 nunits
= group_size
;
3277 number_of_copies
= nunits
* number_of_vectors
/ group_size
;
3279 number_of_places_left_in_vector
= nunits
;
3281 tree_vector_builder
elts (vector_type
, nunits
, 1);
3282 elts
.quick_grow (nunits
);
3283 bool place_after_defs
= false;
3284 for (j
= 0; j
< number_of_copies
; j
++)
3286 for (i
= group_size
- 1; stmts
.iterate (i
, &stmt_vinfo
); i
--)
3288 stmt
= stmt_vinfo
->stmt
;
3290 op
= gimple_assign_rhs1 (stmt
);
3297 tree cond
= gimple_assign_rhs1 (stmt
);
3298 if (TREE_CODE (cond
) == SSA_NAME
)
3299 op
= gimple_op (stmt
, op_num
+ 1);
3300 else if (op_num
== 0 || op_num
== 1)
3301 op
= TREE_OPERAND (cond
, op_num
);
3305 op
= gimple_assign_rhs2 (stmt
);
3307 op
= gimple_assign_rhs3 (stmt
);
3313 op
= gimple_call_arg (stmt
, op_num
);
3320 op
= gimple_op (stmt
, op_num
+ 1);
3321 /* Unlike the other binary operators, shifts/rotates have
3322 the shift count being int, instead of the same type as
3323 the lhs, so make sure the scalar is the right type if
3324 we are dealing with vectors of
3325 long long/long/short/char. */
3326 if (op_num
== 1 && TREE_CODE (op
) == INTEGER_CST
)
3327 op
= fold_convert (TREE_TYPE (vector_type
), op
);
3331 op
= gimple_op (stmt
, op_num
+ 1);
3336 /* Create 'vect_ = {op0,op1,...,opn}'. */
3337 number_of_places_left_in_vector
--;
3339 if (!types_compatible_p (TREE_TYPE (vector_type
), TREE_TYPE (op
)))
3341 if (CONSTANT_CLASS_P (op
))
3343 if (VECTOR_BOOLEAN_TYPE_P (vector_type
))
3345 /* Can't use VIEW_CONVERT_EXPR for booleans because
3346 of possibly different sizes of scalar value and
3348 if (integer_zerop (op
))
3349 op
= build_int_cst (TREE_TYPE (vector_type
), 0);
3350 else if (integer_onep (op
))
3351 op
= build_all_ones_cst (TREE_TYPE (vector_type
));
3356 op
= fold_unary (VIEW_CONVERT_EXPR
,
3357 TREE_TYPE (vector_type
), op
);
3358 gcc_assert (op
&& CONSTANT_CLASS_P (op
));
3362 tree new_temp
= make_ssa_name (TREE_TYPE (vector_type
));
3364 if (VECTOR_BOOLEAN_TYPE_P (vector_type
))
3367 = build_all_ones_cst (TREE_TYPE (vector_type
));
3369 = build_zero_cst (TREE_TYPE (vector_type
));
3370 gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (op
)));
3371 init_stmt
= gimple_build_assign (new_temp
, COND_EXPR
,
3377 op
= build1 (VIEW_CONVERT_EXPR
, TREE_TYPE (vector_type
),
3380 = gimple_build_assign (new_temp
, VIEW_CONVERT_EXPR
,
3383 gimple_seq_add_stmt (&ctor_seq
, init_stmt
);
3387 elts
[number_of_places_left_in_vector
] = op
;
3388 if (!CONSTANT_CLASS_P (op
))
3390 if (TREE_CODE (orig_op
) == SSA_NAME
3391 && !SSA_NAME_IS_DEFAULT_DEF (orig_op
)
3392 && STMT_VINFO_BB_VINFO (stmt_vinfo
)
3393 && (STMT_VINFO_BB_VINFO (stmt_vinfo
)->bb
3394 == gimple_bb (SSA_NAME_DEF_STMT (orig_op
))))
3395 place_after_defs
= true;
3397 if (number_of_places_left_in_vector
== 0)
3400 ? multiple_p (TYPE_VECTOR_SUBPARTS (vector_type
), nunits
)
3401 : known_eq (TYPE_VECTOR_SUBPARTS (vector_type
), nunits
))
3402 vec_cst
= gimple_build_vector (&ctor_seq
, &elts
);
3405 if (vec_oprnds
->is_empty ())
3406 duplicate_and_interleave (&ctor_seq
, vector_type
, elts
,
3409 vec_cst
= permute_results
[number_of_vectors
- j
- 1];
3412 gimple_stmt_iterator gsi
;
3413 if (place_after_defs
)
3415 stmt_vec_info last_stmt_info
3416 = vect_find_last_scalar_stmt_in_slp (slp_node
);
3417 gsi
= gsi_for_stmt (last_stmt_info
->stmt
);
3418 init
= vect_init_vector (stmt_vinfo
, vec_cst
, vector_type
,
3422 init
= vect_init_vector (stmt_vinfo
, vec_cst
, vector_type
,
3424 if (ctor_seq
!= NULL
)
3426 gsi
= gsi_for_stmt (SSA_NAME_DEF_STMT (init
));
3427 gsi_insert_seq_before (&gsi
, ctor_seq
, GSI_SAME_STMT
);
3430 voprnds
.quick_push (init
);
3431 place_after_defs
= false;
3432 number_of_places_left_in_vector
= nunits
;
3434 elts
.new_vector (vector_type
, nunits
, 1);
3435 elts
.quick_grow (nunits
);
3440 /* Since the vectors are created in the reverse order, we should invert
3442 vec_num
= voprnds
.length ();
3443 for (j
= vec_num
; j
!= 0; j
--)
3445 vop
= voprnds
[j
- 1];
3446 vec_oprnds
->quick_push (vop
);
3451 /* In case that VF is greater than the unrolling factor needed for the SLP
3452 group of stmts, NUMBER_OF_VECTORS to be created is greater than
3453 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
3454 to replicate the vectors. */
3455 while (number_of_vectors
> vec_oprnds
->length ())
3457 tree neutral_vec
= NULL
;
3462 neutral_vec
= build_vector_from_val (vector_type
, neutral_op
);
3464 vec_oprnds
->quick_push (neutral_vec
);
3468 for (i
= 0; vec_oprnds
->iterate (i
, &vop
) && i
< vec_num
; i
++)
3469 vec_oprnds
->quick_push (vop
);
3475 /* Get vectorized definitions from SLP_NODE that contains corresponding
3476 vectorized def-stmts. */
3479 vect_get_slp_vect_defs (slp_tree slp_node
, vec
<tree
> *vec_oprnds
)
3482 stmt_vec_info vec_def_stmt_info
;
3485 gcc_assert (SLP_TREE_VEC_STMTS (slp_node
).exists ());
3487 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node
), i
, vec_def_stmt_info
)
3489 gcc_assert (vec_def_stmt_info
);
3490 if (gphi
*vec_def_phi
= dyn_cast
<gphi
*> (vec_def_stmt_info
->stmt
))
3491 vec_oprnd
= gimple_phi_result (vec_def_phi
);
3493 vec_oprnd
= gimple_get_lhs (vec_def_stmt_info
->stmt
);
3494 vec_oprnds
->quick_push (vec_oprnd
);
3499 /* Get vectorized definitions for SLP_NODE.
3500 If the scalar definitions are loop invariants or constants, collect them and
3501 call vect_get_constant_vectors() to create vector stmts.
3502 Otherwise, the def-stmts must be already vectorized and the vectorized stmts
3503 must be stored in the corresponding child of SLP_NODE, and we call
3504 vect_get_slp_vect_defs () to retrieve them. */
3507 vect_get_slp_defs (vec
<tree
> ops
, slp_tree slp_node
,
3508 vec
<vec
<tree
> > *vec_oprnds
)
3510 int number_of_vects
= 0, i
;
3511 unsigned int child_index
= 0;
3512 HOST_WIDE_INT lhs_size_unit
, rhs_size_unit
;
3513 slp_tree child
= NULL
;
3516 bool vectorized_defs
;
3518 stmt_vec_info first_stmt_info
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
3519 FOR_EACH_VEC_ELT (ops
, i
, oprnd
)
3521 /* For each operand we check if it has vectorized definitions in a child
3522 node or we need to create them (for invariants and constants). We
3523 check if the LHS of the first stmt of the next child matches OPRND.
3524 If it does, we found the correct child. Otherwise, we call
3525 vect_get_constant_vectors (), and not advance CHILD_INDEX in order
3526 to check this child node for the next operand. */
3527 vectorized_defs
= false;
3528 if (SLP_TREE_CHILDREN (slp_node
).length () > child_index
)
3530 child
= SLP_TREE_CHILDREN (slp_node
)[child_index
];
3532 /* We have to check both pattern and original def, if available. */
3533 if (SLP_TREE_DEF_TYPE (child
) == vect_internal_def
)
3535 stmt_vec_info first_def_info
= SLP_TREE_SCALAR_STMTS (child
)[0];
3536 stmt_vec_info related
= STMT_VINFO_RELATED_STMT (first_def_info
);
3539 if (gphi
*first_def
= dyn_cast
<gphi
*> (first_def_info
->stmt
))
3540 first_def_op
= gimple_phi_result (first_def
);
3542 first_def_op
= gimple_get_lhs (first_def_info
->stmt
);
3543 if (operand_equal_p (oprnd
, first_def_op
, 0)
3545 && operand_equal_p (oprnd
,
3546 gimple_get_lhs (related
->stmt
), 0)))
3548 /* The number of vector defs is determined by the number of
3549 vector statements in the node from which we get those
3551 number_of_vects
= SLP_TREE_NUMBER_OF_VEC_STMTS (child
);
3552 vectorized_defs
= true;
3560 if (!vectorized_defs
)
3564 number_of_vects
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
3565 /* Number of vector stmts was calculated according to LHS in
3566 vect_schedule_slp_instance (), fix it by replacing LHS with
3567 RHS, if necessary. See vect_get_smallest_scalar_type () for
3569 vect_get_smallest_scalar_type (first_stmt_info
, &lhs_size_unit
,
3571 if (rhs_size_unit
!= lhs_size_unit
)
3573 number_of_vects
*= rhs_size_unit
;
3574 number_of_vects
/= lhs_size_unit
;
3579 /* Allocate memory for vectorized defs. */
3581 vec_defs
.create (number_of_vects
);
3583 /* For reduction defs we call vect_get_constant_vectors (), since we are
3584 looking for initial loop invariant values. */
3585 if (vectorized_defs
)
3586 /* The defs are already vectorized. */
3587 vect_get_slp_vect_defs (child
, &vec_defs
);
3589 /* Build vectors from scalar defs. */
3590 vect_get_constant_vectors (oprnd
, slp_node
, &vec_defs
, i
,
3593 vec_oprnds
->quick_push (vec_defs
);
3597 /* Generate vector permute statements from a list of loads in DR_CHAIN.
3598 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
3599 permute statements for the SLP node NODE of the SLP instance
3600 SLP_NODE_INSTANCE. */
3603 vect_transform_slp_perm_load (slp_tree node
, vec
<tree
> dr_chain
,
3604 gimple_stmt_iterator
*gsi
, poly_uint64 vf
,
3605 slp_instance slp_node_instance
, bool analyze_only
,
3608 stmt_vec_info stmt_info
= SLP_TREE_SCALAR_STMTS (node
)[0];
3609 vec_info
*vinfo
= stmt_info
->vinfo
;
3610 tree mask_element_type
= NULL_TREE
, mask_type
;
3612 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3613 int group_size
= SLP_INSTANCE_GROUP_SIZE (slp_node_instance
);
3614 unsigned int mask_element
;
3616 unsigned HOST_WIDE_INT nunits
, const_vf
;
3618 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info
))
3621 stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
3623 mode
= TYPE_MODE (vectype
);
3625 /* At the moment, all permutations are represented using per-element
3626 indices, so we can't cope with variable vector lengths or
3627 vectorization factors. */
3628 if (!TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&nunits
)
3629 || !vf
.is_constant (&const_vf
))
3632 /* The generic VEC_PERM_EXPR code always uses an integral type of the
3633 same size as the vector element being permuted. */
3634 mask_element_type
= lang_hooks
.types
.type_for_mode
3635 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype
))).require (), 1);
3636 mask_type
= get_vectype_for_scalar_type (mask_element_type
);
3637 vec_perm_builder
mask (nunits
, nunits
, 1);
3638 mask
.quick_grow (nunits
);
3639 vec_perm_indices indices
;
3641 /* Initialize the vect stmts of NODE to properly insert the generated
3644 for (unsigned i
= SLP_TREE_VEC_STMTS (node
).length ();
3645 i
< SLP_TREE_NUMBER_OF_VEC_STMTS (node
); i
++)
3646 SLP_TREE_VEC_STMTS (node
).quick_push (NULL
);
3648 /* Generate permutation masks for every NODE. Number of masks for each NODE
3649 is equal to GROUP_SIZE.
3650 E.g., we have a group of three nodes with three loads from the same
3651 location in each node, and the vector size is 4. I.e., we have a
3652 a0b0c0a1b1c1... sequence and we need to create the following vectors:
3653 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
3654 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
3657 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
3658 The last mask is illegal since we assume two operands for permute
3659 operation, and the mask element values can't be outside that range.
3660 Hence, the last mask must be converted into {2,5,5,5}.
3661 For the first two permutations we need the first and the second input
3662 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
3663 we need the second and the third vectors: {b1,c1,a2,b2} and
3666 int vect_stmts_counter
= 0;
3667 unsigned int index
= 0;
3668 int first_vec_index
= -1;
3669 int second_vec_index
= -1;
3673 for (unsigned int j
= 0; j
< const_vf
; j
++)
3675 for (int k
= 0; k
< group_size
; k
++)
3677 unsigned int i
= (SLP_TREE_LOAD_PERMUTATION (node
)[k
]
3678 + j
* DR_GROUP_SIZE (stmt_info
));
3679 vec_index
= i
/ nunits
;
3680 mask_element
= i
% nunits
;
3681 if (vec_index
== first_vec_index
3682 || first_vec_index
== -1)
3684 first_vec_index
= vec_index
;
3686 else if (vec_index
== second_vec_index
3687 || second_vec_index
== -1)
3689 second_vec_index
= vec_index
;
3690 mask_element
+= nunits
;
3694 if (dump_enabled_p ())
3696 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3697 "permutation requires at "
3698 "least three vectors ");
3699 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
3700 stmt_info
->stmt
, 0);
3702 gcc_assert (analyze_only
);
3706 gcc_assert (mask_element
< 2 * nunits
);
3707 if (mask_element
!= index
)
3709 mask
[index
++] = mask_element
;
3711 if (index
== nunits
&& !noop_p
)
3713 indices
.new_vector (mask
, 2, nunits
);
3714 if (!can_vec_perm_const_p (mode
, indices
))
3716 if (dump_enabled_p ())
3718 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
3720 "unsupported vect permute { ");
3721 for (i
= 0; i
< nunits
; ++i
)
3723 dump_dec (MSG_MISSED_OPTIMIZATION
, mask
[i
]);
3724 dump_printf (MSG_MISSED_OPTIMIZATION
, " ");
3726 dump_printf (MSG_MISSED_OPTIMIZATION
, "}\n");
3728 gcc_assert (analyze_only
);
3735 if (index
== nunits
)
3739 tree mask_vec
= NULL_TREE
;
3742 mask_vec
= vec_perm_indices_to_tree (mask_type
, indices
);
3744 if (second_vec_index
== -1)
3745 second_vec_index
= first_vec_index
;
3747 /* Generate the permute statement if necessary. */
3748 tree first_vec
= dr_chain
[first_vec_index
];
3749 tree second_vec
= dr_chain
[second_vec_index
];
3750 stmt_vec_info perm_stmt_info
;
3753 gassign
*stmt
= as_a
<gassign
*> (stmt_info
->stmt
);
3755 = vect_create_destination_var (gimple_assign_lhs (stmt
),
3757 perm_dest
= make_ssa_name (perm_dest
);
3759 = gimple_build_assign (perm_dest
, VEC_PERM_EXPR
,
3760 first_vec
, second_vec
,
3763 = vect_finish_stmt_generation (stmt_info
, perm_stmt
,
3767 /* If mask was NULL_TREE generate the requested
3768 identity transform. */
3769 perm_stmt_info
= vinfo
->lookup_def (first_vec
);
3771 /* Store the vector statement in NODE. */
3772 SLP_TREE_VEC_STMTS (node
)[vect_stmts_counter
++]
3777 first_vec_index
= -1;
3778 second_vec_index
= -1;
3787 /* Vectorize SLP instance tree in postorder. */
3790 vect_schedule_slp_instance (slp_tree node
, slp_instance instance
,
3791 scalar_stmts_to_slp_tree_map_t
*bst_map
)
3793 gimple_stmt_iterator si
;
3794 stmt_vec_info stmt_info
;
3795 unsigned int group_size
;
3800 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
3803 /* See if we have already vectorized the same set of stmts and reuse their
3804 vectorized stmts. */
3805 if (slp_tree
*leader
= bst_map
->get (SLP_TREE_SCALAR_STMTS (node
)))
3807 SLP_TREE_VEC_STMTS (node
).safe_splice (SLP_TREE_VEC_STMTS (*leader
));
3811 bst_map
->put (SLP_TREE_SCALAR_STMTS (node
).copy (), node
);
3812 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
3813 vect_schedule_slp_instance (child
, instance
, bst_map
);
3815 /* Push SLP node def-type to stmts. */
3816 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
3817 if (SLP_TREE_DEF_TYPE (child
) != vect_internal_def
)
3819 stmt_vec_info child_stmt_info
;
3820 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child
), j
, child_stmt_info
)
3821 STMT_VINFO_DEF_TYPE (child_stmt_info
) = SLP_TREE_DEF_TYPE (child
);
3824 stmt_info
= SLP_TREE_SCALAR_STMTS (node
)[0];
3826 /* VECTYPE is the type of the destination. */
3827 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3828 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
3829 group_size
= SLP_INSTANCE_GROUP_SIZE (instance
);
3831 gcc_assert (SLP_TREE_NUMBER_OF_VEC_STMTS (node
) != 0);
3832 if (!SLP_TREE_VEC_STMTS (node
).exists ())
3833 SLP_TREE_VEC_STMTS (node
).create (SLP_TREE_NUMBER_OF_VEC_STMTS (node
));
3835 if (dump_enabled_p ())
3837 dump_printf_loc (MSG_NOTE
,vect_location
,
3838 "------>vectorizing SLP node starting from: ");
3839 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt_info
->stmt
, 0);
3842 /* Vectorized stmts go before the last scalar stmt which is where
3843 all uses are ready. */
3844 stmt_vec_info last_stmt_info
= vect_find_last_scalar_stmt_in_slp (node
);
3845 si
= gsi_for_stmt (last_stmt_info
->stmt
);
3847 /* Mark the first element of the reduction chain as reduction to properly
3848 transform the node. In the analysis phase only the last element of the
3849 chain is marked as reduction. */
3850 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info
)
3851 && REDUC_GROUP_FIRST_ELEMENT (stmt_info
)
3852 && REDUC_GROUP_FIRST_ELEMENT (stmt_info
) == stmt_info
)
3854 STMT_VINFO_DEF_TYPE (stmt_info
) = vect_reduction_def
;
3855 STMT_VINFO_TYPE (stmt_info
) = reduc_vec_info_type
;
3858 /* Handle two-operation SLP nodes by vectorizing the group with
3859 both operations and then performing a merge. */
3860 if (SLP_TREE_TWO_OPERATORS (node
))
3862 gassign
*stmt
= as_a
<gassign
*> (stmt_info
->stmt
);
3863 enum tree_code code0
= gimple_assign_rhs_code (stmt
);
3864 enum tree_code ocode
= ERROR_MARK
;
3865 stmt_vec_info ostmt_info
;
3866 vec_perm_builder
mask (group_size
, group_size
, 1);
3867 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, ostmt_info
)
3869 gassign
*ostmt
= as_a
<gassign
*> (ostmt_info
->stmt
);
3870 if (gimple_assign_rhs_code (ostmt
) != code0
)
3872 mask
.quick_push (1);
3873 ocode
= gimple_assign_rhs_code (ostmt
);
3876 mask
.quick_push (0);
3878 if (ocode
!= ERROR_MARK
)
3880 vec
<stmt_vec_info
> v0
;
3881 vec
<stmt_vec_info
> v1
;
3883 tree tmask
= NULL_TREE
;
3884 vect_transform_stmt (stmt_info
, &si
, node
, instance
);
3885 v0
= SLP_TREE_VEC_STMTS (node
).copy ();
3886 SLP_TREE_VEC_STMTS (node
).truncate (0);
3887 gimple_assign_set_rhs_code (stmt
, ocode
);
3888 vect_transform_stmt (stmt_info
, &si
, node
, instance
);
3889 gimple_assign_set_rhs_code (stmt
, code0
);
3890 v1
= SLP_TREE_VEC_STMTS (node
).copy ();
3891 SLP_TREE_VEC_STMTS (node
).truncate (0);
3892 tree meltype
= build_nonstandard_integer_type
3893 (GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype
))), 1);
3894 tree mvectype
= get_same_sized_vectype (meltype
, vectype
);
3896 for (j
= 0; j
< v0
.length (); ++j
)
3898 /* Enforced by vect_build_slp_tree, which rejects variable-length
3899 vectors for SLP_TREE_TWO_OPERATORS. */
3900 unsigned int const_nunits
= nunits
.to_constant ();
3901 tree_vector_builder
melts (mvectype
, const_nunits
, 1);
3902 for (l
= 0; l
< const_nunits
; ++l
)
3904 if (k
>= group_size
)
3906 tree t
= build_int_cst (meltype
,
3907 mask
[k
++] * const_nunits
+ l
);
3908 melts
.quick_push (t
);
3910 tmask
= melts
.build ();
3912 /* ??? Not all targets support a VEC_PERM_EXPR with a
3913 constant mask that would translate to a vec_merge RTX
3914 (with their vec_perm_const_ok). We can either not
3915 vectorize in that case or let veclower do its job.
3916 Unfortunately that isn't too great and at least for
3917 plus/minus we'd eventually like to match targets
3918 vector addsub instructions. */
3920 vstmt
= gimple_build_assign (make_ssa_name (vectype
),
3922 gimple_assign_lhs (v0
[j
]->stmt
),
3923 gimple_assign_lhs (v1
[j
]->stmt
),
3925 SLP_TREE_VEC_STMTS (node
).quick_push
3926 (vect_finish_stmt_generation (stmt_info
, vstmt
, &si
));
3933 vect_transform_stmt (stmt_info
, &si
, node
, instance
);
3935 /* Restore stmt def-types. */
3936 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
3937 if (SLP_TREE_DEF_TYPE (child
) != vect_internal_def
)
3939 stmt_vec_info child_stmt_info
;
3940 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child
), j
, child_stmt_info
)
3941 STMT_VINFO_DEF_TYPE (child_stmt_info
) = vect_internal_def
;
3945 /* Replace scalar calls from SLP node NODE with setting of their lhs to zero.
3946 For loop vectorization this is done in vectorizable_call, but for SLP
3947 it needs to be deferred until end of vect_schedule_slp, because multiple
3948 SLP instances may refer to the same scalar stmt. */
3951 vect_remove_slp_scalar_calls (slp_tree node
)
3954 gimple_stmt_iterator gsi
;
3958 stmt_vec_info stmt_info
;
3960 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
3963 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
3964 vect_remove_slp_scalar_calls (child
);
3966 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt_info
)
3968 gcall
*stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
3969 if (!stmt
|| gimple_bb (stmt
) == NULL
)
3971 if (is_pattern_stmt_p (stmt_info
)
3972 || !PURE_SLP_STMT (stmt_info
))
3974 lhs
= gimple_call_lhs (stmt
);
3975 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (TREE_TYPE (lhs
)));
3976 gsi
= gsi_for_stmt (stmt
);
3977 stmt_info
->vinfo
->replace_stmt (&gsi
, stmt_info
, new_stmt
);
3978 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt
)) = new_stmt
;
3982 /* Generate vector code for all SLP instances in the loop/basic block. */
3985 vect_schedule_slp (vec_info
*vinfo
)
3987 vec
<slp_instance
> slp_instances
;
3988 slp_instance instance
;
3991 scalar_stmts_to_slp_tree_map_t
*bst_map
3992 = new scalar_stmts_to_slp_tree_map_t ();
3993 slp_instances
= vinfo
->slp_instances
;
3994 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
3996 /* Schedule the tree of INSTANCE. */
3997 vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance
),
3999 if (dump_enabled_p ())
4000 dump_printf_loc (MSG_NOTE
, vect_location
,
4001 "vectorizing stmts using SLP.\n");
4005 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
4007 slp_tree root
= SLP_INSTANCE_TREE (instance
);
4008 stmt_vec_info store_info
;
4011 /* Remove scalar call stmts. Do not do this for basic-block
4012 vectorization as not all uses may be vectorized.
4013 ??? Why should this be necessary? DCE should be able to
4014 remove the stmts itself.
4015 ??? For BB vectorization we can as well remove scalar
4016 stmts starting from the SLP tree root if they have no
4018 if (is_a
<loop_vec_info
> (vinfo
))
4019 vect_remove_slp_scalar_calls (root
);
4021 for (j
= 0; SLP_TREE_SCALAR_STMTS (root
).iterate (j
, &store_info
)
4022 && j
< SLP_INSTANCE_GROUP_SIZE (instance
); j
++)
4024 if (!STMT_VINFO_DATA_REF (store_info
))
4027 store_info
= vect_orig_stmt (store_info
);
4028 /* Free the attached stmt_vec_info and remove the stmt. */
4029 vinfo
->remove_stmt (store_info
);