/* SLP - Basic Block Vectorization
   Copyright (C) 2007-2018 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "tree-pass.h"
#include "ssa.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "params.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "gimple-iterator.h"
#include "cfgloop.h"
#include "tree-vectorizer.h"
#include "langhooks.h"
#include "gimple-walk.h"
#include "dbgcnt.h"
#include "tree-vector-builder.h"
#include "vec-perm-indices.h"
#include "gimple-fold.h"
#include "internal-fn.h"
50 /* Recursively free the memory allocated for the SLP tree rooted at NODE.
51 FINAL_P is true if we have vectorized the instance or if we have
52 made a final decision not to vectorize the statements in any way. */
55 vect_free_slp_tree (slp_tree node
, bool final_p
)
60 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
61 vect_free_slp_tree (child
, final_p
);
63 /* Don't update STMT_VINFO_NUM_SLP_USES if it isn't relevant.
64 Some statements might no longer exist, after having been
65 removed by vect_transform_stmt. Updating the remaining
66 statements would be redundant. */
69 stmt_vec_info stmt_info
;
70 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt_info
)
72 gcc_assert (STMT_VINFO_NUM_SLP_USES (stmt_info
) > 0);
73 STMT_VINFO_NUM_SLP_USES (stmt_info
)--;
77 SLP_TREE_CHILDREN (node
).release ();
78 SLP_TREE_SCALAR_STMTS (node
).release ();
79 SLP_TREE_VEC_STMTS (node
).release ();
80 SLP_TREE_LOAD_PERMUTATION (node
).release ();
86 /* Free the memory allocated for the SLP instance. FINAL_P is true if we
87 have vectorized the instance or if we have made a final decision not
88 to vectorize the statements in any way. */
91 vect_free_slp_instance (slp_instance instance
, bool final_p
)
93 vect_free_slp_tree (SLP_INSTANCE_TREE (instance
), final_p
);
94 SLP_INSTANCE_LOADS (instance
).release ();
99 /* Create an SLP node for SCALAR_STMTS. */
102 vect_create_new_slp_node (vec
<stmt_vec_info
> scalar_stmts
)
105 stmt_vec_info stmt_info
= scalar_stmts
[0];
108 if (gcall
*stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
))
109 nops
= gimple_call_num_args (stmt
);
110 else if (gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
112 nops
= gimple_num_ops (stmt
) - 1;
113 if (gimple_assign_rhs_code (stmt
) == COND_EXPR
)
116 else if (is_a
<gphi
*> (stmt_info
->stmt
))
121 node
= XNEW (struct _slp_tree
);
122 SLP_TREE_SCALAR_STMTS (node
) = scalar_stmts
;
123 SLP_TREE_VEC_STMTS (node
).create (0);
124 SLP_TREE_NUMBER_OF_VEC_STMTS (node
) = 0;
125 SLP_TREE_CHILDREN (node
).create (nops
);
126 SLP_TREE_LOAD_PERMUTATION (node
) = vNULL
;
127 SLP_TREE_TWO_OPERATORS (node
) = false;
128 SLP_TREE_DEF_TYPE (node
) = vect_internal_def
;
131 FOR_EACH_VEC_ELT (scalar_stmts
, i
, stmt_info
)
132 STMT_VINFO_NUM_SLP_USES (stmt_info
)++;
138 /* This structure is used in creation of an SLP tree. Each instance
139 corresponds to the same operand in a group of scalar stmts in an SLP
141 typedef struct _slp_oprnd_info
143 /* Def-stmts for the operands. */
144 vec
<stmt_vec_info
> def_stmts
;
145 /* Information about the first statement, its vector def-type, type, the
146 operand itself in case it's constant, and an indication if it's a pattern
149 enum vect_def_type first_dt
;
155 /* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
157 static vec
<slp_oprnd_info
>
158 vect_create_oprnd_info (int nops
, int group_size
)
161 slp_oprnd_info oprnd_info
;
162 vec
<slp_oprnd_info
> oprnds_info
;
164 oprnds_info
.create (nops
);
165 for (i
= 0; i
< nops
; i
++)
167 oprnd_info
= XNEW (struct _slp_oprnd_info
);
168 oprnd_info
->def_stmts
.create (group_size
);
169 oprnd_info
->first_dt
= vect_uninitialized_def
;
170 oprnd_info
->first_op_type
= NULL_TREE
;
171 oprnd_info
->first_pattern
= false;
172 oprnd_info
->second_pattern
= false;
173 oprnds_info
.quick_push (oprnd_info
);
180 /* Free operands info. */
183 vect_free_oprnd_info (vec
<slp_oprnd_info
> &oprnds_info
)
186 slp_oprnd_info oprnd_info
;
188 FOR_EACH_VEC_ELT (oprnds_info
, i
, oprnd_info
)
190 oprnd_info
->def_stmts
.release ();
191 XDELETE (oprnd_info
);
194 oprnds_info
.release ();
198 /* Find the place of the data-ref in STMT in the interleaving chain that starts
199 from FIRST_STMT. Return -1 if the data-ref is not a part of the chain. */
202 vect_get_place_in_interleaving_chain (gimple
*stmt
, gimple
*first_stmt
)
204 gimple
*next_stmt
= first_stmt
;
207 if (first_stmt
!= DR_GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)))
212 if (next_stmt
== stmt
)
214 next_stmt
= DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
216 result
+= DR_GROUP_GAP (vinfo_for_stmt (next_stmt
));
223 /* Check whether it is possible to load COUNT elements of type ELT_MODE
224 using the method implemented by duplicate_and_interleave. Return true
225 if so, returning the number of intermediate vectors in *NVECTORS_OUT
226 (if nonnull) and the type of each intermediate vector in *VECTOR_TYPE_OUT
230 can_duplicate_and_interleave_p (unsigned int count
, machine_mode elt_mode
,
231 unsigned int *nvectors_out
,
232 tree
*vector_type_out
,
235 poly_int64 elt_bytes
= count
* GET_MODE_SIZE (elt_mode
);
237 unsigned int nvectors
= 1;
240 scalar_int_mode int_mode
;
241 poly_int64 elt_bits
= elt_bytes
* BITS_PER_UNIT
;
242 if (multiple_p (current_vector_size
, elt_bytes
, &nelts
)
243 && int_mode_for_size (elt_bits
, 0).exists (&int_mode
))
245 tree int_type
= build_nonstandard_integer_type
246 (GET_MODE_BITSIZE (int_mode
), 1);
247 tree vector_type
= build_vector_type (int_type
, nelts
);
248 if (VECTOR_MODE_P (TYPE_MODE (vector_type
)))
250 vec_perm_builder
sel1 (nelts
, 2, 3);
251 vec_perm_builder
sel2 (nelts
, 2, 3);
252 poly_int64 half_nelts
= exact_div (nelts
, 2);
253 for (unsigned int i
= 0; i
< 3; ++i
)
256 sel1
.quick_push (i
+ nelts
);
257 sel2
.quick_push (half_nelts
+ i
);
258 sel2
.quick_push (half_nelts
+ i
+ nelts
);
260 vec_perm_indices
indices1 (sel1
, 2, nelts
);
261 vec_perm_indices
indices2 (sel2
, 2, nelts
);
262 if (can_vec_perm_const_p (TYPE_MODE (vector_type
), indices1
)
263 && can_vec_perm_const_p (TYPE_MODE (vector_type
), indices2
))
266 *nvectors_out
= nvectors
;
268 *vector_type_out
= vector_type
;
271 permutes
[0] = vect_gen_perm_mask_checked (vector_type
,
273 permutes
[1] = vect_gen_perm_mask_checked (vector_type
,
280 if (!multiple_p (elt_bytes
, 2, &elt_bytes
))
286 /* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
287 they are of a valid type and that they match the defs of the first stmt of
288 the SLP group (stored in OPRNDS_INFO). This function tries to match stmts
289 by swapping operands of STMTS[STMT_NUM] when possible. Non-zero *SWAP
290 indicates swap is required for cond_expr stmts. Specifically, *SWAP
291 is 1 if STMT is cond and operands of comparison need to be swapped;
292 *SWAP is 2 if STMT is cond and code of comparison needs to be inverted.
293 If there is any operand swap in this function, *SWAP is set to non-zero
295 If there was a fatal error return -1; if the error could be corrected by
296 swapping operands of father node of this one, return 1; if everything is
299 vect_get_and_check_slp_defs (vec_info
*vinfo
, unsigned char *swap
,
300 vec
<stmt_vec_info
> stmts
, unsigned stmt_num
,
301 vec
<slp_oprnd_info
> *oprnds_info
)
303 stmt_vec_info stmt_info
= stmts
[stmt_num
];
305 unsigned int i
, number_of_oprnds
;
306 enum vect_def_type dt
= vect_uninitialized_def
;
307 bool pattern
= false;
308 slp_oprnd_info oprnd_info
;
309 int first_op_idx
= 1;
310 bool commutative
= false;
311 bool first_op_cond
= false;
312 bool first
= stmt_num
== 0;
313 bool second
= stmt_num
== 1;
315 if (gcall
*stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
))
317 number_of_oprnds
= gimple_call_num_args (stmt
);
320 else if (gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
322 enum tree_code code
= gimple_assign_rhs_code (stmt
);
323 number_of_oprnds
= gimple_num_ops (stmt
) - 1;
324 /* Swap can only be done for cond_expr if asked to, otherwise we
325 could result in different comparison code to the first stmt. */
326 if (gimple_assign_rhs_code (stmt
) == COND_EXPR
327 && COMPARISON_CLASS_P (gimple_assign_rhs1 (stmt
)))
329 first_op_cond
= true;
333 commutative
= commutative_tree_code (code
);
338 bool swapped
= (*swap
!= 0);
339 gcc_assert (!swapped
|| first_op_cond
);
340 for (i
= 0; i
< number_of_oprnds
; i
++)
345 /* Map indicating how operands of cond_expr should be swapped. */
346 int maps
[3][4] = {{0, 1, 2, 3}, {1, 0, 2, 3}, {0, 1, 3, 2}};
347 int *map
= maps
[*swap
];
350 oprnd
= TREE_OPERAND (gimple_op (stmt_info
->stmt
,
351 first_op_idx
), map
[i
]);
353 oprnd
= gimple_op (stmt_info
->stmt
, map
[i
]);
356 oprnd
= gimple_op (stmt_info
->stmt
, first_op_idx
+ (swapped
? !i
: i
));
358 oprnd_info
= (*oprnds_info
)[i
];
360 stmt_vec_info def_stmt_info
;
361 if (!vect_is_simple_use (oprnd
, vinfo
, &dt
, &def_stmt_info
))
363 if (dump_enabled_p ())
365 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
366 "Build SLP failed: can't analyze def for ");
367 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, oprnd
);
368 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
374 /* Check if DEF_STMT_INFO is a part of a pattern in LOOP and get
375 the def stmt from the pattern. Check that all the stmts of the
376 node are in the pattern. */
377 if (def_stmt_info
&& is_pattern_stmt_p (def_stmt_info
))
380 if (!first
&& !oprnd_info
->first_pattern
381 /* Allow different pattern state for the defs of the
382 first stmt in reduction chains. */
383 && (oprnd_info
->first_dt
!= vect_reduction_def
384 || (!second
&& !oprnd_info
->second_pattern
)))
394 if (dump_enabled_p ())
396 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
397 "Build SLP failed: some of the stmts"
398 " are in a pattern, and others are not ");
399 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, oprnd
);
400 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
406 dt
= STMT_VINFO_DEF_TYPE (def_stmt_info
);
408 if (dt
== vect_unknown_def_type
)
410 if (dump_enabled_p ())
411 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
412 "Unsupported pattern.\n");
416 switch (gimple_code (def_stmt_info
->stmt
))
423 if (dump_enabled_p ())
424 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
425 "unsupported defining stmt:\n");
431 oprnd_info
->second_pattern
= pattern
;
435 oprnd_info
->first_dt
= dt
;
436 oprnd_info
->first_pattern
= pattern
;
437 oprnd_info
->first_op_type
= TREE_TYPE (oprnd
);
441 /* Not first stmt of the group, check that the def-stmt/s match
442 the def-stmt/s of the first stmt. Allow different definition
443 types for reduction chains: the first stmt must be a
444 vect_reduction_def (a phi node), and the rest
445 vect_internal_def. */
446 tree type
= TREE_TYPE (oprnd
);
447 if ((oprnd_info
->first_dt
!= dt
448 && !(oprnd_info
->first_dt
== vect_reduction_def
449 && dt
== vect_internal_def
)
450 && !((oprnd_info
->first_dt
== vect_external_def
451 || oprnd_info
->first_dt
== vect_constant_def
)
452 && (dt
== vect_external_def
453 || dt
== vect_constant_def
)))
454 || !types_compatible_p (oprnd_info
->first_op_type
, type
))
456 /* Try swapping operands if we got a mismatch. */
465 if (dump_enabled_p ())
466 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
467 "Build SLP failed: different types\n");
471 if ((dt
== vect_constant_def
472 || dt
== vect_external_def
)
473 && !current_vector_size
.is_constant ()
474 && (TREE_CODE (type
) == BOOLEAN_TYPE
475 || !can_duplicate_and_interleave_p (stmts
.length (),
478 if (dump_enabled_p ())
480 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
481 "Build SLP failed: invalid type of def "
482 "for variable-length SLP ");
483 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, oprnd
);
484 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
490 /* Check the types of the definitions. */
493 case vect_constant_def
:
494 case vect_external_def
:
497 case vect_reduction_def
:
498 case vect_induction_def
:
499 case vect_internal_def
:
500 oprnd_info
->def_stmts
.quick_push (def_stmt_info
);
504 /* FORNOW: Not supported. */
505 if (dump_enabled_p ())
507 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
508 "Build SLP failed: illegal type of def ");
509 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, oprnd
);
510 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
520 /* If there are already uses of this stmt in a SLP instance then
521 we've committed to the operand order and can't swap it. */
522 if (STMT_VINFO_NUM_SLP_USES (stmt_info
) != 0)
524 if (dump_enabled_p ())
526 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
527 "Build SLP failed: cannot swap operands of "
529 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
535 gassign
*stmt
= as_a
<gassign
*> (stmt_info
->stmt
);
538 tree cond
= gimple_assign_rhs1 (stmt
);
539 enum tree_code code
= TREE_CODE (cond
);
544 swap_ssa_operands (stmt
, &TREE_OPERAND (cond
, 0),
545 &TREE_OPERAND (cond
, 1));
546 TREE_SET_CODE (cond
, swap_tree_comparison (code
));
551 swap_ssa_operands (stmt
, gimple_assign_rhs2_ptr (stmt
),
552 gimple_assign_rhs3_ptr (stmt
));
553 bool honor_nans
= HONOR_NANS (TREE_OPERAND (cond
, 0));
554 code
= invert_tree_comparison (TREE_CODE (cond
), honor_nans
);
555 gcc_assert (code
!= ERROR_MARK
);
556 TREE_SET_CODE (cond
, code
);
560 swap_ssa_operands (stmt
, gimple_assign_rhs1_ptr (stmt
),
561 gimple_assign_rhs2_ptr (stmt
));
562 if (dump_enabled_p ())
564 dump_printf_loc (MSG_NOTE
, vect_location
,
565 "swapped operands to match def types in ");
566 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
574 /* Return true if call statements CALL1 and CALL2 are similar enough
575 to be combined into the same SLP group. */
578 compatible_calls_p (gcall
*call1
, gcall
*call2
)
580 unsigned int nargs
= gimple_call_num_args (call1
);
581 if (nargs
!= gimple_call_num_args (call2
))
584 if (gimple_call_combined_fn (call1
) != gimple_call_combined_fn (call2
))
587 if (gimple_call_internal_p (call1
))
589 if (!types_compatible_p (TREE_TYPE (gimple_call_lhs (call1
)),
590 TREE_TYPE (gimple_call_lhs (call2
))))
592 for (unsigned int i
= 0; i
< nargs
; ++i
)
593 if (!types_compatible_p (TREE_TYPE (gimple_call_arg (call1
, i
)),
594 TREE_TYPE (gimple_call_arg (call2
, i
))))
599 if (!operand_equal_p (gimple_call_fn (call1
),
600 gimple_call_fn (call2
), 0))
603 if (gimple_call_fntype (call1
) != gimple_call_fntype (call2
))
609 /* A subroutine of vect_build_slp_tree for checking VECTYPE, which is the
610 caller's attempt to find the vector type in STMT with the narrowest
611 element type. Return true if VECTYPE is nonnull and if it is valid
612 for VINFO. When returning true, update MAX_NUNITS to reflect the
613 number of units in VECTYPE. VINFO, GORUP_SIZE and MAX_NUNITS are
614 as for vect_build_slp_tree. */
617 vect_record_max_nunits (vec_info
*vinfo
, gimple
*stmt
, unsigned int group_size
,
618 tree vectype
, poly_uint64
*max_nunits
)
622 if (dump_enabled_p ())
624 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
625 "Build SLP failed: unsupported data-type in ");
626 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
627 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
629 /* Fatal mismatch. */
633 /* If populating the vector type requires unrolling then fail
634 before adjusting *max_nunits for basic-block vectorization. */
635 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
636 unsigned HOST_WIDE_INT const_nunits
;
637 if (is_a
<bb_vec_info
> (vinfo
)
638 && (!nunits
.is_constant (&const_nunits
)
639 || const_nunits
> group_size
))
641 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
642 "Build SLP failed: unrolling required "
643 "in basic block SLP\n");
644 /* Fatal mismatch. */
648 /* In case of multiple types we need to detect the smallest type. */
649 vect_update_max_nunits (max_nunits
, vectype
);
653 /* STMTS is a group of GROUP_SIZE SLP statements in which some
654 statements do the same operation as the first statement and in which
655 the others do ALT_STMT_CODE. Return true if we can take one vector
656 of the first operation and one vector of the second and permute them
657 to get the required result. VECTYPE is the type of the vector that
658 would be permuted. */
661 vect_two_operations_perm_ok_p (vec
<stmt_vec_info
> stmts
,
662 unsigned int group_size
, tree vectype
,
663 tree_code alt_stmt_code
)
665 unsigned HOST_WIDE_INT count
;
666 if (!TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&count
))
669 vec_perm_builder
sel (count
, count
, 1);
670 for (unsigned int i
= 0; i
< count
; ++i
)
672 unsigned int elt
= i
;
673 gassign
*stmt
= as_a
<gassign
*> (stmts
[i
% group_size
]->stmt
);
674 if (gimple_assign_rhs_code (stmt
) == alt_stmt_code
)
676 sel
.quick_push (elt
);
678 vec_perm_indices
indices (sel
, 2, count
);
679 return can_vec_perm_const_p (TYPE_MODE (vectype
), indices
);
682 /* Verify if the scalar stmts STMTS are isomorphic, require data
683 permutation or are of unsupported types of operation. Return
684 true if they are, otherwise return false and indicate in *MATCHES
685 which stmts are not isomorphic to the first one. If MATCHES[0]
686 is false then this indicates the comparison could not be
687 carried out or the stmts will never be vectorized by SLP.
689 Note COND_EXPR is possibly ismorphic to another one after swapping its
690 operands. Set SWAP[i] to 1 if stmt I is COND_EXPR and isomorphic to
691 the first stmt by swapping the two operands of comparison; set SWAP[i]
692 to 2 if stmt I is isormorphic to the first stmt by inverting the code
693 of comparison. Take A1 >= B1 ? X1 : Y1 as an exmple, it can be swapped
694 to (B1 <= A1 ? X1 : Y1); or be inverted to (A1 < B1) ? Y1 : X1. */
697 vect_build_slp_tree_1 (vec_info
*vinfo
, unsigned char *swap
,
698 vec
<stmt_vec_info
> stmts
, unsigned int group_size
,
699 poly_uint64
*max_nunits
, bool *matches
,
703 stmt_vec_info first_stmt_info
= stmts
[0];
704 enum tree_code first_stmt_code
= ERROR_MARK
;
705 enum tree_code alt_stmt_code
= ERROR_MARK
;
706 enum tree_code rhs_code
= ERROR_MARK
;
707 enum tree_code first_cond_code
= ERROR_MARK
;
709 bool need_same_oprnds
= false;
710 tree vectype
= NULL_TREE
, first_op1
= NULL_TREE
;
713 machine_mode optab_op2_mode
;
714 machine_mode vec_mode
;
715 stmt_vec_info first_load
= NULL
, prev_first_load
= NULL
;
717 /* For every stmt in NODE find its def stmt/s. */
718 stmt_vec_info stmt_info
;
719 FOR_EACH_VEC_ELT (stmts
, i
, stmt_info
)
721 gimple
*stmt
= stmt_info
->stmt
;
725 if (dump_enabled_p ())
727 dump_printf_loc (MSG_NOTE
, vect_location
, "Build SLP for ");
728 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
731 /* Fail to vectorize statements marked as unvectorizable. */
732 if (!STMT_VINFO_VECTORIZABLE (stmt_info
))
734 if (dump_enabled_p ())
736 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
737 "Build SLP failed: unvectorizable statement ");
738 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
740 /* Fatal mismatch. */
745 lhs
= gimple_get_lhs (stmt
);
746 if (lhs
== NULL_TREE
)
748 if (dump_enabled_p ())
750 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
751 "Build SLP failed: not GIMPLE_ASSIGN nor "
753 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
755 /* Fatal mismatch. */
761 if (!vect_get_vector_types_for_stmt (stmt_info
, &vectype
,
764 && !vect_record_max_nunits (vinfo
, stmt_info
, group_size
,
765 nunits_vectype
, max_nunits
)))
767 /* Fatal mismatch. */
772 gcc_assert (vectype
);
774 if (gcall
*call_stmt
= dyn_cast
<gcall
*> (stmt
))
776 rhs_code
= CALL_EXPR
;
777 if ((gimple_call_internal_p (call_stmt
)
778 && (!vectorizable_internal_fn_p
779 (gimple_call_internal_fn (call_stmt
))))
780 || gimple_call_tail_p (call_stmt
)
781 || gimple_call_noreturn_p (call_stmt
)
782 || !gimple_call_nothrow_p (call_stmt
)
783 || gimple_call_chain (call_stmt
))
785 if (dump_enabled_p ())
787 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
788 "Build SLP failed: unsupported call type ");
789 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
792 /* Fatal mismatch. */
798 rhs_code
= gimple_assign_rhs_code (stmt
);
800 /* Check the operation. */
803 first_stmt_code
= rhs_code
;
805 /* Shift arguments should be equal in all the packed stmts for a
806 vector shift with scalar shift operand. */
807 if (rhs_code
== LSHIFT_EXPR
|| rhs_code
== RSHIFT_EXPR
808 || rhs_code
== LROTATE_EXPR
809 || rhs_code
== RROTATE_EXPR
)
811 if (vectype
== boolean_type_node
)
813 if (dump_enabled_p ())
814 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
815 "Build SLP failed: shift of a"
817 /* Fatal mismatch. */
822 vec_mode
= TYPE_MODE (vectype
);
824 /* First see if we have a vector/vector shift. */
825 optab
= optab_for_tree_code (rhs_code
, vectype
,
829 || optab_handler (optab
, vec_mode
) == CODE_FOR_nothing
)
831 /* No vector/vector shift, try for a vector/scalar shift. */
832 optab
= optab_for_tree_code (rhs_code
, vectype
,
837 if (dump_enabled_p ())
838 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
839 "Build SLP failed: no optab.\n");
840 /* Fatal mismatch. */
844 icode
= (int) optab_handler (optab
, vec_mode
);
845 if (icode
== CODE_FOR_nothing
)
847 if (dump_enabled_p ())
848 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
850 "op not supported by target.\n");
851 /* Fatal mismatch. */
855 optab_op2_mode
= insn_data
[icode
].operand
[2].mode
;
856 if (!VECTOR_MODE_P (optab_op2_mode
))
858 need_same_oprnds
= true;
859 first_op1
= gimple_assign_rhs2 (stmt
);
863 else if (rhs_code
== WIDEN_LSHIFT_EXPR
)
865 need_same_oprnds
= true;
866 first_op1
= gimple_assign_rhs2 (stmt
);
871 if (first_stmt_code
!= rhs_code
872 && alt_stmt_code
== ERROR_MARK
)
873 alt_stmt_code
= rhs_code
;
874 if (first_stmt_code
!= rhs_code
875 && (first_stmt_code
!= IMAGPART_EXPR
876 || rhs_code
!= REALPART_EXPR
)
877 && (first_stmt_code
!= REALPART_EXPR
878 || rhs_code
!= IMAGPART_EXPR
)
879 /* Handle mismatches in plus/minus by computing both
880 and merging the results. */
881 && !((first_stmt_code
== PLUS_EXPR
882 || first_stmt_code
== MINUS_EXPR
)
883 && (alt_stmt_code
== PLUS_EXPR
884 || alt_stmt_code
== MINUS_EXPR
)
885 && rhs_code
== alt_stmt_code
)
886 && !(STMT_VINFO_GROUPED_ACCESS (stmt_info
)
887 && (first_stmt_code
== ARRAY_REF
888 || first_stmt_code
== BIT_FIELD_REF
889 || first_stmt_code
== INDIRECT_REF
890 || first_stmt_code
== COMPONENT_REF
891 || first_stmt_code
== MEM_REF
)))
893 if (dump_enabled_p ())
895 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
896 "Build SLP failed: different operation "
898 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
899 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
901 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
902 first_stmt_info
->stmt
, 0);
909 && !operand_equal_p (first_op1
, gimple_assign_rhs2 (stmt
), 0))
911 if (dump_enabled_p ())
913 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
914 "Build SLP failed: different shift "
916 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
922 if (rhs_code
== CALL_EXPR
)
924 if (!compatible_calls_p (as_a
<gcall
*> (stmts
[0]->stmt
),
925 as_a
<gcall
*> (stmt
)))
927 if (dump_enabled_p ())
929 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
930 "Build SLP failed: different calls in ");
931 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
940 /* Grouped store or load. */
941 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
943 if (REFERENCE_CLASS_P (lhs
))
951 first_load
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
954 /* Check that there are no loads from different interleaving
955 chains in the same node. */
956 if (prev_first_load
!= first_load
)
958 if (dump_enabled_p ())
960 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
962 "Build SLP failed: different "
963 "interleaving chains in one node ");
964 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
972 prev_first_load
= first_load
;
974 } /* Grouped access. */
977 if (TREE_CODE_CLASS (rhs_code
) == tcc_reference
)
979 /* Not grouped load. */
980 if (dump_enabled_p ())
982 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
983 "Build SLP failed: not grouped load ");
984 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
987 /* FORNOW: Not grouped loads are not supported. */
988 /* Fatal mismatch. */
993 /* Not memory operation. */
994 if (TREE_CODE_CLASS (rhs_code
) != tcc_binary
995 && TREE_CODE_CLASS (rhs_code
) != tcc_unary
996 && TREE_CODE_CLASS (rhs_code
) != tcc_expression
997 && TREE_CODE_CLASS (rhs_code
) != tcc_comparison
998 && rhs_code
!= CALL_EXPR
)
1000 if (dump_enabled_p ())
1002 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1003 "Build SLP failed: operation");
1004 dump_printf (MSG_MISSED_OPTIMIZATION
, " unsupported ");
1005 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
1007 /* Fatal mismatch. */
1012 if (rhs_code
== COND_EXPR
)
1014 tree cond_expr
= gimple_assign_rhs1 (stmt
);
1015 enum tree_code cond_code
= TREE_CODE (cond_expr
);
1016 enum tree_code swap_code
= ERROR_MARK
;
1017 enum tree_code invert_code
= ERROR_MARK
;
1020 first_cond_code
= TREE_CODE (cond_expr
);
1021 else if (TREE_CODE_CLASS (cond_code
) == tcc_comparison
)
1023 bool honor_nans
= HONOR_NANS (TREE_OPERAND (cond_expr
, 0));
1024 swap_code
= swap_tree_comparison (cond_code
);
1025 invert_code
= invert_tree_comparison (cond_code
, honor_nans
);
1028 if (first_cond_code
== cond_code
)
1030 /* Isomorphic can be achieved by swapping. */
1031 else if (first_cond_code
== swap_code
)
1033 /* Isomorphic can be achieved by inverting. */
1034 else if (first_cond_code
== invert_code
)
1038 if (dump_enabled_p ())
1040 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1041 "Build SLP failed: different"
1043 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
1055 for (i
= 0; i
< group_size
; ++i
)
1059 /* If we allowed a two-operation SLP node verify the target can cope
1060 with the permute we are going to use. */
1061 if (alt_stmt_code
!= ERROR_MARK
1062 && TREE_CODE_CLASS (alt_stmt_code
) != tcc_reference
)
1064 if (vectype
== boolean_type_node
1065 || !vect_two_operations_perm_ok_p (stmts
, group_size
,
1066 vectype
, alt_stmt_code
))
1068 for (i
= 0; i
< group_size
; ++i
)
1069 if (gimple_assign_rhs_code (stmts
[i
]->stmt
) == alt_stmt_code
)
1072 if (dump_enabled_p ())
1074 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1075 "Build SLP failed: different operation "
1077 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
1079 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1081 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
1082 first_stmt_info
->stmt
, 0);
1087 *two_operators
= true;
1093 /* Traits for the hash_set to record failed SLP builds for a stmt set.
1094 Note we never remove apart from at destruction time so we do not
1095 need a special value for deleted that differs from empty. */
1098 typedef vec
<stmt_vec_info
> value_type
;
1099 typedef vec
<stmt_vec_info
> compare_type
;
1100 static inline hashval_t
hash (value_type
);
1101 static inline bool equal (value_type existing
, value_type candidate
);
1102 static inline bool is_empty (value_type x
) { return !x
.exists (); }
1103 static inline bool is_deleted (value_type x
) { return !x
.exists (); }
1104 static inline void mark_empty (value_type
&x
) { x
.release (); }
1105 static inline void mark_deleted (value_type
&x
) { x
.release (); }
1106 static inline void remove (value_type
&x
) { x
.release (); }
1109 bst_traits::hash (value_type x
)
1112 for (unsigned i
= 0; i
< x
.length (); ++i
)
1113 h
.add_int (gimple_uid (x
[i
]->stmt
));
1117 bst_traits::equal (value_type existing
, value_type candidate
)
1119 if (existing
.length () != candidate
.length ())
1121 for (unsigned i
= 0; i
< existing
.length (); ++i
)
1122 if (existing
[i
] != candidate
[i
])
1127 typedef hash_set
<vec
<gimple
*>, bst_traits
> scalar_stmts_set_t
;
1128 static scalar_stmts_set_t
*bst_fail
;
1130 typedef hash_map
<vec
<gimple
*>, slp_tree
,
1131 simple_hashmap_traits
<bst_traits
, slp_tree
> >
1132 scalar_stmts_to_slp_tree_map_t
;
1135 vect_build_slp_tree_2 (vec_info
*vinfo
,
1136 vec
<stmt_vec_info
> stmts
, unsigned int group_size
,
1137 poly_uint64
*max_nunits
,
1138 vec
<slp_tree
> *loads
,
1139 bool *matches
, unsigned *npermutes
, unsigned *tree_size
,
1140 unsigned max_tree_size
);
1143 vect_build_slp_tree (vec_info
*vinfo
,
1144 vec
<stmt_vec_info
> stmts
, unsigned int group_size
,
1145 poly_uint64
*max_nunits
, vec
<slp_tree
> *loads
,
1146 bool *matches
, unsigned *npermutes
, unsigned *tree_size
,
1147 unsigned max_tree_size
)
1149 if (bst_fail
->contains (stmts
))
1151 slp_tree res
= vect_build_slp_tree_2 (vinfo
, stmts
, group_size
, max_nunits
,
1152 loads
, matches
, npermutes
, tree_size
,
1154 /* When SLP build fails for stmts record this, otherwise SLP build
1155 can be exponential in time when we allow to construct parts from
1156 scalars, see PR81723. */
1159 vec
<stmt_vec_info
> x
;
1160 x
.create (stmts
.length ());
1167 /* Recursively build an SLP tree starting from NODE.
1168 Fail (and return a value not equal to zero) if def-stmts are not
1169 isomorphic, require data permutation or are of unsupported types of
1170 operation. Otherwise, return 0.
1171 The value returned is the depth in the SLP tree where a mismatch
1175 vect_build_slp_tree_2 (vec_info
*vinfo
,
1176 vec
<stmt_vec_info
> stmts
, unsigned int group_size
,
1177 poly_uint64
*max_nunits
,
1178 vec
<slp_tree
> *loads
,
1179 bool *matches
, unsigned *npermutes
, unsigned *tree_size
,
1180 unsigned max_tree_size
)
1182 unsigned nops
, i
, this_tree_size
= 0;
1183 poly_uint64 this_max_nunits
= *max_nunits
;
1188 stmt_vec_info stmt_info
= stmts
[0];
1189 if (gcall
*stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
))
1190 nops
= gimple_call_num_args (stmt
);
1191 else if (gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
1193 nops
= gimple_num_ops (stmt
) - 1;
1194 if (gimple_assign_rhs_code (stmt
) == COND_EXPR
)
1197 else if (is_a
<gphi
*> (stmt_info
->stmt
))
1202 /* If the SLP node is a PHI (induction or reduction), terminate
1204 if (gphi
*stmt
= dyn_cast
<gphi
*> (stmt_info
->stmt
))
1206 tree scalar_type
= TREE_TYPE (PHI_RESULT (stmt
));
1207 tree vectype
= get_vectype_for_scalar_type (scalar_type
);
1208 if (!vect_record_max_nunits (vinfo
, stmt_info
, group_size
, vectype
,
1212 vect_def_type def_type
= STMT_VINFO_DEF_TYPE (stmt_info
);
1213 /* Induction from different IVs is not supported. */
1214 if (def_type
== vect_induction_def
)
1216 stmt_vec_info other_info
;
1217 FOR_EACH_VEC_ELT (stmts
, i
, other_info
)
1218 if (stmt_info
!= other_info
)
1223 /* Else def types have to match. */
1224 stmt_vec_info other_info
;
1225 FOR_EACH_VEC_ELT (stmts
, i
, other_info
)
1227 /* But for reduction chains only check on the first stmt. */
1228 if (REDUC_GROUP_FIRST_ELEMENT (other_info
)
1229 && REDUC_GROUP_FIRST_ELEMENT (other_info
) != stmt_info
)
1231 if (STMT_VINFO_DEF_TYPE (other_info
) != def_type
)
1235 node
= vect_create_new_slp_node (stmts
);
1240 bool two_operators
= false;
1241 unsigned char *swap
= XALLOCAVEC (unsigned char, group_size
);
1242 if (!vect_build_slp_tree_1 (vinfo
, swap
, stmts
, group_size
,
1243 &this_max_nunits
, matches
, &two_operators
))
1246 /* If the SLP node is a load, terminate the recursion. */
1247 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
)
1248 && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info
)))
1250 *max_nunits
= this_max_nunits
;
1251 node
= vect_create_new_slp_node (stmts
);
1252 loads
->safe_push (node
);
1256 /* Get at the operands, verifying they are compatible. */
1257 vec
<slp_oprnd_info
> oprnds_info
= vect_create_oprnd_info (nops
, group_size
);
1258 slp_oprnd_info oprnd_info
;
1259 FOR_EACH_VEC_ELT (stmts
, i
, stmt_info
)
1261 int res
= vect_get_and_check_slp_defs (vinfo
, &swap
[i
],
1262 stmts
, i
, &oprnds_info
);
1264 matches
[(res
== -1) ? 0 : i
] = false;
1268 for (i
= 0; i
< group_size
; ++i
)
1271 vect_free_oprnd_info (oprnds_info
);
1275 auto_vec
<slp_tree
, 4> children
;
1276 auto_vec
<slp_tree
> this_loads
;
1278 stmt_info
= stmts
[0];
1281 max_tree_size
-= *tree_size
;
1283 /* Create SLP_TREE nodes for the definition node/s. */
1284 FOR_EACH_VEC_ELT (oprnds_info
, i
, oprnd_info
)
1287 unsigned old_nloads
= this_loads
.length ();
1288 unsigned old_tree_size
= this_tree_size
;
1291 if (oprnd_info
->first_dt
!= vect_internal_def
1292 && oprnd_info
->first_dt
!= vect_reduction_def
1293 && oprnd_info
->first_dt
!= vect_induction_def
)
1296 if (++this_tree_size
> max_tree_size
)
1298 FOR_EACH_VEC_ELT (children
, j
, child
)
1299 vect_free_slp_tree (child
, false);
1300 vect_free_oprnd_info (oprnds_info
);
1304 if ((child
= vect_build_slp_tree (vinfo
, oprnd_info
->def_stmts
,
1305 group_size
, &this_max_nunits
,
1306 &this_loads
, matches
, npermutes
,
1308 max_tree_size
)) != NULL
)
1310 /* If we have all children of child built up from scalars then just
1311 throw that away and build it up this node from scalars. */
1312 if (!SLP_TREE_CHILDREN (child
).is_empty ()
1313 /* ??? Rejecting patterns this way doesn't work. We'd have to
1314 do extra work to cancel the pattern so the uses see the
1316 && !is_pattern_stmt_p (SLP_TREE_SCALAR_STMTS (child
)[0]))
1318 slp_tree grandchild
;
1320 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child
), j
, grandchild
)
1321 if (SLP_TREE_DEF_TYPE (grandchild
) == vect_internal_def
)
1326 this_loads
.truncate (old_nloads
);
1327 this_tree_size
= old_tree_size
;
1328 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child
), j
, grandchild
)
1329 vect_free_slp_tree (grandchild
, false);
1330 SLP_TREE_CHILDREN (child
).truncate (0);
1332 dump_printf_loc (MSG_NOTE
, vect_location
,
1333 "Building parent vector operands from "
1334 "scalars instead\n");
1335 oprnd_info
->def_stmts
= vNULL
;
1336 SLP_TREE_DEF_TYPE (child
) = vect_external_def
;
1337 children
.safe_push (child
);
1342 oprnd_info
->def_stmts
= vNULL
;
1343 children
.safe_push (child
);
1347 /* If the SLP build failed fatally and we analyze a basic-block
1348 simply treat nodes we fail to build as externally defined
1349 (and thus build vectors from the scalar defs).
1350 The cost model will reject outright expensive cases.
1351 ??? This doesn't treat cases where permutation ultimatively
1352 fails (or we don't try permutation below). Ideally we'd
1353 even compute a permutation that will end up with the maximum
1355 if (is_a
<bb_vec_info
> (vinfo
)
1357 /* ??? Rejecting patterns this way doesn't work. We'd have to
1358 do extra work to cancel the pattern so the uses see the
1360 && !is_pattern_stmt_p (stmt_info
))
1362 dump_printf_loc (MSG_NOTE
, vect_location
,
1363 "Building vector operands from scalars\n");
1364 child
= vect_create_new_slp_node (oprnd_info
->def_stmts
);
1365 SLP_TREE_DEF_TYPE (child
) = vect_external_def
;
1366 children
.safe_push (child
);
1367 oprnd_info
->def_stmts
= vNULL
;
1371 /* If the SLP build for operand zero failed and operand zero
1372 and one can be commutated try that for the scalar stmts
1373 that failed the match. */
1375 /* A first scalar stmt mismatch signals a fatal mismatch. */
1377 /* ??? For COND_EXPRs we can swap the comparison operands
1378 as well as the arms under some constraints. */
1380 && oprnds_info
[1]->first_dt
== vect_internal_def
1381 && is_gimple_assign (stmt_info
->stmt
)
1382 /* Do so only if the number of not successful permutes was nor more
1383 than a cut-ff as re-trying the recursive match on
1384 possibly each level of the tree would expose exponential
1388 /* See whether we can swap the matching or the non-matching
1390 bool swap_not_matching
= true;
1393 for (j
= 0; j
< group_size
; ++j
)
1395 if (matches
[j
] != !swap_not_matching
)
1397 stmt_vec_info stmt_info
= stmts
[j
];
1398 /* Verify if we can swap operands of this stmt. */
1399 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
1401 || !commutative_tree_code (gimple_assign_rhs_code (stmt
)))
1403 if (!swap_not_matching
)
1405 swap_not_matching
= false;
1408 /* Verify if we can safely swap or if we committed to a
1409 specific operand order already.
1410 ??? Instead of modifying GIMPLE stmts here we could
1411 record whether we want to swap operands in the SLP
1412 node and temporarily do that when processing it
1413 (or wrap operand accessors in a helper). */
1414 else if (swap
[j
] != 0
1415 || STMT_VINFO_NUM_SLP_USES (stmt_info
))
1417 if (!swap_not_matching
)
1419 if (dump_enabled_p ())
1421 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
1423 "Build SLP failed: cannot swap "
1424 "operands of shared stmt ");
1425 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
,
1426 TDF_SLIM
, stmts
[j
]->stmt
, 0);
1430 swap_not_matching
= false;
1435 while (j
!= group_size
);
1437 /* Swap mismatched definition stmts. */
1438 dump_printf_loc (MSG_NOTE
, vect_location
,
1439 "Re-trying with swapped operands of stmts ");
1440 for (j
= 0; j
< group_size
; ++j
)
1441 if (matches
[j
] == !swap_not_matching
)
1443 std::swap (oprnds_info
[0]->def_stmts
[j
],
1444 oprnds_info
[1]->def_stmts
[j
]);
1445 dump_printf (MSG_NOTE
, "%d ", j
);
1447 dump_printf (MSG_NOTE
, "\n");
1448 /* And try again with scratch 'matches' ... */
1449 bool *tem
= XALLOCAVEC (bool, group_size
);
1450 if ((child
= vect_build_slp_tree (vinfo
, oprnd_info
->def_stmts
,
1451 group_size
, &this_max_nunits
,
1452 &this_loads
, tem
, npermutes
,
1454 max_tree_size
)) != NULL
)
1456 /* ... so if successful we can apply the operand swapping
1457 to the GIMPLE IL. This is necessary because for example
1458 vect_get_slp_defs uses operand indexes and thus expects
1459 canonical operand order. This is also necessary even
1460 if we end up building the operand from scalars as
1461 we'll continue to process swapped operand two. */
1462 for (j
= 0; j
< group_size
; ++j
)
1463 gimple_set_plf (stmts
[j
]->stmt
, GF_PLF_1
, false);
1464 for (j
= 0; j
< group_size
; ++j
)
1465 if (matches
[j
] == !swap_not_matching
)
1467 gassign
*stmt
= as_a
<gassign
*> (stmts
[j
]->stmt
);
1468 /* Avoid swapping operands twice. */
1469 if (gimple_plf (stmt
, GF_PLF_1
))
1471 swap_ssa_operands (stmt
, gimple_assign_rhs1_ptr (stmt
),
1472 gimple_assign_rhs2_ptr (stmt
));
1473 gimple_set_plf (stmt
, GF_PLF_1
, true);
1475 /* Verify we swap all duplicates or none. */
1477 for (j
= 0; j
< group_size
; ++j
)
1478 gcc_assert (gimple_plf (stmts
[j
]->stmt
, GF_PLF_1
)
1479 == (matches
[j
] == !swap_not_matching
));
1481 /* If we have all children of child built up from scalars then
1482 just throw that away and build it up this node from scalars. */
1483 if (!SLP_TREE_CHILDREN (child
).is_empty ()
1484 /* ??? Rejecting patterns this way doesn't work. We'd have
1485 to do extra work to cancel the pattern so the uses see the
1487 && !is_pattern_stmt_p (SLP_TREE_SCALAR_STMTS (child
)[0]))
1490 slp_tree grandchild
;
1492 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child
), j
, grandchild
)
1493 if (SLP_TREE_DEF_TYPE (grandchild
) == vect_internal_def
)
1498 this_loads
.truncate (old_nloads
);
1499 this_tree_size
= old_tree_size
;
1500 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child
), j
, grandchild
)
1501 vect_free_slp_tree (grandchild
, false);
1502 SLP_TREE_CHILDREN (child
).truncate (0);
1504 dump_printf_loc (MSG_NOTE
, vect_location
,
1505 "Building parent vector operands from "
1506 "scalars instead\n");
1507 oprnd_info
->def_stmts
= vNULL
;
1508 SLP_TREE_DEF_TYPE (child
) = vect_external_def
;
1509 children
.safe_push (child
);
1514 oprnd_info
->def_stmts
= vNULL
;
1515 children
.safe_push (child
);
1523 gcc_assert (child
== NULL
);
1524 FOR_EACH_VEC_ELT (children
, j
, child
)
1525 vect_free_slp_tree (child
, false);
1526 vect_free_oprnd_info (oprnds_info
);
1530 vect_free_oprnd_info (oprnds_info
);
1533 *tree_size
+= this_tree_size
;
1534 *max_nunits
= this_max_nunits
;
1535 loads
->safe_splice (this_loads
);
1537 node
= vect_create_new_slp_node (stmts
);
1538 SLP_TREE_TWO_OPERATORS (node
) = two_operators
;
1539 SLP_TREE_CHILDREN (node
).splice (children
);
1543 /* Dump a slp tree NODE using flags specified in DUMP_KIND. */
1546 vect_print_slp_tree (dump_flags_t dump_kind
, dump_location_t loc
,
1550 stmt_vec_info stmt_info
;
1553 dump_printf_loc (dump_kind
, loc
, "node%s\n",
1554 SLP_TREE_DEF_TYPE (node
) != vect_internal_def
1555 ? " (external)" : "");
1556 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt_info
)
1558 dump_printf_loc (dump_kind
, loc
, "\tstmt %d ", i
);
1559 dump_gimple_stmt (dump_kind
, TDF_SLIM
, stmt_info
->stmt
, 0);
1561 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1562 vect_print_slp_tree (dump_kind
, loc
, child
);
1566 /* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
1567 If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
1568 J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
1569 stmts in NODE are to be marked. */
1572 vect_mark_slp_stmts (slp_tree node
, enum slp_vect_type mark
, int j
)
1575 stmt_vec_info stmt_info
;
1578 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
1581 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt_info
)
1582 if (j
< 0 || i
== j
)
1583 STMT_SLP_TYPE (stmt_info
) = mark
;
1585 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1586 vect_mark_slp_stmts (child
, mark
, j
);
1590 /* Mark the statements of the tree rooted at NODE as relevant (vect_used). */
1593 vect_mark_slp_stmts_relevant (slp_tree node
)
1596 stmt_vec_info stmt_info
;
1599 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
1602 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt_info
)
1604 gcc_assert (!STMT_VINFO_RELEVANT (stmt_info
)
1605 || STMT_VINFO_RELEVANT (stmt_info
) == vect_used_in_scope
);
1606 STMT_VINFO_RELEVANT (stmt_info
) = vect_used_in_scope
;
1609 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1610 vect_mark_slp_stmts_relevant (child
);
1614 /* Rearrange the statements of NODE according to PERMUTATION. */
1617 vect_slp_rearrange_stmts (slp_tree node
, unsigned int group_size
,
1618 vec
<unsigned> permutation
)
1620 stmt_vec_info stmt_info
;
1621 vec
<stmt_vec_info
> tmp_stmts
;
1625 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1626 vect_slp_rearrange_stmts (child
, group_size
, permutation
);
1628 gcc_assert (group_size
== SLP_TREE_SCALAR_STMTS (node
).length ());
1629 tmp_stmts
.create (group_size
);
1630 tmp_stmts
.quick_grow_cleared (group_size
);
1632 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt_info
)
1633 tmp_stmts
[permutation
[i
]] = stmt_info
;
1635 SLP_TREE_SCALAR_STMTS (node
).release ();
1636 SLP_TREE_SCALAR_STMTS (node
) = tmp_stmts
;
1640 /* Attempt to reorder stmts in a reduction chain so that we don't
1641 require any load permutation. Return true if that was possible,
1642 otherwise return false. */
1645 vect_attempt_slp_rearrange_stmts (slp_instance slp_instn
)
1647 unsigned int group_size
= SLP_INSTANCE_GROUP_SIZE (slp_instn
);
1650 slp_tree node
, load
;
1652 /* Compare all the permutation sequences to the first one. We know
1653 that at least one load is permuted. */
1654 node
= SLP_INSTANCE_LOADS (slp_instn
)[0];
1655 if (!node
->load_permutation
.exists ())
1657 for (i
= 1; SLP_INSTANCE_LOADS (slp_instn
).iterate (i
, &load
); ++i
)
1659 if (!load
->load_permutation
.exists ())
1661 FOR_EACH_VEC_ELT (load
->load_permutation
, j
, lidx
)
1662 if (lidx
!= node
->load_permutation
[j
])
1666 /* Check that the loads in the first sequence are different and there
1667 are no gaps between them. */
1668 auto_sbitmap
load_index (group_size
);
1669 bitmap_clear (load_index
);
1670 FOR_EACH_VEC_ELT (node
->load_permutation
, i
, lidx
)
1672 if (lidx
>= group_size
)
1674 if (bitmap_bit_p (load_index
, lidx
))
1677 bitmap_set_bit (load_index
, lidx
);
1679 for (i
= 0; i
< group_size
; i
++)
1680 if (!bitmap_bit_p (load_index
, i
))
1683 /* This permutation is valid for reduction. Since the order of the
1684 statements in the nodes is not important unless they are memory
1685 accesses, we can rearrange the statements in all the nodes
1686 according to the order of the loads. */
1687 vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn
), group_size
,
1688 node
->load_permutation
);
1690 /* We are done, no actual permutations need to be generated. */
1691 poly_uint64 unrolling_factor
= SLP_INSTANCE_UNROLLING_FACTOR (slp_instn
);
1692 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1694 stmt_vec_info first_stmt_info
= SLP_TREE_SCALAR_STMTS (node
)[0];
1695 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (first_stmt_info
);
1696 /* But we have to keep those permutations that are required because
1697 of handling of gaps. */
1698 if (known_eq (unrolling_factor
, 1U)
1699 || (group_size
== DR_GROUP_SIZE (first_stmt_info
)
1700 && DR_GROUP_GAP (first_stmt_info
) == 0))
1701 SLP_TREE_LOAD_PERMUTATION (node
).release ();
1703 for (j
= 0; j
< SLP_TREE_LOAD_PERMUTATION (node
).length (); ++j
)
1704 SLP_TREE_LOAD_PERMUTATION (node
)[j
] = j
;
1710 /* Check if the required load permutations in the SLP instance
1711 SLP_INSTN are supported. */
1714 vect_supported_load_permutation_p (slp_instance slp_instn
)
1716 unsigned int group_size
= SLP_INSTANCE_GROUP_SIZE (slp_instn
);
1717 unsigned int i
, j
, k
, next
;
1720 if (dump_enabled_p ())
1722 dump_printf_loc (MSG_NOTE
, vect_location
, "Load permutation ");
1723 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1724 if (node
->load_permutation
.exists ())
1725 FOR_EACH_VEC_ELT (node
->load_permutation
, j
, next
)
1726 dump_printf (MSG_NOTE
, "%d ", next
);
1728 for (k
= 0; k
< group_size
; ++k
)
1729 dump_printf (MSG_NOTE
, "%d ", k
);
1730 dump_printf (MSG_NOTE
, "\n");
1733 /* In case of reduction every load permutation is allowed, since the order
1734 of the reduction statements is not important (as opposed to the case of
1735 grouped stores). The only condition we need to check is that all the
1736 load nodes are of the same size and have the same permutation (and then
1737 rearrange all the nodes of the SLP instance according to this
1740 /* Check that all the load nodes are of the same size. */
1741 /* ??? Can't we assert this? */
1742 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1743 if (SLP_TREE_SCALAR_STMTS (node
).length () != (unsigned) group_size
)
1746 node
= SLP_INSTANCE_TREE (slp_instn
);
1747 stmt_vec_info stmt_info
= SLP_TREE_SCALAR_STMTS (node
)[0];
1749 /* Reduction (there are no data-refs in the root).
1750 In reduction chain the order of the loads is not important. */
1751 if (!STMT_VINFO_DATA_REF (stmt_info
)
1752 && !REDUC_GROUP_FIRST_ELEMENT (stmt_info
))
1753 vect_attempt_slp_rearrange_stmts (slp_instn
);
1755 /* In basic block vectorization we allow any subchain of an interleaving
1757 FORNOW: not supported in loop SLP because of realignment compications. */
1758 if (STMT_VINFO_BB_VINFO (stmt_info
))
1760 /* Check whether the loads in an instance form a subchain and thus
1761 no permutation is necessary. */
1762 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1764 if (!SLP_TREE_LOAD_PERMUTATION (node
).exists ())
1766 bool subchain_p
= true;
1767 stmt_vec_info next_load_info
= NULL
;
1768 stmt_vec_info load_info
;
1769 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), j
, load_info
)
1772 && (next_load_info
!= load_info
1773 || DR_GROUP_GAP (load_info
) != 1))
1778 next_load_info
= DR_GROUP_NEXT_ELEMENT (load_info
);
1781 SLP_TREE_LOAD_PERMUTATION (node
).release ();
1784 stmt_vec_info group_info
= SLP_TREE_SCALAR_STMTS (node
)[0];
1785 group_info
= DR_GROUP_FIRST_ELEMENT (group_info
);
1786 unsigned HOST_WIDE_INT nunits
;
1787 unsigned k
, maxk
= 0;
1788 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (node
), j
, k
)
1791 /* In BB vectorization we may not actually use a loaded vector
1792 accessing elements in excess of DR_GROUP_SIZE. */
1793 tree vectype
= STMT_VINFO_VECTYPE (group_info
);
1794 if (!TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&nunits
)
1795 || maxk
>= (DR_GROUP_SIZE (group_info
) & ~(nunits
- 1)))
1797 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1798 "BB vectorization with gaps at the end of "
1799 "a load is not supported\n");
1803 /* Verify the permutation can be generated. */
1806 if (!vect_transform_slp_perm_load (node
, tem
, NULL
,
1807 1, slp_instn
, true, &n_perms
))
1809 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
1811 "unsupported load permutation\n");
1819 /* For loop vectorization verify we can generate the permutation. Be
1820 conservative about the vectorization factor, there are permutations
1821 that will use three vector inputs only starting from a specific factor
1822 and the vectorization factor is not yet final.
1823 ??? The SLP instance unrolling factor might not be the maximum one. */
1826 = force_common_multiple (SLP_INSTANCE_UNROLLING_FACTOR (slp_instn
),
1827 LOOP_VINFO_VECT_FACTOR
1828 (STMT_VINFO_LOOP_VINFO (stmt_info
)));
1829 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1830 if (node
->load_permutation
.exists ()
1831 && !vect_transform_slp_perm_load (node
, vNULL
, NULL
, test_vf
,
1832 slp_instn
, true, &n_perms
))
1839 /* Find the last store in SLP INSTANCE. */
1842 vect_find_last_scalar_stmt_in_slp (slp_tree node
)
1844 stmt_vec_info last
= NULL
;
1845 stmt_vec_info stmt_vinfo
;
1847 for (int i
= 0; SLP_TREE_SCALAR_STMTS (node
).iterate (i
, &stmt_vinfo
); i
++)
1849 if (is_pattern_stmt_p (stmt_vinfo
))
1850 stmt_vinfo
= STMT_VINFO_RELATED_STMT (stmt_vinfo
);
1851 last
= last
? get_later_stmt (stmt_vinfo
, last
) : stmt_vinfo
;
1857 /* Splits a group of stores, currently beginning at FIRST_STMT, into two groups:
1858 one (still beginning at FIRST_STMT) of size GROUP1_SIZE (also containing
1859 the first GROUP1_SIZE stmts, since stores are consecutive), the second
1860 containing the remainder.
1861 Return the first stmt in the second group. */
1864 vect_split_slp_store_group (gimple
*first_stmt
, unsigned group1_size
)
1866 stmt_vec_info first_vinfo
= vinfo_for_stmt (first_stmt
);
1867 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_vinfo
) == first_vinfo
);
1868 gcc_assert (group1_size
> 0);
1869 int group2_size
= DR_GROUP_SIZE (first_vinfo
) - group1_size
;
1870 gcc_assert (group2_size
> 0);
1871 DR_GROUP_SIZE (first_vinfo
) = group1_size
;
1873 stmt_vec_info stmt_info
= first_vinfo
;
1874 for (unsigned i
= group1_size
; i
> 1; i
--)
1876 stmt_info
= DR_GROUP_NEXT_ELEMENT (stmt_info
);
1877 gcc_assert (DR_GROUP_GAP (stmt_info
) == 1);
1879 /* STMT is now the last element of the first group. */
1880 stmt_vec_info group2
= DR_GROUP_NEXT_ELEMENT (stmt_info
);
1881 DR_GROUP_NEXT_ELEMENT (stmt_info
) = 0;
1883 DR_GROUP_SIZE (group2
) = group2_size
;
1884 for (stmt_info
= group2
; stmt_info
;
1885 stmt_info
= DR_GROUP_NEXT_ELEMENT (stmt_info
))
1887 DR_GROUP_FIRST_ELEMENT (stmt_info
) = group2
;
1888 gcc_assert (DR_GROUP_GAP (stmt_info
) == 1);
1891 /* For the second group, the DR_GROUP_GAP is that before the original group,
1892 plus skipping over the first vector. */
1893 DR_GROUP_GAP (group2
) = DR_GROUP_GAP (first_vinfo
) + group1_size
;
1895 /* DR_GROUP_GAP of the first group now has to skip over the second group too. */
1896 DR_GROUP_GAP (first_vinfo
) += group2_size
;
1898 if (dump_enabled_p ())
1899 dump_printf_loc (MSG_NOTE
, vect_location
, "Split group into %d and %d\n",
1900 group1_size
, group2_size
);
1905 /* Calculate the unrolling factor for an SLP instance with GROUP_SIZE
1906 statements and a vector of NUNITS elements. */
1909 calculate_unrolling_factor (poly_uint64 nunits
, unsigned int group_size
)
1911 return exact_div (common_multiple (nunits
, group_size
), group_size
);
1914 /* Analyze an SLP instance starting from a group of grouped stores. Call
1915 vect_build_slp_tree to build a tree of packed stmts if possible.
1916 Return FALSE if it's impossible to SLP any stmt in the loop. */
1919 vect_analyze_slp_instance (vec_info
*vinfo
,
1920 gimple
*stmt
, unsigned max_tree_size
)
1922 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1923 slp_instance new_instance
;
1925 unsigned int group_size
;
1926 tree vectype
, scalar_type
= NULL_TREE
;
1928 vec
<slp_tree
> loads
;
1929 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
1930 vec
<stmt_vec_info
> scalar_stmts
;
1932 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
1934 scalar_type
= TREE_TYPE (DR_REF (dr
));
1935 vectype
= get_vectype_for_scalar_type (scalar_type
);
1936 group_size
= DR_GROUP_SIZE (stmt_info
);
1938 else if (!dr
&& REDUC_GROUP_FIRST_ELEMENT (stmt_info
))
1940 gcc_assert (is_a
<loop_vec_info
> (vinfo
));
1941 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1942 group_size
= REDUC_GROUP_SIZE (stmt_info
);
1946 gcc_assert (is_a
<loop_vec_info
> (vinfo
));
1947 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1948 group_size
= as_a
<loop_vec_info
> (vinfo
)->reductions
.length ();
1953 if (dump_enabled_p ())
1955 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1956 "Build SLP failed: unsupported data-type ");
1957 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, scalar_type
);
1958 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
1963 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
1965 /* Create a node (a root of the SLP tree) for the packed grouped stores. */
1966 scalar_stmts
.create (group_size
);
1967 stmt_vec_info next_info
= stmt_info
;
1968 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
1970 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
1973 if (STMT_VINFO_IN_PATTERN_P (next_info
)
1974 && STMT_VINFO_RELATED_STMT (next_info
))
1975 scalar_stmts
.safe_push (STMT_VINFO_RELATED_STMT (next_info
));
1977 scalar_stmts
.safe_push (next_info
);
1978 next_info
= DR_GROUP_NEXT_ELEMENT (next_info
);
1981 else if (!dr
&& REDUC_GROUP_FIRST_ELEMENT (stmt_info
))
1983 /* Collect the reduction stmts and store them in
1984 SLP_TREE_SCALAR_STMTS. */
1987 if (STMT_VINFO_IN_PATTERN_P (next_info
)
1988 && STMT_VINFO_RELATED_STMT (next_info
))
1989 scalar_stmts
.safe_push (STMT_VINFO_RELATED_STMT (next_info
));
1991 scalar_stmts
.safe_push (next_info
);
1992 next_info
= REDUC_GROUP_NEXT_ELEMENT (next_info
);
1994 /* Mark the first element of the reduction chain as reduction to properly
1995 transform the node. In the reduction analysis phase only the last
1996 element of the chain is marked as reduction. */
1997 STMT_VINFO_DEF_TYPE (stmt_info
) = vect_reduction_def
;
2001 /* Collect reduction statements. */
2002 vec
<stmt_vec_info
> reductions
= as_a
<loop_vec_info
> (vinfo
)->reductions
;
2003 for (i
= 0; reductions
.iterate (i
, &next_info
); i
++)
2004 scalar_stmts
.safe_push (next_info
);
2007 loads
.create (group_size
);
2009 /* Build the tree for the SLP instance. */
2010 bool *matches
= XALLOCAVEC (bool, group_size
);
2011 unsigned npermutes
= 0;
2012 bst_fail
= new scalar_stmts_set_t ();
2013 poly_uint64 max_nunits
= nunits
;
2014 node
= vect_build_slp_tree (vinfo
, scalar_stmts
, group_size
,
2015 &max_nunits
, &loads
, matches
, &npermutes
,
2016 NULL
, max_tree_size
);
2020 /* Calculate the unrolling factor based on the smallest type. */
2021 poly_uint64 unrolling_factor
2022 = calculate_unrolling_factor (max_nunits
, group_size
);
2024 if (maybe_ne (unrolling_factor
, 1U)
2025 && is_a
<bb_vec_info
> (vinfo
))
2027 unsigned HOST_WIDE_INT const_max_nunits
;
2028 if (!max_nunits
.is_constant (&const_max_nunits
)
2029 || const_max_nunits
> group_size
)
2031 if (dump_enabled_p ())
2032 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2033 "Build SLP failed: store group "
2034 "size not a multiple of the vector size "
2035 "in basic block SLP\n");
2036 vect_free_slp_tree (node
, false);
2040 /* Fatal mismatch. */
2041 matches
[group_size
/ const_max_nunits
* const_max_nunits
] = false;
2042 vect_free_slp_tree (node
, false);
2047 /* Create a new SLP instance. */
2048 new_instance
= XNEW (struct _slp_instance
);
2049 SLP_INSTANCE_TREE (new_instance
) = node
;
2050 SLP_INSTANCE_GROUP_SIZE (new_instance
) = group_size
;
2051 SLP_INSTANCE_UNROLLING_FACTOR (new_instance
) = unrolling_factor
;
2052 SLP_INSTANCE_LOADS (new_instance
) = loads
;
2054 /* Compute the load permutation. */
2056 bool loads_permuted
= false;
2057 FOR_EACH_VEC_ELT (loads
, i
, load_node
)
2059 vec
<unsigned> load_permutation
;
2061 stmt_vec_info load_info
;
2062 bool this_load_permuted
= false;
2063 load_permutation
.create (group_size
);
2064 stmt_vec_info first_stmt_info
= DR_GROUP_FIRST_ELEMENT
2065 (SLP_TREE_SCALAR_STMTS (load_node
)[0]);
2066 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node
), j
, load_info
)
2068 int load_place
= vect_get_place_in_interleaving_chain
2069 (load_info
, first_stmt_info
);
2070 gcc_assert (load_place
!= -1);
2071 if (load_place
!= j
)
2072 this_load_permuted
= true;
2073 load_permutation
.safe_push (load_place
);
2075 if (!this_load_permuted
2076 /* The load requires permutation when unrolling exposes
2077 a gap either because the group is larger than the SLP
2078 group-size or because there is a gap between the groups. */
2079 && (known_eq (unrolling_factor
, 1U)
2080 || (group_size
== DR_GROUP_SIZE (first_stmt_info
)
2081 && DR_GROUP_GAP (first_stmt_info
) == 0)))
2083 load_permutation
.release ();
2086 SLP_TREE_LOAD_PERMUTATION (load_node
) = load_permutation
;
2087 loads_permuted
= true;
2092 if (!vect_supported_load_permutation_p (new_instance
))
2094 if (dump_enabled_p ())
2096 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2097 "Build SLP failed: unsupported load "
2099 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
,
2102 vect_free_slp_instance (new_instance
, false);
2107 /* If the loads and stores can be handled with load/store-lan
2108 instructions do not generate this SLP instance. */
2109 if (is_a
<loop_vec_info
> (vinfo
)
2111 && dr
&& vect_store_lanes_supported (vectype
, group_size
, false))
2114 FOR_EACH_VEC_ELT (loads
, i
, load_node
)
2116 stmt_vec_info stmt_vinfo
= DR_GROUP_FIRST_ELEMENT
2117 (SLP_TREE_SCALAR_STMTS (load_node
)[0]);
2118 /* Use SLP for strided accesses (or if we can't load-lanes). */
2119 if (STMT_VINFO_STRIDED_P (stmt_vinfo
)
2120 || ! vect_load_lanes_supported
2121 (STMT_VINFO_VECTYPE (stmt_vinfo
),
2122 DR_GROUP_SIZE (stmt_vinfo
), false))
2125 if (i
== loads
.length ())
2127 if (dump_enabled_p ())
2128 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2129 "Built SLP cancelled: can use "
2130 "load/store-lanes\n");
2131 vect_free_slp_instance (new_instance
, false);
2136 vinfo
->slp_instances
.safe_push (new_instance
);
2138 if (dump_enabled_p ())
2140 dump_printf_loc (MSG_NOTE
, vect_location
,
2141 "Final SLP tree for instance:\n");
2142 vect_print_slp_tree (MSG_NOTE
, vect_location
, node
);
2150 /* Failed to SLP. */
2151 /* Free the allocated memory. */
2152 scalar_stmts
.release ();
2156 /* For basic block SLP, try to break the group up into multiples of the
2158 unsigned HOST_WIDE_INT const_nunits
;
2159 if (is_a
<bb_vec_info
> (vinfo
)
2160 && STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt
))
2161 && DR_GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
))
2162 && nunits
.is_constant (&const_nunits
))
2164 /* We consider breaking the group only on VF boundaries from the existing
2166 for (i
= 0; i
< group_size
; i
++)
2167 if (!matches
[i
]) break;
2169 if (i
>= const_nunits
&& i
< group_size
)
2171 /* Split into two groups at the first vector boundary before i. */
2172 gcc_assert ((const_nunits
& (const_nunits
- 1)) == 0);
2173 unsigned group1_size
= i
& ~(const_nunits
- 1);
2175 gimple
*rest
= vect_split_slp_store_group (stmt
, group1_size
);
2176 bool res
= vect_analyze_slp_instance (vinfo
, stmt
, max_tree_size
);
2177 /* If the first non-match was in the middle of a vector,
2178 skip the rest of that vector. */
2179 if (group1_size
< i
)
2181 i
= group1_size
+ const_nunits
;
2183 rest
= vect_split_slp_store_group (rest
, const_nunits
);
2186 res
|= vect_analyze_slp_instance (vinfo
, rest
, max_tree_size
);
2189 /* Even though the first vector did not all match, we might be able to SLP
2190 (some) of the remainder. FORNOW ignore this possibility. */
2197 /* Check if there are stmts in the loop can be vectorized using SLP. Build SLP
2198 trees of packed scalar stmts if SLP is possible. */
2201 vect_analyze_slp (vec_info
*vinfo
, unsigned max_tree_size
)
2204 stmt_vec_info first_element
;
2206 DUMP_VECT_SCOPE ("vect_analyze_slp");
2208 /* Find SLP sequences starting from groups of grouped stores. */
2209 FOR_EACH_VEC_ELT (vinfo
->grouped_stores
, i
, first_element
)
2210 vect_analyze_slp_instance (vinfo
, first_element
, max_tree_size
);
2212 if (loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
))
2214 if (loop_vinfo
->reduction_chains
.length () > 0)
2216 /* Find SLP sequences starting from reduction chains. */
2217 FOR_EACH_VEC_ELT (loop_vinfo
->reduction_chains
, i
, first_element
)
2218 if (! vect_analyze_slp_instance (vinfo
, first_element
,
2221 /* Dissolve reduction chain group. */
2222 stmt_vec_info vinfo
= first_element
;
2225 stmt_vec_info next
= REDUC_GROUP_NEXT_ELEMENT (vinfo
);
2226 REDUC_GROUP_FIRST_ELEMENT (vinfo
) = NULL
;
2227 REDUC_GROUP_NEXT_ELEMENT (vinfo
) = NULL
;
2230 STMT_VINFO_DEF_TYPE (first_element
) = vect_internal_def
;
2234 /* Find SLP sequences starting from groups of reductions. */
2235 if (loop_vinfo
->reductions
.length () > 1)
2236 vect_analyze_slp_instance (vinfo
, loop_vinfo
->reductions
[0],
2244 /* For each possible SLP instance decide whether to SLP it and calculate overall
2245 unrolling factor needed to SLP the loop. Return TRUE if decided to SLP at
2246 least one instance. */
2249 vect_make_slp_decision (loop_vec_info loop_vinfo
)
2252 poly_uint64 unrolling_factor
= 1;
2253 vec
<slp_instance
> slp_instances
= LOOP_VINFO_SLP_INSTANCES (loop_vinfo
);
2254 slp_instance instance
;
2255 int decided_to_slp
= 0;
2257 DUMP_VECT_SCOPE ("vect_make_slp_decision");
2259 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
2261 /* FORNOW: SLP if you can. */
2262 /* All unroll factors have the form current_vector_size * X for some
2263 rational X, so they must have a common multiple. */
2265 = force_common_multiple (unrolling_factor
,
2266 SLP_INSTANCE_UNROLLING_FACTOR (instance
));
2268 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
2269 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
2270 loop-based vectorization. Such stmts will be marked as HYBRID. */
2271 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance
), pure_slp
, -1);
2275 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo
) = unrolling_factor
;
2277 if (decided_to_slp
&& dump_enabled_p ())
2279 dump_printf_loc (MSG_NOTE
, vect_location
,
2280 "Decided to SLP %d instances. Unrolling factor ",
2282 dump_dec (MSG_NOTE
, unrolling_factor
);
2283 dump_printf (MSG_NOTE
, "\n");
2286 return (decided_to_slp
> 0);
2290 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
2291 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
/* NOTE(review): lossy extraction -- braces and a few statements are missing
   from this copy; check upstream gcc/tree-vect-slp.c before editing.  */
2294 vect_detect_hybrid_slp_stmts (slp_tree node
, unsigned i
, slp_vect_type stype
)
2296 stmt_vec_info stmt_vinfo
= SLP_TREE_SCALAR_STMTS (node
)[i
];
2297 imm_use_iterator imm_iter
;
2299 stmt_vec_info use_vinfo
;
2301 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
2304 /* Propagate hybrid down the SLP tree. */
2305 if (stype
== hybrid
)
2307 else if (HYBRID_SLP_STMT (stmt_vinfo
))
2311 /* Check if a pure SLP stmt has uses in non-SLP stmts. */
2312 gcc_checking_assert (PURE_SLP_STMT (stmt_vinfo
));
2313 /* If we get a pattern stmt here we have to use the LHS of the
2314 original stmt for immediate uses. */
2315 gimple
*stmt
= stmt_vinfo
->stmt
;
2316 if (! STMT_VINFO_IN_PATTERN_P (stmt_vinfo
)
2317 && STMT_VINFO_RELATED_STMT (stmt_vinfo
))
2318 stmt
= STMT_VINFO_RELATED_STMT (stmt_vinfo
)->stmt
;
/* PHIs define their result directly; other stmts are expected to
   have a single SSA def.  */
2320 if (gimple_code (stmt
) == GIMPLE_PHI
)
2321 def
= gimple_phi_result (stmt
);
2323 def
= SINGLE_SSA_TREE_OPERAND (stmt
, SSA_OP_DEF
);
2325 FOR_EACH_IMM_USE_STMT (use_stmt
, imm_iter
, def
)
2327 use_vinfo
= loop_vinfo
->lookup_stmt (use_stmt
);
2330 if (STMT_VINFO_IN_PATTERN_P (use_vinfo
)
2331 && STMT_VINFO_RELATED_STMT (use_vinfo
))
2332 use_vinfo
= STMT_VINFO_RELATED_STMT (use_vinfo
);
/* A relevant non-SLP use (that is not the reduction PHI itself)
   forces the def stmt to be hybrid.  */
2333 if (!STMT_SLP_TYPE (use_vinfo
)
2334 && (STMT_VINFO_RELEVANT (use_vinfo
)
2335 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo
)))
2336 && !(gimple_code (use_stmt
) == GIMPLE_PHI
2337 && STMT_VINFO_DEF_TYPE (use_vinfo
) == vect_reduction_def
))
2339 if (dump_enabled_p ())
2341 dump_printf_loc (MSG_NOTE
, vect_location
, "use of SLP "
2342 "def in non-SLP stmt: ");
2343 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, use_stmt
, 0);
2351 && !HYBRID_SLP_STMT (stmt_vinfo
))
2353 if (dump_enabled_p ())
2355 dump_printf_loc (MSG_NOTE
, vect_location
, "marking hybrid: ");
2356 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt_vinfo
->stmt
, 0);
2358 STMT_SLP_TYPE (stmt_vinfo
) = hybrid
;
/* Recurse into internal children only; external defs have no stmts
   of their own to mark.  */
2361 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), j
, child
)
2362 if (SLP_TREE_DEF_TYPE (child
) != vect_external_def
)
2363 vect_detect_hybrid_slp_stmts (child
, i
, stype
);
2366 /* Helpers for vect_detect_hybrid_slp walking pattern stmt uses. */
/* Walk-callback for operands (*TP): if the operand is defined by a pure-SLP
   stmt, demote that stmt to hybrid.  DATA is the walk_stmt_info carrying
   the loop_vec_info.  NOTE(review): lossy extraction -- the return
   statement(s) and some braces are missing from this copy.  */
2369 vect_detect_hybrid_slp_1 (tree
*tp
, int *, void *data
)
2371 walk_stmt_info
*wi
= (walk_stmt_info
*)data
;
2372 loop_vec_info loop_vinfo
= (loop_vec_info
) wi
->info
;
2377 stmt_vec_info def_stmt_info
= loop_vinfo
->lookup_def (*tp
);
2378 if (def_stmt_info
&& PURE_SLP_STMT (def_stmt_info
))
2380 if (dump_enabled_p ())
2382 dump_printf_loc (MSG_NOTE
, vect_location
, "marking hybrid: ");
2383 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, def_stmt_info
->stmt
, 0);
2385 STMT_SLP_TYPE (def_stmt_info
) = hybrid
;
/* Walk-callback for whole stmts: decide whether the stmt at GSI is a
   relevant non-SLP use whose operands should be scanned (by
   vect_detect_hybrid_slp_1) for pure-SLP defs to demote.  NOTE(review):
   lossy extraction -- the trailing walk_stmt_info parameter, the *handled
   assignment and the return are missing from this copy; see upstream.  */
2392 vect_detect_hybrid_slp_2 (gimple_stmt_iterator
*gsi
, bool *handled
,
2395 loop_vec_info loop_vinfo
= (loop_vec_info
) wi
->info
;
2396 stmt_vec_info use_vinfo
= loop_vinfo
->lookup_stmt (gsi_stmt (*gsi
));
2397 /* If the stmt is in a SLP instance then this isn't a reason
2398 to mark use definitions in other SLP instances as hybrid. */
2399 if (! STMT_SLP_TYPE (use_vinfo
)
2400 && (STMT_VINFO_RELEVANT (use_vinfo
)
2401 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo
)))
2402 && ! (gimple_code (gsi_stmt (*gsi
)) == GIMPLE_PHI
2403 && STMT_VINFO_DEF_TYPE (use_vinfo
) == vect_reduction_def
))
2410 /* Find stmts that must be both vectorized and SLPed. */
/* Driver: (1) walk every pattern stmt in the loop body with the two
   callbacks above, because immediate uses inside pattern sequences are
   not recorded; (2) walk each SLP instance tree per scalar lane with
   vect_detect_hybrid_slp_stmts.  NOTE(review): lossy extraction --
   braces, gsi_next and the second argument of the lane loop's call are
   missing from this copy.  */
2413 vect_detect_hybrid_slp (loop_vec_info loop_vinfo
)
2416 vec
<slp_instance
> slp_instances
= LOOP_VINFO_SLP_INSTANCES (loop_vinfo
);
2417 slp_instance instance
;
2419 DUMP_VECT_SCOPE ("vect_detect_hybrid_slp");
2421 /* First walk all pattern stmt in the loop and mark defs of uses as
2422 hybrid because immediate uses in them are not recorded. */
2423 for (i
= 0; i
< LOOP_VINFO_LOOP (loop_vinfo
)->num_nodes
; ++i
)
2425 basic_block bb
= LOOP_VINFO_BBS (loop_vinfo
)[i
];
2426 for (gimple_stmt_iterator gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
);
2429 gimple
*stmt
= gsi_stmt (gsi
);
2430 stmt_vec_info stmt_info
= loop_vinfo
->lookup_stmt (stmt
);
2431 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
2434 memset (&wi
, 0, sizeof (wi
));
2435 wi
.info
= loop_vinfo
;
2436 gimple_stmt_iterator gsi2
2437 = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info
)->stmt
);
/* Walk the main pattern stmt and any pattern def-sequence.  */
2438 walk_gimple_stmt (&gsi2
, vect_detect_hybrid_slp_2
,
2439 vect_detect_hybrid_slp_1
, &wi
);
2440 walk_gimple_seq (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
),
2441 vect_detect_hybrid_slp_2
,
2442 vect_detect_hybrid_slp_1
, &wi
);
2447 /* Then walk the SLP instance trees marking stmts with uses in
2448 non-SLP stmts as hybrid, also propagating hybrid down the
2449 SLP tree, collecting the above info on-the-fly. */
2450 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
2452 for (unsigned i
= 0; i
< SLP_INSTANCE_GROUP_SIZE (instance
); ++i
)
2453 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance
),
2459 /* Initialize a bb_vec_info struct for the statements between
2460 REGION_BEGIN_IN (inclusive) and REGION_END_IN (exclusive). */
/* The loop marks every stmt in the region by setting its uid to 0
   (see the destructor, which resets uids to -1).  NOTE(review): lossy
   extraction -- braces and the gsi_next increment are missing here.  */
2462 _bb_vec_info::_bb_vec_info (gimple_stmt_iterator region_begin_in
,
2463 gimple_stmt_iterator region_end_in
,
2464 vec_info_shared
*shared
)
2465 : vec_info (vec_info::bb
, init_cost (NULL
), shared
),
2466 bb (gsi_bb (region_begin_in
)),
2467 region_begin (region_begin_in
),
2468 region_end (region_end_in
)
2470 gimple_stmt_iterator gsi
;
2472 for (gsi
= region_begin
; gsi_stmt (gsi
) != gsi_stmt (region_end
);
2475 gimple
*stmt
= gsi_stmt (gsi
);
2476 gimple_set_uid (stmt
, 0);
2484 /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
2485 stmts in the basic block. */
/* Inverse of the constructor: frees each stmt's vec_info and resets the
   uid region marker to -1.  NOTE(review): lossy extraction -- braces are
   missing from this copy.  */
2487 _bb_vec_info::~_bb_vec_info ()
2489 for (gimple_stmt_iterator si
= region_begin
;
2490 gsi_stmt (si
) != gsi_stmt (region_end
); gsi_next (&si
))
2492 gimple
*stmt
= gsi_stmt (si
);
2493 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2496 /* Free stmt_vec_info. */
2497 free_stmt_vec_info (stmt
);
2499 /* Reset region marker. */
2500 gimple_set_uid (stmt
, -1);
2506 /* Subroutine of vect_slp_analyze_node_operations. Handle the root of NODE,
2507 given then that child nodes have already been processed, and that
2508 their def types currently match their SLP node's def type. */
/* NOTE(review): lossy extraction -- several conditions (e.g. the bb_vinfo
   guard, the else-arms, the vf initialization) are incomplete in this
   copy; verify against upstream gcc/tree-vect-slp.c before editing.  */
2511 vect_slp_analyze_node_operations_1 (vec_info
*vinfo
, slp_tree node
,
2512 slp_instance node_instance
,
2513 stmt_vector_for_cost
*cost_vec
)
2515 stmt_vec_info stmt_info
= SLP_TREE_SCALAR_STMTS (node
)[0];
2516 gimple
*stmt
= stmt_info
->stmt
;
2517 gcc_assert (STMT_SLP_TYPE (stmt_info
) != loop_vect
);
2519 /* For BB vectorization vector types are assigned here.
2520 Memory accesses already got their vector type assigned
2521 in vect_analyze_data_refs. */
2522 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
2524 && ! STMT_VINFO_DATA_REF (stmt_info
))
2526 tree vectype
, nunits_vectype
;
2527 if (!vect_get_vector_types_for_stmt (stmt_info
, &vectype
,
2529 /* We checked this when building the node. */
/* Plain boolean_type_node needs the precision-aware mask type.  */
2531 if (vectype
== boolean_type_node
)
2533 vectype
= vect_get_mask_type_for_stmt (stmt_info
);
2535 /* vect_get_mask_type_for_stmt has already explained the
2540 stmt_vec_info sstmt_info
;
/* Share the chosen vectype across all scalar stmts of the node.  */
2542 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, sstmt_info
)
2543 STMT_VINFO_VECTYPE (sstmt_info
) = vectype
;
2546 /* Calculate the number of vector statements to be created for the
2547 scalar stmts in this node. For SLP reductions it is equal to the
2548 number of vector statements in the children (which has already been
2549 calculated by the recursive call). Otherwise it is the number of
2550 scalar elements in one scalar iteration (DR_GROUP_SIZE) multiplied by
2551 VF divided by the number of elements in a vector. */
2552 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info
)
2553 && REDUC_GROUP_FIRST_ELEMENT (stmt_info
))
2554 SLP_TREE_NUMBER_OF_VEC_STMTS (node
)
2555 = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node
)[0]);
2559 if (loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
))
2560 vf
= loop_vinfo
->vectorization_factor
;
2563 unsigned int group_size
= SLP_INSTANCE_GROUP_SIZE (node_instance
);
2564 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2565 SLP_TREE_NUMBER_OF_VEC_STMTS (node
)
2566 = vect_get_num_vectors (vf
* group_size
, vectype
);
2570 return vect_analyze_stmt (stmt
, &dummy
, node
, node_instance
, cost_vec
);
2573 /* Analyze statements contained in SLP tree NODE after recursively analyzing
2574 the subtree. NODE_INSTANCE contains NODE and VINFO contains INSTANCE.
2576 Return true if the operations are supported. */
/* NOTE(review): lossy extraction -- return statements, braces and the
   trailing cost_vec argument of the _1 call are missing from this copy;
   see upstream gcc/tree-vect-slp.c.  */
2579 vect_slp_analyze_node_operations (vec_info
*vinfo
, slp_tree node
,
2580 slp_instance node_instance
,
2581 scalar_stmts_to_slp_tree_map_t
*visited
,
2582 scalar_stmts_to_slp_tree_map_t
*lvisited
,
2583 stmt_vector_for_cost
*cost_vec
)
2588 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
2591 /* If we already analyzed the exact same set of scalar stmts we're done.
2592 We share the generated vector stmts for those. */
/* VISITED caches prior instances, LVISITED the current instance.  */
2594 if ((leader
= visited
->get (SLP_TREE_SCALAR_STMTS (node
)))
2595 || (leader
= lvisited
->get (SLP_TREE_SCALAR_STMTS (node
))))
2597 SLP_TREE_NUMBER_OF_VEC_STMTS (node
)
2598 = SLP_TREE_NUMBER_OF_VEC_STMTS (*leader
)
;
2602 /* The SLP graph is acyclic so not caching whether we failed or succeeded
2603 doesn't result in any issue since we throw away the lvisited set
2605 lvisited
->put (SLP_TREE_SCALAR_STMTS (node
).copy (), node
);
2607 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
2608 if (!vect_slp_analyze_node_operations (vinfo
, child
, node_instance
,
2609 visited
, lvisited
, cost_vec
))
2612 /* Push SLP node def-type to stmt operands. */
2613 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), j
, child
)
2614 if (SLP_TREE_DEF_TYPE (child
) != vect_internal_def
)
2615 STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child
)[0])
2616 = SLP_TREE_DEF_TYPE (child
);
2617 bool res
= vect_slp_analyze_node_operations_1 (vinfo
, node
, node_instance
,
2619 /* Restore def-types. */
2620 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), j
, child
)
2621 if (SLP_TREE_DEF_TYPE (child
) != vect_internal_def
)
2622 STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child
)[0])
2623 = vect_internal_def
;
2631 /* Analyze statements in SLP instances of VINFO. Return true if the
2632 operations are supported. */
/* Iterates over all SLP instances; instances whose operations are not
   supported are freed and removed from the vector (note the loop does not
   advance i on removal -- ordered_remove shifts the next element into
   slot i).  NOTE(review): lossy extraction -- braces, the `delete visited`
   and some dump guards are missing from this copy.  */
2635 vect_slp_analyze_operations (vec_info
*vinfo
)
2637 slp_instance instance
;
2640 DUMP_VECT_SCOPE ("vect_slp_analyze_operations");
2642 scalar_stmts_to_slp_tree_map_t
*visited
2643 = new scalar_stmts_to_slp_tree_map_t ();
2644 for (i
= 0; vinfo
->slp_instances
.iterate (i
, &instance
); )
2646 scalar_stmts_to_slp_tree_map_t lvisited
;
2647 stmt_vector_for_cost cost_vec
;
2648 cost_vec
.create (2);
2649 if (!vect_slp_analyze_node_operations (vinfo
,
2650 SLP_INSTANCE_TREE (instance
),
2651 instance
, visited
, &lvisited
,
2654 slp_tree node
= SLP_INSTANCE_TREE (instance
);
2655 stmt_vec_info stmt_info
= SLP_TREE_SCALAR_STMTS (node
)[0];
2656 dump_printf_loc (MSG_NOTE
, vect_location
,
2657 "removing SLP instance operations starting from: ");
2658 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt_info
->stmt
, 0);
2659 vect_free_slp_instance (instance
, false);
2660 vinfo
->slp_instances
.ordered_remove (i
);
2661 cost_vec
.release ();
/* On success, promote this instance's local cache into the global
   one (the keys are copied because lvisited dies here).  */
2665 for (scalar_stmts_to_slp_tree_map_t::iterator x
= lvisited
.begin();
2666 x
!= lvisited
.end(); ++x
)
2667 visited
->put ((*x
).first
.copy (), (*x
).second
);
2670 add_stmt_costs (vinfo
->target_cost_data
, &cost_vec
);
2671 cost_vec
.release ();
2676 return !vinfo
->slp_instances
.is_empty ();
2680 /* Compute the scalar cost of the SLP node NODE and its children
2681 and return it. Do not account defs that are marked in LIFE and
2682 update LIFE according to uses of NODE. */
/* NOTE(review): lossy extraction -- the (*life)[i] checks/assignments,
   braces and scalar_load/scalar_stmt kind assignments are missing from
   this copy; verify against upstream before editing.  */
2685 vect_bb_slp_scalar_cost (basic_block bb
,
2686 slp_tree node
, vec
<bool, va_heap
> *life
,
2687 stmt_vector_for_cost
*cost_vec
)
2690 stmt_vec_info stmt_info
;
2693 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt_info
)
2695 gimple
*stmt
= stmt_info
->stmt
;
2696 ssa_op_iter op_iter
;
2697 def_operand_p def_p
;
2702 /* If there is a non-vectorized use of the defs then the scalar
2703 stmt is kept live in which case we do not account it or any
2704 required defs in the SLP children in the scalar cost. This
2705 way we make the vectorization more costly when compared to
2707 FOR_EACH_SSA_DEF_OPERAND (def_p
, stmt
, op_iter
, SSA_OP_DEF
)
2709 imm_use_iterator use_iter
;
/* A use outside the region, or inside but not pure-SLP, keeps the
   scalar stmt alive.  */
2711 FOR_EACH_IMM_USE_STMT (use_stmt
, use_iter
, DEF_FROM_PTR (def_p
))
2712 if (!is_gimple_debug (use_stmt
)
2713 && (! vect_stmt_in_region_p (stmt_info
->vinfo
, use_stmt
)
2714 || ! PURE_SLP_STMT (vinfo_for_stmt (use_stmt
))))
2717 BREAK_FROM_IMM_USE_STMT (use_iter
);
2723 /* Count scalar stmts only once. */
2724 if (gimple_visited_p (stmt
))
2726 gimple_set_visited (stmt
, true);
2728 vect_cost_for_stmt kind
;
2729 if (STMT_VINFO_DATA_REF (stmt_info
))
2731 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info
)))
2734 kind
= scalar_store
;
2738 record_stmt_cost (cost_vec
, 1, kind
, stmt_info
, 0, vect_body
);
2741 auto_vec
<bool, 20> subtree_life
;
2742 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
2744 if (SLP_TREE_DEF_TYPE (child
) == vect_internal_def
)
2746 /* Do not directly pass LIFE to the recursive call, copy it to
2747 confine changes in the callee to the current child/subtree. */
2748 subtree_life
.safe_splice (*life
);
2749 vect_bb_slp_scalar_cost (bb
, child
, &subtree_life
, cost_vec
);
2750 subtree_life
.truncate (0);
2755 /* Check if vectorization of the basic block is profitable. */
/* Compares the target-costed scalar version of all SLP instances against
   the vector prologue + body + epilogue cost and returns the verdict
   (return statements are missing from this lossy extraction -- upstream
   returns false when vector cost >= scalar cost, true otherwise).  */
2758 vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo
)
2760 vec
<slp_instance
> slp_instances
= BB_VINFO_SLP_INSTANCES (bb_vinfo
);
2761 slp_instance instance
;
2763 unsigned int vec_inside_cost
= 0, vec_outside_cost
= 0, scalar_cost
= 0;
2764 unsigned int vec_prologue_cost
= 0, vec_epilogue_cost
= 0;
2766 /* Calculate scalar cost. */
2767 stmt_vector_for_cost scalar_costs
;
2768 scalar_costs
.create (0);
2769 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
2771 auto_vec
<bool, 20> life
;
2772 life
.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance
));
2773 vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo
),
2774 SLP_INSTANCE_TREE (instance
),
2775 &life
, &scalar_costs
);
/* Fold the per-stmt scalar costs through the target cost model.  */
2777 void *target_cost_data
= init_cost (NULL
);
2778 add_stmt_costs (target_cost_data
, &scalar_costs
);
2779 scalar_costs
.release ();
2781 finish_cost (target_cost_data
, &dummy
, &scalar_cost
, &dummy
);
2782 destroy_cost_data (target_cost_data
);
2784 /* Unset visited flag. */
2785 for (gimple_stmt_iterator gsi
= bb_vinfo
->region_begin
;
2786 gsi_stmt (gsi
) != gsi_stmt (bb_vinfo
->region_end
); gsi_next (&gsi
))
2787 gimple_set_visited (gsi_stmt (gsi
), false);
2789 /* Complete the target-specific cost calculation. */
2790 finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo
), &vec_prologue_cost
,
2791 &vec_inside_cost
, &vec_epilogue_cost
);
2793 vec_outside_cost
= vec_prologue_cost
+ vec_epilogue_cost
;
2795 if (dump_enabled_p ())
2797 dump_printf_loc (MSG_NOTE
, vect_location
, "Cost model analysis: \n");
2798 dump_printf (MSG_NOTE
, " Vector inside of basic block cost: %d\n",
2800 dump_printf (MSG_NOTE
, " Vector prologue cost: %d\n", vec_prologue_cost
);
2801 dump_printf (MSG_NOTE
, " Vector epilogue cost: %d\n", vec_epilogue_cost
);
2802 dump_printf (MSG_NOTE
, " Scalar cost of basic block: %d\n", scalar_cost
);
2805 /* Vectorization is profitable if its cost is more than the cost of scalar
2806 version. Note that we err on the vector side for equal cost because
2807 the cost estimate is otherwise quite pessimistic (constant uses are
2808 free on the scalar side but cost a load on the vector side for
2810 if (vec_outside_cost
+ vec_inside_cost
> scalar_cost
)
2816 /* Check if the basic block can be vectorized. Returns a bb_vec_info
2817 if so and sets fatal to true if failure is independent of
2818 current_vector_size. */
/* Analysis pipeline for one BB region: size limit, data-ref analysis,
   access analysis, pattern recog, SLP discovery, alignment/dependence
   verification, operation analysis, cost model.  NOTE(review): lossy
   extraction -- most early-return `delete bb_vinfo; return NULL;` paths,
   braces and `fatal` assignments are missing from this copy.  */
2821 vect_slp_analyze_bb_1 (gimple_stmt_iterator region_begin
,
2822 gimple_stmt_iterator region_end
,
2823 vec
<data_reference_p
> datarefs
, int n_stmts
,
2824 bool &fatal
, vec_info_shared
*shared
)
2826 bb_vec_info bb_vinfo
;
2827 slp_instance instance
;
2829 poly_uint64 min_vf
= 2;
2831 /* The first group of checks is independent of the vector size. */
2834 if (n_stmts
> PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB
))
2836 if (dump_enabled_p ())
2837 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2838 "not vectorized: too many instructions in "
2840 free_data_refs (datarefs
);
2844 bb_vinfo
= new _bb_vec_info (region_begin
, region_end
, shared
);
2848 BB_VINFO_DATAREFS (bb_vinfo
) = datarefs
;
2849 bb_vinfo
->shared
->save_datarefs ();
2851 /* Analyze the data references. */
2853 if (!vect_analyze_data_refs (bb_vinfo
, &min_vf
))
2855 if (dump_enabled_p ())
2856 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2857 "not vectorized: unhandled data-ref in basic "
/* Need at least two data-refs for SLP grouping to be possible.  */
2864 if (BB_VINFO_DATAREFS (bb_vinfo
).length () < 2)
2866 if (dump_enabled_p ())
2867 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2868 "not vectorized: not enough data-refs in "
2875 if (!vect_analyze_data_ref_accesses (bb_vinfo
))
2877 if (dump_enabled_p ())
2878 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2879 "not vectorized: unhandled data access in "
2886 /* If there are no grouped stores in the region there is no need
2887 to continue with pattern recog as vect_analyze_slp will fail
2889 if (bb_vinfo
->grouped_stores
.is_empty ())
2891 if (dump_enabled_p ())
2892 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2893 "not vectorized: no grouped stores in "
2900 /* While the rest of the analysis below depends on it in some way. */
2903 vect_pattern_recog (bb_vinfo
);
2905 /* Check the SLP opportunities in the basic block, analyze and build SLP
2907 if (!vect_analyze_slp (bb_vinfo
, n_stmts
))
2909 if (dump_enabled_p ())
2911 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2912 "Failed to SLP the basic block.\n");
2913 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2914 "not vectorized: failed to find SLP opportunities "
2915 "in basic block.\n");
2922 vect_record_base_alignments (bb_vinfo
);
2924 /* Analyze and verify the alignment of data references and the
2925 dependence in the SLP instances. */
2926 for (i
= 0; BB_VINFO_SLP_INSTANCES (bb_vinfo
).iterate (i
, &instance
); )
2928 if (! vect_slp_analyze_and_verify_instance_alignment (instance
)
2929 || ! vect_slp_analyze_instance_dependence (instance
))
2931 slp_tree node
= SLP_INSTANCE_TREE (instance
);
2932 stmt_vec_info stmt_info
= SLP_TREE_SCALAR_STMTS (node
)[0];
2933 dump_printf_loc (MSG_NOTE
, vect_location
,
2934 "removing SLP instance operations starting from: ");
2935 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt_info
->stmt
, 0);
2936 vect_free_slp_instance (instance
, false);
2937 BB_VINFO_SLP_INSTANCES (bb_vinfo
).ordered_remove (i
);
2941 /* Mark all the statements that we want to vectorize as pure SLP and
2943 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance
), pure_slp
, -1);
2944 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance
));
2948 if (! BB_VINFO_SLP_INSTANCES (bb_vinfo
).length ())
2954 if (!vect_slp_analyze_operations (bb_vinfo
))
2956 if (dump_enabled_p ())
2957 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2958 "not vectorized: bad operation in basic block.\n");
2964 /* Cost model: check if the vectorization is worthwhile. */
2965 if (!unlimited_cost_model (NULL
)
2966 && !vect_bb_vectorization_profitable_p (bb_vinfo
))
2968 if (dump_enabled_p ())
2969 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2970 "not vectorized: vectorization is not "
2977 if (dump_enabled_p ())
2978 dump_printf_loc (MSG_NOTE
, vect_location
,
2979 "Basic block will be vectorized using SLP\n");
2985 /* Main entry for the BB vectorizer. Analyze and transform BB, returns
2986 true if anything in the basic-block was vectorized. */
/* Splits BB into regions at stmts that cannot be handled as data refs,
   analyzes/transforms each region, and retries each region with the next
   autovectorize vector size on failure.  NOTE(review): lossy extraction --
   the outer while(true), insns counting, fatal flag and several braces
   are missing from this copy.  */
2989 vect_slp_bb (basic_block bb
)
2991 bb_vec_info bb_vinfo
;
2992 gimple_stmt_iterator gsi
;
2993 bool any_vectorized
= false;
2994 auto_vector_sizes vector_sizes
;
2996 DUMP_VECT_SCOPE ("vect_slp_analyze_bb");
2998 /* Autodetect first vector size we try. */
2999 current_vector_size
= 0;
3000 targetm
.vectorize
.autovectorize_vector_sizes (&vector_sizes
);
3001 unsigned int next_size
= 0;
3003 gsi
= gsi_start_bb (bb
);
3005 poly_uint64 autodetected_vector_size
= 0;
3008 if (gsi_end_p (gsi
))
3011 gimple_stmt_iterator region_begin
= gsi
;
3012 vec
<data_reference_p
> datarefs
= vNULL
;
/* Extend the region while stmts are recognizable data references.  */
3015 for (; !gsi_end_p (gsi
); gsi_next (&gsi
))
3017 gimple
*stmt
= gsi_stmt (gsi
);
3018 if (is_gimple_debug (stmt
))
3022 if (gimple_location (stmt
) != UNKNOWN_LOCATION
)
3023 vect_location
= stmt
;
3025 if (!vect_find_stmt_data_reference (NULL
, stmt
, &datarefs
))
3029 /* Skip leading unhandled stmts. */
3030 if (gsi_stmt (region_begin
) == gsi_stmt (gsi
))
3036 gimple_stmt_iterator region_end
= gsi
;
3038 bool vectorized
= false;
3040 vec_info_shared shared
;
3041 bb_vinfo
= vect_slp_analyze_bb_1 (region_begin
, region_end
,
3042 datarefs
, insns
, fatal
, &shared
);
3044 && dbg_cnt (vect_slp
))
3046 if (dump_enabled_p ())
3047 dump_printf_loc (MSG_NOTE
, vect_location
, "SLPing BB part\n");
3049 bb_vinfo
->shared
->check_datarefs ();
3050 vect_schedule_slp (bb_vinfo
);
3052 unsigned HOST_WIDE_INT bytes
;
3053 if (current_vector_size
.is_constant (&bytes
))
3054 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS
, vect_location
,
3055 "basic block part vectorized using "
3056 HOST_WIDE_INT_PRINT_UNSIGNED
" byte "
3057 "vectors\n", bytes
);
3059 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS
, vect_location
,
3060 "basic block part vectorized using variable "
3061 "length vectors\n");
3067 any_vectorized
|= vectorized
;
3070 autodetected_vector_size
= current_vector_size
;
/* Skip the autodetected size when it reappears in the target list.  */
3072 if (next_size
< vector_sizes
.length ()
3073 && known_eq (vector_sizes
[next_size
], autodetected_vector_size
))
3077 || next_size
== vector_sizes
.length ()
3078 || known_eq (current_vector_size
, 0U)
3079 /* If vect_slp_analyze_bb_1 signaled that analysis for all
3080 vector sizes will fail do not bother iterating. */
3083 if (gsi_end_p (region_end
))
3086 /* Skip the unhandled stmt. */
3089 /* And reset vector sizes. */
3090 current_vector_size
= 0;
3095 /* Try the next biggest vector size. */
3096 current_vector_size
= vector_sizes
[next_size
++];
3097 if (dump_enabled_p ())
3099 dump_printf_loc (MSG_NOTE
, vect_location
,
3100 "***** Re-trying analysis with "
3102 dump_dec (MSG_NOTE
, current_vector_size
);
3103 dump_printf (MSG_NOTE
, "\n");
3111 return any_vectorized
;
3115 /* Return 1 if vector type of boolean constant which is OPNUM
3116 operand in statement STMT is a boolean vector. */
/* Decides the mask-ness of operand OPNUM by looking at the *other*
   operand for comparisons and at the condition for COND_EXPRs; otherwise
   falls back to the stmt's own vectype.  NOTE(review): lossy extraction --
   the opnum-based if/else selecting rhs1 vs rhs2 and gcc_unreachable
   branches are missing from this copy.  */
3119 vect_mask_constant_operand_p (gimple
*stmt
, int opnum
)
3121 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
3122 enum tree_code code
= gimple_expr_code (stmt
);
3124 enum vect_def_type dt
;
3126 /* For comparison and COND_EXPR type is chosen depending
3127 on the other comparison operand. */
3128 if (TREE_CODE_CLASS (code
) == tcc_comparison
)
3131 op
= gimple_assign_rhs1 (stmt
);
3133 op
= gimple_assign_rhs2 (stmt
);
3135 if (!vect_is_simple_use (op
, stmt_vinfo
->vinfo
, &dt
, &vectype
))
/* No vectype known counts as "boolean vector" here.  */
3138 return !vectype
|| VECTOR_BOOLEAN_TYPE_P (vectype
);
3141 if (code
== COND_EXPR
)
3143 tree cond
= gimple_assign_rhs1 (stmt
);
3145 if (TREE_CODE (cond
) == SSA_NAME
)
3148 op
= TREE_OPERAND (cond
, 1);
3150 op
= TREE_OPERAND (cond
, 0);
3152 if (!vect_is_simple_use (op
, stmt_vinfo
->vinfo
, &dt
, &vectype
))
3155 return !vectype
|| VECTOR_BOOLEAN_TYPE_P (vectype
);
3158 return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo
));
3161 /* Build a variable-length vector in which the elements in ELTS are repeated
3162 to a fill NRESULTS vectors of type VECTOR_TYPE. Store the vectors in
3163 RESULTS and add any new instructions to SEQ.
3165 The approach we use is:
3167 (1) Find a vector mode VM with integer elements of mode IM.
3169 (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
3170 ELTS' has mode IM. This involves creating NELTS' VIEW_CONVERT_EXPRs
3171 from small vectors to IM.
3173 (3) Duplicate each ELTS'[I] into a vector of mode VM.
3175 (4) Use a tree of interleaving VEC_PERM_EXPRs to create VMs with the
3176 correct byte contents.
3178 (5) Use VIEW_CONVERT_EXPR to cast the final VMs to the required type.
3180 We try to find the largest IM for which this sequence works, in order
3181 to cut down on the number of interleaves. */
/* NOTE(review): lossy extraction -- the permutation-selector setup, the
   gcc_unreachable on can_duplicate_and_interleave_p failure, and several
   braces are missing from this copy; verify against upstream.  */
3184 duplicate_and_interleave (gimple_seq
*seq
, tree vector_type
, vec
<tree
> elts
,
3185 unsigned int nresults
, vec
<tree
> &results
)
3187 unsigned int nelts
= elts
.length ();
3188 tree element_type
= TREE_TYPE (vector_type
);
3190 /* (1) Find a vector mode VM with integer elements of mode IM. */
3191 unsigned int nvectors
= 1;
3192 tree new_vector_type
;
3194 if (!can_duplicate_and_interleave_p (nelts
, TYPE_MODE (element_type
),
3195 &nvectors
, &new_vector_type
,
3199 /* Get a vector type that holds ELTS[0:NELTS/NELTS']. */
3200 unsigned int partial_nelts
= nelts
/ nvectors
;
3201 tree partial_vector_type
= build_vector_type (element_type
, partial_nelts
);
/* PIECES holds NVECTORS inputs followed by NVECTORS outputs; the two
   halves are ping-ponged via in_start/out_start below.  */
3203 tree_vector_builder partial_elts
;
3204 auto_vec
<tree
, 32> pieces (nvectors
* 2);
3205 pieces
.quick_grow (nvectors
* 2);
3206 for (unsigned int i
= 0; i
< nvectors
; ++i
)
3208 /* (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
3209 ELTS' has mode IM. */
3210 partial_elts
.new_vector (partial_vector_type
, partial_nelts
, 1);
3211 for (unsigned int j
= 0; j
< partial_nelts
; ++j
)
3212 partial_elts
.quick_push (elts
[i
* partial_nelts
+ j
]);
3213 tree t
= gimple_build_vector (seq
, &partial_elts
);
3214 t
= gimple_build (seq
, VIEW_CONVERT_EXPR
,
3215 TREE_TYPE (new_vector_type
), t
);
3217 /* (3) Duplicate each ELTS'[I] into a vector of mode VM. */
3218 pieces
[i
] = gimple_build_vector_from_val (seq
, new_vector_type
, t
);
3221 /* (4) Use a tree of VEC_PERM_EXPRs to create a single VM with the
3222 correct byte contents.
3224 We need to repeat the following operation log2(nvectors) times:
3226 out[i * 2] = VEC_PERM_EXPR (in[i], in[i + hi_start], lo_permute);
3227 out[i * 2 + 1] = VEC_PERM_EXPR (in[i], in[i + hi_start], hi_permute);
3229 However, if each input repeats every N elements and the VF is
3230 a multiple of N * 2, the HI result is the same as the LO. */
3231 unsigned int in_start
= 0;
3232 unsigned int out_start
= nvectors
;
3233 unsigned int hi_start
= nvectors
/ 2;
3234 /* A bound on the number of outputs needed to produce NRESULTS results
3235 in the final iteration. */
3236 unsigned int noutputs_bound
= nvectors
* nresults
;
3237 for (unsigned int in_repeat
= 1; in_repeat
< nvectors
; in_repeat
*= 2)
3239 noutputs_bound
/= 2;
3240 unsigned int limit
= MIN (noutputs_bound
, nvectors
);
3241 for (unsigned int i
= 0; i
< limit
; ++i
)
/* Odd outputs can reuse the preceding even output when the repeat
   pattern divides the vector length (HI == LO case above).  */
3244 && multiple_p (TYPE_VECTOR_SUBPARTS (new_vector_type
),
3247 pieces
[out_start
+ i
] = pieces
[out_start
+ i
- 1];
3251 tree output
= make_ssa_name (new_vector_type
);
3252 tree input1
= pieces
[in_start
+ (i
/ 2)];
3253 tree input2
= pieces
[in_start
+ (i
/ 2) + hi_start
];
3254 gassign
*stmt
= gimple_build_assign (output
, VEC_PERM_EXPR
,
3257 gimple_seq_add_stmt (seq
, stmt
);
3258 pieces
[out_start
+ i
] = output
;
3260 std::swap (in_start
, out_start
);
3263 /* (5) Use VIEW_CONVERT_EXPR to cast the final VM to the required type. */
3264 results
.reserve (nresults
);
3265 for (unsigned int i
= 0; i
< nresults
; ++i
)
3267 results
.quick_push (gimple_build (seq
, VIEW_CONVERT_EXPR
, vector_type
,
3268 pieces
[in_start
+ i
]));
3270 results
.quick_push (results
[i
- nvectors
]);
3274 /* For constant and loop invariant defs of SLP_NODE this function returns
3275 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
3276 OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
3277 scalar stmts. NUMBER_OF_VECTORS is the number of vector defs to create.
3278 REDUC_INDEX is the index of the reduction operand in the statements, unless
3282 vect_get_constant_vectors (tree op
, slp_tree slp_node
,
3283 vec
<tree
> *vec_oprnds
,
3284 unsigned int op_num
, unsigned int number_of_vectors
)
3286 vec
<stmt_vec_info
> stmts
= SLP_TREE_SCALAR_STMTS (slp_node
);
3287 stmt_vec_info stmt_vinfo
= stmts
[0];
3288 gimple
*stmt
= stmt_vinfo
->stmt
;
3289 unsigned HOST_WIDE_INT nunits
;
3291 unsigned j
, number_of_places_left_in_vector
;
3294 int group_size
= stmts
.length ();
3295 unsigned int vec_num
, i
;
3296 unsigned number_of_copies
= 1;
3298 voprnds
.create (number_of_vectors
);
3299 bool constant_p
, is_store
;
3300 tree neutral_op
= NULL
;
3301 enum tree_code code
= gimple_expr_code (stmt
);
3302 gimple_seq ctor_seq
= NULL
;
3303 auto_vec
<tree
, 16> permute_results
;
3305 /* Check if vector type is a boolean vector. */
3306 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op
))
3307 && vect_mask_constant_operand_p (stmt_vinfo
, op_num
))
3309 = build_same_sized_truth_vector_type (STMT_VINFO_VECTYPE (stmt_vinfo
));
3311 vector_type
= get_vectype_for_scalar_type (TREE_TYPE (op
));
3313 if (STMT_VINFO_DATA_REF (stmt_vinfo
))
3316 op
= gimple_assign_rhs1 (stmt
);
3323 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
3324 created vectors. It is greater than 1 if unrolling is performed.
3326 For example, we have two scalar operands, s1 and s2 (e.g., group of
3327 strided accesses of size two), while NUNITS is four (i.e., four scalars
3328 of this type can be packed in a vector). The output vector will contain
3329 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
3332 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
3333 containing the operands.
3335 For example, NUNITS is four as before, and the group size is 8
3336 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
3337 {s5, s6, s7, s8}. */
3339 /* When using duplicate_and_interleave, we just need one element for
3340 each scalar statement. */
3341 if (!TYPE_VECTOR_SUBPARTS (vector_type
).is_constant (&nunits
))
3342 nunits
= group_size
;
3344 number_of_copies
= nunits
* number_of_vectors
/ group_size
;
3346 number_of_places_left_in_vector
= nunits
;
3348 tree_vector_builder
elts (vector_type
, nunits
, 1);
3349 elts
.quick_grow (nunits
);
3350 bool place_after_defs
= false;
3351 for (j
= 0; j
< number_of_copies
; j
++)
3353 for (i
= group_size
- 1; stmts
.iterate (i
, &stmt_vinfo
); i
--)
3355 stmt
= stmt_vinfo
->stmt
;
3357 op
= gimple_assign_rhs1 (stmt
);
3364 tree cond
= gimple_assign_rhs1 (stmt
);
3365 if (TREE_CODE (cond
) == SSA_NAME
)
3366 op
= gimple_op (stmt
, op_num
+ 1);
3367 else if (op_num
== 0 || op_num
== 1)
3368 op
= TREE_OPERAND (cond
, op_num
);
3372 op
= gimple_assign_rhs2 (stmt
);
3374 op
= gimple_assign_rhs3 (stmt
);
3380 op
= gimple_call_arg (stmt
, op_num
);
3387 op
= gimple_op (stmt
, op_num
+ 1);
3388 /* Unlike the other binary operators, shifts/rotates have
3389 the shift count being int, instead of the same type as
3390 the lhs, so make sure the scalar is the right type if
3391 we are dealing with vectors of
3392 long long/long/short/char. */
3393 if (op_num
== 1 && TREE_CODE (op
) == INTEGER_CST
)
3394 op
= fold_convert (TREE_TYPE (vector_type
), op
);
3398 op
= gimple_op (stmt
, op_num
+ 1);
3403 /* Create 'vect_ = {op0,op1,...,opn}'. */
3404 number_of_places_left_in_vector
--;
3406 if (!types_compatible_p (TREE_TYPE (vector_type
), TREE_TYPE (op
)))
3408 if (CONSTANT_CLASS_P (op
))
3410 if (VECTOR_BOOLEAN_TYPE_P (vector_type
))
3412 /* Can't use VIEW_CONVERT_EXPR for booleans because
3413 of possibly different sizes of scalar value and
3415 if (integer_zerop (op
))
3416 op
= build_int_cst (TREE_TYPE (vector_type
), 0);
3417 else if (integer_onep (op
))
3418 op
= build_all_ones_cst (TREE_TYPE (vector_type
));
3423 op
= fold_unary (VIEW_CONVERT_EXPR
,
3424 TREE_TYPE (vector_type
), op
);
3425 gcc_assert (op
&& CONSTANT_CLASS_P (op
));
3429 tree new_temp
= make_ssa_name (TREE_TYPE (vector_type
));
3431 if (VECTOR_BOOLEAN_TYPE_P (vector_type
))
3434 = build_all_ones_cst (TREE_TYPE (vector_type
));
3436 = build_zero_cst (TREE_TYPE (vector_type
));
3437 gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (op
)));
3438 init_stmt
= gimple_build_assign (new_temp
, COND_EXPR
,
3444 op
= build1 (VIEW_CONVERT_EXPR
, TREE_TYPE (vector_type
),
3447 = gimple_build_assign (new_temp
, VIEW_CONVERT_EXPR
,
3450 gimple_seq_add_stmt (&ctor_seq
, init_stmt
);
3454 elts
[number_of_places_left_in_vector
] = op
;
3455 if (!CONSTANT_CLASS_P (op
))
3457 if (TREE_CODE (orig_op
) == SSA_NAME
3458 && !SSA_NAME_IS_DEFAULT_DEF (orig_op
)
3459 && STMT_VINFO_BB_VINFO (stmt_vinfo
)
3460 && (STMT_VINFO_BB_VINFO (stmt_vinfo
)->bb
3461 == gimple_bb (SSA_NAME_DEF_STMT (orig_op
))))
3462 place_after_defs
= true;
3464 if (number_of_places_left_in_vector
== 0)
3467 ? multiple_p (TYPE_VECTOR_SUBPARTS (vector_type
), nunits
)
3468 : known_eq (TYPE_VECTOR_SUBPARTS (vector_type
), nunits
))
3469 vec_cst
= gimple_build_vector (&ctor_seq
, &elts
);
3472 if (vec_oprnds
->is_empty ())
3473 duplicate_and_interleave (&ctor_seq
, vector_type
, elts
,
3476 vec_cst
= permute_results
[number_of_vectors
- j
- 1];
3479 gimple_stmt_iterator gsi
;
3480 if (place_after_defs
)
3482 stmt_vec_info last_stmt_info
3483 = vect_find_last_scalar_stmt_in_slp (slp_node
);
3484 gsi
= gsi_for_stmt (last_stmt_info
->stmt
);
3485 init
= vect_init_vector (stmt_vinfo
, vec_cst
, vector_type
,
3489 init
= vect_init_vector (stmt_vinfo
, vec_cst
, vector_type
,
3491 if (ctor_seq
!= NULL
)
3493 gsi
= gsi_for_stmt (SSA_NAME_DEF_STMT (init
));
3494 gsi_insert_seq_before (&gsi
, ctor_seq
, GSI_SAME_STMT
);
3497 voprnds
.quick_push (init
);
3498 place_after_defs
= false;
3499 number_of_places_left_in_vector
= nunits
;
3501 elts
.new_vector (vector_type
, nunits
, 1);
3502 elts
.quick_grow (nunits
);
3507 /* Since the vectors are created in the reverse order, we should invert
3509 vec_num
= voprnds
.length ();
3510 for (j
= vec_num
; j
!= 0; j
--)
3512 vop
= voprnds
[j
- 1];
3513 vec_oprnds
->quick_push (vop
);
3518 /* In case that VF is greater than the unrolling factor needed for the SLP
3519 group of stmts, NUMBER_OF_VECTORS to be created is greater than
3520 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
3521 to replicate the vectors. */
3522 while (number_of_vectors
> vec_oprnds
->length ())
3524 tree neutral_vec
= NULL
;
3529 neutral_vec
= build_vector_from_val (vector_type
, neutral_op
);
3531 vec_oprnds
->quick_push (neutral_vec
);
3535 for (i
= 0; vec_oprnds
->iterate (i
, &vop
) && i
< vec_num
; i
++)
3536 vec_oprnds
->quick_push (vop
);
3542 /* Get vectorized definitions from SLP_NODE that contains corresponding
3543 vectorized def-stmts. */
3546 vect_get_slp_vect_defs (slp_tree slp_node
, vec
<tree
> *vec_oprnds
)
3549 stmt_vec_info vec_def_stmt_info
;
/* SLP_NODE must already have been transformed: its vector statements
   are the source of the definitions collected here.  */
3552 gcc_assert (SLP_TREE_VEC_STMTS (slp_node
).exists ());
3554 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node
), i
, vec_def_stmt_info
)
3556 gcc_assert (vec_def_stmt_info
);
/* A vectorized PHI exposes the value it defines as the PHI result;
   any other vector stmt exposes it as the statement's lhs.  */
3557 if (gphi
*vec_def_phi
= dyn_cast
<gphi
*> (vec_def_stmt_info
->stmt
))
3558 vec_oprnd
= gimple_phi_result (vec_def_phi
);
3560 vec_oprnd
= gimple_get_lhs (vec_def_stmt_info
->stmt
);
/* Push the SSA name onto the caller-provided result vector.  */
3561 vec_oprnds
->quick_push (vec_oprnd
);
3566 /* Get vectorized definitions for SLP_NODE.
3567 If the scalar definitions are loop invariants or constants, collect them and
3568 call vect_get_constant_vectors() to create vector stmts.
3569 Otherwise, the def-stmts must be already vectorized and the vectorized stmts
3570 must be stored in the corresponding child of SLP_NODE, and we call
3571 vect_get_slp_vect_defs () to retrieve them. */
3574 vect_get_slp_defs (vec
<tree
> ops
, slp_tree slp_node
,
3575 vec
<vec
<tree
> > *vec_oprnds
)
3578 int number_of_vects
= 0, i
;
3579 unsigned int child_index
= 0;
3580 HOST_WIDE_INT lhs_size_unit
, rhs_size_unit
;
3581 slp_tree child
= NULL
;
3584 bool vectorized_defs
;
/* The first scalar stmt of the node is the reference point for the
   lhs/rhs scalar-size fix-up performed below.  */
3586 first_stmt
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
3587 FOR_EACH_VEC_ELT (ops
, i
, oprnd
)
3589 /* For each operand we check if it has vectorized definitions in a child
3590 node or we need to create them (for invariants and constants). We
3591 check if the LHS of the first stmt of the next child matches OPRND.
3592 If it does, we found the correct child. Otherwise, we call
3593 vect_get_constant_vectors (), and not advance CHILD_INDEX in order
3594 to check this child node for the next operand. */
3595 vectorized_defs
= false;
3596 if (SLP_TREE_CHILDREN (slp_node
).length () > child_index
)
3598 child
= SLP_TREE_CHILDREN (slp_node
)[child_index
];
3600 /* We have to check both pattern and original def, if available. */
3601 if (SLP_TREE_DEF_TYPE (child
) == vect_internal_def
)
3603 stmt_vec_info first_def_info
= SLP_TREE_SCALAR_STMTS (child
)[0];
3604 stmt_vec_info related
= STMT_VINFO_RELATED_STMT (first_def_info
);
/* As in vect_get_slp_vect_defs: a PHI defines its value via the
   PHI result rather than a gimple lhs.  */
3607 if (gphi
*first_def
= dyn_cast
<gphi
*> (first_def_info
->stmt
))
3608 first_def_op
= gimple_phi_result (first_def
);
3610 first_def_op
= gimple_get_lhs (first_def_info
->stmt
);
3611 if (operand_equal_p (oprnd
, first_def_op
, 0)
3613 && operand_equal_p (oprnd
,
3614 gimple_get_lhs (related
->stmt
), 0)))
3616 /* The number of vector defs is determined by the number of
3617 vector statements in the node from which we get those
3619 number_of_vects
= SLP_TREE_NUMBER_OF_VEC_STMTS (child
);
3620 vectorized_defs
= true;
/* No matching child: OPRND is an invariant/constant, so its vector
   defs must be built from scalars below.  */
3628 if (!vectorized_defs
)
3632 number_of_vects
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
3633 /* Number of vector stmts was calculated according to LHS in
3634 vect_schedule_slp_instance (), fix it by replacing LHS with
3635 RHS, if necessary. See vect_get_smallest_scalar_type () for
3637 vect_get_smallest_scalar_type (first_stmt
, &lhs_size_unit
,
/* Scale the vector-stmt count by the ratio of rhs to lhs scalar
   sizes so narrower/wider operands get the right number of
   vectors.  */
3639 if (rhs_size_unit
!= lhs_size_unit
)
3641 number_of_vects
*= rhs_size_unit
;
3642 number_of_vects
/= lhs_size_unit
;
3647 /* Allocate memory for vectorized defs. */
3649 vec_defs
.create (number_of_vects
);
3651 /* For reduction defs we call vect_get_constant_vectors (), since we are
3652 looking for initial loop invariant values. */
3653 if (vectorized_defs
)
3654 /* The defs are already vectorized. */
3655 vect_get_slp_vect_defs (child
, &vec_defs
);
3657 /* Build vectors from scalar defs. */
3658 vect_get_constant_vectors (oprnd
, slp_node
, &vec_defs
, i
,
/* One vec<tree> of defs is pushed per operand in OPS.  */
3661 vec_oprnds
->quick_push (vec_defs
);
3665 /* Generate vector permute statements from a list of loads in DR_CHAIN.
3666 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
3667 permute statements for the SLP node NODE of the SLP instance
3668 SLP_NODE_INSTANCE. */
3671 vect_transform_slp_perm_load (slp_tree node
, vec
<tree
> dr_chain
,
3672 gimple_stmt_iterator
*gsi
, poly_uint64 vf
,
3673 slp_instance slp_node_instance
, bool analyze_only
,
3676 stmt_vec_info stmt_info
= SLP_TREE_SCALAR_STMTS (node
)[0];
3677 vec_info
*vinfo
= stmt_info
->vinfo
;
3678 tree mask_element_type
= NULL_TREE
, mask_type
;
3680 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3681 int group_size
= SLP_INSTANCE_GROUP_SIZE (slp_node_instance
);
3682 unsigned int mask_element
;
3684 unsigned HOST_WIDE_INT nunits
, const_vf
;
/* Load permutations only make sense for grouped accesses; bail out
   otherwise.  */
3686 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info
))
/* Work relative to the first element of the load group.  */
3689 stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
3691 mode
= TYPE_MODE (vectype
);
3693 /* At the moment, all permutations are represented using per-element
3694 indices, so we can't cope with variable vector lengths or
3695 vectorization factors. */
3696 if (!TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&nunits
)
3697 || !vf
.is_constant (&const_vf
))
3700 /* The generic VEC_PERM_EXPR code always uses an integral type of the
3701 same size as the vector element being permuted. */
3702 mask_element_type
= lang_hooks
.types
.type_for_mode
3703 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype
))).require (), 1);
3704 mask_type
= get_vectype_for_scalar_type (mask_element_type
);
3705 vec_perm_builder
mask (nunits
, nunits
, 1);
3706 mask
.quick_grow (nunits
);
3707 vec_perm_indices indices
;
3709 /* Initialize the vect stmts of NODE to properly insert the generated
3712 for (unsigned i
= SLP_TREE_VEC_STMTS (node
).length ();
3713 i
< SLP_TREE_NUMBER_OF_VEC_STMTS (node
); i
++)
3714 SLP_TREE_VEC_STMTS (node
).quick_push (NULL
);
3716 /* Generate permutation masks for every NODE. Number of masks for each NODE
3717 is equal to GROUP_SIZE.
3718 E.g., we have a group of three nodes with three loads from the same
3719 location in each node, and the vector size is 4. I.e., we have a
3720 a0b0c0a1b1c1... sequence and we need to create the following vectors:
3721 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
3722 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
3725 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
3726 The last mask is illegal since we assume two operands for permute
3727 operation, and the mask element values can't be outside that range.
3728 Hence, the last mask must be converted into {2,5,5,5}.
3729 For the first two permutations we need the first and the second input
3730 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
3731 we need the second and the third vectors: {b1,c1,a2,b2} and
3734 int vect_stmts_counter
= 0;
3735 unsigned int index
= 0;
3736 int first_vec_index
= -1;
3737 int second_vec_index
= -1;
/* Walk every scalar element of every unrolled copy (VF iterations of
   GROUP_SIZE elements) and translate its flat load-permutation index
   into a (source vector, lane) pair.  */
3741 for (unsigned int j
= 0; j
< const_vf
; j
++)
3743 for (int k
= 0; k
< group_size
; k
++)
3745 unsigned int i
= (SLP_TREE_LOAD_PERMUTATION (node
)[k
]
3746 + j
* DR_GROUP_SIZE (stmt_info
));
3747 vec_index
= i
/ nunits
;
3748 mask_element
= i
% nunits
;
/* A VEC_PERM_EXPR has exactly two input vectors: track which two
   source vectors this mask draws from; lanes from the second one
   are offset by NUNITS in the mask.  */
3749 if (vec_index
== first_vec_index
3750 || first_vec_index
== -1)
3752 first_vec_index
= vec_index
;
3754 else if (vec_index
== second_vec_index
3755 || second_vec_index
== -1)
3757 second_vec_index
= vec_index
;
3758 mask_element
+= nunits
;
/* More than two distinct source vectors are needed — this
   permutation cannot be expressed; it must have been rejected
   during analysis (hence the assert below).  */
3762 if (dump_enabled_p ())
3764 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3765 "permutation requires at "
3766 "least three vectors ");
3767 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
3768 stmt_info
->stmt
, 0);
3770 gcc_assert (analyze_only
);
3774 gcc_assert (mask_element
< 2 * nunits
);
/* NOOP_P stays true only while the mask is the identity.  */
3775 if (mask_element
!= index
)
3777 mask
[index
++] = mask_element
;
/* A full non-identity mask has been built: verify the target can
   perform this constant permutation.  */
3779 if (index
== nunits
&& !noop_p
)
3781 indices
.new_vector (mask
, 2, nunits
);
3782 if (!can_vec_perm_const_p (mode
, indices
))
3784 if (dump_enabled_p ())
3786 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
3788 "unsupported vect permute { ");
3789 for (i
= 0; i
< nunits
; ++i
)
3791 dump_dec (MSG_MISSED_OPTIMIZATION
, mask
[i
]);
3792 dump_printf (MSG_MISSED_OPTIMIZATION
, " ");
3794 dump_printf (MSG_MISSED_OPTIMIZATION
, "}\n");
3796 gcc_assert (analyze_only
);
/* The mask is complete; unless we are only analyzing, emit the
   VEC_PERM_EXPR (or reuse the input for an identity mask).  */
3803 if (index
== nunits
)
3807 tree mask_vec
= NULL_TREE
;
3810 mask_vec
= vec_perm_indices_to_tree (mask_type
, indices
);
/* Only one source vector was referenced: duplicate it as both
   VEC_PERM_EXPR operands.  */
3812 if (second_vec_index
== -1)
3813 second_vec_index
= first_vec_index
;
3815 /* Generate the permute statement if necessary. */
3816 tree first_vec
= dr_chain
[first_vec_index
];
3817 tree second_vec
= dr_chain
[second_vec_index
];
3818 stmt_vec_info perm_stmt_info
;
3821 gassign
*stmt
= as_a
<gassign
*> (stmt_info
->stmt
);
3823 = vect_create_destination_var (gimple_assign_lhs (stmt
),
3825 perm_dest
= make_ssa_name (perm_dest
);
3827 = gimple_build_assign (perm_dest
, VEC_PERM_EXPR
,
3828 first_vec
, second_vec
,
3831 = vect_finish_stmt_generation (stmt_info
, perm_stmt
,
3835 /* If mask was NULL_TREE generate the requested
3836 identity transform. */
3837 perm_stmt_info
= vinfo
->lookup_def (first_vec
);
3839 /* Store the vector statement in NODE. */
3840 SLP_TREE_VEC_STMTS (node
)[vect_stmts_counter
++]
/* Reset the source-vector tracking for the next mask.  */
3845 first_vec_index
= -1;
3846 second_vec_index
= -1;
3855 /* Vectorize SLP instance tree in postorder. */
3858 vect_schedule_slp_instance (slp_tree node
, slp_instance instance
,
3859 scalar_stmts_to_slp_tree_map_t
*bst_map
)
3861 bool grouped_store
, is_store
;
3862 gimple_stmt_iterator si
;
3863 stmt_vec_info stmt_info
;
3864 unsigned int group_size
;
/* Only internal defs are code-generated; external/constant nodes have
   nothing to schedule.  */
3869 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
3872 /* See if we have already vectorized the same set of stmts and reuse their
3873 vectorized stmts. */
3874 if (slp_tree
*leader
= bst_map
->get (SLP_TREE_SCALAR_STMTS (node
)))
3876 SLP_TREE_VEC_STMTS (node
).safe_splice (SLP_TREE_VEC_STMTS (*leader
));
/* Record this node as the leader for its scalar-stmt set, then
   recurse into the children first (postorder).  */
3880 bst_map
->put (SLP_TREE_SCALAR_STMTS (node
).copy (), node
);
3881 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
3882 vect_schedule_slp_instance (child
, instance
, bst_map
);
3884 /* Push SLP node def-type to stmts. */
3885 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
3886 if (SLP_TREE_DEF_TYPE (child
) != vect_internal_def
)
3888 stmt_vec_info child_stmt_info
;
3889 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child
), j
, child_stmt_info
)
3890 STMT_VINFO_DEF_TYPE (child_stmt_info
) = SLP_TREE_DEF_TYPE (child
);
3893 stmt_info
= SLP_TREE_SCALAR_STMTS (node
)[0];
3895 /* VECTYPE is the type of the destination. */
3896 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3897 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
3898 group_size
= SLP_INSTANCE_GROUP_SIZE (instance
);
3900 gcc_assert (SLP_TREE_NUMBER_OF_VEC_STMTS (node
) != 0);
3901 if (!SLP_TREE_VEC_STMTS (node
).exists ())
3902 SLP_TREE_VEC_STMTS (node
).create (SLP_TREE_NUMBER_OF_VEC_STMTS (node
));
3904 if (dump_enabled_p ())
3906 dump_printf_loc (MSG_NOTE
,vect_location
,
3907 "------>vectorizing SLP node starting from: ");
3908 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt_info
->stmt
, 0);
3911 /* Vectorized stmts go before the last scalar stmt which is where
3912 all uses are ready. */
3913 stmt_vec_info last_stmt_info
= vect_find_last_scalar_stmt_in_slp (node
);
3914 si
= gsi_for_stmt (last_stmt_info
->stmt
);
3916 /* Mark the first element of the reduction chain as reduction to properly
3917 transform the node. In the analysis phase only the last element of the
3918 chain is marked as reduction. */
3919 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info
)
3920 && REDUC_GROUP_FIRST_ELEMENT (stmt_info
)
3921 && REDUC_GROUP_FIRST_ELEMENT (stmt_info
) == stmt_info
)
3923 STMT_VINFO_DEF_TYPE (stmt_info
) = vect_reduction_def
;
3924 STMT_VINFO_TYPE (stmt_info
) = reduc_vec_info_type
;
3927 /* Handle two-operation SLP nodes by vectorizing the group with
3928 both operations and then performing a merge. */
3929 if (SLP_TREE_TWO_OPERATORS (node
))
3931 gassign
*stmt
= as_a
<gassign
*> (stmt_info
->stmt
);
3932 enum tree_code code0
= gimple_assign_rhs_code (stmt
);
3933 enum tree_code ocode
= ERROR_MARK
;
3934 stmt_vec_info ostmt_info
;
/* Build a 0/1 selection mask over the group: 0 selects the lane from
   the CODE0 vectorization, 1 from the OCODE one.  */
3935 vec_perm_builder
mask (group_size
, group_size
, 1);
3936 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, ostmt_info
)
3938 gassign
*ostmt
= as_a
<gassign
*> (ostmt_info
->stmt
)
;
3939 if (gimple_assign_rhs_code (ostmt
) != code0
)
3941 mask
.quick_push (1);
3942 ocode
= gimple_assign_rhs_code (ostmt
);
3945 mask
.quick_push (0);
3947 if (ocode
!= ERROR_MARK
)
3949 vec
<stmt_vec_info
> v0
;
3950 vec
<stmt_vec_info
> v1
;
3952 tree tmask
= NULL_TREE
;
/* Vectorize the whole group as CODE0, stash the result in V0,
   then temporarily flip the stmt's code to OCODE, vectorize
   again into V1, and restore CODE0.  */
3953 vect_transform_stmt (stmt_info
, &si
, &grouped_store
, node
, instance
);
3954 v0
= SLP_TREE_VEC_STMTS (node
).copy ();
3955 SLP_TREE_VEC_STMTS (node
).truncate (0);
3956 gimple_assign_set_rhs_code (stmt
, ocode
);
3957 vect_transform_stmt (stmt_info
, &si
, &grouped_store
, node
, instance
);
3958 gimple_assign_set_rhs_code (stmt
, code0
);
3959 v1
= SLP_TREE_VEC_STMTS (node
).copy ();
3960 SLP_TREE_VEC_STMTS (node
).truncate (0);
/* Mask elements for VEC_PERM_EXPR must be integers of the same
   width as the vector element type.  */
3961 tree meltype
= build_nonstandard_integer_type
3962 (GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype
))), 1);
3963 tree mvectype
= get_same_sized_vectype (meltype
, vectype
);
3965 for (j
= 0; j
< v0
.length (); ++j
)
3967 /* Enforced by vect_build_slp_tree, which rejects variable-length
3968 vectors for SLP_TREE_TWO_OPERATORS. */
3969 unsigned int const_nunits
= nunits
.to_constant ();
3970 tree_vector_builder
melts (mvectype
, const_nunits
, 1);
3971 for (l
= 0; l
< const_nunits
; ++l
)
/* MASK[K] selects the source vector (0 -> V0, 1 -> V1); the
   element index is MASK[K] * NUNITS + L per VEC_PERM_EXPR
   mask semantics, wrapping K at GROUP_SIZE.  */
3973 if (k
>= group_size
)
3975 tree t
= build_int_cst (meltype
,
3976 mask
[k
++] * const_nunits
+ l
);
3977 melts
.quick_push (t
);
3979 tmask
= melts
.build ();
3981 /* ??? Not all targets support a VEC_PERM_EXPR with a
3982 constant mask that would translate to a vec_merge RTX
3983 (with their vec_perm_const_ok). We can either not
3984 vectorize in that case or let veclower do its job.
3985 Unfortunately that isn't too great and at least for
3986 plus/minus we'd eventually like to match targets
3987 vector addsub instructions. */
3989 vstmt
= gimple_build_assign (make_ssa_name (vectype
),
3991 gimple_assign_lhs (v0
[j
]->stmt
),
3992 gimple_assign_lhs (v1
[j
]->stmt
),
3994 SLP_TREE_VEC_STMTS (node
).quick_push
3995 (vect_finish_stmt_generation (stmt_info
, vstmt
, &si
));
/* Ordinary (single-operation) node: let vect_transform_stmt do the
   code generation.  */
4002 is_store
= vect_transform_stmt (stmt_info
, &si
, &grouped_store
, node
,
4005 /* Restore stmt def-types. */
4006 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
4007 if (SLP_TREE_DEF_TYPE (child
) != vect_internal_def
)
4009 stmt_vec_info child_stmt_info
;
4010 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child
), j
, child_stmt_info
)
4011 STMT_VINFO_DEF_TYPE (child_stmt_info
) = vect_internal_def
;
4017 /* Replace scalar calls from SLP node NODE with setting of their lhs to zero.
4018 For loop vectorization this is done in vectorizable_call, but for SLP
4019 it needs to be deferred until end of vect_schedule_slp, because multiple
4020 SLP instances may refer to the same scalar stmt. */
4023 vect_remove_slp_scalar_calls (slp_tree node
)
4026 gimple_stmt_iterator gsi
;
4030 stmt_vec_info stmt_info
;
/* Only internal-def nodes carry transformed stmts; recurse into
   children first so the whole subtree is processed.  */
4032 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
4035 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
4036 vect_remove_slp_scalar_calls (child
);
4038 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt_info
)
4040 gcall
*stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
/* Skip non-calls and calls already removed from the IL
   (gimple_bb == NULL).  */
4041 if (!stmt
|| gimple_bb (stmt
) == NULL
)
/* Leave pattern stmts and calls that still have scalar uses
   (not pure-SLP) alone.  */
4043 if (is_pattern_stmt_p (stmt_info
)
|| !PURE_SLP_STMT (stmt_info
))
4046 lhs
= gimple_call_lhs (stmt
);
/* Replace the call with "lhs = 0" and transfer the stmt_vec_info
   bookkeeping from the old call to the new assignment.  */
4047 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (TREE_TYPE (lhs
)));
4048 set_vinfo_for_stmt (new_stmt
, stmt_info
);
4049 set_vinfo_for_stmt (stmt
, NULL
);
4050 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
4051 gsi
= gsi_for_stmt (stmt
);
4052 gsi_replace (&gsi
, new_stmt
, false);
/* Keep the SSA web consistent: the lhs is now defined by the new
   assignment.  */
4053 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt
)) = new_stmt
;
4057 /* Generate vector code for all SLP instances in the loop/basic block. */
4060 vect_schedule_slp (vec_info
*vinfo
)
4062 vec
<slp_instance
> slp_instances
;
4063 slp_instance instance
;
4065 bool is_store
= false;
4068 scalar_stmts_to_slp_tree_map_t
*bst_map
4069 = new scalar_stmts_to_slp_tree_map_t ();
4070 slp_instances
= vinfo
->slp_instances
;
4071 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
4073 /* Schedule the tree of INSTANCE. */
4074 is_store
= vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance
),
4076 if (dump_enabled_p ())
4077 dump_printf_loc (MSG_NOTE
, vect_location
,
4078 "vectorizing stmts using SLP.\n");
4082 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
4084 slp_tree root
= SLP_INSTANCE_TREE (instance
);
4085 stmt_vec_info store_info
;
4087 gimple_stmt_iterator gsi
;
4089 /* Remove scalar call stmts. Do not do this for basic-block
4090 vectorization as not all uses may be vectorized.
4091 ??? Why should this be necessary? DCE should be able to
4092 remove the stmts itself.
4093 ??? For BB vectorization we can as well remove scalar
4094 stmts starting from the SLP tree root if they have no
4096 if (is_a
<loop_vec_info
> (vinfo
))
4097 vect_remove_slp_scalar_calls (root
);
4099 for (j
= 0; SLP_TREE_SCALAR_STMTS (root
).iterate (j
, &store_info
)
4100 && j
< SLP_INSTANCE_GROUP_SIZE (instance
); j
++)
4102 if (!STMT_VINFO_DATA_REF (store_info
))
4105 if (is_pattern_stmt_p (store_info
))
4106 store_info
= STMT_VINFO_RELATED_STMT (store_info
);
4107 /* Free the attached stmt_vec_info and remove the stmt. */
4108 gsi
= gsi_for_stmt (store_info
);
4109 unlink_stmt_vdef (store_info
);
4110 gsi_remove (&gsi
, true);
4111 release_defs (store_info
);
4112 free_stmt_vec_info (store_info
);