/* SLP - Basic Block Vectorization
   Copyright (C) 2007-2017 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "tree-pass.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "fold-const.h"
#include "stor-layout.h"
#include "gimple-iterator.h"
#include "tree-vectorizer.h"
#include "langhooks.h"
#include "gimple-walk.h"
#include "tree-vector-builder.h"
/* Recursively free the memory allocated for the SLP tree rooted at NODE.  */

static void
vect_free_slp_tree (slp_tree node)
{
  int i;
  slp_tree child;
  gimple *stmt;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_free_slp_tree (child);

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    /* After transform some stmts are removed and thus their vinfo is gone.  */
    if (vinfo_for_stmt (stmt))
      {
	gcc_assert (STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt)) > 0);
	STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt))--;
      }

  SLP_TREE_CHILDREN (node).release ();
  SLP_TREE_SCALAR_STMTS (node).release ();
  SLP_TREE_VEC_STMTS (node).release ();
  SLP_TREE_LOAD_PERMUTATION (node).release ();

  free (node);
}
/* Free the memory allocated for the SLP instance.  */

void
vect_free_slp_instance (slp_instance instance)
{
  vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
  SLP_INSTANCE_LOADS (instance).release ();
  free (instance);
}
/* Create an SLP node for SCALAR_STMTS.  */

static slp_tree
vect_create_new_slp_node (vec<gimple *> scalar_stmts)
{
  slp_tree node;
  gimple *stmt = scalar_stmts[0];
  unsigned int nops;
  unsigned int i;

  if (is_gimple_call (stmt))
    nops = gimple_call_num_args (stmt);
  else if (is_gimple_assign (stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
	nops++;
    }
  else if (gimple_code (stmt) == GIMPLE_PHI)
    nops = 0;
  else
    return NULL;

  node = XNEW (struct _slp_tree);
  SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
  SLP_TREE_VEC_STMTS (node).create (0);
  SLP_TREE_CHILDREN (node).create (nops);
  SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
  SLP_TREE_TWO_OPERATORS (node) = false;
  SLP_TREE_DEF_TYPE (node) = vect_internal_def;

  FOR_EACH_VEC_ELT (scalar_stmts, i, stmt)
    STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt))++;

  return node;
}
/* This structure is used in creation of an SLP tree.  Each instance
   corresponds to the same operand in a group of scalar stmts in an SLP
   node.  */
typedef struct _slp_oprnd_info
{
  /* Def-stmts for the operands.  */
  vec<gimple *> def_stmts;
  /* Information about the first statement, its vector def-type, type, the
     operand itself in case it's constant, and an indication if it's a pattern
     stmt.  */
  enum vect_def_type first_dt;
  tree first_op_type;
  bool first_pattern;
  bool second_pattern;
} *slp_oprnd_info;
/* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
   operand.  */
static vec<slp_oprnd_info>
vect_create_oprnd_info (int nops, int group_size)
{
  int i;
  slp_oprnd_info oprnd_info;
  vec<slp_oprnd_info> oprnds_info;

  oprnds_info.create (nops);
  for (i = 0; i < nops; i++)
    {
      oprnd_info = XNEW (struct _slp_oprnd_info);
      oprnd_info->def_stmts.create (group_size);
      oprnd_info->first_dt = vect_uninitialized_def;
      oprnd_info->first_op_type = NULL_TREE;
      oprnd_info->first_pattern = false;
      oprnd_info->second_pattern = false;
      oprnds_info.quick_push (oprnd_info);
    }

  return oprnds_info;
}
/* Free operands info.  */

static void
vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)
{
  int i;
  slp_oprnd_info oprnd_info;

  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      oprnd_info->def_stmts.release ();
      XDELETE (oprnd_info);
    }

  oprnds_info.release ();
}
/* Find the place of the data-ref in STMT in the interleaving chain that starts
   from FIRST_STMT.  Return -1 if the data-ref is not a part of the chain.  */

int
vect_get_place_in_interleaving_chain (gimple *stmt, gimple *first_stmt)
{
  gimple *next_stmt = first_stmt;
  int result = 0;

  if (first_stmt != GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    return -1;

  do
    {
      if (next_stmt == stmt)
	return result;
      next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
      if (next_stmt)
	result += GROUP_GAP (vinfo_for_stmt (next_stmt));
    }
  while (next_stmt);

  return -1;
}
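/* Illustrative example (the values are assumptions, not taken from a
   particular testcase): for a chain A -> B -> C with GROUP_GAP (B) == 1
   and GROUP_GAP (C) == 2, asking for the place of C accumulates the
   per-element gaps along the chain and yields 1 + 2 == 3.  */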
/* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
   they are of a valid type and that they match the defs of the first stmt of
   the SLP group (stored in OPRNDS_INFO).  This function tries to match stmts
   by swapping operands of STMT when possible.  Non-zero *SWAP indicates swap
   is required for cond_expr stmts.  Specifically, *SWAP is 1 if STMT is cond
   and operands of comparison need to be swapped; *SWAP is 2 if STMT is cond
   and code of comparison needs to be inverted.  If there is any operand swap
   in this function, *SWAP is set to non-zero value.
   If there was a fatal error return -1; if the error could be corrected by
   swapping operands of father node of this one, return 1; if everything is
   ok return 0.  */
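/* Illustrative example (an assumption for exposition, not from the original
   sources): if the first stmt is "x = a < b ? c : d" and STMT is
   "y = b > a ? e : f", swapping the comparison operands of STMT makes the
   two isomorphic, so *SWAP would be 1; if STMT were "y = a >= b ? f : e",
   inverting the comparison code (with the arms exchanged) makes them
   match, so *SWAP would be 2.  */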
static int
vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char *swap,
			     gimple *stmt, unsigned stmt_num,
			     vec<slp_oprnd_info> *oprnds_info)
{
  tree oprnd;
  unsigned int i, number_of_oprnds;
  gimple *def_stmt;
  enum vect_def_type dt = vect_uninitialized_def;
  bool pattern = false;
  slp_oprnd_info oprnd_info;
  int first_op_idx = 1;
  bool commutative = false;
  bool first_op_cond = false;
  bool first = stmt_num == 0;
  bool second = stmt_num == 1;
240 if (is_gimple_call (stmt
))
242 number_of_oprnds
= gimple_call_num_args (stmt
);
245 else if (is_gimple_assign (stmt
))
247 enum tree_code code
= gimple_assign_rhs_code (stmt
);
248 number_of_oprnds
= gimple_num_ops (stmt
) - 1;
249 /* Swap can only be done for cond_expr if asked to, otherwise we
250 could result in different comparison code to the first stmt. */
251 if (gimple_assign_rhs_code (stmt
) == COND_EXPR
252 && COMPARISON_CLASS_P (gimple_assign_rhs1 (stmt
)))
254 first_op_cond
= true;
258 commutative
= commutative_tree_code (code
);
263 bool swapped
= (*swap
!= 0);
264 gcc_assert (!swapped
|| first_op_cond
);
265 for (i
= 0; i
< number_of_oprnds
; i
++)
270 /* Map indicating how operands of cond_expr should be swapped. */
271 int maps
[3][4] = {{0, 1, 2, 3}, {1, 0, 2, 3}, {0, 1, 3, 2}};
272 int *map
= maps
[*swap
];
275 oprnd
= TREE_OPERAND (gimple_op (stmt
, first_op_idx
), map
[i
]);
277 oprnd
= gimple_op (stmt
, map
[i
]);
280 oprnd
= gimple_op (stmt
, first_op_idx
+ (swapped
? !i
: i
));
282 oprnd_info
= (*oprnds_info
)[i
];
284 if (!vect_is_simple_use (oprnd
, vinfo
, &def_stmt
, &dt
))
286 if (dump_enabled_p ())
288 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
289 "Build SLP failed: can't analyze def for ");
290 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, oprnd
);
291 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
297 /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
298 from the pattern. Check that all the stmts of the node are in the
300 if (def_stmt
&& gimple_bb (def_stmt
)
301 && vect_stmt_in_region_p (vinfo
, def_stmt
)
302 && vinfo_for_stmt (def_stmt
)
303 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt
))
304 && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt
))
305 && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt
)))
308 if (!first
&& !oprnd_info
->first_pattern
309 /* Allow different pattern state for the defs of the
310 first stmt in reduction chains. */
311 && (oprnd_info
->first_dt
!= vect_reduction_def
312 || (!second
&& !oprnd_info
->second_pattern
)))
322 if (dump_enabled_p ())
324 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
325 "Build SLP failed: some of the stmts"
326 " are in a pattern, and others are not ");
327 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, oprnd
);
328 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
334 def_stmt
= STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt
));
335 dt
= STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt
));
337 if (dt
== vect_unknown_def_type
)
339 if (dump_enabled_p ())
340 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
341 "Unsupported pattern.\n");
345 switch (gimple_code (def_stmt
))
352 if (dump_enabled_p ())
353 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
354 "unsupported defining stmt:\n");
360 oprnd_info
->second_pattern
= pattern
;
364 oprnd_info
->first_dt
= dt
;
365 oprnd_info
->first_pattern
= pattern
;
366 oprnd_info
->first_op_type
= TREE_TYPE (oprnd
);
370 /* Not first stmt of the group, check that the def-stmt/s match
371 the def-stmt/s of the first stmt. Allow different definition
372 types for reduction chains: the first stmt must be a
373 vect_reduction_def (a phi node), and the rest
374 vect_internal_def. */
375 if (((oprnd_info
->first_dt
!= dt
376 && !(oprnd_info
->first_dt
== vect_reduction_def
377 && dt
== vect_internal_def
)
378 && !((oprnd_info
->first_dt
== vect_external_def
379 || oprnd_info
->first_dt
== vect_constant_def
)
380 && (dt
== vect_external_def
381 || dt
== vect_constant_def
)))
382 || !types_compatible_p (oprnd_info
->first_op_type
,
385 /* Try swapping operands if we got a mismatch. */
394 if (dump_enabled_p ())
395 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
396 "Build SLP failed: different types\n");
402 /* Check the types of the definitions. */
405 case vect_constant_def
:
406 case vect_external_def
:
409 case vect_reduction_def
:
410 case vect_induction_def
:
411 case vect_internal_def
:
412 oprnd_info
->def_stmts
.quick_push (def_stmt
);
416 /* FORNOW: Not supported. */
417 if (dump_enabled_p ())
419 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
420 "Build SLP failed: illegal type of def ");
421 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, oprnd
);
422 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
  /* Swap operands.  */
  if (swapped)
    {
      /* If there are already uses of this stmt in a SLP instance then
         we've committed to the operand order and can't swap it.  */
      if (STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt)) != 0)
	{
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "Build SLP failed: cannot swap operands of "
			       "shared stmt ");
	      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
	    }
	  return -1;
	}

      if (first_op_cond)
	{
	  tree cond = gimple_assign_rhs1 (stmt);
	  enum tree_code code = TREE_CODE (cond);

	  /* Swap.  */
	  if (*swap == 1)
	    {
	      swap_ssa_operands (stmt, &TREE_OPERAND (cond, 0),
				 &TREE_OPERAND (cond, 1));
	      TREE_SET_CODE (cond, swap_tree_comparison (code));
	    }
	  /* Invert.  */
	  else
	    {
	      swap_ssa_operands (stmt, gimple_assign_rhs2_ptr (stmt),
				 gimple_assign_rhs3_ptr (stmt));
	      bool honor_nans = HONOR_NANS (TREE_OPERAND (cond, 0));
	      code = invert_tree_comparison (TREE_CODE (cond), honor_nans);
	      gcc_assert (code != ERROR_MARK);
	      TREE_SET_CODE (cond, code);
	    }
	}
      else
	swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
			   gimple_assign_rhs2_ptr (stmt));
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "swapped operands to match def types in ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
	  dump_printf (MSG_NOTE, "\n");
	}
    }

  *swap = swapped;
  return 0;
}
/* A subroutine of vect_build_slp_tree for checking VECTYPE, which is the
   caller's attempt to find the vector type in STMT with the narrowest
   element type.  Return true if VECTYPE is nonnull and if it is valid
   for VINFO.  When returning true, update MAX_NUNITS to reflect the
   number of units in VECTYPE.  VINFO, GROUP_SIZE and MAX_NUNITS are
   as for vect_build_slp_tree.  */

static bool
vect_record_max_nunits (vec_info *vinfo, gimple *stmt, unsigned int group_size,
			tree vectype, unsigned int *max_nunits)
{
  if (!vectype)
    {
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "Build SLP failed: unsupported data-type in ");
	  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
	  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	}
      /* Fatal mismatch.  */
      return false;
    }

  /* If populating the vector type requires unrolling then fail
     before adjusting *max_nunits for basic-block vectorization.  */
  if (is_a <bb_vec_info> (vinfo)
      && TYPE_VECTOR_SUBPARTS (vectype) > group_size)
    {
      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
		       "Build SLP failed: unrolling required "
		       "in basic block SLP\n");
      /* Fatal mismatch.  */
      return false;
    }

  /* In case of multiple types we need to detect the smallest type.  */
  if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
    *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);

  return true;
}
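/* Illustrative example (the numbers are assumptions, not from a testcase):
   for basic-block SLP of a group of 2 stmts with a V4SI vector type,
   TYPE_VECTOR_SUBPARTS (vectype) == 4 > group_size == 2, so the build is
   rejected rather than unrolled; in loop SLP the same situation simply
   raises *max_nunits to 4.  */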
/* Verify if the scalar stmts STMTS are isomorphic, require data
   permutation or are of unsupported types of operation.  Return
   true if they are, otherwise return false and indicate in *MATCHES
   which stmts are not isomorphic to the first one.  If MATCHES[0]
   is false then this indicates the comparison could not be
   carried out or the stmts will never be vectorized by SLP.

   Note COND_EXPR is possibly isomorphic to another one after swapping its
   operands.  Set SWAP[i] to 1 if stmt I is COND_EXPR and isomorphic to
   the first stmt by swapping the two operands of comparison; set SWAP[i]
   to 2 if stmt I is isomorphic to the first stmt by inverting the code
   of comparison.  Take A1 >= B1 ? X1 : Y1 as an example, it can be swapped
   to (B1 <= A1 ? X1 : Y1); or be inverted to (A1 < B1) ? Y1 : X1.  */
542 vect_build_slp_tree_1 (vec_info
*vinfo
, unsigned char *swap
,
543 vec
<gimple
*> stmts
, unsigned int group_size
,
544 unsigned nops
, unsigned int *max_nunits
,
545 bool *matches
, bool *two_operators
)
548 gimple
*first_stmt
= stmts
[0], *stmt
= stmts
[0];
549 enum tree_code first_stmt_code
= ERROR_MARK
;
550 enum tree_code alt_stmt_code
= ERROR_MARK
;
551 enum tree_code rhs_code
= ERROR_MARK
;
552 enum tree_code first_cond_code
= ERROR_MARK
;
554 bool need_same_oprnds
= false;
555 tree vectype
= NULL_TREE
, scalar_type
, first_op1
= NULL_TREE
;
558 machine_mode optab_op2_mode
;
559 machine_mode vec_mode
;
561 gimple
*first_load
= NULL
, *prev_first_load
= NULL
;
563 /* For every stmt in NODE find its def stmt/s. */
564 FOR_EACH_VEC_ELT (stmts
, i
, stmt
)
569 if (dump_enabled_p ())
571 dump_printf_loc (MSG_NOTE
, vect_location
, "Build SLP for ");
572 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
575 /* Fail to vectorize statements marked as unvectorizable. */
576 if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt
)))
578 if (dump_enabled_p ())
580 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
581 "Build SLP failed: unvectorizable statement ");
582 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
584 /* Fatal mismatch. */
589 lhs
= gimple_get_lhs (stmt
);
590 if (lhs
== NULL_TREE
)
592 if (dump_enabled_p ())
594 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
595 "Build SLP failed: not GIMPLE_ASSIGN nor "
597 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
599 /* Fatal mismatch. */
604 scalar_type
= vect_get_smallest_scalar_type (stmt
, &dummy
, &dummy
);
605 vectype
= get_vectype_for_scalar_type (scalar_type
);
606 if (!vect_record_max_nunits (vinfo
, stmt
, group_size
, vectype
,
609 /* Fatal mismatch. */
614 if (gcall
*call_stmt
= dyn_cast
<gcall
*> (stmt
))
616 rhs_code
= CALL_EXPR
;
617 if (gimple_call_internal_p (call_stmt
)
618 || gimple_call_tail_p (call_stmt
)
619 || gimple_call_noreturn_p (call_stmt
)
620 || !gimple_call_nothrow_p (call_stmt
)
621 || gimple_call_chain (call_stmt
))
623 if (dump_enabled_p ())
625 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
626 "Build SLP failed: unsupported call type ");
627 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
630 /* Fatal mismatch. */
636 rhs_code
= gimple_assign_rhs_code (stmt
);
638 /* Check the operation. */
641 first_stmt_code
= rhs_code
;
643 /* Shift arguments should be equal in all the packed stmts for a
644 vector shift with scalar shift operand. */
645 if (rhs_code
== LSHIFT_EXPR
|| rhs_code
== RSHIFT_EXPR
646 || rhs_code
== LROTATE_EXPR
647 || rhs_code
== RROTATE_EXPR
)
649 vec_mode
= TYPE_MODE (vectype
);
651 /* First see if we have a vector/vector shift. */
652 optab
= optab_for_tree_code (rhs_code
, vectype
,
656 || optab_handler (optab
, vec_mode
) == CODE_FOR_nothing
)
658 /* No vector/vector shift, try for a vector/scalar shift. */
659 optab
= optab_for_tree_code (rhs_code
, vectype
,
664 if (dump_enabled_p ())
665 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
666 "Build SLP failed: no optab.\n");
667 /* Fatal mismatch. */
671 icode
= (int) optab_handler (optab
, vec_mode
);
672 if (icode
== CODE_FOR_nothing
)
674 if (dump_enabled_p ())
675 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
677 "op not supported by target.\n");
678 /* Fatal mismatch. */
682 optab_op2_mode
= insn_data
[icode
].operand
[2].mode
;
683 if (!VECTOR_MODE_P (optab_op2_mode
))
685 need_same_oprnds
= true;
686 first_op1
= gimple_assign_rhs2 (stmt
);
690 else if (rhs_code
== WIDEN_LSHIFT_EXPR
)
692 need_same_oprnds
= true;
693 first_op1
= gimple_assign_rhs2 (stmt
);
698 if (first_stmt_code
!= rhs_code
699 && alt_stmt_code
== ERROR_MARK
)
700 alt_stmt_code
= rhs_code
;
701 if (first_stmt_code
!= rhs_code
702 && (first_stmt_code
!= IMAGPART_EXPR
703 || rhs_code
!= REALPART_EXPR
)
704 && (first_stmt_code
!= REALPART_EXPR
705 || rhs_code
!= IMAGPART_EXPR
)
706 /* Handle mismatches in plus/minus by computing both
707 and merging the results. */
708 && !((first_stmt_code
== PLUS_EXPR
709 || first_stmt_code
== MINUS_EXPR
)
710 && (alt_stmt_code
== PLUS_EXPR
711 || alt_stmt_code
== MINUS_EXPR
)
712 && rhs_code
== alt_stmt_code
)
713 && !(STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt
))
714 && (first_stmt_code
== ARRAY_REF
715 || first_stmt_code
== BIT_FIELD_REF
716 || first_stmt_code
== INDIRECT_REF
717 || first_stmt_code
== COMPONENT_REF
718 || first_stmt_code
== MEM_REF
)))
720 if (dump_enabled_p ())
722 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
723 "Build SLP failed: different operation "
725 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
726 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
728 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
736 && !operand_equal_p (first_op1
, gimple_assign_rhs2 (stmt
), 0))
738 if (dump_enabled_p ())
740 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
741 "Build SLP failed: different shift "
743 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
749 if (rhs_code
== CALL_EXPR
)
751 gimple
*first_stmt
= stmts
[0];
752 if (gimple_call_num_args (stmt
) != nops
753 || !operand_equal_p (gimple_call_fn (first_stmt
),
754 gimple_call_fn (stmt
), 0)
755 || gimple_call_fntype (first_stmt
)
756 != gimple_call_fntype (stmt
))
758 if (dump_enabled_p ())
760 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
761 "Build SLP failed: different calls in ");
762 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
771 /* Grouped store or load. */
772 if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt
)))
774 if (REFERENCE_CLASS_P (lhs
))
782 first_load
= GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
));
785 /* Check that there are no loads from different interleaving
786 chains in the same node. */
787 if (prev_first_load
!= first_load
)
789 if (dump_enabled_p ())
791 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
793 "Build SLP failed: different "
794 "interleaving chains in one node ");
795 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
803 prev_first_load
= first_load
;
805 } /* Grouped access. */
808 if (TREE_CODE_CLASS (rhs_code
) == tcc_reference
)
810 /* Not grouped load. */
811 if (dump_enabled_p ())
813 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
814 "Build SLP failed: not grouped load ");
815 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
818 /* FORNOW: Not grouped loads are not supported. */
819 /* Fatal mismatch. */
824 /* Not memory operation. */
825 if (TREE_CODE_CLASS (rhs_code
) != tcc_binary
826 && TREE_CODE_CLASS (rhs_code
) != tcc_unary
827 && TREE_CODE_CLASS (rhs_code
) != tcc_expression
828 && TREE_CODE_CLASS (rhs_code
) != tcc_comparison
829 && rhs_code
!= CALL_EXPR
)
831 if (dump_enabled_p ())
833 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
834 "Build SLP failed: operation");
835 dump_printf (MSG_MISSED_OPTIMIZATION
, " unsupported ");
836 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
838 /* Fatal mismatch. */
843 if (rhs_code
== COND_EXPR
)
845 tree cond_expr
= gimple_assign_rhs1 (stmt
);
846 enum tree_code cond_code
= TREE_CODE (cond_expr
);
847 enum tree_code swap_code
= ERROR_MARK
;
848 enum tree_code invert_code
= ERROR_MARK
;
851 first_cond_code
= TREE_CODE (cond_expr
);
852 else if (TREE_CODE_CLASS (cond_code
) == tcc_comparison
)
854 bool honor_nans
= HONOR_NANS (TREE_OPERAND (cond_expr
, 0));
855 swap_code
= swap_tree_comparison (cond_code
);
856 invert_code
= invert_tree_comparison (cond_code
, honor_nans
);
859 if (first_cond_code
== cond_code
)
861 /* Isomorphic can be achieved by swapping. */
862 else if (first_cond_code
== swap_code
)
864 /* Isomorphic can be achieved by inverting. */
865 else if (first_cond_code
== invert_code
)
869 if (dump_enabled_p ())
871 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
872 "Build SLP failed: different"
874 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
886 for (i
= 0; i
< group_size
; ++i
)
  /* If we allowed a two-operation SLP node verify the target can cope
     with the permute we are going to use.  */
  if (alt_stmt_code != ERROR_MARK
      && TREE_CODE_CLASS (alt_stmt_code) != tcc_reference)
    {
      unsigned int count = TYPE_VECTOR_SUBPARTS (vectype);
      auto_vec_perm_indices sel (count);
      for (i = 0; i < count; ++i)
	{
	  unsigned int elt = i;
	  if (gimple_assign_rhs_code (stmts[i % group_size]) == alt_stmt_code)
	    elt += count;
	  sel.quick_push (elt);
	}
      if (!can_vec_perm_p (TYPE_MODE (vectype), false, &sel))
	{
	  for (i = 0; i < group_size; ++i)
	    if (gimple_assign_rhs_code (stmts[i]) == alt_stmt_code)
	      {
		matches[i] = false;
		if (dump_enabled_p ())
		  {
		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				     "Build SLP failed: different operation "
				     "in stmt ");
		    dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
				      stmts[i], 0);
		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				     "original stmt ");
		    dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
				      first_stmt, 0);
		  }
	      }
	  return false;
	}
      *two_operators = true;
    }

  return true;
}
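/* Illustrative example (an assumption about typical input, not from the
   original sources): for

     a[0] = b[0] + c[0];
     a[1] = b[1] - c[1];

   the group mixes PLUS_EXPR and MINUS_EXPR.  Both vector results are
   computed and then blended with a permute that selects lane 0 from the
   addition and lane 1 from the subtraction, hence the can_vec_perm_p
   check above.  */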
/* Traits for the hash_set to record failed SLP builds for a stmt set.
   Note we never remove apart from at destruction time so we do not
   need a special value for deleted that differs from empty.  */
struct bst_traits
{
  typedef vec <gimple *> value_type;
  typedef vec <gimple *> compare_type;
  static inline hashval_t hash (value_type);
  static inline bool equal (value_type existing, value_type candidate);
  static inline bool is_empty (value_type x) { return !x.exists (); }
  static inline bool is_deleted (value_type x) { return !x.exists (); }
  static inline void mark_empty (value_type &x) { x.release (); }
  static inline void mark_deleted (value_type &x) { x.release (); }
  static inline void remove (value_type &x) { x.release (); }
};
inline hashval_t
bst_traits::hash (value_type x)
{
  inchash::hash h;
  for (unsigned i = 0; i < x.length (); ++i)
    h.add_int (gimple_uid (x[i]));
  return h.end ();
}
inline bool
bst_traits::equal (value_type existing, value_type candidate)
{
  if (existing.length () != candidate.length ())
    return false;
  for (unsigned i = 0; i < existing.length (); ++i)
    if (existing[i] != candidate[i])
      return false;
  return true;
}

typedef hash_set <vec <gimple *>, bst_traits> scalar_stmts_set_t;
static scalar_stmts_set_t *bst_fail;
static slp_tree
vect_build_slp_tree_2 (vec_info *vinfo,
		       vec<gimple *> stmts, unsigned int group_size,
		       unsigned int *max_nunits,
		       vec<slp_tree> *loads,
		       bool *matches, unsigned *npermutes, unsigned *tree_size,
		       unsigned max_tree_size);

static slp_tree
vect_build_slp_tree (vec_info *vinfo,
		     vec<gimple *> stmts, unsigned int group_size,
		     unsigned int *max_nunits,
		     vec<slp_tree> *loads,
		     bool *matches, unsigned *npermutes, unsigned *tree_size,
		     unsigned max_tree_size)
{
  if (bst_fail->contains (stmts))
    return NULL;
  slp_tree res = vect_build_slp_tree_2 (vinfo, stmts, group_size, max_nunits,
					loads, matches, npermutes, tree_size,
					max_tree_size);
  /* When SLP build fails for stmts record this, otherwise SLP build
     can be exponential in time when we allow to construct parts from
     scalars, see PR81723.  */
  if (! res)
    {
      vec <gimple *> x;
      x.create (stmts.length ());
      x.splice (stmts);
      bst_fail->add (x);
    }
  return res;
}
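/* The set above acts as a memoization cache keyed on the scalar stmt
   vector: once a given set of stmts has failed to form an SLP tree, a
   later attempt with the same stmts is rejected immediately instead of
   re-running the recursive build (see PR81723 for the motivating
   compile-time blow-up).  */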
1002 /* Recursively build an SLP tree starting from NODE.
1003 Fail (and return a value not equal to zero) if def-stmts are not
1004 isomorphic, require data permutation or are of unsupported types of
1005 operation. Otherwise, return 0.
1006 The value returned is the depth in the SLP tree where a mismatch
1010 vect_build_slp_tree_2 (vec_info
*vinfo
,
1011 vec
<gimple
*> stmts
, unsigned int group_size
,
1012 unsigned int *max_nunits
,
1013 vec
<slp_tree
> *loads
,
1014 bool *matches
, unsigned *npermutes
, unsigned *tree_size
,
1015 unsigned max_tree_size
)
1017 unsigned nops
, i
, this_tree_size
= 0, this_max_nunits
= *max_nunits
;
1024 if (is_gimple_call (stmt
))
1025 nops
= gimple_call_num_args (stmt
);
1026 else if (is_gimple_assign (stmt
))
1028 nops
= gimple_num_ops (stmt
) - 1;
1029 if (gimple_assign_rhs_code (stmt
) == COND_EXPR
)
1032 else if (gimple_code (stmt
) == GIMPLE_PHI
)
1037 /* If the SLP node is a PHI (induction or reduction), terminate
1039 if (gimple_code (stmt
) == GIMPLE_PHI
)
1041 tree scalar_type
= TREE_TYPE (PHI_RESULT (stmt
));
1042 tree vectype
= get_vectype_for_scalar_type (scalar_type
);
1043 if (!vect_record_max_nunits (vinfo
, stmt
, group_size
, vectype
,
1047 vect_def_type def_type
= STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt
));
1048 /* Induction from different IVs is not supported. */
1049 if (def_type
== vect_induction_def
)
1051 FOR_EACH_VEC_ELT (stmts
, i
, stmt
)
1052 if (stmt
!= stmts
[0])
1057 /* Else def types have to match. */
1058 FOR_EACH_VEC_ELT (stmts
, i
, stmt
)
1060 /* But for reduction chains only check on the first stmt. */
1061 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
))
1062 && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)) != stmt
)
1064 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt
)) != def_type
)
1068 node
= vect_create_new_slp_node (stmts
);
1073 bool two_operators
= false;
1074 unsigned char *swap
= XALLOCAVEC (unsigned char, group_size
);
1075 if (!vect_build_slp_tree_1 (vinfo
, swap
,
1076 stmts
, group_size
, nops
,
1077 &this_max_nunits
, matches
, &two_operators
))
1080 /* If the SLP node is a load, terminate the recursion. */
1081 if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt
))
1082 && DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt
))))
1084 *max_nunits
= this_max_nunits
;
1085 node
= vect_create_new_slp_node (stmts
);
1086 loads
->safe_push (node
);
1090 /* Get at the operands, verifying they are compatible. */
1091 vec
<slp_oprnd_info
> oprnds_info
= vect_create_oprnd_info (nops
, group_size
);
1092 slp_oprnd_info oprnd_info
;
1093 FOR_EACH_VEC_ELT (stmts
, i
, stmt
)
1095 int res
= vect_get_and_check_slp_defs (vinfo
, &swap
[i
],
1096 stmt
, i
, &oprnds_info
);
1098 matches
[(res
== -1) ? 0 : i
] = false;
1102 for (i
= 0; i
< group_size
; ++i
)
1105 vect_free_oprnd_info (oprnds_info
);
1109 auto_vec
<slp_tree
, 4> children
;
1110 auto_vec
<slp_tree
> this_loads
;
1115 max_tree_size
-= *tree_size
;
1117 /* Create SLP_TREE nodes for the definition node/s. */
1118 FOR_EACH_VEC_ELT (oprnds_info
, i
, oprnd_info
)
1121 unsigned old_nloads
= this_loads
.length ();
1122 unsigned old_tree_size
= this_tree_size
;
1125 if (oprnd_info
->first_dt
!= vect_internal_def
1126 && oprnd_info
->first_dt
!= vect_reduction_def
1127 && oprnd_info
->first_dt
!= vect_induction_def
)
1130 if (++this_tree_size
> max_tree_size
)
1132 FOR_EACH_VEC_ELT (children
, j
, child
)
1133 vect_free_slp_tree (child
);
1134 vect_free_oprnd_info (oprnds_info
);
1138 if ((child
= vect_build_slp_tree (vinfo
, oprnd_info
->def_stmts
,
1139 group_size
, &this_max_nunits
,
1140 &this_loads
, matches
, npermutes
,
1142 max_tree_size
)) != NULL
)
1144 /* If we have all children of child built up from scalars then just
1145 throw that away and build it up this node from scalars. */
1146 if (!SLP_TREE_CHILDREN (child
).is_empty ()
1147 /* ??? Rejecting patterns this way doesn't work. We'd have to
1148 do extra work to cancel the pattern so the uses see the
1150 && !is_pattern_stmt_p
1151 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child
)[0])))
1153 slp_tree grandchild
;
1155 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child
), j
, grandchild
)
1156 if (SLP_TREE_DEF_TYPE (grandchild
) == vect_internal_def
)
1161 this_loads
.truncate (old_nloads
);
1162 this_tree_size
= old_tree_size
;
1163 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child
), j
, grandchild
)
1164 vect_free_slp_tree (grandchild
);
1165 SLP_TREE_CHILDREN (child
).truncate (0);
1167 dump_printf_loc (MSG_NOTE
, vect_location
,
1168 "Building parent vector operands from "
1169 "scalars instead\n");
1170 oprnd_info
->def_stmts
= vNULL
;
1171 SLP_TREE_DEF_TYPE (child
) = vect_external_def
;
1172 children
.safe_push (child
);
1177 oprnd_info
->def_stmts
= vNULL
;
1178 children
.safe_push (child
);
      /* If the SLP build failed fatally and we analyze a basic-block
         simply treat nodes we fail to build as externally defined
         (and thus build vectors from the scalar defs).
         The cost model will reject outright expensive cases.
         ??? This doesn't treat cases where permutation ultimately
         fails (or we don't try permutation below).  Ideally we'd
         even compute a permutation that will end up with the maximum
         SLP tree size...  */
1190 if (is_a
<bb_vec_info
> (vinfo
)
1192 /* ??? Rejecting patterns this way doesn't work. We'd have to
1193 do extra work to cancel the pattern so the uses see the
1195 && !is_pattern_stmt_p (vinfo_for_stmt (stmt
)))
1197 dump_printf_loc (MSG_NOTE
, vect_location
,
1198 "Building vector operands from scalars\n");
1199 child
= vect_create_new_slp_node (oprnd_info
->def_stmts
);
1200 SLP_TREE_DEF_TYPE (child
) = vect_external_def
;
1201 children
.safe_push (child
);
1202 oprnd_info
->def_stmts
= vNULL
;
1206 /* If the SLP build for operand zero failed and operand zero
1207 and one can be commutated try that for the scalar stmts
1208 that failed the match. */
1210 /* A first scalar stmt mismatch signals a fatal mismatch. */
1212 /* ??? For COND_EXPRs we can swap the comparison operands
1213 as well as the arms under some constraints. */
1215 && oprnds_info
[1]->first_dt
== vect_internal_def
1216 && is_gimple_assign (stmt
)
1217 && commutative_tree_code (gimple_assign_rhs_code (stmt
))
	  /* Do so only if the number of not successful permutes was not more
	     than a cut-off as re-trying the recursive match on
	     possibly each level of the tree would expose exponential
	     behavior.  */
1225 /* Verify if we can safely swap or if we committed to a specific
1226 operand order already. */
1227 for (j
= 0; j
< group_size
; ++j
)
1230 || STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmts
[j
]))))
1232 if (dump_enabled_p ())
1234 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1235 "Build SLP failed: cannot swap operands "
1237 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
1243 /* Swap mismatched definition stmts. */
1244 dump_printf_loc (MSG_NOTE
, vect_location
,
1245 "Re-trying with swapped operands of stmts ");
1246 for (j
= 0; j
< group_size
; ++j
)
1249 std::swap (oprnds_info
[0]->def_stmts
[j
],
1250 oprnds_info
[1]->def_stmts
[j
]);
1251 dump_printf (MSG_NOTE
, "%d ", j
);
1253 dump_printf (MSG_NOTE
, "\n");
1254 /* And try again with scratch 'matches' ... */
1255 bool *tem
= XALLOCAVEC (bool, group_size
);
1256 if ((child
= vect_build_slp_tree (vinfo
, oprnd_info
->def_stmts
,
1257 group_size
, &this_max_nunits
,
1258 &this_loads
, tem
, npermutes
,
1260 max_tree_size
)) != NULL
)
1262 /* ... so if successful we can apply the operand swapping
1263 to the GIMPLE IL. This is necessary because for example
1264 vect_get_slp_defs uses operand indexes and thus expects
1265 canonical operand order. This is also necessary even
1266 if we end up building the operand from scalars as
1267 we'll continue to process swapped operand two. */
1268 for (j
= 0; j
< group_size
; ++j
)
1270 gimple
*stmt
= stmts
[j
];
1271 gimple_set_plf (stmt
, GF_PLF_1
, false);
1273 for (j
= 0; j
< group_size
; ++j
)
1275 gimple
*stmt
= stmts
[j
];
1278 /* Avoid swapping operands twice. */
1279 if (gimple_plf (stmt
, GF_PLF_1
))
1281 swap_ssa_operands (stmt
, gimple_assign_rhs1_ptr (stmt
),
1282 gimple_assign_rhs2_ptr (stmt
));
1283 gimple_set_plf (stmt
, GF_PLF_1
, true);
1286 /* Verify we swap all duplicates or none. */
1288 for (j
= 0; j
< group_size
; ++j
)
1290 gimple
*stmt
= stmts
[j
];
1291 gcc_assert (gimple_plf (stmt
, GF_PLF_1
) == ! matches
[j
]);
1294 /* If we have all children of child built up from scalars then
1295 just throw that away and build it up this node from scalars. */
1296 if (!SLP_TREE_CHILDREN (child
).is_empty ()
1297 /* ??? Rejecting patterns this way doesn't work. We'd have
1298 to do extra work to cancel the pattern so the uses see the
1300 && !is_pattern_stmt_p
1301 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child
)[0])))
1304 slp_tree grandchild
;
1306 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child
), j
, grandchild
)
1307 if (SLP_TREE_DEF_TYPE (grandchild
) == vect_internal_def
)
1312 this_loads
.truncate (old_nloads
);
1313 this_tree_size
= old_tree_size
;
1314 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child
), j
, grandchild
)
1315 vect_free_slp_tree (grandchild
);
1316 SLP_TREE_CHILDREN (child
).truncate (0);
1318 dump_printf_loc (MSG_NOTE
, vect_location
,
1319 "Building parent vector operands from "
1320 "scalars instead\n");
1321 oprnd_info
->def_stmts
= vNULL
;
1322 SLP_TREE_DEF_TYPE (child
) = vect_external_def
;
1323 children
.safe_push (child
);
1328 oprnd_info
->def_stmts
= vNULL
;
1329 children
.safe_push (child
);
1337 gcc_assert (child
== NULL
);
1338 FOR_EACH_VEC_ELT (children
, j
, child
)
1339 vect_free_slp_tree (child
);
1340 vect_free_oprnd_info (oprnds_info
);
1344 vect_free_oprnd_info (oprnds_info
);
1347 *tree_size
+= this_tree_size
;
1348 *max_nunits
= this_max_nunits
;
1349 loads
->safe_splice (this_loads
);
1351 node
= vect_create_new_slp_node (stmts
);
1352 SLP_TREE_TWO_OPERATORS (node
) = two_operators
;
1353 SLP_TREE_CHILDREN (node
).splice (children
);
1357 /* Dump a slp tree NODE using flags specified in DUMP_KIND. */
1360 vect_print_slp_tree (dump_flags_t dump_kind
, location_t loc
, slp_tree node
)
1366 dump_printf_loc (dump_kind
, loc
, "node%s\n",
1367 SLP_TREE_DEF_TYPE (node
) != vect_internal_def
1368 ? " (external)" : "");
1369 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
1371 dump_printf_loc (dump_kind
, loc
, "\tstmt %d ", i
);
1372 dump_gimple_stmt (dump_kind
, TDF_SLIM
, stmt
, 0);
1374 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1375 vect_print_slp_tree (dump_kind
, loc
, child
);
1379 /* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
1380 If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
1381 J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
1382 stmts in NODE are to be marked. */
1385 vect_mark_slp_stmts (slp_tree node
, enum slp_vect_type mark
, int j
)
1391 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
1394 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
1395 if (j
< 0 || i
== j
)
1396 STMT_SLP_TYPE (vinfo_for_stmt (stmt
)) = mark
;
1398 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1399 vect_mark_slp_stmts (child
, mark
, j
);
1403 /* Mark the statements of the tree rooted at NODE as relevant (vect_used). */
1406 vect_mark_slp_stmts_relevant (slp_tree node
)
1410 stmt_vec_info stmt_info
;
1413 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
1416 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
1418 stmt_info
= vinfo_for_stmt (stmt
);
1419 gcc_assert (!STMT_VINFO_RELEVANT (stmt_info
)
1420 || STMT_VINFO_RELEVANT (stmt_info
) == vect_used_in_scope
);
1421 STMT_VINFO_RELEVANT (stmt_info
) = vect_used_in_scope
;
1424 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1425 vect_mark_slp_stmts_relevant (child
);
/* Rearrange the statements of NODE according to PERMUTATION.  */

static void
vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
			  vec<unsigned> permutation)
{
  gimple *stmt;
  vec<gimple *> tmp_stmts;
  unsigned int i;
  slp_tree child;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_slp_rearrange_stmts (child, group_size, permutation);

  gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
  tmp_stmts.create (group_size);
  tmp_stmts.quick_grow_cleared (group_size);

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    tmp_stmts[permutation[i]] = stmt;

  SLP_TREE_SCALAR_STMTS (node).release ();
  SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
}
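/* Illustrative example (the values are assumptions): with group_size == 4
   and permutation {2, 0, 3, 1}, the stmt originally at index i moves to
   index permutation[i], so {s0, s1, s2, s3} becomes {s1, s3, s0, s2}; the
   same reordering is applied recursively to every child node.  */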
1455 /* Attempt to reorder stmts in a reduction chain so that we don't
1456 require any load permutation. Return true if that was possible,
1457 otherwise return false. */
1460 vect_attempt_slp_rearrange_stmts (slp_instance slp_instn
)
1462 unsigned int group_size
= SLP_INSTANCE_GROUP_SIZE (slp_instn
);
1465 slp_tree node
, load
;
1467 /* Compare all the permutation sequences to the first one. We know
1468 that at least one load is permuted. */
1469 node
= SLP_INSTANCE_LOADS (slp_instn
)[0];
1470 if (!node
->load_permutation
.exists ())
1472 for (i
= 1; SLP_INSTANCE_LOADS (slp_instn
).iterate (i
, &load
); ++i
)
1474 if (!load
->load_permutation
.exists ())
1476 FOR_EACH_VEC_ELT (load
->load_permutation
, j
, lidx
)
1477 if (lidx
!= node
->load_permutation
[j
])
1481 /* Check that the loads in the first sequence are different and there
1482 are no gaps between them. */
1483 auto_sbitmap
load_index (group_size
);
1484 bitmap_clear (load_index
);
1485 FOR_EACH_VEC_ELT (node
->load_permutation
, i
, lidx
)
1487 if (lidx
>= group_size
)
1489 if (bitmap_bit_p (load_index
, lidx
))
1492 bitmap_set_bit (load_index
, lidx
);
1494 for (i
= 0; i
< group_size
; i
++)
1495 if (!bitmap_bit_p (load_index
, i
))
1498 /* This permutation is valid for reduction. Since the order of the
1499 statements in the nodes is not important unless they are memory
1500 accesses, we can rearrange the statements in all the nodes
1501 according to the order of the loads. */
1502 vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn
), group_size
,
1503 node
->load_permutation
);
1505 /* We are done, no actual permutations need to be generated. */
1506 unsigned int unrolling_factor
= SLP_INSTANCE_UNROLLING_FACTOR (slp_instn
);
1507 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1509 gimple
*first_stmt
= SLP_TREE_SCALAR_STMTS (node
)[0];
1510 first_stmt
= GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt
));
1511 /* But we have to keep those permutations that are required because
1512 of handling of gaps. */
1513 if (unrolling_factor
== 1
1514 || (group_size
== GROUP_SIZE (vinfo_for_stmt (first_stmt
))
1515 && GROUP_GAP (vinfo_for_stmt (first_stmt
)) == 0))
1516 SLP_TREE_LOAD_PERMUTATION (node
).release ();
1518 for (j
= 0; j
< SLP_TREE_LOAD_PERMUTATION (node
).length (); ++j
)
1519 SLP_TREE_LOAD_PERMUTATION (node
)[j
] = j
;
1525 /* Check if the required load permutations in the SLP instance
1526 SLP_INSTN are supported. */
1529 vect_supported_load_permutation_p (slp_instance slp_instn
)
1531 unsigned int group_size
= SLP_INSTANCE_GROUP_SIZE (slp_instn
);
1532 unsigned int i
, j
, k
, next
;
1534 gimple
*stmt
, *load
, *next_load
;
1536 if (dump_enabled_p ())
1538 dump_printf_loc (MSG_NOTE
, vect_location
, "Load permutation ");
1539 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1540 if (node
->load_permutation
.exists ())
1541 FOR_EACH_VEC_ELT (node
->load_permutation
, j
, next
)
1542 dump_printf (MSG_NOTE
, "%d ", next
);
1544 for (k
= 0; k
< group_size
; ++k
)
1545 dump_printf (MSG_NOTE
, "%d ", k
);
1546 dump_printf (MSG_NOTE
, "\n");
1549 /* In case of reduction every load permutation is allowed, since the order
1550 of the reduction statements is not important (as opposed to the case of
1551 grouped stores). The only condition we need to check is that all the
1552 load nodes are of the same size and have the same permutation (and then
1553 rearrange all the nodes of the SLP instance according to this
1556 /* Check that all the load nodes are of the same size. */
1557 /* ??? Can't we assert this? */
1558 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1559 if (SLP_TREE_SCALAR_STMTS (node
).length () != (unsigned) group_size
)
1562 node
= SLP_INSTANCE_TREE (slp_instn
);
1563 stmt
= SLP_TREE_SCALAR_STMTS (node
)[0];
1565 /* Reduction (there are no data-refs in the root).
1566 In reduction chain the order of the loads is not important. */
1567 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt
))
1568 && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)))
1569 vect_attempt_slp_rearrange_stmts (slp_instn
);
  /* In basic block vectorization we allow any subchain of an interleaving
     chain.
     FORNOW: not supported in loop SLP because of realignment complications.  */
1574 if (STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt
)))
1576 /* Check whether the loads in an instance form a subchain and thus
1577 no permutation is necessary. */
1578 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1580 if (!SLP_TREE_LOAD_PERMUTATION (node
).exists ())
1582 bool subchain_p
= true;
1584 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), j
, load
)
1587 && (next_load
!= load
1588 || GROUP_GAP (vinfo_for_stmt (load
)) != 1))
1593 next_load
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (load
));
1596 SLP_TREE_LOAD_PERMUTATION (node
).release ();
1599 stmt_vec_info group_info
1600 = vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node
)[0]);
1601 group_info
= vinfo_for_stmt (GROUP_FIRST_ELEMENT (group_info
));
1603 = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (group_info
));
1604 unsigned k
, maxk
= 0;
1605 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (node
), j
, k
)
1608 /* In BB vectorization we may not actually use a loaded vector
1609 accessing elements in excess of GROUP_SIZE. */
1610 if (maxk
>= (GROUP_SIZE (group_info
) & ~(nunits
- 1)))
1612 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1613 "BB vectorization with gaps at the end of "
1614 "a load is not supported\n");
1618 /* Verify the permutation can be generated. */
1621 if (!vect_transform_slp_perm_load (node
, tem
, NULL
,
1622 1, slp_instn
, true, &n_perms
))
1624 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
1626 "unsupported load permutation\n");
1634 /* For loop vectorization verify we can generate the permutation. Be
1635 conservative about the vectorization factor, there are permutations
1636 that will use three vector inputs only starting from a specific factor
1637 and the vectorization factor is not yet final.
1638 ??? The SLP instance unrolling factor might not be the maximum one. */
1641 = least_common_multiple (SLP_INSTANCE_UNROLLING_FACTOR (slp_instn
),
1642 LOOP_VINFO_VECT_FACTOR
1643 (STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt
))));
1644 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1645 if (node
->load_permutation
.exists ()
1646 && !vect_transform_slp_perm_load (node
, vNULL
, NULL
, test_vf
,
1647 slp_instn
, true, &n_perms
))
1654 /* Find the last store in SLP INSTANCE. */
1657 vect_find_last_scalar_stmt_in_slp (slp_tree node
)
1659 gimple
*last
= NULL
, *stmt
;
1661 for (int i
= 0; SLP_TREE_SCALAR_STMTS (node
).iterate (i
, &stmt
); i
++)
1663 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
1664 if (is_pattern_stmt_p (stmt_vinfo
))
1665 last
= get_later_stmt (STMT_VINFO_RELATED_STMT (stmt_vinfo
), last
);
1667 last
= get_later_stmt (stmt
, last
);
1673 /* Compute the cost for the SLP node NODE in the SLP instance INSTANCE. */
1676 vect_analyze_slp_cost_1 (slp_instance instance
, slp_tree node
,
1677 stmt_vector_for_cost
*prologue_cost_vec
,
1678 stmt_vector_for_cost
*body_cost_vec
,
1679 unsigned ncopies_for_cost
,
1680 scalar_stmts_set_t
* visited
)
1685 stmt_vec_info stmt_info
;
1688 /* If we already costed the exact same set of scalar stmts we're done.
1689 We share the generated vector stmts for those. */
1690 if (visited
->contains (SLP_TREE_SCALAR_STMTS (node
)))
1693 visited
->add (SLP_TREE_SCALAR_STMTS (node
).copy ());
1695 /* Recurse down the SLP tree. */
1696 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1697 if (SLP_TREE_DEF_TYPE (child
) == vect_internal_def
)
1698 vect_analyze_slp_cost_1 (instance
, child
, prologue_cost_vec
,
1699 body_cost_vec
, ncopies_for_cost
, visited
);
1701 /* Look at the first scalar stmt to determine the cost. */
1702 stmt
= SLP_TREE_SCALAR_STMTS (node
)[0];
1703 stmt_info
= vinfo_for_stmt (stmt
);
1704 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
1706 vect_memory_access_type memory_access_type
1707 = (STMT_VINFO_STRIDED_P (stmt_info
)
1710 if (DR_IS_WRITE (STMT_VINFO_DATA_REF (stmt_info
)))
1711 vect_model_store_cost (stmt_info
, ncopies_for_cost
,
1712 memory_access_type
, vect_uninitialized_def
,
1713 node
, prologue_cost_vec
, body_cost_vec
);
1716 gcc_checking_assert (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info
)));
1717 if (SLP_TREE_LOAD_PERMUTATION (node
).exists ())
1719 /* If the load is permuted then the alignment is determined by
1720 the first group element not by the first scalar stmt DR. */
1721 stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
1722 stmt_info
= vinfo_for_stmt (stmt
);
1723 /* Record the cost for the permutation. */
1725 vect_transform_slp_perm_load (node
, vNULL
, NULL
,
1726 ncopies_for_cost
, instance
, true,
1728 record_stmt_cost (body_cost_vec
, n_perms
, vec_perm
,
1729 stmt_info
, 0, vect_body
);
1731 = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info
));
1732 /* And adjust the number of loads performed. This handles
1733 redundancies as well as loads that are later dead. */
1734 auto_sbitmap
perm (GROUP_SIZE (stmt_info
));
1735 bitmap_clear (perm
);
1736 for (i
= 0; i
< SLP_TREE_LOAD_PERMUTATION (node
).length (); ++i
)
1737 bitmap_set_bit (perm
, SLP_TREE_LOAD_PERMUTATION (node
)[i
]);
1738 ncopies_for_cost
= 0;
1739 bool load_seen
= false;
1740 for (i
= 0; i
< GROUP_SIZE (stmt_info
); ++i
)
1742 if (i
% nunits
== 0)
1748 if (bitmap_bit_p (perm
, i
))
1753 gcc_assert (ncopies_for_cost
1754 <= (GROUP_SIZE (stmt_info
) - GROUP_GAP (stmt_info
)
1755 + nunits
- 1) / nunits
);
1756 ncopies_for_cost
*= SLP_INSTANCE_UNROLLING_FACTOR (instance
);
1758 /* Record the cost for the vector loads. */
1759 vect_model_load_cost (stmt_info
, ncopies_for_cost
,
1760 memory_access_type
, node
, prologue_cost_vec
,
1765 else if (STMT_VINFO_TYPE (stmt_info
) == induc_vec_info_type
)
1767 /* ncopies_for_cost is the number of IVs we generate. */
1768 record_stmt_cost (body_cost_vec
, ncopies_for_cost
, vector_stmt
,
1769 stmt_info
, 0, vect_body
);
1771 /* Prologue cost for the initial values and step vector. */
1772 record_stmt_cost (prologue_cost_vec
, ncopies_for_cost
,
1774 (STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED
1776 ? vector_load
: vec_construct
,
1777 stmt_info
, 0, vect_prologue
);
1778 record_stmt_cost (prologue_cost_vec
, 1,
1780 (STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_info
))
1781 ? vector_load
: vec_construct
,
1782 stmt_info
, 0, vect_prologue
);
	 /* ??? No easy way to get at the actual number of vector stmts
	    to be generated and thus the derived IVs.  */
1789 record_stmt_cost (body_cost_vec
, ncopies_for_cost
, vector_stmt
,
1790 stmt_info
, 0, vect_body
);
1791 if (SLP_TREE_TWO_OPERATORS (node
))
1793 record_stmt_cost (body_cost_vec
, ncopies_for_cost
, vector_stmt
,
1794 stmt_info
, 0, vect_body
);
1795 record_stmt_cost (body_cost_vec
, ncopies_for_cost
, vec_perm
,
1796 stmt_info
, 0, vect_body
);
1800 /* Push SLP node def-type to stmts. */
1801 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1802 if (SLP_TREE_DEF_TYPE (child
) != vect_internal_def
)
1803 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child
), j
, stmt
)
1804 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt
)) = SLP_TREE_DEF_TYPE (child
);
1806 /* Scan operands and account for prologue cost of constants/externals.
1807 ??? This over-estimates cost for multiple uses and should be
1809 stmt
= SLP_TREE_SCALAR_STMTS (node
)[0];
1810 lhs
= gimple_get_lhs (stmt
);
1811 for (i
= 0; i
< gimple_num_ops (stmt
); ++i
)
1813 tree op
= gimple_op (stmt
, i
);
1815 enum vect_def_type dt
;
1816 if (!op
|| op
== lhs
)
1818 if (vect_is_simple_use (op
, stmt_info
->vinfo
, &def_stmt
, &dt
))
1820 /* Without looking at the actual initializer a vector of
1821 constants can be implemented as load from the constant pool.
1822 ??? We need to pass down stmt_info for a vector type
1823 even if it points to the wrong stmt. */
1824 if (dt
== vect_constant_def
)
1825 record_stmt_cost (prologue_cost_vec
, 1, vector_load
,
1826 stmt_info
, 0, vect_prologue
);
1827 else if (dt
== vect_external_def
)
1828 record_stmt_cost (prologue_cost_vec
, 1, vec_construct
,
1829 stmt_info
, 0, vect_prologue
);
1833 /* Restore stmt def-types. */
1834 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1835 if (SLP_TREE_DEF_TYPE (child
) != vect_internal_def
)
1836 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child
), j
, stmt
)
1837 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt
)) = vect_internal_def
;
1840 /* Compute the cost for the SLP instance INSTANCE. */
1843 vect_analyze_slp_cost (slp_instance instance
, void *data
)
1845 stmt_vector_for_cost body_cost_vec
, prologue_cost_vec
;
1846 unsigned ncopies_for_cost
;
1847 stmt_info_for_cost
*si
;
1850 if (dump_enabled_p ())
1851 dump_printf_loc (MSG_NOTE
, vect_location
,
1852 "=== vect_analyze_slp_cost ===\n");
  /* Calculate the number of vector stmts to create based on the unrolling
     factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
     GROUP_SIZE / NUNITS otherwise).  */
1857 unsigned group_size
= SLP_INSTANCE_GROUP_SIZE (instance
);
1858 slp_tree node
= SLP_INSTANCE_TREE (instance
);
1859 stmt_vec_info stmt_info
= vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node
)[0]);
1860 /* Adjust the group_size by the vectorization factor which is always one
1861 for basic-block vectorization. */
1862 if (STMT_VINFO_LOOP_VINFO (stmt_info
))
1863 group_size
*= LOOP_VINFO_VECT_FACTOR (STMT_VINFO_LOOP_VINFO (stmt_info
));
1864 unsigned nunits
= TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info
));
1865 /* For reductions look at a reduction operand in case the reduction
1866 operation is widening like DOT_PROD or SAD. */
1867 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info
))
1869 gimple
*stmt
= SLP_TREE_SCALAR_STMTS (node
)[0];
1870 switch (gimple_assign_rhs_code (stmt
))
1874 nunits
= TYPE_VECTOR_SUBPARTS (get_vectype_for_scalar_type
1875 (TREE_TYPE (gimple_assign_rhs1 (stmt
))));
  ncopies_for_cost = least_common_multiple (nunits, group_size) / nunits;
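  /* For example (illustrative numbers): group_size == 6 with nunits == 4
     gives lcm (4, 6) / 4 == 12 / 4 == 3 vector stmts, group_size == 8 with
     nunits == 4 gives 2, and group_size <= nunits gives 1.  */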
1882 prologue_cost_vec
.create (10);
1883 body_cost_vec
.create (10);
1884 scalar_stmts_set_t
*visited
= new scalar_stmts_set_t ();
1885 vect_analyze_slp_cost_1 (instance
, SLP_INSTANCE_TREE (instance
),
1886 &prologue_cost_vec
, &body_cost_vec
,
1887 ncopies_for_cost
, visited
);
1890 /* Record the prologue costs, which were delayed until we were
1891 sure that SLP was successful. */
1892 FOR_EACH_VEC_ELT (prologue_cost_vec
, i
, si
)
1894 struct _stmt_vec_info
*stmt_info
1895 = si
->stmt
? vinfo_for_stmt (si
->stmt
) : NULL
;
1896 (void) add_stmt_cost (data
, si
->count
, si
->kind
, stmt_info
,
1897 si
->misalign
, vect_prologue
);
1900 /* Record the instance's instructions in the target cost model. */
1901 FOR_EACH_VEC_ELT (body_cost_vec
, i
, si
)
1903 struct _stmt_vec_info
*stmt_info
1904 = si
->stmt
? vinfo_for_stmt (si
->stmt
) : NULL
;
1905 (void) add_stmt_cost (data
, si
->count
, si
->kind
, stmt_info
,
1906 si
->misalign
, vect_body
);
1909 prologue_cost_vec
.release ();
1910 body_cost_vec
.release ();
/* Splits a group of stores, currently beginning at FIRST_STMT, into two groups:
   one (still beginning at FIRST_STMT) of size GROUP1_SIZE (also containing
   the first GROUP1_SIZE stmts, since stores are consecutive), the second
   containing the remainder.
   Return the first stmt in the second group.  */

static gimple *
vect_split_slp_store_group (gimple *first_stmt, unsigned group1_size)
{
  stmt_vec_info first_vinfo = vinfo_for_stmt (first_stmt);
  gcc_assert (GROUP_FIRST_ELEMENT (first_vinfo) == first_stmt);
  gcc_assert (group1_size > 0);
  int group2_size = GROUP_SIZE (first_vinfo) - group1_size;
  gcc_assert (group2_size > 0);
  GROUP_SIZE (first_vinfo) = group1_size;

  gimple *stmt = first_stmt;
  for (unsigned i = group1_size; i > 1; i--)
    {
      stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
      gcc_assert (GROUP_GAP (vinfo_for_stmt (stmt)) == 1);
    }
  /* STMT is now the last element of the first group.  */
  gimple *group2 = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
  GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt)) = 0;

  GROUP_SIZE (vinfo_for_stmt (group2)) = group2_size;
  for (stmt = group2; stmt; stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt)))
    {
      GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = group2;
      gcc_assert (GROUP_GAP (vinfo_for_stmt (stmt)) == 1);
    }

  /* For the second group, the GROUP_GAP is that before the original group,
     plus skipping over the first vector.  */
  GROUP_GAP (vinfo_for_stmt (group2))
    = GROUP_GAP (first_vinfo) + group1_size;

  /* GROUP_GAP of the first group now has to skip over the second group too.  */
  GROUP_GAP (first_vinfo) += group2_size;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "Split group into %d and %d\n",
		     group1_size, group2_size);

  return group2;
}
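/* Illustrative example (the sizes are assumptions): a store group of 6
   stmts split with group1_size == 4 becomes a group of 4 followed by a
   group of 2; the second group's GROUP_GAP additionally skips the 4 stmts
   of the first group, and the first group's GROUP_GAP grows by 2, so each
   group still advances over the whole original access range.  */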
/* Analyze an SLP instance starting from a group of grouped stores.  Call
   vect_build_slp_tree to build a tree of packed stmts if possible.
   Return FALSE if it's impossible to SLP any stmt in the loop.  */

static bool
vect_analyze_slp_instance (vec_info *vinfo,
			   gimple *stmt, unsigned max_tree_size)
{
  slp_instance new_instance;
  slp_tree node;
  unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
  unsigned int unrolling_factor = 1, nunits;
  tree vectype, scalar_type = NULL_TREE;
  gimple *next;
  unsigned int i;
  unsigned int max_nunits = 0;
  vec<slp_tree> loads;
  struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
  vec<gimple *> scalar_stmts;

  if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    {
      if (dr)
	{
	  scalar_type = TREE_TYPE (DR_REF (dr));
	  vectype = get_vectype_for_scalar_type (scalar_type);
	}
      else
	{
	  gcc_assert (is_a <loop_vec_info> (vinfo));
	  vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
	}

      group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
    }
  else
    {
      gcc_assert (is_a <loop_vec_info> (vinfo));
      vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
      group_size = as_a <loop_vec_info> (vinfo)->reductions.length ();
    }

  if (!vectype)
    {
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "Build SLP failed: unsupported data-type ");
	  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, scalar_type);
	  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	}

      return false;
    }
  nunits = TYPE_VECTOR_SUBPARTS (vectype);

  /* Create a node (a root of the SLP tree) for the packed grouped stores.  */
  scalar_stmts.create (group_size);
  next = stmt;
  if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    {
      /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS.  */
      while (next)
	{
	  if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next))
	      && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)))
	    scalar_stmts.safe_push (
		  STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)));
	  else
	    scalar_stmts.safe_push (next);
	  next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
	}
      /* Mark the first element of the reduction chain as reduction to properly
	 transform the node.  In the reduction analysis phase only the last
	 element of the chain is marked as reduction.  */
      if (!STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
	STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_reduction_def;
    }
  else
    {
      /* Collect reduction statements.  */
      vec<gimple *> reductions = as_a <loop_vec_info> (vinfo)->reductions;
      for (i = 0; reductions.iterate (i, &next); i++)
	scalar_stmts.safe_push (next);
    }

  loads.create (group_size);

  /* Build the tree for the SLP instance.  */
  bool *matches = XALLOCAVEC (bool, group_size);
  unsigned npermutes = 0;
  bst_fail = new scalar_stmts_set_t ();
  node = vect_build_slp_tree (vinfo, scalar_stmts, group_size,
			      &max_nunits, &loads, matches, &npermutes,
			      NULL, max_tree_size);
  delete bst_fail;
  if (node != NULL)
    {
      /* Calculate the unrolling factor based on the smallest type.  */
      unrolling_factor
	= least_common_multiple (max_nunits, group_size) / group_size;
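      /* Worked example (illustrative only): for a store group with
	 group_size == 2 whose smallest type needs max_nunits == 4 lanes,
	 least_common_multiple (4, 2) / 2 == 2, i.e. two scalar iterations
	 of the group are needed to fill one vector; with group_size == 8
	 and max_nunits == 4 the factor stays 1 because one group already
	 fills two full vectors.  */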
      if (unrolling_factor != 1
	  && is_a <bb_vec_info> (vinfo))
	{
	  if (max_nunits > group_size)
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "Build SLP failed: store group "
			       "size not a multiple of the vector size "
			       "in basic block SLP\n");
	      vect_free_slp_tree (node);
	      loads.release ();
	      return false;
	    }
	  /* Fatal mismatch.  */
	  matches[group_size / max_nunits * max_nunits] = false;
	  vect_free_slp_tree (node);
	  loads.release ();
	  return false;
	}

      /* Create a new SLP instance.  */
      new_instance = XNEW (struct _slp_instance);
      SLP_INSTANCE_TREE (new_instance) = node;
      SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
      SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
      SLP_INSTANCE_LOADS (new_instance) = loads;

      /* Compute the load permutation.  */
      slp_tree load_node;
      bool loads_permuted = false;
      FOR_EACH_VEC_ELT (loads, i, load_node)
	{
	  vec<unsigned> load_permutation;
	  int j;
	  gimple *load, *first_stmt;
	  bool this_load_permuted = false;
	  load_permutation.create (group_size);
	  first_stmt = GROUP_FIRST_ELEMENT
	      (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
	  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load)
	    {
	      int load_place = vect_get_place_in_interleaving_chain
				 (load, first_stmt);
	      gcc_assert (load_place != -1);
	      if (load_place != j)
		this_load_permuted = true;
	      load_permutation.safe_push (load_place);
	    }
	  if (!this_load_permuted
	      /* The load requires permutation when unrolling exposes
	         a gap either because the group is larger than the SLP
	         group-size or because there is a gap between the groups.  */
	      && (unrolling_factor == 1
		  || (group_size == GROUP_SIZE (vinfo_for_stmt (first_stmt))
		      && GROUP_GAP (vinfo_for_stmt (first_stmt)) == 0)))
	    {
	      load_permutation.release ();
	      continue;
	    }
	  SLP_TREE_LOAD_PERMUTATION (load_node) = load_permutation;
	  loads_permuted = true;
	}
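      /* Illustrative example (not from the original sources): if an SLP
	 load node reads a[1] and then a[0] from an interleaving chain that
	 starts at a[0], the computed load_permutation is {1, 0} and
	 loads_permuted becomes true; a node that reads a[0], a[1] in chain
	 order keeps no permutation at all.  */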
      if (loads_permuted)
	{
	  if (!vect_supported_load_permutation_p (new_instance))
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: unsupported load "
				   "permutation ");
		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION,
				    TDF_SLIM, stmt, 0);
		}
	      vect_free_slp_instance (new_instance);
	      return false;
	    }
	}

      /* If the loads and stores can be handled with load/store-lanes
	 instructions do not generate this SLP instance.  */
      if (is_a <loop_vec_info> (vinfo)
	  && loads_permuted
	  && dr && vect_store_lanes_supported (vectype, group_size))
	{
	  slp_tree load_node;
	  FOR_EACH_VEC_ELT (loads, i, load_node)
	    {
	      gimple *first_stmt = GROUP_FIRST_ELEMENT
		  (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
	      stmt_vec_info stmt_vinfo = vinfo_for_stmt (first_stmt);
	      /* Use SLP for strided accesses (or if we can't load-lanes).  */
	      if (STMT_VINFO_STRIDED_P (stmt_vinfo)
		  || ! vect_load_lanes_supported
			(STMT_VINFO_VECTYPE (stmt_vinfo),
			 GROUP_SIZE (stmt_vinfo)))
		break;
	    }
	  if (i == loads.length ())
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "Built SLP cancelled: can use "
				 "load/store-lanes\n");
	      vect_free_slp_instance (new_instance);
	      return false;
	    }
	}

      vinfo->slp_instances.safe_push (new_instance);

      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "Final SLP tree for instance:\n");
	  vect_print_slp_tree (MSG_NOTE, vect_location, node);
	}

      return true;
    }

  /* Failed to SLP.  */
  /* Free the allocated memory.  */
  scalar_stmts.release ();
  loads.release ();

  /* For basic block SLP, try to break the group up into multiples of the
     vector size.  */
  if (is_a <bb_vec_info> (vinfo)
      && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
      && STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
    {
      /* We consider breaking the group only on VF boundaries from the existing
	 start.  */
      for (i = 0; i < group_size; i++)
	if (!matches[i]) break;

      if (i >= nunits && i < group_size)
	{
	  /* Split into two groups at the first vector boundary before i.  */
	  gcc_assert ((nunits & (nunits - 1)) == 0);
	  unsigned group1_size = i & ~(nunits - 1);
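	  /* Worked example (illustrative only): with nunits == 4 and the
	     first mismatch at i == 6, group1_size = 6 & ~3 == 4, so the
	     group is split at the vector boundary and stmts 0..3 form the
	     first subgroup; the masking relies on nunits being a power of
	     two, which the assert above guarantees.  */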
	  gimple *rest = vect_split_slp_store_group (stmt, group1_size);
	  bool res = vect_analyze_slp_instance (vinfo, stmt, max_tree_size);
	  /* If the first non-match was in the middle of a vector,
	     skip the rest of that vector.  */
	  if (group1_size < i)
	    {
	      i = group1_size + nunits;
	      if (i < group_size)
		rest = vect_split_slp_store_group (rest, nunits);
	    }
	  if (i < group_size)
	    res |= vect_analyze_slp_instance (vinfo, rest, max_tree_size);
	  return res;
	}
      /* Even though the first vector did not all match, we might be able to SLP
	 (some) of the remainder.  FORNOW ignore this possibility.  */
    }

  return false;
}
/* Check if there are stmts in the loop that can be vectorized using SLP.
   Build SLP trees of packed scalar stmts if SLP is possible.  */

bool
vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
{
  unsigned int i;
  gimple *first_element;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "=== vect_analyze_slp ===\n");

  /* Find SLP sequences starting from groups of grouped stores.  */
  FOR_EACH_VEC_ELT (vinfo->grouped_stores, i, first_element)
    vect_analyze_slp_instance (vinfo, first_element, max_tree_size);

  if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
    {
      if (loop_vinfo->reduction_chains.length () > 0)
	{
	  /* Find SLP sequences starting from reduction chains.  */
	  FOR_EACH_VEC_ELT (loop_vinfo->reduction_chains, i, first_element)
	    if (! vect_analyze_slp_instance (vinfo, first_element,
					     max_tree_size))
	      {
		/* Dissolve reduction chain group.  */
		gimple *next, *stmt = first_element;
		while (stmt)
		  {
		    stmt_vec_info vinfo = vinfo_for_stmt (stmt);
		    next = GROUP_NEXT_ELEMENT (vinfo);
		    GROUP_FIRST_ELEMENT (vinfo) = NULL;
		    GROUP_NEXT_ELEMENT (vinfo) = NULL;
		    stmt = next;
		  }
		STMT_VINFO_DEF_TYPE (vinfo_for_stmt (first_element))
		  = vect_internal_def;
	      }
	}

      /* Find SLP sequences starting from groups of reductions.  */
      if (loop_vinfo->reductions.length () > 1)
	vect_analyze_slp_instance (vinfo, loop_vinfo->reductions[0],
				   max_tree_size);
    }

  return true;
}
/* For each possible SLP instance decide whether to SLP it and calculate overall
   unrolling factor needed to SLP the loop.  Return TRUE if decided to SLP at
   least one instance.  */

bool
vect_make_slp_decision (loop_vec_info loop_vinfo)
{
  unsigned int i, unrolling_factor = 1;
  vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  slp_instance instance;
  int decided_to_slp = 0;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "=== vect_make_slp_decision ==="
		     "\n");

  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      /* FORNOW: SLP if you can.  */
      if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
	unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);

      /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts.  Later we
	 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
	 loop-based vectorization.  Such stmts will be marked as HYBRID.  */
      vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
      decided_to_slp++;
    }

  LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;

  if (decided_to_slp && dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "Decided to SLP %d instances. Unrolling factor %d\n",
		     decided_to_slp, unrolling_factor);

  return (decided_to_slp > 0);
}
/* Find stmts that must be both vectorized and SLPed (since they feed stmts that
   can't be SLPed) in the tree rooted at NODE.  Mark such stmts as HYBRID.  */

static void
vect_detect_hybrid_slp_stmts (slp_tree node, unsigned i, slp_vect_type stype)
{
  gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[i];
  imm_use_iterator imm_iter;
  gimple *use_stmt;
  stmt_vec_info use_vinfo, stmt_vinfo = vinfo_for_stmt (stmt);
  slp_tree child;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  int j;

  /* Propagate hybrid down the SLP tree.  */
  if (stype == hybrid)
    ;
  else if (HYBRID_SLP_STMT (stmt_vinfo))
    stype = hybrid;
  else
    {
      /* Check if a pure SLP stmt has uses in non-SLP stmts.  */
      gcc_checking_assert (PURE_SLP_STMT (stmt_vinfo));
      /* If we get a pattern stmt here we have to use the LHS of the
	 original stmt for immediate uses.  */
      if (! STMT_VINFO_IN_PATTERN_P (stmt_vinfo)
	  && STMT_VINFO_RELATED_STMT (stmt_vinfo))
	stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      tree def;
      if (gimple_code (stmt) == GIMPLE_PHI)
	def = gimple_phi_result (stmt);
      else
	def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF);
      if (def)
	FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, def)
	  {
	    if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
	      continue;
	    use_vinfo = vinfo_for_stmt (use_stmt);
	    if (STMT_VINFO_IN_PATTERN_P (use_vinfo)
		&& STMT_VINFO_RELATED_STMT (use_vinfo))
	      use_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (use_vinfo));
	    if (!STMT_SLP_TYPE (use_vinfo)
		&& (STMT_VINFO_RELEVANT (use_vinfo)
		    || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
		&& !(gimple_code (use_stmt) == GIMPLE_PHI
		     && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
	      {
		if (dump_enabled_p ())
		  {
		    dump_printf_loc (MSG_NOTE, vect_location, "use of SLP "
				     "def in non-SLP stmt: ");
		    dump_gimple_stmt (MSG_NOTE, TDF_SLIM, use_stmt, 0);
		  }
		stype = hybrid;
	      }
	  }
    }

  if (stype == hybrid
      && !HYBRID_SLP_STMT (stmt_vinfo))
    {
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
	}
      STMT_SLP_TYPE (stmt_vinfo) = hybrid;
    }

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
    if (SLP_TREE_DEF_TYPE (child) != vect_external_def)
      vect_detect_hybrid_slp_stmts (child, i, stype);
}
/* Helpers for vect_detect_hybrid_slp walking pattern stmt uses.  */

static tree
vect_detect_hybrid_slp_1 (tree *tp, int *, void *data)
{
  walk_stmt_info *wi = (walk_stmt_info *) data;
  struct loop *loopp = (struct loop *) wi->info;

  if (wi->is_lhs)
    return NULL_TREE;

  if (TREE_CODE (*tp) == SSA_NAME
      && !SSA_NAME_IS_DEFAULT_DEF (*tp))
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (*tp);
      if (flow_bb_inside_loop_p (loopp, gimple_bb (def_stmt))
	  && PURE_SLP_STMT (vinfo_for_stmt (def_stmt)))
	{
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: ");
	      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
	    }
	  STMT_SLP_TYPE (vinfo_for_stmt (def_stmt)) = hybrid;
	}
    }

  return NULL_TREE;
}

static tree
vect_detect_hybrid_slp_2 (gimple_stmt_iterator *gsi, bool *handled,
			  walk_stmt_info *)
{
  stmt_vec_info use_vinfo = vinfo_for_stmt (gsi_stmt (*gsi));
  /* If the stmt is in a SLP instance then this isn't a reason
     to mark use definitions in other SLP instances as hybrid.  */
  if (! STMT_SLP_TYPE (use_vinfo)
      && (STMT_VINFO_RELEVANT (use_vinfo)
	  || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
      && ! (gimple_code (gsi_stmt (*gsi)) == GIMPLE_PHI
	    && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
    ;
  else
    *handled = true;
  return NULL_TREE;
}
/* Find stmts that must be both vectorized and SLPed.  */

void
vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
{
  unsigned int i;
  vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  slp_instance instance;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "=== vect_detect_hybrid_slp ==="
		     "\n");

  /* First walk all pattern stmt in the loop and mark defs of uses as
     hybrid because immediate uses in them are not recorded.  */
  for (i = 0; i < LOOP_VINFO_LOOP (loop_vinfo)->num_nodes; ++i)
    {
      basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i];
      for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
	   gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);
	  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
	  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
	    {
	      walk_stmt_info wi;
	      memset (&wi, 0, sizeof (wi));
	      wi.info = LOOP_VINFO_LOOP (loop_vinfo);
	      gimple_stmt_iterator gsi2
		= gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
	      walk_gimple_stmt (&gsi2, vect_detect_hybrid_slp_2,
				vect_detect_hybrid_slp_1, &wi);
	      walk_gimple_seq (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
			       vect_detect_hybrid_slp_2,
			       vect_detect_hybrid_slp_1, &wi);
	    }
	}
    }

  /* Then walk the SLP instance trees marking stmts with uses in
     non-SLP stmts as hybrid, also propagating hybrid down the
     SLP tree, collecting the above info on-the-fly.  */
  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      for (unsigned i = 0; i < SLP_INSTANCE_GROUP_SIZE (instance); ++i)
	vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance),
				      i, pure_slp);
    }
}
/* Initialize a bb_vec_info struct for the statements between
   REGION_BEGIN_IN (inclusive) and REGION_END_IN (exclusive).  */

_bb_vec_info::_bb_vec_info (gimple_stmt_iterator region_begin_in,
			    gimple_stmt_iterator region_end_in)
  : vec_info (vec_info::bb, init_cost (NULL)),
    bb (gsi_bb (region_begin_in)),
    region_begin (region_begin_in),
    region_end (region_end_in)
{
  gimple_stmt_iterator gsi;

  for (gsi = region_begin; gsi_stmt (gsi) != gsi_stmt (region_end);
       gsi_next (&gsi))
    {
      gimple *stmt = gsi_stmt (gsi);
      gimple_set_uid (stmt, 0);
      set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, this));
    }

  bb->aux = this;
}
/* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
   stmts in the basic block.  */

_bb_vec_info::~_bb_vec_info ()
{
  for (gimple_stmt_iterator si = region_begin;
       gsi_stmt (si) != gsi_stmt (region_end); gsi_next (&si))
    {
      gimple *stmt = gsi_stmt (si);
      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

      if (stmt_info)
	/* Free stmt_vec_info.  */
	free_stmt_vec_info (stmt);

      /* Reset region marker.  */
      gimple_set_uid (stmt, -1);
    }

  bb->aux = NULL;
}
/* Analyze statements contained in SLP tree NODE after recursively analyzing
   the subtree.  NODE_INSTANCE contains NODE and VINFO contains INSTANCE.

   Return true if the operations are supported.  */

static bool
vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node,
				  slp_instance node_instance)
{
  bool dummy;
  int i, j;
  gimple *stmt;
  slp_tree child;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return true;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (!vect_slp_analyze_node_operations (vinfo, child, node_instance))
      return false;

  stmt = SLP_TREE_SCALAR_STMTS (node)[0];
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  gcc_assert (stmt_info);
  gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect);

  /* For BB vectorization vector types are assigned here.
     Memory accesses already got their vector type assigned
     in vect_analyze_data_refs.  */
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  if (bb_vinfo
      && ! STMT_VINFO_DATA_REF (stmt_info))
    {
      gcc_assert (PURE_SLP_STMT (stmt_info));

      tree scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "get vectype for scalar type: ");
	  dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
	  dump_printf (MSG_NOTE, "\n");
	}

      tree vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vectype)
	{
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "not SLPed: unsupported data-type ");
	      dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
				 scalar_type);
	      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	    }
	  return false;
	}

      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
	  dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
	  dump_printf (MSG_NOTE, "\n");
	}

      gimple *sstmt;
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, sstmt)
	STMT_VINFO_VECTYPE (vinfo_for_stmt (sstmt)) = vectype;
    }

  /* Calculate the number of vector statements to be created for the
     scalar stmts in this node.  For SLP reductions it is equal to the
     number of vector statements in the children (which has already been
     calculated by the recursive call).  Otherwise it is the number of
     scalar elements in one scalar iteration (GROUP_SIZE) multiplied by
     VF divided by the number of elements in a vector.  */
  if (GROUP_FIRST_ELEMENT (stmt_info)
      && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
    SLP_TREE_NUMBER_OF_VEC_STMTS (node)
      = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node)[0]);
  else
    {
      int vf;
      if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
	vf = loop_vinfo->vectorization_factor;
      else
	vf = 1;
      unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (node_instance);
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      SLP_TREE_NUMBER_OF_VEC_STMTS (node)
	= vf * group_size / TYPE_VECTOR_SUBPARTS (vectype);
    }
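  /* Worked example (illustrative only): with a vectorization factor of 4,
     a group of 2 scalar stmts per iteration and 4 elements per vector,
     4 * 2 / 4 == 2 vector statements are needed for this node.  */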
  /* Push SLP node def-type to stmt operands.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
    if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
      STMT_VINFO_DEF_TYPE (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0]))
	= SLP_TREE_DEF_TYPE (child);
  bool res = vect_analyze_stmt (stmt, &dummy, node, node_instance);
  /* Restore def-types.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
    if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
      STMT_VINFO_DEF_TYPE (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0]))
	= vect_internal_def;

  return res;
}
/* Analyze statements in SLP instances of VINFO.  Return true if the
   operations are supported.  */

bool
vect_slp_analyze_operations (vec_info *vinfo)
{
  slp_instance instance;
  int i;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "=== vect_slp_analyze_operations ===\n");

  for (i = 0; vinfo->slp_instances.iterate (i, &instance); )
    {
      if (!vect_slp_analyze_node_operations (vinfo,
					     SLP_INSTANCE_TREE (instance),
					     instance))
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "removing SLP instance operations starting from: ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
			    SLP_TREE_SCALAR_STMTS
			      (SLP_INSTANCE_TREE (instance))[0], 0);
	  vect_free_slp_instance (instance);
	  vinfo->slp_instances.ordered_remove (i);
	}
      else
	{
	  /* Compute the costs of the SLP instance.  */
	  vect_analyze_slp_cost (instance, vinfo->target_cost_data);
	  i++;
	}
    }

  return !vinfo->slp_instances.is_empty ();
}
/* Compute the scalar cost of the SLP node NODE and its children
   and return it.  Do not account defs that are marked in LIFE and
   update LIFE according to uses of NODE.  */

static unsigned
vect_bb_slp_scalar_cost (basic_block bb,
			 slp_tree node, vec<bool, va_heap> *life)
{
  unsigned scalar_cost = 0;
  unsigned i;
  gimple *stmt;
  slp_tree child;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      unsigned stmt_cost;
      ssa_op_iter op_iter;
      def_operand_p def_p;
      stmt_vec_info stmt_info;

      if ((*life)[i])
	continue;

      /* If there is a non-vectorized use of the defs then the scalar
	 stmt is kept live in which case we do not account it or any
	 required defs in the SLP children in the scalar cost.  This
	 way we make the vectorization more costly when compared to
	 the scalar cost.  */
      FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
	{
	  imm_use_iterator use_iter;
	  gimple *use_stmt;
	  FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
	    if (!is_gimple_debug (use_stmt)
		&& (! vect_stmt_in_region_p (vinfo_for_stmt (stmt)->vinfo,
					     use_stmt)
		    || ! PURE_SLP_STMT (vinfo_for_stmt (use_stmt))))
	      {
		(*life)[i] = true;
		BREAK_FROM_IMM_USE_STMT (use_iter);
	      }
	}
      if ((*life)[i])
	continue;

      /* Count scalar stmts only once.  */
      if (gimple_visited_p (stmt))
	continue;
      gimple_set_visited (stmt, true);

      stmt_info = vinfo_for_stmt (stmt);
      if (STMT_VINFO_DATA_REF (stmt_info))
	{
	  if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
	    stmt_cost = vect_get_stmt_cost (scalar_load);
	  else
	    stmt_cost = vect_get_stmt_cost (scalar_store);
	}
      else
	stmt_cost = vect_get_stmt_cost (scalar_stmt);

      scalar_cost += stmt_cost;
    }

  auto_vec<bool, 20> subtree_life;
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    {
      if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
	{
	  /* Do not directly pass LIFE to the recursive call, copy it to
	     confine changes in the callee to the current child/subtree.  */
	  subtree_life.safe_splice (*life);
	  scalar_cost += vect_bb_slp_scalar_cost (bb, child, &subtree_life);
	  subtree_life.truncate (0);
	}
    }

  return scalar_cost;
}
/* Check if vectorization of the basic block is profitable.  */

static bool
vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
{
  vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
  slp_instance instance;
  int i;
  unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
  unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;

  /* Calculate scalar cost.  */
  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      auto_vec<bool, 20> life;
      life.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance));
      scalar_cost += vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo),
					      SLP_INSTANCE_TREE (instance),
					      &life);
    }

  /* Unset visited flag.  */
  for (gimple_stmt_iterator gsi = bb_vinfo->region_begin;
       gsi_stmt (gsi) != gsi_stmt (bb_vinfo->region_end); gsi_next (&gsi))
    gimple_set_visited (gsi_stmt (gsi), false);

  /* Complete the target-specific cost calculation.  */
  finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo), &vec_prologue_cost,
	       &vec_inside_cost, &vec_epilogue_cost);

  vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
      dump_printf (MSG_NOTE, "  Vector inside of basic block cost: %d\n",
		   vec_inside_cost);
      dump_printf (MSG_NOTE, "  Vector prologue cost: %d\n", vec_prologue_cost);
      dump_printf (MSG_NOTE, "  Vector epilogue cost: %d\n", vec_epilogue_cost);
      dump_printf (MSG_NOTE, "  Scalar cost of basic block: %d\n", scalar_cost);
    }
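  /* Illustrative example (numbers invented for exposition): with a scalar
     cost of 12 and vector prologue/inside/epilogue costs of 2/8/1, the sum
     11 is not greater than 12, so the comparison below decides in favor of
     the vectorized basic block.  */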
  /* Vectorization is profitable if its cost is less than the cost of the
     scalar version.  Note that we err on the vector side for equal cost
     because the cost estimate is otherwise quite pessimistic (constant uses
     are free on the scalar side but cost a load on the vector side for
     example).  */
  if (vec_outside_cost + vec_inside_cost > scalar_cost)
    return false;

  return true;
}
/* Check if the basic block can be vectorized.  Returns a bb_vec_info
   if so and sets fatal to true if failure is independent of
   current_vector_size.  */

static bb_vec_info
vect_slp_analyze_bb_1 (gimple_stmt_iterator region_begin,
		       gimple_stmt_iterator region_end,
		       vec<data_reference_p> datarefs, int n_stmts,
		       bool &fatal)
{
  bb_vec_info bb_vinfo;
  slp_instance instance;
  int i;
  int min_vf = 2;

  /* The first group of checks is independent of the vector size.  */
  fatal = true;

  if (n_stmts > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: too many instructions in "
			 "basic block.\n");
      free_data_refs (datarefs);
      return NULL;
    }

  bb_vinfo = new _bb_vec_info (region_begin, region_end);
  if (!bb_vinfo)
    return NULL;

  BB_VINFO_DATAREFS (bb_vinfo) = datarefs;

  /* Analyze the data references.  */

  if (!vect_analyze_data_refs (bb_vinfo, &min_vf))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: unhandled data-ref in basic "
			 "block.\n");
      delete bb_vinfo;
      return NULL;
    }

  if (BB_VINFO_DATAREFS (bb_vinfo).length () < 2)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: not enough data-refs in "
			 "basic block.\n");
      delete bb_vinfo;
      return NULL;
    }

  if (!vect_analyze_data_ref_accesses (bb_vinfo))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: unhandled data access in "
			 "basic block.\n");
      delete bb_vinfo;
      return NULL;
    }

  /* If there are no grouped stores in the region there is no need
     to continue with pattern recog as vect_analyze_slp will fail
     anyway.  */
  if (bb_vinfo->grouped_stores.is_empty ())
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: no grouped stores in "
			 "basic block.\n");
      delete bb_vinfo;
      return NULL;
    }

  /* While the rest of the analysis below depends on it in some way.  */
  fatal = false;

  vect_pattern_recog (bb_vinfo);

  /* Check the SLP opportunities in the basic block, analyze and build SLP
     trees.  */
  if (!vect_analyze_slp (bb_vinfo, n_stmts))
    {
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "Failed to SLP the basic block.\n");
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "not vectorized: failed to find SLP opportunities "
			   "in basic block.\n");
	}
      delete bb_vinfo;
      return NULL;
    }

  vect_record_base_alignments (bb_vinfo);

  /* Analyze and verify the alignment of data references and the
     dependence in the SLP instances.  */
  for (i = 0; BB_VINFO_SLP_INSTANCES (bb_vinfo).iterate (i, &instance); )
    {
      if (! vect_slp_analyze_and_verify_instance_alignment (instance)
	  || ! vect_slp_analyze_instance_dependence (instance))
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "removing SLP instance operations starting from: ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
			    SLP_TREE_SCALAR_STMTS
			      (SLP_INSTANCE_TREE (instance))[0], 0);
	  vect_free_slp_instance (instance);
	  BB_VINFO_SLP_INSTANCES (bb_vinfo).ordered_remove (i);
	  continue;
	}

      /* Mark all the statements that we want to vectorize as pure SLP and
	 relevant.  */
      vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
      vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));

      i++;
    }
  if (! BB_VINFO_SLP_INSTANCES (bb_vinfo).length ())
    {
      delete bb_vinfo;
      return NULL;
    }

  if (!vect_slp_analyze_operations (bb_vinfo))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: bad operation in basic block.\n");
      delete bb_vinfo;
      return NULL;
    }

  /* Cost model: check if the vectorization is worthwhile.  */
  if (!unlimited_cost_model (NULL)
      && !vect_bb_vectorization_profitable_p (bb_vinfo))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: vectorization is not "
			 "profitable.\n");
      delete bb_vinfo;
      return NULL;
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "Basic block will be vectorized using SLP\n");

  return bb_vinfo;
}
/* Main entry for the BB vectorizer.  Analyze and transform BB, returns
   true if anything in the basic-block was vectorized.  */

bool
vect_slp_bb (basic_block bb)
{
  bb_vec_info bb_vinfo;
  gimple_stmt_iterator gsi;
  unsigned int vector_sizes;
  bool any_vectorized = false;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "===vect_slp_analyze_bb===\n");

  /* Autodetect first vector size we try.  */
  current_vector_size = 0;
  vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();

  gsi = gsi_start_bb (bb);

  while (1)
    {
      if (gsi_end_p (gsi))
	break;

      gimple_stmt_iterator region_begin = gsi;
      vec<data_reference_p> datarefs = vNULL;
      int insns = 0;

      for (; !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);
	  if (is_gimple_debug (stmt))
	    continue;
	  insns++;

	  if (gimple_location (stmt) != UNKNOWN_LOCATION)
	    vect_location = gimple_location (stmt);

	  if (!find_data_references_in_stmt (NULL, stmt, &datarefs))
	    break;
	}

      /* Skip leading unhandled stmts.  */
      if (gsi_stmt (region_begin) == gsi_stmt (gsi))
	{
	  gsi_next (&gsi);
	  continue;
	}

      gimple_stmt_iterator region_end = gsi;

      bool vectorized = false;
      bool fatal = false;
      bb_vinfo = vect_slp_analyze_bb_1 (region_begin, region_end,
					datarefs, insns, fatal);
      if (bb_vinfo
	  && dbg_cnt (vect_slp))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB part\n");

	  vect_schedule_slp (bb_vinfo);

	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "basic block part vectorized\n");

	  vectorized = true;
	}
      delete bb_vinfo;

      any_vectorized |= vectorized;

      vector_sizes &= ~current_vector_size;
      if (vectorized
	  || vector_sizes == 0
	  || current_vector_size == 0
	  /* If vect_slp_analyze_bb_1 signaled that analysis for all
	     vector sizes will fail do not bother iterating.  */
	  || fatal)
	{
	  if (gsi_end_p (region_end))
	    break;

	  /* Skip the unhandled stmt.  */
	  gsi_next (&gsi);

	  /* And reset vector sizes.  */
	  current_vector_size = 0;
	  vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
	}
      else
	{
	  /* Try the next biggest vector size.  */
	  current_vector_size = 1 << floor_log2 (vector_sizes);
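	  /* Illustrative example (not from the original sources): if the
	     target advertises vector sizes 16 and 32 (so bits 16 and 32 are
	     set in VECTOR_SIZES) and the 32-byte attempt failed, the mask
	     update above clears bit 32 and floor_log2 picks 16 for the
	     retry below.  */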
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "***** Re-trying analysis with "
			     "vector size %d\n", current_vector_size);

	  /* Start over.  */
	  gsi = region_begin;
	}
    }

  return any_vectorized;
}
/* Return 1 if vector type of boolean constant which is OPNUM
   operand in statement STMT is a boolean vector.  */

static bool
vect_mask_constant_operand_p (gimple *stmt, int opnum)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  enum tree_code code = gimple_expr_code (stmt);
  tree op, vectype;
  gimple *def_stmt;
  enum vect_def_type dt;

  /* For comparison and COND_EXPR type is chosen depending
     on the other comparison operand.  */
  if (TREE_CODE_CLASS (code) == tcc_comparison)
    {
      if (opnum)
	op = gimple_assign_rhs1 (stmt);
      else
	op = gimple_assign_rhs2 (stmt);

      if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &def_stmt,
			       &dt, &vectype))
	gcc_unreachable ();

      return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
    }

  if (code == COND_EXPR)
    {
      tree cond = gimple_assign_rhs1 (stmt);

      if (TREE_CODE (cond) == SSA_NAME)
	op = cond;
      else if (opnum)
	op = TREE_OPERAND (cond, 1);
      else
	op = TREE_OPERAND (cond, 0);

      if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &def_stmt,
			       &dt, &vectype))
	gcc_unreachable ();

      return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
    }

  return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo));
}
/* For constant and loop invariant defs of SLP_NODE this function returns
   (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
   OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
   scalar stmts.  NUMBER_OF_VECTORS is the number of vector defs to create.
   REDUC_INDEX is the index of the reduction operand in the statements, unless
   it is -1.  */

static void
vect_get_constant_vectors (tree op, slp_tree slp_node,
			   vec<tree> *vec_oprnds,
			   unsigned int op_num, unsigned int number_of_vectors)
{
  vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
  gimple *stmt = stmts[0];
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  unsigned nunits;
  tree vec_cst;
  unsigned j, number_of_places_left_in_vector;
  tree vector_type;
  tree vop;
  int group_size = stmts.length ();
  unsigned int vec_num, i;
  unsigned number_of_copies = 1;
  vec<tree> voprnds;
  voprnds.create (number_of_vectors);
  bool constant_p, is_store;
  tree neutral_op = NULL;
  enum tree_code code = gimple_expr_code (stmt);
  gimple_seq ctor_seq = NULL;

  /* Check if vector type is a boolean vector.  */
  if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
      && vect_mask_constant_operand_p (stmt, op_num))
    vector_type
      = build_same_sized_truth_vector_type (STMT_VINFO_VECTYPE (stmt_vinfo));
  else
    vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
  nunits = TYPE_VECTOR_SUBPARTS (vector_type);

  if (STMT_VINFO_DATA_REF (stmt_vinfo))
    {
      is_store = true;
      op = gimple_assign_rhs1 (stmt);
    }
  else
    is_store = false;

  gcc_assert (op);

  /* NUMBER_OF_COPIES is the number of times we need to use the same values in
     created vectors.  It is greater than 1 if unrolling is performed.

     For example, we have two scalar operands, s1 and s2 (e.g., group of
     strided accesses of size two), while NUNITS is four (i.e., four scalars
     of this type can be packed in a vector).  The output vector will contain
     two copies of each scalar operand: {s1, s2, s1, s2}.  (NUMBER_OF_COPIES
     will be 2).

     If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
     containing the operands.

     For example, NUNITS is four as before, and the group size is 8
     (s1, s2, ..., s8).  We will create two vectors {s1, s2, s3, s4} and
     {s5, s6, s7, s8}.  */

  number_of_copies = nunits * number_of_vectors / group_size;
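  /* Worked example (illustrative only): group_size == 8, nunits == 4 and
     number_of_vectors == 2 give number_of_copies == 1, matching the
     {s1,s2,s3,s4} {s5,s6,s7,s8} case above; group_size == 2, nunits == 4
     and number_of_vectors == 1 give 2 copies, i.e. {s1, s2, s1, s2}.  */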
  number_of_places_left_in_vector = nunits;
  constant_p = true;
  tree_vector_builder elts (vector_type, nunits, 1);
  elts.quick_grow (nunits);
  bool place_after_defs = false;
  for (j = 0; j < number_of_copies; j++)
    {
      for (i = group_size - 1; stmts.iterate (i, &stmt); i--)
	{
	  if (is_store)
	    op = gimple_assign_rhs1 (stmt);
	  else
	    {
	      switch (code)
		{
		  case COND_EXPR:
		    {
		      tree cond = gimple_assign_rhs1 (stmt);
		      if (TREE_CODE (cond) == SSA_NAME)
			op = gimple_op (stmt, op_num + 1);
		      else if (op_num == 0 || op_num == 1)
			op = TREE_OPERAND (cond, op_num);
		      else
			{
			  if (op_num == 2)
			    op = gimple_assign_rhs2 (stmt);
			  else
			    op = gimple_assign_rhs3 (stmt);
			}
		    }
		    break;

		  case CALL_EXPR:
		    op = gimple_call_arg (stmt, op_num);
		    break;

		  case LSHIFT_EXPR:
		  case RSHIFT_EXPR:
		  case LROTATE_EXPR:
		  case RROTATE_EXPR:
		    op = gimple_op (stmt, op_num + 1);
		    /* Unlike the other binary operators, shifts/rotates have
		       the shift count being int, instead of the same type as
		       the lhs, so make sure the scalar is the right type if
		       we are dealing with vectors of
		       long long/long/short/char.  */
		    if (op_num == 1 && TREE_CODE (op) == INTEGER_CST)
		      op = fold_convert (TREE_TYPE (vector_type), op);
		    break;

		  default:
		    op = gimple_op (stmt, op_num + 1);
		    break;
		}
	    }

	  /* Create 'vect_ = {op0,op1,...,opn}'.  */
	  number_of_places_left_in_vector--;
	  tree orig_op = op;
	  if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
	    {
	      if (CONSTANT_CLASS_P (op))
		{
		  if (VECTOR_BOOLEAN_TYPE_P (vector_type))
		    {
		      /* Can't use VIEW_CONVERT_EXPR for booleans because
			 of possibly different sizes of scalar value and
			 vector element.  */
		      if (integer_zerop (op))
			op = build_int_cst (TREE_TYPE (vector_type), 0);
		      else if (integer_onep (op))
			op = build_all_ones_cst (TREE_TYPE (vector_type));
		      else
			gcc_unreachable ();
		    }
		  else
		    op = fold_unary (VIEW_CONVERT_EXPR,
				     TREE_TYPE (vector_type), op);
		  gcc_assert (op && CONSTANT_CLASS_P (op));
		}
	      else
		{
		  tree new_temp = make_ssa_name (TREE_TYPE (vector_type));
		  gimple *init_stmt;
		  if (VECTOR_BOOLEAN_TYPE_P (vector_type))
		    {
		      tree true_val
			= build_all_ones_cst (TREE_TYPE (vector_type));
		      tree false_val
			= build_zero_cst (TREE_TYPE (vector_type));
		      gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (op)));
		      init_stmt = gimple_build_assign (new_temp, COND_EXPR,
						       op, true_val,
						       false_val);
		    }
		  else
		    {
		      op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vector_type),
				   op);
		      init_stmt
			= gimple_build_assign (new_temp, VIEW_CONVERT_EXPR,
					       op);
		    }
		  gimple_seq_add_stmt (&ctor_seq, init_stmt);
		  op = new_temp;
		}
	    }
	  elts[number_of_places_left_in_vector] = op;
	  if (!CONSTANT_CLASS_P (op))
	    constant_p = false;
	  if (TREE_CODE (orig_op) == SSA_NAME
	      && !SSA_NAME_IS_DEFAULT_DEF (orig_op)
	      && STMT_VINFO_BB_VINFO (stmt_vinfo)
	      && (STMT_VINFO_BB_VINFO (stmt_vinfo)->bb
		  == gimple_bb (SSA_NAME_DEF_STMT (orig_op))))
	    place_after_defs = true;

	  if (number_of_places_left_in_vector == 0)
	    {
	      if (constant_p)
		vec_cst = elts.build ();
	      else
		{
		  vec<constructor_elt, va_gc> *v;
		  unsigned k;
		  vec_alloc (v, nunits);
		  for (k = 0; k < nunits; ++k)
		    CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, elts[k]);
		  vec_cst = build_constructor (vector_type, v);
		}
	      tree init;
	      gimple_stmt_iterator gsi;
	      if (place_after_defs)
		{
		  gsi = gsi_for_stmt
		    (vect_find_last_scalar_stmt_in_slp (slp_node));
		  init = vect_init_vector (stmt, vec_cst, vector_type, &gsi);
		}
	      else
		init = vect_init_vector (stmt, vec_cst, vector_type, NULL);
	      if (ctor_seq != NULL)
		{
		  gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (init));
		  gsi_insert_seq_before_without_update (&gsi, ctor_seq,
							GSI_SAME_STMT);
		  ctor_seq = NULL;
		}
	      voprnds.quick_push (init);
	      place_after_defs = false;
	      number_of_places_left_in_vector = nunits;
	      constant_p = true;
	      elts.new_vector (vector_type, nunits, 1);
	      elts.quick_grow (nunits);
	    }
	}
    }

  /* Since the vectors are created in the reverse order, we should invert
     them.  */
  vec_num = voprnds.length ();
  for (j = vec_num; j != 0; j--)
    {
      vop = voprnds[j - 1];
      vec_oprnds->quick_push (vop);
    }

  voprnds.release ();

  /* In case that VF is greater than the unrolling factor needed for the SLP
     group of stmts, NUMBER_OF_VECTORS to be created is greater than
     NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
     to replicate the vectors.  */
  while (number_of_vectors > vec_oprnds->length ())
    {
      tree neutral_vec = NULL;

      if (neutral_op)
	{
	  if (!neutral_vec)
	    neutral_vec = build_vector_from_val (vector_type, neutral_op);

	  vec_oprnds->quick_push (neutral_vec);
	}
      else
	{
	  for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++)
	    vec_oprnds->quick_push (vop);
	}
    }
}
/* Get vectorized definitions from SLP_NODE that contains corresponding
   vectorized def-stmts.  */

static void
vect_get_slp_vect_defs (slp_tree slp_node, vec<tree> *vec_oprnds)
{
  tree vec_oprnd;
  gimple *vec_def_stmt;
  unsigned int i;

  gcc_assert (SLP_TREE_VEC_STMTS (slp_node).exists ());

  FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt)
    {
      gcc_assert (vec_def_stmt);
      if (gimple_code (vec_def_stmt) == GIMPLE_PHI)
	vec_oprnd = gimple_phi_result (vec_def_stmt);
      else
	vec_oprnd = gimple_get_lhs (vec_def_stmt);
      vec_oprnds->quick_push (vec_oprnd);
    }
}
/* Get vectorized definitions for SLP_NODE.
   If the scalar definitions are loop invariants or constants, collect them and
   call vect_get_constant_vectors() to create vector stmts.
   Otherwise, the def-stmts must be already vectorized and the vectorized stmts
   must be stored in the corresponding child of SLP_NODE, and we call
   vect_get_slp_vect_defs () to retrieve them.  */

void
vect_get_slp_defs (vec<tree> ops, slp_tree slp_node,
		   vec<vec<tree> > *vec_oprnds)
{
  gimple *first_stmt;
  int number_of_vects = 0, i;
  unsigned int child_index = 0;
  HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
  slp_tree child = NULL;
  vec<tree> vec_defs;
  tree oprnd;
  bool vectorized_defs;

  first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
  FOR_EACH_VEC_ELT (ops, i, oprnd)
    {
      /* For each operand we check if it has vectorized definitions in a child
	 node or we need to create them (for invariants and constants).  We
	 check if the LHS of the first stmt of the next child matches OPRND.
	 If it does, we found the correct child.  Otherwise, we call
	 vect_get_constant_vectors (), and not advance CHILD_INDEX in order
	 to check this child node for the next operand.  */
      vectorized_defs = false;
      if (SLP_TREE_CHILDREN (slp_node).length () > child_index)
	{
	  child = SLP_TREE_CHILDREN (slp_node)[child_index];

	  /* We have to check both pattern and original def, if available.  */
	  if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
	    {
	      gimple *first_def = SLP_TREE_SCALAR_STMTS (child)[0];
	      gimple *related
		= STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def));
	      tree first_def_op;

	      if (gimple_code (first_def) == GIMPLE_PHI)
		first_def_op = gimple_phi_result (first_def);
	      else
		first_def_op = gimple_get_lhs (first_def);
	      if (operand_equal_p (oprnd, first_def_op, 0)
		  || (related
		      && operand_equal_p (oprnd, gimple_get_lhs (related), 0)))
		{
		  /* The number of vector defs is determined by the number of
		     vector statements in the node from which we get those
		     statements.  */
		  number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
		  vectorized_defs = true;
		  child_index++;
		}
	    }
	  else
	    child_index++;
	}

      if (!vectorized_defs)
	{
	  if (i == 0)
	    {
	      number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
	      /* Number of vector stmts was calculated according to LHS in
		 vect_schedule_slp_instance (), fix it by replacing LHS with
		 RHS, if necessary.  See vect_get_smallest_scalar_type () for
		 details.  */
	      vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
					     &rhs_size_unit);
	      if (rhs_size_unit != lhs_size_unit)
		{
		  number_of_vects *= rhs_size_unit;
		  number_of_vects /= lhs_size_unit;
		}
	    }
	}

      /* Allocate memory for vectorized defs.  */
      vec_defs = vNULL;
      vec_defs.create (number_of_vects);

      /* For reduction defs we call vect_get_constant_vectors (), since we are
	 looking for initial loop invariant values.  */
      if (vectorized_defs)
	/* The defs are already vectorized.  */
	vect_get_slp_vect_defs (child, &vec_defs);
      else
	/* Build vectors from scalar defs.  */
	vect_get_constant_vectors (oprnd, slp_node, &vec_defs, i,
				   number_of_vects);

      vec_oprnds->quick_push (vec_defs);
    }
}
/* Generate vector permute statements from a list of loads in DR_CHAIN.
   If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
   permute statements for the SLP node NODE of the SLP instance
   SLP_NODE_INSTANCE.  */

bool
vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
			      gimple_stmt_iterator *gsi, int vf,
			      slp_instance slp_node_instance, bool analyze_only,
			      unsigned *n_perms)
{
  gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0];
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree mask_element_type = NULL_TREE, mask_type;
  int nunits, vec_index = 0;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
  int mask_element;
  machine_mode mode;

  if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
    return false;

  stmt_info = vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info));

  mode = TYPE_MODE (vectype);

  /* The generic VEC_PERM_EXPR code always uses an integral type of the
     same size as the vector element being permuted.  */
  mask_element_type = lang_hooks.types.type_for_mode
    (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))).require (), 1);
  mask_type = get_vectype_for_scalar_type (mask_element_type);
  nunits = TYPE_VECTOR_SUBPARTS (vectype);
  auto_vec_perm_indices mask (nunits);
  mask.quick_grow (nunits);

  /* Initialize the vect stmts of NODE to properly insert the generated
     stmts later.  */
  if (! analyze_only)
    for (unsigned i = SLP_TREE_VEC_STMTS (node).length ();
	 i < SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
      SLP_TREE_VEC_STMTS (node).quick_push (NULL);

  /* Generate permutation masks for every NODE.  Number of masks for each NODE
     is equal to GROUP_SIZE.
     E.g., we have a group of three nodes with three loads from the same
     location in each node, and the vector size is 4.  I.e., we have a
     a0b0c0a1b1c1... sequence and we need to create the following vectors:
     for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
     for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
     ...

     The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
     The last mask is illegal since we assume two operands for permute
     operation, and the mask element values can't be outside that range.
     Hence, the last mask must be converted into {2,5,5,5}.
     For the first two permutations we need the first and the second input
     vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
     we need the second and the third vectors: {b1,c1,a2,b2} and
     {c2,a3,b3,c3}.  */
= 0;
3602 int first_vec_index
= -1;
3603 int second_vec_index
= -1;
3607 for (int j
= 0; j
< vf
; j
++)
3609 for (int k
= 0; k
< group_size
; k
++)
3611 int i
= (SLP_TREE_LOAD_PERMUTATION (node
)[k
]
3612 + j
* STMT_VINFO_GROUP_SIZE (stmt_info
));
3613 vec_index
= i
/ nunits
;
3614 mask_element
= i
% nunits
;
3615 if (vec_index
== first_vec_index
3616 || first_vec_index
== -1)
3618 first_vec_index
= vec_index
;
3620 else if (vec_index
== second_vec_index
3621 || second_vec_index
== -1)
3623 second_vec_index
= vec_index
;
3624 mask_element
+= nunits
;
3628 if (dump_enabled_p ())
3630 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3631 "permutation requires at "
3632 "least three vectors ");
3633 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
3636 gcc_assert (analyze_only
);
3640 gcc_assert (mask_element
>= 0
3641 && mask_element
< 2 * nunits
);
3642 if (mask_element
!= index
)
3644 mask
[index
++] = mask_element
;
3646 if (index
== nunits
)
3649 && ! can_vec_perm_p (mode
, false, &mask
))
3651 if (dump_enabled_p ())
3653 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
3655 "unsupported vect permute { ");
3656 for (i
= 0; i
< nunits
; ++i
)
3657 dump_printf (MSG_MISSED_OPTIMIZATION
, "%d ", mask
[i
]);
3658 dump_printf (MSG_MISSED_OPTIMIZATION
, "}\n");
3660 gcc_assert (analyze_only
);
3669 tree mask_vec
= NULL_TREE
;
3673 tree_vector_builder
mask_elts (mask_type
, nunits
, 1);
3674 for (int l
= 0; l
< nunits
; ++l
)
3675 mask_elts
.quick_push (build_int_cst (mask_element_type
,
3677 mask_vec
= mask_elts
.build ();
3680 if (second_vec_index
== -1)
3681 second_vec_index
= first_vec_index
;
3683 /* Generate the permute statement if necessary. */
3684 tree first_vec
= dr_chain
[first_vec_index
];
3685 tree second_vec
= dr_chain
[second_vec_index
];
3690 = vect_create_destination_var (gimple_assign_lhs (stmt
),
3692 perm_dest
= make_ssa_name (perm_dest
);
3693 perm_stmt
= gimple_build_assign (perm_dest
,
3695 first_vec
, second_vec
,
3697 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
3700 /* If mask was NULL_TREE generate the requested
3701 identity transform. */
3702 perm_stmt
= SSA_NAME_DEF_STMT (first_vec
);
3704 /* Store the vector statement in NODE. */
3705 SLP_TREE_VEC_STMTS (node
)[vect_stmts_counter
++] = perm_stmt
;
3709 first_vec_index
= -1;
3710 second_vec_index
= -1;
typedef hash_map <vec <gimple *>, slp_tree,
		  simple_hashmap_traits <bst_traits, slp_tree> >
  scalar_stmts_to_slp_tree_map_t;
/* Vectorize SLP instance tree in postorder.  */

static bool
vect_schedule_slp_instance (slp_tree node, slp_instance instance,
			    scalar_stmts_to_slp_tree_map_t *bst_map)
{
  gimple *stmt;
  bool grouped_store, is_store;
  gimple_stmt_iterator si;
  stmt_vec_info stmt_info;
  unsigned int group_size;
  tree vectype;
  int i, j;
  slp_tree child;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return false;

  /* See if we have already vectorized the same set of stmts and reuse their
     vectorized stmts.  */
  slp_tree &leader
    = bst_map->get_or_insert (SLP_TREE_SCALAR_STMTS (node).copy ());
  if (leader)
    {
      SLP_TREE_VEC_STMTS (node).safe_splice (SLP_TREE_VEC_STMTS (leader));
      return false;
    }

  leader = node;
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_schedule_slp_instance (child, instance, bst_map);

  /* Push SLP node def-type to stmts.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
	STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = SLP_TREE_DEF_TYPE (child);

  stmt = SLP_TREE_SCALAR_STMTS (node)[0];
  stmt_info = vinfo_for_stmt (stmt);

  /* VECTYPE is the type of the destination.  */
  vectype = STMT_VINFO_VECTYPE (stmt_info);
  group_size = SLP_INSTANCE_GROUP_SIZE (instance);

  if (!SLP_TREE_VEC_STMTS (node).exists ())
    SLP_TREE_VEC_STMTS (node).create (SLP_TREE_NUMBER_OF_VEC_STMTS (node));

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "------>vectorizing SLP node starting from: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
    }

  /* Vectorized stmts go before the last scalar stmt which is where
     all uses are ready.  */
  si = gsi_for_stmt (vect_find_last_scalar_stmt_in_slp (node));

  /* Mark the first element of the reduction chain as reduction to properly
     transform the node.  In the analysis phase only the last element of the
     chain is marked as reduction.  */
  if (GROUP_FIRST_ELEMENT (stmt_info) && !STMT_VINFO_GROUPED_ACCESS (stmt_info)
      && GROUP_FIRST_ELEMENT (stmt_info) == stmt)
    {
      STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
      STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
    }

  /* Handle two-operation SLP nodes by vectorizing the group with
     both operations and then performing a merge.  */
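  /* Illustrative example (not from the original sources): for a group
     { x[0] = a + b;  x[1] = a - b; } the node is vectorized once with PLUS
     and once with MINUS, and the two vector results are then blended with a
     VEC_PERM_EXPR whose mask selects lane 0 from the PLUS result and lane 1
     from the MINUS result.  */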
  if (SLP_TREE_TWO_OPERATORS (node))
    {
      gimple *ostmt;
      enum tree_code code0 = gimple_assign_rhs_code (stmt);
      enum tree_code ocode = ERROR_MARK;
      auto_vec_perm_indices mask (group_size);
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, ostmt)
	if (gimple_assign_rhs_code (ostmt) != code0)
	  {
	    mask.quick_push (1);
	    ocode = gimple_assign_rhs_code (ostmt);
	  }
	else
	  mask.quick_push (0);
      if (ocode != ERROR_MARK)
	{
	  vec<gimple *> v0;
	  vec<gimple *> v1;
	  unsigned j;
	  tree tmask = NULL_TREE;
	  vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
	  v0 = SLP_TREE_VEC_STMTS (node).copy ();
	  SLP_TREE_VEC_STMTS (node).truncate (0);
	  gimple_assign_set_rhs_code (stmt, ocode);
	  vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
	  gimple_assign_set_rhs_code (stmt, code0);
	  v1 = SLP_TREE_VEC_STMTS (node).copy ();
	  SLP_TREE_VEC_STMTS (node).truncate (0);
	  tree meltype = build_nonstandard_integer_type
	    (GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype))), 1);
	  tree mvectype = get_same_sized_vectype (meltype, vectype);
	  unsigned k = 0, l;
	  for (j = 0; j < v0.length (); ++j)
	    {
	      unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
	      tree_vector_builder melts (mvectype, nunits, 1);
	      for (l = 0; l < nunits; ++l)
		{
		  if (k >= group_size)
		    k = 0;
		  tree t = build_int_cst (meltype, mask[k++] * nunits + l);
		  melts.quick_push (t);
		}
	      tmask = melts.build ();

	      /* ??? Not all targets support a VEC_PERM_EXPR with a
		 constant mask that would translate to a vec_merge RTX
		 (with their vec_perm_const_ok).  We can either not
		 vectorize in that case or let veclower do its job.
		 Unfortunately that isn't too great and at least for
		 plus/minus we'd eventually like to match targets
		 vector addsub instructions.  */
	      gimple *vstmt;
	      vstmt = gimple_build_assign (make_ssa_name (vectype),
					   VEC_PERM_EXPR,
					   gimple_assign_lhs (v0[j]),
					   gimple_assign_lhs (v1[j]), tmask);
	      vect_finish_stmt_generation (stmt, vstmt, &si);
	      SLP_TREE_VEC_STMTS (node).quick_push (vstmt);
	    }
	  v0.release ();
	  v1.release ();
	  return false;
	}
    }
  is_store = vect_transform_stmt (stmt, &si, &grouped_store, node, instance);

  /* Restore stmt def-types.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
	STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_internal_def;

  return is_store;
}
/* Replace scalar calls from SLP node NODE with setting of their lhs to zero.
   For loop vectorization this is done in vectorizable_call, but for SLP
   it needs to be deferred until end of vect_schedule_slp, because multiple
   SLP instances may refer to the same scalar stmt.  */

static void
vect_remove_slp_scalar_calls (slp_tree node)
{
  gimple *stmt, *new_stmt;
  gimple_stmt_iterator gsi;
  int i;
  slp_tree child;
  tree lhs;
  stmt_vec_info stmt_info;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_remove_slp_scalar_calls (child);

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      if (!is_gimple_call (stmt) || gimple_bb (stmt) == NULL)
	continue;
      stmt_info = vinfo_for_stmt (stmt);
      if (stmt_info == NULL
	  || is_pattern_stmt_p (stmt_info)
	  || !PURE_SLP_STMT (stmt_info))
	continue;
      lhs = gimple_call_lhs (stmt);
      new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
      set_vinfo_for_stmt (new_stmt, stmt_info);
      set_vinfo_for_stmt (stmt, NULL);
      STMT_VINFO_STMT (stmt_info) = new_stmt;
      gsi = gsi_for_stmt (stmt);
      gsi_replace (&gsi, new_stmt, false);
      SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
    }
}
/* Generate vector code for all SLP instances in the loop/basic block.  */

bool
vect_schedule_slp (vec_info *vinfo)
{
  vec<slp_instance> slp_instances;
  slp_instance instance;
  unsigned int i;
  bool is_store = false;

  slp_instances = vinfo->slp_instances;
  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      /* Schedule the tree of INSTANCE.  */
      scalar_stmts_to_slp_tree_map_t *bst_map
	= new scalar_stmts_to_slp_tree_map_t ();
      is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
					     instance, bst_map);
      delete bst_map;
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vectorizing stmts using SLP.\n");
    }

  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      slp_tree root = SLP_INSTANCE_TREE (instance);
      gimple *store;
      unsigned int j;
      gimple_stmt_iterator gsi;

      /* Remove scalar call stmts.  Do not do this for basic-block
	 vectorization as not all uses may be vectorized.
	 ??? Why should this be necessary?  DCE should be able to
	 remove the stmts itself.
	 ??? For BB vectorization we can as well remove scalar
	 stmts starting from the SLP tree root if they have no
	 uses.  */
      if (is_a <loop_vec_info> (vinfo))
	vect_remove_slp_scalar_calls (root);

      for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store)
		  && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
	{
	  if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
	    break;

	  if (is_pattern_stmt_p (vinfo_for_stmt (store)))
	    store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (store));
	  /* Free the attached stmt_vec_info and remove the stmt.  */
	  gsi = gsi_for_stmt (store);
	  unlink_stmt_vdef (store);
	  gsi_remove (&gsi, true);
	  release_defs (store);
	  free_stmt_vec_info (store);